commit dfd05756fa283e3f97359884f629f9d10a1cee61
parent 1fd6905fbc07dd54d9dd9b5265911d0bb34acea5
Author: phoebos <ben@bvnf.space>
Date: Mon, 6 Nov 2023 13:02:38 +0000
add search
Diffstat:
4 files changed, 163 insertions(+), 2 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,4 @@
# these files are ignored by git.
*.html
build-page
+db.sqlite
diff --git a/Makefile b/Makefile
@@ -3,7 +3,7 @@
CFLAGS = -Wall -Wextra -std=c99 -pedantic
LDFLAGS = -static -s
-all: html
+all: html db.sqlite
html: build-page smu/smu htmlclean
find wiki -type d -exec sh -ec 'PATH="$$PWD/smu:$$PATH" ./build-page "$$0" >$$0/index.html' {} \;
@@ -12,6 +12,9 @@ htmlclean:
-find wiki -type f -name \*.html -exec rm {} \;
-find wiki -type d -exec rmdir {} \; 2>/dev/null
+db.sqlite:
+ tclsh update_db.tcl
+
build-page: build-page.c
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ build-page.c
@@ -22,4 +25,4 @@ smu/smu:
clean:
rm -f build-page
-.PHONY: clean
+.PHONY: all clean html htmlclean db.sqlite
diff --git a/cgi-bin/search b/cgi-bin/search
@@ -0,0 +1,117 @@
+#!/usr/bin/env tclsh8.6
+
+proc htmlize str {
+ string map {< < > > \x22 " \x26 &} $str
+ string map {< < > > \x22 " \x26 &} $str
+}
+
+proc url-decode str {
+ # adapted from https://wiki.tcl-lang.org/page/url%2Dencoding
+ set str [string map [list + { }] $str]
+
+ set pos 0
+ while { -1 != [set pos [string first "%" $str $pos]]} {
+ set hexNumber "[string range $str $pos+1 $pos+2]"
+ set str [string range $str 0 $pos-1][binary decode hex $hexNumber][string range $str $pos+3 end]
+ incr pos
+ }
+ return [encoding convertfrom utf-8 $str]
+}
+
+proc query {db q} {
+ set e {select list(path, snippet(files, 0, '<b>', '</b>', '', 40), snippet(files, 1, '<b>', '</b>', ' … ', 40)) from files where files MATCH :q ORDER BY RANK LIMIT 200;}
+ if {[catch { set ll [$db eval $e] }]} {
+ set x ""
+ foreach word [split $q " "] {
+ append x " \"[string map [list "\"" "\"\""] $word]\""
+ }
+ set q [string trim $x]
+ set ll [$db eval $e]
+ }
+ return $ll
+}
+
+proc format_results {r t} {
+ set x ""
+ append x "<p>[llength $r] results in $t</p>"
+ if {[llength $r] == 0} {
+ append x "<p>No results.</p>"
+ return $x
+ }
+ append x {<table border=0>}
+ foreach res $r {
+ lassign $res path title snippet
+ append x "<tr><td><a href=\"$path\">$title</a><br /><p>$snippet<td></tr>\n"
+ }
+ append x {</table>}
+ return $x
+}
+
+proc main {} {
+ set t0 [clock milliseconds]
+ set doc {}
+
+ if {[info exists ::env(QUERY_STRING)]} {
+ set qs [split $::env(QUERY_STRING) &]
+ foreach q $qs {
+ lassign [split $q =] key val
+ if {$key == "query"} {
+ set searchterm $val
+ break
+ }
+ }
+ }
+
+ if {![info exists searchterm] || $searchterm eq {}} {
+ # no query, print plain searchbox
+ set ::status "200 OK"
+ set ::title Search
+ return {<h1>Search</h1><p><form><input type="text" name="query" /><input type="submit" value="Search" /></form></p>}
+ }
+
+ set searchterm [url-decode $searchterm]
+ set htmlsearchterm [htmlize $searchterm]
+
+ # do the search
+ package require sqlite3
+ sqlite3 db -create false "../db.sqlite"
+ db function list ::list
+ db timeout 2000 ;# does this do anything?
+
+ set results [query db $searchterm]
+ db close
+
+ set t1 [clock milliseconds]
+ set t "[expr {($t1-$t0)/1000.}]s"
+
+ set ::status "200 OK"
+ set ::title "$htmlsearchterm - search results"
+ set doc "<p>Search results for: <b>$htmlsearchterm</b></p><p><form><input type=\"text\" name=\"query\" value=\"$htmlsearchterm\" /><input type=\"submit\" value=\"Search\" /></form></p>"
+
+ append doc [format_results $results $t]
+ return $doc
+}
+
+if {0==[catch main res]} {
+ set document "<!DOCTYPE html><html><head><meta charset=\"utf-8\"/><meta name=\"viewport\" content=\"width=device-width, initial-scale=1\"><title>$::title</title><link rel=\"stylesheet\" type=\"text/css\" href=\"/style.css\"/><link rel=\"icon\" href=\"data:image/gif;base64,R0lGODlhEAAQAPEDAAAAAP8AAP///wAAACH5BAX//wMALAAAAAAQABAAAAImnB+ni8kf4mNRzmbvqE5zPQDiqI3kBQhmqZ5TuqKtyMavCZT4UgAAOw==\"/></head>"
+ append document {<body><div id="menu"><a href="/"><b>KISS Community Wiki</b></a>
+ <span class="right">
+ <a href="https://github.com/kiss-community">Github</a>
+ <a href="https://codeberg.org/kiss-community">Codeberg</a>
+ <a href="https://kisslinux.org">Official site</a>
+ </span></div><div id="content"><div id="main">
+ }
+ append document $res {</div></div></body></html>}
+} else {
+ set ::status 500
+ set ::title Error
+ set document "<!DOCTYPE html><pre>Error: $res</pre>"
+}
+
+puts "Status: $::status\r"
+puts "Content-Type: text/html\r"
+puts "Content-Length: [string bytelength $document]\r"
+puts "\r"
+puts $document
+flush stdout
+close stdout
diff --git a/update_db.tcl b/update_db.tcl
@@ -0,0 +1,40 @@
+#!/usr/bin/env tclsh8.6
+# This script is run in the post-receive hook.
+
+package require sqlite3
+
+proc htmlize str {
+ string map {< < > > \x22 " \x26 & } $str
+}
+
+proc removespaces {content} {
+ set content [string map {__ {} ++ {} == {} -- {}} $content]
+ regsub -all {[ \t\n]+} $content { }
+}
+
+set files [split [exec find wiki -name *.md -o -name *.txt] \n]
+
+sqlite3 db db.sqlite
+db eval {
+ DROP TABLE IF EXISTS files;
+ CREATE VIRTUAL TABLE IF NOT EXISTS files USING fts5(path, content, tokenize = "porter unicode61 tokenchars '&;'")
+}
+
+foreach path $files {
+ set fh [open $path r]
+ set content [read $fh]
+ set content [removespaces $content]
+ if {[string match *.md $path]} {
+ # txts should already have chars escaped.
+ set content [htmlize $content]
+ }
+ close $fh
+
+ # ./wiki/foo/bar/index.xxx -> /foo/bar
+ set path [file join / {*}[lreplace [lreplace [file split $path] 0 0] end end]]
+ # remove duplicates (eg if both index.txt and index.md in same dir)
+ set path [lsort -unique $path]
+ db eval {INSERT INTO files VALUES($path, $content)}
+}
+
+db close