commit dfd05756fa283e3f97359884f629f9d10a1cee61
parent 1fd6905fbc07dd54d9dd9b5265911d0bb34acea5
Author: phoebos <ben@bvnf.space>
Date:   Mon,  6 Nov 2023 13:02:38 +0000
add search
Diffstat:
4 files changed, 163 insertions(+), 2 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,4 @@
 # these files are ignored by git.
 *.html
 build-page
+db.sqlite
diff --git a/Makefile b/Makefile
@@ -3,7 +3,7 @@
 CFLAGS = -Wall -Wextra -std=c99 -pedantic
 LDFLAGS = -static -s
 
-all: html
+all: html db.sqlite
 
 html: build-page smu/smu htmlclean
 	find wiki -type d -exec sh -ec 'PATH="$$PWD/smu:$$PATH" ./build-page "$$0" >$$0/index.html' {} \;
@@ -12,6 +12,9 @@ htmlclean:
 	-find wiki -type f -name \*.html -exec rm {} \;
 	-find wiki -type d -exec rmdir {} \; 2>/dev/null
 
+db.sqlite:
+	tclsh update_db.tcl
+
 build-page: build-page.c
 	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ build-page.c
 
@@ -22,4 +25,4 @@ smu/smu:
 clean:
 	rm -f build-page
 
-.PHONY: clean
+.PHONY: all clean html htmlclean db.sqlite
diff --git a/cgi-bin/search b/cgi-bin/search
@@ -0,0 +1,117 @@
+#!/usr/bin/env tclsh8.6
+
+proc htmlize str {
+	string map {< < > > \x22 " \x26 &} $str
+	string map {< < > > \x22 " \x26 &} $str
+}
+
+proc url-decode str {
+	# adapted from https://wiki.tcl-lang.org/page/url%2Dencoding
+    set str [string map [list + { }] $str]
+	
+    set pos 0
+	while { -1 != [set pos [string first "%" $str $pos]]} {
+		set hexNumber "[string range $str $pos+1 $pos+2]"
+		set str [string range $str 0 $pos-1][binary decode hex $hexNumber][string range $str $pos+3 end]
+		incr pos
+	}
+	return [encoding convertfrom utf-8 $str]
+}
+
+proc query {db q} {
+	set e {select list(path, snippet(files, 0, '<b>', '</b>', '', 40), snippet(files, 1, '<b>', '</b>', ' … ', 40)) from files where files MATCH :q ORDER BY RANK LIMIT 200;}
+	if {[catch { set ll [$db eval $e] }]} {
+		set x ""
+		foreach word [split $q " "] {
+			append x " \"[string map [list "\"" "\"\""] $word]\""
+		}
+		set q [string trim $x]
+		set ll [$db eval $e]
+	}
+	return $ll
+}
+
+proc format_results {r t} {
+	set x ""
+	append x "<p>[llength $r] results in $t</p>"
+	if {[llength $r] == 0} {
+		append x "<p>No results.</p>"
+		return $x
+	}
+	append x {<table border=0>}
+	foreach res $r {
+		lassign $res path title snippet
+		append x "<tr><td><a href=\"$path\">$title</a><br /><p>$snippet<td></tr>\n"
+	}
+	append x {</table>}
+	return $x
+}
+
+proc main {} {
+	set t0 [clock milliseconds]
+	set doc {}
+
+	if {[info exists ::env(QUERY_STRING)]} {
+		set qs [split $::env(QUERY_STRING) &]
+		foreach q $qs {
+			lassign [split $q =] key val
+			if {$key == "query"} {
+				set searchterm $val
+				break
+			}
+		}
+	}
+
+	if {![info exists searchterm] || $searchterm eq {}} {
+		# no query, print plain searchbox
+		set ::status "200 OK"
+		set ::title Search
+		return {<h1>Search</h1><p><form><input type="text" name="query" /><input type="submit" value="Search" /></form></p>}
+	}
+
+	set searchterm [url-decode $searchterm]
+	set htmlsearchterm [htmlize $searchterm]
+
+	# do the search
+	package require sqlite3
+	sqlite3 db -create false "../db.sqlite"
+	db function list ::list
+	db timeout 2000 ;# does this do anything?
+
+	set results [query db $searchterm]
+	db close
+
+	set t1 [clock milliseconds]
+	set t "[expr {($t1-$t0)/1000.}]s"
+
+	set ::status "200 OK"
+	set ::title "$htmlsearchterm - search results"
+	set doc "<p>Search results for: <b>$htmlsearchterm</b></p><p><form><input type=\"text\" name=\"query\" value=\"$htmlsearchterm\" /><input type=\"submit\" value=\"Search\" /></form></p>"
+
+	append doc [format_results $results $t]
+	return $doc
+}
+
+if {0==[catch main res]} {
+	set document "<!DOCTYPE html><html><head><meta charset=\"utf-8\"/><meta name=\"viewport\" content=\"width=device-width, initial-scale=1\"><title>$::title</title><link rel=\"stylesheet\" type=\"text/css\" href=\"/style.css\"/><link rel=\"icon\" href=\"\"/></head>"
+	append document {<body><div id="menu"><a href="/"><b>KISS Community Wiki</b></a>
+        <span class="right">
+                <a href="https://github.com/kiss-community">Github</a>
+                <a href="https://codeberg.org/kiss-community">Codeberg</a>
+                <a href="https://kisslinux.org">Official site</a>
+        </span></div><div id="content"><div id="main">
+	}
+	append document $res {</div></div></body></html>}
+} else {
+	set ::status 500
+	set ::title Error
+	set document "<!DOCTYPE html><pre>Error: $res</pre>"
+}
+
+puts "Status: $::status\r"
+puts "Content-Type: text/html\r"
+puts "Content-Length: [string bytelength $document]\r"
+puts "\r"
+puts $document
+flush stdout
+close stdout
diff --git a/update_db.tcl b/update_db.tcl
@@ -0,0 +1,40 @@
+#!/usr/bin/env tclsh8.6
+# This script is run in the post-receive hook.
+
+package require sqlite3
+
+proc htmlize str {
+	string map {< < > > \x22 " \x26 & } $str
+}
+
+proc removespaces {content} {
+	set content [string map {__ {} ++ {} == {} -- {}} $content]
+	regsub -all {[ \t\n]+} $content { }
+}
+
+set files [split [exec find wiki -name *.md -o -name *.txt] \n]
+
+sqlite3 db db.sqlite
+db eval {
+	DROP TABLE IF EXISTS files;
+	CREATE VIRTUAL TABLE IF NOT EXISTS files USING fts5(path, content, tokenize = "porter unicode61 tokenchars '&;'")
+}
+
+foreach path $files {
+	set fh [open $path r]
+	set content [read $fh]
+	set content [removespaces $content]
+	if {[string match *.md $path]} {
+		# txts should already have chars escaped.
+		set content [htmlize $content]
+	}
+	close $fh
+
+	# ./wiki/foo/bar/index.xxx -> /foo/bar
+	set path [file join / {*}[lreplace [lreplace [file split $path] 0 0] end end]]
+	# remove duplicates (eg if both index.txt and index.md in same dir)
+	set path [lsort -unique $path]
+	db eval {INSERT INTO files VALUES($path, $content)}
+}
+
+db close