summaryrefslogtreecommitdiff
path: root/search/fts5/README
blob: 5ab27ede5764c39c4aaea5eef49c99d998eb2f84 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# Search index with Sqlite3 FTS5 Full Text Search

# generate JSON dynamically with a JSON output generator,
# see https://halfelf.org/2017/hugos-making-json/
#curl http://localhost:1313/index.json > posts.json
curl http://www.andreasbaumann.cc/index.json > posts.json

# we need sqlite and FTS5
# https://www.legendu.net/misc/blog/hands-on-full-text-search-in-sqlite3/
pacman -S sqlite3

# use posts.json to create entries in the virtual FTS5 table
rm posts.db
cat <<EOF | sqlite3 posts.db
CREATE VIRTUAL TABLE posts USING fts5(uri,title,content);
EOF

# https://jqlang.github.io/jq/manual/
jq -j '.[] | "INSERT INTO posts(uri, title, content) VALUES (", ( [ .uri, .title, .content // empty ] | map(.|gsub("'"'"'";"`")|gsub("\n";" ")|@sh) | join(",")), ");\n"' \
	posts.json > posts.sql
sqlite3 posts.db < posts.sql
cp posts.db ../../static/index/.

# some test queries
# https://www.legendu.net/misc/blog/hands-on-full-text-search-in-sqlite3/
# https://sqlite.org/fts5.html
select uri from posts where posts MATCH 'OpenBSD' ORDER BY bm25(posts);
select uri,highlight(posts, 1, '<b>', '</b>'),snippet(posts, 2, '<b>', '</b>', '...', 50) from posts where posts MATCH 'OpenBSD' ORDER BY bm25(posts) limit 5 offset 0;

# TODO: make a server-side search (search window and result page)
# this would mean we have to render the hugo and the ranklist part on the server?

# typical "modern" web development craziness.. :-)
pacman --needed -S sha3sum
trizen -G emsdk
cd emdsk && makepkg -sif
sudo emsdk install latest
sudo emsdk activate latest
sudo chown -R abaumann:users /usr/lib/emsdk/
source /usr/lib/emsdk/emsdk_env.sh
git clone --recursive https://github.com/jlongster/sql.js
# add -DSQLITE_ENABLE_FTS5 to CFLAGS in Makefile
# disable 'sha3sum -c cache/check.txt' (is broken)
# emcc: error: setting `INLINING_LIMIT` expects `bool` but got `int`
# set -sINLINING_LIMIT as booelean flag, modern emcc doesn't allow a cost integer here anymore it seems
make
# tons of errors, the containersized version uses VCode and WSL, so this things is
# hairy to build
# let's try a precompiled one
# https://verdicts.listen.dev/npm/sql.js-fts5
npm install sql.js-fts5
sudo pacman -S browserify
browserify main.js -o sql-fts5.js
cp sql-fts5.js ../../themes/new_theme/static/js/.
cp node_modules/sql.js-fts5/dist/sql-wasm.js ../../themes/new_theme/static/js/.

# TODO: add as sqlite.js to page with data (local sqlite fts search)
# https://blog.ouseful.info/2022/04/06/compiling-full-text-search-fts5-into-sqlite-wasm-build/
# https://jlongster.com/future-sql-web
# https://www.skypack.dev/view/sql.js-fts5
# https://github.com/phiresky/sql.js-httpvfs
# https://github.com/psanford/sqlite3vfshttp
# https://phiresky.github.io/blog/2021/hosting-sqlite-databases-on-github-pages/
# https://github.com/kbumsik/sqlite-wasm