summaryrefslogtreecommitdiff
path: root/search/fts5/README
blob: 681b425b22a826b03c71b148a70ada65695f49ea (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# Search index with Sqlite3 FTS5 Full Text Search

# generate JSON dynamically with a JSON output generator,
# see https://halfelf.org/2017/hugos-making-json/
#curl http://localhost:1313/index.json > posts.json
curl http://www.andreasbaumann.cc/index.json > posts.json

# we need sqlite and FTS5
# https://www.legendu.net/misc/blog/hands-on-full-text-search-in-sqlite3/
pacman -S sqlite3

# use posts.json to create entries in the virtual FTS5 table
rm posts.db
cat <<EOF | sqlite3 posts.db
CREATE VIRTUAL TABLE posts USING fts5(uri,title,content);
EOF

# create post SQL statements from JSON with JQ (https://jqlang.github.io/jq/manual/)
jq -j '.[] | "INSERT INTO posts(uri, title, content) VALUES (", ( [ .uri, .title, .content // empty ] | map(.|gsub("'"'"'";"`")|gsub("\n";" ")|@sh) | join(",")), ");\n"' \
	posts.json > posts.sql
sqlite3 posts.db < posts.sql
cp posts.db ../../static/index/.

# some test queries
# https://www.legendu.net/misc/blog/hands-on-full-text-search-in-sqlite3/
# https://sqlite.org/fts5.html
sqlite3 posts.db
sqlite> select uri from posts where posts MATCH 'OpenBSD' ORDER BY bm25(posts);
sqlite> select uri,highlight(posts, 1, '<b>', '</b>'),snippet(posts, 2, '<b>', '</b>', '...', 50) from posts where posts MATCH 'OpenBSD' ORDER BY bm25(posts) limit 5 offset 0;

# TODO: make a server-side search (search window and result page)
# this would mean we have to render the hugo and the ranklist part on the server?

# typical "modern" web development craziness.. :-)
pacman --needed -S sha3sum
trizen -G emsdk
cd emdsk && makepkg -sif
sudo emsdk install latest
sudo emsdk activate latest
sudo chown -R abaumann:users /usr/lib/emsdk/
source /usr/lib/emsdk/emsdk_env.sh
git clone --recursive https://github.com/jlongster/sql.js
# add -DSQLITE_ENABLE_FTS5 to CFLAGS in Makefile
# disable 'sha3sum -c cache/check.txt' (is broken)
# emcc: error: setting `INLINING_LIMIT` expects `bool` but got `int`
# set -sINLINING_LIMIT as booelean flag, modern emcc doesn't allow a cost integer here anymore it seems
make
#https://github.com/sql-js/sql.js/issues/546

building emscripten2
/data/INSTALL/emscripten2/src/llvm-project/llvm/include/llvm/Support/Signals.h:119:8: error: variable or field ‘CleanupOnSignal’ declared void
  119 |   void CleanupOnSignal(uintptr_t Context);
      |        ^~~~~~~~~~~~~~~
In file included from /data/INSTALL/emscripten2/src/llvm-project/llvm/lib/Support/Signals.cpp:251:
/data/INSTALL/emscripten2/src/llvm-project/llvm/lib/Support/Unix/Signals.inc:348:44: error: ‘void llvm::sys::CleanupOnSignal(uintptr_t)’ should have been declared inside ‘llvm::sys’
  348 | void sys::CleanupOnSignal(uintptr_t Context) {


# tons of errors, the containersized version uses VCode and WSL, so this things is
# hairy to build
# let's try a precompiled one
# https://verdicts.listen.dev/npm/sql.js-fts5
# https://www.skypack.dev/view/sql.js-fts5
npm install sql.js-fts5
cp node_modules/sql.js-fts5/dist/sql-wasm.js ../../themes/new_theme/static/js/.
cp node_modules/sql.js-fts5/dist/sql-wasm.wasm ../../themes/new_theme/static/js/.

# WASM is not read with self-hosted hugo server because of a charset-utf-8 added
# to application/wasm: https://github.com/gohugoio/hugo/issues/10734
# => rebuild Hugo with hugo-0.120.1-charset.patch

# https://jlongster.com/future-sql-web
# https://github.com/jlongster/absurd-sql
npm install absurd-sql
cp node_modules/absurd-sql/src/sqlite-fs.js ../../themes/new_theme/static/js/.
cp node_modules/absurd-sql/dist/indexeddb-backend.js ../../themes/new_theme/static/js/.

# TODO: add as sqlite.js to page with data (local sqlite fts search)
# https://blog.ouseful.info/2022/04/06/compiling-full-text-search-fts5-into-sqlite-wasm-build/
# https://github.com/phiresky/sql.js-httpvfs
# https://github.com/psanford/sqlite3vfshttp
# https://phiresky.github.io/blog/2021/hosting-sqlite-databases-on-github-pages/

# https://github.com/kbumsik/sqlite-wasm
sudo pacman -S typescript
# typescript compile errors
# 362             this.wasm._free(blobPtr);