diff options
Diffstat (limited to 'search/elasticlunr')
-rw-r--r-- | search/elasticlunr/README | 24 | ||||
-rwxr-xr-x | search/elasticlunr/create_xml.sh | 58 |
2 files changed, 82 insertions, 0 deletions
# ---------------------------------------------------------------------------
# File: search/elasticlunr/README  (new file)
# ---------------------------------------------------------------------------
#
#   # Search index with elasticlunr
#
#   # Create a search index which can be served statically alongside the
#   # static HTML pages to elasticlunr.js.
#
#   # YAML/TOML/JSON and Markdown:
#   # remarshal (https://github.com/dbohdan/remarshal)
#   # pandoc (http://pandoc.org/)
#
#   # create XML and clean up some problematic constructs
#   ./create_xml.sh > posts.xml
#   sed -i 's/xlink:href/href/g' posts.xml
#   sed -i 's/ xml:id="[^"]\+"//g' posts.xml
#   xmllint -noout posts.xml
#
#   # convert XML to JSON (at least the relevant fields for the index)
#
#   # use posts.json in a | node create_index.js pipeline
#   # -> results in posts_index.json
#
#   # add as static contents to hugo site
#
#   # load from JS search code on demand (first query) if possible,
#   # if small, do it immediately when loading the search widget.
#
# ---------------------------------------------------------------------------
# File: search/elasticlunr/create_xml.sh  (new file, mode 100755)
# ---------------------------------------------------------------------------
#!/bin/sh
#
# create_xml.sh - dump every Markdown post below the content directory as one
# XML document on stdout.
#
# For each *.md file that starts with a TOML front matter block delimited by
# two "+++" lines, a <post> element is written containing:
#   <slug>      path relative to the content directory, without ".md" or
#               "/_index.md" ("/" for the root _index.md)
#   <filename>  the path exactly as reported by find
#   <meta>      the front matter converted to JSON by remarshal
#   <body>      the Markdown body converted to DocBook by pandoc
#
# Files without a complete front matter block are skipped.
# Progress and warnings go to stderr; only the XML goes to stdout.
#
# Environment:
#   CONTENT_DIR  content directory to scan (default: ../../content)
#   TMPDIR       where the scratch directory is created (default: /tmp)
#
# Requires: remarshal, pandoc, awk, sed, tail, find, mktemp.
# Limitation: paths containing newline characters are not supported.

# Filter: replace the XML special characters &, < and > on stdin with their
# entities. "&" is handled first so the entities produced by the later
# expressions are not escaped a second time.
xml_escape() {
  sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g'
}

# Print the slug for a post.
#   $1 - path of the Markdown file as printed by find
#   $2 - content directory (without trailing slash) that was searched
# Runs in a subshell so its working variable does not leak.
slug_for_path() (
  rel=${1#"$2"}
  case $rel in
    */_index.md) rel=${rel%/_index.md} ;;
    *.md) rel=${rel%.md} ;;
  esac
  # Some find implementations print "dir//sub" when the start point ends in
  # a slash; collapse that leading double slash.
  case $rel in
    //*) rel=/${rel#//} ;;
  esac
  [ -n "$rel" ] || rel=/
  printf '%s\n' "$rel"
)

# Write the <post> element for one Markdown file to stdout.
#   $1 - path of the Markdown file
#   $2 - content directory (without trailing slash)
#   $3 - scratch directory for intermediate files
# Always returns 0; a file that cannot be converted is reported on stderr
# and skipped so the resulting document stays well-formed.
emit_post() (
  post_file=$1
  content_root=$2
  scratch=$3

  printf '%s..\n' "$post_file" >&2

  # Never reuse intermediate files left over from a previous post.
  rm -f -- "$scratch/F1" "$scratch/F2" \
    "$scratch/meta.toml" "$scratch/meta.json" \
    "$scratch/body.md" "$scratch/body.xml"

  # Split at the first two "+++" delimiter lines: F1 receives the opening
  # delimiter plus the front matter, F2 the closing delimiter plus the body.
  # Later "+++" lines belong to the body; lines before the first delimiter
  # are ignored.
  awk -v dir="$scratch" '
    /^[+][+][+][ \t\r]*$/ && parts < 2 { parts++; out = dir "/F" parts }
    parts > 0 { print > out }
  ' < "$post_file"

  if [ ! -f "$scratch/F1" ] || [ ! -f "$scratch/F2" ]; then
    printf 'skipping %s: no complete +++ front matter block\n' \
      "$post_file" >&2
    return 0
  fi

  # Drop the opening delimiter from the front matter; drop the closing
  # delimiter and the line following it from the body. Escape both before
  # conversion so the generated JSON/DocBook can be embedded in the XML.
  tail -n +2 "$scratch/F1" | xml_escape > "$scratch/meta.toml"
  tail -n +3 "$scratch/F2" | xml_escape > "$scratch/body.md"

  if ! remarshal -if toml -of json "$scratch/meta.toml" \
    > "$scratch/meta.json"; then
    printf 'skipping %s: remarshal failed\n' "$post_file" >&2
    return 0
  fi
  if ! pandoc -f markdown -t docbook "$scratch/body.md" \
    > "$scratch/body.xml"; then
    printf 'skipping %s: pandoc failed\n' "$post_file" >&2
    return 0
  fi

  printf '<post>\n'
  printf '<slug>%s</slug>\n' \
    "$(slug_for_path "$post_file" "$content_root" | xml_escape)"
  printf '<filename>%s</filename>\n' \
    "$(printf '%s\n' "$post_file" | xml_escape)"
  printf '<meta>\n'
  cat "$scratch/meta.json"
  printf '</meta>\n'
  printf '<body>\n'
  cat "$scratch/body.xml"
  printf '</body>\n'
  printf '</post>\n'
)

# Entry point: write the complete <posts> document to stdout.
main() {
  content_root=${CONTENT_DIR:-../../content}
  content_root=${content_root%/}

  # Private scratch directory instead of fixed file names in the current
  # directory, so unrelated meta.*/body.* files there are never touched.
  scratch_dir=$(mktemp -d "${TMPDIR:-/tmp}/create_xml.XXXXXX") || {
    printf 'create_xml.sh: cannot create a scratch directory\n' >&2
    return 1
  }
  trap 'rm -rf -- "$scratch_dir"' EXIT
  trap 'exit 1' HUP INT TERM

  printf '%s\n' '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
  printf '%s\n' '<posts>'

  if [ -d "$content_root" ]; then
    # Reading find's output line by line keeps paths with spaces intact.
    # stdin of the per-post work is detached so no tool can consume the
    # list of remaining paths.
    find "$content_root/" -name '*.md' -print |
      while IFS= read -r post_file; do
        emit_post "$post_file" "$content_root" "$scratch_dir" </dev/null
      done
  else
    # As before, a missing content directory yields an empty document.
    printf 'create_xml.sh: %s is not a directory\n' "$content_root" >&2
  fi

  printf '%s\n' '</posts>'

  rm -rf -- "$scratch_dir"
  trap - EXIT HUP INT TERM
}

main "$@"