summaryrefslogtreecommitdiff
path: root/search
diff options
context:
space:
mode:
authorAndreas Baumann <mail@andreasbaumann.cc>2018-04-29 12:14:58 +0200
committerAndreas Baumann <mail@andreasbaumann.cc>2018-04-29 12:14:58 +0200
commita19b726625d74206b3a8e307722230d5ff265dec (patch)
tree7e33f08a69a1d929e227ecec00fcf57613c3b66e /search
parenta5ca48ae1d0ddb487614bff39d618095cd81be9d (diff)
downloadwww-andreasbaumann-cc-a19b726625d74206b3a8e307722230d5ff265dec.tar.gz
www-andreasbaumann-cc-a19b726625d74206b3a8e307722230d5ff265dec.tar.bz2
building json version of pages, added small offsite indexer and query test program (elasticlunr)
Diffstat (limited to 'search')
-rw-r--r--search/elasticlunr/README17
-rwxr-xr-xsearch/elasticlunr/create_index31
-rwxr-xr-xsearch/elasticlunr/create_xml.sh58
-rwxr-xr-xsearch/elasticlunr/query_index19
4 files changed, 56 insertions, 69 deletions
diff --git a/search/elasticlunr/README b/search/elasticlunr/README
index c1c318c..2295f36 100644
--- a/search/elasticlunr/README
+++ b/search/elasticlunr/README
@@ -3,20 +3,15 @@
# Create a search index which can be served statically along the
# static HTML pages to elasticlunr.js.
-# YAML/TOML/JSON and Markdown:
-# remarshal (https://github.com/dbohdan/remarshal)
-# pandoc (http://pandoc.org/)
-
-# create XML and clean up some problematic constructs
-./create_xml.sh > posts.xml
-sed -i 's/xlink:href/href/g' posts.xml
-sed -i 's/ xml:id="[^"]\+"//g' posts.xml
-xmllint -noout posts.xml
-
-# convert XML to JSON (at least the relevant fields for the index)
+# generate JSON dynamically with a JSON output generator,
+# see https://halfelf.org/2017/hugos-making-json/
+curl http://localhost:1313/index.json > posts.json
+#curl http://www.andreasbaumann.cc/index.json > posts.json
+# we need nodejs and npm
# use posts.json in a | node create_index.js pipeline
# -> results in posts_index.json
+npm install JSONStream event-stream
# add as static contents to hugo site
diff --git a/search/elasticlunr/create_index b/search/elasticlunr/create_index
new file mode 100755
index 0000000..2fb3adc
--- /dev/null
+++ b/search/elasticlunr/create_index
@@ -0,0 +1,31 @@
+#!/usr/bin/env node
+// Reads ./posts.json, adds every parsed entry to an elasticlunr index and writes the serialized index to ./posts.index.
+const elasticlunr = require( './elasticlunr.min' );
+const fs = require( 'fs' );
+const JSONStream = require( 'JSONStream' );
+const es = require( 'event-stream' );
+
+console.log( 'Creating ElasticLunr index..' );
+
+const index = new elasticlunr.Index( );
+index.addField( 'title' );
+index.addField( 'content' );
+index.setRef( 'uri' ); // each entry is referenced by its 'uri' property
+index.saveDocument( true ); // NOTE(review): presumably keeps the original documents inside the index so query results can show them - confirm against the elasticlunr docs
+
+var stream = fs.createReadStream( './posts.json' );
+// The input is parsed as a stream; every entry emitted by the parser is added to the index.
+stream.pipe( JSONStream.parse( '*') )
+ .pipe( es.mapSync( function( data ) {
+ index.addDoc( data );
+ return data;
+ } )
+ .on( 'end', function( ) { // input exhausted: serialize the index to disk
+ fs.writeFile( './posts.index', JSON.stringify( index ),
+ function( err ) {
+ if( err ) throw err;
+ console.log( 'Finished creating index..' );
+ }
+ );
+ } )
+)
diff --git a/search/elasticlunr/create_xml.sh b/search/elasticlunr/create_xml.sh
deleted file mode 100755
index 50ce4b3..0000000
--- a/search/elasticlunr/create_xml.sh
+++ /dev/null
@@ -1,58 +0,0 @@
-#!/bin/sh
-
-cat <<EOF
-<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
-<posts>
-EOF
-
-if test `uname -s` = 'Darwin'; then
- SED=gsed
-else
- SED=sed
-fi
-
-for file in `find ../../content/ -name '*.md'`; do
- echo "$file.." 1>&2
- slug=`echo $file | $SED 's@../../content@@g' | $SED 's@/_index.md$@@g' | $SED 's@.md$@@g'`
- if test "x$slug" = "x"; then
- slug="/"
- fi
-
- slug=`echo $slug | sed 's@^//@/@g'`
-
- awk 'BEGIN { i = 0 } /\+\+\+/{x="F"++i;}{print > x;}' $file >/dev/null 2>&1
-
- if test ! -f F1 -a -f F2; then
- continue
- fi
-
- tail -n +2 F1 > meta.toml
- tail -n +3 F2 > body.md
-
- $SED -i 's/\&/&amp;/g' meta.toml
- $SED -i 's/</\&lt;/g' meta.toml
- $SED -i 's/>/\&gt;/g' meta.toml
- $SED -i 's/\&/&amp;/g' body.md
- $SED -i 's/</\&lt;/g' body.md
- $SED -i 's/>/\&gt;/g' body.md
-
- remarshal -if toml -of json meta.toml > meta.json
- pandoc -f markdown -t docbook body.md > body.xml
-
- echo "<post>"
- echo "<slug>$slug</slug>"
- echo "<filename>$file</filename>"
- echo "<meta>"
- cat meta.json
- echo "</meta>"
- echo "<body>"
- cat body.xml
- echo "</body>"
- echo "</post>"
-
- rm -f meta.* body.* F1 F2
-done
-
-cat <<EOF
-</posts>
-EOF
diff --git a/search/elasticlunr/query_index b/search/elasticlunr/query_index
new file mode 100755
index 0000000..4630ab7
--- /dev/null
+++ b/search/elasticlunr/query_index
@@ -0,0 +1,19 @@
+#!/usr/bin/env node
+// Loads the serialized index from ./posts.index, runs a sample query and prints reference, title and abstract of every hit.
+const elasticlunr = require( './elasticlunr.min' );
+const fs = require( 'fs' );
+
+console.log( 'Loading index..' );
+fs.readFile( './posts.index', 'utf8', function( err, data ) {
+ if( err ) {
+ throw err; // abort here: 'data' is undefined on error, so JSON.parse below would only fail with a misleading SyntaxError
+ }
+ var index = elasticlunr.Index.load( JSON.parse( data ) );
+ console.log( 'Index loaded..' );
+ var results = index.search( 'the', { fields: { title : { boost : 2}, content : { boost : 1 } } } );
+
+ for( var i = 0; i < results.length; i++ ) {
+ console.log( i + ". " + results[i]['ref'] + " " + results[i]['doc']['title'] );
+ console.log( " " + results[i]['doc']['abstract'] );
+ }
+} );