From a19b726625d74206b3a8e307722230d5ff265dec Mon Sep 17 00:00:00 2001
From: Andreas Baumann
Date: Sun, 29 Apr 2018 12:14:58 +0200
Subject: building json version of pages, added small offsite indexer and query test program (elasticlunr)

---
Review notes (below the '---' cut line, not part of the commit message):
 * query_index stops after a failed read of posts.index instead of
   handing undefined to JSON.parse, and takes the query from the first
   command line argument (default 'the')
 * create_index only gained comments and 'const' in place of 'var'
 * hunk headers and diffstat follow the edited files; the 'index' lines
   still carry the blob ids of the patch as originally published
 * the create_xml.sh hunk comes from a copy that lost '<...>' spans, it
   shows 54 of the 58 removed lines

 search/elasticlunr/README        | 17 +++++-------
 search/elasticlunr/create_index  | 39 +++++++++++++++++++++++++++
 search/elasticlunr/create_xml.sh | 58 ----------------------------------------
 search/elasticlunr/query_index   | 28 +++++++++++++++++++
 4 files changed, 73 insertions(+), 69 deletions(-)
 create mode 100755 search/elasticlunr/create_index
 delete mode 100755 search/elasticlunr/create_xml.sh
 create mode 100755 search/elasticlunr/query_index

(limited to 'search')

diff --git a/search/elasticlunr/README b/search/elasticlunr/README
index c1c318c..2295f36 100644
--- a/search/elasticlunr/README
+++ b/search/elasticlunr/README
@@ -3,20 +3,15 @@
 # Create a search index which can be served statically along the
 # static HTML pages to staticlunr.js.
 
-# YAML/TOML/JSON and Markdown:
-# remarshal (https://github.com/dbohdan/remarshal)
-# pandoc (http://pandoc.org/)
-
-# create XML and clean up some problematic constructs
-./create_xml.sh > posts.xml
-sed -i 's/xlink:href/href/g' posts.xml
-sed -i 's/ xml:id="[^"]\+"//g' posts.xml
-xmllint -noout posts.xml
-
-# convert XML to JSON (at least the relevant fields for the index)
+# generate JSON dynamically with a JSON output generator,
+# see https://halfelf.org/2017/hugos-making-json/
+curl http://localhost:1313/index.json > posts.json
+#curl http://www.andreasbaumann.cc/index.json > posts.json
 
+# we need nodejs and npm
 # use posts.json in a | node create_index.js pipeline
 # -> results in posts_index.json
+npm install JSONStream event-stream
 
 # add as static contents to hugo site
 
diff --git a/search/elasticlunr/create_index b/search/elasticlunr/create_index
new file mode 100755
index 0000000..2fb3adc
--- /dev/null
+++ b/search/elasticlunr/create_index
@@ -0,0 +1,39 @@
+#!/usr/bin/env node
+
+// Build an ElasticLunr search index from the entries of ./posts.json
+// and write it, serialized as JSON, to ./posts.index.
+
+const elasticlunr = require( './elasticlunr.min' );
+const fs = require( 'fs' );
+const JSONStream = require( 'JSONStream' );
+const es = require( 'event-stream' );
+
+console.log( 'Creating ElasticLunr index..' );
+
+// 'title' and 'content' are the indexed fields, 'uri' is the reference
+// of a document; saveDocument( true ) asks elasticlunr to keep the
+// documents themselves in the index
+const index = new elasticlunr.Index( );
+index.addField( 'title' );
+index.addField( 'content' );
+index.setRef( 'uri' );
+index.saveDocument( true );
+
+const stream = fs.createReadStream( './posts.json' );
+
+// JSONStream.parse( '*' ) hands over the top-level entries one by one
+stream.pipe( JSONStream.parse( '*') )
+    .pipe( es.mapSync( function( data ) {
+        index.addDoc( data );
+        return data;
+    } )
+    // all entries have been added once mapSync signals 'end'
+    .on( 'end', function( ) {
+        fs.writeFile( './posts.index', JSON.stringify( index ),
+            function( err ) {
+                if( err ) throw err;
+                console.log( 'Finished creating index..' );
+            }
+        );
+    } )
+)
diff --git a/search/elasticlunr/create_xml.sh b/search/elasticlunr/create_xml.sh
deleted file mode 100755
index 50ce4b3..0000000
--- a/search/elasticlunr/create_xml.sh
+++ /dev/null
@@ -1,58 +0,0 @@
-#!/bin/sh
-
-cat <
-
-EOF
-
-if test `uname -s` = 'Darwin'; then
-    SED=gsed
-else
-    SED=sed
-fi
-
-for file in `find ../../content/ -name '*.md'`; do
-    echo "$file.." 1>&2
-    slug=`echo $file | $SED 's@../../content@@g' | $SED 's@/_index.md$@@g' | $SED 's@.md$@@g'`
-    if test "x$slug" = "x"; then
-        slug="/"
-    fi
-
-    slug=`echo $slug | sed 's@^//@/@g'`
-
-    awk 'BEGIN { i = 0 } /\+\+\+/{x="F"++i;}{print > x;}' $file >/dev/null 2>&1
-
-    if test ! -f F1 -a -f F2; then
-        continue
-    fi
-
-    tail -n +2 F1 > meta.toml
-    tail -n +3 F2 > body.md
-
-    $SED -i 's/\&/&/g' meta.toml
-    $SED -i 's//\>/g' meta.toml
-    $SED -i 's/\&/&/g' body.md
-    $SED -i 's//\>/g' body.md
-
-    remarshal -if toml -of json meta.toml > meta.json
-    pandoc -f markdown -t docbook body.md > body.xml
-
-    echo ""
-    echo "$slug"
-    echo "$file"
-    echo ""
-    cat meta.json
-    echo ""
-    echo ""
-    cat body.xml
-    echo ""
-    echo ""
-
-    rm -f meta.* body.* F1 F2
-done
-
-cat <
-EOF
diff --git a/search/elasticlunr/query_index b/search/elasticlunr/query_index
new file mode 100755
index 0000000..4630ab7
--- /dev/null
+++ b/search/elasticlunr/query_index
@@ -0,0 +1,28 @@
+#!/usr/bin/env node
+
+// Load the index written by create_index from ./posts.index and print
+// the hits for a query: the first command line argument, or 'the'.
+
+const elasticlunr = require( './elasticlunr.min' );
+const fs = require( 'fs' );
+
+const query = process.argv[2] || 'the';
+
+console.log( 'Loading index..' );
+fs.readFile( './posts.index', 'utf8', function( err, data ) {
+    if( err ) {
+        // without index data there is nothing to parse or to search
+        console.error( err );
+        process.exitCode = 1;
+        return;
+    }
+    const index = elasticlunr.Index.load( JSON.parse( data ) );
+    console.log( 'Index loaded..' );
+    // a match in the title is boosted twice as much as one in the content
+    const results = index.search( query, { fields: { title : { boost : 2}, content : { boost : 1 } } } );
+
+    for( let i = 0; i < results.length; i++ ) {
+        console.log( i + ". " + results[i]['ref'] + " " + results[i]['doc']['title'] );
+        console.log( " " + results[i]['doc']['abstract'] );
+    }
+} );
-- 
cgit v1.2.3-54-g00ecf