diff options
Diffstat (limited to 'search')
-rw-r--r-- | search/elasticlunr/LINKS | 4 | ||||
-rw-r--r-- | search/elasticlunr/README | 22 | ||||
-rwxr-xr-x | search/elasticlunr/create_index | 31 | ||||
-rw-r--r-- | search/elasticlunr/elasticlunr.min.js | 10 | ||||
-rwxr-xr-x | search/elasticlunr/query_index | 19 | ||||
-rw-r--r-- | search/fts5/README | 87 | ||||
-rw-r--r-- | search/fts5/hugo-0.120.1-charset.patch | 12 | ||||
-rw-r--r-- | search/strus/README | 26 | ||||
-rwxr-xr-x | search/strus/create_xml.sh | 58 | ||||
-rw-r--r-- | search/strus/document.ana | 27 |
10 files changed, 296 insertions, 0 deletions
diff --git a/search/elasticlunr/LINKS b/search/elasticlunr/LINKS new file mode 100644 index 0000000..ea4de3a --- /dev/null +++ b/search/elasticlunr/LINKS @@ -0,0 +1,4 @@ +https://www.macrone.de/javascript-offline-search-elasictlunr-made-easy/ + +javascript only search synced json index +https://discourse.gohugo.io/t/how-to-add-lunr-js-to-your-site/9249/4 diff --git a/search/elasticlunr/README b/search/elasticlunr/README new file mode 100644 index 0000000..8cbebea --- /dev/null +++ b/search/elasticlunr/README @@ -0,0 +1,22 @@ +# Search index with elasticlunr + +# Create a search index which can be served statically alongside the +# static HTML pages to elasticlunr.js. + +# generate JSON dynamically with a JSON output generator, +# see https://halfelf.org/2017/hugos-making-json/ +#curl http://localhost:1313/index.json > posts.json +curl http://www.andreasbaumann.cc/index.json > posts.json + +# we need nodejs and npm +npm install JSONStream event-stream + +# use posts.json in a | node create_index.js pipeline +# -> results in posts.index +./create_index +cp posts.index ../../static/index/. + +# add as static contents to hugo site + +# load from JS search code on demand (first query) if possible, +# if small, do it immediately when loading the search widget. diff --git a/search/elasticlunr/create_index b/search/elasticlunr/create_index new file mode 100755 index 0000000..2fb3adc --- /dev/null +++ b/search/elasticlunr/create_index @@ -0,0 +1,31 @@ +#!/usr/bin/env node + +const elasticlunr = require( './elasticlunr.min' ); +const fs = require( 'fs' ); +const JSONStream = require( 'JSONStream' ); +const es = require( 'event-stream' ); + +console.log( 'Creating ElasticLunr index..' 
); + +const index = new elasticlunr.Index( ); +index.addField( 'title' ); +index.addField( 'content' ); +index.setRef( 'uri' ); +index.saveDocument( true ); + +var stream = fs.createReadStream( './posts.json' ); + +stream.pipe( JSONStream.parse( '*') ) + .pipe( es.mapSync( function( data ) { + index.addDoc( data ); + return data; + } ) + .on( 'end', function( ) { + fs.writeFile( './posts.index', JSON.stringify( index ), + function( err ) { + if( err ) throw err; + console.log( 'Finished creating index..' ); + } + ); + } ) +) diff --git a/search/elasticlunr/elasticlunr.min.js b/search/elasticlunr/elasticlunr.min.js new file mode 100644 index 0000000..94b20dd --- /dev/null +++ b/search/elasticlunr/elasticlunr.min.js @@ -0,0 +1,10 @@ +/** + * elasticlunr - http://weixsong.github.io + * Lightweight full-text search engine in Javascript for browser search and offline search. - 0.9.5 + * + * Copyright (C) 2017 Oliver Nightingale + * Copyright (C) 2017 Wei Song + * MIT Licensed + * @license + */ +!function(){function e(e){if(null===e||"object"!=typeof e)return e;var t=e.constructor();for(var n in e)e.hasOwnProperty(n)&&(t[n]=e[n]);return t}var t=function(e){var n=new t.Index;return n.pipeline.add(t.trimmer,t.stopWordFilter,t.stemmer),e&&e.call(n,n),n};t.version="0.9.5",lunr=t,t.utils={},t.utils.warn=function(e){return function(t){e.console&&console.warn&&console.warn(t)}}(this),t.utils.toString=function(e){return void 0===e||null===e?"":e.toString()},t.EventEmitter=function(){this.events={}},t.EventEmitter.prototype.addListener=function(){var e=Array.prototype.slice.call(arguments),t=e.pop(),n=e;if("function"!=typeof t)throw new TypeError("last argument must be a function");n.forEach(function(e){this.hasHandler(e)||(this.events[e]=[]),this.events[e].push(t)},this)},t.EventEmitter.prototype.removeListener=function(e,t){if(this.hasHandler(e)){var n=this.events[e].indexOf(t);-1!==n&&(this.events[e].splice(n,1),0==this.events[e].length&&delete 
this.events[e])}},t.EventEmitter.prototype.emit=function(e){if(this.hasHandler(e)){var t=Array.prototype.slice.call(arguments,1);this.events[e].forEach(function(e){e.apply(void 0,t)},this)}},t.EventEmitter.prototype.hasHandler=function(e){return e in this.events},t.tokenizer=function(e){if(!arguments.length||null===e||void 0===e)return[];if(Array.isArray(e)){var n=e.filter(function(e){return null===e||void 0===e?!1:!0});n=n.map(function(e){return t.utils.toString(e).toLowerCase()});var i=[];return n.forEach(function(e){var n=e.split(t.tokenizer.seperator);i=i.concat(n)},this),i}return e.toString().trim().toLowerCase().split(t.tokenizer.seperator)},t.tokenizer.defaultSeperator=/[\s\-]+/,t.tokenizer.seperator=t.tokenizer.defaultSeperator,t.tokenizer.setSeperator=function(e){null!==e&&void 0!==e&&"object"==typeof e&&(t.tokenizer.seperator=e)},t.tokenizer.resetSeperator=function(){t.tokenizer.seperator=t.tokenizer.defaultSeperator},t.tokenizer.getSeperator=function(){return t.tokenizer.seperator},t.Pipeline=function(){this._queue=[]},t.Pipeline.registeredFunctions={},t.Pipeline.registerFunction=function(e,n){n in t.Pipeline.registeredFunctions&&t.utils.warn("Overwriting existing registered function: "+n),e.label=n,t.Pipeline.registeredFunctions[n]=e},t.Pipeline.getRegisteredFunction=function(e){return e in t.Pipeline.registeredFunctions!=!0?null:t.Pipeline.registeredFunctions[e]},t.Pipeline.warnIfFunctionNotRegistered=function(e){var n=e.label&&e.label in this.registeredFunctions;n||t.utils.warn("Function is not registered with pipeline. 
This may cause problems when serialising the index.\n",e)},t.Pipeline.load=function(e){var n=new t.Pipeline;return e.forEach(function(e){var i=t.Pipeline.getRegisteredFunction(e);if(!i)throw new Error("Cannot load un-registered function: "+e);n.add(i)}),n},t.Pipeline.prototype.add=function(){var e=Array.prototype.slice.call(arguments);e.forEach(function(e){t.Pipeline.warnIfFunctionNotRegistered(e),this._queue.push(e)},this)},t.Pipeline.prototype.after=function(e,n){t.Pipeline.warnIfFunctionNotRegistered(n);var i=this._queue.indexOf(e);if(-1===i)throw new Error("Cannot find existingFn");this._queue.splice(i+1,0,n)},t.Pipeline.prototype.before=function(e,n){t.Pipeline.warnIfFunctionNotRegistered(n);var i=this._queue.indexOf(e);if(-1===i)throw new Error("Cannot find existingFn");this._queue.splice(i,0,n)},t.Pipeline.prototype.remove=function(e){var t=this._queue.indexOf(e);-1!==t&&this._queue.splice(t,1)},t.Pipeline.prototype.run=function(e){for(var t=[],n=e.length,i=this._queue.length,o=0;n>o;o++){for(var r=e[o],s=0;i>s&&(r=this._queue[s](r,o,e),void 0!==r&&null!==r);s++);void 0!==r&&null!==r&&t.push(r)}return t},t.Pipeline.prototype.reset=function(){this._queue=[]},t.Pipeline.prototype.get=function(){return this._queue},t.Pipeline.prototype.toJSON=function(){return this._queue.map(function(e){return t.Pipeline.warnIfFunctionNotRegistered(e),e.label})},t.Index=function(){this._fields=[],this._ref="id",this.pipeline=new t.Pipeline,this.documentStore=new t.DocumentStore,this.index={},this.eventEmitter=new t.EventEmitter,this._idfCache={},this.on("add","remove","update",function(){this._idfCache={}}.bind(this))},t.Index.prototype.on=function(){var e=Array.prototype.slice.call(arguments);return this.eventEmitter.addListener.apply(this.eventEmitter,e)},t.Index.prototype.off=function(e,t){return this.eventEmitter.removeListener(e,t)},t.Index.load=function(e){e.version!==t.version&&t.utils.warn("version mismatch: current "+t.version+" importing "+e.version);var n=new 
this;n._fields=e.fields,n._ref=e.ref,n.documentStore=t.DocumentStore.load(e.documentStore),n.pipeline=t.Pipeline.load(e.pipeline),n.index={};for(var i in e.index)n.index[i]=t.InvertedIndex.load(e.index[i]);return n},t.Index.prototype.addField=function(e){return this._fields.push(e),this.index[e]=new t.InvertedIndex,this},t.Index.prototype.setRef=function(e){return this._ref=e,this},t.Index.prototype.saveDocument=function(e){return this.documentStore=new t.DocumentStore(e),this},t.Index.prototype.addDoc=function(e,n){if(e){var n=void 0===n?!0:n,i=e[this._ref];this.documentStore.addDoc(i,e),this._fields.forEach(function(n){var o=this.pipeline.run(t.tokenizer(e[n]));this.documentStore.addFieldLength(i,n,o.length);var r={};o.forEach(function(e){e in r?r[e]+=1:r[e]=1},this);for(var s in r){var u=r[s];u=Math.sqrt(u),this.index[n].addToken(s,{ref:i,tf:u})}},this),n&&this.eventEmitter.emit("add",e,this)}},t.Index.prototype.removeDocByRef=function(e){if(e&&this.documentStore.isDocStored()!==!1&&this.documentStore.hasDoc(e)){var t=this.documentStore.getDoc(e);this.removeDoc(t,!1)}},t.Index.prototype.removeDoc=function(e,n){if(e){var n=void 0===n?!0:n,i=e[this._ref];this.documentStore.hasDoc(i)&&(this.documentStore.removeDoc(i),this._fields.forEach(function(n){var o=this.pipeline.run(t.tokenizer(e[n]));o.forEach(function(e){this.index[n].removeToken(e,i)},this)},this),n&&this.eventEmitter.emit("remove",e,this))}},t.Index.prototype.updateDoc=function(e,t){var t=void 0===t?!0:t;this.removeDocByRef(e[this._ref],!1),this.addDoc(e,!1),t&&this.eventEmitter.emit("update",e,this)},t.Index.prototype.idf=function(e,t){var n="@"+t+"/"+e;if(Object.prototype.hasOwnProperty.call(this._idfCache,n))return this._idfCache[n];var i=this.index[t].getDocFreq(e),o=1+Math.log(this.documentStore.length/(i+1));return this._idfCache[n]=o,o},t.Index.prototype.getFields=function(){return this._fields.slice()},t.Index.prototype.search=function(e,n){if(!e)return[];e="string"==typeof 
e?{any:e}:JSON.parse(JSON.stringify(e));var i=null;null!=n&&(i=JSON.stringify(n));for(var o=new t.Configuration(i,this.getFields()).get(),r={},s=Object.keys(e),u=0;u<s.length;u++){var a=s[u];r[a]=this.pipeline.run(t.tokenizer(e[a]))}var l={};for(var c in o){var d=r[c]||r.any;if(d){var f=this.fieldSearch(d,c,o),h=o[c].boost;for(var p in f)f[p]=f[p]*h;for(var p in f)p in l?l[p]+=f[p]:l[p]=f[p]}}var v,g=[];for(var p in l)v={ref:p,score:l[p]},this.documentStore.hasDoc(p)&&(v.doc=this.documentStore.getDoc(p)),g.push(v);return g.sort(function(e,t){return t.score-e.score}),g},t.Index.prototype.fieldSearch=function(e,t,n){var i=n[t].bool,o=n[t].expand,r=n[t].boost,s=null,u={};return 0!==r?(e.forEach(function(e){var n=[e];1==o&&(n=this.index[t].expandToken(e));var r={};n.forEach(function(n){var o=this.index[t].getDocs(n),a=this.idf(n,t);if(s&&"AND"==i){var l={};for(var c in s)c in o&&(l[c]=o[c]);o=l}n==e&&this.fieldSearchStats(u,n,o);for(var c in o){var d=this.index[t].getTermFrequency(n,c),f=this.documentStore.getFieldLength(c,t),h=1;0!=f&&(h=1/Math.sqrt(f));var p=1;n!=e&&(p=.15*(1-(n.length-e.length)/n.length));var v=d*a*h*p;c in r?r[c]+=v:r[c]=v}},this),s=this.mergeScores(s,r,i)},this),s=this.coordNorm(s,u,e.length)):void 0},t.Index.prototype.mergeScores=function(e,t,n){if(!e)return t;if("AND"==n){var i={};for(var o in t)o in e&&(i[o]=e[o]+t[o]);return i}for(var o in t)o in e?e[o]+=t[o]:e[o]=t[o];return e},t.Index.prototype.fieldSearchStats=function(e,t,n){for(var i in n)i in e?e[i].push(t):e[i]=[t]},t.Index.prototype.coordNorm=function(e,t,n){for(var i in e)if(i in t){var o=t[i].length;e[i]=e[i]*o/n}return e},t.Index.prototype.toJSON=function(){var e={};return this._fields.forEach(function(t){e[t]=this.index[t].toJSON()},this),{version:t.version,fields:this._fields,ref:this._ref,documentStore:this.documentStore.toJSON(),index:e,pipeline:this.pipeline.toJSON()}},t.Index.prototype.use=function(e){var 
t=Array.prototype.slice.call(arguments,1);t.unshift(this),e.apply(this,t)},t.DocumentStore=function(e){this._save=null===e||void 0===e?!0:e,this.docs={},this.docInfo={},this.length=0},t.DocumentStore.load=function(e){var t=new this;return t.length=e.length,t.docs=e.docs,t.docInfo=e.docInfo,t._save=e.save,t},t.DocumentStore.prototype.isDocStored=function(){return this._save},t.DocumentStore.prototype.addDoc=function(t,n){this.hasDoc(t)||this.length++,this.docs[t]=this._save===!0?e(n):null},t.DocumentStore.prototype.getDoc=function(e){return this.hasDoc(e)===!1?null:this.docs[e]},t.DocumentStore.prototype.hasDoc=function(e){return e in this.docs},t.DocumentStore.prototype.removeDoc=function(e){this.hasDoc(e)&&(delete this.docs[e],delete this.docInfo[e],this.length--)},t.DocumentStore.prototype.addFieldLength=function(e,t,n){null!==e&&void 0!==e&&0!=this.hasDoc(e)&&(this.docInfo[e]||(this.docInfo[e]={}),this.docInfo[e][t]=n)},t.DocumentStore.prototype.updateFieldLength=function(e,t,n){null!==e&&void 0!==e&&0!=this.hasDoc(e)&&this.addFieldLength(e,t,n)},t.DocumentStore.prototype.getFieldLength=function(e,t){return null===e||void 0===e?0:e in this.docs&&t in this.docInfo[e]?this.docInfo[e][t]:0},t.DocumentStore.prototype.toJSON=function(){return{docs:this.docs,docInfo:this.docInfo,length:this.length,save:this._save}},t.stemmer=function(){var e={ational:"ate",tional:"tion",enci:"ence",anci:"ance",izer:"ize",bli:"ble",alli:"al",entli:"ent",eli:"e",ousli:"ous",ization:"ize",ation:"ate",ator:"ate",alism:"al",iveness:"ive",fulness:"ful",ousness:"ous",aliti:"al",iviti:"ive",biliti:"ble",logi:"log"},t={icate:"ic",ative:"",alize:"al",iciti:"ic",ical:"ic",ful:"",ness:""},n="[^aeiou]",i="[aeiouy]",o=n+"[^aeiouy]*",r=i+"[aeiou]*",s="^("+o+")?"+r+o,u="^("+o+")?"+r+o+"("+r+")?$",a="^("+o+")?"+r+o+r+o,l="^("+o+")?"+i,c=new RegExp(s),d=new RegExp(a),f=new RegExp(u),h=new RegExp(l),p=/^(.+?)(ss|i)es$/,v=/^(.+?)([^s])s$/,g=/^(.+?)eed$/,m=/^(.+?)(ed|ing)$/,y=/.$/,S=/(at|bl|iz)$/,x=new 
RegExp("([^aeiouylsz])\\1$"),w=new RegExp("^"+o+i+"[^aeiouwxy]$"),I=/^(.+?[^aeiou])y$/,b=/^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/,E=/^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/,D=/^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/,F=/^(.+?)(s|t)(ion)$/,_=/^(.+?)e$/,P=/ll$/,k=new RegExp("^"+o+i+"[^aeiouwxy]$"),z=function(n){var i,o,r,s,u,a,l;if(n.length<3)return n;if(r=n.substr(0,1),"y"==r&&(n=r.toUpperCase()+n.substr(1)),s=p,u=v,s.test(n)?n=n.replace(s,"$1$2"):u.test(n)&&(n=n.replace(u,"$1$2")),s=g,u=m,s.test(n)){var z=s.exec(n);s=c,s.test(z[1])&&(s=y,n=n.replace(s,""))}else if(u.test(n)){var z=u.exec(n);i=z[1],u=h,u.test(i)&&(n=i,u=S,a=x,l=w,u.test(n)?n+="e":a.test(n)?(s=y,n=n.replace(s,"")):l.test(n)&&(n+="e"))}if(s=I,s.test(n)){var z=s.exec(n);i=z[1],n=i+"i"}if(s=b,s.test(n)){var z=s.exec(n);i=z[1],o=z[2],s=c,s.test(i)&&(n=i+e[o])}if(s=E,s.test(n)){var z=s.exec(n);i=z[1],o=z[2],s=c,s.test(i)&&(n=i+t[o])}if(s=D,u=F,s.test(n)){var z=s.exec(n);i=z[1],s=d,s.test(i)&&(n=i)}else if(u.test(n)){var z=u.exec(n);i=z[1]+z[2],u=d,u.test(i)&&(n=i)}if(s=_,s.test(n)){var z=s.exec(n);i=z[1],s=d,u=f,a=k,(s.test(i)||u.test(i)&&!a.test(i))&&(n=i)}return s=P,u=d,s.test(n)&&u.test(n)&&(s=y,n=n.replace(s,"")),"y"==r&&(n=r.toLowerCase()+n.substr(1)),n};return z}(),t.Pipeline.registerFunction(t.stemmer,"stemmer"),t.stopWordFilter=function(e){return e&&t.stopWordFilter.stopWords[e]!==!0?e:void 
0},t.clearStopWords=function(){t.stopWordFilter.stopWords={}},t.addStopWords=function(e){null!=e&&Array.isArray(e)!==!1&&e.forEach(function(e){t.stopWordFilter.stopWords[e]=!0},this)},t.resetStopWords=function(){t.stopWordFilter.stopWords=t.defaultStopWords},t.defaultStopWords={"":!0,a:!0,able:!0,about:!0,across:!0,after:!0,all:!0,almost:!0,also:!0,am:!0,among:!0,an:!0,and:!0,any:!0,are:!0,as:!0,at:!0,be:!0,because:!0,been:!0,but:!0,by:!0,can:!0,cannot:!0,could:!0,dear:!0,did:!0,"do":!0,does:!0,either:!0,"else":!0,ever:!0,every:!0,"for":!0,from:!0,get:!0,got:!0,had:!0,has:!0,have:!0,he:!0,her:!0,hers:!0,him:!0,his:!0,how:!0,however:!0,i:!0,"if":!0,"in":!0,into:!0,is:!0,it:!0,its:!0,just:!0,least:!0,let:!0,like:!0,likely:!0,may:!0,me:!0,might:!0,most:!0,must:!0,my:!0,neither:!0,no:!0,nor:!0,not:!0,of:!0,off:!0,often:!0,on:!0,only:!0,or:!0,other:!0,our:!0,own:!0,rather:!0,said:!0,say:!0,says:!0,she:!0,should:!0,since:!0,so:!0,some:!0,than:!0,that:!0,the:!0,their:!0,them:!0,then:!0,there:!0,these:!0,they:!0,"this":!0,tis:!0,to:!0,too:!0,twas:!0,us:!0,wants:!0,was:!0,we:!0,were:!0,what:!0,when:!0,where:!0,which:!0,"while":!0,who:!0,whom:!0,why:!0,will:!0,"with":!0,would:!0,yet:!0,you:!0,your:!0},t.stopWordFilter.stopWords=t.defaultStopWords,t.Pipeline.registerFunction(t.stopWordFilter,"stopWordFilter"),t.trimmer=function(e){if(null===e||void 0===e)throw new Error("token should not be undefined");return e.replace(/^\W+/,"").replace(/\W+$/,"")},t.Pipeline.registerFunction(t.trimmer,"trimmer"),t.InvertedIndex=function(){this.root={docs:{},df:0}},t.InvertedIndex.load=function(e){var t=new this;return t.root=e.root,t},t.InvertedIndex.prototype.addToken=function(e,t,n){for(var n=n||this.root,i=0;i<=e.length-1;){var o=e[i];o in n||(n[o]={docs:{},df:0}),i+=1,n=n[o]}var r=t.ref;n.docs[r]?n.docs[r]={tf:t.tf}:(n.docs[r]={tf:t.tf},n.df+=1)},t.InvertedIndex.prototype.hasToken=function(e){if(!e)return!1;for(var 
t=this.root,n=0;n<e.length;n++){if(!t[e[n]])return!1;t=t[e[n]]}return!0},t.InvertedIndex.prototype.getNode=function(e){if(!e)return null;for(var t=this.root,n=0;n<e.length;n++){if(!t[e[n]])return null;t=t[e[n]]}return t},t.InvertedIndex.prototype.getDocs=function(e){var t=this.getNode(e);return null==t?{}:t.docs},t.InvertedIndex.prototype.getTermFrequency=function(e,t){var n=this.getNode(e);return null==n?0:t in n.docs?n.docs[t].tf:0},t.InvertedIndex.prototype.getDocFreq=function(e){var t=this.getNode(e);return null==t?0:t.df},t.InvertedIndex.prototype.removeToken=function(e,t){if(e){var n=this.getNode(e);null!=n&&t in n.docs&&(delete n.docs[t],n.df-=1)}},t.InvertedIndex.prototype.expandToken=function(e,t,n){if(null==e||""==e)return[];var t=t||[];if(void 0==n&&(n=this.getNode(e),null==n))return t;n.df>0&&t.push(e);for(var i in n)"docs"!==i&&"df"!==i&&this.expandToken(e+i,t,n[i]);return t},t.InvertedIndex.prototype.toJSON=function(){return{root:this.root}},t.Configuration=function(e,n){var e=e||"";if(void 0==n||null==n)throw new Error("fields should not be null");this.config={};var i;try{i=JSON.parse(e),this.buildUserConfig(i,n)}catch(o){t.utils.warn("user configuration parse failed, will use default configuration"),this.buildDefaultConfig(n)}},t.Configuration.prototype.buildDefaultConfig=function(e){this.reset(),e.forEach(function(e){this.config[e]={boost:1,bool:"OR",expand:!1}},this)},t.Configuration.prototype.buildUserConfig=function(e,n){var i="OR",o=!1;if(this.reset(),"bool"in e&&(i=e.bool||i),"expand"in e&&(o=e.expand||o),"fields"in e)for(var r in e.fields)if(n.indexOf(r)>-1){var s=e.fields[r],u=o;void 0!=s.expand&&(u=s.expand),this.config[r]={boost:s.boost||0===s.boost?s.boost:1,bool:s.bool||i,expand:u}}else t.utils.warn("field name in user configuration not found in index instance fields");else 
this.addAllFields2UserConfig(i,o,n)},t.Configuration.prototype.addAllFields2UserConfig=function(e,t,n){n.forEach(function(n){this.config[n]={boost:1,bool:e,expand:t}},this)},t.Configuration.prototype.get=function(){return this.config},t.Configuration.prototype.reset=function(){this.config={}},lunr.SortedSet=function(){this.length=0,this.elements=[]},lunr.SortedSet.load=function(e){var t=new this;return t.elements=e,t.length=e.length,t},lunr.SortedSet.prototype.add=function(){var e,t;for(e=0;e<arguments.length;e++)t=arguments[e],~this.indexOf(t)||this.elements.splice(this.locationFor(t),0,t);this.length=this.elements.length},lunr.SortedSet.prototype.toArray=function(){return this.elements.slice()},lunr.SortedSet.prototype.map=function(e,t){return this.elements.map(e,t)},lunr.SortedSet.prototype.forEach=function(e,t){return this.elements.forEach(e,t)},lunr.SortedSet.prototype.indexOf=function(e){for(var t=0,n=this.elements.length,i=n-t,o=t+Math.floor(i/2),r=this.elements[o];i>1;){if(r===e)return o;e>r&&(t=o),r>e&&(n=o),i=n-t,o=t+Math.floor(i/2),r=this.elements[o]}return r===e?o:-1},lunr.SortedSet.prototype.locationFor=function(e){for(var t=0,n=this.elements.length,i=n-t,o=t+Math.floor(i/2),r=this.elements[o];i>1;)e>r&&(t=o),r>e&&(n=o),i=n-t,o=t+Math.floor(i/2),r=this.elements[o];return r>e?o:e>r?o+1:void 0},lunr.SortedSet.prototype.intersect=function(e){for(var t=new lunr.SortedSet,n=0,i=0,o=this.length,r=e.length,s=this.elements,u=e.elements;;){if(n>o-1||i>r-1)break;s[n]!==u[i]?s[n]<u[i]?n++:s[n]>u[i]&&i++:(t.add(s[n]),n++,i++)}return t},lunr.SortedSet.prototype.clone=function(){var e=new lunr.SortedSet;return e.elements=this.toArray(),e.length=e.elements.length,e},lunr.SortedSet.prototype.union=function(e){var t,n,i;this.length>=e.length?(t=this,n=e):(t=e,n=this),i=t.clone();for(var o=0,r=n.toArray();o<r.length;o++)i.add(r[o]);return i},lunr.SortedSet.prototype.toJSON=function(){return this.toArray()},function(e,t){"function"==typeof 
define&&define.amd?define(t):"object"==typeof exports?module.exports=t():e.elasticlunr=t()}(this,function(){return t})}();
\ No newline at end of file diff --git a/search/elasticlunr/query_index b/search/elasticlunr/query_index new file mode 100755 index 0000000..90f61e5 --- /dev/null +++ b/search/elasticlunr/query_index @@ -0,0 +1,19 @@ +#!/usr/bin/env node + +const elasticlunr = require( './elasticlunr.min' ); +const fs = require( 'fs' ); + +console.log( 'Loading index..' ); +fs.readFile( './posts.index', 'utf8', function( err, data ) { + if( err ) { + console.log( err ); + } + var index = elasticlunr.Index.load( JSON.parse( data ) ); + console.log( 'Index loaded..' ); + var results = index.search( 'the', { fields: { title : { boost : 2 }, content : { boost : 1 } } } ); + + for( var i = 0; i < results.length; i++ ) { + console.log( i + ". " + results[i]['ref'] + " " + results[i]['doc']['title'] ); + console.log( " " + results[i]['doc']['abstract'] ); + } +} ); diff --git a/search/fts5/README b/search/fts5/README new file mode 100644 index 0000000..681b425 --- /dev/null +++ b/search/fts5/README @@ -0,0 +1,87 @@ +# Search index with Sqlite3 FTS5 Full Text Search + +# generate JSON dynamically with a JSON output generator, +# see https://halfelf.org/2017/hugos-making-json/ +#curl http://localhost:1313/index.json > posts.json +curl http://www.andreasbaumann.cc/index.json > posts.json + +# we need sqlite and FTS5 +# https://www.legendu.net/misc/blog/hands-on-full-text-search-in-sqlite3/ +pacman -S sqlite3 + +# use posts.json to create entries in the virtual FTS5 table +rm posts.db +cat <<EOF | sqlite3 posts.db +CREATE VIRTUAL TABLE posts USING fts5(uri,title,content); +EOF + +# create post SQL statements from JSON with JQ (https://jqlang.github.io/jq/manual/) +jq -j '.[] | "INSERT INTO posts(uri, title, content) VALUES (", ( [ .uri, .title, .content // empty ] | map(.|gsub("'"'"'";"`")|gsub("\n";" ")|@sh) | join(",")), ");\n"' \ + posts.json > posts.sql +sqlite3 posts.db < posts.sql +cp posts.db ../../static/index/. 
+ +# some test queries +# https://www.legendu.net/misc/blog/hands-on-full-text-search-in-sqlite3/ +# https://sqlite.org/fts5.html +sqlite3 posts.db +sqlite> select uri from posts where posts MATCH 'OpenBSD' ORDER BY bm25(posts); +sqlite> select uri,highlight(posts, 1, '<b>', '</b>'),snippet(posts, 2, '<b>', '</b>', '...', 50) from posts where posts MATCH 'OpenBSD' ORDER BY bm25(posts) limit 5 offset 0; + +# TODO: make a server-side search (search window and result page) +# this would mean we have to render the hugo and the ranklist part on the server? + +# typical "modern" web development craziness.. :-) +pacman --needed -S sha3sum +trizen -G emsdk +cd emsdk && makepkg -sif +sudo emsdk install latest +sudo emsdk activate latest +sudo chown -R abaumann:users /usr/lib/emsdk/ +source /usr/lib/emsdk/emsdk_env.sh +git clone --recursive https://github.com/jlongster/sql.js +# add -DSQLITE_ENABLE_FTS5 to CFLAGS in Makefile +# disable 'sha3sum -c cache/check.txt' (is broken) +# emcc: error: setting `INLINING_LIMIT` expects `bool` but got `int` +# set -sINLINING_LIMIT as boolean flag, modern emcc doesn't allow a cost integer here anymore it seems +make +#https://github.com/sql-js/sql.js/issues/546 + +building emscripten2 +/data/INSTALL/emscripten2/src/llvm-project/llvm/include/llvm/Support/Signals.h:119:8: error: variable or field ‘CleanupOnSignal’ declared void + 119 | void CleanupOnSignal(uintptr_t Context); + | ^~~~~~~~~~~~~~~ +In file included from /data/INSTALL/emscripten2/src/llvm-project/llvm/lib/Support/Signals.cpp:251: +/data/INSTALL/emscripten2/src/llvm-project/llvm/lib/Support/Unix/Signals.inc:348:44: error: ‘void llvm::sys::CleanupOnSignal(uintptr_t)’ should have been declared inside ‘llvm::sys’ + 348 | void sys::CleanupOnSignal(uintptr_t Context) { + + +# tons of errors, the containerized version uses VS Code and WSL, so this thing is +# hairy to build +# let's try a precompiled one +# https://verdicts.listen.dev/npm/sql.js-fts5 +# 
https://www.skypack.dev/view/sql.js-fts5 +npm install sql.js-fts5 +cp node_modules/sql.js-fts5/dist/sql-wasm.js ../../themes/new_theme/static/js/. +cp node_modules/sql.js-fts5/dist/sql-wasm.wasm ../../themes/new_theme/static/js/. + +# WASM is not read with self-hosted hugo server because of a charset-utf-8 added +# to application/wasm: https://github.com/gohugoio/hugo/issues/10734 +# => rebuild Hugo with hugo-0.120.1-charset.patch + +# https://jlongster.com/future-sql-web +# https://github.com/jlongster/absurd-sql +npm install absurd-sql +cp node_modules/absurd-sql/src/sqlite-fs.js ../../themes/new_theme/static/js/. +cp node_modules/absurd-sql/dist/indexeddb-backend.js ../../themes/new_theme/static/js/. + +# TODO: add as sqlite.js to page with data (local sqlite fts search) +# https://blog.ouseful.info/2022/04/06/compiling-full-text-search-fts5-into-sqlite-wasm-build/ +# https://github.com/phiresky/sql.js-httpvfs +# https://github.com/psanford/sqlite3vfshttp +# https://phiresky.github.io/blog/2021/hosting-sqlite-databases-on-github-pages/ + +# https://github.com/kbumsik/sqlite-wasm +sudo pacman -S typescript +# typescript compile errors +# 362 this.wasm._free(blobPtr); diff --git a/search/fts5/hugo-0.120.1-charset.patch b/search/fts5/hugo-0.120.1-charset.patch new file mode 100644 index 0000000..f22c1a3 --- /dev/null +++ b/search/fts5/hugo-0.120.1-charset.patch @@ -0,0 +1,12 @@ +diff -rauN hugo-0.120.1/hugolib/site.go hugo-0.120.1-charset-patch/hugolib/site.go +--- hugo-0.120.1/hugolib/site.go 2023-10-30 17:44:31.000000000 +0100 ++++ hugo-0.120.1-charset-patch/hugolib/site.go 2023-12-12 18:12:53.960153106 +0100 +@@ -400,7 +400,7 @@ + func (s *Site) RegisterMediaTypes() { + for _, mt := range s.conf.MediaTypes.Config { + for _, suffix := range mt.Suffixes() { +- _ = mime.AddExtensionType(mt.Delimiter+suffix, mt.Type+"; charset=utf-8") ++ _ = mime.AddExtensionType(mt.Delimiter+suffix, mt.Type) + } + } + } diff --git a/search/strus/README b/search/strus/README new 
file mode 100644 index 0000000..4f2ad15 --- /dev/null +++ b/search/strus/README @@ -0,0 +1,26 @@ +# Search index with strus + +# For now create an XML from the content, later have a directory iterator +# over 'content' and read TOML/YAML headers and markdown... + +# TODO: this becomes obsolete with a Hugo segmenter which understands +# YAML/TOML/JSON and Markdown: +# remarshal (https://github.com/dbohdan/remarshal) +# pandoc (http://pandoc.org/) +# client-side needs: +# https://github.com/fortnightlabs/snowball-js + +./create_xml.sh > posts.xml + +xmllint -noout posts.xml + +# test configuration of document analysis + +strusAnalyze document.ana posts.xml |& less + +# Create the strus search index: + +rm -rf storage +mkdir storage +strusCreate -s 'path=storage/wwwandreasbaumanncc; metadata=doclen UINT16, publish_date UINT16' +strusInsert -c 1000 -f 1 -t 1 -s "path=storage/wwwandreasbaumanncc" document.ana posts.xml diff --git a/search/strus/create_xml.sh b/search/strus/create_xml.sh new file mode 100755 index 0000000..50ce4b3 --- /dev/null +++ b/search/strus/create_xml.sh @@ -0,0 +1,58 @@ +#!/bin/sh + +cat <<EOF +<?xml version="1.0" encoding="UTF-8" standalone="yes"?> +<posts> +EOF + +if test `uname -s` = 'Darwin'; then + SED=gsed +else + SED=sed +fi + +for file in `find ../../content/ -name '*.md'`; do + echo "$file.." 1>&2 + slug=`echo $file | $SED 's@../../content@@g' | $SED 's@/_index.md$@@g' | $SED 's@.md$@@g'` + if test "x$slug" = "x"; then + slug="/" + fi + + slug=`echo $slug | sed 's@^//@/@g'` + + awk 'BEGIN { i = 0 } /\+\+\+/{x="F"++i;}{print > x;}' $file >/dev/null 2>&1 + + if test ! 
-f F1 || test ! -f F2; then # skip unless awk produced both the front matter (F1) and the body (F2) + continue + fi + + tail -n +2 F1 > meta.toml + tail -n +3 F2 > body.md + + $SED -i 's/\&/&amp;/g' meta.toml # XML-escape; ampersand first so the entities added below are not escaped again + $SED -i 's/</\&lt;/g' meta.toml + $SED -i 's/>/\&gt;/g' meta.toml + $SED -i 's/\&/&amp;/g' body.md + $SED -i 's/</\&lt;/g' body.md + $SED -i 's/>/\&gt;/g' body.md + + remarshal -if toml -of json meta.toml > meta.json + pandoc -f markdown -t docbook body.md > body.xml + + echo "<post>" + echo "<slug>$slug</slug>" + echo "<filename>$file</filename>" + echo "<meta>" + cat meta.json + echo "</meta>" + echo "<body>" + cat body.xml + echo "</body>" + echo "</post>" + + rm -f meta.* body.* F1 F2 +done + +cat <<EOF +</posts> +EOF diff --git a/search/strus/document.ana b/search/strus/document.ana new file mode 100644 index 0000000..8fbcf3e --- /dev/null +++ b/search/strus/document.ana @@ -0,0 +1,27 @@ +[Document] + post = /posts/post; + +[Content] + "encoding=UTF-8; content=JSON;" /posts/post/meta(); + +[Attribute] + docid = orig content /posts/post/slug(); + title = orig content /posts/post/meta()/title(); + categories = orig content /posts/post/meta()/categories(); + thumbnail = orig content /posts/post/meta()/thumbnail(); + +[SearchIndex] + word = lc:convdia(en):stem(en):lc regex("([A-Za-z']+)") /posts/post/meta()/title(); + word = lc:convdia(en):stem(en):lc regex("([A-Za-z']+)") /posts/post/body//para(); + sentence = empty punctuation("en") /posts/post/body//para(); + +[ForwardIndex] + title = orig split /posts/post/meta()/title(); + text = orig split /posts/post/body//para(); + +#[MetaData] +# release_date = date2int("d 1877-01-01", "%Y-%m-%d %H:%M:%s *") content /posts/post/meta()/date; + +[Aggregator] + doclen = count( word ); + |