diff options
author | Andreas Baumann <mail@andreasbaumann.cc> | 2018-04-27 20:45:12 +0200 |
---|---|---|
committer | Andreas Baumann <mail@andreasbaumann.cc> | 2018-04-27 20:45:12 +0200 |
commit | 74087a15b63dbc062e1c8a785917f44cda5f14c7 (patch) | |
tree | bd20677420b33aae27bb3ba6065c1757880caecf /strus | |
parent | e372ee22511a0e084b7957316909705bf9628d01 (diff) | |
download | www-andreasbaumann-cc-74087a15b63dbc062e1c8a785917f44cda5f14c7.tar.gz www-andreasbaumann-cc-74087a15b63dbc062e1c8a785917f44cda5f14c7.tar.bz2 |
better GDPR page
Diffstat (limited to 'strus')
-rw-r--r-- | strus/README | 26 | ||||
-rwxr-xr-x | strus/create_xml.sh | 58 | ||||
-rw-r--r-- | strus/document.ana | 27 |
3 files changed, 0 insertions, 111 deletions
diff --git a/strus/README b/strus/README deleted file mode 100644 index 4f2ad15..0000000 --- a/strus/README +++ /dev/null @@ -1,26 +0,0 @@ -# Search index with strus - -# For now create an XML from the content, later have a directory iterator -# over 'content' and read TOML/YAML headers and markdown... - -# TODO: this becomes obsolete with a Hugo segmenter which undestands -# YAML/TOML/JSON and Markdown: -# remarshal (https://github.com/dbohdan/remarshal) -# pandoc (http://pandoc.org/) -# client-side needs: -# https://github.com/fortnightlabs/snowball-js - -./create_xml.sh > posts.xml - -xmllint -noout posts.xml - -# test configuration of document analysis - -strusAnalyze document.ana posts.xml |& less - -# Create the strus search index: - -rm -rf storage -mkdir storage -strusCreate -s 'path=storage/wwwandreasbaumanncc; metadata=doclen UINT16, publish_date UINT16' -strusInsert -c 1000 -f 1 -t 1 -s "path=storage/wwwandreasbaumanncc" document.ana posts.xml diff --git a/strus/create_xml.sh b/strus/create_xml.sh deleted file mode 100755 index 4ef5c5a..0000000 --- a/strus/create_xml.sh +++ /dev/null @@ -1,58 +0,0 @@ -#!/bin/sh - -cat <<EOF -<?xml version="1.0" encoding="UTF-8" standalone="yes"?> -<posts> -EOF - -if test `uname -s` = 'Darwin'; then - SED=gsed -else - SED=sed -fi - -for file in `find ../content/ -name '*.md'`; do - echo "$file.." 1>&2 - slug=`echo $file | $SED 's@../content@@g' | $SED 's@/_index.md$@@g' | $SED 's@.md$@@g'` - if test "x$slug" = "x"; then - slug="/" - fi - - slug=`echo $slug | sed 's@^//@/@g'` - - awk 'BEGIN { i = 0 } /\+\+\+/{x="F"++i;}{print > x;}' $file >/dev/null 2>&1 - - if test ! 
-f F1 -a -f F2; then - continue - fi - - tail -n +2 F1 > meta.toml - tail -n +3 F2 > body.md - - $SED -i 's/\&/&amp;/g' meta.toml - $SED -i 's/</\&lt;/g' meta.toml - $SED -i 's/>/\&gt;/g' meta.toml - $SED -i 's/\&/&amp;/g' body.md - $SED -i 's/</\&lt;/g' body.md - $SED -i 's/>/\&gt;/g' body.md - - remarshal -if toml -of json meta.toml > meta.json - pandoc -f markdown -t docbook body.md > body.xml - - echo "<post>" - echo "<slug>$slug</slug>" - echo "<filename>$file</filename>" - echo "<meta>" - cat meta.json - echo "</meta>" - echo "<body>" - cat body.xml - echo "</body>" - echo "</post>" - - rm -f meta.* body.* F1 F2 -done - -cat <<EOF -</posts> -EOF diff --git a/strus/document.ana b/strus/document.ana deleted file mode 100644 index 8fbcf3e..0000000 --- a/strus/document.ana +++ /dev/null @@ -1,27 +0,0 @@ -[Document] - post = /posts/post; - -[Content] - "encoding=UTF-8; content=JSON;" /posts/post/meta(); - -[Attribute] - docid = orig content /posts/post/slug(); - title = orig content /posts/post/meta()/title(); - categories = orig content /posts/post/meta()/categories(); - thumbnail = orig content /posts/post/meta()/thumbnail(); - -[SearchIndex] - word = lc:convdia(en):stem(en):lc regex("([A-Za-z']+)") /posts/post/meta()/title(); - word = lc:convdia(en):stem(en):lc regex("([A-Za-z']+)") /posts/post/body//para(); - sentence = empty punctuation("en") /posts/post/body//para(); - -[ForwardIndex] - title = orig split /posts/post/meta()/title(); - text = orig split /posts/post/body//para(); - -#[MetaData] -# release_date = date2int("d 1877-01-01", "%Y-%m-%d %H:%M:%s *") content /posts/post/meta()/date; - -[Aggregator] - doclen = count( word ); - |