summary refs log tree commit diff
path: root/strus
diff options
context:
space:
mode:
author Andreas Baumann <mail@andreasbaumann.cc> 2017-03-05 09:57:46 +0100
committer Andreas Baumann <mail@andreasbaumann.cc> 2017-03-05 09:57:46 +0100
commit d6d7641cb9d5bf91f534e560c32d62227999d30b (patch)
tree b16e6b420c5a6e1ec50d82f5491b0a075010aebc /strus
parent 2bc6ab49e7950b75b232488ba2676dc8b2ced4e0 (diff)
download www-andreasbaumann-cc-d6d7641cb9d5bf91f534e560c32d62227999d30b.tar.gz
www-andreasbaumann-cc-d6d7641cb9d5bf91f534e560c32d62227999d30b.tar.bz2
added stemming of query
Diffstat (limited to 'strus')
-rw-r--r-- strus/README 9
-rw-r--r-- strus/document.ana 4
2 files changed, 9 insertions, 4 deletions
diff --git a/strus/README b/strus/README
index d503c0c..4f2ad15 100644
--- a/strus/README
+++ b/strus/README
@@ -7,17 +7,20 @@
# YAML/TOML/JSON and Markdown:
# remarshal (https://github.com/dbohdan/remarshal)
# pandoc (http://pandoc.org/)
+# client-side needs:
+# https://github.com/fortnightlabs/snowball-js
./create_xml.sh > posts.xml
xmllint -noout posts.xml
+# test configuration of document analysis
+
+strusAnalyze document.ana posts.xml |& less
+
# Create the strus search index:
rm -rf storage
mkdir storage
strusCreate -s 'path=storage/wwwandreasbaumanncc; metadata=doclen UINT16, publish_date UINT16'
-
-strusAnalyze document.ana posts.xml |& less
-
strusInsert -c 1000 -f 1 -t 1 -s "path=storage/wwwandreasbaumanncc" document.ana posts.xml
diff --git a/strus/document.ana b/strus/document.ana
index 3ffefa8..e4fc8db 100644
--- a/strus/document.ana
+++ b/strus/document.ana
@@ -12,10 +12,12 @@
[SearchIndex]
# word = lc:convdia(en):stem(en):lc word /posts/post/body/para();
# word = orig split /posts/post/body/para();
- word = lc regex("([A-Za-z']+)") /posts/post/body/para();
+ word = lc:convdia(en):stem(en):lc regex("([A-Za-z']+)") /posts/post/body/para();
+ word = lc:convdia(en):stem(en):lc regex("([A-Za-z']+)") /posts/post/meta()/title();
[ForwardIndex]
word = orig split /posts/post/body/para();
+ word = orig split /posts/post/meta()/title();
#[MetaData]
# release_date = date2int("d 1877-01-01", "%Y-%m-%d %H:%M:%s *") content /posts/post/meta()/date;