diff options
author | Andreas Baumann <mail@andreasbaumann.cc> | 2017-03-05 09:57:46 +0100 |
---|---|---|
committer | Andreas Baumann <mail@andreasbaumann.cc> | 2017-03-05 09:57:46 +0100 |
commit | d6d7641cb9d5bf91f534e560c32d62227999d30b (patch) | |
tree | b16e6b420c5a6e1ec50d82f5491b0a075010aebc /strus | |
parent | 2bc6ab49e7950b75b232488ba2676dc8b2ced4e0 (diff) | |
download | www-andreasbaumann-cc-d6d7641cb9d5bf91f534e560c32d62227999d30b.tar.gz www-andreasbaumann-cc-d6d7641cb9d5bf91f534e560c32d62227999d30b.tar.bz2 |
added stemming of query
Diffstat (limited to 'strus')
-rw-r--r-- | strus/README | 9 |
-rw-r--r-- | strus/document.ana | 4 |
2 files changed, 9 insertions, 4 deletions
```diff
diff --git a/strus/README b/strus/README
index d503c0c..4f2ad15 100644
--- a/strus/README
+++ b/strus/README
@@ -7,17 +7,20 @@
 # YAML/TOML/JSON and Markdown:
 # remarshal (https://github.com/dbohdan/remarshal)
 # pandoc (http://pandoc.org/)
+# client-side needs:
+# https://github.com/fortnightlabs/snowball-js
 
 ./create_xml.sh > posts.xml
 xmllint -noout posts.xml
 
+# test configuration of document analysis
+
+strusAnalyze document.ana posts.xml |& less
+
 # Create the strus search index:
 
 rm -rf storage
 mkdir storage
 strusCreate -s 'path=storage/wwwandreasbaumanncc; metadata=doclen UINT16, publish_date UINT16'
-
-strusAnalyze document.ana posts.xml |& less
-
 strusInsert -c 1000 -f 1 -t 1 -s "path=storage/wwwandreasbaumanncc" document.ana posts.xml
diff --git a/strus/document.ana b/strus/document.ana
index 3ffefa8..e4fc8db 100644
--- a/strus/document.ana
+++ b/strus/document.ana
@@ -12,10 +12,12 @@
 [SearchIndex]
 # word = lc:convdia(en):stem(en):lc word /posts/post/body/para();
 # word = orig split /posts/post/body/para();
- word = lc regex("([A-Za-z']+)") /posts/post/body/para();
+ word = lc:convdia(en):stem(en):lc regex("([A-Za-z']+)") /posts/post/body/para();
+ word = lc:convdia(en):stem(en):lc regex("([A-Za-z']+)") /posts/post/meta()/title();
 
 [ForwardIndex]
  word = orig split /posts/post/body/para();
+ word = orig split /posts/post/meta()/title();
 
 #[MetaData]
 # release_date = date2int("d 1877-01-01", "%Y-%m-%d %H:%M:%s *") content /posts/post/meta()/date;
```