From 579d25872a2b6f4b671e0511335af588a3728be7 Mon Sep 17 00:00:00 2001 From: Andreas Baumann Date: Sun, 5 Feb 2012 13:17:04 +0100 Subject: added -s (SolR) option, opening the index in Solr style to provide better maps for values (depending on schema.xml) --- pom.xml | 11 ++-- .../org/dyndns/andreasbaumann/LuceneAnalyzer.java | 58 ++++++++++++++++++++-- src/test/post.sh | 28 +++++++++++ src/test/test.xml | 18 +++++++ 4 files changed, 107 insertions(+), 8 deletions(-) create mode 100755 src/test/post.sh create mode 100644 src/test/test.xml diff --git a/pom.xml b/pom.xml index 6619cd4..a44266b 100644 --- a/pom.xml +++ b/pom.xml @@ -11,15 +11,20 @@ lucene-core 3.5.0 + + jargs + jargs + 1.0.0 + org.apache.solr solr-core 3.5.0 - jargs - jargs - 1.0.0 + org.slf4j + slf4j-simple + 1.5.6 junit diff --git a/src/main/java/org/dyndns/andreasbaumann/LuceneAnalyzer.java b/src/main/java/org/dyndns/andreasbaumann/LuceneAnalyzer.java index 3f5cbdd..1467f1e 100644 --- a/src/main/java/org/dyndns/andreasbaumann/LuceneAnalyzer.java +++ b/src/main/java/org/dyndns/andreasbaumann/LuceneAnalyzer.java @@ -26,6 +26,7 @@ import java.util.Collection; import java.util.Iterator; import java.util.ArrayList; import java.util.List; +import java.util.Properties; import jargs.gnu.CmdLineParser; import jargs.gnu.CmdLineParser.Option; @@ -43,6 +44,18 @@ import org.apache.lucene.index.TermPositions; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import java.util.logging.LogManager; +import java.util.logging.Logger; +import org.apache.solr.core.CoreContainer; +import org.apache.solr.core.CoreDescriptor; +import org.apache.solr.core.SolrResourceLoader; +import org.apache.solr.core.SolrConfig; +import org.apache.solr.core.SolrCore; +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.search.SolrIndexSearcher; +import javax.xml.parsers.ParserConfigurationException; +import org.xml.sax.SAXException; + /** * Lucene index analyzer. 
Works for file system indexes only (not * for indexes fully in RAM or in different persistence systems as @@ -241,6 +254,8 @@ public class LuceneAnalyzer "print statistics per term" ); Option headers = addHelp( parser.addBooleanOption( 'H', "headers" ), "print headers for sections" ); + Option solr = addHelp( parser.addBooleanOption( 's', "solr" ), + "treat index as a Solr index, indexDir is the Solr base dir" ); // read the command line options try { @@ -277,6 +292,10 @@ public class LuceneAnalyzer printHeaders = true; } + boolean isSolr = false; + if( (Boolean)parser.getOptionValue( solr, Boolean.FALSE ) ) { + isSolr = true; + } // read command line arguments String[] otherArgs = parser.getRemainingArgs( ); @@ -287,19 +306,48 @@ public class LuceneAnalyzer System.exit( 1 ); } - File indexDir = new File( otherArgs[0] ); + String basePath = otherArgs[0]; + String indexPath = otherArgs[0]; + if( isSolr ) { + indexPath += "/data/index"; + } + File indexDir = new File( indexPath ); if( !indexDir.exists( ) ) { - System.err.println( indexDir + " doesn't exist" ); + System.err.println( indexPath + " doesn't exist" ); System.exit( 1 ); } if( !indexDir.isDirectory( ) ) { - System.err.println( indexDir + " is not a directory" ); + System.err.println( indexPath + " is not a directory" ); System.exit( 1 ); } - + + SolrIndexSearcher solrSearcher; Directory luceneDirectory = new SimpleFSDirectory( indexDir ); IndexReader indexReader = IndexReader.open( luceneDirectory ); - + if( isSolr ) { + try { + Properties p = System.getProperties( ); + p.setProperty( "solr.solr.home", basePath ); + LogManager.getLogManager( ).reset( ); + Logger globalLogger = Logger.getLogger( java.util.logging.Logger.GLOBAL_LOGGER_NAME ); + globalLogger.setLevel( java.util.logging.Level.OFF ); + + CoreContainer cores = new CoreContainer( new SolrResourceLoader( basePath ) ); + SolrConfig solrConfig = new SolrConfig( basePath, SolrConfig.DEFAULT_CONF_FILE, null ); + CoreDescriptor descrCore = new 
CoreDescriptor( cores, "", solrConfig.getResourceLoader( ).getInstanceDir( ) ); + IndexSchema solrSchema = new IndexSchema( solrConfig, basePath + "/conf/schema.xml", null ); + SolrCore solrCore = new SolrCore( basePath, solrSchema ); + solrSearcher = new SolrIndexSearcher( solrCore, solrSchema, "test", + luceneDirectory, true, false ); + } catch( javax.xml.parsers.ParserConfigurationException e ) { + System.err.println( "Illegal Solr configuration: " + e ); + System.exit( 1 ); + } catch( org.xml.sax.SAXException e ) { + System.err.println( "Illegal Solr configuration: " + e ); + System.exit( 1 ); + } + } + if( (Boolean)parser.getOptionValue( globals, Boolean.FALSE ) ) { printGlobalInfo( indexReader, printHeaders ); } diff --git a/src/test/post.sh b/src/test/post.sh new file mode 100755 index 0000000..d9dd4ed --- /dev/null +++ b/src/test/post.sh @@ -0,0 +1,28 @@ +#!/bin/sh +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +FILES=$* +URL=http://localhost:8983/solr/update + +for f in $FILES; do + echo Posting file $f to $URL + curl $URL --data-binary @$f -H 'Content-type:application/xml' + echo +done + +#send the commit command to make sure all the changes are flushed and visible +curl $URL --data-binary '<commit/>' -H 'Content-type:application/xml' +echo diff --git a/src/test/test.xml b/src/test/test.xml new file mode 100644 index 0000000..80d60c7 --- /dev/null +++ b/src/test/test.xml @@ -0,0 +1,18 @@ + + + + doc1 + true + Samsung SpinPoint P120 SP2514N - hard drive - 250 GB - ATA-133 + + + + doc2 + false + Maxtor DiamondMax 11 - hard drive - 500 GB - SATA-300 + + + + + + -- cgit v1.2.3-54-g00ecf