Created
December 19, 2011 04:56
-
-
Save karthikshiraly/1495448 to your computer and use it in GitHub Desktop.
Function testing for Solr 1.4 schema.xml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
This class uses Solr 1.4 classes to load its schema.xml and run input text through the analyzers defined | |
in it. | |
*/ | |
public class SchemaTester { | |
public static void main(String[] args) { | |
try { | |
InputStream solrCfgIs = new FileInputStream( | |
"solr/conf/solrconfig.xml"); | |
SolrConfig solrConfig = new SolrConfig(null, solrCfgIs); | |
InputStream solrSchemaIs = new FileInputStream( | |
"solr/conf/schema.xml"); | |
IndexSchema solrSchema = new IndexSchema(solrConfig, null, | |
solrSchemaIs); | |
// Dumps all analyzer definitions in schema... | |
Map fieldTypes = solrSchema.getFieldTypes(); | |
for (Iterator<Entry<String, FieldType>> iter = fieldTypes.entrySet().iterator(); | |
iter.hasNext();) { | |
Entry entry = iter.next(); | |
FieldType fldType = entry.getValue(); | |
Analyzer analyzer = fldType.getAnalyzer(); | |
System.out.println(entry.getKey() + ":" + analyzer.toString()); | |
} | |
//String inputText = "HELLO_WORLD d:\\filepath\\filename.ext wi-fi wi-fi-3500 running TV camelCase test-hyphenated file.txt"; | |
String inputText = args[0]; | |
// Name of the field type in your schema.xml. ex: "textgen" | |
FieldType fieldTypeText = fieldTypes.get("textgen"); | |
System.out.println("Indexing analysis:"); | |
Analyzer analyzer = fieldTypeText.getAnalyzer(); | |
TokenStream tokenStream = analyzer.tokenStream("dummyfield", | |
new StringReader(inputText)); | |
TermAttribute termAttr = (TermAttribute) tokenStream.getAttribute(TermAttribute.class); | |
while (tokenStream.incrementToken()) { | |
System.out.println(termAttr.term()); | |
} | |
System.out.println("\n\nQuerying analysis:"); | |
Analyzer qryAnalyzer = fieldTypeText.getQueryAnalyzer(); | |
TokenStream qrytokenStream = qryAnalyzer.tokenStream("dummyfield", | |
new StringReader(inputText)); | |
TermAttribute termAttr2 = (TermAttribute) qrytokenStream.getAttribute(TermAttribute.class); | |
while (qrytokenStream.incrementToken()) { | |
System.out.println(termAttr2.term()); | |
} | |
} catch (Exception e) { | |
e.printStackTrace(); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment