< prev index next >
apps/samples/Ensemble8/src/compiletime/java/ensemble/compiletime/search/BuildEnsembleSearchIndex.java
Print this page
rev 9898 : 8178275: Ensemble: Upgrade version of Lucene to 7.1.0
Reviewed-by: aghaisas, prr
@@ -1,7 +1,7 @@
/*
- * Copyright (c) 2008, 2015, Oracle and/or its affiliates.
+ * Copyright (c) 2008, 2017, Oracle and/or its affiliates.
* All rights reserved. Use is subject to license terms.
*
* This file is available and licensed under the following license:
*
* Redistribution and use in source and binary forms, with or without
@@ -30,11 +30,15 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package ensemble.compiletime.search;
import ensemble.compiletime.Sample;
-import java.io.*;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.InputStreamReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
@@ -49,16 +53,19 @@
import java.util.regex.Pattern;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
-import org.apache.lucene.util.Version;
+import org.apache.lucene.util.BytesRef;
/**
* Generate the lucene index that Ensemble uses for its search
*/
public class BuildEnsembleSearchIndex {
@@ -69,10 +76,12 @@
List<Callable<List<Document>>> tasks = new ArrayList<>();
// create callables to collect data
System.out.println("Creating Documents for Samples...");
docs.addAll(indexSamples(allSamples));
System.out.println("Creating tasks for getting all documentation...");
+ System.out.println("javaDocBaseUrl = " + javaDocBaseUrl);
+ System.out.println("javafxDocumentationHome = " + javafxDocumentationHome);
tasks.addAll(indexJavaDocAllClasses(javaDocBaseUrl));
tasks.addAll(indexAllDocumentation(javafxDocumentationHome));
// execute all the tasks in 32 threads, collecting all the documents to write
System.out.println("Executing tasks getting all documentation...");
try {
@@ -92,28 +101,26 @@
} catch (ExecutionException | InterruptedException ex) {
Logger.getLogger(BuildEnsembleSearchIndex.class.getName()).log(Level.SEVERE, null, ex);
}
// create index
System.out.println("Indexing to directory '" + indexDir + "'...");
- Directory dir = FSDirectory.open(indexDir);
- Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
- IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31, analyzer);
+ Directory dir = FSDirectory.open(indexDir.toPath());
+ Analyzer analyzer = new StandardAnalyzer();
+ IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
iwc.setOpenMode(OpenMode.CREATE);
try (IndexWriter writer = new IndexWriter(dir, iwc)) {
// write all docs
System.out.println("Writing ["+docs.size()+"] documents to index....");
writer.addDocuments(docs);
- // optimize the writen index
- System.out.println("Optimizing search index....");
- writer.optimize();
System.out.println("NUMBER OF INDEXED DOCUMENTS = ["+writer.numDocs()+"]");
}
// write file listing all the search index files, so we know what
// is in the jar file at runtime
try (FileWriter listAllOut = new FileWriter(new File(indexDir,"listAll.txt"))) {
for (String fileName: dir.listAll()) {
- if (!"listAll.txt".equals(fileName)) { // don't include the "listAll.txt" file
+ // don't include the "listAll.txt" file or "write.lock"
+ if (!"listAll.txt".equals(fileName) && !"write.lock".equals(fileName)) {
Long length = dir.fileLength(fileName);
listAllOut.write(fileName);
listAllOut.write(':');
listAllOut.write(length.toString());
listAllOut.write('\n');
@@ -165,15 +172,15 @@
if (section.name == null) {
System.out.println("section.name = "+section.name+" docPage.bookTitle="+docPage.bookTitle+" "+docPageUrl);
}
// write documentation section entry to index
docs.add(createDocument(DocumentType.DOC,
- new Field("bookTitle", docPage.bookTitle, Field.Store.YES, Field.Index.ANALYZED),
- new Field("chapter", docPage.chapter==null? "" : docPage.chapter, Field.Store.YES, Field.Index.ANALYZED),
- new Field("name", section.name, Field.Store.YES, Field.Index.ANALYZED),
- new Field("description", section.content, Field.Store.NO, Field.Index.ANALYZED),
- new Field("ensemblePath", section.url, Field.Store.YES, Field.Index.NOT_ANALYZED)
+ new TextField("bookTitle", docPage.bookTitle, Field.Store.YES),
+ new TextField("chapter", docPage.chapter==null? "" : docPage.chapter, Field.Store.YES),
+ new TextField("name", section.name, Field.Store.YES),
+ new TextField("description", section.content, Field.Store.NO),
+ new StringField("ensemblePath", section.url, Field.Store.YES)
));
}
// handle next page if there is one
if (docPage.nextUrl != null) {
docs.addAll(indexDocumentationPage(docPage.nextUrl));
@@ -204,15 +211,15 @@
/**
* Create one search index document for every sample in the given list.
* Each document is built through createDocument with DocumentType.SAMPLE and
* carries four fields: "name", "description" (Field.Store.NO, so the full
* text is not stored), "shortDescription" (at most the first 160 characters
* of the description) and "ensemblePath" ("sample://" followed by the
* sample's ensemblePath).
*
* @param allSamples the samples to create documents for
* @return the created documents, one per sample, in the list's iteration order
* @throws IOException declared because createDocument is declared to throw it
*/
private static List<Document> indexSamples(List<Sample> allSamples) throws IOException {
List<Document> docs = new ArrayList<>();
for (Sample sample: allSamples) {
// write sample entry to index
docs.add(createDocument(DocumentType.SAMPLE,
- new Field("name", sample.name, Field.Store.YES, Field.Index.ANALYZED),
- new Field("description", sample.description, Field.Store.NO, Field.Index.ANALYZED),
- new Field("shortDescription", sample.description.substring(0, Math.min(160, sample.description.length())),
- Field.Store.YES, Field.Index.NOT_ANALYZED),
- new Field("ensemblePath", "sample://"+sample.ensemblePath, Field.Store.YES, Field.Index.NOT_ANALYZED)
+ new TextField("name", sample.name, Field.Store.YES),
+ new TextField("description", sample.description, Field.Store.NO),
+ new StringField("shortDescription", sample.description.substring(0, Math.min(160, sample.description.length())),
+ Field.Store.YES),
+ new StringField("ensemblePath", "sample://"+sample.ensemblePath, Field.Store.YES)
));
}
return docs;
}
@@ -255,17 +262,17 @@
classDescription = cleanHTML(classDescriptionMatcher.group(1));
}
///System.out.println("classDescription = " + classDescription);
// write class entry to index
docs.add(createDocument(documentType,
- new Field("name", className, Field.Store.YES, Field.Index.ANALYZED),
- new Field("description", classDescription, Field.Store.NO, Field.Index.ANALYZED),
- new Field("shortDescription", classDescription.substring(0,Math.min(160,classDescription.length())),
- Field.Store.YES, Field.Index.NOT_ANALYZED),
- new Field("package", packageName, Field.Store.YES, Field.Index.ANALYZED),
- new Field("url", url, Field.Store.YES, Field.Index.NOT_ANALYZED),
- new Field("ensemblePath", url, Field.Store.YES, Field.Index.NOT_ANALYZED) // TODO what do we need here
+ new TextField("name", className, Field.Store.YES),
+ new TextField("description", classDescription, Field.Store.NO),
+ new StringField("shortDescription", classDescription.substring(0,Math.min(160,classDescription.length())),
+ Field.Store.YES),
+ new TextField("package", packageName, Field.Store.YES),
+ new StringField("url", url, Field.Store.YES),
+ new StringField("ensemblePath", url, Field.Store.YES) // TODO what do we need here
));
// extract properties
Matcher propertySummaryMatcher = PROPERTY_SUMMARY.matcher(content);
if (propertySummaryMatcher.find()) {
@@ -280,18 +287,18 @@
//System.out.println(" url = " + url);
propUrl = url + "#" + propertyName;
//System.out.println(" oracle url = " + url);
// write class entry to index
docs.add(createDocument(DocumentType.PROPERTY,
- new Field("name", propertyName, Field.Store.YES, Field.Index.ANALYZED),
- new Field("description", description, Field.Store.NO, Field.Index.ANALYZED),
- new Field("shortDescription", description.substring(0,Math.min(160,description.length())),
- Field.Store.YES, Field.Index.NOT_ANALYZED),
- new Field("url", propUrl, Field.Store.YES, Field.Index.NOT_ANALYZED),
- new Field("className", className, Field.Store.YES, Field.Index.NOT_ANALYZED),
- new Field("package", packageName, Field.Store.YES, Field.Index.NOT_ANALYZED),
- new Field("ensemblePath", url + "#" + propertyName, Field.Store.YES, Field.Index.NOT_ANALYZED) // TODO what do we need here
+ new TextField("name", propertyName, Field.Store.YES),
+ new TextField("description", description, Field.Store.NO),
+ new StringField("shortDescription", description.substring(0,Math.min(160,description.length())),
+ Field.Store.YES),
+ new StringField("url", propUrl, Field.Store.YES),
+ new StringField("className", className, Field.Store.YES),
+ new StringField("package", packageName, Field.Store.YES),
+ new StringField("ensemblePath", url + "#" + propertyName, Field.Store.YES) // TODO what do we need here
));
}
}
// extract methods
Matcher methodSummaryMatcher = METHOD_SUMMARY.matcher(content);
@@ -307,18 +314,18 @@
//System.out.println(" url = " + url);
methodUrl = url + "#" + methodName+"()";
//System.out.println(" oracle url = " + url);
// write class entry to index
docs.add(createDocument(DocumentType.METHOD,
- new Field("name", methodName, Field.Store.YES, Field.Index.ANALYZED),
- new Field("description", description, Field.Store.NO, Field.Index.ANALYZED),
- new Field("shortDescription", description.substring(0,Math.min(160,description.length())),
- Field.Store.YES, Field.Index.NOT_ANALYZED),
- new Field("url", methodUrl, Field.Store.YES, Field.Index.NOT_ANALYZED),
- new Field("className", className, Field.Store.YES, Field.Index.NOT_ANALYZED),
- new Field("package", packageName, Field.Store.YES, Field.Index.NOT_ANALYZED),
- new Field("ensemblePath", url + "#" + methodName + "()", Field.Store.YES, Field.Index.NOT_ANALYZED) // TODO what do we need here
+ new TextField("name", methodName, Field.Store.YES),
+ new TextField("description", description, Field.Store.NO),
+ new StringField("shortDescription", description.substring(0,Math.min(160,description.length())),
+ Field.Store.YES),
+ new StringField("url", methodUrl, Field.Store.YES),
+ new StringField("className", className, Field.Store.YES),
+ new StringField("package", packageName, Field.Store.YES),
+ new StringField("ensemblePath", url + "#" + methodName + "()", Field.Store.YES) // TODO what do we need here
));
}
}
// extract fields
Matcher fieldSummaryMatcher = FIELD_SUMMARY.matcher(content);
@@ -334,18 +341,18 @@
//System.out.println(" url = " + url);
fieldUrl = url + "#" + fieldName;
//System.out.println(" oracle url = " + url);
// write class entry to index
docs.add(createDocument(DocumentType.FIELD,
- new Field("name", fieldName, Field.Store.YES, Field.Index.ANALYZED),
- new Field("description", description, Field.Store.NO, Field.Index.ANALYZED),
- new Field("shortDescription", description.substring(0,Math.min(160,description.length())),
- Field.Store.YES, Field.Index.NOT_ANALYZED),
- new Field("url", fieldUrl, Field.Store.YES, Field.Index.NOT_ANALYZED),
- new Field("className", className, Field.Store.YES, Field.Index.NOT_ANALYZED),
- new Field("package", packageName, Field.Store.YES, Field.Index.NOT_ANALYZED),
- new Field("ensemblePath", url + "#" + fieldName, Field.Store.YES, Field.Index.NOT_ANALYZED) // TODO what do we need here
+ new TextField("name", fieldName, Field.Store.YES),
+ new TextField("description", description, Field.Store.NO),
+ new StringField("shortDescription", description.substring(0,Math.min(160,description.length())),
+ Field.Store.YES),
+ new StringField("url", fieldUrl, Field.Store.YES),
+ new StringField("className", className, Field.Store.YES),
+ new StringField("package", packageName, Field.Store.YES),
+ new StringField("ensemblePath", url + "#" + fieldName, Field.Store.YES) // TODO what do we need here
));
}
}
// extract enums
Matcher enumSummaryMatcher = ENUM_SUMMARY.matcher(content);
@@ -361,18 +368,18 @@
//System.out.println(" url = " + url);
enumUrl = url + "#" + enumName;
///System.out.println(" oracle url = " + url);
// write class entry to index
docs.add(createDocument(DocumentType.ENUM,
- new Field("name", enumName, Field.Store.YES, Field.Index.ANALYZED),
- new Field("description", description, Field.Store.NO, Field.Index.ANALYZED),
- new Field("shortDescription", description.substring(0,Math.min(160,description.length())),
- Field.Store.YES, Field.Index.NOT_ANALYZED),
- new Field("url", enumUrl, Field.Store.YES, Field.Index.NOT_ANALYZED),
- new Field("className", className, Field.Store.YES, Field.Index.NOT_ANALYZED),
- new Field("package", packageName, Field.Store.YES, Field.Index.NOT_ANALYZED),
- new Field("ensemblePath", url+ "#" + enumName, Field.Store.YES, Field.Index.NOT_ANALYZED) // TODO what do we need here
+ new TextField("name", enumName, Field.Store.YES),
+ new TextField("description", description, Field.Store.NO),
+ new StringField("shortDescription", description.substring(0,Math.min(160,description.length())),
+ Field.Store.YES),
+ new StringField("url", enumUrl, Field.Store.YES),
+ new StringField("className", className, Field.Store.YES),
+ new StringField("package", packageName, Field.Store.YES),
+ new StringField("ensemblePath", url+ "#" + enumName, Field.Store.YES) // TODO what do we need here
));
}
}
return docs;
}
@@ -385,12 +392,13 @@
* @throws IOException If there was problem writing doc
*/
private static Document createDocument(DocumentType documentType, Field... fields) throws IOException {
// make a new, empty document
Document doc = new Document();
- // add doc type field
- doc.add(new Field("documentType", documentType.toString(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
+ // add doc type field + sorting field
+ doc.add(new StringField("documentType", documentType.toString(), Field.Store.YES));
+ doc.add(new SortedDocValuesField("documentType", new BytesRef(documentType.toString())));
// add other fields
if (fields != null) {
for (Field field : fields) {
doc.add(field);
}
@@ -407,12 +415,13 @@
* @throws IOException If there was problem writing doc
*/
private static void addDocument(IndexWriter writer, DocumentType documentType, Field... fields) throws IOException {
// make a new, empty document
Document doc = new Document();
- // add doc type field
- doc.add(new Field("documentType", documentType.toString(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
+ // add doc type field + sorting field
+ doc.add(new StringField("documentType", documentType.toString(), Field.Store.YES));
+ doc.add(new SortedDocValuesField("documentType", new BytesRef(documentType.toString())));
// add other fields
if (fields != null) {
for (Field field : fields) {
doc.add(field);
}
< prev index next >