< prev index next >

apps/samples/Ensemble8/src/compiletime/java/ensemble/compiletime/search/BuildEnsembleSearchIndex.java

Print this page
rev 9898 : 8178275: Ensemble: Upgrade version of Lucene to 7.1.0
Reviewed-by: aghaisas, prr

@@ -1,7 +1,7 @@
 /*
- * Copyright (c) 2008, 2015, Oracle and/or its affiliates.
+ * Copyright (c) 2008, 2017, Oracle and/or its affiliates.
  * All rights reserved. Use is subject to license terms.
  *
  * This file is available and licensed under the following license:
  *
  * Redistribution and use in source and binary forms, with or without

@@ -30,11 +30,15 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 package ensemble.compiletime.search;
 
 import ensemble.compiletime.Sample;
-import java.io.*;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.InputStreamReader;
 import java.net.URL;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.concurrent.Callable;
 import java.util.concurrent.ExecutionException;

@@ -49,16 +53,19 @@
 import java.util.regex.Pattern;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
-import org.apache.lucene.util.Version;
+import org.apache.lucene.util.BytesRef;
 
 /**
  * Generate the lucene index that Ensemble uses for its search
  */
 public class BuildEnsembleSearchIndex {

@@ -69,10 +76,12 @@
             List<Callable<List<Document>>> tasks = new ArrayList<>();
             // create callables to collect data
             System.out.println("Creating Documents for Samples...");
             docs.addAll(indexSamples(allSamples));
             System.out.println("Creating tasks for getting all documentation...");
+            System.out.println("javaDocBaseUrl = " + javaDocBaseUrl);
+            System.out.println("javafxDocumentationHome = " + javafxDocumentationHome);
             tasks.addAll(indexJavaDocAllClasses(javaDocBaseUrl));
             tasks.addAll(indexAllDocumentation(javafxDocumentationHome));
             // execute all the tasks in 32 threads, collecting all the documents to write
             System.out.println("Executing tasks getting all documentation...");
             try {

@@ -92,28 +101,26 @@
             } catch (ExecutionException | InterruptedException ex) {
                 Logger.getLogger(BuildEnsembleSearchIndex.class.getName()).log(Level.SEVERE, null, ex);
             }
             // create index
             System.out.println("Indexing to directory '" + indexDir + "'...");
-            Directory dir = FSDirectory.open(indexDir);
-            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
-            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31, analyzer);
+            Directory dir = FSDirectory.open(indexDir.toPath());
+            Analyzer analyzer = new StandardAnalyzer();
+            IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
             iwc.setOpenMode(OpenMode.CREATE);
             try (IndexWriter writer = new IndexWriter(dir, iwc)) {
                 // write all docs
                 System.out.println("Writing ["+docs.size()+"] documents to index....");
                 writer.addDocuments(docs);
-                // optimize the writen index
-                System.out.println("Optimizing search index....");
-                writer.optimize();
                 System.out.println("NUMBER OF INDEXED DOCUMENTS = ["+writer.numDocs()+"]");
             }
             // write file listing all the search index files, so we know what
             // is in the jar file at runtime
             try (FileWriter listAllOut = new FileWriter(new File(indexDir,"listAll.txt"))) {
                 for (String fileName: dir.listAll()) {
-                    if (!"listAll.txt".equals(fileName)) { // don't include the "listAll.txt" file
+                    // don't include the "listAll.txt" file or "write.lock"
+                    if (!"listAll.txt".equals(fileName) && !"write.lock".equals(fileName)) {
                         Long length = dir.fileLength(fileName);
                         listAllOut.write(fileName);
                         listAllOut.write(':');
                         listAllOut.write(length.toString());
                         listAllOut.write('\n');

@@ -165,15 +172,15 @@
                 if (section.name == null) {
                     System.out.println("section.name = "+section.name+" docPage.bookTitle="+docPage.bookTitle+"    "+docPageUrl);
                 }
                 // write documentation section entry to index
                 docs.add(createDocument(DocumentType.DOC,
-                    new Field("bookTitle", docPage.bookTitle, Field.Store.YES, Field.Index.ANALYZED),
-                    new Field("chapter", docPage.chapter==null? "" : docPage.chapter, Field.Store.YES, Field.Index.ANALYZED),
-                    new Field("name", section.name, Field.Store.YES, Field.Index.ANALYZED),
-                    new Field("description", section.content, Field.Store.NO, Field.Index.ANALYZED),
-                    new Field("ensemblePath", section.url, Field.Store.YES, Field.Index.NOT_ANALYZED)
+                    new TextField("bookTitle", docPage.bookTitle, Field.Store.YES),
+                    new TextField("chapter", docPage.chapter==null? "" : docPage.chapter, Field.Store.YES),
+                    new TextField("name", section.name, Field.Store.YES),
+                    new TextField("description", section.content, Field.Store.NO),
+                    new StringField("ensemblePath", section.url, Field.Store.YES)
                 ));
             }
             // handle next page if there is one
             if (docPage.nextUrl != null) {
                 docs.addAll(indexDocumentationPage(docPage.nextUrl));

@@ -204,15 +211,15 @@
     private static List<Document> indexSamples(List<Sample> allSamples) throws IOException {
         List<Document> docs = new ArrayList<>();
         for (Sample sample: allSamples) {
             // write class entry to index
             docs.add(createDocument(DocumentType.SAMPLE,
-                new Field("name", sample.name, Field.Store.YES, Field.Index.ANALYZED),
-                new Field("description", sample.description, Field.Store.NO, Field.Index.ANALYZED),
-                new Field("shortDescription", sample.description.substring(0, Math.min(160, sample.description.length())),
-                        Field.Store.YES, Field.Index.NOT_ANALYZED),
-                new Field("ensemblePath", "sample://"+sample.ensemblePath, Field.Store.YES, Field.Index.NOT_ANALYZED)
+                new TextField("name", sample.name, Field.Store.YES),
+                new TextField("description", sample.description, Field.Store.NO),
+                new StringField("shortDescription", sample.description.substring(0, Math.min(160, sample.description.length())),
+                        Field.Store.YES),
+                new StringField("ensemblePath", "sample://"+sample.ensemblePath, Field.Store.YES)
             ));
         }
         return docs;
     }
 

@@ -255,17 +262,17 @@
             classDescription = cleanHTML(classDescriptionMatcher.group(1));
         }
         ///System.out.println("classDescription = " + classDescription);
         // write class entry to index
         docs.add(createDocument(documentType,
-                new Field("name", className, Field.Store.YES, Field.Index.ANALYZED),
-                new Field("description", classDescription, Field.Store.NO, Field.Index.ANALYZED),
-                new Field("shortDescription", classDescription.substring(0,Math.min(160,classDescription.length())),
-                        Field.Store.YES, Field.Index.NOT_ANALYZED),
-                new Field("package", packageName, Field.Store.YES, Field.Index.ANALYZED),
-                new Field("url", url, Field.Store.YES, Field.Index.NOT_ANALYZED),
-                new Field("ensemblePath", url, Field.Store.YES, Field.Index.NOT_ANALYZED) // TODO what do we need here
+                new TextField("name", className, Field.Store.YES),
+                new TextField("description", classDescription, Field.Store.NO),
+                new StringField("shortDescription", classDescription.substring(0,Math.min(160,classDescription.length())),
+                        Field.Store.YES),
+                new TextField("package", packageName, Field.Store.YES),
+                new StringField("url", url, Field.Store.YES),
+                new StringField("ensemblePath", url, Field.Store.YES) // TODO what do we need here
         ));
 
         // extract properties
         Matcher propertySummaryMatcher = PROPERTY_SUMMARY.matcher(content);
         if (propertySummaryMatcher.find()) {

@@ -280,18 +287,18 @@
                 //System.out.println("                    url = " + url);
                 propUrl = url + "#" + propertyName;
                 //System.out.println("                    oracle url = " + url);
                 // write class entry to index
                 docs.add(createDocument(DocumentType.PROPERTY,
-                        new Field("name", propertyName, Field.Store.YES, Field.Index.ANALYZED),
-                        new Field("description", description, Field.Store.NO, Field.Index.ANALYZED),
-                        new Field("shortDescription", description.substring(0,Math.min(160,description.length())),
-                                Field.Store.YES, Field.Index.NOT_ANALYZED),
-                        new Field("url", propUrl, Field.Store.YES, Field.Index.NOT_ANALYZED),
-                        new Field("className", className, Field.Store.YES, Field.Index.NOT_ANALYZED),
-                        new Field("package", packageName, Field.Store.YES, Field.Index.NOT_ANALYZED),
-                        new Field("ensemblePath", url + "#" + propertyName, Field.Store.YES, Field.Index.NOT_ANALYZED) // TODO what do we need here
+                        new TextField("name", propertyName, Field.Store.YES),
+                        new TextField("description", description, Field.Store.NO),
+                        new StringField("shortDescription", description.substring(0,Math.min(160,description.length())),
+                                Field.Store.YES),
+                        new StringField("url", propUrl, Field.Store.YES),
+                        new StringField("className", className, Field.Store.YES),
+                        new StringField("package", packageName, Field.Store.YES),
+                        new StringField("ensemblePath", url + "#" + propertyName, Field.Store.YES) // TODO what do we need here
                 ));
             }
         }
         // extract methods
         Matcher methodSummaryMatcher = METHOD_SUMMARY.matcher(content);

@@ -307,18 +314,18 @@
                 //System.out.println("                    url = " + url);
                 methodUrl = url + "#" + methodName+"()";
                 //System.out.println("                    oracle url = " + url);
                 // write class entry to index
                 docs.add(createDocument(DocumentType.METHOD,
-                        new Field("name", methodName, Field.Store.YES, Field.Index.ANALYZED),
-                        new Field("description", description, Field.Store.NO, Field.Index.ANALYZED),
-                        new Field("shortDescription", description.substring(0,Math.min(160,description.length())),
-                                Field.Store.YES, Field.Index.NOT_ANALYZED),
-                        new Field("url", methodUrl, Field.Store.YES, Field.Index.NOT_ANALYZED),
-                        new Field("className", className, Field.Store.YES, Field.Index.NOT_ANALYZED),
-                        new Field("package", packageName, Field.Store.YES, Field.Index.NOT_ANALYZED),
-                        new Field("ensemblePath", url + "#" + methodName + "()", Field.Store.YES, Field.Index.NOT_ANALYZED) // TODO what do we need here
+                        new TextField("name", methodName, Field.Store.YES),
+                        new TextField("description", description, Field.Store.NO),
+                        new StringField("shortDescription", description.substring(0,Math.min(160,description.length())),
+                                Field.Store.YES),
+                        new StringField("url", methodUrl, Field.Store.YES),
+                        new StringField("className", className, Field.Store.YES),
+                        new StringField("package", packageName, Field.Store.YES),
+                        new StringField("ensemblePath", url + "#" + methodName + "()", Field.Store.YES) // TODO what do we need here
                 ));
             }
         }
         // extract fields
         Matcher fieldSummaryMatcher = FIELD_SUMMARY.matcher(content);

@@ -334,18 +341,18 @@
                 //System.out.println("                    url = " + url);
                 fieldUrl = url + "#" + fieldName;
                 //System.out.println("                    oracle url = " + url);
                 // write class entry to index
                 docs.add(createDocument(DocumentType.FIELD,
-                        new Field("name", fieldName, Field.Store.YES, Field.Index.ANALYZED),
-                        new Field("description", description, Field.Store.NO, Field.Index.ANALYZED),
-                        new Field("shortDescription", description.substring(0,Math.min(160,description.length())),
-                                Field.Store.YES, Field.Index.NOT_ANALYZED),
-                        new Field("url", fieldUrl, Field.Store.YES, Field.Index.NOT_ANALYZED),
-                        new Field("className", className, Field.Store.YES, Field.Index.NOT_ANALYZED),
-                        new Field("package", packageName, Field.Store.YES, Field.Index.NOT_ANALYZED),
-                        new Field("ensemblePath", url + "#" + fieldName, Field.Store.YES, Field.Index.NOT_ANALYZED) // TODO what do we need here
+                        new TextField("name", fieldName, Field.Store.YES),
+                        new TextField("description", description, Field.Store.NO),
+                        new StringField("shortDescription", description.substring(0,Math.min(160,description.length())),
+                                Field.Store.YES),
+                        new StringField("url", fieldUrl, Field.Store.YES),
+                        new StringField("className", className, Field.Store.YES),
+                        new StringField("package", packageName, Field.Store.YES),
+                        new StringField("ensemblePath", url + "#" + fieldName, Field.Store.YES) // TODO what do we need here
                 ));
             }
         }
         // extract enums
         Matcher enumSummaryMatcher = ENUM_SUMMARY.matcher(content);

@@ -361,18 +368,18 @@
                 //System.out.println("                    url = " + url);
                 enumUrl = url + "#" + enumName;
                 ///System.out.println("                    oracle url = " + url);
                 // write class entry to index
                 docs.add(createDocument(DocumentType.ENUM,
-                        new Field("name", enumName, Field.Store.YES, Field.Index.ANALYZED),
-                        new Field("description", description, Field.Store.NO, Field.Index.ANALYZED),
-                        new Field("shortDescription", description.substring(0,Math.min(160,description.length())),
-                                Field.Store.YES, Field.Index.NOT_ANALYZED),
-                        new Field("url", enumUrl, Field.Store.YES, Field.Index.NOT_ANALYZED),
-                        new Field("className", className, Field.Store.YES, Field.Index.NOT_ANALYZED),
-                        new Field("package", packageName, Field.Store.YES, Field.Index.NOT_ANALYZED),
-                        new Field("ensemblePath", url+ "#" + enumName, Field.Store.YES, Field.Index.NOT_ANALYZED) // TODO what do we need here
+                        new TextField("name", enumName, Field.Store.YES),
+                        new TextField("description", description, Field.Store.NO),
+                        new StringField("shortDescription", description.substring(0,Math.min(160,description.length())),
+                                Field.Store.YES),
+                        new StringField("url", enumUrl, Field.Store.YES),
+                        new StringField("className", className, Field.Store.YES),
+                        new StringField("package", packageName, Field.Store.YES),
+                        new StringField("ensemblePath", url+ "#" + enumName, Field.Store.YES) // TODO what do we need here
                 ));
             }
         }
         return docs;
     }

@@ -385,12 +392,13 @@
      * @throws IOException If there was problem writing doc
      */
     private static Document createDocument(DocumentType documentType, Field... fields) throws IOException {
         // make a new, empty document
         Document doc = new Document();
-        // add doc type field
-        doc.add(new Field("documentType", documentType.toString(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
+        // add doc type field + sorting field
+        doc.add(new StringField("documentType", documentType.toString(), Field.Store.YES));
+        doc.add(new SortedDocValuesField("documentType", new BytesRef(documentType.toString())));
         // add other fields
         if (fields != null) {
             for (Field field : fields) {
                 doc.add(field);
             }

@@ -407,12 +415,13 @@
      * @throws IOException If there was problem writing doc
      */
     private static void addDocument(IndexWriter writer, DocumentType documentType, Field... fields) throws IOException {
         // make a new, empty document
         Document doc = new Document();
-        // add doc type field
-        doc.add(new Field("documentType", documentType.toString(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
+        // add doc type field + sorting field
+        doc.add(new StringField("documentType", documentType.toString(), Field.Store.YES));
+        doc.add(new SortedDocValuesField("documentType", new BytesRef(documentType.toString())));
         // add other fields
         if (fields != null) {
             for (Field field : fields) {
                 doc.add(field);
             }
< prev index next >