< prev index next >

apps/samples/Ensemble8/src/compiletime/java/ensemble/compiletime/search/BuildEnsembleSearchIndex.java

Print this page
rev 9898 : 8178275: Ensemble: Upgrade version of Lucene to 7.1.0
Reviewed-by: aghaisas, prr

*** 1,7 **** /* ! * Copyright (c) 2008, 2015, Oracle and/or its affiliates. * All rights reserved. Use is subject to license terms. * * This file is available and licensed under the following license: * * Redistribution and use in source and binary forms, with or without --- 1,7 ---- /* ! * Copyright (c) 2008, 2017, Oracle and/or its affiliates. * All rights reserved. Use is subject to license terms. * * This file is available and licensed under the following license: * * Redistribution and use in source and binary forms, with or without
*** 30,40 **** * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package ensemble.compiletime.search; import ensemble.compiletime.Sample; ! import java.io.*; import java.net.URL; import java.util.ArrayList; import java.util.List; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; --- 30,44 ---- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package ensemble.compiletime.search; import ensemble.compiletime.Sample; ! import java.io.BufferedReader; ! import java.io.File; ! import java.io.FileWriter; ! import java.io.IOException; ! import java.io.InputStreamReader; import java.net.URL; import java.util.ArrayList; import java.util.List; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException;
*** 49,64 **** import java.util.regex.Pattern; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; ! import org.apache.lucene.util.Version; /** * Generate the lucene index that Ensemble uses for its search */ public class BuildEnsembleSearchIndex { --- 53,71 ---- import java.util.regex.Pattern; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; + import org.apache.lucene.document.SortedDocValuesField; + import org.apache.lucene.document.StringField; + import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; ! import org.apache.lucene.util.BytesRef; /** * Generate the lucene index that Ensemble uses for its search */ public class BuildEnsembleSearchIndex {
*** 69,78 **** --- 76,87 ---- List<Callable<List<Document>>> tasks = new ArrayList<>(); // create callables to collect data System.out.println("Creating Documents for Samples..."); docs.addAll(indexSamples(allSamples)); System.out.println("Creating tasks for getting all documentation..."); + System.out.println("javaDocBaseUrl = " + javaDocBaseUrl); + System.out.println("javafxDocumentationHome = " + javafxDocumentationHome); tasks.addAll(indexJavaDocAllClasses(javaDocBaseUrl)); tasks.addAll(indexAllDocumentation(javafxDocumentationHome)); // execute all the tasks in 32 threads, collecting all the documents to write System.out.println("Executing tasks getting all documentation..."); try {
*** 92,119 **** } catch (ExecutionException | InterruptedException ex) { Logger.getLogger(BuildEnsembleSearchIndex.class.getName()).log(Level.SEVERE, null, ex); } // create index System.out.println("Indexing to directory '" + indexDir + "'..."); ! Directory dir = FSDirectory.open(indexDir); ! Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31); ! IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31, analyzer); iwc.setOpenMode(OpenMode.CREATE); try (IndexWriter writer = new IndexWriter(dir, iwc)) { // write all docs System.out.println("Writing ["+docs.size()+"] documents to index...."); writer.addDocuments(docs); - // optimize the writen index - System.out.println("Optimizing search index...."); - writer.optimize(); System.out.println("NUMBER OF INDEXED DOCUMENTS = ["+writer.numDocs()+"]"); } // write file listing all the search index files, so we know what // is in the jar file at runtime try (FileWriter listAllOut = new FileWriter(new File(indexDir,"listAll.txt"))) { for (String fileName: dir.listAll()) { ! if (!"listAll.txt".equals(fileName)) { // don't include the "listAll.txt" file Long length = dir.fileLength(fileName); listAllOut.write(fileName); listAllOut.write(':'); listAllOut.write(length.toString()); listAllOut.write('\n'); --- 101,126 ---- } catch (ExecutionException | InterruptedException ex) { Logger.getLogger(BuildEnsembleSearchIndex.class.getName()).log(Level.SEVERE, null, ex); } // create index System.out.println("Indexing to directory '" + indexDir + "'..."); ! Directory dir = FSDirectory.open(indexDir.toPath()); ! Analyzer analyzer = new StandardAnalyzer(); ! IndexWriterConfig iwc = new IndexWriterConfig(analyzer); iwc.setOpenMode(OpenMode.CREATE); try (IndexWriter writer = new IndexWriter(dir, iwc)) { // write all docs System.out.println("Writing ["+docs.size()+"] documents to index...."); writer.addDocuments(docs); System.out.println("NUMBER OF INDEXED DOCUMENTS = ["+writer.numDocs()+"]"); } // write file listing all the search index files, so we know what // is in the jar file at runtime try (FileWriter listAllOut = new FileWriter(new File(indexDir,"listAll.txt"))) { for (String fileName: dir.listAll()) { ! // don't include the "listAll.txt" file or "write.lock" ! if (!"listAll.txt".equals(fileName) && !"write.lock".equals(fileName)) { Long length = dir.fileLength(fileName); listAllOut.write(fileName); listAllOut.write(':'); listAllOut.write(length.toString()); listAllOut.write('\n');
*** 165,179 **** if (section.name == null) { System.out.println("section.name = "+section.name+" docPage.bookTitle="+docPage.bookTitle+" "+docPageUrl); } // write documentation section entry to index docs.add(createDocument(DocumentType.DOC, ! new Field("bookTitle", docPage.bookTitle, Field.Store.YES, Field.Index.ANALYZED), ! new Field("chapter", docPage.chapter==null? "" : docPage.chapter, Field.Store.YES, Field.Index.ANALYZED), ! new Field("name", section.name, Field.Store.YES, Field.Index.ANALYZED), ! new Field("description", section.content, Field.Store.NO, Field.Index.ANALYZED), ! new Field("ensemblePath", section.url, Field.Store.YES, Field.Index.NOT_ANALYZED) )); } // handle next page if there is one if (docPage.nextUrl != null) { docs.addAll(indexDocumentationPage(docPage.nextUrl)); --- 172,186 ---- if (section.name == null) { System.out.println("section.name = "+section.name+" docPage.bookTitle="+docPage.bookTitle+" "+docPageUrl); } // write documentation section entry to index docs.add(createDocument(DocumentType.DOC, ! new TextField("bookTitle", docPage.bookTitle, Field.Store.YES), ! new TextField("chapter", docPage.chapter==null? "" : docPage.chapter, Field.Store.YES), ! new TextField("name", section.name, Field.Store.YES), ! new TextField("description", section.content, Field.Store.NO), ! new StringField("ensemblePath", section.url, Field.Store.YES) )); } // handle next page if there is one if (docPage.nextUrl != null) { docs.addAll(indexDocumentationPage(docPage.nextUrl));
*** 204,218 **** private static List<Document> indexSamples(List<Sample> allSamples) throws IOException { List<Document> docs = new ArrayList<>(); for (Sample sample: allSamples) { // write class entry to index docs.add(createDocument(DocumentType.SAMPLE, ! new Field("name", sample.name, Field.Store.YES, Field.Index.ANALYZED), ! new Field("description", sample.description, Field.Store.NO, Field.Index.ANALYZED), ! new Field("shortDescription", sample.description.substring(0, Math.min(160, sample.description.length())), ! Field.Store.YES, Field.Index.NOT_ANALYZED), ! new Field("ensemblePath", "sample://"+sample.ensemblePath, Field.Store.YES, Field.Index.NOT_ANALYZED) )); } return docs; } --- 211,225 ---- private static List<Document> indexSamples(List<Sample> allSamples) throws IOException { List<Document> docs = new ArrayList<>(); for (Sample sample: allSamples) { // write class entry to index docs.add(createDocument(DocumentType.SAMPLE, ! new TextField("name", sample.name, Field.Store.YES), ! new TextField("description", sample.description, Field.Store.NO), ! new StringField("shortDescription", sample.description.substring(0, Math.min(160, sample.description.length())), ! Field.Store.YES), ! new StringField("ensemblePath", "sample://"+sample.ensemblePath, Field.Store.YES) )); } return docs; }
*** 255,271 **** classDescription = cleanHTML(classDescriptionMatcher.group(1)); } ///System.out.println("classDescription = " + classDescription); // write class entry to index docs.add(createDocument(documentType, ! new Field("name", className, Field.Store.YES, Field.Index.ANALYZED), ! new Field("description", classDescription, Field.Store.NO, Field.Index.ANALYZED), ! new Field("shortDescription", classDescription.substring(0,Math.min(160,classDescription.length())), ! Field.Store.YES, Field.Index.NOT_ANALYZED), ! new Field("package", packageName, Field.Store.YES, Field.Index.ANALYZED), ! new Field("url", url, Field.Store.YES, Field.Index.NOT_ANALYZED), ! new Field("ensemblePath", url, Field.Store.YES, Field.Index.NOT_ANALYZED) // TODO what do we need here )); // extract properties Matcher propertySummaryMatcher = PROPERTY_SUMMARY.matcher(content); if (propertySummaryMatcher.find()) { --- 262,278 ---- classDescription = cleanHTML(classDescriptionMatcher.group(1)); } ///System.out.println("classDescription = " + classDescription); // write class entry to index docs.add(createDocument(documentType, ! new TextField("name", className, Field.Store.YES), ! new TextField("description", classDescription, Field.Store.NO), ! new StringField("shortDescription", classDescription.substring(0,Math.min(160,classDescription.length())), ! Field.Store.YES), ! new TextField("package", packageName, Field.Store.YES), ! new StringField("url", url, Field.Store.YES), ! new StringField("ensemblePath", url, Field.Store.YES) // TODO what do we need here )); // extract properties Matcher propertySummaryMatcher = PROPERTY_SUMMARY.matcher(content); if (propertySummaryMatcher.find()) {
*** 280,297 **** //System.out.println(" url = " + url); propUrl = url + "#" + propertyName; //System.out.println(" oracle url = " + url); // write class entry to index docs.add(createDocument(DocumentType.PROPERTY, ! new Field("name", propertyName, Field.Store.YES, Field.Index.ANALYZED), ! new Field("description", description, Field.Store.NO, Field.Index.ANALYZED), ! new Field("shortDescription", description.substring(0,Math.min(160,description.length())), ! Field.Store.YES, Field.Index.NOT_ANALYZED), ! new Field("url", propUrl, Field.Store.YES, Field.Index.NOT_ANALYZED), ! new Field("className", className, Field.Store.YES, Field.Index.NOT_ANALYZED), ! new Field("package", packageName, Field.Store.YES, Field.Index.NOT_ANALYZED), ! new Field("ensemblePath", url + "#" + propertyName, Field.Store.YES, Field.Index.NOT_ANALYZED) // TODO what do we need here )); } } // extract methods Matcher methodSummaryMatcher = METHOD_SUMMARY.matcher(content); --- 287,304 ---- //System.out.println(" url = " + url); propUrl = url + "#" + propertyName; //System.out.println(" oracle url = " + url); // write class entry to index docs.add(createDocument(DocumentType.PROPERTY, ! new TextField("name", propertyName, Field.Store.YES), ! new TextField("description", description, Field.Store.NO), ! new StringField("shortDescription", description.substring(0,Math.min(160,description.length())), ! Field.Store.YES), ! new StringField("url", propUrl, Field.Store.YES), ! new StringField("className", className, Field.Store.YES), ! new StringField("package", packageName, Field.Store.YES), ! new StringField("ensemblePath", url + "#" + propertyName, Field.Store.YES) // TODO what do we need here )); } } // extract methods Matcher methodSummaryMatcher = METHOD_SUMMARY.matcher(content);
*** 307,324 **** //System.out.println(" url = " + url); methodUrl = url + "#" + methodName+"()"; //System.out.println(" oracle url = " + url); // write class entry to index docs.add(createDocument(DocumentType.METHOD, ! new Field("name", methodName, Field.Store.YES, Field.Index.ANALYZED), ! new Field("description", description, Field.Store.NO, Field.Index.ANALYZED), ! new Field("shortDescription", description.substring(0,Math.min(160,description.length())), ! Field.Store.YES, Field.Index.NOT_ANALYZED), ! new Field("url", methodUrl, Field.Store.YES, Field.Index.NOT_ANALYZED), ! new Field("className", className, Field.Store.YES, Field.Index.NOT_ANALYZED), ! new Field("package", packageName, Field.Store.YES, Field.Index.NOT_ANALYZED), ! new Field("ensemblePath", url + "#" + methodName + "()", Field.Store.YES, Field.Index.NOT_ANALYZED) // TODO what do we need here )); } } // extract fields Matcher fieldSummaryMatcher = FIELD_SUMMARY.matcher(content); --- 314,331 ---- //System.out.println(" url = " + url); methodUrl = url + "#" + methodName+"()"; //System.out.println(" oracle url = " + url); // write class entry to index docs.add(createDocument(DocumentType.METHOD, ! new TextField("name", methodName, Field.Store.YES), ! new TextField("description", description, Field.Store.NO), ! new StringField("shortDescription", description.substring(0,Math.min(160,description.length())), ! Field.Store.YES), ! new StringField("url", methodUrl, Field.Store.YES), ! new StringField("className", className, Field.Store.YES), ! new StringField("package", packageName, Field.Store.YES), ! new StringField("ensemblePath", url + "#" + methodName + "()", Field.Store.YES) // TODO what do we need here )); } } // extract fields Matcher fieldSummaryMatcher = FIELD_SUMMARY.matcher(content);
*** 334,351 **** //System.out.println(" url = " + url); fieldUrl = url + "#" + fieldName; //System.out.println(" oracle url = " + url); // write class entry to index docs.add(createDocument(DocumentType.FIELD, ! new Field("name", fieldName, Field.Store.YES, Field.Index.ANALYZED), ! new Field("description", description, Field.Store.NO, Field.Index.ANALYZED), ! new Field("shortDescription", description.substring(0,Math.min(160,description.length())), ! Field.Store.YES, Field.Index.NOT_ANALYZED), ! new Field("url", fieldUrl, Field.Store.YES, Field.Index.NOT_ANALYZED), ! new Field("className", className, Field.Store.YES, Field.Index.NOT_ANALYZED), ! new Field("package", packageName, Field.Store.YES, Field.Index.NOT_ANALYZED), ! new Field("ensemblePath", url + "#" + fieldName, Field.Store.YES, Field.Index.NOT_ANALYZED) // TODO what do we need here )); } } // extract enums Matcher enumSummaryMatcher = ENUM_SUMMARY.matcher(content); --- 341,358 ---- //System.out.println(" url = " + url); fieldUrl = url + "#" + fieldName; //System.out.println(" oracle url = " + url); // write class entry to index docs.add(createDocument(DocumentType.FIELD, ! new TextField("name", fieldName, Field.Store.YES), ! new TextField("description", description, Field.Store.NO), ! new StringField("shortDescription", description.substring(0,Math.min(160,description.length())), ! Field.Store.YES), ! new StringField("url", fieldUrl, Field.Store.YES), ! new StringField("className", className, Field.Store.YES), ! new StringField("package", packageName, Field.Store.YES), ! new StringField("ensemblePath", url + "#" + fieldName, Field.Store.YES) // TODO what do we need here )); } } // extract enums Matcher enumSummaryMatcher = ENUM_SUMMARY.matcher(content);
*** 361,378 **** //System.out.println(" url = " + url); enumUrl = url + "#" + enumName; ///System.out.println(" oracle url = " + url); // write class entry to index docs.add(createDocument(DocumentType.ENUM, ! new Field("name", enumName, Field.Store.YES, Field.Index.ANALYZED), ! new Field("description", description, Field.Store.NO, Field.Index.ANALYZED), ! new Field("shortDescription", description.substring(0,Math.min(160,description.length())), ! Field.Store.YES, Field.Index.NOT_ANALYZED), ! new Field("url", enumUrl, Field.Store.YES, Field.Index.NOT_ANALYZED), ! new Field("className", className, Field.Store.YES, Field.Index.NOT_ANALYZED), ! new Field("package", packageName, Field.Store.YES, Field.Index.NOT_ANALYZED), ! new Field("ensemblePath", url+ "#" + enumName, Field.Store.YES, Field.Index.NOT_ANALYZED) // TODO what do we need here )); } } return docs; } --- 368,385 ---- //System.out.println(" url = " + url); enumUrl = url + "#" + enumName; ///System.out.println(" oracle url = " + url); // write class entry to index docs.add(createDocument(DocumentType.ENUM, ! new TextField("name", enumName, Field.Store.YES), ! new TextField("description", description, Field.Store.NO), ! new StringField("shortDescription", description.substring(0,Math.min(160,description.length())), ! Field.Store.YES), ! new StringField("url", enumUrl, Field.Store.YES), ! new StringField("className", className, Field.Store.YES), ! new StringField("package", packageName, Field.Store.YES), ! new StringField("ensemblePath", url+ "#" + enumName, Field.Store.YES) // TODO what do we need here )); } } return docs; }
*** 385,396 **** * @throws IOException If there was problem writing doc */ private static Document createDocument(DocumentType documentType, Field... fields) throws IOException { // make a new, empty document Document doc = new Document(); ! // add doc type field ! doc.add(new Field("documentType", documentType.toString(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); // add other fields if (fields != null) { for (Field field : fields) { doc.add(field); } --- 392,404 ---- * @throws IOException If there was problem writing doc */ private static Document createDocument(DocumentType documentType, Field... fields) throws IOException { // make a new, empty document Document doc = new Document(); ! // add doc type field + sorting field ! doc.add(new StringField("documentType", documentType.toString(), Field.Store.YES)); ! doc.add(new SortedDocValuesField("documentType", new BytesRef(documentType.toString()))); // add other fields if (fields != null) { for (Field field : fields) { doc.add(field); }
*** 407,418 **** * @throws IOException If there was problem writing doc */ private static void addDocument(IndexWriter writer, DocumentType documentType, Field... fields) throws IOException { // make a new, empty document Document doc = new Document(); ! // add doc type field ! doc.add(new Field("documentType", documentType.toString(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); // add other fields if (fields != null) { for (Field field : fields) { doc.add(field); } --- 415,427 ---- * @throws IOException If there was problem writing doc */ private static void addDocument(IndexWriter writer, DocumentType documentType, Field... fields) throws IOException { // make a new, empty document Document doc = new Document(); ! // add doc type field + sorting field ! doc.add(new StringField("documentType", documentType.toString(), Field.Store.YES)); ! doc.add(new SortedDocValuesField("documentType", new BytesRef(documentType.toString()))); // add other fields if (fields != null) { for (Field field : fields) { doc.add(field); }
< prev index next >