1 /*
   2  * Copyright (c) 2008, 2015, Oracle and/or its affiliates.
   3  * All rights reserved. Use is subject to license terms.
   4  *
   5  * This file is available and licensed under the following license:
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions
   9  * are met:
  10  *
  11  *  - Redistributions of source code must retain the above copyright
  12  *    notice, this list of conditions and the following disclaimer.
  13  *  - Redistributions in binary form must reproduce the above copyright
  14  *    notice, this list of conditions and the following disclaimer in
  15  *    the documentation and/or other materials provided with the distribution.
  16  *  - Neither the name of Oracle Corporation nor the names of its
  17  *    contributors may be used to endorse or promote products derived
  18  *    from this software without specific prior written permission.
  19  *
  20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31  */
  32 package ensemble.compiletime.search;
  33 
import ensemble.compiletime.Sample;
import java.io.*;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
  60 
  61 /**
  62  * Generate the lucene index that Ensemble uses for its search
  63  */
  64 public class BuildEnsembleSearchIndex {
  65     
  66     public static void buildSearchIndex(List<Sample> allSamples, String javaDocBaseUrl, String javafxDocumentationHome, File indexDir){
  67         try {
  68             List<Document> docs = new ArrayList<>();
  69             List<Callable<List<Document>>> tasks = new ArrayList<>();
  70             // create callables to collect data
  71             System.out.println("Creating Documents for Samples...");
  72             docs.addAll(indexSamples(allSamples));
  73             System.out.println("Creating tasks for getting all documentation...");
  74             tasks.addAll(indexJavaDocAllClasses(javaDocBaseUrl));
  75             tasks.addAll(indexAllDocumentation(javafxDocumentationHome));
  76             // execute all the tasks in 32 threads, collecting all the documents to write
  77             System.out.println("Executing tasks getting all documentation...");
  78             try {
  79                 ThreadPoolExecutor executor = new ThreadPoolExecutor(32,32,30, TimeUnit.SECONDS,new LinkedBlockingQueue());
  80                 executor.setThreadFactory(new ThreadFactory() {
  81                     int index = 0;
  82                     @Override public Thread newThread(Runnable r) {
  83                         Thread thread = new Thread(r,"Thread-"+(++index));
  84                         thread.setDaemon(true);
  85                         return thread;
  86                     }
  87                 });
  88                 List<Future<List<Document>>> results = executor.invokeAll(tasks);
  89                 for(Future<List<Document>> future : results) {
  90                     docs.addAll(future.get());
  91                 }
  92             } catch (ExecutionException | InterruptedException ex) {
  93                 Logger.getLogger(BuildEnsembleSearchIndex.class.getName()).log(Level.SEVERE, null, ex);
  94             }
  95             // create index
  96             System.out.println("Indexing to directory '" + indexDir + "'...");
  97             Directory dir = FSDirectory.open(indexDir);
  98             Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
  99             IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31, analyzer);
 100             iwc.setOpenMode(OpenMode.CREATE);
 101             try (IndexWriter writer = new IndexWriter(dir, iwc)) {
 102                 // write all docs
 103                 System.out.println("Writing ["+docs.size()+"] documents to index....");
 104                 writer.addDocuments(docs);
 105                 // optimize the writen index
 106                 System.out.println("Optimizing search index....");
 107                 writer.optimize();
 108                 System.out.println("NUMBER OF INDEXED DOCUMENTS = ["+writer.numDocs()+"]");
 109             }
 110             // write file listing all the search index files, so we know what 
 111             // is in the jar file at runtime
 112             try (FileWriter listAllOut = new FileWriter(new File(indexDir,"listAll.txt"))) {
 113                 for (String fileName: dir.listAll()) {
 114                     if (!"listAll.txt".equals(fileName)) { // don't include the "listAll.txt" file
 115                         Long length = dir.fileLength(fileName);
 116                         listAllOut.write(fileName);
 117                         listAllOut.write(':');
 118                         listAllOut.write(length.toString());
 119                         listAllOut.write('\n');
 120                     }
 121                 }
 122                 listAllOut.flush();
 123             }
 124             System.out.println("Finished writing search index to directory '" + indexDir);
 125         } catch (IOException ex) {
 126             Logger.getLogger(BuildEnsembleSearchIndex.class.getName()).log(Level.SEVERE, null, ex);
 127         }
 128     }
 129     
 130     private static List<Callable<List<Document>>> indexAllDocumentation(String javafxDocumentationHome) throws IOException{
 131         List<Callable<List<Document>>> tasks = new ArrayList<>();
 132         CharSequence content = grabWebPage(javafxDocumentationHome);
 133         String baseUrl = javafxDocumentationHome.substring(0,javafxDocumentationHome.lastIndexOf('/')+1);
 134 //        System.out.println("baseUrl = " + baseUrl);
 135         // parse page finding all docs pages
 136         Matcher matcher = docsHomeLink.matcher(content);
 137         System.out.println("Building a list of documentation to index");
 138         while (matcher.find()) {
 139             String foundUrl = matcher.group(1);
 140 //            System.out.println("foundUrl = " + foundUrl);
 141             final String docPageUrl = (foundUrl.startsWith("http") ? foundUrl : baseUrl + foundUrl);
 142             if ("https://docs.oracle.com/javafx/2/api/javafx/scene/doc-files/cssref.html".equals(docPageUrl) || 
 143                     "https://docs.oracle.com/javafx/2/api/index.html".equals(docPageUrl) ||
 144                     "http://www.oracle.com/technetwork/java/javafx/downloads/supportedconfigurations-1506746.html".equals(docPageUrl) ||
 145                     "http://www.oracle.com/technetwork/java/javase/downloads/".equals(docPageUrl) ||
 146                     "https://docs.oracle.com/javafx/2/api/javafx/fxml/doc-files/introduction_to_fxml.html".equals(docPageUrl)) {
 147                 continue;
 148             }
 149             System.out.println(docPageUrl);
 150             tasks.add((Callable<List<Document>>) () -> indexDocumentationPage(docPageUrl));
 151         }
 152         System.out.println(" --- end of list ---");
 153         return tasks;
 154     }
 155     
 156     private static List<Document> indexDocumentationPage(String docPageUrl) throws IOException{
 157         List<Document> docs = new ArrayList<>();
 158         try {
 159 //            System.out.println("PROCESSING... ["+docPageUrl+"] on Thread ["+Thread.currentThread().getName()+"]");
 160 //            System.out.println("==================================================================");
 161 //            System.out.println("Parsing docs page ["+docPageUrl+"] ...");
 162             DocumentationIndexer.DocPage docPage = DocumentationIndexer.parseDocsPage(docPageUrl, grabWebPage(docPageUrl).toString());
 163 //            System.out.println("TITLE="+docPage.bookTitle+"   CHAPTER="+docPage.chapter+"    SECTIONS=["+docPage.sections.size()+"]");
 164             for (DocumentationIndexer.Section section: docPage.sections) {
 165                 if (section.name == null) {
 166                     System.out.println("section.name = "+section.name+" docPage.bookTitle="+docPage.bookTitle+"    "+docPageUrl);
 167                 }
 168                 // write documentation section entry to index
 169                 docs.add(createDocument(DocumentType.DOC,
 170                     new Field("bookTitle", docPage.bookTitle, Field.Store.YES, Field.Index.ANALYZED),
 171                     new Field("chapter", docPage.chapter==null? "" : docPage.chapter, Field.Store.YES, Field.Index.ANALYZED),
 172                     new Field("name", section.name, Field.Store.YES, Field.Index.ANALYZED),
 173                     new Field("description", section.content, Field.Store.NO, Field.Index.ANALYZED),
 174                     new Field("ensemblePath", section.url, Field.Store.YES, Field.Index.NOT_ANALYZED)
 175                 ));
 176             }
 177             // handle next page if there is one
 178             if (docPage.nextUrl != null) {
 179                 docs.addAll(indexDocumentationPage(docPage.nextUrl));
 180             }
 181             
 182         } catch (Exception ex) {
 183             System.out.println("FAILED TO PARSE DOCS PAGE SO IGNORED: ["+docPageUrl+"]");
 184             ex.printStackTrace(System.out);
 185         }
 186         return docs;
 187     }
 188 
 189     private static List<Callable<List<Document>>> indexJavaDocAllClasses(final String javaDocBaseUrl) throws IOException{
 190         CharSequence content = grabWebPage(javaDocBaseUrl+"allclasses-noframe.html");
 191         List<Callable<List<Document>>> tasks = new ArrayList<>();
 192         // parse package
 193         Matcher matcher = findClassUrl.matcher(content);
 194         while (matcher.find()) {
 195             final String classUrl = javaDocBaseUrl+matcher.group(1);
 196             tasks.add((Callable<List<Document>>) () -> indexApiDocs(classUrl));
 197         }
 198         return tasks;
 199     }
 200 
 201     /**
 202      * Add all samples to the search index
 203      */
 204     private static List<Document> indexSamples(List<Sample> allSamples) throws IOException {
 205         List<Document> docs = new ArrayList<>();
 206         for (Sample sample: allSamples) {
 207             // write class entry to index
 208             docs.add(createDocument(DocumentType.SAMPLE,
 209                 new Field("name", sample.name, Field.Store.YES, Field.Index.ANALYZED),
 210                 new Field("description", sample.description, Field.Store.NO, Field.Index.ANALYZED),
 211                 new Field("shortDescription", sample.description.substring(0, Math.min(160, sample.description.length())),
 212                         Field.Store.YES, Field.Index.NOT_ANALYZED),
 213                 new Field("ensemblePath", "sample://"+sample.ensemblePath, Field.Store.YES, Field.Index.NOT_ANALYZED)
 214             ));
 215         }
 216         return docs;
 217     }
 218 
 219     /**
 220      * Index a JavaDoc page for a single class, interface or enum
 221      *
 222      * @param writer The index writer to add documents to
 223      * @param url The url to the javadoc html file
 224      * @throws IOException If there was a problem indexing the file
 225      */
 226     private static List<Document> indexApiDocs(String url) throws IOException {
 227 //        System.out.println("PROCESSING... ["+url+"] on Thread ["+Thread.currentThread().getName()+"]");
 228         final List<Document> docs = new ArrayList<>();
 229         CharSequence content = grabWebPage(url);
 230         // extract package and class
 231         Matcher packageAndClassMatcher = PACKAGE_AND_CLASS.matcher(content);
 232         // search and if we fail to find ignore this file
 233         if (!packageAndClassMatcher.find()) {
 234             //System.out.println("!!!! Ignoring [" + file + "] because no class or package was found");
 235             return docs;
 236         } else {
 237             //System.out.println("Adding [" + file + "]");
 238         }
 239         //System.out.println("        fileUrl = " + fileUrl);
 240         String packageName = packageAndClassMatcher.group(1);
 241         //System.out.println("        packageName = " + packageName);
 242         String classType = packageAndClassMatcher.group(2).toLowerCase();
 243         //System.out.println("        classType = " + classType);
 244         String className = packageAndClassMatcher.group(3);
 245         //System.out.println("        className = " + className);
 246         // extract document type
 247         DocumentType documentType = DocumentType.CLASS;
 248         if ("enum".equals(classType)) {
 249             documentType = DocumentType.ENUM;
 250         }
 251         // extract javadoc description
 252         Matcher classDescriptionMatcher = CLASS_DESCRIPTION.matcher(content);
 253         String classDescription = "";
 254         if (classDescriptionMatcher.find()) {
 255             classDescription = cleanHTML(classDescriptionMatcher.group(1));
 256         }
 257         ///System.out.println("classDescription = " + classDescription);
 258         // write class entry to index
 259         docs.add(createDocument(documentType,
 260                 new Field("name", className, Field.Store.YES, Field.Index.ANALYZED),
 261                 new Field("description", classDescription, Field.Store.NO, Field.Index.ANALYZED),
 262                 new Field("shortDescription", classDescription.substring(0,Math.min(160,classDescription.length())),
 263                         Field.Store.YES, Field.Index.NOT_ANALYZED),
 264                 new Field("package", packageName, Field.Store.YES, Field.Index.ANALYZED),
 265                 new Field("url", url, Field.Store.YES, Field.Index.NOT_ANALYZED),
 266                 new Field("ensemblePath", url, Field.Store.YES, Field.Index.NOT_ANALYZED) // TODO what do we need here
 267         ));
 268 
 269         // extract properties
 270         Matcher propertySummaryMatcher = PROPERTY_SUMMARY.matcher(content);
 271         if (propertySummaryMatcher.find()) {
 272             String propertySummaryTable = propertySummaryMatcher.group(1);           
 273             Matcher propertyMatcher = PROPERTY.matcher(propertySummaryTable);
 274             while (propertyMatcher.find()) {
 275                 String propUrl = propertyMatcher.group(1);
 276                 String propertyName = propertyMatcher.group(2);
 277                 String description = cleanHTML(propertyMatcher.group(3));
 278                 //System.out.println("            propertyName = " + propertyName);
 279                 //System.out.println("                    description = " + description);
 280                 //System.out.println("                    url = " + url);
 281                 propUrl = url + "#" + propertyName;
 282                 //System.out.println("                    oracle url = " + url);
 283                 // write class entry to index
 284                 docs.add(createDocument(DocumentType.PROPERTY,
 285                         new Field("name", propertyName, Field.Store.YES, Field.Index.ANALYZED),
 286                         new Field("description", description, Field.Store.NO, Field.Index.ANALYZED),
 287                         new Field("shortDescription", description.substring(0,Math.min(160,description.length())),
 288                                 Field.Store.YES, Field.Index.NOT_ANALYZED),
 289                         new Field("url", propUrl, Field.Store.YES, Field.Index.NOT_ANALYZED),
 290                         new Field("className", className, Field.Store.YES, Field.Index.NOT_ANALYZED),
 291                         new Field("package", packageName, Field.Store.YES, Field.Index.NOT_ANALYZED),
 292                         new Field("ensemblePath", url + "#" + propertyName, Field.Store.YES, Field.Index.NOT_ANALYZED) // TODO what do we need here
 293                 ));
 294             }
 295         }
 296         // extract methods
 297         Matcher methodSummaryMatcher = METHOD_SUMMARY.matcher(content);
 298         if (methodSummaryMatcher.find()) {
 299             String methodSummaryTable = methodSummaryMatcher.group(1);
 300             Matcher methodMatcher = PROPERTY.matcher(methodSummaryTable);
 301             while (methodMatcher.find()) {
 302                 String methodUrl = methodMatcher.group(1);
 303                 String methodName = methodMatcher.group(2);
 304                 String description = cleanHTML(methodMatcher.group(3));
 305                 //System.out.println("            methodName = " + methodName);
 306                 //System.out.println("                    description = " + description);
 307                 //System.out.println("                    url = " + url);
 308                 methodUrl = url + "#" + methodName+"()";
 309                 //System.out.println("                    oracle url = " + url);
 310                 // write class entry to index
 311                 docs.add(createDocument(DocumentType.METHOD,
 312                         new Field("name", methodName, Field.Store.YES, Field.Index.ANALYZED),
 313                         new Field("description", description, Field.Store.NO, Field.Index.ANALYZED),
 314                         new Field("shortDescription", description.substring(0,Math.min(160,description.length())),
 315                                 Field.Store.YES, Field.Index.NOT_ANALYZED),
 316                         new Field("url", methodUrl, Field.Store.YES, Field.Index.NOT_ANALYZED),
 317                         new Field("className", className, Field.Store.YES, Field.Index.NOT_ANALYZED),
 318                         new Field("package", packageName, Field.Store.YES, Field.Index.NOT_ANALYZED),
 319                         new Field("ensemblePath", url + "#" + methodName + "()", Field.Store.YES, Field.Index.NOT_ANALYZED) // TODO what do we need here
 320                 ));
 321             }
 322         }
 323         // extract fields
 324         Matcher fieldSummaryMatcher = FIELD_SUMMARY.matcher(content);
 325         if (fieldSummaryMatcher.find()) {
 326             String fieldSummaryTable = fieldSummaryMatcher.group(1);
 327             Matcher fieldMatcher = PROPERTY.matcher(fieldSummaryTable);
 328             while (fieldMatcher.find()) {
 329                 String fieldUrl = fieldMatcher.group(1);
 330                 String fieldName = fieldMatcher.group(2);
 331                 String description = cleanHTML(fieldMatcher.group(3));
 332                 //System.out.println(" #####     fieldName = " + fieldName);
 333                 //System.out.println("                    description = " + description);
 334                 //System.out.println("                    url = " + url);
 335                 fieldUrl = url + "#" + fieldName;
 336                 //System.out.println("                    oracle url = " + url);
 337                 // write class entry to index
 338                 docs.add(createDocument(DocumentType.FIELD,
 339                         new Field("name", fieldName, Field.Store.YES, Field.Index.ANALYZED),
 340                         new Field("description", description, Field.Store.NO, Field.Index.ANALYZED),
 341                         new Field("shortDescription", description.substring(0,Math.min(160,description.length())),
 342                                 Field.Store.YES, Field.Index.NOT_ANALYZED),
 343                         new Field("url", fieldUrl, Field.Store.YES, Field.Index.NOT_ANALYZED),
 344                         new Field("className", className, Field.Store.YES, Field.Index.NOT_ANALYZED),
 345                         new Field("package", packageName, Field.Store.YES, Field.Index.NOT_ANALYZED),
 346                         new Field("ensemblePath", url + "#" + fieldName, Field.Store.YES, Field.Index.NOT_ANALYZED) // TODO what do we need here
 347                 ));
 348             }
 349         }
 350         // extract enums
 351         Matcher enumSummaryMatcher = ENUM_SUMMARY.matcher(content);
 352         if (enumSummaryMatcher.find()) {
 353             String enumSummaryTable = enumSummaryMatcher.group(1);
 354             Matcher enumMatcher = PROPERTY.matcher(enumSummaryTable);
 355             while (enumMatcher.find()) {
 356                 String enumUrl = enumMatcher.group(1);
 357                 String enumName = enumMatcher.group(2);
 358                 String description = cleanHTML(enumMatcher.group(3));
 359                 //System.out.println("            enumName = " + enumName);
 360                 //System.out.println("                    description = " + description);
 361                 //System.out.println("                    url = " + url);
 362                 enumUrl = url + "#" + enumName;
 363                 ///System.out.println("                    oracle url = " + url);
 364                 // write class entry to index
 365                 docs.add(createDocument(DocumentType.ENUM,
 366                         new Field("name", enumName, Field.Store.YES, Field.Index.ANALYZED),
 367                         new Field("description", description, Field.Store.NO, Field.Index.ANALYZED),
 368                         new Field("shortDescription", description.substring(0,Math.min(160,description.length())),
 369                                 Field.Store.YES, Field.Index.NOT_ANALYZED),
 370                         new Field("url", enumUrl, Field.Store.YES, Field.Index.NOT_ANALYZED),
 371                         new Field("className", className, Field.Store.YES, Field.Index.NOT_ANALYZED),
 372                         new Field("package", packageName, Field.Store.YES, Field.Index.NOT_ANALYZED),
 373                         new Field("ensemblePath", url+ "#" + enumName, Field.Store.YES, Field.Index.NOT_ANALYZED) // TODO what do we need here
 374                 ));
 375             }
 376         }
 377         return docs;
 378     }
 379 
 380     /**
 381      * Create a new document
 382      *
 383      * @param documentType The document type to save in the doc
 384      * @param fields       The searchable and data fields to write into doc
 385      * @throws IOException If there was problem writing doc
 386      */
 387     private static Document createDocument(DocumentType documentType, Field... fields) throws IOException {
 388         // make a new, empty document
 389         Document doc = new Document();
 390         // add doc type field
 391         doc.add(new Field("documentType", documentType.toString(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
 392         // add other fields
 393         if (fields != null) {
 394             for (Field field : fields) {
 395                 doc.add(field);
 396             }
 397         }
 398         return doc;
 399     }
 400 
 401     /**
 402      * Create a new document and write it to the given writer
 403      *
 404      * @param writer       The writer to write out to
 405      * @param documentType The document type to save in the doc
 406      * @param fields       The searchable and data fields to write into doc
 407      * @throws IOException If there was problem writing doc
 408      */
 409     private static void addDocument(IndexWriter writer, DocumentType documentType, Field... fields) throws IOException {
 410         // make a new, empty document
 411         Document doc = new Document();
 412         // add doc type field
 413         doc.add(new Field("documentType", documentType.toString(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
 414         // add other fields
 415         if (fields != null) {
 416             for (Field field : fields) {
 417                 doc.add(field);
 418             }
 419         }
 420         // write into index, assuming we are recreating every time
 421         writer.addDocument(doc);
 422     }
 423 
 424     /**
 425      * Clean HTML, removing all tags and un-escaping so that we can index it cleanly
 426      *
 427      * @param html The html to clean
 428      * @return cleaned html
 429      */
 430     private static String cleanHTML(String html) {
 431         html = html.replaceAll("(&nbsp;|\\s|[ ])+", " ").trim(); // cleanup whitespace
 432         html = html.replaceAll("<.*?>", " "); // remove html tags
 433         html = html.replaceAll("&lt;", "<"); // un-escape <
 434         html = html.replaceAll("&gt;", ">"); // un-escape >
 435         html = html.replaceAll("&quot;", "\""); // un-escape "
 436         html = html.replaceAll("&apos;", "\'"); // un-escape '
 437         html = html.replaceAll("&amp;", "&"); // un-escape &
 438         return html;
 439     }
 440     
 441     static CharSequence grabWebPage(String url) throws IOException {
 442         StringBuilder builder = new StringBuilder();
 443         try (BufferedReader reader = new BufferedReader(new InputStreamReader(new URL(url).openStream()))) {
 444             String line;
 445             while((line = reader.readLine()) != null) {
 446                 builder.append(line);
 447                 builder.append('\n');
 448             }
 449         }
 450         return builder;
 451     }
 452           
 453     
 454     // ===================  JAVAFX DOCUMENTATION PATTERNS ======================
 455     /*
 456      GET ALL LINKS FROM DOCS HOME PAGE
 457     <p class="fxblurblink"><a href="2/overview/jfxpub-overview.htm">What is JavaFX?</a></p>
 458 
 459     GROUP 1 = url
 460      */
 461     private static final Pattern docsHomeLink = Pattern.compile("<p\\s+class=\\\"fxblurblink\\\"\\s*>.*<a\\s*href=\\\"([^\\\"]+)");
 462     /*
 463      GET ALL LINKS FROM DOCS HOME PAGE
 464     <div id="bookTitle">
 465       <h1>Working With Layouts in JavaFX </h1>
 466     </div>
 467 
 468     GROUP 1 = book title
 469      */
 470     private static final Pattern bookTitle = Pattern.compile( "<div\\s+id=\\\"bookTitle\\\"\\s*>\\s*<h1>([^<]+)");
 471     /*
 472      GET ALL LINKS FROM DOCS HOME PAGE
 473     <h1 class="chapter">JavaFX Scene Builder Overview</h1>
 474 
 475     GROUP 1 = chapter name
 476      */
 477     private static final Pattern chapter = Pattern.compile("<h1\\s+class=\\\"chapter\\\"\\s*>([^<]+)");
 478     
 479     
 480     // ===================  API DOC PATTERNS ===================================
 481     /*
 482     Pull class urls from all classes page
 483      */
 484     private static final Pattern findClassUrl = Pattern.compile("a\\s+href=\\\"([^\\\"]+)\\\"");
 485     /*
 486     <div class="subTitle">javafx.scene</div>
 487     <h2 title="Class Scene" class="title">Class Scene</h2>
 488     </div>
 489 
 490     GROUP 1 = Package
 491     GROUP 2 = Class Type
 492     GROUP 3 = Class
 493      */
 494     //private static Pattern PACKAGE_AND_CLASS = Pattern.compile("<H2>\\s*<FONT SIZE=\"-1\">\\s*([^<]+)</FONT>\\s*<BR>\\s*(Class|Interface|Enum) ([^<&]+).*?</H2>",Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
 495     private static Pattern PACKAGE_AND_CLASS = Pattern.compile("<div class=\"subTitle\">\\s*([^<]+)</div>\\s*<h2 title=\"(Class|Interface|Enum) ([^<&]+).*?\"\\sclass=\"title\">(Class|Interface|Enum) ([^<&]+).*?</h2>",Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
 496     /*
 497     </PRE>
 498     <P>
 499     ...
 500     <HR>
 501     <div class="description">
 502     ...
 503     <p>
 504     ...
 505     </div>
 506 
 507     GROUP 1 = Class JavaDoc Description
 508      */
 509     //private static Pattern CLASS_DESCRIPTION = Pattern.compile("</PRE>\\s*<P>(.*?)<HR>",Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
 510     private static Pattern CLASS_DESCRIPTION = Pattern.compile("<div class=\"description\">.*?<[pP]>(.*?)</div>",Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
 511     /*
 512     <table ...>
 513     ...
 514     </table>
 515 
 516     GROUP 1 = Property Summary Table
 517      */
 518    // private static Pattern PROPERTY_SUMMARY = Pattern.compile("NAME=\"property_summary\">.*?<TABLE[^>]+>(.*?)</TABLE>",Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
 519     private static Pattern PROPERTY_SUMMARY = Pattern.compile("<h3>Property Summary</h3>.*?<table[^>]+>(.*?)</table>",Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
 520     /*
 521     <h3>Method Summary</h3>
 522     <table...>
 523     ...
 524     </table>
 525 
 526     GROUP 1 = Method Summary Table
 527      */
 528   //  private static Pattern METHOD_SUMMARY = Pattern.compile("NAME=\"method_summary\">.*?<TABLE[^>]+>(.*?)</TABLE>",Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
 529     private static Pattern METHOD_SUMMARY = Pattern.compile("<h3>Method Summary</h3>.*?<table[^>]+>(.*?)</table>",Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
 530     /*
 531     <h3>Enum Constant Summary</h3>
 532     <table...>
 533     ...
 534     </table>
 535 
 536     GROUP 1 = Enum Summary Table
 537      */
 538    // private static Pattern ENUM_SUMMARY = Pattern.compile("NAME=\"enum_constant_summary\">.*?<TABLE[^>]+>(.*?)</TABLE>",Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
 539     private static Pattern ENUM_SUMMARY = Pattern.compile("<h3>Enum Constant Summary</h3>.*?<table[^>]+>(.*?)</table>",Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
 540     /*
 541     <h3>Field Summary</h3>
 542     <table...>
 543     ...
 544     </table>
 545 
 546     GROUP 1 = Field Summary Table
 547      */
 548    // private static Pattern FIELD_SUMMARY = Pattern.compile("NAME=\"field_summary\">.*?<TABLE[^>]+>(.*?)</TABLE>",Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
 549     private static Pattern FIELD_SUMMARY = Pattern.compile("<h3>Field Summary</h3>.*?<table[^>]+>(.*?)</table>",Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
 550     /*
 551     <td class="colFirst"><code><a href="../../../javafx/beans/property/DoubleProperty.html" title="class in javafx.beans.property">DoubleProperty</a></code></td>
 552     GROUP 1 = Url
 553     GROUP 2 = Name
 554     GROUP 2 = Description
 555      */
 556     //private static Pattern PROPERTY = Pattern.compile("<TD>.*?<A HREF=\"([^\"]*)\">([^<]*)</A>.*?<BR>(.*?)</TD>",Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
 557     private static Pattern PROPERTY = Pattern.compile("<td class=\"colFirst\">.*?<a href=\"([^\"]*)\">([^<]*)</a>(.*?)</td>",Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
 558 }