1 /*
   2  * $Id$
   3  *
   4  * Copyright (c) 2004, 2009, Oracle and/or its affiliates. All rights reserved.
   5  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   6  *
   7  * This code is free software; you can redistribute it and/or modify it
   8  * under the terms of the GNU General Public License version 2 only, as
   9  * published by the Free Software Foundation.  Oracle designates this
  10  * particular file as subject to the "Classpath" exception as provided
  11  * by Oracle in the LICENSE file that accompanied this code.
  12  *
  13  * This code is distributed in the hope that it will be useful, but WITHOUT
  14  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16  * version 2 for more details (a copy is included in the LICENSE file that
  17  * accompanied this code).
  18  *
  19  * You should have received a copy of the GNU General Public License version
  20  * 2 along with this work; if not, write to the Free Software Foundation,
  21  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  22  *
  23  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  24  * or visit www.oracle.com if you need additional information or have any
  25  * questions.
  26  */
  27 package com.sun.jct.utils.glossarygen;
  28 
  29 import java.io.BufferedReader;
  30 import java.io.BufferedWriter;
  31 import java.io.File;
  32 import java.io.FileReader;
  33 import java.io.FileWriter;
  34 import java.io.IOException;
  35 import java.io.PrintStream;
  36 import java.io.PrintWriter;
  37 import java.io.Reader;
  38 import java.io.StringWriter;
  39 import java.io.Writer;
  40 import java.net.URL;
  41 import java.util.ArrayList;
  42 import java.util.Arrays;
  43 import java.util.HashSet;
  44 import java.util.Iterator;
  45 import java.util.List;
  46 import java.util.Map;
  47 import java.util.Set;
  48 import java.util.TreeMap;
  49 import java.util.Vector;
  50 import org.apache.tools.ant.BuildException;
  51 import org.apache.tools.ant.FileScanner;
  52 import org.apache.tools.ant.taskdefs.MatchingTask;
  53 
  54 /**
  55  * A utility to generate a glossary from a set of HTML files and directories.
  56  * The glossary terms are taken from the individual &lt;h1&gt; tags in the various files;
  57  * The glossary definitions are the body of those files.
  58  * Thus, to add a new entry into the glossary, it is simply necessary to add a
  59  * new file into the set of files passed to this program.
  60  *
  61  * <p>The output can be in one or both of two forms.
  62  *
  63  * <p>The glossary can be output as a JavaHelp-compatible glossary.xml file, with an associated
  64  * map file. The map file can be merged with any other maps with the "mapmerge"
  65  * utility.
  66  *
  67  * <p>Or, the glossary can be output as a single glossary.html file, containing the
  68  * sorted set of terms and their definitions. In this form, any &lt;h*&gt; tags in the
  69  * body are replaced with &lt;p class="glossaryHead*"&gt;.
  70  *
  71  * <p>The input files can have keywords associated with them, which can be used to
  72  * filter the files selected for the glossary. These keywords can be provided in
  73  * a META tag, as follows:<br>
  74  * &lt;META name="glossaryKeywords" content="<i>space-separated list of keywords</i>"&gt;
  75  */
  76 public class Main {
  77     /**
  78      * An exception to report bad command line arguments.
  79      */
  80     public static class BadArgs extends Exception {
  81         BadArgs(String msg) {
  82             super(msg);
  83         }
  84     }
  85 
  86     /**
  87      * Command line entry point.<br>
  88      * @param args Command line arguments, per the usage as described.
  89      */
  90     public static void main(String[] args) {
  91         try {
  92             if (args.length == 0)
  93                 usage(System.err);
  94             else {
  95                 Main m = new Main(args);
  96                 m.run();
  97             }
  98         }
  99         catch (BadArgs e) {
 100             System.err.println(e);
 101             usage(System.err);
 102             System.exit(1);
 103         }
 104         catch (Throwable t) {
 105             t.printStackTrace();
 106             System.exit(2);
 107         }
 108     }
 109 
 110     /**
 111      * Write out short command line help.
 112      * @param out A stream to which to write the help.
 113      */
 114     private static void usage(PrintStream out) {
 115         String program = System.getProperty("program", "java " + Main.class.getName());
 116         out.println("Usage:");
 117         out.println("   " + program + " options files...");
 118         out.println("");
 119         out.println("Arguments:");
 120         out.println("-mapOut map.xml");
 121         out.println("        Specify the location of the map.xml file to be written.");
 122         out.println("-htmlOut glossary.html");
 123         out.println("        Specify the location of the glossary.html file.");
 124         out.println("-xmlOut glossary.xml");
 125         out.println("        Specify the location of the glossary.xml file.");
 126         out.println("-key keyword");
 127         out.println("        Specify a keyword to filter HTML files.");
 128         out.println("files...");
 129         out.println("        HTML files and directories.");
 130     }
 131 
 132     public Main() { }
 133 
 134     /**
 135      * Create an object based on command line args.
 136      * It is an error if no input files or no output file is given.
 137      * @param args Command line args.
 138      * @see #main
 139      * @throws Main.BadArgs if problems are found in the given arguments.
 140      */
 141     public Main(String[] args) throws BadArgs {
 142         for (int i = 0; i < args.length; i++) {
 143             if (args[i].equalsIgnoreCase("-htmlout") && i + 1 < args.length) {
 144                 htmlOutFile = new File(args[++i]);
 145             }
 146             else if (args[i].equalsIgnoreCase("-xmlout") && i + 1 < args.length) {
 147                 xmlOutFile = new File(args[++i]);
 148             }
 149             else if (args[i].equalsIgnoreCase("-mapout") && i + 1 < args.length) {
 150                 mapOutFile = new File(args[++i]);
 151             }
 152             else if (args[i].equalsIgnoreCase("-mapdir") && i + 1 < args.length) {
 153                 mapDir = new File(args[++i]);
 154             }
 155             else if (args[i].equalsIgnoreCase("-key") && i + 1 < args.length) {
 156                 keyword = args[++i];
 157             }
 158             else {
 159                 inFiles = new File[args.length - i];
 160                 for (int j = 0; j < inFiles.length; j++)
 161                     inFiles[j] = new File(args[i++]);
 162             }
 163         }
 164     }
 165 
 166     public static class Ant extends MatchingTask {
 167         private Main m = new Main();
 168 
 169         public void setHtmlOutFile(File file) {
 170             m.htmlOutFile = file;
 171         }
 172 
 173         public void setXmlOutFile(File file) {
 174             m.xmlOutFile = file;
 175         }
 176 
 177         public void setMapOutFile(File file) {
 178             m.mapOutFile = file;
 179         }
 180 
 181         public void setMapDir(File file) {
 182             m.mapDir = file;
 183         }
 184 
 185         public void setKeyword(String key) {
 186             m.keyword = key;
 187         }
 188 
 189         public void setDir(File dir) {
 190             getImplicitFileSet().setDir(dir);
 191         }
 192 
 193         public void execute() {
 194             FileScanner s = getImplicitFileSet().getDirectoryScanner(getProject());
 195             m.addFiles(s.getBasedir(), s.getIncludedFiles());
 196 
 197             try {
 198                 m.run();
 199             } catch (BadArgs e) {
 200                 throw new BuildException(e.getMessage());
 201             } catch (IOException e) {
 202                 throw new BuildException(e);
 203             }
 204         }
 205     }
 206 
 207     public void addFiles(File baseDir, String[] paths) {
 208         if (paths == null)
 209             return;
 210         List<File> files = new ArrayList<File>();
 211         if (inFiles != null)
 212             files.addAll(Arrays.asList(inFiles));
 213         for (int i = 0; i < paths.length; i++)
 214             files.add(new File(baseDir, paths[i]));
 215         inFiles = files.toArray(new File[files.size()]);
 216     }
 217 
 218     private void run() throws BadArgs, IOException {
 219         if (inFiles == null || inFiles.length == 0)
 220             throw new BadArgs("no input files specified");
 221 
 222         if (htmlOutFile == null && mapOutFile == null && xmlOutFile == null)
 223             throw new BadArgs("no output files specified");
 224 
 225         if (xmlOutFile != null && mapOutFile == null )
 226             throw new BadArgs("no map output file specified");
 227 
 228         if (mapOutFile != null && xmlOutFile == null)
 229             throw new BadArgs("no XML output file specified");
 230 
 231         if (mapOutFile != null && mapDir == null)
 232             mapDir = mapOutFile.getParentFile();
 233 
 234         glossary = new TreeMap<>();
 235 
 236         read(inFiles);
 237 
 238         PrintWriter glossaryOut = (xmlOutFile == null ? null
 239                                    : new PrintWriter(new BufferedWriter(new FileWriter(xmlOutFile))));
 240         if (glossaryOut != null) {
 241             glossaryOut.println("<?xml version='1.0' encoding='ISO-8859-1'  ?>");
 242             glossaryOut.println("<!DOCTYPE index");
 243             glossaryOut.println("  PUBLIC \"-//Sun Microsystems Inc.//DTD JavaHelp Index Version 1.0//EN\"");
 244             glossaryOut.println("         \"http://java.sun.com/products/javahelp/index_1_0.dtd\">");
 245             glossaryOut.println("");
 246             glossaryOut.println("<index version=\"1.0\">");
 247         }
 248 
 249         PrintWriter mapOut = (mapOutFile == null ? null
 250                               : new PrintWriter(new BufferedWriter(new FileWriter(mapOutFile))));
 251         if (mapOut != null) {
 252             mapOut.println("<?xml version='1.0' encoding='ISO-8859-1' ?>");
 253             mapOut.println("<!DOCTYPE map");
 254             mapOut.println("  PUBLIC \"-//Sun Microsystems Inc.//DTD JavaHelp Map Version 1.0//EN\"");
 255             mapOut.println("         \"http://java.sun.com/products/javahelp/map_1_0.dtd\">");
 256             mapOut.println("<map version=\"1.0\">");
 257         }
 258 
 259         PrintWriter htmlOut = (htmlOutFile == null ? null
 260                                : new PrintWriter(new BufferedWriter(new FileWriter(htmlOutFile))));
 261         if (htmlOut != null) {
 262             htmlOut.println("<!DOCTYPE HTML>");
 263             htmlOut.println("<html>");
 264             htmlOut.println("<head>");
 265             htmlOut.println("<title>");
 266             htmlOut.println("Glossary");
 267             htmlOut.println("</title>");
 268             htmlOut.println("<LINK REL=\"stylesheet\" TYPE=\"text/css\" HREF=\"../jthelp.css\" TITLE=\"Style\">");
 269             htmlOut.println("</head>");
 270             htmlOut.println("<body>");
 271             htmlOut.println("<h1 class=\"glossary\">Glossary</h1>");
 272         }
 273 
 274         char currLetter = 0;
 275 
 276         for (Iterator<Entry> iter = glossary.values().iterator(); iter.hasNext(); ) {
 277             Entry e = (iter.next());
 278             if (!e.matches(keyword))
 279                 continue;
 280 
 281             String key = e.getKey();
 282             char initial = key.charAt(0);
 283             if (Character.isLetter(initial) && initial != currLetter) {
 284                 for (char c = (currLetter == 0 ? 'A' : (char) (currLetter + 1));
 285                      c <= initial; c++) {
 286                     if (glossaryOut != null) {
 287                         glossaryOut.println("");
 288                         glossaryOut.println("<!-- " + c + " -->");
 289                     }
 290                     if (htmlOut != null) {
 291                         htmlOut.println("");
 292                         htmlOut.println("<p class=\"glossaryHead2\">" + c + "</p>");
 293                     }
 294                 }
 295                 currLetter = initial;
 296             }
 297 
 298             if (glossaryOut != null)
 299                 glossaryOut.println("<indexitem text=\"" + key + "\" target=\"" + getTarget(key) + "\"/>");
 300 
 301             if (mapOut != null)
 302                 mapOut.println("<mapID target=\"" + getTarget(key) + "\" url=\"" + getRelativeFile(mapDir, e.getFile()) + "\" />");
 303 
 304             if (htmlOut != null)
 305                 htmlOut.println(e.getText());
 306         }
 307 
 308         for (char c = (currLetter == 0 ? 'A' : (char) (currLetter + 1)); c <= 'Z'; c++) {
 309             if (htmlOut != null)
 310                 htmlOut.println("<p class=\"glossaryHead2\">" + c + "</p>");
 311         }
 312 
 313         if (htmlOut != null) {
 314             htmlOut.println("</body>");
 315             htmlOut.println("</html>");
 316             htmlOut.close();
 317         }
 318 
 319         if (mapOut != null) {
 320             mapOut.println("</map>");
 321             mapOut.close();
 322         }
 323 
 324         if (glossaryOut != null) {
 325             glossaryOut.println("</index>");
 326             glossaryOut.close();
 327         }
 328     }
 329 
 330     private void read(File[] files) throws IOException {
 331         for (int i = 0; i < files.length; i++)
 332             read(files[i]);
 333     }
 334 
 335     private void read(File file) throws IOException {
 336         if (file.isDirectory()) {
 337             if (!file.getName().equals("SCCS"))
 338                 read(file.listFiles());
 339         }
 340         else {
 341             if (file.getName().endsWith(".html")) {
 342                 Entry e = new Entry(file);
 343                 glossary.put(e.getKey().toUpperCase(), e);
 344             }
 345         }
 346     }
 347 
 348     private File getRelativeFile(File dir, File file) {
 349         String dp = dir.getPath() + "/";
 350         String fp = file.getPath();
 351         return (fp.startsWith(dp) ? new File(fp.substring(dp.length())) : file);
 352     }
 353 
 354     private static String getTarget(String key) {
 355         StringBuffer sb = new StringBuffer();
 356         sb.append("glossary.");
 357         boolean needUpper = false;
 358         for (int i = 0; i < key.length(); i++) {
 359             char c = key.charAt(i);
 360             if (Character.isLetter(c)) {
 361                 sb.append(needUpper ? Character.toUpperCase(c) : c);
 362                 needUpper = false;
 363             }
 364             else
 365                 needUpper = true;
 366         }
 367         return sb.toString();
 368     }
 369 
 370     private File[] inFiles;
 371     private File htmlOutFile;
 372     private File mapOutFile;
 373     private File mapDir;
 374     private File xmlOutFile;
 375     private String keyword;
 376     private Map<String, Entry> glossary;
 377 
 378 }
 379 
 380 
 381 class Entry {
 382     Entry(File f) throws IOException {
 383         file = f;
 384         Reader in = new BufferedReader(new FileReader(f));
 385         Writer out = new StringWriter();
 386         copy(in, out);
 387         text = out.toString().trim();
 388         head1 = head1.trim();
 389 
 390         // System.err.println("<<< " + head1 + " >>>");
 391         // System.err.println(text);
 392         // System.err.println();
 393         // System.err.println();
 394     }
 395 
 396     File getFile() {
 397         return file;
 398     }
 399 
 400     String getKey() {
 401         return head1;
 402     }
 403 
 404     String getText() {
 405         return text;
 406     }
 407 
 408     boolean matches(String keyword) {
 409         if (keyword == null)
 410             return true;
 411 
 412         if (keywords == null || keywords.size() == 0)
 413             return true;
 414 
 415         return keywords.contains(keyword);
 416     }
 417 
 418     /**
 419      * Copy the input stream to the output, looking for tags that need
 420      * to be rewritten.
 421      */
 422     private void copy(Reader in, Writer out) throws IOException {
 423         this.in = in;
 424         this.out = out;
 425         head1 = "";
 426         hIndent = 2;
 427         copyMode = NO_COPY;
 428         line = 1;
 429         nextCh();
 430         while (c >= 0) {
 431             if (c == '<') {
 432                 if (copyMode == COPY) {
 433                     copyMode = PENDING_COPY;
 434                     pendingCopy.setLength(0);
 435                 }
 436 
 437                 nextCh();
 438                 skipSpace();
 439                 switch (c) {
 440                 case '!':
 441                     nextCh();
 442                     if (c == '-') {
 443                         nextCh();
 444                         if (c == '-') {
 445                             nextCh();
 446                             skipComment();
 447                         }
 448                     }
 449                     break;
 450 
 451                 case '/':
 452                     nextCh();
 453                     String endTag = scanIdentifier();
 454                     if (copyMode != PENDING_COPY)
 455                         skipTag();
 456                     else if (isBody(endTag))
 457                         scanBody(false);
 458                     else if (isHead(endTag))
 459                         scanHead(Character.getNumericValue(endTag.charAt(1)), false);
 460                     else
 461                         skipTag();
 462                     break;
 463 
 464                 default:
 465                     String startTag = scanIdentifier();
 466                     if (isBody(startTag))
 467                         scanBody(true);
 468                     else if (isMeta(startTag))
 469                         scanMeta();
 470                     else if (copyMode != PENDING_COPY)
 471                         skipTag();
 472                     //else if (isArea(startTag))
 473                     //  scanArea();
 474                     else if (isHead(startTag))
 475                         scanHead(Character.getNumericValue(startTag.charAt(1)), true);
 476                     //else if (isImage(startTag))
 477                     //  scanImage();
 478                     else if (isLink(startTag))
 479                         scanLink();
 480                     //else if (isMap(startTag))
 481                     //  scanMap();
 482                     else
 483                         skipTag();
 484                 }
 485             }
 486             else {
 487                 if (inHead1)
 488                     head1 += ((char) c);
 489                 nextCh();
 490             }
 491         }
 492 
 493     }
 494 
 495 //    private boolean isArea(String tag) {
 496 //      return tag.equals("area");
 497 //    }
 498 //
 499 //    /**
 500 //     * Process the contents of <area ... href=...>
 501 //     */
 502 //    private void scanArea() throws IOException {
 503 //      out.write(pendingCopy.toString());
 504 //      copyMode = COPY;
 505 //
 506 //      skipSpace();
 507 //      while (c != '>') {
 508 //          String att = scanIdentifier();
 509 //          if (att.equalsIgnoreCase("href") && copyMode == COPY) {
 510 //              // the current character should be a whitespace or =
 511 //              // either way, we just write out =
 512 //              out.write('=');
 513 //              copyMode = NO_COPY;
 514 //              String target = scanValue();
 515 //              URL t = new URL(currURL, target);
 516 //              String link = t.getFile();
 517 //              if (link.startsWith(basePath))
 518 //                  link = link.substring(basePath.length());
 519 //              if (link.endsWith(".html"))
 520 //                  link = link.substring(0, link.length() - 5);
 521 //              String ref = t.getRef();
 522 //              if (ref != null && ref.length() > 0)
 523 //                  link = link + "!" + ref;
 524 //              out.write("\"");
 525 //              out.write('#' + link);
 526 //              out.write("\" ");
 527 //              copyMode = COPY;
 528 //          }
 529 //          else
 530 //              scanValue();
 531 //          skipSpace();
 532 //      }
 533 //      nextCh();
 534 //    }
 535 
 536     private boolean isBody(String tag) {
 537         return tag.equals("body");
 538     }
 539 
 540     private void scanBody(boolean start) throws IOException {
 541         if (start) {
 542             skipSpace();
 543             while (c != '>') {
 544                 scanIdentifier();
 545                 scanValue();
 546                 skipSpace();
 547             }
 548             nextCh(); // skip past >
 549             copyMode = COPY;
 550 //          String link = currURL.getFile();
 551 //          if (link.startsWith(basePath))
 552 //              link = link.substring(basePath.length());
 553             String link = file.getName();
 554             if (link.endsWith(".html"))
 555                 link = link.substring(0, link.length() - 5);
 556             out.write("\n<!-- file: " + file + " -->\n<a name=\"" + link + "\"></a>");
 557         }
 558         else {
 559             copyMode = NO_COPY;
 560         }
 561     }
 562 
 563     private boolean isHead(String tag) {
 564         return ( tag.length() == 2
 565                  && tag.charAt(0) == 'h'
 566                  && Character.isDigit(tag.charAt(1)) );
 567     }
 568 
 569     private void scanHead(int level, boolean start) throws IOException {
 570 
 571         if (copyMode == PENDING_COPY) {
 572             int n = Math.min(hIndent + level, 6);
 573             out.write('<');
 574             // for glossary.pdf, we change <hn> to <p> and default class to glossaryHeadn
 575             out.write(start ? "p" : "/p");
 576 
 577 // standard head code
 578 //          if (!start)
 579 //              out.write('/');
 580 //          out.write('h');
 581 //          out.write(String.valueOf(n));
 582 
 583             copyMode = COPY;
 584 
 585             String className = null;
 586             skipSpace();
 587             while (c != '>') {
 588                 String name = scanIdentifier();
 589                 String value = scanValue();
 590                 if (name.equalsIgnoreCase("class"))
 591                     className = value;
 592                 skipSpace();
 593             }
 594 
 595             if (start && className == null) {
 596                 // write default class
 597                 out.write(" class=\"glossaryHead");
 598                 out.write(String.valueOf(n));
 599                 out.write('"');
 600             }
 601 
 602             nextCh(); // skip past >
 603 
 604             if (level == 1)
 605                 inHead1 = start;
 606 
 607 //          if (start && autoNumberLevel > 0) {
 608 //              hNums[n - 1]++;
 609 //              if (n < 6)
 610 //                  hNums[n] = 0;
 611 //              if (n <= autoNumberLevel) {
 612 //                  for (int i = 0; i < n; i++) {
 613 //                      out.write(String.valueOf(hNums[i]));
 614 //                      out.write('.');
 615 //                  }
 616 //                  out.write("&nbsp;");
 617 //              }
 618 //          }
 619         }
 620     }
 621 
 622 //    private boolean isImage(String tag) {
 623 //      return tag.equals("img");
 624 //    }
 625 //
 626 //    /**
 627 //     * Process the contents of <a href=...>
 628 //     */
 629 //    private void scanImage() throws IOException {
 630 //      out.write(pendingCopy.toString());
 631 //      copyMode = COPY;
 632 //
 633 //      skipSpace();
 634 //      while (c != '>') {
 635 //          String att = scanIdentifier();
 636 //          if (att.equalsIgnoreCase("src") && copyMode == COPY) {
 637 //              // the current character should be a whitespace or =
 638 //              // either way, we just write out =
 639 //              out.write('=');
 640 //              copyMode = NO_COPY;
 641 //              String src = scanValue();
 642 //              URL u = new URL(currURL, src);
 643 //              String srcPath = u.getFile();
 644 //              // if the path refers to an entry in the /images directory,
 645 //              // check for a matching entry in the /pdfImages directory
 646 //              // and use that if found.
 647 //              int imagesIndex = srcPath.indexOf("/images/");
 648 //              if (imagesIndex >= 0) {
 649 //                  String pdfImagePath = srcPath.substring(0, imagesIndex)
 650 //                      + "/pdfImages/"
 651 //                      + srcPath.substring(imagesIndex + "/images/".length());
 652 //                  if (new File(pdfImagePath).exists())
 653 //                      srcPath = pdfImagePath;
 654 //              }
 655 //              out.write('"');
 656 //              out.write(srcPath);
 657 //              out.write('"');
 658 //              copyMode = COPY;
 659 //          }
 660 //          else if (att.equalsIgnoreCase("usemap") && copyMode == COPY) {
 661 //              // the current character should be a whitespace or =
 662 //              // either way, we just write out =
 663 //              out.write('=');
 664 //              copyMode = NO_COPY;
 665 //              String target = scanValue();
 666 //              URL t = new URL(currURL, target);
 667 //              String link = t.getFile();
 668 //              if (link.startsWith(basePath))
 669 //                  link = link.substring(basePath.length());
 670 //              if (link.endsWith(".html"))
 671 //                  link = link.substring(0, link.length() - 5);
 672 //              String ref = t.getRef();
 673 //              if (ref != null && ref.length() > 0)
 674 //                  link = link + "!" + ref;
 675 //              out.write("\"");
 676 //              out.write('#' + link);
 677 //              out.write("\" ");
 678 //              copyMode = COPY;
 679 //          }
 680 //          else
 681 //              scanValue();
 682 //          skipSpace();
 683 //      }
 684 //      nextCh();
 685 //    }
 686 
 687     private boolean isLink(String tag) {
 688         return tag.equals("a");
 689     }
 690 
 691     /**
 692      * Process the contents of <a href=...>
 693      */
 694     private void scanLink() throws IOException {
 695         out.write(pendingCopy.toString());
 696         copyMode = COPY;
 697 
 698         skipSpace();
 699         while (c != '>') {
 700             String att = scanIdentifier();
 701             if (att.equalsIgnoreCase("href") && copyMode == COPY) {
 702                 // the current character should be a whitespace or =
 703                 // either way, we just write out =
 704                 out.write('=');
 705                 copyMode = NO_COPY;
 706                 String target = scanValue();
 707                 URL t = new URL(file.toURL(), target);
 708 //              String link = t.getFile();
 709                 String link = target;
 710 //              if (link.startsWith(basePath))
 711 //                  link = link.substring(basePath.length());
 712                 if (link.endsWith(".html"))
 713                     link = link.substring(0, link.length() - 5);
 714                 String ref = t.getRef();
 715                 if (ref != null && ref.length() > 0)
 716                     link = link + "!" + ref;
 717                 out.write('"');
 718                 out.write('#' + link);
 719                 out.write('"');
 720                 copyMode = COPY;
 721             }
 722             else if (att.equalsIgnoreCase("name") && copyMode == COPY) {
 723                 // the current character should be a whitespace or =
 724                 // either way, we just write out =
 725                 out.write('=');
 726                 copyMode = NO_COPY;
 727                 String oldName = scanValue();
 728 //              String name = currURL.getFile();
 729                 String name = file.getPath();
 730 //              if (name.startsWith(basePath))
 731 //                  name = name.substring(basePath.length());
 732                 if (name.endsWith(".html"))
 733                     name = name.substring(0, name.length() - 5);
 734                 name = name + "!" + oldName;
 735                 out.write('"');
 736                 out.write(name);
 737                 out.write('"');
 738                 copyMode = COPY;
 739             }
 740             else
 741                 scanValue();
 742             skipSpace();
 743         }
 744         nextCh();
 745     }
 746 
 747 //    private boolean isMap(String tag) {
 748 //      return tag.equals("map");
 749 //    }
 750 //
 751 //    /**
 752 //     * Process the contents of <map name=...>
 753 //     */
 754 //    private void scanMap() throws IOException {
 755 //      out.write(pendingCopy.toString());
 756 //      copyMode = COPY;
 757 //
 758 //      skipSpace();
 759 //      while (c != '>') {
 760 //          String att = scanIdentifier();
 761 //          if (att.equalsIgnoreCase("name") && copyMode == COPY) {
 762 //              // the current character should be a whitespace or =
 763 //              // either way, we just write out =
 764 //              out.write('=');
 765 //              copyMode = NO_COPY;
 766 //              String oldName = scanValue();
 767 //              String name = currURL.getFile();
 768 //              if (name.startsWith(basePath))
 769 //                  name = name.substring(basePath.length());
 770 //              if (name.endsWith(".html"))
 771 //                  name = name.substring(0, name.length() - 5);
 772 //              name = name + "!" + oldName;
 773 //              out.write('"');
 774 //              out.write(name);
 775 //              out.write('"');
 776 //              copyMode = COPY;
 777 //          }
 778 //          else
 779 //              scanValue();
 780 //          skipSpace();
 781 //      }
 782 //      nextCh();
 783 //    }
 784 
 785     private boolean isMeta(String tag) {
 786         return tag.equals("meta");
 787     }
 788 
 789     private void scanMeta() throws IOException {
 790         String name = "";
 791         String content = "";
 792 
 793         skipSpace();
 794         while (c != '>') {
 795             String attr_name = scanIdentifier();
 796             String attr_val = scanValue();
 797             if (attr_name.equalsIgnoreCase("name"))
 798                 name = attr_val;
 799             else if (attr_name.equalsIgnoreCase("content"))
 800                 content = attr_val;
 801             skipSpace();
 802         }
 803         nextCh();
 804 
 805 //      if (name.equalsIgnoreCase("hIndent")) {
 806 //          hIndent = Integer.parseInt(content);
 807 //      }
 808         if (name.equalsIgnoreCase("glossaryKeywords")) {
 809             keywords = new HashSet<>(Arrays.asList(split(content)));
 810         }
 811     }
 812 
 813     private boolean isTitle(String tag) {
 814         return tag.equals("title");
 815     }
 816 
 817     /**
 818      * Read an identifier, and lowercase it
 819      */
 820     private String scanIdentifier() throws IOException {
 821         StringBuffer buf = new StringBuffer();
 822         while (true) {
 823             if ((c >= 'a') && (c <= 'z')) {
 824                 buf.append((char)c);
 825                 nextCh();
 826             } else if ((c >= 'A') && (c <= 'Z')) {
 827                 buf.append((char)('a' + (c - 'A')));
 828                 nextCh();
 829             } else if ((c >= '0') && (c <= '9')) {
 830                 buf.append((char)c);
 831                 nextCh();
 832             } else if (c == '-') {  // needed for <META HTTP-EQUIV ....>
 833                 buf.append((char)c);
 834                 nextCh();
 835             } else
 836                 if (buf.length() == 0)
 837                     throw new IOException("Identifier expected (" + file + ":" + line + ")");
 838                 else
 839                     return buf.toString();
 840         }
 841     }
 842 
 843     /**
 844      * Read the value of an HTML attribute, which may be quoted.
 845      */
 846     private String scanValue() throws IOException {
 847         skipSpace();
 848         if (c != '=')
 849             return "";
 850 
 851         int quote = -1;
 852         nextCh();
 853         skipSpace();
 854         if ((c == '\'') || (c == '\"')) {
 855             quote = c;
 856             nextCh();
 857             skipSpace();
 858         }
 859         StringBuffer buf = new StringBuffer();
 860         while (((quote < 0) && (c != ' ') && (c != '\t') &&
 861                 (c != '\n') && (c != '\r') && (c != '>')) ||
 862                ((quote >= 0) && (c != quote))) {
 863             if (c == -1 || c == '\n' || c == '\r') {
 864                 throw new IOException("mismatched quotes (" + file + ":" + line + ")");
 865             }
 866             buf.append((char)c);
 867             nextCh();
 868         }
 869         if (c == quote)
 870             nextCh();
 871         skipSpace();
 872         return buf.toString();
 873     }
 874 
 875     /**
 876      * Skip an HTML comment  <!-- ... -->
 877      */
 878     private void skipComment() throws IOException {
 879         // a comment sequence is "<!--" ... "-->"
 880         // at the time this is called, "<!--" has been read;
 881         StringBuffer text = new StringBuffer("<!--");
 882         int numHyphens = 0;
 883         while (c != -1 && (numHyphens < 2 || c != '>')) {
 884             if (c == '-')
 885                 numHyphens++;
 886             else
 887                 numHyphens = 0;
 888             text.append((char) c);
 889             nextCh();
 890             //System.out.print((char)c);
 891         }
 892         text.append((char) c);
 893         nextCh();
 894 
 895         String comment = text.toString();
 896 
 897         switch (copyMode) {
 898 
 899         case PENDING_COPY:
 900             if (comment.equalsIgnoreCase("<!--CopyOff-->")) {
 901                 copyMode = SUPPRESS_COPY;
 902                 pendingCopy.setLength(0);
 903             }
 904             else {
 905                 out.write(comment);
 906                 copyMode = COPY;
 907             }
 908             break;
 909 
 910         case SUPPRESS_COPY:
 911             if (comment.equalsIgnoreCase("<!--CopyOn-->"))
 912                 copyMode = COPY;
 913             break;
 914         }
 915     }
 916 
 917     /**
 918      * Skip whitespace.
 919      */
 920     private void skipSpace() throws IOException {
 921         while ((c == ' ') || (c == '\t') || (c == '\n') || (c == '\r')) {
 922             nextCh();
 923         }
 924     }
 925 
 926     /**
 927      * Skip the contents of an HTML tag i.e. <...>
 928      */
 929     private void skipTag() throws IOException {
 930         skipSpace();
 931         while (c != '>') {
 932             String att = scanIdentifier();
 933             if (att == "")
 934                 throw new IOException("error parsing HTML input (" + file + ":" + line + ")");
 935             String value = scanValue();
 936             skipSpace();
 937         }
 938         nextCh();
 939 
 940         if (copyMode == PENDING_COPY) {
 941             out.write(pendingCopy.toString());
 942             copyMode = COPY;
 943         }
 944     }
 945 
 946     /**
 947      * Read the next character.
 948      */
 949     private void nextCh() throws IOException {
 950         switch (copyMode) {
 951         case COPY:
 952             out.write((char) c);
 953             break;
 954 
 955         case PENDING_COPY:
 956             pendingCopy.append((char) c);
 957             break;
 958         }
 959 
 960         c = in.read();
 961         if (c == '\n')
 962             line++;
 963     }
 964 
 965     private static String escape(String s) {
 966         for (int i = 0; i < s.length(); i++) {
 967             switch (s.charAt(i)) {
 968             case '<': case '>': case '&':
 969                 StringBuffer sb = new StringBuffer(s.length()*2);
 970                 for (int j = 0; j < s.length(); j++) {
 971                     char c = s.charAt(j);
 972                     switch (c) {
 973                     case '<': sb.append("&lt;"); break;
 974                     case '>': sb.append("&gt;"); break;
 975                     case '&': sb.append("&amp;"); break;
 976                     default: sb.append(c);
 977                     }
 978                 }
 979                 return sb.toString();
 980             }
 981         }
 982         return s;
 983     }
 984 
 985     private static String[] split(String s) {
 986         Vector<String> v = new Vector<>();
 987         int start = -1;
 988         for (int i = 0; i < s.length(); i++) {
 989             char c = s.charAt(i);
 990             if (Character.isLetterOrDigit(c) || c == '_') {
 991                 if (start == -1)
 992                     start = i;
 993             }
 994             else {
 995                 if (start != -1)
 996                     v.addElement(s.substring(start, i));
 997                 start = -1;
 998             }
 999         }
1000         if (start != -1)
1001             v.addElement(s.substring(start));
1002         String[] a = new String[v.size()];
1003         v.copyInto(a);
1004         return a;
1005     }
1006 
    // The file named in the "(file:line)" part of parse error messages.
    private File file;
    // NOTE(review): head1, text and inHead1 are not used by the scanning
    // methods at the end of this class; presumably they hold the first
    // heading and the body text collected elsewhere -- confirm.
    private String head1;
    private String text;
    // Assigned by scanMeta() from the content of a META tag whose name is
    // "glossaryKeywords".
    private Set<String> keywords;

    // The input from which nextCh() reads characters.
    private Reader in;
    // The output to which copied characters and text are written.
    private Writer out;
    // The current character, as last returned by in.read(); -1 at end of input.
    private int c;
    private boolean inHead1;
    // Incremented by nextCh() each time a newline is read; reported in
    // parse error messages.
    private int line;
    // One of the constants below; determines what nextCh() does with the
    // current character before reading the next one.
    private int copyMode;
    // COPY: nextCh() writes the current character to out.
    // PENDING_COPY: nextCh() appends the current character to pendingCopy.
    // NO_COPY, SUPPRESS_COPY: nextCh() drops the current character;
    // skipComment() leaves SUPPRESS_COPY for COPY on "<!--CopyOn-->".
    private static final int NO_COPY = 0, PENDING_COPY = 1, SUPPRESS_COPY = 2, COPY = 3;
    // Characters accumulated while in PENDING_COPY mode; written out by
    // skipTag(), discarded by skipComment() on "<!--CopyOff-->".
    private StringBuffer pendingCopy = new StringBuffer();
    // NOTE(review): the only reference to hIndent near here is in code that
    // is commented out in scanMeta(); hNums is not referenced by the
    // scanning methods at the end of this class -- confirm whether both are
    // still needed.
    private int hIndent;
    private int[] hNums = new int[6];
1022 }