1 /* 2 * $Id$ 3 * 4 * Copyright (c) 2004, 2009, Oracle and/or its affiliates. All rights reserved. 5 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 6 * 7 * This code is free software; you can redistribute it and/or modify it 8 * under the terms of the GNU General Public License version 2 only, as 9 * published by the Free Software Foundation. Oracle designates this 10 * particular file as subject to the "Classpath" exception as provided 11 * by Oracle in the LICENSE file that accompanied this code. 12 * 13 * This code is distributed in the hope that it will be useful, but WITHOUT 14 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 15 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 16 * version 2 for more details (a copy is included in the LICENSE file that 17 * accompanied this code). 18 * 19 * You should have received a copy of the GNU General Public License version 20 * 2 along with this work; if not, write to the Free Software Foundation, 21 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 22 * 23 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 24 * or visit www.oracle.com if you need additional information or have any 25 * questions. 26 */ 27 package com.sun.jct.utils.glossarygen; 28 29 import java.io.BufferedReader; 30 import java.io.BufferedWriter; 31 import java.io.File; 32 import java.io.FileReader; 33 import java.io.FileWriter; 34 import java.io.IOException; 35 import java.io.PrintStream; 36 import java.io.PrintWriter; 37 import java.io.Reader; 38 import java.io.StringWriter; 39 import java.io.Writer; 40 import java.net.URL; 41 import java.util.ArrayList; 42 import java.util.Arrays; 43 import java.util.HashSet; 44 import java.util.Iterator; 45 import java.util.List; 46 import java.util.Map; 47 import java.util.Set; 48 import java.util.TreeMap; 49 import java.util.Vector; 50 import org.apache.tools.ant.BuildException; 51 import org.apache.tools.ant.FileScanner; 52 import org.apache.tools.ant.taskdefs.MatchingTask; 53 54 /** 55 * A utility to generate a glossary from a set of HTML files and directories. 56 * The glossary terms are taken from the individual <h1> tags in the various files; 57 * The glossary definitions are the body of those files. 58 * Thus, to add a new entry into the glossary, it is simply necessary to add a 59 * new file into the set of files passed to this program. 60 * 61 * <p>The output can be in one or both of two forms. 62 * 63 * <p>The glossary can be output as a JavaHelp-compatible glossary.xml file, with an associated 64 * map file. The map file can be merged with any other maps with the "mapmerge" 65 * utility. 66 * 67 * <p>Or, the glossary can be output as a single glossary.html file, containing the 68 * sorted set of terms and their definitions. In the form, any <h*> tags in the 69 * body are replaced with <p class="glossaryHead*">. 70 * 71 * <p>The input files can have keywords associated with them, which can be used to 72 * filter the files selected for the glossary. These keywords can be provided in 73 * META tag, as follows:<br> 74 * <META name="glossaryKeywords" content="<i>space-separated list of keywords</i>"> 75 */ 76 public class Main { 77 /** 78 * An exception to report bad command line arguments. 79 */ 80 public static class BadArgs extends Exception { 81 BadArgs(String msg) { 82 super(msg); 83 } 84 } 85 86 /** 87 * Command line entry point.<br> 88 * @param args Command line arguments, per the usage as described. 89 */ 90 public static void main(String[] args) { 91 try { 92 if (args.length == 0) 93 usage(System.err); 94 else { 95 Main m = new Main(args); 96 m.run(); 97 } 98 } 99 catch (BadArgs e) { 100 System.err.println(e); 101 usage(System.err); 102 System.exit(1); 103 } 104 catch (Throwable t) { 105 t.printStackTrace(); 106 System.exit(2); 107 } 108 } 109 110 /** 111 * Write out short command line help. 112 * @param out A stream to which to write the help. 113 */ 114 private static void usage(PrintStream out) { 115 String program = System.getProperty("program", "java " + Main.class.getName()); 116 out.println("Usage:"); 117 out.println(" " + program + " options files..."); 118 out.println(""); 119 out.println("Arguments:"); 120 out.println("-mapOut map.xml"); 121 out.println(" Specify the location of the map.xml file to be written."); 122 out.println("-htmlOut glossary.html"); 123 out.println(" Specify the location of the glossary.html file."); 124 out.println("-xmlOut glossary.xml"); 125 out.println(" Specify the location of the glossary.xml file."); 126 out.println("-key keyword"); 127 out.println(" Specify a keyword to filter HTML files."); 128 out.println("files..."); 129 out.println(" HTML files and directories."); 130 } 131 132 public Main() { } 133 134 /** 135 * Create an object based on command line args. 136 * It is an error if no input files or no output file is given. 137 * @param args Command line args. 138 * @see #main 139 * @throws Main.BadArgs if problems are found in the given arguments. 140 */ 141 public Main(String[] args) throws BadArgs { 142 for (int i = 0; i < args.length; i++) { 143 if (args[i].equalsIgnoreCase("-htmlout") && i + 1 < args.length) { 144 htmlOutFile = new File(args[++i]); 145 } 146 else if (args[i].equalsIgnoreCase("-xmlout") && i + 1 < args.length) { 147 xmlOutFile = new File(args[++i]); 148 } 149 else if (args[i].equalsIgnoreCase("-mapout") && i + 1 < args.length) { 150 mapOutFile = new File(args[++i]); 151 } 152 else if (args[i].equalsIgnoreCase("-mapdir") && i + 1 < args.length) { 153 mapDir = new File(args[++i]); 154 } 155 else if (args[i].equalsIgnoreCase("-key") && i + 1 < args.length) { 156 keyword = args[++i]; 157 } 158 else { 159 inFiles = new File[args.length - i]; 160 for (int j = 0; j < inFiles.length; j++) 161 inFiles[j] = new File(args[i++]); 162 } 163 } 164 } 165 166 public static class Ant extends MatchingTask { 167 private Main m = new Main(); 168 169 public void setHtmlOutFile(File file) { 170 m.htmlOutFile = file; 171 } 172 173 public void setXmlOutFile(File file) { 174 m.xmlOutFile = file; 175 } 176 177 public void setMapOutFile(File file) { 178 m.mapOutFile = file; 179 } 180 181 public void setMapDir(File file) { 182 m.mapDir = file; 183 } 184 185 public void setKeyword(String key) { 186 m.keyword = key; 187 } 188 189 public void setDir(File dir) { 190 getImplicitFileSet().setDir(dir); 191 } 192 193 public void execute() { 194 FileScanner s = getImplicitFileSet().getDirectoryScanner(getProject()); 195 m.addFiles(s.getBasedir(), s.getIncludedFiles()); 196 197 try { 198 m.run(); 199 } catch (BadArgs e) { 200 throw new BuildException(e.getMessage()); 201 } catch (IOException e) { 202 throw new BuildException(e); 203 } 204 } 205 } 206 207 public void addFiles(File baseDir, String[] paths) { 208 if (paths == null) 209 return; 210 List<File> files = new ArrayList<File>(); 211 if (inFiles != null) 212 files.addAll(Arrays.asList(inFiles)); 213 for (int i = 0; i < paths.length; i++) 214 files.add(new File(baseDir, paths[i])); 215 inFiles = files.toArray(new File[files.size()]); 216 } 217 218 private void run() throws BadArgs, IOException { 219 if (inFiles == null || inFiles.length == 0) 220 throw new BadArgs("no input files specified"); 221 222 if (htmlOutFile == null && mapOutFile == null && xmlOutFile == null) 223 throw new BadArgs("no output files specified"); 224 225 if (xmlOutFile != null && mapOutFile == null ) 226 throw new BadArgs("no map output file specified"); 227 228 if (mapOutFile != null && xmlOutFile == null) 229 throw new BadArgs("no XML output file specified"); 230 231 if (mapOutFile != null && mapDir == null) 232 mapDir = mapOutFile.getParentFile(); 233 234 glossary = new TreeMap<>(); 235 236 read(inFiles); 237 238 PrintWriter glossaryOut = (xmlOutFile == null ? null 239 : new PrintWriter(new BufferedWriter(new FileWriter(xmlOutFile)))); 240 if (glossaryOut != null) { 241 glossaryOut.println("<?xml version='1.0' encoding='ISO-8859-1' ?>"); 242 glossaryOut.println("<!DOCTYPE index"); 243 glossaryOut.println(" PUBLIC \"-//Sun Microsystems Inc.//DTD JavaHelp Index Version 1.0//EN\""); 244 glossaryOut.println(" \"http://java.sun.com/products/javahelp/index_1_0.dtd\">"); 245 glossaryOut.println(""); 246 glossaryOut.println("<index version=\"1.0\">"); 247 } 248 249 PrintWriter mapOut = (mapOutFile == null ? null 250 : new PrintWriter(new BufferedWriter(new FileWriter(mapOutFile)))); 251 if (mapOut != null) { 252 mapOut.println("<?xml version='1.0' encoding='ISO-8859-1' ?>"); 253 mapOut.println("<!DOCTYPE map"); 254 mapOut.println(" PUBLIC \"-//Sun Microsystems Inc.//DTD JavaHelp Map Version 1.0//EN\""); 255 mapOut.println(" \"http://java.sun.com/products/javahelp/map_1_0.dtd\">"); 256 mapOut.println("<map version=\"1.0\">"); 257 } 258 259 PrintWriter htmlOut = (htmlOutFile == null ? null 260 : new PrintWriter(new BufferedWriter(new FileWriter(htmlOutFile)))); 261 if (htmlOut != null) { 262 htmlOut.println("<!DOCTYPE HTML>"); 263 htmlOut.println("<html>"); 264 htmlOut.println("<head>"); 265 htmlOut.println("<title>"); 266 htmlOut.println("Glossary"); 267 htmlOut.println("</title>"); 268 htmlOut.println("<LINK REL=\"stylesheet\" TYPE=\"text/css\" HREF=\"../jthelp.css\" TITLE=\"Style\">"); 269 htmlOut.println("</head>"); 270 htmlOut.println("<body>"); 271 htmlOut.println("<h1 class=\"glossary\">Glossary</h1>"); 272 } 273 274 char currLetter = 0; 275 276 for (Iterator<Entry> iter = glossary.values().iterator(); iter.hasNext(); ) { 277 Entry e = (iter.next()); 278 if (!e.matches(keyword)) 279 continue; 280 281 String key = e.getKey(); 282 char initial = key.charAt(0); 283 if (Character.isLetter(initial) && initial != currLetter) { 284 for (char c = (currLetter == 0 ? 'A' : (char) (currLetter + 1)); 285 c <= initial; c++) { 286 if (glossaryOut != null) { 287 glossaryOut.println(""); 288 glossaryOut.println("<!-- " + c + " -->"); 289 } 290 if (htmlOut != null) { 291 htmlOut.println(""); 292 htmlOut.println("<p class=\"glossaryHead2\">" + c + "</p>"); 293 } 294 } 295 currLetter = initial; 296 } 297 298 if (glossaryOut != null) 299 glossaryOut.println("<indexitem text=\"" + key + "\" target=\"" + getTarget(key) + "\"/>"); 300 301 if (mapOut != null) 302 mapOut.println("<mapID target=\"" + getTarget(key) + "\" url=\"" + getRelativeFile(mapDir, e.getFile()) + "\" />"); 303 304 if (htmlOut != null) 305 htmlOut.println(e.getText()); 306 } 307 308 for (char c = (currLetter == 0 ? 'A' : (char) (currLetter + 1)); c <= 'Z'; c++) { 309 if (htmlOut != null) 310 htmlOut.println("<p class=\"glossaryHead2\">" + c + "</p>"); 311 } 312 313 if (htmlOut != null) { 314 htmlOut.println("</body>"); 315 htmlOut.println("</html>"); 316 htmlOut.close(); 317 } 318 319 if (mapOut != null) { 320 mapOut.println("</map>"); 321 mapOut.close(); 322 } 323 324 if (glossaryOut != null) { 325 glossaryOut.println("</index>"); 326 glossaryOut.close(); 327 } 328 } 329 330 private void read(File[] files) throws IOException { 331 for (int i = 0; i < files.length; i++) 332 read(files[i]); 333 } 334 335 private void read(File file) throws IOException { 336 if (file.isDirectory()) { 337 if (!file.getName().equals("SCCS")) 338 read(file.listFiles()); 339 } 340 else { 341 if (file.getName().endsWith(".html")) { 342 Entry e = new Entry(file); 343 glossary.put(e.getKey().toUpperCase(), e); 344 } 345 } 346 } 347 348 private File getRelativeFile(File dir, File file) { 349 String dp = dir.getPath() + "/"; 350 String fp = file.getPath(); 351 return (fp.startsWith(dp) ? new File(fp.substring(dp.length())) : file); 352 } 353 354 private static String getTarget(String key) { 355 StringBuffer sb = new StringBuffer(); 356 sb.append("glossary."); 357 boolean needUpper = false; 358 for (int i = 0; i < key.length(); i++) { 359 char c = key.charAt(i); 360 if (Character.isLetter(c)) { 361 sb.append(needUpper ? Character.toUpperCase(c) : c); 362 needUpper = false; 363 } 364 else 365 needUpper = true; 366 } 367 return sb.toString(); 368 } 369 370 private File[] inFiles; 371 private File htmlOutFile; 372 private File mapOutFile; 373 private File mapDir; 374 private File xmlOutFile; 375 private String keyword; 376 private Map<String, Entry> glossary; 377 378 } 379 380 381 class Entry { 382 Entry(File f) throws IOException { 383 file = f; 384 Reader in = new BufferedReader(new FileReader(f)); 385 Writer out = new StringWriter(); 386 copy(in, out); 387 text = out.toString().trim(); 388 head1 = head1.trim(); 389 390 // System.err.println("<<< " + head1 + " >>>"); 391 // System.err.println(text); 392 // System.err.println(); 393 // System.err.println(); 394 } 395 396 File getFile() { 397 return file; 398 } 399 400 String getKey() { 401 return head1; 402 } 403 404 String getText() { 405 return text; 406 } 407 408 boolean matches(String keyword) { 409 if (keyword == null) 410 return true; 411 412 if (keywords == null || keywords.size() == 0) 413 return true; 414 415 return keywords.contains(keyword); 416 } 417 418 /** 419 * Copy the input stream to the output, looking for tags that need 420 * to be rewritten. 421 */ 422 private void copy(Reader in, Writer out) throws IOException { 423 this.in = in; 424 this.out = out; 425 head1 = ""; 426 hIndent = 2; 427 copyMode = NO_COPY; 428 line = 1; 429 nextCh(); 430 while (c >= 0) { 431 if (c == '<') { 432 if (copyMode == COPY) { 433 copyMode = PENDING_COPY; 434 pendingCopy.setLength(0); 435 } 436 437 nextCh(); 438 skipSpace(); 439 switch (c) { 440 case '!': 441 nextCh(); 442 if (c == '-') { 443 nextCh(); 444 if (c == '-') { 445 nextCh(); 446 skipComment(); 447 } 448 } 449 break; 450 451 case '/': 452 nextCh(); 453 String endTag = scanIdentifier(); 454 if (copyMode != PENDING_COPY) 455 skipTag(); 456 else if (isBody(endTag)) 457 scanBody(false); 458 else if (isHead(endTag)) 459 scanHead(Character.getNumericValue(endTag.charAt(1)), false); 460 else 461 skipTag(); 462 break; 463 464 default: 465 String startTag = scanIdentifier(); 466 if (isBody(startTag)) 467 scanBody(true); 468 else if (isMeta(startTag)) 469 scanMeta(); 470 else if (copyMode != PENDING_COPY) 471 skipTag(); 472 //else if (isArea(startTag)) 473 // scanArea(); 474 else if (isHead(startTag)) 475 scanHead(Character.getNumericValue(startTag.charAt(1)), true); 476 //else if (isImage(startTag)) 477 // scanImage(); 478 else if (isLink(startTag)) 479 scanLink(); 480 //else if (isMap(startTag)) 481 // scanMap(); 482 else 483 skipTag(); 484 } 485 } 486 else { 487 if (inHead1) 488 head1 += ((char) c); 489 nextCh(); 490 } 491 } 492 493 } 494 495 // private boolean isArea(String tag) { 496 // return tag.equals("area"); 497 // } 498 // 499 // /** 500 // * Process the contents of <area ... href=...> 501 // */ 502 // private void scanArea() throws IOException { 503 // out.write(pendingCopy.toString()); 504 // copyMode = COPY; 505 // 506 // skipSpace(); 507 // while (c != '>') { 508 // String att = scanIdentifier(); 509 // if (att.equalsIgnoreCase("href") && copyMode == COPY) { 510 // // the current character should be a whitespace or = 511 // // either way, we just write out = 512 // out.write('='); 513 // copyMode = NO_COPY; 514 // String target = scanValue(); 515 // URL t = new URL(currURL, target); 516 // String link = t.getFile(); 517 // if (link.startsWith(basePath)) 518 // link = link.substring(basePath.length()); 519 // if (link.endsWith(".html")) 520 // link = link.substring(0, link.length() - 5); 521 // String ref = t.getRef(); 522 // if (ref != null && ref.length() > 0) 523 // link = link + "!" + ref; 524 // out.write("\""); 525 // out.write('#' + link); 526 // out.write("\" "); 527 // copyMode = COPY; 528 // } 529 // else 530 // scanValue(); 531 // skipSpace(); 532 // } 533 // nextCh(); 534 // } 535 536 private boolean isBody(String tag) { 537 return tag.equals("body"); 538 } 539 540 private void scanBody(boolean start) throws IOException { 541 if (start) { 542 skipSpace(); 543 while (c != '>') { 544 scanIdentifier(); 545 scanValue(); 546 skipSpace(); 547 } 548 nextCh(); // skip past > 549 copyMode = COPY; 550 // String link = currURL.getFile(); 551 // if (link.startsWith(basePath)) 552 // link = link.substring(basePath.length()); 553 String link = file.getName(); 554 if (link.endsWith(".html")) 555 link = link.substring(0, link.length() - 5); 556 out.write("\n<!-- file: " + file + " -->\n<a name=\"" + link + "\"></a>"); 557 } 558 else { 559 copyMode = NO_COPY; 560 } 561 } 562 563 private boolean isHead(String tag) { 564 return ( tag.length() == 2 565 && tag.charAt(0) == 'h' 566 && Character.isDigit(tag.charAt(1)) ); 567 } 568 569 private void scanHead(int level, boolean start) throws IOException { 570 571 if (copyMode == PENDING_COPY) { 572 int n = Math.min(hIndent + level, 6); 573 out.write('<'); 574 // for glossary.pdf, we change <hn> to <p> and default class to glossaryHeadn 575 out.write(start ? "p" : "/p"); 576 577 // standard head code 578 // if (!start) 579 // out.write('/'); 580 // out.write('h'); 581 // out.write(String.valueOf(n)); 582 583 copyMode = COPY; 584 585 String className = null; 586 skipSpace(); 587 while (c != '>') { 588 String name = scanIdentifier(); 589 String value = scanValue(); 590 if (name.equalsIgnoreCase("class")) 591 className = value; 592 skipSpace(); 593 } 594 595 if (start && className == null) { 596 // write default class 597 out.write(" class=\"glossaryHead"); 598 out.write(String.valueOf(n)); 599 out.write('"'); 600 } 601 602 nextCh(); // skip past > 603 604 if (level == 1) 605 inHead1 = start; 606 607 // if (start && autoNumberLevel > 0) { 608 // hNums[n - 1]++; 609 // if (n < 6) 610 // hNums[n] = 0; 611 // if (n <= autoNumberLevel) { 612 // for (int i = 0; i < n; i++) { 613 // out.write(String.valueOf(hNums[i])); 614 // out.write('.'); 615 // } 616 // out.write(" "); 617 // } 618 // } 619 } 620 } 621 622 // private boolean isImage(String tag) { 623 // return tag.equals("img"); 624 // } 625 // 626 // /** 627 // * Process the contents of <a href=...> 628 // */ 629 // private void scanImage() throws IOException { 630 // out.write(pendingCopy.toString()); 631 // copyMode = COPY; 632 // 633 // skipSpace(); 634 // while (c != '>') { 635 // String att = scanIdentifier(); 636 // if (att.equalsIgnoreCase("src") && copyMode == COPY) { 637 // // the current character should be a whitespace or = 638 // // either way, we just write out = 639 // out.write('='); 640 // copyMode = NO_COPY; 641 // String src = scanValue(); 642 // URL u = new URL(currURL, src); 643 // String srcPath = u.getFile(); 644 // // if the path refers to an entry in the /images directory, 645 // // check for a matching entry in the /pdfImages directory 646 // // and use that if found. 647 // int imagesIndex = srcPath.indexOf("/images/"); 648 // if (imagesIndex >= 0) { 649 // String pdfImagePath = srcPath.substring(0, imagesIndex) 650 // + "/pdfImages/" 651 // + srcPath.substring(imagesIndex + "/images/".length()); 652 // if (new File(pdfImagePath).exists()) 653 // srcPath = pdfImagePath; 654 // } 655 // out.write('"'); 656 // out.write(srcPath); 657 // out.write('"'); 658 // copyMode = COPY; 659 // } 660 // else if (att.equalsIgnoreCase("usemap") && copyMode == COPY) { 661 // // the current character should be a whitespace or = 662 // // either way, we just write out = 663 // out.write('='); 664 // copyMode = NO_COPY; 665 // String target = scanValue(); 666 // URL t = new URL(currURL, target); 667 // String link = t.getFile(); 668 // if (link.startsWith(basePath)) 669 // link = link.substring(basePath.length()); 670 // if (link.endsWith(".html")) 671 // link = link.substring(0, link.length() - 5); 672 // String ref = t.getRef(); 673 // if (ref != null && ref.length() > 0) 674 // link = link + "!" + ref; 675 // out.write("\""); 676 // out.write('#' + link); 677 // out.write("\" "); 678 // copyMode = COPY; 679 // } 680 // else 681 // scanValue(); 682 // skipSpace(); 683 // } 684 // nextCh(); 685 // } 686 687 private boolean isLink(String tag) { 688 return tag.equals("a"); 689 } 690 691 /** 692 * Process the contents of <a href=...> 693 */ 694 private void scanLink() throws IOException { 695 out.write(pendingCopy.toString()); 696 copyMode = COPY; 697 698 skipSpace(); 699 while (c != '>') { 700 String att = scanIdentifier(); 701 if (att.equalsIgnoreCase("href") && copyMode == COPY) { 702 // the current character should be a whitespace or = 703 // either way, we just write out = 704 out.write('='); 705 copyMode = NO_COPY; 706 String target = scanValue(); 707 URL t = new URL(file.toURL(), target); 708 // String link = t.getFile(); 709 String link = target; 710 // if (link.startsWith(basePath)) 711 // link = link.substring(basePath.length()); 712 if (link.endsWith(".html")) 713 link = link.substring(0, link.length() - 5); 714 String ref = t.getRef(); 715 if (ref != null && ref.length() > 0) 716 link = link + "!" + ref; 717 out.write('"'); 718 out.write('#' + link); 719 out.write('"'); 720 copyMode = COPY; 721 } 722 else if (att.equalsIgnoreCase("name") && copyMode == COPY) { 723 // the current character should be a whitespace or = 724 // either way, we just write out = 725 out.write('='); 726 copyMode = NO_COPY; 727 String oldName = scanValue(); 728 // String name = currURL.getFile(); 729 String name = file.getPath(); 730 // if (name.startsWith(basePath)) 731 // name = name.substring(basePath.length()); 732 if (name.endsWith(".html")) 733 name = name.substring(0, name.length() - 5); 734 name = name + "!" + oldName; 735 out.write('"'); 736 out.write(name); 737 out.write('"'); 738 copyMode = COPY; 739 } 740 else 741 scanValue(); 742 skipSpace(); 743 } 744 nextCh(); 745 } 746 747 // private boolean isMap(String tag) { 748 // return tag.equals("map"); 749 // } 750 // 751 // /** 752 // * Process the contents of <map name=...> 753 // */ 754 // private void scanMap() throws IOException { 755 // out.write(pendingCopy.toString()); 756 // copyMode = COPY; 757 // 758 // skipSpace(); 759 // while (c != '>') { 760 // String att = scanIdentifier(); 761 // if (att.equalsIgnoreCase("name") && copyMode == COPY) { 762 // // the current character should be a whitespace or = 763 // // either way, we just write out = 764 // out.write('='); 765 // copyMode = NO_COPY; 766 // String oldName = scanValue(); 767 // String name = currURL.getFile(); 768 // if (name.startsWith(basePath)) 769 // name = name.substring(basePath.length()); 770 // if (name.endsWith(".html")) 771 // name = name.substring(0, name.length() - 5); 772 // name = name + "!" + oldName; 773 // out.write('"'); 774 // out.write(name); 775 // out.write('"'); 776 // copyMode = COPY; 777 // } 778 // else 779 // scanValue(); 780 // skipSpace(); 781 // } 782 // nextCh(); 783 // } 784 785 private boolean isMeta(String tag) { 786 return tag.equals("meta"); 787 } 788 789 private void scanMeta() throws IOException { 790 String name = ""; 791 String content = ""; 792 793 skipSpace(); 794 while (c != '>') { 795 String attr_name = scanIdentifier(); 796 String attr_val = scanValue(); 797 if (attr_name.equalsIgnoreCase("name")) 798 name = attr_val; 799 else if (attr_name.equalsIgnoreCase("content")) 800 content = attr_val; 801 skipSpace(); 802 } 803 nextCh(); 804 805 // if (name.equalsIgnoreCase("hIndent")) { 806 // hIndent = Integer.parseInt(content); 807 // } 808 if (name.equalsIgnoreCase("glossaryKeywords")) { 809 keywords = new HashSet<>(Arrays.asList(split(content))); 810 } 811 } 812 813 private boolean isTitle(String tag) { 814 return tag.equals("title"); 815 } 816 817 /** 818 * Read an identifier, and lowercase it 819 */ 820 private String scanIdentifier() throws IOException { 821 StringBuffer buf = new StringBuffer(); 822 while (true) { 823 if ((c >= 'a') && (c <= 'z')) { 824 buf.append((char)c); 825 nextCh(); 826 } else if ((c >= 'A') && (c <= 'Z')) { 827 buf.append((char)('a' + (c - 'A'))); 828 nextCh(); 829 } else if ((c >= '0') && (c <= '9')) { 830 buf.append((char)c); 831 nextCh(); 832 } else if (c == '-') { // needed for <META HTTP-EQUIV ....> 833 buf.append((char)c); 834 nextCh(); 835 } else 836 if (buf.length() == 0) 837 throw new IOException("Identifier expected (" + file + ":" + line + ")"); 838 else 839 return buf.toString(); 840 } 841 } 842 843 /** 844 * Read the value of an HTML attribute, which may be quoted. 845 */ 846 private String scanValue() throws IOException { 847 skipSpace(); 848 if (c != '=') 849 return ""; 850 851 int quote = -1; 852 nextCh(); 853 skipSpace(); 854 if ((c == '\'') || (c == '\"')) { 855 quote = c; 856 nextCh(); 857 skipSpace(); 858 } 859 StringBuffer buf = new StringBuffer(); 860 while (((quote < 0) && (c != ' ') && (c != '\t') && 861 (c != '\n') && (c != '\r') && (c != '>')) || 862 ((quote >= 0) && (c != quote))) { 863 if (c == -1 || c == '\n' || c == '\r') { 864 throw new IOException("mismatched quotes (" + file + ":" + line + ")"); 865 } 866 buf.append((char)c); 867 nextCh(); 868 } 869 if (c == quote) 870 nextCh(); 871 skipSpace(); 872 return buf.toString(); 873 } 874 875 /** 876 * Skip an HTML comment <!-- ... --> 877 */ 878 private void skipComment() throws IOException { 879 // a comment sequence is "<!--" ... "-->" 880 // at the time this is called, "<!--" has been read; 881 StringBuffer text = new StringBuffer("<!--"); 882 int numHyphens = 0; 883 while (c != -1 && (numHyphens < 2 || c != '>')) { 884 if (c == '-') 885 numHyphens++; 886 else 887 numHyphens = 0; 888 text.append((char) c); 889 nextCh(); 890 //System.out.print((char)c); 891 } 892 text.append((char) c); 893 nextCh(); 894 895 String comment = text.toString(); 896 897 switch (copyMode) { 898 899 case PENDING_COPY: 900 if (comment.equalsIgnoreCase("<!--CopyOff-->")) { 901 copyMode = SUPPRESS_COPY; 902 pendingCopy.setLength(0); 903 } 904 else { 905 out.write(comment); 906 copyMode = COPY; 907 } 908 break; 909 910 case SUPPRESS_COPY: 911 if (comment.equalsIgnoreCase("<!--CopyOn-->")) 912 copyMode = COPY; 913 break; 914 } 915 } 916 917 /** 918 * Skip whitespace. 919 */ 920 private void skipSpace() throws IOException { 921 while ((c == ' ') || (c == '\t') || (c == '\n') || (c == '\r')) { 922 nextCh(); 923 } 924 } 925 926 /** 927 * Skip the contents of an HTML tag i.e. <...> 928 */ 929 private void skipTag() throws IOException { 930 skipSpace(); 931 while (c != '>') { 932 String att = scanIdentifier(); 933 if (att == "") 934 throw new IOException("error parsing HTML input (" + file + ":" + line + ")"); 935 String value = scanValue(); 936 skipSpace(); 937 } 938 nextCh(); 939 940 if (copyMode == PENDING_COPY) { 941 out.write(pendingCopy.toString()); 942 copyMode = COPY; 943 } 944 } 945 946 /** 947 * Read the next character. 948 */ 949 private void nextCh() throws IOException { 950 switch (copyMode) { 951 case COPY: 952 out.write((char) c); 953 break; 954 955 case PENDING_COPY: 956 pendingCopy.append((char) c); 957 break; 958 } 959 960 c = in.read(); 961 if (c == '\n') 962 line++; 963 } 964 965 private static String escape(String s) { 966 for (int i = 0; i < s.length(); i++) { 967 switch (s.charAt(i)) { 968 case '<': case '>': case '&': 969 StringBuffer sb = new StringBuffer(s.length()*2); 970 for (int j = 0; j < s.length(); j++) { 971 char c = s.charAt(j); 972 switch (c) { 973 case '<': sb.append("<"); break; 974 case '>': sb.append(">"); break; 975 case '&': sb.append("&"); break; 976 default: sb.append(c); 977 } 978 } 979 return sb.toString(); 980 } 981 } 982 return s; 983 } 984 985 private static String[] split(String s) { 986 Vector<String> v = new Vector<>(); 987 int start = -1; 988 for (int i = 0; i < s.length(); i++) { 989 char c = s.charAt(i); 990 if (Character.isLetterOrDigit(c) || c == '_') { 991 if (start == -1) 992 start = i; 993 } 994 else { 995 if (start != -1) 996 v.addElement(s.substring(start, i)); 997 start = -1; 998 } 999 } 1000 if (start != -1) 1001 v.addElement(s.substring(start)); 1002 String[] a = new String[v.size()]; 1003 v.copyInto(a); 1004 return a; 1005 } 1006 1007 private File file; 1008 private String head1; 1009 private String text; 1010 private Set<String> keywords; 1011 1012 private Reader in; 1013 private Writer out; 1014 private int c; 1015 private boolean inHead1; 1016 private int line; 1017 private int copyMode; 1018 private static final int NO_COPY = 0, PENDING_COPY = 1, SUPPRESS_COPY = 2, COPY = 3; 1019 private StringBuffer pendingCopy = new StringBuffer(); 1020 private int hIndent; 1021 private int[] hNums = new int[6]; 1022 }