/*
 * $Id$
 *
 * Copyright (c) 2004, 2009, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
package com.sun.jct.utils.indexgen;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintStream;
import java.io.PrintWriter;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import java.util.Vector;
import org.apache.tools.ant.BuildException;
import org.apache.tools.ant.FileScanner;
import org.apache.tools.ant.taskdefs.MatchingTask;

/**
 * <p>A utility to generate an index from a set of HTML files and directories.
 * Index entries are indicated inline by HTML of the form
 * <pre>
 *    &lt;a name=XXX&gt;&lt;!-- index: abc; pqr --&gt;&lt;/a&gt;
 * </pre>
 * This will create entries in the index under "abc" and "pqr" that link back to "XXX".
 * The entries can have sub-entries, separated by ":", as in:
 * <pre>
 *    &lt;a name=XXX&gt;&lt;!-- index: abc:def; pqr:stu --&gt;&lt;/a&gt;
 * </pre>
 * This will create entries in the index under "abc"/"def" and "pqr"/"stu"
 * that link back to "XXX". The format is the same as for FrameMaker index marker
 * entries.
 *
 * <p>The output can be in one or both of two forms.
 *
 * <p>The index can be output as a JavaHelp-compatible index.xml file,
 * with an associated map file. The map file can be merged with any other
 * maps with the "mapmerge" utility.
 *
 * <p>Or, the index can be output as a single index.html file, containing the
 * sorted set of index entries and their references.
 */
public class Main {
    /**
     * An exception to report bad command line arguments.
     */
    public static class BadArgs extends Exception {
        BadArgs(String msg) {
            super(msg);
        }
    }

    /**
     * Command line entry point.<br>
     * Prints the usage message if no arguments are given. Exits with status 1
     * on bad arguments and status 2 on any other failure.
     * @param args Command line arguments, per the usage as described.
     */
    public static void main(String[] args) {
        try {
            if (args.length == 0) {
                usage(System.err);
            } else {
                Main m = new Main(args);
                m.run();
            }
        } catch (BadArgs e) {
            System.err.println(e);
            usage(System.err);
            System.exit(1);
        } catch (Throwable t) {
            t.printStackTrace();
            System.exit(2);
        }
    }

    /**
     * Write out short command line help.
     * @param out A stream to which to write the help.
     */
    private static void usage(PrintStream out) {
        String program = System.getProperty("program", "java " + Main.class.getName());
        out.println("Usage:");
        out.println(" " + program + " options files...");
        out.println("");
        out.println("Arguments:");
        out.println("-mapOut map.xml");
        out.println(" Specify the location of the map.xml file to be written.");
        out.println("-htmlOut index.html");
        out.println(" Specify the location of the index.html file.");
        out.println("-xmlOut index.xml");
        out.println(" Specify the location of the index.xml file.");
        out.println("-srcpath dir;dir;...");
        out.println(" Specify the a path in which to look for source files.");
        out.println("files...");
        out.println(" HTML files and directories.");
    }

    /**
     * Create an unconfigured object; used by the {@link Ant} task, which
     * sets the fields directly.
     */
    public Main() { }

    /**
     * Create an object based on command line args.
     * Options (matched case-insensitively) must precede the input files:
     * the first argument that is not a recognized option, and everything
     * after it, is taken as an input file or directory.
     * Missing input or output files are reported later, when the tool is run.
     * @param args Command line args.
     * @see #main
     * @throws Main.BadArgs if an option is given without its value.
     */
    public Main(String[] args) throws BadArgs {
        int i = 0;
        while (i < args.length) {
            String option = args[i];
            if (option.equalsIgnoreCase("-htmlout")) {
                htmlOutFile = new File(optionValue(args, i));
            } else if (option.equalsIgnoreCase("-xmlout")) {
                xmlOutFile = new File(optionValue(args, i));
            } else if (option.equalsIgnoreCase("-mapout")) {
                mapOutFile = new File(optionValue(args, i));
            } else if (option.equalsIgnoreCase("-mapdir")) {
                mapDir = new File(optionValue(args, i));
            } else if (option.equalsIgnoreCase("-srcPath")) {
                path = splitPath(optionValue(args, i));
            } else {
                break; // first non-option argument: the rest are input files
            }
            i += 2;
        }

        if (i < args.length) {
            inFiles = new File[args.length - i];
            for (int j = 0; j < inFiles.length; j++) {
                inFiles[j] = new File(args[i + j]);
            }
        }
    }

    /**
     * Get the value that follows the option at a given position.
     * @param args the command line arguments
     * @param i the position of the option in {@code args}
     * @return the argument following the option
     * @throws BadArgs if the option is the last argument
     */
    private static String optionValue(String[] args, int i) throws BadArgs {
        if (i + 1 >= args.length) {
            throw new BadArgs("no value specified for " + args[i]);
        }
        return args[i + 1];
    }

    /**
     * An Ant task that runs the index generator over the files selected
     * by the task's implicit file set.
     */
    public static class Ant extends MatchingTask {
        private Main m = new Main();

        public void setHtmlOutFile(File file) {
            m.htmlOutFile = file;
        }

        public void setXmlOutFile(File file) {
            m.xmlOutFile = file;
        }

        public void setMapOutFile(File file) {
            m.mapOutFile = file;
        }

        public void setMapDir(File file) {
            m.mapDir = file;
        }

        public void setDir(File dir) {
            getImplicitFileSet().setDir(dir);
        }

        /**
         * Run the generator, using the file set's base directory as the
         * source path and its included files as the input files.
         * @throws BuildException if the configuration is invalid or an
         *      I/O error occurs; the original exception is kept as the cause.
         */
        @Override
        public void execute() {
            FileScanner s = getImplicitFileSet().getDirectoryScanner(getProject());
            m.path = new File[] { s.getBasedir() };
            m.addFiles(s.getIncludedFiles());

            try {
                m.run();
            } catch (BadArgs e) {
                throw new BuildException(e.getMessage(), e);
            } catch (IOException e) {
                throw new BuildException(e);
            }
        }
    }

    /**
     * Append files to the set of input files.
     * @param paths the paths of the files to add; ignored if null
     */
    public void addFiles(String[] paths) {
        if (paths == null) {
            return;
        }
        List<File> files = new ArrayList<File>();
        if (inFiles != null) {
            files.addAll(Arrays.asList(inFiles));
        }
        for (String p : paths) {
            files.add(new File(p));
        }
        inFiles = files.toArray(new File[files.size()]);
    }

    /**
     * Validate the configuration, read all the input files and write
     * the requested output files.
     * @throws BadArgs if there are no input files, no output files, or
     *      only one of the XML index file and map file is specified
     * @throws IOException if a file cannot be read, parsed or written
     */
    private void run() throws BadArgs, IOException {
        if (inFiles == null || inFiles.length == 0) {
            throw new BadArgs("no input files specified");
        }

        if (htmlOutFile == null && mapOutFile == null && xmlOutFile == null) {
            throw new BadArgs("no output files specified");
        }

        // the XML index and the map refer to each other, so need both or neither
        if (xmlOutFile != null && mapOutFile == null) {
            throw new BadArgs("no map output file specified");
        }

        if (mapOutFile != null && xmlOutFile == null) {
            throw new BadArgs("no XML output file specified");
        }

        root = new Node();

        read(inFiles);

        writeIndex();
    }

    private void read(File[] files) throws IOException {
        for (File f : files) {
            read(f);
        }
    }

    /**
     * Read a file or directory, resolving it against the source path if
     * one has been set; the first path entry containing the file is used.
     * @throws FileNotFoundException if a source path is set and the file is
     *      not found in any of its entries
     */
    private void read(File file) throws IOException {
        if (path == null) {
            read(file, file);
            return;
        }

        for (File dir : path) {
            File f = new File(dir, file.getPath());
            if (f.exists()) {
                read(f, file);
                return;
            }
        }
        throw new FileNotFoundException(file.getPath());
    }

    /**
     * Read a file, or recursively read the contents of a directory.
     * Directories named "SCCS" and files not ending in ".html" are ignored.
     * @param absFile the file to be opened
     * @param relFile the path to be used for the file in the generated links
     */
    private void read(File absFile, File relFile) throws IOException {
        if (absFile.isDirectory()) {
            if (!absFile.getName().equals("SCCS")) {
                String[] names = absFile.list();
                if (names == null) {
                    // list() returns null if the directory cannot be read
                    throw new IOException("cannot list directory " + absFile);
                }
                boolean atTop = relFile.getPath().equals(".");
                for (String name : names) {
                    File rel = (atTop ? new File(name) : new File(relFile, name));
                    read(new File(absFile, name), rel);
                }
            }
            return;
        }

        if (!absFile.getName().endsWith(".html")) {
            return;
        }

        // ordinary file -- scan it looking for index entries
        // NOTE(review): the file is read using the platform default charset
        try (Reader r = new BufferedReader(new FileReader(absFile))) {
            in = r;
            currFile = relFile;
            currName = null;
            line = 1;
            scan();
        } finally {
            in = null;
        }
    }

    /**
     * Scan the current input, dispatching on each tag that is found:
     * comments are checked for index entries, link tags update the
     * current anchor name, and all other tags are skipped.
     */
    private void scan() throws IOException {
        nextCh();
        while (c >= 0) {
            if (c != '<') {
                nextCh();
                continue;
            }

            nextCh();
            skipSpace();
            switch (c) {
            case '!':
                nextCh();
                if (c == '-') {
                    nextCh();
                    if (c == '-') {
                        nextCh();
                        scanComment();
                    }
                }
                break;

            case '/':
                nextCh();
                String endTag = scanIdentifier();
                if (isLink(endTag)) {
                    // leaving the anchor: later entries link to the file itself
                    currName = "";
                }
                skipTag();
                break;

            default:
                String startTag = scanIdentifier();
                if (isLink(startTag)) {
                    scanLink();
                } else {
                    skipTag();
                }
            }
        }
    }

    private boolean isLink(String tag) {
        return tag.equals("a");
    }

    /**
     * Process the attributes of {@code <a ...>}, recording the value of
     * any "name" attribute as the current anchor name.
     */
    private void scanLink() throws IOException {
        skipSpace();
        while (c != '>') {
            String att = scanIdentifier();
            String value = scanValue();
            if (att.equalsIgnoreCase("name")) {
                currName = value;
            }
            skipSpace();
        }
        nextCh();
    }

    /**
     * Read an identifier, and lowercase it.
     * An identifier is a non-empty sequence of ASCII letters, digits and '-'.
     * @throws IOException if there is no identifier at the current position
     */
    private String scanIdentifier() throws IOException {
        StringBuilder buf = new StringBuilder();
        while (true) {
            boolean lowerDigitOrHyphen = (c >= 'a' && c <= 'z')
                    || (c >= '0' && c <= '9')
                    || c == '-'; // '-' needed for <META HTTP-EQUIV ....>
            if (lowerDigitOrHyphen) {
                buf.append((char) c);
            } else if (c >= 'A' && c <= 'Z') {
                buf.append((char) ('a' + (c - 'A')));
            } else {
                break;
            }
            nextCh();
        }

        if (buf.length() == 0) {
            throw new IOException("Identifier expected (" + currFile + ":" + line + ")");
        }
        return buf.toString();
    }

    /**
     * Read the value of an HTML attribute, which may be quoted.
     * @return the value, or an empty string if the attribute has no "=value"
     * @throws IOException if the end of the line or file is found before
     *      the end of the value
     */
    private String scanValue() throws IOException {
        skipSpace();
        if (c != '=') {
            return "";
        }

        nextCh();
        skipSpace();
        int quote = -1;
        if (c == '\'' || c == '\"') {
            quote = c;
            nextCh();
            skipSpace();
        }

        StringBuilder buf = new StringBuilder();
        while (true) {
            boolean atEnd;
            if (quote < 0) {
                // unquoted values end at white space or the end of the tag
                atEnd = (c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '>');
            } else {
                atEnd = (c == quote);
            }
            if (atEnd) {
                break;
            }
            if (c == -1 || c == '\n' || c == '\r') {
                throw new IOException("mismatched quotes (" + currFile + ":" + line + ")");
            }
            buf.append((char) c);
            nextCh();
        }

        if (c == quote) {
            nextCh();
        }
        skipSpace();
        return buf.toString();
    }

    /**
     * Scan an HTML comment {@code <!-- ... -->}, and add any index entries
     * that it contains. At the time this is called, the opening
     * {@code <!--} has already been read.
     * @throws IOException if the end of the file is found before the end
     *      of the comment
     */
    private void scanComment() throws IOException {
        // read up to, but not including, the '>' of the closing "-->"
        StringBuilder text = new StringBuilder();
        int numHyphens = 0;
        while (c != -1 && (numHyphens < 2 || c != '>')) {
            numHyphens = (c == '-' ? numHyphens + 1 : 0);
            text.append((char) c);
            nextCh();
        }
        if (c == -1) {
            throw new IOException("unterminated comment (" + currFile + ":" + line + ")");
        }
        nextCh();

        // the text ends with the "--" of the closing "-->"
        String comment = text.substring(0, text.length() - 2).trim();

        if (comment.startsWith("index:")) {
            String[] entries = split(comment.substring(6).trim(), ';');
            for (String entry : entries) {
                String[] entryPath = split(entry, ':');
                if (entryPath.length > 0) {
                    addToIndex(entryPath, currFile, currName);
                }
            }
        }
    }

    /**
     * Skip whitespace.
     */
    private void skipSpace() throws IOException {
        while (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
            nextCh();
        }
    }

    /**
     * Skip the contents of an HTML tag i.e. {@code <...>}
     */
    private void skipTag() throws IOException {
        skipSpace();
        while (c != '>') {
            // scanIdentifier reports anything that is not an attribute name
            scanIdentifier();
            scanValue();
            skipSpace();
        }
        nextCh();
    }

    /**
     * Read the next character, keeping count of the lines read.
     */
    private void nextCh() throws IOException {
        c = in.read();
        if (c == '\n') {
            line++;
        }
    }

    /**
     * Record an index entry.
     * If the entry already exists, its link is replaced.
     * @param path the names of the entry and its ancestors, outermost first
     * @param file the file containing the entry
     * @param ref the name of the anchor within the file, or null or empty
     *      to link to the file as a whole
     */
    private void addToIndex(String[] path, File file, String ref) {
        Node node = root.getChild(path);
        String href = file.getPath();
        if (ref != null && ref.length() > 0) {
            href = href + "#" + ref;
        }
        node.setInfo(href);
    }

    /**
     * Write the index to whichever of the XML index file, map file and
     * HTML file have been specified.
     * @throws IOException if a file cannot be opened or written
     */
    private void writeIndex() throws IOException {
        // a null resource is permitted, and ignored, by try-with-resources
        try (PrintWriter indexOut = open(xmlOutFile);
                PrintWriter mapOut = open(mapOutFile);
                PrintWriter htmlOut = open(htmlOutFile)) {

            if (indexOut != null) {
                indexOut.println("<?xml version='1.0' encoding='ISO-8859-1' ?>");
                indexOut.println("<!DOCTYPE index");
                indexOut.println(" PUBLIC \"-//Sun Microsystems Inc.//DTD JavaHelp Index Version 1.0//EN\"");
                indexOut.println(" \"http://java.sun.com/products/javahelp/index_1_0.dtd\">");
                indexOut.println("");
                indexOut.println("<index version=\"1.0\">");
            }

            if (mapOut != null) {
                mapOut.println("<?xml version='1.0' encoding='ISO-8859-1' ?>");
                mapOut.println("<!DOCTYPE map");
                mapOut.println(" PUBLIC \"-//Sun Microsystems Inc.//DTD JavaHelp Map Version 1.0//EN\"");
                mapOut.println(" \"http://java.sun.com/products/javahelp/map_1_0.dtd\">");
                mapOut.println("<map version=\"1.0\">");
            }

            if (htmlOut != null) {
                htmlOut.println("<!DOCTYPE HTML>");
                htmlOut.println("<html>");
                htmlOut.println("<head>");
                htmlOut.println("<title>");
                htmlOut.println("Index");
                htmlOut.println("</title>");
                htmlOut.println("<LINK REL=\"stylesheet\" TYPE=\"text/css\" HREF=\"../jthelp.css\" TITLE=\"Style\">");
                htmlOut.println("</head>");
                htmlOut.println("<body>");
                htmlOut.println("<h1>Index</h1>");
            }

            char currLetter = 0; // 0 means no letter heading has been written yet

            for (Node node : root) {
                String name = node.getName();
                char initial = Character.toUpperCase(name.charAt(0));
                if (Character.isLetter(initial) && initial != currLetter) {
                    // write headings for any letters without entries, as well
                    char first = (currLetter == 0 ? 'A' : (char) (currLetter + 1));
                    for (char letter = first; letter <= initial; letter++) {
                        if (htmlOut != null) {
                            htmlOut.println("");
                            htmlOut.println("<p class=\"index0\">" + letter + "</p>");
                        }
                    }
                    currLetter = initial;
                }

                write(indexOut, mapOut, htmlOut, node, 0);
            }

            if (htmlOut != null) {
                // headings for the remaining letters, up to 'Z'
                char first = (currLetter == 0 ? 'A' : (char) (currLetter + 1));
                for (char letter = first; letter <= 'Z'; letter++) {
                    htmlOut.println("<p class=\"index0\">" + letter + "</p>");
                }
                htmlOut.println("</body>");
                htmlOut.println("</html>");
            }

            if (mapOut != null) {
                mapOut.println("</map>");
            }

            if (indexOut != null) {
                indexOut.println("</index>");
            }

            checkWritten(htmlOut, htmlOutFile);
            checkWritten(mapOut, mapOutFile);
            checkWritten(indexOut, xmlOutFile);
        }
    }

    /**
     * Open a writer for an output file.
     * NOTE(review): the file is written using the platform default charset,
     * although the XML headers declare ISO-8859-1 -- confirm which is intended.
     * @param file the file to be written, or null if the output is not required
     * @return a writer for the file, or null if the file is null
     */
    private static PrintWriter open(File file) throws IOException {
        return (file == null ? null
                : new PrintWriter(new BufferedWriter(new FileWriter(file))));
    }

    /**
     * Report any error that occurred while writing an output file;
     * a PrintWriter does not throw exceptions for such errors itself.
     */
    private static void checkWritten(PrintWriter out, File file) throws IOException {
        if (out != null && out.checkError()) {
            throw new IOException("error writing " + file);
        }
    }

    /**
     * Write an index entry and, recursively, its sub-entries.
     * Entries are only written to the map if the XML index is being written.
     * @param xmlOut the writer for the XML index, or null if not required
     * @param mapOut the writer for the map, or null if not required
     * @param htmlOut the writer for the HTML index, or null if not required
     * @param node the entry to be written
     * @param depth the nesting depth of the entry, with 0 for top level entries
     */
    private void write(PrintWriter xmlOut, PrintWriter mapOut, PrintWriter htmlOut, Node node, int depth) {
        String href = node.getInfo();
        boolean hasChildren = (node.getChildCount() > 0);

        if (htmlOut != null) {
            htmlOut.write("<p class=\"index");
            htmlOut.write(String.valueOf(depth + 1));
            htmlOut.write("\">");
            if (href != null) {
                htmlOut.write("<a href=\"");
                htmlOut.write(escapeString(href));
                htmlOut.write("\">");
            }
            htmlOut.write(node.getName());
            if (href != null) {
                htmlOut.write("</a>");
            }
            htmlOut.write("</p>\n");
        }

        if (xmlOut != null) {
            xmlOut.write("<indexitem text=\"");
            xmlOut.write(escapeString(node.getName()));
            xmlOut.write("\" ");
            if (href != null) {
                String target = escapeString(getTarget(href));
                xmlOut.write(" target=\"");
                xmlOut.write(target);
                xmlOut.write("\" ");
                if (mapOut != null) {
                    mapOut.println("<mapID target=\""
                            + target
                            + "\" url=\""
                            + escapeString(href)
                            + "\" />");
                }
            }
            xmlOut.println(hasChildren ? ">" : "/>");
        }

        if (hasChildren) {
            for (Node child : node) {
                write(xmlOut, mapOut, htmlOut, child, depth + 1);
            }

            if (xmlOut != null) {
                xmlOut.println("</indexitem>");
            }
        }
    }

    /**
     * Split a string into its separated parts. Each part is trimmed of
     * leading and trailing white space, and empty parts are ignored.
     * @param s the string to be split
     * @param sep the separator character
     * @return the non-empty parts of the string
     */
    private static String[] split(String s, char sep) {
        List<String> parts = new ArrayList<String>();
        int start = 0;
        for (int i = 0; i <= s.length(); i++) {
            if (i == s.length() || s.charAt(i) == sep) {
                String part = s.substring(start, i).trim();
                if (part.length() > 0) {
                    parts.add(part);
                }
                start = i + 1;
            }
        }
        return parts.toArray(new String[parts.size()]);
    }

    /**
     * Split a search path into its component directories.
     * The entries are separated by {@link File#pathSeparatorChar};
     * empty entries are ignored.
     * @param s the path to be split
     * @return the entries of the path
     */
    private static File[] splitPath(String s) {
        List<File> entries = new ArrayList<File>();
        int start = 0;
        for (int i = 0; i <= s.length(); i++) {
            if (i == s.length() || s.charAt(i) == File.pathSeparatorChar) {
                if (i > start) {
                    entries.add(new File(s.substring(start, i)));
                }
                start = i + 1;
            }
        }
        return entries.toArray(new File[entries.size()]);
    }

    /**
     * Create the map target for a link. The target is "index." followed
     * by the letters of the link, ignoring any ".html" at the end of the
     * file name; other characters are omitted, and a letter following
     * any omitted characters is converted to upper case.
     * For example, "a/b.html#foo" yields "index.aBFoo".
     * @param key the link, in the form "file" or "file#ref"
     * @return the target for the link
     */
    private static String getTarget(String key) {
        String file;
        String ref;

        int hash = key.lastIndexOf("#");
        if (hash == -1) {
            file = key;
            ref = null;
        } else {
            file = key.substring(0, hash);
            ref = key.substring(hash + 1);
        }

        if (file.endsWith(".html")) {
            file = file.substring(0, file.length() - 5);
        }

        String id = (ref == null ? file : file + "#" + ref);

        StringBuilder sb = new StringBuilder("index.");
        boolean needUpper = false;
        for (int i = 0; i < id.length(); i++) {
            char ch = id.charAt(i);
            if (Character.isLetter(ch)) {
                sb.append(needUpper ? Character.toUpperCase(ch) : ch);
                needUpper = false;
            } else {
                needUpper = true;
            }
        }
        return sb.toString();
    }

    /**
     * Escape the characters in a string that are special in HTML and XML
     * text and attribute values: {@code <}, {@code >}, {@code &} and
     * {@code "}.
     * @param text the string to be escaped
     * @return the escaped string, or the original string if it does not
     *      contain any special characters
     */
    private static String escapeString(String text) {
        StringBuilder sb = null; // created when the first special character is found
        for (int i = 0; i < text.length(); i++) {
            char ch = text.charAt(i);
            String entity;
            switch (ch) {
            case '<': entity = "&lt;"; break;
            case '>': entity = "&gt;"; break;
            case '&': entity = "&amp;"; break;
            case '"': entity = "&quot;"; break;
            default: entity = null;
            }

            if (entity != null && sb == null) {
                sb = new StringBuilder(text.length() * 2);
                sb.append(text, 0, i);
            }
            if (sb != null) {
                if (entity == null) {
                    sb.append(ch);
                } else {
                    sb.append(entity);
                }
            }
        }
        return (sb == null ? text : sb.toString());
    }

    private File[] path;
    private File[] inFiles;
    private File htmlOutFile;
    private File mapOutFile;
    private File mapDir;
    private File xmlOutFile;

    // the state of the scanner for the file currently being read
    private Reader in;
    private int c;
    private int line;

    private File currFile;
    private String currName;
    private Node root;

    /**
     * The order of the entries in the index: alphabetical, ignoring case.
     * Names that differ only in case are ordered by a case-sensitive
     * comparison, so that they are not treated as duplicates, and dropped,
     * by the sorted set holding the entries.
     */
    private static final Comparator<Node> indexComparator = new Comparator<Node>() {
        @Override
        public int compare(Node n1, Node n2) {
            String s1 = n1.getName();
            String s2 = n2.getName();
            int result = s1.compareToIgnoreCase(s2);
            return (result != 0 ? result : s1.compareTo(s2));
        }
    };

    /**
     * An entry in the index, with its sub-entries held in index order.
     * The root of the index is a node with no name.
     */
    private static final class Node implements Iterable<Node> {
        private final String name;
        private final Set<Node> children = new TreeSet<Node>(indexComparator);
        private String info;

        Node() {
            this(null);
        }

        private Node(String name) {
            this.name = name;
        }

        String getName() {
            return name;
        }

        void setInfo(String info) {
            this.info = info;
        }

        String getInfo() {
            return info;
        }

        /**
         * Get the child with a given name, creating it if necessary.
         * Names are compared exactly, including case.
         */
        Node getChild(String name) {
            for (Node child : children) {
                if (child.name.equals(name)) {
                    return child;
                }
            }

            Node child = new Node(name);
            children.add(child);
            return child;
        }

        /**
         * Get the descendant with a given path, creating it and any
         * ancestors if necessary. An empty path yields this node.
         */
        Node getChild(String[] path) {
            Node node = this;
            for (String childName : path) {
                node = node.getChild(childName);
            }
            return node;
        }

        int getChildCount() {
            return children.size();
        }

        @Override
        public Iterator<Node> iterator() {
            return children.iterator();
        }
    }

}