src/share/classes/javax/swing/text/html/parser/Parser.java

Print this page


   1 /*
   2  * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any


  63  * <p>as well as:
  64  * '&lt;p>&lt;a href="xx">&nbsp;&lt;em>Using&lt;/em>&lt;/a>&lt;/p>'
  65  * which appears to be treated as:
  66  * '&lt;p>&lt;a href="xx">&lt;em>Using&lt;/em>&lt;/a>&lt;/p>'
  67  * <p>
  68  * If <code>strict</code> is false, when a tag that breaks flow
  69  * (<code>TagElement.breaksFlow</code>) or trailing whitespace is
  70  * encountered, all whitespace will be ignored until a non-whitespace
  71  * character is encountered. This appears to give behavior closer to
  72  * the popular browsers.
  73  *
  74  * @see DTD
  75  * @see TagElement
  76  * @see SimpleAttributeSet
  77  * @author Arthur van Hoff
  78  * @author Sunita Mani
  79  */
  80 public
  81 class Parser implements DTDConstants {
  82 



  83     private char text[] = new char[1024];
  84     private int textpos = 0;
  85     private TagElement last;
  86     private boolean space;
  87 
  88     private char str[] = new char[128];
  89     private int strpos = 0;
  90 
  91     protected DTD dtd = null;
  92 
  93     private int ch;
  94     private int ln;
  95     private Reader in;
  96 
  97     private Element recent;
  98     private TagStack stack;
  99     private boolean skipTag = false;
 100     private TagElement lastFormSent = null;
 101     private SimpleAttributeSet attributes = new SimpleAttributeSet();
 102 


 935                         ln++;
 936                         ch = readCh();
 937                         lfCount++;
 938                         break;
 939 
 940                     case '\r':
 941                         ln++;
 942                         if ((ch = readCh()) == '\n') {
 943                             ch = readCh();
 944                             crlfCount++;
 945                         }
 946                         else {
 947                             crCount++;
 948                         }
 949                         break;
 950 
 951                     case ';':
 952                         ch = readCh();
 953                         break;
 954                 }
 955                 char data[] = {mapNumericReference((char) n)};









 956                 return data;
 957             }





 958             addString('#');
 959             if (!parseIdentifier(false)) {
 960                 error("ident.expected");
 961                 strpos = pos;
 962                 char data[] = {'&', '#'};
 963                 return data;
 964             }
 965         } else if (!parseIdentifier(false)) {
 966             char data[] = {'&'};
 967             return data;
 968         }
 969 
 970         boolean semicolon = false;
 971 
 972         switch (ch) {
 973           case '\n':
 974             ln++;
 975             ch = readCh();
 976             lfCount++;
 977             break;


   1 /*
   2  * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any


  63  * <p>as well as:
  64  * '&lt;p>&lt;a href="xx">&nbsp;&lt;em>Using&lt;/em>&lt;/a>&lt;/p>'
  65  * which appears to be treated as:
  66  * '&lt;p>&lt;a href="xx">&lt;em>Using&lt;/em>&lt;/a>&lt;/p>'
  67  * <p>
  68  * If <code>strict</code> is false, when a tag that breaks flow
  69  * (<code>TagElement.breaksFlow</code>) or trailing whitespace is
  70  * encountered, all whitespace will be ignored until a non-whitespace
  71  * character is encountered. This appears to give behavior closer to
  72  * the popular browsers.
  73  *
  74  * @see DTD
  75  * @see TagElement
  76  * @see SimpleAttributeSet
  77  * @author Arthur van Hoff
  78  * @author Sunita Mani
  79  */
  80 public
  81 class Parser implements DTDConstants {
  82 
  83     // Maximum code point value within the Basic Multilingual Plane (BMP)
  84     private final int MAX_BMP_BOUND = 65535;
  85 
  86     private char text[] = new char[1024];
  87     private int textpos = 0;
  88     private TagElement last;
  89     private boolean space;
  90 
  91     private char str[] = new char[128];
  92     private int strpos = 0;
  93 
  94     protected DTD dtd = null;
  95 
  96     private int ch;
  97     private int ln;
  98     private Reader in;
  99 
 100     private Element recent;
 101     private TagStack stack;
 102     private boolean skipTag = false;
 103     private TagElement lastFormSent = null;
 104     private SimpleAttributeSet attributes = new SimpleAttributeSet();
 105 


 938                         ln++;
 939                         ch = readCh();
 940                         lfCount++;
 941                         break;
 942 
 943                     case '\r':
 944                         ln++;
 945                         if ((ch = readCh()) == '\n') {
 946                             ch = readCh();
 947                             crlfCount++;
 948                         }
 949                         else {
 950                             crCount++;
 951                         }
 952                         break;
 953 
 954                     case ';':
 955                         ch = readCh();
 956                         break;
 957                 }
 958                 // Code points within the BMP are mapped as a single char;
 959                 // anything above is converted into a surrogate pair.
 960                 try { 
 961                     char data[];
 962                     if (n <= MAX_BMP_BOUND) { 
 963                         data = Character.toChars(mapNumericReference((char) n));
 964                     } else {
 965                         data = Character.toChars(n);
 966                     }
 967                         
 968                     return data; 
 969                 }
 970                 catch(IllegalArgumentException ex) {
 971                     error(ex.toString()); 
 972                     return new char[0]; 
 973                 } 
 974             }
 975             addString('#');
 976             if (!parseIdentifier(false)) {
 977                 error("ident.expected");
 978                 strpos = pos;
 979                 char data[] = {'&', '#'};
 980                 return data;
 981             }
 982         } else if (!parseIdentifier(false)) {
 983             char data[] = {'&'};
 984             return data;
 985         }
 986 
 987         boolean semicolon = false;
 988 
 989         switch (ch) {
 990           case '\n':
 991             ln++;
 992             ch = readCh();
 993             lfCount++;
 994             break;