1 /*
   2  * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 package javax.xml.catalog;
  26 
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
  30 
  31 /**
  32  * The Normalizer is responsible for normalizing Public and System Identifiers
  33  * as specified in section 6.2, 6.3 and 6.4 of the specification
  34  *  * <a
  35  * href="https://www.oasis-open.org/committees/download.php/14809/xml-catalogs.html">
  36  * XML Catalogs, OASIS Standard V1.1, 7 October 2005</a>.
  37  *
  38  * @since 9
  39  */
  40 class Normalizer {
  41 
  42     /**
  43      * Normalize a public identifier in accordance with section 6.2 of the
  44      * Catalog specification.
  45      *
  46      * <p>
  47      * All strings of white space in public identifiers must be normalized to
  48      * single space characters (#x20), and leading and trailing white space must
  49      * be removed.
  50      *
  51      * @param publicId The unnormalized public identifier
  52      *
  53      * @return The normalized identifier
  54      */
  55     static String normalizePublicId(String publicId) {
  56         if (publicId == null) return null;
  57 
  58         StringBuilder sb = new StringBuilder(publicId.length());
  59         char last = 'a';
  60         for (char c : publicId.toCharArray()) {
  61             //skip beginning and duplicate space
  62             if ((c == ' ') && (sb.length() == 0 || last == ' ')) {
  63                 continue;
  64             }
  65 
  66             //replace whitespace with space
  67             if (c == '\t' || c == '\r' || c == '\n') {
  68                 if (last != ' ') {
  69                     sb.append(' ');
  70                     last = ' ';
  71                 }
  72             } else {
  73                 sb.append(c);
  74                 last = c;
  75             }
  76         }
  77         //remove the last space
  78         if (last == ' ') {
  79             sb.deleteCharAt(sb.length() - 1);
  80         }
  81 
  82         return sb.toString();
  83     }
  84 
  85     /**
  86      * Encode a public identifier as a "publicid" URN.
  87      *
  88      * @param publicId The unnormalized public identifier
  89      *
  90      * @return The normalized identifier
  91      * @throws CatalogException if encoding failed
  92      */
  93     static String encodeURN(String publicId) {
  94         String urn = normalizePublicId(publicId);
  95 
  96         try {
  97             urn = URLEncoder.encode(urn, "UTF-8");
  98             urn = urn.replace("::", ";");
  99             urn = urn.replace("//", ":");
 100         } catch (UnsupportedEncodingException ex) {
 101             CatalogMessages.reportRunTimeError(CatalogMessages.ERR_OTHER, ex);
 102         }
 103         return "urn:publicid:" + urn;
 104     }
 105 
 106     /**
 107      * Decode a "publicid" URN into a public identifier.
 108      *
 109      * @param urn The urn:publicid: URN
 110      *
 111      * @return The normalized identifier
 112      * @throws CatalogException if decoding failed
 113      */
 114     static String decodeURN(String urn) {
 115         String publicId;
 116 
 117         if (urn != null && urn.startsWith("urn:publicid:")) {
 118             publicId = urn.substring(13);
 119         } else {
 120             return urn;
 121         }
 122         try {
 123             publicId = publicId.replace(":", "//");
 124             publicId = publicId.replace(";", "::");
 125             publicId = URLDecoder.decode(publicId, "UTF-8");
 126         } catch (UnsupportedEncodingException ex) {
 127             CatalogMessages.reportRunTimeError(CatalogMessages.ERR_OTHER, ex);
 128         }
 129 
 130         return publicId;
 131     }
 132 
 133     /**
 134      * Perform character normalization on a URI reference.
 135      *
 136      * @param uriref The URI reference
 137      * @return The normalized URI reference
 138      */
 139     static String normalizeURI(String uriref) {
 140         if (uriref == null) {
 141             return null;
 142         }
 143 
 144         byte[] bytes;
 145         uriref = uriref.trim();
 146         try {
 147             bytes = uriref.getBytes("UTF-8");
 148         } catch (UnsupportedEncodingException uee) {
 149             // this can't happen
 150             return uriref;
 151         }
 152 
 153         StringBuilder newRef = new StringBuilder(bytes.length);
 154         for (int count = 0; count < bytes.length; count++) {
 155             int ch = bytes[count] & 0xFF;
 156 
 157             if ((ch <= 0x20) // ctrl
 158                     || (ch > 0x7F) // high ascii
 159                     || (ch == 0x22) // "
 160                     || (ch == 0x3C) // <
 161                     || (ch == 0x3E) // >
 162                     || (ch == 0x5C) // \
 163                     || (ch == 0x5E) // ^
 164                     || (ch == 0x60) // `
 165                     || (ch == 0x7B) // {
 166                     || (ch == 0x7C) // |
 167                     || (ch == 0x7D) // }
 168                     || (ch == 0x7F)) {
 169                 newRef.append("%").append(String.format("%02X", ch));
 170             } else {
 171                 newRef.append((char) bytes[count]);
 172             }
 173         }
 174 
 175         return newRef.toString().trim();
 176     }
 177 }