< prev index next >

jdk/src/java.base/share/classes/sun/net/idn/StringPrep.java

Print this page




  33 //      2005-05-19 Edward Wang
  34 //          - copy this file from icu4jsrc_3_2/src/com/ibm/icu/text/StringPrep.java
  35 //          - move from package com.ibm.icu.text to package sun.net.idn
  36 //          - use ParseException instead of StringPrepParseException
  37 //          - change 'Normalizer.getUnicodeVersion()' to 'NormalizerImpl.getUnicodeVersion()'
  38 //          - remove all @deprecated tag to make compiler happy
  39 //      2007-08-14 Martin Buchholz
  40 //          - remove redundant casts
  41 //
  42 package sun.net.idn;
  43 
  44 import java.io.BufferedInputStream;
  45 import java.io.ByteArrayInputStream;
  46 import java.io.IOException;
  47 import java.io.InputStream;
  48 import java.text.ParseException;
  49 
  50 import sun.text.Normalizer;
  51 import sun.text.normalizer.CharTrie;
  52 import sun.text.normalizer.Trie;
  53 import sun.text.normalizer.NormalizerImpl;
  54 import sun.text.normalizer.VersionInfo;
  55 import sun.text.normalizer.UCharacter;
  56 import sun.text.normalizer.UCharacterIterator;
  57 import sun.text.normalizer.UTF16;
  58 import sun.net.idn.UCharacterDirection;
  59 import sun.net.idn.StringPrepDataReader;
  60 
  61 /**
  62  * StringPrep API implements the StringPrep framework as described by
  63  * <a href="http://www.ietf.org/rfc/rfc3454.txt">RFC 3454</a>.
  64  * StringPrep prepares Unicode strings for use in network protocols.
  65  * Profiles of StringPrep are sets of rules and data according to which the
  66  * Unicode Strings are prepared. Each profile contains tables which describe
  67  * how a code point should be treated. The tables are broadly classified into
  68  * <ul>
  69  *     <li> Unassigned Table: Contains code points that are unassigned
  70  *          in the Unicode Version supported by StringPrep. Currently
  71  *          RFC 3454 supports Unicode 3.2. </li>
  72  *     <li> Prohibited Table: Contains code points that are prohibited from
  73  *          the output of the StringPrep processing function. </li>


 210 
 211         byte[] sprepBytes = new byte[indexes[INDEX_TRIE_SIZE]];
 212 
 213 
 214         //indexes[INDEX_MAPPING_DATA_SIZE] store the size of mappingData in bytes
 215         mappingData = new char[indexes[INDEX_MAPPING_DATA_SIZE]/2];
 216         // load the rest of the data and initialize the data members
 217         reader.read(sprepBytes,mappingData);
 218 
 219         sprepTrieImpl           = new StringPrepTrieImpl();
 220         sprepTrieImpl.sprepTrie = new CharTrie( new ByteArrayInputStream(sprepBytes),sprepTrieImpl  );
 221 
 222         // get the data format version
 223         formatVersion = reader.getDataFormatVersion();
 224 
 225         // get the options
 226         doNFKC            = ((indexes[OPTIONS] & NORMALIZATION_ON) > 0);
 227         checkBiDi         = ((indexes[OPTIONS] & CHECK_BIDI_ON) > 0);
 228         sprepUniVer   = getVersionInfo(reader.getUnicodeVersion());
 229         normCorrVer   = getVersionInfo(indexes[NORM_CORRECTNS_LAST_UNI_VERSION]);
 230         VersionInfo normUniVer = NormalizerImpl.getUnicodeVersion();
 231         if(normUniVer.compareTo(sprepUniVer) < 0 && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
 232            normUniVer.compareTo(normCorrVer) < 0 && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
 233            ((indexes[OPTIONS] & NORMALIZATION_ON) > 0) /* normalization turned on*/
 234            ){
 235             throw new IOException("Normalization Correction version not supported");
 236         }
 237         b.close();
 238     }
 239 
 240     private static final class Values{ // mutable holder for three fields: isIndex, value and type
 241         boolean isIndex;
 242         int value;
 243         int type;
 244         public void reset(){ // puts all three fields back to fixed values: false, 0 and -1
 245             isIndex = false;
 246             value = 0;
 247             type = -1; // NOTE(review): -1 presumably means "no type assigned" - confirm against the type constants defined elsewhere in this file
 248         }
 249     }
 250 


 337         return dest;
 338     }
 339 
 340 
 341     private StringBuffer normalize(StringBuffer src){ // returns a new StringBuffer holding the NFKC-normalized text of src; src is only read via toString()
 342         /*
 343          * Option UNORM_BEFORE_PRI_29:
 344          *
 345          * IDNA as interpreted by IETF members (see unicode mailing list 2004H1)
 346          * requires strict adherence to Unicode 3.2 normalization,
 347          * including buggy composition from before fixing Public Review Issue #29.
 348          * Note that this results in some valid but nonsensical text to be
 349          * either corrupted or rejected, depending on the text.
 350          * See http://www.unicode.org/review/resolved-pri.html#pri29
 351          * See unorm.cpp and cnormtst.c
 352          */
 353         return new StringBuffer(
 354             Normalizer.normalize(
 355                 src.toString(),
 356                 java.text.Normalizer.Form.NFKC,
 357                 Normalizer.UNICODE_3_2|NormalizerImpl.BEFORE_PRI_29)); // two option flags OR-ed together: Unicode 3.2 normalization plus the pre-PRI-29 behavior described in the comment above
 358     }
 359     /*
 360     boolean isLabelSeparator(int ch){
 361         int result = getCodePointValue(ch);
 362         if( (result & 0x07)  == LABEL_SEPARATOR){
 363             return true;
 364         }
 365         return false;
 366     }
 367     */
 368      /*
 369        1) Map -- For each character in the input, check if it has a mapping
 370           and, if so, replace it with its mapping.
 371 
 372        2) Normalize -- Possibly normalize the result of step 1 using Unicode
 373           normalization.
 374 
 375        3) Prohibit -- Check for any characters that are not allowed in the
 376           output.  If any are found, return an error.
 377 




  33 //      2005-05-19 Edward Wang
  34 //          - copy this file from icu4jsrc_3_2/src/com/ibm/icu/text/StringPrep.java
  35 //          - move from package com.ibm.icu.text to package sun.net.idn
  36 //          - use ParseException instead of StringPrepParseException
  37 //          - change 'Normalizer.getUnicodeVersion()' to 'NormalizerImpl.getUnicodeVersion()'
  38 //          - remove all @deprecated tag to make compiler happy
  39 //      2007-08-14 Martin Buchholz
  40 //          - remove redundant casts
  41 //
  42 package sun.net.idn;
  43 
  44 import java.io.BufferedInputStream;
  45 import java.io.ByteArrayInputStream;
  46 import java.io.IOException;
  47 import java.io.InputStream;
  48 import java.text.ParseException;
  49 
  50 import sun.text.Normalizer;
  51 import sun.text.normalizer.CharTrie;
  52 import sun.text.normalizer.Trie;

  53 import sun.text.normalizer.VersionInfo;
  54 import sun.text.normalizer.UCharacter;
  55 import sun.text.normalizer.UCharacterIterator;
  56 import sun.text.normalizer.UTF16;
  57 import sun.net.idn.UCharacterDirection;
  58 import sun.net.idn.StringPrepDataReader;
  59 
  60 /**
  61  * StringPrep API implements the StringPrep framework as described by
  62  * <a href="http://www.ietf.org/rfc/rfc3454.txt">RFC 3454</a>.
  63  * StringPrep prepares Unicode strings for use in network protocols.
  64  * Profiles of StringPrep are sets of rules and data according to which the
  65  * Unicode Strings are prepared. Each profile contains tables which describe
  66  * how a code point should be treated. The tables are broadly classified into
  67  * <ul>
  68  *     <li> Unassigned Table: Contains code points that are unassigned
  69  *          in the Unicode Version supported by StringPrep. Currently
  70  *          RFC 3454 supports Unicode 3.2. </li>
  71  *     <li> Prohibited Table: Contains code points that are prohibited from
  72  *          the output of the StringPrep processing function. </li>


 209 
 210         byte[] sprepBytes = new byte[indexes[INDEX_TRIE_SIZE]];
 211 
 212 
 213         //indexes[INDEX_MAPPING_DATA_SIZE] store the size of mappingData in bytes
 214         mappingData = new char[indexes[INDEX_MAPPING_DATA_SIZE]/2];
 215         // load the rest of the data and initialize the data members
 216         reader.read(sprepBytes,mappingData);
 217 
 218         sprepTrieImpl           = new StringPrepTrieImpl();
 219         sprepTrieImpl.sprepTrie = new CharTrie( new ByteArrayInputStream(sprepBytes),sprepTrieImpl  );
 220 
 221         // get the data format version
 222         formatVersion = reader.getDataFormatVersion();
 223 
 224         // get the options
 225         doNFKC            = ((indexes[OPTIONS] & NORMALIZATION_ON) > 0);
 226         checkBiDi         = ((indexes[OPTIONS] & CHECK_BIDI_ON) > 0);
 227         sprepUniVer   = getVersionInfo(reader.getUnicodeVersion());
 228         normCorrVer   = getVersionInfo(indexes[NORM_CORRECTNS_LAST_UNI_VERSION]);
 229         VersionInfo normUniVer = UCharacter.getUnicodeVersion();
 230         if(normUniVer.compareTo(sprepUniVer) < 0 && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
 231            normUniVer.compareTo(normCorrVer) < 0 && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
 232            ((indexes[OPTIONS] & NORMALIZATION_ON) > 0) /* normalization turned on*/
 233            ){
 234             throw new IOException("Normalization Correction version not supported");
 235         }
 236         b.close();
 237     }
 238 
 239     private static final class Values{ // mutable holder for three fields: isIndex, value and type
 240         boolean isIndex;
 241         int value;
 242         int type;
 243         public void reset(){ // puts all three fields back to fixed values: false, 0 and -1
 244             isIndex = false;
 245             value = 0;
 246             type = -1; // NOTE(review): -1 presumably means "no type assigned" - confirm against the type constants defined elsewhere in this file
 247         }
 248     }
 249 


 336         return dest;
 337     }
 338 
 339 
 340     private StringBuffer normalize(StringBuffer src){ // returns a new StringBuffer holding the NFKC-normalized text of src; src is only read via toString()
 341         /*
 342          * Option UNORM_BEFORE_PRI_29:
 343          *
 344          * IDNA as interpreted by IETF members (see unicode mailing list 2004H1)
 345          * requires strict adherence to Unicode 3.2 normalization,
 346          * including buggy composition from before fixing Public Review Issue #29.
 347          * Note that this results in some valid but nonsensical text to be
 348          * either corrupted or rejected, depending on the text.
 349          * See http://www.unicode.org/review/resolved-pri.html#pri29
 350          * See unorm.cpp and cnormtst.c
 351          */
 352         return new StringBuffer(
 353             Normalizer.normalize(
 354                 src.toString(),
 355                 java.text.Normalizer.Form.NFKC,
 356                 Normalizer.UNICODE_3_2)); // NOTE(review): only UNICODE_3_2 is passed here; the comment above describes a BEFORE_PRI_29 option that this call does not set explicitly - confirm it is implied by UNICODE_3_2
 357     }
 358     /*
 359     boolean isLabelSeparator(int ch){
 360         int result = getCodePointValue(ch);
 361         if( (result & 0x07)  == LABEL_SEPARATOR){
 362             return true;
 363         }
 364         return false;
 365     }
 366     */
 367      /*
 368        1) Map -- For each character in the input, check if it has a mapping
 369           and, if so, replace it with its mapping.
 370 
 371        2) Normalize -- Possibly normalize the result of step 1 using Unicode
 372           normalization.
 373 
 374        3) Prohibit -- Check for any characters that are not allowed in the
 375           output.  If any are found, return an error.
 376 


< prev index next >