33 // 2005-05-19 Edward Wang
34 // - copy this file from icu4jsrc_3_2/src/com/ibm/icu/text/StringPrep.java
35 // - move from package com.ibm.icu.text to package sun.net.idn
36 // - use ParseException instead of StringPrepParseException
37 // - change 'Normalizer.getUnicodeVersion()' to 'NormalizerImpl.getUnicodeVersion()'
38 // - remove all @deprecated tag to make compiler happy
39 // 2007-08-14 Martin Buchholz
40 // - remove redundant casts
41 //
42 package sun.net.idn;
43
44 import java.io.BufferedInputStream;
45 import java.io.ByteArrayInputStream;
46 import java.io.IOException;
47 import java.io.InputStream;
48 import java.text.ParseException;
49
50 import sun.text.Normalizer;
51 import sun.text.normalizer.CharTrie;
52 import sun.text.normalizer.Trie;
53 import sun.text.normalizer.NormalizerImpl;
54 import sun.text.normalizer.VersionInfo;
55 import sun.text.normalizer.UCharacter;
56 import sun.text.normalizer.UCharacterIterator;
57 import sun.text.normalizer.UTF16;
58 import sun.net.idn.UCharacterDirection;
59 import sun.net.idn.StringPrepDataReader;
60
61 /**
62 * StringPrep API implements the StringPrep framework as described by
63 * <a href="http://www.ietf.org/rfc/rfc3454.txt">RFC 3454</a>.
64 * StringPrep prepares Unicode strings for use in network protocols.
65 * Profiles of StringPrep are sets of rules and data according to which the
66 * Unicode Strings are prepared. Each profile contains tables which describe
67 * how a code point should be treated. The tables are broadly classified into
68 * <ul>
69 * <li> Unassigned Table: Contains code points that are unassigned
70 * in the Unicode Version supported by StringPrep. Currently
71 * RFC 3454 supports Unicode 3.2. </li>
72 * <li> Prohibited Table: Contains code points that are prohibited from
73 * the output of the StringPrep processing function. </li>
210
211 byte[] sprepBytes = new byte[indexes[INDEX_TRIE_SIZE]];
212
213
214 //indexes[INDEX_MAPPING_DATA_SIZE] store the size of mappingData in bytes
215 mappingData = new char[indexes[INDEX_MAPPING_DATA_SIZE]/2];
216 // load the rest of the data and initialize the data members
217 reader.read(sprepBytes,mappingData);
218
219 sprepTrieImpl = new StringPrepTrieImpl();
220 sprepTrieImpl.sprepTrie = new CharTrie( new ByteArrayInputStream(sprepBytes),sprepTrieImpl );
221
222 // get the data format version
223 formatVersion = reader.getDataFormatVersion();
224
225 // get the options
226 doNFKC = ((indexes[OPTIONS] & NORMALIZATION_ON) > 0);
227 checkBiDi = ((indexes[OPTIONS] & CHECK_BIDI_ON) > 0);
228 sprepUniVer = getVersionInfo(reader.getUnicodeVersion());
229 normCorrVer = getVersionInfo(indexes[NORM_CORRECTNS_LAST_UNI_VERSION]);
230 VersionInfo normUniVer = NormalizerImpl.getUnicodeVersion();
231 if(normUniVer.compareTo(sprepUniVer) < 0 && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
232 normUniVer.compareTo(normCorrVer) < 0 && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
233 ((indexes[OPTIONS] & NORMALIZATION_ON) > 0) /* normalization turned on*/
234 ){
235 throw new IOException("Normalization Correction version not supported");
236 }
237 b.close();
238 }
239
240 private static final class Values{
241 boolean isIndex;
242 int value;
243 int type;
244 public void reset(){
245 isIndex = false;
246 value = 0;
247 type = -1;
248 }
249 }
250
337 return dest;
338 }
339
340
341 private StringBuffer normalize(StringBuffer src){
342 /*
343 * Option UNORM_BEFORE_PRI_29:
344 *
345 * IDNA as interpreted by IETF members (see unicode mailing list 2004H1)
346 * requires strict adherence to Unicode 3.2 normalization,
347 * including buggy composition from before fixing Public Review Issue #29.
348 * Note that this results in some valid but nonsensical text to be
349 * either corrupted or rejected, depending on the text.
350 * See http://www.unicode.org/review/resolved-pri.html#pri29
351 * See unorm.cpp and cnormtst.c
352 */
353 return new StringBuffer(
354 Normalizer.normalize(
355 src.toString(),
356 java.text.Normalizer.Form.NFKC,
357 Normalizer.UNICODE_3_2|NormalizerImpl.BEFORE_PRI_29));
358 }
359 /*
360 boolean isLabelSeparator(int ch){
361 int result = getCodePointValue(ch);
362 if( (result & 0x07) == LABEL_SEPARATOR){
363 return true;
364 }
365 return false;
366 }
367 */
368 /*
369 1) Map -- For each character in the input, check if it has a mapping
370 and, if so, replace it with its mapping.
371
372 2) Normalize -- Possibly normalize the result of step 1 using Unicode
373 normalization.
374
375 3) Prohibit -- Check for any characters that are not allowed in the
376 output. If any are found, return an error.
377
|
33 // 2005-05-19 Edward Wang
34 // - copy this file from icu4jsrc_3_2/src/com/ibm/icu/text/StringPrep.java
35 // - move from package com.ibm.icu.text to package sun.net.idn
36 // - use ParseException instead of StringPrepParseException
37 // - change 'Normalizer.getUnicodeVersion()' to 'NormalizerImpl.getUnicodeVersion()'
38 // - remove all @deprecated tag to make compiler happy
39 // 2007-08-14 Martin Buchholz
40 // - remove redundant casts
41 //
42 package sun.net.idn;
43
44 import java.io.BufferedInputStream;
45 import java.io.ByteArrayInputStream;
46 import java.io.IOException;
47 import java.io.InputStream;
48 import java.text.ParseException;
49
50 import sun.text.Normalizer;
51 import sun.text.normalizer.CharTrie;
52 import sun.text.normalizer.Trie;
53 import sun.text.normalizer.VersionInfo;
54 import sun.text.normalizer.UCharacter;
55 import sun.text.normalizer.UCharacterIterator;
56 import sun.text.normalizer.UTF16;
57 import sun.net.idn.UCharacterDirection;
58 import sun.net.idn.StringPrepDataReader;
59
60 /**
61 * StringPrep API implements the StringPrep framework as described by
62 * <a href="http://www.ietf.org/rfc/rfc3454.txt">RFC 3454</a>.
63 * StringPrep prepares Unicode strings for use in network protocols.
64 * Profiles of StringPrep are sets of rules and data according to which the
65 * Unicode Strings are prepared. Each profile contains tables which describe
66 * how a code point should be treated. The tables are broadly classified into
67 * <ul>
68 * <li> Unassigned Table: Contains code points that are unassigned
69 * in the Unicode Version supported by StringPrep. Currently
70 * RFC 3454 supports Unicode 3.2. </li>
71 * <li> Prohibited Table: Contains code points that are prohibited from
72 * the output of the StringPrep processing function. </li>
209
210 byte[] sprepBytes = new byte[indexes[INDEX_TRIE_SIZE]];
211
212
213 //indexes[INDEX_MAPPING_DATA_SIZE] store the size of mappingData in bytes
214 mappingData = new char[indexes[INDEX_MAPPING_DATA_SIZE]/2];
215 // load the rest of the data and initialize the data members
216 reader.read(sprepBytes,mappingData);
217
218 sprepTrieImpl = new StringPrepTrieImpl();
219 sprepTrieImpl.sprepTrie = new CharTrie( new ByteArrayInputStream(sprepBytes),sprepTrieImpl );
220
221 // get the data format version
222 formatVersion = reader.getDataFormatVersion();
223
224 // get the options
225 doNFKC = ((indexes[OPTIONS] & NORMALIZATION_ON) > 0);
226 checkBiDi = ((indexes[OPTIONS] & CHECK_BIDI_ON) > 0);
227 sprepUniVer = getVersionInfo(reader.getUnicodeVersion());
228 normCorrVer = getVersionInfo(indexes[NORM_CORRECTNS_LAST_UNI_VERSION]);
229 VersionInfo normUniVer = UCharacter.getUnicodeVersion();
230 if(normUniVer.compareTo(sprepUniVer) < 0 && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
231 normUniVer.compareTo(normCorrVer) < 0 && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
232 ((indexes[OPTIONS] & NORMALIZATION_ON) > 0) /* normalization turned on*/
233 ){
234 throw new IOException("Normalization Correction version not supported");
235 }
236 b.close();
237 }
238
239 private static final class Values{
240 boolean isIndex;
241 int value;
242 int type;
243 public void reset(){
244 isIndex = false;
245 value = 0;
246 type = -1;
247 }
248 }
249
336 return dest;
337 }
338
339
340 private StringBuffer normalize(StringBuffer src){
341 /*
342 * Option UNORM_BEFORE_PRI_29:
343 *
344 * IDNA as interpreted by IETF members (see unicode mailing list 2004H1)
345 * requires strict adherence to Unicode 3.2 normalization,
346 * including buggy composition from before fixing Public Review Issue #29.
347 * Note that this results in some valid but nonsensical text to be
348 * either corrupted or rejected, depending on the text.
349 * See http://www.unicode.org/review/resolved-pri.html#pri29
350 * See unorm.cpp and cnormtst.c
351 */
352 return new StringBuffer(
353 Normalizer.normalize(
354 src.toString(),
355 java.text.Normalizer.Form.NFKC,
356 Normalizer.UNICODE_3_2));
357 }
358 /*
359 boolean isLabelSeparator(int ch){
360 int result = getCodePointValue(ch);
361 if( (result & 0x07) == LABEL_SEPARATOR){
362 return true;
363 }
364 return false;
365 }
366 */
367 /*
368 1) Map -- For each character in the input, check if it has a mapping
369 and, if so, replace it with its mapping.
370
371 2) Normalize -- Possibly normalize the result of step 1 using Unicode
372 normalization.
373
374 3) Prohibit -- Check for any characters that are not allowed in the
375 output. If any are found, return an error.
376
|