1 /*
2 * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25 /*
26 *******************************************************************************
27 * Copyright (C) 1996-2014, International Business Machines Corporation and
28 * others. All Rights Reserved.
29 *******************************************************************************
30 */
31
32 package sun.text.normalizer;
33
34 import java.io.IOException;
35 import java.nio.ByteBuffer;
36 import java.util.Iterator;
37 import java.util.MissingResourceException;
38
39 import sun.text.normalizer.UCharacter.HangulSyllableType;
40 import sun.text.normalizer.UCharacter.NumericType;
41
42 /**
43 * <p>Internal class used for Unicode character property database.</p>
44 * <p>This class stores binary data read from uprops.icu.
45 * It does not have the capability to parse the data into more high-level
46 * information. It only returns bytes of information when required.</p>
47 * <p>Due to the form most commonly used for retrieval, array of char is used
48 * to store the binary data.</p>
49 * <p>UCharacterProperty also contains information on accessing indexes to
50 * significant points in the binary data.</p>
51 * <p>Responsibility for molding the binary data into a more meaningful form lies with
52 * <a href="UCharacter.html">UCharacter</a>.</p>
53 * @author Syn Wee Quek
54 * @since release 2.1, February 1st 2002
55 */
56
57 final class UCharacterProperty
58 {
59 // public data members -----------------------------------------------
60
61 /*
62 * public singleton instance
63 */
64 public static final UCharacterProperty INSTANCE;
65
66 /**
67 * Trie data
68 */
69 public Trie2_16 m_trie_;
70
71 /**
72 * Unicode version
73 */
74 public VersionInfo m_unicodeVersion_;
75
76 /**
77 * Character type mask
300 /**
301 * Maximum values for block, bits used as in vector word
302 * 0
303 */
304 int m_maxBlockScriptValue_;
305 /**
306 * Maximum values for script, bits used as in vector word
307 * 0
308 */
309 int m_maxJTGValue_;
310 /**
311 * Script_Extensions data
312 */
313 public char[] m_scriptExtensions_;
314
315 // private variables -------------------------------------------------
316
317 /**
318 * Default name of the datafile
319 */
320 private static final String DATA_FILE_NAME_ = "/sun/text/resources/uprops.icu";
321
322 /**
323 * Shift value for lead surrogate to form a supplementary character.
324 */
325 private static final int LEAD_SURROGATE_SHIFT_ = 10;
326 /**
327 * Offset to add to combined surrogate pair to avoid masking.
328 */
329 private static final int SURROGATE_OFFSET_ =
330 UTF16.SUPPLEMENTARY_MIN_VALUE -
331 (UTF16.SURROGATE_MIN_VALUE <<
332 LEAD_SURROGATE_SHIFT_) -
333 UTF16.TRAIL_SURROGATE_MIN_VALUE;
334
335
336 // property data constants -------------------------------------------------
337
338 /**
339 * Numeric types and values in the main properties words.
340 */
|
1 /*
2 * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25 /*
26 *******************************************************************************
27 * Copyright (C) 1996-2014, International Business Machines Corporation and
28 * others. All Rights Reserved.
29 *******************************************************************************
30 */
31
32 package jdk.internal.icu.impl;
33
34 import java.io.IOException;
35 import java.nio.ByteBuffer;
36 import java.util.Iterator;
37 import java.util.MissingResourceException;
38
39 import jdk.internal.icu.lang.UCharacter.HangulSyllableType;
40 import jdk.internal.icu.lang.UCharacter.NumericType;
41 import jdk.internal.icu.text.UTF16;
42 import jdk.internal.icu.text.UnicodeSet;
43 import jdk.internal.icu.util.VersionInfo;
44
45 /**
46 * <p>Internal class used for Unicode character property database.</p>
47 * <p>This class stores binary data read from uprops.icu.
48 * It does not have the capability to parse the data into more high-level
49 * information. It only returns bytes of information when required.</p>
50 * <p>Due to the form most commonly used for retrieval, array of char is used
51 * to store the binary data.</p>
52 * <p>UCharacterProperty also contains information on accessing indexes to
53 * significant points in the binary data.</p>
54 * <p>Responsibility for molding the binary data into a more meaningful form lies with
55 * <a href="UCharacter.html">UCharacter</a>.</p>
56 * @author Syn Wee Quek
57 * @since release 2.1, February 1st 2002
58 */
59
60 public final class UCharacterProperty
61 {
62 // public data members -----------------------------------------------
63
64 /*
65 * public singleton instance
66 */
67 public static final UCharacterProperty INSTANCE;
68
69 /**
70 * Trie data
71 */
72 public Trie2_16 m_trie_;
73
74 /**
75 * Unicode version
76 */
77 public VersionInfo m_unicodeVersion_;
78
79 /**
80 * Character type mask
303 /**
304 * Maximum values for block, bits used as in vector word
305 * 0
306 */
307 int m_maxBlockScriptValue_;
308 /**
309 * Maximum values for script, bits used as in vector word
310 * 0
311 */
312 int m_maxJTGValue_;
313 /**
314 * Script_Extensions data
315 */
316 public char[] m_scriptExtensions_;
317
318 // private variables -------------------------------------------------
319
320 /**
321 * Default name of the datafile
322 */
323 private static final String DATA_FILE_NAME_ = "/jdk/internal/icu/impl/data/icudt64b/uprops.icu";
324
325 /**
326 * Shift value for lead surrogate to form a supplementary character.
327 */
328 private static final int LEAD_SURROGATE_SHIFT_ = 10;
329 /**
330 * Offset to add to combined surrogate pair to avoid masking.
331 */
332 private static final int SURROGATE_OFFSET_ =
333 UTF16.SUPPLEMENTARY_MIN_VALUE -
334 (UTF16.SURROGATE_MIN_VALUE <<
335 LEAD_SURROGATE_SHIFT_) -
336 UTF16.TRAIL_SURROGATE_MIN_VALUE;
337
338
339 // property data constants -------------------------------------------------
340
341 /**
342 * Numeric types and values in the main properties words.
343 */
|