1 /*
2 * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26 /*
27 ******************************************************************************
28 *
29 * Copyright (C) 2009-2014, International Business Machines
30 * Corporation and others. All Rights Reserved.
31 *
32 ******************************************************************************
33 */
34
35 package sun.text.normalizer;
36
37 import sun.text.normalizer.UnicodeSet.SpanCondition;
38
39 /**
40 * Helper class for frozen UnicodeSets, implements contains() and span() optimized for BMP code points.
41 *
42 * Latin-1: Look up booleans (one per character).
43 * 2-byte characters: Bits organized vertically.
44 * 3-byte characters: Use zero/one/mixed data per 64-block in U+0000..U+FFFF, with mixed for illegal ranges.
45 * Supplementary characters: Call contains() on the parent set.
46 */
47 final class BMPSet {
48
49 /**
50 * One boolean ('true' or 'false') per Latin-1 character.
51 */
52 private boolean[] latin1Contains;
53
54 /**
55 * One bit per code point from U+0000..U+07FF. The bits are organized vertically; consecutive code points
56 * correspond to the same bit positions in consecutive table words. With code point parts lead=c{10..6} and
57 * trail=c{5..0}, set.contains(c) is true exactly when bit lead of table7FF[trail] is set.
58 *
59 * Bits for 0..7F (non-shortest forms) are set to the result of contains(FFFD) for faster validity checking at
60 * runtime.
61 */
62 private int[] table7FF;
63
64 /**
65 * One bit per 64 BMP code points. The bits are organized vertically; consecutive 64-code point blocks
66 * correspond to the same bit position in consecutive table words. With code point parts lead=c{15..12}
67 * t1=c{11..6} test bits (lead+16) and lead in bmpBlockBits[t1]. If the upper bit is 0, then the lower bit
|
1 /*
2 * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26 /*
27 ******************************************************************************
28 *
29 * Copyright (C) 2009-2014, International Business Machines
30 * Corporation and others. All Rights Reserved.
31 *
32 ******************************************************************************
33 */
34
35 package jdk.internal.icu.impl;
36
37 import jdk.internal.icu.text.UnicodeSet.SpanCondition;
38 import jdk.internal.icu.util.OutputInt;
39
40 /**
41 * Helper class for frozen UnicodeSets, implements contains() and span() optimized for BMP code points.
42 *
43 * Latin-1: Look up booleans (one per character).
44 * 2-byte characters: Bits organized vertically.
45 * 3-byte characters: Use zero/one/mixed data per 64-block in U+0000..U+FFFF, with mixed for illegal ranges.
46 * Supplementary characters: Call contains() on the parent set.
47 */
48 public final class BMPSet {
49
50 /**
51 * One boolean ('true' or 'false') per Latin-1 character.
52 */
53 private boolean[] latin1Contains;
54
55 /**
56 * One bit per code point from U+0000..U+07FF. The bits are organized vertically; consecutive code points
57 * correspond to the same bit positions in consecutive table words. With code point parts lead=c{10..6} and
58 * trail=c{5..0}, set.contains(c) is true exactly when bit lead of table7FF[trail] is set.
59 *
60 * Bits for 0..7F (non-shortest forms) are set to the result of contains(FFFD) for faster validity checking at
61 * runtime.
62 */
63 private int[] table7FF;
64
65 /**
66 * One bit per 64 BMP code points. The bits are organized vertically; consecutive 64-code point blocks
67 * correspond to the same bit position in consecutive table words. With code point parts lead=c{15..12}
68 * t1=c{11..6} test bits (lead+16) and lead in bmpBlockBits[t1]. If the upper bit is 0, then the lower bit
|