58 * private API for communication with RBTableBuilder.
59 * This class isn't just an inner class of RBCollationTables itself because
60 * of its large size. For source-code readability, it seemed better for the
61 * builder to have its own source file.
62 */
63 final class RBTableBuilder {
64
/**
 * Creates a builder that keeps the supplied build API handle in its
 * {@code tables} field.
 *
 * @param tables the {@code RBCollationTables.BuildAPI} handle to retain;
 *               presumably how the finished tables are handed back to
 *               RBCollationTables -- its use is outside this constructor
 */
65 public RBTableBuilder(RBCollationTables.BuildAPI tables) {
66 this.tables = tables;
67 }
68
69 /**
70 * Create a table-based collation object with the given rules.
71 * This is the main function that actually builds the tables and
72 * stores them back in the RBCollationTables object. It is called
73 * ONLY by the RBCollationTables constructor.
74 * @see RuleBasedCollator#RuleBasedCollator
75 * @exception ParseException If the rules format is incorrect.
76 */
77
78 public void build(String pattern, int decmp) throws ParseException
79 {
80 boolean isSource = true;
81 int i = 0;
82 String expChars;
83 String groupChars;
84 if (pattern.length() == 0)
85 throw new ParseException("Build rules empty.", 0);
86
87 // This array maps Unicode characters to their collation ordering
88 mapping = new UCompactIntArray(RBCollationTables.UNMAPPED);
89 // Normalize the build rules. Find occurrences of all decomposed characters
90 // and normalize the rules before feeding into the builder. By "normalize",
91 // we mean that all precomposed Unicode characters must be converted into
92 // a base character and one or more combining characters (such as accents).
93 // When there are multiple combining characters attached to a base character,
94 // the combining characters must be in their canonical order
95 //
96 // sherman/Note:
97 //(1)decmp will be NO_DECOMPOSITION only in ko locale to prevent decomposing
98 //Hangul syllables to jamos, so we can actually just call decompose with
99 //normalizer's IGNORE_HANGUL option turned on
100 //
101 //(2)just call the "special version" in NormalizerImpl directly
102 //pattern = Normalizer.decompose(pattern, false, Normalizer.IGNORE_HANGUL, true);
103 //
104 //Normalizer.Mode mode = CollatorUtilities.toNormalizerMode(decmp);
105 //pattern = Normalizer.normalize(pattern, mode, 0, true);
106
107 pattern = NormalizerImpl.canonicalDecomposeWithSingleQuotation(pattern);
108
109 // Build the merged collation entries
110 // Since rules can be specified in any order in the string
111 // (e.g. "c , C < d , D < e , E .... C < CH")
112 // this splits all of the rules in the string out into separate
113 // objects and then sorts them. In the above example, it merges the
114 // "C < CH" rule in just before the "C < D" rule.
115 //
116
117 mPattern = new MergeCollation(pattern);
118
119 int order = 0;
120
121 // Now walk through each entry and add it to my own tables
122 for (i = 0; i < mPattern.getCount(); ++i)
123 {
124 PatternEntry entry = mPattern.getItemAt(i);
125 if (entry != null) {
126 groupChars = entry.getChars();
127 if (groupChars.length() > 1) {
128 switch(groupChars.charAt(groupChars.length()-1)) {
129 case '@':
130 frenchSec = true;
131 groupChars = groupChars.substring(0, groupChars.length()-1);
132 break;
133 case '!':
134 seAsianSwapping = true;
135 groupChars = groupChars.substring(0, groupChars.length()-1);
136 break;
137 }
138 }
139
140 order = increment(entry.getStrength(), order);
141 expChars = entry.getExtension();
142
143 if (expChars.length() != 0) {
144 addExpandOrder(groupChars, expChars, order);
145 } else if (groupChars.length() > 1) {
146 char ch = groupChars.charAt(0);
147 if (Character.isHighSurrogate(ch) && groupChars.length() == 2) {
148 addOrder(Character.toCodePoint(ch, groupChars.charAt(1)), order);
149 } else {
150 addContractOrder(groupChars, order);
151 }
152 } else {
153 char ch = groupChars.charAt(0);
154 addOrder(ch, order);
155 }
156 }
157 }
158 addComposedChars();
159
160 commit();
161 mapping.compact();
162 /*
163 System.out.println("mappingSize=" + mapping.getKSize());
|
58 * private API for communication with RBTableBuilder.
59 * This class isn't just an inner class of RBCollationTables itself because
60 * of its large size. For source-code readability, it seemed better for the
61 * builder to have its own source file.
62 */
63 final class RBTableBuilder {
64
/**
 * Creates a builder that keeps the supplied build API handle in its
 * {@code tables} field.
 *
 * @param tables the {@code RBCollationTables.BuildAPI} handle to retain;
 *               presumably how the finished tables are handed back to
 *               RBCollationTables -- its use is outside this constructor
 */
65 public RBTableBuilder(RBCollationTables.BuildAPI tables) {
66 this.tables = tables;
67 }
68
69 /**
70 * Create a table-based collation object with the given rules.
71 * This is the main function that actually builds the tables and
72 * stores them back in the RBCollationTables object. It is called
73 * ONLY by the RBCollationTables constructor.
74 * @see RuleBasedCollator#RuleBasedCollator
75 * @exception ParseException If the rules format is incorrect.
76 */
77
78 public void build(String pattern, int decmp) throws ParseException {
79 String expChars;
80 String groupChars;
81 if (pattern.isEmpty())
82 throw new ParseException("Build rules empty.", 0);
83
84 // This array maps Unicode characters to their collation ordering
85 mapping = new UCompactIntArray(RBCollationTables.UNMAPPED);
86 // Normalize the build rules. Find occurrences of all decomposed characters
87 // and normalize the rules before feeding into the builder. By "normalize",
88 // we mean that all precomposed Unicode characters must be converted into
89 // a base character and one or more combining characters (such as accents).
90 // When there are multiple combining characters attached to a base character,
91 // the combining characters must be in their canonical order
92 //
93 // sherman/Note:
94 //(1)decmp will be NO_DECOMPOSITION only in ko locale to prevent decomposing
95 //Hangul syllables to jamos, so we can actually just call decompose with
96 //normalizer's IGNORE_HANGUL option turned on
97 //
98 //(2)just call the "special version" in NormalizerImpl directly
99 //pattern = Normalizer.decompose(pattern, false, Normalizer.IGNORE_HANGUL, true);
100 //
101 //Normalizer.Mode mode = CollatorUtilities.toNormalizerMode(decmp);
102 //pattern = Normalizer.normalize(pattern, mode, 0, true);
103
104 pattern = NormalizerImpl.canonicalDecomposeWithSingleQuotation(pattern);
105
106 // Build the merged collation entries
107 // Since rules can be specified in any order in the string
108 // (e.g. "c , C < d , D < e , E .... C < CH")
109 // this splits all of the rules in the string out into separate
110 // objects and then sorts them. In the above example, it merges the
111 // "C < CH" rule in just before the "C < D" rule.
112 //
113
114 mPattern = new MergeCollation(pattern);
115
116 int order = 0;
117
118 // Now walk through each entry and add it to my own tables
119 for (int i = 0; i < mPattern.getCount(); ++i) {
120 PatternEntry entry = mPattern.getItemAt(i);
121 if (entry != null) {
122 groupChars = entry.getChars();
123 if (groupChars.length() > 1) {
124 switch(groupChars.charAt(groupChars.length()-1)) {
125 case '@':
126 frenchSec = true;
127 groupChars = groupChars.substring(0, groupChars.length()-1);
128 break;
129 case '!':
130 seAsianSwapping = true;
131 groupChars = groupChars.substring(0, groupChars.length()-1);
132 break;
133 }
134 }
135
136 order = increment(entry.getStrength(), order);
137 expChars = entry.getExtension();
138
139 if (!expChars.isEmpty()) {
140 addExpandOrder(groupChars, expChars, order);
141 } else if (groupChars.length() > 1) {
142 char ch = groupChars.charAt(0);
143 if (Character.isHighSurrogate(ch) && groupChars.length() == 2) {
144 addOrder(Character.toCodePoint(ch, groupChars.charAt(1)), order);
145 } else {
146 addContractOrder(groupChars, order);
147 }
148 } else {
149 char ch = groupChars.charAt(0);
150 addOrder(ch, order);
151 }
152 }
153 }
154 addComposedChars();
155
156 commit();
157 mapping.compact();
158 /*
159 System.out.println("mappingSize=" + mapping.getKSize());
|