76 // Encoding.prevCharHead 77 public static int prevCharHead(int p, int s) { 78 return s <= p ? -1 : s - 1; 79 } 80 81 /* onigenc_get_right_adjust_char_head_with_prev */ 82 public static int rightAdjustCharHeadWithPrev(int s, IntHolder prev) { 83 if (prev != null) prev.value = -1; /* Sorry */ 84 return s; 85 } 86 87 // Encoding.stepBack 88 public static int stepBack(int p, int s, int n) { 89 while (s != -1 && n-- > 0) { 90 if (s <= p) return -1; 91 s--; 92 } 93 return s; 94 } 95 96 public static int mbcToCode(byte[] bytes, int p, int end) { 97 int code = 0; 98 for (int i = p; i < end; i++) { 99 code = (code << 8) | (bytes[i] & 0xff); 100 } 101 return code; 102 } 103 104 public static int mbcodeStartPosition() { 105 return 0x80; 106 } 107 108 public static char[] caseFoldCodesByString(int flag, char c) { 109 if (Character.isUpperCase(c)) { 110 return new char[] {Character.toLowerCase(c)}; 111 } else if (Character.isLowerCase(c)) { 112 return new char[] {Character.toUpperCase(c)}; 113 } else { 114 return EMPTYCHARS; 115 } 116 } 117 118 public static void applyAllCaseFold(int flag, ApplyCaseFold fun, Object arg) { 119 int[] code = new int[1]; 120 121 for (int c = 0; c < 0xffff; c++) { 122 if (Character.getType(c) == Character.LOWERCASE_LETTER) { 123 124 int upper = code[0] = Character.toUpperCase(c); 125 fun.apply(c, code, 1, arg); 126 127 code[0] = c; 128 fun.apply(upper, code, 1, arg); 129 } 130 } 131 } 132 133 public static int[] ctypeCodeRange(int ctype, IntHolder sbOut) { 134 sbOut.value = 0x100; // use bitset for codes smaller than 256 135 int[] range = null; 136 137 if (ctype < codeRanges.length) { 138 range = codeRanges[ctype]; 139 140 if (range == null) { 141 // format: [numberOfRanges, rangeStart, rangeEnd, ...] 142 range = new int[16]; 143 int rangeCount = 0; 144 int lastCode = -2; 145 146 for (int code = 0; code <= 0xffff; code++) { 147 if (isCodeCType(code, ctype)) { 148 if (lastCode < code -1) { 149 if (rangeCount * 2 + 2 >= range.length) { 150 range = Arrays.copyOf(range, range.length * 2); | 76 // Encoding.prevCharHead 77 public static int prevCharHead(int p, int s) { 78 return s <= p ? -1 : s - 1; 79 } 80 81 /* onigenc_get_right_adjust_char_head_with_prev */ 82 public static int rightAdjustCharHeadWithPrev(int s, IntHolder prev) { 83 if (prev != null) prev.value = -1; /* Sorry */ 84 return s; 85 } 86 87 // Encoding.stepBack 88 public static int stepBack(int p, int s, int n) { 89 while (s != -1 && n-- > 0) { 90 if (s <= p) return -1; 91 s--; 92 } 93 return s; 94 } 95 96 public static int mbcodeStartPosition() { 97 return 0x80; 98 } 99 100 public static char[] caseFoldCodesByString(int flag, char c) { 101 char[] codes = EMPTYCHARS; 102 final char upper = toUpperCase(c); 103 104 if (upper != toLowerCase(upper)) { 105 int count = 0; 106 char ch = 0; 107 108 do { 109 final char u = toUpperCase(ch); 110 if (u == upper && ch != c) { 111 // Almost all characters will return array of length 1, very few 2 or 3, so growing by one is fine. 112 codes = count == 0 ? new char[1] : Arrays.copyOf(codes, count + 1); 113 codes[count++] = ch; 114 } 115 } while (ch++ < 0xffff); 116 } 117 return codes; 118 } 119 120 public static void applyAllCaseFold(int flag, ApplyCaseFold fun, Object arg) { 121 for (int c = 0; c < 0xffff; c++) { 122 if (Character.isLowerCase(c)) { 123 final int upper = toUpperCase(c); 124 125 if (upper != c) { 126 fun.apply(c, upper, arg); 127 } 128 } 129 } 130 131 // Some characters have multiple lower case variants, hence we need to do a second run 132 for (int c = 0; c < 0xffff; c++) { 133 if (Character.isLowerCase(c)) { 134 final int upper = toUpperCase(c); 135 136 if (upper != c) { 137 fun.apply(upper, c, arg); 138 } 139 } 140 } 141 } 142 143 public static char toLowerCase(char c) { 144 return (char)toLowerCase((int)c); 145 } 146 147 public static int toLowerCase(int c) { 148 if (c < 128) { 149 return ('A' <= c && c <= 'Z') ? (c + ('a' - 'A')) : c; 150 } 151 // Do not convert non-ASCII upper case character to ASCII lower case. 152 int lower = Character.toLowerCase(c); 153 return (lower < 128) ? c : lower; 154 155 } 156 157 public static char toUpperCase(char c) { 158 return (char)toUpperCase((int)c); 159 } 160 161 public static int toUpperCase(int c) { 162 if (c < 128) { 163 return ('a' <= c && c <= 'z') ? c + ('A' - 'a') : c; 164 } 165 // Do not convert non-ASCII lower case character to ASCII upper case. 166 int upper = Character.toUpperCase(c); 167 return (upper < 128) ? c : upper; 168 } 169 170 public static int[] ctypeCodeRange(int ctype, IntHolder sbOut) { 171 sbOut.value = 0x100; // use bitset for codes smaller than 256 172 int[] range = null; 173 174 if (ctype < codeRanges.length) { 175 range = codeRanges[ctype]; 176 177 if (range == null) { 178 // format: [numberOfRanges, rangeStart, rangeEnd, ...] 179 range = new int[16]; 180 int rangeCount = 0; 181 int lastCode = -2; 182 183 for (int code = 0; code <= 0xffff; code++) { 184 if (isCodeCType(code, ctype)) { 185 if (lastCode < code -1) { 186 if (rangeCount * 2 + 2 >= range.length) { 187 range = Arrays.copyOf(range, range.length * 2); |