1 /* 2 * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 * this software and associated documentation files (the "Software"), to deal in 4 * the Software without restriction, including without limitation the rights to 5 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 * of the Software, and to permit persons to whom the Software is furnished to do 7 * so, subject to the following conditions: 8 * 9 * The above copyright notice and this permission notice shall be included in all 10 * copies or substantial portions of the Software. 11 * 12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 * SOFTWARE. 19 */ 20 package jdk.nashorn.internal.joni; 21 22 import static jdk.nashorn.internal.joni.BitStatus.bsAt; 23 import static jdk.nashorn.internal.joni.Option.isCaptureGroup; 24 import static jdk.nashorn.internal.joni.Option.isDontCaptureGroup; 25 26 import java.util.HashMap; 27 import java.util.Iterator; 28 29 import jdk.nashorn.internal.joni.constants.AnchorType; 30 import jdk.nashorn.internal.joni.constants.RegexState; 31 import jdk.nashorn.internal.joni.exception.ErrorMessages; 32 import jdk.nashorn.internal.joni.exception.InternalException; 33 import jdk.nashorn.internal.joni.exception.ValueException; 34 35 public final class Regex implements RegexState { 36 37 int[] code; /* compiled pattern */ 38 int codeLength; 39 boolean stackNeeded; 40 Object[]operands; /* e.g. shared CClassNode */ 41 int operandLength; 42 43 int state; /* normal, searching, compiling */ // remove 44 int numMem; /* used memory(...) num counted from 1 */ 45 int numRepeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */ 46 int numNullCheck; /* OP_NULL_CHECK_START/END id counter */ 47 int numCombExpCheck; /* combination explosion check */ 48 int numCall; /* number of subexp call */ 49 int captureHistory; /* (?@...) flag (1-31) */ 50 int btMemStart; /* need backtrack flag */ 51 int btMemEnd; /* need backtrack flag */ 52 53 int stackPopLevel; 54 55 int[]repeatRangeLo; 56 int[]repeatRangeHi; 57 58 public WarnCallback warnings; 59 public MatcherFactory factory; 60 61 int options; 62 int userOptions; 63 Object userObject; 64 //final Syntax syntax; 65 final int caseFoldFlag; 66 67 HashMap<String,NameEntry> nameTable; // named entries 68 69 /* optimization info (string search, char-map and anchors) */ 70 SearchAlgorithm searchAlgorithm; /* optimize flag */ 71 int thresholdLength; /* search str-length for apply optimize */ 72 int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */ 73 int anchorDmin; /* (SEMI_)END_BUF anchor distance */ 74 int anchorDmax; /* (SEMI_)END_BUF anchor distance */ 75 int subAnchor; /* start-anchor for exact or map */ 76 77 char[] exact; 78 int exactP; 79 int exactEnd; 80 81 byte[] map; /* used as BM skip or char-map */ 82 int[] intMap; /* BM skip for exact_len > 255 */ 83 int[] intMapBackward; /* BM skip for backward search */ 84 int dMin; /* min-distance of exact or map */ 85 int dMax; /* max-distance of exact or map */ 86 87 char[][] templates; 88 int templateNum; 89 90 public Regex(CharSequence cs) { 91 this(cs.toString()); 92 } 93 94 public Regex(String str) { 95 this(str.toCharArray(), 0, str.length(), 0); 96 } 97 98 public Regex(char[] chars) { 99 this(chars, 0, chars.length, 0); 100 } 101 102 public Regex(char[] chars, int p, int end) { 103 this(chars, p, end, 0); 104 } 105 106 public Regex(char[] chars, int p, int end, int option) { 107 this(chars, p, end, option, Syntax.RUBY, WarnCallback.DEFAULT); 108 } 109 110 // onig_new 111 public Regex(char[] chars, int p, int end, int option, Syntax syntax) { 112 this(chars, p, end, option, Config.ENC_CASE_FOLD_DEFAULT, syntax, WarnCallback.DEFAULT); 113 } 114 115 public Regex(char[]chars, int p, int end, int option, WarnCallback warnings) { 116 this(chars, p, end, option, Syntax.RUBY, warnings); 117 } 118 119 // onig_new 120 public Regex(char[] chars, int p, int end, int option, Syntax syntax, WarnCallback warnings) { 121 this(chars, p, end, option, Config.ENC_CASE_FOLD_DEFAULT, syntax, warnings); 122 } 123 124 // onig_alloc_init 125 public Regex(char[] chars, int p, int end, int option, int caseFoldFlag, Syntax syntax, WarnCallback warnings) { 126 127 if ((option & (Option.DONT_CAPTURE_GROUP | Option.CAPTURE_GROUP)) == 128 (Option.DONT_CAPTURE_GROUP | Option.CAPTURE_GROUP)) { 129 throw new ValueException(ErrorMessages.ERR_INVALID_COMBINATION_OF_OPTIONS); 130 } 131 132 if ((option & Option.NEGATE_SINGLELINE) != 0) { 133 option |= syntax.options; 134 option &= ~Option.SINGLELINE; 135 } else { 136 option |= syntax.options; 137 } 138 139 this.options = option; 140 this.caseFoldFlag = caseFoldFlag; 141 this.warnings = warnings; 142 143 new Analyser(new ScanEnvironment(this, syntax), chars, p, end).compile(); 144 145 this.warnings = null; 146 } 147 148 public Matcher matcher(char[] chars) { 149 return matcher(chars, 0, chars.length); 150 } 151 152 public Matcher matcher(char[] chars, int p, int end) { 153 return factory.create(this, chars, p, end); 154 } 155 156 public int numberOfCaptures() { 157 return numMem; 158 } 159 160 public int numberOfCaptureHistories() { 161 if (Config.USE_CAPTURE_HISTORY) { 162 int n = 0; 163 for (int i=0; i<=Config.MAX_CAPTURE_HISTORY_GROUP; i++) { 164 if (bsAt(captureHistory, i)) n++; 165 } 166 return n; 167 } else { 168 return 0; 169 } 170 } 171 172 String nameTableToString() { 173 StringBuilder sb = new StringBuilder(); 174 175 if (nameTable != null) { 176 sb.append("name table\n"); 177 for (NameEntry ne : nameTable.values()) { 178 sb.append(" " + ne + "\n"); 179 } 180 sb.append("\n"); 181 } 182 return sb.toString(); 183 } 184 185 NameEntry nameFind(char[] name, int nameP, int nameEnd) { 186 if (nameTable != null) return nameTable.get(new String(name, nameP, nameEnd - nameP)); 187 return null; 188 } 189 190 void renumberNameTable(int[]map) { 191 if (nameTable != null) { 192 for (NameEntry e : nameTable.values()) { 193 if (e.backNum > 1) { 194 for (int i=0; i<e.backNum; i++) { 195 e.backRefs[i] = map[e.backRefs[i]]; 196 } 197 } else if (e.backNum == 1) { 198 e.backRef1 = map[e.backRef1]; 199 } 200 } 201 } 202 } 203 204 public int numberOfNames() { 205 return nameTable == null ? 0 : nameTable.size(); 206 } 207 208 void nameAdd(char[] name, int nameP, int nameEnd, int backRef, Syntax syntax) { 209 if (nameEnd - nameP <= 0) throw new ValueException(ErrorMessages.ERR_EMPTY_GROUP_NAME); 210 211 NameEntry e = null; 212 if (nameTable == null) { 213 nameTable = new HashMap<String,NameEntry>(); // 13, oni defaults to 5 214 } else { 215 e = nameFind(name, nameP, nameEnd); 216 } 217 218 if (e == null) { 219 // dup the name here as oni does ?, what for ? (it has to manage it, we don't) 220 e = new NameEntry(name, nameP, nameEnd); 221 nameTable.put(new String(name, nameP, nameEnd - nameP), e); 222 } else if (e.backNum >= 1 && !syntax.allowMultiplexDefinitionName()) { 223 throw new ValueException(ErrorMessages.ERR_MULTIPLEX_DEFINED_NAME, new String(name, nameP, nameEnd - nameP)); 224 } 225 226 e.addBackref(backRef); 227 } 228 229 NameEntry nameToGroupNumbers(char[] name, int nameP, int nameEnd) { 230 return nameFind(name, nameP, nameEnd); 231 } 232 233 public int nameToBackrefNumber(char[] name, int nameP, int nameEnd, Region region) { 234 NameEntry e = nameToGroupNumbers(name, nameP, nameEnd); 235 if (e == null) throw new ValueException(ErrorMessages.ERR_UNDEFINED_NAME_REFERENCE, 236 new String(name, nameP, nameEnd - nameP)); 237 238 switch(e.backNum) { 239 case 0: 240 throw new InternalException(ErrorMessages.ERR_PARSER_BUG); 241 case 1: 242 return e.backRef1; 243 default: 244 if (region != null) { 245 for (int i = e.backNum - 1; i >= 0; i--) { 246 if (region.beg[e.backRefs[i]] != Region.REGION_NOTPOS) return e.backRefs[i]; 247 } 248 } 249 return e.backRefs[e.backNum - 1]; 250 } 251 } 252 253 public Iterator<NameEntry> namedBackrefIterator() { 254 return nameTable.values().iterator(); 255 } 256 257 public boolean noNameGroupIsActive(Syntax syntax) { 258 if (isDontCaptureGroup(options)) return false; 259 260 if (Config.USE_NAMED_GROUP) { 261 if (numberOfNames() > 0 && syntax.captureOnlyNamedGroup() && !isCaptureGroup(options)) return false; 262 } 263 return true; 264 } 265 266 /* set skip map for Boyer-Moor search */ 267 void setupBMSkipMap() { 268 char[] chars = exact; 269 int p = exactP; 270 int end = exactEnd; 271 int len = end - p; 272 273 if (len < Config.CHAR_TABLE_SIZE) { 274 // map/skip 275 if (map == null) map = new byte[Config.CHAR_TABLE_SIZE]; 276 277 for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) map[i] = (byte)len; 278 for (int i=0; i<len-1; i++) map[chars[p + i] & 0xff] = (byte)(len - 1 -i); // oxff ?? 279 } else { 280 if (intMap == null) intMap = new int[Config.CHAR_TABLE_SIZE]; 281 282 for (int i=0; i<len-1; i++) intMap[chars[p + i] & 0xff] = len - 1 - i; // oxff ?? 283 } 284 } 285 286 void setExactInfo(OptExactInfo e) { 287 if (e.length == 0) return; 288 289 // shall we copy that ? 290 exact = e.chars; 291 exactP = 0; 292 exactEnd = e.length; 293 294 if (e.ignoreCase) { 295 searchAlgorithm = new SearchAlgorithm.SLOW_IC(this); 296 } else { 297 if (e.length >= 2) { 298 setupBMSkipMap(); 299 searchAlgorithm = SearchAlgorithm.BM; 300 } else { 301 searchAlgorithm = SearchAlgorithm.SLOW; 302 } 303 } 304 305 dMin = e.mmd.min; 306 dMax = e.mmd.max; 307 308 if (dMin != MinMaxLen.INFINITE_DISTANCE) { 309 thresholdLength = dMin + (exactEnd - exactP); 310 } 311 } 312 313 void setOptimizeMapInfo(OptMapInfo m) { 314 map = m.map; 315 316 searchAlgorithm = SearchAlgorithm.MAP; 317 dMin = m.mmd.min; 318 dMax = m.mmd.max; 319 320 if (dMin != MinMaxLen.INFINITE_DISTANCE) { 321 thresholdLength = dMin + 1; 322 } 323 } 324 325 void setSubAnchor(OptAnchorInfo anc) { 326 subAnchor |= anc.leftAnchor & AnchorType.BEGIN_LINE; 327 subAnchor |= anc.rightAnchor & AnchorType.END_LINE; 328 } 329 330 void clearOptimizeInfo() { 331 searchAlgorithm = SearchAlgorithm.NONE; 332 anchor = 0; 333 anchorDmax = 0; 334 anchorDmin = 0; 335 subAnchor = 0; 336 337 exact = null; 338 exactP = exactEnd = 0; 339 } 340 341 public String encStringToString(byte[]bytes, int p, int end) { 342 StringBuilder sb = new StringBuilder("\nPATTERN: /"); 343 344 while (p < end) { 345 sb.append(new String(new byte[]{bytes[p]})); 346 p++; 347 } 348 return sb.append("/").toString(); 349 } 350 351 public String optimizeInfoToString() { 352 String s = ""; 353 s += "optimize: " + searchAlgorithm.getName() + "\n"; 354 s += " anchor: " + OptAnchorInfo.anchorToString(anchor); 355 356 if ((anchor & AnchorType.END_BUF_MASK) != 0) { 357 s += MinMaxLen.distanceRangeToString(anchorDmin, anchorDmax); 358 } 359 360 s += "\n"; 361 362 if (searchAlgorithm != SearchAlgorithm.NONE) { 363 s += " sub anchor: " + OptAnchorInfo.anchorToString(subAnchor) + "\n"; 364 } 365 366 s += "dmin: " + dMin + " dmax: " + dMax + "\n"; 367 s += "threshold length: " + thresholdLength + "\n"; 368 369 if (exact != null) { 370 s += "exact: [" + new String(exact, exactP, exactEnd - exactP) + "]: length: " + (exactEnd - exactP) + "\n"; 371 } else if (searchAlgorithm == SearchAlgorithm.MAP) { 372 int n=0; 373 for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) if (map[i] != 0) n++; 374 375 s += "map: n = " + n + "\n"; 376 if (n > 0) { 377 int c=0; 378 s += "["; 379 for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) { 380 if (map[i] != 0) { 381 if (c > 0) s += ", "; 382 c++; 383 // TODO if (enc.isPrint(i) 384 s += ((char)i); 385 } 386 } 387 s += "]\n"; 388 } 389 } 390 391 return s; 392 } 393 394 public int getOptions() { 395 return options; 396 } 397 398 public void setUserOptions(int options) { 399 this.userOptions = options; 400 } 401 402 public int getUserOptions() { 403 return userOptions; 404 } 405 406 public void setUserObject(Object object) { 407 this.userObject = object; 408 } 409 410 public Object getUserObject() { 411 return userObject; 412 } 413 }