1 /*
   2  * Permission is hereby granted, free of charge, to any person obtaining a copy of
   3  * this software and associated documentation files (the "Software"), to deal in
   4  * the Software without restriction, including without limitation the rights to
   5  * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
   6  * of the Software, and to permit persons to whom the Software is furnished to do
   7  * so, subject to the following conditions:
   8  *
   9  * The above copyright notice and this permission notice shall be included in all
  10  * copies or substantial portions of the Software.
  11  *
  12  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  13  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  14  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  15  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  16  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  17  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  18  * SOFTWARE.
  19  */
  20 package jdk.nashorn.internal.joni;
  21 
  22 import static jdk.nashorn.internal.joni.BitStatus.bsAt;
  23 import static jdk.nashorn.internal.joni.Option.isCaptureGroup;
  24 import static jdk.nashorn.internal.joni.Option.isDontCaptureGroup;
  25 
  26 import java.util.HashMap;
  27 import java.util.Iterator;
  28 
  29 import jdk.nashorn.internal.joni.constants.AnchorType;
  30 import jdk.nashorn.internal.joni.constants.RegexState;
  31 import jdk.nashorn.internal.joni.exception.ErrorMessages;
  32 import jdk.nashorn.internal.joni.exception.InternalException;
  33 import jdk.nashorn.internal.joni.exception.ValueException;
  34 
  35 public final class Regex implements RegexState {
  36 
  37     int[] code;             /* compiled pattern */
  38     int codeLength;
  39     boolean stackNeeded;
  40     Object[]operands;       /* e.g. shared CClassNode */
  41     int operandLength;
  42 
  43     int state;              /* normal, searching, compiling */ // remove
  44     int numMem;             /* used memory(...) num counted from 1 */
  45     int numRepeat;          /* OP_REPEAT/OP_REPEAT_NG id-counter */
  46     int numNullCheck;       /* OP_NULL_CHECK_START/END id counter */
  47     int numCombExpCheck;    /* combination explosion check */
  48     int numCall;            /* number of subexp call */
  49     int captureHistory;     /* (?@...) flag (1-31) */
  50     int btMemStart;         /* need backtrack flag */
  51     int btMemEnd;           /* need backtrack flag */
  52 
  53     int stackPopLevel;
  54 
  55     int[]repeatRangeLo;
  56     int[]repeatRangeHi;
  57 
  58     public WarnCallback warnings;
  59     public MatcherFactory factory;
  60 
  61     int options;
  62     int userOptions;
  63     Object userObject;
  64     //final Syntax syntax;
  65     final int caseFoldFlag;
  66 
  67     HashMap<String,NameEntry> nameTable;        // named entries
  68 
  69     /* optimization info (string search, char-map and anchors) */
  70     SearchAlgorithm searchAlgorithm;        /* optimize flag */
  71     int thresholdLength;                    /* search str-length for apply optimize */
  72     int anchor;                             /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
  73     int anchorDmin;                         /* (SEMI_)END_BUF anchor distance */
  74     int anchorDmax;                         /* (SEMI_)END_BUF anchor distance */
  75     int subAnchor;                          /* start-anchor for exact or map */
  76 
  77     char[] exact;
  78     int exactP;
  79     int exactEnd;
  80 
  81     byte[] map;                              /* used as BM skip or char-map */
  82     int[] intMap;                            /* BM skip for exact_len > 255 */
  83     int[] intMapBackward;                    /* BM skip for backward search */
  84     int dMin;                               /* min-distance of exact or map */
  85     int dMax;                               /* max-distance of exact or map */
  86 
  87     char[][] templates;
  88     int templateNum;
  89 
  90     public Regex(CharSequence cs) {
  91         this(cs.toString());
  92     }
  93 
  94     public Regex(String str) {
  95         this(str.toCharArray(), 0, str.length(), 0);
  96     }
  97 
  98     public Regex(char[] chars) {
  99         this(chars, 0, chars.length, 0);
 100     }
 101 
 102     public Regex(char[] chars, int p, int end) {
 103         this(chars, p, end, 0);
 104     }
 105 
 106     public Regex(char[] chars, int p, int end, int option) {
 107         this(chars, p, end, option, Syntax.RUBY, WarnCallback.DEFAULT);
 108     }
 109 
 110     // onig_new
 111     public Regex(char[] chars, int p, int end, int option, Syntax syntax) {
 112         this(chars, p, end, option, Config.ENC_CASE_FOLD_DEFAULT, syntax, WarnCallback.DEFAULT);
 113     }
 114 
 115     public Regex(char[]chars, int p, int end, int option, WarnCallback warnings) {
 116         this(chars, p, end, option, Syntax.RUBY, warnings);
 117     }
 118 
 119     // onig_new
 120     public Regex(char[] chars, int p, int end, int option, Syntax syntax, WarnCallback warnings) {
 121         this(chars, p, end, option, Config.ENC_CASE_FOLD_DEFAULT, syntax, warnings);
 122     }
 123 
 124     // onig_alloc_init
 125     public Regex(char[] chars, int p, int end, int option, int caseFoldFlag, Syntax syntax, WarnCallback warnings) {
 126 
 127         if ((option & (Option.DONT_CAPTURE_GROUP | Option.CAPTURE_GROUP)) ==
 128             (Option.DONT_CAPTURE_GROUP | Option.CAPTURE_GROUP)) {
 129             throw new ValueException(ErrorMessages.ERR_INVALID_COMBINATION_OF_OPTIONS);
 130         }
 131 
 132         if ((option & Option.NEGATE_SINGLELINE) != 0) {
 133             option |= syntax.options;
 134             option &= ~Option.SINGLELINE;
 135         } else {
 136             option |= syntax.options;
 137         }
 138 
 139         this.options = option;
 140         this.caseFoldFlag = caseFoldFlag;
 141         this.warnings = warnings;
 142 
 143         new Analyser(new ScanEnvironment(this, syntax), chars, p, end).compile();
 144 
 145         this.warnings = null;
 146     }
 147 
 148     public Matcher matcher(char[] chars) {
 149         return matcher(chars, 0, chars.length);
 150     }
 151 
 152     public Matcher matcher(char[] chars, int p, int end) {
 153         return factory.create(this, chars, p, end);
 154     }
 155 
 156     public int numberOfCaptures() {
 157         return numMem;
 158     }
 159 
 160     public int numberOfCaptureHistories() {
 161         if (Config.USE_CAPTURE_HISTORY) {
 162             int n = 0;
 163             for (int i=0; i<=Config.MAX_CAPTURE_HISTORY_GROUP; i++) {
 164                 if (bsAt(captureHistory, i)) n++;
 165             }
 166             return n;
 167         } else {
 168             return 0;
 169         }
 170     }
 171 
 172     String nameTableToString() {
 173         StringBuilder sb = new StringBuilder();
 174 
 175         if (nameTable != null) {
 176             sb.append("name table\n");
 177             for (NameEntry ne : nameTable.values()) {
 178                 sb.append("  " + ne + "\n");
 179             }
 180             sb.append("\n");
 181         }
 182         return sb.toString();
 183     }
 184 
 185     NameEntry nameFind(char[] name, int nameP, int nameEnd) {
 186         if (nameTable != null) return nameTable.get(new String(name, nameP, nameEnd - nameP));
 187         return null;
 188     }
 189 
 190     void renumberNameTable(int[]map) {
 191         if (nameTable != null) {
 192             for (NameEntry e : nameTable.values()) {
 193                 if (e.backNum > 1) {
 194                     for (int i=0; i<e.backNum; i++) {
 195                         e.backRefs[i] = map[e.backRefs[i]];
 196                     }
 197                 } else if (e.backNum == 1) {
 198                     e.backRef1 = map[e.backRef1];
 199                 }
 200             }
 201         }
 202     }
 203 
 204     public int numberOfNames() {
 205         return nameTable == null ? 0 : nameTable.size();
 206     }
 207 
 208     void nameAdd(char[] name, int nameP, int nameEnd, int backRef, Syntax syntax) {
 209         if (nameEnd - nameP <= 0) throw new ValueException(ErrorMessages.ERR_EMPTY_GROUP_NAME);
 210 
 211         NameEntry e = null;
 212         if (nameTable == null) {
 213             nameTable = new HashMap<String,NameEntry>(); // 13, oni defaults to 5
 214         } else {
 215             e = nameFind(name, nameP, nameEnd);
 216         }
 217 
 218         if (e == null) {
 219             // dup the name here as oni does ?, what for ? (it has to manage it, we don't)
 220             e = new NameEntry(name, nameP, nameEnd);
 221             nameTable.put(new String(name, nameP, nameEnd - nameP), e);
 222         } else if (e.backNum >= 1 && !syntax.allowMultiplexDefinitionName()) {
 223             throw new ValueException(ErrorMessages.ERR_MULTIPLEX_DEFINED_NAME, new String(name, nameP, nameEnd - nameP));
 224         }
 225 
 226         e.addBackref(backRef);
 227     }
 228 
 229     NameEntry nameToGroupNumbers(char[] name, int nameP, int nameEnd) {
 230         return nameFind(name, nameP, nameEnd);
 231     }
 232 
 233     public int nameToBackrefNumber(char[] name, int nameP, int nameEnd, Region region) {
 234         NameEntry e = nameToGroupNumbers(name, nameP, nameEnd);
 235         if (e == null) throw new ValueException(ErrorMessages.ERR_UNDEFINED_NAME_REFERENCE,
 236                                                 new String(name, nameP, nameEnd - nameP));
 237 
 238         switch(e.backNum) {
 239         case 0:
 240             throw new InternalException(ErrorMessages.ERR_PARSER_BUG);
 241         case 1:
 242             return e.backRef1;
 243         default:
 244             if (region != null) {
 245                 for (int i = e.backNum - 1; i >= 0; i--) {
 246                     if (region.beg[e.backRefs[i]] != Region.REGION_NOTPOS) return e.backRefs[i];
 247                 }
 248             }
 249             return e.backRefs[e.backNum - 1];
 250         }
 251     }
 252 
 253     public Iterator<NameEntry> namedBackrefIterator() {
 254         return nameTable.values().iterator();
 255     }
 256 
 257     public boolean noNameGroupIsActive(Syntax syntax) {
 258         if (isDontCaptureGroup(options)) return false;
 259 
 260         if (Config.USE_NAMED_GROUP) {
 261             if (numberOfNames() > 0 && syntax.captureOnlyNamedGroup() && !isCaptureGroup(options)) return false;
 262         }
 263         return true;
 264     }
 265 
 266     /* set skip map for Boyer-Moor search */
 267     void setupBMSkipMap() {
 268         char[] chars = exact;
 269         int p = exactP;
 270         int end = exactEnd;
 271         int len = end - p;
 272 
 273         if (len < Config.CHAR_TABLE_SIZE) {
 274             // map/skip
 275             if (map == null) map = new byte[Config.CHAR_TABLE_SIZE];
 276 
 277             for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) map[i] = (byte)len;
 278             for (int i=0; i<len-1; i++) map[chars[p + i] & 0xff] = (byte)(len - 1 -i); // oxff ??
 279         } else {
 280             if (intMap == null) intMap = new int[Config.CHAR_TABLE_SIZE];
 281 
 282             for (int i=0; i<len-1; i++) intMap[chars[p + i] & 0xff] = len - 1 - i; // oxff ??
 283         }
 284     }
 285 
 286     void setExactInfo(OptExactInfo e) {
 287         if (e.length == 0) return;
 288 
 289         // shall we copy that ?
 290         exact = e.chars;
 291         exactP = 0;
 292         exactEnd = e.length;
 293 
 294         if (e.ignoreCase) {
 295             searchAlgorithm = new SearchAlgorithm.SLOW_IC(this);
 296         } else {
 297             if (e.length >= 2) {
 298                 setupBMSkipMap();
 299                 searchAlgorithm = SearchAlgorithm.BM;
 300             } else {
 301                 searchAlgorithm = SearchAlgorithm.SLOW;
 302             }
 303         }
 304 
 305         dMin = e.mmd.min;
 306         dMax = e.mmd.max;
 307 
 308         if (dMin != MinMaxLen.INFINITE_DISTANCE) {
 309             thresholdLength = dMin + (exactEnd - exactP);
 310         }
 311     }
 312 
 313     void setOptimizeMapInfo(OptMapInfo m) {
 314         map = m.map;
 315 
 316         searchAlgorithm = SearchAlgorithm.MAP;
 317         dMin = m.mmd.min;
 318         dMax = m.mmd.max;
 319 
 320         if (dMin != MinMaxLen.INFINITE_DISTANCE) {
 321             thresholdLength = dMin + 1;
 322         }
 323     }
 324 
 325     void setSubAnchor(OptAnchorInfo anc) {
 326         subAnchor |= anc.leftAnchor & AnchorType.BEGIN_LINE;
 327         subAnchor |= anc.rightAnchor & AnchorType.END_LINE;
 328     }
 329 
 330     void clearOptimizeInfo() {
 331         searchAlgorithm = SearchAlgorithm.NONE;
 332         anchor = 0;
 333         anchorDmax = 0;
 334         anchorDmin = 0;
 335         subAnchor = 0;
 336 
 337         exact = null;
 338         exactP = exactEnd = 0;
 339     }
 340 
 341     public String encStringToString(byte[]bytes, int p, int end) {
 342         StringBuilder sb = new StringBuilder("\nPATTERN: /");
 343 
 344         while (p < end) {
 345             sb.append(new String(new byte[]{bytes[p]}));
 346             p++;
 347         }
 348         return sb.append("/").toString();
 349     }
 350 
 351     public String optimizeInfoToString() {
 352         String s = "";
 353         s += "optimize: " + searchAlgorithm.getName() + "\n";
 354         s += "  anchor:     " + OptAnchorInfo.anchorToString(anchor);
 355 
 356         if ((anchor & AnchorType.END_BUF_MASK) != 0) {
 357             s += MinMaxLen.distanceRangeToString(anchorDmin, anchorDmax);
 358         }
 359 
 360         s += "\n";
 361 
 362         if (searchAlgorithm != SearchAlgorithm.NONE) {
 363             s += "  sub anchor: " + OptAnchorInfo.anchorToString(subAnchor) + "\n";
 364         }
 365 
 366         s += "dmin: " + dMin + " dmax: " + dMax + "\n";
 367         s += "threshold length: " + thresholdLength + "\n";
 368 
 369         if (exact != null) {
 370             s += "exact: [" + new String(exact, exactP, exactEnd - exactP) + "]: length: " + (exactEnd - exactP) + "\n";
 371         } else if (searchAlgorithm == SearchAlgorithm.MAP) {
 372             int n=0;
 373             for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) if (map[i] != 0) n++;
 374 
 375             s += "map: n = " + n + "\n";
 376             if (n > 0) {
 377                 int c=0;
 378                 s += "[";
 379                 for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) {
 380                     if (map[i] != 0) {
 381                         if (c > 0) s += ", ";
 382                         c++;
 383                         // TODO if (enc.isPrint(i)
 384                         s += ((char)i);
 385                     }
 386                 }
 387                 s += "]\n";
 388             }
 389         }
 390 
 391         return s;
 392     }
 393 
 394     public int getOptions() {
 395         return options;
 396     }
 397 
 398     public void setUserOptions(int options) {
 399         this.userOptions = options;
 400     }
 401 
 402     public int getUserOptions() {
 403         return userOptions;
 404     }
 405 
 406     public void setUserObject(Object object) {
 407         this.userObject = object;
 408     }
 409 
 410     public Object getUserObject() {
 411         return userObject;
 412     }
 413 }