1 /*
   2  * Permission is hereby granted, free of charge, to any person obtaining a copy of
   3  * this software and associated documentation files (the "Software"), to deal in
   4  * the Software without restriction, including without limitation the rights to
   5  * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
   6  * of the Software, and to permit persons to whom the Software is furnished to do
   7  * so, subject to the following conditions:
   8  *
   9  * The above copyright notice and this permission notice shall be included in all
  10  * copies or substantial portions of the Software.
  11  *
  12  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  13  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  14  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  15  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  16  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  17  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  18  * SOFTWARE.
  19  */
  20 package jdk.nashorn.internal.runtime.regexp.joni;
  21 
  22 import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType;
  23 import jdk.nashorn.internal.runtime.regexp.joni.constants.RegexState;
  24 import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
  25 import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
  26 
  27 @SuppressWarnings("javadoc")
  28 public final class Regex implements RegexState {
  29 
  30     int[] code;             /* compiled pattern */
  31     int codeLength;
  32     boolean stackNeeded;
  33     Object[] operands;       /* e.g. shared CClassNode */
  34     int operandLength;
  35 
  36     int numMem;             /* used memory(...) num counted from 1 */
  37     int numRepeat;          /* OP_REPEAT/OP_REPEAT_NG id-counter */
  38     int numNullCheck;       /* OP_NULL_CHECK_START/END id counter */
  39     int captureHistory;     /* (?@...) flag (1-31) */
  40     int btMemStart;         /* need backtrack flag */
  41     int btMemEnd;           /* need backtrack flag */
  42 
  43     int stackPopLevel;
  44 
  45     int[] repeatRangeLo;
  46     int[] repeatRangeHi;
  47 
  48     WarnCallback warnings;
  49     MatcherFactory factory;
  50     protected Analyser analyser;
  51 
  52     int options;
  53     final int caseFoldFlag;
  54 
  55     /* optimization info (string search, char-map and anchors) */
  56     SearchAlgorithm searchAlgorithm;        /* optimize flag */
  57     int thresholdLength;                    /* search str-length for apply optimize */
  58     int anchor;                             /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
  59     int anchorDmin;                         /* (SEMI_)END_BUF anchor distance */
  60     int anchorDmax;                         /* (SEMI_)END_BUF anchor distance */
  61     int subAnchor;                          /* start-anchor for exact or map */
  62 
  63     char[] exact;
  64     int exactP;
  65     int exactEnd;
  66 
  67     byte[] map;                              /* used as BM skip or char-map */
  68     int[] intMap;                            /* BM skip for exact_len > 255 */
  69     int[] intMapBackward;                    /* BM skip for backward search */
  70     int dMin;                               /* min-distance of exact or map */
  71     int dMax;                               /* max-distance of exact or map */
  72 
  73     char[][] templates;
  74     int templateNum;
  75 
  76     public Regex(final CharSequence cs) {
  77         this(cs.toString());
  78     }
  79 
  80     public Regex(final String str) {
  81         this(str.toCharArray(), 0, str.length(), 0);
  82     }
  83 
  84     public Regex(final char[] chars) {
  85         this(chars, 0, chars.length, 0);
  86     }
  87 
  88     public Regex(final char[] chars, final int p, final int end) {
  89         this(chars, p, end, 0);
  90     }
  91 
  92     public Regex(final char[] chars, final int p, final int end, final int option) {
  93         this(chars, p, end, option, Syntax.RUBY, WarnCallback.DEFAULT);
  94     }
  95 
  96     // onig_new
  97     public Regex(final char[] chars, final int p, final int end, final int option, final Syntax syntax) {
  98         this(chars, p, end, option, Config.ENC_CASE_FOLD_DEFAULT, syntax, WarnCallback.DEFAULT);
  99     }
 100 
 101     public Regex(final char[]chars, final int p, final int end, final int option, final WarnCallback warnings) {
 102         this(chars, p, end, option, Syntax.RUBY, warnings);
 103     }
 104 
 105     // onig_new
 106     public Regex(final char[] chars, final int p, final int end, final int option, final Syntax syntax, final WarnCallback warnings) {
 107         this(chars, p, end, option, Config.ENC_CASE_FOLD_DEFAULT, syntax, warnings);
 108     }
 109 
 110     // onig_alloc_init
 111     public Regex(final char[] chars, final int p, final int end, final int optionp, final int caseFoldFlag, final Syntax syntax, final WarnCallback warnings) {
 112         int option = optionp;
 113 
 114         if ((option & (Option.DONT_CAPTURE_GROUP | Option.CAPTURE_GROUP)) ==
 115             (Option.DONT_CAPTURE_GROUP | Option.CAPTURE_GROUP)) {
 116             throw new ValueException(ErrorMessages.ERR_INVALID_COMBINATION_OF_OPTIONS);
 117         }
 118 
 119         if ((option & Option.NEGATE_SINGLELINE) != 0) {
 120             option |= syntax.options;
 121             option &= ~Option.SINGLELINE;
 122         } else {
 123             option |= syntax.options;
 124         }
 125 
 126         this.options = option;
 127         this.caseFoldFlag = caseFoldFlag;
 128         this.warnings = warnings;
 129 
 130         new Analyser(new ScanEnvironment(this, syntax), chars, p, end).compile();
 131 
 132         this.warnings = null;
 133     }
 134 
 135     public Matcher matcher(final char[] chars) {
 136         return matcher(chars, 0, chars.length);
 137     }
 138 
 139     public Matcher matcher(final char[] chars, final int p, final int end) {
 140         return factory.create(this, chars, p, end);
 141     }
 142 
 143     public WarnCallback getWarnings() {
 144         return warnings;
 145     }
 146 
 147     public int numberOfCaptures() {
 148         return numMem;
 149     }
 150 
 151     /* set skip map for Boyer-Moor search */
 152     void setupBMSkipMap() {
 153         final char[] chars = exact;
 154         final int p = exactP;
 155         final int end = exactEnd;
 156         final int len = end - p;
 157 
 158         if (len < Config.CHAR_TABLE_SIZE) {
 159             // map/skip
 160             if (map == null) {
 161                 map = new byte[Config.CHAR_TABLE_SIZE];
 162             }
 163 
 164             for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) {
 165                 map[i] = (byte)len;
 166             }
 167             for (int i=0; i<len-1; i++)
 168              {
 169                 map[chars[p + i] & 0xff] = (byte)(len - 1 -i); // oxff ??
 170             }
 171         } else {
 172             if (intMap == null) {
 173                 intMap = new int[Config.CHAR_TABLE_SIZE];
 174             }
 175 
 176             for (int i=0; i<len-1; i++)
 177              {
 178                 intMap[chars[p + i] & 0xff] = len - 1 - i; // oxff ??
 179             }
 180         }
 181     }
 182 
 183     void setExactInfo(final OptExactInfo e) {
 184         if (e.length == 0) {
 185             return;
 186         }
 187 
 188         // shall we copy that ?
 189         exact = e.chars;
 190         exactP = 0;
 191         exactEnd = e.length;
 192 
 193         if (e.ignoreCase) {
 194             searchAlgorithm = new SearchAlgorithm.SLOW_IC(this);
 195         } else {
 196             if (e.length >= 2) {
 197                 setupBMSkipMap();
 198                 searchAlgorithm = SearchAlgorithm.BM;
 199             } else {
 200                 searchAlgorithm = SearchAlgorithm.SLOW;
 201             }
 202         }
 203 
 204         dMin = e.mmd.min;
 205         dMax = e.mmd.max;
 206 
 207         if (dMin != MinMaxLen.INFINITE_DISTANCE) {
 208             thresholdLength = dMin + (exactEnd - exactP);
 209         }
 210     }
 211 
 212     void setOptimizeMapInfo(final OptMapInfo m) {
 213         map = m.map;
 214 
 215         searchAlgorithm = SearchAlgorithm.MAP;
 216         dMin = m.mmd.min;
 217         dMax = m.mmd.max;
 218 
 219         if (dMin != MinMaxLen.INFINITE_DISTANCE) {
 220             thresholdLength = dMin + 1;
 221         }
 222     }
 223 
 224     void setSubAnchor(final OptAnchorInfo anc) {
 225         subAnchor |= anc.leftAnchor & AnchorType.BEGIN_LINE;
 226         subAnchor |= anc.rightAnchor & AnchorType.END_LINE;
 227     }
 228 
 229     void clearOptimizeInfo() {
 230         searchAlgorithm = SearchAlgorithm.NONE;
 231         anchor = 0;
 232         anchorDmax = 0;
 233         anchorDmin = 0;
 234         subAnchor = 0;
 235 
 236         exact = null;
 237         exactP = exactEnd = 0;
 238     }
 239 
 240     public String optimizeInfoToString() {
 241         final StringBuilder s = new StringBuilder();
 242         s.append("optimize: ").append(searchAlgorithm.getName()).append("\n");
 243         s.append("  anchor:     ").append(OptAnchorInfo.anchorToString(anchor));
 244 
 245         if ((anchor & AnchorType.END_BUF_MASK) != 0) {
 246             s.append(MinMaxLen.distanceRangeToString(anchorDmin, anchorDmax));
 247         }
 248 
 249         s.append("\n");
 250 
 251         if (searchAlgorithm != SearchAlgorithm.NONE) {
 252             s.append("  sub anchor: ").append(OptAnchorInfo.anchorToString(subAnchor)).append("\n");
 253         }
 254 
 255         s.append("dmin: ").append(dMin).append(" dmax: ").append(dMax).append("\n");
 256         s.append("threshold length: ").append(thresholdLength).append("\n");
 257 
 258         if (exact != null) {
 259             s.append("exact: [").append(exact, exactP, exactEnd - exactP).append("]: length: ").append(exactEnd - exactP).append("\n");
 260         } else if (searchAlgorithm == SearchAlgorithm.MAP) {
 261             int n=0;
 262             for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) {
 263                 if (map[i] != 0) {
 264                     n++;
 265                 }
 266             }
 267 
 268             s.append("map: n = ").append(n).append("\n");
 269             if (n > 0) {
 270                 int c=0;
 271                 s.append("[");
 272                 for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) {
 273                     if (map[i] != 0) {
 274                         if (c > 0) {
 275                             s.append(", ");
 276                         }
 277                         c++;
 278                         // TODO if (enc.isPrint(i)
 279                         s.append((char)i);
 280                     }
 281                 }
 282                 s.append("]\n");
 283             }
 284         }
 285 
 286         return s.toString();
 287     }
 288 
 289     public int getOptions() {
 290         return options;
 291     }
 292 
 293     public String dumpTree() {
 294         return analyser == null ? null : analyser.root.toString();
 295     }
 296 
 297     public String dumpByteCode() {
 298         return new ByteCodePrinter(this).byteCodeListToString();
 299     }
 300 
 301 }