1 /*
   2  * Permission is hereby granted, free of charge, to any person obtaining a copy of
   3  * this software and associated documentation files (the "Software"), to deal in
   4  * the Software without restriction, including without limitation the rights to
   5  * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
   6  * of the Software, and to permit persons to whom the Software is furnished to do
   7  * so, subject to the following conditions:
   8  *
   9  * The above copyright notice and this permission notice shall be included in all
  10  * copies or substantial portions of the Software.
  11  *
  12  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  13  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  14  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  15  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  16  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  17  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  18  * SOFTWARE.
  19  */
  20 package jdk.nashorn.internal.runtime.regexp.joni;
  21 
  22 import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
  23 import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
  24 
  25 public final class CodeRangeBuffer {
  26     private static final int INIT_MULTI_BYTE_RANGE_SIZE = 5;
  27     private static final int ALL_MULTI_BYTE_RANGE = 0x7fffffff;
  28 
  29     int[]p;
  30     int used;
  31 
  32     public CodeRangeBuffer(int[]ranges) {
  33         p = ranges;
  34         used = ranges[0] + 1;
  35     }
  36 
  37     public CodeRangeBuffer() {
  38         p = new int[INIT_MULTI_BYTE_RANGE_SIZE];
  39         writeCodePoint(0, 0);
  40     }
  41 
  42     public int[]getCodeRange() {
  43         return p;
  44     }
  45 
  46     private CodeRangeBuffer(CodeRangeBuffer orig) {
  47         p = new int[orig.p.length];
  48         System.arraycopy(orig.p, 0, p, 0, p.length);
  49         used = orig.used;
  50     }
  51 
  52     public String toString() {
  53         StringBuilder buf = new StringBuilder();
  54         buf.append("CodeRange");
  55         buf.append("\n  used: " + used);
  56         buf.append("\n  code point: " + p[0]);
  57         buf.append("\n  ranges: ");
  58 
  59         for (int i=0; i<p[0]; i++) {
  60             buf.append("[" + rangeNumToString(p[i * 2 + 1]) + ".." + rangeNumToString(p[i * 2 + 2]) + "]");
  61             if (i > 0 && i % 6 == 0) buf.append("\n          ");
  62         }
  63 
  64         return buf.toString();
  65     }
  66 
  67     private static String rangeNumToString(int num){
  68         return "0x" + Integer.toString(num, 16);
  69     }
  70 
  71     public void expand(int low) {
  72         int length = p.length;
  73         do { length <<= 1; } while (length < low);
  74         int[]tmp = new int[length];
  75         System.arraycopy(p, 0, tmp, 0, used);
  76         p = tmp;
  77     }
  78 
  79     public void ensureSize(int size) {
  80         int length = p.length;
  81         while (length < size ) { length <<= 1; }
  82         if (p.length != length) {
  83             int[]tmp = new int[length];
  84             System.arraycopy(p, 0, tmp, 0, used);
  85             p = tmp;
  86         }
  87     }
  88 
  89     private void moveRight(int from, int to, int n) {
  90         if (to + n > p.length) expand(to + n);
  91         System.arraycopy(p, from, p, to, n);
  92         if (to + n > used) used = to + n;
  93     }
  94 
  95     protected void moveLeft(int from, int to, int n) {
  96         System.arraycopy(p, from, p, to, n);
  97     }
  98 
  99     private void moveLeftAndReduce(int from, int to) {
 100         System.arraycopy(p, from, p, to, used - from);
 101         used -= from - to;
 102     }
 103 
 104     public void writeCodePoint(int pos, int b) {
 105         int u = pos + 1;
 106         if (p.length < u) expand(u);
 107         p[pos] = b;
 108         if (used < u) used = u;
 109     }
 110 
 111     public CodeRangeBuffer clone() {
 112         return new CodeRangeBuffer(this);
 113     }
 114 
 115     // ugly part: these methods should be made OO
 116     // add_code_range_to_buf
 117     public static CodeRangeBuffer addCodeRangeToBuff(CodeRangeBuffer pbuf, int from, int to) {
 118         if (from > to) {
 119             int n = from;
 120             from = to;
 121             to = n;
 122         }
 123 
 124         if (pbuf == null) pbuf = new CodeRangeBuffer(); // move to CClassNode
 125 
 126         int[]p = pbuf.p;
 127         int n = p[0];
 128 
 129         int low = 0;
 130         int bound = n;
 131 
 132         while (low < bound) {
 133             int x = (low + bound) >>> 1;
 134             if (from > p[x * 2 + 2]) {
 135                 low = x + 1;
 136             } else {
 137                 bound = x;
 138             }
 139         }
 140 
 141         int high = low;
 142         bound = n;
 143 
 144         while (high < bound) {
 145             int x = (high + bound) >>> 1;
 146             if (to >= p[x * 2 + 1] - 1) {
 147                 high = x + 1;
 148             } else {
 149                 bound = x;
 150             }
 151         }
 152 
 153         int incN = low + 1 - high;
 154 
 155         if (n + incN > Config.MAX_MULTI_BYTE_RANGES_NUM) throw new ValueException(ErrorMessages.ERR_TOO_MANY_MULTI_BYTE_RANGES);
 156 
 157         if (incN != 1) {
 158             if (from > p[low * 2 + 1]) from = p[low * 2 + 1];
 159             if (to < p[(high - 1) * 2 + 2]) to = p[(high - 1) * 2 + 2];
 160         }
 161 
 162         if (incN != 0 && high < n) {
 163             int fromPos = 1 + high * 2;
 164             int toPos = 1 + (low + 1) * 2;
 165             int size = (n - high) * 2;
 166 
 167             if (incN > 0) {
 168                 pbuf.moveRight(fromPos, toPos, size);
 169             } else {
 170                 pbuf.moveLeftAndReduce(fromPos, toPos);
 171             }
 172         }
 173 
 174         int pos = 1 + low * 2;
 175         // pbuf.ensureSize(pos + 2);
 176         pbuf.writeCodePoint(pos, from);
 177         pbuf.writeCodePoint(pos + 1, to);
 178         n += incN;
 179         pbuf.writeCodePoint(0, n);
 180 
 181         return pbuf;
 182     }
 183 
 184     // add_code_range, be aware of it returning null!
 185     public static CodeRangeBuffer addCodeRange(CodeRangeBuffer pbuf, ScanEnvironment env, int from, int to) {
 186         if (from >to) {
 187             if (env.syntax.allowEmptyRangeInCC()) {
 188                 return pbuf;
 189             } else {
 190                 throw new ValueException(ErrorMessages.ERR_EMPTY_RANGE_IN_CHAR_CLASS);
 191             }
 192         }
 193         return addCodeRangeToBuff(pbuf, from, to);
 194     }
 195 
 196     // SET_ALL_MULTI_BYTE_RANGE
 197     protected static CodeRangeBuffer setAllMultiByteRange(CodeRangeBuffer pbuf) {
 198         return addCodeRangeToBuff(pbuf, EncodingHelper.mbcodeStartPosition(), ALL_MULTI_BYTE_RANGE);
 199     }
 200 
 201     // ADD_ALL_MULTI_BYTE_RANGE
 202     public static CodeRangeBuffer addAllMultiByteRange(CodeRangeBuffer pbuf) {
 203         return setAllMultiByteRange(pbuf);
 204     }
 205 
 206     // not_code_range_buf
 207     public static CodeRangeBuffer notCodeRangeBuff(CodeRangeBuffer bbuf) {
 208         CodeRangeBuffer pbuf = null;
 209 
 210         if (bbuf == null) return setAllMultiByteRange(pbuf);
 211 
 212         int[]p = bbuf.p;
 213         int n = p[0];
 214 
 215         if (n <= 0) return setAllMultiByteRange(pbuf);
 216 
 217         int pre = EncodingHelper.mbcodeStartPosition();
 218 
 219         int from;
 220         int to = 0;
 221         for (int i=0; i<n; i++) {
 222             from = p[i * 2 + 1];
 223             to = p[i * 2 + 2];
 224             if (pre <= from - 1) {
 225                 pbuf = addCodeRangeToBuff(pbuf, pre, from - 1);
 226             }
 227             if (to == ALL_MULTI_BYTE_RANGE) break;
 228             pre = to + 1;
 229         }
 230 
 231         if (to < ALL_MULTI_BYTE_RANGE) pbuf = addCodeRangeToBuff(pbuf, to + 1, ALL_MULTI_BYTE_RANGE);
 232         return pbuf;
 233     }
 234 
 235     // or_code_range_buf
 236     public static CodeRangeBuffer orCodeRangeBuff(CodeRangeBuffer bbuf1, boolean not1,
 237                                                   CodeRangeBuffer bbuf2, boolean not2) {
 238         CodeRangeBuffer pbuf = null;
 239 
 240         if (bbuf1 == null && bbuf2 == null) {
 241             if (not1 || not2) {
 242                 return setAllMultiByteRange(pbuf);
 243             }
 244             return null;
 245         }
 246 
 247         if (bbuf2 == null) {
 248             CodeRangeBuffer tbuf;
 249             boolean tnot;
 250             // swap
 251             tnot = not1; not1 = not2; not2 = tnot;
 252             tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf;
 253         }
 254 
 255         if (bbuf1 == null) {
 256             if (not1) {
 257                 return setAllMultiByteRange(pbuf);
 258             } else {
 259                 if (!not2) {
 260                     return bbuf2.clone();
 261                 } else {
 262                     return notCodeRangeBuff(bbuf2);
 263                 }
 264             }
 265         }
 266 
 267         if (not1) {
 268             CodeRangeBuffer tbuf;
 269             boolean tnot;
 270             // swap
 271             tnot = not1; not1 = not2; not2 = tnot;
 272             tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf;
 273         }
 274 
 275         if (!not2 && !not1) { /* 1 OR 2 */
 276             pbuf = bbuf2.clone();
 277         } else if (!not1) { /* 1 OR (not 2) */
 278             pbuf = notCodeRangeBuff(bbuf2);
 279         }
 280 
 281         int[]p1 = bbuf1.p;
 282         int n1 = p1[0];
 283 
 284         for (int i=0; i<n1; i++) {
 285             int from = p1[i * 2 + 1];
 286             int to = p1[i * 2 + 2];
 287             pbuf = addCodeRangeToBuff(pbuf, from, to);
 288         }
 289 
 290         return pbuf;
 291     }
 292 
 293     // and_code_range1
 294     public static CodeRangeBuffer andCodeRange1(CodeRangeBuffer pbuf, int from1, int to1, int[]data, int n) {
 295         for (int i=0; i<n; i++) {
 296             int from2 = data[i * 2 + 1];
 297             int to2 = data[i * 2 + 2];
 298             if (from2 < from1) {
 299                 if (to2 < from1) {
 300                     continue;
 301                 } else {
 302                     from1 = to2 + 1;
 303                 }
 304             } else if (from2 <= to1) {
 305                 if (to2 < to1) {
 306                     if (from1 <= from2 - 1) {
 307                         pbuf = addCodeRangeToBuff(pbuf, from1, from2 - 1);
 308                     }
 309                     from1 = to2 + 1;
 310                 } else {
 311                     to1 = from2 - 1;
 312                 }
 313             } else {
 314                 from1 = from2;
 315             }
 316             if (from1 > to1) break;
 317         }
 318 
 319         if (from1 <= to1) {
 320             pbuf = addCodeRangeToBuff(pbuf, from1, to1);
 321         }
 322 
 323         return pbuf;
 324     }
 325 
 326     // and_code_range_buf
 327     public static CodeRangeBuffer andCodeRangeBuff(CodeRangeBuffer bbuf1, boolean not1,
 328                                                    CodeRangeBuffer bbuf2, boolean not2) {
 329         CodeRangeBuffer pbuf = null;
 330 
 331         if (bbuf1 == null) {
 332             if (not1 && bbuf2 != null) return bbuf2.clone(); /* not1 != 0 -> not2 == 0 */
 333             return null;
 334         } else if (bbuf2 == null) {
 335             if (not2) return bbuf1.clone();
 336             return null;
 337         }
 338 
 339         if (not1) {
 340             CodeRangeBuffer tbuf;
 341             boolean tnot;
 342             // swap
 343             tnot = not1; not1 = not2; not2 = tnot;
 344             tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf;
 345         }
 346 
 347         int[]p1 = bbuf1.p;
 348         int n1 = p1[0];
 349         int[]p2 = bbuf2.p;
 350         int n2 = p2[0];
 351 
 352         if (!not2 && !not1) { /* 1 AND 2 */
 353             for (int i=0; i<n1; i++) {
 354                 int from1 = p1[i * 2 + 1];
 355                 int to1 = p1[i * 2 + 2];
 356 
 357                 for (int j=0; j<n2; j++) {
 358                     int from2 = p2[j * 2 + 1];
 359                     int to2 = p2[j * 2 + 2];
 360 
 361                     if (from2 > to1) break;
 362                     if (to2 < from1) continue;
 363                     int from = from1 > from2 ? from1 : from2;
 364                     int to = to1 < to2 ? to1 : to2;
 365                     pbuf = addCodeRangeToBuff(pbuf, from, to);
 366                 }
 367             }
 368         } else if (!not1) { /* 1 AND (not 2) */
 369             for (int i=0; i<n1; i++) {
 370                 int from1 = p1[i * 2 + 1];
 371                 int to1 = p1[i * 2 + 2];
 372                 pbuf = andCodeRange1(pbuf, from1, to1, p2, n2);
 373             }
 374         }
 375 
 376         return pbuf;
 377     }
 378 }