1 /* 2 * Copyright (c) 2002, 2009, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 // -*- C++ -*- 27 // Small program for unpacking specially compressed Java packages. 28 // John R. Rose 29 30 #include <stdio.h> 31 #include <string.h> 32 #include <stdlib.h> 33 #include <stdarg.h> 34 35 #include "jni_util.h" 36 37 #include "defines.h" 38 #include "bytes.h" 39 #include "utils.h" 40 #include "coding.h" 41 42 #include "constants.h" 43 #include "unpack.h" 44 45 extern coding basic_codings[]; 46 47 #define CODING_PRIVATE(spec) \ 48 int spec_ = spec; \ 49 int B = CODING_B(spec_); \ 50 int H = CODING_H(spec_); \ 51 int L = 256 - H; \ 52 int S = CODING_S(spec_); \ 53 int D = CODING_D(spec_) 54 55 #define IS_NEG_CODE(S, codeVal) \ 56 ( (((int)(codeVal)+1) & ((1<<S)-1)) == 0 ) 57 58 #define DECODE_SIGN_S1(ux) \ 59 ( ((uint)(ux) >> 1) ^ -((int)(ux) & 1) ) 60 61 static maybe_inline 62 int decode_sign(int S, uint ux) { // == Coding.decodeSign32 63 assert(S > 0); 64 uint sigbits = (ux >> S); 65 if (IS_NEG_CODE(S, ux)) 66 return (int)( ~sigbits); 67 else 68 return (int)(ux - sigbits); 69 // Note that (int)(ux-sigbits) can be negative, if ux is large enough. 70 } 71 72 coding* coding::init() { 73 if (umax > 0) return this; // already done 74 assert(spec != 0); // sanity 75 76 // fill in derived fields 77 CODING_PRIVATE(spec); 78 79 // Return null if 'arb(BHSD)' parameter constraints are not met: 80 if (B < 1 || B > B_MAX) return null; 81 if (H < 1 || H > 256) return null; 82 if (S < 0 || S > 2) return null; 83 if (D < 0 || D > 1) return null; 84 if (B == 1 && H != 256) return null; // 1-byte coding must be fixed-size 85 if (B >= 5 && H == 256) return null; // no 5-byte fixed-size coding 86 87 // first compute the range of the coding, in 64 bits 88 jlong range = 0; 89 { 90 jlong H_i = 1; 91 for (int i = 0; i < B; i++) { 92 range += H_i; 93 H_i *= H; 94 } 95 range *= L; 96 range += H_i; 97 } 98 assert(range > 0); // no useless codings, please 99 100 int this_umax; 101 102 // now, compute min and max 103 if (range >= ((jlong)1 << 32)) { 104 this_umax = INT_MAX_VALUE; 105 this->umin = INT_MIN_VALUE; 106 this->max = INT_MAX_VALUE; 107 this->min = INT_MIN_VALUE; 108 } else { 109 this_umax = (range > INT_MAX_VALUE) ? INT_MAX_VALUE : (int)range-1; 110 this->max = this_umax; 111 this->min = this->umin = 0; 112 if (S != 0 && range != 0) { 113 int Smask = (1<<S)-1; 114 jlong maxPosCode = range-1; 115 jlong maxNegCode = range-1; 116 while (IS_NEG_CODE(S, maxPosCode)) --maxPosCode; 117 while (!IS_NEG_CODE(S, maxNegCode)) --maxNegCode; 118 int maxPos = decode_sign(S, (uint)maxPosCode); 119 if (maxPos < 0) 120 this->max = INT_MAX_VALUE; // 32-bit wraparound 121 else 122 this->max = maxPos; 123 if (maxNegCode < 0) 124 this->min = 0; // No negative codings at all. 125 else 126 this->min = decode_sign(S, (uint)maxNegCode); 127 } 128 } 129 130 assert(!(isFullRange | isSigned | isSubrange)); // init 131 if (min < 0) 132 this->isSigned = true; 133 if (max < INT_MAX_VALUE && range <= INT_MAX_VALUE) 134 this->isSubrange = true; 135 if (max == INT_MAX_VALUE && min == INT_MIN_VALUE) 136 this->isFullRange = true; 137 138 // do this last, to reduce MT exposure (should have a membar too) 139 this->umax = this_umax; 140 141 return this; 142 } 143 144 coding* coding::findBySpec(int spec) { 145 for (coding* scan = &basic_codings[0]; ; scan++) { 146 if (scan->spec == spec) 147 return scan->init(); 148 if (scan->spec == 0) 149 break; 150 } 151 coding* ptr = NEW(coding, 1); 152 CHECK_NULL_RETURN(ptr, 0); 153 coding* c = ptr->initFrom(spec); 154 if (c == null) { 155 mtrace('f', ptr, 0); 156 ::free(ptr); 157 } else 158 // else caller should free it... 159 c->isMalloc = true; 160 return c; 161 } 162 163 coding* coding::findBySpec(int B, int H, int S, int D) { 164 if (B < 1 || B > B_MAX) return null; 165 if (H < 1 || H > 256) return null; 166 if (S < 0 || S > 2) return null; 167 if (D < 0 || D > 1) return null; 168 return findBySpec(CODING_SPEC(B, H, S, D)); 169 } 170 171 void coding::free() { 172 if (isMalloc) { 173 mtrace('f', this, 0); 174 ::free(this); 175 } 176 } 177 178 void coding_method::reset(value_stream* state) { 179 assert(state->rp == state->rplimit); // not in mid-stream, please 180 //assert(this == vs0.cm); 181 state[0] = vs0; 182 if (uValues != null) { 183 uValues->reset(state->helper()); 184 } 185 } 186 187 maybe_inline 188 uint coding::parse(byte* &rp, int B, int H) { 189 int L = 256-H; 190 byte* ptr = rp; 191 // hand peel the i==0 part of the loop: 192 uint b_i = *ptr++ & 0xFF; 193 if (B == 1 || b_i < (uint)L) 194 { rp = ptr; return b_i; } 195 uint sum = b_i; 196 uint H_i = H; 197 assert(B <= B_MAX); 198 for (int i = 2; i <= B_MAX; i++) { // easy for compilers to unroll if desired 199 b_i = *ptr++ & 0xFF; 200 sum += b_i * H_i; 201 if (i == B || b_i < (uint)L) 202 { rp = ptr; return sum; } 203 H_i *= H; 204 } 205 assert(false); 206 return 0; 207 } 208 209 maybe_inline 210 uint coding::parse_lgH(byte* &rp, int B, int H, int lgH) { 211 assert(H == (1<<lgH)); 212 int L = 256-(1<<lgH); 213 byte* ptr = rp; 214 // hand peel the i==0 part of the loop: 215 uint b_i = *ptr++ & 0xFF; 216 if (B == 1 || b_i < (uint)L) 217 { rp = ptr; return b_i; } 218 uint sum = b_i; 219 uint lg_H_i = lgH; 220 assert(B <= B_MAX); 221 for (int i = 2; i <= B_MAX; i++) { // easy for compilers to unroll if desired 222 b_i = *ptr++ & 0xFF; 223 sum += b_i << lg_H_i; 224 if (i == B || b_i < (uint)L) 225 { rp = ptr; return sum; } 226 lg_H_i += lgH; 227 } 228 assert(false); 229 return 0; 230 } 231 232 static const char ERB[] = "EOF reading band"; 233 234 maybe_inline 235 void coding::parseMultiple(byte* &rp, int N, byte* limit, int B, int H) { 236 if (N < 0) { 237 abort("bad value count"); 238 return; 239 } 240 byte* ptr = rp; 241 if (B == 1 || H == 256) { 242 size_t len = (size_t)N*B; 243 if (len / B != (size_t)N || ptr+len > limit) { 244 abort(ERB); 245 return; 246 } 247 rp = ptr+len; 248 return; 249 } 250 // Note: We assume rp has enough zero-padding. 251 int L = 256-H; 252 int n = B; 253 while (N > 0) { 254 ptr += 1; 255 if (--n == 0) { 256 // end of encoding at B bytes, regardless of byte value 257 } else { 258 int b = (ptr[-1] & 0xFF); 259 if (b >= L) { 260 // keep going, unless we find a byte < L 261 continue; 262 } 263 } 264 // found the last byte 265 N -= 1; 266 n = B; // reset length counter 267 // do an error check here 268 if (ptr > limit) { 269 abort(ERB); 270 return; 271 } 272 } 273 rp = ptr; 274 return; 275 } 276 277 bool value_stream::hasHelper() { 278 // If my coding method is a pop-style method, 279 // then I need a second value stream to transmit 280 // unfavored values. 281 // This can be determined by examining fValues. 282 return cm->fValues != null; 283 } 284 285 void value_stream::init(byte* rp_, byte* rplimit_, coding* defc) { 286 rp = rp_; 287 rplimit = rplimit_; 288 sum = 0; 289 cm = null; // no need in the simple case 290 setCoding(defc); 291 } 292 293 void value_stream::setCoding(coding* defc) { 294 if (defc == null) { 295 unpack_abort("bad coding"); 296 defc = coding::findByIndex(_meta_canon_min); // random pick for recovery 297 } 298 299 c = (*defc); 300 301 // choose cmk 302 cmk = cmk_ERROR; 303 switch (c.spec) { 304 case BYTE1_spec: cmk = cmk_BYTE1; break; 305 case CHAR3_spec: cmk = cmk_CHAR3; break; 306 case UNSIGNED5_spec: cmk = cmk_UNSIGNED5; break; 307 case DELTA5_spec: cmk = cmk_DELTA5; break; 308 case BCI5_spec: cmk = cmk_BCI5; break; 309 case BRANCH5_spec: cmk = cmk_BRANCH5; break; 310 default: 311 if (c.D() == 0) { 312 switch (c.S()) { 313 case 0: cmk = cmk_BHS0; break; 314 case 1: cmk = cmk_BHS1; break; 315 default: cmk = cmk_BHS; break; 316 } 317 } else { 318 if (c.S() == 1) { 319 if (c.isFullRange) cmk = cmk_BHS1D1full; 320 if (c.isSubrange) cmk = cmk_BHS1D1sub; 321 } 322 if (cmk == cmk_ERROR) cmk = cmk_BHSD1; 323 } 324 } 325 } 326 327 static maybe_inline 328 int getPopValue(value_stream* self, uint uval) { 329 if (uval > 0) { 330 // note that the initial parse performed a range check 331 assert(uval <= (uint)self->cm->fVlength); 332 return self->cm->fValues[uval-1]; 333 } else { 334 // take an unfavored value 335 return self->helper()->getInt(); 336 } 337 } 338 339 maybe_inline 340 int coding::sumInUnsignedRange(int x, int y) { 341 assert(isSubrange); 342 int range = (int)(umax+1); 343 assert(range > 0); 344 x += y; 345 if (x != (int)((jlong)(x-y) + (jlong)y)) { 346 // 32-bit overflow interferes with range reduction. 347 // Back off from the overflow by adding a multiple of range: 348 if (x < 0) { 349 x -= range; 350 assert(x >= 0); 351 } else { 352 x += range; 353 assert(x < 0); 354 } 355 } 356 if (x < 0) { 357 x += range; 358 if (x >= 0) return x; 359 } else if (x >= range) { 360 x -= range; 361 if (x < range) return x; 362 } else { 363 // in range 364 return x; 365 } 366 // do it the hard way 367 x %= range; 368 if (x < 0) x += range; 369 return x; 370 } 371 372 static maybe_inline 373 int getDeltaValue(value_stream* self, uint uval, bool isSubrange) { 374 assert((uint)(self->c.isSubrange) == (uint)isSubrange); 375 assert(self->c.isSubrange | self->c.isFullRange); 376 if (isSubrange) 377 return self->sum = self->c.sumInUnsignedRange(self->sum, (int)uval); 378 else 379 return self->sum += (int) uval; 380 } 381 382 bool value_stream::hasValue() { 383 if (rp < rplimit) return true; 384 if (cm == null) return false; 385 if (cm->next == null) return false; 386 cm->next->reset(this); 387 return hasValue(); 388 } 389 390 int value_stream::getInt() { 391 if (rp >= rplimit) { 392 // Advance to next coding segment. 393 if (rp > rplimit || cm == null || cm->next == null) { 394 // Must perform this check and throw an exception on bad input. 395 unpack_abort(ERB); 396 return 0; 397 } 398 cm->next->reset(this); 399 return getInt(); 400 } 401 402 CODING_PRIVATE(c.spec); 403 uint uval; 404 enum { 405 B5 = 5, 406 B3 = 3, 407 H128 = 128, 408 H64 = 64, 409 H4 = 4 410 }; 411 switch (cmk) { 412 case cmk_BHS: 413 assert(D == 0); 414 uval = coding::parse(rp, B, H); 415 if (S == 0) 416 return (int) uval; 417 return decode_sign(S, uval); 418 419 case cmk_BHS0: 420 assert(S == 0 && D == 0); 421 uval = coding::parse(rp, B, H); 422 return (int) uval; 423 424 case cmk_BHS1: 425 assert(S == 1 && D == 0); 426 uval = coding::parse(rp, B, H); 427 return DECODE_SIGN_S1(uval); 428 429 case cmk_BYTE1: 430 assert(c.spec == BYTE1_spec); 431 assert(B == 1 && H == 256 && S == 0 && D == 0); 432 return *rp++ & 0xFF; 433 434 case cmk_CHAR3: 435 assert(c.spec == CHAR3_spec); 436 assert(B == B3 && H == H128 && S == 0 && D == 0); 437 return coding::parse_lgH(rp, B3, H128, 7); 438 439 case cmk_UNSIGNED5: 440 assert(c.spec == UNSIGNED5_spec); 441 assert(B == B5 && H == H64 && S == 0 && D == 0); 442 return coding::parse_lgH(rp, B5, H64, 6); 443 444 case cmk_BHSD1: 445 assert(D == 1); 446 uval = coding::parse(rp, B, H); 447 if (S != 0) 448 uval = (uint) decode_sign(S, uval); 449 return getDeltaValue(this, uval, (bool)c.isSubrange); 450 451 case cmk_BHS1D1full: 452 assert(S == 1 && D == 1 && c.isFullRange); 453 uval = coding::parse(rp, B, H); 454 uval = (uint) DECODE_SIGN_S1(uval); 455 return getDeltaValue(this, uval, false); 456 457 case cmk_BHS1D1sub: 458 assert(S == 1 && D == 1 && c.isSubrange); 459 uval = coding::parse(rp, B, H); 460 uval = (uint) DECODE_SIGN_S1(uval); 461 return getDeltaValue(this, uval, true); 462 463 case cmk_DELTA5: 464 assert(c.spec == DELTA5_spec); 465 assert(B == B5 && H == H64 && S == 1 && D == 1 && c.isFullRange); 466 uval = coding::parse_lgH(rp, B5, H64, 6); 467 sum += DECODE_SIGN_S1(uval); 468 return sum; 469 470 case cmk_BCI5: 471 assert(c.spec == BCI5_spec); 472 assert(B == B5 && H == H4 && S == 0 && D == 0); 473 return coding::parse_lgH(rp, B5, H4, 2); 474 475 case cmk_BRANCH5: 476 assert(c.spec == BRANCH5_spec); 477 assert(B == B5 && H == H4 && S == 2 && D == 0); 478 uval = coding::parse_lgH(rp, B5, H4, 2); 479 return decode_sign(S, uval); 480 481 case cmk_pop: 482 uval = coding::parse(rp, B, H); 483 if (S != 0) { 484 uval = (uint) decode_sign(S, uval); 485 } 486 if (D != 0) { 487 assert(c.isSubrange | c.isFullRange); 488 if (c.isSubrange) 489 sum = c.sumInUnsignedRange(sum, (int) uval); 490 else 491 sum += (int) uval; 492 uval = (uint) sum; 493 } 494 return getPopValue(this, uval); 495 496 case cmk_pop_BHS0: 497 assert(S == 0 && D == 0); 498 uval = coding::parse(rp, B, H); 499 return getPopValue(this, uval); 500 501 case cmk_pop_BYTE1: 502 assert(c.spec == BYTE1_spec); 503 assert(B == 1 && H == 256 && S == 0 && D == 0); 504 return getPopValue(this, *rp++ & 0xFF); 505 506 default: 507 break; 508 } 509 assert(false); 510 return 0; 511 } 512 513 static maybe_inline 514 int moreCentral(int x, int y) { // used to find end of Pop.{F} 515 // Suggested implementation from the Pack200 specification: 516 uint kx = (x >> 31) ^ (x << 1); 517 uint ky = (y >> 31) ^ (y << 1); 518 return (kx < ky? x: y); 519 } 520 //static maybe_inline 521 //int moreCentral2(int x, int y, int min) { 522 // // Strict implementation of buggy 150.7 specification. 523 // // The bug is that the spec. says absolute-value ties are broken 524 // // in favor of positive numbers, but the suggested implementation 525 // // (also mentioned in the spec.) breaks ties in favor of negative numbers. 526 // if ((x + y) != 0) 527 // return min; 528 // else 529 // // return the other value, which breaks a tie in the positive direction 530 // return (x > y)? x: y; 531 //} 532 533 static const byte* no_meta[] = {null}; 534 #define NO_META (*(byte**)no_meta) 535 enum { POP_FAVORED_N = -2 }; 536 537 // mode bits 538 #define DISABLE_RUN 1 // used immediately inside ACodee 539 #define DISABLE_POP 2 // used recursively in all pop sub-bands 540 541 // This function knows all about meta-coding. 542 void coding_method::init(byte* &band_rp, byte* band_limit, 543 byte* &meta_rp, int mode, 544 coding* defc, int N, 545 intlist* valueSink) { 546 assert(N != 0); 547 548 assert(u != null); // must be pre-initialized 549 //if (u == null) u = unpacker::current(); // expensive 550 551 int op = (meta_rp == null) ? _meta_default : (*meta_rp++ & 0xFF); 552 coding* foundc = null; 553 coding* to_free = null; 554 555 if (op == _meta_default) { 556 foundc = defc; 557 // and fall through 558 559 } else if (op >= _meta_canon_min && op <= _meta_canon_max) { 560 foundc = coding::findByIndex(op); 561 // and fall through 562 563 } else if (op == _meta_arb) { 564 int args = (*meta_rp++ & 0xFF); 565 // args = (D:[0..1] + 2*S[0..2] + 8*(B:[1..5]-1)) 566 int D = ((args >> 0) & 1); 567 int S = ((args >> 1) & 3); 568 int B = ((args >> 3) & -1) + 1; 569 // & (H[1..256]-1) 570 int H = (*meta_rp++ & 0xFF) + 1; 571 foundc = coding::findBySpec(B, H, S, D); 572 to_free = foundc; // findBySpec may dynamically allocate 573 if (foundc == null) { 574 abort("illegal arb. coding"); 575 return; 576 } 577 // and fall through 578 579 } else if (op >= _meta_run && op < _meta_pop) { 580 int args = (op - _meta_run); 581 // args: KX:[0..3] + 4*(KBFlag:[0..1]) + 8*(ABDef:[0..2]) 582 int KX = ((args >> 0) & 3); 583 int KBFlag = ((args >> 2) & 1); 584 int ABDef = ((args >> 3) & -1); 585 assert(ABDef <= 2); 586 // & KB: one of [0..255] if KBFlag=1 587 int KB = (!KBFlag? 3: (*meta_rp++ & 0xFF)); 588 int K = (KB+1) << (KX * 4); 589 int N2 = (N >= 0) ? N-K : N; 590 if (N == 0 || (N2 <= 0 && N2 != N)) { 591 abort("illegal run encoding"); 592 return; 593 } 594 if ((mode & DISABLE_RUN) != 0) { 595 abort("illegal nested run encoding"); 596 return; 597 } 598 599 // & Enc{ ACode } if ADef=0 (ABDef != 1) 600 // No direct nesting of 'run' in ACode, but in BCode it's OK. 601 int disRun = mode | DISABLE_RUN; 602 if (ABDef == 1) { 603 this->init(band_rp, band_limit, NO_META, disRun, defc, K, valueSink); 604 } else { 605 this->init(band_rp, band_limit, meta_rp, disRun, defc, K, valueSink); 606 } 607 CHECK; 608 609 // & Enc{ BCode } if BDef=0 (ABDef != 2) 610 coding_method* tail = U_NEW(coding_method, 1); 611 CHECK_NULL(tail); 612 tail->u = u; 613 614 // The 'run' codings may be nested indirectly via 'pop' codings. 615 // This means that this->next may already be filled in, if 616 // ACode was of type 'pop' with a 'run' token coding. 617 // No problem: Just chain the upcoming BCode onto the end. 618 for (coding_method* self = this; ; self = self->next) { 619 if (self->next == null) { 620 self->next = tail; 621 break; 622 } 623 } 624 625 if (ABDef == 2) { 626 tail->init(band_rp, band_limit, NO_META, mode, defc, N2, valueSink); 627 } else { 628 tail->init(band_rp, band_limit, meta_rp, mode, defc, N2, valueSink); 629 } 630 // Note: The preceding calls to init should be tail-recursive. 631 632 return; // done; no falling through 633 634 } else if (op >= _meta_pop && op < _meta_limit) { 635 int args = (op - _meta_pop); 636 // args: (FDef:[0..1]) + 2*UDef:[0..1] + 4*(TDefL:[0..11]) 637 int FDef = ((args >> 0) & 1); 638 int UDef = ((args >> 1) & 1); 639 int TDefL = ((args >> 2) & -1); 640 assert(TDefL <= 11); 641 int TDef = (TDefL > 0); 642 int TL = (TDefL <= 6) ? (2 << TDefL) : (256 - (4 << (11-TDefL))); 643 int TH = (256-TL); 644 if (N <= 0) { 645 abort("illegal pop encoding"); 646 return; 647 } 648 if ((mode & DISABLE_POP) != 0) { 649 abort("illegal nested pop encoding"); 650 return; 651 } 652 653 // No indirect nesting of 'pop', but 'run' is OK. 654 int disPop = DISABLE_POP; 655 656 // & Enc{ FCode } if FDef=0 657 int FN = POP_FAVORED_N; 658 assert(valueSink == null); 659 intlist fValueSink; fValueSink.init(); 660 coding_method fval; 661 BYTES_OF(fval).clear(); fval.u = u; 662 if (FDef != 0) { 663 fval.init(band_rp, band_limit, NO_META, disPop, defc, FN, &fValueSink); 664 } else { 665 fval.init(band_rp, band_limit, meta_rp, disPop, defc, FN, &fValueSink); 666 } 667 bytes fvbuf; 668 fValues = (u->saveTo(fvbuf, fValueSink.b), (int*) fvbuf.ptr); 669 fVlength = fValueSink.length(); // i.e., the parameter K 670 fValueSink.free(); 671 CHECK; 672 673 // Skip the first {F} run in all subsequent passes. 674 // The next call to this->init(...) will set vs0.rp to point after the {F}. 675 676 // & Enc{ TCode } if TDef=0 (TDefL==0) 677 if (TDef != 0) { 678 coding* tcode = coding::findBySpec(1, 256); // BYTE1 679 // find the most narrowly sufficient code: 680 for (int B = 2; B <= B_MAX; B++) { 681 if (fVlength <= tcode->umax) break; // found it 682 tcode->free(); 683 tcode = coding::findBySpec(B, TH); 684 CHECK_NULL(tcode); 685 } 686 if (!(fVlength <= tcode->umax)) { 687 abort("pop.L value too small"); 688 return; 689 } 690 this->init(band_rp, band_limit, NO_META, disPop, tcode, N, null); 691 tcode->free(); 692 } else { 693 this->init(band_rp, band_limit, meta_rp, disPop, defc, N, null); 694 } 695 CHECK; 696 697 // Count the number of zero tokens right now. 698 // Also verify that they are in bounds. 699 int UN = 0; // one {U} for each zero in {T} 700 value_stream vs = vs0; 701 for (int i = 0; i < N; i++) { 702 uint val = vs.getInt(); 703 if (val == 0) UN += 1; 704 if (!(val <= (uint)fVlength)) { 705 abort("pop token out of range"); 706 return; 707 } 708 } 709 vs.done(); 710 711 // & Enc{ UCode } if UDef=0 712 if (UN != 0) { 713 uValues = U_NEW(coding_method, 1); 714 CHECK_NULL(uValues); 715 uValues->u = u; 716 if (UDef != 0) { 717 uValues->init(band_rp, band_limit, NO_META, disPop, defc, UN, null); 718 } else { 719 uValues->init(band_rp, band_limit, meta_rp, disPop, defc, UN, null); 720 } 721 } else { 722 if (UDef == 0) { 723 int uop = (*meta_rp++ & 0xFF); 724 if (uop > _meta_canon_max) 725 // %%% Spec. requires the more strict (uop != _meta_default). 726 abort("bad meta-coding for empty pop/U"); 727 } 728 } 729 730 // Bug fix for 6259542 731 // Last of all, adjust vs0.cmk to the 'pop' flavor 732 for (coding_method* self = this; self != null; self = self->next) { 733 coding_method_kind cmk2 = cmk_pop; 734 switch (self->vs0.cmk) { 735 case cmk_BHS0: cmk2 = cmk_pop_BHS0; break; 736 case cmk_BYTE1: cmk2 = cmk_pop_BYTE1; break; 737 default: break; 738 } 739 self->vs0.cmk = cmk2; 740 if (self != this) { 741 assert(self->fValues == null); // no double init 742 self->fValues = this->fValues; 743 self->fVlength = this->fVlength; 744 assert(self->uValues == null); // must stay null 745 } 746 } 747 748 return; // done; no falling through 749 750 } else { 751 abort("bad meta-coding"); 752 return; 753 } 754 755 // Common code here skips a series of values with one coding. 756 assert(foundc != null); 757 758 assert(vs0.cmk == cmk_ERROR); // no garbage, please 759 assert(vs0.rp == null); // no garbage, please 760 assert(vs0.rplimit == null); // no garbage, please 761 assert(vs0.sum == 0); // no garbage, please 762 763 vs0.init(band_rp, band_limit, foundc); 764 765 // Done with foundc. Free if necessary. 766 if (to_free != null) { 767 to_free->free(); 768 to_free = null; 769 } 770 foundc = null; 771 772 coding& c = vs0.c; 773 CODING_PRIVATE(c.spec); 774 // assert sane N 775 assert((uint)N < INT_MAX_VALUE || N == POP_FAVORED_N); 776 777 // Look at the values, or at least skip over them quickly. 778 if (valueSink == null) { 779 // Skip and ignore values in the first pass. 780 c.parseMultiple(band_rp, N, band_limit, B, H); 781 } else if (N >= 0) { 782 // Pop coding, {F} sequence, initial run of values... 783 assert((mode & DISABLE_POP) != 0); 784 value_stream vs = vs0; 785 for (int n = 0; n < N; n++) { 786 int val = vs.getInt(); 787 valueSink->add(val); 788 } 789 band_rp = vs.rp; 790 } else { 791 // Pop coding, {F} sequence, final run of values... 792 assert((mode & DISABLE_POP) != 0); 793 assert(N == POP_FAVORED_N); 794 int min = INT_MIN_VALUE; // farthest from the center 795 // min2 is based on the buggy specification of centrality in version 150.7 796 // no known implementations transmit this value, but just in case... 797 //int min2 = INT_MIN_VALUE; 798 int last = 0; 799 // if there were initial runs, find the potential sentinels in them: 800 for (int i = 0; i < valueSink->length(); i++) { 801 last = valueSink->get(i); 802 min = moreCentral(min, last); 803 //min2 = moreCentral2(min2, last, min); 804 } 805 value_stream vs = vs0; 806 for (;;) { 807 int val = vs.getInt(); 808 if (valueSink->length() > 0 && 809 (val == last || val == min)) //|| val == min2 810 break; 811 valueSink->add(val); 812 CHECK; 813 last = val; 814 min = moreCentral(min, last); 815 //min2 = moreCentral2(min2, last, min); 816 } 817 band_rp = vs.rp; 818 } 819 CHECK; 820 821 // Get an accurate upper limit now. 822 vs0.rplimit = band_rp; 823 vs0.cm = this; 824 825 return; // success 826 } 827 828 coding basic_codings[] = { 829 // This one is not a usable irregular coding, but is used by cp_Utf8_chars. 830 CODING_INIT(3,128,0,0), 831 832 // Fixed-length codings: 833 CODING_INIT(1,256,0,0), 834 CODING_INIT(1,256,1,0), 835 CODING_INIT(1,256,0,1), 836 CODING_INIT(1,256,1,1), 837 CODING_INIT(2,256,0,0), 838 CODING_INIT(2,256,1,0), 839 CODING_INIT(2,256,0,1), 840 CODING_INIT(2,256,1,1), 841 CODING_INIT(3,256,0,0), 842 CODING_INIT(3,256,1,0), 843 CODING_INIT(3,256,0,1), 844 CODING_INIT(3,256,1,1), 845 CODING_INIT(4,256,0,0), 846 CODING_INIT(4,256,1,0), 847 CODING_INIT(4,256,0,1), 848 CODING_INIT(4,256,1,1), 849 850 // Full-range variable-length codings: 851 CODING_INIT(5, 4,0,0), 852 CODING_INIT(5, 4,1,0), 853 CODING_INIT(5, 4,2,0), 854 CODING_INIT(5, 16,0,0), 855 CODING_INIT(5, 16,1,0), 856 CODING_INIT(5, 16,2,0), 857 CODING_INIT(5, 32,0,0), 858 CODING_INIT(5, 32,1,0), 859 CODING_INIT(5, 32,2,0), 860 CODING_INIT(5, 64,0,0), 861 CODING_INIT(5, 64,1,0), 862 CODING_INIT(5, 64,2,0), 863 CODING_INIT(5,128,0,0), 864 CODING_INIT(5,128,1,0), 865 CODING_INIT(5,128,2,0), 866 867 CODING_INIT(5, 4,0,1), 868 CODING_INIT(5, 4,1,1), 869 CODING_INIT(5, 4,2,1), 870 CODING_INIT(5, 16,0,1), 871 CODING_INIT(5, 16,1,1), 872 CODING_INIT(5, 16,2,1), 873 CODING_INIT(5, 32,0,1), 874 CODING_INIT(5, 32,1,1), 875 CODING_INIT(5, 32,2,1), 876 CODING_INIT(5, 64,0,1), 877 CODING_INIT(5, 64,1,1), 878 CODING_INIT(5, 64,2,1), 879 CODING_INIT(5,128,0,1), 880 CODING_INIT(5,128,1,1), 881 CODING_INIT(5,128,2,1), 882 883 // Variable length subrange codings: 884 CODING_INIT(2,192,0,0), 885 CODING_INIT(2,224,0,0), 886 CODING_INIT(2,240,0,0), 887 CODING_INIT(2,248,0,0), 888 CODING_INIT(2,252,0,0), 889 890 CODING_INIT(2, 8,0,1), 891 CODING_INIT(2, 8,1,1), 892 CODING_INIT(2, 16,0,1), 893 CODING_INIT(2, 16,1,1), 894 CODING_INIT(2, 32,0,1), 895 CODING_INIT(2, 32,1,1), 896 CODING_INIT(2, 64,0,1), 897 CODING_INIT(2, 64,1,1), 898 CODING_INIT(2,128,0,1), 899 CODING_INIT(2,128,1,1), 900 CODING_INIT(2,192,0,1), 901 CODING_INIT(2,192,1,1), 902 CODING_INIT(2,224,0,1), 903 CODING_INIT(2,224,1,1), 904 CODING_INIT(2,240,0,1), 905 CODING_INIT(2,240,1,1), 906 CODING_INIT(2,248,0,1), 907 CODING_INIT(2,248,1,1), 908 909 CODING_INIT(3,192,0,0), 910 CODING_INIT(3,224,0,0), 911 CODING_INIT(3,240,0,0), 912 CODING_INIT(3,248,0,0), 913 CODING_INIT(3,252,0,0), 914 915 CODING_INIT(3, 8,0,1), 916 CODING_INIT(3, 8,1,1), 917 CODING_INIT(3, 16,0,1), 918 CODING_INIT(3, 16,1,1), 919 CODING_INIT(3, 32,0,1), 920 CODING_INIT(3, 32,1,1), 921 CODING_INIT(3, 64,0,1), 922 CODING_INIT(3, 64,1,1), 923 CODING_INIT(3,128,0,1), 924 CODING_INIT(3,128,1,1), 925 CODING_INIT(3,192,0,1), 926 CODING_INIT(3,192,1,1), 927 CODING_INIT(3,224,0,1), 928 CODING_INIT(3,224,1,1), 929 CODING_INIT(3,240,0,1), 930 CODING_INIT(3,240,1,1), 931 CODING_INIT(3,248,0,1), 932 CODING_INIT(3,248,1,1), 933 934 CODING_INIT(4,192,0,0), 935 CODING_INIT(4,224,0,0), 936 CODING_INIT(4,240,0,0), 937 CODING_INIT(4,248,0,0), 938 CODING_INIT(4,252,0,0), 939 940 CODING_INIT(4, 8,0,1), 941 CODING_INIT(4, 8,1,1), 942 CODING_INIT(4, 16,0,1), 943 CODING_INIT(4, 16,1,1), 944 CODING_INIT(4, 32,0,1), 945 CODING_INIT(4, 32,1,1), 946 CODING_INIT(4, 64,0,1), 947 CODING_INIT(4, 64,1,1), 948 CODING_INIT(4,128,0,1), 949 CODING_INIT(4,128,1,1), 950 CODING_INIT(4,192,0,1), 951 CODING_INIT(4,192,1,1), 952 CODING_INIT(4,224,0,1), 953 CODING_INIT(4,224,1,1), 954 CODING_INIT(4,240,0,1), 955 CODING_INIT(4,240,1,1), 956 CODING_INIT(4,248,0,1), 957 CODING_INIT(4,248,1,1), 958 CODING_INIT(0,0,0,0) 959 }; 960 #define BASIC_INDEX_LIMIT \ 961 (int)(sizeof(basic_codings)/sizeof(basic_codings[0])-1) 962 963 coding* coding::findByIndex(int idx) { 964 #ifndef PRODUCT 965 /* Tricky assert here, constants and gcc complains about it without local. */ 966 int index_limit = BASIC_INDEX_LIMIT; 967 assert(_meta_canon_min == 1 && _meta_canon_max+1 == index_limit); 968 #endif 969 if (idx >= _meta_canon_min && idx <= _meta_canon_max) 970 return basic_codings[idx].init(); 971 else 972 return null; 973 } 974 975 #ifndef PRODUCT 976 const char* coding::string() { 977 CODING_PRIVATE(spec); 978 bytes buf; 979 buf.malloc(100); 980 char maxS[20], minS[20]; 981 sprintf(maxS, "%d", max); 982 sprintf(minS, "%d", min); 983 if (max == INT_MAX_VALUE) strcpy(maxS, "max"); 984 if (min == INT_MIN_VALUE) strcpy(minS, "min"); 985 sprintf((char*)buf.ptr, "(%d,%d,%d,%d) L=%d r=[%s,%s]", 986 B,H,S,D,L,minS,maxS); 987 return (const char*) buf.ptr; 988 } 989 #endif