1 /* 2 * Copyright (c) 2002, 2009, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 // -*- C++ -*- 27 // Small program for unpacking specially compressed Java packages. 28 // John R. Rose 29 30 #include <stdio.h> 31 #include <string.h> 32 #include <stdlib.h> 33 #include <stdarg.h> 34 35 #include "defines.h" 36 #include "bytes.h" 37 #include "utils.h" 38 #include "coding.h" 39 40 #include "constants.h" 41 #include "unpack.h" 42 43 extern coding basic_codings[]; 44 45 #define CODING_PRIVATE(spec) \ 46 int spec_ = spec; \ 47 int B = CODING_B(spec_); \ 48 int H = CODING_H(spec_); \ 49 int L = 256 - H; \ 50 int S = CODING_S(spec_); \ 51 int D = CODING_D(spec_) 52 53 #define IS_NEG_CODE(S, codeVal) \ 54 ( (((int)(codeVal)+1) & ((1<<S)-1)) == 0 ) 55 56 #define DECODE_SIGN_S1(ux) \ 57 ( ((uint)(ux) >> 1) ^ -((int)(ux) & 1) ) 58 59 static maybe_inline 60 int decode_sign(int S, uint ux) { // == Coding.decodeSign32 61 assert(S > 0); 62 uint sigbits = (ux >> S); 63 if (IS_NEG_CODE(S, ux)) 64 return (int)( ~sigbits); 65 else 66 return (int)(ux - sigbits); 67 // Note that (int)(ux-sigbits) can be negative, if ux is large enough. 68 } 69 70 coding* coding::init() { 71 if (umax > 0) return this; // already done 72 assert(spec != 0); // sanity 73 74 // fill in derived fields 75 CODING_PRIVATE(spec); 76 77 // Return null if 'arb(BHSD)' parameter constraints are not met: 78 if (B < 1 || B > B_MAX) return null; 79 if (H < 1 || H > 256) return null; 80 if (S < 0 || S > 2) return null; 81 if (D < 0 || D > 1) return null; 82 if (B == 1 && H != 256) return null; // 1-byte coding must be fixed-size 83 if (B >= 5 && H == 256) return null; // no 5-byte fixed-size coding 84 85 // first compute the range of the coding, in 64 bits 86 jlong range = 0; 87 { 88 jlong H_i = 1; 89 for (int i = 0; i < B; i++) { 90 range += H_i; 91 H_i *= H; 92 } 93 range *= L; 94 range += H_i; 95 } 96 assert(range > 0); // no useless codings, please 97 98 int this_umax; 99 100 // now, compute min and max 101 if (range >= ((jlong)1 << 32)) { 102 this_umax = INT_MAX_VALUE; 103 this->umin = INT_MIN_VALUE; 104 this->max = INT_MAX_VALUE; 105 this->min = INT_MIN_VALUE; 106 } else { 107 this_umax = (range > INT_MAX_VALUE) ? INT_MAX_VALUE : (int)range-1; 108 this->max = this_umax; 109 this->min = this->umin = 0; 110 if (S != 0 && range != 0) { 111 int Smask = (1<<S)-1; 112 jlong maxPosCode = range-1; 113 jlong maxNegCode = range-1; 114 while (IS_NEG_CODE(S, maxPosCode)) --maxPosCode; 115 while (!IS_NEG_CODE(S, maxNegCode)) --maxNegCode; 116 int maxPos = decode_sign(S, (uint)maxPosCode); 117 if (maxPos < 0) 118 this->max = INT_MAX_VALUE; // 32-bit wraparound 119 else 120 this->max = maxPos; 121 if (maxNegCode < 0) 122 this->min = 0; // No negative codings at all. 123 else 124 this->min = decode_sign(S, (uint)maxNegCode); 125 } 126 } 127 128 assert(!(isFullRange | isSigned | isSubrange)); // init 129 if (min < 0) 130 this->isSigned = true; 131 if (max < INT_MAX_VALUE && range <= INT_MAX_VALUE) 132 this->isSubrange = true; 133 if (max == INT_MAX_VALUE && min == INT_MIN_VALUE) 134 this->isFullRange = true; 135 136 // do this last, to reduce MT exposure (should have a membar too) 137 this->umax = this_umax; 138 139 return this; 140 } 141 142 coding* coding::findBySpec(int spec) { 143 for (coding* scan = &basic_codings[0]; ; scan++) { 144 if (scan->spec == spec) 145 return scan->init(); 146 if (scan->spec == 0) 147 break; 148 } 149 coding* ptr = NEW(coding, 1); 150 CHECK_NULL_0(ptr); 151 coding* c = ptr->initFrom(spec); 152 if (c == null) { 153 mtrace('f', ptr, 0); 154 ::free(ptr); 155 } else 156 // else caller should free it... 157 c->isMalloc = true; 158 return c; 159 } 160 161 coding* coding::findBySpec(int B, int H, int S, int D) { 162 if (B < 1 || B > B_MAX) return null; 163 if (H < 1 || H > 256) return null; 164 if (S < 0 || S > 2) return null; 165 if (D < 0 || D > 1) return null; 166 return findBySpec(CODING_SPEC(B, H, S, D)); 167 } 168 169 void coding::free() { 170 if (isMalloc) { 171 mtrace('f', this, 0); 172 ::free(this); 173 } 174 } 175 176 void coding_method::reset(value_stream* state) { 177 assert(state->rp == state->rplimit); // not in mid-stream, please 178 //assert(this == vs0.cm); 179 state[0] = vs0; 180 if (uValues != null) { 181 uValues->reset(state->helper()); 182 } 183 } 184 185 maybe_inline 186 uint coding::parse(byte* &rp, int B, int H) { 187 int L = 256-H; 188 byte* ptr = rp; 189 // hand peel the i==0 part of the loop: 190 uint b_i = *ptr++ & 0xFF; 191 if (B == 1 || b_i < (uint)L) 192 { rp = ptr; return b_i; } 193 uint sum = b_i; 194 uint H_i = H; 195 assert(B <= B_MAX); 196 for (int i = 2; i <= B_MAX; i++) { // easy for compilers to unroll if desired 197 b_i = *ptr++ & 0xFF; 198 sum += b_i * H_i; 199 if (i == B || b_i < (uint)L) 200 { rp = ptr; return sum; } 201 H_i *= H; 202 } 203 assert(false); 204 return 0; 205 } 206 207 maybe_inline 208 uint coding::parse_lgH(byte* &rp, int B, int H, int lgH) { 209 assert(H == (1<<lgH)); 210 int L = 256-(1<<lgH); 211 byte* ptr = rp; 212 // hand peel the i==0 part of the loop: 213 uint b_i = *ptr++ & 0xFF; 214 if (B == 1 || b_i < (uint)L) 215 { rp = ptr; return b_i; } 216 uint sum = b_i; 217 uint lg_H_i = lgH; 218 assert(B <= B_MAX); 219 for (int i = 2; i <= B_MAX; i++) { // easy for compilers to unroll if desired 220 b_i = *ptr++ & 0xFF; 221 sum += b_i << lg_H_i; 222 if (i == B || b_i < (uint)L) 223 { rp = ptr; return sum; } 224 lg_H_i += lgH; 225 } 226 assert(false); 227 return 0; 228 } 229 230 static const char ERB[] = "EOF reading band"; 231 232 maybe_inline 233 void coding::parseMultiple(byte* &rp, int N, byte* limit, int B, int H) { 234 if (N < 0) { 235 abort("bad value count"); 236 return; 237 } 238 byte* ptr = rp; 239 if (B == 1 || H == 256) { 240 size_t len = (size_t)N*B; 241 if (len / B != (size_t)N || ptr+len > limit) { 242 abort(ERB); 243 return; 244 } 245 rp = ptr+len; 246 return; 247 } 248 // Note: We assume rp has enough zero-padding. 249 int L = 256-H; 250 int n = B; 251 while (N > 0) { 252 ptr += 1; 253 if (--n == 0) { 254 // end of encoding at B bytes, regardless of byte value 255 } else { 256 int b = (ptr[-1] & 0xFF); 257 if (b >= L) { 258 // keep going, unless we find a byte < L 259 continue; 260 } 261 } 262 // found the last byte 263 N -= 1; 264 n = B; // reset length counter 265 // do an error check here 266 if (ptr > limit) { 267 abort(ERB); 268 return; 269 } 270 } 271 rp = ptr; 272 return; 273 } 274 275 bool value_stream::hasHelper() { 276 // If my coding method is a pop-style method, 277 // then I need a second value stream to transmit 278 // unfavored values. 279 // This can be determined by examining fValues. 280 return cm->fValues != null; 281 } 282 283 void value_stream::init(byte* rp_, byte* rplimit_, coding* defc) { 284 rp = rp_; 285 rplimit = rplimit_; 286 sum = 0; 287 cm = null; // no need in the simple case 288 setCoding(defc); 289 } 290 291 void value_stream::setCoding(coding* defc) { 292 if (defc == null) { 293 unpack_abort("bad coding"); 294 defc = coding::findByIndex(_meta_canon_min); // random pick for recovery 295 } 296 297 c = (*defc); 298 299 // choose cmk 300 cmk = cmk_ERROR; 301 switch (c.spec) { 302 case BYTE1_spec: cmk = cmk_BYTE1; break; 303 case CHAR3_spec: cmk = cmk_CHAR3; break; 304 case UNSIGNED5_spec: cmk = cmk_UNSIGNED5; break; 305 case DELTA5_spec: cmk = cmk_DELTA5; break; 306 case BCI5_spec: cmk = cmk_BCI5; break; 307 case BRANCH5_spec: cmk = cmk_BRANCH5; break; 308 default: 309 if (c.D() == 0) { 310 switch (c.S()) { 311 case 0: cmk = cmk_BHS0; break; 312 case 1: cmk = cmk_BHS1; break; 313 default: cmk = cmk_BHS; break; 314 } 315 } else { 316 if (c.S() == 1) { 317 if (c.isFullRange) cmk = cmk_BHS1D1full; 318 if (c.isSubrange) cmk = cmk_BHS1D1sub; 319 } 320 if (cmk == cmk_ERROR) cmk = cmk_BHSD1; 321 } 322 } 323 } 324 325 static maybe_inline 326 int getPopValue(value_stream* self, uint uval) { 327 if (uval > 0) { 328 // note that the initial parse performed a range check 329 assert(uval <= (uint)self->cm->fVlength); 330 return self->cm->fValues[uval-1]; 331 } else { 332 // take an unfavored value 333 return self->helper()->getInt(); 334 } 335 } 336 337 maybe_inline 338 int coding::sumInUnsignedRange(int x, int y) { 339 assert(isSubrange); 340 int range = (int)(umax+1); 341 assert(range > 0); 342 x += y; 343 if (x != (int)((jlong)(x-y) + (jlong)y)) { 344 // 32-bit overflow interferes with range reduction. 345 // Back off from the overflow by adding a multiple of range: 346 if (x < 0) { 347 x -= range; 348 assert(x >= 0); 349 } else { 350 x += range; 351 assert(x < 0); 352 } 353 } 354 if (x < 0) { 355 x += range; 356 if (x >= 0) return x; 357 } else if (x >= range) { 358 x -= range; 359 if (x < range) return x; 360 } else { 361 // in range 362 return x; 363 } 364 // do it the hard way 365 x %= range; 366 if (x < 0) x += range; 367 return x; 368 } 369 370 static maybe_inline 371 int getDeltaValue(value_stream* self, uint uval, bool isSubrange) { 372 assert((uint)(self->c.isSubrange) == (uint)isSubrange); 373 assert(self->c.isSubrange | self->c.isFullRange); 374 if (isSubrange) 375 return self->sum = self->c.sumInUnsignedRange(self->sum, (int)uval); 376 else 377 return self->sum += (int) uval; 378 } 379 380 bool value_stream::hasValue() { 381 if (rp < rplimit) return true; 382 if (cm == null) return false; 383 if (cm->next == null) return false; 384 cm->next->reset(this); 385 return hasValue(); 386 } 387 388 int value_stream::getInt() { 389 if (rp >= rplimit) { 390 // Advance to next coding segment. 391 if (rp > rplimit || cm == null || cm->next == null) { 392 // Must perform this check and throw an exception on bad input. 393 unpack_abort(ERB); 394 return 0; 395 } 396 cm->next->reset(this); 397 return getInt(); 398 } 399 400 CODING_PRIVATE(c.spec); 401 uint uval; 402 enum { 403 B5 = 5, 404 B3 = 3, 405 H128 = 128, 406 H64 = 64, 407 H4 = 4 408 }; 409 switch (cmk) { 410 case cmk_BHS: 411 assert(D == 0); 412 uval = coding::parse(rp, B, H); 413 if (S == 0) 414 return (int) uval; 415 return decode_sign(S, uval); 416 417 case cmk_BHS0: 418 assert(S == 0 && D == 0); 419 uval = coding::parse(rp, B, H); 420 return (int) uval; 421 422 case cmk_BHS1: 423 assert(S == 1 && D == 0); 424 uval = coding::parse(rp, B, H); 425 return DECODE_SIGN_S1(uval); 426 427 case cmk_BYTE1: 428 assert(c.spec == BYTE1_spec); 429 assert(B == 1 && H == 256 && S == 0 && D == 0); 430 return *rp++ & 0xFF; 431 432 case cmk_CHAR3: 433 assert(c.spec == CHAR3_spec); 434 assert(B == B3 && H == H128 && S == 0 && D == 0); 435 return coding::parse_lgH(rp, B3, H128, 7); 436 437 case cmk_UNSIGNED5: 438 assert(c.spec == UNSIGNED5_spec); 439 assert(B == B5 && H == H64 && S == 0 && D == 0); 440 return coding::parse_lgH(rp, B5, H64, 6); 441 442 case cmk_BHSD1: 443 assert(D == 1); 444 uval = coding::parse(rp, B, H); 445 if (S != 0) 446 uval = (uint) decode_sign(S, uval); 447 return getDeltaValue(this, uval, (bool)c.isSubrange); 448 449 case cmk_BHS1D1full: 450 assert(S == 1 && D == 1 && c.isFullRange); 451 uval = coding::parse(rp, B, H); 452 uval = (uint) DECODE_SIGN_S1(uval); 453 return getDeltaValue(this, uval, false); 454 455 case cmk_BHS1D1sub: 456 assert(S == 1 && D == 1 && c.isSubrange); 457 uval = coding::parse(rp, B, H); 458 uval = (uint) DECODE_SIGN_S1(uval); 459 return getDeltaValue(this, uval, true); 460 461 case cmk_DELTA5: 462 assert(c.spec == DELTA5_spec); 463 assert(B == B5 && H == H64 && S == 1 && D == 1 && c.isFullRange); 464 uval = coding::parse_lgH(rp, B5, H64, 6); 465 sum += DECODE_SIGN_S1(uval); 466 return sum; 467 468 case cmk_BCI5: 469 assert(c.spec == BCI5_spec); 470 assert(B == B5 && H == H4 && S == 0 && D == 0); 471 return coding::parse_lgH(rp, B5, H4, 2); 472 473 case cmk_BRANCH5: 474 assert(c.spec == BRANCH5_spec); 475 assert(B == B5 && H == H4 && S == 2 && D == 0); 476 uval = coding::parse_lgH(rp, B5, H4, 2); 477 return decode_sign(S, uval); 478 479 case cmk_pop: 480 uval = coding::parse(rp, B, H); 481 if (S != 0) { 482 uval = (uint) decode_sign(S, uval); 483 } 484 if (D != 0) { 485 assert(c.isSubrange | c.isFullRange); 486 if (c.isSubrange) 487 sum = c.sumInUnsignedRange(sum, (int) uval); 488 else 489 sum += (int) uval; 490 uval = (uint) sum; 491 } 492 return getPopValue(this, uval); 493 494 case cmk_pop_BHS0: 495 assert(S == 0 && D == 0); 496 uval = coding::parse(rp, B, H); 497 return getPopValue(this, uval); 498 499 case cmk_pop_BYTE1: 500 assert(c.spec == BYTE1_spec); 501 assert(B == 1 && H == 256 && S == 0 && D == 0); 502 return getPopValue(this, *rp++ & 0xFF); 503 504 default: 505 break; 506 } 507 assert(false); 508 return 0; 509 } 510 511 static maybe_inline 512 int moreCentral(int x, int y) { // used to find end of Pop.{F} 513 // Suggested implementation from the Pack200 specification: 514 uint kx = (x >> 31) ^ (x << 1); 515 uint ky = (y >> 31) ^ (y << 1); 516 return (kx < ky? x: y); 517 } 518 //static maybe_inline 519 //int moreCentral2(int x, int y, int min) { 520 // // Strict implementation of buggy 150.7 specification. 521 // // The bug is that the spec. says absolute-value ties are broken 522 // // in favor of positive numbers, but the suggested implementation 523 // // (also mentioned in the spec.) breaks ties in favor of negative numbers. 524 // if ((x + y) != 0) 525 // return min; 526 // else 527 // // return the other value, which breaks a tie in the positive direction 528 // return (x > y)? x: y; 529 //} 530 531 static const byte* no_meta[] = {null}; 532 #define NO_META (*(byte**)no_meta) 533 enum { POP_FAVORED_N = -2 }; 534 535 // mode bits 536 #define DISABLE_RUN 1 // used immediately inside ACodee 537 #define DISABLE_POP 2 // used recursively in all pop sub-bands 538 539 // This function knows all about meta-coding. 540 void coding_method::init(byte* &band_rp, byte* band_limit, 541 byte* &meta_rp, int mode, 542 coding* defc, int N, 543 intlist* valueSink) { 544 assert(N != 0); 545 546 assert(u != null); // must be pre-initialized 547 //if (u == null) u = unpacker::current(); // expensive 548 549 int op = (meta_rp == null) ? _meta_default : (*meta_rp++ & 0xFF); 550 coding* foundc = null; 551 coding* to_free = null; 552 553 if (op == _meta_default) { 554 foundc = defc; 555 // and fall through 556 557 } else if (op >= _meta_canon_min && op <= _meta_canon_max) { 558 foundc = coding::findByIndex(op); 559 // and fall through 560 561 } else if (op == _meta_arb) { 562 int args = (*meta_rp++ & 0xFF); 563 // args = (D:[0..1] + 2*S[0..2] + 8*(B:[1..5]-1)) 564 int D = ((args >> 0) & 1); 565 int S = ((args >> 1) & 3); 566 int B = ((args >> 3) & -1) + 1; 567 // & (H[1..256]-1) 568 int H = (*meta_rp++ & 0xFF) + 1; 569 foundc = coding::findBySpec(B, H, S, D); 570 to_free = foundc; // findBySpec may dynamically allocate 571 if (foundc == null) { 572 abort("illegal arb. coding"); 573 return; 574 } 575 // and fall through 576 577 } else if (op >= _meta_run && op < _meta_pop) { 578 int args = (op - _meta_run); 579 // args: KX:[0..3] + 4*(KBFlag:[0..1]) + 8*(ABDef:[0..2]) 580 int KX = ((args >> 0) & 3); 581 int KBFlag = ((args >> 2) & 1); 582 int ABDef = ((args >> 3) & -1); 583 assert(ABDef <= 2); 584 // & KB: one of [0..255] if KBFlag=1 585 int KB = (!KBFlag? 3: (*meta_rp++ & 0xFF)); 586 int K = (KB+1) << (KX * 4); 587 int N2 = (N >= 0) ? N-K : N; 588 if (N == 0 || (N2 <= 0 && N2 != N)) { 589 abort("illegal run encoding"); 590 return; 591 } 592 if ((mode & DISABLE_RUN) != 0) { 593 abort("illegal nested run encoding"); 594 return; 595 } 596 597 // & Enc{ ACode } if ADef=0 (ABDef != 1) 598 // No direct nesting of 'run' in ACode, but in BCode it's OK. 599 int disRun = mode | DISABLE_RUN; 600 if (ABDef == 1) { 601 this->init(band_rp, band_limit, NO_META, disRun, defc, K, valueSink); 602 } else { 603 this->init(band_rp, band_limit, meta_rp, disRun, defc, K, valueSink); 604 } 605 CHECK; 606 607 // & Enc{ BCode } if BDef=0 (ABDef != 2) 608 coding_method* tail = U_NEW(coding_method, 1); 609 CHECK_NULL(tail); 610 tail->u = u; 611 612 // The 'run' codings may be nested indirectly via 'pop' codings. 613 // This means that this->next may already be filled in, if 614 // ACode was of type 'pop' with a 'run' token coding. 615 // No problem: Just chain the upcoming BCode onto the end. 616 for (coding_method* self = this; ; self = self->next) { 617 if (self->next == null) { 618 self->next = tail; 619 break; 620 } 621 } 622 623 if (ABDef == 2) { 624 tail->init(band_rp, band_limit, NO_META, mode, defc, N2, valueSink); 625 } else { 626 tail->init(band_rp, band_limit, meta_rp, mode, defc, N2, valueSink); 627 } 628 // Note: The preceding calls to init should be tail-recursive. 629 630 return; // done; no falling through 631 632 } else if (op >= _meta_pop && op < _meta_limit) { 633 int args = (op - _meta_pop); 634 // args: (FDef:[0..1]) + 2*UDef:[0..1] + 4*(TDefL:[0..11]) 635 int FDef = ((args >> 0) & 1); 636 int UDef = ((args >> 1) & 1); 637 int TDefL = ((args >> 2) & -1); 638 assert(TDefL <= 11); 639 int TDef = (TDefL > 0); 640 int TL = (TDefL <= 6) ? (2 << TDefL) : (256 - (4 << (11-TDefL))); 641 int TH = (256-TL); 642 if (N <= 0) { 643 abort("illegal pop encoding"); 644 return; 645 } 646 if ((mode & DISABLE_POP) != 0) { 647 abort("illegal nested pop encoding"); 648 return; 649 } 650 651 // No indirect nesting of 'pop', but 'run' is OK. 652 int disPop = DISABLE_POP; 653 654 // & Enc{ FCode } if FDef=0 655 int FN = POP_FAVORED_N; 656 assert(valueSink == null); 657 intlist fValueSink; fValueSink.init(); 658 coding_method fval; 659 BYTES_OF(fval).clear(); fval.u = u; 660 if (FDef != 0) { 661 fval.init(band_rp, band_limit, NO_META, disPop, defc, FN, &fValueSink); 662 } else { 663 fval.init(band_rp, band_limit, meta_rp, disPop, defc, FN, &fValueSink); 664 } 665 bytes fvbuf; 666 fValues = (u->saveTo(fvbuf, fValueSink.b), (int*) fvbuf.ptr); 667 fVlength = fValueSink.length(); // i.e., the parameter K 668 fValueSink.free(); 669 CHECK; 670 671 // Skip the first {F} run in all subsequent passes. 672 // The next call to this->init(...) will set vs0.rp to point after the {F}. 673 674 // & Enc{ TCode } if TDef=0 (TDefL==0) 675 if (TDef != 0) { 676 coding* tcode = coding::findBySpec(1, 256); // BYTE1 677 // find the most narrowly sufficient code: 678 for (int B = 2; B <= B_MAX; B++) { 679 if (fVlength <= tcode->umax) break; // found it 680 tcode->free(); 681 tcode = coding::findBySpec(B, TH); 682 CHECK_NULL(tcode); 683 } 684 if (!(fVlength <= tcode->umax)) { 685 abort("pop.L value too small"); 686 return; 687 } 688 this->init(band_rp, band_limit, NO_META, disPop, tcode, N, null); 689 tcode->free(); 690 } else { 691 this->init(band_rp, band_limit, meta_rp, disPop, defc, N, null); 692 } 693 CHECK; 694 695 // Count the number of zero tokens right now. 696 // Also verify that they are in bounds. 697 int UN = 0; // one {U} for each zero in {T} 698 value_stream vs = vs0; 699 for (int i = 0; i < N; i++) { 700 uint val = vs.getInt(); 701 if (val == 0) UN += 1; 702 if (!(val <= (uint)fVlength)) { 703 abort("pop token out of range"); 704 return; 705 } 706 } 707 vs.done(); 708 709 // & Enc{ UCode } if UDef=0 710 if (UN != 0) { 711 uValues = U_NEW(coding_method, 1); 712 CHECK_NULL(uValues); 713 uValues->u = u; 714 if (UDef != 0) { 715 uValues->init(band_rp, band_limit, NO_META, disPop, defc, UN, null); 716 } else { 717 uValues->init(band_rp, band_limit, meta_rp, disPop, defc, UN, null); 718 } 719 } else { 720 if (UDef == 0) { 721 int uop = (*meta_rp++ & 0xFF); 722 if (uop > _meta_canon_max) 723 // %%% Spec. requires the more strict (uop != _meta_default). 724 abort("bad meta-coding for empty pop/U"); 725 } 726 } 727 728 // Bug fix for 6259542 729 // Last of all, adjust vs0.cmk to the 'pop' flavor 730 for (coding_method* self = this; self != null; self = self->next) { 731 coding_method_kind cmk2 = cmk_pop; 732 switch (self->vs0.cmk) { 733 case cmk_BHS0: cmk2 = cmk_pop_BHS0; break; 734 case cmk_BYTE1: cmk2 = cmk_pop_BYTE1; break; 735 default: break; 736 } 737 self->vs0.cmk = cmk2; 738 if (self != this) { 739 assert(self->fValues == null); // no double init 740 self->fValues = this->fValues; 741 self->fVlength = this->fVlength; 742 assert(self->uValues == null); // must stay null 743 } 744 } 745 746 return; // done; no falling through 747 748 } else { 749 abort("bad meta-coding"); 750 return; 751 } 752 753 // Common code here skips a series of values with one coding. 754 assert(foundc != null); 755 756 assert(vs0.cmk == cmk_ERROR); // no garbage, please 757 assert(vs0.rp == null); // no garbage, please 758 assert(vs0.rplimit == null); // no garbage, please 759 assert(vs0.sum == 0); // no garbage, please 760 761 vs0.init(band_rp, band_limit, foundc); 762 763 // Done with foundc. Free if necessary. 764 if (to_free != null) { 765 to_free->free(); 766 to_free = null; 767 } 768 foundc = null; 769 770 coding& c = vs0.c; 771 CODING_PRIVATE(c.spec); 772 // assert sane N 773 assert((uint)N < INT_MAX_VALUE || N == POP_FAVORED_N); 774 775 // Look at the values, or at least skip over them quickly. 776 if (valueSink == null) { 777 // Skip and ignore values in the first pass. 778 c.parseMultiple(band_rp, N, band_limit, B, H); 779 } else if (N >= 0) { 780 // Pop coding, {F} sequence, initial run of values... 781 assert((mode & DISABLE_POP) != 0); 782 value_stream vs = vs0; 783 for (int n = 0; n < N; n++) { 784 int val = vs.getInt(); 785 valueSink->add(val); 786 } 787 band_rp = vs.rp; 788 } else { 789 // Pop coding, {F} sequence, final run of values... 790 assert((mode & DISABLE_POP) != 0); 791 assert(N == POP_FAVORED_N); 792 int min = INT_MIN_VALUE; // farthest from the center 793 // min2 is based on the buggy specification of centrality in version 150.7 794 // no known implementations transmit this value, but just in case... 795 //int min2 = INT_MIN_VALUE; 796 int last = 0; 797 // if there were initial runs, find the potential sentinels in them: 798 for (int i = 0; i < valueSink->length(); i++) { 799 last = valueSink->get(i); 800 min = moreCentral(min, last); 801 //min2 = moreCentral2(min2, last, min); 802 } 803 value_stream vs = vs0; 804 for (;;) { 805 int val = vs.getInt(); 806 if (valueSink->length() > 0 && 807 (val == last || val == min)) //|| val == min2 808 break; 809 valueSink->add(val); 810 CHECK; 811 last = val; 812 min = moreCentral(min, last); 813 //min2 = moreCentral2(min2, last, min); 814 } 815 band_rp = vs.rp; 816 } 817 CHECK; 818 819 // Get an accurate upper limit now. 820 vs0.rplimit = band_rp; 821 vs0.cm = this; 822 823 return; // success 824 } 825 826 coding basic_codings[] = { 827 // This one is not a usable irregular coding, but is used by cp_Utf8_chars. 828 CODING_INIT(3,128,0,0), 829 830 // Fixed-length codings: 831 CODING_INIT(1,256,0,0), 832 CODING_INIT(1,256,1,0), 833 CODING_INIT(1,256,0,1), 834 CODING_INIT(1,256,1,1), 835 CODING_INIT(2,256,0,0), 836 CODING_INIT(2,256,1,0), 837 CODING_INIT(2,256,0,1), 838 CODING_INIT(2,256,1,1), 839 CODING_INIT(3,256,0,0), 840 CODING_INIT(3,256,1,0), 841 CODING_INIT(3,256,0,1), 842 CODING_INIT(3,256,1,1), 843 CODING_INIT(4,256,0,0), 844 CODING_INIT(4,256,1,0), 845 CODING_INIT(4,256,0,1), 846 CODING_INIT(4,256,1,1), 847 848 // Full-range variable-length codings: 849 CODING_INIT(5, 4,0,0), 850 CODING_INIT(5, 4,1,0), 851 CODING_INIT(5, 4,2,0), 852 CODING_INIT(5, 16,0,0), 853 CODING_INIT(5, 16,1,0), 854 CODING_INIT(5, 16,2,0), 855 CODING_INIT(5, 32,0,0), 856 CODING_INIT(5, 32,1,0), 857 CODING_INIT(5, 32,2,0), 858 CODING_INIT(5, 64,0,0), 859 CODING_INIT(5, 64,1,0), 860 CODING_INIT(5, 64,2,0), 861 CODING_INIT(5,128,0,0), 862 CODING_INIT(5,128,1,0), 863 CODING_INIT(5,128,2,0), 864 865 CODING_INIT(5, 4,0,1), 866 CODING_INIT(5, 4,1,1), 867 CODING_INIT(5, 4,2,1), 868 CODING_INIT(5, 16,0,1), 869 CODING_INIT(5, 16,1,1), 870 CODING_INIT(5, 16,2,1), 871 CODING_INIT(5, 32,0,1), 872 CODING_INIT(5, 32,1,1), 873 CODING_INIT(5, 32,2,1), 874 CODING_INIT(5, 64,0,1), 875 CODING_INIT(5, 64,1,1), 876 CODING_INIT(5, 64,2,1), 877 CODING_INIT(5,128,0,1), 878 CODING_INIT(5,128,1,1), 879 CODING_INIT(5,128,2,1), 880 881 // Variable length subrange codings: 882 CODING_INIT(2,192,0,0), 883 CODING_INIT(2,224,0,0), 884 CODING_INIT(2,240,0,0), 885 CODING_INIT(2,248,0,0), 886 CODING_INIT(2,252,0,0), 887 888 CODING_INIT(2, 8,0,1), 889 CODING_INIT(2, 8,1,1), 890 CODING_INIT(2, 16,0,1), 891 CODING_INIT(2, 16,1,1), 892 CODING_INIT(2, 32,0,1), 893 CODING_INIT(2, 32,1,1), 894 CODING_INIT(2, 64,0,1), 895 CODING_INIT(2, 64,1,1), 896 CODING_INIT(2,128,0,1), 897 CODING_INIT(2,128,1,1), 898 CODING_INIT(2,192,0,1), 899 CODING_INIT(2,192,1,1), 900 CODING_INIT(2,224,0,1), 901 CODING_INIT(2,224,1,1), 902 CODING_INIT(2,240,0,1), 903 CODING_INIT(2,240,1,1), 904 CODING_INIT(2,248,0,1), 905 CODING_INIT(2,248,1,1), 906 907 CODING_INIT(3,192,0,0), 908 CODING_INIT(3,224,0,0), 909 CODING_INIT(3,240,0,0), 910 CODING_INIT(3,248,0,0), 911 CODING_INIT(3,252,0,0), 912 913 CODING_INIT(3, 8,0,1), 914 CODING_INIT(3, 8,1,1), 915 CODING_INIT(3, 16,0,1), 916 CODING_INIT(3, 16,1,1), 917 CODING_INIT(3, 32,0,1), 918 CODING_INIT(3, 32,1,1), 919 CODING_INIT(3, 64,0,1), 920 CODING_INIT(3, 64,1,1), 921 CODING_INIT(3,128,0,1), 922 CODING_INIT(3,128,1,1), 923 CODING_INIT(3,192,0,1), 924 CODING_INIT(3,192,1,1), 925 CODING_INIT(3,224,0,1), 926 CODING_INIT(3,224,1,1), 927 CODING_INIT(3,240,0,1), 928 CODING_INIT(3,240,1,1), 929 CODING_INIT(3,248,0,1), 930 CODING_INIT(3,248,1,1), 931 932 CODING_INIT(4,192,0,0), 933 CODING_INIT(4,224,0,0), 934 CODING_INIT(4,240,0,0), 935 CODING_INIT(4,248,0,0), 936 CODING_INIT(4,252,0,0), 937 938 CODING_INIT(4, 8,0,1), 939 CODING_INIT(4, 8,1,1), 940 CODING_INIT(4, 16,0,1), 941 CODING_INIT(4, 16,1,1), 942 CODING_INIT(4, 32,0,1), 943 CODING_INIT(4, 32,1,1), 944 CODING_INIT(4, 64,0,1), 945 CODING_INIT(4, 64,1,1), 946 CODING_INIT(4,128,0,1), 947 CODING_INIT(4,128,1,1), 948 CODING_INIT(4,192,0,1), 949 CODING_INIT(4,192,1,1), 950 CODING_INIT(4,224,0,1), 951 CODING_INIT(4,224,1,1), 952 CODING_INIT(4,240,0,1), 953 CODING_INIT(4,240,1,1), 954 CODING_INIT(4,248,0,1), 955 CODING_INIT(4,248,1,1), 956 CODING_INIT(0,0,0,0) 957 }; 958 #define BASIC_INDEX_LIMIT \ 959 (int)(sizeof(basic_codings)/sizeof(basic_codings[0])-1) 960 961 coding* coding::findByIndex(int idx) { 962 #ifndef PRODUCT 963 /* Tricky assert here, constants and gcc complains about it without local. */ 964 int index_limit = BASIC_INDEX_LIMIT; 965 assert(_meta_canon_min == 1 && _meta_canon_max+1 == index_limit); 966 #endif 967 if (idx >= _meta_canon_min && idx <= _meta_canon_max) 968 return basic_codings[idx].init(); 969 else 970 return null; 971 } 972 973 #ifndef PRODUCT 974 const char* coding::string() { 975 CODING_PRIVATE(spec); 976 bytes buf; 977 buf.malloc(100); 978 char maxS[20], minS[20]; 979 sprintf(maxS, "%d", max); 980 sprintf(minS, "%d", min); 981 if (max == INT_MAX_VALUE) strcpy(maxS, "max"); 982 if (min == INT_MIN_VALUE) strcpy(minS, "min"); 983 sprintf((char*)buf.ptr, "(%d,%d,%d,%d) L=%d r=[%s,%s]", 984 B,H,S,D,L,minS,maxS); 985 return (const char*) buf.ptr; 986 } 987 #endif