1 /*
   2  * Copyright (c) 2002, 2009, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 // -*- C++ -*-
  27 // Small program for unpacking specially compressed Java packages.
  28 // John R. Rose
  29 
  30 #include <stdio.h>
  31 #include <string.h>
  32 #include <stdlib.h>
  33 #include <stdarg.h>
  34 
  35 #include "jni_util.h"
  36 
  37 #include "defines.h"
  38 #include "bytes.h"
  39 #include "utils.h"
  40 #include "coding.h"
  41 
  42 #include "constants.h"
  43 #include "unpack.h"
  44 
  45 extern coding basic_codings[];
  46 
  47 #define CODING_PRIVATE(spec) \
  48   int spec_ = spec; \
  49   int B = CODING_B(spec_); \
  50   int H = CODING_H(spec_); \
  51   int L = 256 - H; \
  52   int S = CODING_S(spec_); \
  53   int D = CODING_D(spec_)
  54 
  55 #define IS_NEG_CODE(S, codeVal) \
  56   ( (((int)(codeVal)+1) & ((1<<S)-1)) == 0 )
  57 
  58 #define DECODE_SIGN_S1(ux) \
  59   ( ((uint)(ux) >> 1) ^ -((int)(ux) & 1) )
  60 
  61 static maybe_inline
  62 int decode_sign(int S, uint ux) {  // == Coding.decodeSign32
  63   assert(S > 0);
  64   uint sigbits = (ux >> S);
  65   if (IS_NEG_CODE(S, ux))
  66     return (int)(    ~sigbits);
  67   else
  68     return (int)(ux - sigbits);
  69   // Note that (int)(ux-sigbits) can be negative, if ux is large enough.
  70 }
  71 
  72 coding* coding::init() {
  73   if (umax > 0)  return this;  // already done
  74   assert(spec != 0);  // sanity
  75 
  76   // fill in derived fields
  77   CODING_PRIVATE(spec);
  78 
  79   // Return null if 'arb(BHSD)' parameter constraints are not met:
  80   if (B < 1 || B > B_MAX)  return null;
  81   if (H < 1 || H > 256)    return null;
  82   if (S < 0 || S > 2)      return null;
  83   if (D < 0 || D > 1)      return null;
  84   if (B == 1 && H != 256)  return null;  // 1-byte coding must be fixed-size
  85   if (B >= 5 && H == 256)  return null;  // no 5-byte fixed-size coding
  86 
  87   // first compute the range of the coding, in 64 bits
  88   jlong range = 0;
  89   {
  90     jlong H_i = 1;
  91     for (int i = 0; i < B; i++) {
  92       range += H_i;
  93       H_i *= H;
  94     }
  95     range *= L;
  96     range += H_i;
  97   }
  98   assert(range > 0);  // no useless codings, please
  99 
 100   int this_umax;
 101 
 102   // now, compute min and max
 103   if (range >= ((jlong)1 << 32)) {
 104     this_umax  = INT_MAX_VALUE;
 105     this->umin = INT_MIN_VALUE;
 106     this->max  = INT_MAX_VALUE;
 107     this->min  = INT_MIN_VALUE;
 108   } else {
 109     this_umax = (range > INT_MAX_VALUE) ? INT_MAX_VALUE : (int)range-1;
 110     this->max = this_umax;
 111     this->min = this->umin = 0;
 112     if (S != 0 && range != 0) {
 113       int Smask = (1<<S)-1;
 114       jlong maxPosCode = range-1;
 115       jlong maxNegCode = range-1;
 116       while (IS_NEG_CODE(S,  maxPosCode))  --maxPosCode;
 117       while (!IS_NEG_CODE(S, maxNegCode))  --maxNegCode;
 118       int maxPos = decode_sign(S, (uint)maxPosCode);
 119       if (maxPos < 0)
 120         this->max = INT_MAX_VALUE;  // 32-bit wraparound
 121       else
 122         this->max = maxPos;
 123       if (maxNegCode < 0)
 124         this->min = 0;  // No negative codings at all.
 125       else
 126         this->min = decode_sign(S, (uint)maxNegCode);
 127     }
 128   }
 129 
 130   assert(!(isFullRange | isSigned | isSubrange)); // init
 131   if (min < 0)
 132     this->isSigned = true;
 133   if (max < INT_MAX_VALUE && range <= INT_MAX_VALUE)
 134     this->isSubrange = true;
 135   if (max == INT_MAX_VALUE && min == INT_MIN_VALUE)
 136     this->isFullRange = true;
 137 
 138   // do this last, to reduce MT exposure (should have a membar too)
 139   this->umax = this_umax;
 140 
 141   return this;
 142 }
 143 
 144 coding* coding::findBySpec(int spec) {
 145   for (coding* scan = &basic_codings[0]; ; scan++) {
 146     if (scan->spec == spec)
 147       return scan->init();
 148     if (scan->spec == 0)
 149       break;
 150   }
 151   coding* ptr = NEW(coding, 1);
 152   CHECK_NULL_RETURN(ptr, 0);
 153   coding* c = ptr->initFrom(spec);
 154   if (c == null) {
 155     mtrace('f', ptr, 0);
 156     ::free(ptr);
 157   } else
 158     // else caller should free it...
 159     c->isMalloc = true;
 160   return c;
 161 }
 162 
 163 coding* coding::findBySpec(int B, int H, int S, int D) {
 164   if (B < 1 || B > B_MAX)  return null;
 165   if (H < 1 || H > 256)    return null;
 166   if (S < 0 || S > 2)      return null;
 167   if (D < 0 || D > 1)      return null;
 168   return findBySpec(CODING_SPEC(B, H, S, D));
 169 }
 170 
 171 void coding::free() {
 172   if (isMalloc) {
 173     mtrace('f', this, 0);
 174     ::free(this);
 175   }
 176 }
 177 
 178 void coding_method::reset(value_stream* state) {
 179   assert(state->rp == state->rplimit);  // not in mid-stream, please
 180   //assert(this == vs0.cm);
 181   state[0] = vs0;
 182   if (uValues != null) {
 183     uValues->reset(state->helper());
 184   }
 185 }
 186 
 187 maybe_inline
 188 uint coding::parse(byte* &rp, int B, int H) {
 189   int L = 256-H;
 190   byte* ptr = rp;
 191   // hand peel the i==0 part of the loop:
 192   uint b_i = *ptr++ & 0xFF;
 193   if (B == 1 || b_i < (uint)L)
 194     { rp = ptr; return b_i; }
 195   uint sum = b_i;
 196   uint H_i = H;
 197   assert(B <= B_MAX);
 198   for (int i = 2; i <= B_MAX; i++) { // easy for compilers to unroll if desired
 199     b_i = *ptr++ & 0xFF;
 200     sum += b_i * H_i;
 201     if (i == B || b_i < (uint)L)
 202       { rp = ptr; return sum; }
 203     H_i *= H;
 204   }
 205   assert(false);
 206   return 0;
 207 }
 208 
 209 maybe_inline
 210 uint coding::parse_lgH(byte* &rp, int B, int H, int lgH) {
 211   assert(H == (1<<lgH));
 212   int L = 256-(1<<lgH);
 213   byte* ptr = rp;
 214   // hand peel the i==0 part of the loop:
 215   uint b_i = *ptr++ & 0xFF;
 216   if (B == 1 || b_i < (uint)L)
 217     { rp = ptr; return b_i; }
 218   uint sum = b_i;
 219   uint lg_H_i = lgH;
 220   assert(B <= B_MAX);
 221   for (int i = 2; i <= B_MAX; i++) { // easy for compilers to unroll if desired
 222     b_i = *ptr++ & 0xFF;
 223     sum += b_i << lg_H_i;
 224     if (i == B || b_i < (uint)L)
 225       { rp = ptr; return sum; }
 226     lg_H_i += lgH;
 227   }
 228   assert(false);
 229   return 0;
 230 }
 231 
 232 static const char ERB[] = "EOF reading band";
 233 
 234 maybe_inline
 235 void coding::parseMultiple(byte* &rp, int N, byte* limit, int B, int H) {
 236   if (N < 0) {
 237     abort("bad value count");
 238     return;
 239   }
 240   byte* ptr = rp;
 241   if (B == 1 || H == 256) {
 242     size_t len = (size_t)N*B;
 243     if (len / B != (size_t)N || ptr+len > limit) {
 244       abort(ERB);
 245       return;
 246     }
 247     rp = ptr+len;
 248     return;
 249   }
 250   // Note:  We assume rp has enough zero-padding.
 251   int L = 256-H;
 252   int n = B;
 253   while (N > 0) {
 254     ptr += 1;
 255     if (--n == 0) {
 256       // end of encoding at B bytes, regardless of byte value
 257     } else {
 258       int b = (ptr[-1] & 0xFF);
 259       if (b >= L) {
 260         // keep going, unless we find a byte < L
 261         continue;
 262       }
 263     }
 264     // found the last byte
 265     N -= 1;
 266     n = B;   // reset length counter
 267     // do an error check here
 268     if (ptr > limit) {
 269       abort(ERB);
 270       return;
 271     }
 272   }
 273   rp = ptr;
 274   return;
 275 }
 276 
 277 bool value_stream::hasHelper() {
 278   // If my coding method is a pop-style method,
 279   // then I need a second value stream to transmit
 280   // unfavored values.
 281   // This can be determined by examining fValues.
 282   return cm->fValues != null;
 283 }
 284 
 285 void value_stream::init(byte* rp_, byte* rplimit_, coding* defc) {
 286   rp = rp_;
 287   rplimit = rplimit_;
 288   sum = 0;
 289   cm = null;  // no need in the simple case
 290   setCoding(defc);
 291 }
 292 
 293 void value_stream::setCoding(coding* defc) {
 294   if (defc == null) {
 295     unpack_abort("bad coding");
 296     defc = coding::findByIndex(_meta_canon_min);  // random pick for recovery
 297   }
 298 
 299   c = (*defc);
 300 
 301   // choose cmk
 302   cmk = cmk_ERROR;
 303   switch (c.spec) {
 304   case BYTE1_spec:      cmk = cmk_BYTE1;        break;
 305   case CHAR3_spec:      cmk = cmk_CHAR3;        break;
 306   case UNSIGNED5_spec:  cmk = cmk_UNSIGNED5;    break;
 307   case DELTA5_spec:     cmk = cmk_DELTA5;       break;
 308   case BCI5_spec:       cmk = cmk_BCI5;         break;
 309   case BRANCH5_spec:    cmk = cmk_BRANCH5;      break;
 310   default:
 311     if (c.D() == 0) {
 312       switch (c.S()) {
 313       case 0:  cmk = cmk_BHS0;  break;
 314       case 1:  cmk = cmk_BHS1;  break;
 315       default: cmk = cmk_BHS;   break;
 316       }
 317     } else {
 318       if (c.S() == 1) {
 319         if (c.isFullRange)   cmk = cmk_BHS1D1full;
 320         if (c.isSubrange)    cmk = cmk_BHS1D1sub;
 321       }
 322       if (cmk == cmk_ERROR)  cmk = cmk_BHSD1;
 323     }
 324   }
 325 }
 326 
 327 static maybe_inline
 328 int getPopValue(value_stream* self, uint uval) {
 329   if (uval > 0) {
 330     // note that the initial parse performed a range check
 331     assert(uval <= (uint)self->cm->fVlength);
 332     return self->cm->fValues[uval-1];
 333   } else {
 334     // take an unfavored value
 335     return self->helper()->getInt();
 336   }
 337 }
 338 
 339 maybe_inline
 340 int coding::sumInUnsignedRange(int x, int y) {
 341   assert(isSubrange);
 342   int range = (int)(umax+1);
 343   assert(range > 0);
 344   x += y;
 345   if (x != (int)((jlong)(x-y) + (jlong)y)) {
 346     // 32-bit overflow interferes with range reduction.
 347     // Back off from the overflow by adding a multiple of range:
 348     if (x < 0) {
 349       x -= range;
 350       assert(x >= 0);
 351     } else {
 352       x += range;
 353       assert(x < 0);
 354     }
 355   }
 356   if (x < 0) {
 357     x += range;
 358     if (x >= 0)  return x;
 359   } else if (x >= range) {
 360     x -= range;
 361     if (x < range)  return x;
 362   } else {
 363     // in range
 364     return x;
 365   }
 366   // do it the hard way
 367   x %= range;
 368   if (x < 0)  x += range;
 369   return x;
 370 }
 371 
 372 static maybe_inline
 373 int getDeltaValue(value_stream* self, uint uval, bool isSubrange) {
 374   assert((uint)(self->c.isSubrange) == (uint)isSubrange);
 375   assert(self->c.isSubrange | self->c.isFullRange);
 376   if (isSubrange)
 377     return self->sum = self->c.sumInUnsignedRange(self->sum, (int)uval);
 378   else
 379     return self->sum += (int) uval;
 380 }
 381 
 382 bool value_stream::hasValue() {
 383   if (rp < rplimit)      return true;
 384   if (cm == null)        return false;
 385   if (cm->next == null)  return false;
 386   cm->next->reset(this);
 387   return hasValue();
 388 }
 389 
 390 int value_stream::getInt() {
 391   if (rp >= rplimit) {
 392     // Advance to next coding segment.
 393     if (rp > rplimit || cm == null || cm->next == null) {
 394       // Must perform this check and throw an exception on bad input.
 395       unpack_abort(ERB);
 396       return 0;
 397     }
 398     cm->next->reset(this);
 399     return getInt();
 400   }
 401 
 402   CODING_PRIVATE(c.spec);
 403   uint uval;
 404   enum {
 405     B5 = 5,
 406     B3 = 3,
 407     H128 = 128,
 408     H64 = 64,
 409     H4 = 4
 410   };
 411   switch (cmk) {
 412   case cmk_BHS:
 413     assert(D == 0);
 414     uval = coding::parse(rp, B, H);
 415     if (S == 0)
 416       return (int) uval;
 417     return decode_sign(S, uval);
 418 
 419   case cmk_BHS0:
 420     assert(S == 0 && D == 0);
 421     uval = coding::parse(rp, B, H);
 422     return (int) uval;
 423 
 424   case cmk_BHS1:
 425     assert(S == 1 && D == 0);
 426     uval = coding::parse(rp, B, H);
 427     return DECODE_SIGN_S1(uval);
 428 
 429   case cmk_BYTE1:
 430     assert(c.spec == BYTE1_spec);
 431     assert(B == 1 && H == 256 && S == 0 && D == 0);
 432     return *rp++ & 0xFF;
 433 
 434   case cmk_CHAR3:
 435     assert(c.spec == CHAR3_spec);
 436     assert(B == B3 && H == H128 && S == 0 && D == 0);
 437     return coding::parse_lgH(rp, B3, H128, 7);
 438 
 439   case cmk_UNSIGNED5:
 440     assert(c.spec == UNSIGNED5_spec);
 441     assert(B == B5 && H == H64 && S == 0 && D == 0);
 442     return coding::parse_lgH(rp, B5, H64, 6);
 443 
 444   case cmk_BHSD1:
 445     assert(D == 1);
 446     uval = coding::parse(rp, B, H);
 447     if (S != 0)
 448       uval = (uint) decode_sign(S, uval);
 449     return getDeltaValue(this, uval, (bool)c.isSubrange);
 450 
 451   case cmk_BHS1D1full:
 452     assert(S == 1 && D == 1 && c.isFullRange);
 453     uval = coding::parse(rp, B, H);
 454     uval = (uint) DECODE_SIGN_S1(uval);
 455     return getDeltaValue(this, uval, false);
 456 
 457   case cmk_BHS1D1sub:
 458     assert(S == 1 && D == 1 && c.isSubrange);
 459     uval = coding::parse(rp, B, H);
 460     uval = (uint) DECODE_SIGN_S1(uval);
 461     return getDeltaValue(this, uval, true);
 462 
 463   case cmk_DELTA5:
 464     assert(c.spec == DELTA5_spec);
 465     assert(B == B5 && H == H64 && S == 1 && D == 1 && c.isFullRange);
 466     uval = coding::parse_lgH(rp, B5, H64, 6);
 467     sum += DECODE_SIGN_S1(uval);
 468     return sum;
 469 
 470   case cmk_BCI5:
 471     assert(c.spec == BCI5_spec);
 472     assert(B == B5 && H == H4 && S == 0 && D == 0);
 473     return coding::parse_lgH(rp, B5, H4, 2);
 474 
 475   case cmk_BRANCH5:
 476     assert(c.spec == BRANCH5_spec);
 477     assert(B == B5 && H == H4 && S == 2 && D == 0);
 478     uval = coding::parse_lgH(rp, B5, H4, 2);
 479     return decode_sign(S, uval);
 480 
 481   case cmk_pop:
 482     uval = coding::parse(rp, B, H);
 483     if (S != 0) {
 484       uval = (uint) decode_sign(S, uval);
 485     }
 486     if (D != 0) {
 487       assert(c.isSubrange | c.isFullRange);
 488       if (c.isSubrange)
 489         sum = c.sumInUnsignedRange(sum, (int) uval);
 490       else
 491         sum += (int) uval;
 492       uval = (uint) sum;
 493     }
 494     return getPopValue(this, uval);
 495 
 496   case cmk_pop_BHS0:
 497     assert(S == 0 && D == 0);
 498     uval = coding::parse(rp, B, H);
 499     return getPopValue(this, uval);
 500 
 501   case cmk_pop_BYTE1:
 502     assert(c.spec == BYTE1_spec);
 503     assert(B == 1 && H == 256 && S == 0 && D == 0);
 504     return getPopValue(this, *rp++ & 0xFF);
 505 
 506   default:
 507     break;
 508   }
 509   assert(false);
 510   return 0;
 511 }
 512 
 513 static maybe_inline
 514 int moreCentral(int x, int y) {  // used to find end of Pop.{F}
 515   // Suggested implementation from the Pack200 specification:
 516   uint kx = (x >> 31) ^ (x << 1);
 517   uint ky = (y >> 31) ^ (y << 1);
 518   return (kx < ky? x: y);
 519 }
 520 //static maybe_inline
 521 //int moreCentral2(int x, int y, int min) {
 522 //  // Strict implementation of buggy 150.7 specification.
 523 //  // The bug is that the spec. says absolute-value ties are broken
 524 //  // in favor of positive numbers, but the suggested implementation
 525 //  // (also mentioned in the spec.) breaks ties in favor of negative numbers.
 526 //  if ((x + y) != 0)
 527 //    return min;
 528 //  else
 529 //    // return the other value, which breaks a tie in the positive direction
 530 //    return (x > y)? x: y;
 531 //}
 532 
 533 static const byte* no_meta[] = {null};
 534 #define NO_META (*(byte**)no_meta)
 535 enum { POP_FAVORED_N = -2 };
 536 
 537 // mode bits
 538 #define DISABLE_RUN  1  // used immediately inside ACodee
 539 #define DISABLE_POP  2  // used recursively in all pop sub-bands
 540 
 541 // This function knows all about meta-coding.
 542 void coding_method::init(byte* &band_rp, byte* band_limit,
 543                          byte* &meta_rp, int mode,
 544                          coding* defc, int N,
 545                          intlist* valueSink) {
 546   assert(N != 0);
 547 
 548   assert(u != null);  // must be pre-initialized
 549   //if (u == null)  u = unpacker::current();  // expensive
 550 
 551   int op = (meta_rp == null) ? _meta_default :  (*meta_rp++ & 0xFF);
 552   coding* foundc = null;
 553   coding* to_free = null;
 554 
 555   if (op == _meta_default) {
 556     foundc = defc;
 557     // and fall through
 558 
 559   } else if (op >= _meta_canon_min && op <= _meta_canon_max) {
 560     foundc = coding::findByIndex(op);
 561     // and fall through
 562 
 563   } else if (op == _meta_arb) {
 564     int args = (*meta_rp++ & 0xFF);
 565     // args = (D:[0..1] + 2*S[0..2] + 8*(B:[1..5]-1))
 566     int D = ((args >> 0) & 1);
 567     int S = ((args >> 1) & 3);
 568     int B = ((args >> 3) & -1) + 1;
 569     // & (H[1..256]-1)
 570     int H = (*meta_rp++ & 0xFF) + 1;
 571     foundc = coding::findBySpec(B, H, S, D);
 572     to_free = foundc;  // findBySpec may dynamically allocate
 573     if (foundc == null) {
 574       abort("illegal arb. coding");
 575       return;
 576     }
 577     // and fall through
 578 
 579   } else if (op >= _meta_run && op < _meta_pop) {
 580     int args = (op - _meta_run);
 581     // args: KX:[0..3] + 4*(KBFlag:[0..1]) + 8*(ABDef:[0..2])
 582     int KX     = ((args >> 0) & 3);
 583     int KBFlag = ((args >> 2) & 1);
 584     int ABDef  = ((args >> 3) & -1);
 585     assert(ABDef <= 2);
 586     // & KB: one of [0..255] if KBFlag=1
 587     int KB     = (!KBFlag? 3: (*meta_rp++ & 0xFF));
 588     int K      = (KB+1) << (KX * 4);
 589     int N2 = (N >= 0) ? N-K : N;
 590     if (N == 0 || (N2 <= 0 && N2 != N)) {
 591       abort("illegal run encoding");
 592       return;
 593     }
 594     if ((mode & DISABLE_RUN) != 0) {
 595       abort("illegal nested run encoding");
 596       return;
 597     }
 598 
 599     // & Enc{ ACode } if ADef=0  (ABDef != 1)
 600     // No direct nesting of 'run' in ACode, but in BCode it's OK.
 601     int disRun = mode | DISABLE_RUN;
 602     if (ABDef == 1) {
 603       this->init(band_rp, band_limit, NO_META, disRun, defc, K, valueSink);
 604     } else {
 605       this->init(band_rp, band_limit, meta_rp, disRun, defc, K, valueSink);
 606     }
 607     CHECK;
 608 
 609     // & Enc{ BCode } if BDef=0  (ABDef != 2)
 610     coding_method* tail = U_NEW(coding_method, 1);
 611     CHECK_NULL(tail);
 612     tail->u = u;
 613 
 614     // The 'run' codings may be nested indirectly via 'pop' codings.
 615     // This means that this->next may already be filled in, if
 616     // ACode was of type 'pop' with a 'run' token coding.
 617     // No problem:  Just chain the upcoming BCode onto the end.
 618     for (coding_method* self = this; ; self = self->next) {
 619       if (self->next == null) {
 620         self->next = tail;
 621         break;
 622       }
 623     }
 624 
 625     if (ABDef == 2) {
 626       tail->init(band_rp, band_limit, NO_META, mode, defc, N2, valueSink);
 627     } else {
 628       tail->init(band_rp, band_limit, meta_rp, mode, defc, N2, valueSink);
 629     }
 630     // Note:  The preceding calls to init should be tail-recursive.
 631 
 632     return;  // done; no falling through
 633 
 634   } else if (op >= _meta_pop && op < _meta_limit) {
 635     int args = (op - _meta_pop);
 636     // args: (FDef:[0..1]) + 2*UDef:[0..1] + 4*(TDefL:[0..11])
 637     int FDef  = ((args >> 0) & 1);
 638     int UDef  = ((args >> 1) & 1);
 639     int TDefL = ((args >> 2) & -1);
 640     assert(TDefL <= 11);
 641     int TDef  = (TDefL > 0);
 642     int TL    = (TDefL <= 6) ? (2 << TDefL) : (256 - (4 << (11-TDefL)));
 643     int TH    = (256-TL);
 644     if (N <= 0) {
 645       abort("illegal pop encoding");
 646       return;
 647     }
 648     if ((mode & DISABLE_POP) != 0) {
 649       abort("illegal nested pop encoding");
 650       return;
 651     }
 652 
 653     // No indirect nesting of 'pop', but 'run' is OK.
 654     int disPop = DISABLE_POP;
 655 
 656     // & Enc{ FCode } if FDef=0
 657     int FN = POP_FAVORED_N;
 658     assert(valueSink == null);
 659     intlist fValueSink; fValueSink.init();
 660     coding_method fval;
 661     BYTES_OF(fval).clear(); fval.u = u;
 662     if (FDef != 0) {
 663       fval.init(band_rp, band_limit, NO_META, disPop, defc, FN, &fValueSink);
 664     } else {
 665       fval.init(band_rp, band_limit, meta_rp, disPop, defc, FN, &fValueSink);
 666     }
 667     bytes fvbuf;
 668     fValues  = (u->saveTo(fvbuf, fValueSink.b), (int*) fvbuf.ptr);
 669     fVlength = fValueSink.length();  // i.e., the parameter K
 670     fValueSink.free();
 671     CHECK;
 672 
 673     // Skip the first {F} run in all subsequent passes.
 674     // The next call to this->init(...) will set vs0.rp to point after the {F}.
 675 
 676     // & Enc{ TCode } if TDef=0  (TDefL==0)
 677     if (TDef != 0) {
 678       coding* tcode = coding::findBySpec(1, 256);  // BYTE1
 679       // find the most narrowly sufficient code:
 680       for (int B = 2; B <= B_MAX; B++) {
 681         if (fVlength <= tcode->umax)  break;  // found it
 682         tcode->free();
 683         tcode = coding::findBySpec(B, TH);
 684         CHECK_NULL(tcode);
 685       }
 686       if (!(fVlength <= tcode->umax)) {
 687         abort("pop.L value too small");
 688         return;
 689       }
 690       this->init(band_rp, band_limit, NO_META, disPop, tcode, N, null);
 691       tcode->free();
 692     } else {
 693       this->init(band_rp, band_limit, meta_rp, disPop,  defc, N, null);
 694     }
 695     CHECK;
 696 
 697     // Count the number of zero tokens right now.
 698     // Also verify that they are in bounds.
 699     int UN = 0;   // one {U} for each zero in {T}
 700     value_stream vs = vs0;
 701     for (int i = 0; i < N; i++) {
 702       uint val = vs.getInt();
 703       if (val == 0)  UN += 1;
 704       if (!(val <= (uint)fVlength)) {
 705         abort("pop token out of range");
 706         return;
 707       }
 708     }
 709     vs.done();
 710 
 711     // & Enc{ UCode } if UDef=0
 712     if (UN != 0) {
 713       uValues = U_NEW(coding_method, 1);
 714       CHECK_NULL(uValues);
 715       uValues->u = u;
 716       if (UDef != 0) {
 717         uValues->init(band_rp, band_limit, NO_META, disPop, defc, UN, null);
 718       } else {
 719         uValues->init(band_rp, band_limit, meta_rp, disPop, defc, UN, null);
 720       }
 721     } else {
 722       if (UDef == 0) {
 723         int uop = (*meta_rp++ & 0xFF);
 724         if (uop > _meta_canon_max)
 725           // %%% Spec. requires the more strict (uop != _meta_default).
 726           abort("bad meta-coding for empty pop/U");
 727       }
 728     }
 729 
 730     // Bug fix for 6259542
 731     // Last of all, adjust vs0.cmk to the 'pop' flavor
 732     for (coding_method* self = this; self != null; self = self->next) {
 733         coding_method_kind cmk2 = cmk_pop;
 734         switch (self->vs0.cmk) {
 735         case cmk_BHS0:   cmk2 = cmk_pop_BHS0;   break;
 736         case cmk_BYTE1:  cmk2 = cmk_pop_BYTE1;  break;
 737         default: break;
 738         }
 739         self->vs0.cmk = cmk2;
 740         if (self != this) {
 741           assert(self->fValues == null); // no double init
 742           self->fValues  = this->fValues;
 743           self->fVlength = this->fVlength;
 744           assert(self->uValues == null); // must stay null
 745         }
 746     }
 747 
 748     return;  // done; no falling through
 749 
 750   } else {
 751     abort("bad meta-coding");
 752     return;
 753   }
 754 
 755   // Common code here skips a series of values with one coding.
 756   assert(foundc != null);
 757 
 758   assert(vs0.cmk == cmk_ERROR);  // no garbage, please
 759   assert(vs0.rp == null);  // no garbage, please
 760   assert(vs0.rplimit == null);  // no garbage, please
 761   assert(vs0.sum == 0);  // no garbage, please
 762 
 763   vs0.init(band_rp, band_limit, foundc);
 764 
 765   // Done with foundc.  Free if necessary.
 766   if (to_free != null) {
 767     to_free->free();
 768     to_free = null;
 769   }
 770   foundc = null;
 771 
 772   coding& c = vs0.c;
 773   CODING_PRIVATE(c.spec);
 774   // assert sane N
 775   assert((uint)N < INT_MAX_VALUE || N == POP_FAVORED_N);
 776 
 777   // Look at the values, or at least skip over them quickly.
 778   if (valueSink == null) {
 779     // Skip and ignore values in the first pass.
 780     c.parseMultiple(band_rp, N, band_limit, B, H);
 781   } else if (N >= 0) {
 782     // Pop coding, {F} sequence, initial run of values...
 783     assert((mode & DISABLE_POP) != 0);
 784     value_stream vs = vs0;
 785     for (int n = 0; n < N; n++) {
 786       int val = vs.getInt();
 787       valueSink->add(val);
 788     }
 789     band_rp = vs.rp;
 790   } else {
 791     // Pop coding, {F} sequence, final run of values...
 792     assert((mode & DISABLE_POP) != 0);
 793     assert(N == POP_FAVORED_N);
 794     int min = INT_MIN_VALUE;  // farthest from the center
 795     // min2 is based on the buggy specification of centrality in version 150.7
 796     // no known implementations transmit this value, but just in case...
 797     //int min2 = INT_MIN_VALUE;
 798     int last = 0;
 799     // if there were initial runs, find the potential sentinels in them:
 800     for (int i = 0; i < valueSink->length(); i++) {
 801       last = valueSink->get(i);
 802       min = moreCentral(min, last);
 803       //min2 = moreCentral2(min2, last, min);
 804     }
 805     value_stream vs = vs0;
 806     for (;;) {
 807       int val = vs.getInt();
 808       if (valueSink->length() > 0 &&
 809           (val == last || val == min)) //|| val == min2
 810         break;
 811       valueSink->add(val);
 812       CHECK;
 813       last = val;
 814       min = moreCentral(min, last);
 815       //min2 = moreCentral2(min2, last, min);
 816     }
 817     band_rp = vs.rp;
 818   }
 819   CHECK;
 820 
 821   // Get an accurate upper limit now.
 822   vs0.rplimit = band_rp;
 823   vs0.cm = this;
 824 
 825   return; // success
 826 }
 827 
 828 coding basic_codings[] = {
 829   // This one is not a usable irregular coding, but is used by cp_Utf8_chars.
 830   CODING_INIT(3,128,0,0),
 831 
 832   // Fixed-length codings:
 833   CODING_INIT(1,256,0,0),
 834   CODING_INIT(1,256,1,0),
 835   CODING_INIT(1,256,0,1),
 836   CODING_INIT(1,256,1,1),
 837   CODING_INIT(2,256,0,0),
 838   CODING_INIT(2,256,1,0),
 839   CODING_INIT(2,256,0,1),
 840   CODING_INIT(2,256,1,1),
 841   CODING_INIT(3,256,0,0),
 842   CODING_INIT(3,256,1,0),
 843   CODING_INIT(3,256,0,1),
 844   CODING_INIT(3,256,1,1),
 845   CODING_INIT(4,256,0,0),
 846   CODING_INIT(4,256,1,0),
 847   CODING_INIT(4,256,0,1),
 848   CODING_INIT(4,256,1,1),
 849 
 850   // Full-range variable-length codings:
 851   CODING_INIT(5,  4,0,0),
 852   CODING_INIT(5,  4,1,0),
 853   CODING_INIT(5,  4,2,0),
 854   CODING_INIT(5, 16,0,0),
 855   CODING_INIT(5, 16,1,0),
 856   CODING_INIT(5, 16,2,0),
 857   CODING_INIT(5, 32,0,0),
 858   CODING_INIT(5, 32,1,0),
 859   CODING_INIT(5, 32,2,0),
 860   CODING_INIT(5, 64,0,0),
 861   CODING_INIT(5, 64,1,0),
 862   CODING_INIT(5, 64,2,0),
 863   CODING_INIT(5,128,0,0),
 864   CODING_INIT(5,128,1,0),
 865   CODING_INIT(5,128,2,0),
 866 
 867   CODING_INIT(5,  4,0,1),
 868   CODING_INIT(5,  4,1,1),
 869   CODING_INIT(5,  4,2,1),
 870   CODING_INIT(5, 16,0,1),
 871   CODING_INIT(5, 16,1,1),
 872   CODING_INIT(5, 16,2,1),
 873   CODING_INIT(5, 32,0,1),
 874   CODING_INIT(5, 32,1,1),
 875   CODING_INIT(5, 32,2,1),
 876   CODING_INIT(5, 64,0,1),
 877   CODING_INIT(5, 64,1,1),
 878   CODING_INIT(5, 64,2,1),
 879   CODING_INIT(5,128,0,1),
 880   CODING_INIT(5,128,1,1),
 881   CODING_INIT(5,128,2,1),
 882 
 883   // Variable length subrange codings:
 884   CODING_INIT(2,192,0,0),
 885   CODING_INIT(2,224,0,0),
 886   CODING_INIT(2,240,0,0),
 887   CODING_INIT(2,248,0,0),
 888   CODING_INIT(2,252,0,0),
 889 
 890   CODING_INIT(2,  8,0,1),
 891   CODING_INIT(2,  8,1,1),
 892   CODING_INIT(2, 16,0,1),
 893   CODING_INIT(2, 16,1,1),
 894   CODING_INIT(2, 32,0,1),
 895   CODING_INIT(2, 32,1,1),
 896   CODING_INIT(2, 64,0,1),
 897   CODING_INIT(2, 64,1,1),
 898   CODING_INIT(2,128,0,1),
 899   CODING_INIT(2,128,1,1),
 900   CODING_INIT(2,192,0,1),
 901   CODING_INIT(2,192,1,1),
 902   CODING_INIT(2,224,0,1),
 903   CODING_INIT(2,224,1,1),
 904   CODING_INIT(2,240,0,1),
 905   CODING_INIT(2,240,1,1),
 906   CODING_INIT(2,248,0,1),
 907   CODING_INIT(2,248,1,1),
 908 
 909   CODING_INIT(3,192,0,0),
 910   CODING_INIT(3,224,0,0),
 911   CODING_INIT(3,240,0,0),
 912   CODING_INIT(3,248,0,0),
 913   CODING_INIT(3,252,0,0),
 914 
 915   CODING_INIT(3,  8,0,1),
 916   CODING_INIT(3,  8,1,1),
 917   CODING_INIT(3, 16,0,1),
 918   CODING_INIT(3, 16,1,1),
 919   CODING_INIT(3, 32,0,1),
 920   CODING_INIT(3, 32,1,1),
 921   CODING_INIT(3, 64,0,1),
 922   CODING_INIT(3, 64,1,1),
 923   CODING_INIT(3,128,0,1),
 924   CODING_INIT(3,128,1,1),
 925   CODING_INIT(3,192,0,1),
 926   CODING_INIT(3,192,1,1),
 927   CODING_INIT(3,224,0,1),
 928   CODING_INIT(3,224,1,1),
 929   CODING_INIT(3,240,0,1),
 930   CODING_INIT(3,240,1,1),
 931   CODING_INIT(3,248,0,1),
 932   CODING_INIT(3,248,1,1),
 933 
 934   CODING_INIT(4,192,0,0),
 935   CODING_INIT(4,224,0,0),
 936   CODING_INIT(4,240,0,0),
 937   CODING_INIT(4,248,0,0),
 938   CODING_INIT(4,252,0,0),
 939 
 940   CODING_INIT(4,  8,0,1),
 941   CODING_INIT(4,  8,1,1),
 942   CODING_INIT(4, 16,0,1),
 943   CODING_INIT(4, 16,1,1),
 944   CODING_INIT(4, 32,0,1),
 945   CODING_INIT(4, 32,1,1),
 946   CODING_INIT(4, 64,0,1),
 947   CODING_INIT(4, 64,1,1),
 948   CODING_INIT(4,128,0,1),
 949   CODING_INIT(4,128,1,1),
 950   CODING_INIT(4,192,0,1),
 951   CODING_INIT(4,192,1,1),
 952   CODING_INIT(4,224,0,1),
 953   CODING_INIT(4,224,1,1),
 954   CODING_INIT(4,240,0,1),
 955   CODING_INIT(4,240,1,1),
 956   CODING_INIT(4,248,0,1),
 957   CODING_INIT(4,248,1,1),
 958   CODING_INIT(0,0,0,0)
 959 };
 960 #define BASIC_INDEX_LIMIT \
 961         (int)(sizeof(basic_codings)/sizeof(basic_codings[0])-1)
 962 
 963 coding* coding::findByIndex(int idx) {
 964 #ifndef PRODUCT
 965   /* Tricky assert here, constants and gcc complains about it without local. */
 966   int index_limit = BASIC_INDEX_LIMIT;
 967   assert(_meta_canon_min == 1 && _meta_canon_max+1 == index_limit);
 968 #endif
 969   if (idx >= _meta_canon_min && idx <= _meta_canon_max)
 970     return basic_codings[idx].init();
 971   else
 972     return null;
 973 }
 974 
 975 #ifndef PRODUCT
 976 const char* coding::string() {
 977   CODING_PRIVATE(spec);
 978   bytes buf;
 979   buf.malloc(100);
 980   char maxS[20], minS[20];
 981   sprintf(maxS, "%d", max);
 982   sprintf(minS, "%d", min);
 983   if (max == INT_MAX_VALUE)  strcpy(maxS, "max");
 984   if (min == INT_MIN_VALUE)  strcpy(minS, "min");
 985   sprintf((char*)buf.ptr, "(%d,%d,%d,%d) L=%d r=[%s,%s]",
 986           B,H,S,D,L,minS,maxS);
 987   return (const char*) buf.ptr;
 988 }
 989 #endif