Old src/share/native/com/sun/java/util/jar/pack/coding.cpp

   1 /*
   2  * Copyright (c) 2002, 2009, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 // -*- C++ -*-
  27 // Small program for unpacking specially compressed Java packages.
  28 // John R. Rose
  29 
  30 #include <stdio.h>
  31 #include <string.h>
  32 #include <stdlib.h>
  33 #include <stdarg.h>
  34 
  35 #include "defines.h"
  36 #include "bytes.h"
  37 #include "utils.h"
  38 #include "coding.h"
  39 
  40 #include "constants.h"
  41 #include "unpack.h"
  42 
  43 extern coding basic_codings[];
  44 
  45 #define CODING_PRIVATE(spec) \
  46   int spec_ = spec; \
  47   int B = CODING_B(spec_); \
  48   int H = CODING_H(spec_); \
  49   int L = 256 - H; \
  50   int S = CODING_S(spec_); \
  51   int D = CODING_D(spec_)
  52 
  53 #define IS_NEG_CODE(S, codeVal) \
  54   ( (((int)(codeVal)+1) & ((1<<S)-1)) == 0 )
  55 
  56 #define DECODE_SIGN_S1(ux) \
  57   ( ((uint)(ux) >> 1) ^ -((int)(ux) & 1) )
  58 
  59 static maybe_inline
  60 int decode_sign(int S, uint ux) {  // == Coding.decodeSign32
  61   assert(S > 0);
  62   uint sigbits = (ux >> S);
  63   if (IS_NEG_CODE(S, ux))
  64     return (int)(    ~sigbits);
  65   else
  66     return (int)(ux - sigbits);
  67   // Note that (int)(ux-sigbits) can be negative, if ux is large enough.
  68 }
  69 
  70 coding* coding::init() {
  71   if (umax > 0)  return this;  // already done
  72   assert(spec != 0);  // sanity
  73 
  74   // fill in derived fields
  75   CODING_PRIVATE(spec);
  76 
  77   // Return null if 'arb(BHSD)' parameter constraints are not met:
  78   if (B < 1 || B > B_MAX)  return null;
  79   if (H < 1 || H > 256)    return null;
  80   if (S < 0 || S > 2)      return null;
  81   if (D < 0 || D > 1)      return null;
  82   if (B == 1 && H != 256)  return null;  // 1-byte coding must be fixed-size
  83   if (B >= 5 && H == 256)  return null;  // no 5-byte fixed-size coding
  84 
  85   // first compute the range of the coding, in 64 bits
  86   jlong range = 0;
  87   {
  88     jlong H_i = 1;
  89     for (int i = 0; i < B; i++) {
  90       range += H_i;
  91       H_i *= H;
  92     }
  93     range *= L;
  94     range += H_i;
  95   }
  96   assert(range > 0);  // no useless codings, please
  97 
  98   int this_umax;
  99 
 100   // now, compute min and max
 101   if (range >= ((jlong)1 << 32)) {
 102     this_umax  = INT_MAX_VALUE;
 103     this->umin = INT_MIN_VALUE;
 104     this->max  = INT_MAX_VALUE;
 105     this->min  = INT_MIN_VALUE;
 106   } else {
 107     this_umax = (range > INT_MAX_VALUE) ? INT_MAX_VALUE : (int)range-1;
 108     this->max = this_umax;
 109     this->min = this->umin = 0;
 110     if (S != 0 && range != 0) {
 111       int Smask = (1<<S)-1;
 112       jlong maxPosCode = range-1;
 113       jlong maxNegCode = range-1;
 114       while (IS_NEG_CODE(S,  maxPosCode))  --maxPosCode;
 115       while (!IS_NEG_CODE(S, maxNegCode))  --maxNegCode;
 116       int maxPos = decode_sign(S, (uint)maxPosCode);
 117       if (maxPos < 0)
 118         this->max = INT_MAX_VALUE;  // 32-bit wraparound
 119       else
 120         this->max = maxPos;
 121       if (maxNegCode < 0)
 122         this->min = 0;  // No negative codings at all.
 123       else
 124         this->min = decode_sign(S, (uint)maxNegCode);
 125     }
 126   }
 127 
 128   assert(!(isFullRange | isSigned | isSubrange)); // init
 129   if (min < 0)
 130     this->isSigned = true;
 131   if (max < INT_MAX_VALUE && range <= INT_MAX_VALUE)
 132     this->isSubrange = true;
 133   if (max == INT_MAX_VALUE && min == INT_MIN_VALUE)
 134     this->isFullRange = true;
 135 
 136   // do this last, to reduce MT exposure (should have a membar too)
 137   this->umax = this_umax;
 138 
 139   return this;
 140 }
 141 
 142 coding* coding::findBySpec(int spec) {
 143   for (coding* scan = &basic_codings[0]; ; scan++) {
 144     if (scan->spec == spec)
 145       return scan->init();
 146     if (scan->spec == 0)
 147       break;
 148   }
 149   coding* ptr = NEW(coding, 1);
 150   CHECK_NULL_0(ptr);
 151   coding* c = ptr->initFrom(spec);
 152   if (c == null) {
 153     mtrace('f', ptr, 0);
 154     ::free(ptr);
 155   } else
 156     // else caller should free it...
 157     c->isMalloc = true;
 158   return c;
 159 }
 160 
 161 coding* coding::findBySpec(int B, int H, int S, int D) {
 162   if (B < 1 || B > B_MAX)  return null;
 163   if (H < 1 || H > 256)    return null;
 164   if (S < 0 || S > 2)      return null;
 165   if (D < 0 || D > 1)      return null;
 166   return findBySpec(CODING_SPEC(B, H, S, D));
 167 }
 168 
 169 void coding::free() {
 170   if (isMalloc) {
 171     mtrace('f', this, 0);
 172     ::free(this);
 173   }
 174 }
 175 
 176 void coding_method::reset(value_stream* state) {
 177   assert(state->rp == state->rplimit);  // not in mid-stream, please
 178   //assert(this == vs0.cm);
 179   state[0] = vs0;
 180   if (uValues != null) {
 181     uValues->reset(state->helper());
 182   }
 183 }
 184 
 185 maybe_inline
 186 uint coding::parse(byte* &rp, int B, int H) {
 187   int L = 256-H;
 188   byte* ptr = rp;
 189   // hand peel the i==0 part of the loop:
 190   uint b_i = *ptr++ & 0xFF;
 191   if (B == 1 || b_i < (uint)L)
 192     { rp = ptr; return b_i; }
 193   uint sum = b_i;
 194   uint H_i = H;
 195   assert(B <= B_MAX);
 196   for (int i = 2; i <= B_MAX; i++) { // easy for compilers to unroll if desired
 197     b_i = *ptr++ & 0xFF;
 198     sum += b_i * H_i;
 199     if (i == B || b_i < (uint)L)
 200       { rp = ptr; return sum; }
 201     H_i *= H;
 202   }
 203   assert(false);
 204   return 0;
 205 }
 206 
 207 maybe_inline
 208 uint coding::parse_lgH(byte* &rp, int B, int H, int lgH) {
 209   assert(H == (1<<lgH));
 210   int L = 256-(1<<lgH);
 211   byte* ptr = rp;
 212   // hand peel the i==0 part of the loop:
 213   uint b_i = *ptr++ & 0xFF;
 214   if (B == 1 || b_i < (uint)L)
 215     { rp = ptr; return b_i; }
 216   uint sum = b_i;
 217   uint lg_H_i = lgH;
 218   assert(B <= B_MAX);
 219   for (int i = 2; i <= B_MAX; i++) { // easy for compilers to unroll if desired
 220     b_i = *ptr++ & 0xFF;
 221     sum += b_i << lg_H_i;
 222     if (i == B || b_i < (uint)L)
 223       { rp = ptr; return sum; }
 224     lg_H_i += lgH;
 225   }
 226   assert(false);
 227   return 0;
 228 }
 229 
 230 static const char ERB[] = "EOF reading band";
 231 
 232 maybe_inline
 233 void coding::parseMultiple(byte* &rp, int N, byte* limit, int B, int H) {
 234   if (N < 0) {
 235     abort("bad value count");
 236     return;
 237   }
 238   byte* ptr = rp;
 239   if (B == 1 || H == 256) {
 240     size_t len = (size_t)N*B;
 241     if (len / B != (size_t)N || ptr+len > limit) {
 242       abort(ERB);
 243       return;
 244     }
 245     rp = ptr+len;
 246     return;
 247   }
 248   // Note:  We assume rp has enough zero-padding.
 249   int L = 256-H;
 250   int n = B;
 251   while (N > 0) {
 252     ptr += 1;
 253     if (--n == 0) {
 254       // end of encoding at B bytes, regardless of byte value
 255     } else {
 256       int b = (ptr[-1] & 0xFF);
 257       if (b >= L) {
 258         // keep going, unless we find a byte < L
 259         continue;
 260       }
 261     }
 262     // found the last byte
 263     N -= 1;
 264     n = B;   // reset length counter
 265     // do an error check here
 266     if (ptr > limit) {
 267       abort(ERB);
 268       return;
 269     }
 270   }
 271   rp = ptr;
 272   return;
 273 }
 274 
 275 bool value_stream::hasHelper() {
 276   // If my coding method is a pop-style method,
 277   // then I need a second value stream to transmit
 278   // unfavored values.
 279   // This can be determined by examining fValues.
 280   return cm->fValues != null;
 281 }
 282 
 283 void value_stream::init(byte* rp_, byte* rplimit_, coding* defc) {
 284   rp = rp_;
 285   rplimit = rplimit_;
 286   sum = 0;
 287   cm = null;  // no need in the simple case
 288   setCoding(defc);
 289 }
 290 
 291 void value_stream::setCoding(coding* defc) {
 292   if (defc == null) {
 293     unpack_abort("bad coding");
 294     defc = coding::findByIndex(_meta_canon_min);  // random pick for recovery
 295   }
 296 
 297   c = (*defc);
 298 
 299   // choose cmk
 300   cmk = cmk_ERROR;
 301   switch (c.spec) {
 302   case BYTE1_spec:      cmk = cmk_BYTE1;        break;
 303   case CHAR3_spec:      cmk = cmk_CHAR3;        break;
 304   case UNSIGNED5_spec:  cmk = cmk_UNSIGNED5;    break;
 305   case DELTA5_spec:     cmk = cmk_DELTA5;       break;
 306   case BCI5_spec:       cmk = cmk_BCI5;         break;
 307   case BRANCH5_spec:    cmk = cmk_BRANCH5;      break;
 308   default:
 309     if (c.D() == 0) {
 310       switch (c.S()) {
 311       case 0:  cmk = cmk_BHS0;  break;
 312       case 1:  cmk = cmk_BHS1;  break;
 313       default: cmk = cmk_BHS;   break;
 314       }
 315     } else {
 316       if (c.S() == 1) {
 317         if (c.isFullRange)   cmk = cmk_BHS1D1full;
 318         if (c.isSubrange)    cmk = cmk_BHS1D1sub;
 319       }
 320       if (cmk == cmk_ERROR)  cmk = cmk_BHSD1;
 321     }
 322   }
 323 }
 324 
 325 static maybe_inline
 326 int getPopValue(value_stream* self, uint uval) {
 327   if (uval > 0) {
 328     // note that the initial parse performed a range check
 329     assert(uval <= (uint)self->cm->fVlength);
 330     return self->cm->fValues[uval-1];
 331   } else {
 332     // take an unfavored value
 333     return self->helper()->getInt();
 334   }
 335 }
 336 
 337 maybe_inline
 338 int coding::sumInUnsignedRange(int x, int y) {
 339   assert(isSubrange);
 340   int range = (int)(umax+1);
 341   assert(range > 0);
 342   x += y;
 343   if (x != (int)((jlong)(x-y) + (jlong)y)) {
 344     // 32-bit overflow interferes with range reduction.
 345     // Back off from the overflow by adding a multiple of range:
 346     if (x < 0) {
 347       x -= range;
 348       assert(x >= 0);
 349     } else {
 350       x += range;
 351       assert(x < 0);
 352     }
 353   }
 354   if (x < 0) {
 355     x += range;
 356     if (x >= 0)  return x;
 357   } else if (x >= range) {
 358     x -= range;
 359     if (x < range)  return x;
 360   } else {
 361     // in range
 362     return x;
 363   }
 364   // do it the hard way
 365   x %= range;
 366   if (x < 0)  x += range;
 367   return x;
 368 }
 369 
 370 static maybe_inline
 371 int getDeltaValue(value_stream* self, uint uval, bool isSubrange) {
 372   assert((uint)(self->c.isSubrange) == (uint)isSubrange);
 373   assert(self->c.isSubrange | self->c.isFullRange);
 374   if (isSubrange)
 375     return self->sum = self->c.sumInUnsignedRange(self->sum, (int)uval);
 376   else
 377     return self->sum += (int) uval;
 378 }
 379 
 380 bool value_stream::hasValue() {
 381   if (rp < rplimit)      return true;
 382   if (cm == null)        return false;
 383   if (cm->next == null)  return false;
 384   cm->next->reset(this);
 385   return hasValue();
 386 }
 387 
 388 int value_stream::getInt() {
 389   if (rp >= rplimit) {
 390     // Advance to next coding segment.
 391     if (rp > rplimit || cm == null || cm->next == null) {
 392       // Must perform this check and throw an exception on bad input.
 393       unpack_abort(ERB);
 394       return 0;
 395     }
 396     cm->next->reset(this);
 397     return getInt();
 398   }
 399 
 400   CODING_PRIVATE(c.spec);
 401   uint uval;
 402   enum {
 403     B5 = 5,
 404     B3 = 3,
 405     H128 = 128,
 406     H64 = 64,
 407     H4 = 4
 408   };
 409   switch (cmk) {
 410   case cmk_BHS:
 411     assert(D == 0);
 412     uval = coding::parse(rp, B, H);
 413     if (S == 0)
 414       return (int) uval;
 415     return decode_sign(S, uval);
 416 
 417   case cmk_BHS0:
 418     assert(S == 0 && D == 0);
 419     uval = coding::parse(rp, B, H);
 420     return (int) uval;
 421 
 422   case cmk_BHS1:
 423     assert(S == 1 && D == 0);
 424     uval = coding::parse(rp, B, H);
 425     return DECODE_SIGN_S1(uval);
 426 
 427   case cmk_BYTE1:
 428     assert(c.spec == BYTE1_spec);
 429     assert(B == 1 && H == 256 && S == 0 && D == 0);
 430     return *rp++ & 0xFF;
 431 
 432   case cmk_CHAR3:
 433     assert(c.spec == CHAR3_spec);
 434     assert(B == B3 && H == H128 && S == 0 && D == 0);
 435     return coding::parse_lgH(rp, B3, H128, 7);
 436 
 437   case cmk_UNSIGNED5:
 438     assert(c.spec == UNSIGNED5_spec);
 439     assert(B == B5 && H == H64 && S == 0 && D == 0);
 440     return coding::parse_lgH(rp, B5, H64, 6);
 441 
 442   case cmk_BHSD1:
 443     assert(D == 1);
 444     uval = coding::parse(rp, B, H);
 445     if (S != 0)
 446       uval = (uint) decode_sign(S, uval);
 447     return getDeltaValue(this, uval, (bool)c.isSubrange);
 448 
 449   case cmk_BHS1D1full:
 450     assert(S == 1 && D == 1 && c.isFullRange);
 451     uval = coding::parse(rp, B, H);
 452     uval = (uint) DECODE_SIGN_S1(uval);
 453     return getDeltaValue(this, uval, false);
 454 
 455   case cmk_BHS1D1sub:
 456     assert(S == 1 && D == 1 && c.isSubrange);
 457     uval = coding::parse(rp, B, H);
 458     uval = (uint) DECODE_SIGN_S1(uval);
 459     return getDeltaValue(this, uval, true);
 460 
 461   case cmk_DELTA5:
 462     assert(c.spec == DELTA5_spec);
 463     assert(B == B5 && H == H64 && S == 1 && D == 1 && c.isFullRange);
 464     uval = coding::parse_lgH(rp, B5, H64, 6);
 465     sum += DECODE_SIGN_S1(uval);
 466     return sum;
 467 
 468   case cmk_BCI5:
 469     assert(c.spec == BCI5_spec);
 470     assert(B == B5 && H == H4 && S == 0 && D == 0);
 471     return coding::parse_lgH(rp, B5, H4, 2);
 472 
 473   case cmk_BRANCH5:
 474     assert(c.spec == BRANCH5_spec);
 475     assert(B == B5 && H == H4 && S == 2 && D == 0);
 476     uval = coding::parse_lgH(rp, B5, H4, 2);
 477     return decode_sign(S, uval);
 478 
 479   case cmk_pop:
 480     uval = coding::parse(rp, B, H);
 481     if (S != 0) {
 482       uval = (uint) decode_sign(S, uval);
 483     }
 484     if (D != 0) {
 485       assert(c.isSubrange | c.isFullRange);
 486       if (c.isSubrange)
 487         sum = c.sumInUnsignedRange(sum, (int) uval);
 488       else
 489         sum += (int) uval;
 490       uval = (uint) sum;
 491     }
 492     return getPopValue(this, uval);
 493 
 494   case cmk_pop_BHS0:
 495     assert(S == 0 && D == 0);
 496     uval = coding::parse(rp, B, H);
 497     return getPopValue(this, uval);
 498 
 499   case cmk_pop_BYTE1:
 500     assert(c.spec == BYTE1_spec);
 501     assert(B == 1 && H == 256 && S == 0 && D == 0);
 502     return getPopValue(this, *rp++ & 0xFF);
 503 
 504   default:
 505     break;
 506   }
 507   assert(false);
 508   return 0;
 509 }
 510 
 511 static maybe_inline
 512 int moreCentral(int x, int y) {  // used to find end of Pop.{F}
 513   // Suggested implementation from the Pack200 specification:
 514   uint kx = (x >> 31) ^ (x << 1);
 515   uint ky = (y >> 31) ^ (y << 1);
 516   return (kx < ky? x: y);
 517 }
 518 //static maybe_inline
 519 //int moreCentral2(int x, int y, int min) {
 520 //  // Strict implementation of buggy 150.7 specification.
 521 //  // The bug is that the spec. says absolute-value ties are broken
 522 //  // in favor of positive numbers, but the suggested implementation
 523 //  // (also mentioned in the spec.) breaks ties in favor of negative numbers.
 524 //  if ((x + y) != 0)
 525 //    return min;
 526 //  else
 527 //    // return the other value, which breaks a tie in the positive direction
 528 //    return (x > y)? x: y;
 529 //}
 530 
// NO_META is an lvalue of type byte* holding null.  It is passed as the
// meta_rp argument of coding_method::init to request the default coding
// (init treats a null meta_rp as _meta_default and reads no meta bytes).
static const byte* no_meta[] = {null};
#define NO_META (*(byte**)no_meta)
// Special value count passed to coding_method::init for the favored-value
// band of a pop coding: values are read until a sentinel is seen.
enum { POP_FAVORED_N = -2 };

// mode bits (the 'mode' argument of coding_method::init)
#define DISABLE_RUN  1  // used immediately inside ACode
#define DISABLE_POP  2  // used recursively in all pop sub-bands
 538 
 539 // This function knows all about meta-coding.
 540 void coding_method::init(byte* &band_rp, byte* band_limit,
 541                          byte* &meta_rp, int mode,
 542                          coding* defc, int N,
 543                          intlist* valueSink) {
 544   assert(N != 0);
 545 
 546   assert(u != null);  // must be pre-initialized
 547   //if (u == null)  u = unpacker::current();  // expensive
 548 
 549   int op = (meta_rp == null) ? _meta_default :  (*meta_rp++ & 0xFF);
 550   coding* foundc = null;
 551   coding* to_free = null;
 552 
 553   if (op == _meta_default) {
 554     foundc = defc;
 555     // and fall through
 556 
 557   } else if (op >= _meta_canon_min && op <= _meta_canon_max) {
 558     foundc = coding::findByIndex(op);
 559     // and fall through
 560 
 561   } else if (op == _meta_arb) {
 562     int args = (*meta_rp++ & 0xFF);
 563     // args = (D:[0..1] + 2*S[0..2] + 8*(B:[1..5]-1))
 564     int D = ((args >> 0) & 1);
 565     int S = ((args >> 1) & 3);
 566     int B = ((args >> 3) & -1) + 1;
 567     // & (H[1..256]-1)
 568     int H = (*meta_rp++ & 0xFF) + 1;
 569     foundc = coding::findBySpec(B, H, S, D);
 570     to_free = foundc;  // findBySpec may dynamically allocate
 571     if (foundc == null) {
 572       abort("illegal arb. coding");
 573       return;
 574     }
 575     // and fall through
 576 
 577   } else if (op >= _meta_run && op < _meta_pop) {
 578     int args = (op - _meta_run);
 579     // args: KX:[0..3] + 4*(KBFlag:[0..1]) + 8*(ABDef:[0..2])
 580     int KX     = ((args >> 0) & 3);
 581     int KBFlag = ((args >> 2) & 1);
 582     int ABDef  = ((args >> 3) & -1);
 583     assert(ABDef <= 2);
 584     // & KB: one of [0..255] if KBFlag=1
 585     int KB     = (!KBFlag? 3: (*meta_rp++ & 0xFF));
 586     int K      = (KB+1) << (KX * 4);
 587     int N2 = (N >= 0) ? N-K : N;
 588     if (N == 0 || (N2 <= 0 && N2 != N)) {
 589       abort("illegal run encoding");
 590       return;
 591     }
 592     if ((mode & DISABLE_RUN) != 0) {
 593       abort("illegal nested run encoding");
 594       return;
 595     }
 596 
 597     // & Enc{ ACode } if ADef=0  (ABDef != 1)
 598     // No direct nesting of 'run' in ACode, but in BCode it's OK.
 599     int disRun = mode | DISABLE_RUN;
 600     if (ABDef == 1) {
 601       this->init(band_rp, band_limit, NO_META, disRun, defc, K, valueSink);
 602     } else {
 603       this->init(band_rp, band_limit, meta_rp, disRun, defc, K, valueSink);
 604     }
 605     CHECK;
 606 
 607     // & Enc{ BCode } if BDef=0  (ABDef != 2)
 608     coding_method* tail = U_NEW(coding_method, 1);
 609     CHECK_NULL(tail);
 610     tail->u = u;
 611 
 612     // The 'run' codings may be nested indirectly via 'pop' codings.
 613     // This means that this->next may already be filled in, if
 614     // ACode was of type 'pop' with a 'run' token coding.
 615     // No problem:  Just chain the upcoming BCode onto the end.
 616     for (coding_method* self = this; ; self = self->next) {
 617       if (self->next == null) {
 618         self->next = tail;
 619         break;
 620       }
 621     }
 622 
 623     if (ABDef == 2) {
 624       tail->init(band_rp, band_limit, NO_META, mode, defc, N2, valueSink);
 625     } else {
 626       tail->init(band_rp, band_limit, meta_rp, mode, defc, N2, valueSink);
 627     }
 628     // Note:  The preceding calls to init should be tail-recursive.
 629 
 630     return;  // done; no falling through
 631 
 632   } else if (op >= _meta_pop && op < _meta_limit) {
 633     int args = (op - _meta_pop);
 634     // args: (FDef:[0..1]) + 2*UDef:[0..1] + 4*(TDefL:[0..11])
 635     int FDef  = ((args >> 0) & 1);
 636     int UDef  = ((args >> 1) & 1);
 637     int TDefL = ((args >> 2) & -1);
 638     assert(TDefL <= 11);
 639     int TDef  = (TDefL > 0);
 640     int TL    = (TDefL <= 6) ? (2 << TDefL) : (256 - (4 << (11-TDefL)));
 641     int TH    = (256-TL);
 642     if (N <= 0) {
 643       abort("illegal pop encoding");
 644       return;
 645     }
 646     if ((mode & DISABLE_POP) != 0) {
 647       abort("illegal nested pop encoding");
 648       return;
 649     }
 650 
 651     // No indirect nesting of 'pop', but 'run' is OK.
 652     int disPop = DISABLE_POP;
 653 
 654     // & Enc{ FCode } if FDef=0
 655     int FN = POP_FAVORED_N;
 656     assert(valueSink == null);
 657     intlist fValueSink; fValueSink.init();
 658     coding_method fval;
 659     BYTES_OF(fval).clear(); fval.u = u;
 660     if (FDef != 0) {
 661       fval.init(band_rp, band_limit, NO_META, disPop, defc, FN, &fValueSink);
 662     } else {
 663       fval.init(band_rp, band_limit, meta_rp, disPop, defc, FN, &fValueSink);
 664     }
 665     bytes fvbuf;
 666     fValues  = (u->saveTo(fvbuf, fValueSink.b), (int*) fvbuf.ptr);
 667     fVlength = fValueSink.length();  // i.e., the parameter K
 668     fValueSink.free();
 669     CHECK;
 670 
 671     // Skip the first {F} run in all subsequent passes.
 672     // The next call to this->init(...) will set vs0.rp to point after the {F}.
 673 
 674     // & Enc{ TCode } if TDef=0  (TDefL==0)
 675     if (TDef != 0) {
 676       coding* tcode = coding::findBySpec(1, 256);  // BYTE1
 677       // find the most narrowly sufficient code:
 678       for (int B = 2; B <= B_MAX; B++) {
 679         if (fVlength <= tcode->umax)  break;  // found it
 680         tcode->free();
 681         tcode = coding::findBySpec(B, TH);
 682         CHECK_NULL(tcode);
 683       }
 684       if (!(fVlength <= tcode->umax)) {
 685         abort("pop.L value too small");
 686         return;
 687       }
 688       this->init(band_rp, band_limit, NO_META, disPop, tcode, N, null);
 689       tcode->free();
 690     } else {
 691       this->init(band_rp, band_limit, meta_rp, disPop,  defc, N, null);
 692     }
 693     CHECK;
 694 
 695     // Count the number of zero tokens right now.
 696     // Also verify that they are in bounds.
 697     int UN = 0;   // one {U} for each zero in {T}
 698     value_stream vs = vs0;
 699     for (int i = 0; i < N; i++) {
 700       uint val = vs.getInt();
 701       if (val == 0)  UN += 1;
 702       if (!(val <= (uint)fVlength)) {
 703         abort("pop token out of range");
 704         return;
 705       }
 706     }
 707     vs.done();
 708 
 709     // & Enc{ UCode } if UDef=0
 710     if (UN != 0) {
 711       uValues = U_NEW(coding_method, 1);
 712       CHECK_NULL(uValues);
 713       uValues->u = u;
 714       if (UDef != 0) {
 715         uValues->init(band_rp, band_limit, NO_META, disPop, defc, UN, null);
 716       } else {
 717         uValues->init(band_rp, band_limit, meta_rp, disPop, defc, UN, null);
 718       }
 719     } else {
 720       if (UDef == 0) {
 721         int uop = (*meta_rp++ & 0xFF);
 722         if (uop > _meta_canon_max)
 723           // %%% Spec. requires the more strict (uop != _meta_default).
 724           abort("bad meta-coding for empty pop/U");
 725       }
 726     }
 727 
 728     // Bug fix for 6259542
 729     // Last of all, adjust vs0.cmk to the 'pop' flavor
 730     for (coding_method* self = this; self != null; self = self->next) {
 731         coding_method_kind cmk2 = cmk_pop;
 732         switch (self->vs0.cmk) {
 733         case cmk_BHS0:   cmk2 = cmk_pop_BHS0;   break;
 734         case cmk_BYTE1:  cmk2 = cmk_pop_BYTE1;  break;
 735         default: break;
 736         }
 737         self->vs0.cmk = cmk2;
 738         if (self != this) {
 739           assert(self->fValues == null); // no double init
 740           self->fValues  = this->fValues;
 741           self->fVlength = this->fVlength;
 742           assert(self->uValues == null); // must stay null
 743         }
 744     }
 745 
 746     return;  // done; no falling through
 747 
 748   } else {
 749     abort("bad meta-coding");
 750     return;
 751   }
 752 
 753   // Common code here skips a series of values with one coding.
 754   assert(foundc != null);
 755 
 756   assert(vs0.cmk == cmk_ERROR);  // no garbage, please
 757   assert(vs0.rp == null);  // no garbage, please
 758   assert(vs0.rplimit == null);  // no garbage, please
 759   assert(vs0.sum == 0);  // no garbage, please
 760 
 761   vs0.init(band_rp, band_limit, foundc);
 762 
 763   // Done with foundc.  Free if necessary.
 764   if (to_free != null) {
 765     to_free->free();
 766     to_free = null;
 767   }
 768   foundc = null;
 769 
 770   coding& c = vs0.c;
 771   CODING_PRIVATE(c.spec);
 772   // assert sane N
 773   assert((uint)N < INT_MAX_VALUE || N == POP_FAVORED_N);
 774 
 775   // Look at the values, or at least skip over them quickly.
 776   if (valueSink == null) {
 777     // Skip and ignore values in the first pass.
 778     c.parseMultiple(band_rp, N, band_limit, B, H);
 779   } else if (N >= 0) {
 780     // Pop coding, {F} sequence, initial run of values...
 781     assert((mode & DISABLE_POP) != 0);
 782     value_stream vs = vs0;
 783     for (int n = 0; n < N; n++) {
 784       int val = vs.getInt();
 785       valueSink->add(val);
 786     }
 787     band_rp = vs.rp;
 788   } else {
 789     // Pop coding, {F} sequence, final run of values...
 790     assert((mode & DISABLE_POP) != 0);
 791     assert(N == POP_FAVORED_N);
 792     int min = INT_MIN_VALUE;  // farthest from the center
 793     // min2 is based on the buggy specification of centrality in version 150.7
 794     // no known implementations transmit this value, but just in case...
 795     //int min2 = INT_MIN_VALUE;
 796     int last = 0;
 797     // if there were initial runs, find the potential sentinels in them:
 798     for (int i = 0; i < valueSink->length(); i++) {
 799       last = valueSink->get(i);
 800       min = moreCentral(min, last);
 801       //min2 = moreCentral2(min2, last, min);
 802     }
 803     value_stream vs = vs0;
 804     for (;;) {
 805       int val = vs.getInt();
 806       if (valueSink->length() > 0 &&
 807           (val == last || val == min)) //|| val == min2
 808         break;
 809       valueSink->add(val);
 810       CHECK;
 811       last = val;
 812       min = moreCentral(min, last);
 813       //min2 = moreCentral2(min2, last, min);
 814     }
 815     band_rp = vs.rp;
 816   }
 817   CHECK;
 818 
 819   // Get an accurate upper limit now.
 820   vs0.rplimit = band_rp;
 821   vs0.cm = this;
 822 
 823   return; // success
 824 }
 825 
 826 coding basic_codings[] = {
 827   // This one is not a usable irregular coding, but is used by cp_Utf8_chars.
 828   CODING_INIT(3,128,0,0),
 829 
 830   // Fixed-length codings:
 831   CODING_INIT(1,256,0,0),
 832   CODING_INIT(1,256,1,0),
 833   CODING_INIT(1,256,0,1),
 834   CODING_INIT(1,256,1,1),
 835   CODING_INIT(2,256,0,0),
 836   CODING_INIT(2,256,1,0),
 837   CODING_INIT(2,256,0,1),
 838   CODING_INIT(2,256,1,1),
 839   CODING_INIT(3,256,0,0),
 840   CODING_INIT(3,256,1,0),
 841   CODING_INIT(3,256,0,1),
 842   CODING_INIT(3,256,1,1),
 843   CODING_INIT(4,256,0,0),
 844   CODING_INIT(4,256,1,0),
 845   CODING_INIT(4,256,0,1),
 846   CODING_INIT(4,256,1,1),
 847 
 848   // Full-range variable-length codings:
 849   CODING_INIT(5,  4,0,0),
 850   CODING_INIT(5,  4,1,0),
 851   CODING_INIT(5,  4,2,0),
 852   CODING_INIT(5, 16,0,0),
 853   CODING_INIT(5, 16,1,0),
 854   CODING_INIT(5, 16,2,0),
 855   CODING_INIT(5, 32,0,0),
 856   CODING_INIT(5, 32,1,0),
 857   CODING_INIT(5, 32,2,0),
 858   CODING_INIT(5, 64,0,0),
 859   CODING_INIT(5, 64,1,0),
 860   CODING_INIT(5, 64,2,0),
 861   CODING_INIT(5,128,0,0),
 862   CODING_INIT(5,128,1,0),
 863   CODING_INIT(5,128,2,0),
 864 
 865   CODING_INIT(5,  4,0,1),
 866   CODING_INIT(5,  4,1,1),
 867   CODING_INIT(5,  4,2,1),
 868   CODING_INIT(5, 16,0,1),
 869   CODING_INIT(5, 16,1,1),
 870   CODING_INIT(5, 16,2,1),
 871   CODING_INIT(5, 32,0,1),
 872   CODING_INIT(5, 32,1,1),
 873   CODING_INIT(5, 32,2,1),
 874   CODING_INIT(5, 64,0,1),
 875   CODING_INIT(5, 64,1,1),
 876   CODING_INIT(5, 64,2,1),
 877   CODING_INIT(5,128,0,1),
 878   CODING_INIT(5,128,1,1),
 879   CODING_INIT(5,128,2,1),
 880 
 881   // Variable length subrange codings:
 882   CODING_INIT(2,192,0,0),
 883   CODING_INIT(2,224,0,0),
 884   CODING_INIT(2,240,0,0),
 885   CODING_INIT(2,248,0,0),
 886   CODING_INIT(2,252,0,0),
 887 
 888   CODING_INIT(2,  8,0,1),
 889   CODING_INIT(2,  8,1,1),
 890   CODING_INIT(2, 16,0,1),
 891   CODING_INIT(2, 16,1,1),
 892   CODING_INIT(2, 32,0,1),
 893   CODING_INIT(2, 32,1,1),
 894   CODING_INIT(2, 64,0,1),
 895   CODING_INIT(2, 64,1,1),
 896   CODING_INIT(2,128,0,1),
 897   CODING_INIT(2,128,1,1),
 898   CODING_INIT(2,192,0,1),
 899   CODING_INIT(2,192,1,1),
 900   CODING_INIT(2,224,0,1),
 901   CODING_INIT(2,224,1,1),
 902   CODING_INIT(2,240,0,1),
 903   CODING_INIT(2,240,1,1),
 904   CODING_INIT(2,248,0,1),
 905   CODING_INIT(2,248,1,1),
 906 
 907   CODING_INIT(3,192,0,0),
 908   CODING_INIT(3,224,0,0),
 909   CODING_INIT(3,240,0,0),
 910   CODING_INIT(3,248,0,0),
 911   CODING_INIT(3,252,0,0),
 912 
 913   CODING_INIT(3,  8,0,1),
 914   CODING_INIT(3,  8,1,1),
 915   CODING_INIT(3, 16,0,1),
 916   CODING_INIT(3, 16,1,1),
 917   CODING_INIT(3, 32,0,1),
 918   CODING_INIT(3, 32,1,1),
 919   CODING_INIT(3, 64,0,1),
 920   CODING_INIT(3, 64,1,1),
 921   CODING_INIT(3,128,0,1),
 922   CODING_INIT(3,128,1,1),
 923   CODING_INIT(3,192,0,1),
 924   CODING_INIT(3,192,1,1),
 925   CODING_INIT(3,224,0,1),
 926   CODING_INIT(3,224,1,1),
 927   CODING_INIT(3,240,0,1),
 928   CODING_INIT(3,240,1,1),
 929   CODING_INIT(3,248,0,1),
 930   CODING_INIT(3,248,1,1),
 931 
 932   CODING_INIT(4,192,0,0),
 933   CODING_INIT(4,224,0,0),
 934   CODING_INIT(4,240,0,0),
 935   CODING_INIT(4,248,0,0),
 936   CODING_INIT(4,252,0,0),
 937 
 938   CODING_INIT(4,  8,0,1),
 939   CODING_INIT(4,  8,1,1),
 940   CODING_INIT(4, 16,0,1),
 941   CODING_INIT(4, 16,1,1),
 942   CODING_INIT(4, 32,0,1),
 943   CODING_INIT(4, 32,1,1),
 944   CODING_INIT(4, 64,0,1),
 945   CODING_INIT(4, 64,1,1),
 946   CODING_INIT(4,128,0,1),
 947   CODING_INIT(4,128,1,1),
 948   CODING_INIT(4,192,0,1),
 949   CODING_INIT(4,192,1,1),
 950   CODING_INIT(4,224,0,1),
 951   CODING_INIT(4,224,1,1),
 952   CODING_INIT(4,240,0,1),
 953   CODING_INIT(4,240,1,1),
 954   CODING_INIT(4,248,0,1),
 955   CODING_INIT(4,248,1,1),
 956   CODING_INIT(0,0,0,0)
 957 };
 958 #define BASIC_INDEX_LIMIT \
 959         (int)(sizeof(basic_codings)/sizeof(basic_codings[0])-1)
 960 
 961 coding* coding::findByIndex(int idx) {
 962 #ifndef PRODUCT
 963   /* Tricky assert here, constants and gcc complains about it without local. */
 964   int index_limit = BASIC_INDEX_LIMIT;
 965   assert(_meta_canon_min == 1 && _meta_canon_max+1 == index_limit);
 966 #endif
 967   if (idx >= _meta_canon_min && idx <= _meta_canon_max)
 968     return basic_codings[idx].init();
 969   else
 970     return null;
 971 }
 972 
 973 #ifndef PRODUCT
 974 const char* coding::string() {
 975   CODING_PRIVATE(spec);
 976   bytes buf;
 977   buf.malloc(100);
 978   char maxS[20], minS[20];
 979   sprintf(maxS, "%d", max);
 980   sprintf(minS, "%d", min);
 981   if (max == INT_MAX_VALUE)  strcpy(maxS, "max");
 982   if (min == INT_MIN_VALUE)  strcpy(minS, "min");
 983   sprintf((char*)buf.ptr, "(%d,%d,%d,%d) L=%d r=[%s,%s]",
 984           B,H,S,D,L,minS,maxS);
 985   return (const char*) buf.ptr;
 986 }
 987 #endif