--- old/jdk/src/jdk.pack200/share/native/common-unpack/coding.cpp 2016-12-13 21:44:46.000000000 -0800 +++ /dev/null 2016-12-13 21:44:46.000000000 -0800 @@ -1,989 +0,0 @@ -/* - * Copyright (c) 2002, 2009, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - -// -*- C++ -*- -// Small program for unpacking specially compressed Java packages. -// John R. Rose - -#include -#include -#include -#include - -#include "jni_util.h" - -#include "defines.h" -#include "bytes.h" -#include "utils.h" -#include "coding.h" - -#include "constants.h" -#include "unpack.h" - -extern coding basic_codings[]; - -#define CODING_PRIVATE(spec) \ - int spec_ = spec; \ - int B = CODING_B(spec_); \ - int H = CODING_H(spec_); \ - int L = 256 - H; \ - int S = CODING_S(spec_); \ - int D = CODING_D(spec_) - -#define IS_NEG_CODE(S, codeVal) \ - ( (((int)(codeVal)+1) & ((1<> 1) ^ -((int)(ux) & 1) ) - -static maybe_inline -int decode_sign(int S, uint ux) { // == Coding.decodeSign32 - assert(S > 0); - uint sigbits = (ux >> S); - if (IS_NEG_CODE(S, ux)) - return (int)( ~sigbits); - else - return (int)(ux - sigbits); - // Note that (int)(ux-sigbits) can be negative, if ux is large enough. -} - -coding* coding::init() { - if (umax > 0) return this; // already done - assert(spec != 0); // sanity - - // fill in derived fields - CODING_PRIVATE(spec); - - // Return null if 'arb(BHSD)' parameter constraints are not met: - if (B < 1 || B > B_MAX) return null; - if (H < 1 || H > 256) return null; - if (S < 0 || S > 2) return null; - if (D < 0 || D > 1) return null; - if (B == 1 && H != 256) return null; // 1-byte coding must be fixed-size - if (B >= 5 && H == 256) return null; // no 5-byte fixed-size coding - - // first compute the range of the coding, in 64 bits - jlong range = 0; - { - jlong H_i = 1; - for (int i = 0; i < B; i++) { - range += H_i; - H_i *= H; - } - range *= L; - range += H_i; - } - assert(range > 0); // no useless codings, please - - int this_umax; - - // now, compute min and max - if (range >= ((jlong)1 << 32)) { - this_umax = INT_MAX_VALUE; - this->umin = INT_MIN_VALUE; - this->max = INT_MAX_VALUE; - this->min = INT_MIN_VALUE; - } else { - this_umax = (range > INT_MAX_VALUE) ? INT_MAX_VALUE : (int)range-1; - this->max = this_umax; - this->min = this->umin = 0; - if (S != 0 && range != 0) { - int Smask = (1<max = INT_MAX_VALUE; // 32-bit wraparound - else - this->max = maxPos; - if (maxNegCode < 0) - this->min = 0; // No negative codings at all. - else - this->min = decode_sign(S, (uint)maxNegCode); - } - } - - assert(!(isFullRange | isSigned | isSubrange)); // init - if (min < 0) - this->isSigned = true; - if (max < INT_MAX_VALUE && range <= INT_MAX_VALUE) - this->isSubrange = true; - if (max == INT_MAX_VALUE && min == INT_MIN_VALUE) - this->isFullRange = true; - - // do this last, to reduce MT exposure (should have a membar too) - this->umax = this_umax; - - return this; -} - -coding* coding::findBySpec(int spec) { - for (coding* scan = &basic_codings[0]; ; scan++) { - if (scan->spec == spec) - return scan->init(); - if (scan->spec == 0) - break; - } - coding* ptr = NEW(coding, 1); - CHECK_NULL_RETURN(ptr, 0); - coding* c = ptr->initFrom(spec); - if (c == null) { - mtrace('f', ptr, 0); - ::free(ptr); - } else - // else caller should free it... - c->isMalloc = true; - return c; -} - -coding* coding::findBySpec(int B, int H, int S, int D) { - if (B < 1 || B > B_MAX) return null; - if (H < 1 || H > 256) return null; - if (S < 0 || S > 2) return null; - if (D < 0 || D > 1) return null; - return findBySpec(CODING_SPEC(B, H, S, D)); -} - -void coding::free() { - if (isMalloc) { - mtrace('f', this, 0); - ::free(this); - } -} - -void coding_method::reset(value_stream* state) { - assert(state->rp == state->rplimit); // not in mid-stream, please - //assert(this == vs0.cm); - state[0] = vs0; - if (uValues != null) { - uValues->reset(state->helper()); - } -} - -maybe_inline -uint coding::parse(byte* &rp, int B, int H) { - int L = 256-H; - byte* ptr = rp; - // hand peel the i==0 part of the loop: - uint b_i = *ptr++ & 0xFF; - if (B == 1 || b_i < (uint)L) - { rp = ptr; return b_i; } - uint sum = b_i; - uint H_i = H; - assert(B <= B_MAX); - for (int i = 2; i <= B_MAX; i++) { // easy for compilers to unroll if desired - b_i = *ptr++ & 0xFF; - sum += b_i * H_i; - if (i == B || b_i < (uint)L) - { rp = ptr; return sum; } - H_i *= H; - } - assert(false); - return 0; -} - -maybe_inline -uint coding::parse_lgH(byte* &rp, int B, int H, int lgH) { - assert(H == (1< limit) { - abort(ERB); - return; - } - rp = ptr+len; - return; - } - // Note: We assume rp has enough zero-padding. - int L = 256-H; - int n = B; - while (N > 0) { - ptr += 1; - if (--n == 0) { - // end of encoding at B bytes, regardless of byte value - } else { - int b = (ptr[-1] & 0xFF); - if (b >= L) { - // keep going, unless we find a byte < L - continue; - } - } - // found the last byte - N -= 1; - n = B; // reset length counter - // do an error check here - if (ptr > limit) { - abort(ERB); - return; - } - } - rp = ptr; - return; -} - -bool value_stream::hasHelper() { - // If my coding method is a pop-style method, - // then I need a second value stream to transmit - // unfavored values. - // This can be determined by examining fValues. - return cm->fValues != null; -} - -void value_stream::init(byte* rp_, byte* rplimit_, coding* defc) { - rp = rp_; - rplimit = rplimit_; - sum = 0; - cm = null; // no need in the simple case - setCoding(defc); -} - -void value_stream::setCoding(coding* defc) { - if (defc == null) { - unpack_abort("bad coding"); - defc = coding::findByIndex(_meta_canon_min); // random pick for recovery - } - - c = (*defc); - - // choose cmk - cmk = cmk_ERROR; - switch (c.spec) { - case BYTE1_spec: cmk = cmk_BYTE1; break; - case CHAR3_spec: cmk = cmk_CHAR3; break; - case UNSIGNED5_spec: cmk = cmk_UNSIGNED5; break; - case DELTA5_spec: cmk = cmk_DELTA5; break; - case BCI5_spec: cmk = cmk_BCI5; break; - case BRANCH5_spec: cmk = cmk_BRANCH5; break; - default: - if (c.D() == 0) { - switch (c.S()) { - case 0: cmk = cmk_BHS0; break; - case 1: cmk = cmk_BHS1; break; - default: cmk = cmk_BHS; break; - } - } else { - if (c.S() == 1) { - if (c.isFullRange) cmk = cmk_BHS1D1full; - if (c.isSubrange) cmk = cmk_BHS1D1sub; - } - if (cmk == cmk_ERROR) cmk = cmk_BHSD1; - } - } -} - -static maybe_inline -int getPopValue(value_stream* self, uint uval) { - if (uval > 0) { - // note that the initial parse performed a range check - assert(uval <= (uint)self->cm->fVlength); - return self->cm->fValues[uval-1]; - } else { - // take an unfavored value - return self->helper()->getInt(); - } -} - -maybe_inline -int coding::sumInUnsignedRange(int x, int y) { - assert(isSubrange); - int range = (int)(umax+1); - assert(range > 0); - x += y; - if (x != (int)((jlong)(x-y) + (jlong)y)) { - // 32-bit overflow interferes with range reduction. - // Back off from the overflow by adding a multiple of range: - if (x < 0) { - x -= range; - assert(x >= 0); - } else { - x += range; - assert(x < 0); - } - } - if (x < 0) { - x += range; - if (x >= 0) return x; - } else if (x >= range) { - x -= range; - if (x < range) return x; - } else { - // in range - return x; - } - // do it the hard way - x %= range; - if (x < 0) x += range; - return x; -} - -static maybe_inline -int getDeltaValue(value_stream* self, uint uval, bool isSubrange) { - assert((uint)(self->c.isSubrange) == (uint)isSubrange); - assert(self->c.isSubrange | self->c.isFullRange); - if (isSubrange) - return self->sum = self->c.sumInUnsignedRange(self->sum, (int)uval); - else - return self->sum += (int) uval; -} - -bool value_stream::hasValue() { - if (rp < rplimit) return true; - if (cm == null) return false; - if (cm->next == null) return false; - cm->next->reset(this); - return hasValue(); -} - -int value_stream::getInt() { - if (rp >= rplimit) { - // Advance to next coding segment. - if (rp > rplimit || cm == null || cm->next == null) { - // Must perform this check and throw an exception on bad input. - unpack_abort(ERB); - return 0; - } - cm->next->reset(this); - return getInt(); - } - - CODING_PRIVATE(c.spec); - uint uval; - enum { - B5 = 5, - B3 = 3, - H128 = 128, - H64 = 64, - H4 = 4 - }; - switch (cmk) { - case cmk_BHS: - assert(D == 0); - uval = coding::parse(rp, B, H); - if (S == 0) - return (int) uval; - return decode_sign(S, uval); - - case cmk_BHS0: - assert(S == 0 && D == 0); - uval = coding::parse(rp, B, H); - return (int) uval; - - case cmk_BHS1: - assert(S == 1 && D == 0); - uval = coding::parse(rp, B, H); - return DECODE_SIGN_S1(uval); - - case cmk_BYTE1: - assert(c.spec == BYTE1_spec); - assert(B == 1 && H == 256 && S == 0 && D == 0); - return *rp++ & 0xFF; - - case cmk_CHAR3: - assert(c.spec == CHAR3_spec); - assert(B == B3 && H == H128 && S == 0 && D == 0); - return coding::parse_lgH(rp, B3, H128, 7); - - case cmk_UNSIGNED5: - assert(c.spec == UNSIGNED5_spec); - assert(B == B5 && H == H64 && S == 0 && D == 0); - return coding::parse_lgH(rp, B5, H64, 6); - - case cmk_BHSD1: - assert(D == 1); - uval = coding::parse(rp, B, H); - if (S != 0) - uval = (uint) decode_sign(S, uval); - return getDeltaValue(this, uval, (bool)c.isSubrange); - - case cmk_BHS1D1full: - assert(S == 1 && D == 1 && c.isFullRange); - uval = coding::parse(rp, B, H); - uval = (uint) DECODE_SIGN_S1(uval); - return getDeltaValue(this, uval, false); - - case cmk_BHS1D1sub: - assert(S == 1 && D == 1 && c.isSubrange); - uval = coding::parse(rp, B, H); - uval = (uint) DECODE_SIGN_S1(uval); - return getDeltaValue(this, uval, true); - - case cmk_DELTA5: - assert(c.spec == DELTA5_spec); - assert(B == B5 && H == H64 && S == 1 && D == 1 && c.isFullRange); - uval = coding::parse_lgH(rp, B5, H64, 6); - sum += DECODE_SIGN_S1(uval); - return sum; - - case cmk_BCI5: - assert(c.spec == BCI5_spec); - assert(B == B5 && H == H4 && S == 0 && D == 0); - return coding::parse_lgH(rp, B5, H4, 2); - - case cmk_BRANCH5: - assert(c.spec == BRANCH5_spec); - assert(B == B5 && H == H4 && S == 2 && D == 0); - uval = coding::parse_lgH(rp, B5, H4, 2); - return decode_sign(S, uval); - - case cmk_pop: - uval = coding::parse(rp, B, H); - if (S != 0) { - uval = (uint) decode_sign(S, uval); - } - if (D != 0) { - assert(c.isSubrange | c.isFullRange); - if (c.isSubrange) - sum = c.sumInUnsignedRange(sum, (int) uval); - else - sum += (int) uval; - uval = (uint) sum; - } - return getPopValue(this, uval); - - case cmk_pop_BHS0: - assert(S == 0 && D == 0); - uval = coding::parse(rp, B, H); - return getPopValue(this, uval); - - case cmk_pop_BYTE1: - assert(c.spec == BYTE1_spec); - assert(B == 1 && H == 256 && S == 0 && D == 0); - return getPopValue(this, *rp++ & 0xFF); - - default: - break; - } - assert(false); - return 0; -} - -static maybe_inline -int moreCentral(int x, int y) { // used to find end of Pop.{F} - // Suggested implementation from the Pack200 specification: - uint kx = (x >> 31) ^ (x << 1); - uint ky = (y >> 31) ^ (y << 1); - return (kx < ky? x: y); -} -//static maybe_inline -//int moreCentral2(int x, int y, int min) { -// // Strict implementation of buggy 150.7 specification. -// // The bug is that the spec. says absolute-value ties are broken -// // in favor of positive numbers, but the suggested implementation -// // (also mentioned in the spec.) breaks ties in favor of negative numbers. -// if ((x + y) != 0) -// return min; -// else -// // return the other value, which breaks a tie in the positive direction -// return (x > y)? x: y; -//} - -static const byte* no_meta[] = {null}; -#define NO_META (*(byte**)no_meta) -enum { POP_FAVORED_N = -2 }; - -// mode bits -#define DISABLE_RUN 1 // used immediately inside ACodee -#define DISABLE_POP 2 // used recursively in all pop sub-bands - -// This function knows all about meta-coding. -void coding_method::init(byte* &band_rp, byte* band_limit, - byte* &meta_rp, int mode, - coding* defc, int N, - intlist* valueSink) { - assert(N != 0); - - assert(u != null); // must be pre-initialized - //if (u == null) u = unpacker::current(); // expensive - - int op = (meta_rp == null) ? _meta_default : (*meta_rp++ & 0xFF); - coding* foundc = null; - coding* to_free = null; - - if (op == _meta_default) { - foundc = defc; - // and fall through - - } else if (op >= _meta_canon_min && op <= _meta_canon_max) { - foundc = coding::findByIndex(op); - // and fall through - - } else if (op == _meta_arb) { - int args = (*meta_rp++ & 0xFF); - // args = (D:[0..1] + 2*S[0..2] + 8*(B:[1..5]-1)) - int D = ((args >> 0) & 1); - int S = ((args >> 1) & 3); - int B = ((args >> 3) & -1) + 1; - // & (H[1..256]-1) - int H = (*meta_rp++ & 0xFF) + 1; - foundc = coding::findBySpec(B, H, S, D); - to_free = foundc; // findBySpec may dynamically allocate - if (foundc == null) { - abort("illegal arb. coding"); - return; - } - // and fall through - - } else if (op >= _meta_run && op < _meta_pop) { - int args = (op - _meta_run); - // args: KX:[0..3] + 4*(KBFlag:[0..1]) + 8*(ABDef:[0..2]) - int KX = ((args >> 0) & 3); - int KBFlag = ((args >> 2) & 1); - int ABDef = ((args >> 3) & -1); - assert(ABDef <= 2); - // & KB: one of [0..255] if KBFlag=1 - int KB = (!KBFlag? 3: (*meta_rp++ & 0xFF)); - int K = (KB+1) << (KX * 4); - int N2 = (N >= 0) ? N-K : N; - if (N == 0 || (N2 <= 0 && N2 != N)) { - abort("illegal run encoding"); - return; - } - if ((mode & DISABLE_RUN) != 0) { - abort("illegal nested run encoding"); - return; - } - - // & Enc{ ACode } if ADef=0 (ABDef != 1) - // No direct nesting of 'run' in ACode, but in BCode it's OK. - int disRun = mode | DISABLE_RUN; - if (ABDef == 1) { - this->init(band_rp, band_limit, NO_META, disRun, defc, K, valueSink); - } else { - this->init(band_rp, band_limit, meta_rp, disRun, defc, K, valueSink); - } - CHECK; - - // & Enc{ BCode } if BDef=0 (ABDef != 2) - coding_method* tail = U_NEW(coding_method, 1); - CHECK_NULL(tail); - tail->u = u; - - // The 'run' codings may be nested indirectly via 'pop' codings. - // This means that this->next may already be filled in, if - // ACode was of type 'pop' with a 'run' token coding. - // No problem: Just chain the upcoming BCode onto the end. - for (coding_method* self = this; ; self = self->next) { - if (self->next == null) { - self->next = tail; - break; - } - } - - if (ABDef == 2) { - tail->init(band_rp, band_limit, NO_META, mode, defc, N2, valueSink); - } else { - tail->init(band_rp, band_limit, meta_rp, mode, defc, N2, valueSink); - } - // Note: The preceding calls to init should be tail-recursive. - - return; // done; no falling through - - } else if (op >= _meta_pop && op < _meta_limit) { - int args = (op - _meta_pop); - // args: (FDef:[0..1]) + 2*UDef:[0..1] + 4*(TDefL:[0..11]) - int FDef = ((args >> 0) & 1); - int UDef = ((args >> 1) & 1); - int TDefL = ((args >> 2) & -1); - assert(TDefL <= 11); - int TDef = (TDefL > 0); - int TL = (TDefL <= 6) ? (2 << TDefL) : (256 - (4 << (11-TDefL))); - int TH = (256-TL); - if (N <= 0) { - abort("illegal pop encoding"); - return; - } - if ((mode & DISABLE_POP) != 0) { - abort("illegal nested pop encoding"); - return; - } - - // No indirect nesting of 'pop', but 'run' is OK. - int disPop = DISABLE_POP; - - // & Enc{ FCode } if FDef=0 - int FN = POP_FAVORED_N; - assert(valueSink == null); - intlist fValueSink; fValueSink.init(); - coding_method fval; - BYTES_OF(fval).clear(); fval.u = u; - if (FDef != 0) { - fval.init(band_rp, band_limit, NO_META, disPop, defc, FN, &fValueSink); - } else { - fval.init(band_rp, band_limit, meta_rp, disPop, defc, FN, &fValueSink); - } - bytes fvbuf; - fValues = (u->saveTo(fvbuf, fValueSink.b), (int*) fvbuf.ptr); - fVlength = fValueSink.length(); // i.e., the parameter K - fValueSink.free(); - CHECK; - - // Skip the first {F} run in all subsequent passes. - // The next call to this->init(...) will set vs0.rp to point after the {F}. - - // & Enc{ TCode } if TDef=0 (TDefL==0) - if (TDef != 0) { - coding* tcode = coding::findBySpec(1, 256); // BYTE1 - // find the most narrowly sufficient code: - for (int B = 2; B <= B_MAX; B++) { - if (fVlength <= tcode->umax) break; // found it - tcode->free(); - tcode = coding::findBySpec(B, TH); - CHECK_NULL(tcode); - } - if (!(fVlength <= tcode->umax)) { - abort("pop.L value too small"); - return; - } - this->init(band_rp, band_limit, NO_META, disPop, tcode, N, null); - tcode->free(); - } else { - this->init(band_rp, band_limit, meta_rp, disPop, defc, N, null); - } - CHECK; - - // Count the number of zero tokens right now. - // Also verify that they are in bounds. - int UN = 0; // one {U} for each zero in {T} - value_stream vs = vs0; - for (int i = 0; i < N; i++) { - uint val = vs.getInt(); - if (val == 0) UN += 1; - if (!(val <= (uint)fVlength)) { - abort("pop token out of range"); - return; - } - } - vs.done(); - - // & Enc{ UCode } if UDef=0 - if (UN != 0) { - uValues = U_NEW(coding_method, 1); - CHECK_NULL(uValues); - uValues->u = u; - if (UDef != 0) { - uValues->init(band_rp, band_limit, NO_META, disPop, defc, UN, null); - } else { - uValues->init(band_rp, band_limit, meta_rp, disPop, defc, UN, null); - } - } else { - if (UDef == 0) { - int uop = (*meta_rp++ & 0xFF); - if (uop > _meta_canon_max) - // %%% Spec. requires the more strict (uop != _meta_default). - abort("bad meta-coding for empty pop/U"); - } - } - - // Bug fix for 6259542 - // Last of all, adjust vs0.cmk to the 'pop' flavor - for (coding_method* self = this; self != null; self = self->next) { - coding_method_kind cmk2 = cmk_pop; - switch (self->vs0.cmk) { - case cmk_BHS0: cmk2 = cmk_pop_BHS0; break; - case cmk_BYTE1: cmk2 = cmk_pop_BYTE1; break; - default: break; - } - self->vs0.cmk = cmk2; - if (self != this) { - assert(self->fValues == null); // no double init - self->fValues = this->fValues; - self->fVlength = this->fVlength; - assert(self->uValues == null); // must stay null - } - } - - return; // done; no falling through - - } else { - abort("bad meta-coding"); - return; - } - - // Common code here skips a series of values with one coding. - assert(foundc != null); - - assert(vs0.cmk == cmk_ERROR); // no garbage, please - assert(vs0.rp == null); // no garbage, please - assert(vs0.rplimit == null); // no garbage, please - assert(vs0.sum == 0); // no garbage, please - - vs0.init(band_rp, band_limit, foundc); - - // Done with foundc. Free if necessary. - if (to_free != null) { - to_free->free(); - to_free = null; - } - foundc = null; - - coding& c = vs0.c; - CODING_PRIVATE(c.spec); - // assert sane N - assert((uint)N < INT_MAX_VALUE || N == POP_FAVORED_N); - - // Look at the values, or at least skip over them quickly. - if (valueSink == null) { - // Skip and ignore values in the first pass. - c.parseMultiple(band_rp, N, band_limit, B, H); - } else if (N >= 0) { - // Pop coding, {F} sequence, initial run of values... - assert((mode & DISABLE_POP) != 0); - value_stream vs = vs0; - for (int n = 0; n < N; n++) { - int val = vs.getInt(); - valueSink->add(val); - } - band_rp = vs.rp; - } else { - // Pop coding, {F} sequence, final run of values... - assert((mode & DISABLE_POP) != 0); - assert(N == POP_FAVORED_N); - int min = INT_MIN_VALUE; // farthest from the center - // min2 is based on the buggy specification of centrality in version 150.7 - // no known implementations transmit this value, but just in case... - //int min2 = INT_MIN_VALUE; - int last = 0; - // if there were initial runs, find the potential sentinels in them: - for (int i = 0; i < valueSink->length(); i++) { - last = valueSink->get(i); - min = moreCentral(min, last); - //min2 = moreCentral2(min2, last, min); - } - value_stream vs = vs0; - for (;;) { - int val = vs.getInt(); - if (valueSink->length() > 0 && - (val == last || val == min)) //|| val == min2 - break; - valueSink->add(val); - CHECK; - last = val; - min = moreCentral(min, last); - //min2 = moreCentral2(min2, last, min); - } - band_rp = vs.rp; - } - CHECK; - - // Get an accurate upper limit now. - vs0.rplimit = band_rp; - vs0.cm = this; - - return; // success -} - -coding basic_codings[] = { - // This one is not a usable irregular coding, but is used by cp_Utf8_chars. - CODING_INIT(3,128,0,0), - - // Fixed-length codings: - CODING_INIT(1,256,0,0), - CODING_INIT(1,256,1,0), - CODING_INIT(1,256,0,1), - CODING_INIT(1,256,1,1), - CODING_INIT(2,256,0,0), - CODING_INIT(2,256,1,0), - CODING_INIT(2,256,0,1), - CODING_INIT(2,256,1,1), - CODING_INIT(3,256,0,0), - CODING_INIT(3,256,1,0), - CODING_INIT(3,256,0,1), - CODING_INIT(3,256,1,1), - CODING_INIT(4,256,0,0), - CODING_INIT(4,256,1,0), - CODING_INIT(4,256,0,1), - CODING_INIT(4,256,1,1), - - // Full-range variable-length codings: - CODING_INIT(5, 4,0,0), - CODING_INIT(5, 4,1,0), - CODING_INIT(5, 4,2,0), - CODING_INIT(5, 16,0,0), - CODING_INIT(5, 16,1,0), - CODING_INIT(5, 16,2,0), - CODING_INIT(5, 32,0,0), - CODING_INIT(5, 32,1,0), - CODING_INIT(5, 32,2,0), - CODING_INIT(5, 64,0,0), - CODING_INIT(5, 64,1,0), - CODING_INIT(5, 64,2,0), - CODING_INIT(5,128,0,0), - CODING_INIT(5,128,1,0), - CODING_INIT(5,128,2,0), - - CODING_INIT(5, 4,0,1), - CODING_INIT(5, 4,1,1), - CODING_INIT(5, 4,2,1), - CODING_INIT(5, 16,0,1), - CODING_INIT(5, 16,1,1), - CODING_INIT(5, 16,2,1), - CODING_INIT(5, 32,0,1), - CODING_INIT(5, 32,1,1), - CODING_INIT(5, 32,2,1), - CODING_INIT(5, 64,0,1), - CODING_INIT(5, 64,1,1), - CODING_INIT(5, 64,2,1), - CODING_INIT(5,128,0,1), - CODING_INIT(5,128,1,1), - CODING_INIT(5,128,2,1), - - // Variable length subrange codings: - CODING_INIT(2,192,0,0), - CODING_INIT(2,224,0,0), - CODING_INIT(2,240,0,0), - CODING_INIT(2,248,0,0), - CODING_INIT(2,252,0,0), - - CODING_INIT(2, 8,0,1), - CODING_INIT(2, 8,1,1), - CODING_INIT(2, 16,0,1), - CODING_INIT(2, 16,1,1), - CODING_INIT(2, 32,0,1), - CODING_INIT(2, 32,1,1), - CODING_INIT(2, 64,0,1), - CODING_INIT(2, 64,1,1), - CODING_INIT(2,128,0,1), - CODING_INIT(2,128,1,1), - CODING_INIT(2,192,0,1), - CODING_INIT(2,192,1,1), - CODING_INIT(2,224,0,1), - CODING_INIT(2,224,1,1), - CODING_INIT(2,240,0,1), - CODING_INIT(2,240,1,1), - CODING_INIT(2,248,0,1), - CODING_INIT(2,248,1,1), - - CODING_INIT(3,192,0,0), - CODING_INIT(3,224,0,0), - CODING_INIT(3,240,0,0), - CODING_INIT(3,248,0,0), - CODING_INIT(3,252,0,0), - - CODING_INIT(3, 8,0,1), - CODING_INIT(3, 8,1,1), - CODING_INIT(3, 16,0,1), - CODING_INIT(3, 16,1,1), - CODING_INIT(3, 32,0,1), - CODING_INIT(3, 32,1,1), - CODING_INIT(3, 64,0,1), - CODING_INIT(3, 64,1,1), - CODING_INIT(3,128,0,1), - CODING_INIT(3,128,1,1), - CODING_INIT(3,192,0,1), - CODING_INIT(3,192,1,1), - CODING_INIT(3,224,0,1), - CODING_INIT(3,224,1,1), - CODING_INIT(3,240,0,1), - CODING_INIT(3,240,1,1), - CODING_INIT(3,248,0,1), - CODING_INIT(3,248,1,1), - - CODING_INIT(4,192,0,0), - CODING_INIT(4,224,0,0), - CODING_INIT(4,240,0,0), - CODING_INIT(4,248,0,0), - CODING_INIT(4,252,0,0), - - CODING_INIT(4, 8,0,1), - CODING_INIT(4, 8,1,1), - CODING_INIT(4, 16,0,1), - CODING_INIT(4, 16,1,1), - CODING_INIT(4, 32,0,1), - CODING_INIT(4, 32,1,1), - CODING_INIT(4, 64,0,1), - CODING_INIT(4, 64,1,1), - CODING_INIT(4,128,0,1), - CODING_INIT(4,128,1,1), - CODING_INIT(4,192,0,1), - CODING_INIT(4,192,1,1), - CODING_INIT(4,224,0,1), - CODING_INIT(4,224,1,1), - CODING_INIT(4,240,0,1), - CODING_INIT(4,240,1,1), - CODING_INIT(4,248,0,1), - CODING_INIT(4,248,1,1), - CODING_INIT(0,0,0,0) -}; -#define BASIC_INDEX_LIMIT \ - (int)(sizeof(basic_codings)/sizeof(basic_codings[0])-1) - -coding* coding::findByIndex(int idx) { -#ifndef PRODUCT - /* Tricky assert here, constants and gcc complains about it without local. */ - int index_limit = BASIC_INDEX_LIMIT; - assert(_meta_canon_min == 1 && _meta_canon_max+1 == index_limit); -#endif - if (idx >= _meta_canon_min && idx <= _meta_canon_max) - return basic_codings[idx].init(); - else - return null; -} - -#ifndef PRODUCT -const char* coding::string() { - CODING_PRIVATE(spec); - bytes buf; - buf.malloc(100); - char maxS[20], minS[20]; - sprintf(maxS, "%d", max); - sprintf(minS, "%d", min); - if (max == INT_MAX_VALUE) strcpy(maxS, "max"); - if (min == INT_MIN_VALUE) strcpy(minS, "min"); - sprintf((char*)buf.ptr, "(%d,%d,%d,%d) L=%d r=[%s,%s]", - B,H,S,D,L,minS,maxS); - return (const char*) buf.ptr; -} -#endif --- /dev/null 2016-12-13 21:44:46.000000000 -0800 +++ new/jdk/src/jdk.pack/share/native/common-unpack/coding.cpp 2016-12-13 21:44:45.000000000 -0800 @@ -0,0 +1,989 @@ +/* + * Copyright (c) 2002, 2009, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +// -*- C++ -*- +// Small program for unpacking specially compressed Java packages. +// John R. Rose + +#include +#include +#include +#include + +#include "jni_util.h" + +#include "defines.h" +#include "bytes.h" +#include "utils.h" +#include "coding.h" + +#include "constants.h" +#include "unpack.h" + +extern coding basic_codings[]; + +#define CODING_PRIVATE(spec) \ + int spec_ = spec; \ + int B = CODING_B(spec_); \ + int H = CODING_H(spec_); \ + int L = 256 - H; \ + int S = CODING_S(spec_); \ + int D = CODING_D(spec_) + +#define IS_NEG_CODE(S, codeVal) \ + ( (((int)(codeVal)+1) & ((1<> 1) ^ -((int)(ux) & 1) ) + +static maybe_inline +int decode_sign(int S, uint ux) { // == Coding.decodeSign32 + assert(S > 0); + uint sigbits = (ux >> S); + if (IS_NEG_CODE(S, ux)) + return (int)( ~sigbits); + else + return (int)(ux - sigbits); + // Note that (int)(ux-sigbits) can be negative, if ux is large enough. +} + +coding* coding::init() { + if (umax > 0) return this; // already done + assert(spec != 0); // sanity + + // fill in derived fields + CODING_PRIVATE(spec); + + // Return null if 'arb(BHSD)' parameter constraints are not met: + if (B < 1 || B > B_MAX) return null; + if (H < 1 || H > 256) return null; + if (S < 0 || S > 2) return null; + if (D < 0 || D > 1) return null; + if (B == 1 && H != 256) return null; // 1-byte coding must be fixed-size + if (B >= 5 && H == 256) return null; // no 5-byte fixed-size coding + + // first compute the range of the coding, in 64 bits + jlong range = 0; + { + jlong H_i = 1; + for (int i = 0; i < B; i++) { + range += H_i; + H_i *= H; + } + range *= L; + range += H_i; + } + assert(range > 0); // no useless codings, please + + int this_umax; + + // now, compute min and max + if (range >= ((jlong)1 << 32)) { + this_umax = INT_MAX_VALUE; + this->umin = INT_MIN_VALUE; + this->max = INT_MAX_VALUE; + this->min = INT_MIN_VALUE; + } else { + this_umax = (range > INT_MAX_VALUE) ? INT_MAX_VALUE : (int)range-1; + this->max = this_umax; + this->min = this->umin = 0; + if (S != 0 && range != 0) { + int Smask = (1<max = INT_MAX_VALUE; // 32-bit wraparound + else + this->max = maxPos; + if (maxNegCode < 0) + this->min = 0; // No negative codings at all. + else + this->min = decode_sign(S, (uint)maxNegCode); + } + } + + assert(!(isFullRange | isSigned | isSubrange)); // init + if (min < 0) + this->isSigned = true; + if (max < INT_MAX_VALUE && range <= INT_MAX_VALUE) + this->isSubrange = true; + if (max == INT_MAX_VALUE && min == INT_MIN_VALUE) + this->isFullRange = true; + + // do this last, to reduce MT exposure (should have a membar too) + this->umax = this_umax; + + return this; +} + +coding* coding::findBySpec(int spec) { + for (coding* scan = &basic_codings[0]; ; scan++) { + if (scan->spec == spec) + return scan->init(); + if (scan->spec == 0) + break; + } + coding* ptr = NEW(coding, 1); + CHECK_NULL_RETURN(ptr, 0); + coding* c = ptr->initFrom(spec); + if (c == null) { + mtrace('f', ptr, 0); + ::free(ptr); + } else + // else caller should free it... + c->isMalloc = true; + return c; +} + +coding* coding::findBySpec(int B, int H, int S, int D) { + if (B < 1 || B > B_MAX) return null; + if (H < 1 || H > 256) return null; + if (S < 0 || S > 2) return null; + if (D < 0 || D > 1) return null; + return findBySpec(CODING_SPEC(B, H, S, D)); +} + +void coding::free() { + if (isMalloc) { + mtrace('f', this, 0); + ::free(this); + } +} + +void coding_method::reset(value_stream* state) { + assert(state->rp == state->rplimit); // not in mid-stream, please + //assert(this == vs0.cm); + state[0] = vs0; + if (uValues != null) { + uValues->reset(state->helper()); + } +} + +maybe_inline +uint coding::parse(byte* &rp, int B, int H) { + int L = 256-H; + byte* ptr = rp; + // hand peel the i==0 part of the loop: + uint b_i = *ptr++ & 0xFF; + if (B == 1 || b_i < (uint)L) + { rp = ptr; return b_i; } + uint sum = b_i; + uint H_i = H; + assert(B <= B_MAX); + for (int i = 2; i <= B_MAX; i++) { // easy for compilers to unroll if desired + b_i = *ptr++ & 0xFF; + sum += b_i * H_i; + if (i == B || b_i < (uint)L) + { rp = ptr; return sum; } + H_i *= H; + } + assert(false); + return 0; +} + +maybe_inline +uint coding::parse_lgH(byte* &rp, int B, int H, int lgH) { + assert(H == (1< limit) { + abort(ERB); + return; + } + rp = ptr+len; + return; + } + // Note: We assume rp has enough zero-padding. + int L = 256-H; + int n = B; + while (N > 0) { + ptr += 1; + if (--n == 0) { + // end of encoding at B bytes, regardless of byte value + } else { + int b = (ptr[-1] & 0xFF); + if (b >= L) { + // keep going, unless we find a byte < L + continue; + } + } + // found the last byte + N -= 1; + n = B; // reset length counter + // do an error check here + if (ptr > limit) { + abort(ERB); + return; + } + } + rp = ptr; + return; +} + +bool value_stream::hasHelper() { + // If my coding method is a pop-style method, + // then I need a second value stream to transmit + // unfavored values. + // This can be determined by examining fValues. + return cm->fValues != null; +} + +void value_stream::init(byte* rp_, byte* rplimit_, coding* defc) { + rp = rp_; + rplimit = rplimit_; + sum = 0; + cm = null; // no need in the simple case + setCoding(defc); +} + +void value_stream::setCoding(coding* defc) { + if (defc == null) { + unpack_abort("bad coding"); + defc = coding::findByIndex(_meta_canon_min); // random pick for recovery + } + + c = (*defc); + + // choose cmk + cmk = cmk_ERROR; + switch (c.spec) { + case BYTE1_spec: cmk = cmk_BYTE1; break; + case CHAR3_spec: cmk = cmk_CHAR3; break; + case UNSIGNED5_spec: cmk = cmk_UNSIGNED5; break; + case DELTA5_spec: cmk = cmk_DELTA5; break; + case BCI5_spec: cmk = cmk_BCI5; break; + case BRANCH5_spec: cmk = cmk_BRANCH5; break; + default: + if (c.D() == 0) { + switch (c.S()) { + case 0: cmk = cmk_BHS0; break; + case 1: cmk = cmk_BHS1; break; + default: cmk = cmk_BHS; break; + } + } else { + if (c.S() == 1) { + if (c.isFullRange) cmk = cmk_BHS1D1full; + if (c.isSubrange) cmk = cmk_BHS1D1sub; + } + if (cmk == cmk_ERROR) cmk = cmk_BHSD1; + } + } +} + +static maybe_inline +int getPopValue(value_stream* self, uint uval) { + if (uval > 0) { + // note that the initial parse performed a range check + assert(uval <= (uint)self->cm->fVlength); + return self->cm->fValues[uval-1]; + } else { + // take an unfavored value + return self->helper()->getInt(); + } +} + +maybe_inline +int coding::sumInUnsignedRange(int x, int y) { + assert(isSubrange); + int range = (int)(umax+1); + assert(range > 0); + x += y; + if (x != (int)((jlong)(x-y) + (jlong)y)) { + // 32-bit overflow interferes with range reduction. + // Back off from the overflow by adding a multiple of range: + if (x < 0) { + x -= range; + assert(x >= 0); + } else { + x += range; + assert(x < 0); + } + } + if (x < 0) { + x += range; + if (x >= 0) return x; + } else if (x >= range) { + x -= range; + if (x < range) return x; + } else { + // in range + return x; + } + // do it the hard way + x %= range; + if (x < 0) x += range; + return x; +} + +static maybe_inline +int getDeltaValue(value_stream* self, uint uval, bool isSubrange) { + assert((uint)(self->c.isSubrange) == (uint)isSubrange); + assert(self->c.isSubrange | self->c.isFullRange); + if (isSubrange) + return self->sum = self->c.sumInUnsignedRange(self->sum, (int)uval); + else + return self->sum += (int) uval; +} + +bool value_stream::hasValue() { + if (rp < rplimit) return true; + if (cm == null) return false; + if (cm->next == null) return false; + cm->next->reset(this); + return hasValue(); +} + +int value_stream::getInt() { + if (rp >= rplimit) { + // Advance to next coding segment. + if (rp > rplimit || cm == null || cm->next == null) { + // Must perform this check and throw an exception on bad input. + unpack_abort(ERB); + return 0; + } + cm->next->reset(this); + return getInt(); + } + + CODING_PRIVATE(c.spec); + uint uval; + enum { + B5 = 5, + B3 = 3, + H128 = 128, + H64 = 64, + H4 = 4 + }; + switch (cmk) { + case cmk_BHS: + assert(D == 0); + uval = coding::parse(rp, B, H); + if (S == 0) + return (int) uval; + return decode_sign(S, uval); + + case cmk_BHS0: + assert(S == 0 && D == 0); + uval = coding::parse(rp, B, H); + return (int) uval; + + case cmk_BHS1: + assert(S == 1 && D == 0); + uval = coding::parse(rp, B, H); + return DECODE_SIGN_S1(uval); + + case cmk_BYTE1: + assert(c.spec == BYTE1_spec); + assert(B == 1 && H == 256 && S == 0 && D == 0); + return *rp++ & 0xFF; + + case cmk_CHAR3: + assert(c.spec == CHAR3_spec); + assert(B == B3 && H == H128 && S == 0 && D == 0); + return coding::parse_lgH(rp, B3, H128, 7); + + case cmk_UNSIGNED5: + assert(c.spec == UNSIGNED5_spec); + assert(B == B5 && H == H64 && S == 0 && D == 0); + return coding::parse_lgH(rp, B5, H64, 6); + + case cmk_BHSD1: + assert(D == 1); + uval = coding::parse(rp, B, H); + if (S != 0) + uval = (uint) decode_sign(S, uval); + return getDeltaValue(this, uval, (bool)c.isSubrange); + + case cmk_BHS1D1full: + assert(S == 1 && D == 1 && c.isFullRange); + uval = coding::parse(rp, B, H); + uval = (uint) DECODE_SIGN_S1(uval); + return getDeltaValue(this, uval, false); + + case cmk_BHS1D1sub: + assert(S == 1 && D == 1 && c.isSubrange); + uval = coding::parse(rp, B, H); + uval = (uint) DECODE_SIGN_S1(uval); + return getDeltaValue(this, uval, true); + + case cmk_DELTA5: + assert(c.spec == DELTA5_spec); + assert(B == B5 && H == H64 && S == 1 && D == 1 && c.isFullRange); + uval = coding::parse_lgH(rp, B5, H64, 6); + sum += DECODE_SIGN_S1(uval); + return sum; + + case cmk_BCI5: + assert(c.spec == BCI5_spec); + assert(B == B5 && H == H4 && S == 0 && D == 0); + return coding::parse_lgH(rp, B5, H4, 2); + + case cmk_BRANCH5: + assert(c.spec == BRANCH5_spec); + assert(B == B5 && H == H4 && S == 2 && D == 0); + uval = coding::parse_lgH(rp, B5, H4, 2); + return decode_sign(S, uval); + + case cmk_pop: + uval = coding::parse(rp, B, H); + if (S != 0) { + uval = (uint) decode_sign(S, uval); + } + if (D != 0) { + assert(c.isSubrange | c.isFullRange); + if (c.isSubrange) + sum = c.sumInUnsignedRange(sum, (int) uval); + else + sum += (int) uval; + uval = (uint) sum; + } + return getPopValue(this, uval); + + case cmk_pop_BHS0: + assert(S == 0 && D == 0); + uval = coding::parse(rp, B, H); + return getPopValue(this, uval); + + case cmk_pop_BYTE1: + assert(c.spec == BYTE1_spec); + assert(B == 1 && H == 256 && S == 0 && D == 0); + return getPopValue(this, *rp++ & 0xFF); + + default: + break; + } + assert(false); + return 0; +} + +static maybe_inline +int moreCentral(int x, int y) { // used to find end of Pop.{F} + // Suggested implementation from the Pack200 specification: + uint kx = (x >> 31) ^ (x << 1); + uint ky = (y >> 31) ^ (y << 1); + return (kx < ky? x: y); +} +//static maybe_inline +//int moreCentral2(int x, int y, int min) { +// // Strict implementation of buggy 150.7 specification. +// // The bug is that the spec. says absolute-value ties are broken +// // in favor of positive numbers, but the suggested implementation +// // (also mentioned in the spec.) breaks ties in favor of negative numbers. +// if ((x + y) != 0) +// return min; +// else +// // return the other value, which breaks a tie in the positive direction +// return (x > y)? x: y; +//} + +static const byte* no_meta[] = {null}; +#define NO_META (*(byte**)no_meta) +enum { POP_FAVORED_N = -2 }; + +// mode bits +#define DISABLE_RUN 1 // used immediately inside ACodee +#define DISABLE_POP 2 // used recursively in all pop sub-bands + +// This function knows all about meta-coding. +void coding_method::init(byte* &band_rp, byte* band_limit, + byte* &meta_rp, int mode, + coding* defc, int N, + intlist* valueSink) { + assert(N != 0); + + assert(u != null); // must be pre-initialized + //if (u == null) u = unpacker::current(); // expensive + + int op = (meta_rp == null) ? _meta_default : (*meta_rp++ & 0xFF); + coding* foundc = null; + coding* to_free = null; + + if (op == _meta_default) { + foundc = defc; + // and fall through + + } else if (op >= _meta_canon_min && op <= _meta_canon_max) { + foundc = coding::findByIndex(op); + // and fall through + + } else if (op == _meta_arb) { + int args = (*meta_rp++ & 0xFF); + // args = (D:[0..1] + 2*S[0..2] + 8*(B:[1..5]-1)) + int D = ((args >> 0) & 1); + int S = ((args >> 1) & 3); + int B = ((args >> 3) & -1) + 1; + // & (H[1..256]-1) + int H = (*meta_rp++ & 0xFF) + 1; + foundc = coding::findBySpec(B, H, S, D); + to_free = foundc; // findBySpec may dynamically allocate + if (foundc == null) { + abort("illegal arb. coding"); + return; + } + // and fall through + + } else if (op >= _meta_run && op < _meta_pop) { + int args = (op - _meta_run); + // args: KX:[0..3] + 4*(KBFlag:[0..1]) + 8*(ABDef:[0..2]) + int KX = ((args >> 0) & 3); + int KBFlag = ((args >> 2) & 1); + int ABDef = ((args >> 3) & -1); + assert(ABDef <= 2); + // & KB: one of [0..255] if KBFlag=1 + int KB = (!KBFlag? 3: (*meta_rp++ & 0xFF)); + int K = (KB+1) << (KX * 4); + int N2 = (N >= 0) ? N-K : N; + if (N == 0 || (N2 <= 0 && N2 != N)) { + abort("illegal run encoding"); + return; + } + if ((mode & DISABLE_RUN) != 0) { + abort("illegal nested run encoding"); + return; + } + + // & Enc{ ACode } if ADef=0 (ABDef != 1) + // No direct nesting of 'run' in ACode, but in BCode it's OK. + int disRun = mode | DISABLE_RUN; + if (ABDef == 1) { + this->init(band_rp, band_limit, NO_META, disRun, defc, K, valueSink); + } else { + this->init(band_rp, band_limit, meta_rp, disRun, defc, K, valueSink); + } + CHECK; + + // & Enc{ BCode } if BDef=0 (ABDef != 2) + coding_method* tail = U_NEW(coding_method, 1); + CHECK_NULL(tail); + tail->u = u; + + // The 'run' codings may be nested indirectly via 'pop' codings. + // This means that this->next may already be filled in, if + // ACode was of type 'pop' with a 'run' token coding. + // No problem: Just chain the upcoming BCode onto the end. + for (coding_method* self = this; ; self = self->next) { + if (self->next == null) { + self->next = tail; + break; + } + } + + if (ABDef == 2) { + tail->init(band_rp, band_limit, NO_META, mode, defc, N2, valueSink); + } else { + tail->init(band_rp, band_limit, meta_rp, mode, defc, N2, valueSink); + } + // Note: The preceding calls to init should be tail-recursive. + + return; // done; no falling through + + } else if (op >= _meta_pop && op < _meta_limit) { + int args = (op - _meta_pop); + // args: (FDef:[0..1]) + 2*UDef:[0..1] + 4*(TDefL:[0..11]) + int FDef = ((args >> 0) & 1); + int UDef = ((args >> 1) & 1); + int TDefL = ((args >> 2) & -1); + assert(TDefL <= 11); + int TDef = (TDefL > 0); + int TL = (TDefL <= 6) ? (2 << TDefL) : (256 - (4 << (11-TDefL))); + int TH = (256-TL); + if (N <= 0) { + abort("illegal pop encoding"); + return; + } + if ((mode & DISABLE_POP) != 0) { + abort("illegal nested pop encoding"); + return; + } + + // No indirect nesting of 'pop', but 'run' is OK. + int disPop = DISABLE_POP; + + // & Enc{ FCode } if FDef=0 + int FN = POP_FAVORED_N; + assert(valueSink == null); + intlist fValueSink; fValueSink.init(); + coding_method fval; + BYTES_OF(fval).clear(); fval.u = u; + if (FDef != 0) { + fval.init(band_rp, band_limit, NO_META, disPop, defc, FN, &fValueSink); + } else { + fval.init(band_rp, band_limit, meta_rp, disPop, defc, FN, &fValueSink); + } + bytes fvbuf; + fValues = (u->saveTo(fvbuf, fValueSink.b), (int*) fvbuf.ptr); + fVlength = fValueSink.length(); // i.e., the parameter K + fValueSink.free(); + CHECK; + + // Skip the first {F} run in all subsequent passes. + // The next call to this->init(...) will set vs0.rp to point after the {F}. + + // & Enc{ TCode } if TDef=0 (TDefL==0) + if (TDef != 0) { + coding* tcode = coding::findBySpec(1, 256); // BYTE1 + // find the most narrowly sufficient code: + for (int B = 2; B <= B_MAX; B++) { + if (fVlength <= tcode->umax) break; // found it + tcode->free(); + tcode = coding::findBySpec(B, TH); + CHECK_NULL(tcode); + } + if (!(fVlength <= tcode->umax)) { + abort("pop.L value too small"); + return; + } + this->init(band_rp, band_limit, NO_META, disPop, tcode, N, null); + tcode->free(); + } else { + this->init(band_rp, band_limit, meta_rp, disPop, defc, N, null); + } + CHECK; + + // Count the number of zero tokens right now. + // Also verify that they are in bounds. + int UN = 0; // one {U} for each zero in {T} + value_stream vs = vs0; + for (int i = 0; i < N; i++) { + uint val = vs.getInt(); + if (val == 0) UN += 1; + if (!(val <= (uint)fVlength)) { + abort("pop token out of range"); + return; + } + } + vs.done(); + + // & Enc{ UCode } if UDef=0 + if (UN != 0) { + uValues = U_NEW(coding_method, 1); + CHECK_NULL(uValues); + uValues->u = u; + if (UDef != 0) { + uValues->init(band_rp, band_limit, NO_META, disPop, defc, UN, null); + } else { + uValues->init(band_rp, band_limit, meta_rp, disPop, defc, UN, null); + } + } else { + if (UDef == 0) { + int uop = (*meta_rp++ & 0xFF); + if (uop > _meta_canon_max) + // %%% Spec. requires the more strict (uop != _meta_default). + abort("bad meta-coding for empty pop/U"); + } + } + + // Bug fix for 6259542 + // Last of all, adjust vs0.cmk to the 'pop' flavor + for (coding_method* self = this; self != null; self = self->next) { + coding_method_kind cmk2 = cmk_pop; + switch (self->vs0.cmk) { + case cmk_BHS0: cmk2 = cmk_pop_BHS0; break; + case cmk_BYTE1: cmk2 = cmk_pop_BYTE1; break; + default: break; + } + self->vs0.cmk = cmk2; + if (self != this) { + assert(self->fValues == null); // no double init + self->fValues = this->fValues; + self->fVlength = this->fVlength; + assert(self->uValues == null); // must stay null + } + } + + return; // done; no falling through + + } else { + abort("bad meta-coding"); + return; + } + + // Common code here skips a series of values with one coding. + assert(foundc != null); + + assert(vs0.cmk == cmk_ERROR); // no garbage, please + assert(vs0.rp == null); // no garbage, please + assert(vs0.rplimit == null); // no garbage, please + assert(vs0.sum == 0); // no garbage, please + + vs0.init(band_rp, band_limit, foundc); + + // Done with foundc. Free if necessary. + if (to_free != null) { + to_free->free(); + to_free = null; + } + foundc = null; + + coding& c = vs0.c; + CODING_PRIVATE(c.spec); + // assert sane N + assert((uint)N < INT_MAX_VALUE || N == POP_FAVORED_N); + + // Look at the values, or at least skip over them quickly. + if (valueSink == null) { + // Skip and ignore values in the first pass. + c.parseMultiple(band_rp, N, band_limit, B, H); + } else if (N >= 0) { + // Pop coding, {F} sequence, initial run of values... + assert((mode & DISABLE_POP) != 0); + value_stream vs = vs0; + for (int n = 0; n < N; n++) { + int val = vs.getInt(); + valueSink->add(val); + } + band_rp = vs.rp; + } else { + // Pop coding, {F} sequence, final run of values... + assert((mode & DISABLE_POP) != 0); + assert(N == POP_FAVORED_N); + int min = INT_MIN_VALUE; // farthest from the center + // min2 is based on the buggy specification of centrality in version 150.7 + // no known implementations transmit this value, but just in case... + //int min2 = INT_MIN_VALUE; + int last = 0; + // if there were initial runs, find the potential sentinels in them: + for (int i = 0; i < valueSink->length(); i++) { + last = valueSink->get(i); + min = moreCentral(min, last); + //min2 = moreCentral2(min2, last, min); + } + value_stream vs = vs0; + for (;;) { + int val = vs.getInt(); + if (valueSink->length() > 0 && + (val == last || val == min)) //|| val == min2 + break; + valueSink->add(val); + CHECK; + last = val; + min = moreCentral(min, last); + //min2 = moreCentral2(min2, last, min); + } + band_rp = vs.rp; + } + CHECK; + + // Get an accurate upper limit now. + vs0.rplimit = band_rp; + vs0.cm = this; + + return; // success +} + +coding basic_codings[] = { + // This one is not a usable irregular coding, but is used by cp_Utf8_chars. + CODING_INIT(3,128,0,0), + + // Fixed-length codings: + CODING_INIT(1,256,0,0), + CODING_INIT(1,256,1,0), + CODING_INIT(1,256,0,1), + CODING_INIT(1,256,1,1), + CODING_INIT(2,256,0,0), + CODING_INIT(2,256,1,0), + CODING_INIT(2,256,0,1), + CODING_INIT(2,256,1,1), + CODING_INIT(3,256,0,0), + CODING_INIT(3,256,1,0), + CODING_INIT(3,256,0,1), + CODING_INIT(3,256,1,1), + CODING_INIT(4,256,0,0), + CODING_INIT(4,256,1,0), + CODING_INIT(4,256,0,1), + CODING_INIT(4,256,1,1), + + // Full-range variable-length codings: + CODING_INIT(5, 4,0,0), + CODING_INIT(5, 4,1,0), + CODING_INIT(5, 4,2,0), + CODING_INIT(5, 16,0,0), + CODING_INIT(5, 16,1,0), + CODING_INIT(5, 16,2,0), + CODING_INIT(5, 32,0,0), + CODING_INIT(5, 32,1,0), + CODING_INIT(5, 32,2,0), + CODING_INIT(5, 64,0,0), + CODING_INIT(5, 64,1,0), + CODING_INIT(5, 64,2,0), + CODING_INIT(5,128,0,0), + CODING_INIT(5,128,1,0), + CODING_INIT(5,128,2,0), + + CODING_INIT(5, 4,0,1), + CODING_INIT(5, 4,1,1), + CODING_INIT(5, 4,2,1), + CODING_INIT(5, 16,0,1), + CODING_INIT(5, 16,1,1), + CODING_INIT(5, 16,2,1), + CODING_INIT(5, 32,0,1), + CODING_INIT(5, 32,1,1), + CODING_INIT(5, 32,2,1), + CODING_INIT(5, 64,0,1), + CODING_INIT(5, 64,1,1), + CODING_INIT(5, 64,2,1), + CODING_INIT(5,128,0,1), + CODING_INIT(5,128,1,1), + CODING_INIT(5,128,2,1), + + // Variable length subrange codings: + CODING_INIT(2,192,0,0), + CODING_INIT(2,224,0,0), + CODING_INIT(2,240,0,0), + CODING_INIT(2,248,0,0), + CODING_INIT(2,252,0,0), + + CODING_INIT(2, 8,0,1), + CODING_INIT(2, 8,1,1), + CODING_INIT(2, 16,0,1), + CODING_INIT(2, 16,1,1), + CODING_INIT(2, 32,0,1), + CODING_INIT(2, 32,1,1), + CODING_INIT(2, 64,0,1), + CODING_INIT(2, 64,1,1), + CODING_INIT(2,128,0,1), + CODING_INIT(2,128,1,1), + CODING_INIT(2,192,0,1), + CODING_INIT(2,192,1,1), + CODING_INIT(2,224,0,1), + CODING_INIT(2,224,1,1), + CODING_INIT(2,240,0,1), + CODING_INIT(2,240,1,1), + CODING_INIT(2,248,0,1), + CODING_INIT(2,248,1,1), + + CODING_INIT(3,192,0,0), + CODING_INIT(3,224,0,0), + CODING_INIT(3,240,0,0), + CODING_INIT(3,248,0,0), + CODING_INIT(3,252,0,0), + + CODING_INIT(3, 8,0,1), + CODING_INIT(3, 8,1,1), + CODING_INIT(3, 16,0,1), + CODING_INIT(3, 16,1,1), + CODING_INIT(3, 32,0,1), + CODING_INIT(3, 32,1,1), + CODING_INIT(3, 64,0,1), + CODING_INIT(3, 64,1,1), + CODING_INIT(3,128,0,1), + CODING_INIT(3,128,1,1), + CODING_INIT(3,192,0,1), + CODING_INIT(3,192,1,1), + CODING_INIT(3,224,0,1), + CODING_INIT(3,224,1,1), + CODING_INIT(3,240,0,1), + CODING_INIT(3,240,1,1), + CODING_INIT(3,248,0,1), + CODING_INIT(3,248,1,1), + + CODING_INIT(4,192,0,0), + CODING_INIT(4,224,0,0), + CODING_INIT(4,240,0,0), + CODING_INIT(4,248,0,0), + CODING_INIT(4,252,0,0), + + CODING_INIT(4, 8,0,1), + CODING_INIT(4, 8,1,1), + CODING_INIT(4, 16,0,1), + CODING_INIT(4, 16,1,1), + CODING_INIT(4, 32,0,1), + CODING_INIT(4, 32,1,1), + CODING_INIT(4, 64,0,1), + CODING_INIT(4, 64,1,1), + CODING_INIT(4,128,0,1), + CODING_INIT(4,128,1,1), + CODING_INIT(4,192,0,1), + CODING_INIT(4,192,1,1), + CODING_INIT(4,224,0,1), + CODING_INIT(4,224,1,1), + CODING_INIT(4,240,0,1), + CODING_INIT(4,240,1,1), + CODING_INIT(4,248,0,1), + CODING_INIT(4,248,1,1), + CODING_INIT(0,0,0,0) +}; +#define BASIC_INDEX_LIMIT \ + (int)(sizeof(basic_codings)/sizeof(basic_codings[0])-1) + +coding* coding::findByIndex(int idx) { +#ifndef PRODUCT + /* Tricky assert here, constants and gcc complains about it without local. */ + int index_limit = BASIC_INDEX_LIMIT; + assert(_meta_canon_min == 1 && _meta_canon_max+1 == index_limit); +#endif + if (idx >= _meta_canon_min && idx <= _meta_canon_max) + return basic_codings[idx].init(); + else + return null; +} + +#ifndef PRODUCT +const char* coding::string() { + CODING_PRIVATE(spec); + bytes buf; + buf.malloc(100); + char maxS[20], minS[20]; + sprintf(maxS, "%d", max); + sprintf(minS, "%d", min); + if (max == INT_MAX_VALUE) strcpy(maxS, "max"); + if (min == INT_MIN_VALUE) strcpy(minS, "min"); + sprintf((char*)buf.ptr, "(%d,%d,%d,%d) L=%d r=[%s,%s]", + B,H,S,D,L,minS,maxS); + return (const char*) buf.ptr; +} +#endif