1 /*
   2  * Copyright (c) 2001, 2005, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package com.sun.java.util.jar.pack;
  27 
  28 import java.io.IOException;
  29 import java.io.InputStream;
  30 import java.io.OutputStream;
  31 import java.util.HashMap;
  32 
  33 /**
  34  * Define the conversions between sequences of small integers and raw bytes.
  35  * This is a schema of encodings which incorporates varying lengths,
  36  * varying degrees of length variability, and varying amounts of signed-ness.
  37  * @author John Rose
  38  */
  39 class Coding implements Constants, Comparable, CodingMethod, Histogram.BitMetric {
  40     /*
  41       Coding schema for single integers, parameterized by (B,H,S):
  42 
  43       Let B in [1,5], H in [1,256], S in [0,3].
  44       (S limit is arbitrary.  B follows the 32-bit limit.  H is byte size.)
  45 
  46       A given (B,H,S) code varies in length from 1 to B bytes.
  47 
  48       The 256 values a byte may take on are divided into L=(256-H) and H
  49       values, with all the H values larger than the L values.
  50       (That is, the L values are [0,L) and the H are [L,256).)
  51 
  52       The last byte is always either the B-th byte, a byte with "L value"
  53       (<L), or both.  There is no other byte that satisfies these conditions.
  54       All bytes before the last always have "H values" (>=L).
  55 
  56       Therefore, if L==0, the code always has the full length of B bytes.
  57       The coding then becomes a classic B-byte little-endian unsigned integer.
  58       (Also, if L==128, the high bit of each byte acts signals the presence
  59       of a following byte, up to the maximum length.)
  60 
  61       In the unsigned case (S==0), the coding is compact and monotonic
  62       in the ordering of byte sequences defined by appending zero bytes
  63       to pad them to a common length B, reversing them, and ordering them
  64       lexicographically.  (This agrees with "little-endian" byte order.)
  65 
  66       Therefore, the unsigned value of a byte sequence may be defined as:
  67       <pre>
  68         U(b0)           == b0
  69                            in [0..L)
  70                            or [0..256) if B==1 (**)
  71 
  72         U(b0,b1)        == b0 + b1*H
  73                            in [L..L*(1+H))
  74                            or [L..L*(1+H) + H^2) if B==2
  75 
  76         U(b0,b1,b2)     == b0 + b1*H + b2*H^2
  77                            in [L*(1+H)..L*(1+H+H^2))
  78                            or [L*(1+H)..L*(1+H+H^2) + H^3) if B==3
  79 
  80         U(b[i]: i<n)    == Sum[i<n]( b[i] * H^i )
  81                            up to  L*Sum[i<n]( H^i )
  82                            or to  L*Sum[i<n]( H^i ) + H^n if n==B
  83       </pre>
  84 
  85       (**) If B==1, the values H,L play no role in the coding.
  86       As a convention, we require that any (1,H,S) code must always
  87       encode values less than H.  Thus, a simple unsigned byte is coded
  88       specifically by the code (1,256,0).
  89 
  90       (Properly speaking, the unsigned case should be parameterized as
  91       S==Infinity.  If the schema were regular, the case S==0 would really
  92       denote a numbering in which all coded values are negative.)
  93 
  94       If S>0, the unsigned value of a byte sequence is regarded as a binary
  95       integer.  If any of the S low-order bits are zero, the corresponding
  96       signed value will be non-negative.  If all of the S low-order bits
  97       (S>0) are one, the the corresponding signed value will be negative.
  98 
  99       The non-negative signed values are compact and monotonically increasing
 100       (from 0) in the ordering of the corresponding unsigned values.
 101 
 102       The negative signed values are compact and monotonically decreasing
 103       (from -1) in the ordering of the corresponding unsigned values.
 104 
 105       In essence, the low-order S bits function as a collective sign bit
 106       for negative signed numbers, and as a low-order base-(2^S-1) digit
 107       for non-negative signed numbers.
 108 
 109       Therefore, the signed value corresponding to an unsigned value is:
 110       <pre>
 111         Sgn(x)  == x                               if S==0
 112         Sgn(x)  == (x / 2^S)*(2^S-1) + (x % 2^S),  if S>0, (x % 2^S) < 2^S-1
 113         Sgn(x)  == -(x / 2^S)-1,                   if S>0, (x % 2^S) == 2^S-1
 114       </pre>
 115 
 116       Finally, the value of a byte sequence, given the coding parameters
 117       (B,H,S), is defined as:
 118       <pre>
 119         V(b[i]: i<n)  == Sgn(U(b[i]: i<n))
 120       </pre>
 121 
 122       The extremal positive and negative signed value for a given range
 123       of unsigned values may be found by sign-encoding the largest unsigned
 124       value which is not 2^S-1 mod 2^S, and that which is, respectively.
 125 
 126       Because B,H,S are variable, this is not a single coding but a schema
 127       of codings.  For optimal compression, it is necessary to adaptively
 128       select specific codings to the data being compressed.
 129 
 130       For example, if a sequence of values happens never to be negative,
 131       S==0 is the best choice.  If the values are equally balanced between
 132       negative and positive, S==1.  If negative values are rare, then S>1
 133       is more appropriate.
 134 
 135       A (B,H,S) encoding is called a "subrange" if it does not encode
 136       the largest 32-bit value, and if the number R of values it does
 137       encode can be expressed as a positive 32-bit value.  (Note that
 138       B=1 implies R<=256, B=2 implies R<=65536, etc.)
 139 
 140       A delta version of a given (B,H,S) coding encodes an array of integers
 141       by writing their successive differences in the (B,H,S) coding.
 142       The original integers themselves may be recovered by making a
 143       running accumulation of sum of the differences as they are read.
 144 
 145       As a special case, if a (B,H,S) encoding is a subrange, its delta
 146       version will only encode arrays of numbers in the coding's unsigned
 147       range, [0..R-1].  The coding of deltas is still in the normal signed
 148       range, if S!=0.  During delta encoding, all subtraction results are
 149       reduced to the signed range, by adding multiples of R.  Likewise,
 150 .     during encoding, all addition results are reduced to the unsigned range.
 151       This special case for subranges allows the benefits of wraparound
 152       when encoding correlated sequences of very small positive numbers.
 153      */
 154 
 155     // Code-specific limits:
 156     private static int saturate32(long x) {
 157         if (x > Integer.MAX_VALUE)   return Integer.MAX_VALUE;
 158         if (x < Integer.MIN_VALUE)   return Integer.MIN_VALUE;
 159         return (int)x;
 160     }
 161     private static long codeRangeLong(int B, int H) {
 162         return codeRangeLong(B, H, B);
 163     }
 164     private static long codeRangeLong(int B, int H, int nMax) {
 165         // Code range for a all (B,H) codes of length <=nMax (<=B).
 166         // n < B:   L*Sum[i<n]( H^i )
 167         // n == B:  L*Sum[i<B]( H^i ) + H^B
 168         assert(nMax >= 0 && nMax <= B);
 169         assert(B >= 1 && B <= 5);
 170         assert(H >= 1 && H <= 256);
 171         if (nMax == 0)  return 0;  // no codes of zero length
 172         if (B == 1)     return H;  // special case; see (**) above
 173         int L = 256-H;
 174         long sum = 0;
 175         long H_i = 1;
 176         for (int n = 1; n <= nMax; n++) {
 177             sum += H_i;
 178             H_i *= H;
 179         }
 180         sum *= L;
 181         if (nMax == B)
 182             sum += H_i;
 183         return sum;
 184     }
 185     /** Largest int representable by (B,H,S) in up to nMax bytes. */
 186     public static int codeMax(int B, int H, int S, int nMax) {
 187         //assert(S >= 0 && S <= S_MAX);
 188         long range = codeRangeLong(B, H, nMax);
 189         if (range == 0)
 190             return -1;  // degenerate max value for empty set of codes
 191         if (S == 0 || range >= (long)1<<32)
 192             return saturate32(range-1);
 193         long maxPos = range-1;
 194         while (isNegativeCode(maxPos, S))  --maxPos;
 195         if (maxPos < 0)  return -1;  // No positive codings at all.
 196         int smax = decodeSign32(maxPos, S);
 197         // check for 32-bit wraparound:
 198         if (smax < 0)
 199             return Integer.MAX_VALUE;
 200         return smax;
 201     }
 202     /** Smallest int representable by (B,H,S) in up to nMax bytes.
 203         Returns Integer.MIN_VALUE if 32-bit wraparound covers
 204         the entire negative range.
 205      */
 206     public static int codeMin(int B, int H, int S, int nMax) {
 207         //assert(S >= 0 && S <= S_MAX);
 208         long range = codeRangeLong(B, H, nMax);
 209         if (range >= (long)1<<32 && nMax == B) {
 210             // Can code negative values via 32-bit wraparound.
 211             return Integer.MIN_VALUE;
 212         }
 213         if (S == 0) {
 214             return 0;
 215         }
 216         int Smask = (1<<S)-1;
 217         long maxNeg = range-1;
 218         while (!isNegativeCode(maxNeg, S))  --maxNeg;
 219         if (maxNeg < 0)  return 0;  // No negative codings at all.
 220         return decodeSign32(maxNeg, S);
 221     }
 222 
 223     // Some of the arithmetic below is on unsigned 32-bit integers.
 224     // These must be represented in Java as longs in the range [0..2^32-1].
 225     // The conversion to a signed int is just the Java cast (int), but
 226     // the conversion to an unsigned int is the following little method:
 227     private static long toUnsigned32(int sx) {
 228         return ((long)sx << 32) >>> 32;
 229     }
 230 
 231     // Sign encoding:
 232     private static boolean isNegativeCode(long ux, int S) {
 233         assert(S > 0);
 234         assert(ux >= -1);  // can be out of 32-bit range; who cares
 235         int Smask = (1<<S)-1;
 236         return (((int)ux+1) & Smask) == 0;
 237     }
 238     private static boolean hasNegativeCode(int sx, int S) {
 239         assert(S > 0);
 240         // If S>=2 very low negatives are coded by 32-bit-wrapped positives.
 241         // The lowest negative representable by a negative coding is
 242         // ~(umax32 >> S), and the next lower number is coded by wrapping
 243         // the highest positive:
 244         //    CodePos(umax32-1)  ->  (umax32-1)-((umax32-1)>>S)
 245         // which simplifies to ~(umax32 >> S)-1.
 246         return (0 > sx) && (sx >= ~(-1>>>S));
 247     }
 248     private static int decodeSign32(long ux, int S) {
 249         assert(ux == toUnsigned32((int)ux))  // must be unsigned 32-bit number
 250             : (Long.toHexString(ux));
 251         if (S == 0) {
 252             return (int) ux;  // cast to signed int
 253         }
 254         int sx;
 255         if (isNegativeCode(ux, S)) {
 256             // Sgn(x)  == -(x / 2^S)-1
 257             sx = ~((int)ux >>> S);
 258         } else {
 259             // Sgn(x)  == (x / 2^S)*(2^S-1) + (x % 2^S)
 260             sx = (int)ux - ((int)ux >>> S);
 261         }
 262         // Assert special case of S==1:
 263         assert(!(S == 1) || sx == (((int)ux >>> 1) ^ -((int)ux & 1)));
 264         return sx;
 265     }
 266     private static long encodeSign32(int sx, int S) {
 267         if (S == 0) {
 268             return toUnsigned32(sx);  // unsigned 32-bit int
 269         }
 270         int Smask = (1<<S)-1;
 271         long ux;
 272         if (!hasNegativeCode(sx, S)) {
 273             // InvSgn(sx) = (sx / (2^S-1))*2^S + (sx % (2^S-1))
 274             ux = sx + (toUnsigned32(sx) / Smask);
 275         } else {
 276             // InvSgn(sx) = (-sx-1)*2^S + (2^S-1)
 277             ux = (-sx << S) - 1;
 278         }
 279         ux = toUnsigned32((int)ux);
 280         assert(sx == decodeSign32(ux, S))
 281             : (Long.toHexString(ux)+" -> "+
 282                Integer.toHexString(sx)+" != "+
 283                Integer.toHexString(decodeSign32(ux, S)));
 284         return ux;
 285     }
 286 
 287     // Top-level coding of single integers:
 288     public static void writeInt(byte[] out, int[] outpos, int sx, int B, int H, int S) {
 289         long ux = encodeSign32(sx, S);
 290         assert(ux == toUnsigned32((int)ux));
 291         assert(ux < codeRangeLong(B, H))
 292             : Long.toHexString(ux);
 293         int L = 256-H;
 294         long sum = ux;
 295         int pos = outpos[0];
 296         for (int i = 0; i < B-1; i++) {
 297             if (sum < L)
 298                 break;
 299             sum -= L;
 300             int b_i = (int)( L + (sum % H) );
 301             sum /= H;
 302             out[pos++] = (byte)b_i;
 303         }
 304         out[pos++] = (byte)sum;
 305         // Report number of bytes written by updating outpos[0]:
 306         outpos[0] = pos;
 307         // Check right away for mis-coding.
 308         //assert(sx == readInt(out, new int[1], B, H, S));
 309     }
 310     public static int readInt(byte[] in, int[] inpos, int B, int H, int S) {
 311         // U(b[i]: i<n) == Sum[i<n]( b[i] * H^i )
 312         int L = 256-H;
 313         long sum = 0;
 314         long H_i = 1;
 315         int pos = inpos[0];
 316         for (int i = 0; i < B; i++) {
 317             int b_i = in[pos++] & 0xFF;
 318             sum += b_i*H_i;
 319             H_i *= H;
 320             if (b_i < L)  break;
 321         }
 322         //assert(sum >= 0 && sum < codeRangeLong(B, H));
 323         // Report number of bytes read by updating inpos[0]:
 324         inpos[0] = pos;
 325         return decodeSign32(sum, S);
 326     }
 327     // The Stream version doesn't fetch a byte unless it is needed for coding.
 328     public static int readIntFrom(InputStream in, int B, int H, int S) throws IOException {
 329         // U(b[i]: i<n) == Sum[i<n]( b[i] * H^i )
 330         int L = 256-H;
 331         long sum = 0;
 332         long H_i = 1;
 333         for (int i = 0; i < B; i++) {
 334             int b_i = in.read();
 335             if (b_i < 0)  throw new RuntimeException("unexpected EOF");
 336             sum += b_i*H_i;
 337             H_i *= H;
 338             if (b_i < L)  break;
 339         }
 340         assert(sum >= 0 && sum < codeRangeLong(B, H));
 341         return decodeSign32(sum, S);
 342     }
 343 
 344     public static final int B_MAX = 5;    /* B: [1,5] */
 345     public static final int H_MAX = 256;  /* H: [1,256] */
 346     public static final int S_MAX = 2;    /* S: [0,2] */
 347 
 348     // END OF STATICS.
 349 
 350     private final int B; /*1..5*/       // # bytes (1..5)
 351     private final int H; /*1..256*/     // # codes requiring a higher byte
 352     private final int L; /*0..255*/     // # codes requiring a higher byte
 353     private final int S; /*0..3*/       // # low-order bits representing sign
 354     private final int del; /*0..2*/     // type of delta encoding (0 == none)
 355     private final int min;              // smallest representable value
 356     private final int max;              // largest representable value
 357     private final int umin;             // smallest representable uns. value
 358     private final int umax;             // largest representable uns. value
 359     private final int[] byteMin;        // smallest repr. value, given # bytes
 360     private final int[] byteMax;        // largest repr. value, given # bytes
 361 
 362     private Coding(int B, int H, int S) {
 363         this(B, H, S, 0);
 364     }
 365     private Coding(int B, int H, int S, int del) {
 366         this.B = B;
 367         this.H = H;
 368         this.L = 256-H;
 369         this.S = S;
 370         this.del = del;
 371         this.min = codeMin(B, H, S, B);
 372         this.max = codeMax(B, H, S, B);
 373         this.umin = codeMin(B, H, 0, B);
 374         this.umax = codeMax(B, H, 0, B);
 375         this.byteMin = new int[B];
 376         this.byteMax = new int[B];
 377 
 378         for (int nMax = 1; nMax <= B; nMax++) {
 379             byteMin[nMax-1] = codeMin(B, H, S, nMax);
 380             byteMax[nMax-1] = codeMax(B, H, S, nMax);
 381         }
 382     }
 383 
 384     public boolean equals(Object x) {
 385         if (!(x instanceof Coding))  return false;
 386         Coding that = (Coding) x;
 387         if (this.B != that.B)  return false;
 388         if (this.H != that.H)  return false;
 389         if (this.S != that.S)  return false;
 390         if (this.del != that.del)  return false;
 391         return true;
 392     }
 393 
 394     public int hashCode() {
 395         return (del<<14)+(S<<11)+(B<<8)+(H<<0);
 396     }
 397 
 398     private static HashMap codeMap;
 399 
 400     private static synchronized Coding of(int B, int H, int S, int del) {
 401         if (codeMap == null)  codeMap = new HashMap();
 402         Coding x0 = new Coding(B, H, S, del);
 403         Coding x1 = (Coding) codeMap.get(x0);
 404         if (x1 == null)  codeMap.put(x0, x1 = x0);
 405         return x1;
 406     }
 407 
 408     public static Coding of(int B, int H) {
 409         return of(B, H, 0, 0);
 410     }
 411 
 412     public static Coding of(int B, int H, int S) {
 413         return of(B, H, S, 0);
 414     }
 415 
 416     public boolean canRepresentValue(int x) {
 417         if (isSubrange())
 418             return canRepresentUnsigned(x);
 419         else
 420             return canRepresentSigned(x);
 421     }
 422     /** Can this coding represent a single value, possibly a delta?
 423      *  This ignores the D property.  That is, for delta codings,
 424      *  this tests whether a delta value of 'x' can be coded.
 425      *  For signed delta codings which produce unsigned end values,
 426      *  use canRepresentUnsigned.
 427      */
 428     public boolean canRepresentSigned(int x) {
 429         return (x >= min && x <= max);
 430     }
 431     /** Can this coding, apart from its S property,
 432      *  represent a single value?  (Negative values
 433      *  can only be represented via 32-bit overflow,
 434      *  so this returns true for negative values
 435      *  if isFullRange is true.)
 436      */
 437     public boolean canRepresentUnsigned(int x) {
 438         return (x >= umin && x <= umax);
 439     }
 440 
 441     // object-oriented code/decode
 442     public int readFrom(byte[] in, int[] inpos) {
 443         return readInt(in, inpos, B, H, S);
 444     }
 445     public void writeTo(byte[] out, int[] outpos, int x) {
 446         writeInt(out, outpos, x, B, H, S);
 447     }
 448 
 449     // Stream versions
 450     public int readFrom(InputStream in) throws IOException {
 451         return readIntFrom(in, B, H, S);
 452     }
 453     public void writeTo(OutputStream out, int x) throws IOException {
 454         byte[] buf = new byte[B];
 455         int[] pos = new int[1];
 456         writeInt(buf, pos, x, B, H, S);
 457         out.write(buf, 0, pos[0]);
 458     }
 459 
 460     // Stream/array versions
 461     public void readArrayFrom(InputStream in, int[] a, int start, int end) throws IOException {
 462         // %%% use byte[] buffer
 463         for (int i = start; i < end; i++)
 464             a[i] = readFrom(in);
 465         for (int dstep = 0; dstep < del; dstep++) {
 466             long state = 0;
 467             for (int i = start; i < end; i++) {
 468                 state += a[i];
 469                 // Reduce array values to the required range.
 470                 if (isSubrange()) {
 471                     state = reduceToUnsignedRange(state);
 472                 }
 473                 a[i] = (int) state;
 474             }
 475         }
 476     }
 477     public void writeArrayTo(OutputStream out, int[] a, int start, int end) throws IOException {
 478         if (end <= start)  return;
 479         for (int dstep = 0; dstep < del; dstep++) {
 480             int[] deltas;
 481             if (!isSubrange())
 482                 deltas = makeDeltas(a, start, end, 0, 0);
 483             else
 484                 deltas = makeDeltas(a, start, end, min, max);
 485             a = deltas;
 486             start = 0;
 487             end = deltas.length;
 488         }
 489         // The following code is a buffered version of this loop:
 490         //    for (int i = start; i < end; i++)
 491         //        writeTo(out, a[i]);
 492         byte[] buf = new byte[1<<8];
 493         final int bufmax = buf.length-B;
 494         int[] pos = { 0 };
 495         for (int i = start; i < end; ) {
 496             while (pos[0] <= bufmax) {
 497                 writeTo(buf, pos, a[i++]);
 498                 if (i >= end)  break;
 499             }
 500             out.write(buf, 0, pos[0]);
 501             pos[0] = 0;
 502         }
 503     }
 504 
 505     /** Tell if the range of this coding (number of distinct
 506      *  representable values) can be expressed in 32 bits.
 507      */
 508     boolean isSubrange() {
 509         return max < Integer.MAX_VALUE
 510             && ((long)max - (long)min + 1) <= Integer.MAX_VALUE;
 511     }
 512 
 513     /** Tell if this coding can represent all 32-bit values.
 514      *  Note:  Some codings, such as unsigned ones, can be neither
 515      *  subranges nor full-range codings.
 516      */
 517     boolean isFullRange() {
 518         return max == Integer.MAX_VALUE && min == Integer.MIN_VALUE;
 519     }
 520 
 521     /** Return the number of values this coding (a subrange) can represent. */
 522     int getRange() {
 523         assert(isSubrange());
 524         return (max - min) + 1;  // range includes both min & max
 525     }
 526 
 527     Coding setB(int B) { return Coding.of(B, H, S, del); }
 528     Coding setH(int H) { return Coding.of(B, H, S, del); }
 529     Coding setS(int S) { return Coding.of(B, H, S, del); }
 530     Coding setL(int L) { return setH(256-L); }
 531     Coding setD(int del) { return Coding.of(B, H, S, del); }
 532     Coding getDeltaCoding() { return setD(del+1); }
 533 
 534     /** Return a coding suitable for representing summed, modulo-reduced values. */
 535     Coding getValueCoding() {
 536         if (isDelta())
 537             return Coding.of(B, H, 0, del-1);
 538         else
 539             return this;
 540     }
 541 
 542     /** Reduce the given value to be within this coding's unsigned range,
 543      *  by adding or subtracting a multiple of (max-min+1).
 544      */
 545     int reduceToUnsignedRange(long value) {
 546         if (value == (int)value && canRepresentUnsigned((int)value))
 547             // already in unsigned range
 548             return (int)value;
 549         int range = getRange();
 550         assert(range > 0);
 551         value %= range;
 552         if (value < 0)  value += range;
 553         assert(canRepresentUnsigned((int)value));
 554         return (int)value;
 555     }
 556 
 557     int reduceToSignedRange(int value) {
 558         if (canRepresentSigned(value))
 559             // already in signed range
 560             return value;
 561         return reduceToSignedRange(value, min, max);
 562     }
 563     static int reduceToSignedRange(int value, int min, int max) {
 564         int range = (max-min+1);
 565         assert(range > 0);
 566         int value0 = value;
 567         value -= min;
 568         if (value < 0 && value0 >= 0) {
 569             // 32-bit overflow, but the next '%=' op needs to be unsigned
 570             value -= range;
 571             assert(value >= 0);
 572         }
 573         value %= range;
 574         if (value < 0)  value += range;
 575         value += min;
 576         assert(min <= value && value <= max);
 577         return value;
 578     }
 579 
 580     /** Does this coding support at least one negative value?
 581         Includes codings that can do so via 32-bit wraparound.
 582      */
 583     boolean isSigned() {
 584         return min < 0;
 585     }
 586     /** Does this coding code arrays by making successive differences? */
 587     boolean isDelta() {
 588         return del != 0;
 589     }
 590 
 591     public int B() { return B; }
 592     public int H() { return H; }
 593     public int L() { return L; }
 594     public int S() { return S; }
 595     public int del() { return del; }
 596     public int min() { return min; }
 597     public int max() { return max; }
 598     public int umin() { return umin; }
 599     public int umax() { return umax; }
 600     public int byteMin(int b) { return byteMin[b-1]; }
 601     public int byteMax(int b) { return byteMax[b-1]; }
 602 
 603     public int compareTo(Object x) {
 604         Coding that = (Coding) x;
 605         int dkey = this.del - that.del;
 606         if (dkey == 0)
 607             dkey = this.B - that.B;
 608         if (dkey == 0)
 609             dkey = this.H - that.H;
 610         if (dkey == 0)
 611             dkey = this.S - that.S;
 612         return dkey;
 613     }
 614 
 615     /** Heuristic measure of the difference between two codings. */
 616     public int distanceFrom(Coding that) {
 617         int diffdel = this.del - that.del;
 618         if (diffdel < 0)  diffdel = -diffdel;
 619         int diffS = this.S - that.S;
 620         if (diffS < 0)  diffS = -diffS;
 621         int diffB = this.B - that.B;
 622         if (diffB < 0)  diffB = -diffB;
 623         int diffHL;
 624         if (this.H == that.H) {
 625             diffHL = 0;
 626         } else {
 627             // Distance in log space of H (<=128) and L (<128).
 628             int thisHL = this.getHL();
 629             int thatHL = that.getHL();
 630             // Double the accuracy of the log:
 631             thisHL *= thisHL;
 632             thatHL *= thatHL;
 633             if (thisHL > thatHL)
 634                 diffHL = ceil_lg2(1+(thisHL-1)/thatHL);
 635             else
 636                 diffHL = ceil_lg2(1+(thatHL-1)/thisHL);
 637         }
 638         int norm = 5*(diffdel + diffS + diffB) + diffHL;
 639         assert(norm != 0 || this.compareTo(that) == 0);
 640         return norm;
 641     }
 642     private int getHL() {
 643         // Follow H in log space by the multiplicative inverse of L.
 644         if (H <= 128)  return H;
 645         if (L >= 1)    return 128*128/L;
 646         return 128*256;
 647     }
 648 
 649     /** ceiling(log[2](x)): {1->0, 2->1, 3->2, 4->2, ...} */
 650     static int ceil_lg2(int x) {
 651         assert(x-1 >= 0);  // x in range (int.MIN_VALUE -> 32)
 652         x -= 1;
 653         int lg = 0;
 654         while (x != 0) {
 655             lg++;
 656             x >>= 1;
 657         }
 658         return lg;
 659     }
 660 
 661     static private final byte[] byteBitWidths = new byte[0x100];
 662     static {
 663         for (int b = 0; b < byteBitWidths.length; b++) {
 664             byteBitWidths[b] = (byte) ceil_lg2(b + 1);
 665         }
 666         for (int i = 10; i >= 0; i = (i << 1) - (i >> 3)) {
 667             assert(bitWidth(i) == ceil_lg2(i + 1));
 668         }
 669     }
 670 
 671     /** Number of significant bits in i, not counting sign bits.
 672      *  For positive i, it is ceil_lg2(i + 1).
 673      */
 674     static int bitWidth(int i) {
 675         if (i < 0)  i = ~i;  // change sign
 676         int w = 0;
 677         int lo = i;
 678         if (lo < byteBitWidths.length)
 679             return byteBitWidths[lo];
 680         int hi;
 681         hi = (lo >>> 16);
 682         if (hi != 0) {
 683             lo = hi;
 684             w += 16;
 685         }
 686         hi = (lo >>> 8);
 687         if (hi != 0) {
 688             lo = hi;
 689             w += 8;
 690         }
 691         w += byteBitWidths[lo];
 692         //assert(w == ceil_lg2(i + 1));
 693         return w;
 694     }
 695 
 696     /** Create an array of successive differences.
 697      *  If min==max, accept any and all 32-bit overflow.
 698      *  Otherwise, avoid 32-bit overflow, and reduce all differences
 699      *  to a value in the given range, by adding or subtracting
 700      *  multiples of the range cardinality (max-min+1).
 701      *  Also, the values are assumed to be in the range [0..(max-min)].
 702      */
 703     static int[] makeDeltas(int[] values, int start, int end,
 704                             int min, int max) {
 705         assert(max >= min);
 706         int count = end-start;
 707         int[] deltas = new int[count];
 708         int state = 0;
 709         if (min == max) {
 710             for (int i = 0; i < count; i++) {
 711                 int value = values[start+i];
 712                 deltas[i] = value - state;
 713                 state = value;
 714             }
 715         } else {
 716             for (int i = 0; i < count; i++) {
 717                 int value = values[start+i];
 718                 assert(value >= 0 && value+min <= max);
 719                 int delta = value - state;
 720                 assert(delta == (long)value - (long)state); // no overflow
 721                 state = value;
 722                 // Reduce delta values to the required range.
 723                 delta = reduceToSignedRange(delta, min, max);
 724                 deltas[i] = delta;
 725             }
 726         }
 727         return deltas;
 728     }
 729 
 730     boolean canRepresent(int minValue, int maxValue) {
 731         assert(minValue <= maxValue);
 732         if (del > 0) {
 733             if (isSubrange()) {
 734                 // We will force the values to reduce to the right subrange.
 735                 return canRepresentUnsigned(maxValue)
 736                     && canRepresentUnsigned(minValue);
 737             } else {
 738                 // Huge range; delta values must assume full 32-bit range.
 739                 return isFullRange();
 740             }
 741         }
 742         else
 743             // final values must be representable
 744             return canRepresentSigned(maxValue)
 745                 && canRepresentSigned(minValue);
 746     }
 747 
 748     boolean canRepresent(int[] values, int start, int end) {
 749         int len = end-start;
 750         if (len == 0)       return true;
 751         if (isFullRange())  return true;
 752         // Calculate max, min:
 753         int max = values[start];
 754         int min = max;
 755         for (int i = 1; i < len; i++) {
 756             int value = values[start+i];
 757             if (max < value)  max = value;
 758             if (min > value)  min = value;
 759         }
 760         return canRepresent(min, max);
 761     }
 762 
 763     public double getBitLength(int value) {  // implements BitMetric
 764         return (double) getLength(value) * 8;
 765     }
 766 
 767     /** How many bytes are in the coding of this value?
 768      *  Returns Integer.MAX_VALUE if the value has no coding.
 769      *  The coding must not be a delta coding, since there is no
 770      *  definite size for a single value apart from its context.
 771      */
 772     public int getLength(int value) {
 773         if (isDelta() && isSubrange()) {
 774             if (!canRepresentUnsigned(value))
 775                 return Integer.MAX_VALUE;
 776             value = reduceToSignedRange(value);
 777         }
 778         if (value >= 0) {
 779             for (int n = 0; n < B; n++) {
 780                 if (value <= byteMax[n])  return n+1;
 781             }
 782         } else {
 783             for (int n = 0; n < B; n++) {
 784                 if (value >= byteMin[n])  return n+1;
 785             }
 786         }
 787         return Integer.MAX_VALUE;
 788     }
 789 
 790     public int getLength(int[] values, int start, int end) {
 791         int len = end-start;
 792         if (B == 1)  return len;
 793         if (L == 0)  return len * B;
 794         if (isDelta()) {
 795             int[] deltas;
 796             if (!isSubrange())
 797                 deltas = makeDeltas(values, start, end, 0, 0);
 798             else
 799                 deltas = makeDeltas(values, start, end, min, max);
 800             //return Coding.of(B, H, S).getLength(deltas, 0, len);
 801             values = deltas;
 802             start = 0;
 803             end = values.length;
 804         }
 805         int sum = len;  // at least 1 byte per
 806         // add extra bytes for extra-long values
 807         for (int n = 1; n <= B; n++) {
 808             // what is the coding interval [min..max] for n bytes?
 809             int max = byteMax[n-1];
 810             int min = byteMin[n-1];
 811             int longer = 0;  // count of guys longer than n bytes
 812             for (int i = 0; i < len; i++) {
 813                 int value = values[start+i];
 814                 if (value >= 0) {
 815                     if (value > max)  longer++;
 816                 } else {
 817                     if (value < min)  longer++;
 818                 }
 819             }
 820             if (longer == 0)  break;  // no more passes needed
 821             if (n == B)  return Integer.MAX_VALUE;  // cannot represent!
 822             sum += longer;
 823         }
 824         return sum;
 825     }
 826 
 827     public byte[] getMetaCoding(Coding dflt) {
 828         if (dflt == this)  return new byte[]{ (byte) _meta_default };
 829         int canonicalIndex = BandStructure.indexOf(this);
 830         if (canonicalIndex > 0)
 831             return new byte[]{ (byte) canonicalIndex };
 832         return new byte[]{
 833             (byte)_meta_arb,
 834             (byte)(del + 2*S + 8*(B-1)),
 835             (byte)(H-1)
 836         };
 837     }
 838     public static int parseMetaCoding(byte[] bytes, int pos, Coding dflt, CodingMethod res[]) {
 839         int op = bytes[pos++] & 0xFF;
 840         if (_meta_canon_min <= op && op <= _meta_canon_max) {
 841             Coding c = BandStructure.codingForIndex(op);
 842             assert(c != null);
 843             res[0] = c;
 844             return pos;
 845         }
 846         if (op == _meta_arb) {
 847             int dsb = bytes[pos++] & 0xFF;
 848             int H_1 = bytes[pos++] & 0xFF;
 849             int del = dsb % 2;
 850             int S = (dsb / 2) % 4;
 851             int B = (dsb / 8)+1;
 852             int H = H_1+1;
 853             if (!((1 <= B && B <= B_MAX) &&
 854                   (0 <= S && S <= S_MAX) &&
 855                   (1 <= H && H <= H_MAX) &&
 856                   (0 <= del && del <= 1))
 857                 || (B == 1 && H != 256)
 858                 || (B == 5 && H == 256)) {
 859                 throw new RuntimeException("Bad arb. coding: ("+B+","+H+","+S+","+del);
 860             }
 861             res[0] = Coding.of(B, H, S, del);
 862             return pos;
 863         }
 864         return pos-1;  // backup
 865     }
 866 
 867 
 868     public String keyString() {
 869         return "("+B+","+H+","+S+","+del+")";
 870     }
 871 
 872     public String toString() {
 873         String str = "Coding"+keyString();
 874         // If -ea, print out more informative strings!
 875         //assert((str = stringForDebug()) != null);
 876         return str;
 877     }
 878 
 879     static boolean verboseStringForDebug = false;
 880     String stringForDebug() {
 881         String minS = (min == Integer.MIN_VALUE ? "min" : ""+min);
 882         String maxS = (max == Integer.MAX_VALUE ? "max" : ""+max);
 883         String str = keyString()+" L="+L+" r=["+minS+","+maxS+"]";
 884         if (isSubrange())
 885             str += " subrange";
 886         else if (!isFullRange())
 887             str += " MIDRANGE";
 888         if (verboseStringForDebug) {
 889             str += " {";
 890             int prev_range = 0;
 891             for (int n = 1; n <= B; n++) {
 892                 int range_n = saturate32((long)byteMax[n-1] - byteMin[n-1] + 1);
 893                 assert(range_n == saturate32(codeRangeLong(B, H, n)));
 894                 range_n -= prev_range;
 895                 prev_range = range_n;
 896                 String rngS = (range_n == Integer.MAX_VALUE ? "max" : ""+range_n);
 897                 str += " #"+n+"="+rngS;
 898             }
 899             str += " }";
 900         }
 901         return str;
 902     }
 903 }