Old src/java.base/share/classes/com/sun/java/util/jar/pack/Coding.java

   1 /*
   2  * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package com.sun.java.util.jar.pack;
  27 
  28 import java.io.IOException;
  29 import java.io.InputStream;
  30 import java.io.OutputStream;
  31 import java.util.HashMap;
  32 import java.util.Map;
  33 import static com.sun.java.util.jar.pack.Constants.*;
  34 /**
  35  * Define the conversions between sequences of small integers and raw bytes.
  36  * This is a schema of encodings which incorporates varying lengths,
  37  * varying degrees of length variability, and varying amounts of signed-ness.
  38  * @author John Rose
  39  */
  40 class Coding implements Comparable<Coding>, CodingMethod, Histogram.BitMetric {
  41     /*
  42       Coding schema for single integers, parameterized by (B,H,S):
  43 
  44       Let B in [1,5], H in [1,256], S in [0,3].
  45       (S limit is arbitrary.  B follows the 32-bit limit.  H is byte size.)
  46 
  47       A given (B,H,S) code varies in length from 1 to B bytes.
  48 
  49       The 256 values a byte may take on are divided into L=(256-H) and H
  50       values, with all the H values larger than the L values.
  51       (That is, the L values are [0,L) and the H are [L,256).)
  52 
  53       The last byte is always either the B-th byte, a byte with "L value"
  54       (<L), or both.  There is no other byte that satisfies these conditions.
  55       All bytes before the last always have "H values" (>=L).
  56 
  57       Therefore, if L==0, the code always has the full length of B bytes.
  58       The coding then becomes a classic B-byte little-endian unsigned integer.
  59       (Also, if L==128, the high bit of each byte acts signals the presence
  60       of a following byte, up to the maximum length.)
  61 
  62       In the unsigned case (S==0), the coding is compact and monotonic
  63       in the ordering of byte sequences defined by appending zero bytes
  64       to pad them to a common length B, reversing them, and ordering them
  65       lexicographically.  (This agrees with "little-endian" byte order.)
  66 
  67       Therefore, the unsigned value of a byte sequence may be defined as:
  68       <pre>
  69         U(b0)           == b0
  70                            in [0..L)
  71                            or [0..256) if B==1 (**)
  72 
  73         U(b0,b1)        == b0 + b1*H
  74                            in [L..L*(1+H))
  75                            or [L..L*(1+H) + H^2) if B==2
  76 
  77         U(b0,b1,b2)     == b0 + b1*H + b2*H^2
  78                            in [L*(1+H)..L*(1+H+H^2))
  79                            or [L*(1+H)..L*(1+H+H^2) + H^3) if B==3
  80 
  81         U(b[i]: i<n)    == Sum[i<n]( b[i] * H^i )
  82                            up to  L*Sum[i<n]( H^i )
  83                            or to  L*Sum[i<n]( H^i ) + H^n if n==B
  84       </pre>
  85 
  86       (**) If B==1, the values H,L play no role in the coding.
  87       As a convention, we require that any (1,H,S) code must always
  88       encode values less than H.  Thus, a simple unsigned byte is coded
  89       specifically by the code (1,256,0).
  90 
  91       (Properly speaking, the unsigned case should be parameterized as
  92       S==Infinity.  If the schema were regular, the case S==0 would really
  93       denote a numbering in which all coded values are negative.)
  94 
  95       If S>0, the unsigned value of a byte sequence is regarded as a binary
  96       integer.  If any of the S low-order bits are zero, the corresponding
  97       signed value will be non-negative.  If all of the S low-order bits
  98       (S>0) are one, the corresponding signed value will be negative.
  99 
 100       The non-negative signed values are compact and monotonically increasing
 101       (from 0) in the ordering of the corresponding unsigned values.
 102 
 103       The negative signed values are compact and monotonically decreasing
 104       (from -1) in the ordering of the corresponding unsigned values.
 105 
 106       In essence, the low-order S bits function as a collective sign bit
 107       for negative signed numbers, and as a low-order base-(2^S-1) digit
 108       for non-negative signed numbers.
 109 
 110       Therefore, the signed value corresponding to an unsigned value is:
 111       <pre>
 112         Sgn(x)  == x                               if S==0
 113         Sgn(x)  == (x / 2^S)*(2^S-1) + (x % 2^S),  if S>0, (x % 2^S) < 2^S-1
 114         Sgn(x)  == -(x / 2^S)-1,                   if S>0, (x % 2^S) == 2^S-1
 115       </pre>
 116 
 117       Finally, the value of a byte sequence, given the coding parameters
 118       (B,H,S), is defined as:
 119       <pre>
 120         V(b[i]: i<n)  == Sgn(U(b[i]: i<n))
 121       </pre>
 122 
 123       The extremal positive and negative signed value for a given range
 124       of unsigned values may be found by sign-encoding the largest unsigned
 125       value which is not 2^S-1 mod 2^S, and that which is, respectively.
 126 
 127       Because B,H,S are variable, this is not a single coding but a schema
 128       of codings.  For optimal compression, it is necessary to adaptively
 129       select specific codings to the data being compressed.
 130 
 131       For example, if a sequence of values happens never to be negative,
 132       S==0 is the best choice.  If the values are equally balanced between
 133       negative and positive, S==1.  If negative values are rare, then S>1
 134       is more appropriate.
 135 
 136       A (B,H,S) encoding is called a "subrange" if it does not encode
 137       the largest 32-bit value, and if the number R of values it does
 138       encode can be expressed as a positive 32-bit value.  (Note that
 139       B=1 implies R<=256, B=2 implies R<=65536, etc.)
 140 
 141       A delta version of a given (B,H,S) coding encodes an array of integers
 142       by writing their successive differences in the (B,H,S) coding.
 143       The original integers themselves may be recovered by making a
 144       running accumulation of sum of the differences as they are read.
 145 
 146       As a special case, if a (B,H,S) encoding is a subrange, its delta
 147       version will only encode arrays of numbers in the coding's unsigned
 148       range, [0..R-1].  The coding of deltas is still in the normal signed
 149       range, if S!=0.  During delta encoding, all subtraction results are
 150       reduced to the signed range, by adding multiples of R.  Likewise,
 151 .     during encoding, all addition results are reduced to the unsigned range.
 152       This special case for subranges allows the benefits of wraparound
 153       when encoding correlated sequences of very small positive numbers.
 154      */
 155 
 156     // Code-specific limits:
 157     private static int saturate32(long x) {
 158         if (x > Integer.MAX_VALUE)   return Integer.MAX_VALUE;
 159         if (x < Integer.MIN_VALUE)   return Integer.MIN_VALUE;
 160         return (int)x;
 161     }
 162     private static long codeRangeLong(int B, int H) {
 163         return codeRangeLong(B, H, B);
 164     }
 165     private static long codeRangeLong(int B, int H, int nMax) {
 166         // Code range for a all (B,H) codes of length <=nMax (<=B).
 167         // n < B:   L*Sum[i<n]( H^i )
 168         // n == B:  L*Sum[i<B]( H^i ) + H^B
 169         assert(nMax >= 0 && nMax <= B);
 170         assert(B >= 1 && B <= 5);
 171         assert(H >= 1 && H <= 256);
 172         if (nMax == 0)  return 0;  // no codes of zero length
 173         if (B == 1)     return H;  // special case; see (**) above
 174         int L = 256-H;
 175         long sum = 0;
 176         long H_i = 1;
 177         for (int n = 1; n <= nMax; n++) {
 178             sum += H_i;
 179             H_i *= H;
 180         }
 181         sum *= L;
 182         if (nMax == B)
 183             sum += H_i;
 184         return sum;
 185     }
 186     /** Largest int representable by (B,H,S) in up to nMax bytes. */
 187     public static int codeMax(int B, int H, int S, int nMax) {
 188         //assert(S >= 0 && S <= S_MAX);
 189         long range = codeRangeLong(B, H, nMax);
 190         if (range == 0)
 191             return -1;  // degenerate max value for empty set of codes
 192         if (S == 0 || range >= (long)1<<32)
 193             return saturate32(range-1);
 194         long maxPos = range-1;
 195         while (isNegativeCode(maxPos, S)) {
 196             --maxPos;
 197         }
 198         if (maxPos < 0)  return -1;  // No positive codings at all.
 199         int smax = decodeSign32(maxPos, S);
 200         // check for 32-bit wraparound:
 201         if (smax < 0)
 202             return Integer.MAX_VALUE;
 203         return smax;
 204     }
 205     /** Smallest int representable by (B,H,S) in up to nMax bytes.
 206         Returns Integer.MIN_VALUE if 32-bit wraparound covers
 207         the entire negative range.
 208      */
 209     public static int codeMin(int B, int H, int S, int nMax) {
 210         //assert(S >= 0 && S <= S_MAX);
 211         long range = codeRangeLong(B, H, nMax);
 212         if (range >= (long)1<<32 && nMax == B) {
 213             // Can code negative values via 32-bit wraparound.
 214             return Integer.MIN_VALUE;
 215         }
 216         if (S == 0) {
 217             return 0;
 218         }
 219         long maxNeg = range-1;
 220         while (!isNegativeCode(maxNeg, S))
 221             --maxNeg;
 222 
 223         if (maxNeg < 0)  return 0;  // No negative codings at all.
 224         return decodeSign32(maxNeg, S);
 225     }
 226 
 227     // Some of the arithmetic below is on unsigned 32-bit integers.
 228     // These must be represented in Java as longs in the range [0..2^32-1].
 229     // The conversion to a signed int is just the Java cast (int), but
 230     // the conversion to an unsigned int is the following little method:
 231     private static long toUnsigned32(int sx) {
 232         return ((long)sx << 32) >>> 32;
 233     }
 234 
 235     // Sign encoding:
 236     private static boolean isNegativeCode(long ux, int S) {
 237         assert(S > 0);
 238         assert(ux >= -1);  // can be out of 32-bit range; who cares
 239         int Smask = (1<<S)-1;
 240         return (((int)ux+1) & Smask) == 0;
 241     }
 242     private static boolean hasNegativeCode(int sx, int S) {
 243         assert(S > 0);
 244         // If S>=2 very low negatives are coded by 32-bit-wrapped positives.
 245         // The lowest negative representable by a negative coding is
 246         // ~(umax32 >> S), and the next lower number is coded by wrapping
 247         // the highest positive:
 248         //    CodePos(umax32-1)  ->  (umax32-1)-((umax32-1)>>S)
 249         // which simplifies to ~(umax32 >> S)-1.
 250         return (0 > sx) && (sx >= ~(-1>>>S));
 251     }
 252     private static int decodeSign32(long ux, int S) {
 253         assert(ux == toUnsigned32((int)ux))  // must be unsigned 32-bit number
 254             : (Long.toHexString(ux));
 255         if (S == 0) {
 256             return (int) ux;  // cast to signed int
 257         }
 258         int sx;
 259         if (isNegativeCode(ux, S)) {
 260             // Sgn(x)  == -(x / 2^S)-1
 261             sx = ~((int)ux >>> S);
 262         } else {
 263             // Sgn(x)  == (x / 2^S)*(2^S-1) + (x % 2^S)
 264             sx = (int)ux - ((int)ux >>> S);
 265         }
 266         // Assert special case of S==1:
 267         assert(!(S == 1) || sx == (((int)ux >>> 1) ^ -((int)ux & 1)));
 268         return sx;
 269     }
 270     private static long encodeSign32(int sx, int S) {
 271         if (S == 0) {
 272             return toUnsigned32(sx);  // unsigned 32-bit int
 273         }
 274         int Smask = (1<<S)-1;
 275         long ux;
 276         if (!hasNegativeCode(sx, S)) {
 277             // InvSgn(sx) = (sx / (2^S-1))*2^S + (sx % (2^S-1))
 278             ux = sx + (toUnsigned32(sx) / Smask);
 279         } else {
 280             // InvSgn(sx) = (-sx-1)*2^S + (2^S-1)
 281             ux = (-sx << S) - 1;
 282         }
 283         ux = toUnsigned32((int)ux);
 284         assert(sx == decodeSign32(ux, S))
 285             : (Long.toHexString(ux)+" -> "+
 286                Integer.toHexString(sx)+" != "+
 287                Integer.toHexString(decodeSign32(ux, S)));
 288         return ux;
 289     }
 290 
 291     // Top-level coding of single integers:
 292     public static void writeInt(byte[] out, int[] outpos, int sx, int B, int H, int S) {
 293         long ux = encodeSign32(sx, S);
 294         assert(ux == toUnsigned32((int)ux));
 295         assert(ux < codeRangeLong(B, H))
 296             : Long.toHexString(ux);
 297         int L = 256-H;
 298         long sum = ux;
 299         int pos = outpos[0];
 300         for (int i = 0; i < B-1; i++) {
 301             if (sum < L)
 302                 break;
 303             sum -= L;
 304             int b_i = (int)( L + (sum % H) );
 305             sum /= H;
 306             out[pos++] = (byte)b_i;
 307         }
 308         out[pos++] = (byte)sum;
 309         // Report number of bytes written by updating outpos[0]:
 310         outpos[0] = pos;
 311         // Check right away for mis-coding.
 312         //assert(sx == readInt(out, new int[1], B, H, S));
 313     }
 314     public static int readInt(byte[] in, int[] inpos, int B, int H, int S) {
 315         // U(b[i]: i<n) == Sum[i<n]( b[i] * H^i )
 316         int L = 256-H;
 317         long sum = 0;
 318         long H_i = 1;
 319         int pos = inpos[0];
 320         for (int i = 0; i < B; i++) {
 321             int b_i = in[pos++] & 0xFF;
 322             sum += b_i*H_i;
 323             H_i *= H;
 324             if (b_i < L)  break;
 325         }
 326         //assert(sum >= 0 && sum < codeRangeLong(B, H));
 327         // Report number of bytes read by updating inpos[0]:
 328         inpos[0] = pos;
 329         return decodeSign32(sum, S);
 330     }
 331     // The Stream version doesn't fetch a byte unless it is needed for coding.
 332     public static int readIntFrom(InputStream in, int B, int H, int S) throws IOException {
 333         // U(b[i]: i<n) == Sum[i<n]( b[i] * H^i )
 334         int L = 256-H;
 335         long sum = 0;
 336         long H_i = 1;
 337         for (int i = 0; i < B; i++) {
 338             int b_i = in.read();
 339             if (b_i < 0)  throw new RuntimeException("unexpected EOF");
 340             sum += b_i*H_i;
 341             H_i *= H;
 342             if (b_i < L)  break;
 343         }
 344         assert(sum >= 0 && sum < codeRangeLong(B, H));
 345         return decodeSign32(sum, S);
 346     }
 347 
 348     public static final int B_MAX = 5;    /* B: [1,5] */
 349     public static final int H_MAX = 256;  /* H: [1,256] */
 350     public static final int S_MAX = 2;    /* S: [0,2] */
 351 
 352     // END OF STATICS.
 353 
 354     private final int B; /*1..5*/       // # bytes (1..5)
 355     private final int H; /*1..256*/     // # codes requiring a higher byte
 356     private final int L; /*0..255*/     // # codes requiring a higher byte
 357     private final int S; /*0..3*/       // # low-order bits representing sign
 358     private final int del; /*0..2*/     // type of delta encoding (0 == none)
 359     private final int min;              // smallest representable value
 360     private final int max;              // largest representable value
 361     private final int umin;             // smallest representable uns. value
 362     private final int umax;             // largest representable uns. value
 363     private final int[] byteMin;        // smallest repr. value, given # bytes
 364     private final int[] byteMax;        // largest repr. value, given # bytes
 365 
 366     private Coding(int B, int H, int S) {
 367         this(B, H, S, 0);
 368     }
 369     private Coding(int B, int H, int S, int del) {
 370         this.B = B;
 371         this.H = H;
 372         this.L = 256-H;
 373         this.S = S;
 374         this.del = del;
 375         this.min = codeMin(B, H, S, B);
 376         this.max = codeMax(B, H, S, B);
 377         this.umin = codeMin(B, H, 0, B);
 378         this.umax = codeMax(B, H, 0, B);
 379         this.byteMin = new int[B];
 380         this.byteMax = new int[B];
 381 
 382         for (int nMax = 1; nMax <= B; nMax++) {
 383             byteMin[nMax-1] = codeMin(B, H, S, nMax);
 384             byteMax[nMax-1] = codeMax(B, H, S, nMax);
 385         }
 386     }
 387 
 388     public boolean equals(Object x) {
 389         if (!(x instanceof Coding))  return false;
 390         Coding that = (Coding) x;
 391         if (this.B != that.B)  return false;
 392         if (this.H != that.H)  return false;
 393         if (this.S != that.S)  return false;
 394         if (this.del != that.del)  return false;
 395         return true;
 396     }
 397 
 398     public int hashCode() {
 399         return (del<<14)+(S<<11)+(B<<8)+(H<<0);
 400     }
 401 
 402     private static Map<Coding, Coding> codeMap;
 403 
 404     private static synchronized Coding of(int B, int H, int S, int del) {
 405         if (codeMap == null)  codeMap = new HashMap<>();
 406         Coding x0 = new Coding(B, H, S, del);
 407         Coding x1 = codeMap.get(x0);
 408         if (x1 == null)  codeMap.put(x0, x1 = x0);
 409         return x1;
 410     }
 411 
 412     public static Coding of(int B, int H) {
 413         return of(B, H, 0, 0);
 414     }
 415 
 416     public static Coding of(int B, int H, int S) {
 417         return of(B, H, S, 0);
 418     }
 419 
 420     public boolean canRepresentValue(int x) {
 421         if (isSubrange())
 422             return canRepresentUnsigned(x);
 423         else
 424             return canRepresentSigned(x);
 425     }
 426     /** Can this coding represent a single value, possibly a delta?
 427      *  This ignores the D property.  That is, for delta codings,
 428      *  this tests whether a delta value of 'x' can be coded.
 429      *  For signed delta codings which produce unsigned end values,
 430      *  use canRepresentUnsigned.
 431      */
 432     public boolean canRepresentSigned(int x) {
 433         return (x >= min && x <= max);
 434     }
 435     /** Can this coding, apart from its S property,
 436      *  represent a single value?  (Negative values
 437      *  can only be represented via 32-bit overflow,
 438      *  so this returns true for negative values
 439      *  if isFullRange is true.)
 440      */
 441     public boolean canRepresentUnsigned(int x) {
 442         return (x >= umin && x <= umax);
 443     }
 444 
 445     // object-oriented code/decode
 446     public int readFrom(byte[] in, int[] inpos) {
 447         return readInt(in, inpos, B, H, S);
 448     }
 449     public void writeTo(byte[] out, int[] outpos, int x) {
 450         writeInt(out, outpos, x, B, H, S);
 451     }
 452 
 453     // Stream versions
 454     public int readFrom(InputStream in) throws IOException {
 455         return readIntFrom(in, B, H, S);
 456     }
 457     public void writeTo(OutputStream out, int x) throws IOException {
 458         byte[] buf = new byte[B];
 459         int[] pos = new int[1];
 460         writeInt(buf, pos, x, B, H, S);
 461         out.write(buf, 0, pos[0]);
 462     }
 463 
 464     // Stream/array versions
 465     public void readArrayFrom(InputStream in, int[] a, int start, int end) throws IOException {
 466         // %%% use byte[] buffer
 467         for (int i = start; i < end; i++)
 468             a[i] = readFrom(in);
 469 
 470         for (int dstep = 0; dstep < del; dstep++) {
 471             long state = 0;
 472             for (int i = start; i < end; i++) {
 473                 state += a[i];
 474                 // Reduce array values to the required range.
 475                 if (isSubrange()) {
 476                     state = reduceToUnsignedRange(state);
 477                 }
 478                 a[i] = (int) state;
 479             }
 480         }
 481     }
 482     public void writeArrayTo(OutputStream out, int[] a, int start, int end) throws IOException {
 483         if (end <= start)  return;
 484         for (int dstep = 0; dstep < del; dstep++) {
 485             int[] deltas;
 486             if (!isSubrange())
 487                 deltas = makeDeltas(a, start, end, 0, 0);
 488             else
 489                 deltas = makeDeltas(a, start, end, min, max);
 490             a = deltas;
 491             start = 0;
 492             end = deltas.length;
 493         }
 494         // The following code is a buffered version of this loop:
 495         //    for (int i = start; i < end; i++)
 496         //        writeTo(out, a[i]);
 497         byte[] buf = new byte[1<<8];
 498         final int bufmax = buf.length-B;
 499         int[] pos = { 0 };
 500         for (int i = start; i < end; ) {
 501             while (pos[0] <= bufmax) {
 502                 writeTo(buf, pos, a[i++]);
 503                 if (i >= end)  break;
 504             }
 505             out.write(buf, 0, pos[0]);
 506             pos[0] = 0;
 507         }
 508     }
 509 
 510     /** Tell if the range of this coding (number of distinct
 511      *  representable values) can be expressed in 32 bits.
 512      */
 513     boolean isSubrange() {
 514         return max < Integer.MAX_VALUE
 515             && ((long)max - (long)min + 1) <= Integer.MAX_VALUE;
 516     }
 517 
 518     /** Tell if this coding can represent all 32-bit values.
 519      *  Note:  Some codings, such as unsigned ones, can be neither
 520      *  subranges nor full-range codings.
 521      */
 522     boolean isFullRange() {
 523         return max == Integer.MAX_VALUE && min == Integer.MIN_VALUE;
 524     }
 525 
 526     /** Return the number of values this coding (a subrange) can represent. */
 527     int getRange() {
 528         assert(isSubrange());
 529         return (max - min) + 1;  // range includes both min & max
 530     }
 531 
 532     Coding setB(int B) { return Coding.of(B, H, S, del); }
 533     Coding setH(int H) { return Coding.of(B, H, S, del); }
 534     Coding setS(int S) { return Coding.of(B, H, S, del); }
 535     Coding setL(int L) { return setH(256-L); }
 536     Coding setD(int del) { return Coding.of(B, H, S, del); }
 537     Coding getDeltaCoding() { return setD(del+1); }
 538 
 539     /** Return a coding suitable for representing summed, modulo-reduced values. */
 540     Coding getValueCoding() {
 541         if (isDelta())
 542             return Coding.of(B, H, 0, del-1);
 543         else
 544             return this;
 545     }
 546 
 547     /** Reduce the given value to be within this coding's unsigned range,
 548      *  by adding or subtracting a multiple of (max-min+1).
 549      */
 550     int reduceToUnsignedRange(long value) {
 551         if (value == (int)value && canRepresentUnsigned((int)value))
 552             // already in unsigned range
 553             return (int)value;
 554         int range = getRange();
 555         assert(range > 0);
 556         value %= range;
 557         if (value < 0)  value += range;
 558         assert(canRepresentUnsigned((int)value));
 559         return (int)value;
 560     }
 561 
 562     int reduceToSignedRange(int value) {
 563         if (canRepresentSigned(value))
 564             // already in signed range
 565             return value;
 566         return reduceToSignedRange(value, min, max);
 567     }
 568     static int reduceToSignedRange(int value, int min, int max) {
 569         int range = (max-min+1);
 570         assert(range > 0);
 571         int value0 = value;
 572         value -= min;
 573         if (value < 0 && value0 >= 0) {
 574             // 32-bit overflow, but the next '%=' op needs to be unsigned
 575             value -= range;
 576             assert(value >= 0);
 577         }
 578         value %= range;
 579         if (value < 0)  value += range;
 580         value += min;
 581         assert(min <= value && value <= max);
 582         return value;
 583     }
 584 
 585     /** Does this coding support at least one negative value?
 586         Includes codings that can do so via 32-bit wraparound.
 587      */
 588     boolean isSigned() {
 589         return min < 0;
 590     }
 591     /** Does this coding code arrays by making successive differences? */
 592     boolean isDelta() {
 593         return del != 0;
 594     }
 595 
 596     public int B() { return B; }
 597     public int H() { return H; }
 598     public int L() { return L; }
 599     public int S() { return S; }
 600     public int del() { return del; }
 601     public int min() { return min; }
 602     public int max() { return max; }
 603     public int umin() { return umin; }
 604     public int umax() { return umax; }
 605     public int byteMin(int b) { return byteMin[b-1]; }
 606     public int byteMax(int b) { return byteMax[b-1]; }
 607 
 608     public int compareTo(Coding that) {
 609         int dkey = this.del - that.del;
 610         if (dkey == 0)
 611             dkey = this.B - that.B;
 612         if (dkey == 0)
 613             dkey = this.H - that.H;
 614         if (dkey == 0)
 615             dkey = this.S - that.S;
 616         return dkey;
 617     }
 618 
 619     /** Heuristic measure of the difference between two codings. */
 620     public int distanceFrom(Coding that) {
 621         int diffdel = this.del - that.del;
 622         if (diffdel < 0)  diffdel = -diffdel;
 623         int diffS = this.S - that.S;
 624         if (diffS < 0)  diffS = -diffS;
 625         int diffB = this.B - that.B;
 626         if (diffB < 0)  diffB = -diffB;
 627         int diffHL;
 628         if (this.H == that.H) {
 629             diffHL = 0;
 630         } else {
 631             // Distance in log space of H (<=128) and L (<128).
 632             int thisHL = this.getHL();
 633             int thatHL = that.getHL();
 634             // Double the accuracy of the log:
 635             thisHL *= thisHL;
 636             thatHL *= thatHL;
 637             if (thisHL > thatHL)
 638                 diffHL = ceil_lg2(1+(thisHL-1)/thatHL);
 639             else
 640                 diffHL = ceil_lg2(1+(thatHL-1)/thisHL);
 641         }
 642         int norm = 5*(diffdel + diffS + diffB) + diffHL;
 643         assert(norm != 0 || this.compareTo(that) == 0);
 644         return norm;
 645     }
 646     private int getHL() {
 647         // Follow H in log space by the multiplicative inverse of L.
 648         if (H <= 128)  return H;
 649         if (L >= 1)    return 128*128/L;
 650         return 128*256;
 651     }
 652 
 653     /** ceiling(log[2](x)): {1->0, 2->1, 3->2, 4->2, ...} */
 654     static int ceil_lg2(int x) {
 655         assert(x-1 >= 0);  // x in range (int.MIN_VALUE -> 32)
 656         x -= 1;
 657         int lg = 0;
 658         while (x != 0) {
 659             lg++;
 660             x >>= 1;
 661         }
 662         return lg;
 663     }
 664 
 665     private static final byte[] byteBitWidths = new byte[0x100];
 666     static {
 667         for (int b = 0; b < byteBitWidths.length; b++) {
 668             byteBitWidths[b] = (byte) ceil_lg2(b + 1);
 669         }
 670         for (int i = 10; i >= 0; i = (i << 1) - (i >> 3)) {
 671             assert(bitWidth(i) == ceil_lg2(i + 1));
 672         }
 673     }
 674 
 675     /** Number of significant bits in i, not counting sign bits.
 676      *  For positive i, it is ceil_lg2(i + 1).
 677      */
 678     static int bitWidth(int i) {
 679         if (i < 0)  i = ~i;  // change sign
 680         int w = 0;
 681         int lo = i;
 682         if (lo < byteBitWidths.length)
 683             return byteBitWidths[lo];
 684         int hi;
 685         hi = (lo >>> 16);
 686         if (hi != 0) {
 687             lo = hi;
 688             w += 16;
 689         }
 690         hi = (lo >>> 8);
 691         if (hi != 0) {
 692             lo = hi;
 693             w += 8;
 694         }
 695         w += byteBitWidths[lo];
 696         //assert(w == ceil_lg2(i + 1));
 697         return w;
 698     }
 699 
 700     /** Create an array of successive differences.
 701      *  If min==max, accept any and all 32-bit overflow.
 702      *  Otherwise, avoid 32-bit overflow, and reduce all differences
 703      *  to a value in the given range, by adding or subtracting
 704      *  multiples of the range cardinality (max-min+1).
 705      *  Also, the values are assumed to be in the range [0..(max-min)].
 706      */
 707     static int[] makeDeltas(int[] values, int start, int end,
 708                             int min, int max) {
 709         assert(max >= min);
 710         int count = end-start;
 711         int[] deltas = new int[count];
 712         int state = 0;
 713         if (min == max) {
 714             for (int i = 0; i < count; i++) {
 715                 int value = values[start+i];
 716                 deltas[i] = value - state;
 717                 state = value;
 718             }
 719         } else {
 720             for (int i = 0; i < count; i++) {
 721                 int value = values[start+i];
 722                 assert(value >= 0 && value+min <= max);
 723                 int delta = value - state;
 724                 assert(delta == (long)value - (long)state); // no overflow
 725                 state = value;
 726                 // Reduce delta values to the required range.
 727                 delta = reduceToSignedRange(delta, min, max);
 728                 deltas[i] = delta;
 729             }
 730         }
 731         return deltas;
 732     }
 733 
 734     boolean canRepresent(int minValue, int maxValue) {
 735         assert(minValue <= maxValue);
 736         if (del > 0) {
 737             if (isSubrange()) {
 738                 // We will force the values to reduce to the right subrange.
 739                 return canRepresentUnsigned(maxValue)
 740                     && canRepresentUnsigned(minValue);
 741             } else {
 742                 // Huge range; delta values must assume full 32-bit range.
 743                 return isFullRange();
 744             }
 745         }
 746         else
 747             // final values must be representable
 748             return canRepresentSigned(maxValue)
 749                 && canRepresentSigned(minValue);
 750     }
 751 
 752     boolean canRepresent(int[] values, int start, int end) {
 753         int len = end-start;
 754         if (len == 0)       return true;
 755         if (isFullRange())  return true;
 756         // Calculate max, min:
 757         int lmax = values[start];
 758         int lmin = lmax;
 759         for (int i = 1; i < len; i++) {
 760             int value = values[start+i];
 761             if (lmax < value)  lmax = value;
 762             if (lmin > value)  lmin = value;
 763         }
 764         return canRepresent(lmin, lmax);
 765     }
 766 
 767     public double getBitLength(int value) {  // implements BitMetric
 768         return (double) getLength(value) * 8;
 769     }
 770 
 771     /** How many bytes are in the coding of this value?
 772      *  Returns Integer.MAX_VALUE if the value has no coding.
 773      *  The coding must not be a delta coding, since there is no
 774      *  definite size for a single value apart from its context.
 775      */
 776     public int getLength(int value) {
 777         if (isDelta() && isSubrange()) {
 778             if (!canRepresentUnsigned(value))
 779                 return Integer.MAX_VALUE;
 780             value = reduceToSignedRange(value);
 781         }
 782         if (value >= 0) {
 783             for (int n = 0; n < B; n++) {
 784                 if (value <= byteMax[n])  return n+1;
 785             }
 786         } else {
 787             for (int n = 0; n < B; n++) {
 788                 if (value >= byteMin[n])  return n+1;
 789             }
 790         }
 791         return Integer.MAX_VALUE;
 792     }
 793 
 794     public int getLength(int[] values, int start, int end) {
 795         int len = end-start;
 796         if (B == 1)  return len;
 797         if (L == 0)  return len * B;
 798         if (isDelta()) {
 799             int[] deltas;
 800             if (!isSubrange())
 801                 deltas = makeDeltas(values, start, end, 0, 0);
 802             else
 803                 deltas = makeDeltas(values, start, end, min, max);
 804             //return Coding.of(B, H, S).getLength(deltas, 0, len);
 805             values = deltas;
 806             start = 0;
 807         }
 808         int sum = len;  // at least 1 byte per
 809         // add extra bytes for extra-long values
 810         for (int n = 1; n <= B; n++) {
 811             // what is the coding interval [min..max] for n bytes?
 812             int lmax = byteMax[n-1];
 813             int lmin = byteMin[n-1];
 814             int longer = 0;  // count of guys longer than n bytes
 815             for (int i = 0; i < len; i++) {
 816                 int value = values[start+i];
 817                 if (value >= 0) {
 818                     if (value > lmax)  longer++;
 819                 } else {
 820                     if (value < lmin)  longer++;
 821                 }
 822             }
 823             if (longer == 0)  break;  // no more passes needed
 824             if (n == B)  return Integer.MAX_VALUE;  // cannot represent!
 825             sum += longer;
 826         }
 827         return sum;
 828     }
 829 
 830     public byte[] getMetaCoding(Coding dflt) {
 831         if (dflt == this)  return new byte[]{ (byte) _meta_default };
 832         int canonicalIndex = BandStructure.indexOf(this);
 833         if (canonicalIndex > 0)
 834             return new byte[]{ (byte) canonicalIndex };
 835         return new byte[]{
 836             (byte)_meta_arb,
 837             (byte)(del + 2*S + 8*(B-1)),
 838             (byte)(H-1)
 839         };
 840     }
 841     public static int parseMetaCoding(byte[] bytes, int pos, Coding dflt, CodingMethod res[]) {
 842         int op = bytes[pos++] & 0xFF;
 843         if (_meta_canon_min <= op && op <= _meta_canon_max) {
 844             Coding c = BandStructure.codingForIndex(op);
 845             assert(c != null);
 846             res[0] = c;
 847             return pos;
 848         }
 849         if (op == _meta_arb) {
 850             int dsb = bytes[pos++] & 0xFF;
 851             int H_1 = bytes[pos++] & 0xFF;
 852             int del = dsb % 2;
 853             int S = (dsb / 2) % 4;
 854             int B = (dsb / 8)+1;
 855             int H = H_1+1;
 856             if (!((1 <= B && B <= B_MAX) &&
 857                   (0 <= S && S <= S_MAX) &&
 858                   (1 <= H && H <= H_MAX) &&
 859                   (0 <= del && del <= 1))
 860                 || (B == 1 && H != 256)
 861                 || (B == 5 && H == 256)) {
 862                 throw new RuntimeException("Bad arb. coding: ("+B+","+H+","+S+","+del);
 863             }
 864             res[0] = Coding.of(B, H, S, del);
 865             return pos;
 866         }
 867         return pos-1;  // backup
 868     }
 869 
 870 
 871     public String keyString() {
 872         return "("+B+","+H+","+S+","+del+")";
 873     }
 874 
 875     public String toString() {
 876         String str = "Coding"+keyString();
 877         // If -ea, print out more informative strings!
 878         //assert((str = stringForDebug()) != null);
 879         return str;
 880     }
 881 
 882     static boolean verboseStringForDebug = false;
 883     String stringForDebug() {
 884         String minS = (min == Integer.MIN_VALUE ? "min" : ""+min);
 885         String maxS = (max == Integer.MAX_VALUE ? "max" : ""+max);
 886         String str = keyString()+" L="+L+" r=["+minS+","+maxS+"]";
 887         if (isSubrange())
 888             str += " subrange";
 889         else if (!isFullRange())
 890             str += " MIDRANGE";
 891         if (verboseStringForDebug) {
 892             str += " {";
 893             int prev_range = 0;
 894             for (int n = 1; n <= B; n++) {
 895                 int range_n = saturate32((long)byteMax[n-1] - byteMin[n-1] + 1);
 896                 assert(range_n == saturate32(codeRangeLong(B, H, n)));
 897                 range_n -= prev_range;
 898                 prev_range = range_n;
 899                 String rngS = (range_n == Integer.MAX_VALUE ? "max" : ""+range_n);
 900                 str += " #"+n+"="+rngS;
 901             }
 902             str += " }";
 903         }
 904         return str;
 905     }
 906 }