1 /* 2 * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package com.sun.java.util.jar.pack; 27 28 import java.io.IOException; 29 import java.io.InputStream; 30 import java.io.OutputStream; 31 import java.util.HashMap; 32 import java.util.Map; 33 import static com.sun.java.util.jar.pack.Constants.*; 34 /** 35 * Define the conversions between sequences of small integers and raw bytes. 36 * This is a schema of encodings which incorporates varying lengths, 37 * varying degrees of length variability, and varying amounts of signed-ness. 38 * @author John Rose 39 */ 40 class Coding implements Comparable<Coding>, CodingMethod, Histogram.BitMetric { 41 /* 42 Coding schema for single integers, parameterized by (B,H,S): 43 44 Let B in [1,5], H in [1,256], S in [0,3]. 45 (S limit is arbitrary. B follows the 32-bit limit. H is byte size.) 46 47 A given (B,H,S) code varies in length from 1 to B bytes. 48 49 The 256 values a byte may take on are divided into L=(256-H) and H 50 values, with all the H values larger than the L values. 51 (That is, the L values are [0,L) and the H are [L,256).) 52 53 The last byte is always either the B-th byte, a byte with "L value" 54 (<L), or both. There is no other byte that satisfies these conditions. 55 All bytes before the last always have "H values" (>=L). 56 57 Therefore, if L==0, the code always has the full length of B bytes. 58 The coding then becomes a classic B-byte little-endian unsigned integer. 59 (Also, if L==128, the high bit of each byte acts signals the presence 60 of a following byte, up to the maximum length.) 61 62 In the unsigned case (S==0), the coding is compact and monotonic 63 in the ordering of byte sequences defined by appending zero bytes 64 to pad them to a common length B, reversing them, and ordering them 65 lexicographically. (This agrees with "little-endian" byte order.) 66 67 Therefore, the unsigned value of a byte sequence may be defined as: 68 <pre> 69 U(b0) == b0 70 in [0..L) 71 or [0..256) if B==1 (**) 72 73 U(b0,b1) == b0 + b1*H 74 in [L..L*(1+H)) 75 or [L..L*(1+H) + H^2) if B==2 76 77 U(b0,b1,b2) == b0 + b1*H + b2*H^2 78 in [L*(1+H)..L*(1+H+H^2)) 79 or [L*(1+H)..L*(1+H+H^2) + H^3) if B==3 80 81 U(b[i]: i<n) == Sum[i<n]( b[i] * H^i ) 82 up to L*Sum[i<n]( H^i ) 83 or to L*Sum[i<n]( H^i ) + H^n if n==B 84 </pre> 85 86 (**) If B==1, the values H,L play no role in the coding. 87 As a convention, we require that any (1,H,S) code must always 88 encode values less than H. Thus, a simple unsigned byte is coded 89 specifically by the code (1,256,0). 90 91 (Properly speaking, the unsigned case should be parameterized as 92 S==Infinity. If the schema were regular, the case S==0 would really 93 denote a numbering in which all coded values are negative.) 94 95 If S>0, the unsigned value of a byte sequence is regarded as a binary 96 integer. If any of the S low-order bits are zero, the corresponding 97 signed value will be non-negative. If all of the S low-order bits 98 (S>0) are one, the corresponding signed value will be negative. 99 100 The non-negative signed values are compact and monotonically increasing 101 (from 0) in the ordering of the corresponding unsigned values. 102 103 The negative signed values are compact and monotonically decreasing 104 (from -1) in the ordering of the corresponding unsigned values. 105 106 In essence, the low-order S bits function as a collective sign bit 107 for negative signed numbers, and as a low-order base-(2^S-1) digit 108 for non-negative signed numbers. 109 110 Therefore, the signed value corresponding to an unsigned value is: 111 <pre> 112 Sgn(x) == x if S==0 113 Sgn(x) == (x / 2^S)*(2^S-1) + (x % 2^S), if S>0, (x % 2^S) < 2^S-1 114 Sgn(x) == -(x / 2^S)-1, if S>0, (x % 2^S) == 2^S-1 115 </pre> 116 117 Finally, the value of a byte sequence, given the coding parameters 118 (B,H,S), is defined as: 119 <pre> 120 V(b[i]: i<n) == Sgn(U(b[i]: i<n)) 121 </pre> 122 123 The extremal positive and negative signed value for a given range 124 of unsigned values may be found by sign-encoding the largest unsigned 125 value which is not 2^S-1 mod 2^S, and that which is, respectively. 126 127 Because B,H,S are variable, this is not a single coding but a schema 128 of codings. For optimal compression, it is necessary to adaptively 129 select specific codings to the data being compressed. 130 131 For example, if a sequence of values happens never to be negative, 132 S==0 is the best choice. If the values are equally balanced between 133 negative and positive, S==1. If negative values are rare, then S>1 134 is more appropriate. 135 136 A (B,H,S) encoding is called a "subrange" if it does not encode 137 the largest 32-bit value, and if the number R of values it does 138 encode can be expressed as a positive 32-bit value. (Note that 139 B=1 implies R<=256, B=2 implies R<=65536, etc.) 140 141 A delta version of a given (B,H,S) coding encodes an array of integers 142 by writing their successive differences in the (B,H,S) coding. 143 The original integers themselves may be recovered by making a 144 running accumulation of sum of the differences as they are read. 145 146 As a special case, if a (B,H,S) encoding is a subrange, its delta 147 version will only encode arrays of numbers in the coding's unsigned 148 range, [0..R-1]. The coding of deltas is still in the normal signed 149 range, if S!=0. During delta encoding, all subtraction results are 150 reduced to the signed range, by adding multiples of R. Likewise, 151 . during encoding, all addition results are reduced to the unsigned range. 152 This special case for subranges allows the benefits of wraparound 153 when encoding correlated sequences of very small positive numbers. 154 */ 155 156 // Code-specific limits: 157 private static int saturate32(long x) { 158 if (x > Integer.MAX_VALUE) return Integer.MAX_VALUE; 159 if (x < Integer.MIN_VALUE) return Integer.MIN_VALUE; 160 return (int)x; 161 } 162 private static long codeRangeLong(int B, int H) { 163 return codeRangeLong(B, H, B); 164 } 165 private static long codeRangeLong(int B, int H, int nMax) { 166 // Code range for a all (B,H) codes of length <=nMax (<=B). 167 // n < B: L*Sum[i<n]( H^i ) 168 // n == B: L*Sum[i<B]( H^i ) + H^B 169 assert(nMax >= 0 && nMax <= B); 170 assert(B >= 1 && B <= 5); 171 assert(H >= 1 && H <= 256); 172 if (nMax == 0) return 0; // no codes of zero length 173 if (B == 1) return H; // special case; see (**) above 174 int L = 256-H; 175 long sum = 0; 176 long H_i = 1; 177 for (int n = 1; n <= nMax; n++) { 178 sum += H_i; 179 H_i *= H; 180 } 181 sum *= L; 182 if (nMax == B) 183 sum += H_i; 184 return sum; 185 } 186 /** Largest int representable by (B,H,S) in up to nMax bytes. */ 187 public static int codeMax(int B, int H, int S, int nMax) { 188 //assert(S >= 0 && S <= S_MAX); 189 long range = codeRangeLong(B, H, nMax); 190 if (range == 0) 191 return -1; // degenerate max value for empty set of codes 192 if (S == 0 || range >= (long)1<<32) 193 return saturate32(range-1); 194 long maxPos = range-1; 195 while (isNegativeCode(maxPos, S)) { 196 --maxPos; 197 } 198 if (maxPos < 0) return -1; // No positive codings at all. 199 int smax = decodeSign32(maxPos, S); 200 // check for 32-bit wraparound: 201 if (smax < 0) 202 return Integer.MAX_VALUE; 203 return smax; 204 } 205 /** Smallest int representable by (B,H,S) in up to nMax bytes. 206 Returns Integer.MIN_VALUE if 32-bit wraparound covers 207 the entire negative range. 208 */ 209 public static int codeMin(int B, int H, int S, int nMax) { 210 //assert(S >= 0 && S <= S_MAX); 211 long range = codeRangeLong(B, H, nMax); 212 if (range >= (long)1<<32 && nMax == B) { 213 // Can code negative values via 32-bit wraparound. 214 return Integer.MIN_VALUE; 215 } 216 if (S == 0) { 217 return 0; 218 } 219 long maxNeg = range-1; 220 while (!isNegativeCode(maxNeg, S)) 221 --maxNeg; 222 223 if (maxNeg < 0) return 0; // No negative codings at all. 224 return decodeSign32(maxNeg, S); 225 } 226 227 // Some of the arithmetic below is on unsigned 32-bit integers. 228 // These must be represented in Java as longs in the range [0..2^32-1]. 229 // The conversion to a signed int is just the Java cast (int), but 230 // the conversion to an unsigned int is the following little method: 231 private static long toUnsigned32(int sx) { 232 return ((long)sx << 32) >>> 32; 233 } 234 235 // Sign encoding: 236 private static boolean isNegativeCode(long ux, int S) { 237 assert(S > 0); 238 assert(ux >= -1); // can be out of 32-bit range; who cares 239 int Smask = (1<<S)-1; 240 return (((int)ux+1) & Smask) == 0; 241 } 242 private static boolean hasNegativeCode(int sx, int S) { 243 assert(S > 0); 244 // If S>=2 very low negatives are coded by 32-bit-wrapped positives. 245 // The lowest negative representable by a negative coding is 246 // ~(umax32 >> S), and the next lower number is coded by wrapping 247 // the highest positive: 248 // CodePos(umax32-1) -> (umax32-1)-((umax32-1)>>S) 249 // which simplifies to ~(umax32 >> S)-1. 250 return (0 > sx) && (sx >= ~(-1>>>S)); 251 } 252 private static int decodeSign32(long ux, int S) { 253 assert(ux == toUnsigned32((int)ux)) // must be unsigned 32-bit number 254 : (Long.toHexString(ux)); 255 if (S == 0) { 256 return (int) ux; // cast to signed int 257 } 258 int sx; 259 if (isNegativeCode(ux, S)) { 260 // Sgn(x) == -(x / 2^S)-1 261 sx = ~((int)ux >>> S); 262 } else { 263 // Sgn(x) == (x / 2^S)*(2^S-1) + (x % 2^S) 264 sx = (int)ux - ((int)ux >>> S); 265 } 266 // Assert special case of S==1: 267 assert(!(S == 1) || sx == (((int)ux >>> 1) ^ -((int)ux & 1))); 268 return sx; 269 } 270 private static long encodeSign32(int sx, int S) { 271 if (S == 0) { 272 return toUnsigned32(sx); // unsigned 32-bit int 273 } 274 int Smask = (1<<S)-1; 275 long ux; 276 if (!hasNegativeCode(sx, S)) { 277 // InvSgn(sx) = (sx / (2^S-1))*2^S + (sx % (2^S-1)) 278 ux = sx + (toUnsigned32(sx) / Smask); 279 } else { 280 // InvSgn(sx) = (-sx-1)*2^S + (2^S-1) 281 ux = (-sx << S) - 1; 282 } 283 ux = toUnsigned32((int)ux); 284 assert(sx == decodeSign32(ux, S)) 285 : (Long.toHexString(ux)+" -> "+ 286 Integer.toHexString(sx)+" != "+ 287 Integer.toHexString(decodeSign32(ux, S))); 288 return ux; 289 } 290 291 // Top-level coding of single integers: 292 public static void writeInt(byte[] out, int[] outpos, int sx, int B, int H, int S) { 293 long ux = encodeSign32(sx, S); 294 assert(ux == toUnsigned32((int)ux)); 295 assert(ux < codeRangeLong(B, H)) 296 : Long.toHexString(ux); 297 int L = 256-H; 298 long sum = ux; 299 int pos = outpos[0]; 300 for (int i = 0; i < B-1; i++) { 301 if (sum < L) 302 break; 303 sum -= L; 304 int b_i = (int)( L + (sum % H) ); 305 sum /= H; 306 out[pos++] = (byte)b_i; 307 } 308 out[pos++] = (byte)sum; 309 // Report number of bytes written by updating outpos[0]: 310 outpos[0] = pos; 311 // Check right away for mis-coding. 312 //assert(sx == readInt(out, new int[1], B, H, S)); 313 } 314 public static int readInt(byte[] in, int[] inpos, int B, int H, int S) { 315 // U(b[i]: i<n) == Sum[i<n]( b[i] * H^i ) 316 int L = 256-H; 317 long sum = 0; 318 long H_i = 1; 319 int pos = inpos[0]; 320 for (int i = 0; i < B; i++) { 321 int b_i = in[pos++] & 0xFF; 322 sum += b_i*H_i; 323 H_i *= H; 324 if (b_i < L) break; 325 } 326 //assert(sum >= 0 && sum < codeRangeLong(B, H)); 327 // Report number of bytes read by updating inpos[0]: 328 inpos[0] = pos; 329 return decodeSign32(sum, S); 330 } 331 // The Stream version doesn't fetch a byte unless it is needed for coding. 332 public static int readIntFrom(InputStream in, int B, int H, int S) throws IOException { 333 // U(b[i]: i<n) == Sum[i<n]( b[i] * H^i ) 334 int L = 256-H; 335 long sum = 0; 336 long H_i = 1; 337 for (int i = 0; i < B; i++) { 338 int b_i = in.read(); 339 if (b_i < 0) throw new RuntimeException("unexpected EOF"); 340 sum += b_i*H_i; 341 H_i *= H; 342 if (b_i < L) break; 343 } 344 assert(sum >= 0 && sum < codeRangeLong(B, H)); 345 return decodeSign32(sum, S); 346 } 347 348 public static final int B_MAX = 5; /* B: [1,5] */ 349 public static final int H_MAX = 256; /* H: [1,256] */ 350 public static final int S_MAX = 2; /* S: [0,2] */ 351 352 // END OF STATICS. 353 354 private final int B; /*1..5*/ // # bytes (1..5) 355 private final int H; /*1..256*/ // # codes requiring a higher byte 356 private final int L; /*0..255*/ // # codes requiring a higher byte 357 private final int S; /*0..3*/ // # low-order bits representing sign 358 private final int del; /*0..2*/ // type of delta encoding (0 == none) 359 private final int min; // smallest representable value 360 private final int max; // largest representable value 361 private final int umin; // smallest representable uns. value 362 private final int umax; // largest representable uns. value 363 private final int[] byteMin; // smallest repr. value, given # bytes 364 private final int[] byteMax; // largest repr. value, given # bytes 365 366 private Coding(int B, int H, int S) { 367 this(B, H, S, 0); 368 } 369 private Coding(int B, int H, int S, int del) { 370 this.B = B; 371 this.H = H; 372 this.L = 256-H; 373 this.S = S; 374 this.del = del; 375 this.min = codeMin(B, H, S, B); 376 this.max = codeMax(B, H, S, B); 377 this.umin = codeMin(B, H, 0, B); 378 this.umax = codeMax(B, H, 0, B); 379 this.byteMin = new int[B]; 380 this.byteMax = new int[B]; 381 382 for (int nMax = 1; nMax <= B; nMax++) { 383 byteMin[nMax-1] = codeMin(B, H, S, nMax); 384 byteMax[nMax-1] = codeMax(B, H, S, nMax); 385 } 386 } 387 388 public boolean equals(Object x) { 389 if (!(x instanceof Coding)) return false; 390 Coding that = (Coding) x; 391 if (this.B != that.B) return false; 392 if (this.H != that.H) return false; 393 if (this.S != that.S) return false; 394 if (this.del != that.del) return false; 395 return true; 396 } 397 398 public int hashCode() { 399 return (del<<14)+(S<<11)+(B<<8)+(H<<0); 400 } 401 402 private static Map<Coding, Coding> codeMap; 403 404 private static synchronized Coding of(int B, int H, int S, int del) { 405 if (codeMap == null) codeMap = new HashMap<>(); 406 Coding x0 = new Coding(B, H, S, del); 407 Coding x1 = codeMap.get(x0); 408 if (x1 == null) codeMap.put(x0, x1 = x0); 409 return x1; 410 } 411 412 public static Coding of(int B, int H) { 413 return of(B, H, 0, 0); 414 } 415 416 public static Coding of(int B, int H, int S) { 417 return of(B, H, S, 0); 418 } 419 420 public boolean canRepresentValue(int x) { 421 if (isSubrange()) 422 return canRepresentUnsigned(x); 423 else 424 return canRepresentSigned(x); 425 } 426 /** Can this coding represent a single value, possibly a delta? 427 * This ignores the D property. That is, for delta codings, 428 * this tests whether a delta value of 'x' can be coded. 429 * For signed delta codings which produce unsigned end values, 430 * use canRepresentUnsigned. 431 */ 432 public boolean canRepresentSigned(int x) { 433 return (x >= min && x <= max); 434 } 435 /** Can this coding, apart from its S property, 436 * represent a single value? (Negative values 437 * can only be represented via 32-bit overflow, 438 * so this returns true for negative values 439 * if isFullRange is true.) 440 */ 441 public boolean canRepresentUnsigned(int x) { 442 return (x >= umin && x <= umax); 443 } 444 445 // object-oriented code/decode 446 public int readFrom(byte[] in, int[] inpos) { 447 return readInt(in, inpos, B, H, S); 448 } 449 public void writeTo(byte[] out, int[] outpos, int x) { 450 writeInt(out, outpos, x, B, H, S); 451 } 452 453 // Stream versions 454 public int readFrom(InputStream in) throws IOException { 455 return readIntFrom(in, B, H, S); 456 } 457 public void writeTo(OutputStream out, int x) throws IOException { 458 byte[] buf = new byte[B]; 459 int[] pos = new int[1]; 460 writeInt(buf, pos, x, B, H, S); 461 out.write(buf, 0, pos[0]); 462 } 463 464 // Stream/array versions 465 public void readArrayFrom(InputStream in, int[] a, int start, int end) throws IOException { 466 // %%% use byte[] buffer 467 for (int i = start; i < end; i++) 468 a[i] = readFrom(in); 469 470 for (int dstep = 0; dstep < del; dstep++) { 471 long state = 0; 472 for (int i = start; i < end; i++) { 473 state += a[i]; 474 // Reduce array values to the required range. 475 if (isSubrange()) { 476 state = reduceToUnsignedRange(state); 477 } 478 a[i] = (int) state; 479 } 480 } 481 } 482 public void writeArrayTo(OutputStream out, int[] a, int start, int end) throws IOException { 483 if (end <= start) return; 484 for (int dstep = 0; dstep < del; dstep++) { 485 int[] deltas; 486 if (!isSubrange()) 487 deltas = makeDeltas(a, start, end, 0, 0); 488 else 489 deltas = makeDeltas(a, start, end, min, max); 490 a = deltas; 491 start = 0; 492 end = deltas.length; 493 } 494 // The following code is a buffered version of this loop: 495 // for (int i = start; i < end; i++) 496 // writeTo(out, a[i]); 497 byte[] buf = new byte[1<<8]; 498 final int bufmax = buf.length-B; 499 int[] pos = { 0 }; 500 for (int i = start; i < end; ) { 501 while (pos[0] <= bufmax) { 502 writeTo(buf, pos, a[i++]); 503 if (i >= end) break; 504 } 505 out.write(buf, 0, pos[0]); 506 pos[0] = 0; 507 } 508 } 509 510 /** Tell if the range of this coding (number of distinct 511 * representable values) can be expressed in 32 bits. 512 */ 513 boolean isSubrange() { 514 return max < Integer.MAX_VALUE 515 && ((long)max - (long)min + 1) <= Integer.MAX_VALUE; 516 } 517 518 /** Tell if this coding can represent all 32-bit values. 519 * Note: Some codings, such as unsigned ones, can be neither 520 * subranges nor full-range codings. 521 */ 522 boolean isFullRange() { 523 return max == Integer.MAX_VALUE && min == Integer.MIN_VALUE; 524 } 525 526 /** Return the number of values this coding (a subrange) can represent. */ 527 int getRange() { 528 assert(isSubrange()); 529 return (max - min) + 1; // range includes both min & max 530 } 531 532 Coding setB(int B) { return Coding.of(B, H, S, del); } 533 Coding setH(int H) { return Coding.of(B, H, S, del); } 534 Coding setS(int S) { return Coding.of(B, H, S, del); } 535 Coding setL(int L) { return setH(256-L); } 536 Coding setD(int del) { return Coding.of(B, H, S, del); } 537 Coding getDeltaCoding() { return setD(del+1); } 538 539 /** Return a coding suitable for representing summed, modulo-reduced values. */ 540 Coding getValueCoding() { 541 if (isDelta()) 542 return Coding.of(B, H, 0, del-1); 543 else 544 return this; 545 } 546 547 /** Reduce the given value to be within this coding's unsigned range, 548 * by adding or subtracting a multiple of (max-min+1). 549 */ 550 int reduceToUnsignedRange(long value) { 551 if (value == (int)value && canRepresentUnsigned((int)value)) 552 // already in unsigned range 553 return (int)value; 554 int range = getRange(); 555 assert(range > 0); 556 value %= range; 557 if (value < 0) value += range; 558 assert(canRepresentUnsigned((int)value)); 559 return (int)value; 560 } 561 562 int reduceToSignedRange(int value) { 563 if (canRepresentSigned(value)) 564 // already in signed range 565 return value; 566 return reduceToSignedRange(value, min, max); 567 } 568 static int reduceToSignedRange(int value, int min, int max) { 569 int range = (max-min+1); 570 assert(range > 0); 571 int value0 = value; 572 value -= min; 573 if (value < 0 && value0 >= 0) { 574 // 32-bit overflow, but the next '%=' op needs to be unsigned 575 value -= range; 576 assert(value >= 0); 577 } 578 value %= range; 579 if (value < 0) value += range; 580 value += min; 581 assert(min <= value && value <= max); 582 return value; 583 } 584 585 /** Does this coding support at least one negative value? 586 Includes codings that can do so via 32-bit wraparound. 587 */ 588 boolean isSigned() { 589 return min < 0; 590 } 591 /** Does this coding code arrays by making successive differences? */ 592 boolean isDelta() { 593 return del != 0; 594 } 595 596 public int B() { return B; } 597 public int H() { return H; } 598 public int L() { return L; } 599 public int S() { return S; } 600 public int del() { return del; } 601 public int min() { return min; } 602 public int max() { return max; } 603 public int umin() { return umin; } 604 public int umax() { return umax; } 605 public int byteMin(int b) { return byteMin[b-1]; } 606 public int byteMax(int b) { return byteMax[b-1]; } 607 608 public int compareTo(Coding that) { 609 int dkey = this.del - that.del; 610 if (dkey == 0) 611 dkey = this.B - that.B; 612 if (dkey == 0) 613 dkey = this.H - that.H; 614 if (dkey == 0) 615 dkey = this.S - that.S; 616 return dkey; 617 } 618 619 /** Heuristic measure of the difference between two codings. */ 620 public int distanceFrom(Coding that) { 621 int diffdel = this.del - that.del; 622 if (diffdel < 0) diffdel = -diffdel; 623 int diffS = this.S - that.S; 624 if (diffS < 0) diffS = -diffS; 625 int diffB = this.B - that.B; 626 if (diffB < 0) diffB = -diffB; 627 int diffHL; 628 if (this.H == that.H) { 629 diffHL = 0; 630 } else { 631 // Distance in log space of H (<=128) and L (<128). 632 int thisHL = this.getHL(); 633 int thatHL = that.getHL(); 634 // Double the accuracy of the log: 635 thisHL *= thisHL; 636 thatHL *= thatHL; 637 if (thisHL > thatHL) 638 diffHL = ceil_lg2(1+(thisHL-1)/thatHL); 639 else 640 diffHL = ceil_lg2(1+(thatHL-1)/thisHL); 641 } 642 int norm = 5*(diffdel + diffS + diffB) + diffHL; 643 assert(norm != 0 || this.compareTo(that) == 0); 644 return norm; 645 } 646 private int getHL() { 647 // Follow H in log space by the multiplicative inverse of L. 648 if (H <= 128) return H; 649 if (L >= 1) return 128*128/L; 650 return 128*256; 651 } 652 653 /** ceiling(log[2](x)): {1->0, 2->1, 3->2, 4->2, ...} */ 654 static int ceil_lg2(int x) { 655 assert(x-1 >= 0); // x in range (int.MIN_VALUE -> 32) 656 x -= 1; 657 int lg = 0; 658 while (x != 0) { 659 lg++; 660 x >>= 1; 661 } 662 return lg; 663 } 664 665 private static final byte[] byteBitWidths = new byte[0x100]; 666 static { 667 for (int b = 0; b < byteBitWidths.length; b++) { 668 byteBitWidths[b] = (byte) ceil_lg2(b + 1); 669 } 670 for (int i = 10; i >= 0; i = (i << 1) - (i >> 3)) { 671 assert(bitWidth(i) == ceil_lg2(i + 1)); 672 } 673 } 674 675 /** Number of significant bits in i, not counting sign bits. 676 * For positive i, it is ceil_lg2(i + 1). 677 */ 678 static int bitWidth(int i) { 679 if (i < 0) i = ~i; // change sign 680 int w = 0; 681 int lo = i; 682 if (lo < byteBitWidths.length) 683 return byteBitWidths[lo]; 684 int hi; 685 hi = (lo >>> 16); 686 if (hi != 0) { 687 lo = hi; 688 w += 16; 689 } 690 hi = (lo >>> 8); 691 if (hi != 0) { 692 lo = hi; 693 w += 8; 694 } 695 w += byteBitWidths[lo]; 696 //assert(w == ceil_lg2(i + 1)); 697 return w; 698 } 699 700 /** Create an array of successive differences. 701 * If min==max, accept any and all 32-bit overflow. 702 * Otherwise, avoid 32-bit overflow, and reduce all differences 703 * to a value in the given range, by adding or subtracting 704 * multiples of the range cardinality (max-min+1). 705 * Also, the values are assumed to be in the range [0..(max-min)]. 706 */ 707 static int[] makeDeltas(int[] values, int start, int end, 708 int min, int max) { 709 assert(max >= min); 710 int count = end-start; 711 int[] deltas = new int[count]; 712 int state = 0; 713 if (min == max) { 714 for (int i = 0; i < count; i++) { 715 int value = values[start+i]; 716 deltas[i] = value - state; 717 state = value; 718 } 719 } else { 720 for (int i = 0; i < count; i++) { 721 int value = values[start+i]; 722 assert(value >= 0 && value+min <= max); 723 int delta = value - state; 724 assert(delta == (long)value - (long)state); // no overflow 725 state = value; 726 // Reduce delta values to the required range. 727 delta = reduceToSignedRange(delta, min, max); 728 deltas[i] = delta; 729 } 730 } 731 return deltas; 732 } 733 734 boolean canRepresent(int minValue, int maxValue) { 735 assert(minValue <= maxValue); 736 if (del > 0) { 737 if (isSubrange()) { 738 // We will force the values to reduce to the right subrange. 739 return canRepresentUnsigned(maxValue) 740 && canRepresentUnsigned(minValue); 741 } else { 742 // Huge range; delta values must assume full 32-bit range. 743 return isFullRange(); 744 } 745 } 746 else 747 // final values must be representable 748 return canRepresentSigned(maxValue) 749 && canRepresentSigned(minValue); 750 } 751 752 boolean canRepresent(int[] values, int start, int end) { 753 int len = end-start; 754 if (len == 0) return true; 755 if (isFullRange()) return true; 756 // Calculate max, min: 757 int lmax = values[start]; 758 int lmin = lmax; 759 for (int i = 1; i < len; i++) { 760 int value = values[start+i]; 761 if (lmax < value) lmax = value; 762 if (lmin > value) lmin = value; 763 } 764 return canRepresent(lmin, lmax); 765 } 766 767 public double getBitLength(int value) { // implements BitMetric 768 return (double) getLength(value) * 8; 769 } 770 771 /** How many bytes are in the coding of this value? 772 * Returns Integer.MAX_VALUE if the value has no coding. 773 * The coding must not be a delta coding, since there is no 774 * definite size for a single value apart from its context. 775 */ 776 public int getLength(int value) { 777 if (isDelta() && isSubrange()) { 778 if (!canRepresentUnsigned(value)) 779 return Integer.MAX_VALUE; 780 value = reduceToSignedRange(value); 781 } 782 if (value >= 0) { 783 for (int n = 0; n < B; n++) { 784 if (value <= byteMax[n]) return n+1; 785 } 786 } else { 787 for (int n = 0; n < B; n++) { 788 if (value >= byteMin[n]) return n+1; 789 } 790 } 791 return Integer.MAX_VALUE; 792 } 793 794 public int getLength(int[] values, int start, int end) { 795 int len = end-start; 796 if (B == 1) return len; 797 if (L == 0) return len * B; 798 if (isDelta()) { 799 int[] deltas; 800 if (!isSubrange()) 801 deltas = makeDeltas(values, start, end, 0, 0); 802 else 803 deltas = makeDeltas(values, start, end, min, max); 804 //return Coding.of(B, H, S).getLength(deltas, 0, len); 805 values = deltas; 806 start = 0; 807 } 808 int sum = len; // at least 1 byte per 809 // add extra bytes for extra-long values 810 for (int n = 1; n <= B; n++) { 811 // what is the coding interval [min..max] for n bytes? 812 int lmax = byteMax[n-1]; 813 int lmin = byteMin[n-1]; 814 int longer = 0; // count of guys longer than n bytes 815 for (int i = 0; i < len; i++) { 816 int value = values[start+i]; 817 if (value >= 0) { 818 if (value > lmax) longer++; 819 } else { 820 if (value < lmin) longer++; 821 } 822 } 823 if (longer == 0) break; // no more passes needed 824 if (n == B) return Integer.MAX_VALUE; // cannot represent! 825 sum += longer; 826 } 827 return sum; 828 } 829 830 public byte[] getMetaCoding(Coding dflt) { 831 if (dflt == this) return new byte[]{ (byte) _meta_default }; 832 int canonicalIndex = BandStructure.indexOf(this); 833 if (canonicalIndex > 0) 834 return new byte[]{ (byte) canonicalIndex }; 835 return new byte[]{ 836 (byte)_meta_arb, 837 (byte)(del + 2*S + 8*(B-1)), 838 (byte)(H-1) 839 }; 840 } 841 public static int parseMetaCoding(byte[] bytes, int pos, Coding dflt, CodingMethod res[]) { 842 int op = bytes[pos++] & 0xFF; 843 if (_meta_canon_min <= op && op <= _meta_canon_max) { 844 Coding c = BandStructure.codingForIndex(op); 845 assert(c != null); 846 res[0] = c; 847 return pos; 848 } 849 if (op == _meta_arb) { 850 int dsb = bytes[pos++] & 0xFF; 851 int H_1 = bytes[pos++] & 0xFF; 852 int del = dsb % 2; 853 int S = (dsb / 2) % 4; 854 int B = (dsb / 8)+1; 855 int H = H_1+1; 856 if (!((1 <= B && B <= B_MAX) && 857 (0 <= S && S <= S_MAX) && 858 (1 <= H && H <= H_MAX) && 859 (0 <= del && del <= 1)) 860 || (B == 1 && H != 256) 861 || (B == 5 && H == 256)) { 862 throw new RuntimeException("Bad arb. coding: ("+B+","+H+","+S+","+del); 863 } 864 res[0] = Coding.of(B, H, S, del); 865 return pos; 866 } 867 return pos-1; // backup 868 } 869 870 871 public String keyString() { 872 return "("+B+","+H+","+S+","+del+")"; 873 } 874 875 public String toString() { 876 String str = "Coding"+keyString(); 877 // If -ea, print out more informative strings! 878 //assert((str = stringForDebug()) != null); 879 return str; 880 } 881 882 static boolean verboseStringForDebug = false; 883 String stringForDebug() { 884 String minS = (min == Integer.MIN_VALUE ? "min" : ""+min); 885 String maxS = (max == Integer.MAX_VALUE ? "max" : ""+max); 886 String str = keyString()+" L="+L+" r=["+minS+","+maxS+"]"; 887 if (isSubrange()) 888 str += " subrange"; 889 else if (!isFullRange()) 890 str += " MIDRANGE"; 891 if (verboseStringForDebug) { 892 str += " {"; 893 int prev_range = 0; 894 for (int n = 1; n <= B; n++) { 895 int range_n = saturate32((long)byteMax[n-1] - byteMin[n-1] + 1); 896 assert(range_n == saturate32(codeRangeLong(B, H, n))); 897 range_n -= prev_range; 898 prev_range = range_n; 899 String rngS = (range_n == Integer.MAX_VALUE ? "max" : ""+range_n); 900 str += " #"+n+"="+rngS; 901 } 902 str += " }"; 903 } 904 return str; 905 } 906 }