1 /* 2 * Copyright (c) 2001, 2005, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package com.sun.java.util.jar.pack; 27 28 import java.io.IOException; 29 import java.io.InputStream; 30 import java.io.OutputStream; 31 import java.util.HashMap; 32 33 /** 34 * Define the conversions between sequences of small integers and raw bytes. 35 * This is a schema of encodings which incorporates varying lengths, 36 * varying degrees of length variability, and varying amounts of signed-ness. 37 * @author John Rose 38 */ 39 class Coding implements Constants, Comparable, CodingMethod, Histogram.BitMetric { 40 /* 41 Coding schema for single integers, parameterized by (B,H,S): 42 43 Let B in [1,5], H in [1,256], S in [0,3]. 44 (S limit is arbitrary. B follows the 32-bit limit. H is byte size.) 45 46 A given (B,H,S) code varies in length from 1 to B bytes. 47 48 The 256 values a byte may take on are divided into L=(256-H) and H 49 values, with all the H values larger than the L values. 50 (That is, the L values are [0,L) and the H are [L,256).) 51 52 The last byte is always either the B-th byte, a byte with "L value" 53 (<L), or both. There is no other byte that satisfies these conditions. 54 All bytes before the last always have "H values" (>=L). 55 56 Therefore, if L==0, the code always has the full length of B bytes. 57 The coding then becomes a classic B-byte little-endian unsigned integer. 58 (Also, if L==128, the high bit of each byte acts signals the presence 59 of a following byte, up to the maximum length.) 60 61 In the unsigned case (S==0), the coding is compact and monotonic 62 in the ordering of byte sequences defined by appending zero bytes 63 to pad them to a common length B, reversing them, and ordering them 64 lexicographically. (This agrees with "little-endian" byte order.) 65 66 Therefore, the unsigned value of a byte sequence may be defined as: 67 <pre> 68 U(b0) == b0 69 in [0..L) 70 or [0..256) if B==1 (**) 71 72 U(b0,b1) == b0 + b1*H 73 in [L..L*(1+H)) 74 or [L..L*(1+H) + H^2) if B==2 75 76 U(b0,b1,b2) == b0 + b1*H + b2*H^2 77 in [L*(1+H)..L*(1+H+H^2)) 78 or [L*(1+H)..L*(1+H+H^2) + H^3) if B==3 79 80 U(b[i]: i<n) == Sum[i<n]( b[i] * H^i ) 81 up to L*Sum[i<n]( H^i ) 82 or to L*Sum[i<n]( H^i ) + H^n if n==B 83 </pre> 84 85 (**) If B==1, the values H,L play no role in the coding. 86 As a convention, we require that any (1,H,S) code must always 87 encode values less than H. Thus, a simple unsigned byte is coded 88 specifically by the code (1,256,0). 89 90 (Properly speaking, the unsigned case should be parameterized as 91 S==Infinity. If the schema were regular, the case S==0 would really 92 denote a numbering in which all coded values are negative.) 93 94 If S>0, the unsigned value of a byte sequence is regarded as a binary 95 integer. If any of the S low-order bits are zero, the corresponding 96 signed value will be non-negative. If all of the S low-order bits 97 (S>0) are one, the the corresponding signed value will be negative. 98 99 The non-negative signed values are compact and monotonically increasing 100 (from 0) in the ordering of the corresponding unsigned values. 101 102 The negative signed values are compact and monotonically decreasing 103 (from -1) in the ordering of the corresponding unsigned values. 104 105 In essence, the low-order S bits function as a collective sign bit 106 for negative signed numbers, and as a low-order base-(2^S-1) digit 107 for non-negative signed numbers. 108 109 Therefore, the signed value corresponding to an unsigned value is: 110 <pre> 111 Sgn(x) == x if S==0 112 Sgn(x) == (x / 2^S)*(2^S-1) + (x % 2^S), if S>0, (x % 2^S) < 2^S-1 113 Sgn(x) == -(x / 2^S)-1, if S>0, (x % 2^S) == 2^S-1 114 </pre> 115 116 Finally, the value of a byte sequence, given the coding parameters 117 (B,H,S), is defined as: 118 <pre> 119 V(b[i]: i<n) == Sgn(U(b[i]: i<n)) 120 </pre> 121 122 The extremal positive and negative signed value for a given range 123 of unsigned values may be found by sign-encoding the largest unsigned 124 value which is not 2^S-1 mod 2^S, and that which is, respectively. 125 126 Because B,H,S are variable, this is not a single coding but a schema 127 of codings. For optimal compression, it is necessary to adaptively 128 select specific codings to the data being compressed. 129 130 For example, if a sequence of values happens never to be negative, 131 S==0 is the best choice. If the values are equally balanced between 132 negative and positive, S==1. If negative values are rare, then S>1 133 is more appropriate. 134 135 A (B,H,S) encoding is called a "subrange" if it does not encode 136 the largest 32-bit value, and if the number R of values it does 137 encode can be expressed as a positive 32-bit value. (Note that 138 B=1 implies R<=256, B=2 implies R<=65536, etc.) 139 140 A delta version of a given (B,H,S) coding encodes an array of integers 141 by writing their successive differences in the (B,H,S) coding. 142 The original integers themselves may be recovered by making a 143 running accumulation of sum of the differences as they are read. 144 145 As a special case, if a (B,H,S) encoding is a subrange, its delta 146 version will only encode arrays of numbers in the coding's unsigned 147 range, [0..R-1]. The coding of deltas is still in the normal signed 148 range, if S!=0. During delta encoding, all subtraction results are 149 reduced to the signed range, by adding multiples of R. Likewise, 150 . during encoding, all addition results are reduced to the unsigned range. 151 This special case for subranges allows the benefits of wraparound 152 when encoding correlated sequences of very small positive numbers. 153 */ 154 155 // Code-specific limits: 156 private static int saturate32(long x) { 157 if (x > Integer.MAX_VALUE) return Integer.MAX_VALUE; 158 if (x < Integer.MIN_VALUE) return Integer.MIN_VALUE; 159 return (int)x; 160 } 161 private static long codeRangeLong(int B, int H) { 162 return codeRangeLong(B, H, B); 163 } 164 private static long codeRangeLong(int B, int H, int nMax) { 165 // Code range for a all (B,H) codes of length <=nMax (<=B). 166 // n < B: L*Sum[i<n]( H^i ) 167 // n == B: L*Sum[i<B]( H^i ) + H^B 168 assert(nMax >= 0 && nMax <= B); 169 assert(B >= 1 && B <= 5); 170 assert(H >= 1 && H <= 256); 171 if (nMax == 0) return 0; // no codes of zero length 172 if (B == 1) return H; // special case; see (**) above 173 int L = 256-H; 174 long sum = 0; 175 long H_i = 1; 176 for (int n = 1; n <= nMax; n++) { 177 sum += H_i; 178 H_i *= H; 179 } 180 sum *= L; 181 if (nMax == B) 182 sum += H_i; 183 return sum; 184 } 185 /** Largest int representable by (B,H,S) in up to nMax bytes. */ 186 public static int codeMax(int B, int H, int S, int nMax) { 187 //assert(S >= 0 && S <= S_MAX); 188 long range = codeRangeLong(B, H, nMax); 189 if (range == 0) 190 return -1; // degenerate max value for empty set of codes 191 if (S == 0 || range >= (long)1<<32) 192 return saturate32(range-1); 193 long maxPos = range-1; 194 while (isNegativeCode(maxPos, S)) --maxPos; 195 if (maxPos < 0) return -1; // No positive codings at all. 196 int smax = decodeSign32(maxPos, S); 197 // check for 32-bit wraparound: 198 if (smax < 0) 199 return Integer.MAX_VALUE; 200 return smax; 201 } 202 /** Smallest int representable by (B,H,S) in up to nMax bytes. 203 Returns Integer.MIN_VALUE if 32-bit wraparound covers 204 the entire negative range. 205 */ 206 public static int codeMin(int B, int H, int S, int nMax) { 207 //assert(S >= 0 && S <= S_MAX); 208 long range = codeRangeLong(B, H, nMax); 209 if (range >= (long)1<<32 && nMax == B) { 210 // Can code negative values via 32-bit wraparound. 211 return Integer.MIN_VALUE; 212 } 213 if (S == 0) { 214 return 0; 215 } 216 int Smask = (1<<S)-1; 217 long maxNeg = range-1; 218 while (!isNegativeCode(maxNeg, S)) --maxNeg; 219 if (maxNeg < 0) return 0; // No negative codings at all. 220 return decodeSign32(maxNeg, S); 221 } 222 223 // Some of the arithmetic below is on unsigned 32-bit integers. 224 // These must be represented in Java as longs in the range [0..2^32-1]. 225 // The conversion to a signed int is just the Java cast (int), but 226 // the conversion to an unsigned int is the following little method: 227 private static long toUnsigned32(int sx) { 228 return ((long)sx << 32) >>> 32; 229 } 230 231 // Sign encoding: 232 private static boolean isNegativeCode(long ux, int S) { 233 assert(S > 0); 234 assert(ux >= -1); // can be out of 32-bit range; who cares 235 int Smask = (1<<S)-1; 236 return (((int)ux+1) & Smask) == 0; 237 } 238 private static boolean hasNegativeCode(int sx, int S) { 239 assert(S > 0); 240 // If S>=2 very low negatives are coded by 32-bit-wrapped positives. 241 // The lowest negative representable by a negative coding is 242 // ~(umax32 >> S), and the next lower number is coded by wrapping 243 // the highest positive: 244 // CodePos(umax32-1) -> (umax32-1)-((umax32-1)>>S) 245 // which simplifies to ~(umax32 >> S)-1. 246 return (0 > sx) && (sx >= ~(-1>>>S)); 247 } 248 private static int decodeSign32(long ux, int S) { 249 assert(ux == toUnsigned32((int)ux)) // must be unsigned 32-bit number 250 : (Long.toHexString(ux)); 251 if (S == 0) { 252 return (int) ux; // cast to signed int 253 } 254 int sx; 255 if (isNegativeCode(ux, S)) { 256 // Sgn(x) == -(x / 2^S)-1 257 sx = ~((int)ux >>> S); 258 } else { 259 // Sgn(x) == (x / 2^S)*(2^S-1) + (x % 2^S) 260 sx = (int)ux - ((int)ux >>> S); 261 } 262 // Assert special case of S==1: 263 assert(!(S == 1) || sx == (((int)ux >>> 1) ^ -((int)ux & 1))); 264 return sx; 265 } 266 private static long encodeSign32(int sx, int S) { 267 if (S == 0) { 268 return toUnsigned32(sx); // unsigned 32-bit int 269 } 270 int Smask = (1<<S)-1; 271 long ux; 272 if (!hasNegativeCode(sx, S)) { 273 // InvSgn(sx) = (sx / (2^S-1))*2^S + (sx % (2^S-1)) 274 ux = sx + (toUnsigned32(sx) / Smask); 275 } else { 276 // InvSgn(sx) = (-sx-1)*2^S + (2^S-1) 277 ux = (-sx << S) - 1; 278 } 279 ux = toUnsigned32((int)ux); 280 assert(sx == decodeSign32(ux, S)) 281 : (Long.toHexString(ux)+" -> "+ 282 Integer.toHexString(sx)+" != "+ 283 Integer.toHexString(decodeSign32(ux, S))); 284 return ux; 285 } 286 287 // Top-level coding of single integers: 288 public static void writeInt(byte[] out, int[] outpos, int sx, int B, int H, int S) { 289 long ux = encodeSign32(sx, S); 290 assert(ux == toUnsigned32((int)ux)); 291 assert(ux < codeRangeLong(B, H)) 292 : Long.toHexString(ux); 293 int L = 256-H; 294 long sum = ux; 295 int pos = outpos[0]; 296 for (int i = 0; i < B-1; i++) { 297 if (sum < L) 298 break; 299 sum -= L; 300 int b_i = (int)( L + (sum % H) ); 301 sum /= H; 302 out[pos++] = (byte)b_i; 303 } 304 out[pos++] = (byte)sum; 305 // Report number of bytes written by updating outpos[0]: 306 outpos[0] = pos; 307 // Check right away for mis-coding. 308 //assert(sx == readInt(out, new int[1], B, H, S)); 309 } 310 public static int readInt(byte[] in, int[] inpos, int B, int H, int S) { 311 // U(b[i]: i<n) == Sum[i<n]( b[i] * H^i ) 312 int L = 256-H; 313 long sum = 0; 314 long H_i = 1; 315 int pos = inpos[0]; 316 for (int i = 0; i < B; i++) { 317 int b_i = in[pos++] & 0xFF; 318 sum += b_i*H_i; 319 H_i *= H; 320 if (b_i < L) break; 321 } 322 //assert(sum >= 0 && sum < codeRangeLong(B, H)); 323 // Report number of bytes read by updating inpos[0]: 324 inpos[0] = pos; 325 return decodeSign32(sum, S); 326 } 327 // The Stream version doesn't fetch a byte unless it is needed for coding. 328 public static int readIntFrom(InputStream in, int B, int H, int S) throws IOException { 329 // U(b[i]: i<n) == Sum[i<n]( b[i] * H^i ) 330 int L = 256-H; 331 long sum = 0; 332 long H_i = 1; 333 for (int i = 0; i < B; i++) { 334 int b_i = in.read(); 335 if (b_i < 0) throw new RuntimeException("unexpected EOF"); 336 sum += b_i*H_i; 337 H_i *= H; 338 if (b_i < L) break; 339 } 340 assert(sum >= 0 && sum < codeRangeLong(B, H)); 341 return decodeSign32(sum, S); 342 } 343 344 public static final int B_MAX = 5; /* B: [1,5] */ 345 public static final int H_MAX = 256; /* H: [1,256] */ 346 public static final int S_MAX = 2; /* S: [0,2] */ 347 348 // END OF STATICS. 349 350 private final int B; /*1..5*/ // # bytes (1..5) 351 private final int H; /*1..256*/ // # codes requiring a higher byte 352 private final int L; /*0..255*/ // # codes requiring a higher byte 353 private final int S; /*0..3*/ // # low-order bits representing sign 354 private final int del; /*0..2*/ // type of delta encoding (0 == none) 355 private final int min; // smallest representable value 356 private final int max; // largest representable value 357 private final int umin; // smallest representable uns. value 358 private final int umax; // largest representable uns. value 359 private final int[] byteMin; // smallest repr. value, given # bytes 360 private final int[] byteMax; // largest repr. value, given # bytes 361 362 private Coding(int B, int H, int S) { 363 this(B, H, S, 0); 364 } 365 private Coding(int B, int H, int S, int del) { 366 this.B = B; 367 this.H = H; 368 this.L = 256-H; 369 this.S = S; 370 this.del = del; 371 this.min = codeMin(B, H, S, B); 372 this.max = codeMax(B, H, S, B); 373 this.umin = codeMin(B, H, 0, B); 374 this.umax = codeMax(B, H, 0, B); 375 this.byteMin = new int[B]; 376 this.byteMax = new int[B]; 377 378 for (int nMax = 1; nMax <= B; nMax++) { 379 byteMin[nMax-1] = codeMin(B, H, S, nMax); 380 byteMax[nMax-1] = codeMax(B, H, S, nMax); 381 } 382 } 383 384 public boolean equals(Object x) { 385 if (!(x instanceof Coding)) return false; 386 Coding that = (Coding) x; 387 if (this.B != that.B) return false; 388 if (this.H != that.H) return false; 389 if (this.S != that.S) return false; 390 if (this.del != that.del) return false; 391 return true; 392 } 393 394 public int hashCode() { 395 return (del<<14)+(S<<11)+(B<<8)+(H<<0); 396 } 397 398 private static HashMap codeMap; 399 400 private static synchronized Coding of(int B, int H, int S, int del) { 401 if (codeMap == null) codeMap = new HashMap(); 402 Coding x0 = new Coding(B, H, S, del); 403 Coding x1 = (Coding) codeMap.get(x0); 404 if (x1 == null) codeMap.put(x0, x1 = x0); 405 return x1; 406 } 407 408 public static Coding of(int B, int H) { 409 return of(B, H, 0, 0); 410 } 411 412 public static Coding of(int B, int H, int S) { 413 return of(B, H, S, 0); 414 } 415 416 public boolean canRepresentValue(int x) { 417 if (isSubrange()) 418 return canRepresentUnsigned(x); 419 else 420 return canRepresentSigned(x); 421 } 422 /** Can this coding represent a single value, possibly a delta? 423 * This ignores the D property. That is, for delta codings, 424 * this tests whether a delta value of 'x' can be coded. 425 * For signed delta codings which produce unsigned end values, 426 * use canRepresentUnsigned. 427 */ 428 public boolean canRepresentSigned(int x) { 429 return (x >= min && x <= max); 430 } 431 /** Can this coding, apart from its S property, 432 * represent a single value? (Negative values 433 * can only be represented via 32-bit overflow, 434 * so this returns true for negative values 435 * if isFullRange is true.) 436 */ 437 public boolean canRepresentUnsigned(int x) { 438 return (x >= umin && x <= umax); 439 } 440 441 // object-oriented code/decode 442 public int readFrom(byte[] in, int[] inpos) { 443 return readInt(in, inpos, B, H, S); 444 } 445 public void writeTo(byte[] out, int[] outpos, int x) { 446 writeInt(out, outpos, x, B, H, S); 447 } 448 449 // Stream versions 450 public int readFrom(InputStream in) throws IOException { 451 return readIntFrom(in, B, H, S); 452 } 453 public void writeTo(OutputStream out, int x) throws IOException { 454 byte[] buf = new byte[B]; 455 int[] pos = new int[1]; 456 writeInt(buf, pos, x, B, H, S); 457 out.write(buf, 0, pos[0]); 458 } 459 460 // Stream/array versions 461 public void readArrayFrom(InputStream in, int[] a, int start, int end) throws IOException { 462 // %%% use byte[] buffer 463 for (int i = start; i < end; i++) 464 a[i] = readFrom(in); 465 for (int dstep = 0; dstep < del; dstep++) { 466 long state = 0; 467 for (int i = start; i < end; i++) { 468 state += a[i]; 469 // Reduce array values to the required range. 470 if (isSubrange()) { 471 state = reduceToUnsignedRange(state); 472 } 473 a[i] = (int) state; 474 } 475 } 476 } 477 public void writeArrayTo(OutputStream out, int[] a, int start, int end) throws IOException { 478 if (end <= start) return; 479 for (int dstep = 0; dstep < del; dstep++) { 480 int[] deltas; 481 if (!isSubrange()) 482 deltas = makeDeltas(a, start, end, 0, 0); 483 else 484 deltas = makeDeltas(a, start, end, min, max); 485 a = deltas; 486 start = 0; 487 end = deltas.length; 488 } 489 // The following code is a buffered version of this loop: 490 // for (int i = start; i < end; i++) 491 // writeTo(out, a[i]); 492 byte[] buf = new byte[1<<8]; 493 final int bufmax = buf.length-B; 494 int[] pos = { 0 }; 495 for (int i = start; i < end; ) { 496 while (pos[0] <= bufmax) { 497 writeTo(buf, pos, a[i++]); 498 if (i >= end) break; 499 } 500 out.write(buf, 0, pos[0]); 501 pos[0] = 0; 502 } 503 } 504 505 /** Tell if the range of this coding (number of distinct 506 * representable values) can be expressed in 32 bits. 507 */ 508 boolean isSubrange() { 509 return max < Integer.MAX_VALUE 510 && ((long)max - (long)min + 1) <= Integer.MAX_VALUE; 511 } 512 513 /** Tell if this coding can represent all 32-bit values. 514 * Note: Some codings, such as unsigned ones, can be neither 515 * subranges nor full-range codings. 516 */ 517 boolean isFullRange() { 518 return max == Integer.MAX_VALUE && min == Integer.MIN_VALUE; 519 } 520 521 /** Return the number of values this coding (a subrange) can represent. */ 522 int getRange() { 523 assert(isSubrange()); 524 return (max - min) + 1; // range includes both min & max 525 } 526 527 Coding setB(int B) { return Coding.of(B, H, S, del); } 528 Coding setH(int H) { return Coding.of(B, H, S, del); } 529 Coding setS(int S) { return Coding.of(B, H, S, del); } 530 Coding setL(int L) { return setH(256-L); } 531 Coding setD(int del) { return Coding.of(B, H, S, del); } 532 Coding getDeltaCoding() { return setD(del+1); } 533 534 /** Return a coding suitable for representing summed, modulo-reduced values. */ 535 Coding getValueCoding() { 536 if (isDelta()) 537 return Coding.of(B, H, 0, del-1); 538 else 539 return this; 540 } 541 542 /** Reduce the given value to be within this coding's unsigned range, 543 * by adding or subtracting a multiple of (max-min+1). 544 */ 545 int reduceToUnsignedRange(long value) { 546 if (value == (int)value && canRepresentUnsigned((int)value)) 547 // already in unsigned range 548 return (int)value; 549 int range = getRange(); 550 assert(range > 0); 551 value %= range; 552 if (value < 0) value += range; 553 assert(canRepresentUnsigned((int)value)); 554 return (int)value; 555 } 556 557 int reduceToSignedRange(int value) { 558 if (canRepresentSigned(value)) 559 // already in signed range 560 return value; 561 return reduceToSignedRange(value, min, max); 562 } 563 static int reduceToSignedRange(int value, int min, int max) { 564 int range = (max-min+1); 565 assert(range > 0); 566 int value0 = value; 567 value -= min; 568 if (value < 0 && value0 >= 0) { 569 // 32-bit overflow, but the next '%=' op needs to be unsigned 570 value -= range; 571 assert(value >= 0); 572 } 573 value %= range; 574 if (value < 0) value += range; 575 value += min; 576 assert(min <= value && value <= max); 577 return value; 578 } 579 580 /** Does this coding support at least one negative value? 581 Includes codings that can do so via 32-bit wraparound. 582 */ 583 boolean isSigned() { 584 return min < 0; 585 } 586 /** Does this coding code arrays by making successive differences? */ 587 boolean isDelta() { 588 return del != 0; 589 } 590 591 public int B() { return B; } 592 public int H() { return H; } 593 public int L() { return L; } 594 public int S() { return S; } 595 public int del() { return del; } 596 public int min() { return min; } 597 public int max() { return max; } 598 public int umin() { return umin; } 599 public int umax() { return umax; } 600 public int byteMin(int b) { return byteMin[b-1]; } 601 public int byteMax(int b) { return byteMax[b-1]; } 602 603 public int compareTo(Object x) { 604 Coding that = (Coding) x; 605 int dkey = this.del - that.del; 606 if (dkey == 0) 607 dkey = this.B - that.B; 608 if (dkey == 0) 609 dkey = this.H - that.H; 610 if (dkey == 0) 611 dkey = this.S - that.S; 612 return dkey; 613 } 614 615 /** Heuristic measure of the difference between two codings. */ 616 public int distanceFrom(Coding that) { 617 int diffdel = this.del - that.del; 618 if (diffdel < 0) diffdel = -diffdel; 619 int diffS = this.S - that.S; 620 if (diffS < 0) diffS = -diffS; 621 int diffB = this.B - that.B; 622 if (diffB < 0) diffB = -diffB; 623 int diffHL; 624 if (this.H == that.H) { 625 diffHL = 0; 626 } else { 627 // Distance in log space of H (<=128) and L (<128). 628 int thisHL = this.getHL(); 629 int thatHL = that.getHL(); 630 // Double the accuracy of the log: 631 thisHL *= thisHL; 632 thatHL *= thatHL; 633 if (thisHL > thatHL) 634 diffHL = ceil_lg2(1+(thisHL-1)/thatHL); 635 else 636 diffHL = ceil_lg2(1+(thatHL-1)/thisHL); 637 } 638 int norm = 5*(diffdel + diffS + diffB) + diffHL; 639 assert(norm != 0 || this.compareTo(that) == 0); 640 return norm; 641 } 642 private int getHL() { 643 // Follow H in log space by the multiplicative inverse of L. 644 if (H <= 128) return H; 645 if (L >= 1) return 128*128/L; 646 return 128*256; 647 } 648 649 /** ceiling(log[2](x)): {1->0, 2->1, 3->2, 4->2, ...} */ 650 static int ceil_lg2(int x) { 651 assert(x-1 >= 0); // x in range (int.MIN_VALUE -> 32) 652 x -= 1; 653 int lg = 0; 654 while (x != 0) { 655 lg++; 656 x >>= 1; 657 } 658 return lg; 659 } 660 661 static private final byte[] byteBitWidths = new byte[0x100]; 662 static { 663 for (int b = 0; b < byteBitWidths.length; b++) { 664 byteBitWidths[b] = (byte) ceil_lg2(b + 1); 665 } 666 for (int i = 10; i >= 0; i = (i << 1) - (i >> 3)) { 667 assert(bitWidth(i) == ceil_lg2(i + 1)); 668 } 669 } 670 671 /** Number of significant bits in i, not counting sign bits. 672 * For positive i, it is ceil_lg2(i + 1). 673 */ 674 static int bitWidth(int i) { 675 if (i < 0) i = ~i; // change sign 676 int w = 0; 677 int lo = i; 678 if (lo < byteBitWidths.length) 679 return byteBitWidths[lo]; 680 int hi; 681 hi = (lo >>> 16); 682 if (hi != 0) { 683 lo = hi; 684 w += 16; 685 } 686 hi = (lo >>> 8); 687 if (hi != 0) { 688 lo = hi; 689 w += 8; 690 } 691 w += byteBitWidths[lo]; 692 //assert(w == ceil_lg2(i + 1)); 693 return w; 694 } 695 696 /** Create an array of successive differences. 697 * If min==max, accept any and all 32-bit overflow. 698 * Otherwise, avoid 32-bit overflow, and reduce all differences 699 * to a value in the given range, by adding or subtracting 700 * multiples of the range cardinality (max-min+1). 701 * Also, the values are assumed to be in the range [0..(max-min)]. 702 */ 703 static int[] makeDeltas(int[] values, int start, int end, 704 int min, int max) { 705 assert(max >= min); 706 int count = end-start; 707 int[] deltas = new int[count]; 708 int state = 0; 709 if (min == max) { 710 for (int i = 0; i < count; i++) { 711 int value = values[start+i]; 712 deltas[i] = value - state; 713 state = value; 714 } 715 } else { 716 for (int i = 0; i < count; i++) { 717 int value = values[start+i]; 718 assert(value >= 0 && value+min <= max); 719 int delta = value - state; 720 assert(delta == (long)value - (long)state); // no overflow 721 state = value; 722 // Reduce delta values to the required range. 723 delta = reduceToSignedRange(delta, min, max); 724 deltas[i] = delta; 725 } 726 } 727 return deltas; 728 } 729 730 boolean canRepresent(int minValue, int maxValue) { 731 assert(minValue <= maxValue); 732 if (del > 0) { 733 if (isSubrange()) { 734 // We will force the values to reduce to the right subrange. 735 return canRepresentUnsigned(maxValue) 736 && canRepresentUnsigned(minValue); 737 } else { 738 // Huge range; delta values must assume full 32-bit range. 739 return isFullRange(); 740 } 741 } 742 else 743 // final values must be representable 744 return canRepresentSigned(maxValue) 745 && canRepresentSigned(minValue); 746 } 747 748 boolean canRepresent(int[] values, int start, int end) { 749 int len = end-start; 750 if (len == 0) return true; 751 if (isFullRange()) return true; 752 // Calculate max, min: 753 int max = values[start]; 754 int min = max; 755 for (int i = 1; i < len; i++) { 756 int value = values[start+i]; 757 if (max < value) max = value; 758 if (min > value) min = value; 759 } 760 return canRepresent(min, max); 761 } 762 763 public double getBitLength(int value) { // implements BitMetric 764 return (double) getLength(value) * 8; 765 } 766 767 /** How many bytes are in the coding of this value? 768 * Returns Integer.MAX_VALUE if the value has no coding. 769 * The coding must not be a delta coding, since there is no 770 * definite size for a single value apart from its context. 771 */ 772 public int getLength(int value) { 773 if (isDelta() && isSubrange()) { 774 if (!canRepresentUnsigned(value)) 775 return Integer.MAX_VALUE; 776 value = reduceToSignedRange(value); 777 } 778 if (value >= 0) { 779 for (int n = 0; n < B; n++) { 780 if (value <= byteMax[n]) return n+1; 781 } 782 } else { 783 for (int n = 0; n < B; n++) { 784 if (value >= byteMin[n]) return n+1; 785 } 786 } 787 return Integer.MAX_VALUE; 788 } 789 790 public int getLength(int[] values, int start, int end) { 791 int len = end-start; 792 if (B == 1) return len; 793 if (L == 0) return len * B; 794 if (isDelta()) { 795 int[] deltas; 796 if (!isSubrange()) 797 deltas = makeDeltas(values, start, end, 0, 0); 798 else 799 deltas = makeDeltas(values, start, end, min, max); 800 //return Coding.of(B, H, S).getLength(deltas, 0, len); 801 values = deltas; 802 start = 0; 803 end = values.length; 804 } 805 int sum = len; // at least 1 byte per 806 // add extra bytes for extra-long values 807 for (int n = 1; n <= B; n++) { 808 // what is the coding interval [min..max] for n bytes? 809 int max = byteMax[n-1]; 810 int min = byteMin[n-1]; 811 int longer = 0; // count of guys longer than n bytes 812 for (int i = 0; i < len; i++) { 813 int value = values[start+i]; 814 if (value >= 0) { 815 if (value > max) longer++; 816 } else { 817 if (value < min) longer++; 818 } 819 } 820 if (longer == 0) break; // no more passes needed 821 if (n == B) return Integer.MAX_VALUE; // cannot represent! 822 sum += longer; 823 } 824 return sum; 825 } 826 827 public byte[] getMetaCoding(Coding dflt) { 828 if (dflt == this) return new byte[]{ (byte) _meta_default }; 829 int canonicalIndex = BandStructure.indexOf(this); 830 if (canonicalIndex > 0) 831 return new byte[]{ (byte) canonicalIndex }; 832 return new byte[]{ 833 (byte)_meta_arb, 834 (byte)(del + 2*S + 8*(B-1)), 835 (byte)(H-1) 836 }; 837 } 838 public static int parseMetaCoding(byte[] bytes, int pos, Coding dflt, CodingMethod res[]) { 839 int op = bytes[pos++] & 0xFF; 840 if (_meta_canon_min <= op && op <= _meta_canon_max) { 841 Coding c = BandStructure.codingForIndex(op); 842 assert(c != null); 843 res[0] = c; 844 return pos; 845 } 846 if (op == _meta_arb) { 847 int dsb = bytes[pos++] & 0xFF; 848 int H_1 = bytes[pos++] & 0xFF; 849 int del = dsb % 2; 850 int S = (dsb / 2) % 4; 851 int B = (dsb / 8)+1; 852 int H = H_1+1; 853 if (!((1 <= B && B <= B_MAX) && 854 (0 <= S && S <= S_MAX) && 855 (1 <= H && H <= H_MAX) && 856 (0 <= del && del <= 1)) 857 || (B == 1 && H != 256) 858 || (B == 5 && H == 256)) { 859 throw new RuntimeException("Bad arb. coding: ("+B+","+H+","+S+","+del); 860 } 861 res[0] = Coding.of(B, H, S, del); 862 return pos; 863 } 864 return pos-1; // backup 865 } 866 867 868 public String keyString() { 869 return "("+B+","+H+","+S+","+del+")"; 870 } 871 872 public String toString() { 873 String str = "Coding"+keyString(); 874 // If -ea, print out more informative strings! 875 //assert((str = stringForDebug()) != null); 876 return str; 877 } 878 879 static boolean verboseStringForDebug = false; 880 String stringForDebug() { 881 String minS = (min == Integer.MIN_VALUE ? "min" : ""+min); 882 String maxS = (max == Integer.MAX_VALUE ? "max" : ""+max); 883 String str = keyString()+" L="+L+" r=["+minS+","+maxS+"]"; 884 if (isSubrange()) 885 str += " subrange"; 886 else if (!isFullRange()) 887 str += " MIDRANGE"; 888 if (verboseStringForDebug) { 889 str += " {"; 890 int prev_range = 0; 891 for (int n = 1; n <= B; n++) { 892 int range_n = saturate32((long)byteMax[n-1] - byteMin[n-1] + 1); 893 assert(range_n == saturate32(codeRangeLong(B, H, n))); 894 range_n -= prev_range; 895 prev_range = range_n; 896 String rngS = (range_n == Integer.MAX_VALUE ? "max" : ""+range_n); 897 str += " #"+n+"="+rngS; 898 } 899 str += " }"; 900 } 901 return str; 902 } 903 }