1 /* 2 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package com.sun.java.util.jar.pack; 27 28 import java.io.ByteArrayOutputStream; 29 import java.io.IOException; 30 import java.io.InputStream; 31 import java.io.OutputStream; 32 import java.util.Arrays; 33 import java.util.HashSet; 34 import java.util.Set; 35 import static com.sun.java.util.jar.pack.Constants.*; 36 37 /** 38 * Population-based coding. 39 * See the section "Encodings of Uncorrelated Values" in the Pack200 spec. 40 * @author John Rose 41 */ 42 // This tactic alone reduces the final zipped rt.jar by about a percent. 43 class PopulationCoding implements CodingMethod { 44 Histogram vHist; // histogram of all values 45 int[] fValues; // list of favored values 46 int fVlen; // inclusive max index 47 long[] symtab; // int map of favored value -> token [1..#fValues] 48 49 CodingMethod favoredCoding; 50 CodingMethod tokenCoding; 51 CodingMethod unfavoredCoding; 52 53 int L = -1; //preferred L value for tokenCoding 54 55 public void setFavoredValues(int[] fValues, int fVlen) { 56 // Note: {f} is allFavoredValues[1..fvlen], not [0..fvlen-1]. 57 // This is because zero is an exceptional favored value index. 58 assert(fValues[0] == 0); // must be empty 59 assert(this.fValues == null); // do not do this twice 60 this.fValues = fValues; 61 this.fVlen = fVlen; 62 if (L >= 0) { 63 setL(L); // reassert 64 } 65 } 66 public void setFavoredValues(int[] fValues) { 67 int lfVlen = fValues.length-1; 68 setFavoredValues(fValues, lfVlen); 69 } 70 public void setHistogram(Histogram vHist) { 71 this.vHist = vHist; 72 } 73 public void setL(int L) { 74 this.L = L; 75 if (L >= 0 && fValues != null && tokenCoding == null) { 76 tokenCoding = fitTokenCoding(fVlen, L); 77 assert(tokenCoding != null); 78 } 79 } 80 81 public static Coding fitTokenCoding(int fVlen, int L) { 82 // Find the smallest B s.t. (B,H,0) covers fVlen. 83 if (fVlen < 256) 84 // H/L do not matter when B==1 85 return BandStructure.BYTE1; 86 Coding longest = BandStructure.UNSIGNED5.setL(L); 87 if (!longest.canRepresentUnsigned(fVlen)) 88 return null; // failure; L is too sharp and fVlen too large 89 Coding tc = longest; 90 for (Coding shorter = longest; ; ) { 91 shorter = shorter.setB(shorter.B()-1); 92 if (shorter.umax() < fVlen) 93 break; 94 tc = shorter; // shorten it by reducing B 95 } 96 return tc; 97 } 98 99 public void setFavoredCoding(CodingMethod favoredCoding) { 100 this.favoredCoding = favoredCoding; 101 } 102 public void setTokenCoding(CodingMethod tokenCoding) { 103 this.tokenCoding = tokenCoding; 104 this.L = -1; 105 if (tokenCoding instanceof Coding && fValues != null) { 106 Coding tc = (Coding) tokenCoding; 107 if (tc == fitTokenCoding(fVlen, tc.L())) 108 this.L = tc.L(); 109 // Otherwise, it's a non-default coding. 110 } 111 } 112 public void setUnfavoredCoding(CodingMethod unfavoredCoding) { 113 this.unfavoredCoding = unfavoredCoding; 114 } 115 116 public int favoredValueMaxLength() { 117 if (L == 0) 118 return Integer.MAX_VALUE; 119 else 120 return BandStructure.UNSIGNED5.setL(L).umax(); 121 } 122 123 public void resortFavoredValues() { 124 Coding tc = (Coding) tokenCoding; 125 // Make a local copy before reordering. 126 fValues = BandStructure.realloc(fValues, 1+fVlen); 127 // Resort favoredValues within each byte-size cadre. 128 int fillp = 1; // skip initial zero 129 for (int n = 1; n <= tc.B(); n++) { 130 int nmax = tc.byteMax(n); 131 if (nmax > fVlen) 132 nmax = fVlen; 133 if (nmax < tc.byteMin(n)) 134 break; 135 int low = fillp; 136 int high = nmax+1; 137 if (high == low) continue; 138 assert(high > low) 139 : high+"!>"+low; 140 assert(tc.getLength(low) == n) 141 : n+" != len("+(low)+") == "+ 142 tc.getLength(low); 143 assert(tc.getLength(high-1) == n) 144 : n+" != len("+(high-1)+") == "+ 145 tc.getLength(high-1); 146 int midTarget = low + (high-low)/2; 147 int mid = low; 148 // Divide the values into cadres, and sort within each. 149 int prevCount = -1; 150 int prevLimit = low; 151 for (int i = low; i < high; i++) { 152 int val = fValues[i]; 153 int count = vHist.getFrequency(val); 154 if (prevCount != count) { 155 if (n == 1) { 156 // For the single-byte encoding, keep strict order 157 // among frequency groups. 158 Arrays.sort(fValues, prevLimit, i); 159 } else if (Math.abs(mid - midTarget) > 160 Math.abs(i - midTarget)) { 161 // Find a single inflection point 162 // close to the middle of the byte-size cadre. 163 mid = i; 164 } 165 prevCount = count; 166 prevLimit = i; 167 } 168 } 169 if (n == 1) { 170 Arrays.sort(fValues, prevLimit, high); 171 } else { 172 // Sort up to the midpoint, if any. 173 Arrays.sort(fValues, low, mid); 174 Arrays.sort(fValues, mid, high); 175 } 176 assert(tc.getLength(low) == tc.getLength(mid)); 177 assert(tc.getLength(low) == tc.getLength(high-1)); 178 fillp = nmax+1; 179 } 180 assert(fillp == fValues.length); 181 182 // Reset symtab. 183 symtab = null; 184 } 185 186 public int getToken(int value) { 187 if (symtab == null) 188 symtab = makeSymtab(); 189 int pos = Arrays.binarySearch(symtab, (long)value << 32); 190 if (pos < 0) pos = -pos-1; 191 if (pos < symtab.length && value == (int)(symtab[pos] >>> 32)) 192 return (int)symtab[pos]; 193 else 194 return 0; 195 } 196 197 public int[][] encodeValues(int[] values, int start, int end) { 198 // Compute token sequence. 199 int[] tokens = new int[end-start]; 200 int nuv = 0; 201 for (int i = 0; i < tokens.length; i++) { 202 int val = values[start+i]; 203 int tok = getToken(val); 204 if (tok != 0) 205 tokens[i] = tok; 206 else 207 nuv += 1; 208 } 209 // Compute unfavored value sequence. 210 int[] unfavoredValues = new int[nuv]; 211 nuv = 0; // reset 212 for (int i = 0; i < tokens.length; i++) { 213 if (tokens[i] != 0) continue; // already covered 214 int val = values[start+i]; 215 unfavoredValues[nuv++] = val; 216 } 217 assert(nuv == unfavoredValues.length); 218 return new int[][]{ tokens, unfavoredValues }; 219 } 220 221 private long[] makeSymtab() { 222 long[] lsymtab = new long[fVlen]; 223 for (int token = 1; token <= fVlen; token++) { 224 lsymtab[token-1] = ((long)fValues[token] << 32) | token; 225 } 226 // Index by value: 227 Arrays.sort(lsymtab); 228 return lsymtab; 229 } 230 231 private Coding getTailCoding(CodingMethod c) { 232 while (c instanceof AdaptiveCoding) 233 c = ((AdaptiveCoding)c).tailCoding; 234 return (Coding) c; 235 } 236 237 // CodingMethod methods. 238 public void writeArrayTo(OutputStream out, int[] a, int start, int end) throws IOException { 239 int[][] vals = encodeValues(a, start, end); 240 writeSequencesTo(out, vals[0], vals[1]); 241 } 242 void writeSequencesTo(OutputStream out, int[] tokens, int[] uValues) throws IOException { 243 favoredCoding.writeArrayTo(out, fValues, 1, 1+fVlen); 244 getTailCoding(favoredCoding).writeTo(out, computeSentinelValue()); 245 tokenCoding.writeArrayTo(out, tokens, 0, tokens.length); 246 if (uValues.length > 0) 247 unfavoredCoding.writeArrayTo(out, uValues, 0, uValues.length); 248 } 249 250 int computeSentinelValue() { 251 Coding fc = getTailCoding(favoredCoding); 252 if (fc.isDelta()) { 253 // repeat the last favored value, using delta=0 254 return 0; 255 } else { 256 // else repeat the shorter of the min or last value 257 int min = fValues[1]; 258 int last = min; 259 // (remember that fVlen is an inclusive limit in fValues) 260 for (int i = 2; i <= fVlen; i++) { 261 last = fValues[i]; 262 min = moreCentral(min, last); 263 } 264 int endVal; 265 if (fc.getLength(min) <= fc.getLength(last)) 266 return min; 267 else 268 return last; 269 } 270 } 271 272 public void readArrayFrom(InputStream in, int[] a, int start, int end) throws IOException { 273 // Parameters are fCode, L, uCode. 274 setFavoredValues(readFavoredValuesFrom(in, end-start)); 275 // Read the tokens. Read them into the final array, for the moment. 276 tokenCoding.readArrayFrom(in, a, start, end); 277 // Decode the favored tokens. 278 int headp = 0, tailp = -1; 279 int uVlen = 0; 280 for (int i = start; i < end; i++) { 281 int tok = a[i]; 282 if (tok == 0) { 283 // Make a linked list, and decode in a second pass. 284 if (tailp < 0) { 285 headp = i; 286 } else { 287 a[tailp] = i; 288 } 289 tailp = i; 290 uVlen += 1; 291 } else { 292 a[i] = fValues[tok]; 293 } 294 } 295 // Walk the linked list of "zero" locations, decoding unfavored vals. 296 int[] uValues = new int[uVlen]; 297 if (uVlen > 0) 298 unfavoredCoding.readArrayFrom(in, uValues, 0, uVlen); 299 for (int i = 0; i < uVlen; i++) { 300 int nextp = a[headp]; 301 a[headp] = uValues[i]; 302 headp = nextp; 303 } 304 } 305 306 int[] readFavoredValuesFrom(InputStream in, int maxForDebug) throws IOException { 307 int[] lfValues = new int[1000]; // realloc as needed 308 // The set uniqueValuesForDebug records all favored values. 309 // As each new value is added, we assert that the value 310 // was not already in the set. 311 Set<Integer> uniqueValuesForDebug = null; 312 assert((uniqueValuesForDebug = new HashSet<>()) != null); 313 int fillp = 1; 314 maxForDebug += fillp; 315 int min = Integer.MIN_VALUE; // farthest from the center 316 //int min2 = Integer.MIN_VALUE; // emulate buggy 150.7 spec. 317 int last = 0; 318 CodingMethod fcm = favoredCoding; 319 while (fcm instanceof AdaptiveCoding) { 320 AdaptiveCoding ac = (AdaptiveCoding) fcm; 321 int len = ac.headLength; 322 while (fillp + len > lfValues.length) { 323 lfValues = BandStructure.realloc(lfValues); 324 } 325 int newFillp = fillp + len; 326 ac.headCoding.readArrayFrom(in, lfValues, fillp, newFillp); 327 while (fillp < newFillp) { 328 int val = lfValues[fillp++]; 329 assert(uniqueValuesForDebug.add(val)); 330 assert(fillp <= maxForDebug); 331 last = val; 332 min = moreCentral(min, val); 333 //min2 = moreCentral2(min2, val, min); 334 } 335 fcm = ac.tailCoding; 336 } 337 Coding fc = (Coding) fcm; 338 if (fc.isDelta()) { 339 for (long state = 0;;) { 340 // Read a new value: 341 state += fc.readFrom(in); 342 int val; 343 if (fc.isSubrange()) 344 val = fc.reduceToUnsignedRange(state); 345 else 346 val = (int)state; 347 state = val; 348 if (fillp > 1 && (val == last || val == min)) //|| val == min2 349 break; 350 if (fillp == lfValues.length) 351 lfValues = BandStructure.realloc(lfValues); 352 lfValues[fillp++] = val; 353 assert(uniqueValuesForDebug.add(val)); 354 assert(fillp <= maxForDebug); 355 last = val; 356 min = moreCentral(min, val); 357 //min2 = moreCentral(min2, val); 358 } 359 } else { 360 for (;;) { 361 int val = fc.readFrom(in); 362 if (fillp > 1 && (val == last || val == min)) //|| val == min2 363 break; 364 if (fillp == lfValues.length) 365 lfValues = BandStructure.realloc(lfValues); 366 lfValues[fillp++] = val; 367 assert(uniqueValuesForDebug.add(val)); 368 assert(fillp <= maxForDebug); 369 last = val; 370 min = moreCentral(min, val); 371 //min2 = moreCentral2(min2, val, min); 372 } 373 } 374 return BandStructure.realloc(lfValues, fillp); 375 } 376 377 private static int moreCentral(int x, int y) { 378 int kx = (x >> 31) ^ (x << 1); 379 int ky = (y >> 31) ^ (y << 1); 380 // bias kx/ky to get an unsigned comparison: 381 kx -= Integer.MIN_VALUE; 382 ky -= Integer.MIN_VALUE; 383 int xy = (kx < ky? x: y); 384 // assert that this ALU-ish version is the same: 385 assert(xy == moreCentralSlow(x, y)); 386 return xy; 387 } 388 // private static int moreCentral2(int x, int y, int min) { 389 // // Strict implementation of buggy 150.7 specification. 390 // // The bug is that the spec. says absolute-value ties are broken 391 // // in favor of positive numbers, but the suggested implementation 392 // // (also mentioned in the spec.) breaks ties in favor of negatives. 393 // if (x + y == 0) return (x > y? x : y); 394 // return min; 395 // } 396 private static int moreCentralSlow(int x, int y) { 397 int ax = x; 398 if (ax < 0) ax = -ax; 399 if (ax < 0) return y; //x is MIN_VALUE 400 int ay = y; 401 if (ay < 0) ay = -ay; 402 if (ay < 0) return x; //y is MIN_VALUE 403 if (ax < ay) return x; 404 if (ax > ay) return y; 405 // At this point the absolute values agree, and the negative wins. 406 return x < y ? x : y; 407 } 408 409 static final int[] LValuesCoded 410 = { -1, 4, 8, 16, 32, 64, 128, 192, 224, 240, 248, 252 }; 411 412 public byte[] getMetaCoding(Coding dflt) { 413 int K = fVlen; 414 int LCoded = 0; 415 if (tokenCoding instanceof Coding) { 416 Coding tc = (Coding) tokenCoding; 417 if (tc.B() == 1) { 418 LCoded = 1; 419 } else if (L >= 0) { 420 assert(L == tc.L()); 421 for (int i = 1; i < LValuesCoded.length; i++) { 422 if (LValuesCoded[i] == L) { LCoded = i; break; } 423 } 424 } 425 } 426 CodingMethod tokenDflt = null; 427 if (LCoded != 0 && tokenCoding == fitTokenCoding(fVlen, L)) { 428 // A simple L value is enough to recover the tokenCoding. 429 tokenDflt = tokenCoding; 430 } 431 int FDef = (favoredCoding == dflt)?1:0; 432 int UDef = (unfavoredCoding == dflt || unfavoredCoding == null)?1:0; 433 int TDef = (tokenCoding == tokenDflt)?1:0; 434 int TDefL = (TDef == 1) ? LCoded : 0; 435 assert(TDef == ((TDefL>0)?1:0)); 436 ByteArrayOutputStream bytes = new ByteArrayOutputStream(10); 437 bytes.write(_meta_pop + FDef + 2*UDef + 4*TDefL); 438 try { 439 if (FDef == 0) bytes.write(favoredCoding.getMetaCoding(dflt)); 440 if (TDef == 0) bytes.write(tokenCoding.getMetaCoding(dflt)); 441 if (UDef == 0) bytes.write(unfavoredCoding.getMetaCoding(dflt)); 442 } catch (IOException ee) { 443 throw new RuntimeException(ee); 444 } 445 return bytes.toByteArray(); 446 } 447 public static int parseMetaCoding(byte[] bytes, int pos, Coding dflt, CodingMethod res[]) { 448 int op = bytes[pos++] & 0xFF; 449 if (op < _meta_pop || op >= _meta_limit) return pos-1; // backup 450 op -= _meta_pop; 451 int FDef = op % 2; 452 int UDef = (op / 2) % 2; 453 int TDefL = (op / 4); 454 int TDef = (TDefL > 0)?1:0; 455 int L = LValuesCoded[TDefL]; 456 CodingMethod[] FCode = {dflt}, TCode = {null}, UCode = {dflt}; 457 if (FDef == 0) 458 pos = BandStructure.parseMetaCoding(bytes, pos, dflt, FCode); 459 if (TDef == 0) 460 pos = BandStructure.parseMetaCoding(bytes, pos, dflt, TCode); 461 if (UDef == 0) 462 pos = BandStructure.parseMetaCoding(bytes, pos, dflt, UCode); 463 PopulationCoding pop = new PopulationCoding(); 464 pop.L = L; // might be -1 465 pop.favoredCoding = FCode[0]; 466 pop.tokenCoding = TCode[0]; // might be null! 467 pop.unfavoredCoding = UCode[0]; 468 res[0] = pop; 469 return pos; 470 } 471 472 private String keyString(CodingMethod m) { 473 if (m instanceof Coding) 474 return ((Coding)m).keyString(); 475 if (m == null) 476 return "none"; 477 return m.toString(); 478 } 479 public String toString() { 480 PropMap p200 = Utils.currentPropMap(); 481 boolean verbose 482 = (p200 != null && 483 p200.getBoolean(Utils.COM_PREFIX+"verbose.pop")); 484 StringBuilder res = new StringBuilder(100); 485 res.append("pop(").append("fVlen=").append(fVlen); 486 if (verbose && fValues != null) { 487 res.append(" fV=["); 488 for (int i = 1; i <= fVlen; i++) { 489 res.append(i==1?"":",").append(fValues[i]); 490 } 491 res.append(";").append(computeSentinelValue()); 492 res.append("]"); 493 } 494 res.append(" fc=").append(keyString(favoredCoding)); 495 res.append(" tc=").append(keyString(tokenCoding)); 496 res.append(" uc=").append(keyString(unfavoredCoding)); 497 res.append(")"); 498 return res.toString(); 499 } 500 }