1 /*
   2  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package com.sun.java.util.jar.pack;
  27 
  28 import java.io.ByteArrayOutputStream;
  29 import java.io.IOException;
  30 import java.io.InputStream;
  31 import java.io.OutputStream;
  32 import java.util.Arrays;
  33 import java.util.HashSet;
  34 import java.util.Set;
  35 import static com.sun.java.util.jar.pack.Constants.*;
  36 
  37 /**
  38  * Population-based coding.
  39  * See the section "Encodings of Uncorrelated Values" in the Pack200 spec.
  40  * @author John Rose
  41  */
  42 // This tactic alone reduces the final zipped rt.jar by about a percent.
  43 class PopulationCoding implements CodingMethod {
  44     Histogram vHist;   // histogram of all values
  45     int[]     fValues; // list of favored values
  46     int       fVlen;   // inclusive max index
  47     long[]    symtab;  // int map of favored value -> token [1..#fValues]
  48 
  49     CodingMethod favoredCoding;
  50     CodingMethod tokenCoding;
  51     CodingMethod unfavoredCoding;
  52 
  53     int L = -1;  //preferred L value for tokenCoding
  54 
  55     public void setFavoredValues(int[] fValues, int fVlen) {
  56         // Note:  {f} is allFavoredValues[1..fvlen], not [0..fvlen-1].
  57         // This is because zero is an exceptional favored value index.
  58         assert(fValues[0] == 0);  // must be empty
  59         assert(this.fValues == null);  // do not do this twice
  60         this.fValues = fValues;
  61         this.fVlen   = fVlen;
  62         if (L >= 0) {
  63             setL(L);  // reassert
  64         }
  65     }
  66     public void setFavoredValues(int[] fValues) {
  67         int lfVlen = fValues.length-1;
  68         setFavoredValues(fValues, lfVlen);
  69     }
  70     public void setHistogram(Histogram vHist) {
  71         this.vHist = vHist;
  72     }
  73     public void setL(int L) {
  74         this.L = L;
  75         if (L >= 0 && fValues != null && tokenCoding == null) {
  76             tokenCoding = fitTokenCoding(fVlen, L);
  77             assert(tokenCoding != null);
  78         }
  79     }
  80 
  81     public static Coding fitTokenCoding(int fVlen, int L) {
  82         // Find the smallest B s.t. (B,H,0) covers fVlen.
  83         if (fVlen < 256)
  84             // H/L do not matter when B==1
  85             return BandStructure.BYTE1;
  86         Coding longest = BandStructure.UNSIGNED5.setL(L);
  87         if (!longest.canRepresentUnsigned(fVlen))
  88             return null;  // failure; L is too sharp and fVlen too large
  89         Coding tc = longest;
  90         for (Coding shorter = longest; ; ) {
  91             shorter = shorter.setB(shorter.B()-1);
  92             if (shorter.umax() < fVlen)
  93                 break;
  94             tc = shorter;  // shorten it by reducing B
  95         }
  96         return tc;
  97     }
  98 
  99     public void setFavoredCoding(CodingMethod favoredCoding) {
 100         this.favoredCoding = favoredCoding;
 101     }
 102     public void setTokenCoding(CodingMethod tokenCoding) {
 103         this.tokenCoding = tokenCoding;
 104         this.L = -1;
 105         if (tokenCoding instanceof Coding && fValues != null) {
 106             Coding tc = (Coding) tokenCoding;
 107             if (tc == fitTokenCoding(fVlen, tc.L()))
 108                 this.L = tc.L();
 109             // Otherwise, it's a non-default coding.
 110         }
 111     }
 112     public void setUnfavoredCoding(CodingMethod unfavoredCoding) {
 113         this.unfavoredCoding = unfavoredCoding;
 114     }
 115 
 116     public int favoredValueMaxLength() {
 117         if (L == 0)
 118             return Integer.MAX_VALUE;
 119         else
 120             return BandStructure.UNSIGNED5.setL(L).umax();
 121     }
 122 
 123     public void resortFavoredValues() {
 124         Coding tc = (Coding) tokenCoding;
 125         // Make a local copy before reordering.
 126         fValues = BandStructure.realloc(fValues, 1+fVlen);
 127         // Resort favoredValues within each byte-size cadre.
 128         int fillp = 1;  // skip initial zero
 129         for (int n = 1; n <= tc.B(); n++) {
 130             int nmax = tc.byteMax(n);
 131             if (nmax > fVlen)
 132                 nmax = fVlen;
 133             if (nmax < tc.byteMin(n))
 134                 break;
 135             int low = fillp;
 136             int high = nmax+1;
 137             if (high == low)  continue;
 138             assert(high > low)
 139                 : high+"!>"+low;
 140             assert(tc.getLength(low) == n)
 141                 : n+" != len("+(low)+") == "+
 142                   tc.getLength(low);
 143             assert(tc.getLength(high-1) == n)
 144                 : n+" != len("+(high-1)+") == "+
 145                   tc.getLength(high-1);
 146             int midTarget = low + (high-low)/2;
 147             int mid = low;
 148             // Divide the values into cadres, and sort within each.
 149             int prevCount = -1;
 150             int prevLimit = low;
 151             for (int i = low; i < high; i++) {
 152                 int val = fValues[i];
 153                 int count = vHist.getFrequency(val);
 154                 if (prevCount != count) {
 155                     if (n == 1) {
 156                         // For the single-byte encoding, keep strict order
 157                         // among frequency groups.
 158                         Arrays.sort(fValues, prevLimit, i);
 159                     } else if (Math.abs(mid - midTarget) >
 160                                Math.abs(i   - midTarget)) {
 161                         // Find a single inflection point
 162                         // close to the middle of the byte-size cadre.
 163                         mid = i;
 164                     }
 165                     prevCount = count;
 166                     prevLimit = i;
 167                 }
 168             }
 169             if (n == 1) {
 170                 Arrays.sort(fValues, prevLimit, high);
 171             } else {
 172                 // Sort up to the midpoint, if any.
 173                 Arrays.sort(fValues, low, mid);
 174                 Arrays.sort(fValues, mid, high);
 175             }
 176             assert(tc.getLength(low) == tc.getLength(mid));
 177             assert(tc.getLength(low) == tc.getLength(high-1));
 178             fillp = nmax+1;
 179         }
 180         assert(fillp == fValues.length);
 181 
 182         // Reset symtab.
 183         symtab = null;
 184     }
 185 
 186     public int getToken(int value) {
 187         if (symtab == null)
 188             symtab = makeSymtab();
 189         int pos = Arrays.binarySearch(symtab, (long)value << 32);
 190         if (pos < 0)  pos = -pos-1;
 191         if (pos < symtab.length && value == (int)(symtab[pos] >>> 32))
 192             return (int)symtab[pos];
 193         else
 194             return 0;
 195     }
 196 
 197     public int[][] encodeValues(int[] values, int start, int end) {
 198         // Compute token sequence.
 199         int[] tokens = new int[end-start];
 200         int nuv = 0;
 201         for (int i = 0; i < tokens.length; i++) {
 202             int val = values[start+i];
 203             int tok = getToken(val);
 204             if (tok != 0)
 205                 tokens[i] = tok;
 206             else
 207                 nuv += 1;
 208         }
 209         // Compute unfavored value sequence.
 210         int[] unfavoredValues = new int[nuv];
 211         nuv = 0;  // reset
 212         for (int i = 0; i < tokens.length; i++) {
 213             if (tokens[i] != 0)  continue;  // already covered
 214             int val = values[start+i];
 215             unfavoredValues[nuv++] = val;
 216         }
 217         assert(nuv == unfavoredValues.length);
 218         return new int[][]{ tokens, unfavoredValues };
 219     }
 220 
 221     private long[] makeSymtab() {
 222         long[] lsymtab = new long[fVlen];
 223         for (int token = 1; token <= fVlen; token++) {
 224             lsymtab[token-1] = ((long)fValues[token] << 32) | token;
 225         }
 226         // Index by value:
 227         Arrays.sort(lsymtab);
 228         return lsymtab;
 229     }
 230 
 231     private Coding getTailCoding(CodingMethod c) {
 232         while (c instanceof AdaptiveCoding)
 233             c = ((AdaptiveCoding)c).tailCoding;
 234         return (Coding) c;
 235     }
 236 
 237     // CodingMethod methods.
 238     public void writeArrayTo(OutputStream out, int[] a, int start, int end) throws IOException {
 239         int[][] vals = encodeValues(a, start, end);
 240         writeSequencesTo(out, vals[0], vals[1]);
 241     }
 242     void writeSequencesTo(OutputStream out, int[] tokens, int[] uValues) throws IOException {
 243         favoredCoding.writeArrayTo(out, fValues, 1, 1+fVlen);
 244         getTailCoding(favoredCoding).writeTo(out, computeSentinelValue());
 245         tokenCoding.writeArrayTo(out, tokens, 0, tokens.length);
 246         if (uValues.length > 0)
 247             unfavoredCoding.writeArrayTo(out, uValues, 0, uValues.length);
 248     }
 249 
 250    int computeSentinelValue() {
 251         Coding fc = getTailCoding(favoredCoding);
 252         if (fc.isDelta()) {
 253             // repeat the last favored value, using delta=0
 254             return 0;
 255         } else {
 256             // else repeat the shorter of the min or last value
 257             int min = fValues[1];
 258             int last = min;
 259             // (remember that fVlen is an inclusive limit in fValues)
 260             for (int i = 2; i <= fVlen; i++) {
 261                 last = fValues[i];
 262                 min = moreCentral(min, last);
 263             }
 264             int endVal;
 265             if (fc.getLength(min) <= fc.getLength(last))
 266                 return min;
 267             else
 268                 return last;
 269         }
 270    }
 271 
 272     public void readArrayFrom(InputStream in, int[] a, int start, int end) throws IOException {
 273         // Parameters are fCode, L, uCode.
 274         setFavoredValues(readFavoredValuesFrom(in, end-start));
 275         // Read the tokens.  Read them into the final array, for the moment.
 276         tokenCoding.readArrayFrom(in, a, start, end);
 277         // Decode the favored tokens.
 278         int headp = 0, tailp = -1;
 279         int uVlen = 0;
 280         for (int i = start; i < end; i++) {
 281             int tok = a[i];
 282             if (tok == 0) {
 283                 // Make a linked list, and decode in a second pass.
 284                 if (tailp < 0) {
 285                     headp = i;
 286                 } else {
 287                     a[tailp] = i;
 288                 }
 289                 tailp = i;
 290                 uVlen += 1;
 291             } else {
 292                 a[i] = fValues[tok];
 293             }
 294         }
 295         // Walk the linked list of "zero" locations, decoding unfavored vals.
 296         int[] uValues = new int[uVlen];
 297         if (uVlen > 0)
 298             unfavoredCoding.readArrayFrom(in, uValues, 0, uVlen);
 299         for (int i = 0; i < uVlen; i++) {
 300             int nextp = a[headp];
 301             a[headp] = uValues[i];
 302             headp = nextp;
 303         }
 304     }
 305 
 306     int[] readFavoredValuesFrom(InputStream in, int maxForDebug) throws IOException {
 307         int[] lfValues = new int[1000];  // realloc as needed
 308         // The set uniqueValuesForDebug records all favored values.
 309         // As each new value is added, we assert that the value
 310         // was not already in the set.
 311         Set<Integer> uniqueValuesForDebug = null;
 312         assert((uniqueValuesForDebug = new HashSet<>()) != null);
 313         int fillp = 1;
 314         maxForDebug += fillp;
 315         int min = Integer.MIN_VALUE;  // farthest from the center
 316         //int min2 = Integer.MIN_VALUE;  // emulate buggy 150.7 spec.
 317         int last = 0;
 318         CodingMethod fcm = favoredCoding;
 319         while (fcm instanceof AdaptiveCoding) {
 320             AdaptiveCoding ac = (AdaptiveCoding) fcm;
 321             int len = ac.headLength;
 322             while (fillp + len > lfValues.length) {
 323                 lfValues = BandStructure.realloc(lfValues);
 324             }
 325             int newFillp = fillp + len;
 326             ac.headCoding.readArrayFrom(in, lfValues, fillp, newFillp);
 327             while (fillp < newFillp) {
 328                 int val = lfValues[fillp++];
 329                 assert(uniqueValuesForDebug.add(val));
 330                 assert(fillp <= maxForDebug);
 331                 last = val;
 332                 min = moreCentral(min, val);
 333                 //min2 = moreCentral2(min2, val, min);
 334             }
 335             fcm = ac.tailCoding;
 336         }
 337         Coding fc = (Coding) fcm;
 338         if (fc.isDelta()) {
 339             for (long state = 0;;) {
 340                 // Read a new value:
 341                 state += fc.readFrom(in);
 342                 int val;
 343                 if (fc.isSubrange())
 344                     val = fc.reduceToUnsignedRange(state);
 345                 else
 346                     val = (int)state;
 347                 state = val;
 348                 if (fillp > 1 && (val == last || val == min)) //|| val == min2
 349                     break;
 350                 if (fillp == lfValues.length)
 351                     lfValues = BandStructure.realloc(lfValues);
 352                 lfValues[fillp++] = val;
 353                 assert(uniqueValuesForDebug.add(val));
 354                 assert(fillp <= maxForDebug);
 355                 last = val;
 356                 min = moreCentral(min, val);
 357                 //min2 = moreCentral(min2, val);
 358             }
 359         } else {
 360             for (;;) {
 361                 int val = fc.readFrom(in);
 362                 if (fillp > 1 && (val == last || val == min)) //|| val == min2
 363                     break;
 364                 if (fillp == lfValues.length)
 365                     lfValues = BandStructure.realloc(lfValues);
 366                 lfValues[fillp++] = val;
 367                 assert(uniqueValuesForDebug.add(val));
 368                 assert(fillp <= maxForDebug);
 369                 last = val;
 370                 min = moreCentral(min, val);
 371                 //min2 = moreCentral2(min2, val, min);
 372             }
 373         }
 374         return BandStructure.realloc(lfValues, fillp);
 375     }
 376 
 377     private static int moreCentral(int x, int y) {
 378         int kx = (x >> 31) ^ (x << 1);
 379         int ky = (y >> 31) ^ (y << 1);
 380         // bias kx/ky to get an unsigned comparison:
 381         kx -= Integer.MIN_VALUE;
 382         ky -= Integer.MIN_VALUE;
 383         int xy = (kx < ky? x: y);
 384         // assert that this ALU-ish version is the same:
 385         assert(xy == moreCentralSlow(x, y));
 386         return xy;
 387     }
 388 //  private static int moreCentral2(int x, int y, int min) {
 389 //      // Strict implementation of buggy 150.7 specification.
 390 //      // The bug is that the spec. says absolute-value ties are broken
 391 //      // in favor of positive numbers, but the suggested implementation
 392 //      // (also mentioned in the spec.) breaks ties in favor of negatives.
 393 //      if (x + y == 0)  return (x > y? x : y);
 394 //      return min;
 395 //  }
 396     private static int moreCentralSlow(int x, int y) {
 397         int ax = x;
 398         if (ax < 0)  ax = -ax;
 399         if (ax < 0)  return y;  //x is MIN_VALUE
 400         int ay = y;
 401         if (ay < 0)  ay = -ay;
 402         if (ay < 0)  return x;  //y is MIN_VALUE
 403         if (ax < ay)  return x;
 404         if (ax > ay)  return y;
 405         // At this point the absolute values agree, and the negative wins.
 406         return x < y ? x : y;
 407     }
 408 
 409     static final int[] LValuesCoded
 410         = { -1, 4, 8, 16, 32, 64, 128, 192, 224, 240, 248, 252 };
 411 
 412     public byte[] getMetaCoding(Coding dflt) {
 413         int K = fVlen;
 414         int LCoded = 0;
 415         if (tokenCoding instanceof Coding) {
 416             Coding tc = (Coding) tokenCoding;
 417             if (tc.B() == 1) {
 418                 LCoded = 1;
 419             } else if (L >= 0) {
 420                 assert(L == tc.L());
 421                 for (int i = 1; i < LValuesCoded.length; i++) {
 422                     if (LValuesCoded[i] == L) { LCoded = i; break; }
 423                 }
 424             }
 425         }
 426         CodingMethod tokenDflt = null;
 427         if (LCoded != 0 && tokenCoding == fitTokenCoding(fVlen, L)) {
 428             // A simple L value is enough to recover the tokenCoding.
 429             tokenDflt = tokenCoding;
 430         }
 431         int FDef = (favoredCoding == dflt)?1:0;
 432         int UDef = (unfavoredCoding == dflt || unfavoredCoding == null)?1:0;
 433         int TDef = (tokenCoding == tokenDflt)?1:0;
 434         int TDefL = (TDef == 1) ? LCoded : 0;
 435         assert(TDef == ((TDefL>0)?1:0));
 436         ByteArrayOutputStream bytes = new ByteArrayOutputStream(10);
 437         bytes.write(_meta_pop + FDef + 2*UDef + 4*TDefL);
 438         try {
 439             if (FDef == 0)  bytes.write(favoredCoding.getMetaCoding(dflt));
 440             if (TDef == 0)  bytes.write(tokenCoding.getMetaCoding(dflt));
 441             if (UDef == 0)  bytes.write(unfavoredCoding.getMetaCoding(dflt));
 442         } catch (IOException ee) {
 443             throw new RuntimeException(ee);
 444         }
 445         return bytes.toByteArray();
 446     }
 447     public static int parseMetaCoding(byte[] bytes, int pos, Coding dflt, CodingMethod[] res) {
 448         int op = bytes[pos++] & 0xFF;
 449         if (op < _meta_pop || op >= _meta_limit)  return pos-1; // backup
 450         op -= _meta_pop;
 451         int FDef = op % 2;
 452         int UDef = (op / 2) % 2;
 453         int TDefL = (op / 4);
 454         int TDef = (TDefL > 0)?1:0;
 455         int L = LValuesCoded[TDefL];
 456         CodingMethod[] FCode = {dflt}, TCode = {null}, UCode = {dflt};
 457         if (FDef == 0)
 458             pos = BandStructure.parseMetaCoding(bytes, pos, dflt, FCode);
 459         if (TDef == 0)
 460             pos = BandStructure.parseMetaCoding(bytes, pos, dflt, TCode);
 461         if (UDef == 0)
 462             pos = BandStructure.parseMetaCoding(bytes, pos, dflt, UCode);
 463         PopulationCoding pop = new PopulationCoding();
 464         pop.L = L;  // might be -1
 465         pop.favoredCoding   = FCode[0];
 466         pop.tokenCoding     = TCode[0];  // might be null!
 467         pop.unfavoredCoding = UCode[0];
 468         res[0] = pop;
 469         return pos;
 470     }
 471 
 472     private String keyString(CodingMethod m) {
 473         if (m instanceof Coding)
 474             return ((Coding)m).keyString();
 475         if (m == null)
 476             return "none";
 477         return m.toString();
 478     }
 479     public String toString() {
 480         PropMap p200 = Utils.currentPropMap();
 481         boolean verbose
 482             = (p200 != null &&
 483                p200.getBoolean(Utils.COM_PREFIX+"verbose.pop"));
 484         StringBuilder res = new StringBuilder(100);
 485         res.append("pop(").append("fVlen=").append(fVlen);
 486         if (verbose && fValues != null) {
 487             res.append(" fV=[");
 488             for (int i = 1; i <= fVlen; i++) {
 489                 res.append(i==1?"":",").append(fValues[i]);
 490             }
 491             res.append(";").append(computeSentinelValue());
 492             res.append("]");
 493         }
 494         res.append(" fc=").append(keyString(favoredCoding));
 495         res.append(" tc=").append(keyString(tokenCoding));
 496         res.append(" uc=").append(keyString(unfavoredCoding));
 497         res.append(")");
 498         return res.toString();
 499     }
 500 }