1 /*
   2  * Copyright (c) 2020 SAP SE. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 package jdk.test.lib.hprof.parser;
  24 
  25 import java.io.ByteArrayOutputStream;
  26 import java.io.Closeable;
  27 import java.io.EOFException;
  28 import java.io.IOException;
  29 import java.io.InputStream;
  30 import java.io.RandomAccessFile;
  31 import java.util.ArrayList;
  32 import java.util.Collections;
  33 import java.util.Comparator;
  34 import java.util.zip.DataFormatException;
  35 import java.util.zip.Inflater;
  36 
  37 public class GzipRandomAccess implements AutoCloseable, Closeable {
  38     // A comparator which compares chunks by their file offset.
  39     private static FileOffsetComparator fileOffsetComp = new FileOffsetComparator();
  40 
  41     // A comparator which compares chunks by their offset.
  42     private static OffsetComparator offsetComp = new OffsetComparator();
  43 
  44     // The size to use when reading from the random access file.
  45     private static final int READ_SIZE = 65536;
  46 
  47     // The last used buffer.
  48     private Buffer last;
  49 
  50     // The underlying random access file to use.
  51     private final RandomAccessFile raf;
  52 
  53     // The length of the random access file.
  54     private final long fileSize;
  55 
  56     // The maximum size of a buffer cache.
  57     private final int cacheSize;
  58 
  59     // The maximum numbers of buffers to cache.
  60     private final int maxCachedBuffers;
  61 
  62     // A buffer used to read from the file.
  63     private final byte[] in;
  64 
  65     // A sorted list of the buffers we know so far.
  66     private final ArrayList<Buffer> buffers;
  67 
  68     // The inflater to use.
  69     private final Inflater inf;
  70 
  71     // The head of the list which contains the buffers with cached data.
  72     private final Buffer cacheHead;
  73 
  74     // The number of cached buffers in the list.
  75     private int cachedBuffers;
  76 
  77     // This is private to ensure we only handle the specific hprof gzip files
  78     // written by the VM.
  79     private GzipRandomAccess(String file, int bufferSize, int maxCachedBuffers) throws IOException {
  80         last = null;
  81         raf = new RandomAccessFile(file, "r");
  82         fileSize = raf.length();
  83         this.cacheSize = bufferSize;
  84         this.maxCachedBuffers = maxCachedBuffers;
  85         cachedBuffers = 0;
  86         in = new byte[READ_SIZE];
  87         buffers = new ArrayList<Buffer>();
  88         inf = new Inflater(true);
  89         cacheHead = new Buffer(-1, -1);
  90         cacheHead.next = cacheHead;
  91         cacheHead.prev = cacheHead;
  92         buffers.add(new Buffer(0, 0));
  93     }
  94 
  95     /**
  96      * Clears the cache.
  97      */
  98     public synchronized void clearCache() {
  99         while (cacheHead.next != cacheHead) {
 100             assert cacheHead.next.cache != null;
 101             Buffer buf = cacheHead.next;
 102             remove(buf);
 103             buf.cache = null;
 104         }
 105 
 106         last = null;
 107         cachedBuffers = 0;
 108     }
 109 
 110     /**
 111      * Returns an approximate file offset for the given offset. The return value should only be used for progress
 112      * indication and the like. Note that you should only query offsets you've already read.
 113      *
 114      * @param offset The offset.
 115      * @return The approximate file offset.
 116      */
 117     public synchronized long getFileOffset(long offset) {
 118         int pos = Collections.binarySearch(buffers, new Buffer(0, offset), offsetComp);
 119         int realPos = pos >= 0 ? pos : -pos - 2;
 120 
 121         if (realPos >= buffers.size() - 1) {
 122             return buffers.get(buffers.size() - 1).fileOffset;
 123         }
 124 
 125         // Assume uniform compression.
 126         Buffer buf = buffers.get(realPos);
 127         long diff = offset - buf.offset;
 128         long bufFileEnd = buffers.get(realPos + 1).fileOffset;
 129         long fileDiff = bufFileEnd - buf.fileOffset;
 130         double filePerDiff = (double) Math.max(1, fileDiff) / Math.max(1, buf.cacheLen);
 131 
 132         return buf.fileOffset + (long) (filePerDiff * diff);
 133     }
 134 
 135     /**
 136      * @return Returns the size of the underlying file.
 137      */
 138     public long getFileSize() {
 139         return fileSize;
 140     }
 141 
 142     /**
 143      * Returns an @link {@link InputStream} to read from the given offset. Note
 144      * that closing the input stream does not closes the underlying @link
 145      * {@link GzipRandomAccess} object.
 146      *
 147      * @param offset The offset.
 148      * @return The input stream.
 149      */
 150     public InputStream asStream(long offset) {
 151         return new InputStreamImpl(offset, this);
 152     }
 153 
 154     /**
 155      * Returns a @link ReadBuffer which uses this object to do the actual
 156      * operation. Note that closing the returned object closes the the
 157      * underlying @link {@link GzipRandomAccess} object.
 158      *
 159      * @return The @link ReadBuffer.
 160      */
 161     public ReadBuffer asFileBuffer() {
 162         return new ReadBufferImpl(this);
 163     }
 164 
 165     /**
 166      * Closes the object and clears the cache. The object is unusable after this
 167      * call.
 168      */
 169     @Override
 170     public synchronized void close() throws IOException {
 171         clearCache();
 172         buffers.clear();
 173         raf.close();
 174         inf.end();
 175     }
 176 
 177     /**
 178      * Reads bytes from the gzip file.
 179      *
 180      * @param offset The offset from which to start the read.
 181      * @param b The buffer to read into.
 182      * @param off The offset in the buffer to use.
 183      * @param len The number of bytes to read at most.
 184      * @return The number of bytes read or -1 if we are at the end of the file.
 185      * @throws IOException On error.
 186      */
 187     public synchronized int read(long offset, byte[] b, int off, int len) throws IOException {
 188         Buffer buf = last;
 189 
 190         while (buf == null || (buf.offset > offset) || (buf.offset + buf.cacheLen <= offset)) {
 191             int pos = Collections.binarySearch(buffers, new Buffer(0, offset), offsetComp);
 192             buf = buffers.get(pos >= 0 ? pos : -pos - 2);
 193 
 194             if (buf.fileOffset >= fileSize) {
 195                 return -1;
 196             }
 197 
 198             if (buf.cache != null) {
 199                 // If already loaded, move to front of the cache list.
 200                 last = buf;
 201 
 202                 if (cacheHead.next != buf) {
 203                     remove(buf);
 204                     addFirst(buf);
 205                 }
 206             } else {
 207                 try {
 208                     // Note that the load will also add the following buffer to the list, so the while
 209                     // loop will eventually terminate.
 210                     loadBuffer(buf);
 211                 } catch (DataFormatException e) {
 212                     throw new IOException(e);
 213                 }
 214             }
 215         }
 216 
 217         int copyOffset = (int) (offset - buf.offset);
 218         int toCopyMax = buf.cacheLen - copyOffset;
 219         int toCopy = Math.min(toCopyMax, len);
 220 
 221         if (toCopy <= 0) {
 222             return -1;
 223         }
 224 
 225         System.arraycopy(buf.cache, copyOffset, b, off, toCopy);
 226 
 227         return toCopy;
 228     }
 229 
 230     /**
 231      * Returns the access object for the given file or <code>null</code> if not supported for the file.
 232      *
 233      * @param file The file name.
 234      * @param cacheSizeInMB The size of the cache to use in megabytes.
 235      * @return The access object or <code>null</code>.
 236      */
 237     public static GzipRandomAccess getAccess(String file, int cacheSizeInMB) throws IOException {
 238         try  (RandomAccessFile raf = new RandomAccessFile(file, "r")) {
 239             int header = raf.readInt();
 240 
 241             if ((header >>> 8) != 0x1f8b08) {
 242                 // No gzip with deflate.
 243                 return null;
 244             }
 245 
 246             if ((header & 16) == 0) {
 247                 // No comment
 248                 return null;
 249             }
 250 
 251             raf.readInt(); // timestamp
 252             raf.readChar(); // Extra flags and os.
 253 
 254             if ((header & 4) != 0) {
 255                 // Skip extra flags.
 256                 raf.skipBytes(raf.read() + (raf.read() << 256));
 257             }
 258 
 259             // Skip name
 260             if ((header & 8) != 0) {
 261                 while (raf.read() != 0) {
 262                     // Wait for the last 0.
 263                 }
 264             }
 265 
 266             // Read the comment.
 267             ByteArrayOutputStream bos = new ByteArrayOutputStream();
 268             int b;
 269 
 270             while ((b = raf.read()) > 0) {
 271                 bos.write(b);
 272             }
 273 
 274             // Check if the block size is included in the comment.
 275             String comment = bos.toString("UTF-8");
 276             String expectedPrefix = "HPROF BLOCKSIZE=";
 277 
 278             if (comment.startsWith(expectedPrefix)) {
 279                 String chunkSizeStr = comment.substring(expectedPrefix.length()).split(" ")[0];
 280 
 281                 try {
 282                     int chunkSize = Integer.parseInt(chunkSizeStr);
 283 
 284                     if (chunkSize > 0) {
 285                         long nrOfChunks = Math.max(1000, cacheSizeInMB * 1024L * 1024L / chunkSize);
 286 
 287                         return new GzipRandomAccess(file, chunkSize, (int) nrOfChunks);
 288                     }
 289                 } catch (NumberFormatException e) {
 290                     // Could not parse.
 291                 }
 292             }
 293         }
 294 
 295         return null;
 296     }
 297 
 298     // Loads the content of a buffer. If this is the first time the buffer is
 299     // loaded, the next buffer is added too (but not loaded).
 300     private void loadBuffer(Buffer buf) throws IOException, DataFormatException {
 301         // If we have used all caches, take a cache from the least recently used cached buffer.
 302         if (cachedBuffers >= maxCachedBuffers) {
 303             Buffer toRemove = cacheHead.prev;
 304             remove(toRemove);
 305             buf.cache = toRemove.cache;
 306             toRemove.cache = null;
 307         } else {
 308             // Otherwise allocate a new cache.
 309             buf.cache = new byte[cacheSize];
 310             cachedBuffers += 1;
 311         }
 312 
 313         // Move to front of LRU list.
 314         last = buf;
 315         addFirst(buf);
 316 
 317         // Fill in the cache
 318         inf.reset();
 319         raf.seek(buf.fileOffset);
 320 
 321         int read = raf.read(in, 0, READ_SIZE);
 322         int inCount = read;
 323         int outCount = 0;
 324 
 325         // Skip header, but check at least a little
 326         if (read < 4) {
 327             throw new IOException("Missing data");
 328         }
 329 
 330         if ((in[0] != 0x1f) || ((in[1] & 0xff) != 0x8b)) {
 331             throw new IOException("Missing gzip id");
 332         }
 333 
 334         if (in[2] != 8) {
 335             throw new IOException("Only supports deflate");
 336         }
 337 
 338         int off = 10;
 339 
 340         // Extras
 341         if ((in[3] & 4) != 0) {
 342             int len = (in[off + 1] & 0xff) * 256 + (in[off] & 0xff);
 343             off += 2 + len;
 344         }
 345 
 346         // Name
 347         if ((in[3] & 8) != 0) {
 348             int len = 0;
 349 
 350             while (in[off + len] != 0) {
 351                 ++len;
 352             }
 353 
 354             off += len + 1;
 355         }
 356 
 357         // Comment
 358         if ((in[3] & 16) != 0) {
 359             int len = 0;
 360 
 361             while (in[off + len] != 0) {
 362                 ++len;
 363             }
 364 
 365             off += len + 1;
 366         }
 367 
 368         // Header CRC
 369         if ((in[3] & 2) != 0) {
 370             off += 2;
 371         }
 372 
 373         inf.setInput(in, off, read - off);
 374         outCount = inf.inflate(buf.cache, 0, buf.cache.length);
 375 
 376         while (!inf.finished()) {
 377             if (inf.needsInput()) {
 378                 read = raf.read(in, 0, READ_SIZE);
 379                 inf.setInput(in, 0, read);
 380                 inCount += read;
 381             }
 382 
 383             outCount += inf.inflate(buf.cache, outCount, buf.cache.length - outCount);
 384         }
 385 
 386         // Add the following buffer too if needed.
 387         if ((inf.getRemaining() != 0) || (inCount + buf.fileOffset + 8 != fileSize)) {
 388             long nextFileOffset = inCount - inf.getRemaining() + buf.fileOffset + 8 /* CRC */;
 389             long nextOffset = outCount + buf.offset;
 390 
 391             Buffer nextChunk = new Buffer(nextFileOffset, nextOffset);
 392             int pos = Collections.binarySearch(buffers, nextChunk, fileOffsetComp);
 393 
 394             if (pos < 0) {
 395                 buffers.add(-pos - 1, nextChunk);
 396             }
 397         }
 398 
 399         buf.cacheLen = outCount;
 400     }
 401 
 402     // Adds the buffer to the front of the LRU list.
 403     private void addFirst(Buffer buf) {
 404         assert buf.next == null;
 405         assert buf.prev == null;
 406         assert buf.cache != null;
 407 
 408         if (cacheHead.prev == cacheHead) {
 409             cacheHead.prev = buf;
 410         }
 411 
 412         cacheHead.next.prev = buf;
 413         buf.next = cacheHead.next;
 414         buf.prev = cacheHead;
 415         cacheHead.next = buf;
 416     }
 417 
 418     // Removes the buffer from the LRU list.
 419     private void remove(Buffer buf) {
 420         assert buf.prev != null;
 421         assert buf.next != null;
 422         assert buf.cache != null;
 423         assert cacheHead.prev != cacheHead;
 424 
 425         buf.prev.next = buf.next;
 426         buf.next.prev = buf.prev;
 427         buf.next = null;
 428         buf.prev = null;
 429     }
 430 
 431     // Represents a gzipped buffer. The gzipped hprof file consists of a list of these buffers.
 432     private static class Buffer {
 433         public byte[] cache;
 434         public int cacheLen;
 435         public final long fileOffset;
 436         public final long offset;
 437         public Buffer next;
 438         public Buffer prev;
 439 
 440         public Buffer(long fileOffset, long offset) {
 441             this.cache = null;
 442             this.cacheLen = 0;
 443             this.fileOffset = fileOffset;
 444             this.offset = offset;
 445             this.next = null;
 446             this.prev = null;
 447         }
 448     }
 449 
 450     // Compares chunks by file offset.
 451     private static class FileOffsetComparator implements Comparator<Buffer> {
 452 
 453         @Override
 454         public int compare(Buffer x, Buffer y) {
 455             return Long.compare(x.fileOffset, y.fileOffset);
 456         }
 457     }
 458 
 459     // Compares chunks by offset.
 460     private static class OffsetComparator implements Comparator<Buffer> {
 461 
 462         @Override
 463         public int compare(Buffer x, Buffer y) {
 464             return Long.compare(x.offset, y.offset);
 465         }
 466     }
 467 
 468     // Implements an InputStream for this object.
 469     private static class InputStreamImpl extends InputStream {
 470 
 471         private long offset;
 472         private final GzipRandomAccess access;
 473 
 474         public InputStreamImpl(long offset, GzipRandomAccess access) {
 475             this.offset = offset;
 476             this.access = access;
 477         }
 478 
 479         @Override
 480         public synchronized int read(byte[] b, int off, int len) throws IOException {
 481             int read = access.read(offset, b, off, len);
 482 
 483             if (read > 0) {
 484                 this.offset += read;
 485             }
 486 
 487             return read;
 488         }
 489 
 490         @Override
 491         public int read() throws IOException {
 492             byte[] b = new byte[1];
 493             int read = read(b, 0, 1);
 494 
 495             if (read != 1) {
 496                 return -1;
 497             }
 498 
 499             return b[0] & 0xff;
 500         }
 501     }
 502 
 503     // Implements a ReadBuffer for this object.
 504     public static class ReadBufferImpl implements ReadBuffer {
 505 
 506         private final GzipRandomAccess access;
 507         private final byte[] tmp = new byte[8];
 508 
 509         public ReadBufferImpl(GzipRandomAccess access) {
 510             this.access = access;
 511         }
 512 
 513         private void readFully(long pos, int len) throws IOException {
 514             int left = len;
 515             int off = 0;
 516 
 517             while (left > 0) {
 518                 int read = access.read(pos, tmp, off, left);
 519 
 520                 if (read <= 0) {
 521                     throw new EOFException("Could not read at " + pos);
 522                 }
 523 
 524                 left -= read;
 525                 off += read;
 526                 pos += read;
 527             }
 528         }
 529 
 530         private int readInt(int offset) {
 531             return (((tmp[offset + 0] & 0xff) << 24) | ((tmp[offset + 1] & 0xff) << 16) |
 532                      ((tmp[offset + 2] & 0xff) << 8) | (tmp[offset + 3] & 0xff));
 533         }
 534 
 535         @Override
 536         public void close() throws Exception {
 537             access.close();
 538         }
 539 
 540         @Override
 541         public char getChar(long pos) throws IOException {
 542             readFully(pos, 2);
 543             return (char) (((tmp[0] & 0xff) << 8) | (tmp[1] & 0xff));
 544         }
 545 
 546         @Override
 547         public byte getByte(long pos) throws IOException {
 548             readFully(pos, 1);
 549             return tmp[0];
 550         }
 551 
 552         @Override
 553         public short getShort(long pos) throws IOException {
 554             return (short) getChar(pos);
 555         }
 556 
 557         @Override
 558         public int getInt(long pos) throws IOException {
 559             readFully(pos, 4);
 560             return readInt(0);
 561         }
 562 
 563         @Override
 564         public long getLong(long pos) throws IOException {
 565             readFully(pos, 8);
 566             long i1 = readInt(0);
 567             int i2 = readInt(4);
 568 
 569             return (i1 << 32) | (i2 & 0xffffffffl);
 570         }
 571     }
 572 }