1 /* 2 * Copyright (c) 2020 SAP SE. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 package jdk.test.lib.hprof.parser; 24 25 import java.io.ByteArrayOutputStream; 26 import java.io.Closeable; 27 import java.io.EOFException; 28 import java.io.IOException; 29 import java.io.InputStream; 30 import java.io.RandomAccessFile; 31 import java.util.ArrayList; 32 import java.util.Collections; 33 import java.util.Comparator; 34 import java.util.zip.DataFormatException; 35 import java.util.zip.Inflater; 36 37 public class GzipRandomAccess implements AutoCloseable, Closeable { 38 // A comparator which compares chunks by their file offset. 39 private static FileOffsetComparator fileOffsetComp = new FileOffsetComparator(); 40 41 // A comparator which compares chunks by their offset. 42 private static OffsetComparator offsetComp = new OffsetComparator(); 43 44 // The size to use when reading from the random access file. 45 private static final int READ_SIZE = 65536; 46 47 // The last used buffer. 48 private Buffer last; 49 50 // The underlying random access file to use. 51 private final RandomAccessFile raf; 52 53 // The length of the random access file. 54 private final long fileSize; 55 56 // The maximum size of a buffer cache. 57 private final int cacheSize; 58 59 // The maximum numbers of buffers to cache. 60 private final int maxCachedBuffers; 61 62 // A buffer used to read from the file. 63 private final byte[] in; 64 65 // A sorted list of the buffers we know so far. 66 private final ArrayList<Buffer> buffers; 67 68 // The inflater to use. 69 private final Inflater inf; 70 71 // The head of the list which contains the buffers with cached data. 72 private final Buffer cacheHead; 73 74 // The number of cached buffers in the list. 75 private int cachedBuffers; 76 77 // This is private to ensure we only handle the specific hprof gzip files 78 // written by the VM. 79 private GzipRandomAccess(String file, int bufferSize, int maxCachedBuffers) throws IOException { 80 last = null; 81 raf = new RandomAccessFile(file, "r"); 82 fileSize = raf.length(); 83 this.cacheSize = bufferSize; 84 this.maxCachedBuffers = maxCachedBuffers; 85 cachedBuffers = 0; 86 in = new byte[READ_SIZE]; 87 buffers = new ArrayList<Buffer>(); 88 inf = new Inflater(true); 89 cacheHead = new Buffer(-1, -1); 90 cacheHead.next = cacheHead; 91 cacheHead.prev = cacheHead; 92 buffers.add(new Buffer(0, 0)); 93 } 94 95 /** 96 * Clears the cache. 97 */ 98 public synchronized void clearCache() { 99 while (cacheHead.next != cacheHead) { 100 assert cacheHead.next.cache != null; 101 Buffer buf = cacheHead.next; 102 remove(buf); 103 buf.cache = null; 104 } 105 106 last = null; 107 cachedBuffers = 0; 108 } 109 110 /** 111 * Returns an approximate file offset for the given offset. The return value should only be used for progress 112 * indication and the like. Note that you should only query offsets you've already read. 113 * 114 * @param offset The offset. 115 * @return The approximate file offset. 116 */ 117 public synchronized long getFileOffset(long offset) { 118 int pos = Collections.binarySearch(buffers, new Buffer(0, offset), offsetComp); 119 int realPos = pos >= 0 ? pos : -pos - 2; 120 121 if (realPos >= buffers.size() - 1) { 122 return buffers.get(buffers.size() - 1).fileOffset; 123 } 124 125 // Assume uniform compression. 126 Buffer buf = buffers.get(realPos); 127 long diff = offset - buf.offset; 128 long bufFileEnd = buffers.get(realPos + 1).fileOffset; 129 long fileDiff = bufFileEnd - buf.fileOffset; 130 double filePerDiff = (double) Math.max(1, fileDiff) / Math.max(1, buf.cacheLen); 131 132 return buf.fileOffset + (long) (filePerDiff * diff); 133 } 134 135 /** 136 * @return Returns the size of the underlying file. 137 */ 138 public long getFileSize() { 139 return fileSize; 140 } 141 142 /** 143 * Returns an @link {@link InputStream} to read from the given offset. Note 144 * that closing the input stream does not closes the underlying @link 145 * {@link GzipRandomAccess} object. 146 * 147 * @param offset The offset. 148 * @return The input stream. 149 */ 150 public InputStream asStream(long offset) { 151 return new InputStreamImpl(offset, this); 152 } 153 154 /** 155 * Returns a @link ReadBuffer which uses this object to do the actual 156 * operation. Note that closing the returned object closes the the 157 * underlying @link {@link GzipRandomAccess} object. 158 * 159 * @return The @link ReadBuffer. 160 */ 161 public ReadBuffer asFileBuffer() { 162 return new ReadBufferImpl(this); 163 } 164 165 /** 166 * Closes the object and clears the cache. The object is unusable after this 167 * call. 168 */ 169 @Override 170 public synchronized void close() throws IOException { 171 clearCache(); 172 buffers.clear(); 173 raf.close(); 174 inf.end(); 175 } 176 177 /** 178 * Reads bytes from the gzip file. 179 * 180 * @param offset The offset from which to start the read. 181 * @param b The buffer to read into. 182 * @param off The offset in the buffer to use. 183 * @param len The number of bytes to read at most. 184 * @return The number of bytes read or -1 if we are at the end of the file. 185 * @throws IOException On error. 186 */ 187 public synchronized int read(long offset, byte[] b, int off, int len) throws IOException { 188 Buffer buf = last; 189 190 while (buf == null || (buf.offset > offset) || (buf.offset + buf.cacheLen <= offset)) { 191 int pos = Collections.binarySearch(buffers, new Buffer(0, offset), offsetComp); 192 buf = buffers.get(pos >= 0 ? pos : -pos - 2); 193 194 if (buf.fileOffset >= fileSize) { 195 return -1; 196 } 197 198 if (buf.cache != null) { 199 // If already loaded, move to front of the cache list. 200 last = buf; 201 202 if (cacheHead.next != buf) { 203 remove(buf); 204 addFirst(buf); 205 } 206 } else { 207 try { 208 // Note that the load will also add the following buffer to the list, so the while 209 // loop will eventually terminate. 210 loadBuffer(buf); 211 } catch (DataFormatException e) { 212 throw new IOException(e); 213 } 214 } 215 } 216 217 int copyOffset = (int) (offset - buf.offset); 218 int toCopyMax = buf.cacheLen - copyOffset; 219 int toCopy = Math.min(toCopyMax, len); 220 221 if (toCopy <= 0) { 222 return -1; 223 } 224 225 System.arraycopy(buf.cache, copyOffset, b, off, toCopy); 226 227 return toCopy; 228 } 229 230 /** 231 * Returns the access object for the given file or <code>null</code> if not supported for the file. 232 * 233 * @param file The file name. 234 * @param cacheSizeInMB The size of the cache to use in megabytes. 235 * @return The access object or <code>null</code>. 236 */ 237 public static GzipRandomAccess getAccess(String file, int cacheSizeInMB) throws IOException { 238 try (RandomAccessFile raf = new RandomAccessFile(file, "r")) { 239 int header = raf.readInt(); 240 241 if ((header >>> 8) != 0x1f8b08) { 242 // No gzip with deflate. 243 return null; 244 } 245 246 if ((header & 16) == 0) { 247 // No comment 248 return null; 249 } 250 251 raf.readInt(); // timestamp 252 raf.readChar(); // Extra flags and os. 253 254 if ((header & 4) != 0) { 255 // Skip extra flags. 256 raf.skipBytes(raf.read() + (raf.read() << 256)); 257 } 258 259 // Skip name 260 if ((header & 8) != 0) { 261 while (raf.read() != 0) { 262 // Wait for the last 0. 263 } 264 } 265 266 // Read the comment. 267 ByteArrayOutputStream bos = new ByteArrayOutputStream(); 268 int b; 269 270 while ((b = raf.read()) > 0) { 271 bos.write(b); 272 } 273 274 // Check if the block size is included in the comment. 275 String comment = bos.toString("UTF-8"); 276 String expectedPrefix = "HPROF BLOCKSIZE="; 277 278 if (comment.startsWith(expectedPrefix)) { 279 String chunkSizeStr = comment.substring(expectedPrefix.length()).split(" ")[0]; 280 281 try { 282 int chunkSize = Integer.parseInt(chunkSizeStr); 283 284 if (chunkSize > 0) { 285 long nrOfChunks = Math.max(1000, cacheSizeInMB * 1024L * 1024L / chunkSize); 286 287 return new GzipRandomAccess(file, chunkSize, (int) nrOfChunks); 288 } 289 } catch (NumberFormatException e) { 290 // Could not parse. 291 } 292 } 293 } 294 295 return null; 296 } 297 298 // Loads the content of a buffer. If this is the first time the buffer is 299 // loaded, the next buffer is added too (but not loaded). 300 private void loadBuffer(Buffer buf) throws IOException, DataFormatException { 301 // If we have used all caches, take a cache from the least recently used cached buffer. 302 if (cachedBuffers >= maxCachedBuffers) { 303 Buffer toRemove = cacheHead.prev; 304 remove(toRemove); 305 buf.cache = toRemove.cache; 306 toRemove.cache = null; 307 } else { 308 // Otherwise allocate a new cache. 309 buf.cache = new byte[cacheSize]; 310 cachedBuffers += 1; 311 } 312 313 // Move to front of LRU list. 314 last = buf; 315 addFirst(buf); 316 317 // Fill in the cache 318 inf.reset(); 319 raf.seek(buf.fileOffset); 320 321 int read = raf.read(in, 0, READ_SIZE); 322 int inCount = read; 323 int outCount = 0; 324 325 // Skip header, but check at least a little 326 if (read < 4) { 327 throw new IOException("Missing data"); 328 } 329 330 if ((in[0] != 0x1f) || ((in[1] & 0xff) != 0x8b)) { 331 throw new IOException("Missing gzip id"); 332 } 333 334 if (in[2] != 8) { 335 throw new IOException("Only supports deflate"); 336 } 337 338 int off = 10; 339 340 // Extras 341 if ((in[3] & 4) != 0) { 342 int len = (in[off + 1] & 0xff) * 256 + (in[off] & 0xff); 343 off += 2 + len; 344 } 345 346 // Name 347 if ((in[3] & 8) != 0) { 348 int len = 0; 349 350 while (in[off + len] != 0) { 351 ++len; 352 } 353 354 off += len + 1; 355 } 356 357 // Comment 358 if ((in[3] & 16) != 0) { 359 int len = 0; 360 361 while (in[off + len] != 0) { 362 ++len; 363 } 364 365 off += len + 1; 366 } 367 368 // Header CRC 369 if ((in[3] & 2) != 0) { 370 off += 2; 371 } 372 373 inf.setInput(in, off, read - off); 374 outCount = inf.inflate(buf.cache, 0, buf.cache.length); 375 376 while (!inf.finished()) { 377 if (inf.needsInput()) { 378 read = raf.read(in, 0, READ_SIZE); 379 inf.setInput(in, 0, read); 380 inCount += read; 381 } 382 383 outCount += inf.inflate(buf.cache, outCount, buf.cache.length - outCount); 384 } 385 386 // Add the following buffer too if needed. 387 if ((inf.getRemaining() != 0) || (inCount + buf.fileOffset + 8 != fileSize)) { 388 long nextFileOffset = inCount - inf.getRemaining() + buf.fileOffset + 8 /* CRC */; 389 long nextOffset = outCount + buf.offset; 390 391 Buffer nextChunk = new Buffer(nextFileOffset, nextOffset); 392 int pos = Collections.binarySearch(buffers, nextChunk, fileOffsetComp); 393 394 if (pos < 0) { 395 buffers.add(-pos - 1, nextChunk); 396 } 397 } 398 399 buf.cacheLen = outCount; 400 } 401 402 // Adds the buffer to the front of the LRU list. 403 private void addFirst(Buffer buf) { 404 assert buf.next == null; 405 assert buf.prev == null; 406 assert buf.cache != null; 407 408 if (cacheHead.prev == cacheHead) { 409 cacheHead.prev = buf; 410 } 411 412 cacheHead.next.prev = buf; 413 buf.next = cacheHead.next; 414 buf.prev = cacheHead; 415 cacheHead.next = buf; 416 } 417 418 // Removes the buffer from the LRU list. 419 private void remove(Buffer buf) { 420 assert buf.prev != null; 421 assert buf.next != null; 422 assert buf.cache != null; 423 assert cacheHead.prev != cacheHead; 424 425 buf.prev.next = buf.next; 426 buf.next.prev = buf.prev; 427 buf.next = null; 428 buf.prev = null; 429 } 430 431 // Represents a gzipped buffer. The gzipped hprof file consists of a list of these buffers. 432 private static class Buffer { 433 public byte[] cache; 434 public int cacheLen; 435 public final long fileOffset; 436 public final long offset; 437 public Buffer next; 438 public Buffer prev; 439 440 public Buffer(long fileOffset, long offset) { 441 this.cache = null; 442 this.cacheLen = 0; 443 this.fileOffset = fileOffset; 444 this.offset = offset; 445 this.next = null; 446 this.prev = null; 447 } 448 } 449 450 // Compares chunks by file offset. 451 private static class FileOffsetComparator implements Comparator<Buffer> { 452 453 @Override 454 public int compare(Buffer x, Buffer y) { 455 return Long.compare(x.fileOffset, y.fileOffset); 456 } 457 } 458 459 // Compares chunks by offset. 460 private static class OffsetComparator implements Comparator<Buffer> { 461 462 @Override 463 public int compare(Buffer x, Buffer y) { 464 return Long.compare(x.offset, y.offset); 465 } 466 } 467 468 // Implements an InputStream for this object. 469 private static class InputStreamImpl extends InputStream { 470 471 private long offset; 472 private final GzipRandomAccess access; 473 474 public InputStreamImpl(long offset, GzipRandomAccess access) { 475 this.offset = offset; 476 this.access = access; 477 } 478 479 @Override 480 public synchronized int read(byte[] b, int off, int len) throws IOException { 481 int read = access.read(offset, b, off, len); 482 483 if (read > 0) { 484 this.offset += read; 485 } 486 487 return read; 488 } 489 490 @Override 491 public int read() throws IOException { 492 byte[] b = new byte[1]; 493 int read = read(b, 0, 1); 494 495 if (read != 1) { 496 return -1; 497 } 498 499 return b[0] & 0xff; 500 } 501 } 502 503 // Implements a ReadBuffer for this object. 504 public static class ReadBufferImpl implements ReadBuffer { 505 506 private final GzipRandomAccess access; 507 private final byte[] tmp = new byte[8]; 508 509 public ReadBufferImpl(GzipRandomAccess access) { 510 this.access = access; 511 } 512 513 private void readFully(long pos, int len) throws IOException { 514 int left = len; 515 int off = 0; 516 517 while (left > 0) { 518 int read = access.read(pos, tmp, off, left); 519 520 if (read <= 0) { 521 throw new EOFException("Could not read at " + pos); 522 } 523 524 left -= read; 525 off += read; 526 pos += read; 527 } 528 } 529 530 private int readInt(int offset) { 531 return (((tmp[offset + 0] & 0xff) << 24) | ((tmp[offset + 1] & 0xff) << 16) | 532 ((tmp[offset + 2] & 0xff) << 8) | (tmp[offset + 3] & 0xff)); 533 } 534 535 @Override 536 public void close() throws Exception { 537 access.close(); 538 } 539 540 @Override 541 public char getChar(long pos) throws IOException { 542 readFully(pos, 2); 543 return (char) (((tmp[0] & 0xff) << 8) | (tmp[1] & 0xff)); 544 } 545 546 @Override 547 public byte getByte(long pos) throws IOException { 548 readFully(pos, 1); 549 return tmp[0]; 550 } 551 552 @Override 553 public short getShort(long pos) throws IOException { 554 return (short) getChar(pos); 555 } 556 557 @Override 558 public int getInt(long pos) throws IOException { 559 readFully(pos, 4); 560 return readInt(0); 561 } 562 563 @Override 564 public long getLong(long pos) throws IOException { 565 readFully(pos, 8); 566 long i1 = readInt(0); 567 int i2 = readInt(4); 568 569 return (i1 << 32) | (i2 & 0xffffffffl); 570 } 571 } 572 }