New core/org.openjdk.jmc.flightrecorder/src/main/java/org/openjdk/jmc/flightrecorder/util/ChunkReader.java

   1 /*
   2  * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
   3  *
   4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5  *
   6  * The contents of this file are subject to the terms of either the Universal Permissive License
   7  * v 1.0 as shown at http://oss.oracle.com/licenses/upl
   8  *
   9  * or the following license:
  10  *
  11  * Redistribution and use in source and binary forms, with or without modification, are permitted
  12  * provided that the following conditions are met:
  13  *
  14  * 1. Redistributions of source code must retain the above copyright notice, this list of conditions
  15  * and the following disclaimer.
  16  *
  17  * 2. Redistributions in binary form must reproduce the above copyright notice, this list of
  18  * conditions and the following disclaimer in the documentation and/or other materials provided with
  19  * the distribution.
  20  *
  21  * 3. Neither the name of the copyright holder nor the names of its contributors may be used to
  22  * endorse or promote products derived from this software without specific prior written permission.
  23  *
  24  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
  25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  26  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
  27  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  30  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
  31  * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  32  */
  33 package org.openjdk.jmc.flightrecorder.util;
  34 
  35 import java.io.BufferedInputStream;
  36 import java.io.ByteArrayInputStream;
  37 import java.io.DataInputStream;
  38 import java.io.File;
  39 import java.io.IOException;
  40 import java.io.InputStream;
  41 import java.io.RandomAccessFile;
  42 import java.nio.MappedByteBuffer;
  43 import java.nio.channels.FileChannel;
  44 import java.util.Iterator;
  45 import java.util.NoSuchElementException;
  46 
  47 import org.openjdk.jmc.common.io.IOToolkit;
  48 import org.openjdk.jmc.flightrecorder.JfrLoaderToolkit;
  49 import org.openjdk.jmc.flightrecorder.internal.util.DataInputToolkit;
  50 
  51 /**
  52  * Provides an efficient means to read JFR data, chunk by chunk. The actual method employed will
   53  * depend on whether the JFR file is available as a stream or as a file, and whether or not the data
   54  * is compressed.
  55  * <p>
  56  * Each chunk will be self-contained and parsable, for example by wrapping it in a
  57  * {@link ByteArrayInputStream} and using the {@link JfrLoaderToolkit}.
  58  */
  59 public final class ChunkReader {
  60         private static final byte[] JFR_MAGIC_BYTES = new byte[] {'F', 'L', 'R', 0};
  61         private static final int[] JFR_MAGIC = new int[] {'F', 'L', 'R', 0};
  62         private static final int ZIP_MAGIC[] = new int[] {31, 139};
  63         private static final int GZ_MAGIC[] = new int[] {31, 139};
  64         // For JDK 8 this is the size of the magic + version and offset to the meta data event.
  65         // For JDK 9 and later, this it the part of the header right up to, and including, the chunk size.
  66         private static final int HEADER_SIZE = DataInputToolkit.INTEGER_SIZE + 2 * DataInputToolkit.SHORT_SIZE
  67                         + DataInputToolkit.LONG_SIZE;
  68 
  69         private ChunkReader() {
  70                 throw new UnsupportedOperationException("Not to be instantiated"); //$NON-NLS-1$
  71         }
  72         
  73         /**
  74          * Chunk iterator for an uncompressed JFR file. Efficiently reads a JFR file, chunk by chunk,
  75          * into memory as byte arrays by memory mapping the JFR file, finding the chunk boundaries with
  76          * a minimum of parsing, and then block-transferring the byte arrays. The transfers will be done
  77          * on {@link Iterator#next()}, and the resulting byte array will only be reachable for as long
  78          * as it is referenced. The JFR file must not be zip or gzip compressed.
  79          * <p>
  80          * Note that {@link Iterator#next()} can throw {@link IllegalArgumentException} if it encounters
  81          * a corrupted chunk.
  82          */
  83         private static class ChunkIterator implements Iterator<byte[]> {
  84                 int lastChunkOffset;
  85                 private RandomAccessFile file;
  86                 private final FileChannel channel;
  87                 private final MappedByteBuffer buffer;
  88 
  89                 private ChunkIterator(File jfrFile) throws IOException {
  90                         try {
  91                                 file = new RandomAccessFile(jfrFile, "r"); //$NON-NLS-1$
  92                                 channel = file.getChannel();
  93                                 buffer = channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size());
  94                                 if (!bufferHasMagic(JFR_MAGIC)) {
  95                                         if (bufferHasMagic(GZ_MAGIC) || bufferHasMagic(ZIP_MAGIC)) {
  96                                                 throw new IOException(
  97                                                                 "Cannot use the ChunkIterators with gzipped JMC files. Please use unzipped recordings."); //$NON-NLS-1$
  98                                         } else {
  99                                                 throw new IOException("The provided file (" + String.valueOf(jfrFile) + ") is not a JFR file!"); //$NON-NLS-1$ //$NON-NLS-2$
 100                                         }
 101                                 }
 102                         } catch (Exception e) {
 103                                 if (file != null) {
 104                                         file.close();
 105                                 }
 106                                 throw e;
 107                         }
 108                 }
 109 
 110                 @Override
 111                 public boolean hasNext() {
 112                         boolean hasNext = checkHasMore();
 113                         if (!hasNext) {
 114                                 try {
 115                                         channel.close();
 116                                         file.close();
 117                                 } catch (IOException e) {
 118                                         // Shouldn't happen.
 119                                         e.printStackTrace();
 120                                 }
 121                         }
 122                         return hasNext;
 123                 }
 124 
 125                 private boolean checkHasMore() {
 126                         return lastChunkOffset < buffer.limit();
 127                 }
 128 
 129                 @Override
 130                 public byte[] next() {
 131                         if (!checkHasMore()) {
 132                                 throw new NoSuchElementException();
 133                         }
 134                         if (!bufferHasMagic(JFR_MAGIC)) {
 135                                 lastChunkOffset = buffer.limit() + 1;
 136                                 throw new IllegalArgumentException("Corrupted chunk encountered! Aborting!"); //$NON-NLS-1$
 137                         }
 138 
 139                         int index = lastChunkOffset + JFR_MAGIC.length;
 140                         short versionMSB = buffer.getShort(index);
 141                         // short versionLSB = buffer.getShort(index + SHORT_SIZE);
 142                         index += 2 * DataInputToolkit.SHORT_SIZE;
 143                         int size = 0;
 144 
 145                         if (versionMSB >= 1) {
 146                                 // We have a JDK 9+ recording - chunk size can be directly read from header
 147                                 size = (int) buffer.getLong(index);
 148                                 index = lastChunkOffset + size;
 149                         } else {
 150                                 // Got a pre JDK 9 recording. Need to find the metadata event index, read and
 151                                 // add the size of the metadata event to find the chunk boundary
 152                                 index = lastChunkOffset + (int) buffer.getLong(index);
 153                                 // Reading the metadata event size
 154                                 int lastEventSize = buffer.getInt(index);
 155                                 index += lastEventSize;
 156                                 size = index - lastChunkOffset;
 157                         }
 158                         // Read the chunk and return it
 159                         byte[] result = new byte[size];
 160                         buffer.position(lastChunkOffset);
 161                         buffer.get(result, 0, result.length);
 162                         lastChunkOffset = index;
 163                         return result;
 164                 }
 165 
 166                 private boolean bufferHasMagic(int[] magicBytes) {
 167                         for (int i = 0; i < magicBytes.length; i++) {
 168                                 if (buffer.get(lastChunkOffset + i) != magicBytes[i]) {
 169                                         return false;
 170                                 }
 171                         }
 172                         return true;
 173                 }
 174 
 175                 @Override
 176                 public void remove() {
 177                         throw new UnsupportedOperationException("Cannot remove chunks"); //$NON-NLS-1$
 178                 }
 179         }
 180 
 181         private enum StreamState {
 182                 NEXT_CHUNK, JFR_CHECKED, ERROR
 183         }
 184 
 185         /**
 186          * Iterator reading JFR chunks from a stream.
 187          */
 188         private static class StreamChunkIterator implements Iterator<byte[]> {
 189                 private final DataInputStream inputStream;
 190                 private StreamState streamState = StreamState.NEXT_CHUNK;
 191                 private Throwable lastError = null;
 192 
 193                 public StreamChunkIterator(InputStream inputStream) {
 194                         this.inputStream = getDataStream(inputStream);
 195                 }
 196 
 197                 private DataInputStream getDataStream(InputStream is) {
 198                         if (is.markSupported()) {
 199                                 return new DataInputStream(is);
 200                         }
 201                         return new DataInputStream(new BufferedInputStream(is));
 202                 }
 203 
 204                 @Override
 205                 public boolean hasNext() {
 206                         boolean hasNext = false;
 207                         if (streamState == StreamState.NEXT_CHUNK) {
 208                                 hasNext = validateJFRMagic();
 209                         } else if (streamState == StreamState.JFR_CHECKED) {
 210                                 hasNext = true;
 211                         }
 212                         if (!hasNext) {
 213                                 IOToolkit.closeSilently(inputStream);
 214                         }
 215                         return hasNext;
 216                 }
 217 
 218                 private boolean validateJFRMagic() {
 219                         try {
 220                                 if (IOToolkit.hasMagic(inputStream, JFR_MAGIC)) {
 221                                         streamState = StreamState.JFR_CHECKED;
 222                                         return true;
 223                                 } else {
 224                                         streamState = StreamState.ERROR;
 225                                         lastError = new Exception(
 226                                                         "Next chunk has no JFR magic. It is either no JFR file at all or corrupt."); //$NON-NLS-1$
 227                                         return false;
 228                                 }
 229                         } catch (IOException e) {
 230                                 streamState = StreamState.ERROR;
 231                                 lastError = e;
 232                                 return false;
 233                         }
 234                 }
 235 
 236                 @Override
 237                 public byte[] next() {
 238                         if (!hasNext()) {
 239                                 throw new NoSuchElementException();
 240                         }
 241                         switch (streamState) {
 242                         case ERROR:
 243                                 throw new IllegalArgumentException(lastError);
 244                         case NEXT_CHUNK:
 245                                 if (!validateJFRMagic()) {
 246                                         throw new IllegalArgumentException(lastError);
 247                                 }
 248                                 // Fall through
 249                         case JFR_CHECKED:
 250                                 try {
 251                                         return retrieveNextChunk();
 252                                 } catch (IOException e) {
 253                                         lastError = e;
 254                                         throw new IllegalArgumentException(e);
 255                                 }
 256                         default:
 257                                 throw new IllegalArgumentException("Unknown stream state"); //$NON-NLS-1$
 258                         }
 259                 }
 260 
 261                 private byte[] retrieveNextChunk() throws IOException {
 262                         byte[] chunkHeader = new byte[HEADER_SIZE];
 263                         // Copy in the magic
 264                         System.arraycopy(JFR_MAGIC_BYTES, 0, chunkHeader, 0, JFR_MAGIC_BYTES.length);
 265                         // Read rest of chunk header
 266                         readBytesFromStream(chunkHeader, JFR_MAGIC_BYTES.length, HEADER_SIZE - JFR_MAGIC_BYTES.length);
 267                         short majorVersion = DataInputToolkit.readShort(chunkHeader, JFR_MAGIC_BYTES.length);
 268                         byte[] chunkTotal = null;
 269                         if (majorVersion >= 1) {
 270                                 // JDK 9+ recording
 271                                 long fullSize = DataInputToolkit.readLong(chunkHeader, HEADER_SIZE - DataInputToolkit.LONG_SIZE);
 272                                 int readSize = (int) fullSize - HEADER_SIZE;
 273                                 chunkTotal = new byte[(int) fullSize];
 274                                 System.arraycopy(chunkHeader, 0, chunkTotal, 0, chunkHeader.length);
 275                                 readBytesFromStream(chunkTotal, HEADER_SIZE, readSize);
 276                         } else {
 277                                 long metadataIndex = DataInputToolkit.readLong(chunkHeader, HEADER_SIZE - DataInputToolkit.LONG_SIZE);
 278                                 int eventReadSize = (int) (metadataIndex - HEADER_SIZE + DataInputToolkit.INTEGER_SIZE);
 279                                 byte[] chunkEvents = new byte[eventReadSize];
 280                                 readBytesFromStream(chunkEvents, 0, chunkEvents.length);
 281                                 int metadataEventSize = DataInputToolkit.readInt(chunkEvents,
 282                                                 eventReadSize - DataInputToolkit.INTEGER_SIZE) - DataInputToolkit.INTEGER_SIZE;
 283                                 byte[] chunkMetadata = new byte[metadataEventSize];
 284                                 readBytesFromStream(chunkMetadata, 0, chunkMetadata.length);
 285 
 286                                 chunkTotal = new byte[chunkHeader.length + chunkEvents.length + chunkMetadata.length];
 287                                 System.arraycopy(chunkHeader, 0, chunkTotal, 0, chunkHeader.length);
 288                                 System.arraycopy(chunkEvents, 0, chunkTotal, chunkHeader.length, chunkEvents.length);
 289                                 System.arraycopy(chunkMetadata, 0, chunkTotal, chunkHeader.length + chunkEvents.length,
 290                                                 chunkMetadata.length);
 291                         }
 292                         streamState = StreamState.NEXT_CHUNK;
 293                         return chunkTotal;
 294                 }
 295 
 296                 private void readBytesFromStream(byte[] bytes, int offset, int count) throws IOException {
 297                         int totalRead = 0;
 298                         while (totalRead < count) {
 299                                 int read = inputStream.read(bytes, offset + totalRead, count - totalRead);
 300                                 if (read == -1) {
 301                                         throw new IOException("Unexpected end of data."); //$NON-NLS-1$
 302                                 }
 303                                 totalRead += read;
 304                         }
 305                 }
 306 
 307                 @Override
 308                 public void remove() {
 309                         throw new UnsupportedOperationException("Cannot remove chunks"); //$NON-NLS-1$
 310                 }
 311         }
 312 
 313         /**
 314          * Reads a JFR file, chunk by chunk.
 315          * <p>
 316          * Each chunk will be self contained and parsable, for example by wrapping it in a
 317          * {@link ByteArrayInputStream}. Note that {@link Iterator#next()} can throw
 318          * {@link IllegalArgumentException} if it encounters a corrupted chunk.
 319          *
 320          * @param jfrFile
 321          *            the file to read binary data from
 322          * @return returns an iterator over byte arrays, where each byte array is a self containing jfr
 323          *         chunk
 324          */
 325         public static Iterator<byte[]> readChunks(File jfrFile) throws IOException {
 326                 // We fall back to using a StreamChunkIterator if the file is compressed.
 327                 if (IOToolkit.isCompressedFile(jfrFile)) {
 328                         return new StreamChunkIterator(IOToolkit.openUncompressedStream(jfrFile));
 329                 }
 330                 return new ChunkIterator(jfrFile);
 331         }
 332 
 333         /**
 334          * Reads a JFR file, chunk by chunk, from a stream.
 335          * <p>
 336          * Each chunk will be self contained and parsable, for example by wrapping it in a
 337          * {@link ByteArrayInputStream}. Note that {@link Iterator#next()} can throw
 338          * {@link IllegalArgumentException} if it encounters a corrupted chunk.
 339          *
 340          * @param jfrStream
 341          *            the stream to read binary data from
 342          * @return returns an iterator over byte arrays, where each byte array is a self containing JFR
 343          *         chunk
 344          */
 345         public static Iterator<byte[]> readChunks(InputStream jfrStream) throws IOException {
 346                 return new StreamChunkIterator(IOToolkit.openUncompressedStream(jfrStream));
 347         }
 348 
 349         /**
 350          * Program for listing the number of chunks in a recording.
 351          *
 352          * @param args
 353          *            takes one argument, which must be the path to a recording
 354          * @throws IOException
 355          *             if there was a problem reading the file
 356          */
 357         public static void main(String[] args) throws IOException {
 358                 long nanoStart = System.nanoTime();
 359                 int chunkCount = 0, byteCount = 0;
 360 
 361                 if (args.length != 1) {
 362                         System.out.println("Usage: ChunkReader <file>"); //$NON-NLS-1$
 363                         System.exit(2);
 364                 }
 365                 File file = new File(args[0]);
 366                 if (!file.exists()) {
 367                         System.out.println("The file " + file.getAbsolutePath() + " does not exist. Exiting..."); //$NON-NLS-1$ //$NON-NLS-2$
 368                         System.exit(3);
 369                 }
 370                 Iterator<byte[]> iter = readChunks(file);
 371                 while (iter.hasNext()) {
 372                         byte[] bytes = iter.next();
 373                         chunkCount += 1;
 374                         byteCount += bytes.length;
 375                         System.out.println("Chunk #" + chunkCount + " size: " + bytes.length); //$NON-NLS-1$ //$NON-NLS-2$
 376                 }
 377                 double duration = (System.nanoTime() - nanoStart) / 1_000_000d;
 378 
 379                 System.out.println("Chunks: " + chunkCount + " Byte count: " + byteCount + " Time taken: " + duration + " ms"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
 380         }
 381 }