Old core/org.openjdk.jmc.flightrecorder/src/main/java/org/openjdk/jmc/flightrecorder/util/ChunkReader.java

   1 /*
   2  * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
   3  *
   4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5  *
   6  * The contents of this file are subject to the terms of either the Universal Permissive License
   7  * v 1.0 as shown at http://oss.oracle.com/licenses/upl
   8  *
   9  * or the following license:
  10  *
  11  * Redistribution and use in source and binary forms, with or without modification, are permitted
  12  * provided that the following conditions are met:
  13  *
  14  * 1. Redistributions of source code must retain the above copyright notice, this list of conditions
  15  * and the following disclaimer.
  16  *
  17  * 2. Redistributions in binary form must reproduce the above copyright notice, this list of
  18  * conditions and the following disclaimer in the documentation and/or other materials provided with
  19  * the distribution.
  20  *
  21  * 3. Neither the name of the copyright holder nor the names of its contributors may be used to
  22  * endorse or promote products derived from this software without specific prior written permission.
  23  *
  24  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
  25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  26  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
  27  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  30  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
  31  * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  32  */
  33 package org.openjdk.jmc.flightrecorder.util;
  34 
  35 import java.io.BufferedInputStream;
  36 import java.io.ByteArrayInputStream;
  37 import java.io.DataInputStream;
  38 import java.io.File;
  39 import java.io.IOException;
  40 import java.io.InputStream;
  41 import java.io.RandomAccessFile;
  42 import java.nio.MappedByteBuffer;
  43 import java.nio.channels.FileChannel;
  44 import java.util.Iterator;
  45 import java.util.NoSuchElementException;
  46 
  47 import org.openjdk.jmc.common.io.IOToolkit;
  48 import org.openjdk.jmc.flightrecorder.JfrLoaderToolkit;
  49 import org.openjdk.jmc.flightrecorder.internal.util.DataInputToolkit;
  50 
  51 /**
  52  * Provides an efficient means to read JFR data, chunk by chunk. The actual method employed will
  53  * depend on whether the JFR file is available as a stream or as a file, and whether the data is
  54  * compressed.
  55  * <p>
  56  * Each chunk will be self-contained and parsable, for example by wrapping it in a
  57  * {@link ByteArrayInputStream} and using the {@link JfrLoaderToolkit}.
  58  */
  59 public final class ChunkReader {
  60         private static final byte[] JFR_MAGIC_BYTES = new byte[] {'F', 'L', 'R', 0};
  61         private static final int[] JFR_MAGIC = new int[] {'F', 'L', 'R', 0};
  62         private static final int ZIP_MAGIC[] = new int[] {31, 139};
  63         private static final int GZ_MAGIC[] = new int[] {31, 139};
  64         // For JDK 8 this is the size of the magic + version and offset to the meta data event.
  65         // For JDK 9 and later, this it the part of the header right up to, and including, the chunk size.
  66         private static final int HEADER_SIZE = DataInputToolkit.INTEGER_SIZE + 2 * DataInputToolkit.SHORT_SIZE
  67                         + DataInputToolkit.LONG_SIZE;
  68 
  69         /**
  70          * Chunk iterator for an uncompressed JFR file. Efficiently reads a JFR file, chunk by chunk,
  71          * into memory as byte arrays by memory mapping the JFR file, finding the chunk boundaries with
  72          * a minimum of parsing, and then block-transferring the byte arrays. The transfers will be done
  73          * on {@link Iterator#next()}, and the resulting byte array will only be reachable for as long
  74          * as it is referenced. The JFR file must not be zip or gzip compressed.
  75          * <p>
  76          * Note that {@link Iterator#next()} can throw {@link IllegalArgumentException} if it encounters
  77          * a corrupted chunk.
  78          */
  79         private static class ChunkIterator implements Iterator<byte[]> {
  80                 int lastChunkOffset;
  81                 private RandomAccessFile file;
  82                 private final FileChannel channel;
  83                 private final MappedByteBuffer buffer;
  84 
  85                 private ChunkIterator(File jfrFile) throws IOException {
  86                         try {
  87                                 file = new RandomAccessFile(jfrFile, "r"); //$NON-NLS-1$
  88                                 channel = file.getChannel();
  89                                 buffer = channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size());
  90                                 if (!bufferHasMagic(JFR_MAGIC)) {
  91                                         if (bufferHasMagic(GZ_MAGIC) || bufferHasMagic(ZIP_MAGIC)) {
  92                                                 throw new IOException(
  93                                                                 "Cannot use the ChunkIterators with gzipped JMC files. Please use unzipped recordings."); //$NON-NLS-1$
  94                                         } else {
  95                                                 throw new IOException("The provided file (" + String.valueOf(jfrFile) + ") is not a JFR file!"); //$NON-NLS-1$ //$NON-NLS-2$
  96                                         }
  97                                 }
  98                         } catch (Exception e) {
  99                                 if (file != null) {
 100                                         file.close();
 101                                 }
 102                                 throw e;
 103                         }
 104                 }
 105 
 106                 @Override
 107                 public boolean hasNext() {
 108                         boolean hasNext = checkHasMore();
 109                         if (!hasNext) {
 110                                 try {
 111                                         channel.close();
 112                                         file.close();
 113                                 } catch (IOException e) {
 114                                         // Shouldn't happen.
 115                                         e.printStackTrace();
 116                                 }
 117                         }
 118                         return hasNext;
 119                 }
 120 
 121                 private boolean checkHasMore() {
 122                         return lastChunkOffset < buffer.limit();
 123                 }
 124 
 125                 @Override
 126                 public byte[] next() {
 127                         if (!checkHasMore()) {
 128                                 throw new NoSuchElementException();
 129                         }
 130                         if (!bufferHasMagic(JFR_MAGIC)) {
 131                                 lastChunkOffset = buffer.limit() + 1;
 132                                 throw new IllegalArgumentException("Corrupted chunk encountered! Aborting!"); //$NON-NLS-1$
 133                         }
 134 
 135                         int index = lastChunkOffset + JFR_MAGIC.length;
 136                         short versionMSB = buffer.getShort(index);
 137                         // short versionLSB = buffer.getShort(index + SHORT_SIZE);
 138                         index += 2 * DataInputToolkit.SHORT_SIZE;
 139                         int size = 0;
 140 
 141                         if (versionMSB >= 1) {
 142                                 // We have a JDK 9+ recording - chunk size can be directly read from header
 143                                 size = (int) buffer.getLong(index);
 144                                 index = lastChunkOffset + size;
 145                         } else {
 146                                 // Got a pre JDK 9 recording. Need to find the metadata event index, read and
 147                                 // add the size of the metadata event to find the chunk boundary
 148                                 index = lastChunkOffset + (int) buffer.getLong(index);
 149                                 // Reading the metadata event size
 150                                 int lastEventSize = buffer.getInt(index);
 151                                 index += lastEventSize;
 152                                 size = index - lastChunkOffset;
 153                         }
 154                         // Read the chunk and return it
 155                         byte[] result = new byte[size];
 156                         buffer.position(lastChunkOffset);
 157                         buffer.get(result, 0, result.length);
 158                         lastChunkOffset = index;
 159                         return result;
 160                 }
 161 
 162                 private boolean bufferHasMagic(int[] magicBytes) {
 163                         for (int i = 0; i < magicBytes.length; i++) {
 164                                 if (buffer.get(lastChunkOffset + i) != magicBytes[i]) {
 165                                         return false;
 166                                 }
 167                         }
 168                         return true;
 169                 }
 170 
 171                 @Override
 172                 public void remove() {
 173                         throw new UnsupportedOperationException("Cannot remove chunks"); //$NON-NLS-1$
 174                 }
 175         }
 176 
 177         private enum StreamState {
 178                 NEXT_CHUNK, JFR_CHECKED, ERROR
 179         }
 180 
 181         /**
 182          * Iterator reading JFR chunks from a stream.
 183          */
 184         private static class StreamChunkIterator implements Iterator<byte[]> {
 185                 private final DataInputStream inputStream;
 186                 private StreamState streamState = StreamState.NEXT_CHUNK;
 187                 private Throwable lastError = null;
 188 
 189                 public StreamChunkIterator(InputStream inputStream) {
 190                         this.inputStream = getDataStream(inputStream);
 191                 }
 192 
 193                 private DataInputStream getDataStream(InputStream is) {
 194                         if (is.markSupported()) {
 195                                 return new DataInputStream(is);
 196                         }
 197                         return new DataInputStream(new BufferedInputStream(is));
 198                 }
 199 
 200                 @Override
 201                 public boolean hasNext() {
 202                         boolean hasNext = false;
 203                         if (streamState == StreamState.NEXT_CHUNK) {
 204                                 hasNext = validateJFRMagic();
 205                         } else if (streamState == StreamState.JFR_CHECKED) {
 206                                 hasNext = true;
 207                         }
 208                         if (!hasNext) {
 209                                 IOToolkit.closeSilently(inputStream);
 210                         }
 211                         return hasNext;
 212                 }
 213 
 214                 private boolean validateJFRMagic() {
 215                         try {
 216                                 if (IOToolkit.hasMagic(inputStream, JFR_MAGIC)) {
 217                                         streamState = StreamState.JFR_CHECKED;
 218                                         return true;
 219                                 } else {
 220                                         streamState = StreamState.ERROR;
 221                                         lastError = new Exception(
 222                                                         "Next chunk has no JFR magic. It is either no JFR file at all or corrupt."); //$NON-NLS-1$
 223                                         return false;
 224                                 }
 225                         } catch (IOException e) {
 226                                 streamState = StreamState.ERROR;
 227                                 lastError = e;
 228                                 return false;
 229                         }
 230                 }
 231 
 232                 @Override
 233                 public byte[] next() {
 234                         if (!hasNext()) {
 235                                 throw new NoSuchElementException();
 236                         }
 237                         switch (streamState) {
 238                         case ERROR:
 239                                 throw new IllegalArgumentException(lastError);
 240                         case NEXT_CHUNK:
 241                                 if (!validateJFRMagic()) {
 242                                         throw new IllegalArgumentException(lastError);
 243                                 }
 244                                 // Fall through
 245                         case JFR_CHECKED:
 246                                 try {
 247                                         return retrieveNextChunk();
 248                                 } catch (IOException e) {
 249                                         lastError = e;
 250                                         throw new IllegalArgumentException(e);
 251                                 }
 252                         default:
 253                                 throw new IllegalArgumentException("Unknown stream state"); //$NON-NLS-1$
 254                         }
 255                 }
 256 
 257                 private byte[] retrieveNextChunk() throws IOException {
 258                         byte[] chunkHeader = new byte[HEADER_SIZE];
 259                         // Copy in the magic
 260                         System.arraycopy(JFR_MAGIC_BYTES, 0, chunkHeader, 0, JFR_MAGIC_BYTES.length);
 261                         // Read rest of chunk header
 262                         readBytesFromStream(chunkHeader, JFR_MAGIC_BYTES.length, HEADER_SIZE - JFR_MAGIC_BYTES.length);
 263                         short majorVersion = DataInputToolkit.readShort(chunkHeader, JFR_MAGIC_BYTES.length);
 264                         byte[] chunkTotal = null;
 265                         if (majorVersion >= 1) {
 266                                 // JDK 9+ recording
 267                                 long fullSize = DataInputToolkit.readLong(chunkHeader, HEADER_SIZE - DataInputToolkit.LONG_SIZE);
 268                                 int readSize = (int) fullSize - HEADER_SIZE;
 269                                 chunkTotal = new byte[(int) fullSize];
 270                                 System.arraycopy(chunkHeader, 0, chunkTotal, 0, chunkHeader.length);
 271                                 readBytesFromStream(chunkTotal, HEADER_SIZE, readSize);
 272                         } else {
 273                                 long metadataIndex = DataInputToolkit.readLong(chunkHeader, HEADER_SIZE - DataInputToolkit.LONG_SIZE);
 274                                 int eventReadSize = (int) (metadataIndex - HEADER_SIZE + DataInputToolkit.INTEGER_SIZE);
 275                                 byte[] chunkEvents = new byte[eventReadSize];
 276                                 readBytesFromStream(chunkEvents, 0, chunkEvents.length);
 277                                 int metadataEventSize = DataInputToolkit.readInt(chunkEvents,
 278                                                 eventReadSize - DataInputToolkit.INTEGER_SIZE) - DataInputToolkit.INTEGER_SIZE;
 279                                 byte[] chunkMetadata = new byte[metadataEventSize];
 280                                 readBytesFromStream(chunkMetadata, 0, chunkMetadata.length);
 281 
 282                                 chunkTotal = new byte[chunkHeader.length + chunkEvents.length + chunkMetadata.length];
 283                                 System.arraycopy(chunkHeader, 0, chunkTotal, 0, chunkHeader.length);
 284                                 System.arraycopy(chunkEvents, 0, chunkTotal, chunkHeader.length, chunkEvents.length);
 285                                 System.arraycopy(chunkMetadata, 0, chunkTotal, chunkHeader.length + chunkEvents.length,
 286                                                 chunkMetadata.length);
 287                         }
 288                         streamState = StreamState.NEXT_CHUNK;
 289                         return chunkTotal;
 290                 }
 291 
 292                 private void readBytesFromStream(byte[] bytes, int offset, int count) throws IOException {
 293                         int totalRead = 0;
 294                         while (totalRead < count) {
 295                                 int read = inputStream.read(bytes, offset + totalRead, count - totalRead);
 296                                 if (read == -1) {
 297                                         throw new IOException("Unexpected end of data."); //$NON-NLS-1$
 298                                 }
 299                                 totalRead += read;
 300                         }
 301                 }
 302 
 303                 @Override
 304                 public void remove() {
 305                         throw new UnsupportedOperationException("Cannot remove chunks"); //$NON-NLS-1$
 306                 }
 307         }
 308 
 309         /**
 310          * Reads a JFR file, chunk by chunk.
 311          * <p>
 312          * Each chunk will be self contained and parsable, for example by wrapping it in a
 313          * {@link ByteArrayInputStream}. Note that {@link Iterator#next()} can throw
 314          * {@link IllegalArgumentException} if it encounters a corrupted chunk.
 315          *
 316          * @param jfrFile
 317          *            the file to read binary data from
 318          * @return returns an iterator over byte arrays, where each byte array is a self containing jfr
 319          *         chunk
 320          */
 321         public static Iterator<byte[]> readChunks(File jfrFile) throws IOException {
 322                 // We fall back to using a StreamChunkIterator if the file is compressed.
 323                 if (IOToolkit.isCompressedFile(jfrFile)) {
 324                         return new StreamChunkIterator(IOToolkit.openUncompressedStream(jfrFile));
 325                 }
 326                 return new ChunkIterator(jfrFile);
 327         }
 328 
 329         /**
 330          * Reads a JFR file, chunk by chunk, from a stream.
 331          * <p>
 332          * Each chunk will be self contained and parsable, for example by wrapping it in a
 333          * {@link ByteArrayInputStream}. Note that {@link Iterator#next()} can throw
 334          * {@link IllegalArgumentException} if it encounters a corrupted chunk.
 335          *
 336          * @param jfrStream
 337          *            the stream to read binary data from
 338          * @return returns an iterator over byte arrays, where each byte array is a self containing JFR
 339          *         chunk
 340          */
 341         public static Iterator<byte[]> readChunks(InputStream jfrStream) throws IOException {
 342                 return new StreamChunkIterator(IOToolkit.openUncompressedStream(jfrStream));
 343         }
 344 
 345         /**
 346          * Program for listing the number of chunks in a recording.
 347          *
 348          * @param args
 349          *            takes one argument, which must be the path to a recording
 350          * @throws IOException
 351          *             if there was a problem reading the file
 352          */
 353         public static void main(String[] args) throws IOException {
 354                 long nanoStart = System.nanoTime();
 355                 int chunkCount = 0, byteCount = 0;
 356 
 357                 if (args.length != 1) {
 358                         System.out.println("Usage: ChunkReader <file>"); //$NON-NLS-1$
 359                         System.exit(2);
 360                 }
 361                 File file = new File(args[0]);
 362                 if (!file.exists()) {
 363                         System.out.println("The file " + file.getAbsolutePath() + " does not exist. Exiting..."); //$NON-NLS-1$ //$NON-NLS-2$
 364                         System.exit(3);
 365                 }
 366                 Iterator<byte[]> iter = readChunks(file);
 367                 while (iter.hasNext()) {
 368                         byte[] bytes = iter.next();
 369                         chunkCount += 1;
 370                         byteCount += bytes.length;
 371                         System.out.println("Chunk #" + chunkCount + " size: " + bytes.length); //$NON-NLS-1$ //$NON-NLS-2$
 372                 }
 373                 double duration = (System.nanoTime() - nanoStart) / 1_000_000d;
 374 
 375                 System.out.println("Chunks: " + chunkCount + " Byte count: " + byteCount + " Time taken: " + duration + " ms"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
 376         }
 377 }