/*
 * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * The contents of this file are subject to the terms of either the Universal Permissive License
 * v 1.0 as shown at http://oss.oracle.com/licenses/upl
 *
 * or the following license:
 *
 * Redistribution and use in source and binary forms, with or without modification, are permitted
 * provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this list of conditions
 * and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice, this list of
 * conditions and the following disclaimer in the documentation and/or other materials provided with
 * the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors may be used to
 * endorse or promote products derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.openjdk.jmc.flightrecorder.util;

import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.util.Iterator;
import java.util.NoSuchElementException;

import org.openjdk.jmc.common.io.IOToolkit;
import org.openjdk.jmc.flightrecorder.JfrLoaderToolkit;
import org.openjdk.jmc.flightrecorder.internal.util.DataInputToolkit;

/**
 * Provides an efficient means to read JFR data, chunk by chunk. The actual method employed will
 * depend on whether the JFR file is available as a stream or as a file, and whether or not the data
 * is compressed or not.
 * <p>
 * Each chunk will be self-contained and parsable, for example by wrapping it in a
 * {@link ByteArrayInputStream} and using the {@link JfrLoaderToolkit}.
 */
public final class ChunkReader {
	private static final byte[] JFR_MAGIC_BYTES = new byte[] {'F', 'L', 'R', 0};
	private static final int[] JFR_MAGIC = new int[] {'F', 'L', 'R', 0};
	// ZIP local file header signature, "PK\003\004".
	// (Was previously a duplicate of GZ_MAGIC, making the ZIP check redundant and wrong.)
	private static final int[] ZIP_MAGIC = new int[] {80, 75, 3, 4};
	// GZIP member header magic, per RFC 1952: 0x1f, 0x8b.
	private static final int[] GZ_MAGIC = new int[] {31, 139};
	// For JDK 8 this is the size of the magic + version and offset to the meta data event.
	// For JDK 9 and later, this is the part of the header right up to, and including, the chunk size.
	private static final int HEADER_SIZE = DataInputToolkit.INTEGER_SIZE + 2 * DataInputToolkit.SHORT_SIZE
			+ DataInputToolkit.LONG_SIZE;

	/**
	 * Chunk iterator for an uncompressed JFR file. Efficiently reads a JFR file, chunk by chunk,
	 * into memory as byte arrays by memory mapping the JFR file, finding the chunk boundaries with
	 * a minimum of parsing, and then block-transferring the byte arrays. The transfers will be done
	 * on {@link Iterator#next()}, and the resulting byte array will only be reachable for as long
	 * as it is referenced. The JFR file must not be zip or gzip compressed.
	 * <p>
	 * Note that {@link Iterator#next()} can throw {@link IllegalArgumentException} if it encounters
	 * a corrupted chunk.
	 */
	private static class ChunkIterator implements Iterator<byte[]> {
		// Offset into the mapped buffer where the next (unread) chunk starts.
		private int lastChunkOffset;
		private RandomAccessFile file;
		private final FileChannel channel;
		private final MappedByteBuffer buffer;

		private ChunkIterator(File jfrFile) throws IOException {
			try {
				file = new RandomAccessFile(jfrFile, "r"); //$NON-NLS-1$
				channel = file.getChannel();
				buffer = channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size());
				if (!bufferHasMagic(JFR_MAGIC)) {
					if (bufferHasMagic(GZ_MAGIC) || bufferHasMagic(ZIP_MAGIC)) {
						throw new IOException(
								"Cannot use the ChunkIterators with gzipped JMC files. Please use unzipped recordings."); //$NON-NLS-1$
					} else {
						throw new IOException("The provided file (" + String.valueOf(jfrFile) + ") is not a JFR file!"); //$NON-NLS-1$ //$NON-NLS-2$
					}
				}
			} catch (Exception e) {
				// Closing the RandomAccessFile also closes the derived channel.
				if (file != null) {
					file.close();
				}
				throw e;
			}
		}

		@Override
		public boolean hasNext() {
			boolean hasNext = checkHasMore();
			if (!hasNext) {
				// Eagerly release the file resources once the last chunk has been handed out.
				try {
					channel.close();
					file.close();
				} catch (IOException e) {
					// Shouldn't happen.
					e.printStackTrace();
				}
			}
			return hasNext;
		}

		private boolean checkHasMore() {
			return lastChunkOffset < buffer.limit();
		}

		@Override
		public byte[] next() {
			if (!checkHasMore()) {
				throw new NoSuchElementException();
			}
			if (!bufferHasMagic(JFR_MAGIC)) {
				// Move the offset past the end so that subsequent hasNext() returns false.
				lastChunkOffset = buffer.limit() + 1;
				throw new IllegalArgumentException("Corrupted chunk encountered! Aborting!"); //$NON-NLS-1$
			}

			int index = lastChunkOffset + JFR_MAGIC.length;
			short versionMSB = buffer.getShort(index);
			// short versionLSB = buffer.getShort(index + SHORT_SIZE);
			index += 2 * DataInputToolkit.SHORT_SIZE;
			int size = 0;

			if (versionMSB >= 1) {
				// We have a JDK 9+ recording - chunk size can be directly read from header
				size = (int) buffer.getLong(index);
				index = lastChunkOffset + size;
			} else {
				// Got a pre JDK 9 recording. Need to find the metadata event index, read and
				// add the size of the metadata event to find the chunk boundary
				index = lastChunkOffset + (int) buffer.getLong(index);
				// Reading the metadata event size
				int lastEventSize = buffer.getInt(index);
				index += lastEventSize;
				size = index - lastChunkOffset;
			}
			// Read the chunk and return it
			byte[] result = new byte[size];
			buffer.position(lastChunkOffset);
			buffer.get(result, 0, result.length);
			lastChunkOffset = index;
			return result;
		}

		/**
		 * Returns true if the buffer, at the current chunk offset, starts with the provided magic
		 * bytes. Returns false (rather than throwing) if fewer bytes than the magic remain.
		 */
		private boolean bufferHasMagic(int[] magicBytes) {
			if (lastChunkOffset + magicBytes.length > buffer.limit()) {
				return false;
			}
			for (int i = 0; i < magicBytes.length; i++) {
				if (buffer.get(lastChunkOffset + i) != magicBytes[i]) {
					return false;
				}
			}
			return true;
		}

		@Override
		public void remove() {
			throw new UnsupportedOperationException("Cannot remove chunks"); //$NON-NLS-1$
		}
	}

	private enum StreamState {
		NEXT_CHUNK, JFR_CHECKED, ERROR
	}

	/**
	 * Iterator reading JFR chunks from a stream.
	 */
	private static class StreamChunkIterator implements Iterator<byte[]> {
		private final DataInputStream inputStream;
		private StreamState streamState = StreamState.NEXT_CHUNK;
		private Throwable lastError = null;

		public StreamChunkIterator(InputStream inputStream) {
			this.inputStream = getDataStream(inputStream);
		}

		private DataInputStream getDataStream(InputStream is) {
			// Mark support is required for the magic probing done by IOToolkit.hasMagic.
			if (is.markSupported()) {
				return new DataInputStream(is);
			}
			return new DataInputStream(new BufferedInputStream(is));
		}

		@Override
		public boolean hasNext() {
			boolean hasNext = false;
			if (streamState == StreamState.NEXT_CHUNK) {
				hasNext = validateJFRMagic();
			} else if (streamState == StreamState.JFR_CHECKED) {
				hasNext = true;
			}
			if (!hasNext) {
				if (inputStream != null) {
					try {
						inputStream.close();
					} catch (IOException e) {
						e.printStackTrace();
					}
				}
			}
			return hasNext;
		}

		private boolean validateJFRMagic() {
			try {
				if (IOToolkit.hasMagic(inputStream, JFR_MAGIC)) {
					streamState = StreamState.JFR_CHECKED;
					return true;
				} else {
					streamState = StreamState.ERROR;
					lastError = new Exception(
							"Next chunk has no JFR magic. It is either no JFR file at all or corrupt."); //$NON-NLS-1$
					return false;
				}
			} catch (IOException e) {
				streamState = StreamState.ERROR;
				lastError = e;
				return false;
			}
		}

		@Override
		public byte[] next() {
			if (!hasNext()) {
				throw new NoSuchElementException();
			}
			switch (streamState) {
			case ERROR:
				throw new IllegalArgumentException(lastError);
			case NEXT_CHUNK:
				if (!validateJFRMagic()) {
					throw new IllegalArgumentException(lastError);
				}
				// Fall through
			case JFR_CHECKED:
				try {
					return retrieveNextChunk();
				} catch (IOException e) {
					lastError = e;
					throw new IllegalArgumentException(e);
				}
			default:
				throw new IllegalArgumentException("Unknown stream state"); //$NON-NLS-1$
			}
		}

		/**
		 * Reads the next complete chunk from the stream. The JFR magic has already been consumed by
		 * {@link #validateJFRMagic()}, so it is copied into the result from
		 * {@link ChunkReader#JFR_MAGIC_BYTES}.
		 */
		private byte[] retrieveNextChunk() throws IOException {
			byte[] chunkHeader = new byte[HEADER_SIZE];
			// Copy in the magic
			System.arraycopy(JFR_MAGIC_BYTES, 0, chunkHeader, 0, JFR_MAGIC_BYTES.length);
			// Read rest of chunk header
			readBytesFromStream(chunkHeader, JFR_MAGIC_BYTES.length, HEADER_SIZE - JFR_MAGIC_BYTES.length);
			short majorVersion = DataInputToolkit.readShort(chunkHeader, JFR_MAGIC_BYTES.length);
			byte[] chunkTotal = null;
			if (majorVersion >= 1) {
				// JDK 9+ recording - the full chunk size is in the header.
				long fullSize = DataInputToolkit.readLong(chunkHeader, HEADER_SIZE - DataInputToolkit.LONG_SIZE);
				int readSize = (int) fullSize - HEADER_SIZE;
				chunkTotal = new byte[(int) fullSize];
				System.arraycopy(chunkHeader, 0, chunkTotal, 0, chunkHeader.length);
				readBytesFromStream(chunkTotal, HEADER_SIZE, readSize);
			} else {
				// Pre JDK 9 recording - read up to the metadata event, then the metadata event
				// itself, and assemble the full chunk from the three pieces.
				long metadataIndex = DataInputToolkit.readLong(chunkHeader, HEADER_SIZE - DataInputToolkit.LONG_SIZE);
				int eventReadSize = (int) (metadataIndex - HEADER_SIZE + DataInputToolkit.INTEGER_SIZE);
				byte[] chunkEvents = new byte[eventReadSize];
				readBytesFromStream(chunkEvents, 0, chunkEvents.length);
				int metadataEventSize = DataInputToolkit.readInt(chunkEvents,
						eventReadSize - DataInputToolkit.INTEGER_SIZE) - DataInputToolkit.INTEGER_SIZE;
				byte[] chunkMetadata = new byte[metadataEventSize];
				readBytesFromStream(chunkMetadata, 0, chunkMetadata.length);

				chunkTotal = new byte[chunkHeader.length + chunkEvents.length + chunkMetadata.length];
				System.arraycopy(chunkHeader, 0, chunkTotal, 0, chunkHeader.length);
				System.arraycopy(chunkEvents, 0, chunkTotal, chunkHeader.length, chunkEvents.length);
				System.arraycopy(chunkMetadata, 0, chunkTotal, chunkHeader.length + chunkEvents.length,
						chunkMetadata.length);
			}
			streamState = StreamState.NEXT_CHUNK;
			return chunkTotal;
		}

		/**
		 * Reads exactly {@code count} bytes into {@code bytes} at {@code offset}, or throws an
		 * {@link IOException} if the stream ends first.
		 */
		private void readBytesFromStream(byte[] bytes, int offset, int count) throws IOException {
			int totalRead = 0;
			while (totalRead < count) {
				int read = inputStream.read(bytes, offset + totalRead, count - totalRead);
				if (read == -1) {
					throw new IOException("Unexpected end of data."); //$NON-NLS-1$
				}
				totalRead += read;
			}
		}

		@Override
		public void remove() {
			throw new UnsupportedOperationException("Cannot remove chunks"); //$NON-NLS-1$
		}
	}

	/**
	 * Reads a JFR file, chunk by chunk.
	 * <p>
	 * Each chunk will be self contained and parsable, for example by wrapping it in a
	 * {@link ByteArrayInputStream}. Note that {@link Iterator#next()} can throw
	 * {@link IllegalArgumentException} if it encounters a corrupted chunk.
	 *
	 * @param jfrFile
	 *            the file to read binary data from
	 * @return returns an iterator over byte arrays, where each byte array is a self containing jfr
	 *         chunk
	 */
	public static Iterator<byte[]> readChunks(File jfrFile) throws IOException {
		// We fall back to using a StreamChunkIterator if the file is compressed.
		if (IOToolkit.isCompressedFile(jfrFile)) {
			return new StreamChunkIterator(IOToolkit.openUncompressedStream(jfrFile));
		}
		return new ChunkIterator(jfrFile);
	}

	/**
	 * Reads a JFR file, chunk by chunk, from a stream.
	 * <p>
	 * Each chunk will be self contained and parsable, for example by wrapping it in a
	 * {@link ByteArrayInputStream}. Note that {@link Iterator#next()} can throw
	 * {@link IllegalArgumentException} if it encounters a corrupted chunk.
	 *
	 * @param jfrStream
	 *            the stream to read binary data from
	 * @return returns an iterator over byte arrays, where each byte array is a self containing JFR
	 *         chunk
	 */
	public static Iterator<byte[]> readChunks(InputStream jfrStream) throws IOException {
		return new StreamChunkIterator(IOToolkit.openUncompressedStream(jfrStream));
	}

	/**
	 * Program for listing the number of chunks in a recording.
	 *
	 * @param args
	 *            takes one argument, which must be the path to a recording
	 * @throws IOException
	 *             if there was a problem reading the file
	 */
	public static void main(String[] args) throws IOException {
		long nanoStart = System.nanoTime();
		int chunkCount = 0;
		// long, since recordings may well exceed Integer.MAX_VALUE bytes.
		long byteCount = 0;

		if (args.length != 1) {
			System.out.println("Usage: ChunkReader <file>");
			System.exit(2);
		}
		File file = new File(args[0]);
		if (!file.exists()) {
			System.out.println("The file " + file.getAbsolutePath() + " does not exist. Exiting...");
			System.exit(3);
		}
		Iterator<byte[]> iter = readChunks(file);
		while (iter.hasNext()) {
			byte[] bytes = iter.next();
			chunkCount += 1;
			byteCount += bytes.length;
			System.out.println("Chunk #" + chunkCount + " size: " + bytes.length); //$NON-NLS-1$ //$NON-NLS-2$
		}
		double duration = (System.nanoTime() - nanoStart) / 1_000_000d;

		System.out.println("Chunks: " + chunkCount + " Byte count: " + byteCount + " Time taken: " + duration + " ms"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
	}
}