1 /* 2 * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. 3 * 4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 5 * 6 * The contents of this file are subject to the terms of either the Universal Permissive License 7 * v 1.0 as shown at http://oss.oracle.com/licenses/upl 8 * 9 * or the following license: 10 * 11 * Redistribution and use in source and binary forms, with or without modification, are permitted 12 * provided that the following conditions are met: 13 * 14 * 1. Redistributions of source code must retain the above copyright notice, this list of conditions 15 * and the following disclaimer. 16 * 17 * 2. Redistributions in binary form must reproduce the above copyright notice, this list of 18 * conditions and the following disclaimer in the documentation and/or other materials provided with 19 * the distribution. 20 * 21 * 3. Neither the name of the copyright holder nor the names of its contributors may be used to 22 * endorse or promote products derived from this software without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 26 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 27 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 30 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 31 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 package org.openjdk.jmc.flightrecorder.util; 34 35 import java.io.BufferedInputStream; 36 import java.io.ByteArrayInputStream; 37 import java.io.DataInputStream; 38 import java.io.File; 39 import java.io.IOException; 40 import java.io.InputStream; 41 import java.io.RandomAccessFile; 42 import java.nio.MappedByteBuffer; 43 import java.nio.channels.FileChannel; 44 import java.util.Iterator; 45 import java.util.NoSuchElementException; 46 47 import org.openjdk.jmc.common.io.IOToolkit; 48 import org.openjdk.jmc.flightrecorder.JfrLoaderToolkit; 49 import org.openjdk.jmc.flightrecorder.internal.util.DataInputToolkit; 50 51 /** 52 * Provides an efficient means to read JFR data, chunk by chunk. The actual method employed will 53 * depend on whether the JFR file is available as a stream or as a file, and whether or not the data 54 * is compressed or not. 55 * <p> 56 * Each chunk will be self-contained and parsable, for example by wrapping it in a 57 * {@link ByteArrayInputStream} and using the {@link JfrLoaderToolkit}. 58 */ 59 public final class ChunkReader { 60 private static final byte[] JFR_MAGIC_BYTES = new byte[] {'F', 'L', 'R', 0}; 61 private static final int[] JFR_MAGIC = new int[] {'F', 'L', 'R', 0}; 62 private static final int ZIP_MAGIC[] = new int[] {31, 139}; 63 private static final int GZ_MAGIC[] = new int[] {31, 139}; 64 // For JDK 8 this is the size of the magic + version and offset to the meta data event. 65 // For JDK 9 and later, this it the part of the header right up to, and including, the chunk size. 66 private static final int HEADER_SIZE = DataInputToolkit.INTEGER_SIZE + 2 * DataInputToolkit.SHORT_SIZE 67 + DataInputToolkit.LONG_SIZE; 68 69 /** 70 * Chunk iterator for an uncompressed JFR file. Efficiently reads a JFR file, chunk by chunk, 71 * into memory as byte arrays by memory mapping the JFR file, finding the chunk boundaries with 72 * a minimum of parsing, and then block-transferring the byte arrays. The transfers will be done 73 * on {@link Iterator#next()}, and the resulting byte array will only be reachable for as long 74 * as it is referenced. The JFR file must not be zip or gzip compressed. 75 * <p> 76 * Note that {@link Iterator#next()} can throw {@link IllegalArgumentException} if it encounters 77 * a corrupted chunk. 78 */ 79 private static class ChunkIterator implements Iterator<byte[]> { 80 int lastChunkOffset; 81 private RandomAccessFile file; 82 private final FileChannel channel; 83 private final MappedByteBuffer buffer; 84 85 private ChunkIterator(File jfrFile) throws IOException { 86 try { 87 file = new RandomAccessFile(jfrFile, "r"); //$NON-NLS-1$ 88 channel = file.getChannel(); 89 buffer = channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size()); 90 if (!bufferHasMagic(JFR_MAGIC)) { 91 if (bufferHasMagic(GZ_MAGIC) || bufferHasMagic(ZIP_MAGIC)) { 92 throw new IOException( 93 "Cannot use the ChunkIterators with gzipped JMC files. Please use unzipped recordings."); //$NON-NLS-1$ 94 } else { 95 throw new IOException("The provided file (" + String.valueOf(jfrFile) + ") is not a JFR file!"); //$NON-NLS-1$ //$NON-NLS-2$ 96 } 97 } 98 } catch (Exception e) { 99 if (file != null) { 100 file.close(); 101 } 102 throw e; 103 } 104 } 105 106 @Override 107 public boolean hasNext() { 108 boolean hasNext = checkHasMore(); 109 if (!hasNext) { 110 try { 111 channel.close(); 112 file.close(); 113 } catch (IOException e) { 114 // Shouldn't happen. 115 e.printStackTrace(); 116 } 117 } 118 return hasNext; 119 } 120 121 private boolean checkHasMore() { 122 return lastChunkOffset < buffer.limit(); 123 } 124 125 @Override 126 public byte[] next() { 127 if (!checkHasMore()) { 128 throw new NoSuchElementException(); 129 } 130 if (!bufferHasMagic(JFR_MAGIC)) { 131 lastChunkOffset = buffer.limit() + 1; 132 throw new IllegalArgumentException("Corrupted chunk encountered! Aborting!"); //$NON-NLS-1$ 133 } 134 135 int index = lastChunkOffset + JFR_MAGIC.length; 136 short versionMSB = buffer.getShort(index); 137 // short versionLSB = buffer.getShort(index + SHORT_SIZE); 138 index += 2 * DataInputToolkit.SHORT_SIZE; 139 int size = 0; 140 141 if (versionMSB >= 1) { 142 // We have a JDK 9+ recording - chunk size can be directly read from header 143 size = (int) buffer.getLong(index); 144 index = lastChunkOffset + size; 145 } else { 146 // Got a pre JDK 9 recording. Need to find the metadata event index, read and 147 // add the size of the metadata event to find the chunk boundary 148 index = lastChunkOffset + (int) buffer.getLong(index); 149 // Reading the metadata event size 150 int lastEventSize = buffer.getInt(index); 151 index += lastEventSize; 152 size = index - lastChunkOffset; 153 } 154 // Read the chunk and return it 155 byte[] result = new byte[size]; 156 buffer.position(lastChunkOffset); 157 buffer.get(result, 0, result.length); 158 lastChunkOffset = index; 159 return result; 160 } 161 162 private boolean bufferHasMagic(int[] magicBytes) { 163 for (int i = 0; i < magicBytes.length; i++) { 164 if (buffer.get(lastChunkOffset + i) != magicBytes[i]) { 165 return false; 166 } 167 } 168 return true; 169 } 170 171 @Override 172 public void remove() { 173 throw new UnsupportedOperationException("Cannot remove chunks"); //$NON-NLS-1$ 174 } 175 } 176 177 private enum StreamState { 178 NEXT_CHUNK, JFR_CHECKED, ERROR 179 } 180 181 /** 182 * Iterator reading JFR chunks from a stream. 183 */ 184 private static class StreamChunkIterator implements Iterator<byte[]> { 185 private final DataInputStream inputStream; 186 private StreamState streamState = StreamState.NEXT_CHUNK; 187 private Throwable lastError = null; 188 189 public StreamChunkIterator(InputStream inputStream) { 190 this.inputStream = getDataStream(inputStream); 191 } 192 193 private DataInputStream getDataStream(InputStream is) { 194 if (is.markSupported()) { 195 return new DataInputStream(is); 196 } 197 return new DataInputStream(new BufferedInputStream(is)); 198 } 199 200 @Override 201 public boolean hasNext() { 202 boolean hasNext = false; 203 if (streamState == StreamState.NEXT_CHUNK) { 204 hasNext = validateJFRMagic(); 205 } else if (streamState == StreamState.JFR_CHECKED) { 206 hasNext = true; 207 } 208 if (!hasNext) { 209 IOToolkit.closeSilently(inputStream); 210 } 211 return hasNext; 212 } 213 214 private boolean validateJFRMagic() { 215 try { 216 if (IOToolkit.hasMagic(inputStream, JFR_MAGIC)) { 217 streamState = StreamState.JFR_CHECKED; 218 return true; 219 } else { 220 streamState = StreamState.ERROR; 221 lastError = new Exception( 222 "Next chunk has no JFR magic. It is either no JFR file at all or corrupt."); //$NON-NLS-1$ 223 return false; 224 } 225 } catch (IOException e) { 226 streamState = StreamState.ERROR; 227 lastError = e; 228 return false; 229 } 230 } 231 232 @Override 233 public byte[] next() { 234 if (!hasNext()) { 235 throw new NoSuchElementException(); 236 } 237 switch (streamState) { 238 case ERROR: 239 throw new IllegalArgumentException(lastError); 240 case NEXT_CHUNK: 241 if (!validateJFRMagic()) { 242 throw new IllegalArgumentException(lastError); 243 } 244 // Fall through 245 case JFR_CHECKED: 246 try { 247 return retrieveNextChunk(); 248 } catch (IOException e) { 249 lastError = e; 250 throw new IllegalArgumentException(e); 251 } 252 default: 253 throw new IllegalArgumentException("Unknown stream state"); //$NON-NLS-1$ 254 } 255 } 256 257 private byte[] retrieveNextChunk() throws IOException { 258 byte[] chunkHeader = new byte[HEADER_SIZE]; 259 // Copy in the magic 260 System.arraycopy(JFR_MAGIC_BYTES, 0, chunkHeader, 0, JFR_MAGIC_BYTES.length); 261 // Read rest of chunk header 262 readBytesFromStream(chunkHeader, JFR_MAGIC_BYTES.length, HEADER_SIZE - JFR_MAGIC_BYTES.length); 263 short majorVersion = DataInputToolkit.readShort(chunkHeader, JFR_MAGIC_BYTES.length); 264 byte[] chunkTotal = null; 265 if (majorVersion >= 1) { 266 // JDK 9+ recording 267 long fullSize = DataInputToolkit.readLong(chunkHeader, HEADER_SIZE - DataInputToolkit.LONG_SIZE); 268 int readSize = (int) fullSize - HEADER_SIZE; 269 chunkTotal = new byte[(int) fullSize]; 270 System.arraycopy(chunkHeader, 0, chunkTotal, 0, chunkHeader.length); 271 readBytesFromStream(chunkTotal, HEADER_SIZE, readSize); 272 } else { 273 long metadataIndex = DataInputToolkit.readLong(chunkHeader, HEADER_SIZE - DataInputToolkit.LONG_SIZE); 274 int eventReadSize = (int) (metadataIndex - HEADER_SIZE + DataInputToolkit.INTEGER_SIZE); 275 byte[] chunkEvents = new byte[eventReadSize]; 276 readBytesFromStream(chunkEvents, 0, chunkEvents.length); 277 int metadataEventSize = DataInputToolkit.readInt(chunkEvents, 278 eventReadSize - DataInputToolkit.INTEGER_SIZE) - DataInputToolkit.INTEGER_SIZE; 279 byte[] chunkMetadata = new byte[metadataEventSize]; 280 readBytesFromStream(chunkMetadata, 0, chunkMetadata.length); 281 282 chunkTotal = new byte[chunkHeader.length + chunkEvents.length + chunkMetadata.length]; 283 System.arraycopy(chunkHeader, 0, chunkTotal, 0, chunkHeader.length); 284 System.arraycopy(chunkEvents, 0, chunkTotal, chunkHeader.length, chunkEvents.length); 285 System.arraycopy(chunkMetadata, 0, chunkTotal, chunkHeader.length + chunkEvents.length, 286 chunkMetadata.length); 287 } 288 streamState = StreamState.NEXT_CHUNK; 289 return chunkTotal; 290 } 291 292 private void readBytesFromStream(byte[] bytes, int offset, int count) throws IOException { 293 int totalRead = 0; 294 while (totalRead < count) { 295 int read = inputStream.read(bytes, offset + totalRead, count - totalRead); 296 if (read == -1) { 297 throw new IOException("Unexpected end of data."); //$NON-NLS-1$ 298 } 299 totalRead += read; 300 } 301 } 302 303 @Override 304 public void remove() { 305 throw new UnsupportedOperationException("Cannot remove chunks"); //$NON-NLS-1$ 306 } 307 } 308 309 /** 310 * Reads a JFR file, chunk by chunk. 311 * <p> 312 * Each chunk will be self contained and parsable, for example by wrapping it in a 313 * {@link ByteArrayInputStream}. Note that {@link Iterator#next()} can throw 314 * {@link IllegalArgumentException} if it encounters a corrupted chunk. 315 * 316 * @param jfrFile 317 * the file to read binary data from 318 * @return returns an iterator over byte arrays, where each byte array is a self containing jfr 319 * chunk 320 */ 321 public static Iterator<byte[]> readChunks(File jfrFile) throws IOException { 322 // We fall back to using a StreamChunkIterator if the file is compressed. 323 if (IOToolkit.isCompressedFile(jfrFile)) { 324 return new StreamChunkIterator(IOToolkit.openUncompressedStream(jfrFile)); 325 } 326 return new ChunkIterator(jfrFile); 327 } 328 329 /** 330 * Reads a JFR file, chunk by chunk, from a stream. 331 * <p> 332 * Each chunk will be self contained and parsable, for example by wrapping it in a 333 * {@link ByteArrayInputStream}. Note that {@link Iterator#next()} can throw 334 * {@link IllegalArgumentException} if it encounters a corrupted chunk. 335 * 336 * @param jfrStream 337 * the stream to read binary data from 338 * @return returns an iterator over byte arrays, where each byte array is a self containing JFR 339 * chunk 340 */ 341 public static Iterator<byte[]> readChunks(InputStream jfrStream) throws IOException { 342 return new StreamChunkIterator(IOToolkit.openUncompressedStream(jfrStream)); 343 } 344 345 /** 346 * Program for listing the number of chunks in a recording. 347 * 348 * @param args 349 * takes one argument, which must be the path to a recording 350 * @throws IOException 351 * if there was a problem reading the file 352 */ 353 public static void main(String[] args) throws IOException { 354 long nanoStart = System.nanoTime(); 355 int chunkCount = 0, byteCount = 0; 356 357 if (args.length != 1) { 358 System.out.println("Usage: ChunkReader <file>"); //$NON-NLS-1$ 359 System.exit(2); 360 } 361 File file = new File(args[0]); 362 if (!file.exists()) { 363 System.out.println("The file " + file.getAbsolutePath() + " does not exist. Exiting..."); //$NON-NLS-1$ //$NON-NLS-2$ 364 System.exit(3); 365 } 366 Iterator<byte[]> iter = readChunks(file); 367 while (iter.hasNext()) { 368 byte[] bytes = iter.next(); 369 chunkCount += 1; 370 byteCount += bytes.length; 371 System.out.println("Chunk #" + chunkCount + " size: " + bytes.length); //$NON-NLS-1$ //$NON-NLS-2$ 372 } 373 double duration = (System.nanoTime() - nanoStart) / 1_000_000d; 374 375 System.out.println("Chunks: " + chunkCount + " Byte count: " + byteCount + " Time taken: " + duration + " ms"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ 376 } 377 }