1 /* 2 * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. 3 * 4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 5 * 6 * The contents of this file are subject to the terms of either the Universal Permissive License 7 * v 1.0 as shown at http://oss.oracle.com/licenses/upl 8 * 9 * or the following license: 10 * 11 * Redistribution and use in source and binary forms, with or without modification, are permitted 12 * provided that the following conditions are met: 13 * 14 * 1. Redistributions of source code must retain the above copyright notice, this list of conditions 15 * and the following disclaimer. 16 * 17 * 2. Redistributions in binary form must reproduce the above copyright notice, this list of 18 * conditions and the following disclaimer in the documentation and/or other materials provided with 19 * the distribution. 20 * 21 * 3. Neither the name of the copyright holder nor the names of its contributors may be used to 22 * endorse or promote products derived from this software without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 26 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 27 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 30 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 31 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 package org.openjdk.jmc.flightrecorder.util; 34 35 import java.io.BufferedInputStream; 36 import java.io.ByteArrayInputStream; 37 import java.io.DataInputStream; 38 import java.io.File; 39 import java.io.IOException; 40 import java.io.InputStream; 41 import java.io.RandomAccessFile; 42 import java.nio.MappedByteBuffer; 43 import java.nio.channels.FileChannel; 44 import java.util.Iterator; 45 import java.util.NoSuchElementException; 46 47 import org.openjdk.jmc.common.io.IOToolkit; 48 import org.openjdk.jmc.flightrecorder.JfrLoaderToolkit; 49 import org.openjdk.jmc.flightrecorder.internal.util.DataInputToolkit; 50 51 /** 52 * Provides an efficient means to read JFR data, chunk by chunk. The actual method employed will 53 * depend on whether the JFR file is available as a stream or as a file, and whether or not the data 54 * is compressed or not. 55 * <p> 56 * Each chunk will be self-contained and parsable, for example by wrapping it in a 57 * {@link ByteArrayInputStream} and using the {@link JfrLoaderToolkit}. 58 */ 59 public final class ChunkReader { 60 private static final byte[] JFR_MAGIC_BYTES = new byte[] {'F', 'L', 'R', 0}; 61 private static final int[] JFR_MAGIC = new int[] {'F', 'L', 'R', 0}; 62 private static final int ZIP_MAGIC[] = new int[] {31, 139}; 63 private static final int GZ_MAGIC[] = new int[] {31, 139}; 64 // For JDK 8 this is the size of the magic + version and offset to the meta data event. 65 // For JDK 9 and later, this it the part of the header right up to, and including, the chunk size. 66 private static final int HEADER_SIZE = DataInputToolkit.INTEGER_SIZE + 2 * DataInputToolkit.SHORT_SIZE 67 + DataInputToolkit.LONG_SIZE; 68 69 private ChunkReader() { 70 throw new UnsupportedOperationException("Not to be instantiated"); //$NON-NLS-1$ 71 } 72 73 /** 74 * Chunk iterator for an uncompressed JFR file. Efficiently reads a JFR file, chunk by chunk, 75 * into memory as byte arrays by memory mapping the JFR file, finding the chunk boundaries with 76 * a minimum of parsing, and then block-transferring the byte arrays. The transfers will be done 77 * on {@link Iterator#next()}, and the resulting byte array will only be reachable for as long 78 * as it is referenced. The JFR file must not be zip or gzip compressed. 79 * <p> 80 * Note that {@link Iterator#next()} can throw {@link IllegalArgumentException} if it encounters 81 * a corrupted chunk. 82 */ 83 private static class ChunkIterator implements Iterator<byte[]> { 84 int lastChunkOffset; 85 private RandomAccessFile file; 86 private final FileChannel channel; 87 private final MappedByteBuffer buffer; 88 89 private ChunkIterator(File jfrFile) throws IOException { 90 try { 91 file = new RandomAccessFile(jfrFile, "r"); //$NON-NLS-1$ 92 channel = file.getChannel(); 93 buffer = channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size()); 94 if (!bufferHasMagic(JFR_MAGIC)) { 95 if (bufferHasMagic(GZ_MAGIC) || bufferHasMagic(ZIP_MAGIC)) { 96 throw new IOException( 97 "Cannot use the ChunkIterators with gzipped JMC files. Please use unzipped recordings."); //$NON-NLS-1$ 98 } else { 99 throw new IOException("The provided file (" + String.valueOf(jfrFile) + ") is not a JFR file!"); //$NON-NLS-1$ //$NON-NLS-2$ 100 } 101 } 102 } catch (Exception e) { 103 if (file != null) { 104 file.close(); 105 } 106 throw e; 107 } 108 } 109 110 @Override 111 public boolean hasNext() { 112 boolean hasNext = checkHasMore(); 113 if (!hasNext) { 114 try { 115 channel.close(); 116 file.close(); 117 } catch (IOException e) { 118 // Shouldn't happen. 119 e.printStackTrace(); 120 } 121 } 122 return hasNext; 123 } 124 125 private boolean checkHasMore() { 126 return lastChunkOffset < buffer.limit(); 127 } 128 129 @Override 130 public byte[] next() { 131 if (!checkHasMore()) { 132 throw new NoSuchElementException(); 133 } 134 if (!bufferHasMagic(JFR_MAGIC)) { 135 lastChunkOffset = buffer.limit() + 1; 136 throw new IllegalArgumentException("Corrupted chunk encountered! Aborting!"); //$NON-NLS-1$ 137 } 138 139 int index = lastChunkOffset + JFR_MAGIC.length; 140 short versionMSB = buffer.getShort(index); 141 // short versionLSB = buffer.getShort(index + SHORT_SIZE); 142 index += 2 * DataInputToolkit.SHORT_SIZE; 143 int size = 0; 144 145 if (versionMSB >= 1) { 146 // We have a JDK 9+ recording - chunk size can be directly read from header 147 size = (int) buffer.getLong(index); 148 index = lastChunkOffset + size; 149 } else { 150 // Got a pre JDK 9 recording. Need to find the metadata event index, read and 151 // add the size of the metadata event to find the chunk boundary 152 index = lastChunkOffset + (int) buffer.getLong(index); 153 // Reading the metadata event size 154 int lastEventSize = buffer.getInt(index); 155 index += lastEventSize; 156 size = index - lastChunkOffset; 157 } 158 // Read the chunk and return it 159 byte[] result = new byte[size]; 160 buffer.position(lastChunkOffset); 161 buffer.get(result, 0, result.length); 162 lastChunkOffset = index; 163 return result; 164 } 165 166 private boolean bufferHasMagic(int[] magicBytes) { 167 for (int i = 0; i < magicBytes.length; i++) { 168 if (buffer.get(lastChunkOffset + i) != magicBytes[i]) { 169 return false; 170 } 171 } 172 return true; 173 } 174 175 @Override 176 public void remove() { 177 throw new UnsupportedOperationException("Cannot remove chunks"); //$NON-NLS-1$ 178 } 179 } 180 181 private enum StreamState { 182 NEXT_CHUNK, JFR_CHECKED, ERROR 183 } 184 185 /** 186 * Iterator reading JFR chunks from a stream. 187 */ 188 private static class StreamChunkIterator implements Iterator<byte[]> { 189 private final DataInputStream inputStream; 190 private StreamState streamState = StreamState.NEXT_CHUNK; 191 private Throwable lastError = null; 192 193 public StreamChunkIterator(InputStream inputStream) { 194 this.inputStream = getDataStream(inputStream); 195 } 196 197 private DataInputStream getDataStream(InputStream is) { 198 if (is.markSupported()) { 199 return new DataInputStream(is); 200 } 201 return new DataInputStream(new BufferedInputStream(is)); 202 } 203 204 @Override 205 public boolean hasNext() { 206 boolean hasNext = false; 207 if (streamState == StreamState.NEXT_CHUNK) { 208 hasNext = validateJFRMagic(); 209 } else if (streamState == StreamState.JFR_CHECKED) { 210 hasNext = true; 211 } 212 if (!hasNext) { 213 IOToolkit.closeSilently(inputStream); 214 } 215 return hasNext; 216 } 217 218 private boolean validateJFRMagic() { 219 try { 220 if (IOToolkit.hasMagic(inputStream, JFR_MAGIC)) { 221 streamState = StreamState.JFR_CHECKED; 222 return true; 223 } else { 224 streamState = StreamState.ERROR; 225 lastError = new Exception( 226 "Next chunk has no JFR magic. It is either no JFR file at all or corrupt."); //$NON-NLS-1$ 227 return false; 228 } 229 } catch (IOException e) { 230 streamState = StreamState.ERROR; 231 lastError = e; 232 return false; 233 } 234 } 235 236 @Override 237 public byte[] next() { 238 if (!hasNext()) { 239 throw new NoSuchElementException(); 240 } 241 switch (streamState) { 242 case ERROR: 243 throw new IllegalArgumentException(lastError); 244 case NEXT_CHUNK: 245 if (!validateJFRMagic()) { 246 throw new IllegalArgumentException(lastError); 247 } 248 // Fall through 249 case JFR_CHECKED: 250 try { 251 return retrieveNextChunk(); 252 } catch (IOException e) { 253 lastError = e; 254 throw new IllegalArgumentException(e); 255 } 256 default: 257 throw new IllegalArgumentException("Unknown stream state"); //$NON-NLS-1$ 258 } 259 } 260 261 private byte[] retrieveNextChunk() throws IOException { 262 byte[] chunkHeader = new byte[HEADER_SIZE]; 263 // Copy in the magic 264 System.arraycopy(JFR_MAGIC_BYTES, 0, chunkHeader, 0, JFR_MAGIC_BYTES.length); 265 // Read rest of chunk header 266 readBytesFromStream(chunkHeader, JFR_MAGIC_BYTES.length, HEADER_SIZE - JFR_MAGIC_BYTES.length); 267 short majorVersion = DataInputToolkit.readShort(chunkHeader, JFR_MAGIC_BYTES.length); 268 byte[] chunkTotal = null; 269 if (majorVersion >= 1) { 270 // JDK 9+ recording 271 long fullSize = DataInputToolkit.readLong(chunkHeader, HEADER_SIZE - DataInputToolkit.LONG_SIZE); 272 int readSize = (int) fullSize - HEADER_SIZE; 273 chunkTotal = new byte[(int) fullSize]; 274 System.arraycopy(chunkHeader, 0, chunkTotal, 0, chunkHeader.length); 275 readBytesFromStream(chunkTotal, HEADER_SIZE, readSize); 276 } else { 277 long metadataIndex = DataInputToolkit.readLong(chunkHeader, HEADER_SIZE - DataInputToolkit.LONG_SIZE); 278 int eventReadSize = (int) (metadataIndex - HEADER_SIZE + DataInputToolkit.INTEGER_SIZE); 279 byte[] chunkEvents = new byte[eventReadSize]; 280 readBytesFromStream(chunkEvents, 0, chunkEvents.length); 281 int metadataEventSize = DataInputToolkit.readInt(chunkEvents, 282 eventReadSize - DataInputToolkit.INTEGER_SIZE) - DataInputToolkit.INTEGER_SIZE; 283 byte[] chunkMetadata = new byte[metadataEventSize]; 284 readBytesFromStream(chunkMetadata, 0, chunkMetadata.length); 285 286 chunkTotal = new byte[chunkHeader.length + chunkEvents.length + chunkMetadata.length]; 287 System.arraycopy(chunkHeader, 0, chunkTotal, 0, chunkHeader.length); 288 System.arraycopy(chunkEvents, 0, chunkTotal, chunkHeader.length, chunkEvents.length); 289 System.arraycopy(chunkMetadata, 0, chunkTotal, chunkHeader.length + chunkEvents.length, 290 chunkMetadata.length); 291 } 292 streamState = StreamState.NEXT_CHUNK; 293 return chunkTotal; 294 } 295 296 private void readBytesFromStream(byte[] bytes, int offset, int count) throws IOException { 297 int totalRead = 0; 298 while (totalRead < count) { 299 int read = inputStream.read(bytes, offset + totalRead, count - totalRead); 300 if (read == -1) { 301 throw new IOException("Unexpected end of data."); //$NON-NLS-1$ 302 } 303 totalRead += read; 304 } 305 } 306 307 @Override 308 public void remove() { 309 throw new UnsupportedOperationException("Cannot remove chunks"); //$NON-NLS-1$ 310 } 311 } 312 313 /** 314 * Reads a JFR file, chunk by chunk. 315 * <p> 316 * Each chunk will be self contained and parsable, for example by wrapping it in a 317 * {@link ByteArrayInputStream}. Note that {@link Iterator#next()} can throw 318 * {@link IllegalArgumentException} if it encounters a corrupted chunk. 319 * 320 * @param jfrFile 321 * the file to read binary data from 322 * @return returns an iterator over byte arrays, where each byte array is a self containing jfr 323 * chunk 324 */ 325 public static Iterator<byte[]> readChunks(File jfrFile) throws IOException { 326 // We fall back to using a StreamChunkIterator if the file is compressed. 327 if (IOToolkit.isCompressedFile(jfrFile)) { 328 return new StreamChunkIterator(IOToolkit.openUncompressedStream(jfrFile)); 329 } 330 return new ChunkIterator(jfrFile); 331 } 332 333 /** 334 * Reads a JFR file, chunk by chunk, from a stream. 335 * <p> 336 * Each chunk will be self contained and parsable, for example by wrapping it in a 337 * {@link ByteArrayInputStream}. Note that {@link Iterator#next()} can throw 338 * {@link IllegalArgumentException} if it encounters a corrupted chunk. 339 * 340 * @param jfrStream 341 * the stream to read binary data from 342 * @return returns an iterator over byte arrays, where each byte array is a self containing JFR 343 * chunk 344 */ 345 public static Iterator<byte[]> readChunks(InputStream jfrStream) throws IOException { 346 return new StreamChunkIterator(IOToolkit.openUncompressedStream(jfrStream)); 347 } 348 349 /** 350 * Program for listing the number of chunks in a recording. 351 * 352 * @param args 353 * takes one argument, which must be the path to a recording 354 * @throws IOException 355 * if there was a problem reading the file 356 */ 357 public static void main(String[] args) throws IOException { 358 long nanoStart = System.nanoTime(); 359 int chunkCount = 0, byteCount = 0; 360 361 if (args.length != 1) { 362 System.out.println("Usage: ChunkReader <file>"); //$NON-NLS-1$ 363 System.exit(2); 364 } 365 File file = new File(args[0]); 366 if (!file.exists()) { 367 System.out.println("The file " + file.getAbsolutePath() + " does not exist. Exiting..."); //$NON-NLS-1$ //$NON-NLS-2$ 368 System.exit(3); 369 } 370 Iterator<byte[]> iter = readChunks(file); 371 while (iter.hasNext()) { 372 byte[] bytes = iter.next(); 373 chunkCount += 1; 374 byteCount += bytes.length; 375 System.out.println("Chunk #" + chunkCount + " size: " + bytes.length); //$NON-NLS-1$ //$NON-NLS-2$ 376 } 377 double duration = (System.nanoTime() - nanoStart) / 1_000_000d; 378 379 System.out.println("Chunks: " + chunkCount + " Byte count: " + byteCount + " Time taken: " + duration + " ms"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ 380 } 381 }