/*
 * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * The contents of this file are subject to the terms of either the Universal Permissive License
 * v 1.0 as shown at http://oss.oracle.com/licenses/upl
 *
 * or the following license:
 *
 * Redistribution and use in source and binary forms, with or without modification, are permitted
 * provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this list of conditions
 * and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice, this list of
 * conditions and the following disclaimer in the documentation and/or other materials provided with
 * the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors may be used to
 * endorse or promote products derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.openjdk.jmc.flightrecorder.util;

import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.openjdk.jmc.common.io.IOToolkit;
import org.openjdk.jmc.flightrecorder.JfrLoaderToolkit;
import org.openjdk.jmc.flightrecorder.internal.util.DataInputToolkit;

/**
 * Provides an efficient means to read JFR data, chunk by chunk. The actual method employed will
 * depend on whether the JFR file is available as a stream or as a file, and whether the data is
 * compressed or not.
 * <p>
 * Each chunk will be self-contained and parsable, for example by wrapping it in a
 * {@link ByteArrayInputStream} and using the {@link JfrLoaderToolkit}.
 */
public final class ChunkReader {
    private static final Logger LOGGER = Logger.getLogger(ChunkReader.class.getName());

    private static final byte[] JFR_MAGIC_BYTES = new byte[] {'F', 'L', 'R', 0};
    private static final int[] JFR_MAGIC = new int[] {'F', 'L', 'R', 0};
    // Every zip signature (local file header, empty archive, spanned archive) starts with "PK".
    private static final int[] ZIP_MAGIC = new int[] {'P', 'K'};
    private static final int[] GZ_MAGIC = new int[] {31, 139};
    // For JDK 8 this is the size of the magic + version and offset to the meta data event.
    // For JDK 9 and later, this is the part of the header right up to, and including, the chunk size.
    private static final int HEADER_SIZE = DataInputToolkit.INTEGER_SIZE + 2 * DataInputToolkit.SHORT_SIZE
            + DataInputToolkit.LONG_SIZE;

    /**
     * Chunk iterator for an uncompressed JFR file. Efficiently reads a JFR file, chunk by chunk,
     * into memory as byte arrays by memory mapping the JFR file, finding the chunk boundaries with
     * a minimum of parsing, and then block-transferring the byte arrays. The transfers will be done
     * on {@link Iterator#next()}, and the resulting byte array will only be reachable for as long
     * as it is referenced. The JFR file must not be zip or gzip compressed.
     * <p>
     * Note that {@link Iterator#next()} can throw {@link IllegalArgumentException} if it encounters
     * a corrupted chunk. The underlying file is closed once {@link #hasNext()} has reported the end
     * of the data, or once a corrupted chunk has been encountered.
     */
    private static class ChunkIterator implements Iterator<byte[]> {
        /** Offset in the mapped buffer of the next chunk to hand out. */
        private int lastChunkOffset;
        // Not final: it is read in the constructor's catch clause, possibly before being assigned.
        private RandomAccessFile file;
        private final FileChannel channel;
        private final MappedByteBuffer buffer;

        /**
         * Maps the file into memory and verifies that it starts with the JFR magic.
         *
         * @param jfrFile
         *            the uncompressed recording to iterate over
         * @throws IOException
         *             if the file cannot be opened or mapped, or does not start with the JFR magic
         */
        private ChunkIterator(File jfrFile) throws IOException {
            try {
                file = new RandomAccessFile(jfrFile, "r"); //$NON-NLS-1$
                channel = file.getChannel();
                buffer = channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size());
                if (!bufferHasMagic(JFR_MAGIC)) {
                    if (bufferHasMagic(GZ_MAGIC) || bufferHasMagic(ZIP_MAGIC)) {
                        throw new IOException(
                                "Cannot use the ChunkIterators with gzipped JMC files. Please use unzipped recordings."); //$NON-NLS-1$
                    } else {
                        throw new IOException("The provided file (" + jfrFile + ") is not a JFR file!"); //$NON-NLS-1$ //$NON-NLS-2$
                    }
                }
            } catch (Exception e) {
                if (file != null) {
                    try {
                        file.close();
                    } catch (IOException closeFailure) {
                        // Keep the original problem as the primary exception.
                        e.addSuppressed(closeFailure);
                    }
                }
                throw e;
            }
        }

        /**
         * @return {@code true} if there is more data after the last chunk handed out. When the end
         *         has been reached the underlying file is closed.
         */
        @Override
        public boolean hasNext() {
            boolean hasNext = checkHasMore();
            if (!hasNext) {
                closeFile();
            }
            return hasNext;
        }

        private boolean checkHasMore() {
            return lastChunkOffset < buffer.limit();
        }

        /** Closes the channel and the file, logging (rather than propagating) any failure. */
        private void closeFile() {
            try {
                try {
                    channel.close();
                } finally {
                    // Must run even if closing the channel failed.
                    file.close();
                }
            } catch (IOException e) {
                // Shouldn't happen.
                LOGGER.log(Level.WARNING, "Failed to close the JFR file after reading its chunks", e); //$NON-NLS-1$
            }
        }

        /**
         * Copies the next chunk out of the mapped file.
         *
         * @return the bytes of the next chunk, header included
         * @throws NoSuchElementException
         *             if there is no more data
         * @throws IllegalArgumentException
         *             if the data at the current position is not a well-formed chunk. After this
         *             the iterator reports no further elements.
         */
        @Override
        public byte[] next() {
            if (!checkHasMore()) {
                throw new NoSuchElementException();
            }
            long remaining = (long) buffer.limit() - lastChunkOffset;
            if (!bufferHasMagic(JFR_MAGIC) || remaining < HEADER_SIZE) {
                throw corruptedChunk();
            }

            int index = lastChunkOffset + JFR_MAGIC.length;
            short versionMSB = buffer.getShort(index);
            // The minor version (the following short) is not needed to find the chunk boundary.
            index += 2 * DataInputToolkit.SHORT_SIZE;
            long headerValue = buffer.getLong(index);
            long size;

            if (versionMSB >= 1) {
                // We have a JDK 9+ recording - chunk size can be directly read from header
                size = headerValue;
            } else {
                // Got a pre JDK 9 recording. Need to find the metadata event index, read and
                // add the size of the metadata event to find the chunk boundary
                if (headerValue < HEADER_SIZE || headerValue > remaining - DataInputToolkit.INTEGER_SIZE) {
                    throw corruptedChunk();
                }
                // Reading the metadata event size
                int metadataEventSize = buffer.getInt(lastChunkOffset + (int) headerValue);
                if (metadataEventSize <= 0) {
                    throw corruptedChunk();
                }
                size = headerValue + metadataEventSize;
            }
            // Validate on the long value: a plain (int) cast could silently truncate a bogus size,
            // and a size of zero would make the iterator loop forever on the same offset.
            if (size < HEADER_SIZE || size > remaining) {
                throw corruptedChunk();
            }
            // Read the chunk and return it
            byte[] result = new byte[(int) size];
            buffer.position(lastChunkOffset);
            buffer.get(result, 0, result.length);
            lastChunkOffset += result.length;
            return result;
        }

        /**
         * Marks the iterator as exhausted, releases the file and creates the exception to throw for
         * a corrupted chunk.
         */
        private IllegalArgumentException corruptedChunk() {
            lastChunkOffset = buffer.limit();
            closeFile();
            return new IllegalArgumentException("Corrupted chunk encountered! Aborting!"); //$NON-NLS-1$
        }

        /**
         * Checks whether the bytes at the current chunk offset equal the given magic.
         *
         * @param magicBytes
         *            the expected unsigned byte values
         * @return {@code true} if all bytes match, {@code false} if any byte differs or if there
         *         are fewer bytes left than the magic is long
         */
        private boolean bufferHasMagic(int[] magicBytes) {
            if ((long) lastChunkOffset + magicBytes.length > buffer.limit()) {
                return false;
            }
            for (int i = 0; i < magicBytes.length; i++) {
                // ByteBuffer.get returns a signed byte; mask it so values above 127 can match.
                if ((buffer.get(lastChunkOffset + i) & 0xFF) != magicBytes[i]) {
                    return false;
                }
            }
            return true;
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException("Cannot remove chunks"); //$NON-NLS-1$
        }
    }

    /** Parse state of a {@link StreamChunkIterator}. */
    private enum StreamState {
        /** The magic of the next chunk has not been read yet. */
        NEXT_CHUNK,
        /** The magic of the next chunk has been consumed and matched. */
        JFR_CHECKED,
        /** The stream ended, failed, or did not contain a JFR magic; no more chunks are produced. */
        ERROR
    }

    /**
     * Iterator reading JFR chunks from a stream.
     */
    private static class StreamChunkIterator implements Iterator<byte[]> {
        private final DataInputStream inputStream;
        private final boolean closeWhenDone;
        private StreamState streamState = StreamState.NEXT_CHUNK;
        private Throwable lastError = null;

        /**
         * Creates an iterator over a stream owned by the caller. The stream is never closed by the
         * iterator.
         *
         * @param inputStream
         *            the uncompressed JFR data
         */
        public StreamChunkIterator(InputStream inputStream) {
            this(inputStream, false);
        }

        /**
         * @param inputStream
         *            the uncompressed JFR data
         * @param closeWhenDone
         *            {@code true} if the iterator owns the stream and should close it when no more
         *            chunks can be produced
         */
        private StreamChunkIterator(InputStream inputStream, boolean closeWhenDone) {
            this.inputStream = getDataStream(inputStream);
            this.closeWhenDone = closeWhenDone;
        }

        private DataInputStream getDataStream(InputStream is) {
            if (is.markSupported()) {
                return new DataInputStream(is);
            }
            return new DataInputStream(new BufferedInputStream(is));
        }

        /**
         * @return {@code true} if the magic of a further chunk has been read from the stream.
         *         Reading the magic consumes it from the stream; the result is remembered until
         *         the chunk has been retrieved with {@link #next()}.
         */
        @Override
        public boolean hasNext() {
            if (streamState == StreamState.NEXT_CHUNK) {
                return validateJFRMagic();
            }
            return streamState == StreamState.JFR_CHECKED;
        }

        private boolean validateJFRMagic() {
            try {
                if (IOToolkit.hasMagic(inputStream, JFR_MAGIC)) {
                    streamState = StreamState.JFR_CHECKED;
                    return true;
                }
                fail(new IOException("Next chunk has no JFR magic. It is either no JFR file at all or corrupt.")); //$NON-NLS-1$
                return false;
            } catch (IOException e) {
                fail(e);
                return false;
            }
        }

        /**
         * Puts the iterator in its terminal state, remembering the cause, and closes the stream if
         * the iterator owns it.
         */
        private void fail(Throwable cause) {
            streamState = StreamState.ERROR;
            lastError = cause;
            if (closeWhenDone) {
                try {
                    inputStream.close();
                } catch (IOException e) {
                    LOGGER.log(Level.WARNING, "Failed to close the JFR stream after reading its chunks", e); //$NON-NLS-1$
                }
            }
        }

        /**
         * Reads the next chunk from the stream.
         *
         * @return the bytes of the next chunk, header included
         * @throws NoSuchElementException
         *             if {@link #hasNext()} is {@code false}
         * @throws IllegalArgumentException
         *             if the chunk could not be read completely or has an invalid header. After
         *             this the iterator reports no further elements.
         */
        @Override
        public byte[] next() {
            if (!hasNext()) {
                throw new NoSuchElementException();
            }
            // hasNext() only returns true in the JFR_CHECKED state, i.e. the magic is consumed.
            try {
                return retrieveNextChunk();
            } catch (IOException e) {
                // The stream position is undefined now; do not attempt to read further chunks.
                fail(e);
                throw new IllegalArgumentException(e);
            }
        }

        private byte[] retrieveNextChunk() throws IOException {
            byte[] chunkHeader = new byte[HEADER_SIZE];
            // Copy in the magic
            System.arraycopy(JFR_MAGIC_BYTES, 0, chunkHeader, 0, JFR_MAGIC_BYTES.length);
            // Read rest of chunk header
            readBytesFromStream(chunkHeader, JFR_MAGIC_BYTES.length, HEADER_SIZE - JFR_MAGIC_BYTES.length);
            short majorVersion = DataInputToolkit.readShort(chunkHeader, JFR_MAGIC_BYTES.length);
            // Chunk size for JDK 9+ recordings, offset of the metadata event for older ones.
            long headerValue = DataInputToolkit.readLong(chunkHeader, HEADER_SIZE - DataInputToolkit.LONG_SIZE);
            if (headerValue < HEADER_SIZE || headerValue > Integer.MAX_VALUE) {
                throw new IOException("Invalid size or offset in chunk header: " + headerValue); //$NON-NLS-1$
            }
            byte[] chunkTotal;
            if (majorVersion >= 1) {
                // JDK 9+ recording
                chunkTotal = new byte[(int) headerValue];
                System.arraycopy(chunkHeader, 0, chunkTotal, 0, chunkHeader.length);
                readBytesFromStream(chunkTotal, HEADER_SIZE, chunkTotal.length - HEADER_SIZE);
            } else {
                // Read everything up to the metadata event, plus the int holding its size.
                int eventReadSize = (int) headerValue - HEADER_SIZE + DataInputToolkit.INTEGER_SIZE;
                byte[] chunkEvents = new byte[eventReadSize];
                readBytesFromStream(chunkEvents, 0, chunkEvents.length);
                // The size int has already been read, so it is excluded from what remains.
                int metadataEventSize = DataInputToolkit.readInt(chunkEvents,
                        eventReadSize - DataInputToolkit.INTEGER_SIZE) - DataInputToolkit.INTEGER_SIZE;
                long totalSize = (long) chunkHeader.length + chunkEvents.length + metadataEventSize;
                if (metadataEventSize < 0 || totalSize > Integer.MAX_VALUE) {
                    throw new IOException("Invalid metadata event size in chunk: " + metadataEventSize); //$NON-NLS-1$
                }
                byte[] chunkMetadata = new byte[metadataEventSize];
                readBytesFromStream(chunkMetadata, 0, chunkMetadata.length);

                chunkTotal = new byte[(int) totalSize];
                System.arraycopy(chunkHeader, 0, chunkTotal, 0, chunkHeader.length);
                System.arraycopy(chunkEvents, 0, chunkTotal, chunkHeader.length, chunkEvents.length);
                System.arraycopy(chunkMetadata, 0, chunkTotal, chunkHeader.length + chunkEvents.length,
                        chunkMetadata.length);
            }
            streamState = StreamState.NEXT_CHUNK;
            return chunkTotal;
        }

        /**
         * Reads exactly {@code count} bytes into {@code bytes}, starting at {@code offset}.
         *
         * @throws IOException
         *             if the stream ends before {@code count} bytes have been read
         */
        private void readBytesFromStream(byte[] bytes, int offset, int count) throws IOException {
            int totalRead = 0;
            while (totalRead < count) {
                int read = inputStream.read(bytes, offset + totalRead, count - totalRead);
                if (read == -1) {
                    throw new IOException("Unexpected end of data."); //$NON-NLS-1$
                }
                totalRead += read;
            }
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException("Cannot remove chunks"); //$NON-NLS-1$
        }
    }

    /**
     * Reads a JFR file, chunk by chunk.
     * <p>
     * Each chunk will be self contained and parsable, for example by wrapping it in a
     * {@link ByteArrayInputStream}. Note that {@link Iterator#next()} can throw
     * {@link IllegalArgumentException} if it encounters a corrupted chunk.
     *
     * @param jfrFile
     *            the file to read binary data from
     * @return returns an iterator over byte arrays, where each byte array is a self containing jfr
     *         chunk
     * @throws IOException
     *             if the file could not be opened, or is not a JFR file
     */
    public static Iterator<byte[]> readChunks(File jfrFile) throws IOException {
        // We fall back to using a StreamChunkIterator if the file is compressed.
        if (IOToolkit.isCompressedFile(jfrFile)) {
            // The stream is opened here, so the iterator is responsible for closing it.
            return new StreamChunkIterator(IOToolkit.openUncompressedStream(jfrFile), true);
        }
        return new ChunkIterator(jfrFile);
    }

    /**
     * Reads a JFR file, chunk by chunk, from a stream.
     * <p>
     * Each chunk will be self contained and parsable, for example by wrapping it in a
     * {@link ByteArrayInputStream}. Note that {@link Iterator#next()} can throw
     * {@link IllegalArgumentException} if it encounters a corrupted chunk. The stream is not closed
     * by the returned iterator.
     *
     * @param jfrStream
     *            the stream to read binary data from
     * @return returns an iterator over byte arrays, where each byte array is a self containing JFR
     *         chunk
     * @throws IOException
     *             if the stream could not be prepared for reading
     */
    public static Iterator<byte[]> readChunks(InputStream jfrStream) throws IOException {
        return new StreamChunkIterator(IOToolkit.openUncompressedStream(jfrStream));
    }

    /**
     * Program for listing the number of chunks in a recording.
     *
     * @param args
     *            takes one argument, which must be the path to a recording
     * @throws IOException
     *             if there was a problem reading the file
     */
    public static void main(String[] args) throws IOException {
        long nanoStart = System.nanoTime();
        int chunkCount = 0;
        // A long, since recordings may be larger than Integer.MAX_VALUE bytes in total.
        long byteCount = 0;

        if (args.length != 1) {
            System.out.println("Usage: ChunkReader <file>"); //$NON-NLS-1$
            System.exit(2);
        }
        File file = new File(args[0]);
        if (!file.exists()) {
            System.out.println("The file " + file.getAbsolutePath() + " does not exist. Exiting..."); //$NON-NLS-1$ //$NON-NLS-2$
            System.exit(3);
        }
        Iterator<byte[]> iter = readChunks(file);
        while (iter.hasNext()) {
            byte[] bytes = iter.next();
            chunkCount += 1;
            byteCount += bytes.length;
            System.out.println("Chunk #" + chunkCount + " size: " + bytes.length); //$NON-NLS-1$ //$NON-NLS-2$
        }
        double duration = (System.nanoTime() - nanoStart) / 1_000_000d;

        System.out.println("Chunks: " + chunkCount + " Byte count: " + byteCount + " Time taken: " + duration + " ms"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
    }
}