1 /* 2 * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 package java.nio.file; 26 27 import java.io.BufferedReader; 28 import java.io.IOException; 29 import java.io.UncheckedIOException; 30 import java.nio.ByteBuffer; 31 import java.nio.channels.Channels; 32 import java.nio.channels.FileChannel; 33 import java.nio.channels.ReadableByteChannel; 34 import java.nio.charset.Charset; 35 import java.nio.charset.StandardCharsets; 36 import java.util.HashSet; 37 import java.util.Set; 38 import java.util.Spliterator; 39 import java.util.function.Consumer; 40 41 /** 42 * A file-based lines spliterator, leveraging a shared mapped byte buffer and 43 * associated file channel, covering lines of a file for character encodings 44 * where line feed characters can be easily identified from character encoded 45 * bytes. 46 * 47 * <p> 48 * When the root spliterator is first split a mapped byte buffer will be created 49 * over the file for it's size that was observed when the stream was created. 50 * Thus a mapped byte buffer is only required for parallel stream execution. 51 * Sub-spliterators will share that mapped byte buffer. Splitting will use the 52 * mapped byte buffer to find the closest line feed characters(s) to the left or 53 * right of the mid-point of covered range of bytes of the file. If a line feed 54 * is found then the spliterator is split with returned spliterator containing 55 * the identified line feed characters(s) at the end of it's covered range of 56 * bytes. 57 * 58 * <p> 59 * Traversing will create a buffered reader, derived from the file channel, for 60 * the range of bytes of the file. The lines are then read from that buffered 61 * reader. Once traversing commences no further splitting can be performed and 62 * the reference to the mapped byte buffer will be set to null. 63 */ 64 final class FileChannelLinesSpliterator implements Spliterator<String> { 65 66 static final Set<String> SUPPORTED_CHARSET_NAMES; 67 static { 68 SUPPORTED_CHARSET_NAMES = new HashSet<>(); 69 SUPPORTED_CHARSET_NAMES.add(StandardCharsets.UTF_8.name()); 70 SUPPORTED_CHARSET_NAMES.add(StandardCharsets.ISO_8859_1.name()); 71 SUPPORTED_CHARSET_NAMES.add(StandardCharsets.US_ASCII.name()); 72 } 73 74 private final FileChannel fc; 75 private final Charset cs; 76 private int index; 77 private final int fence; 78 79 // Null before first split, non-null when splitting, null when traversing 80 private ByteBuffer buffer; 81 // Non-null when traversing 82 private BufferedReader reader; 83 84 FileChannelLinesSpliterator(FileChannel fc, Charset cs, int index, int fence) { 85 this.fc = fc; 86 this.cs = cs; 87 this.index = index; 88 this.fence = fence; 89 } 90 91 private FileChannelLinesSpliterator(FileChannel fc, Charset cs, int index, int fence, ByteBuffer buffer) { 92 this.fc = fc; 93 this.buffer = buffer; 94 this.cs = cs; 95 this.index = index; 96 this.fence = fence; 97 } 98 99 @Override 100 public boolean tryAdvance(Consumer<? super String> action) { 101 String line = readLine(); 102 if (line != null) { 103 action.accept(line); 104 return true; 105 } else { 106 return false; 107 } 108 } 109 110 @Override 111 public void forEachRemaining(Consumer<? super String> action) { 112 String line; 113 while ((line = readLine()) != null) { 114 action.accept(line); 115 } 116 } 117 118 private BufferedReader getBufferedReader() { 119 /** 120 * A readable byte channel that reads bytes from an underlying 121 * file channel over a specified range. 122 */ 123 ReadableByteChannel rrbc = new ReadableByteChannel() { 124 @Override 125 public int read(ByteBuffer dst) throws IOException { 126 int bytesToRead = fence - index; 127 if (bytesToRead == 0) 128 return -1; 129 130 int bytesRead; 131 if (bytesToRead < dst.remaining()) { 132 // The number of bytes to read is less than remaining 133 // bytes in the buffer 134 // Snapshot the limit, reduce it, read, then restore 135 int oldLimit = dst.limit(); 136 dst.limit(dst.position() + bytesToRead); 137 bytesRead = fc.read(dst, index); 138 dst.limit(oldLimit); 139 } else { 140 bytesRead = fc.read(dst, index); 141 } 142 if (bytesRead == -1) { 143 index = fence; 144 return bytesRead; 145 } 146 147 index += bytesRead; 148 return bytesRead; 149 } 150 151 @Override 152 public boolean isOpen() { 153 return fc.isOpen(); 154 } 155 156 @Override 157 public void close() throws IOException { 158 fc.close(); 159 } 160 }; 161 return new BufferedReader(Channels.newReader(rrbc, cs.newDecoder(), -1)); 162 } 163 164 private String readLine() { 165 if (reader == null) { 166 reader = getBufferedReader(); 167 buffer = null; 168 } 169 170 try { 171 return reader.readLine(); 172 } catch (IOException e) { 173 throw new UncheckedIOException(e); 174 } 175 } 176 177 private ByteBuffer getMappedByteBuffer() { 178 // TODO can the mapped byte buffer be explicitly unmapped? 179 // It's possible, via a shared-secret mechanism, when either 180 // 1) the spliterator starts traversing, although traversal can 181 // happen concurrently for mulitple spliterators, so care is 182 // needed in this case; or 183 // 2) when the stream is closed using some shared holder to pass 184 // the mapped byte buffer when it is created. 185 try { 186 return fc.map(FileChannel.MapMode.READ_ONLY, 0, fence); 187 } catch (IOException e) { 188 throw new UncheckedIOException(e); 189 } 190 } 191 192 @Override 193 public Spliterator<String> trySplit() { 194 // Cannot split after partial traverse 195 if (reader != null) 196 return null; 197 198 ByteBuffer b; 199 if ((b = buffer) == null) { 200 b = buffer = getMappedByteBuffer(); 201 } 202 203 final int hi = fence, lo = index; 204 205 // Check if line separator hits the mid point 206 int mid = (lo + hi) >>> 1; 207 int c = b.get(mid); 208 if (c == '\n') { 209 mid++; 210 } else if (c == '\r') { 211 // Check if a line separator of "\r\n" 212 if (++mid < hi && b.get(mid) == '\n') { 213 mid++; 214 } 215 } else { 216 // TODO give up after a certain distance from the mid point? 217 // Scan to the left and right of the mid point 218 int midL = mid - 1; 219 int midR = mid + 1; 220 mid = 0; 221 while (midL > lo && midR < hi) { 222 // Sample to the left 223 c = b.get(midL--); 224 if (c == '\n' || c == '\r') { 225 // If c is "\r" then no need to check for "\r\n" 226 // since the subsequent value was previously checked 227 mid = midL + 2; 228 break; 229 } 230 231 // Sample to the right 232 c = b.get(midR++); 233 if (c == '\n' || c == '\r') { 234 mid = midR; 235 // Check if line-separator is "\r\n" 236 if (c == '\r' && mid < hi && b.get(mid) == '\n') { 237 mid++; 238 } 239 break; 240 } 241 } 242 } 243 244 // The left spliterator will have the line-separator at the end 245 return (mid > lo && mid < hi) 246 ? new FileChannelLinesSpliterator(fc, cs, lo, index = mid, b) 247 : null; 248 } 249 250 @Override 251 public long estimateSize() { 252 // Use the number of bytes as an estimate. 253 // We could divide by a constant that is the average number of 254 // characters per-line, but that constant will be factored out. 255 return fence - index; 256 } 257 258 @Override 259 public long getExactSizeIfKnown() { 260 return -1; 261 } 262 263 @Override 264 public int characteristics() { 265 return Spliterator.ORDERED | Spliterator.NONNULL; 266 } 267 }