/*
 * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
package java.nio.file;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.ByteBuffer;
import java.nio.channels.Channels;
import java.nio.channels.FileChannel;
import java.nio.channels.ReadableByteChannel;
import java.nio.charset.Charset;
import java.util.Collections;
import java.util.HashSet;
import java.util.Objects;
import java.util.Set;
import java.util.Spliterator;
import java.util.function.Consumer;

import sun.nio.cs.ISO_8859_1;
import sun.nio.cs.US_ASCII;
import sun.nio.cs.UTF_8;

/**
 * A file-based lines spliterator, leveraging a shared mapped byte buffer and
 * associated file channel, covering lines of a file for character encodings
 * where line feed characters can be easily identified from character encoded
 * bytes.
 *
 * <p>
 * When the root spliterator is first split a mapped byte buffer will be created
 * over the file for its size that was observed when the stream was created.
 * Thus a mapped byte buffer is only required for parallel stream execution.
 * Sub-spliterators will share that mapped byte buffer. Splitting will use the
 * mapped byte buffer to find the closest line feed character(s) to the left or
 * right of the mid-point of the covered range of bytes of the file. If a line
 * feed is found then the spliterator is split, with the returned spliterator
 * containing the identified line feed character(s) at the end of its covered
 * range of bytes.
 *
 * <p>
 * Traversing will create a buffered reader, derived from the file channel, for
 * the range of bytes of the file. The lines are then read from that buffered
 * reader. Once traversing commences no further splitting can be performed and
 * the reference to the mapped byte buffer will be set to null.
 */
final class FileChannelLinesSpliterator implements Spliterator<String> {

    /**
     * Names of the charsets this spliterator is registered for (UTF-8,
     * ISO-8859-1 and US-ASCII). The set is unmodifiable so that it cannot be
     * altered through this shared reference.
     */
    static final Set<String> SUPPORTED_CHARSET_NAMES;
    static {
        Set<String> names = new HashSet<>();
        names.add(UTF_8.INSTANCE.name());
        names.add(ISO_8859_1.INSTANCE.name());
        names.add(US_ASCII.INSTANCE.name());
        SUPPORTED_CHARSET_NAMES = Collections.unmodifiableSet(names);
    }

    // Channel the bytes are read from, shared with sub-spliterators
    private final FileChannel fc;
    // Charset used to decode the covered bytes into lines
    private final Charset cs;
    // File position of the next byte to read or split from (inclusive)
    private int index;
    // File position one past the last byte of the covered range (exclusive)
    private final int fence;

    // Null before first split, non-null when splitting, null when traversing
    private ByteBuffer buffer;
    // Non-null when traversing
    private BufferedReader reader;

    /**
     * Creates a spliterator covering the bytes {@code [index, fence)} of the
     * given file channel. No mapped byte buffer is associated with it until
     * the first split.
     *
     * @param fc the file channel to read from
     * @param cs the charset used to decode bytes into characters
     * @param index the position of the first byte of the covered range
     * @param fence the position one past the last byte of the covered range
     */
    FileChannelLinesSpliterator(FileChannel fc, Charset cs, int index, int fence) {
        this.fc = fc;
        this.cs = cs;
        this.index = index;
        this.fence = fence;
    }

    /**
     * Creates a sub-spliterator covering the bytes {@code [index, fence)} that
     * shares an already created mapped byte buffer.
     *
     * @param fc the file channel to read from
     * @param cs the charset used to decode bytes into characters
     * @param index the position of the first byte of the covered range
     * @param fence the position one past the last byte of the covered range
     * @param buffer the shared mapped byte buffer used when splitting
     */
    private FileChannelLinesSpliterator(FileChannel fc, Charset cs, int index, int fence, ByteBuffer buffer) {
        this.fc = fc;
        this.buffer = buffer;
        this.cs = cs;
        this.index = index;
        this.fence = fence;
    }

    /**
     * Reads the next line, if any, and passes it to the given action.
     *
     * @param action the consumer of the next line
     * @return {@code true} if a line was consumed, {@code false} if no lines
     *         remain
     * @throws NullPointerException if {@code action} is null
     * @throws UncheckedIOException if an I/O error occurs while reading
     */
    @Override
    public boolean tryAdvance(Consumer<? super String> action) {
        // Fail before consuming a line rather than after
        Objects.requireNonNull(action);
        String line = readLine();
        if (line != null) {
            action.accept(line);
            return true;
        } else {
            return false;
        }
    }

    /**
     * Passes every remaining line, in order, to the given action.
     *
     * @param action the consumer of the remaining lines
     * @throws NullPointerException if {@code action} is null
     * @throws UncheckedIOException if an I/O error occurs while reading
     */
    @Override
    public void forEachRemaining(Consumer<? super String> action) {
        Objects.requireNonNull(action);
        String line;
        while ((line = readLine()) != null) {
            action.accept(line);
        }
    }

    /**
     * Creates a buffered reader that decodes, using this spliterator's
     * charset, the bytes of the file channel from {@code index} up to
     * {@code fence}. Reading advances {@code index}.
     */
    private BufferedReader getBufferedReader() {
        /*
         * A readable byte channel that reads bytes from an underlying
         * file channel over a specified range.
         */
        ReadableByteChannel rrbc = new ReadableByteChannel() {
            @Override
            public int read(ByteBuffer dst) throws IOException {
                int bytesToRead = fence - index;
                if (bytesToRead == 0) {
                    // The covered range is exhausted
                    return -1;
                }

                int bytesRead;
                if (bytesToRead < dst.remaining()) {
                    // The number of bytes to read is less than remaining
                    // bytes in the buffer
                    // Snapshot the limit, reduce it, read, then restore.
                    // The restore is in a finally block so the caller's
                    // buffer is not left truncated if the read fails.
                    int oldLimit = dst.limit();
                    dst.limit(dst.position() + bytesToRead);
                    try {
                        bytesRead = fc.read(dst, index);
                    } finally {
                        dst.limit(oldLimit);
                    }
                } else {
                    bytesRead = fc.read(dst, index);
                }
                if (bytesRead == -1) {
                    // End of file reached before the fence; mark the range
                    // as exhausted
                    index = fence;
                    return bytesRead;
                }

                index += bytesRead;
                return bytesRead;
            }

            @Override
            public boolean isOpen() {
                return fc.isOpen();
            }

            @Override
            public void close() throws IOException {
                // Closes the underlying file channel itself
                fc.close();
            }
        };
        return new BufferedReader(Channels.newReader(rrbc, cs.newDecoder(), -1));
    }

    /**
     * Reads the next line of the covered range, creating the buffered reader
     * on first use. Once the reader exists this spliterator is traversing:
     * the reference to the mapped byte buffer is dropped and
     * {@link #trySplit()} returns null from then on.
     *
     * @return the next line, or {@code null} if no lines remain
     * @throws UncheckedIOException if an I/O error occurs while reading
     */
    private String readLine() {
        if (reader == null) {
            reader = getBufferedReader();
            buffer = null;
        }

        try {
            return reader.readLine();
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }

    /**
     * Maps, read-only, the bytes of the file channel from position 0 up to
     * {@code fence}.
     *
     * @throws UncheckedIOException if the mapping fails
     */
    private ByteBuffer getMappedByteBuffer() {
        // TODO can the mapped byte buffer be explicitly unmapped?
        // It's possible, via a shared-secret mechanism, when either
        // 1) the spliterator starts traversing, although traversal can
        //    happen concurrently for multiple spliterators, so care is
        //    needed in this case; or
        // 2) when the stream is closed using some shared holder to pass
        //    the mapped byte buffer when it is created.
        try {
            return fc.map(FileChannel.MapMode.READ_ONLY, 0, fence);
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }

    /**
     * Attempts to split the covered range at the line separator closest to
     * its mid-point. On success the returned spliterator covers the bytes from
     * the old {@code index} up to and including that line separator, and this
     * spliterator covers the remainder.
     *
     * @return a spliterator covering the leading part of the range, or
     *         {@code null} if traversal has started, the range is too small
     *         to divide, or no suitable line separator was found
     * @throws UncheckedIOException if mapping the file fails
     */
    @Override
    public Spliterator<String> trySplit() {
        // Cannot split after partial traverse
        if (reader != null) {
            return null;
        }

        final int hi = fence, lo = index;

        // A range of fewer than two bytes cannot yield two non-empty parts.
        // Bail out before mapping the file, and before the mid-point read
        // below could index outside of the covered range.
        if (hi - lo < 2) {
            return null;
        }

        ByteBuffer b;
        if ((b = buffer) == null) {
            b = buffer = getMappedByteBuffer();
        }

        // Check if line separator hits the mid point
        int mid = (lo + hi) >>> 1;
        int c = b.get(mid);
        if (c == '\n') {
            mid++;
        } else if (c == '\r') {
            // Check if a line separator of "\r\n"
            if (++mid < hi && b.get(mid) == '\n') {
                mid++;
            }
        } else {
            // TODO give up after a certain distance from the mid point?
            // Scan to the left and right of the mid point
            int midL = mid - 1;
            int midR = mid + 1;
            // Zero marks "no separator found"; it can never pass the
            // (mid > lo) test below
            mid = 0;
            while (midL > lo && midR < hi) {
                // Sample to the left
                c = b.get(midL--);
                if (c == '\n' || c == '\r') {
                    // If c is "\r" then no need to check for "\r\n"
                    // since the subsequent value was previously checked
                    mid = midL + 2;
                    break;
                }

                // Sample to the right
                c = b.get(midR++);
                if (c == '\n' || c == '\r') {
                    mid = midR;
                    // Check if line-separator is "\r\n"
                    if (c == '\r' && mid < hi && b.get(mid) == '\n') {
                        mid++;
                    }
                    break;
                }
            }
        }

        // The left spliterator will have the line-separator at the end
        return (mid > lo && mid < hi)
                ? new FileChannelLinesSpliterator(fc, cs, lo, index = mid, b)
                : null;
    }

    /**
     * Returns the number of bytes remaining in the covered range as an
     * estimate of the number of remaining lines.
     */
    @Override
    public long estimateSize() {
        // Use the number of bytes as an estimate.
        // We could divide by a constant that is the average number of
        // characters per-line, but that constant will be factored out.
        return fence - index;
    }

    /**
     * Returns {@code -1}: the number of lines is never known in advance.
     */
    @Override
    public long getExactSizeIfKnown() {
        return -1;
    }

    /**
     * Reports {@link Spliterator#ORDERED} and {@link Spliterator#NONNULL}.
     */
    @Override
    public int characteristics() {
        return Spliterator.ORDERED | Spliterator.NONNULL;
    }
}