1 /*
   2  * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 package java.nio.file;
  26 
  27 import java.io.BufferedReader;
  28 import java.io.IOException;
  29 import java.io.UncheckedIOException;
  30 import java.nio.ByteBuffer;
  31 import java.nio.channels.Channels;
  32 import java.nio.channels.FileChannel;
  33 import java.nio.channels.ReadableByteChannel;
  34 import java.nio.charset.Charset;
  35 import java.nio.charset.StandardCharsets;
  36 import java.util.HashSet;
  37 import java.util.Set;
  38 import java.util.Spliterator;
  39 import java.util.function.Consumer;
  40 
  41 /**
  42  * A file-based lines spliterator, leveraging a shared mapped byte buffer and
  43  * associated file channel, covering lines of a file for character encodings
  44  * where line feed characters can be easily identified from character encoded
  45  * bytes.
  46  *
  47  * <p>
  48  * When the root spliterator is first split a mapped byte buffer will be created
  49  * over the file for it's size that was observed when the stream was created.
  50  * Thus a mapped byte buffer is only required for parallel stream execution.
  51  * Sub-spliterators will share that mapped byte buffer.  Splitting will use the
  52  * mapped byte buffer to find the closest line feed characters(s) to the left or
  53  * right of the mid-point of covered range of bytes of the file.  If a line feed
  54  * is found then the spliterator is split with returned spliterator containing
  55  * the identified line feed characters(s) at the end of it's covered range of
  56  * bytes.
  57  *
  58  * <p>
  59  * Traversing will create a buffered reader, derived from the file channel, for
  60  * the range of bytes of the file.  The lines are then read from that buffered
  61  * reader.  Once traversing commences no further splitting can be performed and
  62  * the reference to the mapped byte buffer will be set to null.
  63  */
  64 final class FileChannelLinesSpliterator implements Spliterator<String> {
  65 
  66     static final Set<String> SUPPORTED_CHARSET_NAMES;
  67     static {
  68         SUPPORTED_CHARSET_NAMES = new HashSet<>();
  69         SUPPORTED_CHARSET_NAMES.add(StandardCharsets.UTF_8.name());
  70         SUPPORTED_CHARSET_NAMES.add(StandardCharsets.ISO_8859_1.name());
  71         SUPPORTED_CHARSET_NAMES.add(StandardCharsets.US_ASCII.name());
  72     }
  73 
  74     private final FileChannel fc;
  75     private final Charset cs;
  76     private int index;
  77     private final int fence;
  78 
  79     // Null before first split, non-null when splitting, null when traversing
  80     private ByteBuffer buffer;
  81     // Non-null when traversing
  82     private BufferedReader reader;
  83 
  84     FileChannelLinesSpliterator(FileChannel fc, Charset cs, int index, int fence) {
  85         this.fc = fc;
  86         this.cs = cs;
  87         this.index = index;
  88         this.fence = fence;
  89     }
  90 
  91     private FileChannelLinesSpliterator(FileChannel fc, Charset cs, int index, int fence, ByteBuffer buffer) {
  92         this.fc = fc;
  93         this.buffer = buffer;
  94         this.cs = cs;
  95         this.index = index;
  96         this.fence = fence;
  97     }
  98 
  99     @Override
 100     public boolean tryAdvance(Consumer<? super String> action) {
 101         String line = readLine();
 102         if (line != null) {
 103             action.accept(line);
 104             return true;
 105         } else {
 106             return false;
 107         }
 108     }
 109 
 110     @Override
 111     public void forEachRemaining(Consumer<? super String> action) {
 112         String line;
 113         while ((line = readLine()) != null) {
 114             action.accept(line);
 115         }
 116     }
 117 
 118     private BufferedReader getBufferedReader() {
 119         /**
 120          * A readable byte channel that reads bytes from an underlying
 121          * file channel over a specified range.
 122          */
 123         ReadableByteChannel rrbc = new ReadableByteChannel() {
 124             @Override
 125             public int read(ByteBuffer dst) throws IOException {
 126                 int bytesToRead = fence - index;
 127                 if (bytesToRead == 0)
 128                     return -1;
 129 
 130                 int bytesRead;
 131                 if (bytesToRead < dst.remaining()) {
 132                     // The number of bytes to read is less than remaining
 133                     // bytes in the buffer
 134                     // Snapshot the limit, reduce it, read, then restore
 135                     int oldLimit = dst.limit();
 136                     dst.limit(dst.position() + bytesToRead);
 137                     bytesRead = fc.read(dst, index);
 138                     dst.limit(oldLimit);
 139                 } else {
 140                     bytesRead = fc.read(dst, index);
 141                 }
 142                 if (bytesRead == -1) {
 143                     index = fence;
 144                     return bytesRead;
 145                 }
 146 
 147                 index += bytesRead;
 148                 return bytesRead;
 149             }
 150 
 151             @Override
 152             public boolean isOpen() {
 153                 return fc.isOpen();
 154             }
 155 
 156             @Override
 157             public void close() throws IOException {
 158                 fc.close();
 159             }
 160         };
 161         return new BufferedReader(Channels.newReader(rrbc, cs.newDecoder(), -1));
 162     }
 163 
 164     private String readLine() {
 165         if (reader == null) {
 166             reader = getBufferedReader();
 167             buffer = null;
 168         }
 169 
 170         try {
 171             return reader.readLine();
 172         } catch (IOException e) {
 173             throw new UncheckedIOException(e);
 174         }
 175     }
 176 
 177     private ByteBuffer getMappedByteBuffer() {
 178         // TODO can the mapped byte buffer be explicitly unmapped?
 179         // It's possible, via a shared-secret mechanism, when either
 180         // 1) the spliterator starts traversing, although traversal can
 181         //    happen concurrently for mulitple spliterators, so care is
 182         //    needed in this case; or
 183         // 2) when the stream is closed using some shared holder to pass
 184         //    the mapped byte buffer when it is created.
 185         try {
 186             return fc.map(FileChannel.MapMode.READ_ONLY, 0, fence);
 187         } catch (IOException e) {
 188             throw new UncheckedIOException(e);
 189         }
 190     }
 191 
 192     @Override
 193     public Spliterator<String> trySplit() {
 194         // Cannot split after partial traverse
 195         if (reader != null)
 196             return null;
 197 
 198         ByteBuffer b;
 199         if ((b = buffer) == null) {
 200             b = buffer = getMappedByteBuffer();
 201         }
 202 
 203         final int hi = fence, lo = index;
 204 
 205         // Check if line separator hits the mid point
 206         int mid = (lo + hi) >>> 1;
 207         int c =  b.get(mid);
 208         if (c == '\n') {
 209             mid++;
 210         } else if (c == '\r') {
 211             // Check if a line separator of "\r\n"
 212             if (++mid < hi && b.get(mid) == '\n') {
 213                 mid++;
 214             }
 215         } else {
 216             // TODO give up after a certain distance from the mid point?
 217             // Scan to the left and right of the mid point
 218             int midL = mid - 1;
 219             int midR = mid + 1;
 220             mid = 0;
 221             while (midL > lo && midR < hi) {
 222                 // Sample to the left
 223                 c = b.get(midL--);
 224                 if (c == '\n' || c == '\r') {
 225                     // If c is "\r" then no need to check for "\r\n"
 226                     // since the subsequent value was previously checked
 227                     mid = midL + 2;
 228                     break;
 229                 }
 230 
 231                 // Sample to the right
 232                 c = b.get(midR++);
 233                 if (c == '\n' || c == '\r') {
 234                     mid = midR;
 235                     // Check if line-separator is "\r\n"
 236                     if (c == '\r' && mid < hi && b.get(mid) == '\n') {
 237                         mid++;
 238                     }
 239                     break;
 240                 }
 241             }
 242         }
 243 
 244         // The left spliterator will have the line-separator at the end
 245         return (mid > lo && mid < hi)
 246                ? new FileChannelLinesSpliterator(fc, cs, lo, index = mid, b)
 247                : null;
 248     }
 249 
 250     @Override
 251     public long estimateSize() {
 252         // Use the number of bytes as an estimate.
 253         // We could divide by a constant that is the average number of
 254         // characters per-line, but that constant will be factored out.
 255         return fence - index;
 256     }
 257 
 258     @Override
 259     public long getExactSizeIfKnown() {
 260         return -1;
 261     }
 262 
 263     @Override
 264     public int characteristics() {
 265         return Spliterator.ORDERED | Spliterator.NONNULL;
 266     }
 267 }