1 /*
   2  * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 package java.nio.file;
  26 
  27 import java.io.BufferedReader;
  28 import java.io.IOException;
  29 import java.io.UncheckedIOException;
  30 import java.nio.ByteBuffer;
  31 import java.nio.channels.Channels;
  32 import java.nio.channels.FileChannel;
  33 import java.nio.channels.ReadableByteChannel;
  34 import java.nio.charset.Charset;
  35 import java.util.HashSet;
  36 import java.util.Set;
  37 import java.util.Spliterator;
  38 import java.util.function.Consumer;
  39 
  40 import sun.nio.cs.ISO_8859_1;
  41 import sun.nio.cs.US_ASCII;
  42 import sun.nio.cs.UTF_8;
  43 
  44 /**
  45  * A file-based lines spliterator, leveraging a shared mapped byte buffer and
  46  * associated file channel, covering lines of a file for character encodings
  47  * where line feed characters can be easily identified from character encoded
  48  * bytes.
  49  *
  50  * <p>
 * When the root spliterator is first split a mapped byte buffer will be created
 * over the file for its size that was observed when the stream was created.
 * Thus a mapped byte buffer is only required for parallel stream execution.
 * Sub-spliterators will share that mapped byte buffer.  Splitting will use the
 * mapped byte buffer to find the closest line feed character(s) to the left or
 * right of the mid-point of the covered range of bytes of the file.  If a line
 * feed is found then the spliterator is split with the returned spliterator
 * containing the identified line feed character(s) at the end of its covered
 * range of bytes.
  60  *
  61  * <p>
  62  * Traversing will create a buffered reader, derived from the file channel, for
  63  * the range of bytes of the file.  The lines are then read from that buffered
  64  * reader.  Once traversing commences no further splitting can be performed and
  65  * the reference to the mapped byte buffer will be set to null.
  66  */
  67 final class FileChannelLinesSpliterator implements Spliterator<String> {
  68 
  69     static final Set<String> SUPPORTED_CHARSET_NAMES;
  70     static {
  71         SUPPORTED_CHARSET_NAMES = new HashSet<>();
  72         SUPPORTED_CHARSET_NAMES.add(UTF_8.INSTANCE.name());
  73         SUPPORTED_CHARSET_NAMES.add(ISO_8859_1.INSTANCE.name());
  74         SUPPORTED_CHARSET_NAMES.add(US_ASCII.INSTANCE.name());
  75     }
  76 
  77     private final FileChannel fc;
  78     private final Charset cs;
  79     private int index;
  80     private final int fence;
  81 
  82     // Null before first split, non-null when splitting, null when traversing
  83     private ByteBuffer buffer;
  84     // Non-null when traversing
  85     private BufferedReader reader;
  86 
  87     FileChannelLinesSpliterator(FileChannel fc, Charset cs, int index, int fence) {
  88         this.fc = fc;
  89         this.cs = cs;
  90         this.index = index;
  91         this.fence = fence;
  92     }
  93 
  94     private FileChannelLinesSpliterator(FileChannel fc, Charset cs, int index, int fence, ByteBuffer buffer) {
  95         this.fc = fc;
  96         this.buffer = buffer;
  97         this.cs = cs;
  98         this.index = index;
  99         this.fence = fence;
 100     }
 101 
 102     @Override
 103     public boolean tryAdvance(Consumer<? super String> action) {
 104         String line = readLine();
 105         if (line != null) {
 106             action.accept(line);
 107             return true;
 108         } else {
 109             return false;
 110         }
 111     }
 112 
 113     @Override
 114     public void forEachRemaining(Consumer<? super String> action) {
 115         String line;
 116         while ((line = readLine()) != null) {
 117             action.accept(line);
 118         }
 119     }
 120 
 121     private BufferedReader getBufferedReader() {
 122         /**
 123          * A readable byte channel that reads bytes from an underlying
 124          * file channel over a specified range.
 125          */
 126         ReadableByteChannel rrbc = new ReadableByteChannel() {
 127             @Override
 128             public int read(ByteBuffer dst) throws IOException {
 129                 int bytesToRead = fence - index;
 130                 if (bytesToRead == 0)
 131                     return -1;
 132 
 133                 int bytesRead;
 134                 if (bytesToRead < dst.remaining()) {
 135                     // The number of bytes to read is less than remaining
 136                     // bytes in the buffer
 137                     // Snapshot the limit, reduce it, read, then restore
 138                     int oldLimit = dst.limit();
 139                     dst.limit(dst.position() + bytesToRead);
 140                     bytesRead = fc.read(dst, index);
 141                     dst.limit(oldLimit);
 142                 } else {
 143                     bytesRead = fc.read(dst, index);
 144                 }
 145                 if (bytesRead == -1) {
 146                     index = fence;
 147                     return bytesRead;
 148                 }
 149 
 150                 index += bytesRead;
 151                 return bytesRead;
 152             }
 153 
 154             @Override
 155             public boolean isOpen() {
 156                 return fc.isOpen();
 157             }
 158 
 159             @Override
 160             public void close() throws IOException {
 161                 fc.close();
 162             }
 163         };
 164         return new BufferedReader(Channels.newReader(rrbc, cs.newDecoder(), -1));
 165     }
 166 
 167     private String readLine() {
 168         if (reader == null) {
 169             reader = getBufferedReader();
 170             buffer = null;
 171         }
 172 
 173         try {
 174             return reader.readLine();
 175         } catch (IOException e) {
 176             throw new UncheckedIOException(e);
 177         }
 178     }
 179 
 180     private ByteBuffer getMappedByteBuffer() {
 181         // TODO can the mapped byte buffer be explicitly unmapped?
 182         // It's possible, via a shared-secret mechanism, when either
 183         // 1) the spliterator starts traversing, although traversal can
 184         //    happen concurrently for mulitple spliterators, so care is
 185         //    needed in this case; or
 186         // 2) when the stream is closed using some shared holder to pass
 187         //    the mapped byte buffer when it is created.
 188         try {
 189             return fc.map(FileChannel.MapMode.READ_ONLY, 0, fence);
 190         } catch (IOException e) {
 191             throw new UncheckedIOException(e);
 192         }
 193     }
 194 
 195     @Override
 196     public Spliterator<String> trySplit() {
 197         // Cannot split after partial traverse
 198         if (reader != null)
 199             return null;
 200 
 201         ByteBuffer b;
 202         if ((b = buffer) == null) {
 203             b = buffer = getMappedByteBuffer();
 204         }
 205 
 206         final int hi = fence, lo = index;
 207 
 208         // Check if line separator hits the mid point
 209         int mid = (lo + hi) >>> 1;
 210         int c =  b.get(mid);
 211         if (c == '\n') {
 212             mid++;
 213         } else if (c == '\r') {
 214             // Check if a line separator of "\r\n"
 215             if (++mid < hi && b.get(mid) == '\n') {
 216                 mid++;
 217             }
 218         } else {
 219             // TODO give up after a certain distance from the mid point?
 220             // Scan to the left and right of the mid point
 221             int midL = mid - 1;
 222             int midR = mid + 1;
 223             mid = 0;
 224             while (midL > lo && midR < hi) {
 225                 // Sample to the left
 226                 c = b.get(midL--);
 227                 if (c == '\n' || c == '\r') {
 228                     // If c is "\r" then no need to check for "\r\n"
 229                     // since the subsequent value was previously checked
 230                     mid = midL + 2;
 231                     break;
 232                 }
 233 
 234                 // Sample to the right
 235                 c = b.get(midR++);
 236                 if (c == '\n' || c == '\r') {
 237                     mid = midR;
 238                     // Check if line-separator is "\r\n"
 239                     if (c == '\r' && mid < hi && b.get(mid) == '\n') {
 240                         mid++;
 241                     }
 242                     break;
 243                 }
 244             }
 245         }
 246 
 247         // The left spliterator will have the line-separator at the end
 248         return (mid > lo && mid < hi)
 249                ? new FileChannelLinesSpliterator(fc, cs, lo, index = mid, b)
 250                : null;
 251     }
 252 
 253     @Override
 254     public long estimateSize() {
 255         // Use the number of bytes as an estimate.
 256         // We could divide by a constant that is the average number of
 257         // characters per-line, but that constant will be factored out.
 258         return fence - index;
 259     }
 260 
 261     @Override
 262     public long getExactSizeIfKnown() {
 263         return -1;
 264     }
 265 
 266     @Override
 267     public int characteristics() {
 268         return Spliterator.ORDERED | Spliterator.NONNULL;
 269     }
 270 }