1 /*
   2  * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
   3  *
   4  * Redistribution and use in source and binary forms, with or without
   5  * modification, are permitted provided that the following conditions
   6  * are met:
   7  *
   8  *   - Redistributions of source code must retain the above copyright
   9  *     notice, this list of conditions and the following disclaimer.
  10  *
  11  *   - Redistributions in binary form must reproduce the above copyright
  12  *     notice, this list of conditions and the following disclaimer in the
  13  *     documentation and/or other materials provided with the distribution.
  14  *
  15  *   - Neither the name of Oracle nor the names of its
  16  *     contributors may be used to endorse or promote products derived
  17  *     from this software without specific prior written permission.
  18  *
  19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
  20  * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  21  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  23  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  24  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  25  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  26  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  27  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  28  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  29  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30  */
  31 
  32 /*
  33  * This source code is provided to illustrate the usage of a given feature
  34  * or technique and has been deliberately simplified. Additional steps
  35  * required for a production-quality application, such as security checks,
  36  * input validation and proper error handling, might not be present in
  37  * this sample code.
  38  */
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.Arrays;
import java.util.function.Consumer;
import java.util.regex.Pattern;
  45 
  46 /**
  47  * WC - print newline, word, and character counts for each file. See
  48  * {@link #usage} method to run from command line. This sample shows usages of:
  49  * <ol>
  50  * <li>Lambda and bulk operations. Shows how to create custom collector to
  51  * gather custom statistic. Also there is implementation of collecting
  52  * statistics using built-in API.</li>
  53  * <li>Constructor reference.</li>
  54  * <li>Try-with-resources feature.</li>
  55  * </ol>
  56  *
  57  * @author Andrey Nazarov
  58  */
  59 public class WC {
  60 
  61     //Number of characters that may be read
  62     private static final int READ_AHEAD_LIMIT = 100_000_000;
  63 
  64     /**
  65      * The main method for the WC program. Run program with empty argument list
  66      * to see possible arguments.
  67      *
  68      * @param args the argument list for WC.
  69      * @throws java.io.IOException If an input exception occurred.
  70      */
  71     public static void main(String[] args) throws IOException {
  72 
  73         if (args.length != 1) {
  74             usage();
  75             return;
  76         }
  77 
  78         try (BufferedReader reader = new BufferedReader(
  79                 new FileReader(args[0]))) {
  80             reader.mark(READ_AHEAD_LIMIT);
  81 
  82             collectInFourPasses(reader);
  83             collectInOnePass(reader);
  84         } catch (FileNotFoundException e) {
  85             usage();
  86             System.out.println(e);
  87         }
  88     }
  89 
  90     private static void collectInFourPasses(BufferedReader reader)
  91             throws IOException {
  92         /*
  93          * Input is read as a stream of lines by lines().
  94          * Every line in turned into a stream of chars by flatMap(...) method.
  95          * Length of the stream is counted by count().
  96          */
  97         System.out.println("Character counts = "
  98                 + reader.lines().flatMap(str -> str.chars().boxed()).count());
  99         reader.reset();
 100         /*
 101          * Input is read as a stream of lines by lines().
 102          * Every line is split by white spaces into words by flatMap(...)
 103          * method.
 104          * Empty lines are removed by filter(...) method.
 105          * Length of the stream is counted by count().
 106          */
 107         System.out.println("Word counts = "
 108                 + reader.lines()
 109                 .flatMap(str -> Arrays.stream(str.split("\\W")))
 110                 .filter(str -> !str.isEmpty()).count());
 111         reader.reset();
 112 
 113         System.out.println("Newline counts = " + reader.lines().count());
 114         reader.reset();
 115         /*
 116          * Input is read as a stream of lines by lines()
 117          * Every line is mapped to its length
 118          * Maximum of the lengths is calculated
 119          */
 120         System.out.println("Max line length = "
 121                 + reader.lines().mapToInt(String::length).max().getAsInt());
 122         reader.reset();
 123     }
 124 
 125     private static void collectInOnePass(BufferedReader reader) {
 126         /*
 127          * collect() method has 3 params:
 128          * The first parameter is WCStatistic constructor reference.
 129          * collect() will create WCStatistics instances, where statistics will
 130          * be aggregated.
 131          * The second parameter show how WCStatistics will process String value.
 132          * The third parameter show how to merge two WCStatistic instances.
 133          */
 134         System.out.println(reader.lines()
 135                 .collect(WCStatistics::new, WCStatistics::accept,
 136                         WCStatistics::combine));
 137     }
 138 
 139     private static void usage() {
 140         System.out.println("Usage: " + WC.class.getSimpleName() + " FILE");
 141         System.out.println("Print newline, word,"
 142                 + "  character counts and max line length for FILE.");
 143     }
 144 
 145     private static class WCStatistics implements Consumer<String> {
 146         /*
 147          * @implNote This implementation is not thread safe. However, it is safe to use
 148          * WCStatistics on a parallel stream, because the parallel
 149          * implementation of {@link java.util.stream.Stream#collect Stream.collect()}
 150          * provides the necessary partitioning, isolation, and merging of results for
 151          * safe and efficient parallel execution.
 152          */
 153 
 154         private long characterCount;
 155         private long lineCount;
 156         private long wordCount;
 157         private long maxLineLength;
 158 
 159         /*
 160          * Processes line.
 161          */
 162         @Override
 163         public void accept(String line) {
 164             characterCount += line.length();
 165             lineCount++;
 166             wordCount += Arrays.stream(line.split("\\W"))
 167                     .filter(str -> !str.isEmpty()).count();
 168             maxLineLength = Math.max(maxLineLength, line.length());
 169         }
 170 
 171         /*
 172          * Merges two WCStatistics.
 173          */
 174         public void combine(WCStatistics stat) {
 175             wordCount += stat.wordCount;
 176             lineCount += stat.lineCount;
 177             characterCount += stat.characterCount;
 178             maxLineLength = Math.max(maxLineLength, stat.maxLineLength);
 179         }
 180 
 181         @Override
 182         public String toString() {
 183             StringBuilder sb = new StringBuilder();
 184             sb.append("#------WCStatistic------#\n");
 185             sb.append("Character counts = ").append(characterCount).append("\n");
 186             sb.append("Word counts = ").append(wordCount).append("\n");
 187             sb.append("Newline counts = ").append(lineCount).append("\n");
 188             sb.append("Max line length = ").append(maxLineLength).append("\n");
 189             return sb.toString();
 190         }
 191     }
 192 }