1 /*
   2  * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package build.tools.generateemojidata;
  27 
  28 import java.io.IOException;
  29 import java.nio.file.Files;
  30 import java.nio.file.Paths;
  31 import java.nio.file.StandardOpenOption;
  32 import java.util.ArrayList;
  33 import java.util.List;
  34 import java.util.function.Predicate;
  35 import java.util.stream.Collectors;
  36 import java.util.stream.Stream;
  37 
  38 /**
  39  * Generate EmojiData.java
  40  *    args[0]: Full path string to the template file
  41  *    args[1]: Full path string to the directory that contains "emoji-data.txt"
  42  *    args[2]: Full path string to the generated .java file
  43  */
  44 public class GenerateEmojiData {
  45     public static void main(String[] args) {
  46         try {
  47             final Range[] last = new Range[1]; // last extended pictographic range
  48             last[0] = new Range(0, 0);
  49 
  50             List<Range> extPictRanges = Files.lines(Paths.get(args[1], "emoji", "emoji-data.txt"))
  51                 .filter(Predicate.not(l -> l.startsWith("#") || l.isBlank()))
  52                 .filter(l -> l.contains("; Extended_Pictograph"))
  53                 .map(l -> new Range(l.replaceFirst(" .*", "")))
  54                 .sorted()
  55                 .collect(ArrayList<Range>::new,
  56                     (list, r) -> {
  57                         // collapsing consecutive pictographic ranges
  58                         int lastIndex = list.size() - 1;
  59                         if (lastIndex >= 0) {
  60                             Range lastRange = list.get(lastIndex);
  61                             if (lastRange.last + 1 == r.start) {
  62                                 list.set(lastIndex, new Range(lastRange.start, r.last));
  63                                 return;
  64                             }
  65                         }
  66                         list.add(r);
  67                     },
  68                     ArrayList<Range>::addAll);
  69 
  70 
  71             // make the code point conditions
  72             // only very few codepoints below 0x2000 are "emojis", so separate them
  73             // out to generate a fast-path check that can be efficiently inlined
  74             String lowExtPictCodePoints = extPictRanges.stream()
  75                     .takeWhile(r -> r.last < 0x2000)
  76                     .map(r -> rangeToString(r))
  77                     .collect(Collectors.joining(" ||\n", "", ";\n"));
  78 
  79             String highExtPictCodePoints = extPictRanges.stream()
  80                     .dropWhile(r -> r.last < 0x2000)
  81                     .map(r -> rangeToString(r))
  82                     .collect(Collectors.joining(" ||\n", "", ";\n"));
  83 
  84             // Generate EmojiData.java file
  85             Files.write(Paths.get(args[2]),
  86                 Files.lines(Paths.get(args[0]))
  87                     .flatMap(l -> {
  88                         if (l.equals("%%%EXTPICT_LOW%%%")) {
  89                             return Stream.of(lowExtPictCodePoints);
  90                         } else if (l.equals("%%%EXTPICT_HIGH%%%")) {
  91                             return Stream.of(highExtPictCodePoints);
  92                         } else {
  93                             return Stream.of(l);
  94                         }
  95                     })
  96                     .collect(Collectors.toList()),
  97                 StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
  98         } catch (IOException e) {
  99             e.printStackTrace();
 100         }
 101     }
 102 
 103     static String rangeToString(Range r) {
 104         if (r.start == r.last) {
 105             return (" ".repeat(16) + "cp == 0x" + toHexString(r.start));
 106         } else  if (r.start == r.last - 1) {
 107             return " ".repeat(16) + "cp == 0x" + toHexString(r.start) + " ||\n" +
 108                     " ".repeat(16) + "cp == 0x" + toHexString(r.last);
 109         } else {
 110             return " ".repeat(15) + "(cp >= 0x" + toHexString(r.start) +
 111                     " && cp <= 0x" + toHexString(r.last) + ")";
 112         }
 113     }
 114 
 115     static int toInt(String hexStr) {
 116         return Integer.parseUnsignedInt(hexStr, 16);
 117     }
 118 
 119     static String toHexString(int cp) {
 120         String ret = Integer.toUnsignedString(cp, 16).toUpperCase();
 121         if (ret.length() < 4) {
 122             ret = "0".repeat(4 - ret.length()) + ret;
 123         }
 124         return ret;
 125     }
 126 
 127     static class Range implements Comparable<Range> {
 128         int start;
 129         int last;
 130 
 131         Range (int start, int last) {
 132             this.start = start;
 133             this.last = last;
 134         }
 135 
 136         Range (String input) {
 137             input = input.replaceFirst("\\s#.*", "");
 138             start = toInt(input.replaceFirst("[\\s\\.].*", ""));
 139             last = input.contains("..") ?
 140                     toInt(input.replaceFirst(".*\\.\\.", "")
 141                             .replaceFirst(";.*", "").trim())
 142                     : start;
 143         }
 144 
 145         @Override
 146         public String toString() {
 147             return "Start: " + toHexString(start) + ", Last: " + toHexString(last);
 148         }
 149 
 150         @Override
 151         public int compareTo(Range other) {
 152             return Integer.compare(start, other.start);
 153         }
 154     }
 155 }