1 /*
   2  * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package build.tools.generatebreakiteratordata;
  27 
  28 import java.util.Hashtable;
  29 import java.util.Vector;
  30 
  31 /**
  32  * The Builder class for DictionaryBasedBreakIterator inherits almost all of
  33  * its functionality from RuleBasedBreakIteratorBuilder, but extends it with
  34  * extra logic to handle the "<dictionary>" token.
  35  */
  36 class DictionaryBasedBreakIteratorBuilder extends RuleBasedBreakIteratorBuilder {
  37 
  38     /**
  39      * A list of flags indicating which character categories are contained in
  40      * the dictionary file (this is used to determine which ranges of characters
  41      * to apply the dictionary to)
  42      */
  43     private boolean[] categoryFlags;
  44 
  45     /**
  46      * A CharSet that contains all the characters represented in the dictionary
  47      */
  48     private CharSet dictionaryChars = new CharSet();
  49     private String dictionaryExpression = "";
  50 
  51     public DictionaryBasedBreakIteratorBuilder(String description) {
  52         super(description);
  53     }
  54 
  55     /**
  56      * We override handleSpecialSubstitution() to add logic to handle
  57      * the <dictionary> tag.  If we see a substitution named "<dictionary>",
  58      * parse the substitution expression and store the result in
  59      * dictionaryChars.
  60      */
  61     protected void handleSpecialSubstitution(String replace, String replaceWith,
  62                                              int startPos, String description) {
  63         super.handleSpecialSubstitution(replace, replaceWith, startPos, description);
  64 
  65         if (replace.equals("<dictionary>")) {
  66             if (replaceWith.charAt(0) == '(') {
  67                 error("Dictionary group can't be enclosed in (", startPos, description);
  68             }
  69             dictionaryExpression = replaceWith;
  70             dictionaryChars = CharSet.parseString(replaceWith);
  71         }
  72     }
  73 
  74     /**
  75      * The other half of the logic to handle the dictionary characters happens
  76      * here. After the inherited builder has derived the real character
  77      * categories, we set up the categoryFlags array in the iterator. This array
  78      * contains "true" for every character category that includes a dictionary
  79      * character.
  80      */
  81     protected void buildCharCategories(Vector<String> tempRuleList) {
  82         super.buildCharCategories(tempRuleList);
  83 
  84         categoryFlags = new boolean[categories.size()];
  85         for (int i = 0; i < categories.size(); i++) {
  86             CharSet cs = categories.elementAt(i);
  87             if (!(cs.intersection(dictionaryChars).empty())) {
  88                 categoryFlags[i] = true;
  89             }
  90         }
  91     }
  92 
  93     // This function is actually called by
  94     // RuleBasedBreakIteratorBuilder.buildCharCategories(), which is called by
  95     // the function above. This gives us a way to create a separate character
  96     // category for the dictionary characters even when
  97     // RuleBasedBreakIteratorBuilder isn't making a distinction.
  98     protected void mungeExpressionList(Hashtable<String, Object> expressions) {
  99         expressions.put(dictionaryExpression, dictionaryChars);
 100     }
 101 
 102     void makeFile(String filename) {
 103         super.setAdditionalData(super.toByteArray(categoryFlags));
 104         super.makeFile(filename);
 105     }
 106 }