< prev index next >

src/java.base/share/classes/java/util/regex/Grapheme.java

Print this page
rev 58251 : [mq]: 8216332
   1 /*
   2  * Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any


  54      * @param src the {@code CharSequence} to be scanned
  55      * @param off offset to start looking for the next boundary in the src
  56      * @param limit limit offset in the src (exclusive)
  57      * @return the next possible boundary
  58      */
  59     static int nextBoundary(CharSequence src, int off, int limit) {
  60         Objects.checkFromToIndex(off, limit, src.length());
  61 
  62         int ch0 = Character.codePointAt(src, 0);
  63         int ret = Character.charCount(ch0);
  64         int ch1;
  65         // indicates whether gb11 or gb12 is underway
  66         int t0 = getGraphemeType(ch0);
  67         int riCount = t0 == RI ? 1 : 0;
  68         boolean gb11 = t0 == EXTENDED_PICTOGRAPHIC;
  69         while (ret < limit) {
  70             ch1 = Character.codePointAt(src, ret);
  71             int t1 = getGraphemeType(ch1);
  72 
  73             if (gb11 && t0 == ZWJ && t1 == EXTENDED_PICTOGRAPHIC) {
  74                 gb11 = false;
  75             } else if (riCount % 2 == 1 && t0 == RI && t1 == RI) {
  76                 // continue for gb12
  77             } else if (rules[t0][t1]) {
  78                 if (ret > off) {
  79                     break;
  80                 } else {
  81                     gb11 = t1 == EXTENDED_PICTOGRAPHIC;
  82                     riCount = 0;
  83                 }
  84             }
  85 
  86             riCount += (t1 == RI) ? 1 : 0;
  87             t0 = t1;
  88 
  89             ret += Character.charCount(ch1);
  90         }
  91         return ret;
  92     }
  93 
  94     // types


   1 /*
   2  * Copyright (c) 2016, 2020, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any


  54      * @param src the {@code CharSequence} to be scanned
  55      * @param off offset to start looking for the next boundary in the src
  56      * @param limit limit offset in the src (exclusive)
  57      * @return the next possible boundary
  58      */
  59     static int nextBoundary(CharSequence src, int off, int limit) {
  60         Objects.checkFromToIndex(off, limit, src.length());
  61 
  62         int ch0 = Character.codePointAt(src, 0);
  63         int ret = Character.charCount(ch0);
  64         int ch1;
  65         // indicates whether gb11 or gb12 is underway
  66         int t0 = getGraphemeType(ch0);
  67         int riCount = t0 == RI ? 1 : 0;
  68         boolean gb11 = t0 == EXTENDED_PICTOGRAPHIC;
  69         while (ret < limit) {
  70             ch1 = Character.codePointAt(src, ret);
  71             int t1 = getGraphemeType(ch1);
  72 
  73             if (gb11 && t0 == ZWJ && t1 == EXTENDED_PICTOGRAPHIC) {
  74                 // continue for gb11
  75             } else if (riCount % 2 == 1 && t0 == RI && t1 == RI) {
  76                 // continue for gb12
  77             } else if (rules[t0][t1]) {
  78                 if (ret > off) {
  79                     break;
  80                 } else {
  81                     gb11 = t1 == EXTENDED_PICTOGRAPHIC;
  82                     riCount = 0;
  83                 }
  84             }
  85 
  86             riCount += (t1 == RI) ? 1 : 0;
  87             t0 = t1;
  88 
  89             ret += Character.charCount(ch1);
  90         }
  91         return ret;
  92     }
  93 
  94     // types


< prev index next >