1 /*
   2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   3  *
   4  * This code is free software; you can redistribute it and/or modify it
   5  * under the terms of the GNU General Public License version 2 only, as
   6  * published by the Free Software Foundation.  Oracle designates this
   7  * particular file as subject to the "Classpath" exception as provided
   8  * by Oracle in the LICENSE file that accompanied this code.
   9  *
  10  * This code is distributed in the hope that it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * version 2 for more details (a copy is included in the LICENSE file that
  14  * accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License version
  17  * 2 along with this work; if not, write to the Free Software Foundation,
  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.
  23  */
  24 
  25 // This file is available under and governed by the GNU General Public
  26 // License version 2 only, as published by the Free Software Foundation.
  27 // However, the following notice accompanied the original version of this
  28 // file:
  29 //
  30 /*
  31  * Copyright © 2011,2012,2014  Google, Inc.
  32  *
  33  *  This is part of HarfBuzz, a text shaping library.
  34  *
  35  * Permission is hereby granted, without written agreement and without
  36  * license or royalty fees, to use, copy, modify, and distribute this
  37  * software and its documentation for any purpose, provided that the
  38  * above copyright notice and the following two paragraphs appear in
  39  * all copies of this software.
  40  *
  41  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
  42  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
  43  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
  44  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
  45  * DAMAGE.
  46  *
  47  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
  48  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  49  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
  50  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
  51  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  52  *
  53  * Google Author(s): Behdad Esfahbod
  54  */
  55 
  56 #ifndef HB_UTF_PRIVATE_HH
  57 #define HB_UTF_PRIVATE_HH
  58 
  59 #include "hb-private.hh"
  60 
  61 
  62 struct hb_utf8_t
  63 {
  64   typedef uint8_t codepoint_t;
  65 
  66   static inline const uint8_t *
  67   next (const uint8_t *text,
  68         const uint8_t *end,
  69         hb_codepoint_t *unicode,
  70         hb_codepoint_t replacement)
  71   {
  72     /* Written to only accept well-formed sequences.
  73      * Based on ideas from ICU's U8_NEXT.
  74      * Generates one "replacement" for each ill-formed byte. */
  75 
  76     hb_codepoint_t c = *text++;
  77 
  78     if (c > 0x7Fu)
  79     {
  80       if (hb_in_range (c, 0xC2u, 0xDFu)) /* Two-byte */
  81       {
  82         unsigned int t1;
  83         if (likely (text < end &&
  84                     (t1 = text[0] - 0x80u) <= 0x3Fu))
  85         {
  86           c = ((c&0x1Fu)<<6) | t1;
  87           text++;
  88         }
  89         else
  90           goto error;
  91       }
  92       else if (hb_in_range (c, 0xE0u, 0xEFu)) /* Three-byte */
  93       {
  94         unsigned int t1, t2;
  95         if (likely (1 < end - text &&
  96                     (t1 = text[0] - 0x80u) <= 0x3Fu &&
  97                     (t2 = text[1] - 0x80u) <= 0x3Fu))
  98         {
  99           c = ((c&0xFu)<<12) | (t1<<6) | t2;
 100           if (unlikely (c < 0x0800u || hb_in_range (c, 0xD800u, 0xDFFFu)))
 101             goto error;
 102           text += 2;
 103         }
 104         else
 105           goto error;
 106       }
 107       else if (hb_in_range (c, 0xF0u, 0xF4u)) /* Four-byte */
 108       {
 109         unsigned int t1, t2, t3;
 110         if (likely (2 < end - text &&
 111                     (t1 = text[0] - 0x80u) <= 0x3Fu &&
 112                     (t2 = text[1] - 0x80u) <= 0x3Fu &&
 113                     (t3 = text[2] - 0x80u) <= 0x3Fu))
 114         {
 115           c = ((c&0x7u)<<18) | (t1<<12) | (t2<<6) | t3;
 116           if (unlikely (!hb_in_range (c, 0x10000u, 0x10FFFFu)))
 117             goto error;
 118           text += 3;
 119         }
 120         else
 121           goto error;
 122       }
 123       else
 124         goto error;
 125     }
 126 
 127     *unicode = c;
 128     return text;
 129 
 130   error:
 131     *unicode = replacement;
 132     return text;
 133   }
 134 
 135   static inline const uint8_t *
 136   prev (const uint8_t *text,
 137         const uint8_t *start,
 138         hb_codepoint_t *unicode,
 139         hb_codepoint_t replacement)
 140   {
 141     const uint8_t *end = text--;
 142     while (start < text && (*text & 0xc0) == 0x80 && end - text < 4)
 143       text--;
 144 
 145     if (likely (next (text, end, unicode, replacement) == end))
 146       return text;
 147 
 148     *unicode = replacement;
 149     return end - 1;
 150   }
 151 
 152   static inline unsigned int
 153   strlen (const uint8_t *text)
 154   {
 155     return ::strlen ((const char *) text);
 156   }
 157 };
 158 
 159 
 160 struct hb_utf16_t
 161 {
 162   typedef uint16_t codepoint_t;
 163 
 164   static inline const uint16_t *
 165   next (const uint16_t *text,
 166         const uint16_t *end,
 167         hb_codepoint_t *unicode,
 168         hb_codepoint_t replacement)
 169   {
 170     hb_codepoint_t c = *text++;
 171 
 172     if (likely (!hb_in_range (c, 0xD800u, 0xDFFFu)))
 173     {
 174       *unicode = c;
 175       return text;
 176     }
 177 
 178     if (likely (hb_in_range (c, 0xD800u, 0xDBFFu)))
 179     {
 180       /* High-surrogate in c */
 181       hb_codepoint_t l;
 182       if (text < end && ((l = *text), likely (hb_in_range (l, 0xDC00u, 0xDFFFu))))
 183       {
 184         /* Low-surrogate in l */
 185         *unicode = (c << 10) + l - ((0xD800u << 10) - 0x10000u + 0xDC00u);
 186          text++;
 187          return text;
 188       }
 189     }
 190 
 191     /* Lonely / out-of-order surrogate. */
 192     *unicode = replacement;
 193     return text;
 194   }
 195 
 196   static inline const uint16_t *
 197   prev (const uint16_t *text,
 198         const uint16_t *start,
 199         hb_codepoint_t *unicode,
 200         hb_codepoint_t replacement)
 201   {
 202     const uint16_t *end = text--;
 203     hb_codepoint_t c = *text;
 204 
 205     if (likely (!hb_in_range (c, 0xD800u, 0xDFFFu)))
 206     {
 207       *unicode = c;
 208       return text;
 209     }
 210 
 211     if (likely (start < text && hb_in_range (c, 0xDC00u, 0xDFFFu)))
 212       text--;
 213 
 214     if (likely (next (text, end, unicode, replacement) == end))
 215       return text;
 216 
 217     *unicode = replacement;
 218     return end - 1;
 219   }
 220 
 221 
 222   static inline unsigned int
 223   strlen (const uint16_t *text)
 224   {
 225     unsigned int l = 0;
 226     while (*text++) l++;
 227     return l;
 228   }
 229 };
 230 
 231 
 232 template <bool validate=true>
 233 struct hb_utf32_t
 234 {
 235   typedef uint32_t codepoint_t;
 236 
 237   static inline const uint32_t *
 238   next (const uint32_t *text,
 239         const uint32_t *end HB_UNUSED,
 240         hb_codepoint_t *unicode,
 241         hb_codepoint_t replacement)
 242   {
 243     hb_codepoint_t c = *text++;
 244     if (validate && unlikely (c > 0x10FFFFu || hb_in_range (c, 0xD800u, 0xDFFFu)))
 245       goto error;
 246     *unicode = c;
 247     return text;
 248 
 249   error:
 250     *unicode = replacement;
 251     return text;
 252   }
 253 
 254   static inline const uint32_t *
 255   prev (const uint32_t *text,
 256         const uint32_t *start HB_UNUSED,
 257         hb_codepoint_t *unicode,
 258         hb_codepoint_t replacement)
 259   {
 260     next (text - 1, text, unicode, replacement);
 261     return text - 1;
 262   }
 263 
 264   static inline unsigned int
 265   strlen (const uint32_t *text)
 266   {
 267     unsigned int l = 0;
 268     while (*text++) l++;
 269     return l;
 270   }
 271 };
 272 
 273 
 274 struct hb_latin1_t
 275 {
 276   typedef uint8_t codepoint_t;
 277 
 278   static inline const uint8_t *
 279   next (const uint8_t *text,
 280         const uint8_t *end HB_UNUSED,
 281         hb_codepoint_t *unicode,
 282         hb_codepoint_t replacement HB_UNUSED)
 283   {
 284     *unicode = *text++;
 285     return text;
 286   }
 287 
 288   static inline const uint8_t *
 289   prev (const uint8_t *text,
 290         const uint8_t *start HB_UNUSED,
 291         hb_codepoint_t *unicode,
 292         hb_codepoint_t replacement)
 293   {
 294     *unicode = *--text;
 295     return text;
 296   }
 297 
 298   static inline unsigned int
 299   strlen (const uint8_t *text)
 300   {
 301     unsigned int l = 0;
 302     while (*text++) l++;
 303     return l;
 304   }
 305 };
 306 
 307 #endif /* HB_UTF_PRIVATE_HH */