1 /* 2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 3 * 4 * This code is free software; you can redistribute it and/or modify it 5 * under the terms of the GNU General Public License version 2 only, as 6 * published by the Free Software Foundation. Oracle designates this 7 * particular file as subject to the "Classpath" exception as provided 8 * by Oracle in the LICENSE file that accompanied this code. 9 * 10 * This code is distributed in the hope that it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 13 * version 2 for more details (a copy is included in the LICENSE file that 14 * accompanied this code). 15 * 16 * You should have received a copy of the GNU General Public License version 17 * 2 along with this work; if not, write to the Free Software Foundation, 18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 19 * 20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 21 * or visit www.oracle.com if you need additional information or have any 22 * questions. 23 */ 24 25 // This file is available under and governed by the GNU General Public 26 // License version 2 only, as published by the Free Software Foundation. 27 // However, the following notice accompanied the original version of this 28 // file: 29 // 30 /* 31 * Copyright © 2011,2012,2014 Google, Inc. 32 * 33 * This is part of HarfBuzz, a text shaping library. 34 * 35 * Permission is hereby granted, without written agreement and without 36 * license or royalty fees, to use, copy, modify, and distribute this 37 * software and its documentation for any purpose, provided that the 38 * above copyright notice and the following two paragraphs appear in 39 * all copies of this software. 40 * 41 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 42 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 43 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 44 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 45 * DAMAGE. 46 * 47 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 48 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 49 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 50 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 51 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 52 * 53 * Google Author(s): Behdad Esfahbod 54 */ 55 56 #ifndef HB_UTF_PRIVATE_HH 57 #define HB_UTF_PRIVATE_HH 58 59 #include "hb-private.hh" 60 61 62 struct hb_utf8_t 63 { 64 typedef uint8_t codepoint_t; 65 66 static inline const uint8_t * 67 next (const uint8_t *text, 68 const uint8_t *end, 69 hb_codepoint_t *unicode, 70 hb_codepoint_t replacement) 71 { 72 /* Written to only accept well-formed sequences. 73 * Based on ideas from ICU's U8_NEXT. 74 * Generates one "replacement" for each ill-formed byte. */ 75 76 hb_codepoint_t c = *text++; 77 78 if (c > 0x7Fu) 79 { 80 if (hb_in_range (c, 0xC2u, 0xDFu)) /* Two-byte */ 81 { 82 unsigned int t1; 83 if (likely (text < end && 84 (t1 = text[0] - 0x80u) <= 0x3Fu)) 85 { 86 c = ((c&0x1Fu)<<6) | t1; 87 text++; 88 } 89 else 90 goto error; 91 } 92 else if (hb_in_range (c, 0xE0u, 0xEFu)) /* Three-byte */ 93 { 94 unsigned int t1, t2; 95 if (likely (1 < end - text && 96 (t1 = text[0] - 0x80u) <= 0x3Fu && 97 (t2 = text[1] - 0x80u) <= 0x3Fu)) 98 { 99 c = ((c&0xFu)<<12) | (t1<<6) | t2; 100 if (unlikely (c < 0x0800u || hb_in_range (c, 0xD800u, 0xDFFFu))) 101 goto error; 102 text += 2; 103 } 104 else 105 goto error; 106 } 107 else if (hb_in_range (c, 0xF0u, 0xF4u)) /* Four-byte */ 108 { 109 unsigned int t1, t2, t3; 110 if (likely (2 < end - text && 111 (t1 = text[0] - 0x80u) <= 0x3Fu && 112 (t2 = text[1] - 0x80u) <= 0x3Fu && 113 (t3 = text[2] - 0x80u) <= 0x3Fu)) 114 { 115 c = ((c&0x7u)<<18) | (t1<<12) | (t2<<6) | t3; 116 if (unlikely (!hb_in_range (c, 0x10000u, 0x10FFFFu))) 117 goto error; 118 text += 3; 119 } 120 else 121 goto error; 122 } 123 else 124 goto error; 125 } 126 127 *unicode = c; 128 return text; 129 130 error: 131 *unicode = replacement; 132 return text; 133 } 134 135 static inline const uint8_t * 136 prev (const uint8_t *text, 137 const uint8_t *start, 138 hb_codepoint_t *unicode, 139 hb_codepoint_t replacement) 140 { 141 const uint8_t *end = text--; 142 while (start < text && (*text & 0xc0) == 0x80 && end - text < 4) 143 text--; 144 145 if (likely (next (text, end, unicode, replacement) == end)) 146 return text; 147 148 *unicode = replacement; 149 return end - 1; 150 } 151 152 static inline unsigned int 153 strlen (const uint8_t *text) 154 { 155 return ::strlen ((const char *) text); 156 } 157 }; 158 159 160 struct hb_utf16_t 161 { 162 typedef uint16_t codepoint_t; 163 164 static inline const uint16_t * 165 next (const uint16_t *text, 166 const uint16_t *end, 167 hb_codepoint_t *unicode, 168 hb_codepoint_t replacement) 169 { 170 hb_codepoint_t c = *text++; 171 172 if (likely (!hb_in_range (c, 0xD800u, 0xDFFFu))) 173 { 174 *unicode = c; 175 return text; 176 } 177 178 if (likely (hb_in_range (c, 0xD800u, 0xDBFFu))) 179 { 180 /* High-surrogate in c */ 181 hb_codepoint_t l; 182 if (text < end && ((l = *text), likely (hb_in_range (l, 0xDC00u, 0xDFFFu)))) 183 { 184 /* Low-surrogate in l */ 185 *unicode = (c << 10) + l - ((0xD800u << 10) - 0x10000u + 0xDC00u); 186 text++; 187 return text; 188 } 189 } 190 191 /* Lonely / out-of-order surrogate. */ 192 *unicode = replacement; 193 return text; 194 } 195 196 static inline const uint16_t * 197 prev (const uint16_t *text, 198 const uint16_t *start, 199 hb_codepoint_t *unicode, 200 hb_codepoint_t replacement) 201 { 202 const uint16_t *end = text--; 203 hb_codepoint_t c = *text; 204 205 if (likely (!hb_in_range (c, 0xD800u, 0xDFFFu))) 206 { 207 *unicode = c; 208 return text; 209 } 210 211 if (likely (start < text && hb_in_range (c, 0xDC00u, 0xDFFFu))) 212 text--; 213 214 if (likely (next (text, end, unicode, replacement) == end)) 215 return text; 216 217 *unicode = replacement; 218 return end - 1; 219 } 220 221 222 static inline unsigned int 223 strlen (const uint16_t *text) 224 { 225 unsigned int l = 0; 226 while (*text++) l++; 227 return l; 228 } 229 }; 230 231 232 template <bool validate=true> 233 struct hb_utf32_t 234 { 235 typedef uint32_t codepoint_t; 236 237 static inline const uint32_t * 238 next (const uint32_t *text, 239 const uint32_t *end HB_UNUSED, 240 hb_codepoint_t *unicode, 241 hb_codepoint_t replacement) 242 { 243 hb_codepoint_t c = *text++; 244 if (validate && unlikely (c > 0x10FFFFu || hb_in_range (c, 0xD800u, 0xDFFFu))) 245 goto error; 246 *unicode = c; 247 return text; 248 249 error: 250 *unicode = replacement; 251 return text; 252 } 253 254 static inline const uint32_t * 255 prev (const uint32_t *text, 256 const uint32_t *start HB_UNUSED, 257 hb_codepoint_t *unicode, 258 hb_codepoint_t replacement) 259 { 260 next (text - 1, text, unicode, replacement); 261 return text - 1; 262 } 263 264 static inline unsigned int 265 strlen (const uint32_t *text) 266 { 267 unsigned int l = 0; 268 while (*text++) l++; 269 return l; 270 } 271 }; 272 273 274 struct hb_latin1_t 275 { 276 typedef uint8_t codepoint_t; 277 278 static inline const uint8_t * 279 next (const uint8_t *text, 280 const uint8_t *end HB_UNUSED, 281 hb_codepoint_t *unicode, 282 hb_codepoint_t replacement HB_UNUSED) 283 { 284 *unicode = *text++; 285 return text; 286 } 287 288 static inline const uint8_t * 289 prev (const uint8_t *text, 290 const uint8_t *start HB_UNUSED, 291 hb_codepoint_t *unicode, 292 hb_codepoint_t replacement) 293 { 294 *unicode = *--text; 295 return text; 296 } 297 298 static inline unsigned int 299 strlen (const uint8_t *text) 300 { 301 unsigned int l = 0; 302 while (*text++) l++; 303 return l; 304 } 305 }; 306 307 #endif /* HB_UTF_PRIVATE_HH */