1 /* 2 * xsltlocale.c: locale handling 3 * 4 * Reference: 5 * RFC 3066: Tags for the Identification of Languages 6 * http://www.ietf.org/rfc/rfc3066.txt 7 * ISO 639-1, ISO 3166-1 8 * 9 * Author: Nick Wellnhofer 10 * winapi port: Roumen Petrov 11 */ 12 13 #define IN_LIBXSLT 14 #include "libxslt.h" 15 16 #include <string.h> 17 #include <libxml/xmlmemory.h> 18 19 #include "xsltlocale.h" 20 #include "xsltutils.h" 21 22 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 2 23 #define newlocale __newlocale 24 #define freelocale __freelocale 25 #define strxfrm_l __strxfrm_l 26 #define LC_COLLATE_MASK (1 << LC_COLLATE) 27 #endif 28 29 #define TOUPPER(c) (c & ~0x20) 30 #define TOLOWER(c) (c | 0x20) 31 #define ISALPHA(c) ((unsigned)(TOUPPER(c) - 'A') < 26) 32 33 /*without terminating null character*/ 34 #define XSLTMAX_ISO639LANGLEN 8 35 #define XSLTMAX_ISO3166CNTRYLEN 8 36 /* <lang>-<cntry> */ 37 #define XSLTMAX_LANGTAGLEN (XSLTMAX_ISO639LANGLEN+1+XSLTMAX_ISO3166CNTRYLEN) 38 39 static const xmlChar* xsltDefaultRegion(const xmlChar *localeName); 40 41 #ifdef XSLT_LOCALE_WINAPI 42 xmlRMutexPtr xsltLocaleMutex = NULL; 43 44 struct xsltRFC1766Info_s { 45 /*note typedef unsigned char xmlChar !*/ 46 xmlChar tag[XSLTMAX_LANGTAGLEN+1]; 47 /*note typedef LCID xsltLocale !*/ 48 xsltLocale lcid; 49 }; 50 typedef struct xsltRFC1766Info_s xsltRFC1766Info; 51 52 static int xsltLocaleListSize = 0; 53 static xsltRFC1766Info *xsltLocaleList = NULL; 54 55 56 static xsltLocale 57 xslt_locale_WINAPI(const xmlChar *languageTag) { 58 int k; 59 xsltRFC1766Info *p = xsltLocaleList; 60 61 for (k=0; k<xsltLocaleListSize; k++, p++) 62 if (xmlStrcmp(p->tag, languageTag) == 0) return p->lcid; 63 return((xsltLocale)0); 64 } 65 66 static void xsltEnumSupportedLocales(void); 67 #endif 68 69 /** 70 * xsltFreeLocales: 71 * 72 * Cleanup function for the locale support on shutdown 73 */ 74 void 75 xsltFreeLocales(void) { 76 #ifdef XSLT_LOCALE_WINAPI 77 xmlRMutexLock(xsltLocaleMutex); 78 xmlFree(xsltLocaleList); 79 xsltLocaleList = NULL; 80 xmlRMutexUnlock(xsltLocaleMutex); 81 #endif 82 } 83 84 /** 85 * xsltNewLocale: 86 * @languageTag: RFC 3066 language tag 87 * 88 * Creates a new locale of an opaque system dependent type based on the 89 * language tag. 90 * 91 * Returns the locale or NULL on error or if no matching locale was found 92 */ 93 xsltLocale 94 xsltNewLocale(const xmlChar *languageTag) { 95 #ifdef XSLT_LOCALE_XLOCALE 96 xsltLocale locale; 97 char localeName[XSLTMAX_LANGTAGLEN+6]; /* 6 chars for ".utf8\0" */ 98 const xmlChar *p = languageTag; 99 const char *region = NULL; 100 char *q = localeName; 101 int i, llen; 102 103 /* Convert something like "pt-br" to "pt_BR.utf8" */ 104 105 if (languageTag == NULL) 106 return(NULL); 107 108 for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i) 109 *q++ = TOLOWER(*p++); 110 111 if (i == 0) 112 return(NULL); 113 114 llen = i; 115 116 if (*p) { 117 if (*p++ != '-') 118 return(NULL); 119 *q++ = '_'; 120 121 for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i) 122 *q++ = TOUPPER(*p++); 123 124 if (i == 0 || *p) 125 return(NULL); 126 127 memcpy(q, ".utf8", 6); 128 locale = newlocale(LC_COLLATE_MASK, localeName, NULL); 129 if (locale != NULL) 130 return(locale); 131 132 /* Continue without using country code */ 133 134 q = localeName + llen; 135 } 136 137 /* Try locale without territory, e.g. for Esperanto (eo) */ 138 139 memcpy(q, ".utf8", 6); 140 locale = newlocale(LC_COLLATE_MASK, localeName, NULL); 141 if (locale != NULL) 142 return(locale); 143 144 /* Try to find most common country for language */ 145 146 if (llen != 2) 147 return(NULL); 148 149 region = (char *)xsltDefaultRegion((xmlChar *)localeName); 150 if (region == NULL) 151 return(NULL); 152 153 q = localeName + llen; 154 *q++ = '_'; 155 *q++ = region[0]; 156 *q++ = region[1]; 157 memcpy(q, ".utf8", 6); 158 locale = newlocale(LC_COLLATE_MASK, localeName, NULL); 159 160 return(locale); 161 #endif 162 163 #ifdef XSLT_LOCALE_WINAPI 164 { 165 xsltLocale locale = (xsltLocale)0; 166 xmlChar localeName[XSLTMAX_LANGTAGLEN+1]; 167 xmlChar *q = localeName; 168 const xmlChar *p = languageTag; 169 int i, llen; 170 const xmlChar *region = NULL; 171 172 if (languageTag == NULL) goto end; 173 174 xsltEnumSupportedLocales(); 175 176 for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i) 177 *q++ = TOLOWER(*p++); 178 if (i == 0) goto end; 179 180 llen = i; 181 *q++ = '-'; 182 if (*p) { /*if country tag is given*/ 183 if (*p++ != '-') goto end; 184 185 for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i) 186 *q++ = TOUPPER(*p++); 187 if (i == 0 || *p) goto end; 188 189 *q = '\0'; 190 locale = xslt_locale_WINAPI(localeName); 191 if (locale != (xsltLocale)0) goto end; 192 } 193 /* Try to find most common country for language */ 194 region = xsltDefaultRegion(localeName); 195 if (region == NULL) goto end; 196 197 strcpy(localeName + llen + 1, region); 198 locale = xslt_locale_WINAPI(localeName); 199 end: 200 return(locale); 201 } 202 #endif 203 204 #ifdef XSLT_LOCALE_NONE 205 return(NULL); 206 #endif 207 } 208 209 static const xmlChar* 210 xsltDefaultRegion(const xmlChar *localeName) { 211 xmlChar c; 212 /* region should be xmlChar, but gcc warns on all string assignments */ 213 const char *region = NULL; 214 215 c = localeName[1]; 216 /* This is based on the locales from glibc 2.3.3 */ 217 218 switch (localeName[0]) { 219 case 'a': 220 if (c == 'a' || c == 'm') region = "ET"; 221 else if (c == 'f') region = "ZA"; 222 else if (c == 'n') region = "ES"; 223 else if (c == 'r') region = "AE"; 224 else if (c == 'z') region = "AZ"; 225 break; 226 case 'b': 227 if (c == 'e') region = "BY"; 228 else if (c == 'g') region = "BG"; 229 else if (c == 'n') region = "BD"; 230 else if (c == 'r') region = "FR"; 231 else if (c == 's') region = "BA"; 232 break; 233 case 'c': 234 if (c == 'a') region = "ES"; 235 else if (c == 's') region = "CZ"; 236 else if (c == 'y') region = "GB"; 237 break; 238 case 'd': 239 if (c == 'a') region = "DK"; 240 else if (c == 'e') region = "DE"; 241 break; 242 case 'e': 243 if (c == 'l') region = "GR"; 244 else if (c == 'n' || c == 'o') region = "US"; 245 else if (c == 's' || c == 'u') region = "ES"; 246 else if (c == 't') region = "EE"; 247 break; 248 case 'f': 249 if (c == 'a') region = "IR"; 250 else if (c == 'i') region = "FI"; 251 else if (c == 'o') region = "FO"; 252 else if (c == 'r') region = "FR"; 253 break; 254 case 'g': 255 if (c == 'a') region = "IE"; 256 else if (c == 'l') region = "ES"; 257 else if (c == 'v') region = "GB"; 258 break; 259 case 'h': 260 if (c == 'e') region = "IL"; 261 else if (c == 'i') region = "IN"; 262 else if (c == 'r') region = "HT"; 263 else if (c == 'u') region = "HU"; 264 break; 265 case 'i': 266 if (c == 'd') region = "ID"; 267 else if (c == 's') region = "IS"; 268 else if (c == 't') region = "IT"; 269 else if (c == 'w') region = "IL"; 270 break; 271 case 'j': 272 if (c == 'a') region = "JP"; 273 break; 274 case 'k': 275 if (c == 'l') region = "GL"; 276 else if (c == 'o') region = "KR"; 277 else if (c == 'w') region = "GB"; 278 break; 279 case 'l': 280 if (c == 't') region = "LT"; 281 else if (c == 'v') region = "LV"; 282 break; 283 case 'm': 284 if (c == 'k') region = "MK"; 285 else if (c == 'l' || c == 'r') region = "IN"; 286 else if (c == 'n') region = "MN"; 287 else if (c == 's') region = "MY"; 288 else if (c == 't') region = "MT"; 289 break; 290 case 'n': 291 if (c == 'b' || c == 'n' || c == 'o') region = "NO"; 292 else if (c == 'e') region = "NP"; 293 else if (c == 'l') region = "NL"; 294 break; 295 case 'o': 296 if (c == 'm') region = "ET"; 297 break; 298 case 'p': 299 if (c == 'a') region = "IN"; 300 else if (c == 'l') region = "PL"; 301 else if (c == 't') region = "PT"; 302 break; 303 case 'r': 304 if (c == 'o') region = "RO"; 305 else if (c == 'u') region = "RU"; 306 break; 307 case 's': 308 switch (c) { 309 case 'e': region = "NO"; break; 310 case 'h': region = "YU"; break; 311 case 'k': region = "SK"; break; 312 case 'l': region = "SI"; break; 313 case 'o': region = "ET"; break; 314 case 'q': region = "AL"; break; 315 case 't': region = "ZA"; break; 316 case 'v': region = "SE"; break; 317 } 318 break; 319 case 't': 320 if (c == 'a' || c == 'e') region = "IN"; 321 else if (c == 'h') region = "TH"; 322 else if (c == 'i') region = "ER"; 323 else if (c == 'r') region = "TR"; 324 else if (c == 't') region = "RU"; 325 break; 326 case 'u': 327 if (c == 'k') region = "UA"; 328 else if (c == 'r') region = "PK"; 329 break; 330 case 'v': 331 if (c == 'i') region = "VN"; 332 break; 333 case 'w': 334 if (c == 'a') region = "BE"; 335 break; 336 case 'x': 337 if (c == 'h') region = "ZA"; 338 break; 339 case 'z': 340 if (c == 'h') region = "CN"; 341 else if (c == 'u') region = "ZA"; 342 break; 343 } 344 return((xmlChar *)region); 345 } 346 347 /** 348 * xsltFreeLocale: 349 * @locale: the locale to free 350 * 351 * Frees a locale created with xsltNewLocale 352 */ 353 void 354 xsltFreeLocale(xsltLocale locale) { 355 #ifdef XSLT_LOCALE_XLOCALE 356 freelocale(locale); 357 #endif 358 } 359 360 /** 361 * xsltStrxfrm: 362 * @locale: locale created with xsltNewLocale 363 * @string: UTF-8 string to transform 364 * 365 * Transforms a string according to locale. The transformed string must then be 366 * compared with xsltLocaleStrcmp and freed with xmlFree. 367 * 368 * Returns the transformed string or NULL on error 369 */ 370 xsltLocaleChar * 371 xsltStrxfrm(xsltLocale locale, const xmlChar *string) 372 { 373 #ifdef XSLT_LOCALE_NONE 374 return(NULL); 375 #else 376 size_t xstrlen, r; 377 xsltLocaleChar *xstr; 378 379 #ifdef XSLT_LOCALE_XLOCALE 380 xstrlen = strxfrm_l(NULL, (const char *)string, 0, locale) + 1; 381 xstr = (xsltLocaleChar *) xmlMalloc(xstrlen); 382 if (xstr == NULL) { 383 xsltTransformError(NULL, NULL, NULL, 384 "xsltStrxfrm : out of memory error\n"); 385 return(NULL); 386 } 387 388 r = strxfrm_l((char *)xstr, (const char *)string, xstrlen, locale); 389 #endif 390 391 #ifdef XSLT_LOCALE_WINAPI 392 xstrlen = MultiByteToWideChar(CP_UTF8, 0, string, -1, NULL, 0); 393 if (xstrlen == 0) { 394 xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : MultiByteToWideChar check failed\n"); 395 return(NULL); 396 } 397 xstr = (xsltLocaleChar*) xmlMalloc(xstrlen * sizeof(xsltLocaleChar)); 398 if (xstr == NULL) { 399 xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : out of memory\n"); 400 return(NULL); 401 } 402 r = MultiByteToWideChar(CP_UTF8, 0, string, -1, xstr, xstrlen); 403 if (r == 0) { 404 xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : MultiByteToWideChar failed\n"); 405 xmlFree(xstr); 406 return(NULL); 407 } 408 return(xstr); 409 #endif /* XSLT_LOCALE_WINAPI */ 410 411 if (r >= xstrlen) { 412 xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : strxfrm failed\n"); 413 xmlFree(xstr); 414 return(NULL); 415 } 416 417 return(xstr); 418 #endif /* XSLT_LOCALE_NONE */ 419 } 420 421 /** 422 * xsltLocaleStrcmp: 423 * @locale: a locale identifier 424 * @str1: a string transformed with xsltStrxfrm 425 * @str2: a string transformed with xsltStrxfrm 426 * 427 * Compares two strings transformed with xsltStrxfrm 428 * 429 * Returns a value < 0 if str1 sorts before str2, 430 * a value > 0 if str1 sorts after str2, 431 * 0 if str1 and str2 are equal wrt sorting 432 */ 433 int 434 xsltLocaleStrcmp(xsltLocale locale, const xsltLocaleChar *str1, const xsltLocaleChar *str2) { 435 (void)locale; 436 #ifdef XSLT_LOCALE_WINAPI 437 { 438 int ret; 439 if (str1 == str2) return(0); 440 if (str1 == NULL) return(-1); 441 if (str2 == NULL) return(1); 442 ret = CompareStringW(locale, 0, str1, -1, str2, -1); 443 if (ret == 0) { 444 xsltTransformError(NULL, NULL, NULL, "xsltLocaleStrcmp : CompareStringW fail\n"); 445 return(0); 446 } 447 return(ret - 2); 448 } 449 #else 450 return(xmlStrcmp(str1, str2)); 451 #endif 452 } 453 454 #ifdef XSLT_LOCALE_WINAPI 455 /** 456 * xsltCountSupportedLocales: 457 * @lcid: not used 458 * 459 * callback used to count locales 460 * 461 * Returns TRUE 462 */ 463 BOOL CALLBACK 464 xsltCountSupportedLocales(LPSTR lcid) { 465 (void) lcid; 466 ++xsltLocaleListSize; 467 return(TRUE); 468 } 469 470 /** 471 * xsltIterateSupportedLocales: 472 * @lcid: not used 473 * 474 * callback used to track locales 475 * 476 * Returns TRUE if not at the end of the array 477 */ 478 BOOL CALLBACK 479 xsltIterateSupportedLocales(LPSTR lcid) { 480 static int count = 0; 481 xmlChar iso639lang [XSLTMAX_ISO639LANGLEN +1]; 482 xmlChar iso3136ctry[XSLTMAX_ISO3166CNTRYLEN+1]; 483 int k, l; 484 xsltRFC1766Info *p = xsltLocaleList + count; 485 486 k = sscanf(lcid, "%lx", (long*)&p->lcid); 487 if (k < 1) goto end; 488 /*don't count terminating null character*/ 489 k = GetLocaleInfoA(p->lcid, LOCALE_SISO639LANGNAME , iso639lang , sizeof(iso639lang )); 490 if (--k < 1) goto end; 491 l = GetLocaleInfoA(p->lcid, LOCALE_SISO3166CTRYNAME, iso3136ctry, sizeof(iso3136ctry)); 492 if (--l < 1) goto end; 493 494 { /*fill results*/ 495 xmlChar *q = p->tag; 496 memcpy(q, iso639lang, k); 497 q += k; 498 *q++ = '-'; 499 memcpy(q, iso3136ctry, l); 500 q += l; 501 *q = '\0'; 502 } 503 ++count; 504 end: 505 return((count < xsltLocaleListSize) ? TRUE : FALSE); 506 } 507 508 509 static void 510 xsltEnumSupportedLocales(void) { 511 xmlRMutexLock(xsltLocaleMutex); 512 if (xsltLocaleListSize <= 0) { 513 size_t len; 514 515 EnumSystemLocalesA(xsltCountSupportedLocales, LCID_SUPPORTED); 516 517 len = xsltLocaleListSize * sizeof(xsltRFC1766Info); 518 xsltLocaleList = xmlMalloc(len); 519 memset(xsltLocaleList, 0, len); 520 EnumSystemLocalesA(xsltIterateSupportedLocales, LCID_SUPPORTED); 521 } 522 xmlRMutexUnlock(xsltLocaleMutex); 523 } 524 525 #endif /*def XSLT_LOCALE_WINAPI*/