1 /*
   2  * xsltlocale.c: locale handling
   3  *
   4  * Reference:
   5  * RFC 3066: Tags for the Identification of Languages
   6  * http://www.ietf.org/rfc/rfc3066.txt
   7  * ISO 639-1, ISO 3166-1
   8  *
   9  * Author: Nick Wellnhofer
  10  * winapi port: Roumen Petrov
  11  */
  12 
  13 #define IN_LIBXSLT
  14 #include "libxslt.h"
  15 
  16 #include <string.h>
  17 #include <libxml/xmlmemory.h>
  18 
  19 #include "xsltlocale.h"
  20 #include "xsltutils.h"
  21 
/*
 * For glibc 2.x with a minor version of 2 or lower, route the extended
 * locale calls used in this file to their double-underscore names and
 * define LC_COLLATE_MASK by hand from LC_COLLATE.
 */
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 2
#define newlocale __newlocale
#define freelocale __freelocale
#define strxfrm_l __strxfrm_l
#define LC_COLLATE_MASK (1 << LC_COLLATE)
#endif
  28 
  29 #define TOUPPER(c) (c & ~0x20)
  30 #define TOLOWER(c) (c | 0x20)
  31 #define ISALPHA(c) ((unsigned)(TOUPPER(c) - 'A') < 26)
  32 
/*
 * Maximum number of characters accepted for each part of a language tag.
 * None of these lengths include the terminating null character.
 */
#define XSLTMAX_ISO639LANGLEN       8
#define XSLTMAX_ISO3166CNTRYLEN     8
                    /* <lang>-<cntry>: both parts plus one separator character */
#define XSLTMAX_LANGTAGLEN      (XSLTMAX_ISO639LANGLEN+1+XSLTMAX_ISO3166CNTRYLEN)
  38 
  39 static const xmlChar* xsltDefaultRegion(const xmlChar *localeName);
  40 
  41 #ifdef XSLT_LOCALE_WINAPI
/*
 * Mutex locked around accesses to the cached locale table in
 * xsltFreeLocales and xsltEnumSupportedLocales.  It is NULL here;
 * NOTE(review): presumably created elsewhere during library setup - confirm.
 */
xmlRMutexPtr xsltLocaleMutex = NULL;

/*
 * One entry of the cached locale table: a "<lang>-<cntry>" tag together
 * with the system locale identifier it stands for.
 */
struct xsltRFC1766Info_s {
      /*note typedef unsigned char xmlChar !*/
    xmlChar    tag[XSLTMAX_LANGTAGLEN+1];
      /*note typedef LCID xsltLocale !*/
    xsltLocale lcid;
};
typedef struct xsltRFC1766Info_s xsltRFC1766Info;

/* Number of entries allocated in xsltLocaleList (0 until enumerated) */
static int xsltLocaleListSize = 0;
/* The cached table itself, built by xsltEnumSupportedLocales */
static xsltRFC1766Info *xsltLocaleList = NULL;
  54 
  55 
  56 static xsltLocale
  57 xslt_locale_WINAPI(const xmlChar *languageTag) {
  58     int k;
  59     xsltRFC1766Info *p = xsltLocaleList;
  60 
  61     for (k=0; k<xsltLocaleListSize; k++, p++)
  62     if (xmlStrcmp(p->tag, languageTag) == 0) return p->lcid;
  63     return((xsltLocale)0);
  64 }
  65 
  66 static void xsltEnumSupportedLocales(void);
  67 #endif
  68 
  69 /**
  70  * xsltFreeLocales:
  71  *
  72  * Cleanup function for the locale support on shutdown
  73  */
  74 void
  75 xsltFreeLocales(void) {
  76 #ifdef XSLT_LOCALE_WINAPI
  77     xmlRMutexLock(xsltLocaleMutex);
  78     xmlFree(xsltLocaleList);
  79     xsltLocaleList = NULL;
  80     xmlRMutexUnlock(xsltLocaleMutex);
  81 #endif
  82 }
  83 
  84 /**
  85  * xsltNewLocale:
  86  * @languageTag: RFC 3066 language tag
  87  *
  88  * Creates a new locale of an opaque system dependent type based on the
  89  * language tag.
  90  *
  91  * Returns the locale or NULL on error or if no matching locale was found
  92  */
  93 xsltLocale
  94 xsltNewLocale(const xmlChar *languageTag) {
  95 #ifdef XSLT_LOCALE_XLOCALE
  96     xsltLocale locale;
  97     char localeName[XSLTMAX_LANGTAGLEN+6]; /* 6 chars for ".utf8\0" */
  98     const xmlChar *p = languageTag;
  99     const char *region = NULL;
 100     char *q = localeName;
 101     int i, llen;
 102 
 103     /* Convert something like "pt-br" to "pt_BR.utf8" */
 104 
 105     if (languageTag == NULL)
 106     return(NULL);
 107 
 108     for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i)
 109     *q++ = TOLOWER(*p++);
 110 
 111     if (i == 0)
 112     return(NULL);
 113 
 114     llen = i;
 115 
 116     if (*p) {
 117     if (*p++ != '-')
 118         return(NULL);
 119         *q++ = '_';
 120 
 121     for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i)
 122         *q++ = TOUPPER(*p++);
 123 
 124     if (i == 0 || *p)
 125         return(NULL);
 126 
 127         memcpy(q, ".utf8", 6);
 128         locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
 129         if (locale != NULL)
 130             return(locale);
 131 
 132         /* Continue without using country code */
 133 
 134         q = localeName + llen;
 135     }
 136 
 137     /* Try locale without territory, e.g. for Esperanto (eo) */
 138 
 139     memcpy(q, ".utf8", 6);
 140     locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
 141     if (locale != NULL)
 142         return(locale);
 143 
 144     /* Try to find most common country for language */
 145 
 146     if (llen != 2)
 147         return(NULL);
 148 
 149     region = (char *)xsltDefaultRegion((xmlChar *)localeName);
 150     if (region == NULL)
 151         return(NULL);
 152 
 153     q = localeName + llen;
 154     *q++ = '_';
 155     *q++ = region[0];
 156     *q++ = region[1];
 157     memcpy(q, ".utf8", 6);
 158     locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
 159 
 160     return(locale);
 161 #endif
 162 
 163 #ifdef XSLT_LOCALE_WINAPI
 164 {
 165     xsltLocale    locale = (xsltLocale)0;
 166     xmlChar       localeName[XSLTMAX_LANGTAGLEN+1];
 167     xmlChar       *q = localeName;
 168     const xmlChar *p = languageTag;
 169     int           i, llen;
 170     const xmlChar *region = NULL;
 171 
 172     if (languageTag == NULL) goto end;
 173 
 174     xsltEnumSupportedLocales();
 175 
 176     for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i)
 177     *q++ = TOLOWER(*p++);
 178     if (i == 0) goto end;
 179 
 180     llen = i;
 181     *q++ = '-';
 182     if (*p) { /*if country tag is given*/
 183     if (*p++ != '-') goto end;
 184 
 185     for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i)
 186         *q++ = TOUPPER(*p++);
 187     if (i == 0 || *p) goto end;
 188 
 189     *q = '\0';
 190     locale = xslt_locale_WINAPI(localeName);
 191     if (locale != (xsltLocale)0) goto end;
 192     }
 193     /* Try to find most common country for language */
 194     region = xsltDefaultRegion(localeName);
 195     if (region == NULL) goto end;
 196 
 197     strcpy(localeName + llen + 1, region);
 198     locale = xslt_locale_WINAPI(localeName);
 199 end:
 200     return(locale);
 201 }
 202 #endif
 203 
 204 #ifdef XSLT_LOCALE_NONE
 205     return(NULL);
 206 #endif
 207 }
 208 
 209 static const xmlChar*
 210 xsltDefaultRegion(const xmlChar *localeName) {
 211     xmlChar c;
 212     /* region should be xmlChar, but gcc warns on all string assignments */
 213     const char *region = NULL;
 214 
 215     c = localeName[1];
 216     /* This is based on the locales from glibc 2.3.3 */
 217 
 218     switch (localeName[0]) {
 219         case 'a':
 220             if (c == 'a' || c == 'm') region = "ET";
 221             else if (c == 'f') region = "ZA";
 222             else if (c == 'n') region = "ES";
 223             else if (c == 'r') region = "AE";
 224             else if (c == 'z') region = "AZ";
 225             break;
 226         case 'b':
 227             if (c == 'e') region = "BY";
 228             else if (c == 'g') region = "BG";
 229             else if (c == 'n') region = "BD";
 230             else if (c == 'r') region = "FR";
 231             else if (c == 's') region = "BA";
 232             break;
 233         case 'c':
 234             if (c == 'a') region = "ES";
 235             else if (c == 's') region = "CZ";
 236             else if (c == 'y') region = "GB";
 237             break;
 238         case 'd':
 239             if (c == 'a') region = "DK";
 240             else if (c == 'e') region = "DE";
 241             break;
 242         case 'e':
 243             if (c == 'l') region = "GR";
 244             else if (c == 'n' || c == 'o') region = "US";
 245             else if (c == 's' || c == 'u') region = "ES";
 246             else if (c == 't') region = "EE";
 247             break;
 248         case 'f':
 249             if (c == 'a') region = "IR";
 250             else if (c == 'i') region = "FI";
 251             else if (c == 'o') region = "FO";
 252             else if (c == 'r') region = "FR";
 253             break;
 254         case 'g':
 255             if (c == 'a') region = "IE";
 256             else if (c == 'l') region = "ES";
 257             else if (c == 'v') region = "GB";
 258             break;
 259         case 'h':
 260             if (c == 'e') region = "IL";
 261             else if (c == 'i') region = "IN";
 262             else if (c == 'r') region = "HT";
 263             else if (c == 'u') region = "HU";
 264             break;
 265         case 'i':
 266             if (c == 'd') region = "ID";
 267             else if (c == 's') region = "IS";
 268             else if (c == 't') region = "IT";
 269             else if (c == 'w') region = "IL";
 270             break;
 271         case 'j':
 272             if (c == 'a') region = "JP";
 273             break;
 274         case 'k':
 275             if (c == 'l') region = "GL";
 276             else if (c == 'o') region = "KR";
 277             else if (c == 'w') region = "GB";
 278             break;
 279         case 'l':
 280             if (c == 't') region = "LT";
 281             else if (c == 'v') region = "LV";
 282             break;
 283         case 'm':
 284             if (c == 'k') region = "MK";
 285             else if (c == 'l' || c == 'r') region = "IN";
 286             else if (c == 'n') region = "MN";
 287             else if (c == 's') region = "MY";
 288             else if (c == 't') region = "MT";
 289             break;
 290         case 'n':
 291             if (c == 'b' || c == 'n' || c == 'o') region = "NO";
 292             else if (c == 'e') region = "NP";
 293             else if (c == 'l') region = "NL";
 294             break;
 295         case 'o':
 296             if (c == 'm') region = "ET";
 297             break;
 298         case 'p':
 299             if (c == 'a') region = "IN";
 300             else if (c == 'l') region = "PL";
 301             else if (c == 't') region = "PT";
 302             break;
 303         case 'r':
 304             if (c == 'o') region = "RO";
 305             else if (c == 'u') region = "RU";
 306             break;
 307         case 's':
 308             switch (c) {
 309                 case 'e': region = "NO"; break;
 310                 case 'h': region = "YU"; break;
 311                 case 'k': region = "SK"; break;
 312                 case 'l': region = "SI"; break;
 313                 case 'o': region = "ET"; break;
 314                 case 'q': region = "AL"; break;
 315                 case 't': region = "ZA"; break;
 316                 case 'v': region = "SE"; break;
 317             }
 318             break;
 319         case 't':
 320             if (c == 'a' || c == 'e') region = "IN";
 321             else if (c == 'h') region = "TH";
 322             else if (c == 'i') region = "ER";
 323             else if (c == 'r') region = "TR";
 324             else if (c == 't') region = "RU";
 325             break;
 326         case 'u':
 327             if (c == 'k') region = "UA";
 328             else if (c == 'r') region = "PK";
 329             break;
 330         case 'v':
 331             if (c == 'i') region = "VN";
 332             break;
 333         case 'w':
 334             if (c == 'a') region = "BE";
 335             break;
 336         case 'x':
 337             if (c == 'h') region = "ZA";
 338             break;
 339         case 'z':
 340             if (c == 'h') region = "CN";
 341             else if (c == 'u') region = "ZA";
 342             break;
 343     }
 344     return((xmlChar *)region);
 345 }
 346 
 347 /**
 348  * xsltFreeLocale:
 349  * @locale: the locale to free
 350  *
 351  * Frees a locale created with xsltNewLocale
 352  */
 353 void
 354 xsltFreeLocale(xsltLocale locale) {
 355 #ifdef XSLT_LOCALE_XLOCALE
 356     freelocale(locale);
 357 #endif
 358 }
 359 
 360 /**
 361  * xsltStrxfrm:
 362  * @locale: locale created with xsltNewLocale
 363  * @string: UTF-8 string to transform
 364  *
 365  * Transforms a string according to locale. The transformed string must then be
 366  * compared with xsltLocaleStrcmp and freed with xmlFree.
 367  *
 368  * Returns the transformed string or NULL on error
 369  */
 370 xsltLocaleChar *
 371 xsltStrxfrm(xsltLocale locale, const xmlChar *string)
 372 {
 373 #ifdef XSLT_LOCALE_NONE
 374     return(NULL);
 375 #else
 376     size_t xstrlen, r;
 377     xsltLocaleChar *xstr;
 378 
 379 #ifdef XSLT_LOCALE_XLOCALE
 380     xstrlen = strxfrm_l(NULL, (const char *)string, 0, locale) + 1;
 381     xstr = (xsltLocaleChar *) xmlMalloc(xstrlen);
 382     if (xstr == NULL) {
 383     xsltTransformError(NULL, NULL, NULL,
 384         "xsltStrxfrm : out of memory error\n");
 385     return(NULL);
 386     }
 387 
 388     r = strxfrm_l((char *)xstr, (const char *)string, xstrlen, locale);
 389 #endif
 390 
 391 #ifdef XSLT_LOCALE_WINAPI
 392     xstrlen = MultiByteToWideChar(CP_UTF8, 0, string, -1, NULL, 0);
 393     if (xstrlen == 0) {
 394         xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : MultiByteToWideChar check failed\n");
 395         return(NULL);
 396     }
 397     xstr = (xsltLocaleChar*) xmlMalloc(xstrlen * sizeof(xsltLocaleChar));
 398     if (xstr == NULL) {
 399         xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : out of memory\n");
 400         return(NULL);
 401     }
 402     r = MultiByteToWideChar(CP_UTF8, 0, string, -1, xstr, xstrlen);
 403     if (r == 0) {
 404         xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : MultiByteToWideChar failed\n");
 405         xmlFree(xstr);
 406         return(NULL);
 407     }
 408     return(xstr);
 409 #endif /* XSLT_LOCALE_WINAPI */
 410 
 411     if (r >= xstrlen) {
 412     xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : strxfrm failed\n");
 413         xmlFree(xstr);
 414         return(NULL);
 415     }
 416 
 417     return(xstr);
 418 #endif /* XSLT_LOCALE_NONE */
 419 }
 420 
 421 /**
 422  * xsltLocaleStrcmp:
 423  * @locale: a locale identifier
 424  * @str1: a string transformed with xsltStrxfrm
 425  * @str2: a string transformed with xsltStrxfrm
 426  *
 427  * Compares two strings transformed with xsltStrxfrm
 428  *
 429  * Returns a value < 0 if str1 sorts before str2,
 430  *         a value > 0 if str1 sorts after str2,
 431  *         0 if str1 and str2 are equal wrt sorting
 432  */
 433 int
 434 xsltLocaleStrcmp(xsltLocale locale, const xsltLocaleChar *str1, const xsltLocaleChar *str2) {
 435     (void)locale;
 436 #ifdef XSLT_LOCALE_WINAPI
 437 {
 438     int ret;
 439     if (str1 == str2) return(0);
 440     if (str1 == NULL) return(-1);
 441     if (str2 == NULL) return(1);
 442     ret = CompareStringW(locale, 0, str1, -1, str2, -1);
 443     if (ret == 0) {
 444         xsltTransformError(NULL, NULL, NULL, "xsltLocaleStrcmp : CompareStringW fail\n");
 445         return(0);
 446     }
 447     return(ret - 2);
 448 }
 449 #else
 450     return(xmlStrcmp(str1, str2));
 451 #endif
 452 }
 453 
 454 #ifdef XSLT_LOCALE_WINAPI
 455 /**
 456  * xsltCountSupportedLocales:
 457  * @lcid: not used
 458  *
 459  * callback used to count locales
 460  *
 461  * Returns TRUE
 462  */
 463 BOOL CALLBACK
 464 xsltCountSupportedLocales(LPSTR lcid) {
 465     (void) lcid;
 466     ++xsltLocaleListSize;
 467     return(TRUE);
 468 }
 469 
 470 /**
 471  * xsltIterateSupportedLocales:
 472  * @lcid: not used
 473  *
 474  * callback used to track locales
 475  *
 476  * Returns TRUE if not at the end of the array
 477  */
 478 BOOL CALLBACK
 479 xsltIterateSupportedLocales(LPSTR lcid) {
 480     static int count = 0;
 481     xmlChar    iso639lang [XSLTMAX_ISO639LANGLEN  +1];
 482     xmlChar    iso3136ctry[XSLTMAX_ISO3166CNTRYLEN+1];
 483     int        k, l;
 484     xsltRFC1766Info *p = xsltLocaleList + count;
 485 
 486     k = sscanf(lcid, "%lx", (long*)&p->lcid);
 487     if (k < 1) goto end;
 488     /*don't count terminating null character*/
 489     k = GetLocaleInfoA(p->lcid, LOCALE_SISO639LANGNAME , iso639lang , sizeof(iso639lang ));
 490     if (--k < 1) goto end;
 491     l = GetLocaleInfoA(p->lcid, LOCALE_SISO3166CTRYNAME, iso3136ctry, sizeof(iso3136ctry));
 492     if (--l < 1) goto end;
 493 
 494     {  /*fill results*/
 495     xmlChar    *q = p->tag;
 496     memcpy(q, iso639lang, k);
 497     q += k;
 498     *q++ = '-';
 499     memcpy(q, iso3136ctry, l);
 500     q += l;
 501     *q = '\0';
 502     }
 503     ++count;
 504 end:
 505     return((count < xsltLocaleListSize) ? TRUE : FALSE);
 506 }
 507 
 508 
 509 static void
 510 xsltEnumSupportedLocales(void) {
 511     xmlRMutexLock(xsltLocaleMutex);
 512     if (xsltLocaleListSize <= 0) {
 513     size_t len;
 514 
 515     EnumSystemLocalesA(xsltCountSupportedLocales, LCID_SUPPORTED);
 516 
 517     len = xsltLocaleListSize * sizeof(xsltRFC1766Info);
 518     xsltLocaleList = xmlMalloc(len);
 519     memset(xsltLocaleList, 0, len);
 520     EnumSystemLocalesA(xsltIterateSupportedLocales, LCID_SUPPORTED);
 521     }
 522     xmlRMutexUnlock(xsltLocaleMutex);
 523 }
 524 
 525 #endif /*def XSLT_LOCALE_WINAPI*/