< prev index next >

src/java.base/macosx/native/libjava/java_props_macosx.c

Print this page
rev 17428 : [mq]: 8160199

@@ -45,10 +45,11 @@
 }
 
 #define LOCALEIDLENGTH  128
 char *getMacOSXLocale(int cat) {
     const char* retVal = NULL;
+    char localeString[LOCALEIDLENGTH];
 
     switch (cat) {
     case LC_MESSAGES:
         {
             // get preferred language code

@@ -72,77 +73,119 @@
                 CFRelease(languages);
                 return NULL;
             }
             CFRelease(languages);
 
-            retVal = languageString;
-
-            // Special case for Portuguese in Brazil:
-            // The language code needs the "_BR" region code (to distinguish it
-            // from Portuguese in Portugal), but this is missing when using the
-            // "Portuguese (Brazil)" language.
-            // If language is "pt" and the current locale is pt_BR, return pt_BR.
-            char localeString[LOCALEIDLENGTH];
-            if (strcmp(retVal, "pt") == 0 &&
+            // Explicitly supply region, if there is none
                     CFStringGetCString(CFLocaleGetIdentifier(CFLocaleCopyCurrent()),
-                                       localeString, LOCALEIDLENGTH, CFStringGetSystemEncoding()) &&
-                    strcmp(localeString, "pt_BR") == 0) {
-                retVal = localeString;
+                               localeString, LOCALEIDLENGTH, CFStringGetSystemEncoding());
+            char *hyphenPos = strchr(languageString, '-');
+            int langStrLen = strlen(languageString);
+
+            if (hyphenPos == NULL || // languageString contains ISO639 only, e.g., "en"
+                languageString + langStrLen - hyphenPos == 5) { // ISO639-ScriptCode, e.g., "en-Latn"
+                char *underscorePos = strrchr(localeString, '_');
+                char *region = NULL;
+
+                if (underscorePos != NULL) {
+                    region = underscorePos + 1;
+                }
+
+                if (region != NULL) {
+                    strcat(languageString, "-");
+                    strcat(languageString, region);
             }
         }
+
+            retVal = languageString;
+        }
         break;
+
     default:
         {
-            char localeString[LOCALEIDLENGTH];
             if (!CFStringGetCString(CFLocaleGetIdentifier(CFLocaleCopyCurrent()),
                                     localeString, LOCALEIDLENGTH, CFStringGetSystemEncoding())) {
                 return NULL;
             }
+
             retVal = localeString;
         }
         break;
     }
 
     if (retVal != NULL) {
-        // Language IDs use the language designators and (optional) region
-        // and script designators of BCP 47.  So possible formats are:
-        //
-        // "en"         (language designator only)
-        // "haw"        (3-letter lanuage designator)
-        // "en-GB"      (language with alpha-2 region designator)
-        // "es-419"     (language with 3-digit UN M.49 area code)
-        // "zh-Hans"    (language with ISO 15924 script designator)
-        // "zh-Hans-US"  (language with ISO 15924 script designator and region)
-        // "zh-Hans-419" (language with ISO 15924 script designator and UN M.49)
-        //
-        // In the case of region designators (alpha-2 and/or UN M.49), we convert
-        // to our locale string format by changing '-' to '_'.  That is, if
-        // the '-' is followed by fewer than 4 chars.
-        char* scriptOrRegion = strchr(retVal, '-');
-        if (scriptOrRegion != NULL) {
-            int length = strlen(scriptOrRegion);
-            if (length > 5) {
-                // Region and script both exist. Honor the script for now
-                scriptOrRegion[5] = '\0';
-            } else if (length < 5) {
-                *scriptOrRegion = '_';
-
-                assert((length == 3 &&
-                    // '-' followed by a 2 character region designator
-                      isalpha(scriptOrRegion[1]) &&
-                      isalpha(scriptOrRegion[2])) ||
-                       (length == 4 &&
-                    // '-' followed by a 3-digit UN M.49 area code
-                      isdigit(scriptOrRegion[1]) &&
-                      isdigit(scriptOrRegion[2]) &&
-                      isdigit(scriptOrRegion[3])));
+        return strdup(convertToPOSIXLocale(retVal));
             }
+
+    return NULL;
+}
+
+/* Language IDs use the language designators and (optional) region
+ * and script designators of BCP 47.  So possible formats are:
+ *
+ * "en"         (language designator only)
+ * "haw"        (3-letter lanuage designator)
+ * "en-GB"      (language with alpha-2 region designator)
+ * "es-419"     (language with 3-digit UN M.49 area code)
+ * "zh-Hans"    (language with ISO 15924 script designator)
+ * "zh-Hans-US"  (language with ISO 15924 script designator and region)
+ * "zh-Hans-419" (language with ISO 15924 script designator and UN M.49)
+ *
+ * convert these tags into POSIX conforming locale string, i.e.,
+ * lang{_region}{@script}. e.g.,
+ * for "zh-Hans-US" into "zh_US@Hans"
+ */
+const char * convertToPOSIXLocale(const char* src) {
+    char* scriptRegion = strchr(src, '-');
+    if (scriptRegion != NULL) {
+        int length = strlen(scriptRegion);
+        char* region = strchr(scriptRegion + 1, '-');
+        char* atMark = NULL;
+
+        if (region == NULL) {
+            // CFLocaleGetIdentifier() returns '_' before region
+            region = strchr(scriptRegion + 1, '_');
         }
 
-        return strdup(retVal);
+        *scriptRegion = '_';
+        if (length > 5) {
+            // Region and script both exist.
+            char tmp[4];
+            int regionLength = length - 6;
+            atMark = scriptRegion + 1 + regionLength;
+            memcpy(tmp, scriptRegion + 1, 4);
+            memmove(scriptRegion + 1, region + 1, regionLength);
+            memcpy(atMark + 1, tmp, 4);
+        } else if (length == 5) {
+            // script only
+            atMark = scriptRegion;
+        }
+
+        if (atMark != NULL) {
+            *atMark = '@';
+
+            // assert script code
+            assert(isalpha(atMark[1]) &&
+                   isalpha(atMark[2]) &&
+                   isalpha(atMark[3]) &&
+                   isalpha(atMark[4]));
+        }
+
+        assert((length == 3 || length == 8) &&
+            // '_' followed by a 2 character region designator
+                isalpha(scriptRegion[1]) &&
+                isalpha(scriptRegion[2]) ||
+                (length == 4 || length == 9) &&
+            // '_' followed by a 3-digit UN M.49 area code
+                isdigit(scriptRegion[1]) &&
+                isdigit(scriptRegion[2]) &&
+                isdigit(scriptRegion[3]) ||
+            // '@' followed by a 4 character script code (already validated above)
+                length == 5);
     }
-    return NULL;
+
+    return src;
 }
 
 char *setupMacOSXLocale(int cat) {
     char * ret = getMacOSXLocale(cat);
 
< prev index next >