1 /*
   2  * Copyright (c) 2004, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 #include <stdio.h>
  26 #include <stddef.h>
  27 #include <stdlib.h>
  28 #include <string.h>
  29 #include <ctype.h>
  30 #include <locale.h>
  31 #include <langinfo.h>
  32 #include <iconv.h>
  33 
  34 /* Routines to convert back and forth between Platform Encoding and UTF-8 */
  35 
  36 /* Use THIS_FILE when it is available. */
  37 #ifndef THIS_FILE
  38     #define THIS_FILE __FILE__
  39 #endif
  40 
  41 /* Error and assert macros */
  42 #define UTF_ERROR(m) utfError(THIS_FILE, __LINE__,  m)
  43 #define UTF_ASSERT(x) ( (x)==0 ? UTF_ERROR("ASSERT ERROR " #x) : (void)0 )
  44 #define UTF_DEBUG(x)
  45 
  46 /* Global variables */
  47 static iconv_t iconvToPlatform          = (iconv_t)-1;
  48 static iconv_t iconvFromPlatform        = (iconv_t)-1;
  49 
  50 /*
  51  * Error handler
  52  */
  53 static void
  54 utfError(char *file, int line, char *message)
  55 {
  56     (void)fprintf(stderr, "UTF ERROR [\"%s\":%d]: %s\n", file, line, message);
  57     abort();
  58 }
  59 
  60 /*
  61  * Initialize all utf processing.
  62  */
  63 static void
  64 utfInitialize(void)
  65 {
  66     const char* codeset;
  67 
  68     /* Set the locale from the environment */
  69     (void)setlocale(LC_ALL, "");
  70 
  71     /* Get the codeset name */
  72     codeset = (char*)nl_langinfo(CODESET);
  73     if ( codeset == NULL || codeset[0] == 0 ) {
  74         UTF_DEBUG(("NO codeset returned by nl_langinfo(CODESET)\n"));
  75         return;
  76     }
  77 
  78     UTF_DEBUG(("Codeset = %s\n", codeset));
  79 
  80 #ifdef MACOSX
  81     /* On Mac, if US-ASCII, but with no env hints, use UTF-8 */
  82     const char* env_lang = getenv("LANG");
  83     const char* env_lc_all = getenv("LC_ALL");
  84     const char* env_lc_ctype = getenv("LC_CTYPE");
  85 
  86     if (strcmp(codeset,"US-ASCII") == 0 &&
  87         (env_lang == NULL || strlen(env_lang) == 0) &&
  88         (env_lc_all == NULL || strlen(env_lc_all) == 0) &&
  89         (env_lc_ctype == NULL || strlen(env_lc_ctype) == 0)) {    
  90         codeset = "UTF-8";
  91     }
  92 #endif
  93     
  94     /* If we don't need this, skip it */
  95     if (strcmp(codeset, "UTF-8") == 0 || strcmp(codeset, "utf8") == 0 ) {
  96         UTF_DEBUG(("NO iconv() being used because it is not needed\n"));
  97         return;
  98     }
  99 
 100     /* Open conversion descriptors */
 101     iconvToPlatform   = iconv_open(codeset, "UTF-8");
 102     if ( iconvToPlatform == (iconv_t)-1 ) {
 103         UTF_ERROR("Failed to complete iconv_open() setup");
 104     }
 105     iconvFromPlatform = iconv_open("UTF-8", codeset);
 106     if ( iconvFromPlatform == (iconv_t)-1 ) {
 107         UTF_ERROR("Failed to complete iconv_open() setup");
 108     }
 109 }
 110 
 111 /*
 112  * Terminate all utf processing
 113  */
 114 static void
 115 utfTerminate(void)
 116 {
 117     if ( iconvFromPlatform!=(iconv_t)-1 ) {
 118         (void)iconv_close(iconvFromPlatform);
 119     }
 120     if ( iconvToPlatform!=(iconv_t)-1 ) {
 121         (void)iconv_close(iconvToPlatform);
 122     }
 123     iconvToPlatform   = (iconv_t)-1;
 124     iconvFromPlatform = (iconv_t)-1;
 125 }
 126 
 127 /*
 128  * Do iconv() conversion.
 129  *    Returns length or -1 if output overflows.
 130  */
 131 static int
 132 iconvConvert(iconv_t ic, char *bytes, int len, char *output, int outputMaxLen)
 133 {
 134     int outputLen = 0;
 135 
 136     UTF_ASSERT(bytes);
 137     UTF_ASSERT(len>=0);
 138     UTF_ASSERT(output);
 139     UTF_ASSERT(outputMaxLen>len);
 140 
 141     output[0] = 0;
 142     outputLen = 0;
 143 
 144     if ( ic != (iconv_t)-1 ) {
 145         int          returnValue;
 146         size_t       inLeft;
 147         size_t       outLeft;
 148         char        *inbuf;
 149         char        *outbuf;
 150 
 151         inbuf        = bytes;
 152         outbuf       = output;
 153         inLeft       = len;
 154         outLeft      = outputMaxLen;
 155         returnValue  = iconv(ic, (void*)&inbuf, &inLeft, &outbuf, &outLeft);
 156         if ( returnValue >= 0 && inLeft==0 ) {
 157             outputLen = outputMaxLen-outLeft;
 158             output[outputLen] = 0;
 159             return outputLen;
 160         }
 161 
 162         /* Failed to do the conversion */
 163         UTF_DEBUG(("iconv() failed to do the conversion\n"));
 164         return -1;
 165     }
 166 
 167     /* Just copy bytes */
 168     outputLen = len;
 169     (void)memcpy(output, bytes, len);
 170     output[len] = 0;
 171     return outputLen;
 172 }
 173 
 174 /*
 175  * Convert UTF-8 to Platform Encoding.
 176  *    Returns length or -1 if output overflows.
 177  */
 178 static int
 179 utf8ToPlatform(char *utf8, int len, char *output, int outputMaxLen)
 180 {
 181     return iconvConvert(iconvToPlatform, utf8, len, output, outputMaxLen);
 182 }
 183 
 184 /*
 185  * Convert Platform Encoding to UTF-8.
 186  *    Returns length or -1 if output overflows.
 187  */
 188 static int
 189 platformToUtf8(char *str, int len, char *output, int outputMaxLen)
 190 {
 191     return iconvConvert(iconvFromPlatform, str, len, output, outputMaxLen);
 192 }
 193 
 194 int
 195 convertUft8ToPlatformString(char* utf8_str, int utf8_len, char* platform_str, int platform_len) {
 196     if (iconvToPlatform ==  (iconv_t)-1) {
 197         utfInitialize();
 198     }
 199     return utf8ToPlatform(utf8_str, utf8_len, platform_str, platform_len);
 200 }