src/share/vm/utilities/utf8.cpp

Print this page
rev 6283 : redundant memcpy


 312 
 313 int UNICODE::utf8_size(jchar c) {
 314   if ((0x0001 <= c) && (c <= 0x007F)) return 1;
 315   if (c <= 0x07FF) return 2;
 316   return 3;
 317 }
 318 
 319 int UNICODE::utf8_length(jchar* base, int length) {
 320   int result = 0;
 321   for (int index = 0; index < length; index++) {
 322     jchar c = base[index];
 323     if ((0x0001 <= c) && (c <= 0x007F)) result += 1;
 324     else if (c <= 0x07FF) result += 2;
 325     else result += 3;
 326   }
 327   return result;
 328 }
 329 
 330 char* UNICODE::as_utf8(jchar* base, int length) {
 331   int utf8_len = utf8_length(base, length);
 332   u_char* result = NEW_RESOURCE_ARRAY(u_char, utf8_len + 1);
 333   u_char* p = result;
 334   for (int index = 0; index < length; index++) {
 335     p = utf8_write(p, base[index]);
 336   }
 337   *p = '\0';
 338   assert(p == &result[utf8_len], "length prediction must be correct");
 339   return (char*) result;
 340 }
 341 
 342 char* UNICODE::as_utf8(jchar* base, int length, char* buf, int buflen) {
 343   u_char* p = (u_char*)buf;
 344   u_char* end = (u_char*)buf + buflen;
 345   for (int index = 0; index < length; index++) {
 346     jchar c = base[index];
 347     if (p + utf8_size(c) >= end) break;      // string is truncated
 348     p = utf8_write(p, base[index]);

 349   }
 350   *p = '\0';
 351   return buf;
 352 }
 353 
 354 void UNICODE::convert_to_utf8(const jchar* base, int length, char* utf8_buffer) {
 355   for(int index = 0; index < length; index++) {
 356     utf8_buffer = (char*)utf8_write((u_char*)utf8_buffer, base[index]);
 357   }
 358   *utf8_buffer = '\0';
 359 }
 360 
 361 // Returns the length of the quoted-ASCII form of the jchar string base[0..length): printable ASCII (32..126) counts one each; other chars presumably six (backslash-u escape, as in as_quoted_ascii) - TODO confirm
 362 int UNICODE::quoted_ascii_length(jchar* base, int length) {
 363   int result = 0;
 364   for (int i = 0; i < length; i++) {
 365     jchar c = base[i];
 366     if (c >= 32 && c < 127) {
 367       result++;
 368     } else {


 372   return result;
 373 }
 374 
 375 // converts a utf8 string to quoted ascii
 376 void UNICODE::as_quoted_ascii(const jchar* base, int length, char* buf, int buflen) {
 377   char* p = buf;
 378   char* end = buf + buflen;
 379   for (int index = 0; index < length; index++) {
 380     jchar c = base[index];
 381     if (c >= 32 && c < 127) {
 382       if (p + 1 >= end) break;      // string is truncated
 383       *p++ = (char)c;
 384     } else {
 385       if (p + 6 >= end) break;      // string is truncated
 386       sprintf(p, "\\u%04x", c);
 387       p += 6;
 388     }
 389   }
 390   *p = '\0';
 391 }




























 312 
 313 int UNICODE::utf8_size(jchar c) {
 314   if ((0x0001 <= c) && (c <= 0x007F)) return 1;
 315   if (c <= 0x07FF) return 2;
 316   return 3;
 317 }
 318 
 319 int UNICODE::utf8_length(jchar* base, int length) {
 320   int result = 0;
 321   for (int index = 0; index < length; index++) {
 322     jchar c = base[index];
 323     if ((0x0001 <= c) && (c <= 0x007F)) result += 1;
 324     else if (c <= 0x07FF) result += 2;
 325     else result += 3;
 326   }
 327   return result;
 328 }
 329 
 330 char* UNICODE::as_utf8(jchar* base, int length) {
 331   int utf8_len = utf8_length(base, length);
 332   u_char* buf = NEW_RESOURCE_ARRAY(u_char, utf8_len + 1);
 333   char* result = as_utf8(base, length, (char*) buf, utf8_len + 1);
 334   assert((int) strlen(result) == utf8_len, "length prediction must be correct");
 335   return result;




 336 }
 337 
 338 char* UNICODE::as_utf8(jchar* base, int length, char* buf, int buflen) {
 339   u_char* p = (u_char*)buf;

 340   for (int index = 0; index < length; index++) {
 341     jchar c = base[index];
 342     buflen -= utf8_size(c);
 343     if (buflen <= 0) break; // string is truncated
 344     p = utf8_write(p, c);
 345   }
 346   *p = '\0';
 347   return buf;
 348 }
 349 
 350 void UNICODE::convert_to_utf8(const jchar* base, int length, char* utf8_buffer) {
 351   for(int index = 0; index < length; index++) {
 352     utf8_buffer = (char*)utf8_write((u_char*)utf8_buffer, base[index]);
 353   }
 354   *utf8_buffer = '\0';
 355 }
 356 
 357 // Returns the length of the quoted-ASCII form of the jchar string base[0..length): printable ASCII (32..126) counts one each; other chars presumably six (backslash-u escape, as in as_quoted_ascii) - TODO confirm
 358 int UNICODE::quoted_ascii_length(jchar* base, int length) {
 359   int result = 0;
 360   for (int i = 0; i < length; i++) {
 361     jchar c = base[i];
 362     if (c >= 32 && c < 127) {
 363       result++;
 364     } else {


 368   return result;
 369 }
 370 
 371 // converts a utf8 string to quoted ascii
 372 void UNICODE::as_quoted_ascii(const jchar* base, int length, char* buf, int buflen) {
 373   char* p = buf;
 374   char* end = buf + buflen;
 375   for (int index = 0; index < length; index++) {
 376     jchar c = base[index];
 377     if (c >= 32 && c < 127) {
 378       if (p + 1 >= end) break;      // string is truncated
 379       *p++ = (char)c;
 380     } else {
 381       if (p + 6 >= end) break;      // string is truncated
 382       sprintf(p, "\\u%04x", c);
 383       p += 6;
 384     }
 385   }
 386   *p = '\0';
 387 }
 388 
 389 #ifndef PRODUCT
 390 void TestAsUtf8() {
 391   char res[60];
 392   jchar str[20];
 393 
 394   for (int i = 0; i < 20; i++) {
 395     str[i] = 0x0800; // char that is 2B in UTF-16 but 3B in UTF-8
 396   }
 397   str[19] = (jchar)'\0';
 398 
 399   // The resulting string in UTF-8 is 3*19 bytes long, but should be truncated
 400   UNICODE::as_utf8(str, 19, res, 10);
 401   assert(strlen(res) == 9, "string should be truncated here");
 402 
 403   UNICODE::as_utf8(str, 19, res, 18);
 404   assert(strlen(res) == 15, "string should be truncated here");
 405 
 406   UNICODE::as_utf8(str, 19, res, 20);
 407   assert(strlen(res) == 18, "string should be truncated here");
 408 
 409   // Test with an "unbounded" buffer
 410   UNICODE::as_utf8(str, 19, res, INT_MAX);
 411   assert(strlen(res) == 3*19, "string should end here");
 412 }
 413 #endif