< prev index next >

src/hotspot/share/utilities/utf8.cpp

Print this page


 384         c = (buffer[i] & 0xF) << 12;
 385         i += 2;
 386         if ((i < length) && ((buffer[i-1] & 0xC0) == 0x80) && ((buffer[i] & 0xC0) == 0x80)) {
 387           c += ((buffer[i-1] & 0x3F) << 6) + (buffer[i] & 0x3F);
 388           if (version_leq_47 || c >= 0x800) {
 389             break;
 390           }
 391         }
 392         return false;
 393     }  // end of switch
 394   } // end of for
 395   return true;
 396 }
 397 
 398 //-------------------------------------------------------------------------------------
 399 
 400 bool UNICODE::is_latin1(jchar c) {
 401   return (c <= 0x00FF);
 402 }
 403 
 404 bool UNICODE::is_latin1(jchar* base, int length) {
 405   for (int index = 0; index < length; index++) {
 406     if (base[index] > 0x00FF) {
 407       return false;
 408     }
 409   }
 410   return true;
 411 }
 412 
 413 int UNICODE::utf8_size(jchar c) {
 414   if ((0x0001 <= c) && (c <= 0x007F)) {
 415     // ASCII character
 416     return 1;
 417   } else  if (c <= 0x07FF) {
 418     return 2;
 419   } else {
 420     return 3;
 421   }
 422 }
 423 
 424 int UNICODE::utf8_size(jbyte c) {
 425   if (c >= 0x01) {
 426     // ASCII character. Check is equivalent to
 427     // (0x01 <= c) && (c <= 0x7F) because c is signed.
 428     return 1;
 429   } else {
 430     // Non-ASCII character or 0x00 which needs to be
 431     // two-byte encoded as 0xC080 in modified UTF-8.
 432     return 2;
 433   }
 434 }
 435 
 436 template<typename T>
 437 int UNICODE::utf8_length(T* base, int length) {
 438   int result = 0;
 439   for (int index = 0; index < length; index++) {
 440     T c = base[index];
 441     result += utf8_size(c);
 442   }
 443   return result;
 444 }
 445 
 446 template<typename T>
 447 char* UNICODE::as_utf8(T* base, int& length) {
 448   int utf8_len = utf8_length(base, length);
 449   u_char* buf = NEW_RESOURCE_ARRAY(u_char, utf8_len + 1);
 450   char* result = as_utf8(base, length, (char*) buf, utf8_len + 1);
 451   assert((int) strlen(result) == utf8_len, "length prediction must be correct");
 452   // Set string length to uft8 length
 453   length = utf8_len;
 454   return (char*) result;
 455 }
 456 
 457 char* UNICODE::as_utf8(jchar* base, int length, char* buf, int buflen) {
 458   u_char* p = (u_char*)buf;
 459   for (int index = 0; index < length; index++) {
 460     jchar c = base[index];
 461     buflen -= utf8_size(c);
 462     if (buflen <= 0) break; // string is truncated
 463     p = utf8_write(p, c);
 464   }
 465   *p = '\0';
 466   return buf;
 467 }
 468 
 469 char* UNICODE::as_utf8(jbyte* base, int length, char* buf, int buflen) {
 470   u_char* p = (u_char*)buf;
 471   u_char* end = (u_char*)buf + buflen;
 472   for (int index = 0; index < length; index++) {
 473     jbyte c = base[index];
 474     int sz = utf8_size(c);
 475     buflen -= sz;
 476     if (buflen <= 0) break; // string is truncated
 477     if (sz == 1) {
 478       // Copy ASCII characters (UTF-8 is ASCII compatible)
 479       *p++ = c;
 480     } else {
 481       // Non-ASCII character or 0x00 which should
 482       // be encoded as 0xC080 in "modified" UTF8.
 483       p = utf8_write(p, ((jchar) c) & 0xff);
 484     }
 485   }
 486   *p = '\0';
 487   return buf;
 488 }
 489 
 490 void UNICODE::convert_to_utf8(const jchar* base, int length, char* utf8_buffer) {
 491   for(int index = 0; index < length; index++) {
 492     utf8_buffer = (char*)utf8_write((u_char*)utf8_buffer, base[index]);
 493   }
 494   *utf8_buffer = '\0';
 495 }
 496 
 497 // returns the quoted ascii length of a unicode string
 498 template<typename T>
 499 int UNICODE::quoted_ascii_length(T* base, int length) {
 500   int result = 0;
 501   for (int i = 0; i < length; i++) {
 502     T c = base[i];
 503     if (c >= 32 && c < 127) {
 504       result++;
 505     } else {
 506       result += 6;
 507     }
 508   }
 509   return result;
 510 }
 511 
 512 // converts a unicode string to quoted ascii
 513 template<typename T>
 514 void UNICODE::as_quoted_ascii(const T* base, int length, char* buf, int buflen) {
 515   char* p = buf;
 516   char* end = buf + buflen;
 517   for (int index = 0; index < length; index++) {
 518     T c = base[index];
 519     if (c >= 32 && c < 127) {
 520       if (p + 1 >= end) break;      // string is truncated
 521       *p++ = (char)c;
 522     } else {
 523       if (p + 6 >= end) break;      // string is truncated
 524       sprintf(p, "\\u%04x", c);
 525       p += 6;
 526     }
 527   }
 528   *p = '\0';
 529 }
 530 
 531 // Explicit instantiation for all supported types.
 532 template int UNICODE::utf8_length(jbyte* base, int length);
 533 template int UNICODE::utf8_length(jchar* base, int length);
 534 template char* UNICODE::as_utf8(jbyte* base, int& length);
 535 template char* UNICODE::as_utf8(jchar* base, int& length);
 536 template int UNICODE::quoted_ascii_length<jbyte>(jbyte* base, int length);
 537 template int UNICODE::quoted_ascii_length<jchar>(jchar* base, int length);
 538 template void UNICODE::as_quoted_ascii<jbyte>(const jbyte* base, int length, char* buf, int buflen);
 539 template void UNICODE::as_quoted_ascii<jchar>(const jchar* base, int length, char* buf, int buflen);


 384         c = (buffer[i] & 0xF) << 12;
 385         i += 2;
 386         if ((i < length) && ((buffer[i-1] & 0xC0) == 0x80) && ((buffer[i] & 0xC0) == 0x80)) {
 387           c += ((buffer[i-1] & 0x3F) << 6) + (buffer[i] & 0x3F);
 388           if (version_leq_47 || c >= 0x800) {
 389             break;
 390           }
 391         }
 392         return false;
 393     }  // end of switch
 394   } // end of for
 395   return true;
 396 }
 397 
 398 //-------------------------------------------------------------------------------------
 399 
 400 bool UNICODE::is_latin1(jchar c) {
 401   return (c <= 0x00FF);
 402 }
 403 
 404 bool UNICODE::is_latin1(const jchar* base, int length) {
 405   for (int index = 0; index < length; index++) {
 406     if (base[index] > 0x00FF) {
 407       return false;
 408     }
 409   }
 410   return true;
 411 }
 412 
 413 int UNICODE::utf8_size(jchar c) {
 414   if ((0x0001 <= c) && (c <= 0x007F)) {
 415     // ASCII character
 416     return 1;
 417   } else  if (c <= 0x07FF) {
 418     return 2;
 419   } else {
 420     return 3;
 421   }
 422 }
 423 
 424 int UNICODE::utf8_size(jbyte c) {
 425   if (c >= 0x01) {
 426     // ASCII character. Check is equivalent to
 427     // (0x01 <= c) && (c <= 0x7F) because c is signed.
 428     return 1;
 429   } else {
 430     // Non-ASCII character or 0x00 which needs to be
 431     // two-byte encoded as 0xC080 in modified UTF-8.
 432     return 2;
 433   }
 434 }
 435 
 436 template<typename T>
 437 int UNICODE::utf8_length(const T* base, int length) {
 438   int result = 0;
 439   for (int index = 0; index < length; index++) {
 440     T c = base[index];
 441     result += utf8_size(c);
 442   }
 443   return result;
 444 }
 445 
 446 template<typename T>
 447 char* UNICODE::as_utf8(const T* base, int& length) {
 448   int utf8_len = utf8_length(base, length);
 449   u_char* buf = NEW_RESOURCE_ARRAY(u_char, utf8_len + 1);
 450   char* result = as_utf8(base, length, (char*) buf, utf8_len + 1);
 451   assert((int) strlen(result) == utf8_len, "length prediction must be correct");
 452   // Set string length to uft8 length
 453   length = utf8_len;
 454   return (char*) result;
 455 }
 456 
 457 char* UNICODE::as_utf8(const jchar* base, int length, char* buf, int buflen) {
 458   u_char* p = (u_char*)buf;
 459   for (int index = 0; index < length; index++) {
 460     jchar c = base[index];
 461     buflen -= utf8_size(c);
 462     if (buflen <= 0) break; // string is truncated
 463     p = utf8_write(p, c);
 464   }
 465   *p = '\0';
 466   return buf;
 467 }
 468 
 469 char* UNICODE::as_utf8(const jbyte* base, int length, char* buf, int buflen) {
 470   u_char* p = (u_char*)buf;
 471   u_char* end = (u_char*)buf + buflen;
 472   for (int index = 0; index < length; index++) {
 473     jbyte c = base[index];
 474     int sz = utf8_size(c);
 475     buflen -= sz;
 476     if (buflen <= 0) break; // string is truncated
 477     if (sz == 1) {
 478       // Copy ASCII characters (UTF-8 is ASCII compatible)
 479       *p++ = c;
 480     } else {
 481       // Non-ASCII character or 0x00 which should
 482       // be encoded as 0xC080 in "modified" UTF8.
 483       p = utf8_write(p, ((jchar) c) & 0xff);
 484     }
 485   }
 486   *p = '\0';
 487   return buf;
 488 }
 489 
 490 void UNICODE::convert_to_utf8(const jchar* base, int length, char* utf8_buffer) {
 491   for(int index = 0; index < length; index++) {
 492     utf8_buffer = (char*)utf8_write((u_char*)utf8_buffer, base[index]);
 493   }
 494   *utf8_buffer = '\0';
 495 }
 496 
 497 // returns the quoted ascii length of a unicode string
 498 template<typename T>
 499 int UNICODE::quoted_ascii_length(const T* base, int length) {
 500   int result = 0;
 501   for (int i = 0; i < length; i++) {
 502     T c = base[i];
 503     if (c >= 32 && c < 127) {
 504       result++;
 505     } else {
 506       result += 6;
 507     }
 508   }
 509   return result;
 510 }
 511 
 512 // converts a unicode string to quoted ascii
 513 template<typename T>
 514 void UNICODE::as_quoted_ascii(const T* base, int length, char* buf, int buflen) {
 515   char* p = buf;
 516   char* end = buf + buflen;
 517   for (int index = 0; index < length; index++) {
 518     T c = base[index];
 519     if (c >= 32 && c < 127) {
 520       if (p + 1 >= end) break;      // string is truncated
 521       *p++ = (char)c;
 522     } else {
 523       if (p + 6 >= end) break;      // string is truncated
 524       sprintf(p, "\\u%04x", c);
 525       p += 6;
 526     }
 527   }
 528   *p = '\0';
 529 }
 530 
 531 // Explicit instantiation for all supported types.
 532 template int UNICODE::utf8_length(const jbyte* base, int length);
 533 template int UNICODE::utf8_length(const jchar* base, int length);
 534 template char* UNICODE::as_utf8(const jbyte* base, int& length);
 535 template char* UNICODE::as_utf8(const jchar* base, int& length);
 536 template int UNICODE::quoted_ascii_length<jbyte>(const jbyte* base, int length);
 537 template int UNICODE::quoted_ascii_length<jchar>(const jchar* base, int length);
 538 template void UNICODE::as_quoted_ascii<jbyte>(const jbyte* base, int length, char* buf, int buflen);
 539 template void UNICODE::as_quoted_ascii<jchar>(const jchar* base, int length, char* buf, int buflen);
< prev index next >