312
313 int UNICODE::utf8_size(jchar c) {
314 if ((0x0001 <= c) && (c <= 0x007F)) return 1;
315 if (c <= 0x07FF) return 2;
316 return 3;
317 }
318
319 int UNICODE::utf8_length(jchar* base, int length) {
320 int result = 0;
321 for (int index = 0; index < length; index++) {
322 jchar c = base[index];
323 if ((0x0001 <= c) && (c <= 0x007F)) result += 1;
324 else if (c <= 0x07FF) result += 2;
325 else result += 3;
326 }
327 return result;
328 }
329
330 char* UNICODE::as_utf8(jchar* base, int length) {
331 int utf8_len = utf8_length(base, length);
332 u_char* result = NEW_RESOURCE_ARRAY(u_char, utf8_len + 1);
333 u_char* p = result;
334 for (int index = 0; index < length; index++) {
335 p = utf8_write(p, base[index]);
336 }
337 *p = '\0';
338 assert(p == &result[utf8_len], "length prediction must be correct");
339 return (char*) result;
340 }
341
342 char* UNICODE::as_utf8(jchar* base, int length, char* buf, int buflen) {
343 u_char* p = (u_char*)buf;
344 u_char* end = (u_char*)buf + buflen;
345 for (int index = 0; index < length; index++) {
346 jchar c = base[index];
347 if (p + utf8_size(c) >= end) break; // string is truncated
348 p = utf8_write(p, base[index]);
349 }
350 *p = '\0';
351 return buf;
352 }
353
354 void UNICODE::convert_to_utf8(const jchar* base, int length, char* utf8_buffer) {
355 for(int index = 0; index < length; index++) {
356 utf8_buffer = (char*)utf8_write((u_char*)utf8_buffer, base[index]);
357 }
358 *utf8_buffer = '\0';
359 }
360
361 // returns the quoted ascii length of a unicode string
// NOTE(review): the embedded line numbering jumps from 368 to 372 below, so
// the body of the else branch and the closing braces of the if/else and of the
// for loop are missing from this view. Only the first case can be confirmed
// here: each input char in [32, 127) adds one to the result.
362 int UNICODE::quoted_ascii_length(jchar* base, int length) {
363 int result = 0;
364 for (int i = 0; i < length; i++) {
365 jchar c = base[i];
// chars in [32, 127) count as a single output character
366 if (c >= 32 && c < 127) {
367 result++;
368 } else {
372 return result;
373 }
374
375 // converts a utf8 string to quoted ascii
376 void UNICODE::as_quoted_ascii(const jchar* base, int length, char* buf, int buflen) {
377 char* p = buf;
378 char* end = buf + buflen;
379 for (int index = 0; index < length; index++) {
380 jchar c = base[index];
381 if (c >= 32 && c < 127) {
382 if (p + 1 >= end) break; // string is truncated
383 *p++ = (char)c;
384 } else {
385 if (p + 6 >= end) break; // string is truncated
386 sprintf(p, "\\u%04x", c);
387 p += 6;
388 }
389 }
390 *p = '\0';
391 }
|
312
313 int UNICODE::utf8_size(jchar c) {
314 if ((0x0001 <= c) && (c <= 0x007F)) return 1;
315 if (c <= 0x07FF) return 2;
316 return 3;
317 }
318
319 int UNICODE::utf8_length(jchar* base, int length) {
320 int result = 0;
321 for (int index = 0; index < length; index++) {
322 jchar c = base[index];
323 if ((0x0001 <= c) && (c <= 0x007F)) result += 1;
324 else if (c <= 0x07FF) result += 2;
325 else result += 3;
326 }
327 return result;
328 }
329
330 char* UNICODE::as_utf8(jchar* base, int length) {
331 int utf8_len = utf8_length(base, length);
332 u_char* buf = NEW_RESOURCE_ARRAY(u_char, utf8_len + 1);
333 char* result = as_utf8(base, length, (char*) buf, utf8_len + 1);
334 assert((int) strlen(result) == utf8_len, "length prediction must be correct");
335 return result;
336 }
337
338 char* UNICODE::as_utf8(jchar* base, int length, char* buf, int buflen) {
339 u_char* p = (u_char*)buf;
340 for (int index = 0; index < length; index++) {
341 jchar c = base[index];
342 buflen -= utf8_size(c);
343 if (buflen <= 0) break; // string is truncated
344 p = utf8_write(p, c);
345 }
346 *p = '\0';
347 return buf;
348 }
349
350 void UNICODE::convert_to_utf8(const jchar* base, int length, char* utf8_buffer) {
351 for(int index = 0; index < length; index++) {
352 utf8_buffer = (char*)utf8_write((u_char*)utf8_buffer, base[index]);
353 }
354 *utf8_buffer = '\0';
355 }
356
357 // returns the quoted ascii length of a unicode string
// NOTE(review): the embedded line numbering jumps from 364 to 368 below, so
// the body of the else branch and the closing braces of the if/else and of the
// for loop are missing from this view. Only the first case can be confirmed
// here: each input char in [32, 127) adds one to the result.
358 int UNICODE::quoted_ascii_length(jchar* base, int length) {
359 int result = 0;
360 for (int i = 0; i < length; i++) {
361 jchar c = base[i];
// chars in [32, 127) count as a single output character
362 if (c >= 32 && c < 127) {
363 result++;
364 } else {
368 return result;
369 }
370
371 // converts a utf8 string to quoted ascii
372 void UNICODE::as_quoted_ascii(const jchar* base, int length, char* buf, int buflen) {
373 char* p = buf;
374 char* end = buf + buflen;
375 for (int index = 0; index < length; index++) {
376 jchar c = base[index];
377 if (c >= 32 && c < 127) {
378 if (p + 1 >= end) break; // string is truncated
379 *p++ = (char)c;
380 } else {
381 if (p + 6 >= end) break; // string is truncated
382 sprintf(p, "\\u%04x", c);
383 p += 6;
384 }
385 }
386 *p = '\0';
387 }
388
389 #ifndef PRODUCT
390 void TestAsUtf8() {
391 char res[60];
392 jchar str[20];
393
394 for (int i = 0; i < 20; i++) {
395 str[i] = 0x0800; // char that is 2B in UTF-16 but 3B in UTF-8
396 }
397 str[19] = (jchar)'\0';
398
399 // The resulting string in UTF-8 is 3*19 bytes long, but should be truncated
400 UNICODE::as_utf8(str, 19, res, 10);
401 assert(strlen(res) == 9, "string should be truncated here");
402
403 UNICODE::as_utf8(str, 19, res, 18);
404 assert(strlen(res) == 15, "string should be truncated here");
405
406 UNICODE::as_utf8(str, 19, res, 20);
407 assert(strlen(res) == 18, "string should be truncated here");
408
409 // Test with an "unbounded" buffer
410 UNICODE::as_utf8(str, 19, res, INT_MAX);
411 assert(strlen(res) == 3*19, "string should end here");
412 }
413 #endif
|