88 * Convert UTF-16 to UTF-8 Modified
89 * Returns length or -1 if output overflows.
90 */
91 int JNICALL
92 utf16ToUtf8m(struct UtfInst *ui, unsigned short *utf16, int len, jbyte *output, int outputMaxLen)
93 {
94 int i;
95 int outputLen;
96
97 UTF_ASSERT(utf16);
98 UTF_ASSERT(len>=0);
99 UTF_ASSERT(output);
100 UTF_ASSERT(outputMaxLen>0);
101
102 outputLen = 0;
103 for (i = 0; i < len; i++) {
104 unsigned code;
105
106 code = utf16[i];
107 if ( code >= 0x0001 && code <= 0x007F ) {
108 output[outputLen++] = code;
109 } else if ( code == 0 || ( code >= 0x0080 && code <= 0x07FF ) ) {
110 output[outputLen++] = ((code>>6) & 0x1F) | 0xC0;
111 output[outputLen++] = (code & 0x3F) | 0x80;
112 } else if ( code >= 0x0800 && code <= 0xFFFF ) {
113 output[outputLen++] = ((code>>12) & 0x0F) | 0xE0;
114 output[outputLen++] = ((code>>6) & 0x3F) | 0x80;
115 output[outputLen++] = (code & 0x3F) | 0x80;
116 }
117 if ( outputLen > outputMaxLen ) {
118 return -1;
119 }
120 }
121 output[outputLen] = 0;
122 return outputLen;
123 }
124
125 int JNICALL
126 utf16ToUtf8s(struct UtfInst *ui, unsigned short *utf16, int len, jbyte *output, int outputMaxLen)
127 {
128 return -1; /* FIXUP */
129 }
130
131 /* Determine length of this Standard UTF-8 in Modified UTF-8.
132 * Validation is done of the basic UTF encoding rules, returns
133 * length (no change) when errors are detected in the UTF encoding.
134 *
135 * Note: Accepts Modified UTF-8 also, no verification on the
136 * correctness of Standard UTF-8 is done, e.g. 0xC080 input is ok.
137 */
138 int JNICALL
139 utf8sToUtf8mLength(struct UtfInst *ui, jbyte *string, int length)
/*
 * Convert any byte array into a printable string.
 *
 * Bytes in the 7 bit range that are printable and not control characters
 * are copied unchanged; every other byte becomes the four character
 * escape "\xNN" (two lowercase hex digits).
 *
 *   ui           - not used by this routine.
 *   bytes, len   - the input bytes.
 *   output       - receives the text followed by a terminating 0.
 *   outputMaxLen - size of output in bytes, terminating 0 included.
 *
 * Returns length or -1 if output overflows. Nothing is stored beyond
 * output[outputMaxLen-1].
 */
static int
bytesToPrintable(struct UtfInst *ui, char *bytes, int len, char *output, int outputMaxLen)
{
    int outputLen;
    int i;

    UTF_ASSERT(bytes);
    UTF_ASSERT(len>=0);
    UTF_ASSERT(output);
    UTF_ASSERT(outputMaxLen>=0);

    outputLen = 0;
    for ( i=0; i<len ; i++ ) {
        unsigned byte;

        /* Go through unsigned char: where plain char is signed, a byte
         * >= 0x80 would otherwise sign extend and "%02x" would print
         * eight hex digits instead of two.
         */
        byte = (unsigned char)bytes[i];
        if ( byte <= 0x7f && isprint(byte) && !iscntrl(byte) ) {
            /* One character plus room for the terminating 0 */
            if ( outputLen + 1 >= outputMaxLen ) {
                return -1;
            }
            output[outputLen++] = (char)byte;
        } else {
            /* Four characters plus the 0 that sprintf appends */
            if ( outputLen + 4 >= outputMaxLen ) {
                return -1;
            }
            (void)sprintf(output+outputLen,"\\x%02x",byte);
            outputLen += 4;
        }
    }
    /* Only reachable without room when nothing was written and
     * outputMaxLen is 0: there is no space even for the terminator.
     */
    if ( outputLen >= outputMaxLen ) {
        return -1;
    }
    output[outputLen] = 0;
    return outputLen;
}
428
429 static void
430 test(void)
431 {
432 static char *strings[] = {
433 "characters",
434 "abcdefghijklmnopqrstuvwxyz",
435 "0123456789",
436 "!@#$%^&*()_+=-{}[]:;",
437 NULL };
438 int i;
439 struct UtfInst *ui;
440
|
88 * Convert UTF-16 to UTF-8 Modified
89 * Returns length or -1 if output overflows.
90 */
91 int JNICALL
92 utf16ToUtf8m(struct UtfInst *ui, unsigned short *utf16, int len, jbyte *output, int outputMaxLen)
93 {
94 int i;
95 int outputLen;
96
97 UTF_ASSERT(utf16);
98 UTF_ASSERT(len>=0);
99 UTF_ASSERT(output);
100 UTF_ASSERT(outputMaxLen>0);
101
102 outputLen = 0;
103 for (i = 0; i < len; i++) {
104 unsigned code;
105
106 code = utf16[i];
107 if ( code >= 0x0001 && code <= 0x007F ) {
108 if ( outputLen + 1 >= outputMaxLen ) {
109 return -1;
110 }
111 output[outputLen++] = code;
112 } else if ( code == 0 || ( code >= 0x0080 && code <= 0x07FF ) ) {
113 if ( outputLen + 2 >= outputMaxLen ) {
114 return -1;
115 }
116 output[outputLen++] = ((code>>6) & 0x1F) | 0xC0;
117 output[outputLen++] = (code & 0x3F) | 0x80;
118 } else if ( code >= 0x0800 && code <= 0xFFFF ) {
119 if ( outputLen + 3 >= outputMaxLen ) {
120 return -1;
121 }
122 output[outputLen++] = ((code>>12) & 0x0F) | 0xE0;
123 output[outputLen++] = ((code>>6) & 0x3F) | 0x80;
124 output[outputLen++] = (code & 0x3F) | 0x80;
125 }
126 }
127 output[outputLen] = 0;
128 return outputLen;
129 }
130
131 int JNICALL
132 utf16ToUtf8s(struct UtfInst *ui, unsigned short *utf16, int len, jbyte *output, int outputMaxLen)
133 {
134 return -1; /* FIXUP */
135 }
136
137 /* Determine length of this Standard UTF-8 in Modified UTF-8.
138 * Validation is done of the basic UTF encoding rules, returns
139 * length (no change) when errors are detected in the UTF encoding.
140 *
141 * Note: Accepts Modified UTF-8 also, no verification on the
142 * correctness of Standard UTF-8 is done, e.g. 0xC080 input is ok.
143 */
144 int JNICALL
145 utf8sToUtf8mLength(struct UtfInst *ui, jbyte *string, int length)
/*
 * Convert any byte array into a printable string.
 *
 * A byte that is 7 bit, printable and not a control character is copied
 * as is. Any other byte is written as the four characters "\xNN", NN
 * being its value as two lowercase hex digits.
 *
 *   ui           - not used by this routine.
 *   bytes, len   - the input bytes.
 *   output       - receives the text followed by a terminating 0.
 *   outputMaxLen - size of output in bytes, terminating 0 included.
 *
 * Returns length or -1 if output overflows. Nothing is stored beyond
 * output[outputMaxLen-1].
 */
static int
bytesToPrintable(struct UtfInst *ui, char *bytes, int len, char *output, int outputMaxLen)
{
    static const char hexDigits[] = "0123456789abcdef";
    int outputLen;
    int i;

    UTF_ASSERT(bytes);
    UTF_ASSERT(len>=0);
    UTF_ASSERT(output);
    UTF_ASSERT(outputMaxLen>=0);

    outputLen = 0;
    for ( i=0; i<len ; i++ ) {
        unsigned byte;

        /* Read through unsigned char so the value is always 0..255.
         * With a signed plain char a byte >= 0x80 would sign extend,
         * and the escape would no longer be exactly four characters.
         */
        byte = (unsigned char)bytes[i];
        if ( byte <= 0x7f && isprint(byte) && !iscntrl(byte) ) {
            /* One character, keeping a byte free for the terminator */
            if ( 1 >= outputMaxLen - outputLen ) {
                return -1;
            }
            output[outputLen++] = (char)byte;
        } else {
            /* Four characters, keeping a byte free for the terminator */
            if ( 4 >= outputMaxLen - outputLen ) {
                return -1;
            }
            /* Same text as the "\\x%02x" format, built by hand so the
             * amount written is fixed at four characters.
             */
            output[outputLen++] = '\\';
            output[outputLen++] = 'x';
            output[outputLen++] = hexDigits[(byte >> 4) & 0x0F];
            output[outputLen++] = hexDigits[byte & 0x0F];
        }
    }
    /* Only reachable without room when nothing was written and
     * outputMaxLen is 0: there is no space even for the terminator.
     */
    if ( outputLen >= outputMaxLen ) {
        return -1;
    }
    output[outputLen] = 0;
    return outputLen;
}
437
438 static void
439 test(void)
440 {
441 static char *strings[] = {
442 "characters",
443 "abcdefghijklmnopqrstuvwxyz",
444 "0123456789",
445 "!@#$%^&*()_+=-{}[]:;",
446 NULL };
447 int i;
448 struct UtfInst *ui;
449
|