1 /* 2 * encoding.c : implements the encoding conversion functions needed for XML 3 * 4 * Related specs: 5 * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies 6 * rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau 7 * [ISO-10646] UTF-8 and UTF-16 in Annexes 8 * [ISO-8859-1] ISO Latin-1 characters codes. 9 * [UNICODE] The Unicode Consortium, "The Unicode Standard -- 10 * Worldwide Character Encoding -- Version 1.0", Addison- 11 * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is 12 * described in Unicode Technical Report #4. 13 * [US-ASCII] Coded Character Set--7-bit American Standard Code for 14 * Information Interchange, ANSI X3.4-1986. 15 * 16 * See Copyright for the status of this software. 17 * 18 * daniel@veillard.com 19 * 20 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org> 21 */ 22 23 #define IN_LIBXML 24 #include "libxml.h" 25 26 #include <string.h> 27 28 #ifdef HAVE_CTYPE_H 29 #include <ctype.h> 30 #endif 31 #ifdef HAVE_STDLIB_H 32 #include <stdlib.h> 33 #endif 34 #ifdef LIBXML_ICONV_ENABLED 35 #ifdef HAVE_ERRNO_H 36 #include <errno.h> 37 #endif 38 #endif 39 #include <libxml/encoding.h> 40 #include <libxml/xmlmemory.h> 41 #ifdef LIBXML_HTML_ENABLED 42 #include <libxml/HTMLparser.h> 43 #endif 44 #include <libxml/globals.h> 45 #include <libxml/xmlerror.h> 46 47 static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL; 48 static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL; 49 50 typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias; 51 typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr; 52 struct _xmlCharEncodingAlias { 53 const char *name; 54 const char *alias; 55 }; 56 57 static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL; 58 static int xmlCharEncodingAliasesNb = 0; 59 static int xmlCharEncodingAliasesMax = 0; 60 61 #ifdef LIBXML_ICONV_ENABLED 62 #if 0 63 #define DEBUG_ENCODING /* Define this to get encoding traces */ 64 #endif 65 #else 66 #ifdef LIBXML_ISO8859X_ENABLED 67 static void 
xmlRegisterCharEncodingHandlersISO8859x (void);
#endif
#endif

/* Host byte-order flag (non-zero = little-endian); statically set to 1. */
static int xmlLittleEndian = 1;

/**
 * xmlEncodingErrMemory:
 * @extra:  extra information
 *
 * Handle an out of memory condition by reporting XML_ERR_NO_MEMORY
 * in the XML_FROM_I18N domain.
 */
static void
xmlEncodingErrMemory(const char *extra)
{
    __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
}

/**
 * xmlEncodingErr:
 * @error:  the error number
 * @msg:  the error message, used as the format string of the report
 * @val:  a string value passed as argument to @msg
 *
 * Handle an encoding error: it is raised as a fatal error (XML_ERR_FATAL)
 * in the XML_FROM_I18N domain.
 */
static void
xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
{
    __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
                    XML_FROM_I18N, error, XML_ERR_FATAL,
                    NULL, 0, val, NULL, NULL, 0, 0, msg, val);
}

/************************************************************************
 *                                                                      *
 *              Conversions To/From UTF8 encoding                       *
 *                                                                      *
 ************************************************************************/

/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out.
 * Returns the number of bytes written if success, or -1 otherwise
 * The value of @inlen after return is the number of octets consumed
 *     if the return value is positive, else unpredictable.
 * The value of @outlen after return is the number of octets produced.
119 */ 120 static int 121 asciiToUTF8(unsigned char* out, int *outlen, 122 const unsigned char* in, int *inlen) { 123 unsigned char* outstart = out; 124 const unsigned char* base = in; 125 const unsigned char* processed = in; 126 unsigned char* outend = out + *outlen; 127 const unsigned char* inend; 128 unsigned int c; 129 130 inend = in + (*inlen); 131 while ((in < inend) && (out - outstart + 5 < *outlen)) { 132 c= *in++; 133 134 if (out >= outend) 135 break; 136 if (c < 0x80) { 137 *out++ = c; 138 } else { 139 *outlen = out - outstart; 140 *inlen = processed - base; 141 return(-1); 142 } 143 144 processed = (const unsigned char*) in; 145 } 146 *outlen = out - outstart; 147 *inlen = processed - base; 148 return(*outlen); 149 } 150 151 #ifdef LIBXML_OUTPUT_ENABLED 152 /** 153 * UTF8Toascii: 154 * @out: a pointer to an array of bytes to store the result 155 * @outlen: the length of @out 156 * @in: a pointer to an array of UTF-8 chars 157 * @inlen: the length of @in 158 * 159 * Take a block of UTF-8 chars in and try to convert it to an ASCII 160 * block of chars out. 161 * 162 * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise 163 * The value of @inlen after return is the number of octets consumed 164 * if the return value is positive, else unpredictable. 165 * The value of @outlen after return is the number of octets consumed. 
166 */ 167 static int 168 UTF8Toascii(unsigned char* out, int *outlen, 169 const unsigned char* in, int *inlen) { 170 const unsigned char* processed = in; 171 const unsigned char* outend; 172 const unsigned char* outstart = out; 173 const unsigned char* instart = in; 174 const unsigned char* inend; 175 unsigned int c, d; 176 int trailing; 177 178 if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1); 179 if (in == NULL) { 180 /* 181 * initialization nothing to do 182 */ 183 *outlen = 0; 184 *inlen = 0; 185 return(0); 186 } 187 inend = in + (*inlen); 188 outend = out + (*outlen); 189 while (in < inend) { 190 d = *in++; 191 if (d < 0x80) { c= d; trailing= 0; } 192 else if (d < 0xC0) { 193 /* trailing byte in leading position */ 194 *outlen = out - outstart; 195 *inlen = processed - instart; 196 return(-2); 197 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } 198 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } 199 else if (d < 0xF8) { c= d & 0x07; trailing= 3; } 200 else { 201 /* no chance for this in Ascii */ 202 *outlen = out - outstart; 203 *inlen = processed - instart; 204 return(-2); 205 } 206 207 if (inend - in < trailing) { 208 break; 209 } 210 211 for ( ; trailing; trailing--) { 212 if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) 213 break; 214 c <<= 6; 215 c |= d & 0x3F; 216 } 217 218 /* assertion: c is a single UTF-4 value */ 219 if (c < 0x80) { 220 if (out >= outend) 221 break; 222 *out++ = c; 223 } else { 224 /* no chance for this in Ascii */ 225 *outlen = out - outstart; 226 *inlen = processed - instart; 227 return(-2); 228 } 229 processed = in; 230 } 231 *outlen = out - outstart; 232 *inlen = processed - instart; 233 return(*outlen); 234 } 235 #endif /* LIBXML_OUTPUT_ENABLED */ 236 237 /** 238 * isolat1ToUTF8: 239 * @out: a pointer to an array of bytes to store the result 240 * @outlen: the length of @out 241 * @in: a pointer to an array of ISO Latin 1 chars 242 * @inlen: the length of @in 243 * 244 * Take a block of ISO Latin 1 
chars in and try to convert it to an UTF-8 245 * block of chars out. 246 * Returns the number of bytes written if success, or -1 otherwise 247 * The value of @inlen after return is the number of octets consumed 248 * if the return value is positive, else unpredictable. 249 * The value of @outlen after return is the number of octets consumed. 250 */ 251 int 252 isolat1ToUTF8(unsigned char* out, int *outlen, 253 const unsigned char* in, int *inlen) { 254 unsigned char* outstart = out; 255 const unsigned char* base = in; 256 unsigned char* outend; 257 const unsigned char* inend; 258 const unsigned char* instop; 259 260 if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL)) 261 return(-1); 262 263 outend = out + *outlen; 264 inend = in + (*inlen); 265 instop = inend; 266 267 while (in < inend && out < outend - 1) { 268 if (*in >= 0x80) { 269 *out++ = (((*in) >> 6) & 0x1F) | 0xC0; 270 *out++ = ((*in) & 0x3F) | 0x80; 271 ++in; 272 } 273 if (instop - in > outend - out) instop = in + (outend - out); 274 while (in < instop && *in < 0x80) { 275 *out++ = *in++; 276 } 277 } 278 if (in < inend && out < outend && *in < 0x80) { 279 *out++ = *in++; 280 } 281 *outlen = out - outstart; 282 *inlen = in - base; 283 return(*outlen); 284 } 285 286 /** 287 * UTF8ToUTF8: 288 * @out: a pointer to an array of bytes to store the result 289 * @outlen: the length of @out 290 * @inb: a pointer to an array of UTF-8 chars 291 * @inlenb: the length of @in in UTF-8 chars 292 * 293 * No op copy operation for UTF8 handling. 294 * 295 * Returns the number of bytes written, or -1 if lack of space. 296 * The value of *inlen after return is the number of octets consumed 297 * if the return value is positive, else unpredictable. 
298 */ 299 static int 300 UTF8ToUTF8(unsigned char* out, int *outlen, 301 const unsigned char* inb, int *inlenb) 302 { 303 int len; 304 305 if ((out == NULL) || (inb == NULL) || (outlen == NULL) || (inlenb == NULL)) 306 return(-1); 307 if (*outlen > *inlenb) { 308 len = *inlenb; 309 } else { 310 len = *outlen; 311 } 312 if (len < 0) 313 return(-1); 314 315 memcpy(out, inb, len); 316 317 *outlen = len; 318 *inlenb = len; 319 return(*outlen); 320 } 321 322 323 #ifdef LIBXML_OUTPUT_ENABLED 324 /** 325 * UTF8Toisolat1: 326 * @out: a pointer to an array of bytes to store the result 327 * @outlen: the length of @out 328 * @in: a pointer to an array of UTF-8 chars 329 * @inlen: the length of @in 330 * 331 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1 332 * block of chars out. 333 * 334 * Returns the number of bytes written if success, -2 if the transcoding fails, 335 or -1 otherwise 336 * The value of @inlen after return is the number of octets consumed 337 * if the return value is positive, else unpredictable. 338 * The value of @outlen after return is the number of octets consumed. 
339 */ 340 int 341 UTF8Toisolat1(unsigned char* out, int *outlen, 342 const unsigned char* in, int *inlen) { 343 const unsigned char* processed = in; 344 const unsigned char* outend; 345 const unsigned char* outstart = out; 346 const unsigned char* instart = in; 347 const unsigned char* inend; 348 unsigned int c, d; 349 int trailing; 350 351 if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1); 352 if (in == NULL) { 353 /* 354 * initialization nothing to do 355 */ 356 *outlen = 0; 357 *inlen = 0; 358 return(0); 359 } 360 inend = in + (*inlen); 361 outend = out + (*outlen); 362 while (in < inend) { 363 d = *in++; 364 if (d < 0x80) { c= d; trailing= 0; } 365 else if (d < 0xC0) { 366 /* trailing byte in leading position */ 367 *outlen = out - outstart; 368 *inlen = processed - instart; 369 return(-2); 370 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } 371 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } 372 else if (d < 0xF8) { c= d & 0x07; trailing= 3; } 373 else { 374 /* no chance for this in IsoLat1 */ 375 *outlen = out - outstart; 376 *inlen = processed - instart; 377 return(-2); 378 } 379 380 if (inend - in < trailing) { 381 break; 382 } 383 384 for ( ; trailing; trailing--) { 385 if (in >= inend) 386 break; 387 if (((d= *in++) & 0xC0) != 0x80) { 388 *outlen = out - outstart; 389 *inlen = processed - instart; 390 return(-2); 391 } 392 c <<= 6; 393 c |= d & 0x3F; 394 } 395 396 /* assertion: c is a single UTF-4 value */ 397 if (c <= 0xFF) { 398 if (out >= outend) 399 break; 400 *out++ = c; 401 } else { 402 /* no chance for this in IsoLat1 */ 403 *outlen = out - outstart; 404 *inlen = processed - instart; 405 return(-2); 406 } 407 processed = in; 408 } 409 *outlen = out - outstart; 410 *inlen = processed - instart; 411 return(*outlen); 412 } 413 #endif /* LIBXML_OUTPUT_ENABLED */ 414 415 /** 416 * UTF16LEToUTF8: 417 * @out: a pointer to an array of bytes to store the result 418 * @outlen: the length of @out 419 * @inb: a pointer to an array of 
UTF-16LE passwd as a byte array 420 * @inlenb: the length of @in in UTF-16LE chars 421 * 422 * Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8 423 * block of chars out. This function assumes the endian property 424 * is the same between the native type of this machine and the 425 * inputed one. 426 * 427 * Returns the number of bytes written, or -1 if lack of space, or -2 428 * if the transcoding fails (if *in is not a valid utf16 string) 429 * The value of *inlen after return is the number of octets consumed 430 * if the return value is positive, else unpredictable. 431 */ 432 static int 433 UTF16LEToUTF8(unsigned char* out, int *outlen, 434 const unsigned char* inb, int *inlenb) 435 { 436 unsigned char* outstart = out; 437 const unsigned char* processed = inb; 438 unsigned char* outend = out + *outlen; 439 unsigned short* in = (unsigned short*) inb; 440 unsigned short* inend; 441 unsigned int c, d, inlen; 442 unsigned char *tmp; 443 int bits; 444 445 if ((*inlenb % 2) == 1) 446 (*inlenb)--; 447 inlen = *inlenb / 2; 448 inend = in + inlen; 449 while ((in < inend) && (out - outstart + 5 < *outlen)) { 450 if (xmlLittleEndian) { 451 c= *in++; 452 } else { 453 tmp = (unsigned char *) in; 454 c = *tmp++; 455 c = c | (((unsigned int)*tmp) << 8); 456 in++; 457 } 458 if ((c & 0xFC00) == 0xD800) { /* surrogates */ 459 if (in >= inend) { /* (in > inend) shouldn't happens */ 460 break; 461 } 462 if (xmlLittleEndian) { 463 d = *in++; 464 } else { 465 tmp = (unsigned char *) in; 466 d = *tmp++; 467 d = d | (((unsigned int)*tmp) << 8); 468 in++; 469 } 470 if ((d & 0xFC00) == 0xDC00) { 471 c &= 0x03FF; 472 c <<= 10; 473 c |= d & 0x03FF; 474 c += 0x10000; 475 } 476 else { 477 *outlen = out - outstart; 478 *inlenb = processed - inb; 479 return(-2); 480 } 481 } 482 483 /* assertion: c is a single UTF-4 value */ 484 if (out >= outend) 485 break; 486 if (c < 0x80) { *out++= c; bits= -6; } 487 else if (c < 0x800) { *out++= ((c >> 6) & 0x1F) | 0xC0; bits= 0; } 488 
else if (c < 0x10000) { *out++= ((c >> 12) & 0x0F) | 0xE0; bits= 6; } 489 else { *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; } 490 491 for ( ; bits >= 0; bits-= 6) { 492 if (out >= outend) 493 break; 494 *out++= ((c >> bits) & 0x3F) | 0x80; 495 } 496 processed = (const unsigned char*) in; 497 } 498 *outlen = out - outstart; 499 *inlenb = processed - inb; 500 return(*outlen); 501 } 502 503 #ifdef LIBXML_OUTPUT_ENABLED 504 /** 505 * UTF8ToUTF16LE: 506 * @outb: a pointer to an array of bytes to store the result 507 * @outlen: the length of @outb 508 * @in: a pointer to an array of UTF-8 chars 509 * @inlen: the length of @in 510 * 511 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE 512 * block of chars out. 513 * 514 * Returns the number of bytes written, or -1 if lack of space, or -2 515 * if the transcoding failed. 516 */ 517 static int 518 UTF8ToUTF16LE(unsigned char* outb, int *outlen, 519 const unsigned char* in, int *inlen) 520 { 521 unsigned short* out = (unsigned short*) outb; 522 const unsigned char* processed = in; 523 const unsigned char *const instart = in; 524 unsigned short* outstart= out; 525 unsigned short* outend; 526 const unsigned char* inend; 527 unsigned int c, d; 528 int trailing; 529 unsigned char *tmp; 530 unsigned short tmp1, tmp2; 531 532 /* UTF16LE encoding has no BOM */ 533 if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1); 534 if (in == NULL) { 535 *outlen = 0; 536 *inlen = 0; 537 return(0); 538 } 539 inend= in + *inlen; 540 outend = out + (*outlen / 2); 541 while (in < inend) { 542 d= *in++; 543 if (d < 0x80) { c= d; trailing= 0; } 544 else if (d < 0xC0) { 545 /* trailing byte in leading position */ 546 *outlen = (out - outstart) * 2; 547 *inlen = processed - instart; 548 return(-2); 549 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } 550 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } 551 else if (d < 0xF8) { c= d & 0x07; trailing= 3; } 552 else { 553 /* no chance for this in UTF-16 */ 554 
*outlen = (out - outstart) * 2; 555 *inlen = processed - instart; 556 return(-2); 557 } 558 559 if (inend - in < trailing) { 560 break; 561 } 562 563 for ( ; trailing; trailing--) { 564 if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) 565 break; 566 c <<= 6; 567 c |= d & 0x3F; 568 } 569 570 /* assertion: c is a single UTF-4 value */ 571 if (c < 0x10000) { 572 if (out >= outend) 573 break; 574 if (xmlLittleEndian) { 575 *out++ = c; 576 } else { 577 tmp = (unsigned char *) out; 578 *tmp = c ; 579 *(tmp + 1) = c >> 8 ; 580 out++; 581 } 582 } 583 else if (c < 0x110000) { 584 if (out+1 >= outend) 585 break; 586 c -= 0x10000; 587 if (xmlLittleEndian) { 588 *out++ = 0xD800 | (c >> 10); 589 *out++ = 0xDC00 | (c & 0x03FF); 590 } else { 591 tmp1 = 0xD800 | (c >> 10); 592 tmp = (unsigned char *) out; 593 *tmp = (unsigned char) tmp1; 594 *(tmp + 1) = tmp1 >> 8; 595 out++; 596 597 tmp2 = 0xDC00 | (c & 0x03FF); 598 tmp = (unsigned char *) out; 599 *tmp = (unsigned char) tmp2; 600 *(tmp + 1) = tmp2 >> 8; 601 out++; 602 } 603 } 604 else 605 break; 606 processed = in; 607 } 608 *outlen = (out - outstart) * 2; 609 *inlen = processed - instart; 610 return(*outlen); 611 } 612 613 /** 614 * UTF8ToUTF16: 615 * @outb: a pointer to an array of bytes to store the result 616 * @outlen: the length of @outb 617 * @in: a pointer to an array of UTF-8 chars 618 * @inlen: the length of @in 619 * 620 * Take a block of UTF-8 chars in and try to convert it to an UTF-16 621 * block of chars out. 622 * 623 * Returns the number of bytes written, or -1 if lack of space, or -2 624 * if the transcoding failed. 
625 */ 626 static int 627 UTF8ToUTF16(unsigned char* outb, int *outlen, 628 const unsigned char* in, int *inlen) 629 { 630 if (in == NULL) { 631 /* 632 * initialization, add the Byte Order Mark for UTF-16LE 633 */ 634 if (*outlen >= 2) { 635 outb[0] = 0xFF; 636 outb[1] = 0xFE; 637 *outlen = 2; 638 *inlen = 0; 639 #ifdef DEBUG_ENCODING 640 xmlGenericError(xmlGenericErrorContext, 641 "Added FFFE Byte Order Mark\n"); 642 #endif 643 return(2); 644 } 645 *outlen = 0; 646 *inlen = 0; 647 return(0); 648 } 649 return (UTF8ToUTF16LE(outb, outlen, in, inlen)); 650 } 651 #endif /* LIBXML_OUTPUT_ENABLED */ 652 653 /** 654 * UTF16BEToUTF8: 655 * @out: a pointer to an array of bytes to store the result 656 * @outlen: the length of @out 657 * @inb: a pointer to an array of UTF-16 passed as a byte array 658 * @inlenb: the length of @in in UTF-16 chars 659 * 660 * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8 661 * block of chars out. This function assumes the endian property 662 * is the same between the native type of this machine and the 663 * inputed one. 664 * 665 * Returns the number of bytes written, or -1 if lack of space, or -2 666 * if the transcoding fails (if *in is not a valid utf16 string) 667 * The value of *inlen after return is the number of octets consumed 668 * if the return value is positive, else unpredictable. 
669 */ 670 static int 671 UTF16BEToUTF8(unsigned char* out, int *outlen, 672 const unsigned char* inb, int *inlenb) 673 { 674 unsigned char* outstart = out; 675 const unsigned char* processed = inb; 676 unsigned char* outend = out + *outlen; 677 unsigned short* in = (unsigned short*) inb; 678 unsigned short* inend; 679 unsigned int c, d, inlen; 680 unsigned char *tmp; 681 int bits; 682 683 if ((*inlenb % 2) == 1) 684 (*inlenb)--; 685 inlen = *inlenb / 2; 686 inend= in + inlen; 687 while (in < inend) { 688 if (xmlLittleEndian) { 689 tmp = (unsigned char *) in; 690 c = *tmp++; 691 c = c << 8; 692 c = c | (unsigned int) *tmp; 693 in++; 694 } else { 695 c= *in++; 696 } 697 if ((c & 0xFC00) == 0xD800) { /* surrogates */ 698 if (in >= inend) { /* (in > inend) shouldn't happens */ 699 *outlen = out - outstart; 700 *inlenb = processed - inb; 701 return(-2); 702 } 703 if (xmlLittleEndian) { 704 tmp = (unsigned char *) in; 705 d = *tmp++; 706 d = d << 8; 707 d = d | (unsigned int) *tmp; 708 in++; 709 } else { 710 d= *in++; 711 } 712 if ((d & 0xFC00) == 0xDC00) { 713 c &= 0x03FF; 714 c <<= 10; 715 c |= d & 0x03FF; 716 c += 0x10000; 717 } 718 else { 719 *outlen = out - outstart; 720 *inlenb = processed - inb; 721 return(-2); 722 } 723 } 724 725 /* assertion: c is a single UTF-4 value */ 726 if (out >= outend) 727 break; 728 if (c < 0x80) { *out++= c; bits= -6; } 729 else if (c < 0x800) { *out++= ((c >> 6) & 0x1F) | 0xC0; bits= 0; } 730 else if (c < 0x10000) { *out++= ((c >> 12) & 0x0F) | 0xE0; bits= 6; } 731 else { *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; } 732 733 for ( ; bits >= 0; bits-= 6) { 734 if (out >= outend) 735 break; 736 *out++= ((c >> bits) & 0x3F) | 0x80; 737 } 738 processed = (const unsigned char*) in; 739 } 740 *outlen = out - outstart; 741 *inlenb = processed - inb; 742 return(*outlen); 743 } 744 745 #ifdef LIBXML_OUTPUT_ENABLED 746 /** 747 * UTF8ToUTF16BE: 748 * @outb: a pointer to an array of bytes to store the result 749 * @outlen: the length of @outb 
750 * @in: a pointer to an array of UTF-8 chars 751 * @inlen: the length of @in 752 * 753 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE 754 * block of chars out. 755 * 756 * Returns the number of byte written, or -1 by lack of space, or -2 757 * if the transcoding failed. 758 */ 759 static int 760 UTF8ToUTF16BE(unsigned char* outb, int *outlen, 761 const unsigned char* in, int *inlen) 762 { 763 unsigned short* out = (unsigned short*) outb; 764 const unsigned char* processed = in; 765 const unsigned char *const instart = in; 766 unsigned short* outstart= out; 767 unsigned short* outend; 768 const unsigned char* inend; 769 unsigned int c, d; 770 int trailing; 771 unsigned char *tmp; 772 unsigned short tmp1, tmp2; 773 774 /* UTF-16BE has no BOM */ 775 if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1); 776 if (in == NULL) { 777 *outlen = 0; 778 *inlen = 0; 779 return(0); 780 } 781 inend= in + *inlen; 782 outend = out + (*outlen / 2); 783 while (in < inend) { 784 d= *in++; 785 if (d < 0x80) { c= d; trailing= 0; } 786 else if (d < 0xC0) { 787 /* trailing byte in leading position */ 788 *outlen = out - outstart; 789 *inlen = processed - instart; 790 return(-2); 791 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } 792 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } 793 else if (d < 0xF8) { c= d & 0x07; trailing= 3; } 794 else { 795 /* no chance for this in UTF-16 */ 796 *outlen = out - outstart; 797 *inlen = processed - instart; 798 return(-2); 799 } 800 801 if (inend - in < trailing) { 802 break; 803 } 804 805 for ( ; trailing; trailing--) { 806 if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) break; 807 c <<= 6; 808 c |= d & 0x3F; 809 } 810 811 /* assertion: c is a single UTF-4 value */ 812 if (c < 0x10000) { 813 if (out >= outend) break; 814 if (xmlLittleEndian) { 815 tmp = (unsigned char *) out; 816 *tmp = c >> 8; 817 *(tmp + 1) = c; 818 out++; 819 } else { 820 *out++ = c; 821 } 822 } 823 else if (c < 0x110000) { 824 if 
(out+1 >= outend) break; 825 c -= 0x10000; 826 if (xmlLittleEndian) { 827 tmp1 = 0xD800 | (c >> 10); 828 tmp = (unsigned char *) out; 829 *tmp = tmp1 >> 8; 830 *(tmp + 1) = (unsigned char) tmp1; 831 out++; 832 833 tmp2 = 0xDC00 | (c & 0x03FF); 834 tmp = (unsigned char *) out; 835 *tmp = tmp2 >> 8; 836 *(tmp + 1) = (unsigned char) tmp2; 837 out++; 838 } else { 839 *out++ = 0xD800 | (c >> 10); 840 *out++ = 0xDC00 | (c & 0x03FF); 841 } 842 } 843 else 844 break; 845 processed = in; 846 } 847 *outlen = (out - outstart) * 2; 848 *inlen = processed - instart; 849 return(*outlen); 850 } 851 #endif /* LIBXML_OUTPUT_ENABLED */ 852 853 /************************************************************************ 854 * * 855 * Generic encoding handling routines * 856 * * 857 ************************************************************************/ 858 859 /** 860 * xmlDetectCharEncoding: 861 * @in: a pointer to the first bytes of the XML entity, must be at least 862 * 2 bytes long (at least 4 if encoding is UTF4 variant). 863 * @len: pointer to the length of the buffer 864 * 865 * Guess the encoding of the entity using the first bytes of the entity content 866 * according to the non-normative appendix F of the XML-1.0 recommendation. 867 * 868 * Returns one of the XML_CHAR_ENCODING_... values. 
869 */ 870 xmlCharEncoding 871 xmlDetectCharEncoding(const unsigned char* in, int len) 872 { 873 if (in == NULL) 874 return(XML_CHAR_ENCODING_NONE); 875 if (len >= 4) { 876 if ((in[0] == 0x00) && (in[1] == 0x00) && 877 (in[2] == 0x00) && (in[3] == 0x3C)) 878 return(XML_CHAR_ENCODING_UCS4BE); 879 if ((in[0] == 0x3C) && (in[1] == 0x00) && 880 (in[2] == 0x00) && (in[3] == 0x00)) 881 return(XML_CHAR_ENCODING_UCS4LE); 882 if ((in[0] == 0x00) && (in[1] == 0x00) && 883 (in[2] == 0x3C) && (in[3] == 0x00)) 884 return(XML_CHAR_ENCODING_UCS4_2143); 885 if ((in[0] == 0x00) && (in[1] == 0x3C) && 886 (in[2] == 0x00) && (in[3] == 0x00)) 887 return(XML_CHAR_ENCODING_UCS4_3412); 888 if ((in[0] == 0x4C) && (in[1] == 0x6F) && 889 (in[2] == 0xA7) && (in[3] == 0x94)) 890 return(XML_CHAR_ENCODING_EBCDIC); 891 if ((in[0] == 0x3C) && (in[1] == 0x3F) && 892 (in[2] == 0x78) && (in[3] == 0x6D)) 893 return(XML_CHAR_ENCODING_UTF8); 894 /* 895 * Although not part of the recommendation, we also 896 * attempt an "auto-recognition" of UTF-16LE and 897 * UTF-16BE encodings. 
898 */ 899 if ((in[0] == 0x3C) && (in[1] == 0x00) && 900 (in[2] == 0x3F) && (in[3] == 0x00)) 901 return(XML_CHAR_ENCODING_UTF16LE); 902 if ((in[0] == 0x00) && (in[1] == 0x3C) && 903 (in[2] == 0x00) && (in[3] == 0x3F)) 904 return(XML_CHAR_ENCODING_UTF16BE); 905 } 906 if (len >= 3) { 907 /* 908 * Errata on XML-1.0 June 20 2001 909 * We now allow an UTF8 encoded BOM 910 */ 911 if ((in[0] == 0xEF) && (in[1] == 0xBB) && 912 (in[2] == 0xBF)) 913 return(XML_CHAR_ENCODING_UTF8); 914 } 915 /* For UTF-16 we can recognize by the BOM */ 916 if (len >= 2) { 917 if ((in[0] == 0xFE) && (in[1] == 0xFF)) 918 return(XML_CHAR_ENCODING_UTF16BE); 919 if ((in[0] == 0xFF) && (in[1] == 0xFE)) 920 return(XML_CHAR_ENCODING_UTF16LE); 921 } 922 return(XML_CHAR_ENCODING_NONE); 923 } 924 925 /** 926 * xmlCleanupEncodingAliases: 927 * 928 * Unregisters all aliases 929 */ 930 void 931 xmlCleanupEncodingAliases(void) { 932 int i; 933 934 if (xmlCharEncodingAliases == NULL) 935 return; 936 937 for (i = 0;i < xmlCharEncodingAliasesNb;i++) { 938 if (xmlCharEncodingAliases[i].name != NULL) 939 xmlFree((char *) xmlCharEncodingAliases[i].name); 940 if (xmlCharEncodingAliases[i].alias != NULL) 941 xmlFree((char *) xmlCharEncodingAliases[i].alias); 942 } 943 xmlCharEncodingAliasesNb = 0; 944 xmlCharEncodingAliasesMax = 0; 945 xmlFree(xmlCharEncodingAliases); 946 xmlCharEncodingAliases = NULL; 947 } 948 949 /** 950 * xmlGetEncodingAlias: 951 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually) 952 * 953 * Lookup an encoding name for the given alias. 
954 * 955 * Returns NULL if not found, otherwise the original name 956 */ 957 const char * 958 xmlGetEncodingAlias(const char *alias) { 959 int i; 960 char upper[100]; 961 962 if (alias == NULL) 963 return(NULL); 964 965 if (xmlCharEncodingAliases == NULL) 966 return(NULL); 967 968 for (i = 0;i < 99;i++) { 969 upper[i] = toupper(alias[i]); 970 if (upper[i] == 0) break; 971 } 972 upper[i] = 0; 973 974 /* 975 * Walk down the list looking for a definition of the alias 976 */ 977 for (i = 0;i < xmlCharEncodingAliasesNb;i++) { 978 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) { 979 return(xmlCharEncodingAliases[i].name); 980 } 981 } 982 return(NULL); 983 } 984 985 /** 986 * xmlAddEncodingAlias: 987 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually) 988 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually) 989 * 990 * Registers an alias @alias for an encoding named @name. Existing alias 991 * will be overwritten. 992 * 993 * Returns 0 in case of success, -1 in case of error 994 */ 995 int 996 xmlAddEncodingAlias(const char *name, const char *alias) { 997 int i; 998 char upper[100]; 999 1000 if ((name == NULL) || (alias == NULL)) 1001 return(-1); 1002 1003 for (i = 0;i < 99;i++) { 1004 upper[i] = toupper(alias[i]); 1005 if (upper[i] == 0) break; 1006 } 1007 upper[i] = 0; 1008 1009 if (xmlCharEncodingAliases == NULL) { 1010 xmlCharEncodingAliasesNb = 0; 1011 xmlCharEncodingAliasesMax = 20; 1012 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr) 1013 xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias)); 1014 if (xmlCharEncodingAliases == NULL) 1015 return(-1); 1016 } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) { 1017 xmlCharEncodingAliasesMax *= 2; 1018 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr) 1019 xmlRealloc(xmlCharEncodingAliases, 1020 xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias)); 1021 } 1022 /* 1023 * Walk down the list looking for a definition of the alias 1024 */ 1025 
for (i = 0;i < xmlCharEncodingAliasesNb;i++) { 1026 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) { 1027 /* 1028 * Replace the definition. 1029 */ 1030 xmlFree((char *) xmlCharEncodingAliases[i].name); 1031 xmlCharEncodingAliases[i].name = xmlMemStrdup(name); 1032 return(0); 1033 } 1034 } 1035 /* 1036 * Add the definition 1037 */ 1038 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name); 1039 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper); 1040 xmlCharEncodingAliasesNb++; 1041 return(0); 1042 } 1043 1044 /** 1045 * xmlDelEncodingAlias: 1046 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually) 1047 * 1048 * Unregisters an encoding alias @alias 1049 * 1050 * Returns 0 in case of success, -1 in case of error 1051 */ 1052 int 1053 xmlDelEncodingAlias(const char *alias) { 1054 int i; 1055 1056 if (alias == NULL) 1057 return(-1); 1058 1059 if (xmlCharEncodingAliases == NULL) 1060 return(-1); 1061 /* 1062 * Walk down the list looking for a definition of the alias 1063 */ 1064 for (i = 0;i < xmlCharEncodingAliasesNb;i++) { 1065 if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) { 1066 xmlFree((char *) xmlCharEncodingAliases[i].name); 1067 xmlFree((char *) xmlCharEncodingAliases[i].alias); 1068 xmlCharEncodingAliasesNb--; 1069 memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1], 1070 sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i)); 1071 return(0); 1072 } 1073 } 1074 return(-1); 1075 } 1076 1077 /** 1078 * xmlParseCharEncoding: 1079 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually) 1080 * 1081 * Compare the string to the encoding schemes already known. Note 1082 * that the comparison is case insensitive accordingly to the section 1083 * [XML] 4.3.3 Character Encoding in Entities. 1084 * 1085 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE 1086 * if not recognized. 
1087 */ 1088 xmlCharEncoding 1089 xmlParseCharEncoding(const char* name) 1090 { 1091 const char *alias; 1092 char upper[500]; 1093 int i; 1094 1095 if (name == NULL) 1096 return(XML_CHAR_ENCODING_NONE); 1097 1098 /* 1099 * Do the alias resolution 1100 */ 1101 alias = xmlGetEncodingAlias(name); 1102 if (alias != NULL) 1103 name = alias; 1104 1105 for (i = 0;i < 499;i++) { 1106 upper[i] = toupper(name[i]); 1107 if (upper[i] == 0) break; 1108 } 1109 upper[i] = 0; 1110 1111 if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE); 1112 if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8); 1113 if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8); 1114 1115 /* 1116 * NOTE: if we were able to parse this, the endianness of UTF16 is 1117 * already found and in use 1118 */ 1119 if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE); 1120 if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE); 1121 1122 if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2); 1123 if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2); 1124 if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2); 1125 1126 /* 1127 * NOTE: if we were able to parse this, the endianness of UCS4 is 1128 * already found and in use 1129 */ 1130 if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE); 1131 if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE); 1132 if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE); 1133 1134 1135 if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1); 1136 if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1); 1137 if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1); 1138 1139 if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2); 1140 if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2); 1141 if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2); 1142 1143 if (!strcmp(upper, "ISO-8859-3")) 
return(XML_CHAR_ENCODING_8859_3); 1144 if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4); 1145 if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5); 1146 if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6); 1147 if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7); 1148 if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8); 1149 if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9); 1150 1151 if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP); 1152 if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS); 1153 if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP); 1154 1155 #ifdef DEBUG_ENCODING 1156 xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name); 1157 #endif 1158 return(XML_CHAR_ENCODING_ERROR); 1159 } 1160 1161 /** 1162 * xmlGetCharEncodingName: 1163 * @enc: the encoding 1164 * 1165 * The "canonical" name for XML encoding. 1166 * C.f. http://www.w3.org/TR/REC-xml#charencoding 1167 * Section 4.3.3 Character Encoding in Entities 1168 * 1169 * Returns the canonical name for the given encoding 1170 */ 1171 1172 const char* 1173 xmlGetCharEncodingName(xmlCharEncoding enc) { 1174 switch (enc) { 1175 case XML_CHAR_ENCODING_ERROR: 1176 return(NULL); 1177 case XML_CHAR_ENCODING_NONE: 1178 return(NULL); 1179 case XML_CHAR_ENCODING_UTF8: 1180 return("UTF-8"); 1181 case XML_CHAR_ENCODING_UTF16LE: 1182 return("UTF-16"); 1183 case XML_CHAR_ENCODING_UTF16BE: 1184 return("UTF-16"); 1185 case XML_CHAR_ENCODING_EBCDIC: 1186 return("EBCDIC"); 1187 case XML_CHAR_ENCODING_UCS4LE: 1188 return("ISO-10646-UCS-4"); 1189 case XML_CHAR_ENCODING_UCS4BE: 1190 return("ISO-10646-UCS-4"); 1191 case XML_CHAR_ENCODING_UCS4_2143: 1192 return("ISO-10646-UCS-4"); 1193 case XML_CHAR_ENCODING_UCS4_3412: 1194 return("ISO-10646-UCS-4"); 1195 case XML_CHAR_ENCODING_UCS2: 1196 return("ISO-10646-UCS-2"); 1197 case XML_CHAR_ENCODING_8859_1: 1198 
return("ISO-8859-1"); 1199 case XML_CHAR_ENCODING_8859_2: 1200 return("ISO-8859-2"); 1201 case XML_CHAR_ENCODING_8859_3: 1202 return("ISO-8859-3"); 1203 case XML_CHAR_ENCODING_8859_4: 1204 return("ISO-8859-4"); 1205 case XML_CHAR_ENCODING_8859_5: 1206 return("ISO-8859-5"); 1207 case XML_CHAR_ENCODING_8859_6: 1208 return("ISO-8859-6"); 1209 case XML_CHAR_ENCODING_8859_7: 1210 return("ISO-8859-7"); 1211 case XML_CHAR_ENCODING_8859_8: 1212 return("ISO-8859-8"); 1213 case XML_CHAR_ENCODING_8859_9: 1214 return("ISO-8859-9"); 1215 case XML_CHAR_ENCODING_2022_JP: 1216 return("ISO-2022-JP"); 1217 case XML_CHAR_ENCODING_SHIFT_JIS: 1218 return("Shift-JIS"); 1219 case XML_CHAR_ENCODING_EUC_JP: 1220 return("EUC-JP"); 1221 case XML_CHAR_ENCODING_ASCII: 1222 return(NULL); 1223 } 1224 return(NULL); 1225 } 1226 1227 /************************************************************************ 1228 * * 1229 * Char encoding handlers * 1230 * * 1231 ************************************************************************/ 1232 1233 1234 /* the size should be growable, but it's not a big deal ... */ 1235 #define MAX_ENCODING_HANDLERS 50 1236 static xmlCharEncodingHandlerPtr *handlers = NULL; 1237 static int nbCharEncodingHandler = 0; 1238 1239 /* 1240 * The default is UTF-8 for XML, that's also the default used for the 1241 * parser internals, so the default encoding handler is NULL 1242 */ 1243 1244 static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL; 1245 1246 /** 1247 * xmlNewCharEncodingHandler: 1248 * @name: the encoding name, in UTF-8 format (ASCII actually) 1249 * @input: the xmlCharEncodingInputFunc to read that encoding 1250 * @output: the xmlCharEncodingOutputFunc to write that encoding 1251 * 1252 * Create and registers an xmlCharEncodingHandler. 1253 * 1254 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error). 
1255 */ 1256 xmlCharEncodingHandlerPtr 1257 xmlNewCharEncodingHandler(const char *name, 1258 xmlCharEncodingInputFunc input, 1259 xmlCharEncodingOutputFunc output) { 1260 xmlCharEncodingHandlerPtr handler; 1261 const char *alias; 1262 char upper[500]; 1263 int i; 1264 char *up = NULL; 1265 1266 /* 1267 * Do the alias resolution 1268 */ 1269 alias = xmlGetEncodingAlias(name); 1270 if (alias != NULL) 1271 name = alias; 1272 1273 /* 1274 * Keep only the uppercase version of the encoding. 1275 */ 1276 if (name == NULL) { 1277 xmlEncodingErr(XML_I18N_NO_NAME, 1278 "xmlNewCharEncodingHandler : no name !\n", NULL); 1279 return(NULL); 1280 } 1281 for (i = 0;i < 499;i++) { 1282 upper[i] = toupper(name[i]); 1283 if (upper[i] == 0) break; 1284 } 1285 upper[i] = 0; 1286 up = xmlMemStrdup(upper); 1287 if (up == NULL) { 1288 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n"); 1289 return(NULL); 1290 } 1291 1292 /* 1293 * allocate and fill-up an handler block. 1294 */ 1295 handler = (xmlCharEncodingHandlerPtr) 1296 xmlMalloc(sizeof(xmlCharEncodingHandler)); 1297 if (handler == NULL) { 1298 xmlFree(up); 1299 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n"); 1300 return(NULL); 1301 } 1302 handler->input = input; 1303 handler->output = output; 1304 handler->name = up; 1305 1306 #ifdef LIBXML_ICONV_ENABLED 1307 handler->iconv_in = NULL; 1308 handler->iconv_out = NULL; 1309 #endif /* LIBXML_ICONV_ENABLED */ 1310 1311 /* 1312 * registers and returns the handler. 1313 */ 1314 xmlRegisterCharEncodingHandler(handler); 1315 #ifdef DEBUG_ENCODING 1316 xmlGenericError(xmlGenericErrorContext, 1317 "Registered encoding handler for %s\n", name); 1318 #endif 1319 return(handler); 1320 } 1321 1322 /** 1323 * xmlInitCharEncodingHandlers: 1324 * 1325 * Initialize the char encoding support, it registers the default 1326 * encoding supported. 1327 * NOTE: while public, this function usually doesn't need to be called 1328 * in normal processing. 
1329 */ 1330 void 1331 xmlInitCharEncodingHandlers(void) { 1332 unsigned short int tst = 0x1234; 1333 unsigned char *ptr = (unsigned char *) &tst; 1334 1335 if (handlers != NULL) return; 1336 1337 handlers = (xmlCharEncodingHandlerPtr *) 1338 xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr)); 1339 1340 if (*ptr == 0x12) xmlLittleEndian = 0; 1341 else if (*ptr == 0x34) xmlLittleEndian = 1; 1342 else { 1343 xmlEncodingErr(XML_ERR_INTERNAL_ERROR, 1344 "Odd problem at endianness detection\n", NULL); 1345 } 1346 1347 if (handlers == NULL) { 1348 xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n"); 1349 return; 1350 } 1351 xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8); 1352 #ifdef LIBXML_OUTPUT_ENABLED 1353 xmlUTF16LEHandler = 1354 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE); 1355 xmlUTF16BEHandler = 1356 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE); 1357 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16); 1358 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1); 1359 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii); 1360 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii); 1361 #ifdef LIBXML_HTML_ENABLED 1362 xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml); 1363 #endif 1364 #else 1365 xmlUTF16LEHandler = 1366 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL); 1367 xmlUTF16BEHandler = 1368 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL); 1369 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL); 1370 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL); 1371 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL); 1372 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL); 1373 #endif /* LIBXML_OUTPUT_ENABLED */ 1374 #ifndef LIBXML_ICONV_ENABLED 1375 #ifdef LIBXML_ISO8859X_ENABLED 1376 xmlRegisterCharEncodingHandlersISO8859x (); 1377 #endif 1378 #endif 1379 1380 } 1381 1382 /** 1383 
* xmlCleanupCharEncodingHandlers: 1384 * 1385 * Cleanup the memory allocated for the char encoding support, it 1386 * unregisters all the encoding handlers and the aliases. 1387 */ 1388 void 1389 xmlCleanupCharEncodingHandlers(void) { 1390 xmlCleanupEncodingAliases(); 1391 1392 if (handlers == NULL) return; 1393 1394 for (;nbCharEncodingHandler > 0;) { 1395 nbCharEncodingHandler--; 1396 if (handlers[nbCharEncodingHandler] != NULL) { 1397 if (handlers[nbCharEncodingHandler]->name != NULL) 1398 xmlFree(handlers[nbCharEncodingHandler]->name); 1399 xmlFree(handlers[nbCharEncodingHandler]); 1400 } 1401 } 1402 xmlFree(handlers); 1403 handlers = NULL; 1404 nbCharEncodingHandler = 0; 1405 xmlDefaultCharEncodingHandler = NULL; 1406 } 1407 1408 /** 1409 * xmlRegisterCharEncodingHandler: 1410 * @handler: the xmlCharEncodingHandlerPtr handler block 1411 * 1412 * Register the char encoding handler, surprising, isn't it ? 1413 */ 1414 void 1415 xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) { 1416 if (handlers == NULL) xmlInitCharEncodingHandlers(); 1417 if (handler == NULL) { 1418 xmlEncodingErr(XML_I18N_NO_HANDLER, 1419 "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL); 1420 return; 1421 } 1422 1423 if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) { 1424 xmlEncodingErr(XML_I18N_EXCESS_HANDLER, 1425 "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n", 1426 "MAX_ENCODING_HANDLERS"); 1427 return; 1428 } 1429 handlers[nbCharEncodingHandler++] = handler; 1430 } 1431 1432 /** 1433 * xmlGetCharEncodingHandler: 1434 * @enc: an xmlCharEncoding value. 1435 * 1436 * Search in the registered set the handler able to read/write that encoding. 
1437 * 1438 * Returns the handler or NULL if not found 1439 */ 1440 xmlCharEncodingHandlerPtr 1441 xmlGetCharEncodingHandler(xmlCharEncoding enc) { 1442 xmlCharEncodingHandlerPtr handler; 1443 1444 if (handlers == NULL) xmlInitCharEncodingHandlers(); 1445 switch (enc) { 1446 case XML_CHAR_ENCODING_ERROR: 1447 return(NULL); 1448 case XML_CHAR_ENCODING_NONE: 1449 return(NULL); 1450 case XML_CHAR_ENCODING_UTF8: 1451 return(NULL); 1452 case XML_CHAR_ENCODING_UTF16LE: 1453 return(xmlUTF16LEHandler); 1454 case XML_CHAR_ENCODING_UTF16BE: 1455 return(xmlUTF16BEHandler); 1456 case XML_CHAR_ENCODING_EBCDIC: 1457 handler = xmlFindCharEncodingHandler("EBCDIC"); 1458 if (handler != NULL) return(handler); 1459 handler = xmlFindCharEncodingHandler("ebcdic"); 1460 if (handler != NULL) return(handler); 1461 break; 1462 case XML_CHAR_ENCODING_UCS4BE: 1463 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4"); 1464 if (handler != NULL) return(handler); 1465 handler = xmlFindCharEncodingHandler("UCS-4"); 1466 if (handler != NULL) return(handler); 1467 handler = xmlFindCharEncodingHandler("UCS4"); 1468 if (handler != NULL) return(handler); 1469 break; 1470 case XML_CHAR_ENCODING_UCS4LE: 1471 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4"); 1472 if (handler != NULL) return(handler); 1473 handler = xmlFindCharEncodingHandler("UCS-4"); 1474 if (handler != NULL) return(handler); 1475 handler = xmlFindCharEncodingHandler("UCS4"); 1476 if (handler != NULL) return(handler); 1477 break; 1478 case XML_CHAR_ENCODING_UCS4_2143: 1479 break; 1480 case XML_CHAR_ENCODING_UCS4_3412: 1481 break; 1482 case XML_CHAR_ENCODING_UCS2: 1483 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2"); 1484 if (handler != NULL) return(handler); 1485 handler = xmlFindCharEncodingHandler("UCS-2"); 1486 if (handler != NULL) return(handler); 1487 handler = xmlFindCharEncodingHandler("UCS2"); 1488 if (handler != NULL) return(handler); 1489 break; 1490 1491 /* 1492 * We used to keep ISO Latin encodings native 
in the 1493 * generated data. This led to so many problems that 1494 * this has been removed. One can still change this 1495 * back by registering no-ops encoders for those 1496 */ 1497 case XML_CHAR_ENCODING_8859_1: 1498 handler = xmlFindCharEncodingHandler("ISO-8859-1"); 1499 if (handler != NULL) return(handler); 1500 break; 1501 case XML_CHAR_ENCODING_8859_2: 1502 handler = xmlFindCharEncodingHandler("ISO-8859-2"); 1503 if (handler != NULL) return(handler); 1504 break; 1505 case XML_CHAR_ENCODING_8859_3: 1506 handler = xmlFindCharEncodingHandler("ISO-8859-3"); 1507 if (handler != NULL) return(handler); 1508 break; 1509 case XML_CHAR_ENCODING_8859_4: 1510 handler = xmlFindCharEncodingHandler("ISO-8859-4"); 1511 if (handler != NULL) return(handler); 1512 break; 1513 case XML_CHAR_ENCODING_8859_5: 1514 handler = xmlFindCharEncodingHandler("ISO-8859-5"); 1515 if (handler != NULL) return(handler); 1516 break; 1517 case XML_CHAR_ENCODING_8859_6: 1518 handler = xmlFindCharEncodingHandler("ISO-8859-6"); 1519 if (handler != NULL) return(handler); 1520 break; 1521 case XML_CHAR_ENCODING_8859_7: 1522 handler = xmlFindCharEncodingHandler("ISO-8859-7"); 1523 if (handler != NULL) return(handler); 1524 break; 1525 case XML_CHAR_ENCODING_8859_8: 1526 handler = xmlFindCharEncodingHandler("ISO-8859-8"); 1527 if (handler != NULL) return(handler); 1528 break; 1529 case XML_CHAR_ENCODING_8859_9: 1530 handler = xmlFindCharEncodingHandler("ISO-8859-9"); 1531 if (handler != NULL) return(handler); 1532 break; 1533 1534 1535 case XML_CHAR_ENCODING_2022_JP: 1536 handler = xmlFindCharEncodingHandler("ISO-2022-JP"); 1537 if (handler != NULL) return(handler); 1538 break; 1539 case XML_CHAR_ENCODING_SHIFT_JIS: 1540 handler = xmlFindCharEncodingHandler("SHIFT-JIS"); 1541 if (handler != NULL) return(handler); 1542 handler = xmlFindCharEncodingHandler("SHIFT_JIS"); 1543 if (handler != NULL) return(handler); 1544 handler = xmlFindCharEncodingHandler("Shift_JIS"); 1545 if (handler != NULL) 
return(handler); 1546 break; 1547 case XML_CHAR_ENCODING_EUC_JP: 1548 handler = xmlFindCharEncodingHandler("EUC-JP"); 1549 if (handler != NULL) return(handler); 1550 break; 1551 default: 1552 break; 1553 } 1554 1555 #ifdef DEBUG_ENCODING 1556 xmlGenericError(xmlGenericErrorContext, 1557 "No handler found for encoding %d\n", enc); 1558 #endif 1559 return(NULL); 1560 } 1561 1562 /** 1563 * xmlFindCharEncodingHandler: 1564 * @name: a string describing the char encoding. 1565 * 1566 * Search in the registered set the handler able to read/write that encoding. 1567 * 1568 * Returns the handler or NULL if not found 1569 */ 1570 xmlCharEncodingHandlerPtr 1571 xmlFindCharEncodingHandler(const char *name) { 1572 const char *nalias; 1573 const char *norig; 1574 xmlCharEncoding alias; 1575 #ifdef LIBXML_ICONV_ENABLED 1576 xmlCharEncodingHandlerPtr enc; 1577 iconv_t icv_in, icv_out; 1578 #endif /* LIBXML_ICONV_ENABLED */ 1579 char upper[100]; 1580 int i; 1581 1582 if (handlers == NULL) xmlInitCharEncodingHandlers(); 1583 if (name == NULL) return(xmlDefaultCharEncodingHandler); 1584 if (name[0] == 0) return(xmlDefaultCharEncodingHandler); 1585 1586 /* 1587 * Do the alias resolution 1588 */ 1589 norig = name; 1590 nalias = xmlGetEncodingAlias(name); 1591 if (nalias != NULL) 1592 name = nalias; 1593 1594 /* 1595 * Check first for directly registered encoding names 1596 */ 1597 for (i = 0;i < 99;i++) { 1598 upper[i] = toupper(name[i]); 1599 if (upper[i] == 0) break; 1600 } 1601 upper[i] = 0; 1602 1603 for (i = 0;i < nbCharEncodingHandler; i++) 1604 if (!strcmp(upper, handlers[i]->name)) { 1605 #ifdef DEBUG_ENCODING 1606 xmlGenericError(xmlGenericErrorContext, 1607 "Found registered handler for encoding %s\n", name); 1608 #endif 1609 return(handlers[i]); 1610 } 1611 1612 #ifdef LIBXML_ICONV_ENABLED 1613 /* check whether iconv can handle this */ 1614 icv_in = iconv_open("UTF-8", name); 1615 icv_out = iconv_open(name, "UTF-8"); 1616 if (icv_in == (iconv_t) -1) { 1617 icv_in = 
iconv_open("UTF-8", upper); 1618 } 1619 if (icv_out == (iconv_t) -1) { 1620 icv_out = iconv_open(upper, "UTF-8"); 1621 } 1622 if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) { 1623 enc = (xmlCharEncodingHandlerPtr) 1624 xmlMalloc(sizeof(xmlCharEncodingHandler)); 1625 if (enc == NULL) { 1626 iconv_close(icv_in); 1627 iconv_close(icv_out); 1628 return(NULL); 1629 } 1630 enc->name = xmlMemStrdup(name); 1631 enc->input = NULL; 1632 enc->output = NULL; 1633 enc->iconv_in = icv_in; 1634 enc->iconv_out = icv_out; 1635 #ifdef DEBUG_ENCODING 1636 xmlGenericError(xmlGenericErrorContext, 1637 "Found iconv handler for encoding %s\n", name); 1638 #endif 1639 return enc; 1640 } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) { 1641 xmlEncodingErr(XML_ERR_INTERNAL_ERROR, 1642 "iconv : problems with filters for '%s'\n", name); 1643 } 1644 #endif /* LIBXML_ICONV_ENABLED */ 1645 1646 #ifdef DEBUG_ENCODING 1647 xmlGenericError(xmlGenericErrorContext, 1648 "No handler found for encoding %s\n", name); 1649 #endif 1650 1651 /* 1652 * Fallback using the canonical names 1653 */ 1654 alias = xmlParseCharEncoding(norig); 1655 if (alias != XML_CHAR_ENCODING_ERROR) { 1656 const char* canon; 1657 canon = xmlGetCharEncodingName(alias); 1658 if ((canon != NULL) && (strcmp(name, canon))) { 1659 return(xmlFindCharEncodingHandler(canon)); 1660 } 1661 } 1662 1663 /* If "none of the above", give up */ 1664 return(NULL); 1665 } 1666 1667 /************************************************************************ 1668 * * 1669 * ICONV based generic conversion functions * 1670 * * 1671 ************************************************************************/ 1672 1673 #ifdef LIBXML_ICONV_ENABLED 1674 /** 1675 * xmlIconvWrapper: 1676 * @cd: iconv converter data structure 1677 * @out: a pointer to an array of bytes to store the result 1678 * @outlen: the length of @out 1679 * @in: a pointer to an array of ISO Latin 1 chars 1680 * @inlen: the length of @in 1681 * 1682 * Returns 0 if 
success, or 1683 * -1 by lack of space, or 1684 * -2 if the transcoding fails (for *in is not valid utf8 string or 1685 * the result of transformation can't fit into the encoding we want), or 1686 * -3 if there the last byte can't form a single output char. 1687 * 1688 * The value of @inlen after return is the number of octets consumed 1689 * as the return value is positive, else unpredictable. 1690 * The value of @outlen after return is the number of ocetes consumed. 1691 */ 1692 static int 1693 xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen, 1694 const unsigned char *in, int *inlen) { 1695 size_t icv_inlen, icv_outlen; 1696 const char *icv_in = (const char *) in; 1697 char *icv_out = (char *) out; 1698 int ret; 1699 1700 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) { 1701 if (outlen != NULL) *outlen = 0; 1702 return(-1); 1703 } 1704 icv_inlen = *inlen; 1705 icv_outlen = *outlen; 1706 ret = iconv(cd, (ICONV_CONST char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen); 1707 *inlen -= icv_inlen; 1708 *outlen -= icv_outlen; 1709 if ((icv_inlen != 0) || (ret == -1)) { 1710 #ifdef EILSEQ 1711 if (errno == EILSEQ) { 1712 return -2; 1713 } else 1714 #endif 1715 #ifdef E2BIG 1716 if (errno == E2BIG) { 1717 return -1; 1718 } else 1719 #endif 1720 #ifdef EINVAL 1721 if (errno == EINVAL) { 1722 return -3; 1723 } else 1724 #endif 1725 { 1726 return -3; 1727 } 1728 } 1729 return 0; 1730 } 1731 #endif /* LIBXML_ICONV_ENABLED */ 1732 1733 /************************************************************************ 1734 * * 1735 * The real API used by libxml for on-the-fly conversion * 1736 * * 1737 ************************************************************************/ 1738 1739 /** 1740 * xmlCharEncFirstLine: 1741 * @handler: char enconding transformation data structure 1742 * @out: an xmlBuffer for the output. 
1743 * @in: an xmlBuffer for the input 1744 * 1745 * Front-end for the encoding handler input function, but handle only 1746 * the very first line, i.e. limit itself to 45 chars. 1747 * 1748 * Returns the number of byte written if success, or 1749 * -1 general error 1750 * -2 if the transcoding fails (for *in is not valid utf8 string or 1751 * the result of transformation can't fit into the encoding we want), or 1752 */ 1753 int 1754 xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out, 1755 xmlBufferPtr in) { 1756 int ret = -2; 1757 int written; 1758 int toconv; 1759 1760 if (handler == NULL) return(-1); 1761 if (out == NULL) return(-1); 1762 if (in == NULL) return(-1); 1763 1764 /* calculate space available */ 1765 written = out->size - out->use; 1766 toconv = in->use; 1767 /* 1768 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38 1769 * 45 chars should be sufficient to reach the end of the encoding 1770 * declaration without going too far inside the document content. 
1771 * on UTF-16 this means 90bytes, on UCS4 this means 180 1772 */ 1773 if (toconv > 180) 1774 toconv = 180; 1775 if (toconv * 2 >= written) { 1776 xmlBufferGrow(out, toconv); 1777 written = out->size - out->use - 1; 1778 } 1779 1780 if (handler->input != NULL) { 1781 ret = handler->input(&out->content[out->use], &written, 1782 in->content, &toconv); 1783 xmlBufferShrink(in, toconv); 1784 out->use += written; 1785 out->content[out->use] = 0; 1786 } 1787 #ifdef LIBXML_ICONV_ENABLED 1788 else if (handler->iconv_in != NULL) { 1789 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use], 1790 &written, in->content, &toconv); 1791 xmlBufferShrink(in, toconv); 1792 out->use += written; 1793 out->content[out->use] = 0; 1794 if (ret == -1) ret = -3; 1795 } 1796 #endif /* LIBXML_ICONV_ENABLED */ 1797 #ifdef DEBUG_ENCODING 1798 switch (ret) { 1799 case 0: 1800 xmlGenericError(xmlGenericErrorContext, 1801 "converted %d bytes to %d bytes of input\n", 1802 toconv, written); 1803 break; 1804 case -1: 1805 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n", 1806 toconv, written, in->use); 1807 break; 1808 case -2: 1809 xmlGenericError(xmlGenericErrorContext, 1810 "input conversion failed due to input error\n"); 1811 break; 1812 case -3: 1813 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n", 1814 toconv, written, in->use); 1815 break; 1816 default: 1817 xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret); 1818 } 1819 #endif /* DEBUG_ENCODING */ 1820 /* 1821 * Ignore when input buffer is not on a boundary 1822 */ 1823 if (ret == -3) ret = 0; 1824 if (ret == -1) ret = 0; 1825 return(ret); 1826 } 1827 1828 /** 1829 * xmlCharEncInFunc: 1830 * @handler: char encoding transformation data structure 1831 * @out: an xmlBuffer for the output. 
1832 * @in: an xmlBuffer for the input 1833 * 1834 * Generic front-end for the encoding handler input function 1835 * 1836 * Returns the number of byte written if success, or 1837 * -1 general error 1838 * -2 if the transcoding fails (for *in is not valid utf8 string or 1839 * the result of transformation can't fit into the encoding we want), or 1840 */ 1841 int 1842 xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out, 1843 xmlBufferPtr in) 1844 { 1845 int ret = -2; 1846 int written; 1847 int toconv; 1848 1849 if (handler == NULL) 1850 return (-1); 1851 if (out == NULL) 1852 return (-1); 1853 if (in == NULL) 1854 return (-1); 1855 1856 toconv = in->use; 1857 if (toconv == 0) 1858 return (0); 1859 written = out->size - out->use; 1860 if (toconv * 2 >= written) { 1861 xmlBufferGrow(out, out->size + toconv * 2); 1862 written = out->size - out->use - 1; 1863 } 1864 if (handler->input != NULL) { 1865 ret = handler->input(&out->content[out->use], &written, 1866 in->content, &toconv); 1867 xmlBufferShrink(in, toconv); 1868 out->use += written; 1869 out->content[out->use] = 0; 1870 } 1871 #ifdef LIBXML_ICONV_ENABLED 1872 else if (handler->iconv_in != NULL) { 1873 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use], 1874 &written, in->content, &toconv); 1875 xmlBufferShrink(in, toconv); 1876 out->use += written; 1877 out->content[out->use] = 0; 1878 if (ret == -1) 1879 ret = -3; 1880 } 1881 #endif /* LIBXML_ICONV_ENABLED */ 1882 switch (ret) { 1883 case 0: 1884 #ifdef DEBUG_ENCODING 1885 xmlGenericError(xmlGenericErrorContext, 1886 "converted %d bytes to %d bytes of input\n", 1887 toconv, written); 1888 #endif 1889 break; 1890 case -1: 1891 #ifdef DEBUG_ENCODING 1892 xmlGenericError(xmlGenericErrorContext, 1893 "converted %d bytes to %d bytes of input, %d left\n", 1894 toconv, written, in->use); 1895 #endif 1896 break; 1897 case -3: 1898 #ifdef DEBUG_ENCODING 1899 xmlGenericError(xmlGenericErrorContext, 1900 "converted %d bytes to %d bytes of 
input, %d left\n", 1901 toconv, written, in->use); 1902 #endif 1903 break; 1904 case -2: { 1905 char buf[50]; 1906 1907 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", 1908 in->content[0], in->content[1], 1909 in->content[2], in->content[3]); 1910 buf[49] = 0; 1911 xmlEncodingErr(XML_I18N_CONV_FAILED, 1912 "input conversion failed due to input error, bytes %s\n", 1913 buf); 1914 } 1915 } 1916 /* 1917 * Ignore when input buffer is not on a boundary 1918 */ 1919 if (ret == -3) 1920 ret = 0; 1921 return (written? written : ret); 1922 } 1923 1924 /** 1925 * xmlCharEncOutFunc: 1926 * @handler: char enconding transformation data structure 1927 * @out: an xmlBuffer for the output. 1928 * @in: an xmlBuffer for the input 1929 * 1930 * Generic front-end for the encoding handler output function 1931 * a first call with @in == NULL has to be made firs to initiate the 1932 * output in case of non-stateless encoding needing to initiate their 1933 * state or the output (like the BOM in UTF16). 1934 * In case of UTF8 sequence conversion errors for the given encoder, 1935 * the content will be automatically remapped to a CharRef sequence. 1936 * 1937 * Returns the number of byte written if success, or 1938 * -1 general error 1939 * -2 if the transcoding fails (for *in is not valid utf8 string or 1940 * the result of transformation can't fit into the encoding we want), or 1941 */ 1942 int 1943 xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out, 1944 xmlBufferPtr in) { 1945 int ret = -2; 1946 int written; 1947 int writtentot = 0; 1948 int toconv; 1949 int output = 0; 1950 1951 if (handler == NULL) return(-1); 1952 if (out == NULL) return(-1); 1953 1954 retry: 1955 1956 written = out->size - out->use; 1957 1958 if (written > 0) 1959 written--; /* Gennady: count '/0' */ 1960 1961 /* 1962 * First specific handling of in = NULL, i.e. 
the initialization call 1963 */ 1964 if (in == NULL) { 1965 toconv = 0; 1966 if (handler->output != NULL) { 1967 ret = handler->output(&out->content[out->use], &written, 1968 NULL, &toconv); 1969 if (ret >= 0) { /* Gennady: check return value */ 1970 out->use += written; 1971 out->content[out->use] = 0; 1972 } 1973 } 1974 #ifdef LIBXML_ICONV_ENABLED 1975 else if (handler->iconv_out != NULL) { 1976 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use], 1977 &written, NULL, &toconv); 1978 out->use += written; 1979 out->content[out->use] = 0; 1980 } 1981 #endif /* LIBXML_ICONV_ENABLED */ 1982 #ifdef DEBUG_ENCODING 1983 xmlGenericError(xmlGenericErrorContext, 1984 "initialized encoder\n"); 1985 #endif 1986 return(0); 1987 } 1988 1989 /* 1990 * Conversion itself. 1991 */ 1992 toconv = in->use; 1993 if (toconv == 0) 1994 return(0); 1995 if (toconv * 4 >= written) { 1996 xmlBufferGrow(out, toconv * 4); 1997 written = out->size - out->use - 1; 1998 } 1999 if (handler->output != NULL) { 2000 ret = handler->output(&out->content[out->use], &written, 2001 in->content, &toconv); 2002 if (written > 0) { 2003 xmlBufferShrink(in, toconv); 2004 out->use += written; 2005 writtentot += written; 2006 } 2007 out->content[out->use] = 0; 2008 } 2009 #ifdef LIBXML_ICONV_ENABLED 2010 else if (handler->iconv_out != NULL) { 2011 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use], 2012 &written, in->content, &toconv); 2013 xmlBufferShrink(in, toconv); 2014 out->use += written; 2015 writtentot += written; 2016 out->content[out->use] = 0; 2017 if (ret == -1) { 2018 if (written > 0) { 2019 /* 2020 * Can be a limitation of iconv 2021 */ 2022 goto retry; 2023 } 2024 ret = -3; 2025 } 2026 } 2027 #endif /* LIBXML_ICONV_ENABLED */ 2028 else { 2029 xmlEncodingErr(XML_I18N_NO_OUTPUT, 2030 "xmlCharEncOutFunc: no output function !\n", NULL); 2031 return(-1); 2032 } 2033 2034 if (ret >= 0) output += ret; 2035 2036 /* 2037 * Attempt to handle error cases 2038 */ 2039 switch (ret) 
{ 2040 case 0: 2041 #ifdef DEBUG_ENCODING 2042 xmlGenericError(xmlGenericErrorContext, 2043 "converted %d bytes to %d bytes of output\n", 2044 toconv, written); 2045 #endif 2046 break; 2047 case -1: 2048 #ifdef DEBUG_ENCODING 2049 xmlGenericError(xmlGenericErrorContext, 2050 "output conversion failed by lack of space\n"); 2051 #endif 2052 break; 2053 case -3: 2054 #ifdef DEBUG_ENCODING 2055 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n", 2056 toconv, written, in->use); 2057 #endif 2058 break; 2059 case -2: { 2060 int len = in->use; 2061 const xmlChar *utf = (const xmlChar *) in->content; 2062 int cur; 2063 2064 cur = xmlGetUTF8Char(utf, &len); 2065 if (cur > 0) { 2066 xmlChar charref[20]; 2067 2068 #ifdef DEBUG_ENCODING 2069 xmlGenericError(xmlGenericErrorContext, 2070 "handling output conversion error\n"); 2071 xmlGenericError(xmlGenericErrorContext, 2072 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 2073 in->content[0], in->content[1], 2074 in->content[2], in->content[3]); 2075 #endif 2076 /* 2077 * Removes the UTF8 sequence, and replace it by a charref 2078 * and continue the transcoding phase, hoping the error 2079 * did not mangle the encoder state. 
2080 */ 2081 snprintf((char *) &charref[0], sizeof(charref), "&#%d;", cur); 2082 xmlBufferShrink(in, len); 2083 xmlBufferAddHead(in, charref, -1); 2084 2085 goto retry; 2086 } else { 2087 char buf[50]; 2088 2089 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", 2090 in->content[0], in->content[1], 2091 in->content[2], in->content[3]); 2092 buf[49] = 0; 2093 xmlEncodingErr(XML_I18N_CONV_FAILED, 2094 "output conversion failed due to conv error, bytes %s\n", 2095 buf); 2096 if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE) 2097 in->content[0] = ' '; 2098 } 2099 break; 2100 } 2101 } 2102 return(ret); 2103 } 2104 2105 /** 2106 * xmlCharEncCloseFunc: 2107 * @handler: char enconding transformation data structure 2108 * 2109 * Generic front-end for encoding handler close function 2110 * 2111 * Returns 0 if success, or -1 in case of error 2112 */ 2113 int 2114 xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) { 2115 int ret = 0; 2116 if (handler == NULL) return(-1); 2117 if (handler->name == NULL) return(-1); 2118 #ifdef LIBXML_ICONV_ENABLED 2119 /* 2120 * Iconv handlers can be used only once, free the whole block. 2121 * and the associated icon resources. 
2122 */ 2123 if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) { 2124 if (handler->name != NULL) 2125 xmlFree(handler->name); 2126 handler->name = NULL; 2127 if (handler->iconv_out != NULL) { 2128 if (iconv_close(handler->iconv_out)) 2129 ret = -1; 2130 handler->iconv_out = NULL; 2131 } 2132 if (handler->iconv_in != NULL) { 2133 if (iconv_close(handler->iconv_in)) 2134 ret = -1; 2135 handler->iconv_in = NULL; 2136 } 2137 xmlFree(handler); 2138 } 2139 #endif /* LIBXML_ICONV_ENABLED */ 2140 #ifdef DEBUG_ENCODING 2141 if (ret) 2142 xmlGenericError(xmlGenericErrorContext, 2143 "failed to close the encoding handler\n"); 2144 else 2145 xmlGenericError(xmlGenericErrorContext, 2146 "closed the encoding handler\n"); 2147 #endif 2148 2149 return(ret); 2150 } 2151 2152 /** 2153 * xmlByteConsumed: 2154 * @ctxt: an XML parser context 2155 * 2156 * This function provides the current index of the parser relative 2157 * to the start of the current entity. This function is computed in 2158 * bytes from the beginning starting at zero and finishing at the 2159 * size in byte of the file if parsing a file. The function is 2160 * of constant cost if the input is UTF-8 but can be costly if run 2161 * on non-UTF-8 input. 2162 * 2163 * Returns the index in bytes from the beginning of the entity or -1 2164 * in case the index could not be computed. 
2165 */ 2166 long 2167 xmlByteConsumed(xmlParserCtxtPtr ctxt) { 2168 xmlParserInputPtr in; 2169 2170 if (ctxt == NULL) return(-1); 2171 in = ctxt->input; 2172 if (in == NULL) return(-1); 2173 if ((in->buf != NULL) && (in->buf->encoder != NULL)) { 2174 unsigned int unused = 0; 2175 xmlCharEncodingHandler * handler = in->buf->encoder; 2176 /* 2177 * Encoding conversion, compute the number of unused original 2178 * bytes from the input not consumed and substract that from 2179 * the raw consumed value, this is not a cheap operation 2180 */ 2181 if (in->end - in->cur > 0) { 2182 unsigned char convbuf[32000]; 2183 const unsigned char *cur = (const unsigned char *)in->cur; 2184 int toconv = in->end - in->cur, written = 32000; 2185 2186 int ret; 2187 2188 if (handler->output != NULL) { 2189 do { 2190 toconv = in->end - cur; 2191 written = 32000; 2192 ret = handler->output(&convbuf[0], &written, 2193 cur, &toconv); 2194 if (ret == -1) return(-1); 2195 unused += written; 2196 cur += toconv; 2197 } while (ret == -2); 2198 #ifdef LIBXML_ICONV_ENABLED 2199 } else if (handler->iconv_out != NULL) { 2200 do { 2201 toconv = in->end - cur; 2202 written = 32000; 2203 ret = xmlIconvWrapper(handler->iconv_out, &convbuf[0], 2204 &written, cur, &toconv); 2205 if (ret < 0) { 2206 if (written > 0) 2207 ret = -2; 2208 else 2209 return(-1); 2210 } 2211 unused += written; 2212 cur += toconv; 2213 } while (ret == -2); 2214 #endif 2215 } else { 2216 /* could not find a converter */ 2217 return(-1); 2218 } 2219 } 2220 if (in->buf->rawconsumed < unused) 2221 return(-1); 2222 return(in->buf->rawconsumed - unused); 2223 } 2224 return(in->consumed + (in->cur - in->base)); 2225 } 2226 2227 #ifndef LIBXML_ICONV_ENABLED 2228 #ifdef LIBXML_ISO8859X_ENABLED 2229 2230 /** 2231 * UTF8ToISO8859x: 2232 * @out: a pointer to an array of bytes to store the result 2233 * @outlen: the length of @out 2234 * @in: a pointer to an array of UTF-8 chars 2235 * @inlen: the length of @in 2236 * @xlattable: the 2-level 
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out.
 *
 * Layout of @xlattable as used by the lookups below:
 *   - entries 0..31 are indexed by the low 5 bits of a 2-byte lead byte,
 *   - entries 32..47 are indexed by the low 4 bits of a 3-byte lead byte,
 *   - 64-entry blocks start at offset 48 and are indexed by the low 6 bits
 *     of a continuation byte; a resulting 0 means "not in character set".
 *
 * At most *@outlen bytes are written. Conversion stops early, without
 * error, when @out is full.
 *
 * Returns the number of bytes written to @out if success (0 when @in is
 *     NULL), -2 if the input is not valid UTF-8, is truncated, or holds a
 *     character that cannot be transcoded, or -1 if an argument is invalid.
 * The value of @inlen after return is the number of octets consumed, i.e.
 *     the size of the characters that were completely converted.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned char const *xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* processed;     /* end of the last converted char */
    int failed = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    /* negative lengths are handled as empty buffers */
    outend = out + ((*outlen > 0) ? *outlen : 0);
    inend = in + ((*inlen > 0) ? *inlen : 0);
    processed = in;

    /* every character produces exactly one byte: stop when @out is full */
    while ((in < inend) && (out < outend)) {
        unsigned char d = *in++;
        unsigned char mapped;

        if (d < 0x80) {
            /* ASCII (including NUL) is copied through unchanged */
            *out++ = d;
            processed = in;
            continue;
        }
        if (d < 0xC2) {
            /*
             * 0x80..0xBF: trailing byte in leading position,
             * 0xC0..0xC1: lead byte of an overlong (invalid) sequence
             */
            failed = 1;
            break;
        }
        if (d < 0xE0) {
            unsigned char c;

            if (in >= inend) {
                /* trailing byte not in input buffer */
                failed = 1;
                break;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                failed = 1;
                break;
            }
            mapped = xlattable[48 + (c & 0x3F) + xlattable[d & 0x1F] * 64];
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;

            if (inend - in < 2) {
                /* trailing bytes not in input buffer */
                failed = 1;
                break;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                failed = 1;
                break;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                failed = 1;
                break;
            }
            mapped = xlattable[48 + (c2 & 0x3F) +
                         xlattable[48 + (c1 & 0x3F) +
                             xlattable[32 + (d & 0x0F)] * 64] * 64];
        } else {
            /* cannot transcode >= U+010000 */
            failed = 1;
            break;
        }
        if (mapped == 0) {
            /* not in character set */
            failed = 1;
            break;
        }
        *out++ = mapped;
        processed = in;
    }

    /*
     * On failure the offending character is not counted as consumed,
     * whatever number of its bytes had already been read.
     */
    *outlen = (int) (out - outstart);
    *inlen = (int) (processed - instart);
    if (failed)
        return(-2);
    return(*outlen);
}

/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out.
 * Returns 0 if success, or -1 otherwise
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of ocetes produced.
 */
2359 */ 2360 static int 2361 ISO8859xToUTF8(unsigned char* out, int *outlen, 2362 const unsigned char* in, int *inlen, 2363 unsigned short const *unicodetable) { 2364 unsigned char* outstart = out; 2365 unsigned char* outend; 2366 const unsigned char* instart = in; 2367 const unsigned char* inend; 2368 const unsigned char* instop; 2369 unsigned int c; 2370 2371 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || 2372 (in == NULL) || (unicodetable == NULL)) 2373 return(-1); 2374 outend = out + *outlen; 2375 inend = in + *inlen; 2376 instop = inend; 2377 c = *in; 2378 while (in < inend && out < outend - 1) { 2379 if (c >= 0x80) { 2380 c = unicodetable [c - 0x80]; 2381 if (c == 0) { 2382 /* undefined code point */ 2383 *outlen = out - outstart; 2384 *inlen = in - instart; 2385 return (-1); 2386 } 2387 if (c < 0x800) { 2388 *out++ = ((c >> 6) & 0x1F) | 0xC0; 2389 *out++ = (c & 0x3F) | 0x80; 2390 } else { 2391 *out++ = ((c >> 12) & 0x0F) | 0xE0; 2392 *out++ = ((c >> 6) & 0x3F) | 0x80; 2393 *out++ = (c & 0x3F) | 0x80; 2394 } 2395 ++in; 2396 c = *in; 2397 } 2398 if (instop - in > outend - out) instop = in + (outend - out); 2399 while (c < 0x80 && in < instop) { 2400 *out++ = c; 2401 ++in; 2402 c = *in; 2403 } 2404 } 2405 if (in < inend && out < outend && c < 0x80) { 2406 *out++ = c; 2407 ++in; 2408 } 2409 *outlen = out - outstart; 2410 *inlen = in - instart; 2411 return (*outlen); 2412 } 2413 2414 2415 /************************************************************************ 2416 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding * 2417 ************************************************************************/ 2418 2419 static unsigned short const xmlunicodetable_ISO8859_2 [128] = { 2420 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 2421 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 2422 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 2423 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 2424 
0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7, 2425 0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b, 2426 0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7, 2427 0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c, 2428 0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7, 2429 0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e, 2430 0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7, 2431 0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df, 2432 0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7, 2433 0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f, 2434 0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7, 2435 0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9, 2436 }; 2437 2438 static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = { 2439 "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00" 2440 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2441 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2442 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2443 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2444 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2445 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2446 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 2447 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 2448 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00" 2449 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00" 2450 "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef" 2451 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00" 2452 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2453 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00" 2454 
"\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00" 2455 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00" 2456 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2457 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2458 "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00" 2459 "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba" 2460 "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9" 2461 "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00" 2462 "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00" 2463 "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf" 2464 "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00" 2465 "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00" 2466 }; 2467 2468 static unsigned short const xmlunicodetable_ISO8859_3 [128] = { 2469 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 2470 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 2471 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 2472 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 2473 0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7, 2474 0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b, 2475 0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7, 2476 0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c, 2477 0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7, 2478 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 2479 0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7, 2480 0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df, 2481 0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7, 2482 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 2483 0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7, 2484 0x011d, 0x00f9, 
0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9, 2485 }; 2486 2487 static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = { 2488 "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00" 2489 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2490 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2491 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2492 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2493 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2494 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2495 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 2496 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 2497 "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00" 2498 "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00" 2499 "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00" 2500 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb" 2501 "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00" 2502 "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2503 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2504 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00" 2505 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2506 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2507 "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2508 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2509 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2510 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2511 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2512 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba" 2513 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00" 2514 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00" 2515 "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 2516 "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf" 2517 "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 2518 "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00" 2519 }; 2520 2521 static unsigned short const xmlunicodetable_ISO8859_4 [128] = { 2522 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 2523 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 2524 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 2525 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 2526 0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7, 2527 0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af, 2528 0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7, 2529 0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b, 2530 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e, 2531 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a, 2532 0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 2533 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df, 2534 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f, 2535 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b, 2536 0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 2537 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9, 2538 }; 2539 2540 static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = { 2541 "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00" 2542 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2543 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2544 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2545 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2546 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2547 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2548 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 2549 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 2550 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf" 2551 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00" 2552 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00" 2553 "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00" 2554 "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7" 2555 "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00" 2556 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00" 2557 "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00" 2558 "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00" 2559 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00" 2560 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00" 2561 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00" 2562 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2563 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2564 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00" 2565 "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf" 2566 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00" 2567 "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00" 2568 }; 2569 2570 static unsigned short const xmlunicodetable_ISO8859_5 [128] = { 2571 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 2572 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 2573 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 2574 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 
0x009e, 0x009f, 2575 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, 2576 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f, 2577 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 2578 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, 2579 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 2580 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f, 2581 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 2582 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f, 2583 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 2584 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f, 2585 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457, 2586 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f, 2587 }; 2588 2589 static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = { 2590 "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2591 "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2592 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2593 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2594 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2595 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2596 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2597 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 2598 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 2599 "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00" 2600 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2601 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf" 2602 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" 2603 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 2604 
"\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" 2605 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 2606 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff" 2607 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2608 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2609 "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2610 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2611 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2612 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2613 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2614 "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2615 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2616 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2617 }; 2618 2619 static unsigned short const xmlunicodetable_ISO8859_6 [128] = { 2620 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 2621 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 2622 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 2623 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 2624 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000, 2625 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000, 2626 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 2627 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f, 2628 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627, 2629 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f, 2630 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637, 2631 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 2632 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647, 2633 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f, 2634 0x0650, 0x0651, 
0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 2635 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 2636 }; 2637 2638 static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = { 2639 "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2640 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00" 2641 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2642 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2643 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2644 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2645 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2646 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 2647 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 2648 "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00" 2649 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2650 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2651 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2652 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2653 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2654 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00" 2655 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf" 2656 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 2657 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00" 2658 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 2659 "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2660 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2661 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2662 }; 2663 2664 static unsigned short const xmlunicodetable_ISO8859_7 [128] = { 2665 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 
0x0086, 0x0087, 2666 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 2667 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 2668 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 2669 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7, 2670 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015, 2671 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7, 2672 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f, 2673 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 2674 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, 2675 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, 2676 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af, 2677 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7, 2678 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf, 2679 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7, 2680 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000, 2681 }; 2682 2683 static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = { 2684 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06" 2685 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2686 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2687 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2688 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2689 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2690 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2691 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 2692 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 2693 "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00" 2694 "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00" 2695 
"\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2696 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2697 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2698 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2699 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2700 "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00" 2701 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2702 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2703 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2704 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2705 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2706 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2707 "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf" 2708 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 2709 "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" 2710 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 2711 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00" 2712 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2713 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2714 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2715 }; 2716 2717 static unsigned short const xmlunicodetable_ISO8859_8 [128] = { 2718 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 2719 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 2720 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 2721 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 2722 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 2723 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 2724 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 
2725 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000, 2726 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 2727 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 2728 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 2729 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017, 2730 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7, 2731 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df, 2732 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7, 2733 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000, 2734 }; 2735 2736 static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = { 2737 "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2738 "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00" 2739 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2740 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2741 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2742 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2743 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2744 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 2745 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 2746 "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf" 2747 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00" 2748 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2749 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2750 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2751 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2752 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2753 "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00" 2754 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2755 "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00" 2756 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2757 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2758 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2759 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2760 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe" 2761 "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00" 2762 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2763 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2764 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2765 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 2766 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00" 2767 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2768 }; 2769 2770 static unsigned short const xmlunicodetable_ISO8859_9 [128] = { 2771 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 2772 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 2773 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 2774 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 2775 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 2776 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 2777 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 2778 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 2779 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 2780 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 2781 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 2782 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df, 2783 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 2784 0x00e8, 
0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 2785 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 2786 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff, 2787 }; 2788 2789 static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = { 2790 "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2791 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2792 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2793 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2794 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2795 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2796 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2797 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 2798 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 2799 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" 2800 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" 2801 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 2802 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf" 2803 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 2804 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff" 2805 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2806 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0" 2807 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2808 "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2809 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2810 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe" 2811 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2812 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2813 }; 2814 2815 static unsigned short const 
xmlunicodetable_ISO8859_10 [128] = { 2816 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 2817 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 2818 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 2819 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 2820 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7, 2821 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a, 2822 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7, 2823 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b, 2824 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e, 2825 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf, 2826 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168, 2827 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, 2828 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f, 2829 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef, 2830 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169, 2831 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138, 2832 }; 2833 2834 static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = { 2835 "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2836 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2837 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2838 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2839 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2840 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2841 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2842 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 2843 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 2844 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00" 2845 
"\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00" 2846 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00" 2847 "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00" 2848 "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7" 2849 "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00" 2850 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00" 2851 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2852 "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00" 2853 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00" 2854 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2855 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2856 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2857 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2858 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2859 "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2860 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2861 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2862 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf" 2863 "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf" 2864 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef" 2865 "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00" 2866 }; 2867 2868 static unsigned short const xmlunicodetable_ISO8859_11 [128] = { 2869 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 2870 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 2871 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 2872 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 2873 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07, 2874 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 
0x0e0f, 2875 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17, 2876 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f, 2877 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27, 2878 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f, 2879 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37, 2880 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f, 2881 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47, 2882 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f, 2883 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57, 2884 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000, 2885 }; 2886 2887 static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = { 2888 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2889 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2890 "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2891 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2892 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2893 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2894 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2895 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 2896 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 2897 "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2898 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2899 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2900 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2901 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2902 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00" 2903 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" 2904 
"\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" 2905 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 2906 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf" 2907 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2908 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2909 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2910 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2911 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 2912 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00" 2913 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2914 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2915 }; 2916 2917 static unsigned short const xmlunicodetable_ISO8859_13 [128] = { 2918 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 2919 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 2920 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 2921 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 2922 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7, 2923 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6, 2924 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7, 2925 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6, 2926 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112, 2927 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b, 2928 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7, 2929 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df, 2930 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113, 2931 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c, 2932 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7, 2933 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019, 2934 }; 2935 2936 static 
unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = { 2937 "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2938 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2939 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2940 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2941 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2942 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2943 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2944 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 2945 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 2946 "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00" 2947 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00" 2948 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2949 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2950 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2951 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2952 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2953 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00" 2954 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2955 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2956 "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00" 2957 "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf" 2958 "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00" 2959 "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00" 2960 "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00" 2961 "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00" 2962 "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00" 2963 
"\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00" 2964 "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00" 2965 "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00" 2966 "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1" 2967 "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00" 2968 }; 2969 2970 static unsigned short const xmlunicodetable_ISO8859_14 [128] = { 2971 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 2972 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 2973 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 2974 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 2975 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7, 2976 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178, 2977 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56, 2978 0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61, 2979 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 2980 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 2981 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a, 2982 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df, 2983 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 2984 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 2985 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b, 2986 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff, 2987 }; 2988 2989 static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = { 2990 "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2991 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2992 "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2993 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2994 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2995 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2996 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2997 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 2998 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 2999 "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00" 3000 "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3001 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3002 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3003 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3004 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00" 3005 "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00" 3006 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1" 3007 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3008 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3009 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00" 3010 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3011 "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3012 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3013 "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3014 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3015 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3016 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3017 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3018 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3019 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3020 "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3021 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3022 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3023 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3024 "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00" 3025 "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3026 "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00" 3027 "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00" 3028 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3029 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3030 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf" 3031 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3032 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff" 3033 }; 3034 3035 static unsigned short const xmlunicodetable_ISO8859_15 [128] = { 3036 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3037 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3038 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3039 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3040 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7, 3041 0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 3042 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7, 3043 0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf, 3044 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 3045 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 3046 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 3047 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, 3048 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 3049 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 3050 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 3051 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, 3052 }; 3053 3054 static 
unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = { 3055 "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3056 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3057 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3058 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3059 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3060 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3061 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3062 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3063 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3064 "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf" 3065 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf" 3066 "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3067 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3068 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3069 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3070 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3071 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3072 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00" 3073 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3074 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3075 "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3076 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3077 "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00" 3078 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3079 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" 3080 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3081 
"\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff" 3082 }; 3083 3084 static unsigned short const xmlunicodetable_ISO8859_16 [128] = { 3085 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3086 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3087 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3088 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3089 0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7, 3090 0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b, 3091 0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7, 3092 0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c, 3093 0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7, 3094 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 3095 0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a, 3096 0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df, 3097 0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7, 3098 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 3099 0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b, 3100 0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff, 3101 }; 3102 3103 static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = { 3104 "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00" 3105 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3106 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3107 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3108 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3109 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3110 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3111 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3112 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3113 
"\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00" 3114 "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00" 3115 "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00" 3116 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00" 3117 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3118 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3119 "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3120 "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00" 3121 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3122 "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00" 3123 "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3124 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3125 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3126 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3127 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3128 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3129 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00" 3130 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3131 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3132 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00" 3133 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3134 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3135 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3136 "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00" 3137 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3138 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3139 "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3140 
"\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf" 3141 "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3142 "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff" 3143 }; 3144 3145 3146 /* 3147 * auto-generated functions for ISO-8859-2 .. ISO-8859-16 3148 */ 3149 3150 static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen, 3151 const unsigned char* in, int *inlen) { 3152 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2); 3153 } 3154 static int UTF8ToISO8859_2 (unsigned char* out, int *outlen, 3155 const unsigned char* in, int *inlen) { 3156 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2); 3157 } 3158 3159 static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen, 3160 const unsigned char* in, int *inlen) { 3161 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3); 3162 } 3163 static int UTF8ToISO8859_3 (unsigned char* out, int *outlen, 3164 const unsigned char* in, int *inlen) { 3165 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3); 3166 } 3167 3168 static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen, 3169 const unsigned char* in, int *inlen) { 3170 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4); 3171 } 3172 static int UTF8ToISO8859_4 (unsigned char* out, int *outlen, 3173 const unsigned char* in, int *inlen) { 3174 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4); 3175 } 3176 3177 static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen, 3178 const unsigned char* in, int *inlen) { 3179 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5); 3180 } 3181 static int UTF8ToISO8859_5 (unsigned char* out, int *outlen, 3182 const unsigned char* in, int *inlen) { 3183 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5); 3184 } 3185 3186 static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen, 3187 const 
unsigned char* in, int *inlen) { 3188 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6); 3189 } 3190 static int UTF8ToISO8859_6 (unsigned char* out, int *outlen, 3191 const unsigned char* in, int *inlen) { 3192 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6); 3193 } 3194 3195 static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen, 3196 const unsigned char* in, int *inlen) { 3197 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7); 3198 } 3199 static int UTF8ToISO8859_7 (unsigned char* out, int *outlen, 3200 const unsigned char* in, int *inlen) { 3201 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7); 3202 } 3203 3204 static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen, 3205 const unsigned char* in, int *inlen) { 3206 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8); 3207 } 3208 static int UTF8ToISO8859_8 (unsigned char* out, int *outlen, 3209 const unsigned char* in, int *inlen) { 3210 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8); 3211 } 3212 3213 static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen, 3214 const unsigned char* in, int *inlen) { 3215 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9); 3216 } 3217 static int UTF8ToISO8859_9 (unsigned char* out, int *outlen, 3218 const unsigned char* in, int *inlen) { 3219 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9); 3220 } 3221 3222 static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen, 3223 const unsigned char* in, int *inlen) { 3224 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10); 3225 } 3226 static int UTF8ToISO8859_10 (unsigned char* out, int *outlen, 3227 const unsigned char* in, int *inlen) { 3228 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10); 3229 } 3230 3231 static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen, 
3232 const unsigned char* in, int *inlen) { 3233 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11); 3234 } 3235 static int UTF8ToISO8859_11 (unsigned char* out, int *outlen, 3236 const unsigned char* in, int *inlen) { 3237 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11); 3238 } 3239 3240 static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen, 3241 const unsigned char* in, int *inlen) { 3242 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13); 3243 } 3244 static int UTF8ToISO8859_13 (unsigned char* out, int *outlen, 3245 const unsigned char* in, int *inlen) { 3246 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13); 3247 } 3248 3249 static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen, 3250 const unsigned char* in, int *inlen) { 3251 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14); 3252 } 3253 static int UTF8ToISO8859_14 (unsigned char* out, int *outlen, 3254 const unsigned char* in, int *inlen) { 3255 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14); 3256 } 3257 3258 static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen, 3259 const unsigned char* in, int *inlen) { 3260 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15); 3261 } 3262 static int UTF8ToISO8859_15 (unsigned char* out, int *outlen, 3263 const unsigned char* in, int *inlen) { 3264 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15); 3265 } 3266 3267 static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen, 3268 const unsigned char* in, int *inlen) { 3269 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16); 3270 } 3271 static int UTF8ToISO8859_16 (unsigned char* out, int *outlen, 3272 const unsigned char* in, int *inlen) { 3273 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16); 3274 } 3275 3276 static void 3277 
xmlRegisterCharEncodingHandlersISO8859x (void) { 3278 xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2); 3279 xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3); 3280 xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4); 3281 xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5); 3282 xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6); 3283 xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7); 3284 xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8); 3285 xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9); 3286 xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10); 3287 xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11); 3288 xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13); 3289 xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14); 3290 xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15); 3291 xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16); 3292 } 3293 3294 #endif 3295 #endif 3296 3297 #define bottom_encoding 3298 #include "elfgcchack.h" 3299