1 /*
   2  * encoding.c : implements the encoding conversion functions needed for XML
   3  *
   4  * Related specs:
   5  * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
   6  * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
   7  * [ISO-10646]    UTF-8 and UTF-16 in Annexes
   8  * [ISO-8859-1]   ISO Latin-1 characters codes.
   9  * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
  10  *                Worldwide Character Encoding -- Version 1.0", Addison-
  11  *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
  12  *                described in Unicode Technical Report #4.
  13  * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
  14  *                Information Interchange, ANSI X3.4-1986.
  15  *
  16  * See Copyright for the status of this software.
  17  *
  18  * daniel@veillard.com
  19  *
  20  * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
  21  */
  22 
  23 #define IN_LIBXML
  24 #include "libxml.h"
  25 
  26 #include <string.h>
  27 
  28 #ifdef HAVE_CTYPE_H
  29 #include <ctype.h>
  30 #endif
  31 #ifdef HAVE_STDLIB_H
  32 #include <stdlib.h>
  33 #endif
  34 #ifdef LIBXML_ICONV_ENABLED
  35 #ifdef HAVE_ERRNO_H
  36 #include <errno.h>
  37 #endif
  38 #endif
  39 #include <libxml/encoding.h>
  40 #include <libxml/xmlmemory.h>
  41 #ifdef LIBXML_HTML_ENABLED
  42 #include <libxml/HTMLparser.h>
  43 #endif
  44 #include <libxml/globals.h>
  45 #include <libxml/xmlerror.h>
  46 
/*
 * File-local slots for the UTF-16LE and UTF-16BE encoding handlers.
 * Both start out NULL.
 * NOTE(review): presumably filled in by the handler registration code
 * elsewhere in this file -- confirm.
 */
static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
  49 
/*
 * One entry of the encoding alias table: associates an alias string with
 * the encoding name it stands for.  Both strings are released with
 * xmlFree() by xmlCleanupEncodingAliases().
 */
typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
struct _xmlCharEncodingAlias {
    const char *name;   /* encoding name handed back by xmlGetEncodingAlias() */
    const char *alias;  /* alias, compared against the upper-cased lookup key */
};

/* The alias table itself; NULL until the first alias is registered. */
static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
/* Number of entries currently in use in xmlCharEncodingAliases. */
static int xmlCharEncodingAliasesNb = 0;
/* Number of entries allocated for xmlCharEncodingAliases. */
static int xmlCharEncodingAliasesMax = 0;
  60 
  61 #ifdef LIBXML_ICONV_ENABLED
  62 #if 0
  63 #define DEBUG_ENCODING  /* Define this to get encoding traces */
  64 #endif
  65 #else
  66 #ifdef LIBXML_ISO8859X_ENABLED
  67 static void xmlRegisterCharEncodingHandlersISO8859x (void);
  68 #endif
  69 #endif
  70 
/*
 * Byte-order flag: non-zero means the host is treated as little-endian.
 * Defaults to 1.
 * NOTE(review): presumably corrected at initialisation time by code
 * outside this chunk -- confirm.
 */
static int xmlLittleEndian = 1;
  72 
  73 /**
  74  * xmlEncodingErrMemory:
  75  * @extra:  extra informations
  76  *
  77  * Handle an out of memory condition
  78  */
  79 static void
  80 xmlEncodingErrMemory(const char *extra)
  81 {
  82     __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
  83 }
  84 
  85 /**
  86  * xmlErrEncoding:
  87  * @error:  the error number
  88  * @msg:  the error message
  89  *
  90  * n encoding error
  91  */
  92 static void
  93 xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
  94 {
  95     __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
  96                     XML_FROM_I18N, error, XML_ERR_FATAL,
  97                     NULL, 0, val, NULL, NULL, 0, 0, msg, val);
  98 }
  99 
 100 /************************************************************************
 101  *                                  *
 102  *      Conversions To/From UTF8 encoding           *
 103  *                                  *
 104  ************************************************************************/
 105 
 106 /**
 107  * asciiToUTF8:
 108  * @out:  a pointer to an array of bytes to store the result
 109  * @outlen:  the length of @out
 110  * @in:  a pointer to an array of ASCII chars
 111  * @inlen:  the length of @in
 112  *
 113  * Take a block of ASCII chars in and try to convert it to an UTF-8
 114  * block of chars out.
 115  * Returns 0 if success, or -1 otherwise
 116  * The value of @inlen after return is the number of octets consumed
 117  *     if the return value is positive, else unpredictable.
 118  * The value of @outlen after return is the number of octets consumed.
 119  */
 120 static int
 121 asciiToUTF8(unsigned char* out, int *outlen,
 122               const unsigned char* in, int *inlen) {
 123     unsigned char* outstart = out;
 124     const unsigned char* base = in;
 125     const unsigned char* processed = in;
 126     unsigned char* outend = out + *outlen;
 127     const unsigned char* inend;
 128     unsigned int c;
 129 
 130     inend = in + (*inlen);
 131     while ((in < inend) && (out - outstart + 5 < *outlen)) {
 132     c= *in++;
 133 
 134         if (out >= outend)
 135         break;
 136         if (c < 0x80) {
 137         *out++ = c;
 138     } else {
 139         *outlen = out - outstart;
 140         *inlen = processed - base;
 141         return(-1);
 142     }
 143 
 144     processed = (const unsigned char*) in;
 145     }
 146     *outlen = out - outstart;
 147     *inlen = processed - base;
 148     return(*outlen);
 149 }
 150 
 151 #ifdef LIBXML_OUTPUT_ENABLED
 152 /**
 153  * UTF8Toascii:
 154  * @out:  a pointer to an array of bytes to store the result
 155  * @outlen:  the length of @out
 156  * @in:  a pointer to an array of UTF-8 chars
 157  * @inlen:  the length of @in
 158  *
 159  * Take a block of UTF-8 chars in and try to convert it to an ASCII
 160  * block of chars out.
 161  *
 162  * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
 163  * The value of @inlen after return is the number of octets consumed
 164  *     if the return value is positive, else unpredictable.
 165  * The value of @outlen after return is the number of octets consumed.
 166  */
 167 static int
 168 UTF8Toascii(unsigned char* out, int *outlen,
 169               const unsigned char* in, int *inlen) {
 170     const unsigned char* processed = in;
 171     const unsigned char* outend;
 172     const unsigned char* outstart = out;
 173     const unsigned char* instart = in;
 174     const unsigned char* inend;
 175     unsigned int c, d;
 176     int trailing;
 177 
 178     if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
 179     if (in == NULL) {
 180         /*
 181      * initialization nothing to do
 182      */
 183     *outlen = 0;
 184     *inlen = 0;
 185     return(0);
 186     }
 187     inend = in + (*inlen);
 188     outend = out + (*outlen);
 189     while (in < inend) {
 190     d = *in++;
 191     if      (d < 0x80)  { c= d; trailing= 0; }
 192     else if (d < 0xC0) {
 193         /* trailing byte in leading position */
 194         *outlen = out - outstart;
 195         *inlen = processed - instart;
 196         return(-2);
 197         } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
 198         else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
 199         else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
 200     else {
 201         /* no chance for this in Ascii */
 202         *outlen = out - outstart;
 203         *inlen = processed - instart;
 204         return(-2);
 205     }
 206 
 207     if (inend - in < trailing) {
 208         break;
 209     }
 210 
 211     for ( ; trailing; trailing--) {
 212         if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
 213         break;
 214         c <<= 6;
 215         c |= d & 0x3F;
 216     }
 217 
 218     /* assertion: c is a single UTF-4 value */
 219     if (c < 0x80) {
 220         if (out >= outend)
 221         break;
 222         *out++ = c;
 223     } else {
 224         /* no chance for this in Ascii */
 225         *outlen = out - outstart;
 226         *inlen = processed - instart;
 227         return(-2);
 228     }
 229     processed = in;
 230     }
 231     *outlen = out - outstart;
 232     *inlen = processed - instart;
 233     return(*outlen);
 234 }
 235 #endif /* LIBXML_OUTPUT_ENABLED */
 236 
 237 /**
 238  * isolat1ToUTF8:
 239  * @out:  a pointer to an array of bytes to store the result
 240  * @outlen:  the length of @out
 241  * @in:  a pointer to an array of ISO Latin 1 chars
 242  * @inlen:  the length of @in
 243  *
 244  * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 245  * block of chars out.
 246  * Returns the number of bytes written if success, or -1 otherwise
 247  * The value of @inlen after return is the number of octets consumed
 248  *     if the return value is positive, else unpredictable.
 249  * The value of @outlen after return is the number of octets consumed.
 250  */
 251 int
 252 isolat1ToUTF8(unsigned char* out, int *outlen,
 253               const unsigned char* in, int *inlen) {
 254     unsigned char* outstart = out;
 255     const unsigned char* base = in;
 256     unsigned char* outend;
 257     const unsigned char* inend;
 258     const unsigned char* instop;
 259 
 260     if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
 261     return(-1);
 262 
 263     outend = out + *outlen;
 264     inend = in + (*inlen);
 265     instop = inend;
 266 
 267     while (in < inend && out < outend - 1) {
 268         if (*in >= 0x80) {
 269         *out++ = (((*in) >>  6) & 0x1F) | 0xC0;
 270         *out++ = ((*in) & 0x3F) | 0x80;
 271         ++in;
 272     }
 273     if (instop - in > outend - out) instop = in + (outend - out);
 274     while (in < instop && *in < 0x80) {
 275         *out++ = *in++;
 276     }
 277     }
 278     if (in < inend && out < outend && *in < 0x80) {
 279         *out++ = *in++;
 280     }
 281     *outlen = out - outstart;
 282     *inlen = in - base;
 283     return(*outlen);
 284 }
 285 
 286 /**
 287  * UTF8ToUTF8:
 288  * @out:  a pointer to an array of bytes to store the result
 289  * @outlen:  the length of @out
 290  * @inb:  a pointer to an array of UTF-8 chars
 291  * @inlenb:  the length of @in in UTF-8 chars
 292  *
 293  * No op copy operation for UTF8 handling.
 294  *
 295  * Returns the number of bytes written, or -1 if lack of space.
 296  *     The value of *inlen after return is the number of octets consumed
 297  *     if the return value is positive, else unpredictable.
 298  */
 299 static int
 300 UTF8ToUTF8(unsigned char* out, int *outlen,
 301            const unsigned char* inb, int *inlenb)
 302 {
 303     int len;
 304 
 305     if ((out == NULL) || (inb == NULL) || (outlen == NULL) || (inlenb == NULL))
 306     return(-1);
 307     if (*outlen > *inlenb) {
 308     len = *inlenb;
 309     } else {
 310     len = *outlen;
 311     }
 312     if (len < 0)
 313     return(-1);
 314 
 315     memcpy(out, inb, len);
 316 
 317     *outlen = len;
 318     *inlenb = len;
 319     return(*outlen);
 320 }
 321 
 322 
 323 #ifdef LIBXML_OUTPUT_ENABLED
 324 /**
 325  * UTF8Toisolat1:
 326  * @out:  a pointer to an array of bytes to store the result
 327  * @outlen:  the length of @out
 328  * @in:  a pointer to an array of UTF-8 chars
 329  * @inlen:  the length of @in
 330  *
 331  * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 332  * block of chars out.
 333  *
 334  * Returns the number of bytes written if success, -2 if the transcoding fails,
 335            or -1 otherwise
 336  * The value of @inlen after return is the number of octets consumed
 337  *     if the return value is positive, else unpredictable.
 338  * The value of @outlen after return is the number of octets consumed.
 339  */
 340 int
 341 UTF8Toisolat1(unsigned char* out, int *outlen,
 342               const unsigned char* in, int *inlen) {
 343     const unsigned char* processed = in;
 344     const unsigned char* outend;
 345     const unsigned char* outstart = out;
 346     const unsigned char* instart = in;
 347     const unsigned char* inend;
 348     unsigned int c, d;
 349     int trailing;
 350 
 351     if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
 352     if (in == NULL) {
 353         /*
 354      * initialization nothing to do
 355      */
 356     *outlen = 0;
 357     *inlen = 0;
 358     return(0);
 359     }
 360     inend = in + (*inlen);
 361     outend = out + (*outlen);
 362     while (in < inend) {
 363     d = *in++;
 364     if      (d < 0x80)  { c= d; trailing= 0; }
 365     else if (d < 0xC0) {
 366         /* trailing byte in leading position */
 367         *outlen = out - outstart;
 368         *inlen = processed - instart;
 369         return(-2);
 370         } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
 371         else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
 372         else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
 373     else {
 374         /* no chance for this in IsoLat1 */
 375         *outlen = out - outstart;
 376         *inlen = processed - instart;
 377         return(-2);
 378     }
 379 
 380     if (inend - in < trailing) {
 381         break;
 382     }
 383 
 384     for ( ; trailing; trailing--) {
 385         if (in >= inend)
 386         break;
 387         if (((d= *in++) & 0xC0) != 0x80) {
 388         *outlen = out - outstart;
 389         *inlen = processed - instart;
 390         return(-2);
 391         }
 392         c <<= 6;
 393         c |= d & 0x3F;
 394     }
 395 
 396     /* assertion: c is a single UTF-4 value */
 397     if (c <= 0xFF) {
 398         if (out >= outend)
 399         break;
 400         *out++ = c;
 401     } else {
 402         /* no chance for this in IsoLat1 */
 403         *outlen = out - outstart;
 404         *inlen = processed - instart;
 405         return(-2);
 406     }
 407     processed = in;
 408     }
 409     *outlen = out - outstart;
 410     *inlen = processed - instart;
 411     return(*outlen);
 412 }
 413 #endif /* LIBXML_OUTPUT_ENABLED */
 414 
 415 /**
 416  * UTF16LEToUTF8:
 417  * @out:  a pointer to an array of bytes to store the result
 418  * @outlen:  the length of @out
 419  * @inb:  a pointer to an array of UTF-16LE passwd as a byte array
 420  * @inlenb:  the length of @in in UTF-16LE chars
 421  *
 422  * Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8
 423  * block of chars out. This function assumes the endian property
 424  * is the same between the native type of this machine and the
 425  * inputed one.
 426  *
 427  * Returns the number of bytes written, or -1 if lack of space, or -2
 428  *     if the transcoding fails (if *in is not a valid utf16 string)
 429  *     The value of *inlen after return is the number of octets consumed
 430  *     if the return value is positive, else unpredictable.
 431  */
 432 static int
 433 UTF16LEToUTF8(unsigned char* out, int *outlen,
 434             const unsigned char* inb, int *inlenb)
 435 {
 436     unsigned char* outstart = out;
 437     const unsigned char* processed = inb;
 438     unsigned char* outend = out + *outlen;
 439     unsigned short* in = (unsigned short*) inb;
 440     unsigned short* inend;
 441     unsigned int c, d, inlen;
 442     unsigned char *tmp;
 443     int bits;
 444 
 445     if ((*inlenb % 2) == 1)
 446         (*inlenb)--;
 447     inlen = *inlenb / 2;
 448     inend = in + inlen;
 449     while ((in < inend) && (out - outstart + 5 < *outlen)) {
 450         if (xmlLittleEndian) {
 451         c= *in++;
 452     } else {
 453         tmp = (unsigned char *) in;
 454         c = *tmp++;
 455         c = c | (((unsigned int)*tmp) << 8);
 456         in++;
 457     }
 458         if ((c & 0xFC00) == 0xD800) {    /* surrogates */
 459         if (in >= inend) {           /* (in > inend) shouldn't happens */
 460         break;
 461         }
 462         if (xmlLittleEndian) {
 463         d = *in++;
 464         } else {
 465         tmp = (unsigned char *) in;
 466         d = *tmp++;
 467         d = d | (((unsigned int)*tmp) << 8);
 468         in++;
 469         }
 470             if ((d & 0xFC00) == 0xDC00) {
 471                 c &= 0x03FF;
 472                 c <<= 10;
 473                 c |= d & 0x03FF;
 474                 c += 0x10000;
 475             }
 476             else {
 477         *outlen = out - outstart;
 478         *inlenb = processed - inb;
 479             return(-2);
 480         }
 481         }
 482 
 483     /* assertion: c is a single UTF-4 value */
 484         if (out >= outend)
 485         break;
 486         if      (c <    0x80) {  *out++=  c;                bits= -6; }
 487         else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
 488         else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
 489         else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }
 490 
 491         for ( ; bits >= 0; bits-= 6) {
 492             if (out >= outend)
 493             break;
 494             *out++= ((c >> bits) & 0x3F) | 0x80;
 495         }
 496     processed = (const unsigned char*) in;
 497     }
 498     *outlen = out - outstart;
 499     *inlenb = processed - inb;
 500     return(*outlen);
 501 }
 502 
 503 #ifdef LIBXML_OUTPUT_ENABLED
 504 /**
 505  * UTF8ToUTF16LE:
 506  * @outb:  a pointer to an array of bytes to store the result
 507  * @outlen:  the length of @outb
 508  * @in:  a pointer to an array of UTF-8 chars
 509  * @inlen:  the length of @in
 510  *
 511  * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 512  * block of chars out.
 513  *
 514  * Returns the number of bytes written, or -1 if lack of space, or -2
 515  *     if the transcoding failed.
 516  */
 517 static int
 518 UTF8ToUTF16LE(unsigned char* outb, int *outlen,
 519             const unsigned char* in, int *inlen)
 520 {
 521     unsigned short* out = (unsigned short*) outb;
 522     const unsigned char* processed = in;
 523     const unsigned char *const instart = in;
 524     unsigned short* outstart= out;
 525     unsigned short* outend;
 526     const unsigned char* inend;
 527     unsigned int c, d;
 528     int trailing;
 529     unsigned char *tmp;
 530     unsigned short tmp1, tmp2;
 531 
 532     /* UTF16LE encoding has no BOM */
 533     if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
 534     if (in == NULL) {
 535     *outlen = 0;
 536     *inlen = 0;
 537     return(0);
 538     }
 539     inend= in + *inlen;
 540     outend = out + (*outlen / 2);
 541     while (in < inend) {
 542       d= *in++;
 543       if      (d < 0x80)  { c= d; trailing= 0; }
 544       else if (d < 0xC0) {
 545           /* trailing byte in leading position */
 546       *outlen = (out - outstart) * 2;
 547       *inlen = processed - instart;
 548       return(-2);
 549       } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
 550       else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
 551       else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
 552       else {
 553     /* no chance for this in UTF-16 */
 554     *outlen = (out - outstart) * 2;
 555     *inlen = processed - instart;
 556     return(-2);
 557       }
 558 
 559       if (inend - in < trailing) {
 560           break;
 561       }
 562 
 563       for ( ; trailing; trailing--) {
 564           if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
 565           break;
 566           c <<= 6;
 567           c |= d & 0x3F;
 568       }
 569 
 570       /* assertion: c is a single UTF-4 value */
 571         if (c < 0x10000) {
 572             if (out >= outend)
 573             break;
 574         if (xmlLittleEndian) {
 575         *out++ = c;
 576         } else {
 577         tmp = (unsigned char *) out;
 578         *tmp = c ;
 579         *(tmp + 1) = c >> 8 ;
 580         out++;
 581         }
 582         }
 583         else if (c < 0x110000) {
 584             if (out+1 >= outend)
 585             break;
 586             c -= 0x10000;
 587         if (xmlLittleEndian) {
 588         *out++ = 0xD800 | (c >> 10);
 589         *out++ = 0xDC00 | (c & 0x03FF);
 590         } else {
 591         tmp1 = 0xD800 | (c >> 10);
 592         tmp = (unsigned char *) out;
 593         *tmp = (unsigned char) tmp1;
 594         *(tmp + 1) = tmp1 >> 8;
 595         out++;
 596 
 597         tmp2 = 0xDC00 | (c & 0x03FF);
 598         tmp = (unsigned char *) out;
 599         *tmp  = (unsigned char) tmp2;
 600         *(tmp + 1) = tmp2 >> 8;
 601         out++;
 602         }
 603         }
 604         else
 605         break;
 606     processed = in;
 607     }
 608     *outlen = (out - outstart) * 2;
 609     *inlen = processed - instart;
 610     return(*outlen);
 611 }
 612 
 613 /**
 614  * UTF8ToUTF16:
 615  * @outb:  a pointer to an array of bytes to store the result
 616  * @outlen:  the length of @outb
 617  * @in:  a pointer to an array of UTF-8 chars
 618  * @inlen:  the length of @in
 619  *
 620  * Take a block of UTF-8 chars in and try to convert it to an UTF-16
 621  * block of chars out.
 622  *
 623  * Returns the number of bytes written, or -1 if lack of space, or -2
 624  *     if the transcoding failed.
 625  */
 626 static int
 627 UTF8ToUTF16(unsigned char* outb, int *outlen,
 628             const unsigned char* in, int *inlen)
 629 {
 630     if (in == NULL) {
 631     /*
 632      * initialization, add the Byte Order Mark for UTF-16LE
 633      */
 634         if (*outlen >= 2) {
 635         outb[0] = 0xFF;
 636         outb[1] = 0xFE;
 637         *outlen = 2;
 638         *inlen = 0;
 639 #ifdef DEBUG_ENCODING
 640             xmlGenericError(xmlGenericErrorContext,
 641             "Added FFFE Byte Order Mark\n");
 642 #endif
 643         return(2);
 644     }
 645     *outlen = 0;
 646     *inlen = 0;
 647     return(0);
 648     }
 649     return (UTF8ToUTF16LE(outb, outlen, in, inlen));
 650 }
 651 #endif /* LIBXML_OUTPUT_ENABLED */
 652 
 653 /**
 654  * UTF16BEToUTF8:
 655  * @out:  a pointer to an array of bytes to store the result
 656  * @outlen:  the length of @out
 657  * @inb:  a pointer to an array of UTF-16 passed as a byte array
 658  * @inlenb:  the length of @in in UTF-16 chars
 659  *
 660  * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
 661  * block of chars out. This function assumes the endian property
 662  * is the same between the native type of this machine and the
 663  * inputed one.
 664  *
 665  * Returns the number of bytes written, or -1 if lack of space, or -2
 666  *     if the transcoding fails (if *in is not a valid utf16 string)
 667  * The value of *inlen after return is the number of octets consumed
 668  *     if the return value is positive, else unpredictable.
 669  */
 670 static int
 671 UTF16BEToUTF8(unsigned char* out, int *outlen,
 672             const unsigned char* inb, int *inlenb)
 673 {
 674     unsigned char* outstart = out;
 675     const unsigned char* processed = inb;
 676     unsigned char* outend = out + *outlen;
 677     unsigned short* in = (unsigned short*) inb;
 678     unsigned short* inend;
 679     unsigned int c, d, inlen;
 680     unsigned char *tmp;
 681     int bits;
 682 
 683     if ((*inlenb % 2) == 1)
 684         (*inlenb)--;
 685     inlen = *inlenb / 2;
 686     inend= in + inlen;
 687     while (in < inend) {
 688     if (xmlLittleEndian) {
 689         tmp = (unsigned char *) in;
 690         c = *tmp++;
 691         c = c << 8;
 692         c = c | (unsigned int) *tmp;
 693         in++;
 694     } else {
 695         c= *in++;
 696     }
 697         if ((c & 0xFC00) == 0xD800) {    /* surrogates */
 698         if (in >= inend) {           /* (in > inend) shouldn't happens */
 699         *outlen = out - outstart;
 700         *inlenb = processed - inb;
 701             return(-2);
 702         }
 703         if (xmlLittleEndian) {
 704         tmp = (unsigned char *) in;
 705         d = *tmp++;
 706         d = d << 8;
 707         d = d | (unsigned int) *tmp;
 708         in++;
 709         } else {
 710         d= *in++;
 711         }
 712             if ((d & 0xFC00) == 0xDC00) {
 713                 c &= 0x03FF;
 714                 c <<= 10;
 715                 c |= d & 0x03FF;
 716                 c += 0x10000;
 717             }
 718             else {
 719         *outlen = out - outstart;
 720         *inlenb = processed - inb;
 721             return(-2);
 722         }
 723         }
 724 
 725     /* assertion: c is a single UTF-4 value */
 726         if (out >= outend)
 727         break;
 728         if      (c <    0x80) {  *out++=  c;                bits= -6; }
 729         else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
 730         else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
 731         else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }
 732 
 733         for ( ; bits >= 0; bits-= 6) {
 734             if (out >= outend)
 735             break;
 736             *out++= ((c >> bits) & 0x3F) | 0x80;
 737         }
 738     processed = (const unsigned char*) in;
 739     }
 740     *outlen = out - outstart;
 741     *inlenb = processed - inb;
 742     return(*outlen);
 743 }
 744 
 745 #ifdef LIBXML_OUTPUT_ENABLED
 746 /**
 747  * UTF8ToUTF16BE:
 748  * @outb:  a pointer to an array of bytes to store the result
 749  * @outlen:  the length of @outb
 750  * @in:  a pointer to an array of UTF-8 chars
 751  * @inlen:  the length of @in
 752  *
 753  * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 754  * block of chars out.
 755  *
 756  * Returns the number of byte written, or -1 by lack of space, or -2
 757  *     if the transcoding failed.
 758  */
 759 static int
 760 UTF8ToUTF16BE(unsigned char* outb, int *outlen,
 761             const unsigned char* in, int *inlen)
 762 {
 763     unsigned short* out = (unsigned short*) outb;
 764     const unsigned char* processed = in;
 765     const unsigned char *const instart = in;
 766     unsigned short* outstart= out;
 767     unsigned short* outend;
 768     const unsigned char* inend;
 769     unsigned int c, d;
 770     int trailing;
 771     unsigned char *tmp;
 772     unsigned short tmp1, tmp2;
 773 
 774     /* UTF-16BE has no BOM */
 775     if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
 776     if (in == NULL) {
 777     *outlen = 0;
 778     *inlen = 0;
 779     return(0);
 780     }
 781     inend= in + *inlen;
 782     outend = out + (*outlen / 2);
 783     while (in < inend) {
 784       d= *in++;
 785       if      (d < 0x80)  { c= d; trailing= 0; }
 786       else if (d < 0xC0)  {
 787           /* trailing byte in leading position */
 788       *outlen = out - outstart;
 789       *inlen = processed - instart;
 790       return(-2);
 791       } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
 792       else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
 793       else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
 794       else {
 795           /* no chance for this in UTF-16 */
 796       *outlen = out - outstart;
 797       *inlen = processed - instart;
 798       return(-2);
 799       }
 800 
 801       if (inend - in < trailing) {
 802           break;
 803       }
 804 
 805       for ( ; trailing; trailing--) {
 806           if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))  break;
 807           c <<= 6;
 808           c |= d & 0x3F;
 809       }
 810 
 811       /* assertion: c is a single UTF-4 value */
 812         if (c < 0x10000) {
 813             if (out >= outend)  break;
 814         if (xmlLittleEndian) {
 815         tmp = (unsigned char *) out;
 816         *tmp = c >> 8;
 817         *(tmp + 1) = c;
 818         out++;
 819         } else {
 820         *out++ = c;
 821         }
 822         }
 823         else if (c < 0x110000) {
 824             if (out+1 >= outend)  break;
 825             c -= 0x10000;
 826         if (xmlLittleEndian) {
 827         tmp1 = 0xD800 | (c >> 10);
 828         tmp = (unsigned char *) out;
 829         *tmp = tmp1 >> 8;
 830         *(tmp + 1) = (unsigned char) tmp1;
 831         out++;
 832 
 833         tmp2 = 0xDC00 | (c & 0x03FF);
 834         tmp = (unsigned char *) out;
 835         *tmp = tmp2 >> 8;
 836         *(tmp + 1) = (unsigned char) tmp2;
 837         out++;
 838         } else {
 839         *out++ = 0xD800 | (c >> 10);
 840         *out++ = 0xDC00 | (c & 0x03FF);
 841         }
 842         }
 843         else
 844         break;
 845     processed = in;
 846     }
 847     *outlen = (out - outstart) * 2;
 848     *inlen = processed - instart;
 849     return(*outlen);
 850 }
 851 #endif /* LIBXML_OUTPUT_ENABLED */
 852 
 853 /************************************************************************
 854  *                                  *
 855  *      Generic encoding handling routines          *
 856  *                                  *
 857  ************************************************************************/
 858 
 859 /**
 860  * xmlDetectCharEncoding:
 861  * @in:  a pointer to the first bytes of the XML entity, must be at least
 862  *       2 bytes long (at least 4 if encoding is UTF4 variant).
 863  * @len:  pointer to the length of the buffer
 864  *
 865  * Guess the encoding of the entity using the first bytes of the entity content
 866  * according to the non-normative appendix F of the XML-1.0 recommendation.
 867  *
 868  * Returns one of the XML_CHAR_ENCODING_... values.
 869  */
 870 xmlCharEncoding
 871 xmlDetectCharEncoding(const unsigned char* in, int len)
 872 {
 873     if (in == NULL)
 874         return(XML_CHAR_ENCODING_NONE);
 875     if (len >= 4) {
 876     if ((in[0] == 0x00) && (in[1] == 0x00) &&
 877         (in[2] == 0x00) && (in[3] == 0x3C))
 878         return(XML_CHAR_ENCODING_UCS4BE);
 879     if ((in[0] == 0x3C) && (in[1] == 0x00) &&
 880         (in[2] == 0x00) && (in[3] == 0x00))
 881         return(XML_CHAR_ENCODING_UCS4LE);
 882     if ((in[0] == 0x00) && (in[1] == 0x00) &&
 883         (in[2] == 0x3C) && (in[3] == 0x00))
 884         return(XML_CHAR_ENCODING_UCS4_2143);
 885     if ((in[0] == 0x00) && (in[1] == 0x3C) &&
 886         (in[2] == 0x00) && (in[3] == 0x00))
 887         return(XML_CHAR_ENCODING_UCS4_3412);
 888     if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
 889         (in[2] == 0xA7) && (in[3] == 0x94))
 890         return(XML_CHAR_ENCODING_EBCDIC);
 891     if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
 892         (in[2] == 0x78) && (in[3] == 0x6D))
 893         return(XML_CHAR_ENCODING_UTF8);
 894     /*
 895      * Although not part of the recommendation, we also
 896      * attempt an "auto-recognition" of UTF-16LE and
 897      * UTF-16BE encodings.
 898      */
 899     if ((in[0] == 0x3C) && (in[1] == 0x00) &&
 900         (in[2] == 0x3F) && (in[3] == 0x00))
 901         return(XML_CHAR_ENCODING_UTF16LE);
 902     if ((in[0] == 0x00) && (in[1] == 0x3C) &&
 903         (in[2] == 0x00) && (in[3] == 0x3F))
 904         return(XML_CHAR_ENCODING_UTF16BE);
 905     }
 906     if (len >= 3) {
 907     /*
 908      * Errata on XML-1.0 June 20 2001
 909      * We now allow an UTF8 encoded BOM
 910      */
 911     if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
 912         (in[2] == 0xBF))
 913         return(XML_CHAR_ENCODING_UTF8);
 914     }
 915     /* For UTF-16 we can recognize by the BOM */
 916     if (len >= 2) {
 917     if ((in[0] == 0xFE) && (in[1] == 0xFF))
 918         return(XML_CHAR_ENCODING_UTF16BE);
 919     if ((in[0] == 0xFF) && (in[1] == 0xFE))
 920         return(XML_CHAR_ENCODING_UTF16LE);
 921     }
 922     return(XML_CHAR_ENCODING_NONE);
 923 }
 924 
 925 /**
 926  * xmlCleanupEncodingAliases:
 927  *
 928  * Unregisters all aliases
 929  */
 930 void
 931 xmlCleanupEncodingAliases(void) {
 932     int i;
 933 
 934     if (xmlCharEncodingAliases == NULL)
 935     return;
 936 
 937     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
 938     if (xmlCharEncodingAliases[i].name != NULL)
 939         xmlFree((char *) xmlCharEncodingAliases[i].name);
 940     if (xmlCharEncodingAliases[i].alias != NULL)
 941         xmlFree((char *) xmlCharEncodingAliases[i].alias);
 942     }
 943     xmlCharEncodingAliasesNb = 0;
 944     xmlCharEncodingAliasesMax = 0;
 945     xmlFree(xmlCharEncodingAliases);
 946     xmlCharEncodingAliases = NULL;
 947 }
 948 
 949 /**
 950  * xmlGetEncodingAlias:
 951  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
 952  *
 953  * Lookup an encoding name for the given alias.
 954  *
 955  * Returns NULL if not found, otherwise the original name
 956  */
 957 const char *
 958 xmlGetEncodingAlias(const char *alias) {
 959     int i;
 960     char upper[100];
 961 
 962     if (alias == NULL)
 963     return(NULL);
 964 
 965     if (xmlCharEncodingAliases == NULL)
 966     return(NULL);
 967 
 968     for (i = 0;i < 99;i++) {
 969         upper[i] = toupper(alias[i]);
 970     if (upper[i] == 0) break;
 971     }
 972     upper[i] = 0;
 973 
 974     /*
 975      * Walk down the list looking for a definition of the alias
 976      */
 977     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
 978     if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
 979         return(xmlCharEncodingAliases[i].name);
 980     }
 981     }
 982     return(NULL);
 983 }
 984 
 985 /**
 986  * xmlAddEncodingAlias:
 987  * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
 988  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
 989  *
 990  * Registers an alias @alias for an encoding named @name. Existing alias
 991  * will be overwritten.
 992  *
 993  * Returns 0 in case of success, -1 in case of error
 994  */
 995 int
 996 xmlAddEncodingAlias(const char *name, const char *alias) {
 997     int i;
 998     char upper[100];
 999 
1000     if ((name == NULL) || (alias == NULL))
1001     return(-1);
1002 
1003     for (i = 0;i < 99;i++) {
1004         upper[i] = toupper(alias[i]);
1005     if (upper[i] == 0) break;
1006     }
1007     upper[i] = 0;
1008 
1009     if (xmlCharEncodingAliases == NULL) {
1010     xmlCharEncodingAliasesNb = 0;
1011     xmlCharEncodingAliasesMax = 20;
1012     xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1013           xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1014     if (xmlCharEncodingAliases == NULL)
1015         return(-1);
1016     } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1017     xmlCharEncodingAliasesMax *= 2;
1018     xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1019           xmlRealloc(xmlCharEncodingAliases,
1020                  xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1021     }
1022     /*
1023      * Walk down the list looking for a definition of the alias
1024      */
1025     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1026     if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1027         /*
1028          * Replace the definition.
1029          */
1030         xmlFree((char *) xmlCharEncodingAliases[i].name);
1031         xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1032         return(0);
1033     }
1034     }
1035     /*
1036      * Add the definition
1037      */
1038     xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1039     xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1040     xmlCharEncodingAliasesNb++;
1041     return(0);
1042 }
1043 
1044 /**
1045  * xmlDelEncodingAlias:
1046  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1047  *
1048  * Unregisters an encoding alias @alias
1049  *
1050  * Returns 0 in case of success, -1 in case of error
1051  */
1052 int
1053 xmlDelEncodingAlias(const char *alias) {
1054     int i;
1055 
1056     if (alias == NULL)
1057     return(-1);
1058 
1059     if (xmlCharEncodingAliases == NULL)
1060     return(-1);
1061     /*
1062      * Walk down the list looking for a definition of the alias
1063      */
1064     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1065     if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1066         xmlFree((char *) xmlCharEncodingAliases[i].name);
1067         xmlFree((char *) xmlCharEncodingAliases[i].alias);
1068         xmlCharEncodingAliasesNb--;
1069         memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1070             sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1071         return(0);
1072     }
1073     }
1074     return(-1);
1075 }
1076 
1077 /**
1078  * xmlParseCharEncoding:
1079  * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1080  *
1081  * Compare the string to the encoding schemes already known. Note
1082  * that the comparison is case insensitive accordingly to the section
1083  * [XML] 4.3.3 Character Encoding in Entities.
1084  *
1085  * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1086  * if not recognized.
1087  */
1088 xmlCharEncoding
1089 xmlParseCharEncoding(const char* name)
1090 {
1091     const char *alias;
1092     char upper[500];
1093     int i;
1094 
1095     if (name == NULL)
1096     return(XML_CHAR_ENCODING_NONE);
1097 
1098     /*
1099      * Do the alias resolution
1100      */
1101     alias = xmlGetEncodingAlias(name);
1102     if (alias != NULL)
1103     name = alias;
1104 
1105     for (i = 0;i < 499;i++) {
1106         upper[i] = toupper(name[i]);
1107     if (upper[i] == 0) break;
1108     }
1109     upper[i] = 0;
1110 
1111     if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1112     if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1113     if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1114 
1115     /*
1116      * NOTE: if we were able to parse this, the endianness of UTF16 is
1117      *       already found and in use
1118      */
1119     if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1120     if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1121 
1122     if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1123     if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1124     if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1125 
1126     /*
1127      * NOTE: if we were able to parse this, the endianness of UCS4 is
1128      *       already found and in use
1129      */
1130     if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1131     if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1132     if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1133 
1134 
1135     if (!strcmp(upper,  "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1136     if (!strcmp(upper,  "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1137     if (!strcmp(upper,  "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1138 
1139     if (!strcmp(upper,  "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1140     if (!strcmp(upper,  "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1141     if (!strcmp(upper,  "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1142 
1143     if (!strcmp(upper,  "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1144     if (!strcmp(upper,  "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1145     if (!strcmp(upper,  "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1146     if (!strcmp(upper,  "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1147     if (!strcmp(upper,  "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1148     if (!strcmp(upper,  "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1149     if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1150 
1151     if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1152     if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1153     if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1154 
1155 #ifdef DEBUG_ENCODING
1156     xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1157 #endif
1158     return(XML_CHAR_ENCODING_ERROR);
1159 }
1160 
1161 /**
1162  * xmlGetCharEncodingName:
1163  * @enc:  the encoding
1164  *
1165  * The "canonical" name for XML encoding.
1166  * C.f. http://www.w3.org/TR/REC-xml#charencoding
1167  * Section 4.3.3  Character Encoding in Entities
1168  *
1169  * Returns the canonical name for the given encoding
1170  */
1171 
1172 const char*
1173 xmlGetCharEncodingName(xmlCharEncoding enc) {
1174     switch (enc) {
1175         case XML_CHAR_ENCODING_ERROR:
1176         return(NULL);
1177         case XML_CHAR_ENCODING_NONE:
1178         return(NULL);
1179         case XML_CHAR_ENCODING_UTF8:
1180         return("UTF-8");
1181         case XML_CHAR_ENCODING_UTF16LE:
1182         return("UTF-16");
1183         case XML_CHAR_ENCODING_UTF16BE:
1184         return("UTF-16");
1185         case XML_CHAR_ENCODING_EBCDIC:
1186             return("EBCDIC");
1187         case XML_CHAR_ENCODING_UCS4LE:
1188             return("ISO-10646-UCS-4");
1189         case XML_CHAR_ENCODING_UCS4BE:
1190             return("ISO-10646-UCS-4");
1191         case XML_CHAR_ENCODING_UCS4_2143:
1192             return("ISO-10646-UCS-4");
1193         case XML_CHAR_ENCODING_UCS4_3412:
1194             return("ISO-10646-UCS-4");
1195         case XML_CHAR_ENCODING_UCS2:
1196             return("ISO-10646-UCS-2");
1197         case XML_CHAR_ENCODING_8859_1:
1198         return("ISO-8859-1");
1199         case XML_CHAR_ENCODING_8859_2:
1200         return("ISO-8859-2");
1201         case XML_CHAR_ENCODING_8859_3:
1202         return("ISO-8859-3");
1203         case XML_CHAR_ENCODING_8859_4:
1204         return("ISO-8859-4");
1205         case XML_CHAR_ENCODING_8859_5:
1206         return("ISO-8859-5");
1207         case XML_CHAR_ENCODING_8859_6:
1208         return("ISO-8859-6");
1209         case XML_CHAR_ENCODING_8859_7:
1210         return("ISO-8859-7");
1211         case XML_CHAR_ENCODING_8859_8:
1212         return("ISO-8859-8");
1213         case XML_CHAR_ENCODING_8859_9:
1214         return("ISO-8859-9");
1215         case XML_CHAR_ENCODING_2022_JP:
1216             return("ISO-2022-JP");
1217         case XML_CHAR_ENCODING_SHIFT_JIS:
1218             return("Shift-JIS");
1219         case XML_CHAR_ENCODING_EUC_JP:
1220             return("EUC-JP");
1221     case XML_CHAR_ENCODING_ASCII:
1222         return(NULL);
1223     }
1224     return(NULL);
1225 }
1226 
1227 /************************************************************************
1228  *                                  *
1229  *          Char encoding handlers              *
1230  *                                  *
1231  ************************************************************************/
1232 
1233 
/*
 * Table of registered encoding handlers. It has a fixed capacity of
 * MAX_ENCODING_HANDLERS entries; the size should be growable, but it's
 * not a big deal ...
 */
#define MAX_ENCODING_HANDLERS 50
/* The table itself; NULL until xmlInitCharEncodingHandlers() allocates it. */
static xmlCharEncodingHandlerPtr *handlers = NULL;
/* Number of entries of the table currently in use. */
static int nbCharEncodingHandler = 0;

/*
 * The default is UTF-8 for XML, that's also the default used for the
 * parser internals, so the default encoding handler is NULL
 */

static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1245 
1246 /**
1247  * xmlNewCharEncodingHandler:
1248  * @name:  the encoding name, in UTF-8 format (ASCII actually)
1249  * @input:  the xmlCharEncodingInputFunc to read that encoding
1250  * @output:  the xmlCharEncodingOutputFunc to write that encoding
1251  *
1252  * Create and registers an xmlCharEncodingHandler.
1253  *
1254  * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1255  */
1256 xmlCharEncodingHandlerPtr
1257 xmlNewCharEncodingHandler(const char *name,
1258                           xmlCharEncodingInputFunc input,
1259                           xmlCharEncodingOutputFunc output) {
1260     xmlCharEncodingHandlerPtr handler;
1261     const char *alias;
1262     char upper[500];
1263     int i;
1264     char *up = NULL;
1265 
1266     /*
1267      * Do the alias resolution
1268      */
1269     alias = xmlGetEncodingAlias(name);
1270     if (alias != NULL)
1271     name = alias;
1272 
1273     /*
1274      * Keep only the uppercase version of the encoding.
1275      */
1276     if (name == NULL) {
1277         xmlEncodingErr(XML_I18N_NO_NAME,
1278                "xmlNewCharEncodingHandler : no name !\n", NULL);
1279     return(NULL);
1280     }
1281     for (i = 0;i < 499;i++) {
1282         upper[i] = toupper(name[i]);
1283     if (upper[i] == 0) break;
1284     }
1285     upper[i] = 0;
1286     up = xmlMemStrdup(upper);
1287     if (up == NULL) {
1288         xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1289     return(NULL);
1290     }
1291 
1292     /*
1293      * allocate and fill-up an handler block.
1294      */
1295     handler = (xmlCharEncodingHandlerPtr)
1296               xmlMalloc(sizeof(xmlCharEncodingHandler));
1297     if (handler == NULL) {
1298         xmlFree(up);
1299         xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1300     return(NULL);
1301     }
1302     handler->input = input;
1303     handler->output = output;
1304     handler->name = up;
1305 
1306 #ifdef LIBXML_ICONV_ENABLED
1307     handler->iconv_in = NULL;
1308     handler->iconv_out = NULL;
1309 #endif /* LIBXML_ICONV_ENABLED */
1310 
1311     /*
1312      * registers and returns the handler.
1313      */
1314     xmlRegisterCharEncodingHandler(handler);
1315 #ifdef DEBUG_ENCODING
1316     xmlGenericError(xmlGenericErrorContext,
1317         "Registered encoding handler for %s\n", name);
1318 #endif
1319     return(handler);
1320 }
1321 
1322 /**
1323  * xmlInitCharEncodingHandlers:
1324  *
1325  * Initialize the char encoding support, it registers the default
1326  * encoding supported.
1327  * NOTE: while public, this function usually doesn't need to be called
1328  *       in normal processing.
1329  */
1330 void
1331 xmlInitCharEncodingHandlers(void) {
1332     unsigned short int tst = 0x1234;
1333     unsigned char *ptr = (unsigned char *) &tst;
1334 
1335     if (handlers != NULL) return;
1336 
1337     handlers = (xmlCharEncodingHandlerPtr *)
1338         xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1339 
1340     if (*ptr == 0x12) xmlLittleEndian = 0;
1341     else if (*ptr == 0x34) xmlLittleEndian = 1;
1342     else {
1343         xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1344                    "Odd problem at endianness detection\n", NULL);
1345     }
1346 
1347     if (handlers == NULL) {
1348         xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
1349     return;
1350     }
1351     xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
1352 #ifdef LIBXML_OUTPUT_ENABLED
1353     xmlUTF16LEHandler =
1354           xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1355     xmlUTF16BEHandler =
1356           xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
1357     xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
1358     xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1359     xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
1360     xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
1361 #ifdef LIBXML_HTML_ENABLED
1362     xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1363 #endif
1364 #else
1365     xmlUTF16LEHandler =
1366           xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1367     xmlUTF16BEHandler =
1368           xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
1369     xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
1370     xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1371     xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1372     xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1373 #endif /* LIBXML_OUTPUT_ENABLED */
1374 #ifndef LIBXML_ICONV_ENABLED
1375 #ifdef LIBXML_ISO8859X_ENABLED
1376     xmlRegisterCharEncodingHandlersISO8859x ();
1377 #endif
1378 #endif
1379 
1380 }
1381 
1382 /**
1383  * xmlCleanupCharEncodingHandlers:
1384  *
1385  * Cleanup the memory allocated for the char encoding support, it
1386  * unregisters all the encoding handlers and the aliases.
1387  */
1388 void
1389 xmlCleanupCharEncodingHandlers(void) {
1390     xmlCleanupEncodingAliases();
1391 
1392     if (handlers == NULL) return;
1393 
1394     for (;nbCharEncodingHandler > 0;) {
1395         nbCharEncodingHandler--;
1396     if (handlers[nbCharEncodingHandler] != NULL) {
1397         if (handlers[nbCharEncodingHandler]->name != NULL)
1398         xmlFree(handlers[nbCharEncodingHandler]->name);
1399         xmlFree(handlers[nbCharEncodingHandler]);
1400     }
1401     }
1402     xmlFree(handlers);
1403     handlers = NULL;
1404     nbCharEncodingHandler = 0;
1405     xmlDefaultCharEncodingHandler = NULL;
1406 }
1407 
1408 /**
1409  * xmlRegisterCharEncodingHandler:
1410  * @handler:  the xmlCharEncodingHandlerPtr handler block
1411  *
1412  * Register the char encoding handler, surprising, isn't it ?
1413  */
1414 void
1415 xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1416     if (handlers == NULL) xmlInitCharEncodingHandlers();
1417     if (handler == NULL) {
1418         xmlEncodingErr(XML_I18N_NO_HANDLER,
1419         "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
1420     return;
1421     }
1422 
1423     if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1424         xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1425     "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1426                    "MAX_ENCODING_HANDLERS");
1427     return;
1428     }
1429     handlers[nbCharEncodingHandler++] = handler;
1430 }
1431 
1432 /**
1433  * xmlGetCharEncodingHandler:
1434  * @enc:  an xmlCharEncoding value.
1435  *
1436  * Search in the registered set the handler able to read/write that encoding.
1437  *
1438  * Returns the handler or NULL if not found
1439  */
1440 xmlCharEncodingHandlerPtr
1441 xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1442     xmlCharEncodingHandlerPtr handler;
1443 
1444     if (handlers == NULL) xmlInitCharEncodingHandlers();
1445     switch (enc) {
1446         case XML_CHAR_ENCODING_ERROR:
1447         return(NULL);
1448         case XML_CHAR_ENCODING_NONE:
1449         return(NULL);
1450         case XML_CHAR_ENCODING_UTF8:
1451         return(NULL);
1452         case XML_CHAR_ENCODING_UTF16LE:
1453         return(xmlUTF16LEHandler);
1454         case XML_CHAR_ENCODING_UTF16BE:
1455         return(xmlUTF16BEHandler);
1456         case XML_CHAR_ENCODING_EBCDIC:
1457             handler = xmlFindCharEncodingHandler("EBCDIC");
1458             if (handler != NULL) return(handler);
1459             handler = xmlFindCharEncodingHandler("ebcdic");
1460             if (handler != NULL) return(handler);
1461         break;
1462         case XML_CHAR_ENCODING_UCS4BE:
1463             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1464             if (handler != NULL) return(handler);
1465             handler = xmlFindCharEncodingHandler("UCS-4");
1466             if (handler != NULL) return(handler);
1467             handler = xmlFindCharEncodingHandler("UCS4");
1468             if (handler != NULL) return(handler);
1469         break;
1470         case XML_CHAR_ENCODING_UCS4LE:
1471             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1472             if (handler != NULL) return(handler);
1473             handler = xmlFindCharEncodingHandler("UCS-4");
1474             if (handler != NULL) return(handler);
1475             handler = xmlFindCharEncodingHandler("UCS4");
1476             if (handler != NULL) return(handler);
1477         break;
1478         case XML_CHAR_ENCODING_UCS4_2143:
1479         break;
1480         case XML_CHAR_ENCODING_UCS4_3412:
1481         break;
1482         case XML_CHAR_ENCODING_UCS2:
1483             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1484             if (handler != NULL) return(handler);
1485             handler = xmlFindCharEncodingHandler("UCS-2");
1486             if (handler != NULL) return(handler);
1487             handler = xmlFindCharEncodingHandler("UCS2");
1488             if (handler != NULL) return(handler);
1489         break;
1490 
1491         /*
1492          * We used to keep ISO Latin encodings native in the
1493          * generated data. This led to so many problems that
1494          * this has been removed. One can still change this
1495          * back by registering no-ops encoders for those
1496          */
1497         case XML_CHAR_ENCODING_8859_1:
1498         handler = xmlFindCharEncodingHandler("ISO-8859-1");
1499         if (handler != NULL) return(handler);
1500         break;
1501         case XML_CHAR_ENCODING_8859_2:
1502         handler = xmlFindCharEncodingHandler("ISO-8859-2");
1503         if (handler != NULL) return(handler);
1504         break;
1505         case XML_CHAR_ENCODING_8859_3:
1506         handler = xmlFindCharEncodingHandler("ISO-8859-3");
1507         if (handler != NULL) return(handler);
1508         break;
1509         case XML_CHAR_ENCODING_8859_4:
1510         handler = xmlFindCharEncodingHandler("ISO-8859-4");
1511         if (handler != NULL) return(handler);
1512         break;
1513         case XML_CHAR_ENCODING_8859_5:
1514         handler = xmlFindCharEncodingHandler("ISO-8859-5");
1515         if (handler != NULL) return(handler);
1516         break;
1517         case XML_CHAR_ENCODING_8859_6:
1518         handler = xmlFindCharEncodingHandler("ISO-8859-6");
1519         if (handler != NULL) return(handler);
1520         break;
1521         case XML_CHAR_ENCODING_8859_7:
1522         handler = xmlFindCharEncodingHandler("ISO-8859-7");
1523         if (handler != NULL) return(handler);
1524         break;
1525         case XML_CHAR_ENCODING_8859_8:
1526         handler = xmlFindCharEncodingHandler("ISO-8859-8");
1527         if (handler != NULL) return(handler);
1528         break;
1529         case XML_CHAR_ENCODING_8859_9:
1530         handler = xmlFindCharEncodingHandler("ISO-8859-9");
1531         if (handler != NULL) return(handler);
1532         break;
1533 
1534 
1535         case XML_CHAR_ENCODING_2022_JP:
1536             handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1537             if (handler != NULL) return(handler);
1538         break;
1539         case XML_CHAR_ENCODING_SHIFT_JIS:
1540             handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1541             if (handler != NULL) return(handler);
1542             handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1543             if (handler != NULL) return(handler);
1544             handler = xmlFindCharEncodingHandler("Shift_JIS");
1545             if (handler != NULL) return(handler);
1546         break;
1547         case XML_CHAR_ENCODING_EUC_JP:
1548             handler = xmlFindCharEncodingHandler("EUC-JP");
1549             if (handler != NULL) return(handler);
1550         break;
1551     default:
1552         break;
1553     }
1554 
1555 #ifdef DEBUG_ENCODING
1556     xmlGenericError(xmlGenericErrorContext,
1557         "No handler found for encoding %d\n", enc);
1558 #endif
1559     return(NULL);
1560 }
1561 
1562 /**
1563  * xmlFindCharEncodingHandler:
1564  * @name:  a string describing the char encoding.
1565  *
1566  * Search in the registered set the handler able to read/write that encoding.
1567  *
1568  * Returns the handler or NULL if not found
1569  */
1570 xmlCharEncodingHandlerPtr
1571 xmlFindCharEncodingHandler(const char *name) {
1572     const char *nalias;
1573     const char *norig;
1574     xmlCharEncoding alias;
1575 #ifdef LIBXML_ICONV_ENABLED
1576     xmlCharEncodingHandlerPtr enc;
1577     iconv_t icv_in, icv_out;
1578 #endif /* LIBXML_ICONV_ENABLED */
1579     char upper[100];
1580     int i;
1581 
1582     if (handlers == NULL) xmlInitCharEncodingHandlers();
1583     if (name == NULL) return(xmlDefaultCharEncodingHandler);
1584     if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1585 
1586     /*
1587      * Do the alias resolution
1588      */
1589     norig = name;
1590     nalias = xmlGetEncodingAlias(name);
1591     if (nalias != NULL)
1592     name = nalias;
1593 
1594     /*
1595      * Check first for directly registered encoding names
1596      */
1597     for (i = 0;i < 99;i++) {
1598         upper[i] = toupper(name[i]);
1599     if (upper[i] == 0) break;
1600     }
1601     upper[i] = 0;
1602 
1603     for (i = 0;i < nbCharEncodingHandler; i++)
1604         if (!strcmp(upper, handlers[i]->name)) {
1605 #ifdef DEBUG_ENCODING
1606             xmlGenericError(xmlGenericErrorContext,
1607             "Found registered handler for encoding %s\n", name);
1608 #endif
1609         return(handlers[i]);
1610     }
1611 
1612 #ifdef LIBXML_ICONV_ENABLED
1613     /* check whether iconv can handle this */
1614     icv_in = iconv_open("UTF-8", name);
1615     icv_out = iconv_open(name, "UTF-8");
1616     if (icv_in == (iconv_t) -1) {
1617         icv_in = iconv_open("UTF-8", upper);
1618     }
1619     if (icv_out == (iconv_t) -1) {
1620     icv_out = iconv_open(upper, "UTF-8");
1621     }
1622     if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1623         enc = (xmlCharEncodingHandlerPtr)
1624               xmlMalloc(sizeof(xmlCharEncodingHandler));
1625         if (enc == NULL) {
1626             iconv_close(icv_in);
1627             iconv_close(icv_out);
1628         return(NULL);
1629         }
1630         enc->name = xmlMemStrdup(name);
1631         enc->input = NULL;
1632         enc->output = NULL;
1633         enc->iconv_in = icv_in;
1634         enc->iconv_out = icv_out;
1635 #ifdef DEBUG_ENCODING
1636             xmlGenericError(xmlGenericErrorContext,
1637             "Found iconv handler for encoding %s\n", name);
1638 #endif
1639         return enc;
1640     } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1641         xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1642             "iconv : problems with filters for '%s'\n", name);
1643     }
1644 #endif /* LIBXML_ICONV_ENABLED */
1645 
1646 #ifdef DEBUG_ENCODING
1647     xmlGenericError(xmlGenericErrorContext,
1648         "No handler found for encoding %s\n", name);
1649 #endif
1650 
1651     /*
1652      * Fallback using the canonical names
1653      */
1654     alias = xmlParseCharEncoding(norig);
1655     if (alias != XML_CHAR_ENCODING_ERROR) {
1656         const char* canon;
1657         canon = xmlGetCharEncodingName(alias);
1658         if ((canon != NULL) && (strcmp(name, canon))) {
1659         return(xmlFindCharEncodingHandler(canon));
1660         }
1661     }
1662 
1663     /* If "none of the above", give up */
1664     return(NULL);
1665 }
1666 
1667 /************************************************************************
1668  *                                  *
1669  *      ICONV based generic conversion functions        *
1670  *                                  *
1671  ************************************************************************/
1672 
#ifdef LIBXML_ICONV_ENABLED
/**
 * xmlIconvWrapper:
 * @cd:     iconv converter data structure
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of bytes to convert
 * @inlen:  the length of @in
 *
 * Runs one iconv() call over the input block and maps its outcome to the
 * return codes used by the encoding handlers.
 *
 * Returns 0 if success, or
 *     -1 by lack of space (or if one of the pointer arguments is NULL), or
 *     -2 if the transcoding fails (errno is EILSEQ), or
 *     -3 if the input is incomplete or for any other failure.
 *
 * Unless a pointer argument was NULL, the value of @inlen after return is
 * the number of octets consumed and the value of @outlen after return is
 * the number of octets produced.
 */
static int
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {
    size_t inLeft, outLeft;
    const char *src = (const char *) in;
    char *dst = (char *) out;
    int rc;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
        if (outlen != NULL) *outlen = 0;
        return(-1);
    }

    inLeft = *inlen;
    outLeft = *outlen;
    rc = iconv(cd, (ICONV_CONST char **) &src, &inLeft, &dst, &outLeft);
    /* iconv() leaves the remaining byte counts; turn them into used counts */
    *inlen -= inLeft;
    *outlen -= outLeft;

    if ((inLeft == 0) && (rc != -1))
        return 0;

    /* Something went wrong or input is left over: classify through errno. */
#ifdef EILSEQ
    if (errno == EILSEQ)
        return -2;
#endif
#ifdef E2BIG
    if (errno == E2BIG)
        return -1;
#endif
#ifdef EINVAL
    if (errno == EINVAL)
        return -3;
#endif
    return -3;
}
#endif /* LIBXML_ICONV_ENABLED */
1732 
1733 /************************************************************************
1734  *                                  *
1735  *      The real API used by libxml for on-the-fly conversion   *
1736  *                                  *
1737  ************************************************************************/
1738 
1739 /**
1740  * xmlCharEncFirstLine:
1741  * @handler:    char enconding transformation data structure
1742  * @out:  an xmlBuffer for the output.
1743  * @in:  an xmlBuffer for the input
1744  *
1745  * Front-end for the encoding handler input function, but handle only
1746  * the very first line, i.e. limit itself to 45 chars.
1747  *
1748  * Returns the number of byte written if success, or
1749  *     -1 general error
1750  *     -2 if the transcoding fails (for *in is not valid utf8 string or
1751  *        the result of transformation can't fit into the encoding we want), or
1752  */
1753 int
1754 xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1755                  xmlBufferPtr in) {
1756     int ret = -2;
1757     int written;
1758     int toconv;
1759 
1760     if (handler == NULL) return(-1);
1761     if (out == NULL) return(-1);
1762     if (in == NULL) return(-1);
1763 
1764     /* calculate space available */
1765     written = out->size - out->use;
1766     toconv = in->use;
1767     /*
1768      * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
1769      * 45 chars should be sufficient to reach the end of the encoding
1770      * declaration without going too far inside the document content.
1771      * on UTF-16 this means 90bytes, on UCS4 this means 180
1772      */
1773     if (toconv > 180)
1774     toconv  = 180;
1775     if (toconv * 2 >= written) {
1776         xmlBufferGrow(out, toconv);
1777     written = out->size - out->use - 1;
1778     }
1779 
1780     if (handler->input != NULL) {
1781     ret = handler->input(&out->content[out->use], &written,
1782                          in->content, &toconv);
1783     xmlBufferShrink(in, toconv);
1784     out->use += written;
1785     out->content[out->use] = 0;
1786     }
1787 #ifdef LIBXML_ICONV_ENABLED
1788     else if (handler->iconv_in != NULL) {
1789     ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
1790                           &written, in->content, &toconv);
1791     xmlBufferShrink(in, toconv);
1792     out->use += written;
1793     out->content[out->use] = 0;
1794     if (ret == -1) ret = -3;
1795     }
1796 #endif /* LIBXML_ICONV_ENABLED */
1797 #ifdef DEBUG_ENCODING
1798     switch (ret) {
1799         case 0:
1800         xmlGenericError(xmlGenericErrorContext,
1801             "converted %d bytes to %d bytes of input\n",
1802                 toconv, written);
1803         break;
1804         case -1:
1805         xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
1806                 toconv, written, in->use);
1807         break;
1808         case -2:
1809         xmlGenericError(xmlGenericErrorContext,
1810             "input conversion failed due to input error\n");
1811         break;
1812         case -3:
1813         xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
1814                 toconv, written, in->use);
1815         break;
1816     default:
1817         xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
1818     }
1819 #endif /* DEBUG_ENCODING */
1820     /*
1821      * Ignore when input buffer is not on a boundary
1822      */
1823     if (ret == -3) ret = 0;
1824     if (ret == -1) ret = 0;
1825     return(ret);
1826 }
1827 
1828 /**
1829  * xmlCharEncInFunc:
1830  * @handler:    char encoding transformation data structure
1831  * @out:  an xmlBuffer for the output.
1832  * @in:  an xmlBuffer for the input
1833  *
1834  * Generic front-end for the encoding handler input function
1835  *
1836  * Returns the number of byte written if success, or
1837  *     -1 general error
1838  *     -2 if the transcoding fails (for *in is not valid utf8 string or
1839  *        the result of transformation can't fit into the encoding we want), or
1840  */
1841 int
1842 xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
1843                  xmlBufferPtr in)
1844 {
1845     int ret = -2;
1846     int written;
1847     int toconv;
1848 
1849     if (handler == NULL)
1850         return (-1);
1851     if (out == NULL)
1852         return (-1);
1853     if (in == NULL)
1854         return (-1);
1855 
1856     toconv = in->use;
1857     if (toconv == 0)
1858         return (0);
1859     written = out->size - out->use;
1860     if (toconv * 2 >= written) {
1861         xmlBufferGrow(out, out->size + toconv * 2);
1862         written = out->size - out->use - 1;
1863     }
1864     if (handler->input != NULL) {
1865         ret = handler->input(&out->content[out->use], &written,
1866                              in->content, &toconv);
1867         xmlBufferShrink(in, toconv);
1868         out->use += written;
1869         out->content[out->use] = 0;
1870     }
1871 #ifdef LIBXML_ICONV_ENABLED
1872     else if (handler->iconv_in != NULL) {
1873         ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
1874                               &written, in->content, &toconv);
1875         xmlBufferShrink(in, toconv);
1876         out->use += written;
1877         out->content[out->use] = 0;
1878         if (ret == -1)
1879             ret = -3;
1880     }
1881 #endif /* LIBXML_ICONV_ENABLED */
1882     switch (ret) {
1883         case 0:
1884 #ifdef DEBUG_ENCODING
1885             xmlGenericError(xmlGenericErrorContext,
1886                             "converted %d bytes to %d bytes of input\n",
1887                             toconv, written);
1888 #endif
1889             break;
1890         case -1:
1891 #ifdef DEBUG_ENCODING
1892             xmlGenericError(xmlGenericErrorContext,
1893                          "converted %d bytes to %d bytes of input, %d left\n",
1894                             toconv, written, in->use);
1895 #endif
1896             break;
1897         case -3:
1898 #ifdef DEBUG_ENCODING
1899             xmlGenericError(xmlGenericErrorContext,
1900                         "converted %d bytes to %d bytes of input, %d left\n",
1901                             toconv, written, in->use);
1902 #endif
1903             break;
1904         case -2: {
1905             char buf[50];
1906 
1907         snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
1908              in->content[0], in->content[1],
1909              in->content[2], in->content[3]);
1910         buf[49] = 0;
1911         xmlEncodingErr(XML_I18N_CONV_FAILED,
1912             "input conversion failed due to input error, bytes %s\n",
1913                    buf);
1914         }
1915     }
1916     /*
1917      * Ignore when input buffer is not on a boundary
1918      */
1919     if (ret == -3)
1920         ret = 0;
1921     return (written? written : ret);
1922 }
1923 
1924 /**
1925  * xmlCharEncOutFunc:
1926  * @handler:    char enconding transformation data structure
1927  * @out:  an xmlBuffer for the output.
1928  * @in:  an xmlBuffer for the input
1929  *
1930  * Generic front-end for the encoding handler output function
1931  * a first call with @in == NULL has to be made firs to initiate the
1932  * output in case of non-stateless encoding needing to initiate their
1933  * state or the output (like the BOM in UTF16).
1934  * In case of UTF8 sequence conversion errors for the given encoder,
1935  * the content will be automatically remapped to a CharRef sequence.
1936  *
1937  * Returns the number of byte written if success, or
1938  *     -1 general error
1939  *     -2 if the transcoding fails (for *in is not valid utf8 string or
1940  *        the result of transformation can't fit into the encoding we want), or
1941  */
1942 int
1943 xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1944                   xmlBufferPtr in) {
1945     int ret = -2;
1946     int written;
1947     int writtentot = 0;
1948     int toconv;
1949     int output = 0;
1950 
1951     if (handler == NULL) return(-1);
1952     if (out == NULL) return(-1);
1953 
1954 retry:
1955 
1956     written = out->size - out->use;
1957 
1958     if (written > 0)
1959     written--; /* Gennady: count '/0' */
1960 
1961     /*
1962      * First specific handling of in = NULL, i.e. the initialization call
1963      */
1964     if (in == NULL) {
1965         toconv = 0;
1966     if (handler->output != NULL) {
1967         ret = handler->output(&out->content[out->use], &written,
1968                   NULL, &toconv);
1969         if (ret >= 0) { /* Gennady: check return value */
1970         out->use += written;
1971         out->content[out->use] = 0;
1972         }
1973     }
1974 #ifdef LIBXML_ICONV_ENABLED
1975     else if (handler->iconv_out != NULL) {
1976         ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
1977                   &written, NULL, &toconv);
1978         out->use += written;
1979         out->content[out->use] = 0;
1980     }
1981 #endif /* LIBXML_ICONV_ENABLED */
1982 #ifdef DEBUG_ENCODING
1983     xmlGenericError(xmlGenericErrorContext,
1984         "initialized encoder\n");
1985 #endif
1986         return(0);
1987     }
1988 
1989     /*
1990      * Conversion itself.
1991      */
1992     toconv = in->use;
1993     if (toconv == 0)
1994     return(0);
1995     if (toconv * 4 >= written) {
1996         xmlBufferGrow(out, toconv * 4);
1997     written = out->size - out->use - 1;
1998     }
1999     if (handler->output != NULL) {
2000     ret = handler->output(&out->content[out->use], &written,
2001                           in->content, &toconv);
2002     if (written > 0) {
2003         xmlBufferShrink(in, toconv);
2004         out->use += written;
2005         writtentot += written;
2006     }
2007     out->content[out->use] = 0;
2008     }
2009 #ifdef LIBXML_ICONV_ENABLED
2010     else if (handler->iconv_out != NULL) {
2011     ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
2012                           &written, in->content, &toconv);
2013     xmlBufferShrink(in, toconv);
2014     out->use += written;
2015     writtentot += written;
2016     out->content[out->use] = 0;
2017     if (ret == -1) {
2018         if (written > 0) {
2019         /*
2020          * Can be a limitation of iconv
2021          */
2022         goto retry;
2023         }
2024         ret = -3;
2025     }
2026     }
2027 #endif /* LIBXML_ICONV_ENABLED */
2028     else {
2029     xmlEncodingErr(XML_I18N_NO_OUTPUT,
2030                "xmlCharEncOutFunc: no output function !\n", NULL);
2031     return(-1);
2032     }
2033 
2034     if (ret >= 0) output += ret;
2035 
2036     /*
2037      * Attempt to handle error cases
2038      */
2039     switch (ret) {
2040         case 0:
2041 #ifdef DEBUG_ENCODING
2042         xmlGenericError(xmlGenericErrorContext,
2043             "converted %d bytes to %d bytes of output\n",
2044                 toconv, written);
2045 #endif
2046         break;
2047         case -1:
2048 #ifdef DEBUG_ENCODING
2049         xmlGenericError(xmlGenericErrorContext,
2050             "output conversion failed by lack of space\n");
2051 #endif
2052         break;
2053         case -3:
2054 #ifdef DEBUG_ENCODING
2055         xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2056                 toconv, written, in->use);
2057 #endif
2058         break;
2059         case -2: {
2060         int len = in->use;
2061         const xmlChar *utf = (const xmlChar *) in->content;
2062         int cur;
2063 
2064         cur = xmlGetUTF8Char(utf, &len);
2065         if (cur > 0) {
2066         xmlChar charref[20];
2067 
2068 #ifdef DEBUG_ENCODING
2069         xmlGenericError(xmlGenericErrorContext,
2070             "handling output conversion error\n");
2071         xmlGenericError(xmlGenericErrorContext,
2072             "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2073             in->content[0], in->content[1],
2074             in->content[2], in->content[3]);
2075 #endif
2076         /*
2077          * Removes the UTF8 sequence, and replace it by a charref
2078          * and continue the transcoding phase, hoping the error
2079          * did not mangle the encoder state.
2080          */
2081         snprintf((char *) &charref[0], sizeof(charref), "&#%d;", cur);
2082         xmlBufferShrink(in, len);
2083         xmlBufferAddHead(in, charref, -1);
2084 
2085         goto retry;
2086         } else {
2087         char buf[50];
2088 
2089         snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2090              in->content[0], in->content[1],
2091              in->content[2], in->content[3]);
2092         buf[49] = 0;
2093         xmlEncodingErr(XML_I18N_CONV_FAILED,
2094             "output conversion failed due to conv error, bytes %s\n",
2095                    buf);
2096         if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
2097             in->content[0] = ' ';
2098         }
2099         break;
2100     }
2101     }
2102     return(ret);
2103 }
2104 
2105 /**
2106  * xmlCharEncCloseFunc:
2107  * @handler:    char enconding transformation data structure
2108  *
2109  * Generic front-end for encoding handler close function
2110  *
2111  * Returns 0 if success, or -1 in case of error
2112  */
2113 int
2114 xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2115     int ret = 0;
2116     if (handler == NULL) return(-1);
2117     if (handler->name == NULL) return(-1);
2118 #ifdef LIBXML_ICONV_ENABLED
2119     /*
2120      * Iconv handlers can be used only once, free the whole block.
2121      * and the associated icon resources.
2122      */
2123     if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
2124     if (handler->name != NULL)
2125         xmlFree(handler->name);
2126     handler->name = NULL;
2127     if (handler->iconv_out != NULL) {
2128         if (iconv_close(handler->iconv_out))
2129         ret = -1;
2130         handler->iconv_out = NULL;
2131     }
2132     if (handler->iconv_in != NULL) {
2133         if (iconv_close(handler->iconv_in))
2134         ret = -1;
2135         handler->iconv_in = NULL;
2136     }
2137     xmlFree(handler);
2138     }
2139 #endif /* LIBXML_ICONV_ENABLED */
2140 #ifdef DEBUG_ENCODING
2141     if (ret)
2142         xmlGenericError(xmlGenericErrorContext,
2143         "failed to close the encoding handler\n");
2144     else
2145         xmlGenericError(xmlGenericErrorContext,
2146         "closed the encoding handler\n");
2147 #endif
2148 
2149     return(ret);
2150 }
2151 
2152 /**
2153  * xmlByteConsumed:
2154  * @ctxt: an XML parser context
2155  *
2156  * This function provides the current index of the parser relative
2157  * to the start of the current entity. This function is computed in
2158  * bytes from the beginning starting at zero and finishing at the
2159  * size in byte of the file if parsing a file. The function is
2160  * of constant cost if the input is UTF-8 but can be costly if run
2161  * on non-UTF-8 input.
2162  *
2163  * Returns the index in bytes from the beginning of the entity or -1
2164  *         in case the index could not be computed.
2165  */
2166 long
2167 xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2168     xmlParserInputPtr in;
2169 
2170     if (ctxt == NULL) return(-1);
2171     in = ctxt->input;
2172     if (in == NULL)  return(-1);
2173     if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2174         unsigned int unused = 0;
2175     xmlCharEncodingHandler * handler = in->buf->encoder;
2176         /*
2177      * Encoding conversion, compute the number of unused original
2178      * bytes from the input not consumed and substract that from
2179      * the raw consumed value, this is not a cheap operation
2180      */
2181         if (in->end - in->cur > 0) {
2182         unsigned char convbuf[32000];
2183         const unsigned char *cur = (const unsigned char *)in->cur;
2184         int toconv = in->end - in->cur, written = 32000;
2185 
2186         int ret;
2187 
2188         if (handler->output != NULL) {
2189             do {
2190             toconv = in->end - cur;
2191             written = 32000;
2192             ret = handler->output(&convbuf[0], &written,
2193                       cur, &toconv);
2194             if (ret == -1) return(-1);
2195             unused += written;
2196             cur += toconv;
2197         } while (ret == -2);
2198 #ifdef LIBXML_ICONV_ENABLED
2199         } else if (handler->iconv_out != NULL) {
2200             do {
2201             toconv = in->end - cur;
2202             written = 32000;
2203             ret = xmlIconvWrapper(handler->iconv_out, &convbuf[0],
2204                           &written, cur, &toconv);
2205             if (ret < 0) {
2206                 if (written > 0)
2207                 ret = -2;
2208             else
2209                 return(-1);
2210             }
2211             unused += written;
2212             cur += toconv;
2213         } while (ret == -2);
2214 #endif
2215             } else {
2216             /* could not find a converter */
2217             return(-1);
2218         }
2219     }
2220     if (in->buf->rawconsumed < unused)
2221         return(-1);
2222     return(in->buf->rawconsumed - unused);
2223     }
2224     return(in->consumed + (in->cur - in->base));
2225 }
2226 
2227 #ifndef LIBXML_ICONV_ENABLED
2228 #ifdef LIBXML_ISO8859X_ENABLED
2229 
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. The conversion stops when @in is exhausted or when
 * @out is full. A call with @in == NULL is the initialization call and
 * does nothing.
 *
 * @xlattable layout: bytes 0..31 are indexed by the low 5 bits of the lead
 * byte of a 2 bytes sequence, bytes 32..47 by the low 4 bits of the lead
 * byte of a 3 bytes sequence; blocks of 64 bytes indexed by the low 6 bits
 * of a trailing byte start at offset 48. A 0 at the last level means the
 * character does not exist in the target character set.
 *
 * Returns the number of bytes written if success (0 for the initialization
 *     call), -2 if the transcoding fails, or -1 if an argument is invalid
 * The value of @inlen after return is the number of octets consumed; on a
 *     transcoding failure it stops right before the offending sequence.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned char const *xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    const unsigned char* seq;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
        * initialization nothing to do
        */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    outend = out + (*outlen);
    inend = in + (*inlen);
    /* every sequence yields exactly one byte, so one free byte is enough */
    while ((in < inend) && (out < outend)) {
        unsigned char d;

        seq = in;               /* start of the sequence being decoded */
        d = *in++;
        if (d < 0x80) {
            *out++ = d;
            continue;
        }
        if (d < 0xC0) {
            /* trailing byte in leading position */
            goto invalid;
        } else if (d < 0xE0) {
            unsigned char c;

            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                goto invalid;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                goto invalid;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            d = xlattable [48 + c + xlattable [d] * 64];
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;

            if (inend - in < 2) {
                /* trailing bytes not in input buffer */
                goto invalid;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                goto invalid;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                goto invalid;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                    xlattable [32 + d] * 64] * 64];
        } else {
            /* cannot transcode >= U+010000 */
            goto invalid;
        }
        if (d == 0) {
            /* not in character set */
            goto invalid;
        }
        *out++ = d;
    }
    *outlen = (int) (out - outstart);
    *inlen = (int) (in - instart);
    return(*outlen);

invalid:
    /* report what was converted before the offending sequence */
    *outlen = (int) (out - outstart);
    *inlen = (int) (seq - instart);
    return(-2);
}
2346 
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable:  the Unicode code points of the bytes 0x80..0xFF, indexed
 *                 by (byte - 0x80); 0 marks an undefined byte
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. The conversion stops when @in is exhausted or when
 * the next character does not fit in the remaining space of @out.
 *
 * Returns the number of bytes written if success, or -1 if an argument is
 *     invalid or an undefined byte is met
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    while (in < inend) {
        /* the byte is only read once it is known to be inside @in */
        unsigned int c = *in;

        if (c < 0x80) {
            if (outend - out < 1)
                break;
            *out++ = (unsigned char) c;
        } else {
            /* a non ASCII byte needs at least 2 bytes of output */
            if (outend - out < 2)
                break;
            c = unicodetable [c - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = (int) (out - outstart);
                *inlen = (int) (in - instart);
                return (-1);
            }
            if (c < 0x800) {
                *out++ = (unsigned char) (((c >>  6) & 0x1F) | 0xC0);
                *out++ = (unsigned char) ((c & 0x3F) | 0x80);
            } else {
                /* 3 bytes sequence, make sure the third byte fits too */
                if (outend - out < 3)
                    break;
                *out++ = (unsigned char) (((c >>  12) & 0x0F) | 0xE0);
                *out++ = (unsigned char) (((c >>  6) & 0x3F) | 0x80);
                *out++ = (unsigned char) ((c & 0x3F) | 0x80);
            }
        }
        ++in;
    }
    *outlen = (int) (out - outstart);
    *inlen = (int) (in - instart);
    return (*outlen);
}
2413 
2414 
2415 /************************************************************************
2416  * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
2417  ************************************************************************/
2418 
/*
 * ISO-8859-2 to Unicode: entry [b - 0x80] is the code point of byte b,
 * for b in 0x80..0xFF. Laid out as the @unicodetable argument of
 * ISO8859xToUTF8(), where an entry of 0x0000 means "undefined byte".
 */
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
};
2437 
/*
 * Unicode to ISO-8859-2, in the layout walked by UTF8ToISO8859x():
 *   - bytes  0..31: block number for the lead byte of a 2 bytes UTF-8
 *                   sequence, indexed by its low 5 bits
 *   - bytes 32..47: block number for the lead byte of a 3 bytes UTF-8
 *                   sequence, indexed by its low 4 bits
 *   - from byte 48: blocks of 64 bytes indexed by the low 6 bits of a
 *                   trailing byte; the last level holds the ISO-8859-2
 *                   byte, 0 meaning "not in the character set"
 * The array is exactly as long as the string, no terminating 0 is stored.
 */
static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
};
2467 
/*
 * ISO-8859-3 to Unicode: entry [b - 0x80] is the code point of byte b,
 * for b in 0x80..0xFF. Laid out as the @unicodetable argument of
 * ISO8859xToUTF8(), where an entry of 0x0000 means "undefined byte"
 * (this table has several of them, e.g. for 0xA5).
 */
static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
};
2486 
/*
 * Unicode to ISO-8859-3, in the layout walked by UTF8ToISO8859x():
 *   - bytes  0..31: block number for the lead byte of a 2 bytes UTF-8
 *                   sequence, indexed by its low 5 bits
 *   - bytes 32..47: block number for the lead byte of a 3 bytes UTF-8
 *                   sequence, indexed by its low 4 bits
 *   - from byte 48: blocks of 64 bytes indexed by the low 6 bits of a
 *                   trailing byte; the last level holds the ISO-8859-3
 *                   byte, 0 meaning "not in the character set"
 * The array is exactly as long as the string, no terminating 0 is stored.
 */
static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
};
2520 
/*
 * ISO-8859-4 to Unicode: entry [b - 0x80] is the code point of byte b,
 * for b in 0x80..0xFF. Laid out as the @unicodetable argument of
 * ISO8859xToUTF8(), where an entry of 0x0000 means "undefined byte".
 */
static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
    0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
    0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
    0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
    0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
    0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
};
2539 
/*
 * Unicode to ISO-8859-4, in the layout walked by UTF8ToISO8859x():
 *   - bytes  0..31: block number for the lead byte of a 2 bytes UTF-8
 *                   sequence, indexed by its low 5 bits
 *   - bytes 32..47: block number for the lead byte of a 3 bytes UTF-8
 *                   sequence, indexed by its low 4 bits
 *   - from byte 48: blocks of 64 bytes indexed by the low 6 bits of a
 *                   trailing byte; the last level holds the ISO-8859-4
 *                   byte, 0 meaning "not in the character set"
 * The array is exactly as long as the string, no terminating 0 is stored.
 */
static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
    "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
};
2569 
/*
 * ISO-8859-5 to Unicode: entry [b - 0x80] is the code point of byte b,
 * for b in 0x80..0xFF. Laid out as the @unicodetable argument of
 * ISO8859xToUTF8(), where an entry of 0x0000 means "undefined byte".
 * Entry 0x70 (byte 0xF0, U+2116) is above U+07FF and therefore needs a
 * 3 bytes UTF-8 sequence.
 */
static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
    0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
    0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
    0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
    0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
    0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
    0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
    0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
};
2588 
/*
 * xmltranscodetable_ISO8859_5:
 *
 * Byte table declared as 48 + 6 * 64 bytes: a 48-byte leading section
 * followed by six 64-byte blocks (each source line below is 16 bytes).
 * The initializer is exactly as long as the array, so no terminating NUL
 * is stored.
 * NOTE(review): presumably the reverse (UTF-8 to ISO-8859-5) lookup, with
 * the small non-zero values acting as 64-byte block numbers and 0x00
 * meaning "no mapping"; the lookup code is outside this chunk -- confirm.
 */
2589 static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
         /* leading 48 bytes */
2590     "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2591     "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2592     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 0: all zero */
2593     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2594     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2595     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2596     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 1 */
2597     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2598     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2599     "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
2600     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 2 */
2601     "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
2602     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
2603     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2604     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
         /* block 3 */
2605     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2606     "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
2607     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2608     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 4 */
2609     "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2610     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2611     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2612     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 5 */
2613     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2614     "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2615     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2616     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2617 };
2618 
/*
 * xmlunicodetable_ISO8859_6:
 *
 * 128 unsigned 16-bit entries for ISO-8859-6. The first 32 entries hold
 * 0x0080..0x009f; the remaining entries are 0x00a0, 0x00a4, 0x00ad,
 * 0x06xx values, and many 0x0000 entries.
 * NOTE(review): presumably entry i is the Unicode code point of byte
 * 0x80 + i and 0x0000 marks a byte with no assignment; the code indexing
 * this table is outside this chunk -- confirm.
 */
2619 static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
2620     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2621     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2622     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2623     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2624     0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
2625     0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
2626     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2627     0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
2628     0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
2629     0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
2630     0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
2631     0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2632     0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
2633     0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
2634     0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2635     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2636 };
2637 
/*
 * xmltranscodetable_ISO8859_6:
 *
 * Byte table declared as 48 + 5 * 64 bytes: a 48-byte leading section
 * followed by five 64-byte blocks (each source line below is 16 bytes).
 * The initializer is exactly as long as the array, so no terminating NUL
 * is stored.
 * NOTE(review): presumably the reverse (UTF-8 to ISO-8859-6) lookup, with
 * the small non-zero values acting as 64-byte block numbers and 0x00
 * meaning "no mapping"; the lookup code is outside this chunk -- confirm.
 */
2638 static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
         /* leading 48 bytes */
2639     "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2640     "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
2641     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 0: all zero */
2642     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2643     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2644     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2645     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 1 */
2646     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2647     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2648     "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
2649     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 2 -- NOTE(review): only a lone 0xff at offset 0; its
          * purpose is not evident from this chunk */
2650     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2651     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2652     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2653     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 3 */
2654     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
2655     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
2656     "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2657     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
         /* block 4 */
2658     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2659     "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2660     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2661     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2662 };
2663 
/*
 * xmlunicodetable_ISO8859_7:
 *
 * 128 unsigned 16-bit entries for ISO-8859-7. The first 32 entries hold
 * 0x0080..0x009f; the remaining entries are 0x00xx, 0x03xx and 0x20xx
 * values, with a few 0x0000 entries.
 * NOTE(review): presumably entry i is the Unicode code point of byte
 * 0x80 + i and 0x0000 marks a byte with no assignment; the code indexing
 * this table is outside this chunk -- confirm.
 */
2664 static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
2665     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2666     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2667     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2668     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2669     0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
2670     0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
2671     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
2672     0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
2673     0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
2674     0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
2675     0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
2676     0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
2677     0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
2678     0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
2679     0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
2680     0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
2681 };
2682 
/*
 * xmltranscodetable_ISO8859_7:
 *
 * Byte table declared as 48 + 7 * 64 bytes: a 48-byte leading section
 * followed by seven 64-byte blocks (each source line below is 16 bytes).
 * The initializer is exactly as long as the array, so no terminating NUL
 * is stored.
 * NOTE(review): presumably the reverse (UTF-8 to ISO-8859-7) lookup, with
 * the small non-zero values acting as 64-byte block numbers and 0x00
 * meaning "no mapping"; the lookup code is outside this chunk -- confirm.
 */
2683 static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
         /* leading 48 bytes */
2684     "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
2685     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2686     "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 0: all zero */
2687     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2688     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2689     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2690     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 1 */
2691     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2692     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2693     "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
2694     "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
         /* block 2 */
2695     "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2696     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2697     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2698     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 3 */
2699     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2700     "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
2701     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2702     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 4 -- NOTE(review): only a lone 0xff at offset 0; its
          * purpose is not evident from this chunk */
2703     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2704     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2705     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2706     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 5 */
2707     "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
2708     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2709     "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
2710     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
         /* block 6 */
2711     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
2712     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2713     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2714     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2715 };
2716 
/*
 * xmlunicodetable_ISO8859_8:
 *
 * 128 unsigned 16-bit entries for ISO-8859-8. The first 32 entries hold
 * 0x0080..0x009f; the remaining entries are 0x00xx, 0x05xx and 0x20xx
 * values, with a long run of 0x0000 entries in the middle.
 * NOTE(review): presumably entry i is the Unicode code point of byte
 * 0x80 + i and 0x0000 marks a byte with no assignment; the code indexing
 * this table is outside this chunk -- confirm.
 */
2717 static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
2718     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2719     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2720     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2721     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2722     0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
2723     0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
2724     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
2725     0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
2726     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2727     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2728     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2729     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
2730     0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
2731     0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
2732     0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
2733     0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
2734 };
2735 
/*
 * xmltranscodetable_ISO8859_8:
 *
 * Byte table declared as 48 + 7 * 64 bytes: a 48-byte leading section
 * followed by seven 64-byte blocks (each source line below is 16 bytes).
 * The initializer is exactly as long as the array, so no terminating NUL
 * is stored.
 * NOTE(review): presumably the reverse (UTF-8 to ISO-8859-8) lookup, with
 * the small non-zero values acting as 64-byte block numbers and 0x00
 * meaning "no mapping"; the lookup code is outside this chunk -- confirm.
 */
2736 static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
         /* leading 48 bytes */
2737     "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2738     "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
2739     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 0: all zero */
2740     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2741     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2742     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2743     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 1 */
2744     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2745     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2746     "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
2747     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
         /* block 2 -- NOTE(review): only a lone 0xff at offset 0; its
          * purpose is not evident from this chunk */
2748     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2749     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2750     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2751     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 3 */
2752     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2753     "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
2754     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2755     "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 4 */
2756     "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2757     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2758     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2759     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 5 */
2760     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
2761     "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
2762     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2763     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 6 */
2764     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2765     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2766     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
2767     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2768 };
2769 
/*
 * xmlunicodetable_ISO8859_9:
 *
 * 128 unsigned 16-bit entries for ISO-8859-9. Every entry equals
 * 0x0080 + its index except six, which hold 0x011e, 0x0130, 0x015e,
 * 0x011f, 0x0131 and 0x015f.
 * NOTE(review): presumably entry i is the Unicode code point of byte
 * 0x80 + i; the code indexing this table is outside this chunk -- confirm.
 */
2770 static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
2771     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2772     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2773     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2774     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2775     0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
2776     0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
2777     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
2778     0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
2779     0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
2780     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
2781     0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
2782     0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
2783     0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
2784     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
2785     0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
2786     0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
2787 };
2788 
/*
 * xmltranscodetable_ISO8859_9:
 *
 * Byte table declared as 48 + 5 * 64 bytes: a 48-byte leading section
 * followed by five 64-byte blocks (each source line below is 16 bytes).
 * The initializer is exactly as long as the array, so no terminating NUL
 * is stored.
 * NOTE(review): presumably the reverse (UTF-8 to ISO-8859-9) lookup, with
 * the small non-zero values acting as 64-byte block numbers and 0x00
 * meaning "no mapping"; the lookup code is outside this chunk -- confirm.
 */
2789 static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
         /* leading 48 bytes */
2790     "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2791     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2792     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 0: all zero */
2793     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2794     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2795     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2796     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 1 */
2797     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2798     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2799     "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
2800     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
         /* block 2 */
2801     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2802     "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
2803     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2804     "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
         /* block 3 */
2805     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2806     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
2807     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2808     "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 4 */
2809     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2810     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
2811     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2812     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2813 };
2814 
/*
 * xmlunicodetable_ISO8859_10:
 *
 * 128 unsigned 16-bit entries for ISO-8859-10. The first 32 entries hold
 * 0x0080..0x009f; the remaining entries are 0x00xx and 0x01xx values plus
 * 0x2015. No entry is 0x0000.
 * NOTE(review): presumably entry i is the Unicode code point of byte
 * 0x80 + i; the code indexing this table is outside this chunk -- confirm.
 */
2815 static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
2816     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2817     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2818     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2819     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2820     0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
2821     0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
2822     0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
2823     0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
2824     0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
2825     0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
2826     0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
2827     0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
2828     0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
2829     0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
2830     0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
2831     0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
2832 };
2833 
/*
 * xmltranscodetable_ISO8859_10:
 *
 * Byte table declared as 48 + 7 * 64 bytes: a 48-byte leading section
 * followed by seven 64-byte blocks (each source line below is 16 bytes).
 * The initializer is exactly as long as the array, so no terminating NUL
 * is stored.
 * NOTE(review): presumably the reverse (UTF-8 to ISO-8859-10) lookup, with
 * the small non-zero values acting as 64-byte block numbers and 0x00
 * meaning "no mapping"; the lookup code is outside this chunk -- confirm.
 */
2834 static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
         /* leading 48 bytes */
2835     "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2836     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2837     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 0: all zero */
2838     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2839     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2840     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2841     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 1 */
2842     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2843     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2844     "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
2845     "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 2 */
2846     "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
2847     "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
2848     "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
2849     "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
         /* block 3 */
2850     "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
2851     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2852     "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
2853     "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
         /* block 4 */
2854     "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2855     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2856     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2857     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 5 */
2858     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2859     "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2860     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2861     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 6 */
2862     "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
2863     "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
2864     "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
2865     "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
2866 };
2867 
/*
 * xmlunicodetable_ISO8859_11:
 *
 * 128 unsigned 16-bit entries for ISO-8859-11. The first 32 entries hold
 * 0x0080..0x009f, followed by 0x00a0 and 0x0exx values, with two runs of
 * four 0x0000 entries.
 * NOTE(review): presumably entry i is the Unicode code point of byte
 * 0x80 + i and 0x0000 marks a byte with no assignment; the code indexing
 * this table is outside this chunk -- confirm.
 */
2868 static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
2869     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2870     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2871     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2872     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2873     0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
2874     0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
2875     0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
2876     0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
2877     0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
2878     0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
2879     0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
2880     0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
2881     0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
2882     0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
2883     0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
2884     0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
2885 };
2886 
/*
 * xmltranscodetable_ISO8859_11:
 *
 * Byte table declared as 48 + 6 * 64 bytes: a 48-byte leading section
 * followed by six 64-byte blocks (each source line below is 16 bytes).
 * The initializer is exactly as long as the array, so no terminating NUL
 * is stored.
 * NOTE(review): presumably the reverse (UTF-8 to ISO-8859-11) lookup, with
 * the small non-zero values acting as 64-byte block numbers and 0x00
 * meaning "no mapping"; the lookup code is outside this chunk -- confirm.
 */
2887 static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
         /* leading 48 bytes */
2888     "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2889     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2890     "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 0: all zero */
2891     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2892     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2893     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2894     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 1 */
2895     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2896     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2897     "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2898     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 2 */
2899     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2900     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2901     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2902     "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
         /* block 3 */
2903     "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
2904     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
2905     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2906     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
         /* block 4 -- NOTE(review): only a lone 0xff at offset 0; its
          * purpose is not evident from this chunk */
2907     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2908     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2909     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2910     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 5 */
2911     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2912     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
2913     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2914     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2915 };
2916 
/*
 * xmlunicodetable_ISO8859_13:
 *
 * 128 unsigned 16-bit entries for ISO-8859-13. The first 32 entries hold
 * 0x0080..0x009f; the remaining entries are 0x00xx, 0x01xx and 0x20xx
 * values. No entry is 0x0000.
 * NOTE(review): presumably entry i is the Unicode code point of byte
 * 0x80 + i; the code indexing this table is outside this chunk -- confirm.
 */
2917 static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
2918     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2919     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2920     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2921     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2922     0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
2923     0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
2924     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
2925     0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
2926     0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
2927     0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
2928     0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
2929     0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
2930     0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
2931     0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
2932     0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
2933     0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
2934 };
2935 
/*
 * xmltranscodetable_ISO8859_13:
 *
 * Byte table declared as 48 + 7 * 64 bytes: a 48-byte leading section
 * followed by seven 64-byte blocks (each source line below is 16 bytes).
 * The initializer is exactly as long as the array, so no terminating NUL
 * is stored.
 * NOTE(review): presumably the reverse (UTF-8 to ISO-8859-13) lookup, with
 * the small non-zero values acting as 64-byte block numbers and 0x00
 * meaning "no mapping"; the lookup code is outside this chunk -- confirm.
 */
2936 static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
         /* leading 48 bytes */
2937     "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2938     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2939     "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 0: all zero */
2940     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2941     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2942     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2943     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 1 */
2944     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2945     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2946     "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
2947     "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
         /* block 2 */
2948     "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2949     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2950     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2951     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 3 */
2952     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2953     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
2954     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2955     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 4 */
2956     "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
2957     "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
2958     "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
2959     "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
         /* block 5 */
2960     "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
2961     "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
2962     "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
2963     "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
         /* block 6 */
2964     "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
2965     "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
2966     "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
2967     "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
2968 };
2969 
/*
 * xmlunicodetable_ISO8859_14:
 *
 * 128 unsigned 16-bit entries for ISO-8859-14. The first 32 entries hold
 * 0x0080..0x009f; the remaining entries are 0x00xx, 0x01xx and 0x1exx
 * values. No entry is 0x0000.
 * NOTE(review): presumably entry i is the Unicode code point of byte
 * 0x80 + i; the code indexing this table is outside this chunk -- confirm.
 */
2970 static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
2971     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2972     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2973     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2974     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2975     0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
2976     0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
2977     0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
2978     0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
2979     0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
2980     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
2981     0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
2982     0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
2983     0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
2984     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
2985     0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
2986     0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
2987 };
2988 
/*
 * xmltranscodetable_ISO8859_14:
 *
 * Byte table declared as 48 + 10 * 64 bytes: a 48-byte leading section
 * followed by ten 64-byte blocks (each source line below is 16 bytes).
 * The initializer is exactly as long as the array, so no terminating NUL
 * is stored.
 * NOTE(review): presumably the reverse (UTF-8 to ISO-8859-14) lookup, with
 * the small non-zero values acting as 64-byte block numbers and 0x00
 * meaning "no mapping"; the lookup code is outside this chunk -- confirm.
 */
2989 static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
         /* leading 48 bytes */
2990     "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2991     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2992     "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 0: all zero */
2993     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2994     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2995     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2996     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 1 */
2997     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2998     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2999     "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3000     "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 2 */
3001     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3002     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3003     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3004     "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
         /* block 3 */
3005     "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3006     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3007     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3008     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 4 */
3009     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3010     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3011     "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3012     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 5 */
3013     "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3014     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3015     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3016     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 6 */
3017     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3018     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3019     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3020     "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 7 */
3021     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3022     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3023     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3024     "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
         /* block 8 */
3025     "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3026     "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3027     "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3028     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         /* block 9 */
3029     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3030     "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3031     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3032     "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
3033 };
3034 
/*
 * xmlunicodetable_ISO8859_15:
 *
 * 128-entry table of 16-bit values handed to ISO8859xToUTF8 by
 * ISO8859_15ToUTF8.  Every entry equals 0x0080 + its index except the
 * eight entries at indexes 0x24, 0x26, 0x28, 0x34, 0x38, 0x3c, 0x3d and
 * 0x3e, which hold 0x20ac, 0x0160, 0x0161, 0x017d, 0x017e, 0x0152,
 * 0x0153 and 0x0178 respectively.
 *
 * NOTE(review): presumably entry i is the Unicode code point for the
 * ISO-8859-15 byte 0x80 + i; confirm against ISO8859xToUTF8, which is
 * defined elsewhere in this file.
 */
static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
    0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
    0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
};
3053 
/*
 * xmltranscodetable_ISO8859_15:
 *
 * Byte table handed to UTF8ToISO8859x by UTF8ToISO8859_15.  The string
 * literal below supplies exactly 48 + 6 * 64 = 432 bytes (27 rows of 16),
 * so the array has no room for the literal's terminating NUL; C permits
 * this when initializing a fixed-size array of character type.
 *
 * NOTE(review): the declared size suggests a 48-byte index followed by
 * six 64-byte sub-tables, with the small non-zero index values
 * (\x01 .. \x05) selecting a sub-table and \x00 meaning "no mapping";
 * confirm against UTF8ToISO8859x, which is defined elsewhere in this file.
 */
static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
    "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
};
3083 
/*
 * xmlunicodetable_ISO8859_16:
 *
 * 128-entry table of 16-bit values handed to ISO8859xToUTF8 by
 * ISO8859_16ToUTF8.  The first 33 entries (indexes 0x00 .. 0x20) equal
 * 0x0080 + index; from index 0x21 on, many entries hold values above
 * 0x00ff instead (for example 0x0104 at index 0x21 and 0x20ac at
 * index 0x24).
 *
 * NOTE(review): presumably entry i is the Unicode code point for the
 * ISO-8859-16 byte 0x80 + i; confirm against ISO8859xToUTF8, which is
 * defined elsewhere in this file.
 */
static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
    0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
    0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
    0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
    0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
    0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
};
3102 
/*
 * xmltranscodetable_ISO8859_16:
 *
 * Byte table handed to UTF8ToISO8859x by UTF8ToISO8859_16.  The string
 * literal below supplies exactly 48 + 9 * 64 = 624 bytes (39 rows of 16),
 * so the array has no room for the literal's terminating NUL; C permits
 * this when initializing a fixed-size array of character type.
 *
 * NOTE(review): the declared size suggests a 48-byte index followed by
 * nine 64-byte sub-tables, with the small non-zero index values
 * (\x01 .. \x08) selecting a sub-table and \x00 meaning "no mapping";
 * confirm against UTF8ToISO8859x, which is defined elsewhere in this file.
 */
static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
};
3144 
3145 
3146 /*
3147  * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3148  */
3149 
3150 static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3151     const unsigned char* in, int *inlen) {
3152     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3153 }
3154 static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3155     const unsigned char* in, int *inlen) {
3156     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3157 }
3158 
3159 static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3160     const unsigned char* in, int *inlen) {
3161     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3162 }
3163 static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3164     const unsigned char* in, int *inlen) {
3165     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3166 }
3167 
3168 static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3169     const unsigned char* in, int *inlen) {
3170     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3171 }
3172 static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3173     const unsigned char* in, int *inlen) {
3174     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3175 }
3176 
3177 static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3178     const unsigned char* in, int *inlen) {
3179     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3180 }
3181 static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3182     const unsigned char* in, int *inlen) {
3183     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3184 }
3185 
3186 static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3187     const unsigned char* in, int *inlen) {
3188     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3189 }
3190 static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3191     const unsigned char* in, int *inlen) {
3192     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3193 }
3194 
3195 static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3196     const unsigned char* in, int *inlen) {
3197     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3198 }
3199 static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3200     const unsigned char* in, int *inlen) {
3201     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3202 }
3203 
3204 static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3205     const unsigned char* in, int *inlen) {
3206     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3207 }
3208 static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3209     const unsigned char* in, int *inlen) {
3210     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3211 }
3212 
3213 static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3214     const unsigned char* in, int *inlen) {
3215     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3216 }
3217 static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3218     const unsigned char* in, int *inlen) {
3219     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3220 }
3221 
3222 static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3223     const unsigned char* in, int *inlen) {
3224     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3225 }
3226 static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3227     const unsigned char* in, int *inlen) {
3228     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3229 }
3230 
3231 static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3232     const unsigned char* in, int *inlen) {
3233     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3234 }
3235 static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3236     const unsigned char* in, int *inlen) {
3237     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3238 }
3239 
3240 static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3241     const unsigned char* in, int *inlen) {
3242     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3243 }
3244 static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3245     const unsigned char* in, int *inlen) {
3246     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3247 }
3248 
3249 static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3250     const unsigned char* in, int *inlen) {
3251     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3252 }
3253 static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3254     const unsigned char* in, int *inlen) {
3255     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3256 }
3257 
3258 static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3259     const unsigned char* in, int *inlen) {
3260     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3261 }
3262 static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3263     const unsigned char* in, int *inlen) {
3264     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3265 }
3266 
3267 static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3268     const unsigned char* in, int *inlen) {
3269     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3270 }
3271 static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3272     const unsigned char* in, int *inlen) {
3273     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3274 }
3275 
3276 static void
3277 xmlRegisterCharEncodingHandlersISO8859x (void) {
3278     xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3279     xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3280     xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3281     xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3282     xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3283     xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3284     xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3285     xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3286     xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3287     xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3288     xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3289     xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3290     xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3291     xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3292 }
3293 
3294 #endif
3295 #endif
3296 
3297 #define bottom_encoding
3298 #include "elfgcchack.h"
3299