< prev index next >

src/java.desktop/share/native/libmlib_image/mlib_c_ImageLookUp_f.c

Print this page
rev 59383 : [mq]: final
   1 /*
   2  * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any


 233       mlib_s32 off;
 234       mlib_s32 size = xsize;
 235       mlib_u8 *dp = dst, *sp = (void *)src;
 236 
 237       off = (mlib_s32) ((4 - ((mlib_addr) dst & 3)) & 3);
 238 
 239       for (i = 0; i < off; i++, sp++) {
 240         *dp++ = tab[sp[0]];
 241         size--;
 242       }
 243 
 244       da = (mlib_u32 *) dp;
 245 
 246       if (((mlib_addr) sp & 1) == 0) {
 247         sa = (mlib_u16 *) sp;
 248 
 249         s0 = sa[0];
 250         s1 = sa[1];
 251         sa += 2;
 252 
 253 #ifdef __SUNPRO_C
 254 #pragma pipeloop(0)
 255 #endif /* __SUNPRO_C */
 256         for (i = 0; i < size - 7; i += 4, da++, sa += 2) {
 257           READ_U8_U8_ALIGN(tab, tab, tab, tab);
 258           t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 259           s0 = sa[0];
 260           s1 = sa[1];
 261           da[0] = t;
 262         }
 263 
 264         READ_U8_U8_ALIGN(tab, tab, tab, tab);
 265         t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 266         da[0] = t;
 267         da++;
 268         dp = (mlib_u8 *) da;
 269         sp = (mlib_u8 *) sa;
 270         i += 4;
 271         for (; i < size; i++, dp++, sp++)
 272           dp[0] = tab[sp[0]];
 273 
 274       }
 275       else {
 276         sa = (mlib_u16 *) (sp - 1);
 277 
 278         s0 = sa[0];
 279         s1 = sa[1];
 280         s2 = sa[2];
 281         sa += 3;
 282 
 283 #ifdef __SUNPRO_C
 284 #pragma pipeloop(0)
 285 #endif /* __SUNPRO_C */
 286         for (i = 0; i < size - 8; i += 4, da++, sa += 2) {
 287           READ_U8_U8_NOTALIGN(tab, tab, tab, tab);
 288           t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 289           s0 = s2;
 290           s1 = sa[0];
 291           s2 = sa[1];
 292           da[0] = t;
 293         }
 294 
 295         READ_U8_U8_NOTALIGN(tab, tab, tab, tab);
 296         t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 297         da[0] = t;
 298         da++;
 299         dp = (mlib_u8 *) da;
 300 #ifdef _LITTLE_ENDIAN
 301         *dp++ = tab[s2 >> 8];
 302 #else
 303         *dp++ = tab[s2 & 0xFF];
 304 #endif /* _LITTLE_ENDIAN */
 305         sp = (mlib_u8 *) sa;


 333       }
 334 
 335       if ((off & 1) != 0) {
 336         *dp++ = tab0[sp[0]];
 337         size--;
 338         sp++;
 339         tab = tab0;
 340         tab0 = tab1;
 341         tab1 = tab;
 342       }
 343 
 344       da = (mlib_u32 *) dp;
 345 
 346       if (((mlib_addr) sp & 1) == 0) {
 347         sa = (mlib_u16 *) sp;
 348 
 349         s0 = sa[0];
 350         s1 = sa[1];
 351         sa += 2;
 352 
 353 #ifdef __SUNPRO_C
 354 #pragma pipeloop(0)
 355 #endif /* __SUNPRO_C */
 356         for (i = 0; i < size - 7; i += 4, da++, sa += 2) {
 357           READ_U8_U8_ALIGN(tab0, tab1, tab0, tab1);
 358           t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 359           s0 = sa[0];
 360           s1 = sa[1];
 361           da[0] = t;
 362         }
 363 
 364         READ_U8_U8_ALIGN(tab0, tab1, tab0, tab1);
 365         t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 366         da[0] = t;
 367         da++;
 368         dp = (mlib_u8 *) da;
 369         sp = (mlib_u8 *) sa;
 370         i += 4;
 371 
 372         for (; i < size - 1; i += 2, sp += 2) {
 373           *dp++ = tab0[sp[0]];
 374           *dp++ = tab1[sp[1]];
 375         }
 376 
 377         if (i < size)
 378           *dp = tab0[(*sp)];
 379 
 380       }
 381       else {
 382         sa = (mlib_u16 *) (sp - 1);
 383 
 384         s0 = sa[0];
 385         s1 = sa[1];
 386         s2 = sa[2];
 387         sa += 3;
 388 
 389 #ifdef __SUNPRO_C
 390 #pragma pipeloop(0)
 391 #endif /* __SUNPRO_C */
 392         for (i = 0; i < size - 8; i += 4, da++, sa += 2) {
 393           READ_U8_U8_NOTALIGN(tab0, tab1, tab0, tab1);
 394           t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 395           s0 = s2;
 396           s1 = sa[0];
 397           s2 = sa[1];
 398           da[0] = t;
 399         }
 400 
 401         READ_U8_U8_NOTALIGN(tab0, tab1, tab0, tab1);
 402         t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 403         da[0] = t;
 404         da++;
 405         dp = (mlib_u8 *) da;
 406 #ifdef _LITTLE_ENDIAN
 407         *dp++ = tab0[s2 >> 8];
 408 #else
 409         *dp++ = tab0[s2 & 0xFF];
 410 #endif /* _LITTLE_ENDIAN */
 411         sp = (mlib_u8 *) sa;


 458         size -= 2;
 459         sp += 2;
 460       }
 461       else if (off == 3) {
 462         *dp++ = tab0[sp[0]];
 463         *dp++ = tab1[sp[1]];
 464         *dp++ = tab2[sp[2]];
 465         size -= 3;
 466         sp += 3;
 467       }
 468 
 469       da = (mlib_u32 *) dp;
 470 
 471       if (((mlib_addr) sp & 1) == 0) {
 472         sa = (mlib_u16 *) sp;
 473 
 474         s0 = sa[0];
 475         s1 = sa[1];
 476         sa += 2;
 477 
 478 #ifdef __SUNPRO_C
 479 #pragma pipeloop(0)
 480 #endif /* __SUNPRO_C */
 481         for (i = 0; i < size - 7; i += 4, da++, sa += 2) {
 482           READ_U8_U8_ALIGN(tab0, tab1, tab2, tab0);
 483           t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 484           tab = tab0;
 485           tab0 = tab1;
 486           tab1 = tab2;
 487           tab2 = tab;
 488           s0 = sa[0];
 489           s1 = sa[1];
 490           da[0] = t;
 491         }
 492 
 493         READ_U8_U8_ALIGN(tab0, tab1, tab2, tab0);
 494         t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 495         da[0] = t;
 496         da++;
 497         dp = (mlib_u8 *) da;
 498         sp = (mlib_u8 *) sa;
 499         i += 4;
 500 


 506 
 507         if (i < size) {
 508           *dp++ = tab2[(*sp)];
 509           i++;
 510           sp++;
 511         }
 512 
 513         if (i < size) {
 514           *dp++ = tab0[(*sp)];
 515         }
 516 
 517       }
 518       else {
 519         sa = (mlib_u16 *) (sp - 1);
 520 
 521         s0 = sa[0];
 522         s1 = sa[1];
 523         s2 = sa[2];
 524         sa += 3;
 525 
 526 #ifdef __SUNPRO_C
 527 #pragma pipeloop(0)
 528 #endif /* __SUNPRO_C */
 529         for (i = 0; i < size - 8; i += 4, da++, sa += 2) {
 530           READ_U8_U8_NOTALIGN(tab0, tab1, tab2, tab0);
 531           t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 532           tab = tab0;
 533           tab0 = tab1;
 534           tab1 = tab2;
 535           tab2 = tab;
 536           s0 = s2;
 537           s1 = sa[0];
 538           s2 = sa[1];
 539           da[0] = t;
 540         }
 541 
 542         READ_U8_U8_NOTALIGN(tab0, tab1, tab2, tab0);
 543         t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 544         da[0] = t;
 545         da++;
 546         dp = (mlib_u8 *) da;
 547 #ifdef _LITTLE_ENDIAN
 548         *dp++ = tab1[s2 >> 8];


 616         *dp++ = tab1[sp[1]];
 617         *dp++ = tab2[sp[2]];
 618         tab = tab3;
 619         tab3 = tab2;
 620         tab2 = tab1;
 621         tab1 = tab0;
 622         tab0 = tab;
 623         size -= 3;
 624         sp += 3;
 625       }
 626 
 627       da = (mlib_u32 *) dp;
 628 
 629       if (((mlib_addr) sp & 1) == 0) {
 630         sa = (mlib_u16 *) sp;
 631 
 632         s0 = sa[0];
 633         s1 = sa[1];
 634         sa += 2;
 635 
 636 #ifdef __SUNPRO_C
 637 #pragma pipeloop(0)
 638 #endif /* __SUNPRO_C */
 639         for (i = 0; i < size - 7; i += 4, da++, sa += 2) {
 640           READ_U8_U8_ALIGN(tab0, tab1, tab2, tab3);
 641           t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 642           s0 = sa[0];
 643           s1 = sa[1];
 644           da[0] = t;
 645         }
 646 
 647         READ_U8_U8_ALIGN(tab0, tab1, tab2, tab3);
 648         t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 649         da[0] = t;
 650         da++;
 651         dp = (mlib_u8 *) da;
 652         sp = (mlib_u8 *) sa;
 653         i += 4;
 654 
 655         if (i < size) {
 656           *dp++ = tab0[(*sp)];
 657           i++;
 658           sp++;


 660 
 661         if (i < size) {
 662           *dp++ = tab1[(*sp)];
 663           i++;
 664           sp++;
 665         }
 666 
 667         if (i < size) {
 668           *dp = tab2[(*sp)];
 669         }
 670 
 671       }
 672       else {
 673         sa = (mlib_u16 *) (sp - 1);
 674 
 675         s0 = sa[0];
 676         s1 = sa[1];
 677         s2 = sa[2];
 678         sa += 3;
 679 
 680 #ifdef __SUNPRO_C
 681 #pragma pipeloop(0)
 682 #endif /* __SUNPRO_C */
 683         for (i = 0; i < size - 8; i += 4, da++, sa += 2) {
 684           READ_U8_U8_NOTALIGN(tab0, tab1, tab2, tab3);
 685           t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 686           s0 = s2;
 687           s1 = sa[0];
 688           s2 = sa[1];
 689           da[0] = t;
 690         }
 691 
 692         READ_U8_U8_NOTALIGN(tab0, tab1, tab2, tab3);
 693         t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 694         da[0] = t;
 695         da++;
 696         dp = (mlib_u8 *) da;
 697 #ifdef _LITTLE_ENDIAN
 698         *dp++ = tab0[s2 >> 8];
 699 #else
 700         *dp++ = tab0[s2 & 0xFF];
 701 #endif /* _LITTLE_ENDIAN */
 702         sp = (mlib_u8 *) sa;


 808       mlib_s32 off;
 809       mlib_s32 size = xsize;
 810       mlib_u16 *dp = (mlib_u16 *) dst;
 811       mlib_u8 *sp = (void *)src;
 812 
 813       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
 814 
 815       for (i = 0; i < off; i++, sp++) {
 816         *dp++ = tab[sp[0]];
 817         size--;
 818       }
 819 
 820       sa = (mlib_u32 *) sp;
 821 
 822       if (((mlib_addr) dp & 3) == 0) {
 823         da = (mlib_u32 *) dp;
 824 
 825         s0 = sa[0];
 826         sa++;
 827 
 828 #ifdef __SUNPRO_C
 829 #pragma pipeloop(0)
 830 #endif /* __SUNPRO_C */
 831         for (i = 0; i < size - 7; i += 4, da += 2, sa++) {
 832           READ_U8_S16_ALIGN(tab, tab, tab, tab);
 833           res1 = (t0 << 16) + t1;
 834           res2 = (t2 << 16) + t3;
 835           s0 = sa[0];
 836           da[0] = res1;
 837           da[1] = res2;
 838         }
 839 
 840         READ_U8_S16_ALIGN(tab, tab, tab, tab);
 841         res1 = (t0 << 16) + t1;
 842         res2 = (t2 << 16) + t3;
 843         da[0] = res1;
 844         da[1] = res2;
 845         da += 2;
 846         dp = (mlib_u16 *) da;
 847         sp = (mlib_u8 *) sa;
 848         i += 4;
 849         for (; i < size; i++, dp++, sp++)
 850           dp[0] = tab[sp[0]];
 851 
 852       }
 853       else {
 854 
 855         *dp++ = tab[(*sp)];
 856         size--;
 857         da = (mlib_u32 *) dp;
 858 
 859         s0 = sa[0];
 860         s1 = sa[1];
 861         sa += 2;
 862 
 863 #ifdef __SUNPRO_C
 864 #pragma pipeloop(0)
 865 #endif /* __SUNPRO_C */
 866         for (i = 0; i < size - 10; i += 4, da += 2, sa++) {
 867           READ_U8_S16_NOTALIGN(tab, tab, tab, tab);
 868           s0 = s1;
 869           res1 = (t0 << 16) + t1;
 870           res2 = (t2 << 16) + t3;
 871           s1 = sa[0];
 872           da[0] = res1;
 873           da[1] = res2;
 874         }
 875 
 876         READ_U8_S16_NOTALIGN(tab, tab, tab, tab);
 877         res1 = (t0 << 16) + t1;
 878         res2 = (t2 << 16) + t3;
 879         da[0] = res1;
 880         da[1] = res2;
 881         ADD_READ_U8_S16_NOTALIGN(tab, tab, tab);
 882         res1 = (t0 << 16) + t1;
 883         da[2] = res1;
 884         da += 3;
 885         dp = (mlib_u16 *) da;


 916         size -= 2;
 917       }
 918 
 919       if ((off & 1) != 0) {
 920         *dp++ = tab0[*sp];
 921         size--;
 922         sp++;
 923         tab = tab0;
 924         tab0 = tab1;
 925         tab1 = tab;
 926       }
 927 
 928       sa = (mlib_u32 *) sp;
 929 
 930       if (((mlib_addr) dp & 3) == 0) {
 931         da = (mlib_u32 *) dp;
 932 
 933         s0 = sa[0];
 934         sa++;
 935 
 936 #ifdef __SUNPRO_C
 937 #pragma pipeloop(0)
 938 #endif /* __SUNPRO_C */
 939         for (i = 0; i < size - 7; i += 4, da += 2, sa++) {
 940           READ_U8_S16_ALIGN(tab0, tab1, tab0, tab1);
 941           res1 = (t0 << 16) + t1;
 942           res2 = (t2 << 16) + t3;
 943           s0 = sa[0];
 944           da[0] = res1;
 945           da[1] = res2;
 946         }
 947 
 948         READ_U8_S16_ALIGN(tab0, tab1, tab0, tab1);
 949         res1 = (t0 << 16) + t1;
 950         res2 = (t2 << 16) + t3;
 951         da[0] = res1;
 952         da[1] = res2;
 953         da += 2;
 954         dp = (mlib_u16 *) da;
 955         sp = (mlib_u8 *) sa;
 956         i += 4;
 957 
 958         for (; i < size - 1; i += 2, sp += 2) {
 959           *dp++ = tab0[sp[0]];
 960           *dp++ = tab1[sp[1]];
 961         }
 962 
 963         if (i < size)
 964           *dp = tab0[(*sp)];
 965 
 966       }
 967       else {
 968 
 969         *dp++ = tab0[(*sp)];
 970         size--;
 971         da = (mlib_u32 *) dp;
 972 
 973         s0 = sa[0];
 974         s1 = sa[1];
 975         sa += 2;
 976 
 977 #ifdef __SUNPRO_C
 978 #pragma pipeloop(0)
 979 #endif /* __SUNPRO_C */
 980         for (i = 0; i < size - 10; i += 4, da += 2, sa++) {
 981           READ_U8_S16_NOTALIGN(tab1, tab0, tab1, tab0);
 982           s0 = s1;
 983           res1 = (t0 << 16) + t1;
 984           res2 = (t2 << 16) + t3;
 985           s1 = sa[0];
 986           da[0] = res1;
 987           da[1] = res2;
 988         }
 989 
 990         READ_U8_S16_NOTALIGN(tab1, tab0, tab1, tab0);
 991         res1 = (t0 << 16) + t1;
 992         res2 = (t2 << 16) + t3;
 993         da[0] = res1;
 994         da[1] = res2;
 995         ADD_READ_U8_S16_NOTALIGN(tab1, tab0, tab1);
 996         res1 = (t0 << 16) + t1;
 997         da[2] = res1;
 998         da += 3;
 999         dp = (mlib_u16 *) da;


1049         tab0 = tab;
1050         size -= 2;
1051         sp += 2;
1052       }
1053       else if (off == 3) {
1054         *dp++ = tab0[sp[0]];
1055         *dp++ = tab1[sp[1]];
1056         *dp++ = tab2[sp[2]];
1057         size -= 3;
1058         sp += 3;
1059       }
1060 
1061       sa = (mlib_u32 *) sp;
1062 
1063       if (((mlib_addr) dp & 3) == 0) {
1064         da = (mlib_u32 *) dp;
1065 
1066         s0 = sa[0];
1067         sa++;
1068 
1069 #ifdef __SUNPRO_C
1070 #pragma pipeloop(0)
1071 #endif /* __SUNPRO_C */
1072         for (i = 0; i < size - 7; i += 4, da += 2, sa++) {
1073           READ_U8_S16_ALIGN(tab0, tab1, tab2, tab0);
1074           res1 = (t0 << 16) + t1;
1075           res2 = (t2 << 16) + t3;
1076           tab = tab0;
1077           tab0 = tab1;
1078           tab1 = tab2;
1079           tab2 = tab;
1080           s0 = sa[0];
1081           da[0] = res1;
1082           da[1] = res2;
1083         }
1084 
1085         READ_U8_S16_ALIGN(tab0, tab1, tab2, tab0);
1086         res1 = (t0 << 16) + t1;
1087         res2 = (t2 << 16) + t3;
1088         da[0] = res1;
1089         da[1] = res2;
1090         da += 2;
1091         dp = (mlib_u16 *) da;


1102           *dp++ = tab2[(*sp)];
1103           i++;
1104           sp++;
1105         }
1106 
1107         if (i < size) {
1108           *dp = tab0[(*sp)];
1109         }
1110 
1111       }
1112       else {
1113 
1114         *dp++ = tab0[(*sp)];
1115         size--;
1116         da = (mlib_u32 *) dp;
1117 
1118         s0 = sa[0];
1119         s1 = sa[1];
1120         sa += 2;
1121 
1122 #ifdef __SUNPRO_C
1123 #pragma pipeloop(0)
1124 #endif /* __SUNPRO_C */
1125         for (i = 0; i < size - 10; i += 4, da += 2, sa++) {
1126           READ_U8_S16_NOTALIGN(tab1, tab2, tab0, tab1);
1127           s0 = s1;
1128           res1 = (t0 << 16) + t1;
1129           res2 = (t2 << 16) + t3;
1130           tab = tab0;
1131           tab0 = tab1;
1132           tab1 = tab2;
1133           tab2 = tab;
1134           s1 = sa[0];
1135           da[0] = res1;
1136           da[1] = res2;
1137         }
1138 
1139         READ_U8_S16_NOTALIGN(tab1, tab2, tab0, tab1);
1140         res1 = (t0 << 16) + t1;
1141         res2 = (t2 << 16) + t3;
1142         da[0] = res1;
1143         da[1] = res2;
1144         ADD_READ_U8_S16_NOTALIGN(tab2, tab0, tab1);


1215         *dp++ = tab0[sp[0]];
1216         *dp++ = tab1[sp[1]];
1217         *dp++ = tab2[sp[2]];
1218         tab = tab3;
1219         tab3 = tab2;
1220         tab2 = tab1;
1221         tab1 = tab0;
1222         tab0 = tab;
1223         size -= 3;
1224         sp += 3;
1225       }
1226 
1227       sa = (mlib_u32 *) sp;
1228 
1229       if (((mlib_addr) dp & 3) == 0) {
1230         da = (mlib_u32 *) dp;
1231 
1232         s0 = sa[0];
1233         sa++;
1234 
1235 #ifdef __SUNPRO_C
1236 #pragma pipeloop(0)
1237 #endif /* __SUNPRO_C */
1238         for (i = 0; i < size - 7; i += 4, da += 2, sa++) {
1239           READ_U8_S16_ALIGN(tab0, tab1, tab2, tab3);
1240           res1 = (t0 << 16) + t1;
1241           res2 = (t2 << 16) + t3;
1242           s0 = sa[0];
1243           da[0] = res1;
1244           da[1] = res2;
1245         }
1246 
1247         READ_U8_S16_ALIGN(tab0, tab1, tab2, tab3);
1248         res1 = (t0 << 16) + t1;
1249         res2 = (t2 << 16) + t3;
1250         da[0] = res1;
1251         da[1] = res2;
1252         da += 2;
1253         dp = (mlib_u16 *) da;
1254         sp = (mlib_u8 *) sa;
1255         i += 4;
1256 
1257         if (i < size) {


1264           *dp++ = tab1[(*sp)];
1265           i++;
1266           sp++;
1267         }
1268 
1269         if (i < size) {
1270           *dp = tab2[(*sp)];
1271         }
1272 
1273       }
1274       else {
1275 
1276         *dp++ = tab0[(*sp)];
1277         size--;
1278         da = (mlib_u32 *) dp;
1279 
1280         s0 = sa[0];
1281         s1 = sa[1];
1282         sa += 2;
1283 
1284 #ifdef __SUNPRO_C
1285 #pragma pipeloop(0)
1286 #endif /* __SUNPRO_C */
1287         for (i = 0; i < size - 10; i += 4, da += 2, sa++) {
1288           READ_U8_S16_NOTALIGN(tab1, tab2, tab3, tab0);
1289           s0 = s1;
1290           res1 = (t0 << 16) + t1;
1291           res2 = (t2 << 16) + t3;
1292           s1 = sa[0];
1293           da[0] = res1;
1294           da[1] = res2;
1295         }
1296 
1297         READ_U8_S16_NOTALIGN(tab1, tab2, tab3, tab0);
1298         res1 = (t0 << 16) + t1;
1299         res2 = (t2 << 16) + t3;
1300         da[0] = res1;
1301         da[1] = res2;
1302         ADD_READ_U8_S16_NOTALIGN(tab1, tab2, tab3);
1303         res1 = (t0 << 16) + t1;
1304         da[2] = res1;
1305         da += 3;
1306         dp = (mlib_u16 *) da;


1469       mlib_u32 *sa;
1470       mlib_u32 *tab = (mlib_u32 *) table[0];
1471       mlib_u32 s0, t0, t1, t2, t3;
1472       mlib_s32 off;
1473       mlib_s32 size = xsize;
1474       mlib_u32 *dp = (mlib_u32 *) dst;
1475       mlib_u8 *sp = (void *)src;
1476 
1477       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
1478 
1479       for (i = 0; i < off; i++, sp++) {
1480         *dp++ = tab[sp[0]];
1481         size--;
1482       }
1483 
1484       sa = (mlib_u32 *) sp;
1485 
1486       s0 = sa[0];
1487       sa++;
1488 
1489 #ifdef __SUNPRO_C
1490 #pragma pipeloop(0)
1491 #endif /* __SUNPRO_C */
1492       for (i = 0; i < size - 7; i += 4, dp += 4, sa++) {
1493         READ_U8_S32(tab, tab, tab, tab);
1494         s0 = sa[0];
1495         dp[0] = t0;
1496         dp[1] = t1;
1497         dp[2] = t2;
1498         dp[3] = t3;
1499       }
1500 
1501       READ_U8_S32(tab, tab, tab, tab);
1502       dp[0] = t0;
1503       dp[1] = t1;
1504       dp[2] = t2;
1505       dp[3] = t3;
1506       dp += 4;
1507       sp = (mlib_u8 *) sa;
1508       i += 4;
1509       for (; i < size; i++, dp++, sp++)
1510         dp[0] = tab[sp[0]];
1511     }


1530       for (i = 0; i < off - 1; i += 2, sp += 2) {
1531         *dp++ = tab0[sp[0]];
1532         *dp++ = tab1[sp[1]];
1533         size -= 2;
1534       }
1535 
1536       if ((off & 1) != 0) {
1537         *dp++ = tab0[*sp];
1538         size--;
1539         sp++;
1540         tab = tab0;
1541         tab0 = tab1;
1542         tab1 = tab;
1543       }
1544 
1545       sa = (mlib_u32 *) sp;
1546 
1547       s0 = sa[0];
1548       sa++;
1549 
1550 #ifdef __SUNPRO_C
1551 #pragma pipeloop(0)
1552 #endif /* __SUNPRO_C */
1553       for (i = 0; i < size - 7; i += 4, dp += 4, sa++) {
1554         READ_U8_S32(tab0, tab1, tab0, tab1);
1555         s0 = sa[0];
1556         dp[0] = t0;
1557         dp[1] = t1;
1558         dp[2] = t2;
1559         dp[3] = t3;
1560       }
1561 
1562       READ_U8_S32(tab0, tab1, tab0, tab1);
1563       dp[0] = t0;
1564       dp[1] = t1;
1565       dp[2] = t2;
1566       dp[3] = t3;
1567       dp += 4;
1568       sp = (mlib_u8 *) sa;
1569       i += 4;
1570 
1571       for (; i < size - 1; i += 2, sp += 2) {
1572         *dp++ = tab0[sp[0]];


1610         tab = tab2;
1611         tab2 = tab1;
1612         tab1 = tab0;
1613         tab0 = tab;
1614         size -= 2;
1615         sp += 2;
1616       }
1617       else if (off == 3) {
1618         *dp++ = tab0[sp[0]];
1619         *dp++ = tab1[sp[1]];
1620         *dp++ = tab2[sp[2]];
1621         size -= 3;
1622         sp += 3;
1623       }
1624 
1625       sa = (mlib_u32 *) sp;
1626 
1627       s0 = sa[0];
1628       sa++;
1629 
1630 #ifdef __SUNPRO_C
1631 #pragma pipeloop(0)
1632 #endif /* __SUNPRO_C */
1633       for (i = 0; i < size - 7; i += 4, dp += 4, sa++) {
1634         READ_U8_S32(tab0, tab1, tab2, tab0);
1635         tab = tab0;
1636         tab0 = tab1;
1637         tab1 = tab2;
1638         tab2 = tab;
1639         s0 = sa[0];
1640         dp[0] = t0;
1641         dp[1] = t1;
1642         dp[2] = t2;
1643         dp[3] = t3;
1644       }
1645 
1646       READ_U8_S32(tab0, tab1, tab2, tab0);
1647       dp[0] = t0;
1648       dp[1] = t1;
1649       dp[2] = t2;
1650       dp[3] = t3;
1651       dp += 4;
1652       sp = (mlib_u8 *) sa;


1711         sp += 2;
1712       }
1713       else if (off == 3) {
1714         *dp++ = tab0[sp[0]];
1715         *dp++ = tab1[sp[1]];
1716         *dp++ = tab2[sp[2]];
1717         tab = tab3;
1718         tab3 = tab2;
1719         tab2 = tab1;
1720         tab1 = tab0;
1721         tab0 = tab;
1722         size -= 3;
1723         sp += 3;
1724       }
1725 
1726       sa = (mlib_u32 *) sp;
1727 
1728       s0 = sa[0];
1729       sa++;
1730 
1731 #ifdef __SUNPRO_C
1732 #pragma pipeloop(0)
1733 #endif /* __SUNPRO_C */
1734       for (i = 0; i < size - 7; i += 4, dp += 4, sa++) {
1735         READ_U8_S32(tab0, tab1, tab2, tab3);
1736         s0 = sa[0];
1737         dp[0] = t0;
1738         dp[1] = t1;
1739         dp[2] = t2;
1740         dp[3] = t3;
1741       }
1742 
1743       READ_U8_S32(tab0, tab1, tab2, tab3);
1744       dp[0] = t0;
1745       dp[1] = t1;
1746       dp[2] = t2;
1747       dp[3] = t3;
1748       dp += 4;
1749       sp = (mlib_u8 *) sa;
1750       i += 4;
1751 
1752       if (i < size) {
1753         *dp++ = tab0[(*sp)];


1874       mlib_u8 *dp = dst;
1875       mlib_u8 *sa = (void *)src;
1876       mlib_s32 s0, t0, s1, t1, t, t2, off;
1877       mlib_s32 size = xsize;
1878 
1879       if (((mlib_addr) dp & 1) == 0) {
1880 
1881         if (((mlib_addr) dp & 3) != 0) {
1882           *((mlib_u16 *) dp) = tab[sa[0]];
1883           sa++;
1884           size--;
1885           dp += 2;
1886         }
1887 
1888         da = (mlib_s32 *) dp;
1889 
1890         s0 = sa[0];
1891         s1 = sa[1];
1892         sa += 2;
1893 
1894 #ifdef __SUNPRO_C
1895 #pragma pipeloop(0)
1896 #endif /* __SUNPRO_C */
1897         for (i = 0; i < size - 3; i += 2, da++, sa += 2) {
1898           t0 = tab[s0];
1899           t1 = tab[s1];
1900 #ifdef _LITTLE_ENDIAN
1901           t = (t1 << 16) + t0;
1902 #else
1903           t = (t0 << 16) + t1;
1904 #endif /* _LITTLE_ENDIAN */
1905           s0 = sa[0];
1906           s1 = sa[1];
1907           da[0] = t;
1908         }
1909 
1910         t0 = tab[s0];
1911         t1 = tab[s1];
1912 #ifdef _LITTLE_ENDIAN
1913         t = (t1 << 16) + t0;
1914 #else
1915         t = (t0 << 16) + t1;
1916 #endif /* _LITTLE_ENDIAN */


1936 #endif /* _LITTLE_ENDIAN */
1937           sa++;
1938           size--;
1939           dp += 2;
1940         }
1941 
1942         t0 = tab[sa[0]];
1943         sa++;
1944 #ifdef _LITTLE_ENDIAN
1945         *dp++ = t0;
1946 #else
1947         *dp++ = (t0 >> 8);
1948 #endif /* _LITTLE_ENDIAN */
1949 
1950         da = (mlib_s32 *) dp;
1951 
1952         s0 = sa[0];
1953         s1 = sa[1];
1954         sa += 2;
1955 
1956 #ifdef __SUNPRO_C
1957 #pragma pipeloop(0)
1958 #endif /* __SUNPRO_C */
1959         for (i = 0; i < size - 4; i += 2, da++, sa += 2) {
1960           t1 = tab[s0];
1961           t2 = tab[s1];
1962 #ifdef _LITTLE_ENDIAN
1963           t = (t0 >> 8) + (t1 << 8) + (t2 << 24);
1964 #else
1965           t = (t0 << 24) + (t1 << 8) + (t2 >> 8);
1966 #endif /* _LITTLE_ENDIAN */
1967           t0 = t2;
1968           s0 = sa[0];
1969           s1 = sa[1];
1970           da[0] = t;
1971         }
1972 
1973         t1 = tab[s0];
1974         t2 = tab[s1];
1975 #ifdef _LITTLE_ENDIAN
1976         t = (t0 >> 8) + (t1 << 8) + (t2 << 24);
1977 #else
1978         t = (t0 << 24) + (t1 << 8) + (t2 >> 8);


2023       tab[i - 1] = s3;
2024     }
2025 
2026 #ifdef _LITTLE_ENDIAN
2027     s3 = (s2 << 24) + (s1 << 16) + (s0 << 8);
2028 #else
2029     s3 = (s0 << 16) + (s1 << 8) + s2;
2030 #endif /* _LITTLE_ENDIAN */
2031     tab[255] = s3;
2032 
2033     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
2034       mlib_u32 *da;
2035       mlib_u8 *dp = dst;
2036       mlib_u8 *sa = (void *)src, *ptr;
2037       mlib_u32 s0, s1, t0, t1;
2038       mlib_u32 res1, res2;
2039       mlib_s32 size = xsize, off;
2040 
2041       off = (mlib_s32) ((mlib_addr) dp & 3);
2042 
2043 #ifdef __SUNPRO_C
2044 #pragma pipeloop(0)
2045 #endif /* __SUNPRO_C */
2046       for (i = 0; i < off; i++) {
2047         ptr = (mlib_u8 *) (tab + sa[0]);
2048         dp[0] = ptr[1];
2049         dp[1] = ptr[2];
2050         dp[2] = ptr[3];
2051         dp += 3;
2052         sa++;
2053       }
2054 
2055       size -= off;
2056       da = (mlib_u32 *) dp;
2057       s0 = sa[0];
2058       s1 = sa[1];
2059       sa += 2;
2060 
2061 #ifdef __SUNPRO_C
2062 #pragma pipeloop(0)
2063 #endif /* __SUNPRO_C */
2064       for (i = 0; i < size - 7; i += 4, da += 3, sa += 4) {
2065         t0 = tab[s0];
2066         t1 = tab[s1];
2067 #ifdef _LITTLE_ENDIAN
2068         da[0] = (t0 >> 8) + (t1 << 16);
2069         res2 = (t1 >> 16);
2070 #else
2071         da[0] = (t0 << 8) + (t1 >> 16);
2072         res2 = (t1 << 16);
2073 #endif /* _LITTLE_ENDIAN */
2074         s0 = sa[0];
2075         s1 = sa[1];
2076         t0 = tab[s0];
2077         t1 = tab[s1];
2078 #ifdef _LITTLE_ENDIAN
2079         res2 += (t0 << 8);
2080         res1 = (t0 >> 24) + t1;
2081 #else
2082         res2 += (t0 >> 8);
2083         res1 = (t0 << 24) + t1;


2098       res2 = (t1 << 16);
2099 #endif /* _LITTLE_ENDIAN */
2100       s0 = sa[0];
2101       s1 = sa[1];
2102       t0 = tab[s0];
2103       t1 = tab[s1];
2104 #ifdef _LITTLE_ENDIAN
2105       res2 += (t0 << 8);
2106       res1 = (t0 >> 24) + t1;
2107 #else
2108       res2 += (t0 >> 8);
2109       res1 = (t0 << 24) + t1;
2110 #endif /* _LITTLE_ENDIAN */
2111       da[1] = res2;
2112       da[2] = res1;
2113       da += 3;
2114       sa += 2;
2115       dp = (mlib_u8 *) da;
2116       i += 4;
2117 
2118 #ifdef __SUNPRO_C
2119 #pragma pipeloop(0)
2120 #endif /* __SUNPRO_C */
2121       for (; i < size; i++) {
2122         ptr = (mlib_u8 *) (tab + sa[0]);
2123         dp[0] = ptr[1];
2124         dp[1] = ptr[2];
2125         dp[2] = ptr[3];
2126         dp += 3;
2127         sa++;
2128       }
2129     }
2130 
2131   }
2132   else if (csize == 4) {
2133     mlib_u32 tab[256];
2134     const mlib_u8 *tab0 = table[0];
2135     const mlib_u8 *tab1 = table[1];
2136     const mlib_u8 *tab2 = table[2];
2137     const mlib_u8 *tab3 = table[3];
2138     mlib_s32 i, j;
2139     mlib_u32 s0, s1, s2, s3, s4;
2140 


2161     s4 = (s0 << 24) + (s1 << 16) + (s2 << 8) + s3;
2162 #endif /* _LITTLE_ENDIAN */
2163     tab[255] = s4;
2164 
2165     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
2166       mlib_u32 *da;
2167       mlib_u8 *dp = dst;
2168       mlib_u8 *sa = (void *)src;
2169       mlib_u32 s0, t0, s1, t1, t2;
2170       mlib_s32 size = xsize, off;
2171       mlib_u32 shift, shift1, res1, res2;
2172 
2173       if (((mlib_addr) dp & 3) == 0) {
2174 
2175         da = (mlib_u32 *) dp;
2176 
2177         s0 = sa[0];
2178         s1 = sa[1];
2179         sa += 2;
2180 
2181 #ifdef __SUNPRO_C
2182 #pragma pipeloop(0)
2183 #endif /* __SUNPRO_C */
2184         for (i = 0; i < size - 3; i += 2, da += 2, sa += 2) {
2185           t0 = tab[s0];
2186           t1 = tab[s1];
2187           s0 = sa[0];
2188           s1 = sa[1];
2189           da[0] = t0;
2190           da[1] = t1;
2191         }
2192 
2193         t0 = tab[s0];
2194         t1 = tab[s1];
2195         da[0] = t0;
2196         da[1] = t1;
2197 
2198         if (size & 1)
2199           da[2] = tab[sa[0]];
2200 
2201       }
2202       else {
2203 
2204         off = (mlib_s32) (4 - ((mlib_addr) dp & 3));
2205         shift = 8 * off;
2206         shift1 = 32 - shift;
2207 
2208         for (i = 0; i < off; i++) {
2209           dp[i] = table[i][sa[0]];
2210         }
2211 
2212         dp += i;
2213         t0 = tab[sa[0]];
2214         sa++;
2215 
2216         da = (mlib_u32 *) dp;
2217 
2218         s0 = sa[0];
2219         s1 = sa[1];
2220         sa += 2;
2221 
2222 #ifdef __SUNPRO_C
2223 #pragma pipeloop(0)
2224 #endif /* __SUNPRO_C */
2225         for (i = 0; i < size - 4; i += 2, da += 2, sa += 2) {
2226           t1 = tab[s0];
2227           t2 = tab[s1];
2228 #ifdef _LITTLE_ENDIAN
2229           res1 = (t0 >> shift) + (t1 << shift1);
2230           res2 = (t1 >> shift) + (t2 << shift1);
2231 #else
2232           res1 = (t0 << shift) + (t1 >> shift1);
2233           res2 = (t1 << shift) + (t2 >> shift1);
2234 #endif /* _LITTLE_ENDIAN */
2235           t0 = t2;
2236           s0 = sa[0];
2237           s1 = sa[1];
2238           da[0] = res1;
2239           da[1] = res2;
2240         }
2241 
2242         t1 = tab[s0];
2243         t2 = tab[s1];
2244 #ifdef _LITTLE_ENDIAN


2304   if ((xsize < 8) || (csize == 2)) {
2305     MLIB_C_IMAGELOOKUPSI(mlib_u8, mlib_s16, table_base);
2306   }
2307   else if (csize == 3) {
2308     mlib_s32 i, j;
2309 
2310     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
2311       mlib_u32 *da;
2312       mlib_u8 *dp = dst;
2313       mlib_s16 *sa = (void *)src;
2314       const mlib_u8 *tab0 = table_base[0];
2315       const mlib_u8 *tab1 = table_base[1];
2316       const mlib_u8 *tab2 = table_base[2];
2317       mlib_s32 s0, s1;
2318       mlib_u32 t0, t1, t2, t3, t4, t5;
2319       mlib_u32 res1, res2;
2320       mlib_s32 size = xsize, off;
2321 
2322       off = (mlib_s32) ((mlib_addr) dp & 3);
2323 
2324 #ifdef __SUNPRO_C
2325 #pragma pipeloop(0)
2326 #endif /* __SUNPRO_C */
2327       for (i = 0; i < off; i++) {
2328         s0 = *sa++;
2329         dp[0] = tab0[s0];
2330         dp[1] = tab1[s0];
2331         dp[2] = tab2[s0];
2332         dp += 3;
2333       }
2334 
2335       size -= off;
2336       da = (mlib_u32 *) dp;
2337       s0 = sa[0];
2338       s1 = sa[1];
2339       sa += 2;
2340 
2341 #ifdef __SUNPRO_C
2342 #pragma pipeloop(0)
2343 #endif /* __SUNPRO_C */
2344       for (i = 0; i < size - 7; i += 4, da += 3, sa += 4) {
2345         t0 = tab0[s0];
2346         t1 = tab1[s0];
2347         t2 = tab2[s0];
2348         t3 = tab0[s1];
2349         t4 = tab1[s1];
2350         t5 = tab2[s1];
2351 #ifdef _LITTLE_ENDIAN
2352         da[0] = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2353         res2 = (t5 << 8) + t4;
2354 #else
2355         da[0] = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2356         res2 = (t4 << 24) + (t5 << 16);
2357 #endif /* _LITTLE_ENDIAN */
2358         s0 = sa[0];
2359         s1 = sa[1];
2360         t0 = tab0[s0];
2361         t1 = tab1[s0];
2362         t2 = tab2[s0];
2363         t3 = tab0[s1];


2394       t0 = tab0[s0];
2395       t1 = tab1[s0];
2396       t2 = tab2[s0];
2397       t3 = tab0[s1];
2398       t4 = tab1[s1];
2399       t5 = tab2[s1];
2400 #ifdef _LITTLE_ENDIAN
2401       res2 += ((t1 << 24) + (t0 << 16));
2402       res1 = (t5 << 24) + (t4 << 16) + (t3 << 8) + t2;
2403 #else
2404       res2 += ((t0 << 8) + t1);
2405       res1 = (t2 << 24) + (t3 << 16) + (t4 << 8) + t5;
2406 #endif /* _LITTLE_ENDIAN */
2407       da[1] = res2;
2408       da[2] = res1;
2409       da += 3;
2410       sa += 2;
2411       dp = (mlib_u8 *) da;
2412       i += 4;
2413 
2414 #ifdef __SUNPRO_C
2415 #pragma pipeloop(0)
2416 #endif /* __SUNPRO_C */
2417       for (; i < size; i++) {
2418         s0 = *sa++;
2419         dp[0] = tab0[s0];
2420         dp[1] = tab1[s0];
2421         dp[2] = tab2[s0];
2422         dp += 3;
2423       }
2424     }
2425 
2426   }
2427   else if (csize == 4) {
2428     mlib_s32 i, j;
2429 
2430     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
2431       mlib_u32 *da;
2432       mlib_u8 *dp = dst;
2433       mlib_s16 *sa = (void *)src;
2434       const mlib_u8 *tab0 = table_base[0];
2435       const mlib_u8 *tab1 = table_base[1];
2436       const mlib_u8 *tab2 = table_base[2];
2437       const mlib_u8 *tab3 = table_base[3];
2438       mlib_s32 s0;
2439       mlib_u32 t0, t1, t2, t3;
2440       mlib_s32 size = xsize, off;
2441       mlib_u32 shift, shift1, res1, res2, res;
2442 
2443       if (((mlib_addr) dp & 3) == 0) {
2444 
2445         da = (mlib_u32 *) dp;
2446 
2447         s0 = sa[0];
2448         sa++;
2449 
2450 #ifdef __SUNPRO_C
2451 #pragma pipeloop(0)
2452 #endif /* __SUNPRO_C */
2453         for (i = 0; i < size - 1; i++, da++, sa++) {
2454           t0 = tab0[s0];
2455           t1 = tab1[s0];
2456           t2 = tab2[s0];
2457           t3 = tab3[s0];
2458 #ifdef _LITTLE_ENDIAN
2459           res = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2460 #else
2461           res = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2462 #endif /* _LITTLE_ENDIAN */
2463           s0 = sa[0];
2464           da[0] = res;
2465         }
2466 
2467         t0 = tab0[s0];
2468         t1 = tab1[s0];
2469         t2 = tab2[s0];
2470         t3 = tab3[s0];
2471 #ifdef _LITTLE_ENDIAN
2472         res = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;


2488           dp[i] = table_base[i][s0];
2489         }
2490 
2491         dp += i;
2492         da = (mlib_u32 *) dp;
2493 
2494         t0 = tab0[s0];
2495         t1 = tab1[s0];
2496         t2 = tab2[s0];
2497         t3 = tab3[s0];
2498 
2499 #ifdef _LITTLE_ENDIAN
2500         res1 = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2501 #else
2502         res1 = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2503 #endif /* _LITTLE_ENDIAN */
2504 
2505         s0 = sa[0];
2506         sa++;
2507 
2508 #ifdef __SUNPRO_C
2509 #pragma pipeloop(0)
2510 #endif /* __SUNPRO_C */
2511         for (i = 0; i < size - 2; i++, da++, sa++) {
2512           t0 = tab0[s0];
2513           t1 = tab1[s0];
2514           t2 = tab2[s0];
2515           t3 = tab3[s0];
2516 #ifdef _LITTLE_ENDIAN
2517           res2 = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2518           res = (res1 >> shift) + (res2 << shift1);
2519 #else
2520           res2 = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2521           res = (res1 << shift) + (res2 >> shift1);
2522 #endif /* _LITTLE_ENDIAN */
2523           res1 = res2;
2524           s0 = sa[0];
2525           da[0] = res;
2526         }
2527 
2528         t0 = tab0[s0];
2529         t1 = tab1[s0];
2530         t2 = tab2[s0];


2573   if ((xsize < 8) || (csize == 2)) {
2574     MLIB_C_IMAGELOOKUPSI(mlib_u8, mlib_u16, table_base);
2575   }
2576   else if (csize == 3) {
2577     mlib_s32 i, j;
2578 
2579     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
2580       mlib_u32 *da;
2581       mlib_u8 *dp = dst;
2582       mlib_u16 *sa = (void *)src;
2583       const mlib_u8 *tab0 = table_base[0];
2584       const mlib_u8 *tab1 = table_base[1];
2585       const mlib_u8 *tab2 = table_base[2];
2586       mlib_s32 s0, s1;
2587       mlib_u32 t0, t1, t2, t3, t4, t5;
2588       mlib_u32 res1, res2;
2589       mlib_s32 size = xsize, off;
2590 
2591       off = (mlib_s32) ((mlib_addr) dp & 3);
2592 
2593 #ifdef __SUNPRO_C
2594 #pragma pipeloop(0)
2595 #endif /* __SUNPRO_C */
2596       for (i = 0; i < off; i++) {
2597         s0 = *sa++;
2598         dp[0] = tab0[s0];
2599         dp[1] = tab1[s0];
2600         dp[2] = tab2[s0];
2601         dp += 3;
2602       }
2603 
2604       size -= off;
2605       da = (mlib_u32 *) dp;
2606       s0 = sa[0];
2607       s1 = sa[1];
2608       sa += 2;
2609 
2610 #ifdef __SUNPRO_C
2611 #pragma pipeloop(0)
2612 #endif /* __SUNPRO_C */
2613       for (i = 0; i < size - 7; i += 4, da += 3, sa += 4) {
2614         t0 = tab0[s0];
2615         t1 = tab1[s0];
2616         t2 = tab2[s0];
2617         t3 = tab0[s1];
2618         t4 = tab1[s1];
2619         t5 = tab2[s1];
2620 #ifdef _LITTLE_ENDIAN
2621         da[0] = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2622         res2 = (t5 << 8) + t4;
2623 #else
2624         da[0] = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2625         res2 = (t4 << 24) + (t5 << 16);
2626 #endif /* _LITTLE_ENDIAN */
2627         s0 = sa[0];
2628         s1 = sa[1];
2629         t0 = tab0[s0];
2630         t1 = tab1[s0];
2631         t2 = tab2[s0];
2632         t3 = tab0[s1];


2663       t0 = tab0[s0];
2664       t1 = tab1[s0];
2665       t2 = tab2[s0];
2666       t3 = tab0[s1];
2667       t4 = tab1[s1];
2668       t5 = tab2[s1];
2669 #ifdef _LITTLE_ENDIAN
2670       res2 += ((t1 << 24) + (t0 << 16));
2671       res1 = (t5 << 24) + (t4 << 16) + (t3 << 8) + t2;
2672 #else
2673       res2 += ((t0 << 8) + t1);
2674       res1 = (t2 << 24) + (t3 << 16) + (t4 << 8) + t5;
2675 #endif /* _LITTLE_ENDIAN */
2676       da[1] = res2;
2677       da[2] = res1;
2678       da += 3;
2679       sa += 2;
2680       dp = (mlib_u8 *) da;
2681       i += 4;
2682 
2683 #ifdef __SUNPRO_C
2684 #pragma pipeloop(0)
2685 #endif /* __SUNPRO_C */
2686       for (; i < size; i++) {
2687         s0 = *sa++;
2688         dp[0] = tab0[s0];
2689         dp[1] = tab1[s0];
2690         dp[2] = tab2[s0];
2691         dp += 3;
2692       }
2693     }
2694 
2695   }
2696   else if (csize == 4) {
2697     mlib_s32 i, j;
2698 
2699     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
2700       mlib_u32 *da;
2701       mlib_u8 *dp = dst;
2702       mlib_u16 *sa = (void *)src;
2703       const mlib_u8 *tab0 = table_base[0];
2704       const mlib_u8 *tab1 = table_base[1];
2705       const mlib_u8 *tab2 = table_base[2];
2706       const mlib_u8 *tab3 = table_base[3];
2707       mlib_s32 s0;
2708       mlib_u32 t0, t1, t2, t3;
2709       mlib_s32 size = xsize, off;
2710       mlib_u32 shift, shift1, res1, res2, res;
2711 
2712       if (((mlib_addr) dp & 3) == 0) {
2713 
2714         da = (mlib_u32 *) dp;
2715 
2716         s0 = sa[0];
2717         sa++;
2718 
2719 #ifdef __SUNPRO_C
2720 #pragma pipeloop(0)
2721 #endif /* __SUNPRO_C */
2722         for (i = 0; i < size - 1; i++, da++, sa++) {
2723           t0 = tab0[s0];
2724           t1 = tab1[s0];
2725           t2 = tab2[s0];
2726           t3 = tab3[s0];
2727 #ifdef _LITTLE_ENDIAN
2728           res = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2729 #else
2730           res = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2731 #endif /* _LITTLE_ENDIAN */
2732           s0 = sa[0];
2733           da[0] = res;
2734         }
2735 
2736         t0 = tab0[s0];
2737         t1 = tab1[s0];
2738         t2 = tab2[s0];
2739         t3 = tab3[s0];
2740 #ifdef _LITTLE_ENDIAN
2741         res = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;


2757           dp[i] = table_base[i][s0];
2758         }
2759 
2760         dp += i;
2761         da = (mlib_u32 *) dp;
2762 
2763         t0 = tab0[s0];
2764         t1 = tab1[s0];
2765         t2 = tab2[s0];
2766         t3 = tab3[s0];
2767 
2768 #ifdef _LITTLE_ENDIAN
2769         res1 = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2770 #else
2771         res1 = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2772 #endif /* _LITTLE_ENDIAN */
2773 
2774         s0 = sa[0];
2775         sa++;
2776 
2777 #ifdef __SUNPRO_C
2778 #pragma pipeloop(0)
2779 #endif /* __SUNPRO_C */
2780         for (i = 0; i < size - 2; i++, da++, sa++) {
2781           t0 = tab0[s0];
2782           t1 = tab1[s0];
2783           t2 = tab2[s0];
2784           t3 = tab3[s0];
2785 #ifdef _LITTLE_ENDIAN
2786           res2 = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2787           res = (res1 >> shift) + (res2 << shift1);
2788 #else
2789           res2 = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2790           res = (res1 << shift) + (res2 >> shift1);
2791 #endif /* _LITTLE_ENDIAN */
2792           res1 = res2;
2793           s0 = sa[0];
2794           da[0] = res;
2795         }
2796 
2797         t0 = tab0[s0];
2798         t1 = tab1[s0];
2799         t2 = tab2[s0];


2878 #else
2879     s2 = (s0 << 16) + s1;
2880 #endif /* _LITTLE_ENDIAN */
2881     tab[255] = s2;
2882 
2883     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
2884       mlib_u32 *da;
2885       mlib_u16 *dp = (mlib_u16 *) dst;
2886       mlib_u8 *sa = (void *)src;
2887       mlib_u32 s0, t0, s1, t1, t2;
2888       mlib_u32 res1, res2;
2889       mlib_s32 size = xsize;
2890 
2891       if (((mlib_addr) dp & 3) == 0) {
2892 
2893         da = (mlib_u32 *) dp;
2894         s0 = sa[0];
2895         s1 = sa[1];
2896         sa += 2;
2897 
2898 #ifdef __SUNPRO_C
2899 #pragma pipeloop(0)
2900 #endif /* __SUNPRO_C */
2901         for (i = 0; i < size - 3; i += 2, da += 2, sa += 2) {
2902           t0 = tab[s0];
2903           t1 = tab[s1];
2904           s0 = sa[0];
2905           s1 = sa[1];
2906           da[0] = t0;
2907           da[1] = t1;
2908         }
2909 
2910         t0 = tab[s0];
2911         t1 = tab[s1];
2912         da[0] = t0;
2913         da[1] = t1;
2914 
2915         if (size & 1)
2916           da[2] = tab[sa[0]];
2917 
2918       }
2919       else {
2920 
2921         t0 = tab[*sa++];
2922 #ifdef _LITTLE_ENDIAN
2923         *dp++ = (mlib_u16) (t0);
2924 #else
2925         *dp++ = (mlib_u16) (t0 >> 16);
2926 #endif /* _LITTLE_ENDIAN */
2927         da = (mlib_u32 *) dp;
2928         s0 = sa[0];
2929         s1 = sa[1];
2930         sa += 2;
2931 
2932 #ifdef __SUNPRO_C
2933 #pragma pipeloop(0)
2934 #endif /* __SUNPRO_C */
2935         for (i = 0; i < size - 4; i += 2, da += 2, sa += 2) {
2936           t1 = tab[s0];
2937           t2 = tab[s1];
2938 #ifdef _LITTLE_ENDIAN
2939           res1 = (t0 >> 16) + (t1 << 16);
2940           res2 = (t1 >> 16) + (t2 << 16);
2941 #else
2942           res1 = (t0 << 16) + (t1 >> 16);
2943           res2 = (t1 << 16) + (t2 >> 16);
2944 #endif /* _LITTLE_ENDIAN */
2945           t0 = t2;
2946           s0 = sa[0];
2947           s1 = sa[1];
2948           da[0] = res1;
2949           da[1] = res2;
2950         }
2951 
2952         t1 = tab[s0];
2953         t2 = tab[s1];
2954 #ifdef _LITTLE_ENDIAN


3025       mlib_u32 res1, res2;
3026       mlib_s32 size = xsize, off;
3027 
3028       off = (mlib_s32) ((mlib_addr) dp & 3);
3029 
3030       if (off != 0) {
3031         ptr = (mlib_u16 *) (tab + 2 * sa[0]);
3032         dp[0] = ptr[1];
3033         dp[1] = ptr[2];
3034         dp[2] = ptr[3];
3035         dp += 3;
3036         sa++;
3037         size--;
3038       }
3039 
3040       da = (mlib_u32 *) dp;
3041       s0 = sa[0] << 3;
3042       s1 = sa[1] << 3;
3043       sa += 2;
3044 
3045 #ifdef __SUNPRO_C
3046 #pragma pipeloop(0)
3047 #endif /* __SUNPRO_C */
3048       for (i = 0; i < size - 3; i += 2, da += 3, sa += 2) {
3049         t0 = *(mlib_u32 *) ((mlib_u8 *) tab + s0);
3050         t1 = *(mlib_u32 *) ((mlib_u8 *) tab + s0 + 4);
3051         t2 = *(mlib_u32 *) ((mlib_u8 *) tab + s1);
3052         t3 = *(mlib_u32 *) ((mlib_u8 *) tab + s1 + 4);
3053 #ifdef _LITTLE_ENDIAN
3054         res1 = (t0 >> 16) + (t1 << 16);
3055         res2 = (t1 >> 16) + t2;
3056 #else
3057         res1 = (t0 << 16) + (t1 >> 16);
3058         res2 = (t1 << 16) + t2;
3059 #endif /* _LITTLE_ENDIAN */
3060         s0 = sa[0] << 3;
3061         s1 = sa[1] << 3;
3062         da[0] = res1;
3063         da[1] = res2;
3064         da[2] = t3;
3065       }
3066 
3067       t0 = *(mlib_u32 *) ((mlib_u8 *) tab + s0);


3129 #endif /* _LITTLE_ENDIAN */
3130     tab[510] = s4;
3131     tab[511] = s5;
3132 
3133     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
3134       mlib_u32 *da;
3135       mlib_u16 *dp = (mlib_u16 *) dst;
3136       mlib_u8 *sa = (void *)src;
3137       mlib_u32 s0, t0, s1, t1, t2, t3, t4, t5;
3138       mlib_s32 size = xsize;
3139       mlib_u32 res1, res2, res3, res4;
3140 
3141       if (((mlib_addr) dp & 3) == 0) {
3142 
3143         da = (mlib_u32 *) dp;
3144 
3145         s0 = sa[0] << 3;
3146         s1 = sa[1] << 3;
3147         sa += 2;
3148 
3149 #ifdef __SUNPRO_C
3150 #pragma pipeloop(0)
3151 #endif /* __SUNPRO_C */
3152         for (i = 0; i < size - 3; i += 2, da += 4, sa += 2) {
3153           t0 = *(mlib_u32 *) ((mlib_u8 *) tab + s0);
3154           t1 = *(mlib_u32 *) ((mlib_u8 *) tab + s0 + 4);
3155           t2 = *(mlib_u32 *) ((mlib_u8 *) tab + s1);
3156           t3 = *(mlib_u32 *) ((mlib_u8 *) tab + s1 + 4);
3157           s0 = sa[0] << 3;
3158           s1 = sa[1] << 3;
3159           da[0] = t0;
3160           da[1] = t1;
3161           da[2] = t2;
3162           da[3] = t3;
3163         }
3164 
3165         t0 = *(mlib_u32 *) ((mlib_u8 *) tab + s0);
3166         t1 = *(mlib_u32 *) ((mlib_u8 *) tab + s0 + 4);
3167         t2 = *(mlib_u32 *) ((mlib_u8 *) tab + s1);
3168         t3 = *(mlib_u32 *) ((mlib_u8 *) tab + s1 + 4);
3169         da[0] = t0;
3170         da[1] = t1;
3171         da[2] = t2;


3180       else {
3181 
3182         t4 = tab[2 * sa[0]];
3183         t5 = tab[2 * sa[0] + 1];
3184 #ifdef _LITTLE_ENDIAN
3185         *dp++ = (mlib_u16) (t4);
3186 #else
3187         *dp++ = (mlib_u16) (t4 >> 16);
3188 #endif /* _LITTLE_ENDIAN */
3189         sa++;
3190         da = (mlib_u32 *) dp;
3191 #ifdef _LITTLE_ENDIAN
3192         *da++ = (t4 >> 16) + (t5 << 16);
3193 #else
3194         *da++ = (t4 << 16) + (t5 >> 16);
3195 #endif /* _LITTLE_ENDIAN */
3196         s0 = sa[0] << 3;
3197         s1 = sa[1] << 3;
3198         sa += 2;
3199 
3200 #ifdef __SUNPRO_C
3201 #pragma pipeloop(0)
3202 #endif /* __SUNPRO_C */
3203         for (i = 0; i < size - 4; i += 2, da += 4, sa += 2) {
3204           t0 = *(mlib_u32 *) ((mlib_u8 *) tab + s0);
3205           t1 = *(mlib_u32 *) ((mlib_u8 *) tab + s0 + 4);
3206           t2 = *(mlib_u32 *) ((mlib_u8 *) tab + s1);
3207           t3 = *(mlib_u32 *) ((mlib_u8 *) tab + s1 + 4);
3208 #ifdef _LITTLE_ENDIAN
3209           res1 = (t5 >> 16) + (t0 << 16);
3210           res2 = (t0 >> 16) + (t1 << 16);
3211           res3 = (t1 >> 16) + (t2 << 16);
3212           res4 = (t2 >> 16) + (t3 << 16);
3213 #else
3214           res1 = (t5 << 16) + (t0 >> 16);
3215           res2 = (t0 << 16) + (t1 >> 16);
3216           res3 = (t1 << 16) + (t2 >> 16);
3217           res4 = (t2 << 16) + (t3 >> 16);
3218 #endif /* _LITTLE_ENDIAN */
3219           s0 = sa[0] << 3;
3220           s1 = sa[1] << 3;
3221           da[0] = res1;
3222           da[1] = res2;


3418       mlib_u32 *tab1 = (mlib_u32 *) table[1];
3419       mlib_u32 s0, t0, t1, t2, t3;
3420       mlib_s32 off;
3421       mlib_s32 size = xsize;
3422       mlib_u32 *dp = (mlib_u32 *) dst;
3423       mlib_u8 *sp = (void *)src;
3424 
3425       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
3426 
3427       for (i = 0; i < off; i++, sp++) {
3428         *dp++ = tab0[sp[0]];
3429         *dp++ = tab1[sp[0]];
3430         size--;
3431       }
3432 
3433       sa = (mlib_u32 *) sp;
3434 
3435       s0 = sa[0];
3436       sa++;
3437 
3438 #ifdef __SUNPRO_C
3439 #pragma pipeloop(0)
3440 #endif /* __SUNPRO_C */
3441       for (i = 0; i < size - 7; i += 4, dp += 8, sa++) {
3442 #ifdef _LITTLE_ENDIAN
3443         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC));
3444         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC));
3445         t2 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC));
3446         t3 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC));
3447 #else
3448         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));
3449         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC));
3450         t2 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC));
3451         t3 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC));
3452 #endif /* _LITTLE_ENDIAN */
3453         dp[0] = t0;
3454         dp[1] = t1;
3455         dp[2] = t2;
3456         dp[3] = t3;
3457 #ifdef _LITTLE_ENDIAN
3458         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC));
3459         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC));
3460         t2 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));


3524       mlib_u32 s0, t0, t1, t2, t3, t4, t5;
3525       mlib_s32 off;
3526       mlib_s32 size = xsize;
3527       mlib_u32 *dp = (mlib_u32 *) dst;
3528       mlib_u8 *sp = (void *)src;
3529 
3530       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
3531 
3532       for (i = 0; i < off; i++, sp++) {
3533         *dp++ = tab0[sp[0]];
3534         *dp++ = tab1[sp[0]];
3535         *dp++ = tab2[sp[0]];
3536         size--;
3537       }
3538 
3539       sa = (mlib_u32 *) sp;
3540 
3541       s0 = sa[0];
3542       sa++;
3543 
3544 #ifdef __SUNPRO_C
3545 #pragma pipeloop(0)
3546 #endif /* __SUNPRO_C */
3547       for (i = 0; i < size - 7; i += 4, dp += 12, sa++) {
3548 #ifdef _LITTLE_ENDIAN
3549         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC));
3550         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC));
3551         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 << 2) & 0x3FC));
3552         t3 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC));
3553         t4 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC));
3554         t5 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 6) & 0x3FC));
3555 #else
3556         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));
3557         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC));
3558         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 22) & 0x3FC));
3559         t3 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC));
3560         t4 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC));
3561         t5 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 14) & 0x3FC));
3562 #endif /* _LITTLE_ENDIAN */
3563         dp[0] = t0;
3564         dp[1] = t1;
3565         dp[2] = t2;
3566         dp[3] = t3;


3657       mlib_s32 off;
3658       mlib_s32 size = xsize;
3659       mlib_u32 *dp = (mlib_u32 *) dst;
3660       mlib_u8 *sp = (void *)src;
3661 
3662       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
3663 
3664       for (i = 0; i < off; i++, sp++) {
3665         *dp++ = tab0[sp[0]];
3666         *dp++ = tab1[sp[0]];
3667         *dp++ = tab2[sp[0]];
3668         *dp++ = tab3[sp[0]];
3669         size--;
3670       }
3671 
3672       sa = (mlib_u32 *) sp;
3673 
3674       s0 = sa[0];
3675       sa++;
3676 
3677 #ifdef __SUNPRO_C
3678 #pragma pipeloop(0)
3679 #endif /* __SUNPRO_C */
3680       for (i = 0; i < size - 7; i += 4, dp += 16, sa++) {
3681 #ifdef _LITTLE_ENDIAN
3682         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC));
3683         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC));
3684         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 << 2) & 0x3FC));
3685         t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 << 2) & 0x3FC));
3686 #else
3687         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));
3688         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC));
3689         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 22) & 0x3FC));
3690         t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 22) & 0x3FC));
3691 #endif /* _LITTLE_ENDIAN */
3692         dp[0] = t0;
3693         dp[1] = t1;
3694         dp[2] = t2;
3695         dp[3] = t3;
3696 #ifdef _LITTLE_ENDIAN
3697         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC));
3698         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC));
3699         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 6) & 0x3FC));


   1 /*
   2  * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any


 233       mlib_s32 off;
 234       mlib_s32 size = xsize;
 235       mlib_u8 *dp = dst, *sp = (void *)src;
 236 
 237       off = (mlib_s32) ((4 - ((mlib_addr) dst & 3)) & 3);
 238 
 239       for (i = 0; i < off; i++, sp++) {
 240         *dp++ = tab[sp[0]];
 241         size--;
 242       }
 243 
 244       da = (mlib_u32 *) dp;
 245 
 246       if (((mlib_addr) sp & 1) == 0) {
 247         sa = (mlib_u16 *) sp;
 248 
 249         s0 = sa[0];
 250         s1 = sa[1];
 251         sa += 2;
 252 



 253         for (i = 0; i < size - 7; i += 4, da++, sa += 2) {
 254           READ_U8_U8_ALIGN(tab, tab, tab, tab);
 255           t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 256           s0 = sa[0];
 257           s1 = sa[1];
 258           da[0] = t;
 259         }
 260 
 261         READ_U8_U8_ALIGN(tab, tab, tab, tab);
 262         t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 263         da[0] = t;
 264         da++;
 265         dp = (mlib_u8 *) da;
 266         sp = (mlib_u8 *) sa;
 267         i += 4;
 268         for (; i < size; i++, dp++, sp++)
 269           dp[0] = tab[sp[0]];
 270 
 271       }
 272       else {
 273         sa = (mlib_u16 *) (sp - 1);
 274 
 275         s0 = sa[0];
 276         s1 = sa[1];
 277         s2 = sa[2];
 278         sa += 3;
 279 



 280         for (i = 0; i < size - 8; i += 4, da++, sa += 2) {
 281           READ_U8_U8_NOTALIGN(tab, tab, tab, tab);
 282           t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 283           s0 = s2;
 284           s1 = sa[0];
 285           s2 = sa[1];
 286           da[0] = t;
 287         }
 288 
 289         READ_U8_U8_NOTALIGN(tab, tab, tab, tab);
 290         t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 291         da[0] = t;
 292         da++;
 293         dp = (mlib_u8 *) da;
 294 #ifdef _LITTLE_ENDIAN
 295         *dp++ = tab[s2 >> 8];
 296 #else
 297         *dp++ = tab[s2 & 0xFF];
 298 #endif /* _LITTLE_ENDIAN */
 299         sp = (mlib_u8 *) sa;


 327       }
 328 
 329       if ((off & 1) != 0) {
 330         *dp++ = tab0[sp[0]];
 331         size--;
 332         sp++;
 333         tab = tab0;
 334         tab0 = tab1;
 335         tab1 = tab;
 336       }
 337 
 338       da = (mlib_u32 *) dp;
 339 
 340       if (((mlib_addr) sp & 1) == 0) {
 341         sa = (mlib_u16 *) sp;
 342 
 343         s0 = sa[0];
 344         s1 = sa[1];
 345         sa += 2;
 346 



 347         for (i = 0; i < size - 7; i += 4, da++, sa += 2) {
 348           READ_U8_U8_ALIGN(tab0, tab1, tab0, tab1);
 349           t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 350           s0 = sa[0];
 351           s1 = sa[1];
 352           da[0] = t;
 353         }
 354 
 355         READ_U8_U8_ALIGN(tab0, tab1, tab0, tab1);
 356         t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 357         da[0] = t;
 358         da++;
 359         dp = (mlib_u8 *) da;
 360         sp = (mlib_u8 *) sa;
 361         i += 4;
 362 
 363         for (; i < size - 1; i += 2, sp += 2) {
 364           *dp++ = tab0[sp[0]];
 365           *dp++ = tab1[sp[1]];
 366         }
 367 
 368         if (i < size)
 369           *dp = tab0[(*sp)];
 370 
 371       }
 372       else {
 373         sa = (mlib_u16 *) (sp - 1);
 374 
 375         s0 = sa[0];
 376         s1 = sa[1];
 377         s2 = sa[2];
 378         sa += 3;
 379 



 380         for (i = 0; i < size - 8; i += 4, da++, sa += 2) {
 381           READ_U8_U8_NOTALIGN(tab0, tab1, tab0, tab1);
 382           t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 383           s0 = s2;
 384           s1 = sa[0];
 385           s2 = sa[1];
 386           da[0] = t;
 387         }
 388 
 389         READ_U8_U8_NOTALIGN(tab0, tab1, tab0, tab1);
 390         t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 391         da[0] = t;
 392         da++;
 393         dp = (mlib_u8 *) da;
 394 #ifdef _LITTLE_ENDIAN
 395         *dp++ = tab0[s2 >> 8];
 396 #else
 397         *dp++ = tab0[s2 & 0xFF];
 398 #endif /* _LITTLE_ENDIAN */
 399         sp = (mlib_u8 *) sa;


 446         size -= 2;
 447         sp += 2;
 448       }
 449       else if (off == 3) {
 450         *dp++ = tab0[sp[0]];
 451         *dp++ = tab1[sp[1]];
 452         *dp++ = tab2[sp[2]];
 453         size -= 3;
 454         sp += 3;
 455       }
 456 
 457       da = (mlib_u32 *) dp;
 458 
 459       if (((mlib_addr) sp & 1) == 0) {
 460         sa = (mlib_u16 *) sp;
 461 
 462         s0 = sa[0];
 463         s1 = sa[1];
 464         sa += 2;
 465 



 466         for (i = 0; i < size - 7; i += 4, da++, sa += 2) {
 467           READ_U8_U8_ALIGN(tab0, tab1, tab2, tab0);
 468           t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 469           tab = tab0;
 470           tab0 = tab1;
 471           tab1 = tab2;
 472           tab2 = tab;
 473           s0 = sa[0];
 474           s1 = sa[1];
 475           da[0] = t;
 476         }
 477 
 478         READ_U8_U8_ALIGN(tab0, tab1, tab2, tab0);
 479         t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 480         da[0] = t;
 481         da++;
 482         dp = (mlib_u8 *) da;
 483         sp = (mlib_u8 *) sa;
 484         i += 4;
 485 


 491 
 492         if (i < size) {
 493           *dp++ = tab2[(*sp)];
 494           i++;
 495           sp++;
 496         }
 497 
 498         if (i < size) {
 499           *dp++ = tab0[(*sp)];
 500         }
 501 
 502       }
 503       else {
 504         sa = (mlib_u16 *) (sp - 1);
 505 
 506         s0 = sa[0];
 507         s1 = sa[1];
 508         s2 = sa[2];
 509         sa += 3;
 510 



 511         for (i = 0; i < size - 8; i += 4, da++, sa += 2) {
 512           READ_U8_U8_NOTALIGN(tab0, tab1, tab2, tab0);
 513           t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 514           tab = tab0;
 515           tab0 = tab1;
 516           tab1 = tab2;
 517           tab2 = tab;
 518           s0 = s2;
 519           s1 = sa[0];
 520           s2 = sa[1];
 521           da[0] = t;
 522         }
 523 
 524         READ_U8_U8_NOTALIGN(tab0, tab1, tab2, tab0);
 525         t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 526         da[0] = t;
 527         da++;
 528         dp = (mlib_u8 *) da;
 529 #ifdef _LITTLE_ENDIAN
 530         *dp++ = tab1[s2 >> 8];


 598         *dp++ = tab1[sp[1]];
 599         *dp++ = tab2[sp[2]];
 600         tab = tab3;
 601         tab3 = tab2;
 602         tab2 = tab1;
 603         tab1 = tab0;
 604         tab0 = tab;
 605         size -= 3;
 606         sp += 3;
 607       }
 608 
 609       da = (mlib_u32 *) dp;
 610 
 611       if (((mlib_addr) sp & 1) == 0) {
 612         sa = (mlib_u16 *) sp;
 613 
 614         s0 = sa[0];
 615         s1 = sa[1];
 616         sa += 2;
 617 



 618         for (i = 0; i < size - 7; i += 4, da++, sa += 2) {
 619           READ_U8_U8_ALIGN(tab0, tab1, tab2, tab3);
 620           t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 621           s0 = sa[0];
 622           s1 = sa[1];
 623           da[0] = t;
 624         }
 625 
 626         READ_U8_U8_ALIGN(tab0, tab1, tab2, tab3);
 627         t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 628         da[0] = t;
 629         da++;
 630         dp = (mlib_u8 *) da;
 631         sp = (mlib_u8 *) sa;
 632         i += 4;
 633 
 634         if (i < size) {
 635           *dp++ = tab0[(*sp)];
 636           i++;
 637           sp++;


 639 
 640         if (i < size) {
 641           *dp++ = tab1[(*sp)];
 642           i++;
 643           sp++;
 644         }
 645 
 646         if (i < size) {
 647           *dp = tab2[(*sp)];
 648         }
 649 
 650       }
 651       else {
 652         sa = (mlib_u16 *) (sp - 1);
 653 
 654         s0 = sa[0];
 655         s1 = sa[1];
 656         s2 = sa[2];
 657         sa += 3;
 658 



 659         for (i = 0; i < size - 8; i += 4, da++, sa += 2) {
 660           READ_U8_U8_NOTALIGN(tab0, tab1, tab2, tab3);
 661           t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 662           s0 = s2;
 663           s1 = sa[0];
 664           s2 = sa[1];
 665           da[0] = t;
 666         }
 667 
 668         READ_U8_U8_NOTALIGN(tab0, tab1, tab2, tab3);
 669         t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 670         da[0] = t;
 671         da++;
 672         dp = (mlib_u8 *) da;
 673 #ifdef _LITTLE_ENDIAN
 674         *dp++ = tab0[s2 >> 8];
 675 #else
 676         *dp++ = tab0[s2 & 0xFF];
 677 #endif /* _LITTLE_ENDIAN */
 678         sp = (mlib_u8 *) sa;


 784       mlib_s32 off;
 785       mlib_s32 size = xsize;
 786       mlib_u16 *dp = (mlib_u16 *) dst;
 787       mlib_u8 *sp = (void *)src;
 788 
 789       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
 790 
 791       for (i = 0; i < off; i++, sp++) {
 792         *dp++ = tab[sp[0]];
 793         size--;
 794       }
 795 
 796       sa = (mlib_u32 *) sp;
 797 
 798       if (((mlib_addr) dp & 3) == 0) {
 799         da = (mlib_u32 *) dp;
 800 
 801         s0 = sa[0];
 802         sa++;
 803 



 804         for (i = 0; i < size - 7; i += 4, da += 2, sa++) {
 805           READ_U8_S16_ALIGN(tab, tab, tab, tab);
 806           res1 = (t0 << 16) + t1;
 807           res2 = (t2 << 16) + t3;
 808           s0 = sa[0];
 809           da[0] = res1;
 810           da[1] = res2;
 811         }
 812 
 813         READ_U8_S16_ALIGN(tab, tab, tab, tab);
 814         res1 = (t0 << 16) + t1;
 815         res2 = (t2 << 16) + t3;
 816         da[0] = res1;
 817         da[1] = res2;
 818         da += 2;
 819         dp = (mlib_u16 *) da;
 820         sp = (mlib_u8 *) sa;
 821         i += 4;
 822         for (; i < size; i++, dp++, sp++)
 823           dp[0] = tab[sp[0]];
 824 
 825       }
 826       else {
 827 
 828         *dp++ = tab[(*sp)];
 829         size--;
 830         da = (mlib_u32 *) dp;
 831 
 832         s0 = sa[0];
 833         s1 = sa[1];
 834         sa += 2;
 835 



 836         for (i = 0; i < size - 10; i += 4, da += 2, sa++) {
 837           READ_U8_S16_NOTALIGN(tab, tab, tab, tab);
 838           s0 = s1;
 839           res1 = (t0 << 16) + t1;
 840           res2 = (t2 << 16) + t3;
 841           s1 = sa[0];
 842           da[0] = res1;
 843           da[1] = res2;
 844         }
 845 
 846         READ_U8_S16_NOTALIGN(tab, tab, tab, tab);
 847         res1 = (t0 << 16) + t1;
 848         res2 = (t2 << 16) + t3;
 849         da[0] = res1;
 850         da[1] = res2;
 851         ADD_READ_U8_S16_NOTALIGN(tab, tab, tab);
 852         res1 = (t0 << 16) + t1;
 853         da[2] = res1;
 854         da += 3;
 855         dp = (mlib_u16 *) da;


 886         size -= 2;
 887       }
 888 
 889       if ((off & 1) != 0) {
 890         *dp++ = tab0[*sp];
 891         size--;
 892         sp++;
 893         tab = tab0;
 894         tab0 = tab1;
 895         tab1 = tab;
 896       }
 897 
 898       sa = (mlib_u32 *) sp;
 899 
 900       if (((mlib_addr) dp & 3) == 0) {
 901         da = (mlib_u32 *) dp;
 902 
 903         s0 = sa[0];
 904         sa++;
 905 



 906         for (i = 0; i < size - 7; i += 4, da += 2, sa++) {
 907           READ_U8_S16_ALIGN(tab0, tab1, tab0, tab1);
 908           res1 = (t0 << 16) + t1;
 909           res2 = (t2 << 16) + t3;
 910           s0 = sa[0];
 911           da[0] = res1;
 912           da[1] = res2;
 913         }
 914 
 915         READ_U8_S16_ALIGN(tab0, tab1, tab0, tab1);
 916         res1 = (t0 << 16) + t1;
 917         res2 = (t2 << 16) + t3;
 918         da[0] = res1;
 919         da[1] = res2;
 920         da += 2;
 921         dp = (mlib_u16 *) da;
 922         sp = (mlib_u8 *) sa;
 923         i += 4;
 924 
 925         for (; i < size - 1; i += 2, sp += 2) {
 926           *dp++ = tab0[sp[0]];
 927           *dp++ = tab1[sp[1]];
 928         }
 929 
 930         if (i < size)
 931           *dp = tab0[(*sp)];
 932 
 933       }
 934       else {
 935 
 936         *dp++ = tab0[(*sp)];
 937         size--;
 938         da = (mlib_u32 *) dp;
 939 
 940         s0 = sa[0];
 941         s1 = sa[1];
 942         sa += 2;
 943 



 944         for (i = 0; i < size - 10; i += 4, da += 2, sa++) {
 945           READ_U8_S16_NOTALIGN(tab1, tab0, tab1, tab0);
 946           s0 = s1;
 947           res1 = (t0 << 16) + t1;
 948           res2 = (t2 << 16) + t3;
 949           s1 = sa[0];
 950           da[0] = res1;
 951           da[1] = res2;
 952         }
 953 
 954         READ_U8_S16_NOTALIGN(tab1, tab0, tab1, tab0);
 955         res1 = (t0 << 16) + t1;
 956         res2 = (t2 << 16) + t3;
 957         da[0] = res1;
 958         da[1] = res2;
 959         ADD_READ_U8_S16_NOTALIGN(tab1, tab0, tab1);
 960         res1 = (t0 << 16) + t1;
 961         da[2] = res1;
 962         da += 3;
 963         dp = (mlib_u16 *) da;


1013         tab0 = tab;
1014         size -= 2;
1015         sp += 2;
1016       }
1017       else if (off == 3) {
1018         *dp++ = tab0[sp[0]];
1019         *dp++ = tab1[sp[1]];
1020         *dp++ = tab2[sp[2]];
1021         size -= 3;
1022         sp += 3;
1023       }
1024 
1025       sa = (mlib_u32 *) sp;
1026 
1027       if (((mlib_addr) dp & 3) == 0) {
1028         da = (mlib_u32 *) dp;
1029 
1030         s0 = sa[0];
1031         sa++;
1032 



1033         for (i = 0; i < size - 7; i += 4, da += 2, sa++) {
1034           READ_U8_S16_ALIGN(tab0, tab1, tab2, tab0);
1035           res1 = (t0 << 16) + t1;
1036           res2 = (t2 << 16) + t3;
1037           tab = tab0;
1038           tab0 = tab1;
1039           tab1 = tab2;
1040           tab2 = tab;
1041           s0 = sa[0];
1042           da[0] = res1;
1043           da[1] = res2;
1044         }
1045 
1046         READ_U8_S16_ALIGN(tab0, tab1, tab2, tab0);
1047         res1 = (t0 << 16) + t1;
1048         res2 = (t2 << 16) + t3;
1049         da[0] = res1;
1050         da[1] = res2;
1051         da += 2;
1052         dp = (mlib_u16 *) da;


1063           *dp++ = tab2[(*sp)];
1064           i++;
1065           sp++;
1066         }
1067 
1068         if (i < size) {
1069           *dp = tab0[(*sp)];
1070         }
1071 
1072       }
1073       else {
1074 
1075         *dp++ = tab0[(*sp)];
1076         size--;
1077         da = (mlib_u32 *) dp;
1078 
1079         s0 = sa[0];
1080         s1 = sa[1];
1081         sa += 2;
1082 



1083         for (i = 0; i < size - 10; i += 4, da += 2, sa++) {
1084           READ_U8_S16_NOTALIGN(tab1, tab2, tab0, tab1);
1085           s0 = s1;
1086           res1 = (t0 << 16) + t1;
1087           res2 = (t2 << 16) + t3;
1088           tab = tab0;
1089           tab0 = tab1;
1090           tab1 = tab2;
1091           tab2 = tab;
1092           s1 = sa[0];
1093           da[0] = res1;
1094           da[1] = res2;
1095         }
1096 
1097         READ_U8_S16_NOTALIGN(tab1, tab2, tab0, tab1);
1098         res1 = (t0 << 16) + t1;
1099         res2 = (t2 << 16) + t3;
1100         da[0] = res1;
1101         da[1] = res2;
1102         ADD_READ_U8_S16_NOTALIGN(tab2, tab0, tab1);


1173         *dp++ = tab0[sp[0]];
1174         *dp++ = tab1[sp[1]];
1175         *dp++ = tab2[sp[2]];
1176         tab = tab3;
1177         tab3 = tab2;
1178         tab2 = tab1;
1179         tab1 = tab0;
1180         tab0 = tab;
1181         size -= 3;
1182         sp += 3;
1183       }
1184 
1185       sa = (mlib_u32 *) sp;
1186 
1187       if (((mlib_addr) dp & 3) == 0) {
1188         da = (mlib_u32 *) dp;
1189 
1190         s0 = sa[0];
1191         sa++;
1192 



1193         for (i = 0; i < size - 7; i += 4, da += 2, sa++) {
1194           READ_U8_S16_ALIGN(tab0, tab1, tab2, tab3);
1195           res1 = (t0 << 16) + t1;
1196           res2 = (t2 << 16) + t3;
1197           s0 = sa[0];
1198           da[0] = res1;
1199           da[1] = res2;
1200         }
1201 
1202         READ_U8_S16_ALIGN(tab0, tab1, tab2, tab3);
1203         res1 = (t0 << 16) + t1;
1204         res2 = (t2 << 16) + t3;
1205         da[0] = res1;
1206         da[1] = res2;
1207         da += 2;
1208         dp = (mlib_u16 *) da;
1209         sp = (mlib_u8 *) sa;
1210         i += 4;
1211 
1212         if (i < size) {


1219           *dp++ = tab1[(*sp)];
1220           i++;
1221           sp++;
1222         }
1223 
1224         if (i < size) {
1225           *dp = tab2[(*sp)];
1226         }
1227 
1228       }
1229       else {
1230 
1231         *dp++ = tab0[(*sp)];
1232         size--;
1233         da = (mlib_u32 *) dp;
1234 
1235         s0 = sa[0];
1236         s1 = sa[1];
1237         sa += 2;
1238 



1239         for (i = 0; i < size - 10; i += 4, da += 2, sa++) {
1240           READ_U8_S16_NOTALIGN(tab1, tab2, tab3, tab0);
1241           s0 = s1;
1242           res1 = (t0 << 16) + t1;
1243           res2 = (t2 << 16) + t3;
1244           s1 = sa[0];
1245           da[0] = res1;
1246           da[1] = res2;
1247         }
1248 
1249         READ_U8_S16_NOTALIGN(tab1, tab2, tab3, tab0);
1250         res1 = (t0 << 16) + t1;
1251         res2 = (t2 << 16) + t3;
1252         da[0] = res1;
1253         da[1] = res2;
1254         ADD_READ_U8_S16_NOTALIGN(tab1, tab2, tab3);
1255         res1 = (t0 << 16) + t1;
1256         da[2] = res1;
1257         da += 3;
1258         dp = (mlib_u16 *) da;


1421       mlib_u32 *sa;
1422       mlib_u32 *tab = (mlib_u32 *) table[0];
1423       mlib_u32 s0, t0, t1, t2, t3;
1424       mlib_s32 off;
1425       mlib_s32 size = xsize;
1426       mlib_u32 *dp = (mlib_u32 *) dst;
1427       mlib_u8 *sp = (void *)src;
1428 
1429       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
1430 
1431       for (i = 0; i < off; i++, sp++) {
1432         *dp++ = tab[sp[0]];
1433         size--;
1434       }
1435 
1436       sa = (mlib_u32 *) sp;
1437 
1438       s0 = sa[0];
1439       sa++;
1440 



1441       for (i = 0; i < size - 7; i += 4, dp += 4, sa++) {
1442         READ_U8_S32(tab, tab, tab, tab);
1443         s0 = sa[0];
1444         dp[0] = t0;
1445         dp[1] = t1;
1446         dp[2] = t2;
1447         dp[3] = t3;
1448       }
1449 
1450       READ_U8_S32(tab, tab, tab, tab);
1451       dp[0] = t0;
1452       dp[1] = t1;
1453       dp[2] = t2;
1454       dp[3] = t3;
1455       dp += 4;
1456       sp = (mlib_u8 *) sa;
1457       i += 4;
1458       for (; i < size; i++, dp++, sp++)
1459         dp[0] = tab[sp[0]];
1460     }


1479       for (i = 0; i < off - 1; i += 2, sp += 2) {
1480         *dp++ = tab0[sp[0]];
1481         *dp++ = tab1[sp[1]];
1482         size -= 2;
1483       }
1484 
1485       if ((off & 1) != 0) {
1486         *dp++ = tab0[*sp];
1487         size--;
1488         sp++;
1489         tab = tab0;
1490         tab0 = tab1;
1491         tab1 = tab;
1492       }
1493 
1494       sa = (mlib_u32 *) sp;
1495 
1496       s0 = sa[0];
1497       sa++;
1498 



1499       for (i = 0; i < size - 7; i += 4, dp += 4, sa++) {
1500         READ_U8_S32(tab0, tab1, tab0, tab1);
1501         s0 = sa[0];
1502         dp[0] = t0;
1503         dp[1] = t1;
1504         dp[2] = t2;
1505         dp[3] = t3;
1506       }
1507 
1508       READ_U8_S32(tab0, tab1, tab0, tab1);
1509       dp[0] = t0;
1510       dp[1] = t1;
1511       dp[2] = t2;
1512       dp[3] = t3;
1513       dp += 4;
1514       sp = (mlib_u8 *) sa;
1515       i += 4;
1516 
1517       for (; i < size - 1; i += 2, sp += 2) {
1518         *dp++ = tab0[sp[0]];


1556         tab = tab2;
1557         tab2 = tab1;
1558         tab1 = tab0;
1559         tab0 = tab;
1560         size -= 2;
1561         sp += 2;
1562       }
1563       else if (off == 3) {
1564         *dp++ = tab0[sp[0]];
1565         *dp++ = tab1[sp[1]];
1566         *dp++ = tab2[sp[2]];
1567         size -= 3;
1568         sp += 3;
1569       }
1570 
1571       sa = (mlib_u32 *) sp;
1572 
1573       s0 = sa[0];
1574       sa++;
1575 



1576       for (i = 0; i < size - 7; i += 4, dp += 4, sa++) {
1577         READ_U8_S32(tab0, tab1, tab2, tab0);
1578         tab = tab0;
1579         tab0 = tab1;
1580         tab1 = tab2;
1581         tab2 = tab;
1582         s0 = sa[0];
1583         dp[0] = t0;
1584         dp[1] = t1;
1585         dp[2] = t2;
1586         dp[3] = t3;
1587       }
1588 
1589       READ_U8_S32(tab0, tab1, tab2, tab0);
1590       dp[0] = t0;
1591       dp[1] = t1;
1592       dp[2] = t2;
1593       dp[3] = t3;
1594       dp += 4;
1595       sp = (mlib_u8 *) sa;


1654         sp += 2;
1655       }
1656       else if (off == 3) {
1657         *dp++ = tab0[sp[0]];
1658         *dp++ = tab1[sp[1]];
1659         *dp++ = tab2[sp[2]];
1660         tab = tab3;
1661         tab3 = tab2;
1662         tab2 = tab1;
1663         tab1 = tab0;
1664         tab0 = tab;
1665         size -= 3;
1666         sp += 3;
1667       }
1668 
1669       sa = (mlib_u32 *) sp;
1670 
1671       s0 = sa[0];
1672       sa++;
1673 



1674       for (i = 0; i < size - 7; i += 4, dp += 4, sa++) {
1675         READ_U8_S32(tab0, tab1, tab2, tab3);
1676         s0 = sa[0];
1677         dp[0] = t0;
1678         dp[1] = t1;
1679         dp[2] = t2;
1680         dp[3] = t3;
1681       }
1682 
1683       READ_U8_S32(tab0, tab1, tab2, tab3);
1684       dp[0] = t0;
1685       dp[1] = t1;
1686       dp[2] = t2;
1687       dp[3] = t3;
1688       dp += 4;
1689       sp = (mlib_u8 *) sa;
1690       i += 4;
1691 
1692       if (i < size) {
1693         *dp++ = tab0[(*sp)];


1814       mlib_u8 *dp = dst;
1815       mlib_u8 *sa = (void *)src;
1816       mlib_s32 s0, t0, s1, t1, t, t2, off;
1817       mlib_s32 size = xsize;
1818 
1819       if (((mlib_addr) dp & 1) == 0) {
1820 
1821         if (((mlib_addr) dp & 3) != 0) {
1822           *((mlib_u16 *) dp) = tab[sa[0]];
1823           sa++;
1824           size--;
1825           dp += 2;
1826         }
1827 
1828         da = (mlib_s32 *) dp;
1829 
1830         s0 = sa[0];
1831         s1 = sa[1];
1832         sa += 2;
1833 



1834         for (i = 0; i < size - 3; i += 2, da++, sa += 2) {
1835           t0 = tab[s0];
1836           t1 = tab[s1];
1837 #ifdef _LITTLE_ENDIAN
1838           t = (t1 << 16) + t0;
1839 #else
1840           t = (t0 << 16) + t1;
1841 #endif /* _LITTLE_ENDIAN */
1842           s0 = sa[0];
1843           s1 = sa[1];
1844           da[0] = t;
1845         }
1846 
1847         t0 = tab[s0];
1848         t1 = tab[s1];
1849 #ifdef _LITTLE_ENDIAN
1850         t = (t1 << 16) + t0;
1851 #else
1852         t = (t0 << 16) + t1;
1853 #endif /* _LITTLE_ENDIAN */


1873 #endif /* _LITTLE_ENDIAN */
1874           sa++;
1875           size--;
1876           dp += 2;
1877         }
1878 
1879         t0 = tab[sa[0]];
1880         sa++;
1881 #ifdef _LITTLE_ENDIAN
1882         *dp++ = t0;
1883 #else
1884         *dp++ = (t0 >> 8);
1885 #endif /* _LITTLE_ENDIAN */
1886 
1887         da = (mlib_s32 *) dp;
1888 
1889         s0 = sa[0];
1890         s1 = sa[1];
1891         sa += 2;
1892 



1893         for (i = 0; i < size - 4; i += 2, da++, sa += 2) {
1894           t1 = tab[s0];
1895           t2 = tab[s1];
1896 #ifdef _LITTLE_ENDIAN
1897           t = (t0 >> 8) + (t1 << 8) + (t2 << 24);
1898 #else
1899           t = (t0 << 24) + (t1 << 8) + (t2 >> 8);
1900 #endif /* _LITTLE_ENDIAN */
1901           t0 = t2;
1902           s0 = sa[0];
1903           s1 = sa[1];
1904           da[0] = t;
1905         }
1906 
1907         t1 = tab[s0];
1908         t2 = tab[s1];
1909 #ifdef _LITTLE_ENDIAN
1910         t = (t0 >> 8) + (t1 << 8) + (t2 << 24);
1911 #else
1912         t = (t0 << 24) + (t1 << 8) + (t2 >> 8);


1957       tab[i - 1] = s3;
1958     }
1959 
1960 #ifdef _LITTLE_ENDIAN
1961     s3 = (s2 << 24) + (s1 << 16) + (s0 << 8);
1962 #else
1963     s3 = (s0 << 16) + (s1 << 8) + s2;
1964 #endif /* _LITTLE_ENDIAN */
1965     tab[255] = s3;
1966 
1967     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
1968       mlib_u32 *da;
1969       mlib_u8 *dp = dst;
1970       mlib_u8 *sa = (void *)src, *ptr;
1971       mlib_u32 s0, s1, t0, t1;
1972       mlib_u32 res1, res2;
1973       mlib_s32 size = xsize, off;
1974 
1975       off = (mlib_s32) ((mlib_addr) dp & 3);
1976 



1977       for (i = 0; i < off; i++) {
1978         ptr = (mlib_u8 *) (tab + sa[0]);
1979         dp[0] = ptr[1];
1980         dp[1] = ptr[2];
1981         dp[2] = ptr[3];
1982         dp += 3;
1983         sa++;
1984       }
1985 
1986       size -= off;
1987       da = (mlib_u32 *) dp;
1988       s0 = sa[0];
1989       s1 = sa[1];
1990       sa += 2;
1991 



1992       for (i = 0; i < size - 7; i += 4, da += 3, sa += 4) {
1993         t0 = tab[s0];
1994         t1 = tab[s1];
1995 #ifdef _LITTLE_ENDIAN
1996         da[0] = (t0 >> 8) + (t1 << 16);
1997         res2 = (t1 >> 16);
1998 #else
1999         da[0] = (t0 << 8) + (t1 >> 16);
2000         res2 = (t1 << 16);
2001 #endif /* _LITTLE_ENDIAN */
2002         s0 = sa[0];
2003         s1 = sa[1];
2004         t0 = tab[s0];
2005         t1 = tab[s1];
2006 #ifdef _LITTLE_ENDIAN
2007         res2 += (t0 << 8);
2008         res1 = (t0 >> 24) + t1;
2009 #else
2010         res2 += (t0 >> 8);
2011         res1 = (t0 << 24) + t1;


2026       res2 = (t1 << 16);
2027 #endif /* _LITTLE_ENDIAN */
2028       s0 = sa[0];
2029       s1 = sa[1];
2030       t0 = tab[s0];
2031       t1 = tab[s1];
2032 #ifdef _LITTLE_ENDIAN
2033       res2 += (t0 << 8);
2034       res1 = (t0 >> 24) + t1;
2035 #else
2036       res2 += (t0 >> 8);
2037       res1 = (t0 << 24) + t1;
2038 #endif /* _LITTLE_ENDIAN */
2039       da[1] = res2;
2040       da[2] = res1;
2041       da += 3;
2042       sa += 2;
2043       dp = (mlib_u8 *) da;
2044       i += 4;
2045 



2046       for (; i < size; i++) {
2047         ptr = (mlib_u8 *) (tab + sa[0]);
2048         dp[0] = ptr[1];
2049         dp[1] = ptr[2];
2050         dp[2] = ptr[3];
2051         dp += 3;
2052         sa++;
2053       }
2054     }
2055 
2056   }
2057   else if (csize == 4) {
2058     mlib_u32 tab[256];
2059     const mlib_u8 *tab0 = table[0];
2060     const mlib_u8 *tab1 = table[1];
2061     const mlib_u8 *tab2 = table[2];
2062     const mlib_u8 *tab3 = table[3];
2063     mlib_s32 i, j;
2064     mlib_u32 s0, s1, s2, s3, s4;
2065 


2086     s4 = (s0 << 24) + (s1 << 16) + (s2 << 8) + s3;
2087 #endif /* _LITTLE_ENDIAN */
2088     tab[255] = s4;
2089 
2090     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
2091       mlib_u32 *da;
2092       mlib_u8 *dp = dst;
2093       mlib_u8 *sa = (void *)src;
2094       mlib_u32 s0, t0, s1, t1, t2;
2095       mlib_s32 size = xsize, off;
2096       mlib_u32 shift, shift1, res1, res2;
2097 
2098       if (((mlib_addr) dp & 3) == 0) {
2099 
2100         da = (mlib_u32 *) dp;
2101 
2102         s0 = sa[0];
2103         s1 = sa[1];
2104         sa += 2;
2105 



2106         for (i = 0; i < size - 3; i += 2, da += 2, sa += 2) {
2107           t0 = tab[s0];
2108           t1 = tab[s1];
2109           s0 = sa[0];
2110           s1 = sa[1];
2111           da[0] = t0;
2112           da[1] = t1;
2113         }
2114 
2115         t0 = tab[s0];
2116         t1 = tab[s1];
2117         da[0] = t0;
2118         da[1] = t1;
2119 
2120         if (size & 1)
2121           da[2] = tab[sa[0]];
2122 
2123       }
2124       else {
2125 
2126         off = (mlib_s32) (4 - ((mlib_addr) dp & 3));
2127         shift = 8 * off;
2128         shift1 = 32 - shift;
2129 
2130         for (i = 0; i < off; i++) {
2131           dp[i] = table[i][sa[0]];
2132         }
2133 
2134         dp += i;
2135         t0 = tab[sa[0]];
2136         sa++;
2137 
2138         da = (mlib_u32 *) dp;
2139 
2140         s0 = sa[0];
2141         s1 = sa[1];
2142         sa += 2;
2143 



2144         for (i = 0; i < size - 4; i += 2, da += 2, sa += 2) {
2145           t1 = tab[s0];
2146           t2 = tab[s1];
2147 #ifdef _LITTLE_ENDIAN
2148           res1 = (t0 >> shift) + (t1 << shift1);
2149           res2 = (t1 >> shift) + (t2 << shift1);
2150 #else
2151           res1 = (t0 << shift) + (t1 >> shift1);
2152           res2 = (t1 << shift) + (t2 >> shift1);
2153 #endif /* _LITTLE_ENDIAN */
2154           t0 = t2;
2155           s0 = sa[0];
2156           s1 = sa[1];
2157           da[0] = res1;
2158           da[1] = res2;
2159         }
2160 
2161         t1 = tab[s0];
2162         t2 = tab[s1];
2163 #ifdef _LITTLE_ENDIAN


2223   if ((xsize < 8) || (csize == 2)) {
2224     MLIB_C_IMAGELOOKUPSI(mlib_u8, mlib_s16, table_base);
2225   }
2226   else if (csize == 3) {
2227     mlib_s32 i, j;
2228 
2229     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
2230       mlib_u32 *da;
2231       mlib_u8 *dp = dst;
2232       mlib_s16 *sa = (void *)src;
2233       const mlib_u8 *tab0 = table_base[0];
2234       const mlib_u8 *tab1 = table_base[1];
2235       const mlib_u8 *tab2 = table_base[2];
2236       mlib_s32 s0, s1;
2237       mlib_u32 t0, t1, t2, t3, t4, t5;
2238       mlib_u32 res1, res2;
2239       mlib_s32 size = xsize, off;
2240 
2241       off = (mlib_s32) ((mlib_addr) dp & 3);
2242 



2243       for (i = 0; i < off; i++) {
2244         s0 = *sa++;
2245         dp[0] = tab0[s0];
2246         dp[1] = tab1[s0];
2247         dp[2] = tab2[s0];
2248         dp += 3;
2249       }
2250 
2251       size -= off;
2252       da = (mlib_u32 *) dp;
2253       s0 = sa[0];
2254       s1 = sa[1];
2255       sa += 2;
2256 



2257       for (i = 0; i < size - 7; i += 4, da += 3, sa += 4) {
2258         t0 = tab0[s0];
2259         t1 = tab1[s0];
2260         t2 = tab2[s0];
2261         t3 = tab0[s1];
2262         t4 = tab1[s1];
2263         t5 = tab2[s1];
2264 #ifdef _LITTLE_ENDIAN
2265         da[0] = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2266         res2 = (t5 << 8) + t4;
2267 #else
2268         da[0] = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2269         res2 = (t4 << 24) + (t5 << 16);
2270 #endif /* _LITTLE_ENDIAN */
2271         s0 = sa[0];
2272         s1 = sa[1];
2273         t0 = tab0[s0];
2274         t1 = tab1[s0];
2275         t2 = tab2[s0];
2276         t3 = tab0[s1];


2307       t0 = tab0[s0];
2308       t1 = tab1[s0];
2309       t2 = tab2[s0];
2310       t3 = tab0[s1];
2311       t4 = tab1[s1];
2312       t5 = tab2[s1];
2313 #ifdef _LITTLE_ENDIAN
2314       res2 += ((t1 << 24) + (t0 << 16));
2315       res1 = (t5 << 24) + (t4 << 16) + (t3 << 8) + t2;
2316 #else
2317       res2 += ((t0 << 8) + t1);
2318       res1 = (t2 << 24) + (t3 << 16) + (t4 << 8) + t5;
2319 #endif /* _LITTLE_ENDIAN */
2320       da[1] = res2;
2321       da[2] = res1;
2322       da += 3;
2323       sa += 2;
2324       dp = (mlib_u8 *) da;
2325       i += 4;
2326 



2327       for (; i < size; i++) {
2328         s0 = *sa++;
2329         dp[0] = tab0[s0];
2330         dp[1] = tab1[s0];
2331         dp[2] = tab2[s0];
2332         dp += 3;
2333       }
2334     }
2335 
2336   }
2337   else if (csize == 4) {
2338     mlib_s32 i, j;
2339 
2340     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
2341       mlib_u32 *da;
2342       mlib_u8 *dp = dst;
2343       mlib_s16 *sa = (void *)src;
2344       const mlib_u8 *tab0 = table_base[0];
2345       const mlib_u8 *tab1 = table_base[1];
2346       const mlib_u8 *tab2 = table_base[2];
2347       const mlib_u8 *tab3 = table_base[3];
2348       mlib_s32 s0;
2349       mlib_u32 t0, t1, t2, t3;
2350       mlib_s32 size = xsize, off;
2351       mlib_u32 shift, shift1, res1, res2, res;
2352 
2353       if (((mlib_addr) dp & 3) == 0) {
2354 
2355         da = (mlib_u32 *) dp;
2356 
2357         s0 = sa[0];
2358         sa++;
2359 



2360         for (i = 0; i < size - 1; i++, da++, sa++) {
2361           t0 = tab0[s0];
2362           t1 = tab1[s0];
2363           t2 = tab2[s0];
2364           t3 = tab3[s0];
2365 #ifdef _LITTLE_ENDIAN
2366           res = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2367 #else
2368           res = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2369 #endif /* _LITTLE_ENDIAN */
2370           s0 = sa[0];
2371           da[0] = res;
2372         }
2373 
2374         t0 = tab0[s0];
2375         t1 = tab1[s0];
2376         t2 = tab2[s0];
2377         t3 = tab3[s0];
2378 #ifdef _LITTLE_ENDIAN
2379         res = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;


2395           dp[i] = table_base[i][s0];
2396         }
2397 
2398         dp += i;
2399         da = (mlib_u32 *) dp;
2400 
2401         t0 = tab0[s0];
2402         t1 = tab1[s0];
2403         t2 = tab2[s0];
2404         t3 = tab3[s0];
2405 
2406 #ifdef _LITTLE_ENDIAN
2407         res1 = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2408 #else
2409         res1 = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2410 #endif /* _LITTLE_ENDIAN */
2411 
2412         s0 = sa[0];
2413         sa++;
2414 



2415         for (i = 0; i < size - 2; i++, da++, sa++) {
2416           t0 = tab0[s0];
2417           t1 = tab1[s0];
2418           t2 = tab2[s0];
2419           t3 = tab3[s0];
2420 #ifdef _LITTLE_ENDIAN
2421           res2 = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2422           res = (res1 >> shift) + (res2 << shift1);
2423 #else
2424           res2 = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2425           res = (res1 << shift) + (res2 >> shift1);
2426 #endif /* _LITTLE_ENDIAN */
2427           res1 = res2;
2428           s0 = sa[0];
2429           da[0] = res;
2430         }
2431 
2432         t0 = tab0[s0];
2433         t1 = tab1[s0];
2434         t2 = tab2[s0];


2477   if ((xsize < 8) || (csize == 2)) {
2478     MLIB_C_IMAGELOOKUPSI(mlib_u8, mlib_u16, table_base);
2479   }
2480   else if (csize == 3) {
2481     mlib_s32 i, j;
2482 
2483     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
2484       mlib_u32 *da;
2485       mlib_u8 *dp = dst;
2486       mlib_u16 *sa = (void *)src;
2487       const mlib_u8 *tab0 = table_base[0];
2488       const mlib_u8 *tab1 = table_base[1];
2489       const mlib_u8 *tab2 = table_base[2];
2490       mlib_s32 s0, s1;
2491       mlib_u32 t0, t1, t2, t3, t4, t5;
2492       mlib_u32 res1, res2;
2493       mlib_s32 size = xsize, off;
2494 
2495       off = (mlib_s32) ((mlib_addr) dp & 3);
2496 



2497       for (i = 0; i < off; i++) {
2498         s0 = *sa++;
2499         dp[0] = tab0[s0];
2500         dp[1] = tab1[s0];
2501         dp[2] = tab2[s0];
2502         dp += 3;
2503       }
2504 
2505       size -= off;
2506       da = (mlib_u32 *) dp;
2507       s0 = sa[0];
2508       s1 = sa[1];
2509       sa += 2;
2510 



2511       for (i = 0; i < size - 7; i += 4, da += 3, sa += 4) {
2512         t0 = tab0[s0];
2513         t1 = tab1[s0];
2514         t2 = tab2[s0];
2515         t3 = tab0[s1];
2516         t4 = tab1[s1];
2517         t5 = tab2[s1];
2518 #ifdef _LITTLE_ENDIAN
2519         da[0] = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2520         res2 = (t5 << 8) + t4;
2521 #else
2522         da[0] = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2523         res2 = (t4 << 24) + (t5 << 16);
2524 #endif /* _LITTLE_ENDIAN */
2525         s0 = sa[0];
2526         s1 = sa[1];
2527         t0 = tab0[s0];
2528         t1 = tab1[s0];
2529         t2 = tab2[s0];
2530         t3 = tab0[s1];


2561       t0 = tab0[s0];
2562       t1 = tab1[s0];
2563       t2 = tab2[s0];
2564       t3 = tab0[s1];
2565       t4 = tab1[s1];
2566       t5 = tab2[s1];
2567 #ifdef _LITTLE_ENDIAN
2568       res2 += ((t1 << 24) + (t0 << 16));
2569       res1 = (t5 << 24) + (t4 << 16) + (t3 << 8) + t2;
2570 #else
2571       res2 += ((t0 << 8) + t1);
2572       res1 = (t2 << 24) + (t3 << 16) + (t4 << 8) + t5;
2573 #endif /* _LITTLE_ENDIAN */
2574       da[1] = res2;
2575       da[2] = res1;
2576       da += 3;
2577       sa += 2;
2578       dp = (mlib_u8 *) da;
2579       i += 4;
2580 



2581       for (; i < size; i++) {
2582         s0 = *sa++;
2583         dp[0] = tab0[s0];
2584         dp[1] = tab1[s0];
2585         dp[2] = tab2[s0];
2586         dp += 3;
2587       }
2588     }
2589 
2590   }
2591   else if (csize == 4) {
2592     mlib_s32 i, j;
2593 
2594     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
2595       mlib_u32 *da;
2596       mlib_u8 *dp = dst;
2597       mlib_u16 *sa = (void *)src;
2598       const mlib_u8 *tab0 = table_base[0];
2599       const mlib_u8 *tab1 = table_base[1];
2600       const mlib_u8 *tab2 = table_base[2];
2601       const mlib_u8 *tab3 = table_base[3];
2602       mlib_s32 s0;
2603       mlib_u32 t0, t1, t2, t3;
2604       mlib_s32 size = xsize, off;
2605       mlib_u32 shift, shift1, res1, res2, res;
2606 
2607       if (((mlib_addr) dp & 3) == 0) {
2608 
2609         da = (mlib_u32 *) dp;
2610 
2611         s0 = sa[0];
2612         sa++;
2613 



2614         for (i = 0; i < size - 1; i++, da++, sa++) {
2615           t0 = tab0[s0];
2616           t1 = tab1[s0];
2617           t2 = tab2[s0];
2618           t3 = tab3[s0];
2619 #ifdef _LITTLE_ENDIAN
2620           res = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2621 #else
2622           res = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2623 #endif /* _LITTLE_ENDIAN */
2624           s0 = sa[0];
2625           da[0] = res;
2626         }
2627 
2628         t0 = tab0[s0];
2629         t1 = tab1[s0];
2630         t2 = tab2[s0];
2631         t3 = tab3[s0];
2632 #ifdef _LITTLE_ENDIAN
2633         res = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;


2649           dp[i] = table_base[i][s0];
2650         }
2651 
2652         dp += i;
2653         da = (mlib_u32 *) dp;
2654 
2655         t0 = tab0[s0];
2656         t1 = tab1[s0];
2657         t2 = tab2[s0];
2658         t3 = tab3[s0];
2659 
2660 #ifdef _LITTLE_ENDIAN
2661         res1 = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2662 #else
2663         res1 = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2664 #endif /* _LITTLE_ENDIAN */
2665 
2666         s0 = sa[0];
2667         sa++;
2668 



2669         for (i = 0; i < size - 2; i++, da++, sa++) {
2670           t0 = tab0[s0];
2671           t1 = tab1[s0];
2672           t2 = tab2[s0];
2673           t3 = tab3[s0];
2674 #ifdef _LITTLE_ENDIAN
2675           res2 = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2676           res = (res1 >> shift) + (res2 << shift1);
2677 #else
2678           res2 = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2679           res = (res1 << shift) + (res2 >> shift1);
2680 #endif /* _LITTLE_ENDIAN */
2681           res1 = res2;
2682           s0 = sa[0];
2683           da[0] = res;
2684         }
2685 
2686         t0 = tab0[s0];
2687         t1 = tab1[s0];
2688         t2 = tab2[s0];


2767 #else
2768     s2 = (s0 << 16) + s1;
2769 #endif /* _LITTLE_ENDIAN */
2770     tab[255] = s2;
2771 
2772     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
2773       mlib_u32 *da;
2774       mlib_u16 *dp = (mlib_u16 *) dst;
2775       mlib_u8 *sa = (void *)src;
2776       mlib_u32 s0, t0, s1, t1, t2;
2777       mlib_u32 res1, res2;
2778       mlib_s32 size = xsize;
2779 
2780       if (((mlib_addr) dp & 3) == 0) {
2781 
2782         da = (mlib_u32 *) dp;
2783         s0 = sa[0];
2784         s1 = sa[1];
2785         sa += 2;
2786 



2787         for (i = 0; i < size - 3; i += 2, da += 2, sa += 2) {
2788           t0 = tab[s0];
2789           t1 = tab[s1];
2790           s0 = sa[0];
2791           s1 = sa[1];
2792           da[0] = t0;
2793           da[1] = t1;
2794         }
2795 
2796         t0 = tab[s0];
2797         t1 = tab[s1];
2798         da[0] = t0;
2799         da[1] = t1;
2800 
2801         if (size & 1)
2802           da[2] = tab[sa[0]];
2803 
2804       }
2805       else {
2806 
2807         t0 = tab[*sa++];
2808 #ifdef _LITTLE_ENDIAN
2809         *dp++ = (mlib_u16) (t0);
2810 #else
2811         *dp++ = (mlib_u16) (t0 >> 16);
2812 #endif /* _LITTLE_ENDIAN */
2813         da = (mlib_u32 *) dp;
2814         s0 = sa[0];
2815         s1 = sa[1];
2816         sa += 2;
2817 



2818         for (i = 0; i < size - 4; i += 2, da += 2, sa += 2) {
2819           t1 = tab[s0];
2820           t2 = tab[s1];
2821 #ifdef _LITTLE_ENDIAN
2822           res1 = (t0 >> 16) + (t1 << 16);
2823           res2 = (t1 >> 16) + (t2 << 16);
2824 #else
2825           res1 = (t0 << 16) + (t1 >> 16);
2826           res2 = (t1 << 16) + (t2 >> 16);
2827 #endif /* _LITTLE_ENDIAN */
2828           t0 = t2;
2829           s0 = sa[0];
2830           s1 = sa[1];
2831           da[0] = res1;
2832           da[1] = res2;
2833         }
2834 
2835         t1 = tab[s0];
2836         t2 = tab[s1];
2837 #ifdef _LITTLE_ENDIAN


2908       mlib_u32 res1, res2;
2909       mlib_s32 size = xsize, off;
2910 
2911       off = (mlib_s32) ((mlib_addr) dp & 3);
2912 
2913       if (off != 0) {
2914         ptr = (mlib_u16 *) (tab + 2 * sa[0]);
2915         dp[0] = ptr[1];
2916         dp[1] = ptr[2];
2917         dp[2] = ptr[3];
2918         dp += 3;
2919         sa++;
2920         size--;
2921       }
2922 
2923       da = (mlib_u32 *) dp;
2924       s0 = sa[0] << 3;
2925       s1 = sa[1] << 3;
2926       sa += 2;
2927 



2928       for (i = 0; i < size - 3; i += 2, da += 3, sa += 2) {
2929         t0 = *(mlib_u32 *) ((mlib_u8 *) tab + s0);
2930         t1 = *(mlib_u32 *) ((mlib_u8 *) tab + s0 + 4);
2931         t2 = *(mlib_u32 *) ((mlib_u8 *) tab + s1);
2932         t3 = *(mlib_u32 *) ((mlib_u8 *) tab + s1 + 4);
2933 #ifdef _LITTLE_ENDIAN
2934         res1 = (t0 >> 16) + (t1 << 16);
2935         res2 = (t1 >> 16) + t2;
2936 #else
2937         res1 = (t0 << 16) + (t1 >> 16);
2938         res2 = (t1 << 16) + t2;
2939 #endif /* _LITTLE_ENDIAN */
2940         s0 = sa[0] << 3;
2941         s1 = sa[1] << 3;
2942         da[0] = res1;
2943         da[1] = res2;
2944         da[2] = t3;
2945       }
2946 
2947       t0 = *(mlib_u32 *) ((mlib_u8 *) tab + s0);


3009 #endif /* _LITTLE_ENDIAN */
3010     tab[510] = s4;
3011     tab[511] = s5;
3012 
3013     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
3014       mlib_u32 *da;
3015       mlib_u16 *dp = (mlib_u16 *) dst;
3016       mlib_u8 *sa = (void *)src;
3017       mlib_u32 s0, t0, s1, t1, t2, t3, t4, t5;
3018       mlib_s32 size = xsize;
3019       mlib_u32 res1, res2, res3, res4;
3020 
3021       if (((mlib_addr) dp & 3) == 0) {
3022 
3023         da = (mlib_u32 *) dp;
3024 
3025         s0 = sa[0] << 3;
3026         s1 = sa[1] << 3;
3027         sa += 2;
3028 



3029         for (i = 0; i < size - 3; i += 2, da += 4, sa += 2) {
3030           t0 = *(mlib_u32 *) ((mlib_u8 *) tab + s0);
3031           t1 = *(mlib_u32 *) ((mlib_u8 *) tab + s0 + 4);
3032           t2 = *(mlib_u32 *) ((mlib_u8 *) tab + s1);
3033           t3 = *(mlib_u32 *) ((mlib_u8 *) tab + s1 + 4);
3034           s0 = sa[0] << 3;
3035           s1 = sa[1] << 3;
3036           da[0] = t0;
3037           da[1] = t1;
3038           da[2] = t2;
3039           da[3] = t3;
3040         }
3041 
3042         t0 = *(mlib_u32 *) ((mlib_u8 *) tab + s0);
3043         t1 = *(mlib_u32 *) ((mlib_u8 *) tab + s0 + 4);
3044         t2 = *(mlib_u32 *) ((mlib_u8 *) tab + s1);
3045         t3 = *(mlib_u32 *) ((mlib_u8 *) tab + s1 + 4);
3046         da[0] = t0;
3047         da[1] = t1;
3048         da[2] = t2;


3057       else {
3058 
3059         t4 = tab[2 * sa[0]];
3060         t5 = tab[2 * sa[0] + 1];
3061 #ifdef _LITTLE_ENDIAN
3062         *dp++ = (mlib_u16) (t4);
3063 #else
3064         *dp++ = (mlib_u16) (t4 >> 16);
3065 #endif /* _LITTLE_ENDIAN */
3066         sa++;
3067         da = (mlib_u32 *) dp;
3068 #ifdef _LITTLE_ENDIAN
3069         *da++ = (t4 >> 16) + (t5 << 16);
3070 #else
3071         *da++ = (t4 << 16) + (t5 >> 16);
3072 #endif /* _LITTLE_ENDIAN */
3073         s0 = sa[0] << 3;
3074         s1 = sa[1] << 3;
3075         sa += 2;
3076 



3077         for (i = 0; i < size - 4; i += 2, da += 4, sa += 2) {
3078           t0 = *(mlib_u32 *) ((mlib_u8 *) tab + s0);
3079           t1 = *(mlib_u32 *) ((mlib_u8 *) tab + s0 + 4);
3080           t2 = *(mlib_u32 *) ((mlib_u8 *) tab + s1);
3081           t3 = *(mlib_u32 *) ((mlib_u8 *) tab + s1 + 4);
3082 #ifdef _LITTLE_ENDIAN
3083           res1 = (t5 >> 16) + (t0 << 16);
3084           res2 = (t0 >> 16) + (t1 << 16);
3085           res3 = (t1 >> 16) + (t2 << 16);
3086           res4 = (t2 >> 16) + (t3 << 16);
3087 #else
3088           res1 = (t5 << 16) + (t0 >> 16);
3089           res2 = (t0 << 16) + (t1 >> 16);
3090           res3 = (t1 << 16) + (t2 >> 16);
3091           res4 = (t2 << 16) + (t3 >> 16);
3092 #endif /* _LITTLE_ENDIAN */
3093           s0 = sa[0] << 3;
3094           s1 = sa[1] << 3;
3095           da[0] = res1;
3096           da[1] = res2;


3292       mlib_u32 *tab1 = (mlib_u32 *) table[1];
3293       mlib_u32 s0, t0, t1, t2, t3;
3294       mlib_s32 off;
3295       mlib_s32 size = xsize;
3296       mlib_u32 *dp = (mlib_u32 *) dst;
3297       mlib_u8 *sp = (void *)src;
3298 
3299       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
3300 
3301       for (i = 0; i < off; i++, sp++) {
3302         *dp++ = tab0[sp[0]];
3303         *dp++ = tab1[sp[0]];
3304         size--;
3305       }
3306 
3307       sa = (mlib_u32 *) sp;
3308 
3309       s0 = sa[0];
3310       sa++;
3311 



3312       for (i = 0; i < size - 7; i += 4, dp += 8, sa++) {
3313 #ifdef _LITTLE_ENDIAN
3314         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC));
3315         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC));
3316         t2 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC));
3317         t3 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC));
3318 #else
3319         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));
3320         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC));
3321         t2 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC));
3322         t3 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC));
3323 #endif /* _LITTLE_ENDIAN */
3324         dp[0] = t0;
3325         dp[1] = t1;
3326         dp[2] = t2;
3327         dp[3] = t3;
3328 #ifdef _LITTLE_ENDIAN
3329         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC));
3330         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC));
3331         t2 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));


3395       mlib_u32 s0, t0, t1, t2, t3, t4, t5;
3396       mlib_s32 off;
3397       mlib_s32 size = xsize;
3398       mlib_u32 *dp = (mlib_u32 *) dst;
3399       mlib_u8 *sp = (void *)src;
3400 
3401       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
3402 
3403       for (i = 0; i < off; i++, sp++) {
3404         *dp++ = tab0[sp[0]];
3405         *dp++ = tab1[sp[0]];
3406         *dp++ = tab2[sp[0]];
3407         size--;
3408       }
3409 
3410       sa = (mlib_u32 *) sp;
3411 
3412       s0 = sa[0];
3413       sa++;
3414 



3415       for (i = 0; i < size - 7; i += 4, dp += 12, sa++) {
3416 #ifdef _LITTLE_ENDIAN
3417         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC));
3418         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC));
3419         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 << 2) & 0x3FC));
3420         t3 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC));
3421         t4 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC));
3422         t5 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 6) & 0x3FC));
3423 #else
3424         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));
3425         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC));
3426         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 22) & 0x3FC));
3427         t3 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC));
3428         t4 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC));
3429         t5 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 14) & 0x3FC));
3430 #endif /* _LITTLE_ENDIAN */
3431         dp[0] = t0;
3432         dp[1] = t1;
3433         dp[2] = t2;
3434         dp[3] = t3;


3525       mlib_s32 off;
3526       mlib_s32 size = xsize;
3527       mlib_u32 *dp = (mlib_u32 *) dst;
3528       mlib_u8 *sp = (void *)src;
3529 
3530       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
3531 
3532       for (i = 0; i < off; i++, sp++) {
3533         *dp++ = tab0[sp[0]];
3534         *dp++ = tab1[sp[0]];
3535         *dp++ = tab2[sp[0]];
3536         *dp++ = tab3[sp[0]];
3537         size--;
3538       }
3539 
3540       sa = (mlib_u32 *) sp;
3541 
3542       s0 = sa[0];
3543       sa++;
3544 



3545       for (i = 0; i < size - 7; i += 4, dp += 16, sa++) {
3546 #ifdef _LITTLE_ENDIAN
3547         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC));
3548         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC));
3549         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 << 2) & 0x3FC));
3550         t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 << 2) & 0x3FC));
3551 #else
3552         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));
3553         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC));
3554         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 22) & 0x3FC));
3555         t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 22) & 0x3FC));
3556 #endif /* _LITTLE_ENDIAN */
3557         dp[0] = t0;
3558         dp[1] = t1;
3559         dp[2] = t2;
3560         dp[3] = t3;
3561 #ifdef _LITTLE_ENDIAN
3562         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC));
3563         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC));
3564         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 6) & 0x3FC));


< prev index next >