1 /*
   2  * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 
  27 #include "mlib_image.h"
  28 #include "mlib_ImageLookUp.h"
  29 #include "mlib_c_ImageLookUp.h"
  30 
  31 /***************************************************************/
  32 #define MLIB_C_IMAGELOOKUP(DTYPE, STYPE, TABLE)                         \
  33 {                                                                       \
  34   mlib_s32 i, j, k;                                                     \
  35                                                                         \
  36   if (xsize < 2) {                                                      \
  37     for(j = 0; j < ysize; j++, dst += dlb, src += slb){                 \
  38       for(k = 0; k < csize; k++) {                                      \
  39         DTYPE *da = dst + k;                                            \
  40         const STYPE *sa = src + k;                                      \
  41         DTYPE *tab = (DTYPE*) TABLE[k];                                 \
  42                                                                         \
  43         for(i = 0; i < xsize; i++, da += csize, sa += csize)            \
  44         *da=tab[*sa];                                                   \
  45       }                                                                 \
  46     }                                                                   \
  47   } else {                                                              \
  48     for(j = 0; j < ysize; j++, dst += dlb, src += slb) {                \
  49       for(k = 0; k < csize; k++) {                                      \
  50         DTYPE    *da = dst + k;                                         \
  51         const STYPE *sa = src + k;                                      \
  52         DTYPE *tab = (DTYPE*) TABLE[k];                                 \
  53         mlib_s32 s0, t0, s1, t1;                                        \
  54                                                                         \
  55         s0 = (mlib_s32)sa[0];                                           \
  56         s1 = (mlib_s32)sa[csize];                                       \
  57         sa += 2*csize;                                                  \
  58                                                                         \
  59         for(i = 0; i < xsize - 3; i+=2, da += 2*csize, sa += 2*csize) { \
  60           t0 = (mlib_s32)tab[s0];                                       \
  61           t1 = (mlib_s32)tab[s1];                                       \
  62           s0 = (mlib_s32)sa[0];                                         \
  63           s1 = (mlib_s32)sa[csize];                                     \
  64           da[0] = (DTYPE)t0;                                            \
  65           da[csize] = (DTYPE)t1;                                        \
  66         }                                                               \
  67         t0 = (mlib_s32)tab[s0];                                         \
  68         t1 = (mlib_s32)tab[s1];                                         \
  69         da[0] = (DTYPE)t0;                                              \
  70         da[csize] = (DTYPE)t1;                                          \
  71         if (xsize & 1) da[2*csize] = tab[sa[0]];                        \
  72       }                                                                 \
  73     }                                                                   \
  74   }                                                                     \
  75 }
  76 
  77 /***************************************************************/
  78 #define MLIB_C_IMAGELOOKUPSI(DTYPE, STYPE, TABLE)                 \
  79 {                                                                 \
  80   mlib_s32 i, j, k;                                               \
  81                                                                   \
  82   if (xsize < 2) {                                                \
  83     for(j = 0; j < ysize; j++, dst += dlb, src += slb){           \
  84       for(k = 0; k < csize; k++) {                                \
  85         DTYPE *da = dst + k;                                      \
  86         const STYPE *sa = (void *)src;                                    \
  87         DTYPE *tab = (DTYPE*) TABLE[k];                           \
  88                                                                   \
  89         for(i = 0; i < xsize; i++, da += csize, sa ++)            \
  90         *da=tab[*sa];                                             \
  91       }                                                           \
  92     }                                                             \
  93   } else {                                                        \
  94     for(j = 0; j < ysize; j++, dst += dlb, src += slb) {          \
  95       for(k = 0; k < csize; k++) {                                \
  96         DTYPE *da = dst + k;                                      \
  97         const STYPE *sa = (void *)src;                                    \
  98         DTYPE *tab = (DTYPE*) TABLE[k];                           \
  99         mlib_s32 s0, t0, s1, t1;                                  \
 100                                                                   \
 101         s0 = (mlib_s32)sa[0];                                     \
 102         s1 = (mlib_s32)sa[1];                                     \
 103         sa += 2;                                                  \
 104                                                                   \
 105         for(i = 0; i < xsize - 3; i+=2, da += 2*csize, sa += 2) { \
 106           t0 = (mlib_s32)tab[s0];                                 \
 107           t1 = (mlib_s32)tab[s1];                                 \
 108           s0 = (mlib_s32)sa[0];                                   \
 109           s1 = (mlib_s32)sa[1];                                   \
 110           da[0] = (DTYPE)t0;                                      \
 111           da[csize] = (DTYPE)t1;                                  \
 112         }                                                         \
 113         t0 = (mlib_s32)tab[s0];                                   \
 114         t1 = (mlib_s32)tab[s1];                                   \
 115         da[0] = (DTYPE)t0;                                        \
 116         da[csize] = (DTYPE)t1;                                    \
 117         if (xsize & 1) da[2*csize] = tab[sa[0]];                  \
 118       }                                                           \
 119     }                                                             \
 120   }                                                               \
 121 }
 122 
/*
 * Byte-extraction helpers for the word-at-a-time look-up paths.
 *
 * Every macro below expands to bare assignment statements: it reads the
 * caller's s0/s1/s2 locals (packed source bytes) and writes the caller's
 * t0..t3 locals (looked-up values).  Which tN receives which source byte
 * depends on the byte order, so that the callers' fixed packing
 * expressions, e.g. (t0 << 24) + (t1 << 16) + (t2 << 8) + t3 or
 * (t0 << 16) + t1, yield results in memory order once stored.
 */
#ifdef _LITTLE_ENDIAN

/***************************************************************/
/*
 * s0 and s1 each hold two source bytes loaded as one 16-bit unit.
 * Low byte = first byte in memory, so it is assigned to the tN that
 * ends up in the lowest-order position of the packed word.
 */
#define READ_U8_U8_ALIGN(table0, table1, table2, table3)        \
  t3 = table0[s0 & 0xFF];                                       \
  t2 = table1[s0>>8];                                           \
  t1 = table2[s1 & 0xFF];                                       \
  t0 = table3[s1>>8]

/***************************************************************/
/*
 * Same as above but for four consecutive bytes that start one byte into
 * s0: high byte of s0, both bytes of s1, low byte of s2.
 */
#define READ_U8_U8_NOTALIGN(table0, table1, table2, table3)     \
  t3 = table0[s0 >> 8];                                         \
  t2 = table1[s1 & 0xFF];                                       \
  t1 = table2[s1 >> 8];                                         \
  t0 = table3[s2 & 0xFF]

/***************************************************************/
/*
 * s0 holds four source bytes loaded as one 32-bit unit.  Each byte is
 * extracted already multiplied by 2 (mask 0x1FE) and used as a byte
 * offset into a table of 16-bit entries.  Bytes 0..3 go to t1, t0, t3,
 * t2 respectively.
 */
#define READ_U8_S16_ALIGN(table0, table1, table2, table3)       \
  t1 = *(mlib_u16*)((mlib_u8*)table0 + ((s0 << 1) & 0x1FE));    \
  t0 = *(mlib_u16*)((mlib_u8*)table1 + ((s0 >> 7) & 0x1FE));    \
  t3 = *(mlib_u16*)((mlib_u8*)table2 + ((s0 >> 15)  & 0x1FE));  \
  t2 = *(mlib_u16*)((mlib_u8*)table3 + ((s0 >> 23)  & 0x1FE))

/***************************************************************/
/*
 * Variant that skips the lowest byte of s0: uses bytes 1..3 of s0 and
 * byte 0 of s1.
 */
#define READ_U8_S16_NOTALIGN(table0, table1, table2, table3)    \
  t1 = *(mlib_u16*)((mlib_u8*)table0 + ((s0 >> 7) & 0x1FE));    \
  t0 = *(mlib_u16*)((mlib_u8*)table1 + ((s0 >> 15)  & 0x1FE));  \
  t3 = *(mlib_u16*)((mlib_u8*)table2 + ((s0 >> 23)  & 0x1FE));  \
  t2 = *(mlib_u16*)((mlib_u8*)table3 + ((s1 << 1) & 0x1FE))

/***************************************************************/
/*
 * Looks up the remaining bytes 1..3 of s1 (into t1, t0, t2).
 */
#define ADD_READ_U8_S16_NOTALIGN(table0, table1, table2)        \
  t1 = *(mlib_u16*)((mlib_u8*)table0 + ((s1 >> 7) & 0x1FE));    \
  t0 = *(mlib_u16*)((mlib_u8*)table1 + ((s1 >> 15)  & 0x1FE));  \
  t2 = *(mlib_u16*)((mlib_u8*)table2 + ((s1 >> 23)  & 0x1FE))

/***************************************************************/
/*
 * s0 holds four source bytes.  Each byte is extracted already multiplied
 * by 4 (mask 0x3FC) and used as a byte offset into a table of 32-bit
 * entries.  Bytes 0..3 go to t0..t3 in order.
 */
#define READ_U8_S32(table0, table1, table2, table3)             \
  t0 = *(mlib_u32*)((mlib_u8*)table0 + ((s0 << 2) & 0x3FC));    \
  t1 = *(mlib_u32*)((mlib_u8*)table1 + ((s0 >> 6) & 0x3FC));    \
  t2 = *(mlib_u32*)((mlib_u8*)table2 + ((s0 >> 14)  & 0x3FC));  \
  t3 = *(mlib_u32*)((mlib_u8*)table3 + ((s0 >> 22)  & 0x3FC))

#else /* _LITTLE_ENDIAN */

/***************************************************************/
/*
 * Big-endian versions: the high-order byte of a loaded unit is the
 * first byte in memory, so t0..t3 are filled starting from the
 * high-order end of s0.
 */
#define READ_U8_U8_ALIGN(table0, table1, table2, table3)        \
  t0 = table0[s0>>8];                                           \
  t1 = table1[s0 & 0xFF];                                       \
  t2 = table2[s1>>8];                                           \
  t3 = table3[s1 & 0xFF]

/***************************************************************/
/*
 * Four consecutive bytes starting one byte into s0: low byte of s0,
 * both bytes of s1, high byte of s2.
 */
#define READ_U8_U8_NOTALIGN(table0, table1, table2, table3)     \
  t0 = table0[s0 & 0xFF];                                       \
  t1 = table1[s1 >> 8];                                         \
  t2 = table2[s1 & 0xFF];                                       \
  t3 = table3[s2 >> 8]

/***************************************************************/
/*
 * Four bytes of s0, highest-order first, each pre-multiplied by 2 and
 * used as a byte offset into a table of 16-bit entries.
 */
#define READ_U8_S16_ALIGN(table0, table1, table2, table3)       \
  t0 = *(mlib_u16*)((mlib_u8*)table0 + ((s0 >> 23) & 0x1FE));   \
  t1 = *(mlib_u16*)((mlib_u8*)table1 + ((s0 >> 15) & 0x1FE));   \
  t2 = *(mlib_u16*)((mlib_u8*)table2 + ((s0 >> 7)  & 0x1FE));   \
  t3 = *(mlib_u16*)((mlib_u8*)table3 + ((s0 << 1)  & 0x1FE))

/***************************************************************/
/*
 * Variant that skips the highest-order byte of s0: uses the lower three
 * bytes of s0 followed by the highest-order byte of s1.
 */
#define READ_U8_S16_NOTALIGN(table0, table1, table2, table3)    \
  t0 = *(mlib_u16*)((mlib_u8*)table0 + ((s0 >> 15) & 0x1FE));   \
  t1 = *(mlib_u16*)((mlib_u8*)table1 + ((s0 >> 7)  & 0x1FE));   \
  t2 = *(mlib_u16*)((mlib_u8*)table2 + ((s0 << 1)  & 0x1FE));   \
  t3 = *(mlib_u16*)((mlib_u8*)table3 + ((s1 >> 23) & 0x1FE))

/***************************************************************/
/*
 * Looks up the lower three bytes of s1 (into t0, t1, t2).
 */
#define ADD_READ_U8_S16_NOTALIGN(table0, table1, table2)        \
  t0 = *(mlib_u16*)((mlib_u8*)table0 + ((s1 >> 15) & 0x1FE));   \
  t1 = *(mlib_u16*)((mlib_u8*)table1 + ((s1 >> 7)  & 0x1FE));   \
  t2 = *(mlib_u16*)((mlib_u8*)table2 + ((s1 << 1)  & 0x1FE))

/***************************************************************/
/*
 * Four bytes of s0, highest-order first, each pre-multiplied by 4 and
 * used as a byte offset into a table of 32-bit entries.
 */
#define READ_U8_S32(table0, table1, table2, table3)             \
  t0 = *(mlib_u32*)((mlib_u8*)table0 + ((s0 >> 22) & 0x3FC));   \
  t1 = *(mlib_u32*)((mlib_u8*)table1 + ((s0 >> 14) & 0x3FC));   \
  t2 = *(mlib_u32*)((mlib_u8*)table2 + ((s0 >> 6)  & 0x3FC));   \
  t3 = *(mlib_u32*)((mlib_u8*)table3 + ((s0 << 2)  & 0x3FC))

#endif /* _LITTLE_ENDIAN */
 210 
 211 /***************************************************************/
/*
 * U8 -> U8 table look-up.
 *
 *   src, slb - source samples and the amount added to src after each row
 *   dst, dlb - destination samples and the amount added to dst after each row
 *   xsize    - pixels per row
 *   ysize    - number of rows
 *   csize    - interleaved channels per pixel
 *   table    - table[c] is indexed with the source byte of channel c
 *
 * Rows holding fewer than 9 samples (xsize * csize < 9) go through the
 * generic MLIB_C_IMAGELOOKUP macro.  Otherwise there is one dedicated
 * branch per csize in 1..4: dst is first brought to a 4-byte boundary
 * one byte at a time, then four look-ups are packed into each 32-bit
 * store while the source is read in 16-bit units (with a separate path
 * when the source pointer is odd), and a byte-wise tail finishes the row.
 *
 * If csize is outside 1..4 and xsize * csize >= 9, no branch matches and
 * nothing is written.
 */
void mlib_c_ImageLookUp_U8_U8(const mlib_u8 *src,
                              mlib_s32      slb,
                              mlib_u8       *dst,
                              mlib_s32      dlb,
                              mlib_s32      xsize,
                              mlib_s32      ysize,
                              mlib_s32      csize,
                              const mlib_u8 **table)
{

  if (xsize * csize < 9) {
    /* short rows: not worth the alignment set-up */
    MLIB_C_IMAGELOOKUP(mlib_u8, mlib_u8, table);
  }
  else if (csize == 1) {
    mlib_s32 i, j;

    for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
      mlib_u32 *da;
      mlib_u16 *sa;
      mlib_u8 *tab = (mlib_u8 *) table[0];
      mlib_u32 s0, s1, s2, t0, t1, t2, t3, t;
      mlib_s32 off;
      mlib_s32 size = xsize;
      mlib_u8 *dp = dst, *sp = (void *)src;

      /* number of leading bytes until dst is 4-byte aligned (0..3) */
      off = (mlib_s32) ((4 - ((mlib_addr) dst & 3)) & 3);

      for (i = 0; i < off; i++, sp++) {
        *dp++ = tab[sp[0]];
        size--;
      }

      da = (mlib_u32 *) dp;

      if (((mlib_addr) sp & 1) == 0) {
        /* source is 2-byte aligned: read it as 16-bit units */
        sa = (mlib_u16 *) sp;

        s0 = sa[0];
        s1 = sa[1];
        sa += 2;

        /*
         * Each iteration stores the word built from the already loaded
         * s0/s1 and loads the next two 16-bit units; the last word is
         * stored after the loop.
         */
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
        for (i = 0; i < size - 7; i += 4, da++, sa += 2) {
          READ_U8_U8_ALIGN(tab, tab, tab, tab);
          t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
          s0 = sa[0];
          s1 = sa[1];
          da[0] = t;
        }

        READ_U8_U8_ALIGN(tab, tab, tab, tab);
        t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
        da[0] = t;
        da++;
        dp = (mlib_u8 *) da;
        sp = (mlib_u8 *) sa;
        i += 4;                     /* account for the word stored after the loop */
        /* byte-wise tail */
        for (; i < size; i++, dp++, sp++)
          dp[0] = tab[sp[0]];

      }
      else {
        /*
         * Source pointer is odd: read 16-bit units starting one byte
         * earlier and skip the first byte of s0 in the look-ups.
         * NOTE(review): when off == 0 the byte at sp - 1 precedes the
         * start of the source row; its value is loaded but never used -
         * confirm callers tolerate this read.
         */
        sa = (mlib_u16 *) (sp - 1);

        s0 = sa[0];
        s1 = sa[1];
        s2 = sa[2];
        sa += 3;

#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
        for (i = 0; i < size - 8; i += 4, da++, sa += 2) {
          READ_U8_U8_NOTALIGN(tab, tab, tab, tab);
          t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
          s0 = s2;
          s1 = sa[0];
          s2 = sa[1];
          da[0] = t;
        }

        READ_U8_U8_NOTALIGN(tab, tab, tab, tab);
        t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
        da[0] = t;
        da++;
        dp = (mlib_u8 *) da;
        /* second byte of s2 has been loaded but not yet looked up */
#ifdef _LITTLE_ENDIAN
        *dp++ = tab[s2 >> 8];
#else
        *dp++ = tab[s2 & 0xFF];
#endif /* _LITTLE_ENDIAN */
        sp = (mlib_u8 *) sa;
        i += 5;                     /* last word plus the extra byte above */
        for (; i < size; i++, dp++, sp++)
          dp[0] = tab[sp[0]];
      }
    }

  }
  else if (csize == 2) {
    mlib_s32 i, j;

    for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
      mlib_u32 *da;
      mlib_u16 *sa;
      mlib_u8 *tab0 = (mlib_u8 *) table[0];
      mlib_u8 *tab1 = (mlib_u8 *) table[1];
      mlib_u8 *tab;
      mlib_u32 s0, s1, s2, t0, t1, t2, t3, t;
      mlib_s32 off;
      mlib_s32 size = xsize * 2;    /* samples (bytes) per row */
      mlib_u8 *dp = dst, *sp = (void *)src;

      off = (mlib_s32) ((4 - ((mlib_addr) dst & 3)) & 3);

      /* leading bytes, two channels at a time */
      for (i = 0; i < off - 1; i += 2, sp += 2) {
        *dp++ = tab0[sp[0]];
        *dp++ = tab1[sp[1]];
        size -= 2;
      }

      /*
       * An odd number of leading bytes leaves the stream starting on
       * channel 1, so the two tables are swapped.
       */
      if ((off & 1) != 0) {
        *dp++ = tab0[sp[0]];
        size--;
        sp++;
        tab = tab0;
        tab0 = tab1;
        tab1 = tab;
      }

      da = (mlib_u32 *) dp;

      if (((mlib_addr) sp & 1) == 0) {
        sa = (mlib_u16 *) sp;

        s0 = sa[0];
        s1 = sa[1];
        sa += 2;

#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
        for (i = 0; i < size - 7; i += 4, da++, sa += 2) {
          READ_U8_U8_ALIGN(tab0, tab1, tab0, tab1);
          t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
          s0 = sa[0];
          s1 = sa[1];
          da[0] = t;
        }

        READ_U8_U8_ALIGN(tab0, tab1, tab0, tab1);
        t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
        da[0] = t;
        da++;
        dp = (mlib_u8 *) da;
        sp = (mlib_u8 *) sa;
        i += 4;

        /* tail: whole pairs, then possibly one last sample */
        for (; i < size - 1; i += 2, sp += 2) {
          *dp++ = tab0[sp[0]];
          *dp++ = tab1[sp[1]];
        }

        if (i < size)
          *dp = tab0[(*sp)];

      }
      else {
        /* odd source pointer: see the csize == 1 branch for the scheme */
        sa = (mlib_u16 *) (sp - 1);

        s0 = sa[0];
        s1 = sa[1];
        s2 = sa[2];
        sa += 3;

#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
        for (i = 0; i < size - 8; i += 4, da++, sa += 2) {
          READ_U8_U8_NOTALIGN(tab0, tab1, tab0, tab1);
          t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
          s0 = s2;
          s1 = sa[0];
          s2 = sa[1];
          da[0] = t;
        }

        READ_U8_U8_NOTALIGN(tab0, tab1, tab0, tab1);
        t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
        da[0] = t;
        da++;
        dp = (mlib_u8 *) da;
        /* pending second byte of s2 belongs to the tab0 channel */
#ifdef _LITTLE_ENDIAN
        *dp++ = tab0[s2 >> 8];
#else
        *dp++ = tab0[s2 & 0xFF];
#endif /* _LITTLE_ENDIAN */
        sp = (mlib_u8 *) sa;
        i += 5;

        /* after the extra byte the next sample uses tab1 */
        for (; i < size - 1; i += 2, sp += 2) {
          *dp++ = tab1[sp[0]];
          *dp++ = tab0[sp[1]];
        }

        if (i < size)
          *dp = tab1[(*sp)];
      }
    }

  }
  else if (csize == 3) {
    mlib_s32 i, j;

    for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
      mlib_u32 *da;
      mlib_u16 *sa;
      mlib_u8 *tab0 = (mlib_u8 *) table[0];
      mlib_u8 *tab1 = (mlib_u8 *) table[1];
      mlib_u8 *tab2 = (mlib_u8 *) table[2];
      mlib_u8 *tab;
      mlib_u32 s0, s1, s2, t0, t1, t2, t3, t;
      mlib_s32 off;
      mlib_s32 size = xsize * 3;    /* samples (bytes) per row */
      mlib_u8 *dp = dst, *sp = (void *)src;

      off = (mlib_s32) ((4 - ((mlib_addr) dst & 3)) & 3);

      /*
       * Handle the leading bytes and rotate tab0..tab2 so that tab0 is
       * the table for the next sample to be processed.
       */
      if (off == 1) {
        *dp++ = tab0[sp[0]];
        tab = tab0;
        tab0 = tab1;
        tab1 = tab2;
        tab2 = tab;
        size--;
        sp++;
      }
      else if (off == 2) {
        *dp++ = tab0[sp[0]];
        *dp++ = tab1[sp[1]];
        tab = tab2;
        tab2 = tab1;
        tab1 = tab0;
        tab0 = tab;
        size -= 2;
        sp += 2;
      }
      else if (off == 3) {
        /* a whole pixel consumed: no rotation needed */
        *dp++ = tab0[sp[0]];
        *dp++ = tab1[sp[1]];
        *dp++ = tab2[sp[2]];
        size -= 3;
        sp += 3;
      }

      da = (mlib_u32 *) dp;

      if (((mlib_addr) sp & 1) == 0) {
        sa = (mlib_u16 *) sp;

        s0 = sa[0];
        s1 = sa[1];
        sa += 2;

        /*
         * Four samples per word with three channels: the channel phase
         * advances by one per word, so the tables are rotated by one
         * position after every word.
         */
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
        for (i = 0; i < size - 7; i += 4, da++, sa += 2) {
          READ_U8_U8_ALIGN(tab0, tab1, tab2, tab0);
          t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
          tab = tab0;
          tab0 = tab1;
          tab1 = tab2;
          tab2 = tab;
          s0 = sa[0];
          s1 = sa[1];
          da[0] = t;
        }

        READ_U8_U8_ALIGN(tab0, tab1, tab2, tab0);
        t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
        da[0] = t;
        da++;
        dp = (mlib_u8 *) da;
        sp = (mlib_u8 *) sa;
        i += 4;

        /* tables were not rotated after the last word: tail starts at tab1 */
        if (i < size) {
          *dp++ = tab1[(*sp)];
          i++;
          sp++;
        }

        if (i < size) {
          *dp++ = tab2[(*sp)];
          i++;
          sp++;
        }

        if (i < size) {
          *dp++ = tab0[(*sp)];
        }

      }
      else {
        /* odd source pointer: see the csize == 1 branch for the scheme */
        sa = (mlib_u16 *) (sp - 1);

        s0 = sa[0];
        s1 = sa[1];
        s2 = sa[2];
        sa += 3;

#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
        for (i = 0; i < size - 8; i += 4, da++, sa += 2) {
          READ_U8_U8_NOTALIGN(tab0, tab1, tab2, tab0);
          t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
          tab = tab0;
          tab0 = tab1;
          tab1 = tab2;
          tab2 = tab;
          s0 = s2;
          s1 = sa[0];
          s2 = sa[1];
          da[0] = t;
        }

        READ_U8_U8_NOTALIGN(tab0, tab1, tab2, tab0);
        t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
        da[0] = t;
        da++;
        dp = (mlib_u8 *) da;
        /* pending second byte of s2 is the sample after the last word: tab1 */
#ifdef _LITTLE_ENDIAN
        *dp++ = tab1[s2 >> 8];
#else
        *dp++ = tab1[s2 & 0xFF];
#endif /* _LITTLE_ENDIAN */
        sp = (mlib_u8 *) sa;
        i += 5;

        if (i < size) {
          *dp++ = tab2[(*sp)];
          i++;
          sp++;
        }

        if (i < size) {
          *dp++ = tab0[(*sp)];
          i++;
          sp++;
        }

        if (i < size) {
          *dp = tab1[(*sp)];
        }
      }
    }

  }
  else if (csize == 4) {
    mlib_s32 i, j;

    for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
      mlib_u32 *da;
      mlib_u16 *sa;
      mlib_u8 *tab0 = (mlib_u8 *) table[0];
      mlib_u8 *tab1 = (mlib_u8 *) table[1];
      mlib_u8 *tab2 = (mlib_u8 *) table[2];
      mlib_u8 *tab3 = (mlib_u8 *) table[3];
      mlib_u8 *tab;
      mlib_u32 s0, s1, s2, t0, t1, t2, t3, t;
      mlib_s32 off;
      mlib_s32 size = xsize * 4;    /* samples (bytes) per row */
      mlib_u8 *dp = dst, *sp = (void *)src;

      off = (mlib_s32) ((4 - ((mlib_addr) dst & 3)) & 3);

      /*
       * Handle the leading bytes and rotate tab0..tab3 by off positions
       * so that tab0 is the table for the next sample.  One word then
       * covers exactly four samples, so no rotation is needed inside
       * the main loops.
       */
      if (off == 1) {
        *dp++ = tab0[sp[0]];
        tab = tab0;
        tab0 = tab1;
        tab1 = tab2;
        tab2 = tab3;
        tab3 = tab;
        size--;
        sp++;
      }
      else if (off == 2) {
        *dp++ = tab0[sp[0]];
        *dp++ = tab1[sp[1]];
        tab = tab0;
        tab0 = tab2;
        tab2 = tab;
        tab = tab1;
        tab1 = tab3;
        tab3 = tab;
        size -= 2;
        sp += 2;
      }
      else if (off == 3) {
        *dp++ = tab0[sp[0]];
        *dp++ = tab1[sp[1]];
        *dp++ = tab2[sp[2]];
        tab = tab3;
        tab3 = tab2;
        tab2 = tab1;
        tab1 = tab0;
        tab0 = tab;
        size -= 3;
        sp += 3;
      }

      da = (mlib_u32 *) dp;

      if (((mlib_addr) sp & 1) == 0) {
        sa = (mlib_u16 *) sp;

        s0 = sa[0];
        s1 = sa[1];
        sa += 2;

#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
        for (i = 0; i < size - 7; i += 4, da++, sa += 2) {
          READ_U8_U8_ALIGN(tab0, tab1, tab2, tab3);
          t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
          s0 = sa[0];
          s1 = sa[1];
          da[0] = t;
        }

        READ_U8_U8_ALIGN(tab0, tab1, tab2, tab3);
        t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
        da[0] = t;
        da++;
        dp = (mlib_u8 *) da;
        sp = (mlib_u8 *) sa;
        i += 4;

        /* up to three trailing samples */
        if (i < size) {
          *dp++ = tab0[(*sp)];
          i++;
          sp++;
        }

        if (i < size) {
          *dp++ = tab1[(*sp)];
          i++;
          sp++;
        }

        if (i < size) {
          *dp = tab2[(*sp)];
        }

      }
      else {
        /* odd source pointer: see the csize == 1 branch for the scheme */
        sa = (mlib_u16 *) (sp - 1);

        s0 = sa[0];
        s1 = sa[1];
        s2 = sa[2];
        sa += 3;

#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
        for (i = 0; i < size - 8; i += 4, da++, sa += 2) {
          READ_U8_U8_NOTALIGN(tab0, tab1, tab2, tab3);
          t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
          s0 = s2;
          s1 = sa[0];
          s2 = sa[1];
          da[0] = t;
        }

        READ_U8_U8_NOTALIGN(tab0, tab1, tab2, tab3);
        t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
        da[0] = t;
        da++;
        dp = (mlib_u8 *) da;
        /* pending second byte of s2 starts the next pixel: tab0 */
#ifdef _LITTLE_ENDIAN
        *dp++ = tab0[s2 >> 8];
#else
        *dp++ = tab0[s2 & 0xFF];
#endif /* _LITTLE_ENDIAN */
        sp = (mlib_u8 *) sa;
        i += 5;

        if (i < size) {
          *dp++ = tab1[(*sp)];
          i++;
          sp++;
        }

        if (i < size) {
          *dp++ = tab2[(*sp)];
          i++;
          sp++;
        }

        if (i < size) {
          *dp = tab3[(*sp)];
        }
      }
    }
  }
}
 724 
 725 /***************************************************************/
 726 void mlib_c_ImageLookUp_S16_U8(const mlib_s16 *src,
 727                                mlib_s32       slb,
 728                                mlib_u8        *dst,
 729                                mlib_s32       dlb,
 730                                mlib_s32       xsize,
 731                                mlib_s32       ysize,
 732                                mlib_s32       csize,
 733                                const mlib_u8  **table)
 734 {
 735   const mlib_u8 *table_base[4];
 736   mlib_s32 c;
 737 
 738   for (c = 0; c < csize; c++) {
 739     table_base[c] = &table[c][32768];
 740   }
 741 
 742   MLIB_C_IMAGELOOKUP(mlib_u8, mlib_s16, table_base);
 743 }
 744 
 745 /***************************************************************/
 746 void mlib_c_ImageLookUp_U16_U8(const mlib_u16 *src,
 747                                mlib_s32       slb,
 748                                mlib_u8        *dst,
 749                                mlib_s32       dlb,
 750                                mlib_s32       xsize,
 751                                mlib_s32       ysize,
 752                                mlib_s32       csize,
 753                                const mlib_u8  **table)
 754 {
 755   const mlib_u8 *table_base[4];
 756   mlib_s32 c;
 757 
 758   for (c = 0; c < csize; c++) {
 759     table_base[c] = &table[c][0];
 760   }
 761 
 762   MLIB_C_IMAGELOOKUP(mlib_u8, mlib_u16, table_base);
 763 }
 764 
 765 /***************************************************************/
 766 void mlib_c_ImageLookUp_S32_U8(const mlib_s32 *src,
 767                                mlib_s32       slb,
 768                                mlib_u8        *dst,
 769                                mlib_s32       dlb,
 770                                mlib_s32       xsize,
 771                                mlib_s32       ysize,
 772                                mlib_s32       csize,
 773                                const mlib_u8  **table)
 774 {
 775   const mlib_u8 *table_base[4];
 776   mlib_s32 c;
 777 
 778   for (c = 0; c < csize; c++) {
 779     table_base[c] = &table[c][TABLE_SHIFT_S32];
 780   }
 781 
 782   MLIB_C_IMAGELOOKUP(mlib_u8, mlib_s32, table_base);
 783 }
 784 
 785 /***************************************************************/
 786 void mlib_c_ImageLookUp_U8_S16(const mlib_u8  *src,
 787                                mlib_s32       slb,
 788                                mlib_s16       *dst,
 789                                mlib_s32       dlb,
 790                                mlib_s32       xsize,
 791                                mlib_s32       ysize,
 792                                mlib_s32       csize,
 793                                const mlib_s16 **table)
 794 {
 795 
 796   if (xsize * csize < 12) {
 797     MLIB_C_IMAGELOOKUP(mlib_s16, mlib_u8, table);
 798   }
 799   else if (csize == 1) {
 800     mlib_s32 i, j;
 801 
 802     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
 803       mlib_u32 *sa;
 804       mlib_u32 *da;
 805       mlib_u16 *tab = (mlib_u16 *) table[0];
 806       mlib_u32 s0, s1, t0, t1, t2, t3;
 807       mlib_u32 res1, res2;
 808       mlib_s32 off;
 809       mlib_s32 size = xsize;
 810       mlib_u16 *dp = (mlib_u16 *) dst;
 811       mlib_u8 *sp = (void *)src;
 812 
 813       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
 814 
 815       for (i = 0; i < off; i++, sp++) {
 816         *dp++ = tab[sp[0]];
 817         size--;
 818       }
 819 
 820       sa = (mlib_u32 *) sp;
 821 
 822       if (((mlib_addr) dp & 3) == 0) {
 823         da = (mlib_u32 *) dp;
 824 
 825         s0 = sa[0];
 826         sa++;
 827 
 828 #ifdef __SUNPRO_C
 829 #pragma pipeloop(0)
 830 #endif /* __SUNPRO_C */
 831         for (i = 0; i < size - 7; i += 4, da += 2, sa++) {
 832           READ_U8_S16_ALIGN(tab, tab, tab, tab);
 833           res1 = (t0 << 16) + t1;
 834           res2 = (t2 << 16) + t3;
 835           s0 = sa[0];
 836           da[0] = res1;
 837           da[1] = res2;
 838         }
 839 
 840         READ_U8_S16_ALIGN(tab, tab, tab, tab);
 841         res1 = (t0 << 16) + t1;
 842         res2 = (t2 << 16) + t3;
 843         da[0] = res1;
 844         da[1] = res2;
 845         da += 2;
 846         dp = (mlib_u16 *) da;
 847         sp = (mlib_u8 *) sa;
 848         i += 4;
 849         for (; i < size; i++, dp++, sp++)
 850           dp[0] = tab[sp[0]];
 851 
 852       }
 853       else {
 854 
 855         *dp++ = tab[(*sp)];
 856         size--;
 857         da = (mlib_u32 *) dp;
 858 
 859         s0 = sa[0];
 860         s1 = sa[1];
 861         sa += 2;
 862 
 863 #ifdef __SUNPRO_C
 864 #pragma pipeloop(0)
 865 #endif /* __SUNPRO_C */
 866         for (i = 0; i < size - 10; i += 4, da += 2, sa++) {
 867           READ_U8_S16_NOTALIGN(tab, tab, tab, tab);
 868           s0 = s1;
 869           res1 = (t0 << 16) + t1;
 870           res2 = (t2 << 16) + t3;
 871           s1 = sa[0];
 872           da[0] = res1;
 873           da[1] = res2;
 874         }
 875 
 876         READ_U8_S16_NOTALIGN(tab, tab, tab, tab);
 877         res1 = (t0 << 16) + t1;
 878         res2 = (t2 << 16) + t3;
 879         da[0] = res1;
 880         da[1] = res2;
 881         ADD_READ_U8_S16_NOTALIGN(tab, tab, tab);
 882         res1 = (t0 << 16) + t1;
 883         da[2] = res1;
 884         da += 3;
 885         dp = (mlib_u16 *) da;
 886         *dp++ = (mlib_u16) t2;
 887         sp = (mlib_u8 *) sa;
 888         i += 7;
 889         for (; i < size; i++, dp++, sp++)
 890           dp[0] = tab[sp[0]];
 891       }
 892     }
 893 
 894   }
 895   else if (csize == 2) {
 896     mlib_s32 i, j;
 897 
 898     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
 899       mlib_u32 *sa;
 900       mlib_u32 *da;
 901       mlib_u16 *tab0 = (mlib_u16 *) table[0];
 902       mlib_u16 *tab1 = (mlib_u16 *) table[1];
 903       mlib_u16 *tab;
 904       mlib_u32 s0, s1, t0, t1, t2, t3;
 905       mlib_u32 res1, res2;
 906       mlib_s32 off;
 907       mlib_s32 size = xsize * 2;
 908       mlib_u16 *dp = (mlib_u16 *) dst;
 909       mlib_u8 *sp = (void *)src;
 910 
 911       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
 912 
 913       for (i = 0; i < off - 1; i += 2, sp += 2) {
 914         *dp++ = tab0[sp[0]];
 915         *dp++ = tab1[sp[1]];
 916         size -= 2;
 917       }
 918 
 919       if ((off & 1) != 0) {
 920         *dp++ = tab0[*sp];
 921         size--;
 922         sp++;
 923         tab = tab0;
 924         tab0 = tab1;
 925         tab1 = tab;
 926       }
 927 
 928       sa = (mlib_u32 *) sp;
 929 
 930       if (((mlib_addr) dp & 3) == 0) {
 931         da = (mlib_u32 *) dp;
 932 
 933         s0 = sa[0];
 934         sa++;
 935 
 936 #ifdef __SUNPRO_C
 937 #pragma pipeloop(0)
 938 #endif /* __SUNPRO_C */
 939         for (i = 0; i < size - 7; i += 4, da += 2, sa++) {
 940           READ_U8_S16_ALIGN(tab0, tab1, tab0, tab1);
 941           res1 = (t0 << 16) + t1;
 942           res2 = (t2 << 16) + t3;
 943           s0 = sa[0];
 944           da[0] = res1;
 945           da[1] = res2;
 946         }
 947 
 948         READ_U8_S16_ALIGN(tab0, tab1, tab0, tab1);
 949         res1 = (t0 << 16) + t1;
 950         res2 = (t2 << 16) + t3;
 951         da[0] = res1;
 952         da[1] = res2;
 953         da += 2;
 954         dp = (mlib_u16 *) da;
 955         sp = (mlib_u8 *) sa;
 956         i += 4;
 957 
 958         for (; i < size - 1; i += 2, sp += 2) {
 959           *dp++ = tab0[sp[0]];
 960           *dp++ = tab1[sp[1]];
 961         }
 962 
 963         if (i < size)
 964           *dp = tab0[(*sp)];
 965 
 966       }
 967       else {
 968 
 969         *dp++ = tab0[(*sp)];
 970         size--;
 971         da = (mlib_u32 *) dp;
 972 
 973         s0 = sa[0];
 974         s1 = sa[1];
 975         sa += 2;
 976 
 977 #ifdef __SUNPRO_C
 978 #pragma pipeloop(0)
 979 #endif /* __SUNPRO_C */
 980         for (i = 0; i < size - 10; i += 4, da += 2, sa++) {
 981           READ_U8_S16_NOTALIGN(tab1, tab0, tab1, tab0);
 982           s0 = s1;
 983           res1 = (t0 << 16) + t1;
 984           res2 = (t2 << 16) + t3;
 985           s1 = sa[0];
 986           da[0] = res1;
 987           da[1] = res2;
 988         }
 989 
 990         READ_U8_S16_NOTALIGN(tab1, tab0, tab1, tab0);
 991         res1 = (t0 << 16) + t1;
 992         res2 = (t2 << 16) + t3;
 993         da[0] = res1;
 994         da[1] = res2;
 995         ADD_READ_U8_S16_NOTALIGN(tab1, tab0, tab1);
 996         res1 = (t0 << 16) + t1;
 997         da[2] = res1;
 998         da += 3;
 999         dp = (mlib_u16 *) da;
1000         *dp++ = (mlib_u16) t2;
1001         sp = (mlib_u8 *) sa;
1002         i += 7;
1003 
1004         for (; i < size - 1; i += 2, sp += 2) {
1005           *dp++ = tab0[sp[0]];
1006           *dp++ = tab1[sp[1]];
1007         }
1008 
1009         if (i < size)
1010           *dp = tab0[(*sp)];
1011       }
1012     }
1013 
1014   }
1015   else if (csize == 3) {
1016     mlib_s32 i, j;
1017 
1018     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
1019       mlib_u32 *sa;
1020       mlib_u32 *da;
1021       mlib_u16 *tab0 = (mlib_u16 *) table[0];
1022       mlib_u16 *tab1 = (mlib_u16 *) table[1];
1023       mlib_u16 *tab2 = (mlib_u16 *) table[2];
1024       mlib_u16 *tab;
1025       mlib_u32 s0, s1, t0, t1, t2, t3;
1026       mlib_u32 res1, res2;
1027       mlib_s32 off;
1028       mlib_s32 size = xsize * 3;
1029       mlib_u16 *dp = (mlib_u16 *) dst;
1030       mlib_u8 *sp = (void *)src;
1031 
1032       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
1033 
1034       if (off == 1) {
1035         *dp++ = tab0[(*sp)];
1036         tab = tab0;
1037         tab0 = tab1;
1038         tab1 = tab2;
1039         tab2 = tab;
1040         size--;
1041         sp++;
1042       }
1043       else if (off == 2) {
1044         *dp++ = tab0[sp[0]];
1045         *dp++ = tab1[sp[1]];
1046         tab = tab2;
1047         tab2 = tab1;
1048         tab1 = tab0;
1049         tab0 = tab;
1050         size -= 2;
1051         sp += 2;
1052       }
1053       else if (off == 3) {
1054         *dp++ = tab0[sp[0]];
1055         *dp++ = tab1[sp[1]];
1056         *dp++ = tab2[sp[2]];
1057         size -= 3;
1058         sp += 3;
1059       }
1060 
1061       sa = (mlib_u32 *) sp;
1062 
1063       if (((mlib_addr) dp & 3) == 0) {
1064         da = (mlib_u32 *) dp;
1065 
1066         s0 = sa[0];
1067         sa++;
1068 
1069 #ifdef __SUNPRO_C
1070 #pragma pipeloop(0)
1071 #endif /* __SUNPRO_C */
1072         for (i = 0; i < size - 7; i += 4, da += 2, sa++) {
1073           READ_U8_S16_ALIGN(tab0, tab1, tab2, tab0);
1074           res1 = (t0 << 16) + t1;
1075           res2 = (t2 << 16) + t3;
1076           tab = tab0;
1077           tab0 = tab1;
1078           tab1 = tab2;
1079           tab2 = tab;
1080           s0 = sa[0];
1081           da[0] = res1;
1082           da[1] = res2;
1083         }
1084 
1085         READ_U8_S16_ALIGN(tab0, tab1, tab2, tab0);
1086         res1 = (t0 << 16) + t1;
1087         res2 = (t2 << 16) + t3;
1088         da[0] = res1;
1089         da[1] = res2;
1090         da += 2;
1091         dp = (mlib_u16 *) da;
1092         sp = (mlib_u8 *) sa;
1093         i += 4;
1094 
1095         if (i < size) {
1096           *dp++ = tab1[(*sp)];
1097           i++;
1098           sp++;
1099         }
1100 
1101         if (i < size) {
1102           *dp++ = tab2[(*sp)];
1103           i++;
1104           sp++;
1105         }
1106 
1107         if (i < size) {
1108           *dp = tab0[(*sp)];
1109         }
1110 
1111       }
1112       else {
1113 
1114         *dp++ = tab0[(*sp)];
1115         size--;
1116         da = (mlib_u32 *) dp;
1117 
1118         s0 = sa[0];
1119         s1 = sa[1];
1120         sa += 2;
1121 
1122 #ifdef __SUNPRO_C
1123 #pragma pipeloop(0)
1124 #endif /* __SUNPRO_C */
1125         for (i = 0; i < size - 10; i += 4, da += 2, sa++) {
1126           READ_U8_S16_NOTALIGN(tab1, tab2, tab0, tab1);
1127           s0 = s1;
1128           res1 = (t0 << 16) + t1;
1129           res2 = (t2 << 16) + t3;
1130           tab = tab0;
1131           tab0 = tab1;
1132           tab1 = tab2;
1133           tab2 = tab;
1134           s1 = sa[0];
1135           da[0] = res1;
1136           da[1] = res2;
1137         }
1138 
1139         READ_U8_S16_NOTALIGN(tab1, tab2, tab0, tab1);
1140         res1 = (t0 << 16) + t1;
1141         res2 = (t2 << 16) + t3;
1142         da[0] = res1;
1143         da[1] = res2;
1144         ADD_READ_U8_S16_NOTALIGN(tab2, tab0, tab1);
1145         res1 = (t0 << 16) + t1;
1146         da[2] = res1;
1147         da += 3;
1148         dp = (mlib_u16 *) da;
1149         *dp++ = (mlib_u16) t2;
1150         sp = (mlib_u8 *) sa;
1151         i += 7;
1152 
1153         if (i < size) {
1154           *dp++ = tab2[(*sp)];
1155           i++;
1156           sp++;
1157         }
1158 
1159         if (i < size) {
1160           *dp++ = tab0[(*sp)];
1161           i++;
1162           sp++;
1163         }
1164 
1165         if (i < size) {
1166           *dp = tab1[(*sp)];
1167         }
1168       }
1169     }
1170 
1171   }
1172   else if (csize == 4) {
1173     mlib_s32 i, j;
1174 
1175     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
1176       mlib_u32 *sa;
1177       mlib_u32 *da;
1178       mlib_u16 *tab0 = (mlib_u16 *) table[0];
1179       mlib_u16 *tab1 = (mlib_u16 *) table[1];
1180       mlib_u16 *tab2 = (mlib_u16 *) table[2];
1181       mlib_u16 *tab3 = (mlib_u16 *) table[3];
1182       mlib_u16 *tab;
1183       mlib_u32 s0, s1, t0, t1, t2, t3;
1184       mlib_u32 res1, res2;
1185       mlib_s32 off;
1186       mlib_s32 size = xsize * 4;
1187       mlib_u16 *dp = (mlib_u16 *) dst;
1188       mlib_u8 *sp = (void *)src;
1189 
1190       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
1191 
1192       if (off == 1) {
1193         *dp++ = tab0[(*sp)];
1194         tab = tab0;
1195         tab0 = tab1;
1196         tab1 = tab2;
1197         tab2 = tab3;
1198         tab3 = tab;
1199         size--;
1200         sp++;
1201       }
1202       else if (off == 2) {
1203         *dp++ = tab0[sp[0]];
1204         *dp++ = tab1[sp[1]];
1205         tab = tab0;
1206         tab0 = tab2;
1207         tab2 = tab;
1208         tab = tab1;
1209         tab1 = tab3;
1210         tab3 = tab;
1211         size -= 2;
1212         sp += 2;
1213       }
1214       else if (off == 3) {
1215         *dp++ = tab0[sp[0]];
1216         *dp++ = tab1[sp[1]];
1217         *dp++ = tab2[sp[2]];
1218         tab = tab3;
1219         tab3 = tab2;
1220         tab2 = tab1;
1221         tab1 = tab0;
1222         tab0 = tab;
1223         size -= 3;
1224         sp += 3;
1225       }
1226 
1227       sa = (mlib_u32 *) sp;
1228 
1229       if (((mlib_addr) dp & 3) == 0) {
1230         da = (mlib_u32 *) dp;
1231 
1232         s0 = sa[0];
1233         sa++;
1234 
1235 #ifdef __SUNPRO_C
1236 #pragma pipeloop(0)
1237 #endif /* __SUNPRO_C */
1238         for (i = 0; i < size - 7; i += 4, da += 2, sa++) {
1239           READ_U8_S16_ALIGN(tab0, tab1, tab2, tab3);
1240           res1 = (t0 << 16) + t1;
1241           res2 = (t2 << 16) + t3;
1242           s0 = sa[0];
1243           da[0] = res1;
1244           da[1] = res2;
1245         }
1246 
1247         READ_U8_S16_ALIGN(tab0, tab1, tab2, tab3);
1248         res1 = (t0 << 16) + t1;
1249         res2 = (t2 << 16) + t3;
1250         da[0] = res1;
1251         da[1] = res2;
1252         da += 2;
1253         dp = (mlib_u16 *) da;
1254         sp = (mlib_u8 *) sa;
1255         i += 4;
1256 
1257         if (i < size) {
1258           *dp++ = tab0[(*sp)];
1259           i++;
1260           sp++;
1261         }
1262 
1263         if (i < size) {
1264           *dp++ = tab1[(*sp)];
1265           i++;
1266           sp++;
1267         }
1268 
1269         if (i < size) {
1270           *dp = tab2[(*sp)];
1271         }
1272 
1273       }
1274       else {
1275 
1276         *dp++ = tab0[(*sp)];
1277         size--;
1278         da = (mlib_u32 *) dp;
1279 
1280         s0 = sa[0];
1281         s1 = sa[1];
1282         sa += 2;
1283 
1284 #ifdef __SUNPRO_C
1285 #pragma pipeloop(0)
1286 #endif /* __SUNPRO_C */
1287         for (i = 0; i < size - 10; i += 4, da += 2, sa++) {
1288           READ_U8_S16_NOTALIGN(tab1, tab2, tab3, tab0);
1289           s0 = s1;
1290           res1 = (t0 << 16) + t1;
1291           res2 = (t2 << 16) + t3;
1292           s1 = sa[0];
1293           da[0] = res1;
1294           da[1] = res2;
1295         }
1296 
1297         READ_U8_S16_NOTALIGN(tab1, tab2, tab3, tab0);
1298         res1 = (t0 << 16) + t1;
1299         res2 = (t2 << 16) + t3;
1300         da[0] = res1;
1301         da[1] = res2;
1302         ADD_READ_U8_S16_NOTALIGN(tab1, tab2, tab3);
1303         res1 = (t0 << 16) + t1;
1304         da[2] = res1;
1305         da += 3;
1306         dp = (mlib_u16 *) da;
1307         *dp++ = (mlib_u16) t2;
1308         sp = (mlib_u8 *) sa;
1309         i += 7;
1310 
1311         if (i < size) {
1312           *dp++ = tab0[(*sp)];
1313           i++;
1314           sp++;
1315         }
1316 
1317         if (i < size) {
1318           *dp++ = tab1[(*sp)];
1319           i++;
1320           sp++;
1321         }
1322 
1323         if (i < size) {
1324           *dp = tab2[(*sp)];
1325         }
1326       }
1327     }
1328   }
1329 }
1330 
1331 /***************************************************************/
1332 void mlib_c_ImageLookUp_S16_S16(const mlib_s16 *src,
1333                                 mlib_s32       slb,
1334                                 mlib_s16       *dst,
1335                                 mlib_s32       dlb,
1336                                 mlib_s32       xsize,
1337                                 mlib_s32       ysize,
1338                                 mlib_s32       csize,
1339                                 const mlib_s16 **table)
1340 {
1341   const mlib_s16 *table_base[4];
1342   mlib_s32 c;
1343 
1344   for (c = 0; c < csize; c++) {
1345     table_base[c] = &table[c][32768];
1346   }
1347 
1348   MLIB_C_IMAGELOOKUP(mlib_s16, mlib_s16, table_base);
1349 }
1350 
1351 /***************************************************************/
1352 void mlib_c_ImageLookUp_U16_S16(const mlib_u16 *src,
1353                                 mlib_s32       slb,
1354                                 mlib_s16       *dst,
1355                                 mlib_s32       dlb,
1356                                 mlib_s32       xsize,
1357                                 mlib_s32       ysize,
1358                                 mlib_s32       csize,
1359                                 const mlib_s16 **table)
1360 {
1361   const mlib_s16 *table_base[4];
1362   mlib_s32 c;
1363 
1364   for (c = 0; c < csize; c++) {
1365     table_base[c] = &table[c][0];
1366   }
1367 
1368   MLIB_C_IMAGELOOKUP(mlib_s16, mlib_u16, table_base);
1369 }
1370 
1371 /***************************************************************/
1372 void mlib_c_ImageLookUp_S32_S16(const mlib_s32 *src,
1373                                 mlib_s32       slb,
1374                                 mlib_s16       *dst,
1375                                 mlib_s32       dlb,
1376                                 mlib_s32       xsize,
1377                                 mlib_s32       ysize,
1378                                 mlib_s32       csize,
1379                                 const mlib_s16 **table)
1380 {
1381   const mlib_s16 *table_base[4];
1382   mlib_s32 c;
1383 
1384   for (c = 0; c < csize; c++) {
1385     table_base[c] = &table[c][TABLE_SHIFT_S32];
1386   }
1387 
1388   MLIB_C_IMAGELOOKUP(mlib_s16, mlib_s32, table_base);
1389 }
1390 
1391 /***************************************************************/
1392 void mlib_c_ImageLookUp_S16_U16(const mlib_s16 *src,
1393                                 mlib_s32       slb,
1394                                 mlib_u16       *dst,
1395                                 mlib_s32       dlb,
1396                                 mlib_s32       xsize,
1397                                 mlib_s32       ysize,
1398                                 mlib_s32       csize,
1399                                 const mlib_s16 **table)
1400 {
1401   const mlib_s16 *table_base[4];
1402   mlib_s32 c;
1403 
1404   for (c = 0; c < csize; c++) {
1405     table_base[c] = &table[c][32768];
1406   }
1407 
1408   MLIB_C_IMAGELOOKUP(mlib_u16, mlib_s16, table_base);
1409 }
1410 
1411 /***************************************************************/
1412 void mlib_c_ImageLookUp_U16_U16(const mlib_u16 *src,
1413                                 mlib_s32       slb,
1414                                 mlib_u16       *dst,
1415                                 mlib_s32       dlb,
1416                                 mlib_s32       xsize,
1417                                 mlib_s32       ysize,
1418                                 mlib_s32       csize,
1419                                 const mlib_s16 **table)
1420 {
1421   const mlib_s16 *table_base[4];
1422   mlib_s32 c;
1423 
1424   for (c = 0; c < csize; c++) {
1425     table_base[c] = &table[c][0];
1426   }
1427 
1428   MLIB_C_IMAGELOOKUP(mlib_u16, mlib_u16, table_base);
1429 }
1430 
1431 /***************************************************************/
1432 void mlib_c_ImageLookUp_S32_U16(const mlib_s32 *src,
1433                                 mlib_s32       slb,
1434                                 mlib_u16       *dst,
1435                                 mlib_s32       dlb,
1436                                 mlib_s32       xsize,
1437                                 mlib_s32       ysize,
1438                                 mlib_s32       csize,
1439                                 const mlib_s16 **table)
1440 {
1441   const mlib_s16 *table_base[4];
1442   mlib_s32 c;
1443 
1444   for (c = 0; c < csize; c++) {
1445     table_base[c] = &table[c][TABLE_SHIFT_S32];
1446   }
1447 
1448   MLIB_C_IMAGELOOKUP(mlib_u16, mlib_s32, table_base);
1449 }
1450 
1451 /***************************************************************/
1452 void mlib_c_ImageLookUp_U8_S32(const mlib_u8  *src,
1453                                mlib_s32       slb,
1454                                mlib_s32       *dst,
1455                                mlib_s32       dlb,
1456                                mlib_s32       xsize,
1457                                mlib_s32       ysize,
1458                                mlib_s32       csize,
1459                                const mlib_s32 **table)
1460 {
1461 
1462   if (xsize * csize < 7) {
1463     MLIB_C_IMAGELOOKUP(mlib_s32, mlib_u8, table);
1464   }
1465   else if (csize == 1) {
1466     mlib_s32 i, j;
1467 
1468     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
1469       mlib_u32 *sa;
1470       mlib_u32 *tab = (mlib_u32 *) table[0];
1471       mlib_u32 s0, t0, t1, t2, t3;
1472       mlib_s32 off;
1473       mlib_s32 size = xsize;
1474       mlib_u32 *dp = (mlib_u32 *) dst;
1475       mlib_u8 *sp = (void *)src;
1476 
1477       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
1478 
1479       for (i = 0; i < off; i++, sp++) {
1480         *dp++ = tab[sp[0]];
1481         size--;
1482       }
1483 
1484       sa = (mlib_u32 *) sp;
1485 
1486       s0 = sa[0];
1487       sa++;
1488 
1489 #ifdef __SUNPRO_C
1490 #pragma pipeloop(0)
1491 #endif /* __SUNPRO_C */
1492       for (i = 0; i < size - 7; i += 4, dp += 4, sa++) {
1493         READ_U8_S32(tab, tab, tab, tab);
1494         s0 = sa[0];
1495         dp[0] = t0;
1496         dp[1] = t1;
1497         dp[2] = t2;
1498         dp[3] = t3;
1499       }
1500 
1501       READ_U8_S32(tab, tab, tab, tab);
1502       dp[0] = t0;
1503       dp[1] = t1;
1504       dp[2] = t2;
1505       dp[3] = t3;
1506       dp += 4;
1507       sp = (mlib_u8 *) sa;
1508       i += 4;
1509       for (; i < size; i++, dp++, sp++)
1510         dp[0] = tab[sp[0]];
1511     }
1512 
1513   }
1514   else if (csize == 2) {
1515     mlib_s32 i, j;
1516 
1517     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
1518       mlib_u32 *sa;
1519       mlib_u32 *tab0 = (mlib_u32 *) table[0];
1520       mlib_u32 *tab1 = (mlib_u32 *) table[1];
1521       mlib_u32 *tab;
1522       mlib_u32 s0, t0, t1, t2, t3;
1523       mlib_s32 off;
1524       mlib_s32 size = xsize * 2;
1525       mlib_u32 *dp = (mlib_u32 *) dst;
1526       mlib_u8 *sp = (void *)src;
1527 
1528       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
1529 
1530       for (i = 0; i < off - 1; i += 2, sp += 2) {
1531         *dp++ = tab0[sp[0]];
1532         *dp++ = tab1[sp[1]];
1533         size -= 2;
1534       }
1535 
1536       if ((off & 1) != 0) {
1537         *dp++ = tab0[*sp];
1538         size--;
1539         sp++;
1540         tab = tab0;
1541         tab0 = tab1;
1542         tab1 = tab;
1543       }
1544 
1545       sa = (mlib_u32 *) sp;
1546 
1547       s0 = sa[0];
1548       sa++;
1549 
1550 #ifdef __SUNPRO_C
1551 #pragma pipeloop(0)
1552 #endif /* __SUNPRO_C */
1553       for (i = 0; i < size - 7; i += 4, dp += 4, sa++) {
1554         READ_U8_S32(tab0, tab1, tab0, tab1);
1555         s0 = sa[0];
1556         dp[0] = t0;
1557         dp[1] = t1;
1558         dp[2] = t2;
1559         dp[3] = t3;
1560       }
1561 
1562       READ_U8_S32(tab0, tab1, tab0, tab1);
1563       dp[0] = t0;
1564       dp[1] = t1;
1565       dp[2] = t2;
1566       dp[3] = t3;
1567       dp += 4;
1568       sp = (mlib_u8 *) sa;
1569       i += 4;
1570 
1571       for (; i < size - 1; i += 2, sp += 2) {
1572         *dp++ = tab0[sp[0]];
1573         *dp++ = tab1[sp[1]];
1574       }
1575 
1576       if (i < size)
1577         *dp = tab0[(*sp)];
1578     }
1579 
1580   }
1581   else if (csize == 3) {
1582     mlib_s32 i, j;
1583 
1584     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
1585       mlib_u32 *sa;
1586       mlib_u32 *tab0 = (mlib_u32 *) table[0];
1587       mlib_u32 *tab1 = (mlib_u32 *) table[1];
1588       mlib_u32 *tab2 = (mlib_u32 *) table[2];
1589       mlib_u32 *tab;
1590       mlib_u32 s0, t0, t1, t2, t3;
1591       mlib_s32 off;
1592       mlib_s32 size = xsize * 3;
1593       mlib_u32 *dp = (mlib_u32 *) dst;
1594       mlib_u8 *sp = (void *)src;
1595 
1596       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
1597 
1598       if (off == 1) {
1599         *dp++ = tab0[(*sp)];
1600         tab = tab0;
1601         tab0 = tab1;
1602         tab1 = tab2;
1603         tab2 = tab;
1604         size--;
1605         sp++;
1606       }
1607       else if (off == 2) {
1608         *dp++ = tab0[sp[0]];
1609         *dp++ = tab1[sp[1]];
1610         tab = tab2;
1611         tab2 = tab1;
1612         tab1 = tab0;
1613         tab0 = tab;
1614         size -= 2;
1615         sp += 2;
1616       }
1617       else if (off == 3) {
1618         *dp++ = tab0[sp[0]];
1619         *dp++ = tab1[sp[1]];
1620         *dp++ = tab2[sp[2]];
1621         size -= 3;
1622         sp += 3;
1623       }
1624 
1625       sa = (mlib_u32 *) sp;
1626 
1627       s0 = sa[0];
1628       sa++;
1629 
1630 #ifdef __SUNPRO_C
1631 #pragma pipeloop(0)
1632 #endif /* __SUNPRO_C */
1633       for (i = 0; i < size - 7; i += 4, dp += 4, sa++) {
1634         READ_U8_S32(tab0, tab1, tab2, tab0);
1635         tab = tab0;
1636         tab0 = tab1;
1637         tab1 = tab2;
1638         tab2 = tab;
1639         s0 = sa[0];
1640         dp[0] = t0;
1641         dp[1] = t1;
1642         dp[2] = t2;
1643         dp[3] = t3;
1644       }
1645 
1646       READ_U8_S32(tab0, tab1, tab2, tab0);
1647       dp[0] = t0;
1648       dp[1] = t1;
1649       dp[2] = t2;
1650       dp[3] = t3;
1651       dp += 4;
1652       sp = (mlib_u8 *) sa;
1653       i += 4;
1654 
1655       if (i < size) {
1656         *dp++ = tab1[(*sp)];
1657         i++;
1658         sp++;
1659       }
1660 
1661       if (i < size) {
1662         *dp++ = tab2[(*sp)];
1663         i++;
1664         sp++;
1665       }
1666 
1667       if (i < size) {
1668         *dp = tab0[(*sp)];
1669       }
1670     }
1671 
1672   }
1673   else if (csize == 4) {
1674     mlib_s32 i, j;
1675 
1676     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
1677       mlib_u32 *sa;
1678       mlib_u32 *tab0 = (mlib_u32 *) table[0];
1679       mlib_u32 *tab1 = (mlib_u32 *) table[1];
1680       mlib_u32 *tab2 = (mlib_u32 *) table[2];
1681       mlib_u32 *tab3 = (mlib_u32 *) table[3];
1682       mlib_u32 *tab;
1683       mlib_u32 s0, t0, t1, t2, t3;
1684       mlib_s32 off;
1685       mlib_s32 size = xsize * 4;
1686       mlib_u32 *dp = (mlib_u32 *) dst;
1687       mlib_u8 *sp = (void *)src;
1688 
1689       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
1690 
1691       if (off == 1) {
1692         *dp++ = tab0[(*sp)];
1693         tab = tab0;
1694         tab0 = tab1;
1695         tab1 = tab2;
1696         tab2 = tab3;
1697         tab3 = tab;
1698         size--;
1699         sp++;
1700       }
1701       else if (off == 2) {
1702         *dp++ = tab0[sp[0]];
1703         *dp++ = tab1[sp[1]];
1704         tab = tab0;
1705         tab0 = tab2;
1706         tab2 = tab;
1707         tab = tab1;
1708         tab1 = tab3;
1709         tab3 = tab;
1710         size -= 2;
1711         sp += 2;
1712       }
1713       else if (off == 3) {
1714         *dp++ = tab0[sp[0]];
1715         *dp++ = tab1[sp[1]];
1716         *dp++ = tab2[sp[2]];
1717         tab = tab3;
1718         tab3 = tab2;
1719         tab2 = tab1;
1720         tab1 = tab0;
1721         tab0 = tab;
1722         size -= 3;
1723         sp += 3;
1724       }
1725 
1726       sa = (mlib_u32 *) sp;
1727 
1728       s0 = sa[0];
1729       sa++;
1730 
1731 #ifdef __SUNPRO_C
1732 #pragma pipeloop(0)
1733 #endif /* __SUNPRO_C */
1734       for (i = 0; i < size - 7; i += 4, dp += 4, sa++) {
1735         READ_U8_S32(tab0, tab1, tab2, tab3);
1736         s0 = sa[0];
1737         dp[0] = t0;
1738         dp[1] = t1;
1739         dp[2] = t2;
1740         dp[3] = t3;
1741       }
1742 
1743       READ_U8_S32(tab0, tab1, tab2, tab3);
1744       dp[0] = t0;
1745       dp[1] = t1;
1746       dp[2] = t2;
1747       dp[3] = t3;
1748       dp += 4;
1749       sp = (mlib_u8 *) sa;
1750       i += 4;
1751 
1752       if (i < size) {
1753         *dp++ = tab0[(*sp)];
1754         i++;
1755         sp++;
1756       }
1757 
1758       if (i < size) {
1759         *dp++ = tab1[(*sp)];
1760         i++;
1761         sp++;
1762       }
1763 
1764       if (i < size) {
1765         *dp = tab2[(*sp)];
1766       }
1767     }
1768   }
1769 }
1770 
1771 /***************************************************************/
1772 void mlib_c_ImageLookUp_S16_S32(const mlib_s16 *src,
1773                                 mlib_s32       slb,
1774                                 mlib_s32       *dst,
1775                                 mlib_s32       dlb,
1776                                 mlib_s32       xsize,
1777                                 mlib_s32       ysize,
1778                                 mlib_s32       csize,
1779                                 const mlib_s32 **table)
1780 {
1781   const mlib_s32 *table_base[4];
1782   mlib_s32 c;
1783 
1784   for (c = 0; c < csize; c++) {
1785     table_base[c] = &table[c][32768];
1786   }
1787 
1788   MLIB_C_IMAGELOOKUP(mlib_s32, mlib_s16, table_base);
1789 }
1790 
1791 /***************************************************************/
1792 void mlib_c_ImageLookUp_U16_S32(const mlib_u16 *src,
1793                                 mlib_s32       slb,
1794                                 mlib_s32       *dst,
1795                                 mlib_s32       dlb,
1796                                 mlib_s32       xsize,
1797                                 mlib_s32       ysize,
1798                                 mlib_s32       csize,
1799                                 const mlib_s32 **table)
1800 {
1801   const mlib_s32 *table_base[4];
1802   mlib_s32 c;
1803 
1804   for (c = 0; c < csize; c++) {
1805     table_base[c] = &table[c][0];
1806   }
1807 
1808   MLIB_C_IMAGELOOKUP(mlib_s32, mlib_u16, table_base);
1809 }
1810 
1811 /***************************************************************/
1812 void mlib_c_ImageLookUp_S32_S32(const mlib_s32 *src,
1813                                 mlib_s32       slb,
1814                                 mlib_s32       *dst,
1815                                 mlib_s32       dlb,
1816                                 mlib_s32       xsize,
1817                                 mlib_s32       ysize,
1818                                 mlib_s32       csize,
1819                                 const mlib_s32 **table)
1820 {
1821   const mlib_s32 *table_base[4];
1822   mlib_s32 c;
1823 
1824   for (c = 0; c < csize; c++) {
1825     table_base[c] = &table[c][TABLE_SHIFT_S32];
1826   }
1827 
1828   MLIB_C_IMAGELOOKUP(mlib_s32, mlib_s32, table_base);
1829 }
1830 
1831 /***************************************************************/
1832 void mlib_c_ImageLookUpSI_U8_U8(const mlib_u8 *src,
1833                                 mlib_s32      slb,
1834                                 mlib_u8       *dst,
1835                                 mlib_s32      dlb,
1836                                 mlib_s32      xsize,
1837                                 mlib_s32      ysize,
1838                                 mlib_s32      csize,
1839                                 const mlib_u8 **table)
1840 {
1841 
1842   if ((xsize < 8) || ((xsize * ysize) < 250)) {
1843     MLIB_C_IMAGELOOKUPSI(mlib_u8, mlib_u8, table);
1844   }
1845   else if (csize == 2) {
1846 
1847     mlib_u16 tab[256];
1848     const mlib_u8 *tab0 = table[0];
1849     const mlib_u8 *tab1 = table[1];
1850     mlib_s32 i, j, s0, s1, s2;
1851 
1852     s0 = tab0[0];
1853     s1 = tab1[0];
1854     for (i = 1; i < 256; i++) {
1855 #ifdef _LITTLE_ENDIAN
1856       s2 = (s1 << 8) + s0;
1857 #else
1858       s2 = (s0 << 8) + s1;
1859 #endif /* _LITTLE_ENDIAN */
1860       s0 = tab0[i];
1861       s1 = tab1[i];
1862       tab[i - 1] = (mlib_u16) s2;
1863     }
1864 
1865 #ifdef _LITTLE_ENDIAN
1866     s2 = (s1 << 8) + s0;
1867 #else
1868     s2 = (s0 << 8) + s1;
1869 #endif /* _LITTLE_ENDIAN */
1870     tab[255] = (mlib_u16) s2;
1871 
1872     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
1873       mlib_s32 *da;
1874       mlib_u8 *dp = dst;
1875       mlib_u8 *sa = (void *)src;
1876       mlib_s32 s0, t0, s1, t1, t, t2, off;
1877       mlib_s32 size = xsize;
1878 
1879       if (((mlib_addr) dp & 1) == 0) {
1880 
1881         if (((mlib_addr) dp & 3) != 0) {
1882           *((mlib_u16 *) dp) = tab[sa[0]];
1883           sa++;
1884           size--;
1885           dp += 2;
1886         }
1887 
1888         da = (mlib_s32 *) dp;
1889 
1890         s0 = sa[0];
1891         s1 = sa[1];
1892         sa += 2;
1893 
1894 #ifdef __SUNPRO_C
1895 #pragma pipeloop(0)
1896 #endif /* __SUNPRO_C */
1897         for (i = 0; i < size - 3; i += 2, da++, sa += 2) {
1898           t0 = tab[s0];
1899           t1 = tab[s1];
1900 #ifdef _LITTLE_ENDIAN
1901           t = (t1 << 16) + t0;
1902 #else
1903           t = (t0 << 16) + t1;
1904 #endif /* _LITTLE_ENDIAN */
1905           s0 = sa[0];
1906           s1 = sa[1];
1907           da[0] = t;
1908         }
1909 
1910         t0 = tab[s0];
1911         t1 = tab[s1];
1912 #ifdef _LITTLE_ENDIAN
1913         t = (t1 << 16) + t0;
1914 #else
1915         t = (t0 << 16) + t1;
1916 #endif /* _LITTLE_ENDIAN */
1917         da[0] = t;
1918         da++;
1919 
1920         if (size & 1)
1921           *((mlib_u16 *) da) = tab[sa[0]];
1922 
1923       }
1924       else {
1925 
1926         off = (mlib_s32) (4 - ((mlib_addr) dp & 3));
1927 
1928         if (off > 1) {
1929           t0 = tab[sa[0]];
1930 #ifdef _LITTLE_ENDIAN
1931           dp[1] = (t0 >> 8);
1932           dp[0] = t0;
1933 #else
1934           dp[0] = (t0 >> 8);
1935           dp[1] = t0;
1936 #endif /* _LITTLE_ENDIAN */
1937           sa++;
1938           size--;
1939           dp += 2;
1940         }
1941 
1942         t0 = tab[sa[0]];
1943         sa++;
1944 #ifdef _LITTLE_ENDIAN
1945         *dp++ = t0;
1946 #else
1947         *dp++ = (t0 >> 8);
1948 #endif /* _LITTLE_ENDIAN */
1949 
1950         da = (mlib_s32 *) dp;
1951 
1952         s0 = sa[0];
1953         s1 = sa[1];
1954         sa += 2;
1955 
1956 #ifdef __SUNPRO_C
1957 #pragma pipeloop(0)
1958 #endif /* __SUNPRO_C */
1959         for (i = 0; i < size - 4; i += 2, da++, sa += 2) {
1960           t1 = tab[s0];
1961           t2 = tab[s1];
1962 #ifdef _LITTLE_ENDIAN
1963           t = (t0 >> 8) + (t1 << 8) + (t2 << 24);
1964 #else
1965           t = (t0 << 24) + (t1 << 8) + (t2 >> 8);
1966 #endif /* _LITTLE_ENDIAN */
1967           t0 = t2;
1968           s0 = sa[0];
1969           s1 = sa[1];
1970           da[0] = t;
1971         }
1972 
1973         t1 = tab[s0];
1974         t2 = tab[s1];
1975 #ifdef _LITTLE_ENDIAN
1976         t = (t0 >> 8) + (t1 << 8) + (t2 << 24);
1977 #else
1978         t = (t0 << 24) + (t1 << 8) + (t2 >> 8);
1979 #endif /* _LITTLE_ENDIAN */
1980         da[0] = t;
1981         da++;
1982         dp = (mlib_u8 *) da;
1983 #ifdef _LITTLE_ENDIAN
1984         dp[0] = (t2 >> 8);
1985 #else
1986         dp[0] = t2;
1987 #endif /* _LITTLE_ENDIAN */
1988 
1989         if ((size & 1) == 0) {
1990           t0 = tab[sa[0]];
1991 #ifdef _LITTLE_ENDIAN
1992           dp[2] = (t0 >> 8);
1993           dp[1] = t0;
1994 #else
1995           dp[1] = (t0 >> 8);
1996           dp[2] = t0;
1997 #endif /* _LITTLE_ENDIAN */
1998         }
1999       }
2000     }
2001 
2002   }
2003   else if (csize == 3) {
2004     mlib_u32 tab[256];
2005     const mlib_u8 *tab0 = table[0];
2006     const mlib_u8 *tab1 = table[1];
2007     const mlib_u8 *tab2 = table[2];
2008     mlib_s32 i, j;
2009     mlib_u32 s0, s1, s2, s3;
2010 
2011     s0 = tab0[0];
2012     s1 = tab1[0];
2013     s2 = tab2[0];
2014     for (i = 1; i < 256; i++) {
2015 #ifdef _LITTLE_ENDIAN
2016       s3 = (s2 << 24) + (s1 << 16) + (s0 << 8);
2017 #else
2018       s3 = (s0 << 16) + (s1 << 8) + s2;
2019 #endif /* _LITTLE_ENDIAN */
2020       s0 = tab0[i];
2021       s1 = tab1[i];
2022       s2 = tab2[i];
2023       tab[i - 1] = s3;
2024     }
2025 
2026 #ifdef _LITTLE_ENDIAN
2027     s3 = (s2 << 24) + (s1 << 16) + (s0 << 8);
2028 #else
2029     s3 = (s0 << 16) + (s1 << 8) + s2;
2030 #endif /* _LITTLE_ENDIAN */
2031     tab[255] = s3;
2032 
2033     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
2034       mlib_u32 *da;
2035       mlib_u8 *dp = dst;
2036       mlib_u8 *sa = (void *)src, *ptr;
2037       mlib_u32 s0, s1, t0, t1;
2038       mlib_u32 res1, res2;
2039       mlib_s32 size = xsize, off;
2040 
2041       off = (mlib_s32) ((mlib_addr) dp & 3);
2042 
2043 #ifdef __SUNPRO_C
2044 #pragma pipeloop(0)
2045 #endif /* __SUNPRO_C */
2046       for (i = 0; i < off; i++) {
2047         ptr = (mlib_u8 *) (tab + sa[0]);
2048         dp[0] = ptr[1];
2049         dp[1] = ptr[2];
2050         dp[2] = ptr[3];
2051         dp += 3;
2052         sa++;
2053       }
2054 
2055       size -= off;
2056       da = (mlib_u32 *) dp;
2057       s0 = sa[0];
2058       s1 = sa[1];
2059       sa += 2;
2060 
2061 #ifdef __SUNPRO_C
2062 #pragma pipeloop(0)
2063 #endif /* __SUNPRO_C */
2064       for (i = 0; i < size - 7; i += 4, da += 3, sa += 4) {
2065         t0 = tab[s0];
2066         t1 = tab[s1];
2067 #ifdef _LITTLE_ENDIAN
2068         da[0] = (t0 >> 8) + (t1 << 16);
2069         res2 = (t1 >> 16);
2070 #else
2071         da[0] = (t0 << 8) + (t1 >> 16);
2072         res2 = (t1 << 16);
2073 #endif /* _LITTLE_ENDIAN */
2074         s0 = sa[0];
2075         s1 = sa[1];
2076         t0 = tab[s0];
2077         t1 = tab[s1];
2078 #ifdef _LITTLE_ENDIAN
2079         res2 += (t0 << 8);
2080         res1 = (t0 >> 24) + t1;
2081 #else
2082         res2 += (t0 >> 8);
2083         res1 = (t0 << 24) + t1;
2084 #endif /* _LITTLE_ENDIAN */
2085         s0 = sa[2];
2086         s1 = sa[3];
2087         da[1] = res2;
2088         da[2] = res1;
2089       }
2090 
2091       t0 = tab[s0];
2092       t1 = tab[s1];
2093 #ifdef _LITTLE_ENDIAN
2094       da[0] = (t0 >> 8) + (t1 << 16);
2095       res2 = (t1 >> 16);
2096 #else
2097       da[0] = (t0 << 8) + (t1 >> 16);
2098       res2 = (t1 << 16);
2099 #endif /* _LITTLE_ENDIAN */
2100       s0 = sa[0];
2101       s1 = sa[1];
2102       t0 = tab[s0];
2103       t1 = tab[s1];
2104 #ifdef _LITTLE_ENDIAN
2105       res2 += (t0 << 8);
2106       res1 = (t0 >> 24) + t1;
2107 #else
2108       res2 += (t0 >> 8);
2109       res1 = (t0 << 24) + t1;
2110 #endif /* _LITTLE_ENDIAN */
2111       da[1] = res2;
2112       da[2] = res1;
2113       da += 3;
2114       sa += 2;
2115       dp = (mlib_u8 *) da;
2116       i += 4;
2117 
2118 #ifdef __SUNPRO_C
2119 #pragma pipeloop(0)
2120 #endif /* __SUNPRO_C */
2121       for (; i < size; i++) {
2122         ptr = (mlib_u8 *) (tab + sa[0]);
2123         dp[0] = ptr[1];
2124         dp[1] = ptr[2];
2125         dp[2] = ptr[3];
2126         dp += 3;
2127         sa++;
2128       }
2129     }
2130 
2131   }
2132   else if (csize == 4) {
2133     mlib_u32 tab[256];
2134     const mlib_u8 *tab0 = table[0];
2135     const mlib_u8 *tab1 = table[1];
2136     const mlib_u8 *tab2 = table[2];
2137     const mlib_u8 *tab3 = table[3];
2138     mlib_s32 i, j;
2139     mlib_u32 s0, s1, s2, s3, s4;
2140 
2141     s0 = tab0[0];
2142     s1 = tab1[0];
2143     s2 = tab2[0];
2144     s3 = tab3[0];
2145     for (i = 1; i < 256; i++) {
2146 #ifdef _LITTLE_ENDIAN
2147       s4 = (s3 << 24) + (s2 << 16) + (s1 << 8) + s0;
2148 #else
2149       s4 = (s0 << 24) + (s1 << 16) + (s2 << 8) + s3;
2150 #endif /* _LITTLE_ENDIAN */
2151       s0 = tab0[i];
2152       s1 = tab1[i];
2153       s2 = tab2[i];
2154       s3 = tab3[i];
2155       tab[i - 1] = s4;
2156     }
2157 
2158 #ifdef _LITTLE_ENDIAN
2159     s4 = (s3 << 24) + (s2 << 16) + (s1 << 8) + s0;
2160 #else
2161     s4 = (s0 << 24) + (s1 << 16) + (s2 << 8) + s3;
2162 #endif /* _LITTLE_ENDIAN */
2163     tab[255] = s4;
2164 
2165     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
2166       mlib_u32 *da;
2167       mlib_u8 *dp = dst;
2168       mlib_u8 *sa = (void *)src;
2169       mlib_u32 s0, t0, s1, t1, t2;
2170       mlib_s32 size = xsize, off;
2171       mlib_u32 shift, shift1, res1, res2;
2172 
2173       if (((mlib_addr) dp & 3) == 0) {
2174 
2175         da = (mlib_u32 *) dp;
2176 
2177         s0 = sa[0];
2178         s1 = sa[1];
2179         sa += 2;
2180 
2181 #ifdef __SUNPRO_C
2182 #pragma pipeloop(0)
2183 #endif /* __SUNPRO_C */
2184         for (i = 0; i < size - 3; i += 2, da += 2, sa += 2) {
2185           t0 = tab[s0];
2186           t1 = tab[s1];
2187           s0 = sa[0];
2188           s1 = sa[1];
2189           da[0] = t0;
2190           da[1] = t1;
2191         }
2192 
2193         t0 = tab[s0];
2194         t1 = tab[s1];
2195         da[0] = t0;
2196         da[1] = t1;
2197 
2198         if (size & 1)
2199           da[2] = tab[sa[0]];
2200 
2201       }
2202       else {
2203 
2204         off = (mlib_s32) (4 - ((mlib_addr) dp & 3));
2205         shift = 8 * off;
2206         shift1 = 32 - shift;
2207 
2208         for (i = 0; i < off; i++) {
2209           dp[i] = table[i][sa[0]];
2210         }
2211 
2212         dp += i;
2213         t0 = tab[sa[0]];
2214         sa++;
2215 
2216         da = (mlib_u32 *) dp;
2217 
2218         s0 = sa[0];
2219         s1 = sa[1];
2220         sa += 2;
2221 
2222 #ifdef __SUNPRO_C
2223 #pragma pipeloop(0)
2224 #endif /* __SUNPRO_C */
2225         for (i = 0; i < size - 4; i += 2, da += 2, sa += 2) {
2226           t1 = tab[s0];
2227           t2 = tab[s1];
2228 #ifdef _LITTLE_ENDIAN
2229           res1 = (t0 >> shift) + (t1 << shift1);
2230           res2 = (t1 >> shift) + (t2 << shift1);
2231 #else
2232           res1 = (t0 << shift) + (t1 >> shift1);
2233           res2 = (t1 << shift) + (t2 >> shift1);
2234 #endif /* _LITTLE_ENDIAN */
2235           t0 = t2;
2236           s0 = sa[0];
2237           s1 = sa[1];
2238           da[0] = res1;
2239           da[1] = res2;
2240         }
2241 
2242         t1 = tab[s0];
2243         t2 = tab[s1];
2244 #ifdef _LITTLE_ENDIAN
2245         res1 = (t0 >> shift) + (t1 << shift1);
2246         res2 = (t1 >> shift) + (t2 << shift1);
2247 #else
2248         res1 = (t0 << shift) + (t1 >> shift1);
2249         res2 = (t1 << shift) + (t2 >> shift1);
2250 #endif /* _LITTLE_ENDIAN */
2251         da[0] = res1;
2252         da[1] = res2;
2253 #ifdef _LITTLE_ENDIAN
2254         t0 = (da[2] >> shift1);
2255         da[2] = (t2 >> shift) + (t0 << shift1);
2256 #else
2257         t0 = (da[2] << shift1);
2258         da[2] = (t2 << shift) + (t0 >> shift1);
2259 #endif /* _LITTLE_ENDIAN */
2260         da += 2;
2261         dp = (mlib_u8 *) da + (4 - off);
2262 
2263         if ((size & 1) == 0) {
2264           t0 = tab[sa[0]];
2265 #ifdef _LITTLE_ENDIAN
2266           dp[3] = (mlib_u8) (t0 >> 24);
2267           dp[2] = (mlib_u8) (t0 >> 16);
2268           dp[1] = (mlib_u8) (t0 >> 8);
2269           dp[0] = (mlib_u8) t0;
2270 #else
2271           dp[0] = (mlib_u8) (t0 >> 24);
2272           dp[1] = (mlib_u8) (t0 >> 16);
2273           dp[2] = (mlib_u8) (t0 >> 8);
2274           dp[3] = (mlib_u8) t0;
2275 #endif /* _LITTLE_ENDIAN */
2276         }
2277       }
2278     }
2279   }
2280 }
2281 
2282 /***************************************************************/
2283 
2284 #ifdef _MSC_VER
2285 #pragma optimize("", off)
2286 #endif /* _MSC_VER */
2287 
2288 void mlib_c_ImageLookUpSI_S16_U8(const mlib_s16 *src,
2289                                  mlib_s32       slb,
2290                                  mlib_u8        *dst,
2291                                  mlib_s32       dlb,
2292                                  mlib_s32       xsize,
2293                                  mlib_s32       ysize,
2294                                  mlib_s32       csize,
2295                                  const mlib_u8  **table)
2296 {
2297   const mlib_u8 *table_base[4];
2298   mlib_s32 c;
2299 
2300   for (c = 0; c < csize; c++) {
2301     table_base[c] = &table[c][32768];
2302   }
2303 
2304   if ((xsize < 8) || (csize == 2)) {
2305     MLIB_C_IMAGELOOKUPSI(mlib_u8, mlib_s16, table_base);
2306   }
2307   else if (csize == 3) {
2308     mlib_s32 i, j;
2309 
2310     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
2311       mlib_u32 *da;
2312       mlib_u8 *dp = dst;
2313       mlib_s16 *sa = (void *)src;
2314       const mlib_u8 *tab0 = table_base[0];
2315       const mlib_u8 *tab1 = table_base[1];
2316       const mlib_u8 *tab2 = table_base[2];
2317       mlib_s32 s0, s1;
2318       mlib_u32 t0, t1, t2, t3, t4, t5;
2319       mlib_u32 res1, res2;
2320       mlib_s32 size = xsize, off;
2321 
2322       off = (mlib_s32) ((mlib_addr) dp & 3);
2323 
2324 #ifdef __SUNPRO_C
2325 #pragma pipeloop(0)
2326 #endif /* __SUNPRO_C */
2327       for (i = 0; i < off; i++) {
2328         s0 = *sa++;
2329         dp[0] = tab0[s0];
2330         dp[1] = tab1[s0];
2331         dp[2] = tab2[s0];
2332         dp += 3;
2333       }
2334 
2335       size -= off;
2336       da = (mlib_u32 *) dp;
2337       s0 = sa[0];
2338       s1 = sa[1];
2339       sa += 2;
2340 
2341 #ifdef __SUNPRO_C
2342 #pragma pipeloop(0)
2343 #endif /* __SUNPRO_C */
2344       for (i = 0; i < size - 7; i += 4, da += 3, sa += 4) {
2345         t0 = tab0[s0];
2346         t1 = tab1[s0];
2347         t2 = tab2[s0];
2348         t3 = tab0[s1];
2349         t4 = tab1[s1];
2350         t5 = tab2[s1];
2351 #ifdef _LITTLE_ENDIAN
2352         da[0] = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2353         res2 = (t5 << 8) + t4;
2354 #else
2355         da[0] = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2356         res2 = (t4 << 24) + (t5 << 16);
2357 #endif /* _LITTLE_ENDIAN */
2358         s0 = sa[0];
2359         s1 = sa[1];
2360         t0 = tab0[s0];
2361         t1 = tab1[s0];
2362         t2 = tab2[s0];
2363         t3 = tab0[s1];
2364         t4 = tab1[s1];
2365         t5 = tab2[s1];
2366 #ifdef _LITTLE_ENDIAN
2367         res2 += ((t1 << 24) + (t0 << 16));
2368         res1 = (t5 << 24) + (t4 << 16) + (t3 << 8) + t2;
2369 #else
2370         res2 += ((t0 << 8) + t1);
2371         res1 = (t2 << 24) + (t3 << 16) + (t4 << 8) + t5;
2372 #endif /* _LITTLE_ENDIAN */
2373         s0 = sa[2];
2374         s1 = sa[3];
2375         da[1] = res2;
2376         da[2] = res1;
2377       }
2378 
2379       t0 = tab0[s0];
2380       t1 = tab1[s0];
2381       t2 = tab2[s0];
2382       t3 = tab0[s1];
2383       t4 = tab1[s1];
2384       t5 = tab2[s1];
2385 #ifdef _LITTLE_ENDIAN
2386       da[0] = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2387       res2 = (t5 << 8) + t4;
2388 #else
2389       da[0] = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2390       res2 = (t4 << 24) + (t5 << 16);
2391 #endif /* _LITTLE_ENDIAN */
2392       s0 = sa[0];
2393       s1 = sa[1];
2394       t0 = tab0[s0];
2395       t1 = tab1[s0];
2396       t2 = tab2[s0];
2397       t3 = tab0[s1];
2398       t4 = tab1[s1];
2399       t5 = tab2[s1];
2400 #ifdef _LITTLE_ENDIAN
2401       res2 += ((t1 << 24) + (t0 << 16));
2402       res1 = (t5 << 24) + (t4 << 16) + (t3 << 8) + t2;
2403 #else
2404       res2 += ((t0 << 8) + t1);
2405       res1 = (t2 << 24) + (t3 << 16) + (t4 << 8) + t5;
2406 #endif /* _LITTLE_ENDIAN */
2407       da[1] = res2;
2408       da[2] = res1;
2409       da += 3;
2410       sa += 2;
2411       dp = (mlib_u8 *) da;
2412       i += 4;
2413 
2414 #ifdef __SUNPRO_C
2415 #pragma pipeloop(0)
2416 #endif /* __SUNPRO_C */
2417       for (; i < size; i++) {
2418         s0 = *sa++;
2419         dp[0] = tab0[s0];
2420         dp[1] = tab1[s0];
2421         dp[2] = tab2[s0];
2422         dp += 3;
2423       }
2424     }
2425 
2426   }
2427   else if (csize == 4) {
2428     mlib_s32 i, j;
2429 
2430     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
2431       mlib_u32 *da;
2432       mlib_u8 *dp = dst;
2433       mlib_s16 *sa = (void *)src;
2434       const mlib_u8 *tab0 = table_base[0];
2435       const mlib_u8 *tab1 = table_base[1];
2436       const mlib_u8 *tab2 = table_base[2];
2437       const mlib_u8 *tab3 = table_base[3];
2438       mlib_s32 s0;
2439       mlib_u32 t0, t1, t2, t3;
2440       mlib_s32 size = xsize, off;
2441       mlib_u32 shift, shift1, res1, res2, res;
2442 
2443       if (((mlib_addr) dp & 3) == 0) {
2444 
2445         da = (mlib_u32 *) dp;
2446 
2447         s0 = sa[0];
2448         sa++;
2449 
2450 #ifdef __SUNPRO_C
2451 #pragma pipeloop(0)
2452 #endif /* __SUNPRO_C */
2453         for (i = 0; i < size - 1; i++, da++, sa++) {
2454           t0 = tab0[s0];
2455           t1 = tab1[s0];
2456           t2 = tab2[s0];
2457           t3 = tab3[s0];
2458 #ifdef _LITTLE_ENDIAN
2459           res = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2460 #else
2461           res = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2462 #endif /* _LITTLE_ENDIAN */
2463           s0 = sa[0];
2464           da[0] = res;
2465         }
2466 
2467         t0 = tab0[s0];
2468         t1 = tab1[s0];
2469         t2 = tab2[s0];
2470         t3 = tab3[s0];
2471 #ifdef _LITTLE_ENDIAN
2472         res = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2473 #else
2474         res = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2475 #endif /* _LITTLE_ENDIAN */
2476         da[0] = res;
2477 
2478       }
2479       else {
2480 
2481         off = (mlib_s32) (4 - ((mlib_addr) dp & 3));
2482         shift = 8 * off;
2483         shift1 = 32 - shift;
2484 
2485         s0 = *sa++;
2486 
2487         for (i = 0; i < off; i++) {
2488           dp[i] = table_base[i][s0];
2489         }
2490 
2491         dp += i;
2492         da = (mlib_u32 *) dp;
2493 
2494         t0 = tab0[s0];
2495         t1 = tab1[s0];
2496         t2 = tab2[s0];
2497         t3 = tab3[s0];
2498 
2499 #ifdef _LITTLE_ENDIAN
2500         res1 = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2501 #else
2502         res1 = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2503 #endif /* _LITTLE_ENDIAN */
2504 
2505         s0 = sa[0];
2506         sa++;
2507 
2508 #ifdef __SUNPRO_C
2509 #pragma pipeloop(0)
2510 #endif /* __SUNPRO_C */
2511         for (i = 0; i < size - 2; i++, da++, sa++) {
2512           t0 = tab0[s0];
2513           t1 = tab1[s0];
2514           t2 = tab2[s0];
2515           t3 = tab3[s0];
2516 #ifdef _LITTLE_ENDIAN
2517           res2 = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2518           res = (res1 >> shift) + (res2 << shift1);
2519 #else
2520           res2 = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2521           res = (res1 << shift) + (res2 >> shift1);
2522 #endif /* _LITTLE_ENDIAN */
2523           res1 = res2;
2524           s0 = sa[0];
2525           da[0] = res;
2526         }
2527 
2528         t0 = tab0[s0];
2529         t1 = tab1[s0];
2530         t2 = tab2[s0];
2531         t3 = tab3[s0];
2532 #ifdef _LITTLE_ENDIAN
2533         res2 = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2534         res = (res1 >> shift) + (res2 << shift1);
2535 #else
2536         res2 = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2537         res = (res1 << shift) + (res2 >> shift1);
2538 #endif /* _LITTLE_ENDIAN */
2539         da[0] = res;
2540 #ifdef _LITTLE_ENDIAN
2541         res1 = (da[1] >> shift1);
2542         da[1] = (res2 >> shift) + (res1 << shift1);
2543 #else
2544         res1 = (da[1] << shift1);
2545         da[1] = (res2 << shift) + (res1 >> shift1);
2546 #endif /* _LITTLE_ENDIAN */
2547       }
2548     }
2549   }
2550 }
2551 
2552 #ifdef _MSC_VER
2553 #pragma optimize("", on)
2554 #endif /* _MSC_VER */
2555 
2556 /***************************************************************/
2557 void mlib_c_ImageLookUpSI_U16_U8(const mlib_u16 *src,
2558                                  mlib_s32       slb,
2559                                  mlib_u8        *dst,
2560                                  mlib_s32       dlb,
2561                                  mlib_s32       xsize,
2562                                  mlib_s32       ysize,
2563                                  mlib_s32       csize,
2564                                  const mlib_u8  **table)
2565 {
2566   const mlib_u8 *table_base[4];
2567   mlib_s32 c;
2568 
2569   for (c = 0; c < csize; c++) {
2570     table_base[c] = &table[c][0];
2571   }
2572 
2573   if ((xsize < 8) || (csize == 2)) {
2574     MLIB_C_IMAGELOOKUPSI(mlib_u8, mlib_u16, table_base);
2575   }
2576   else if (csize == 3) {
2577     mlib_s32 i, j;
2578 
2579     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
2580       mlib_u32 *da;
2581       mlib_u8 *dp = dst;
2582       mlib_u16 *sa = (void *)src;
2583       const mlib_u8 *tab0 = table_base[0];
2584       const mlib_u8 *tab1 = table_base[1];
2585       const mlib_u8 *tab2 = table_base[2];
2586       mlib_s32 s0, s1;
2587       mlib_u32 t0, t1, t2, t3, t4, t5;
2588       mlib_u32 res1, res2;
2589       mlib_s32 size = xsize, off;
2590 
2591       off = (mlib_s32) ((mlib_addr) dp & 3);
2592 
2593 #ifdef __SUNPRO_C
2594 #pragma pipeloop(0)
2595 #endif /* __SUNPRO_C */
2596       for (i = 0; i < off; i++) {
2597         s0 = *sa++;
2598         dp[0] = tab0[s0];
2599         dp[1] = tab1[s0];
2600         dp[2] = tab2[s0];
2601         dp += 3;
2602       }
2603 
2604       size -= off;
2605       da = (mlib_u32 *) dp;
2606       s0 = sa[0];
2607       s1 = sa[1];
2608       sa += 2;
2609 
2610 #ifdef __SUNPRO_C
2611 #pragma pipeloop(0)
2612 #endif /* __SUNPRO_C */
2613       for (i = 0; i < size - 7; i += 4, da += 3, sa += 4) {
2614         t0 = tab0[s0];
2615         t1 = tab1[s0];
2616         t2 = tab2[s0];
2617         t3 = tab0[s1];
2618         t4 = tab1[s1];
2619         t5 = tab2[s1];
2620 #ifdef _LITTLE_ENDIAN
2621         da[0] = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2622         res2 = (t5 << 8) + t4;
2623 #else
2624         da[0] = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2625         res2 = (t4 << 24) + (t5 << 16);
2626 #endif /* _LITTLE_ENDIAN */
2627         s0 = sa[0];
2628         s1 = sa[1];
2629         t0 = tab0[s0];
2630         t1 = tab1[s0];
2631         t2 = tab2[s0];
2632         t3 = tab0[s1];
2633         t4 = tab1[s1];
2634         t5 = tab2[s1];
2635 #ifdef _LITTLE_ENDIAN
2636         res2 += ((t1 << 24) + (t0 << 16));
2637         res1 = (t5 << 24) + (t4 << 16) + (t3 << 8) + t2;
2638 #else
2639         res2 += ((t0 << 8) + t1);
2640         res1 = (t2 << 24) + (t3 << 16) + (t4 << 8) + t5;
2641 #endif /* _LITTLE_ENDIAN */
2642         s0 = sa[2];
2643         s1 = sa[3];
2644         da[1] = res2;
2645         da[2] = res1;
2646       }
2647 
2648       t0 = tab0[s0];
2649       t1 = tab1[s0];
2650       t2 = tab2[s0];
2651       t3 = tab0[s1];
2652       t4 = tab1[s1];
2653       t5 = tab2[s1];
2654 #ifdef _LITTLE_ENDIAN
2655       da[0] = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2656       res2 = (t5 << 8) + t4;
2657 #else
2658       da[0] = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2659       res2 = (t4 << 24) + (t5 << 16);
2660 #endif /* _LITTLE_ENDIAN */
2661       s0 = sa[0];
2662       s1 = sa[1];
2663       t0 = tab0[s0];
2664       t1 = tab1[s0];
2665       t2 = tab2[s0];
2666       t3 = tab0[s1];
2667       t4 = tab1[s1];
2668       t5 = tab2[s1];
2669 #ifdef _LITTLE_ENDIAN
2670       res2 += ((t1 << 24) + (t0 << 16));
2671       res1 = (t5 << 24) + (t4 << 16) + (t3 << 8) + t2;
2672 #else
2673       res2 += ((t0 << 8) + t1);
2674       res1 = (t2 << 24) + (t3 << 16) + (t4 << 8) + t5;
2675 #endif /* _LITTLE_ENDIAN */
2676       da[1] = res2;
2677       da[2] = res1;
2678       da += 3;
2679       sa += 2;
2680       dp = (mlib_u8 *) da;
2681       i += 4;
2682 
2683 #ifdef __SUNPRO_C
2684 #pragma pipeloop(0)
2685 #endif /* __SUNPRO_C */
2686       for (; i < size; i++) {
2687         s0 = *sa++;
2688         dp[0] = tab0[s0];
2689         dp[1] = tab1[s0];
2690         dp[2] = tab2[s0];
2691         dp += 3;
2692       }
2693     }
2694 
2695   }
2696   else if (csize == 4) {
2697     mlib_s32 i, j;
2698 
2699     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
2700       mlib_u32 *da;
2701       mlib_u8 *dp = dst;
2702       mlib_u16 *sa = (void *)src;
2703       const mlib_u8 *tab0 = table_base[0];
2704       const mlib_u8 *tab1 = table_base[1];
2705       const mlib_u8 *tab2 = table_base[2];
2706       const mlib_u8 *tab3 = table_base[3];
2707       mlib_s32 s0;
2708       mlib_u32 t0, t1, t2, t3;
2709       mlib_s32 size = xsize, off;
2710       mlib_u32 shift, shift1, res1, res2, res;
2711 
2712       if (((mlib_addr) dp & 3) == 0) {
2713 
2714         da = (mlib_u32 *) dp;
2715 
2716         s0 = sa[0];
2717         sa++;
2718 
2719 #ifdef __SUNPRO_C
2720 #pragma pipeloop(0)
2721 #endif /* __SUNPRO_C */
2722         for (i = 0; i < size - 1; i++, da++, sa++) {
2723           t0 = tab0[s0];
2724           t1 = tab1[s0];
2725           t2 = tab2[s0];
2726           t3 = tab3[s0];
2727 #ifdef _LITTLE_ENDIAN
2728           res = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2729 #else
2730           res = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2731 #endif /* _LITTLE_ENDIAN */
2732           s0 = sa[0];
2733           da[0] = res;
2734         }
2735 
2736         t0 = tab0[s0];
2737         t1 = tab1[s0];
2738         t2 = tab2[s0];
2739         t3 = tab3[s0];
2740 #ifdef _LITTLE_ENDIAN
2741         res = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2742 #else
2743         res = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2744 #endif /* _LITTLE_ENDIAN */
2745         da[0] = res;
2746 
2747       }
2748       else {
2749 
2750         off = (mlib_s32) (4 - ((mlib_addr) dp & 3));
2751         shift = 8 * off;
2752         shift1 = 32 - shift;
2753 
2754         s0 = *sa++;
2755 
2756         for (i = 0; i < off; i++) {
2757           dp[i] = table_base[i][s0];
2758         }
2759 
2760         dp += i;
2761         da = (mlib_u32 *) dp;
2762 
2763         t0 = tab0[s0];
2764         t1 = tab1[s0];
2765         t2 = tab2[s0];
2766         t3 = tab3[s0];
2767 
2768 #ifdef _LITTLE_ENDIAN
2769         res1 = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2770 #else
2771         res1 = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2772 #endif /* _LITTLE_ENDIAN */
2773 
2774         s0 = sa[0];
2775         sa++;
2776 
2777 #ifdef __SUNPRO_C
2778 #pragma pipeloop(0)
2779 #endif /* __SUNPRO_C */
2780         for (i = 0; i < size - 2; i++, da++, sa++) {
2781           t0 = tab0[s0];
2782           t1 = tab1[s0];
2783           t2 = tab2[s0];
2784           t3 = tab3[s0];
2785 #ifdef _LITTLE_ENDIAN
2786           res2 = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2787           res = (res1 >> shift) + (res2 << shift1);
2788 #else
2789           res2 = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2790           res = (res1 << shift) + (res2 >> shift1);
2791 #endif /* _LITTLE_ENDIAN */
2792           res1 = res2;
2793           s0 = sa[0];
2794           da[0] = res;
2795         }
2796 
2797         t0 = tab0[s0];
2798         t1 = tab1[s0];
2799         t2 = tab2[s0];
2800         t3 = tab3[s0];
2801 #ifdef _LITTLE_ENDIAN
2802         res2 = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2803         res = (res1 >> shift) + (res2 << shift1);
2804 #else
2805         res2 = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2806         res = (res1 << shift) + (res2 >> shift1);
2807 #endif /* _LITTLE_ENDIAN */
2808         da[0] = res;
2809 #ifdef _LITTLE_ENDIAN
2810         res1 = (da[1] >> shift1);
2811         da[1] = (res2 >> shift) + (res1 << shift1);
2812 #else
2813         res1 = (da[1] << shift1);
2814         da[1] = (res2 << shift) + (res1 >> shift1);
2815 #endif /* _LITTLE_ENDIAN */
2816       }
2817     }
2818   }
2819 }
2820 
2821 /***************************************************************/
2822 void mlib_c_ImageLookUpSI_S32_U8(const mlib_s32 *src,
2823                                  mlib_s32       slb,
2824                                  mlib_u8        *dst,
2825                                  mlib_s32       dlb,
2826                                  mlib_s32       xsize,
2827                                  mlib_s32       ysize,
2828                                  mlib_s32       csize,
2829                                  const mlib_u8  **table)
2830 {
2831   const mlib_u8 *table_base[4];
2832   mlib_s32 c;
2833 
2834   for (c = 0; c < csize; c++) {
2835     table_base[c] = &table[c][TABLE_SHIFT_S32];
2836   }
2837 
2838   MLIB_C_IMAGELOOKUPSI(mlib_u8, mlib_s32, table_base);
2839 }
2840 
2841 /***************************************************************/
2842 void mlib_c_ImageLookUpSI_U8_S16(const mlib_u8  *src,
2843                                  mlib_s32       slb,
2844                                  mlib_s16       *dst,
2845                                  mlib_s32       dlb,
2846                                  mlib_s32       xsize,
2847                                  mlib_s32       ysize,
2848                                  mlib_s32       csize,
2849                                  const mlib_s16 **table)
2850 {
2851 
2852   if ((xsize < 4) || ((xsize * ysize) < 250)) {
2853     MLIB_C_IMAGELOOKUPSI(mlib_s16, mlib_u8, table);
2854 
2855   }
2856   else if (csize == 2) {
2857     mlib_u32 tab[256];
2858     mlib_u16 *tab0 = (mlib_u16 *) table[0];
2859     mlib_u16 *tab1 = (mlib_u16 *) table[1];
2860     mlib_s32 i, j;
2861     mlib_u32 s0, s1, s2;
2862 
2863     s0 = tab0[0];
2864     s1 = tab1[0];
2865     for (i = 1; i < 256; i++) {
2866 #ifdef _LITTLE_ENDIAN
2867       s2 = (s1 << 16) + s0;
2868 #else
2869       s2 = (s0 << 16) + s1;
2870 #endif /* _LITTLE_ENDIAN */
2871       s0 = tab0[i];
2872       s1 = tab1[i];
2873       tab[i - 1] = s2;
2874     }
2875 
2876 #ifdef _LITTLE_ENDIAN
2877     s2 = (s1 << 16) + s0;
2878 #else
2879     s2 = (s0 << 16) + s1;
2880 #endif /* _LITTLE_ENDIAN */
2881     tab[255] = s2;
2882 
2883     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
2884       mlib_u32 *da;
2885       mlib_u16 *dp = (mlib_u16 *) dst;
2886       mlib_u8 *sa = (void *)src;
2887       mlib_u32 s0, t0, s1, t1, t2;
2888       mlib_u32 res1, res2;
2889       mlib_s32 size = xsize;
2890 
2891       if (((mlib_addr) dp & 3) == 0) {
2892 
2893         da = (mlib_u32 *) dp;
2894         s0 = sa[0];
2895         s1 = sa[1];
2896         sa += 2;
2897 
2898 #ifdef __SUNPRO_C
2899 #pragma pipeloop(0)
2900 #endif /* __SUNPRO_C */
2901         for (i = 0; i < size - 3; i += 2, da += 2, sa += 2) {
2902           t0 = tab[s0];
2903           t1 = tab[s1];
2904           s0 = sa[0];
2905           s1 = sa[1];
2906           da[0] = t0;
2907           da[1] = t1;
2908         }
2909 
2910         t0 = tab[s0];
2911         t1 = tab[s1];
2912         da[0] = t0;
2913         da[1] = t1;
2914 
2915         if (size & 1)
2916           da[2] = tab[sa[0]];
2917 
2918       }
2919       else {
2920 
2921         t0 = tab[*sa++];
2922 #ifdef _LITTLE_ENDIAN
2923         *dp++ = (mlib_u16) (t0);
2924 #else
2925         *dp++ = (mlib_u16) (t0 >> 16);
2926 #endif /* _LITTLE_ENDIAN */
2927         da = (mlib_u32 *) dp;
2928         s0 = sa[0];
2929         s1 = sa[1];
2930         sa += 2;
2931 
2932 #ifdef __SUNPRO_C
2933 #pragma pipeloop(0)
2934 #endif /* __SUNPRO_C */
2935         for (i = 0; i < size - 4; i += 2, da += 2, sa += 2) {
2936           t1 = tab[s0];
2937           t2 = tab[s1];
2938 #ifdef _LITTLE_ENDIAN
2939           res1 = (t0 >> 16) + (t1 << 16);
2940           res2 = (t1 >> 16) + (t2 << 16);
2941 #else
2942           res1 = (t0 << 16) + (t1 >> 16);
2943           res2 = (t1 << 16) + (t2 >> 16);
2944 #endif /* _LITTLE_ENDIAN */
2945           t0 = t2;
2946           s0 = sa[0];
2947           s1 = sa[1];
2948           da[0] = res1;
2949           da[1] = res2;
2950         }
2951 
2952         t1 = tab[s0];
2953         t2 = tab[s1];
2954 #ifdef _LITTLE_ENDIAN
2955         res1 = (t0 >> 16) + (t1 << 16);
2956         res2 = (t1 >> 16) + (t2 << 16);
2957 #else
2958         res1 = (t0 << 16) + (t1 >> 16);
2959         res2 = (t1 << 16) + (t2 >> 16);
2960 #endif /* _LITTLE_ENDIAN */
2961         da[0] = res1;
2962         da[1] = res2;
2963         da += 2;
2964         dp = (mlib_u16 *) da;
2965 #ifdef _LITTLE_ENDIAN
2966         dp[0] = (mlib_u16) (t2 >> 16);
2967 #else
2968         dp[0] = (mlib_u16) t2;
2969 #endif /* _LITTLE_ENDIAN */
2970 
2971         if ((size & 1) == 0) {
2972           t0 = tab[sa[0]];
2973 #ifdef _LITTLE_ENDIAN
2974           dp[2] = (mlib_u16) (t0 >> 16);
2975           dp[1] = (mlib_u16) t0;
2976 #else
2977           dp[1] = (mlib_u16) (t0 >> 16);
2978           dp[2] = (mlib_u16) t0;
2979 #endif /* _LITTLE_ENDIAN */
2980         }
2981       }
2982     }
2983 
2984   }
2985   else if (csize == 3) {
2986     mlib_u32 tab[512];
2987     mlib_u16 *tab0 = (mlib_u16 *) table[0];
2988     mlib_u16 *tab1 = (mlib_u16 *) table[1];
2989     mlib_u16 *tab2 = (mlib_u16 *) table[2];
2990     mlib_s32 i, j;
2991     mlib_u32 s0, s1, s2, s3, s4;
2992 
2993     s0 = tab0[0];
2994     s1 = tab1[0];
2995     s2 = tab2[0];
2996     for (i = 1; i < 256; i++) {
2997 #ifdef _LITTLE_ENDIAN
2998       s3 = (s0 << 16);
2999       s4 = (s2 << 16) + s1;
3000 #else
3001       s3 = s0;
3002       s4 = (s1 << 16) + s2;
3003 #endif /* _LITTLE_ENDIAN */
3004       s0 = tab0[i];
3005       s1 = tab1[i];
3006       s2 = tab2[i];
3007       tab[2 * i - 2] = s3;
3008       tab[2 * i - 1] = s4;
3009     }
3010 
3011 #ifdef _LITTLE_ENDIAN
3012     s4 = (s2 << 16) + s1;
3013     tab[510] = s0 << 16;
3014 #else
3015     s4 = (s1 << 16) + s2;
3016     tab[510] = s0;
3017 #endif /* _LITTLE_ENDIAN */
3018     tab[511] = s4;
3019 
3020     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
3021       mlib_u32 *da;
3022       mlib_u16 *dp = (mlib_u16 *) dst, *ptr;
3023       mlib_u8 *sa = (void *)src;
3024       mlib_u32 s0, s1, t0, t1, t2, t3;
3025       mlib_u32 res1, res2;
3026       mlib_s32 size = xsize, off;
3027 
3028       off = (mlib_s32) ((mlib_addr) dp & 3);
3029 
3030       if (off != 0) {
3031         ptr = (mlib_u16 *) (tab + 2 * sa[0]);
3032         dp[0] = ptr[1];
3033         dp[1] = ptr[2];
3034         dp[2] = ptr[3];
3035         dp += 3;
3036         sa++;
3037         size--;
3038       }
3039 
3040       da = (mlib_u32 *) dp;
3041       s0 = sa[0] << 3;
3042       s1 = sa[1] << 3;
3043       sa += 2;
3044 
3045 #ifdef __SUNPRO_C
3046 #pragma pipeloop(0)
3047 #endif /* __SUNPRO_C */
3048       for (i = 0; i < size - 3; i += 2, da += 3, sa += 2) {
3049         t0 = *(mlib_u32 *) ((mlib_u8 *) tab + s0);
3050         t1 = *(mlib_u32 *) ((mlib_u8 *) tab + s0 + 4);
3051         t2 = *(mlib_u32 *) ((mlib_u8 *) tab + s1);
3052         t3 = *(mlib_u32 *) ((mlib_u8 *) tab + s1 + 4);
3053 #ifdef _LITTLE_ENDIAN
3054         res1 = (t0 >> 16) + (t1 << 16);
3055         res2 = (t1 >> 16) + t2;
3056 #else
3057         res1 = (t0 << 16) + (t1 >> 16);
3058         res2 = (t1 << 16) + t2;
3059 #endif /* _LITTLE_ENDIAN */
3060         s0 = sa[0] << 3;
3061         s1 = sa[1] << 3;
3062         da[0] = res1;
3063         da[1] = res2;
3064         da[2] = t3;
3065       }
3066 
3067       t0 = *(mlib_u32 *) ((mlib_u8 *) tab + s0);
3068       t1 = *(mlib_u32 *) ((mlib_u8 *) tab + s0 + 4);
3069       t2 = *(mlib_u32 *) ((mlib_u8 *) tab + s1);
3070       t3 = *(mlib_u32 *) ((mlib_u8 *) tab + s1 + 4);
3071 #ifdef _LITTLE_ENDIAN
3072       res1 = (t0 >> 16) + (t1 << 16);
3073       res2 = (t1 >> 16) + t2;
3074 #else
3075       res1 = (t0 << 16) + (t1 >> 16);
3076       res2 = (t1 << 16) + t2;
3077 #endif /* _LITTLE_ENDIAN */
3078       da[0] = res1;
3079       da[1] = res2;
3080       da[2] = t3;
3081       da += 3;
3082       dp = (mlib_u16 *) da;
3083       i += 2;
3084 
3085       if (i < size) {
3086         ptr = (mlib_u16 *) (tab + 2 * sa[0]);
3087         dp[0] = ptr[1];
3088         dp[1] = ptr[2];
3089         dp[2] = ptr[3];
3090       }
3091     }
3092 
3093   }
3094   else if (csize == 4) {
3095     mlib_u32 tab[512];
3096     mlib_u16 *tab0 = (mlib_u16 *) table[0];
3097     mlib_u16 *tab1 = (mlib_u16 *) table[1];
3098     mlib_u16 *tab2 = (mlib_u16 *) table[2];
3099     mlib_u16 *tab3 = (mlib_u16 *) table[3];
3100     mlib_s32 i, j;
3101     mlib_u32 s0, s1, s2, s3, s4, s5;
3102 
3103     s0 = tab0[0];
3104     s1 = tab1[0];
3105     s2 = tab2[0];
3106     s3 = tab3[0];
3107     for (i = 1; i < 256; i++) {
3108 #ifdef _LITTLE_ENDIAN
3109       s4 = (s1 << 16) + s0;
3110       s5 = (s3 << 16) + s2;
3111 #else
3112       s4 = (s0 << 16) + s1;
3113       s5 = (s2 << 16) + s3;
3114 #endif /* _LITTLE_ENDIAN */
3115       s0 = tab0[i];
3116       s1 = tab1[i];
3117       s2 = tab2[i];
3118       s3 = tab3[i];
3119       tab[2 * i - 2] = s4;
3120       tab[2 * i - 1] = s5;
3121     }
3122 
3123 #ifdef _LITTLE_ENDIAN
3124     s4 = (s1 << 16) + s0;
3125     s5 = (s3 << 16) + s2;
3126 #else
3127     s4 = (s0 << 16) + s1;
3128     s5 = (s2 << 16) + s3;
3129 #endif /* _LITTLE_ENDIAN */
3130     tab[510] = s4;
3131     tab[511] = s5;
3132 
3133     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
3134       mlib_u32 *da;
3135       mlib_u16 *dp = (mlib_u16 *) dst;
3136       mlib_u8 *sa = (void *)src;
3137       mlib_u32 s0, t0, s1, t1, t2, t3, t4, t5;
3138       mlib_s32 size = xsize;
3139       mlib_u32 res1, res2, res3, res4;
3140 
3141       if (((mlib_addr) dp & 3) == 0) {
3142 
3143         da = (mlib_u32 *) dp;
3144 
3145         s0 = sa[0] << 3;
3146         s1 = sa[1] << 3;
3147         sa += 2;
3148 
3149 #ifdef __SUNPRO_C
3150 #pragma pipeloop(0)
3151 #endif /* __SUNPRO_C */
3152         for (i = 0; i < size - 3; i += 2, da += 4, sa += 2) {
3153           t0 = *(mlib_u32 *) ((mlib_u8 *) tab + s0);
3154           t1 = *(mlib_u32 *) ((mlib_u8 *) tab + s0 + 4);
3155           t2 = *(mlib_u32 *) ((mlib_u8 *) tab + s1);
3156           t3 = *(mlib_u32 *) ((mlib_u8 *) tab + s1 + 4);
3157           s0 = sa[0] << 3;
3158           s1 = sa[1] << 3;
3159           da[0] = t0;
3160           da[1] = t1;
3161           da[2] = t2;
3162           da[3] = t3;
3163         }
3164 
3165         t0 = *(mlib_u32 *) ((mlib_u8 *) tab + s0);
3166         t1 = *(mlib_u32 *) ((mlib_u8 *) tab + s0 + 4);
3167         t2 = *(mlib_u32 *) ((mlib_u8 *) tab + s1);
3168         t3 = *(mlib_u32 *) ((mlib_u8 *) tab + s1 + 4);
3169         da[0] = t0;
3170         da[1] = t1;
3171         da[2] = t2;
3172         da[3] = t3;
3173 
3174         if (size & 1) {
3175           da[4] = tab[2 * sa[0]];
3176           da[5] = tab[2 * sa[0] + 1];
3177         }
3178 
3179       }
3180       else {
3181 
3182         t4 = tab[2 * sa[0]];
3183         t5 = tab[2 * sa[0] + 1];
3184 #ifdef _LITTLE_ENDIAN
3185         *dp++ = (mlib_u16) (t4);
3186 #else
3187         *dp++ = (mlib_u16) (t4 >> 16);
3188 #endif /* _LITTLE_ENDIAN */
3189         sa++;
3190         da = (mlib_u32 *) dp;
3191 #ifdef _LITTLE_ENDIAN
3192         *da++ = (t4 >> 16) + (t5 << 16);
3193 #else
3194         *da++ = (t4 << 16) + (t5 >> 16);
3195 #endif /* _LITTLE_ENDIAN */
3196         s0 = sa[0] << 3;
3197         s1 = sa[1] << 3;
3198         sa += 2;
3199 
3200 #ifdef __SUNPRO_C
3201 #pragma pipeloop(0)
3202 #endif /* __SUNPRO_C */
3203         for (i = 0; i < size - 4; i += 2, da += 4, sa += 2) {
3204           t0 = *(mlib_u32 *) ((mlib_u8 *) tab + s0);
3205           t1 = *(mlib_u32 *) ((mlib_u8 *) tab + s0 + 4);
3206           t2 = *(mlib_u32 *) ((mlib_u8 *) tab + s1);
3207           t3 = *(mlib_u32 *) ((mlib_u8 *) tab + s1 + 4);
3208 #ifdef _LITTLE_ENDIAN
3209           res1 = (t5 >> 16) + (t0 << 16);
3210           res2 = (t0 >> 16) + (t1 << 16);
3211           res3 = (t1 >> 16) + (t2 << 16);
3212           res4 = (t2 >> 16) + (t3 << 16);
3213 #else
3214           res1 = (t5 << 16) + (t0 >> 16);
3215           res2 = (t0 << 16) + (t1 >> 16);
3216           res3 = (t1 << 16) + (t2 >> 16);
3217           res4 = (t2 << 16) + (t3 >> 16);
3218 #endif /* _LITTLE_ENDIAN */
3219           s0 = sa[0] << 3;
3220           s1 = sa[1] << 3;
3221           da[0] = res1;
3222           da[1] = res2;
3223           da[2] = res3;
3224           da[3] = res4;
3225           t5 = t3;
3226         }
3227 
3228         t0 = *(mlib_u32 *) ((mlib_u8 *) tab + s0);
3229         t1 = *(mlib_u32 *) ((mlib_u8 *) tab + s0 + 4);
3230         t2 = *(mlib_u32 *) ((mlib_u8 *) tab + s1);
3231         t3 = *(mlib_u32 *) ((mlib_u8 *) tab + s1 + 4);
3232 #ifdef _LITTLE_ENDIAN
3233         res1 = (t5 >> 16) + (t0 << 16);
3234         res2 = (t0 >> 16) + (t1 << 16);
3235         res3 = (t1 >> 16) + (t2 << 16);
3236         res4 = (t2 >> 16) + (t3 << 16);
3237 #else
3238         res1 = (t5 << 16) + (t0 >> 16);
3239         res2 = (t0 << 16) + (t1 >> 16);
3240         res3 = (t1 << 16) + (t2 >> 16);
3241         res4 = (t2 << 16) + (t3 >> 16);
3242 #endif /* _LITTLE_ENDIAN */
3243         da[0] = res1;
3244         da[1] = res2;
3245         da[2] = res3;
3246         da[3] = res4;
3247         da += 4;
3248         dp = (mlib_u16 *) da;
3249 #ifdef _LITTLE_ENDIAN
3250         dp[0] = (mlib_u16) (t3 >> 16);
3251 #else
3252         dp[0] = (mlib_u16) t3;
3253 #endif /* _LITTLE_ENDIAN */
3254 
3255         if ((size & 1) == 0) {
3256           t0 = tab[2 * sa[0]];
3257 #ifdef _LITTLE_ENDIAN
3258           dp[2] = (mlib_u16) (t0 >> 16);
3259           dp[1] = (mlib_u16) t0;
3260 #else
3261           dp[1] = (mlib_u16) (t0 >> 16);
3262           dp[2] = (mlib_u16) t0;
3263 #endif /* _LITTLE_ENDIAN */
3264           t0 = tab[2 * sa[0] + 1];
3265 #ifdef _LITTLE_ENDIAN
3266           dp[4] = (mlib_u16) (t0 >> 16);
3267           dp[3] = (mlib_u16) t0;
3268 #else
3269           dp[3] = (mlib_u16) (t0 >> 16);
3270           dp[4] = (mlib_u16) t0;
3271 #endif /* _LITTLE_ENDIAN */
3272         }
3273       }
3274     }
3275   }
3276 }
3277 
3278 /***************************************************************/
3279 void mlib_c_ImageLookUpSI_S16_S16(const mlib_s16 *src,
3280                                   mlib_s32       slb,
3281                                   mlib_s16       *dst,
3282                                   mlib_s32       dlb,
3283                                   mlib_s32       xsize,
3284                                   mlib_s32       ysize,
3285                                   mlib_s32       csize,
3286                                   const mlib_s16 **table)
3287 {
3288   const mlib_s16 *table_base[4];
3289   mlib_s32 c;
3290 
3291   for (c = 0; c < csize; c++) {
3292     table_base[c] = &table[c][32768];
3293   }
3294 
3295   MLIB_C_IMAGELOOKUPSI(mlib_s16, mlib_s16, table_base);
3296 }
3297 
3298 /***************************************************************/
3299 void mlib_c_ImageLookUpSI_U16_S16(const mlib_u16 *src,
3300                                   mlib_s32       slb,
3301                                   mlib_s16       *dst,
3302                                   mlib_s32       dlb,
3303                                   mlib_s32       xsize,
3304                                   mlib_s32       ysize,
3305                                   mlib_s32       csize,
3306                                   const mlib_s16 **table)
3307 {
3308   const mlib_s16 *table_base[4];
3309   mlib_s32 c;
3310 
3311   for (c = 0; c < csize; c++) {
3312     table_base[c] = &table[c][0];
3313   }
3314 
3315   MLIB_C_IMAGELOOKUPSI(mlib_s16, mlib_u16, table_base);
3316 }
3317 
3318 /***************************************************************/
3319 void mlib_c_ImageLookUpSI_S32_S16(const mlib_s32 *src,
3320                                   mlib_s32       slb,
3321                                   mlib_s16       *dst,
3322                                   mlib_s32       dlb,
3323                                   mlib_s32       xsize,
3324                                   mlib_s32       ysize,
3325                                   mlib_s32       csize,
3326                                   const mlib_s16 **table)
3327 {
3328   const mlib_s16 *table_base[4];
3329   mlib_s32 c;
3330 
3331   for (c = 0; c < csize; c++) {
3332     table_base[c] = &table[c][TABLE_SHIFT_S32];
3333   }
3334 
3335   MLIB_C_IMAGELOOKUPSI(mlib_s16, mlib_s32, table_base);
3336 }
3337 
3338 /***************************************************************/
3339 void mlib_c_ImageLookUpSI_S16_U16(const mlib_s16 *src,
3340                                   mlib_s32       slb,
3341                                   mlib_u16       *dst,
3342                                   mlib_s32       dlb,
3343                                   mlib_s32       xsize,
3344                                   mlib_s32       ysize,
3345                                   mlib_s32       csize,
3346                                   const mlib_u16 **table)
3347 {
3348   const mlib_u16 *table_base[4];
3349   mlib_s32 c;
3350 
3351   for (c = 0; c < csize; c++) {
3352     table_base[c] = &table[c][32768];
3353   }
3354 
3355   MLIB_C_IMAGELOOKUPSI(mlib_u16, mlib_s16, table_base);
3356 }
3357 
3358 /***************************************************************/
3359 void mlib_c_ImageLookUpSI_U16_U16(const mlib_u16 *src,
3360                                   mlib_s32       slb,
3361                                   mlib_u16       *dst,
3362                                   mlib_s32       dlb,
3363                                   mlib_s32       xsize,
3364                                   mlib_s32       ysize,
3365                                   mlib_s32       csize,
3366                                   const mlib_u16 **table)
3367 {
3368   const mlib_u16 *table_base[4];
3369   mlib_s32 c;
3370 
3371   for (c = 0; c < csize; c++) {
3372     table_base[c] = &table[c][0];
3373   }
3374 
3375   MLIB_C_IMAGELOOKUPSI(mlib_u16, mlib_u16, table_base);
3376 }
3377 
3378 /***************************************************************/
3379 void mlib_c_ImageLookUpSI_S32_U16(const mlib_s32 *src,
3380                                   mlib_s32       slb,
3381                                   mlib_u16       *dst,
3382                                   mlib_s32       dlb,
3383                                   mlib_s32       xsize,
3384                                   mlib_s32       ysize,
3385                                   mlib_s32       csize,
3386                                   const mlib_u16 **table)
3387 {
3388   const mlib_u16 *table_base[4];
3389   mlib_s32 c;
3390 
3391   for (c = 0; c < csize; c++) {
3392     table_base[c] = &table[c][TABLE_SHIFT_S32];
3393   }
3394 
3395   MLIB_C_IMAGELOOKUPSI(mlib_u16, mlib_s32, table_base);
3396 }
3397 
3398 /***************************************************************/
3399 void mlib_c_ImageLookUpSI_U8_S32(const mlib_u8  *src,
3400                                  mlib_s32       slb,
3401                                  mlib_s32       *dst,
3402                                  mlib_s32       dlb,
3403                                  mlib_s32       xsize,
3404                                  mlib_s32       ysize,
3405                                  mlib_s32       csize,
3406                                  const mlib_s32 **table)
3407 {
3408 
3409   if (xsize < 7) {
3410     MLIB_C_IMAGELOOKUPSI(mlib_s32, mlib_u8, table);
3411   }
3412   else if (csize == 2) {
3413     mlib_s32 i, j;
3414 
3415     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
3416       mlib_u32 *sa;
3417       mlib_u32 *tab0 = (mlib_u32 *) table[0];
3418       mlib_u32 *tab1 = (mlib_u32 *) table[1];
3419       mlib_u32 s0, t0, t1, t2, t3;
3420       mlib_s32 off;
3421       mlib_s32 size = xsize;
3422       mlib_u32 *dp = (mlib_u32 *) dst;
3423       mlib_u8 *sp = (void *)src;
3424 
3425       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
3426 
3427       for (i = 0; i < off; i++, sp++) {
3428         *dp++ = tab0[sp[0]];
3429         *dp++ = tab1[sp[0]];
3430         size--;
3431       }
3432 
3433       sa = (mlib_u32 *) sp;
3434 
3435       s0 = sa[0];
3436       sa++;
3437 
3438 #ifdef __SUNPRO_C
3439 #pragma pipeloop(0)
3440 #endif /* __SUNPRO_C */
3441       for (i = 0; i < size - 7; i += 4, dp += 8, sa++) {
3442 #ifdef _LITTLE_ENDIAN
3443         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC));
3444         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC));
3445         t2 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC));
3446         t3 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC));
3447 #else
3448         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));
3449         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC));
3450         t2 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC));
3451         t3 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC));
3452 #endif /* _LITTLE_ENDIAN */
3453         dp[0] = t0;
3454         dp[1] = t1;
3455         dp[2] = t2;
3456         dp[3] = t3;
3457 #ifdef _LITTLE_ENDIAN
3458         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC));
3459         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC));
3460         t2 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));
3461         t3 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC));
3462 #else
3463         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC));
3464         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC));
3465         t2 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC));
3466         t3 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC));
3467 #endif /* _LITTLE_ENDIAN */
3468         s0 = sa[0];
3469         dp[4] = t0;
3470         dp[5] = t1;
3471         dp[6] = t2;
3472         dp[7] = t3;
3473       }
3474 
3475 #ifdef _LITTLE_ENDIAN
3476       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC));
3477       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC));
3478       t2 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC));
3479       t3 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC));
3480 #else
3481       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));
3482       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC));
3483       t2 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC));
3484       t3 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC));
3485 #endif /* _LITTLE_ENDIAN */
3486       dp[0] = t0;
3487       dp[1] = t1;
3488       dp[2] = t2;
3489       dp[3] = t3;
3490 #ifdef _LITTLE_ENDIAN
3491       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC));
3492       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC));
3493       t2 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));
3494       t3 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC));
3495 #else
3496       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC));
3497       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC));
3498       t2 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC));
3499       t3 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC));
3500 #endif /* _LITTLE_ENDIAN */
3501       dp[4] = t0;
3502       dp[5] = t1;
3503       dp[6] = t2;
3504       dp[7] = t3;
3505       dp += 8;
3506       sp = (mlib_u8 *) sa;
3507       i += 4;
3508 
3509       for (; i < size; i++, sp++) {
3510         *dp++ = tab0[sp[0]];
3511         *dp++ = tab1[sp[0]];
3512       }
3513     }
3514 
3515   }
3516   else if (csize == 3) {
3517     mlib_s32 i, j;
3518 
3519     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
3520       mlib_u32 *sa;
3521       mlib_u32 *tab0 = (mlib_u32 *) table[0];
3522       mlib_u32 *tab1 = (mlib_u32 *) table[1];
3523       mlib_u32 *tab2 = (mlib_u32 *) table[2];
3524       mlib_u32 s0, t0, t1, t2, t3, t4, t5;
3525       mlib_s32 off;
3526       mlib_s32 size = xsize;
3527       mlib_u32 *dp = (mlib_u32 *) dst;
3528       mlib_u8 *sp = (void *)src;
3529 
3530       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
3531 
3532       for (i = 0; i < off; i++, sp++) {
3533         *dp++ = tab0[sp[0]];
3534         *dp++ = tab1[sp[0]];
3535         *dp++ = tab2[sp[0]];
3536         size--;
3537       }
3538 
3539       sa = (mlib_u32 *) sp;
3540 
3541       s0 = sa[0];
3542       sa++;
3543 
3544 #ifdef __SUNPRO_C
3545 #pragma pipeloop(0)
3546 #endif /* __SUNPRO_C */
3547       for (i = 0; i < size - 7; i += 4, dp += 12, sa++) {
3548 #ifdef _LITTLE_ENDIAN
3549         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC));
3550         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC));
3551         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 << 2) & 0x3FC));
3552         t3 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC));
3553         t4 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC));
3554         t5 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 6) & 0x3FC));
3555 #else
3556         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));
3557         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC));
3558         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 22) & 0x3FC));
3559         t3 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC));
3560         t4 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC));
3561         t5 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 14) & 0x3FC));
3562 #endif /* _LITTLE_ENDIAN */
3563         dp[0] = t0;
3564         dp[1] = t1;
3565         dp[2] = t2;
3566         dp[3] = t3;
3567         dp[4] = t4;
3568         dp[5] = t5;
3569 #ifdef _LITTLE_ENDIAN
3570         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC));
3571         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC));
3572         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 14) & 0x3FC));
3573         t3 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));
3574         t4 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC));
3575         t5 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 22) & 0x3FC));
3576 #else
3577         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC));
3578         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC));
3579         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 6) & 0x3FC));
3580         t3 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC));
3581         t4 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC));
3582         t5 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 << 2) & 0x3FC));
3583 #endif /* _LITTLE_ENDIAN */
3584         s0 = sa[0];
3585         dp[6] = t0;
3586         dp[7] = t1;
3587         dp[8] = t2;
3588         dp[9] = t3;
3589         dp[10] = t4;
3590         dp[11] = t5;
3591       }
3592 
3593 #ifdef _LITTLE_ENDIAN
3594       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC));
3595       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC));
3596       t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 << 2) & 0x3FC));
3597       t3 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC));
3598       t4 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC));
3599       t5 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 6) & 0x3FC));
3600 #else
3601       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));
3602       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC));
3603       t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 22) & 0x3FC));
3604       t3 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC));
3605       t4 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC));
3606       t5 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 14) & 0x3FC));
3607 #endif /* _LITTLE_ENDIAN */
3608       dp[0] = t0;
3609       dp[1] = t1;
3610       dp[2] = t2;
3611       dp[3] = t3;
3612       dp[4] = t4;
3613       dp[5] = t5;
3614 #ifdef _LITTLE_ENDIAN
3615       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC));
3616       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC));
3617       t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 14) & 0x3FC));
3618       t3 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));
3619       t4 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC));
3620       t5 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 22) & 0x3FC));
3621 #else
3622       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC));
3623       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC));
3624       t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 6) & 0x3FC));
3625       t3 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC));
3626       t4 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC));
3627       t5 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 << 2) & 0x3FC));
3628 #endif /* _LITTLE_ENDIAN */
3629       dp[6] = t0;
3630       dp[7] = t1;
3631       dp[8] = t2;
3632       dp[9] = t3;
3633       dp[10] = t4;
3634       dp[11] = t5;
3635       dp += 12;
3636       sp = (mlib_u8 *) sa;
3637       i += 4;
3638 
3639       for (; i < size; i++, sp++) {
3640         *dp++ = tab0[sp[0]];
3641         *dp++ = tab1[sp[0]];
3642         *dp++ = tab2[sp[0]];
3643       }
3644     }
3645 
3646   }
3647   else if (csize == 4) {
3648     mlib_s32 i, j;
3649 
3650     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
3651       mlib_u32 *sa;
3652       mlib_u32 *tab0 = (mlib_u32 *) table[0];
3653       mlib_u32 *tab1 = (mlib_u32 *) table[1];
3654       mlib_u32 *tab2 = (mlib_u32 *) table[2];
3655       mlib_u32 *tab3 = (mlib_u32 *) table[3];
3656       mlib_u32 s0, t0, t1, t2, t3;
3657       mlib_s32 off;
3658       mlib_s32 size = xsize;
3659       mlib_u32 *dp = (mlib_u32 *) dst;
3660       mlib_u8 *sp = (void *)src;
3661 
3662       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
3663 
3664       for (i = 0; i < off; i++, sp++) {
3665         *dp++ = tab0[sp[0]];
3666         *dp++ = tab1[sp[0]];
3667         *dp++ = tab2[sp[0]];
3668         *dp++ = tab3[sp[0]];
3669         size--;
3670       }
3671 
3672       sa = (mlib_u32 *) sp;
3673 
3674       s0 = sa[0];
3675       sa++;
3676 
3677 #ifdef __SUNPRO_C
3678 #pragma pipeloop(0)
3679 #endif /* __SUNPRO_C */
3680       for (i = 0; i < size - 7; i += 4, dp += 16, sa++) {
3681 #ifdef _LITTLE_ENDIAN
3682         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC));
3683         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC));
3684         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 << 2) & 0x3FC));
3685         t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 << 2) & 0x3FC));
3686 #else
3687         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));
3688         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC));
3689         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 22) & 0x3FC));
3690         t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 22) & 0x3FC));
3691 #endif /* _LITTLE_ENDIAN */
3692         dp[0] = t0;
3693         dp[1] = t1;
3694         dp[2] = t2;
3695         dp[3] = t3;
3696 #ifdef _LITTLE_ENDIAN
3697         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC));
3698         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC));
3699         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 6) & 0x3FC));
3700         t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 6) & 0x3FC));
3701 #else
3702         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC));
3703         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC));
3704         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 14) & 0x3FC));
3705         t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 14) & 0x3FC));
3706 #endif /* _LITTLE_ENDIAN */
3707         dp[4] = t0;
3708         dp[5] = t1;
3709         dp[6] = t2;
3710         dp[7] = t3;
3711 #ifdef _LITTLE_ENDIAN
3712         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC));
3713         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC));
3714         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 14) & 0x3FC));
3715         t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 14) & 0x3FC));
3716 #else
3717         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC));
3718         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC));
3719         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 6) & 0x3FC));
3720         t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 6) & 0x3FC));
3721 #endif /* _LITTLE_ENDIAN */
3722         dp[8] = t0;
3723         dp[9] = t1;
3724         dp[10] = t2;
3725         dp[11] = t3;
3726 #ifdef _LITTLE_ENDIAN
3727         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));
3728         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC));
3729         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 22) & 0x3FC));
3730         t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 22) & 0x3FC));
3731 #else
3732         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC));
3733         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC));
3734         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 << 2) & 0x3FC));
3735         t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 << 2) & 0x3FC));
3736 #endif /* _LITTLE_ENDIAN */
3737         s0 = sa[0];
3738         dp[12] = t0;
3739         dp[13] = t1;
3740         dp[14] = t2;
3741         dp[15] = t3;
3742       }
3743 
3744 #ifdef _LITTLE_ENDIAN
3745       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC));
3746       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC));
3747       t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 << 2) & 0x3FC));
3748       t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 << 2) & 0x3FC));
3749 #else
3750       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));
3751       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC));
3752       t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 22) & 0x3FC));
3753       t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 22) & 0x3FC));
3754 #endif /* _LITTLE_ENDIAN */
3755       dp[0] = t0;
3756       dp[1] = t1;
3757       dp[2] = t2;
3758       dp[3] = t3;
3759 #ifdef _LITTLE_ENDIAN
3760       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC));
3761       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC));
3762       t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 6) & 0x3FC));
3763       t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 6) & 0x3FC));
3764 #else
3765       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC));
3766       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC));
3767       t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 14) & 0x3FC));
3768       t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 14) & 0x3FC));
3769 #endif /* _LITTLE_ENDIAN */
3770       dp[4] = t0;
3771       dp[5] = t1;
3772       dp[6] = t2;
3773       dp[7] = t3;
3774 #ifdef _LITTLE_ENDIAN
3775       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC));
3776       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC));
3777       t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 14) & 0x3FC));
3778       t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 14) & 0x3FC));
3779 #else
3780       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC));
3781       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC));
3782       t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 6) & 0x3FC));
3783       t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 6) & 0x3FC));
3784 #endif /* _LITTLE_ENDIAN */
3785       dp[8] = t0;
3786       dp[9] = t1;
3787       dp[10] = t2;
3788       dp[11] = t3;
3789 #ifdef _LITTLE_ENDIAN
3790       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));
3791       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC));
3792       t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 22) & 0x3FC));
3793       t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 22) & 0x3FC));
3794 #else
3795       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC));
3796       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC));
3797       t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 << 2) & 0x3FC));
3798       t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 << 2) & 0x3FC));
3799 #endif /* _LITTLE_ENDIAN */
3800       dp[12] = t0;
3801       dp[13] = t1;
3802       dp[14] = t2;
3803       dp[15] = t3;
3804       dp += 16;
3805       sp = (mlib_u8 *) sa;
3806       i += 4;
3807 
3808       for (; i < size; i++, sp++) {
3809         *dp++ = tab0[sp[0]];
3810         *dp++ = tab1[sp[0]];
3811         *dp++ = tab2[sp[0]];
3812         *dp++ = tab3[sp[0]];
3813       }
3814     }
3815   }
3816 }
3817 
3818 /***************************************************************/
3819 void mlib_c_ImageLookUpSI_S16_S32(const mlib_s16 *src,
3820                                   mlib_s32       slb,
3821                                   mlib_s32       *dst,
3822                                   mlib_s32       dlb,
3823                                   mlib_s32       xsize,
3824                                   mlib_s32       ysize,
3825                                   mlib_s32       csize,
3826                                   const mlib_s32 **table)
3827 {
3828   const mlib_s32 *table_base[4];
3829   mlib_s32 c;
3830 
3831   for (c = 0; c < csize; c++) {
3832     table_base[c] = &table[c][32768];
3833   }
3834 
3835   MLIB_C_IMAGELOOKUPSI(mlib_s32, mlib_s16, table_base);
3836 }
3837 
3838 /***************************************************************/
3839 void mlib_c_ImageLookUpSI_U16_S32(const mlib_u16 *src,
3840                                   mlib_s32       slb,
3841                                   mlib_s32       *dst,
3842                                   mlib_s32       dlb,
3843                                   mlib_s32       xsize,
3844                                   mlib_s32       ysize,
3845                                   mlib_s32       csize,
3846                                   const mlib_s32 **table)
3847 {
3848   const mlib_s32 *table_base[4];
3849   mlib_s32 c;
3850 
3851   for (c = 0; c < csize; c++) {
3852     table_base[c] = &table[c][0];
3853   }
3854 
3855   MLIB_C_IMAGELOOKUPSI(mlib_s32, mlib_u16, table_base);
3856 }
3857 
3858 /***************************************************************/
3859 void mlib_c_ImageLookUpSI_S32_S32(const mlib_s32 *src,
3860                                   mlib_s32       slb,
3861                                   mlib_s32       *dst,
3862                                   mlib_s32       dlb,
3863                                   mlib_s32       xsize,
3864                                   mlib_s32       ysize,
3865                                   mlib_s32       csize,
3866                                   const mlib_s32 **table)
3867 {
3868   const mlib_s32 *table_base[4];
3869   mlib_s32 c;
3870 
3871   for (c = 0; c < csize; c++) {
3872     table_base[c] = &table[c][TABLE_SHIFT_S32];
3873   }
3874 
3875   MLIB_C_IMAGELOOKUPSI(mlib_s32, mlib_s32, table_base);
3876 }
3877 
3878 /***************************************************************/