1 /*
   2  * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 
  27 #include "mlib_image.h"
  28 #include "mlib_ImageLookUp.h"
  29 #include "mlib_c_ImageLookUp.h"
  30 
  31 /***************************************************************/
  32 #define MLIB_C_IMAGELOOKUP(DTYPE, STYPE, TABLE)                         \
  33 {                                                                       \
  34   mlib_s32 i, j, k;                                                     \
  35                                                                         \
  36   if (xsize < 2) {                                                      \
  37     for(j = 0; j < ysize; j++, dst += dlb, src += slb){                 \
  38       for(k = 0; k < csize; k++) {                                      \
  39         DTYPE *da = dst + k;                                            \
  40         const STYPE *sa = src + k;                                      \
  41         DTYPE *tab = (DTYPE*) TABLE[k];                                 \
  42                                                                         \
  43         for(i = 0; i < xsize; i++, da += csize, sa += csize)            \
  44         *da=tab[*sa];                                                   \
  45       }                                                                 \
  46     }                                                                   \
  47   } else {                                                              \
  48     for(j = 0; j < ysize; j++, dst += dlb, src += slb) {                \
  49       for(k = 0; k < csize; k++) {                                      \
  50         DTYPE    *da = dst + k;                                         \
  51         const STYPE *sa = src + k;                                      \
  52         DTYPE *tab = (DTYPE*) TABLE[k];                                 \
  53         mlib_s32 s0, t0, s1, t1;                                        \
  54                                                                         \
  55         s0 = (mlib_s32)sa[0];                                           \
  56         s1 = (mlib_s32)sa[csize];                                       \
  57         sa += 2*csize;                                                  \
  58                                                                         \
  59         for(i = 0; i < xsize - 3; i+=2, da += 2*csize, sa += 2*csize) { \
  60           t0 = (mlib_s32)tab[s0];                                       \
  61           t1 = (mlib_s32)tab[s1];                                       \
  62           s0 = (mlib_s32)sa[0];                                         \
  63           s1 = (mlib_s32)sa[csize];                                     \
  64           da[0] = (DTYPE)t0;                                            \
  65           da[csize] = (DTYPE)t1;                                        \
  66         }                                                               \
  67         t0 = (mlib_s32)tab[s0];                                         \
  68         t1 = (mlib_s32)tab[s1];                                         \
  69         da[0] = (DTYPE)t0;                                              \
  70         da[csize] = (DTYPE)t1;                                          \
  71         if (xsize & 1) da[2*csize] = tab[sa[0]];                        \
  72       }                                                                 \
  73     }                                                                   \
  74   }                                                                     \
  75 }
  76 
  77 /***************************************************************/
  78 #define MLIB_C_IMAGELOOKUPSI(DTYPE, STYPE, TABLE)                 \
  79 {                                                                 \
  80   mlib_s32 i, j, k;                                               \
  81                                                                   \
  82   if (xsize < 2) {                                                \
  83     for(j = 0; j < ysize; j++, dst += dlb, src += slb){           \
  84       for(k = 0; k < csize; k++) {                                \
  85         DTYPE *da = dst + k;                                      \
  86         const STYPE *sa = (void *)src;                                    \
  87         DTYPE *tab = (DTYPE*) TABLE[k];                           \
  88                                                                   \
  89         for(i = 0; i < xsize; i++, da += csize, sa ++)            \
  90         *da=tab[*sa];                                             \
  91       }                                                           \
  92     }                                                             \
  93   } else {                                                        \
  94     for(j = 0; j < ysize; j++, dst += dlb, src += slb) {          \
  95       for(k = 0; k < csize; k++) {                                \
  96         DTYPE *da = dst + k;                                      \
  97         const STYPE *sa = (void *)src;                                    \
  98         DTYPE *tab = (DTYPE*) TABLE[k];                           \
  99         mlib_s32 s0, t0, s1, t1;                                  \
 100                                                                   \
 101         s0 = (mlib_s32)sa[0];                                     \
 102         s1 = (mlib_s32)sa[1];                                     \
 103         sa += 2;                                                  \
 104                                                                   \
 105         for(i = 0; i < xsize - 3; i+=2, da += 2*csize, sa += 2) { \
 106           t0 = (mlib_s32)tab[s0];                                 \
 107           t1 = (mlib_s32)tab[s1];                                 \
 108           s0 = (mlib_s32)sa[0];                                   \
 109           s1 = (mlib_s32)sa[1];                                   \
 110           da[0] = (DTYPE)t0;                                      \
 111           da[csize] = (DTYPE)t1;                                  \
 112         }                                                         \
 113         t0 = (mlib_s32)tab[s0];                                   \
 114         t1 = (mlib_s32)tab[s1];                                   \
 115         da[0] = (DTYPE)t0;                                        \
 116         da[csize] = (DTYPE)t1;                                    \
 117         if (xsize & 1) da[2*csize] = tab[sa[0]];                  \
 118       }                                                           \
 119     }                                                             \
 120   }                                                               \
 121 }
 122 
 123 #ifdef _LITTLE_ENDIAN
 124 
 125 /***************************************************************/
 126 #define READ_U8_U8_ALIGN(table0, table1, table2, table3)        \
 127   t3 = table0[s0 & 0xFF];                                       \
 128   t2 = table1[s0>>8];                                           \
 129   t1 = table2[s1 & 0xFF];                                       \
 130   t0 = table3[s1>>8]
 131 
 132 /***************************************************************/
 133 #define READ_U8_U8_NOTALIGN(table0, table1, table2, table3)     \
 134   t3 = table0[s0 >> 8];                                         \
 135   t2 = table1[s1 & 0xFF];                                       \
 136   t1 = table2[s1 >> 8];                                         \
 137   t0 = table3[s2 & 0xFF]
 138 
 139 /***************************************************************/
 140 #define READ_U8_S16_ALIGN(table0, table1, table2, table3)       \
 141   t1 = *(mlib_u16*)((mlib_u8*)table0 + ((s0 << 1) & 0x1FE));    \
 142   t0 = *(mlib_u16*)((mlib_u8*)table1 + ((s0 >> 7) & 0x1FE));    \
 143   t3 = *(mlib_u16*)((mlib_u8*)table2 + ((s0 >> 15)  & 0x1FE));  \
 144   t2 = *(mlib_u16*)((mlib_u8*)table3 + ((s0 >> 23)  & 0x1FE))
 145 
 146 /***************************************************************/
 147 #define READ_U8_S16_NOTALIGN(table0, table1, table2, table3)    \
 148   t1 = *(mlib_u16*)((mlib_u8*)table0 + ((s0 >> 7) & 0x1FE));    \
 149   t0 = *(mlib_u16*)((mlib_u8*)table1 + ((s0 >> 15)  & 0x1FE));  \
 150   t3 = *(mlib_u16*)((mlib_u8*)table2 + ((s0 >> 23)  & 0x1FE));  \
 151   t2 = *(mlib_u16*)((mlib_u8*)table3 + ((s1 << 1) & 0x1FE))
 152 
 153 /***************************************************************/
 154 #define ADD_READ_U8_S16_NOTALIGN(table0, table1, table2)        \
 155   t1 = *(mlib_u16*)((mlib_u8*)table0 + ((s1 >> 7) & 0x1FE));    \
 156   t0 = *(mlib_u16*)((mlib_u8*)table1 + ((s1 >> 15)  & 0x1FE));  \
 157   t2 = *(mlib_u16*)((mlib_u8*)table2 + ((s1 >> 23)  & 0x1FE))
 158 
 159 /***************************************************************/
 160 #define READ_U8_S32(table0, table1, table2, table3)             \
 161   t0 = *(mlib_u32*)((mlib_u8*)table0 + ((s0 << 2) & 0x3FC));    \
 162   t1 = *(mlib_u32*)((mlib_u8*)table1 + ((s0 >> 6) & 0x3FC));    \
 163   t2 = *(mlib_u32*)((mlib_u8*)table2 + ((s0 >> 14)  & 0x3FC));  \
 164   t3 = *(mlib_u32*)((mlib_u8*)table3 + ((s0 >> 22)  & 0x3FC))
 165 
 166 #else /* _LITTLE_ENDIAN */
 167 
 168 /***********/
 169 #define READ_U8_U8_ALIGN(table0, table1, table2, table3)        \
 170   t0 = table0[s0>>8];                                           \
 171   t1 = table1[s0 & 0xFF];                                       \
 172   t2 = table2[s1>>8];                                           \
 173   t3 = table3[s1 & 0xFF]
 174 
 175 /***************************************************************/
 176 #define READ_U8_U8_NOTALIGN(table0, table1, table2, table3)     \
 177   t0 = table0[s0 & 0xFF];                                       \
 178   t1 = table1[s1 >> 8];                                         \
 179   t2 = table2[s1 & 0xFF];                                       \
 180   t3 = table3[s2 >> 8]
 181 
 182 /***************************************************************/
 183 #define READ_U8_S16_ALIGN(table0, table1, table2, table3)       \
 184   t0 = *(mlib_u16*)((mlib_u8*)table0 + ((s0 >> 23) & 0x1FE));   \
 185   t1 = *(mlib_u16*)((mlib_u8*)table1 + ((s0 >> 15) & 0x1FE));   \
 186   t2 = *(mlib_u16*)((mlib_u8*)table2 + ((s0 >> 7)  & 0x1FE));   \
 187   t3 = *(mlib_u16*)((mlib_u8*)table3 + ((s0 << 1)  & 0x1FE))
 188 
 189 /***************************************************************/
 190 #define READ_U8_S16_NOTALIGN(table0, table1, table2, table3)    \
 191   t0 = *(mlib_u16*)((mlib_u8*)table0 + ((s0 >> 15) & 0x1FE));   \
 192   t1 = *(mlib_u16*)((mlib_u8*)table1 + ((s0 >> 7)  & 0x1FE));   \
 193   t2 = *(mlib_u16*)((mlib_u8*)table2 + ((s0 << 1)  & 0x1FE));   \
 194   t3 = *(mlib_u16*)((mlib_u8*)table3 + ((s1 >> 23) & 0x1FE))
 195 
 196 /***************************************************************/
 197 #define ADD_READ_U8_S16_NOTALIGN(table0, table1, table2)        \
 198   t0 = *(mlib_u16*)((mlib_u8*)table0 + ((s1 >> 15) & 0x1FE));   \
 199   t1 = *(mlib_u16*)((mlib_u8*)table1 + ((s1 >> 7)  & 0x1FE));   \
 200   t2 = *(mlib_u16*)((mlib_u8*)table2 + ((s1 << 1)  & 0x1FE))
 201 
 202 /***************************************************************/
 203 #define READ_U8_S32(table0, table1, table2, table3)             \
 204   t0 = *(mlib_u32*)((mlib_u8*)table0 + ((s0 >> 22) & 0x3FC));   \
 205   t1 = *(mlib_u32*)((mlib_u8*)table1 + ((s0 >> 14) & 0x3FC));   \
 206   t2 = *(mlib_u32*)((mlib_u8*)table2 + ((s0 >> 6)  & 0x3FC));   \
 207   t3 = *(mlib_u32*)((mlib_u8*)table3 + ((s0 << 2)  & 0x3FC))
 208 
 209 #endif /* _LITTLE_ENDIAN */
 210 
 211 /***************************************************************/
 212 void mlib_c_ImageLookUp_U8_U8(const mlib_u8 *src,
 213                               mlib_s32      slb,
 214                               mlib_u8       *dst,
 215                               mlib_s32      dlb,
 216                               mlib_s32      xsize,
 217                               mlib_s32      ysize,
 218                               mlib_s32      csize,
 219                               const mlib_u8 **table)
 220 {
 221 
 222   if (xsize * csize < 9) {
 223     MLIB_C_IMAGELOOKUP(mlib_u8, mlib_u8, table);
 224   }
 225   else if (csize == 1) {
 226     mlib_s32 i, j;
 227 
 228     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
 229       mlib_u32 *da;
 230       mlib_u16 *sa;
 231       mlib_u8 *tab = (mlib_u8 *) table[0];
 232       mlib_u32 s0, s1, s2, t0, t1, t2, t3, t;
 233       mlib_s32 off;
 234       mlib_s32 size = xsize;
 235       mlib_u8 *dp = dst, *sp = (void *)src;
 236 
 237       off = (mlib_s32) ((4 - ((mlib_addr) dst & 3)) & 3);
 238 
 239       for (i = 0; i < off; i++, sp++) {
 240         *dp++ = tab[sp[0]];
 241         size--;
 242       }
 243 
 244       da = (mlib_u32 *) dp;
 245 
 246       if (((mlib_addr) sp & 1) == 0) {
 247         sa = (mlib_u16 *) sp;
 248 
 249         s0 = sa[0];
 250         s1 = sa[1];
 251         sa += 2;
 252 
 253 #ifdef __SUNPRO_C
 254 #pragma pipeloop(0)
 255 #endif /* __SUNPRO_C */
 256         for (i = 0; i < size - 7; i += 4, da++, sa += 2) {
 257           READ_U8_U8_ALIGN(tab, tab, tab, tab);
 258           t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 259           s0 = sa[0];
 260           s1 = sa[1];
 261           da[0] = t;
 262         }
 263 
 264         READ_U8_U8_ALIGN(tab, tab, tab, tab);
 265         t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 266         da[0] = t;
 267         da++;
 268         dp = (mlib_u8 *) da;
 269         sp = (mlib_u8 *) sa;
 270         i += 4;
 271         for (; i < size; i++, dp++, sp++)
 272           dp[0] = tab[sp[0]];
 273 
 274       }
 275       else {
 276         sa = (mlib_u16 *) (sp - 1);
 277 
 278         s0 = sa[0];
 279         s1 = sa[1];
 280         s2 = sa[2];
 281         sa += 3;
 282 
 283 #ifdef __SUNPRO_C
 284 #pragma pipeloop(0)
 285 #endif /* __SUNPRO_C */
 286         for (i = 0; i < size - 8; i += 4, da++, sa += 2) {
 287           READ_U8_U8_NOTALIGN(tab, tab, tab, tab);
 288           t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 289           s0 = s2;
 290           s1 = sa[0];
 291           s2 = sa[1];
 292           da[0] = t;
 293         }
 294 
 295         READ_U8_U8_NOTALIGN(tab, tab, tab, tab);
 296         t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 297         da[0] = t;
 298         da++;
 299         dp = (mlib_u8 *) da;
 300 #ifdef _LITTLE_ENDIAN
 301         *dp++ = tab[s2 >> 8];
 302 #else
 303         *dp++ = tab[s2 & 0xFF];
 304 #endif /* _LITTLE_ENDIAN */
 305         sp = (mlib_u8 *) sa;
 306         i += 5;
 307         for (; i < size; i++, dp++, sp++)
 308           dp[0] = tab[sp[0]];
 309       }
 310     }
 311 
 312   }
 313   else if (csize == 2) {
 314     mlib_s32 i, j;
 315 
 316     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
 317       mlib_u32 *da;
 318       mlib_u16 *sa;
 319       mlib_u8 *tab0 = (mlib_u8 *) table[0];
 320       mlib_u8 *tab1 = (mlib_u8 *) table[1];
 321       mlib_u8 *tab;
 322       mlib_u32 s0, s1, s2, t0, t1, t2, t3, t;
 323       mlib_s32 off;
 324       mlib_s32 size = xsize * 2;
 325       mlib_u8 *dp = dst, *sp = (void *)src;
 326 
 327       off = (mlib_s32) ((4 - ((mlib_addr) dst & 3)) & 3);
 328 
 329       for (i = 0; i < off - 1; i += 2, sp += 2) {
 330         *dp++ = tab0[sp[0]];
 331         *dp++ = tab1[sp[1]];
 332         size -= 2;
 333       }
 334 
 335       if ((off & 1) != 0) {
 336         *dp++ = tab0[sp[0]];
 337         size--;
 338         sp++;
 339         tab = tab0;
 340         tab0 = tab1;
 341         tab1 = tab;
 342       }
 343 
 344       da = (mlib_u32 *) dp;
 345 
 346       if (((mlib_addr) sp & 1) == 0) {
 347         sa = (mlib_u16 *) sp;
 348 
 349         s0 = sa[0];
 350         s1 = sa[1];
 351         sa += 2;
 352 
 353 #ifdef __SUNPRO_C
 354 #pragma pipeloop(0)
 355 #endif /* __SUNPRO_C */
 356         for (i = 0; i < size - 7; i += 4, da++, sa += 2) {
 357           READ_U8_U8_ALIGN(tab0, tab1, tab0, tab1);
 358           t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 359           s0 = sa[0];
 360           s1 = sa[1];
 361           da[0] = t;
 362         }
 363 
 364         READ_U8_U8_ALIGN(tab0, tab1, tab0, tab1);
 365         t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 366         da[0] = t;
 367         da++;
 368         dp = (mlib_u8 *) da;
 369         sp = (mlib_u8 *) sa;
 370         i += 4;
 371 
 372         for (; i < size - 1; i += 2, sp += 2) {
 373           *dp++ = tab0[sp[0]];
 374           *dp++ = tab1[sp[1]];
 375         }
 376 
 377         if (i < size)
 378           *dp = tab0[(*sp)];
 379 
 380       }
 381       else {
 382         sa = (mlib_u16 *) (sp - 1);
 383 
 384         s0 = sa[0];
 385         s1 = sa[1];
 386         s2 = sa[2];
 387         sa += 3;
 388 
 389 #ifdef __SUNPRO_C
 390 #pragma pipeloop(0)
 391 #endif /* __SUNPRO_C */
 392         for (i = 0; i < size - 8; i += 4, da++, sa += 2) {
 393           READ_U8_U8_NOTALIGN(tab0, tab1, tab0, tab1);
 394           t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 395           s0 = s2;
 396           s1 = sa[0];
 397           s2 = sa[1];
 398           da[0] = t;
 399         }
 400 
 401         READ_U8_U8_NOTALIGN(tab0, tab1, tab0, tab1);
 402         t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 403         da[0] = t;
 404         da++;
 405         dp = (mlib_u8 *) da;
 406 #ifdef _LITTLE_ENDIAN
 407         *dp++ = tab0[s2 >> 8];
 408 #else
 409         *dp++ = tab0[s2 & 0xFF];
 410 #endif /* _LITTLE_ENDIAN */
 411         sp = (mlib_u8 *) sa;
 412         i += 5;
 413 
 414         for (; i < size - 1; i += 2, sp += 2) {
 415           *dp++ = tab1[sp[0]];
 416           *dp++ = tab0[sp[1]];
 417         }
 418 
 419         if (i < size)
 420           *dp = tab1[(*sp)];
 421       }
 422     }
 423 
 424   }
 425   else if (csize == 3) {
 426     mlib_s32 i, j;
 427 
 428     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
 429       mlib_u32 *da;
 430       mlib_u16 *sa;
 431       mlib_u8 *tab0 = (mlib_u8 *) table[0];
 432       mlib_u8 *tab1 = (mlib_u8 *) table[1];
 433       mlib_u8 *tab2 = (mlib_u8 *) table[2];
 434       mlib_u8 *tab;
 435       mlib_u32 s0, s1, s2, t0, t1, t2, t3, t;
 436       mlib_s32 off;
 437       mlib_s32 size = xsize * 3;
 438       mlib_u8 *dp = dst, *sp = (void *)src;
 439 
 440       off = (mlib_s32) ((4 - ((mlib_addr) dst & 3)) & 3);
 441 
 442       if (off == 1) {
 443         *dp++ = tab0[sp[0]];
 444         tab = tab0;
 445         tab0 = tab1;
 446         tab1 = tab2;
 447         tab2 = tab;
 448         size--;
 449         sp++;
 450       }
 451       else if (off == 2) {
 452         *dp++ = tab0[sp[0]];
 453         *dp++ = tab1[sp[1]];
 454         tab = tab2;
 455         tab2 = tab1;
 456         tab1 = tab0;
 457         tab0 = tab;
 458         size -= 2;
 459         sp += 2;
 460       }
 461       else if (off == 3) {
 462         *dp++ = tab0[sp[0]];
 463         *dp++ = tab1[sp[1]];
 464         *dp++ = tab2[sp[2]];
 465         size -= 3;
 466         sp += 3;
 467       }
 468 
 469       da = (mlib_u32 *) dp;
 470 
 471       if (((mlib_addr) sp & 1) == 0) {
 472         sa = (mlib_u16 *) sp;
 473 
 474         s0 = sa[0];
 475         s1 = sa[1];
 476         sa += 2;
 477 
 478 #ifdef __SUNPRO_C
 479 #pragma pipeloop(0)
 480 #endif /* __SUNPRO_C */
 481         for (i = 0; i < size - 7; i += 4, da++, sa += 2) {
 482           READ_U8_U8_ALIGN(tab0, tab1, tab2, tab0);
 483           t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 484           tab = tab0;
 485           tab0 = tab1;
 486           tab1 = tab2;
 487           tab2 = tab;
 488           s0 = sa[0];
 489           s1 = sa[1];
 490           da[0] = t;
 491         }
 492 
 493         READ_U8_U8_ALIGN(tab0, tab1, tab2, tab0);
 494         t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 495         da[0] = t;
 496         da++;
 497         dp = (mlib_u8 *) da;
 498         sp = (mlib_u8 *) sa;
 499         i += 4;
 500 
 501         if (i < size) {
 502           *dp++ = tab1[(*sp)];
 503           i++;
 504           sp++;
 505         }
 506 
 507         if (i < size) {
 508           *dp++ = tab2[(*sp)];
 509           i++;
 510           sp++;
 511         }
 512 
 513         if (i < size) {
 514           *dp++ = tab0[(*sp)];
 515         }
 516 
 517       }
 518       else {
 519         sa = (mlib_u16 *) (sp - 1);
 520 
 521         s0 = sa[0];
 522         s1 = sa[1];
 523         s2 = sa[2];
 524         sa += 3;
 525 
 526 #ifdef __SUNPRO_C
 527 #pragma pipeloop(0)
 528 #endif /* __SUNPRO_C */
 529         for (i = 0; i < size - 8; i += 4, da++, sa += 2) {
 530           READ_U8_U8_NOTALIGN(tab0, tab1, tab2, tab0);
 531           t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 532           tab = tab0;
 533           tab0 = tab1;
 534           tab1 = tab2;
 535           tab2 = tab;
 536           s0 = s2;
 537           s1 = sa[0];
 538           s2 = sa[1];
 539           da[0] = t;
 540         }
 541 
 542         READ_U8_U8_NOTALIGN(tab0, tab1, tab2, tab0);
 543         t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 544         da[0] = t;
 545         da++;
 546         dp = (mlib_u8 *) da;
 547 #ifdef _LITTLE_ENDIAN
 548         *dp++ = tab1[s2 >> 8];
 549 #else
 550         *dp++ = tab1[s2 & 0xFF];
 551 #endif /* _LITTLE_ENDIAN */
 552         sp = (mlib_u8 *) sa;
 553         i += 5;
 554 
 555         if (i < size) {
 556           *dp++ = tab2[(*sp)];
 557           i++;
 558           sp++;
 559         }
 560 
 561         if (i < size) {
 562           *dp++ = tab0[(*sp)];
 563           i++;
 564           sp++;
 565         }
 566 
 567         if (i < size) {
 568           *dp = tab1[(*sp)];
 569         }
 570       }
 571     }
 572 
 573   }
 574   else if (csize == 4) {
 575     mlib_s32 i, j;
 576 
 577     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
 578       mlib_u32 *da;
 579       mlib_u16 *sa;
 580       mlib_u8 *tab0 = (mlib_u8 *) table[0];
 581       mlib_u8 *tab1 = (mlib_u8 *) table[1];
 582       mlib_u8 *tab2 = (mlib_u8 *) table[2];
 583       mlib_u8 *tab3 = (mlib_u8 *) table[3];
 584       mlib_u8 *tab;
 585       mlib_u32 s0, s1, s2, t0, t1, t2, t3, t;
 586       mlib_s32 off;
 587       mlib_s32 size = xsize * 4;
 588       mlib_u8 *dp = dst, *sp = (void *)src;
 589 
 590       off = (mlib_s32) ((4 - ((mlib_addr) dst & 3)) & 3);
 591 
 592       if (off == 1) {
 593         *dp++ = tab0[sp[0]];
 594         tab = tab0;
 595         tab0 = tab1;
 596         tab1 = tab2;
 597         tab2 = tab3;
 598         tab3 = tab;
 599         size--;
 600         sp++;
 601       }
 602       else if (off == 2) {
 603         *dp++ = tab0[sp[0]];
 604         *dp++ = tab1[sp[1]];
 605         tab = tab0;
 606         tab0 = tab2;
 607         tab2 = tab;
 608         tab = tab1;
 609         tab1 = tab3;
 610         tab3 = tab;
 611         size -= 2;
 612         sp += 2;
 613       }
 614       else if (off == 3) {
 615         *dp++ = tab0[sp[0]];
 616         *dp++ = tab1[sp[1]];
 617         *dp++ = tab2[sp[2]];
 618         tab = tab3;
 619         tab3 = tab2;
 620         tab2 = tab1;
 621         tab1 = tab0;
 622         tab0 = tab;
 623         size -= 3;
 624         sp += 3;
 625       }
 626 
 627       da = (mlib_u32 *) dp;
 628 
 629       if (((mlib_addr) sp & 1) == 0) {
 630         sa = (mlib_u16 *) sp;
 631 
 632         s0 = sa[0];
 633         s1 = sa[1];
 634         sa += 2;
 635 
 636 #ifdef __SUNPRO_C
 637 #pragma pipeloop(0)
 638 #endif /* __SUNPRO_C */
 639         for (i = 0; i < size - 7; i += 4, da++, sa += 2) {
 640           READ_U8_U8_ALIGN(tab0, tab1, tab2, tab3);
 641           t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 642           s0 = sa[0];
 643           s1 = sa[1];
 644           da[0] = t;
 645         }
 646 
 647         READ_U8_U8_ALIGN(tab0, tab1, tab2, tab3);
 648         t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 649         da[0] = t;
 650         da++;
 651         dp = (mlib_u8 *) da;
 652         sp = (mlib_u8 *) sa;
 653         i += 4;
 654 
 655         if (i < size) {
 656           *dp++ = tab0[(*sp)];
 657           i++;
 658           sp++;
 659         }
 660 
 661         if (i < size) {
 662           *dp++ = tab1[(*sp)];
 663           i++;
 664           sp++;
 665         }
 666 
 667         if (i < size) {
 668           *dp = tab2[(*sp)];
 669         }
 670 
 671       }
 672       else {
 673         sa = (mlib_u16 *) (sp - 1);
 674 
 675         s0 = sa[0];
 676         s1 = sa[1];
 677         s2 = sa[2];
 678         sa += 3;
 679 
 680 #ifdef __SUNPRO_C
 681 #pragma pipeloop(0)
 682 #endif /* __SUNPRO_C */
 683         for (i = 0; i < size - 8; i += 4, da++, sa += 2) {
 684           READ_U8_U8_NOTALIGN(tab0, tab1, tab2, tab3);
 685           t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 686           s0 = s2;
 687           s1 = sa[0];
 688           s2 = sa[1];
 689           da[0] = t;
 690         }
 691 
 692         READ_U8_U8_NOTALIGN(tab0, tab1, tab2, tab3);
 693         t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
 694         da[0] = t;
 695         da++;
 696         dp = (mlib_u8 *) da;
 697 #ifdef _LITTLE_ENDIAN
 698         *dp++ = tab0[s2 >> 8];
 699 #else
 700         *dp++ = tab0[s2 & 0xFF];
 701 #endif /* _LITTLE_ENDIAN */
 702         sp = (mlib_u8 *) sa;
 703         i += 5;
 704 
 705         if (i < size) {
 706           *dp++ = tab1[(*sp)];
 707           i++;
 708           sp++;
 709         }
 710 
 711         if (i < size) {
 712           *dp++ = tab2[(*sp)];
 713           i++;
 714           sp++;
 715         }
 716 
 717         if (i < size) {
 718           *dp = tab3[(*sp)];
 719         }
 720       }
 721     }
 722   }
 723 }
 724 
 725 /***************************************************************/
 726 void mlib_c_ImageLookUp_S16_U8(const mlib_s16 *src,
 727                                mlib_s32       slb,
 728                                mlib_u8        *dst,
 729                                mlib_s32       dlb,
 730                                mlib_s32       xsize,
 731                                mlib_s32       ysize,
 732                                mlib_s32       csize,
 733                                const mlib_u8  **table)
 734 {
 735   const mlib_u8 *table_base[4];
 736   mlib_s32 c;
 737 
 738   for (c = 0; c < csize; c++) {
 739     table_base[c] = &table[c][32768];
 740   }
 741 
 742 #ifdef __GNUC__
 743 #pragma GCC diagnostic push
 744 #pragma GCC diagnostic ignored "-Warray-bounds"
 745 #endif
 746   MLIB_C_IMAGELOOKUP(mlib_u8, mlib_s16, table_base);
 747 #ifdef __GNUC__
 748 #pragma GCC diagnostic pop
 749 #endif
 750 }
 751 
 752 /***************************************************************/
 753 void mlib_c_ImageLookUp_U16_U8(const mlib_u16 *src,
 754                                mlib_s32       slb,
 755                                mlib_u8        *dst,
 756                                mlib_s32       dlb,
 757                                mlib_s32       xsize,
 758                                mlib_s32       ysize,
 759                                mlib_s32       csize,
 760                                const mlib_u8  **table)
 761 {
 762   const mlib_u8 *table_base[4];
 763   mlib_s32 c;
 764 
 765   for (c = 0; c < csize; c++) {
 766     table_base[c] = &table[c][0];
 767   }
 768 
 769 #ifdef __GNUC__
 770 #pragma GCC diagnostic push
 771 #pragma GCC diagnostic ignored "-Warray-bounds"
 772 #endif
 773   MLIB_C_IMAGELOOKUP(mlib_u8, mlib_u16, table_base);
 774 #ifdef __GNUC__
 775 #pragma GCC diagnostic pop
 776 #endif
 777 }
 778 
 779 /***************************************************************/
 780 void mlib_c_ImageLookUp_S32_U8(const mlib_s32 *src,
 781                                mlib_s32       slb,
 782                                mlib_u8        *dst,
 783                                mlib_s32       dlb,
 784                                mlib_s32       xsize,
 785                                mlib_s32       ysize,
 786                                mlib_s32       csize,
 787                                const mlib_u8  **table)
 788 {
 789   const mlib_u8 *table_base[4];
 790   mlib_s32 c;
 791 
 792   for (c = 0; c < csize; c++) {
 793     table_base[c] = &table[c][TABLE_SHIFT_S32];
 794   }
 795 
 796 #ifdef __GNUC__
 797 #pragma GCC diagnostic push
 798 #pragma GCC diagnostic ignored "-Warray-bounds"
 799 #endif
 800   MLIB_C_IMAGELOOKUP(mlib_u8, mlib_s32, table_base);
 801 #ifdef __GNUC__
 802 #pragma GCC diagnostic pop
 803 #endif
 804 }
 805 
 806 /***************************************************************/
 807 void mlib_c_ImageLookUp_U8_S16(const mlib_u8  *src,
 808                                mlib_s32       slb,
 809                                mlib_s16       *dst,
 810                                mlib_s32       dlb,
 811                                mlib_s32       xsize,
 812                                mlib_s32       ysize,
 813                                mlib_s32       csize,
 814                                const mlib_s16 **table)
 815 {
       /*
        * Table lookup from an 8-bit source image into a 16-bit destination
        * image, using one table per channel (table[0] .. table[csize - 1]).
        *
        *   src, slb - source samples; src is advanced by slb elements per row
        *   dst, dlb - destination samples; dst is advanced by dlb elements
        *              per row
        *   xsize    - pixels per row
        *   ysize    - number of rows
        *   csize    - channels per pixel
        *   table    - per-channel tables, indexed by the source byte
        *
        * When a row holds fewer than 12 samples (xsize * csize < 12) the
        * generic MLIB_C_IMAGELOOKUP macro is used.  Otherwise there is one
        * hand-scheduled path for each of csize == 1, 2, 3 and 4; any other
        * csize matches no branch and nothing is written.
        *
        * Every fast path has the same shape for each row:
        *   1. look up 0..3 leading samples one at a time until the source
        *      pointer is 4-byte aligned,
        *   2. read the source as 32-bit words (sa) and write the results as
        *      32-bit words (da), two 16-bit results per word, with the next
        *      source word loaded one iteration ahead,
        *   3. finish the remaining samples one at a time.
        * Step 2 has two variants depending on whether the destination
        * pointer is 4-byte aligned at that point.
        *
        * NOTE(review): READ_U8_S16_ALIGN, READ_U8_S16_NOTALIGN and
        * ADD_READ_U8_S16_NOTALIGN are defined outside this chunk.  From their
        * use here they presumably split s0 (and s1) into bytes and load
        * t0..t3 (t0..t2 for ADD_READ) from the tables passed as arguments,
        * in that order - confirm against the macro definitions.
        */
 816
 817   if (xsize * csize < 12) {
         /* Row too short for the word-at-a-time paths below. */
 818     MLIB_C_IMAGELOOKUP(mlib_s16, mlib_u8, table);
 819   }
 820   else if (csize == 1) {
 821     mlib_s32 i, j;
 822
 823     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
 824       mlib_u32 *sa;
 825       mlib_u32 *da;
 826       mlib_u16 *tab = (mlib_u16 *) table[0];
 827       mlib_u32 s0, s1, t0, t1, t2, t3;
 828       mlib_u32 res1, res2;
 829       mlib_s32 off;
 830       mlib_s32 size = xsize;
 831       mlib_u16 *dp = (mlib_u16 *) dst;
 832       mlib_u8 *sp = (void *)src;
 833
           /* Number of source bytes (0..3) before src is 4-byte aligned. */
 834       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
 835
           /* Leading samples, one at a time, up to the aligned source word. */
 836       for (i = 0; i < off; i++, sp++) {
 837         *dp++ = tab[sp[0]];
 838         size--;
 839       }
 840
 841       sa = (mlib_u32 *) sp;
 842
 843       if (((mlib_addr) dp & 3) == 0) {
             /* Destination is 4-byte aligned: 32-bit stores can start here. */
 844         da = (mlib_u32 *) dp;
 845
             /* Preload the first source word; the loop loads one word ahead. */
 846         s0 = sa[0];
 847         sa++;
 848
 849 #ifdef __SUNPRO_C
 850 #pragma pipeloop(0)
 851 #endif /* __SUNPRO_C */
             /*
              * Four samples per iteration.  The bound leaves between 4 and 7
              * samples for the code after the loop.
              */
 852         for (i = 0; i < size - 7; i += 4, da += 2, sa++) {
 853           READ_U8_S16_ALIGN(tab, tab, tab, tab);
               /* Pack two 16-bit results per word: t0/t2 high, t1/t3 low. */
 854           res1 = (t0 << 16) + t1;
 855           res2 = (t2 << 16) + t3;
 856           s0 = sa[0];
 857           da[0] = res1;
 858           da[1] = res2;
 859         }
 860
             /* One more group of four from the word already held in s0. */
 861         READ_U8_S16_ALIGN(tab, tab, tab, tab);
 862         res1 = (t0 << 16) + t1;
 863         res2 = (t2 << 16) + t3;
 864         da[0] = res1;
 865         da[1] = res2;
 866         da += 2;
 867         dp = (mlib_u16 *) da;
 868         sp = (mlib_u8 *) sa;
 869         i += 4;
             /* Remaining 0..3 samples, one at a time. */
 870         for (; i < size; i++, dp++, sp++)
 871           dp[0] = tab[sp[0]];
 872
 873       }
 874       else {
 875
             /*
              * Destination is not 4-byte aligned: write one sample so that the
              * 32-bit stores through da start one 16-bit element later.
              * sp/sa are not advanced, so the source words now start one byte
              * before the next unread sample; the NOTALIGN macros presumably
              * account for that using both s0 and s1 - confirm.
              */
 876         *dp++ = tab[(*sp)];
 877         size--;
 878         da = (mlib_u32 *) dp;
 879
 880         s0 = sa[0];
 881         s1 = sa[1];
 882         sa += 2;
 883
 884 #ifdef __SUNPRO_C
 885 #pragma pipeloop(0)
 886 #endif /* __SUNPRO_C */
             /*
              * Four samples per iteration.  The bound leaves between 7 and 10
              * samples for the code after the loop.
              */
 887         for (i = 0; i < size - 10; i += 4, da += 2, sa++) {
 888           READ_U8_S16_NOTALIGN(tab, tab, tab, tab);
 889           s0 = s1;
 890           res1 = (t0 << 16) + t1;
 891           res2 = (t2 << 16) + t3;
 892           s1 = sa[0];
 893           da[0] = res1;
 894           da[1] = res2;
 895         }
 896
             /* Seven more samples: a group of four, then a group of three. */
 897         READ_U8_S16_NOTALIGN(tab, tab, tab, tab);
 898         res1 = (t0 << 16) + t1;
 899         res2 = (t2 << 16) + t3;
 900         da[0] = res1;
 901         da[1] = res2;
 902         ADD_READ_U8_S16_NOTALIGN(tab, tab, tab);
 903         res1 = (t0 << 16) + t1;
 904         da[2] = res1;
 905         da += 3;
 906         dp = (mlib_u16 *) da;
             /* The third value of the last group is stored as a single sample. */
 907         *dp++ = (mlib_u16) t2;
 908         sp = (mlib_u8 *) sa;
 909         i += 7;
             /* Remaining 0..3 samples, one at a time. */
 910         for (; i < size; i++, dp++, sp++)
 911           dp[0] = tab[sp[0]];
 912       }
 913     }
 914
 915   }
 916   else if (csize == 2) {
 917     mlib_s32 i, j;
 918
 919     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
 920       mlib_u32 *sa;
 921       mlib_u32 *da;
 922       mlib_u16 *tab0 = (mlib_u16 *) table[0];
 923       mlib_u16 *tab1 = (mlib_u16 *) table[1];
 924       mlib_u16 *tab;
 925       mlib_u32 s0, s1, t0, t1, t2, t3;
 926       mlib_u32 res1, res2;
 927       mlib_s32 off;
 928       mlib_s32 size = xsize * 2;
 929       mlib_u16 *dp = (mlib_u16 *) dst;
 930       mlib_u8 *sp = (void *)src;
 931
           /* Number of source bytes (0..3) before src is 4-byte aligned. */
 932       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
 933
           /* Leading samples: one whole two-channel pixel when off is 2 or 3. */
 934       for (i = 0; i < off - 1; i += 2, sp += 2) {
 935         *dp++ = tab0[sp[0]];
 936         *dp++ = tab1[sp[1]];
 937         size -= 2;
 938       }
 939
           /*
            * Odd off: one more single sample, then swap the table pointers so
            * that tab0 is again the table for the next unread sample.
            */
 940       if ((off & 1) != 0) {
 941         *dp++ = tab0[*sp];
 942         size--;
 943         sp++;
 944         tab = tab0;
 945         tab0 = tab1;
 946         tab1 = tab;
 947       }
 948
 949       sa = (mlib_u32 *) sp;
 950
 951       if (((mlib_addr) dp & 3) == 0) {
             /* Destination is 4-byte aligned. */
 952         da = (mlib_u32 *) dp;
 953
 954         s0 = sa[0];
 955         sa++;
 956
 957 #ifdef __SUNPRO_C
 958 #pragma pipeloop(0)
 959 #endif /* __SUNPRO_C */
             /* Four samples (two pixels) per iteration, next word loaded ahead. */
 960         for (i = 0; i < size - 7; i += 4, da += 2, sa++) {
 961           READ_U8_S16_ALIGN(tab0, tab1, tab0, tab1);
 962           res1 = (t0 << 16) + t1;
 963           res2 = (t2 << 16) + t3;
 964           s0 = sa[0];
 965           da[0] = res1;
 966           da[1] = res2;
 967         }
 968
             /* One more group of four from the word already held in s0. */
 969         READ_U8_S16_ALIGN(tab0, tab1, tab0, tab1);
 970         res1 = (t0 << 16) + t1;
 971         res2 = (t2 << 16) + t3;
 972         da[0] = res1;
 973         da[1] = res2;
 974         da += 2;
 975         dp = (mlib_u16 *) da;
 976         sp = (mlib_u8 *) sa;
 977         i += 4;
 978
             /* Tail: whole pairs first, then a possible single last sample. */
 979         for (; i < size - 1; i += 2, sp += 2) {
 980           *dp++ = tab0[sp[0]];
 981           *dp++ = tab1[sp[1]];
 982         }
 983
 984         if (i < size)
 985           *dp = tab0[(*sp)];
 986
 987       }
 988       else {
 989
             /*
              * Destination not 4-byte aligned: write one sample with tab0 first.
              * The tables are not swapped here; instead the macros below are
              * given them starting with tab1.  sp/sa stay where they are, as in
              * the csize == 1 path.
              */
 990         *dp++ = tab0[(*sp)];
 991         size--;
 992         da = (mlib_u32 *) dp;
 993
 994         s0 = sa[0];
 995         s1 = sa[1];
 996         sa += 2;
 997
 998 #ifdef __SUNPRO_C
 999 #pragma pipeloop(0)
1000 #endif /* __SUNPRO_C */
1001         for (i = 0; i < size - 10; i += 4, da += 2, sa++) {
1002           READ_U8_S16_NOTALIGN(tab1, tab0, tab1, tab0);
1003           s0 = s1;
1004           res1 = (t0 << 16) + t1;
1005           res2 = (t2 << 16) + t3;
1006           s1 = sa[0];
1007           da[0] = res1;
1008           da[1] = res2;
1009         }
1010
             /* Seven more samples: a group of four, then a group of three. */
1011         READ_U8_S16_NOTALIGN(tab1, tab0, tab1, tab0);
1012         res1 = (t0 << 16) + t1;
1013         res2 = (t2 << 16) + t3;
1014         da[0] = res1;
1015         da[1] = res2;
1016         ADD_READ_U8_S16_NOTALIGN(tab1, tab0, tab1);
1017         res1 = (t0 << 16) + t1;
1018         da[2] = res1;
1019         da += 3;
1020         dp = (mlib_u16 *) da;
1021         *dp++ = (mlib_u16) t2;
1022         sp = (mlib_u8 *) sa;
1023         i += 7;
1024
             /* 1 + 4k + 7 samples are done, so the next one uses tab0 again. */
1025         for (; i < size - 1; i += 2, sp += 2) {
1026           *dp++ = tab0[sp[0]];
1027           *dp++ = tab1[sp[1]];
1028         }
1029
1030         if (i < size)
1031           *dp = tab0[(*sp)];
1032       }
1033     }
1034
1035   }
1036   else if (csize == 3) {
1037     mlib_s32 i, j;
1038
1039     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
1040       mlib_u32 *sa;
1041       mlib_u32 *da;
1042       mlib_u16 *tab0 = (mlib_u16 *) table[0];
1043       mlib_u16 *tab1 = (mlib_u16 *) table[1];
1044       mlib_u16 *tab2 = (mlib_u16 *) table[2];
1045       mlib_u16 *tab;
1046       mlib_u32 s0, s1, t0, t1, t2, t3;
1047       mlib_u32 res1, res2;
1048       mlib_s32 off;
1049       mlib_s32 size = xsize * 3;
1050       mlib_u16 *dp = (mlib_u16 *) dst;
1051       mlib_u8 *sp = (void *)src;
1052
           /* Number of source bytes (0..3) before src is 4-byte aligned. */
1053       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
1054
           /*
            * Leading samples.  After writing off samples the table pointers
            * are rotated so that tab0 is the table for the next unread sample
            * (no rotation is needed after a whole three-channel pixel).
            */
1055       if (off == 1) {
1056         *dp++ = tab0[(*sp)];
1057         tab = tab0;
1058         tab0 = tab1;
1059         tab1 = tab2;
1060         tab2 = tab;
1061         size--;
1062         sp++;
1063       }
1064       else if (off == 2) {
1065         *dp++ = tab0[sp[0]];
1066         *dp++ = tab1[sp[1]];
1067         tab = tab2;
1068         tab2 = tab1;
1069         tab1 = tab0;
1070         tab0 = tab;
1071         size -= 2;
1072         sp += 2;
1073       }
1074       else if (off == 3) {
1075         *dp++ = tab0[sp[0]];
1076         *dp++ = tab1[sp[1]];
1077         *dp++ = tab2[sp[2]];
1078         size -= 3;
1079         sp += 3;
1080       }
1081
1082       sa = (mlib_u32 *) sp;
1083
1084       if (((mlib_addr) dp & 3) == 0) {
             /* Destination is 4-byte aligned. */
1085         da = (mlib_u32 *) dp;
1086
1087         s0 = sa[0];
1088         sa++;
1089
1090 #ifdef __SUNPRO_C
1091 #pragma pipeloop(0)
1092 #endif /* __SUNPRO_C */
             /*
              * Four samples per iteration is one pixel plus one sample, so the
              * table pointers are rotated by one position every iteration.
              */
1093         for (i = 0; i < size - 7; i += 4, da += 2, sa++) {
1094           READ_U8_S16_ALIGN(tab0, tab1, tab2, tab0);
1095           res1 = (t0 << 16) + t1;
1096           res2 = (t2 << 16) + t3;
1097           tab = tab0;
1098           tab0 = tab1;
1099           tab1 = tab2;
1100           tab2 = tab;
1101           s0 = sa[0];
1102           da[0] = res1;
1103           da[1] = res2;
1104         }
1105
             /*
              * One more group of four.  The tables are not rotated afterwards;
              * the tail below therefore continues with tab1, tab2, tab0.
              */
1106         READ_U8_S16_ALIGN(tab0, tab1, tab2, tab0);
1107         res1 = (t0 << 16) + t1;
1108         res2 = (t2 << 16) + t3;
1109         da[0] = res1;
1110         da[1] = res2;
1111         da += 2;
1112         dp = (mlib_u16 *) da;
1113         sp = (mlib_u8 *) sa;
1114         i += 4;
1115
             /* Up to three remaining samples. */
1116         if (i < size) {
1117           *dp++ = tab1[(*sp)];
1118           i++;
1119           sp++;
1120         }
1121
1122         if (i < size) {
1123           *dp++ = tab2[(*sp)];
1124           i++;
1125           sp++;
1126         }
1127
1128         if (i < size) {
1129           *dp = tab0[(*sp)];
1130         }
1131
1132       }
1133       else {
1134
             /*
              * Destination not 4-byte aligned: write one sample with tab0 and
              * feed the macros below the tables starting with tab1.  sp/sa
              * stay where they are, as in the csize == 1 path.
              */
1135         *dp++ = tab0[(*sp)];
1136         size--;
1137         da = (mlib_u32 *) dp;
1138
1139         s0 = sa[0];
1140         s1 = sa[1];
1141         sa += 2;
1142
1143 #ifdef __SUNPRO_C
1144 #pragma pipeloop(0)
1145 #endif /* __SUNPRO_C */
             /* Four samples per iteration, tables rotated by one each time. */
1146         for (i = 0; i < size - 10; i += 4, da += 2, sa++) {
1147           READ_U8_S16_NOTALIGN(tab1, tab2, tab0, tab1);
1148           s0 = s1;
1149           res1 = (t0 << 16) + t1;
1150           res2 = (t2 << 16) + t3;
1151           tab = tab0;
1152           tab0 = tab1;
1153           tab1 = tab2;
1154           tab2 = tab;
1155           s1 = sa[0];
1156           da[0] = res1;
1157           da[1] = res2;
1158         }
1159
             /*
              * Seven more samples: a group of four, then a group of three that
              * continues the channel sequence (tab2, tab0, tab1).
              */
1160         READ_U8_S16_NOTALIGN(tab1, tab2, tab0, tab1);
1161         res1 = (t0 << 16) + t1;
1162         res2 = (t2 << 16) + t3;
1163         da[0] = res1;
1164         da[1] = res2;
1165         ADD_READ_U8_S16_NOTALIGN(tab2, tab0, tab1);
1166         res1 = (t0 << 16) + t1;
1167         da[2] = res1;
1168         da += 3;
1169         dp = (mlib_u16 *) da;
1170         *dp++ = (mlib_u16) t2;
1171         sp = (mlib_u8 *) sa;
1172         i += 7;
1173
             /* Up to three remaining samples, continuing with tab2. */
1174         if (i < size) {
1175           *dp++ = tab2[(*sp)];
1176           i++;
1177           sp++;
1178         }
1179
1180         if (i < size) {
1181           *dp++ = tab0[(*sp)];
1182           i++;
1183           sp++;
1184         }
1185
1186         if (i < size) {
1187           *dp = tab1[(*sp)];
1188         }
1189       }
1190     }
1191
1192   }
1193   else if (csize == 4) {
1194     mlib_s32 i, j;
1195
1196     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
1197       mlib_u32 *sa;
1198       mlib_u32 *da;
1199       mlib_u16 *tab0 = (mlib_u16 *) table[0];
1200       mlib_u16 *tab1 = (mlib_u16 *) table[1];
1201       mlib_u16 *tab2 = (mlib_u16 *) table[2];
1202       mlib_u16 *tab3 = (mlib_u16 *) table[3];
1203       mlib_u16 *tab;
1204       mlib_u32 s0, s1, t0, t1, t2, t3;
1205       mlib_u32 res1, res2;
1206       mlib_s32 off;
1207       mlib_s32 size = xsize * 4;
1208       mlib_u16 *dp = (mlib_u16 *) dst;
1209       mlib_u8 *sp = (void *)src;
1210
           /* Number of source bytes (0..3) before src is 4-byte aligned. */
1211       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
1212
           /*
            * Leading samples.  After writing off samples the four table
            * pointers are rotated by off positions so that tab0 is the table
            * for the next unread sample.
            */
1213       if (off == 1) {
1214         *dp++ = tab0[(*sp)];
1215         tab = tab0;
1216         tab0 = tab1;
1217         tab1 = tab2;
1218         tab2 = tab3;
1219         tab3 = tab;
1220         size--;
1221         sp++;
1222       }
1223       else if (off == 2) {
1224         *dp++ = tab0[sp[0]];
1225         *dp++ = tab1[sp[1]];
1226         tab = tab0;
1227         tab0 = tab2;
1228         tab2 = tab;
1229         tab = tab1;
1230         tab1 = tab3;
1231         tab3 = tab;
1232         size -= 2;
1233         sp += 2;
1234       }
1235       else if (off == 3) {
1236         *dp++ = tab0[sp[0]];
1237         *dp++ = tab1[sp[1]];
1238         *dp++ = tab2[sp[2]];
1239         tab = tab3;
1240         tab3 = tab2;
1241         tab2 = tab1;
1242         tab1 = tab0;
1243         tab0 = tab;
1244         size -= 3;
1245         sp += 3;
1246       }
1247
1248       sa = (mlib_u32 *) sp;
1249
1250       if (((mlib_addr) dp & 3) == 0) {
             /* Destination is 4-byte aligned. */
1251         da = (mlib_u32 *) dp;
1252
1253         s0 = sa[0];
1254         sa++;
1255
1256 #ifdef __SUNPRO_C
1257 #pragma pipeloop(0)
1258 #endif /* __SUNPRO_C */
             /*
              * Four samples per iteration equals the channel count, so the
              * table order stays fixed across iterations.
              */
1259         for (i = 0; i < size - 7; i += 4, da += 2, sa++) {
1260           READ_U8_S16_ALIGN(tab0, tab1, tab2, tab3);
1261           res1 = (t0 << 16) + t1;
1262           res2 = (t2 << 16) + t3;
1263           s0 = sa[0];
1264           da[0] = res1;
1265           da[1] = res2;
1266         }
1267
             /* One more group of four from the word already held in s0. */
1268         READ_U8_S16_ALIGN(tab0, tab1, tab2, tab3);
1269         res1 = (t0 << 16) + t1;
1270         res2 = (t2 << 16) + t3;
1271         da[0] = res1;
1272         da[1] = res2;
1273         da += 2;
1274         dp = (mlib_u16 *) da;
1275         sp = (mlib_u8 *) sa;
1276         i += 4;
1277
             /* Up to three remaining samples. */
1278         if (i < size) {
1279           *dp++ = tab0[(*sp)];
1280           i++;
1281           sp++;
1282         }
1283
1284         if (i < size) {
1285           *dp++ = tab1[(*sp)];
1286           i++;
1287           sp++;
1288         }
1289
1290         if (i < size) {
1291           *dp = tab2[(*sp)];
1292         }
1293
1294       }
1295       else {
1296
             /*
              * Destination not 4-byte aligned: write one sample with tab0 and
              * feed the macros below the tables starting with tab1.  sp/sa
              * stay where they are, as in the csize == 1 path.
              */
1297         *dp++ = tab0[(*sp)];
1298         size--;
1299         da = (mlib_u32 *) dp;
1300
1301         s0 = sa[0];
1302         s1 = sa[1];
1303         sa += 2;
1304
1305 #ifdef __SUNPRO_C
1306 #pragma pipeloop(0)
1307 #endif /* __SUNPRO_C */
1308         for (i = 0; i < size - 10; i += 4, da += 2, sa++) {
1309           READ_U8_S16_NOTALIGN(tab1, tab2, tab3, tab0);
1310           s0 = s1;
1311           res1 = (t0 << 16) + t1;
1312           res2 = (t2 << 16) + t3;
1313           s1 = sa[0];
1314           da[0] = res1;
1315           da[1] = res2;
1316         }
1317
             /* Seven more samples: a group of four, then a group of three. */
1318         READ_U8_S16_NOTALIGN(tab1, tab2, tab3, tab0);
1319         res1 = (t0 << 16) + t1;
1320         res2 = (t2 << 16) + t3;
1321         da[0] = res1;
1322         da[1] = res2;
1323         ADD_READ_U8_S16_NOTALIGN(tab1, tab2, tab3);
1324         res1 = (t0 << 16) + t1;
1325         da[2] = res1;
1326         da += 3;
1327         dp = (mlib_u16 *) da;
1328         *dp++ = (mlib_u16) t2;
1329         sp = (mlib_u8 *) sa;
1330         i += 7;
1331
             /* 1 + 4k + 7 samples are done, so the next one uses tab0 again. */
1332         if (i < size) {
1333           *dp++ = tab0[(*sp)];
1334           i++;
1335           sp++;
1336         }
1337
1338         if (i < size) {
1339           *dp++ = tab1[(*sp)];
1340           i++;
1341           sp++;
1342         }
1343
1344         if (i < size) {
1345           *dp = tab2[(*sp)];
1346         }
1347       }
1348     }
1349   }
1350 }
1351 
1352 /***************************************************************/
1353 void mlib_c_ImageLookUp_S16_S16(const mlib_s16 *src,
1354                                 mlib_s32       slb,
1355                                 mlib_s16       *dst,
1356                                 mlib_s32       dlb,
1357                                 mlib_s32       xsize,
1358                                 mlib_s32       ysize,
1359                                 mlib_s32       csize,
1360                                 const mlib_s16 **table)
1361 {
       /*
        * Lookup from a signed 16-bit source into a signed 16-bit destination.
        * Builds a local array of per-channel table pointers, each moved
        * forward by 32768 entries, and passes it to the generic
        * MLIB_C_IMAGELOOKUP macro together with the function arguments.
        * NOTE(review): the 32768 offset presumably lets negative source
        * samples index the first half of each table - the indexing happens
        * inside the macro; confirm there.
        * NOTE(review): table_base has room for 4 channels, so csize is
        * presumably never larger than 4 - confirm against the callers.
        */
1362   const mlib_s16 *table_base[4];
1363   mlib_s32 chan = 0;
1364
       /* One shifted table pointer per channel. */
1365   while (chan < csize) {
1366     table_base[chan] = table[chan] + 32768;
1367     chan++;
1368   }
       /* GCC's -Warray-bounds is disabled only around the macro expansion. */
1369 #ifdef __GNUC__
1370 #pragma GCC diagnostic push
1371 #pragma GCC diagnostic ignored "-Warray-bounds"
1372 #endif
1373   MLIB_C_IMAGELOOKUP(mlib_s16, mlib_s16, table_base);
1374 #ifdef __GNUC__
1375 #pragma GCC diagnostic pop
1376 #endif
1377 }
1378 
1379 /***************************************************************/
1380 void mlib_c_ImageLookUp_U16_S16(const mlib_u16 *src,
1381                                 mlib_s32       slb,
1382                                 mlib_s16       *dst,
1383                                 mlib_s32       dlb,
1384                                 mlib_s32       xsize,
1385                                 mlib_s32       ysize,
1386                                 mlib_s32       csize,
1387                                 const mlib_s16 **table)
1388 {
       /*
        * Lookup from an unsigned 16-bit source into a signed 16-bit
        * destination.  Copies the per-channel table pointers unchanged
        * (offset 0) into a local array and passes it to the generic
        * MLIB_C_IMAGELOOKUP macro together with the function arguments.
        * NOTE(review): table_base has room for 4 channels, so csize is
        * presumably never larger than 4 - confirm against the callers.
        */
1389   const mlib_s16 *table_base[4];
1390   mlib_s32 chan = 0;
1391
       /* One table pointer per channel, no shift for unsigned input. */
1392   while (chan < csize) {
1393     table_base[chan] = table[chan];
1394     chan++;
1395   }
       /* GCC's -Warray-bounds is disabled only around the macro expansion. */
1396 #ifdef __GNUC__
1397 #pragma GCC diagnostic push
1398 #pragma GCC diagnostic ignored "-Warray-bounds"
1399 #endif
1400   MLIB_C_IMAGELOOKUP(mlib_s16, mlib_u16, table_base);
1401 #ifdef __GNUC__
1402 #pragma GCC diagnostic pop
1403 #endif
1404 }
1405 
1406 /***************************************************************/
1407 void mlib_c_ImageLookUp_S32_S16(const mlib_s32 *src,
1408                                 mlib_s32       slb,
1409                                 mlib_s16       *dst,
1410                                 mlib_s32       dlb,
1411                                 mlib_s32       xsize,
1412                                 mlib_s32       ysize,
1413                                 mlib_s32       csize,
1414                                 const mlib_s16 **table)
1415 {
       /*
        * Lookup from a signed 32-bit source into a signed 16-bit destination.
        * Builds a local array of per-channel table pointers, each moved
        * forward by TABLE_SHIFT_S32 entries (a constant defined outside this
        * chunk), and passes it to the generic MLIB_C_IMAGELOOKUP macro
        * together with the function arguments.
        * NOTE(review): table_base has room for 4 channels, so csize is
        * presumably never larger than 4 - confirm against the callers.
        */
1416   const mlib_s16 *table_base[4];
1417   mlib_s32 chan = 0;
1418
       /* One shifted table pointer per channel. */
1419   while (chan < csize) {
1420     table_base[chan] = table[chan] + TABLE_SHIFT_S32;
1421     chan++;
1422   }
       /* GCC's -Warray-bounds is disabled only around the macro expansion. */
1423 #ifdef __GNUC__
1424 #pragma GCC diagnostic push
1425 #pragma GCC diagnostic ignored "-Warray-bounds"
1426 #endif
1427   MLIB_C_IMAGELOOKUP(mlib_s16, mlib_s32, table_base);
1428 #ifdef __GNUC__
1429 #pragma GCC diagnostic pop
1430 #endif
1431 }
1432 
1433 /***************************************************************/
1434 void mlib_c_ImageLookUp_S16_U16(const mlib_s16 *src,
1435                                 mlib_s32       slb,
1436                                 mlib_u16       *dst,
1437                                 mlib_s32       dlb,
1438                                 mlib_s32       xsize,
1439                                 mlib_s32       ysize,
1440                                 mlib_s32       csize,
1441                                 const mlib_s16 **table)
1442 {
       /*
        * Lookup from a signed 16-bit source into an unsigned 16-bit
        * destination.  Builds a local array of per-channel table pointers,
        * each moved forward by 32768 entries, and passes it to the generic
        * MLIB_C_IMAGELOOKUP macro, which is instantiated with mlib_u16 as the
        * destination type.
        * NOTE(review): the 32768 offset presumably lets negative source
        * samples index the first half of each table - the indexing happens
        * inside the macro; confirm there.
        * NOTE(review): table_base has room for 4 channels, so csize is
        * presumably never larger than 4 - confirm against the callers.
        */
1443   const mlib_s16 *table_base[4];
1444   mlib_s32 chan = 0;
1445
       /* One shifted table pointer per channel. */
1446   while (chan < csize) {
1447     table_base[chan] = table[chan] + 32768;
1448     chan++;
1449   }
       /* GCC's -Warray-bounds is disabled only around the macro expansion. */
1450 #ifdef __GNUC__
1451 #pragma GCC diagnostic push
1452 #pragma GCC diagnostic ignored "-Warray-bounds"
1453 #endif
1454   MLIB_C_IMAGELOOKUP(mlib_u16, mlib_s16, table_base);
1455 #ifdef __GNUC__
1456 #pragma GCC diagnostic pop
1457 #endif
1458 }
1459 
1460 /***************************************************************/
1461 void mlib_c_ImageLookUp_U16_U16(const mlib_u16 *src,
1462                                 mlib_s32       slb,
1463                                 mlib_u16       *dst,
1464                                 mlib_s32       dlb,
1465                                 mlib_s32       xsize,
1466                                 mlib_s32       ysize,
1467                                 mlib_s32       csize,
1468                                 const mlib_s16 **table)
1469 {
       /*
        * Lookup from an unsigned 16-bit source into an unsigned 16-bit
        * destination.  Copies the per-channel table pointers unchanged
        * (offset 0) into a local array and passes it to the generic
        * MLIB_C_IMAGELOOKUP macro together with the function arguments.
        * NOTE(review): table_base has room for 4 channels, so csize is
        * presumably never larger than 4 - confirm against the callers.
        */
1470   const mlib_s16 *table_base[4];
1471   mlib_s32 chan = 0;
1472
       /* One table pointer per channel, no shift for unsigned input. */
1473   while (chan < csize) {
1474     table_base[chan] = table[chan];
1475     chan++;
1476   }
       /* GCC's -Warray-bounds is disabled only around the macro expansion. */
1477 #ifdef __GNUC__
1478 #pragma GCC diagnostic push
1479 #pragma GCC diagnostic ignored "-Warray-bounds"
1480 #endif
1481   MLIB_C_IMAGELOOKUP(mlib_u16, mlib_u16, table_base);
1482 #ifdef __GNUC__
1483 #pragma GCC diagnostic pop
1484 #endif
1485 }
1486 
1487 /***************************************************************/
1488 void mlib_c_ImageLookUp_S32_U16(const mlib_s32 *src,
1489                                 mlib_s32       slb,
1490                                 mlib_u16       *dst,
1491                                 mlib_s32       dlb,
1492                                 mlib_s32       xsize,
1493                                 mlib_s32       ysize,
1494                                 mlib_s32       csize,
1495                                 const mlib_s16 **table)
1496 {
       /*
        * Lookup from a signed 32-bit source into an unsigned 16-bit
        * destination.  Builds a local array of per-channel table pointers,
        * each moved forward by TABLE_SHIFT_S32 entries (a constant defined
        * outside this chunk), and passes it to the generic
        * MLIB_C_IMAGELOOKUP macro together with the function arguments.
        * NOTE(review): table_base has room for 4 channels, so csize is
        * presumably never larger than 4 - confirm against the callers.
        */
1497   const mlib_s16 *table_base[4];
1498   mlib_s32 chan = 0;
1499
       /* One shifted table pointer per channel. */
1500   while (chan < csize) {
1501     table_base[chan] = table[chan] + TABLE_SHIFT_S32;
1502     chan++;
1503   }
       /* GCC's -Warray-bounds is disabled only around the macro expansion. */
1504 #ifdef __GNUC__
1505 #pragma GCC diagnostic push
1506 #pragma GCC diagnostic ignored "-Warray-bounds"
1507 #endif
1508   MLIB_C_IMAGELOOKUP(mlib_u16, mlib_s32, table_base);
1509 #ifdef __GNUC__
1510 #pragma GCC diagnostic pop
1511 #endif
1512 }
1513 
1514 /***************************************************************/
1515 void mlib_c_ImageLookUp_U8_S32(const mlib_u8  *src,
1516                                mlib_s32       slb,
1517                                mlib_s32       *dst,
1518                                mlib_s32       dlb,
1519                                mlib_s32       xsize,
1520                                mlib_s32       ysize,
1521                                mlib_s32       csize,
1522                                const mlib_s32 **table)
1523 {
       /*
        * Table lookup from an 8-bit source image into a 32-bit destination
        * image, using one table per channel (table[0] .. table[csize - 1]).
        *
        *   src, slb - source samples; src is advanced by slb elements per row
        *   dst, dlb - destination samples; dst is advanced by dlb elements
        *              per row
        *   xsize    - pixels per row
        *   ysize    - number of rows
        *   csize    - channels per pixel
        *   table    - per-channel tables, indexed by the source byte
        *
        * When a row holds fewer than 7 samples (xsize * csize < 7) the
        * generic MLIB_C_IMAGELOOKUP macro is used.  Otherwise there is one
        * hand-scheduled path for each of csize == 1, 2, 3 and 4; any other
        * csize matches no branch and nothing is written.
        *
        * Every fast path does the following for each row:
        *   1. look up 0..3 leading samples one at a time until the source
        *      pointer is 4-byte aligned,
        *   2. read the source as 32-bit words (sa), four samples per word,
        *      with the next word loaded one iteration ahead, and store the
        *      four results to dp[0..3],
        *   3. finish the remaining samples one at a time.
        *
        * NOTE(review): READ_U8_S32 is defined outside this chunk.  From its
        * use here it presumably splits s0 into bytes and loads t0..t3 from
        * the four tables passed as arguments, in that order - confirm against
        * the macro definition.
        */
1524
1525   if (xsize * csize < 7) {
         /* Row too short for the word-at-a-time paths below. */
1526     MLIB_C_IMAGELOOKUP(mlib_s32, mlib_u8, table);
1527   }
1528   else if (csize == 1) {
1529     mlib_s32 i, j;
1530
1531     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
1532       mlib_u32 *sa;
1533       mlib_u32 *tab = (mlib_u32 *) table[0];
1534       mlib_u32 s0, t0, t1, t2, t3;
1535       mlib_s32 off;
1536       mlib_s32 size = xsize;
1537       mlib_u32 *dp = (mlib_u32 *) dst;
1538       mlib_u8 *sp = (void *)src;
1539
           /* Number of source bytes (0..3) before src is 4-byte aligned. */
1540       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
1541
           /* Leading samples, one at a time, up to the aligned source word. */
1542       for (i = 0; i < off; i++, sp++) {
1543         *dp++ = tab[sp[0]];
1544         size--;
1545       }
1546
1547       sa = (mlib_u32 *) sp;
1548
           /* Preload the first source word; the loop loads one word ahead. */
1549       s0 = sa[0];
1550       sa++;
1551
1552 #ifdef __SUNPRO_C
1553 #pragma pipeloop(0)
1554 #endif /* __SUNPRO_C */
           /*
            * Four samples per iteration.  The bound leaves between 4 and 7
            * samples for the code after the loop.
            */
1555       for (i = 0; i < size - 7; i += 4, dp += 4, sa++) {
1556         READ_U8_S32(tab, tab, tab, tab);
1557         s0 = sa[0];
1558         dp[0] = t0;
1559         dp[1] = t1;
1560         dp[2] = t2;
1561         dp[3] = t3;
1562       }
1563
           /* One more group of four from the word already held in s0. */
1564       READ_U8_S32(tab, tab, tab, tab);
1565       dp[0] = t0;
1566       dp[1] = t1;
1567       dp[2] = t2;
1568       dp[3] = t3;
1569       dp += 4;
1570       sp = (mlib_u8 *) sa;
1571       i += 4;
           /* Remaining 0..3 samples, one at a time. */
1572       for (; i < size; i++, dp++, sp++)
1573         dp[0] = tab[sp[0]];
1574     }
1575
1576   }
1577   else if (csize == 2) {
1578     mlib_s32 i, j;
1579
1580     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
1581       mlib_u32 *sa;
1582       mlib_u32 *tab0 = (mlib_u32 *) table[0];
1583       mlib_u32 *tab1 = (mlib_u32 *) table[1];
1584       mlib_u32 *tab;
1585       mlib_u32 s0, t0, t1, t2, t3;
1586       mlib_s32 off;
1587       mlib_s32 size = xsize * 2;
1588       mlib_u32 *dp = (mlib_u32 *) dst;
1589       mlib_u8 *sp = (void *)src;
1590
           /* Number of source bytes (0..3) before src is 4-byte aligned. */
1591       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
1592
           /* Leading samples: one whole two-channel pixel when off is 2 or 3. */
1593       for (i = 0; i < off - 1; i += 2, sp += 2) {
1594         *dp++ = tab0[sp[0]];
1595         *dp++ = tab1[sp[1]];
1596         size -= 2;
1597       }
1598
           /*
            * Odd off: one more single sample, then swap the table pointers so
            * that tab0 is again the table for the next unread sample.
            */
1599       if ((off & 1) != 0) {
1600         *dp++ = tab0[*sp];
1601         size--;
1602         sp++;
1603         tab = tab0;
1604         tab0 = tab1;
1605         tab1 = tab;
1606       }
1607
1608       sa = (mlib_u32 *) sp;
1609
1610       s0 = sa[0];
1611       sa++;
1612
1613 #ifdef __SUNPRO_C
1614 #pragma pipeloop(0)
1615 #endif /* __SUNPRO_C */
           /* Four samples (two pixels) per iteration, next word loaded ahead. */
1616       for (i = 0; i < size - 7; i += 4, dp += 4, sa++) {
1617         READ_U8_S32(tab0, tab1, tab0, tab1);
1618         s0 = sa[0];
1619         dp[0] = t0;
1620         dp[1] = t1;
1621         dp[2] = t2;
1622         dp[3] = t3;
1623       }
1624
           /* One more group of four from the word already held in s0. */
1625       READ_U8_S32(tab0, tab1, tab0, tab1);
1626       dp[0] = t0;
1627       dp[1] = t1;
1628       dp[2] = t2;
1629       dp[3] = t3;
1630       dp += 4;
1631       sp = (mlib_u8 *) sa;
1632       i += 4;
1633
           /* Tail: whole pairs first, then a possible single last sample. */
1634       for (; i < size - 1; i += 2, sp += 2) {
1635         *dp++ = tab0[sp[0]];
1636         *dp++ = tab1[sp[1]];
1637       }
1638
1639       if (i < size)
1640         *dp = tab0[(*sp)];
1641     }
1642
1643   }
1644   else if (csize == 3) {
1645     mlib_s32 i, j;
1646
1647     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
1648       mlib_u32 *sa;
1649       mlib_u32 *tab0 = (mlib_u32 *) table[0];
1650       mlib_u32 *tab1 = (mlib_u32 *) table[1];
1651       mlib_u32 *tab2 = (mlib_u32 *) table[2];
1652       mlib_u32 *tab;
1653       mlib_u32 s0, t0, t1, t2, t3;
1654       mlib_s32 off;
1655       mlib_s32 size = xsize * 3;
1656       mlib_u32 *dp = (mlib_u32 *) dst;
1657       mlib_u8 *sp = (void *)src;
1658
           /* Number of source bytes (0..3) before src is 4-byte aligned. */
1659       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
1660
           /*
            * Leading samples.  After writing off samples the table pointers
            * are rotated so that tab0 is the table for the next unread sample
            * (no rotation is needed after a whole three-channel pixel).
            */
1661       if (off == 1) {
1662         *dp++ = tab0[(*sp)];
1663         tab = tab0;
1664         tab0 = tab1;
1665         tab1 = tab2;
1666         tab2 = tab;
1667         size--;
1668         sp++;
1669       }
1670       else if (off == 2) {
1671         *dp++ = tab0[sp[0]];
1672         *dp++ = tab1[sp[1]];
1673         tab = tab2;
1674         tab2 = tab1;
1675         tab1 = tab0;
1676         tab0 = tab;
1677         size -= 2;
1678         sp += 2;
1679       }
1680       else if (off == 3) {
1681         *dp++ = tab0[sp[0]];
1682         *dp++ = tab1[sp[1]];
1683         *dp++ = tab2[sp[2]];
1684         size -= 3;
1685         sp += 3;
1686       }
1687
1688       sa = (mlib_u32 *) sp;
1689
1690       s0 = sa[0];
1691       sa++;
1692
1693 #ifdef __SUNPRO_C
1694 #pragma pipeloop(0)
1695 #endif /* __SUNPRO_C */
           /*
            * Four samples per iteration is one pixel plus one sample, so the
            * table pointers are rotated by one position every iteration.
            */
1696       for (i = 0; i < size - 7; i += 4, dp += 4, sa++) {
1697         READ_U8_S32(tab0, tab1, tab2, tab0);
1698         tab = tab0;
1699         tab0 = tab1;
1700         tab1 = tab2;
1701         tab2 = tab;
1702         s0 = sa[0];
1703         dp[0] = t0;
1704         dp[1] = t1;
1705         dp[2] = t2;
1706         dp[3] = t3;
1707       }
1708
           /*
            * One more group of four.  The tables are not rotated afterwards;
            * the tail below therefore continues with tab1, tab2, tab0.
            */
1709       READ_U8_S32(tab0, tab1, tab2, tab0);
1710       dp[0] = t0;
1711       dp[1] = t1;
1712       dp[2] = t2;
1713       dp[3] = t3;
1714       dp += 4;
1715       sp = (mlib_u8 *) sa;
1716       i += 4;
1717
           /* Up to three remaining samples. */
1718       if (i < size) {
1719         *dp++ = tab1[(*sp)];
1720         i++;
1721         sp++;
1722       }
1723
1724       if (i < size) {
1725         *dp++ = tab2[(*sp)];
1726         i++;
1727         sp++;
1728       }
1729
1730       if (i < size) {
1731         *dp = tab0[(*sp)];
1732       }
1733     }
1734
1735   }
1736   else if (csize == 4) {
1737     mlib_s32 i, j;
1738
1739     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
1740       mlib_u32 *sa;
1741       mlib_u32 *tab0 = (mlib_u32 *) table[0];
1742       mlib_u32 *tab1 = (mlib_u32 *) table[1];
1743       mlib_u32 *tab2 = (mlib_u32 *) table[2];
1744       mlib_u32 *tab3 = (mlib_u32 *) table[3];
1745       mlib_u32 *tab;
1746       mlib_u32 s0, t0, t1, t2, t3;
1747       mlib_s32 off;
1748       mlib_s32 size = xsize * 4;
1749       mlib_u32 *dp = (mlib_u32 *) dst;
1750       mlib_u8 *sp = (void *)src;
1751
           /* Number of source bytes (0..3) before src is 4-byte aligned. */
1752       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
1753
           /*
            * Leading samples.  After writing off samples the four table
            * pointers are rotated by off positions so that tab0 is the table
            * for the next unread sample.
            */
1754       if (off == 1) {
1755         *dp++ = tab0[(*sp)];
1756         tab = tab0;
1757         tab0 = tab1;
1758         tab1 = tab2;
1759         tab2 = tab3;
1760         tab3 = tab;
1761         size--;
1762         sp++;
1763       }
1764       else if (off == 2) {
1765         *dp++ = tab0[sp[0]];
1766         *dp++ = tab1[sp[1]];
1767         tab = tab0;
1768         tab0 = tab2;
1769         tab2 = tab;
1770         tab = tab1;
1771         tab1 = tab3;
1772         tab3 = tab;
1773         size -= 2;
1774         sp += 2;
1775       }
1776       else if (off == 3) {
1777         *dp++ = tab0[sp[0]];
1778         *dp++ = tab1[sp[1]];
1779         *dp++ = tab2[sp[2]];
1780         tab = tab3;
1781         tab3 = tab2;
1782         tab2 = tab1;
1783         tab1 = tab0;
1784         tab0 = tab;
1785         size -= 3;
1786         sp += 3;
1787       }
1788
1789       sa = (mlib_u32 *) sp;
1790
1791       s0 = sa[0];
1792       sa++;
1793
1794 #ifdef __SUNPRO_C
1795 #pragma pipeloop(0)
1796 #endif /* __SUNPRO_C */
           /*
            * Four samples per iteration equals the channel count, so the
            * table order stays fixed across iterations.
            */
1797       for (i = 0; i < size - 7; i += 4, dp += 4, sa++) {
1798         READ_U8_S32(tab0, tab1, tab2, tab3);
1799         s0 = sa[0];
1800         dp[0] = t0;
1801         dp[1] = t1;
1802         dp[2] = t2;
1803         dp[3] = t3;
1804       }
1805
           /* One more group of four from the word already held in s0. */
1806       READ_U8_S32(tab0, tab1, tab2, tab3);
1807       dp[0] = t0;
1808       dp[1] = t1;
1809       dp[2] = t2;
1810       dp[3] = t3;
1811       dp += 4;
1812       sp = (mlib_u8 *) sa;
1813       i += 4;
1814
           /* Up to three remaining samples. */
1815       if (i < size) {
1816         *dp++ = tab0[(*sp)];
1817         i++;
1818         sp++;
1819       }
1820
1821       if (i < size) {
1822         *dp++ = tab1[(*sp)];
1823         i++;
1824         sp++;
1825       }
1826
1827       if (i < size) {
1828         *dp = tab2[(*sp)];
1829       }
1830     }
1831   }
1832 }
1833 
1834 /***************************************************************/
1835 void mlib_c_ImageLookUp_S16_S32(const mlib_s16 *src,
1836                                 mlib_s32       slb,
1837                                 mlib_s32       *dst,
1838                                 mlib_s32       dlb,
1839                                 mlib_s32       xsize,
1840                                 mlib_s32       ysize,
1841                                 mlib_s32       csize,
1842                                 const mlib_s32 **table)
1843 {
       /*
        * Lookup from a signed 16-bit source into a signed 32-bit destination.
        * Builds a local array of per-channel table pointers, each moved
        * forward by 32768 entries, and passes it to the generic
        * MLIB_C_IMAGELOOKUP macro together with the function arguments.
        * NOTE(review): the 32768 offset presumably lets negative source
        * samples index the first half of each table - the indexing happens
        * inside the macro; confirm there.
        * NOTE(review): table_base has room for 4 channels, so csize is
        * presumably never larger than 4 - confirm against the callers.
        */
1844   const mlib_s32 *table_base[4];
1845   mlib_s32 chan = 0;
1846
       /* One shifted table pointer per channel. */
1847   while (chan < csize) {
1848     table_base[chan] = table[chan] + 32768;
1849     chan++;
1850   }
       /* GCC's -Warray-bounds is disabled only around the macro expansion. */
1851 #ifdef __GNUC__
1852 #pragma GCC diagnostic push
1853 #pragma GCC diagnostic ignored "-Warray-bounds"
1854 #endif
1855   MLIB_C_IMAGELOOKUP(mlib_s32, mlib_s16, table_base);
1856 #ifdef __GNUC__
1857 #pragma GCC diagnostic pop
1858 #endif
1859 }
1860 
1861 /***************************************************************/
1862 void mlib_c_ImageLookUp_U16_S32(const mlib_u16 *src,
1863                                 mlib_s32       slb,
1864                                 mlib_s32       *dst,
1865                                 mlib_s32       dlb,
1866                                 mlib_s32       xsize,
1867                                 mlib_s32       ysize,
1868                                 mlib_s32       csize,
1869                                 const mlib_s32 **table)
1870 {
       /*
        * Lookup from an unsigned 16-bit source into a signed 32-bit
        * destination.  Copies the per-channel table pointers unchanged
        * (offset 0) into a local array and passes it to the generic
        * MLIB_C_IMAGELOOKUP macro together with the function arguments.
        * NOTE(review): table_base has room for 4 channels, so csize is
        * presumably never larger than 4 - confirm against the callers.
        */
1871   const mlib_s32 *table_base[4];
1872   mlib_s32 chan = 0;
1873
       /* One table pointer per channel, no shift for unsigned input. */
1874   while (chan < csize) {
1875     table_base[chan] = table[chan];
1876     chan++;
1877   }
       /* GCC's -Warray-bounds is disabled only around the macro expansion. */
1878 #ifdef __GNUC__
1879 #pragma GCC diagnostic push
1880 #pragma GCC diagnostic ignored "-Warray-bounds"
1881 #endif
1882   MLIB_C_IMAGELOOKUP(mlib_s32, mlib_u16, table_base);
1883 #ifdef __GNUC__
1884 #pragma GCC diagnostic pop
1885 #endif
1886 }
1887 
1888 /***************************************************************/
1889 void mlib_c_ImageLookUp_S32_S32(const mlib_s32 *src,
1890                                 mlib_s32       slb,
1891                                 mlib_s32       *dst,
1892                                 mlib_s32       dlb,
1893                                 mlib_s32       xsize,
1894                                 mlib_s32       ysize,
1895                                 mlib_s32       csize,
1896                                 const mlib_s32 **table)
1897 {
       /*
        * Lookup from a signed 32-bit source into a signed 32-bit destination.
        * Builds a local array of per-channel table pointers, each moved
        * forward by TABLE_SHIFT_S32 entries (a constant defined outside this
        * chunk), and passes it to the generic MLIB_C_IMAGELOOKUP macro
        * together with the function arguments.
        * NOTE(review): table_base has room for 4 channels, so csize is
        * presumably never larger than 4 - confirm against the callers.
        */
1898   const mlib_s32 *table_base[4];
1899   mlib_s32 chan = 0;
1900
       /* One shifted table pointer per channel. */
1901   while (chan < csize) {
1902     table_base[chan] = table[chan] + TABLE_SHIFT_S32;
1903     chan++;
1904   }
       /* GCC's -Warray-bounds is disabled only around the macro expansion. */
1905 #ifdef __GNUC__
1906 #pragma GCC diagnostic push
1907 #pragma GCC diagnostic ignored "-Warray-bounds"
1908 #endif
1909   MLIB_C_IMAGELOOKUP(mlib_s32, mlib_s32, table_base);
1910 #ifdef __GNUC__
1911 #pragma GCC diagnostic pop
1912 #endif
1913 }
1914 
1915 /***************************************************************/
/*
 * Single-input lookup, mlib_u8 source to mlib_u8 destination.
 *
 * Every source sample is looked up in each of the csize tables and the
 * csize results are written to dst as consecutive bytes, so one source
 * byte produces csize destination bytes.
 *
 *   src, slb - first source row; slb is added to src after each row
 *   dst, dlb - first destination row; dlb is added to dst after each row
 *   xsize    - number of source samples processed per row
 *   ysize    - number of rows
 *   csize    - number of tables, i.e. destination bytes per source sample
 *   table    - table[c] is indexed with the source value (0..255 here)
 *
 * Small images (xsize < 8 or xsize * ysize < 250) go through the generic
 * MLIB_C_IMAGELOOKUPSI macro.  Larger images with csize 2, 3 or 4 first
 * merge the per-channel tables into one 256-entry table of packed pixels
 * and then write the destination mostly with aligned 32-bit stores.  For
 * a larger image with any other csize no branch matches and nothing is
 * written.
 *
 * The loops are software pipelined: the source bytes for the next step
 * are loaded before the current results are stored, and the last step is
 * peeled off after each loop.  The packing expressions assume that
 * _LITTLE_ENDIAN is defined exactly when the target is little-endian.
 */
1916 void mlib_c_ImageLookUpSI_U8_U8(const mlib_u8 *src,
1917                                 mlib_s32      slb,
1918                                 mlib_u8       *dst,
1919                                 mlib_s32      dlb,
1920                                 mlib_s32      xsize,
1921                                 mlib_s32      ysize,
1922                                 mlib_s32      csize,
1923                                 const mlib_u8 **table)
1924 {
1925 
1926   if ((xsize < 8) || ((xsize * ysize) < 250)) {
1927     MLIB_C_IMAGELOOKUPSI(mlib_u8, mlib_u8, table);
1928   }
1929   else if (csize == 2) {
1930 
       /* tab[v] packs table[0][v] and table[1][v] into 16 bits so that a
        * 16-bit store of tab[v] writes the two bytes in channel order. */
1931     mlib_u16 tab[256];
1932     const mlib_u8 *tab0 = table[0];
1933     const mlib_u8 *tab1 = table[1];
1934     mlib_s32 i, j, s0, s1, s2;
1935 
       /* Pipelined fill: entry i - 1 is stored while entry i is loaded;
        * entry 255 is stored after the loop. */
1936     s0 = tab0[0];
1937     s1 = tab1[0];
1938     for (i = 1; i < 256; i++) {
1939 #ifdef _LITTLE_ENDIAN
1940       s2 = (s1 << 8) + s0;
1941 #else
1942       s2 = (s0 << 8) + s1;
1943 #endif /* _LITTLE_ENDIAN */
1944       s0 = tab0[i];
1945       s1 = tab1[i];
1946       tab[i - 1] = (mlib_u16) s2;
1947     }
1948 
1949 #ifdef _LITTLE_ENDIAN
1950     s2 = (s1 << 8) + s0;
1951 #else
1952     s2 = (s0 << 8) + s1;
1953 #endif /* _LITTLE_ENDIAN */
1954     tab[255] = (mlib_u16) s2;
1955 
1956     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
1957       mlib_s32 *da;
1958       mlib_u8 *dp = dst;
1959       mlib_u8 *sa = (void *)src;
         /* These locals shadow the s0/s1 used for the table fill above. */
1960       mlib_s32 s0, t0, s1, t1, t, t2, off;
1961       mlib_s32 size = xsize;
1962 
         /* Destination row starts on an even address: whole pixels can be
          * stored as 16-bit values. */
1963       if (((mlib_addr) dp & 1) == 0) {
1964 
           /* Store one pixel to reach a 4-byte boundary if necessary. */
1965         if (((mlib_addr) dp & 3) != 0) {
1966           *((mlib_u16 *) dp) = tab[sa[0]];
1967           sa++;
1968           size--;
1969           dp += 2;
1970         }
1971 
1972         da = (mlib_s32 *) dp;
1973 
1974         s0 = sa[0];
1975         s1 = sa[1];
1976         sa += 2;
1977 
           /* Two pixels per 32-bit store; the next two source bytes are
            * loaded before the store. */
1978 #ifdef __SUNPRO_C
1979 #pragma pipeloop(0)
1980 #endif /* __SUNPRO_C */
1981         for (i = 0; i < size - 3; i += 2, da++, sa += 2) {
1982           t0 = tab[s0];
1983           t1 = tab[s1];
1984 #ifdef _LITTLE_ENDIAN
1985           t = (t1 << 16) + t0;
1986 #else
1987           t = (t0 << 16) + t1;
1988 #endif /* _LITTLE_ENDIAN */
1989           s0 = sa[0];
1990           s1 = sa[1];
1991           da[0] = t;
1992         }
1993 
           /* Peeled last pair, using the samples already loaded. */
1994         t0 = tab[s0];
1995         t1 = tab[s1];
1996 #ifdef _LITTLE_ENDIAN
1997         t = (t1 << 16) + t0;
1998 #else
1999         t = (t0 << 16) + t1;
2000 #endif /* _LITTLE_ENDIAN */
2001         da[0] = t;
2002         da++;
2003 
           /* Odd remaining count: one more pixel as a 16-bit store. */
2004         if (size & 1)
2005           *((mlib_u16 *) da) = tab[sa[0]];
2006 
2007       }
2008       else {
2009 
           /* Destination row starts on an odd address; off is the byte
            * distance to the next 4-byte boundary (1 or 3). */
2010         off = (mlib_s32) (4 - ((mlib_addr) dp & 3));
2011 
           /* off == 3: write one whole pixel byte by byte, which leaves a
            * single byte to the boundary. */
2012         if (off > 1) {
2013           t0 = tab[sa[0]];
2014 #ifdef _LITTLE_ENDIAN
2015           dp[1] = (t0 >> 8);
2016           dp[0] = t0;
2017 #else
2018           dp[0] = (t0 >> 8);
2019           dp[1] = t0;
2020 #endif /* _LITTLE_ENDIAN */
2021           sa++;
2022           size--;
2023           dp += 2;
2024         }
2025 
           /* Write only the first byte of the next pixel to reach the
            * boundary; its second byte stays in t0 and goes into the first
            * word below.  size is not decremented for this pixel. */
2026         t0 = tab[sa[0]];
2027         sa++;
2028 #ifdef _LITTLE_ENDIAN
2029         *dp++ = t0;
2030 #else
2031         *dp++ = (t0 >> 8);
2032 #endif /* _LITTLE_ENDIAN */
2033 
2034         da = (mlib_s32 *) dp;
2035 
2036         s0 = sa[0];
2037         s1 = sa[1];
2038         sa += 2;
2039 
           /* Each word holds: second byte of the previous pixel (t0), one
            * whole pixel (t1) and the first byte of the following pixel
            * (t2), which then becomes the new t0. */
2040 #ifdef __SUNPRO_C
2041 #pragma pipeloop(0)
2042 #endif /* __SUNPRO_C */
2043         for (i = 0; i < size - 4; i += 2, da++, sa += 2) {
2044           t1 = tab[s0];
2045           t2 = tab[s1];
2046 #ifdef _LITTLE_ENDIAN
2047           t = (t0 >> 8) + (t1 << 8) + (t2 << 24);
2048 #else
2049           t = (t0 << 24) + (t1 << 8) + (t2 >> 8);
2050 #endif /* _LITTLE_ENDIAN */
2051           t0 = t2;
2052           s0 = sa[0];
2053           s1 = sa[1];
2054           da[0] = t;
2055         }
2056 
           /* Peeled last word. */
2057         t1 = tab[s0];
2058         t2 = tab[s1];
2059 #ifdef _LITTLE_ENDIAN
2060         t = (t0 >> 8) + (t1 << 8) + (t2 << 24);
2061 #else
2062         t = (t0 << 24) + (t1 << 8) + (t2 >> 8);
2063 #endif /* _LITTLE_ENDIAN */
2064         da[0] = t;
2065         da++;
2066         dp = (mlib_u8 *) da;
           /* Second byte of the pixel whose first byte ended that word. */
2067 #ifdef _LITTLE_ENDIAN
2068         dp[0] = (t2 >> 8);
2069 #else
2070         dp[0] = t2;
2071 #endif /* _LITTLE_ENDIAN */
2072 
           /* Even size: one more pixel remains; write it byte by byte. */
2073         if ((size & 1) == 0) {
2074           t0 = tab[sa[0]];
2075 #ifdef _LITTLE_ENDIAN
2076           dp[2] = (t0 >> 8);
2077           dp[1] = t0;
2078 #else
2079           dp[1] = (t0 >> 8);
2080           dp[2] = t0;
2081 #endif /* _LITTLE_ENDIAN */
2082         }
2083       }
2084     }
2085 
2086   }
2087   else if (csize == 3) {
       /* tab[v] is a 32-bit entry whose bytes 1..3 in memory are
        * table[0][v], table[1][v], table[2][v]; byte 0 is zero. */
2088     mlib_u32 tab[256];
2089     const mlib_u8 *tab0 = table[0];
2090     const mlib_u8 *tab1 = table[1];
2091     const mlib_u8 *tab2 = table[2];
2092     mlib_s32 i, j;
2093     mlib_u32 s0, s1, s2, s3;
2094 
       /* Pipelined fill, same scheme as for csize == 2. */
2095     s0 = tab0[0];
2096     s1 = tab1[0];
2097     s2 = tab2[0];
2098     for (i = 1; i < 256; i++) {
2099 #ifdef _LITTLE_ENDIAN
2100       s3 = (s2 << 24) + (s1 << 16) + (s0 << 8);
2101 #else
2102       s3 = (s0 << 16) + (s1 << 8) + s2;
2103 #endif /* _LITTLE_ENDIAN */
2104       s0 = tab0[i];
2105       s1 = tab1[i];
2106       s2 = tab2[i];
2107       tab[i - 1] = s3;
2108     }
2109 
2110 #ifdef _LITTLE_ENDIAN
2111     s3 = (s2 << 24) + (s1 << 16) + (s0 << 8);
2112 #else
2113     s3 = (s0 << 16) + (s1 << 8) + s2;
2114 #endif /* _LITTLE_ENDIAN */
2115     tab[255] = s3;
2116 
2117     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
2118       mlib_u32 *da;
2119       mlib_u8 *dp = dst;
2120       mlib_u8 *sa = (void *)src, *ptr;
2121       mlib_u32 s0, s1, t0, t1;
2122       mlib_u32 res1, res2;
2123       mlib_s32 size = xsize, off;
2124 
         /* A pixel is 3 bytes, so writing (dp & 3) pixels byte by byte
          * brings dp to a 4-byte boundary. */
2125       off = (mlib_s32) ((mlib_addr) dp & 3);
2126 
2127 #ifdef __SUNPRO_C
2128 #pragma pipeloop(0)
2129 #endif /* __SUNPRO_C */
2130       for (i = 0; i < off; i++) {
2131         ptr = (mlib_u8 *) (tab + sa[0]);
2132         dp[0] = ptr[1];
2133         dp[1] = ptr[2];
2134         dp[2] = ptr[3];
2135         dp += 3;
2136         sa++;
2137       }
2138 
2139       size -= off;
2140       da = (mlib_u32 *) dp;
2141       s0 = sa[0];
2142       s1 = sa[1];
2143       sa += 2;
2144 
         /* Four pixels (12 bytes) are written as three 32-bit words per
          * iteration; the first two samples of the next group are loaded
          * before the last two words are stored. */
2145 #ifdef __SUNPRO_C
2146 #pragma pipeloop(0)
2147 #endif /* __SUNPRO_C */
2148       for (i = 0; i < size - 7; i += 4, da += 3, sa += 4) {
2149         t0 = tab[s0];
2150         t1 = tab[s1];
2151 #ifdef _LITTLE_ENDIAN
2152         da[0] = (t0 >> 8) + (t1 << 16);
2153         res2 = (t1 >> 16);
2154 #else
2155         da[0] = (t0 << 8) + (t1 >> 16);
2156         res2 = (t1 << 16);
2157 #endif /* _LITTLE_ENDIAN */
2158         s0 = sa[0];
2159         s1 = sa[1];
2160         t0 = tab[s0];
2161         t1 = tab[s1];
2162 #ifdef _LITTLE_ENDIAN
2163         res2 += (t0 << 8);
2164         res1 = (t0 >> 24) + t1;
2165 #else
2166         res2 += (t0 >> 8);
2167         res1 = (t0 << 24) + t1;
2168 #endif /* _LITTLE_ENDIAN */
2169         s0 = sa[2];
2170         s1 = sa[3];
2171         da[1] = res2;
2172         da[2] = res1;
2173       }
2174 
         /* Peeled last group of four pixels; it is emitted unconditionally. */
2175       t0 = tab[s0];
2176       t1 = tab[s1];
2177 #ifdef _LITTLE_ENDIAN
2178       da[0] = (t0 >> 8) + (t1 << 16);
2179       res2 = (t1 >> 16);
2180 #else
2181       da[0] = (t0 << 8) + (t1 >> 16);
2182       res2 = (t1 << 16);
2183 #endif /* _LITTLE_ENDIAN */
2184       s0 = sa[0];
2185       s1 = sa[1];
2186       t0 = tab[s0];
2187       t1 = tab[s1];
2188 #ifdef _LITTLE_ENDIAN
2189       res2 += (t0 << 8);
2190       res1 = (t0 >> 24) + t1;
2191 #else
2192       res2 += (t0 >> 8);
2193       res1 = (t0 << 24) + t1;
2194 #endif /* _LITTLE_ENDIAN */
2195       da[1] = res2;
2196       da[2] = res1;
2197       da += 3;
2198       sa += 2;
2199       dp = (mlib_u8 *) da;
2200       i += 4;
2201 
         /* Remaining pixels of the row, byte by byte. */
2202 #ifdef __SUNPRO_C
2203 #pragma pipeloop(0)
2204 #endif /* __SUNPRO_C */
2205       for (; i < size; i++) {
2206         ptr = (mlib_u8 *) (tab + sa[0]);
2207         dp[0] = ptr[1];
2208         dp[1] = ptr[2];
2209         dp[2] = ptr[3];
2210         dp += 3;
2211         sa++;
2212       }
2213     }
2214 
2215   }
2216   else if (csize == 4) {
       /* tab[v] holds the four channel values of one pixel, packed so
        * that a 32-bit store writes them in channel order. */
2217     mlib_u32 tab[256];
2218     const mlib_u8 *tab0 = table[0];
2219     const mlib_u8 *tab1 = table[1];
2220     const mlib_u8 *tab2 = table[2];
2221     const mlib_u8 *tab3 = table[3];
2222     mlib_s32 i, j;
2223     mlib_u32 s0, s1, s2, s3, s4;
2224 
       /* Pipelined fill, same scheme as for csize == 2. */
2225     s0 = tab0[0];
2226     s1 = tab1[0];
2227     s2 = tab2[0];
2228     s3 = tab3[0];
2229     for (i = 1; i < 256; i++) {
2230 #ifdef _LITTLE_ENDIAN
2231       s4 = (s3 << 24) + (s2 << 16) + (s1 << 8) + s0;
2232 #else
2233       s4 = (s0 << 24) + (s1 << 16) + (s2 << 8) + s3;
2234 #endif /* _LITTLE_ENDIAN */
2235       s0 = tab0[i];
2236       s1 = tab1[i];
2237       s2 = tab2[i];
2238       s3 = tab3[i];
2239       tab[i - 1] = s4;
2240     }
2241 
2242 #ifdef _LITTLE_ENDIAN
2243     s4 = (s3 << 24) + (s2 << 16) + (s1 << 8) + s0;
2244 #else
2245     s4 = (s0 << 24) + (s1 << 16) + (s2 << 8) + s3;
2246 #endif /* _LITTLE_ENDIAN */
2247     tab[255] = s4;
2248 
2249     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
2250       mlib_u32 *da;
2251       mlib_u8 *dp = dst;
2252       mlib_u8 *sa = (void *)src;
2253       mlib_u32 s0, t0, s1, t1, t2;
2254       mlib_s32 size = xsize, off;
2255       mlib_u32 shift, shift1, res1, res2;
2256 
         /* Aligned destination: every pixel is exactly one 32-bit store. */
2257       if (((mlib_addr) dp & 3) == 0) {
2258 
2259         da = (mlib_u32 *) dp;
2260 
2261         s0 = sa[0];
2262         s1 = sa[1];
2263         sa += 2;
2264 
           /* Two pixels per iteration, next two samples loaded ahead. */
2265 #ifdef __SUNPRO_C
2266 #pragma pipeloop(0)
2267 #endif /* __SUNPRO_C */
2268         for (i = 0; i < size - 3; i += 2, da += 2, sa += 2) {
2269           t0 = tab[s0];
2270           t1 = tab[s1];
2271           s0 = sa[0];
2272           s1 = sa[1];
2273           da[0] = t0;
2274           da[1] = t1;
2275         }
2276 
           /* Peeled last pair; da was not advanced past it, so an odd
            * trailing pixel lands in da[2]. */
2277         t0 = tab[s0];
2278         t1 = tab[s1];
2279         da[0] = t0;
2280         da[1] = t1;
2281 
2282         if (size & 1)
2283           da[2] = tab[sa[0]];
2284 
2285       }
2286       else {
2287 
           /* Unaligned destination: off = bytes up to the next 4-byte
            * boundary (1..3); shift / shift1 are the matching bit counts
            * used to split a packed pixel across two words. */
2288         off = (mlib_s32) (4 - ((mlib_addr) dp & 3));
2289         shift = 8 * off;
2290         shift1 = 32 - shift;
2291 
           /* First off bytes of the first pixel, taken directly from the
            * per-channel tables. */
2292         for (i = 0; i < off; i++) {
2293           dp[i] = table[i][sa[0]];
2294         }
2295 
2296         dp += i;
           /* t0 carries the first pixel; its remaining bytes go into the
            * first word below. */
2297         t0 = tab[sa[0]];
2298         sa++;
2299 
2300         da = (mlib_u32 *) dp;
2301 
2302         s0 = sa[0];
2303         s1 = sa[1];
2304         sa += 2;
2305 
           /* Each word joins the tail of one pixel with the head of the
            * next; two words per iteration. */
2306 #ifdef __SUNPRO_C
2307 #pragma pipeloop(0)
2308 #endif /* __SUNPRO_C */
2309         for (i = 0; i < size - 4; i += 2, da += 2, sa += 2) {
2310           t1 = tab[s0];
2311           t2 = tab[s1];
2312 #ifdef _LITTLE_ENDIAN
2313           res1 = (t0 >> shift) + (t1 << shift1);
2314           res2 = (t1 >> shift) + (t2 << shift1);
2315 #else
2316           res1 = (t0 << shift) + (t1 >> shift1);
2317           res2 = (t1 << shift) + (t2 >> shift1);
2318 #endif /* _LITTLE_ENDIAN */
2319           t0 = t2;
2320           s0 = sa[0];
2321           s1 = sa[1];
2322           da[0] = res1;
2323           da[1] = res2;
2324         }
2325 
           /* Peeled last two words. */
2326         t1 = tab[s0];
2327         t2 = tab[s1];
2328 #ifdef _LITTLE_ENDIAN
2329         res1 = (t0 >> shift) + (t1 << shift1);
2330         res2 = (t1 >> shift) + (t2 << shift1);
2331 #else
2332         res1 = (t0 << shift) + (t1 >> shift1);
2333         res2 = (t1 << shift) + (t2 >> shift1);
2334 #endif /* _LITTLE_ENDIAN */
2335         da[0] = res1;
2336         da[1] = res2;
           /* Tail of pixel t2: read-modify-write of da[2].  The off bytes
            * that follow the pixel are read from the destination and
            * written back with the same values. */
2337 #ifdef _LITTLE_ENDIAN
2338         t0 = (da[2] >> shift1);
2339         da[2] = (t2 >> shift) + (t0 << shift1);
2340 #else
2341         t0 = (da[2] << shift1);
2342         da[2] = (t2 << shift) + (t0 >> shift1);
2343 #endif /* _LITTLE_ENDIAN */
2344         da += 2;
           /* dp now points just past the tail written into that word. */
2345         dp = (mlib_u8 *) da + (4 - off);
2346 
           /* Even size: one more pixel remains; write it byte by byte. */
2347         if ((size & 1) == 0) {
2348           t0 = tab[sa[0]];
2349 #ifdef _LITTLE_ENDIAN
2350           dp[3] = (mlib_u8) (t0 >> 24);
2351           dp[2] = (mlib_u8) (t0 >> 16);
2352           dp[1] = (mlib_u8) (t0 >> 8);
2353           dp[0] = (mlib_u8) t0;
2354 #else
2355           dp[0] = (mlib_u8) (t0 >> 24);
2356           dp[1] = (mlib_u8) (t0 >> 16);
2357           dp[2] = (mlib_u8) (t0 >> 8);
2358           dp[3] = (mlib_u8) t0;
2359 #endif /* _LITTLE_ENDIAN */
2360         }
2361       }
2362     }
2363   }
2364 }
2365 
2366 /***************************************************************/
2367 
/*
 * Compiler optimization is switched off for the following function when
 * building with MSVC and switched back on after it.
 * NOTE(review): the reason is not stated in this file - presumably an
 * optimizer problem with this routine; confirm before removing.
 */
2368 #ifdef _MSC_VER
2369 #pragma optimize("", off)
2370 #endif /* _MSC_VER */
2371 
/*
 * Single-input lookup, mlib_s16 source to mlib_u8 destination.
 *
 * Every source sample is looked up in each of the csize tables and the
 * csize results are written to dst as consecutive bytes.
 *
 *   src, slb - first source row; slb is added to src (a mlib_s16
 *              pointer) after each row
 *   dst, dlb - first destination row; dlb is added to dst after each row
 *   xsize    - number of source samples processed per row
 *   ysize    - number of rows
 *   csize    - number of tables, i.e. destination bytes per source sample;
 *              table_base has room for four pointers and csize is not
 *              checked against that here
 *   table    - table[c] + 32768 is indexed with the signed source value
 *              (presumably each table has 65536 entries - confirm)
 *
 * xsize < 8 or csize == 2 goes through the generic MLIB_C_IMAGELOOKUPSI
 * macro.  csize 3 and 4 assemble 32-bit words from the per-channel
 * tables and write them with aligned stores.  For xsize >= 8 with any
 * other csize no branch matches and nothing is written.  The packing
 * expressions assume that _LITTLE_ENDIAN is defined exactly when the
 * target is little-endian.
 */
2372 void mlib_c_ImageLookUpSI_S16_U8(const mlib_s16 *src,
2373                                  mlib_s32       slb,
2374                                  mlib_u8        *dst,
2375                                  mlib_s32       dlb,
2376                                  mlib_s32       xsize,
2377                                  mlib_s32       ysize,
2378                                  mlib_s32       csize,
2379                                  const mlib_u8  **table)
2380 {
2381   const mlib_u8 *table_base[4];
2382   mlib_s32 c;
2383 
     /* Bias every table pointer by 32768 entries so that negative source
      * values can be used as indices directly. */
2384   for (c = 0; c < csize; c++) {
2385     table_base[c] = &table[c][32768];
2386   }
2387 
2388   if ((xsize < 8) || (csize == 2)) {
2389     MLIB_C_IMAGELOOKUPSI(mlib_u8, mlib_s16, table_base);
2390   }
2391   else if (csize == 3) {
2392     mlib_s32 i, j;
2393 
2394     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
2395       mlib_u32 *da;
2396       mlib_u8 *dp = dst;
2397       mlib_s16 *sa = (void *)src;
2398       const mlib_u8 *tab0 = table_base[0];
2399       const mlib_u8 *tab1 = table_base[1];
2400       const mlib_u8 *tab2 = table_base[2];
2401       mlib_s32 s0, s1;
2402       mlib_u32 t0, t1, t2, t3, t4, t5;
2403       mlib_u32 res1, res2;
2404       mlib_s32 size = xsize, off;
2405 
         /* A pixel is 3 bytes, so writing (dp & 3) pixels byte by byte
          * brings dp to a 4-byte boundary. */
2406       off = (mlib_s32) ((mlib_addr) dp & 3);
2407 
2408 #ifdef __SUNPRO_C
2409 #pragma pipeloop(0)
2410 #endif /* __SUNPRO_C */
2411       for (i = 0; i < off; i++) {
2412         s0 = *sa++;
2413         dp[0] = tab0[s0];
2414         dp[1] = tab1[s0];
2415         dp[2] = tab2[s0];
2416         dp += 3;
2417       }
2418 
2419       size -= off;
2420       da = (mlib_u32 *) dp;
2421       s0 = sa[0];
2422       s1 = sa[1];
2423       sa += 2;
2424 
         /* Four pixels (12 bytes) are written as three 32-bit words per
          * iteration: da[0] = pixel 0 and channel 0 of pixel 1, da[1] =
          * rest of pixel 1 and channels 0-1 of pixel 2, da[2] = channel 2
          * of pixel 2 and pixel 3.  The first two samples of the next
          * group are loaded before the last two words are stored. */
2425 #ifdef __SUNPRO_C
2426 #pragma pipeloop(0)
2427 #endif /* __SUNPRO_C */
2428       for (i = 0; i < size - 7; i += 4, da += 3, sa += 4) {
2429         t0 = tab0[s0];
2430         t1 = tab1[s0];
2431         t2 = tab2[s0];
2432         t3 = tab0[s1];
2433         t4 = tab1[s1];
2434         t5 = tab2[s1];
2435 #ifdef _LITTLE_ENDIAN
2436         da[0] = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2437         res2 = (t5 << 8) + t4;
2438 #else
2439         da[0] = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2440         res2 = (t4 << 24) + (t5 << 16);
2441 #endif /* _LITTLE_ENDIAN */
2442         s0 = sa[0];
2443         s1 = sa[1];
2444         t0 = tab0[s0];
2445         t1 = tab1[s0];
2446         t2 = tab2[s0];
2447         t3 = tab0[s1];
2448         t4 = tab1[s1];
2449         t5 = tab2[s1];
2450 #ifdef _LITTLE_ENDIAN
2451         res2 += ((t1 << 24) + (t0 << 16));
2452         res1 = (t5 << 24) + (t4 << 16) + (t3 << 8) + t2;
2453 #else
2454         res2 += ((t0 << 8) + t1);
2455         res1 = (t2 << 24) + (t3 << 16) + (t4 << 8) + t5;
2456 #endif /* _LITTLE_ENDIAN */
2457         s0 = sa[2];
2458         s1 = sa[3];
2459         da[1] = res2;
2460         da[2] = res1;
2461       }
2462 
         /* Peeled last group of four pixels; it is emitted unconditionally. */
2463       t0 = tab0[s0];
2464       t1 = tab1[s0];
2465       t2 = tab2[s0];
2466       t3 = tab0[s1];
2467       t4 = tab1[s1];
2468       t5 = tab2[s1];
2469 #ifdef _LITTLE_ENDIAN
2470       da[0] = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2471       res2 = (t5 << 8) + t4;
2472 #else
2473       da[0] = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2474       res2 = (t4 << 24) + (t5 << 16);
2475 #endif /* _LITTLE_ENDIAN */
2476       s0 = sa[0];
2477       s1 = sa[1];
2478       t0 = tab0[s0];
2479       t1 = tab1[s0];
2480       t2 = tab2[s0];
2481       t3 = tab0[s1];
2482       t4 = tab1[s1];
2483       t5 = tab2[s1];
2484 #ifdef _LITTLE_ENDIAN
2485       res2 += ((t1 << 24) + (t0 << 16));
2486       res1 = (t5 << 24) + (t4 << 16) + (t3 << 8) + t2;
2487 #else
2488       res2 += ((t0 << 8) + t1);
2489       res1 = (t2 << 24) + (t3 << 16) + (t4 << 8) + t5;
2490 #endif /* _LITTLE_ENDIAN */
2491       da[1] = res2;
2492       da[2] = res1;
2493       da += 3;
2494       sa += 2;
2495       dp = (mlib_u8 *) da;
2496       i += 4;
2497 
         /* Remaining pixels of the row, byte by byte. */
2498 #ifdef __SUNPRO_C
2499 #pragma pipeloop(0)
2500 #endif /* __SUNPRO_C */
2501       for (; i < size; i++) {
2502         s0 = *sa++;
2503         dp[0] = tab0[s0];
2504         dp[1] = tab1[s0];
2505         dp[2] = tab2[s0];
2506         dp += 3;
2507       }
2508     }
2509 
2510   }
2511   else if (csize == 4) {
2512     mlib_s32 i, j;
2513 
2514     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
2515       mlib_u32 *da;
2516       mlib_u8 *dp = dst;
2517       mlib_s16 *sa = (void *)src;
2518       const mlib_u8 *tab0 = table_base[0];
2519       const mlib_u8 *tab1 = table_base[1];
2520       const mlib_u8 *tab2 = table_base[2];
2521       const mlib_u8 *tab3 = table_base[3];
2522       mlib_s32 s0;
2523       mlib_u32 t0, t1, t2, t3;
2524       mlib_s32 size = xsize, off;
2525       mlib_u32 shift, shift1, res1, res2, res;
2526 
         /* Aligned destination: every pixel is exactly one 32-bit store. */
2527       if (((mlib_addr) dp & 3) == 0) {
2528 
2529         da = (mlib_u32 *) dp;
2530 
2531         s0 = sa[0];
2532         sa++;
2533 
           /* One pixel per iteration; the next sample is loaded before
            * the store. */
2534 #ifdef __SUNPRO_C
2535 #pragma pipeloop(0)
2536 #endif /* __SUNPRO_C */
2537         for (i = 0; i < size - 1; i++, da++, sa++) {
2538           t0 = tab0[s0];
2539           t1 = tab1[s0];
2540           t2 = tab2[s0];
2541           t3 = tab3[s0];
2542 #ifdef _LITTLE_ENDIAN
2543           res = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2544 #else
2545           res = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2546 #endif /* _LITTLE_ENDIAN */
2547           s0 = sa[0];
2548           da[0] = res;
2549         }
2550 
           /* Peeled last pixel. */
2551         t0 = tab0[s0];
2552         t1 = tab1[s0];
2553         t2 = tab2[s0];
2554         t3 = tab3[s0];
2555 #ifdef _LITTLE_ENDIAN
2556         res = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2557 #else
2558         res = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2559 #endif /* _LITTLE_ENDIAN */
2560         da[0] = res;
2561 
2562       }
2563       else {
2564 
           /* Unaligned destination: off = bytes up to the next 4-byte
            * boundary (1..3); shift / shift1 are the matching bit counts
            * used to split a packed pixel across two words. */
2565         off = (mlib_s32) (4 - ((mlib_addr) dp & 3));
2566         shift = 8 * off;
2567         shift1 = 32 - shift;
2568 
2569         s0 = *sa++;
2570 
           /* First off bytes of the first pixel, byte by byte. */
2571         for (i = 0; i < off; i++) {
2572           dp[i] = table_base[i][s0];
2573         }
2574 
2575         dp += i;
2576         da = (mlib_u32 *) dp;
2577 
           /* res1 holds the whole first pixel; its remaining bytes go
            * into the first word below. */
2578         t0 = tab0[s0];
2579         t1 = tab1[s0];
2580         t2 = tab2[s0];
2581         t3 = tab3[s0];
2582 
2583 #ifdef _LITTLE_ENDIAN
2584         res1 = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2585 #else
2586         res1 = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2587 #endif /* _LITTLE_ENDIAN */
2588 
2589         s0 = sa[0];
2590         sa++;
2591 
           /* Each word joins the tail of the previous pixel (res1) with
            * the head of the current one (res2). */
2592 #ifdef __SUNPRO_C
2593 #pragma pipeloop(0)
2594 #endif /* __SUNPRO_C */
2595         for (i = 0; i < size - 2; i++, da++, sa++) {
2596           t0 = tab0[s0];
2597           t1 = tab1[s0];
2598           t2 = tab2[s0];
2599           t3 = tab3[s0];
2600 #ifdef _LITTLE_ENDIAN
2601           res2 = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2602           res = (res1 >> shift) + (res2 << shift1);
2603 #else
2604           res2 = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2605           res = (res1 << shift) + (res2 >> shift1);
2606 #endif /* _LITTLE_ENDIAN */
2607           res1 = res2;
2608           s0 = sa[0];
2609           da[0] = res;
2610         }
2611 
           /* Peeled last word. */
2612         t0 = tab0[s0];
2613         t1 = tab1[s0];
2614         t2 = tab2[s0];
2615         t3 = tab3[s0];
2616 #ifdef _LITTLE_ENDIAN
2617         res2 = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2618         res = (res1 >> shift) + (res2 << shift1);
2619 #else
2620         res2 = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2621         res = (res1 << shift) + (res2 >> shift1);
2622 #endif /* _LITTLE_ENDIAN */
2623         da[0] = res;
           /* Tail of the last pixel: read-modify-write of da[1].  The off
            * bytes that follow the pixel are read from the destination
            * and written back with the same values. */
2624 #ifdef _LITTLE_ENDIAN
2625         res1 = (da[1] >> shift1);
2626         da[1] = (res2 >> shift) + (res1 << shift1);
2627 #else
2628         res1 = (da[1] << shift1);
2629         da[1] = (res2 << shift) + (res1 >> shift1);
2630 #endif /* _LITTLE_ENDIAN */
2631       }
2632     }
2633   }
2634 }
2635 
2636 #ifdef _MSC_VER
2637 #pragma optimize("", on)
2638 #endif /* _MSC_VER */
2639 
2640 /***************************************************************/
/*
 * Single-input lookup, mlib_u16 source to mlib_u8 destination.
 *
 * Every source sample is looked up in each of the csize tables and the
 * csize results are written to dst as consecutive bytes.
 *
 *   src, slb - first source row; slb is added to src (a mlib_u16
 *              pointer) after each row
 *   dst, dlb - first destination row; dlb is added to dst after each row
 *   xsize    - number of source samples processed per row
 *   ysize    - number of rows
 *   csize    - number of tables, i.e. destination bytes per source sample;
 *              table_base has room for four pointers and csize is not
 *              checked against that here
 *   table    - table[c] is indexed with the unsigned source value, with
 *              no offset applied
 *
 * xsize < 8 or csize == 2 goes through the generic MLIB_C_IMAGELOOKUPSI
 * macro.  csize 3 and 4 assemble 32-bit words from the per-channel
 * tables and write them with aligned stores.  For xsize >= 8 with any
 * other csize no branch matches and nothing is written.  The packing
 * expressions assume that _LITTLE_ENDIAN is defined exactly when the
 * target is little-endian.
 */
2641 void mlib_c_ImageLookUpSI_U16_U8(const mlib_u16 *src,
2642                                  mlib_s32       slb,
2643                                  mlib_u8        *dst,
2644                                  mlib_s32       dlb,
2645                                  mlib_s32       xsize,
2646                                  mlib_s32       ysize,
2647                                  mlib_s32       csize,
2648                                  const mlib_u8  **table)
2649 {
2650   const mlib_u8 *table_base[4];
2651   mlib_s32 c;
2652 
     /* Unsigned source values need no bias: table_base[c] is table[c]. */
2653   for (c = 0; c < csize; c++) {
2654     table_base[c] = &table[c][0];
2655   }
2656 
2657   if ((xsize < 8) || (csize == 2)) {
2658     MLIB_C_IMAGELOOKUPSI(mlib_u8, mlib_u16, table_base);
2659   }
2660   else if (csize == 3) {
2661     mlib_s32 i, j;
2662 
2663     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
2664       mlib_u32 *da;
2665       mlib_u8 *dp = dst;
2666       mlib_u16 *sa = (void *)src;
2667       const mlib_u8 *tab0 = table_base[0];
2668       const mlib_u8 *tab1 = table_base[1];
2669       const mlib_u8 *tab2 = table_base[2];
2670       mlib_s32 s0, s1;
2671       mlib_u32 t0, t1, t2, t3, t4, t5;
2672       mlib_u32 res1, res2;
2673       mlib_s32 size = xsize, off;
2674 
         /* A pixel is 3 bytes, so writing (dp & 3) pixels byte by byte
          * brings dp to a 4-byte boundary. */
2675       off = (mlib_s32) ((mlib_addr) dp & 3);
2676 
2677 #ifdef __SUNPRO_C
2678 #pragma pipeloop(0)
2679 #endif /* __SUNPRO_C */
2680       for (i = 0; i < off; i++) {
2681         s0 = *sa++;
2682         dp[0] = tab0[s0];
2683         dp[1] = tab1[s0];
2684         dp[2] = tab2[s0];
2685         dp += 3;
2686       }
2687 
2688       size -= off;
2689       da = (mlib_u32 *) dp;
2690       s0 = sa[0];
2691       s1 = sa[1];
2692       sa += 2;
2693 
         /* Four pixels (12 bytes) are written as three 32-bit words per
          * iteration: da[0] = pixel 0 and channel 0 of pixel 1, da[1] =
          * rest of pixel 1 and channels 0-1 of pixel 2, da[2] = channel 2
          * of pixel 2 and pixel 3.  The first two samples of the next
          * group are loaded before the last two words are stored. */
2694 #ifdef __SUNPRO_C
2695 #pragma pipeloop(0)
2696 #endif /* __SUNPRO_C */
2697       for (i = 0; i < size - 7; i += 4, da += 3, sa += 4) {
2698         t0 = tab0[s0];
2699         t1 = tab1[s0];
2700         t2 = tab2[s0];
2701         t3 = tab0[s1];
2702         t4 = tab1[s1];
2703         t5 = tab2[s1];
2704 #ifdef _LITTLE_ENDIAN
2705         da[0] = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2706         res2 = (t5 << 8) + t4;
2707 #else
2708         da[0] = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2709         res2 = (t4 << 24) + (t5 << 16);
2710 #endif /* _LITTLE_ENDIAN */
2711         s0 = sa[0];
2712         s1 = sa[1];
2713         t0 = tab0[s0];
2714         t1 = tab1[s0];
2715         t2 = tab2[s0];
2716         t3 = tab0[s1];
2717         t4 = tab1[s1];
2718         t5 = tab2[s1];
2719 #ifdef _LITTLE_ENDIAN
2720         res2 += ((t1 << 24) + (t0 << 16));
2721         res1 = (t5 << 24) + (t4 << 16) + (t3 << 8) + t2;
2722 #else
2723         res2 += ((t0 << 8) + t1);
2724         res1 = (t2 << 24) + (t3 << 16) + (t4 << 8) + t5;
2725 #endif /* _LITTLE_ENDIAN */
2726         s0 = sa[2];
2727         s1 = sa[3];
2728         da[1] = res2;
2729         da[2] = res1;
2730       }
2731 
         /* Peeled last group of four pixels; it is emitted unconditionally. */
2732       t0 = tab0[s0];
2733       t1 = tab1[s0];
2734       t2 = tab2[s0];
2735       t3 = tab0[s1];
2736       t4 = tab1[s1];
2737       t5 = tab2[s1];
2738 #ifdef _LITTLE_ENDIAN
2739       da[0] = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2740       res2 = (t5 << 8) + t4;
2741 #else
2742       da[0] = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2743       res2 = (t4 << 24) + (t5 << 16);
2744 #endif /* _LITTLE_ENDIAN */
2745       s0 = sa[0];
2746       s1 = sa[1];
2747       t0 = tab0[s0];
2748       t1 = tab1[s0];
2749       t2 = tab2[s0];
2750       t3 = tab0[s1];
2751       t4 = tab1[s1];
2752       t5 = tab2[s1];
2753 #ifdef _LITTLE_ENDIAN
2754       res2 += ((t1 << 24) + (t0 << 16));
2755       res1 = (t5 << 24) + (t4 << 16) + (t3 << 8) + t2;
2756 #else
2757       res2 += ((t0 << 8) + t1);
2758       res1 = (t2 << 24) + (t3 << 16) + (t4 << 8) + t5;
2759 #endif /* _LITTLE_ENDIAN */
2760       da[1] = res2;
2761       da[2] = res1;
2762       da += 3;
2763       sa += 2;
2764       dp = (mlib_u8 *) da;
2765       i += 4;
2766 
         /* Remaining pixels of the row, byte by byte. */
2767 #ifdef __SUNPRO_C
2768 #pragma pipeloop(0)
2769 #endif /* __SUNPRO_C */
2770       for (; i < size; i++) {
2771         s0 = *sa++;
2772         dp[0] = tab0[s0];
2773         dp[1] = tab1[s0];
2774         dp[2] = tab2[s0];
2775         dp += 3;
2776       }
2777     }
2778 
2779   }
2780   else if (csize == 4) {
2781     mlib_s32 i, j;
2782 
2783     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
2784       mlib_u32 *da;
2785       mlib_u8 *dp = dst;
2786       mlib_u16 *sa = (void *)src;
2787       const mlib_u8 *tab0 = table_base[0];
2788       const mlib_u8 *tab1 = table_base[1];
2789       const mlib_u8 *tab2 = table_base[2];
2790       const mlib_u8 *tab3 = table_base[3];
2791       mlib_s32 s0;
2792       mlib_u32 t0, t1, t2, t3;
2793       mlib_s32 size = xsize, off;
2794       mlib_u32 shift, shift1, res1, res2, res;
2795 
         /* Aligned destination: every pixel is exactly one 32-bit store. */
2796       if (((mlib_addr) dp & 3) == 0) {
2797 
2798         da = (mlib_u32 *) dp;
2799 
2800         s0 = sa[0];
2801         sa++;
2802 
           /* One pixel per iteration; the next sample is loaded before
            * the store. */
2803 #ifdef __SUNPRO_C
2804 #pragma pipeloop(0)
2805 #endif /* __SUNPRO_C */
2806         for (i = 0; i < size - 1; i++, da++, sa++) {
2807           t0 = tab0[s0];
2808           t1 = tab1[s0];
2809           t2 = tab2[s0];
2810           t3 = tab3[s0];
2811 #ifdef _LITTLE_ENDIAN
2812           res = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2813 #else
2814           res = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2815 #endif /* _LITTLE_ENDIAN */
2816           s0 = sa[0];
2817           da[0] = res;
2818         }
2819 
           /* Peeled last pixel. */
2820         t0 = tab0[s0];
2821         t1 = tab1[s0];
2822         t2 = tab2[s0];
2823         t3 = tab3[s0];
2824 #ifdef _LITTLE_ENDIAN
2825         res = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2826 #else
2827         res = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2828 #endif /* _LITTLE_ENDIAN */
2829         da[0] = res;
2830 
2831       }
2832       else {
2833 
           /* Unaligned destination: off = bytes up to the next 4-byte
            * boundary (1..3); shift / shift1 are the matching bit counts
            * used to split a packed pixel across two words. */
2834         off = (mlib_s32) (4 - ((mlib_addr) dp & 3));
2835         shift = 8 * off;
2836         shift1 = 32 - shift;
2837 
2838         s0 = *sa++;
2839 
           /* First off bytes of the first pixel, byte by byte. */
2840         for (i = 0; i < off; i++) {
2841           dp[i] = table_base[i][s0];
2842         }
2843 
2844         dp += i;
2845         da = (mlib_u32 *) dp;
2846 
           /* res1 holds the whole first pixel; its remaining bytes go
            * into the first word below. */
2847         t0 = tab0[s0];
2848         t1 = tab1[s0];
2849         t2 = tab2[s0];
2850         t3 = tab3[s0];
2851 
2852 #ifdef _LITTLE_ENDIAN
2853         res1 = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2854 #else
2855         res1 = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2856 #endif /* _LITTLE_ENDIAN */
2857 
2858         s0 = sa[0];
2859         sa++;
2860 
           /* Each word joins the tail of the previous pixel (res1) with
            * the head of the current one (res2). */
2861 #ifdef __SUNPRO_C
2862 #pragma pipeloop(0)
2863 #endif /* __SUNPRO_C */
2864         for (i = 0; i < size - 2; i++, da++, sa++) {
2865           t0 = tab0[s0];
2866           t1 = tab1[s0];
2867           t2 = tab2[s0];
2868           t3 = tab3[s0];
2869 #ifdef _LITTLE_ENDIAN
2870           res2 = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2871           res = (res1 >> shift) + (res2 << shift1);
2872 #else
2873           res2 = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2874           res = (res1 << shift) + (res2 >> shift1);
2875 #endif /* _LITTLE_ENDIAN */
2876           res1 = res2;
2877           s0 = sa[0];
2878           da[0] = res;
2879         }
2880 
           /* Peeled last word. */
2881         t0 = tab0[s0];
2882         t1 = tab1[s0];
2883         t2 = tab2[s0];
2884         t3 = tab3[s0];
2885 #ifdef _LITTLE_ENDIAN
2886         res2 = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2887         res = (res1 >> shift) + (res2 << shift1);
2888 #else
2889         res2 = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2890         res = (res1 << shift) + (res2 >> shift1);
2891 #endif /* _LITTLE_ENDIAN */
2892         da[0] = res;
           /* Tail of the last pixel: read-modify-write of da[1].  The off
            * bytes that follow the pixel are read from the destination
            * and written back with the same values. */
2893 #ifdef _LITTLE_ENDIAN
2894         res1 = (da[1] >> shift1);
2895         da[1] = (res2 >> shift) + (res1 << shift1);
2896 #else
2897         res1 = (da[1] << shift1);
2898         da[1] = (res2 << shift) + (res1 >> shift1);
2899 #endif /* _LITTLE_ENDIAN */
2900       }
2901     }
2902   }
2903 }
2904 
2905 /***************************************************************/
/*
 * Single-input lookup, mlib_s32 source to mlib_u8 destination.
 *
 * Offsets each table pointer by TABLE_SHIFT_S32 entries and hands the
 * whole job to the generic MLIB_C_IMAGELOOKUPSI macro; there is no
 * specialised fast path for this type combination.
 *
 *   src, slb, dst, dlb, xsize, ysize - not referenced directly in this
 *              body; presumably picked up by name inside the macro, whose
 *              definition is elsewhere in the file
 *   csize    - number of tables; table_base has room for four pointers
 *              and csize is not checked against that here
 *   table    - table[c] is the lookup table for destination channel c
 */
2906 void mlib_c_ImageLookUpSI_S32_U8(const mlib_s32 *src,
2907                                  mlib_s32       slb,
2908                                  mlib_u8        *dst,
2909                                  mlib_s32       dlb,
2910                                  mlib_s32       xsize,
2911                                  mlib_s32       ysize,
2912                                  mlib_s32       csize,
2913                                  const mlib_u8  **table)
2914 {
2915   const mlib_u8 *table_base[4];
2916   mlib_s32 c;
2917 
     /* Bias the table pointers once so the macro can index them with the
      * source value directly. */
2918   for (c = 0; c < csize; c++) {
2919     table_base[c] = &table[c][TABLE_SHIFT_S32];
2920   }
2921 
     /* GCC's -Warray-bounds diagnostic is silenced around the macro
      * expansion only and restored right after it.
      * NOTE(review): presumably triggered by the biased table pointers -
      * confirm. */
2922 #ifdef __GNUC__
2923 #pragma GCC diagnostic push
2924 #pragma GCC diagnostic ignored "-Warray-bounds"
2925 #endif
2926   MLIB_C_IMAGELOOKUPSI(mlib_u8, mlib_s32, table_base);
2927 #ifdef __GNUC__
2928 #pragma GCC diagnostic pop
2929 #endif
2930 }
2931 
2932 /***************************************************************/
2933 void mlib_c_ImageLookUpSI_U8_S16(const mlib_u8  *src,
2934                                  mlib_s32       slb,
2935                                  mlib_s16       *dst,
2936                                  mlib_s32       dlb,
2937                                  mlib_s32       xsize,
2938                                  mlib_s32       ysize,
2939                                  mlib_s32       csize,
2940                                  const mlib_s16 **table)
2941 {
2942 
2943   if ((xsize < 4) || ((xsize * ysize) < 250)) {
2944     MLIB_C_IMAGELOOKUPSI(mlib_s16, mlib_u8, table);
2945 
2946   }
2947   else if (csize == 2) {
2948     mlib_u32 tab[256];
2949     mlib_u16 *tab0 = (mlib_u16 *) table[0];
2950     mlib_u16 *tab1 = (mlib_u16 *) table[1];
2951     mlib_s32 i, j;
2952     mlib_u32 s0, s1, s2;
2953 
2954     s0 = tab0[0];
2955     s1 = tab1[0];
2956     for (i = 1; i < 256; i++) {
2957 #ifdef _LITTLE_ENDIAN
2958       s2 = (s1 << 16) + s0;
2959 #else
2960       s2 = (s0 << 16) + s1;
2961 #endif /* _LITTLE_ENDIAN */
2962       s0 = tab0[i];
2963       s1 = tab1[i];
2964       tab[i - 1] = s2;
2965     }
2966 
2967 #ifdef _LITTLE_ENDIAN
2968     s2 = (s1 << 16) + s0;
2969 #else
2970     s2 = (s0 << 16) + s1;
2971 #endif /* _LITTLE_ENDIAN */
2972     tab[255] = s2;
2973 
2974     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
2975       mlib_u32 *da;
2976       mlib_u16 *dp = (mlib_u16 *) dst;
2977       mlib_u8 *sa = (void *)src;
2978       mlib_u32 s0, t0, s1, t1, t2;
2979       mlib_u32 res1, res2;
2980       mlib_s32 size = xsize;
2981 
2982       if (((mlib_addr) dp & 3) == 0) {
2983 
2984         da = (mlib_u32 *) dp;
2985         s0 = sa[0];
2986         s1 = sa[1];
2987         sa += 2;
2988 
2989 #ifdef __SUNPRO_C
2990 #pragma pipeloop(0)
2991 #endif /* __SUNPRO_C */
2992         for (i = 0; i < size - 3; i += 2, da += 2, sa += 2) {
2993           t0 = tab[s0];
2994           t1 = tab[s1];
2995           s0 = sa[0];
2996           s1 = sa[1];
2997           da[0] = t0;
2998           da[1] = t1;
2999         }
3000 
3001         t0 = tab[s0];
3002         t1 = tab[s1];
3003         da[0] = t0;
3004         da[1] = t1;
3005 
3006         if (size & 1)
3007           da[2] = tab[sa[0]];
3008 
3009       }
3010       else {
3011 
3012         t0 = tab[*sa++];
3013 #ifdef _LITTLE_ENDIAN
3014         *dp++ = (mlib_u16) (t0);
3015 #else
3016         *dp++ = (mlib_u16) (t0 >> 16);
3017 #endif /* _LITTLE_ENDIAN */
3018         da = (mlib_u32 *) dp;
3019         s0 = sa[0];
3020         s1 = sa[1];
3021         sa += 2;
3022 
3023 #ifdef __SUNPRO_C
3024 #pragma pipeloop(0)
3025 #endif /* __SUNPRO_C */
3026         for (i = 0; i < size - 4; i += 2, da += 2, sa += 2) {
3027           t1 = tab[s0];
3028           t2 = tab[s1];
3029 #ifdef _LITTLE_ENDIAN
3030           res1 = (t0 >> 16) + (t1 << 16);
3031           res2 = (t1 >> 16) + (t2 << 16);
3032 #else
3033           res1 = (t0 << 16) + (t1 >> 16);
3034           res2 = (t1 << 16) + (t2 >> 16);
3035 #endif /* _LITTLE_ENDIAN */
3036           t0 = t2;
3037           s0 = sa[0];
3038           s1 = sa[1];
3039           da[0] = res1;
3040           da[1] = res2;
3041         }
3042 
3043         t1 = tab[s0];
3044         t2 = tab[s1];
3045 #ifdef _LITTLE_ENDIAN
3046         res1 = (t0 >> 16) + (t1 << 16);
3047         res2 = (t1 >> 16) + (t2 << 16);
3048 #else
3049         res1 = (t0 << 16) + (t1 >> 16);
3050         res2 = (t1 << 16) + (t2 >> 16);
3051 #endif /* _LITTLE_ENDIAN */
3052         da[0] = res1;
3053         da[1] = res2;
3054         da += 2;
3055         dp = (mlib_u16 *) da;
3056 #ifdef _LITTLE_ENDIAN
3057         dp[0] = (mlib_u16) (t2 >> 16);
3058 #else
3059         dp[0] = (mlib_u16) t2;
3060 #endif /* _LITTLE_ENDIAN */
3061 
3062         if ((size & 1) == 0) {
3063           t0 = tab[sa[0]];
3064 #ifdef _LITTLE_ENDIAN
3065           dp[2] = (mlib_u16) (t0 >> 16);
3066           dp[1] = (mlib_u16) t0;
3067 #else
3068           dp[1] = (mlib_u16) (t0 >> 16);
3069           dp[2] = (mlib_u16) t0;
3070 #endif /* _LITTLE_ENDIAN */
3071         }
3072       }
3073     }
3074 
3075   }
3076   else if (csize == 3) {
3077     mlib_u32 tab[512];
3078     mlib_u16 *tab0 = (mlib_u16 *) table[0];
3079     mlib_u16 *tab1 = (mlib_u16 *) table[1];
3080     mlib_u16 *tab2 = (mlib_u16 *) table[2];
3081     mlib_s32 i, j;
3082     mlib_u32 s0, s1, s2, s3, s4;
3083 
3084     s0 = tab0[0];
3085     s1 = tab1[0];
3086     s2 = tab2[0];
3087     for (i = 1; i < 256; i++) {
3088 #ifdef _LITTLE_ENDIAN
3089       s3 = (s0 << 16);
3090       s4 = (s2 << 16) + s1;
3091 #else
3092       s3 = s0;
3093       s4 = (s1 << 16) + s2;
3094 #endif /* _LITTLE_ENDIAN */
3095       s0 = tab0[i];
3096       s1 = tab1[i];
3097       s2 = tab2[i];
3098       tab[2 * i - 2] = s3;
3099       tab[2 * i - 1] = s4;
3100     }
3101 
3102 #ifdef _LITTLE_ENDIAN
3103     s4 = (s2 << 16) + s1;
3104     tab[510] = s0 << 16;
3105 #else
3106     s4 = (s1 << 16) + s2;
3107     tab[510] = s0;
3108 #endif /* _LITTLE_ENDIAN */
3109     tab[511] = s4;
3110 
3111     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
3112       mlib_u32 *da;
3113       mlib_u16 *dp = (mlib_u16 *) dst, *ptr;
3114       mlib_u8 *sa = (void *)src;
3115       mlib_u32 s0, s1, t0, t1, t2, t3;
3116       mlib_u32 res1, res2;
3117       mlib_s32 size = xsize, off;
3118 
3119       off = (mlib_s32) ((mlib_addr) dp & 3);
3120 
3121       if (off != 0) {
3122         ptr = (mlib_u16 *) (tab + 2 * sa[0]);
3123         dp[0] = ptr[1];
3124         dp[1] = ptr[2];
3125         dp[2] = ptr[3];
3126         dp += 3;
3127         sa++;
3128         size--;
3129       }
3130 
3131       da = (mlib_u32 *) dp;
3132       s0 = sa[0] << 3;
3133       s1 = sa[1] << 3;
3134       sa += 2;
3135 
3136 #ifdef __SUNPRO_C
3137 #pragma pipeloop(0)
3138 #endif /* __SUNPRO_C */
3139       for (i = 0; i < size - 3; i += 2, da += 3, sa += 2) {
3140         t0 = *(mlib_u32 *) ((mlib_u8 *) tab + s0);
3141         t1 = *(mlib_u32 *) ((mlib_u8 *) tab + s0 + 4);
3142         t2 = *(mlib_u32 *) ((mlib_u8 *) tab + s1);
3143         t3 = *(mlib_u32 *) ((mlib_u8 *) tab + s1 + 4);
3144 #ifdef _LITTLE_ENDIAN
3145         res1 = (t0 >> 16) + (t1 << 16);
3146         res2 = (t1 >> 16) + t2;
3147 #else
3148         res1 = (t0 << 16) + (t1 >> 16);
3149         res2 = (t1 << 16) + t2;
3150 #endif /* _LITTLE_ENDIAN */
3151         s0 = sa[0] << 3;
3152         s1 = sa[1] << 3;
3153         da[0] = res1;
3154         da[1] = res2;
3155         da[2] = t3;
3156       }
3157 
3158       t0 = *(mlib_u32 *) ((mlib_u8 *) tab + s0);
3159       t1 = *(mlib_u32 *) ((mlib_u8 *) tab + s0 + 4);
3160       t2 = *(mlib_u32 *) ((mlib_u8 *) tab + s1);
3161       t3 = *(mlib_u32 *) ((mlib_u8 *) tab + s1 + 4);
3162 #ifdef _LITTLE_ENDIAN
3163       res1 = (t0 >> 16) + (t1 << 16);
3164       res2 = (t1 >> 16) + t2;
3165 #else
3166       res1 = (t0 << 16) + (t1 >> 16);
3167       res2 = (t1 << 16) + t2;
3168 #endif /* _LITTLE_ENDIAN */
3169       da[0] = res1;
3170       da[1] = res2;
3171       da[2] = t3;
3172       da += 3;
3173       dp = (mlib_u16 *) da;
3174       i += 2;
3175 
3176       if (i < size) {
3177         ptr = (mlib_u16 *) (tab + 2 * sa[0]);
3178         dp[0] = ptr[1];
3179         dp[1] = ptr[2];
3180         dp[2] = ptr[3];
3181       }
3182     }
3183 
3184   }
3185   else if (csize == 4) {
3186     mlib_u32 tab[512];
3187     mlib_u16 *tab0 = (mlib_u16 *) table[0];
3188     mlib_u16 *tab1 = (mlib_u16 *) table[1];
3189     mlib_u16 *tab2 = (mlib_u16 *) table[2];
3190     mlib_u16 *tab3 = (mlib_u16 *) table[3];
3191     mlib_s32 i, j;
3192     mlib_u32 s0, s1, s2, s3, s4, s5;
3193 
3194     s0 = tab0[0];
3195     s1 = tab1[0];
3196     s2 = tab2[0];
3197     s3 = tab3[0];
3198     for (i = 1; i < 256; i++) {
3199 #ifdef _LITTLE_ENDIAN
3200       s4 = (s1 << 16) + s0;
3201       s5 = (s3 << 16) + s2;
3202 #else
3203       s4 = (s0 << 16) + s1;
3204       s5 = (s2 << 16) + s3;
3205 #endif /* _LITTLE_ENDIAN */
3206       s0 = tab0[i];
3207       s1 = tab1[i];
3208       s2 = tab2[i];
3209       s3 = tab3[i];
3210       tab[2 * i - 2] = s4;
3211       tab[2 * i - 1] = s5;
3212     }
3213 
3214 #ifdef _LITTLE_ENDIAN
3215     s4 = (s1 << 16) + s0;
3216     s5 = (s3 << 16) + s2;
3217 #else
3218     s4 = (s0 << 16) + s1;
3219     s5 = (s2 << 16) + s3;
3220 #endif /* _LITTLE_ENDIAN */
3221     tab[510] = s4;
3222     tab[511] = s5;
3223 
3224     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
3225       mlib_u32 *da;
3226       mlib_u16 *dp = (mlib_u16 *) dst;
3227       mlib_u8 *sa = (void *)src;
3228       mlib_u32 s0, t0, s1, t1, t2, t3, t4, t5;
3229       mlib_s32 size = xsize;
3230       mlib_u32 res1, res2, res3, res4;
3231 
3232       if (((mlib_addr) dp & 3) == 0) {
3233 
3234         da = (mlib_u32 *) dp;
3235 
3236         s0 = sa[0] << 3;
3237         s1 = sa[1] << 3;
3238         sa += 2;
3239 
3240 #ifdef __SUNPRO_C
3241 #pragma pipeloop(0)
3242 #endif /* __SUNPRO_C */
3243         for (i = 0; i < size - 3; i += 2, da += 4, sa += 2) {
3244           t0 = *(mlib_u32 *) ((mlib_u8 *) tab + s0);
3245           t1 = *(mlib_u32 *) ((mlib_u8 *) tab + s0 + 4);
3246           t2 = *(mlib_u32 *) ((mlib_u8 *) tab + s1);
3247           t3 = *(mlib_u32 *) ((mlib_u8 *) tab + s1 + 4);
3248           s0 = sa[0] << 3;
3249           s1 = sa[1] << 3;
3250           da[0] = t0;
3251           da[1] = t1;
3252           da[2] = t2;
3253           da[3] = t3;
3254         }
3255 
3256         t0 = *(mlib_u32 *) ((mlib_u8 *) tab + s0);
3257         t1 = *(mlib_u32 *) ((mlib_u8 *) tab + s0 + 4);
3258         t2 = *(mlib_u32 *) ((mlib_u8 *) tab + s1);
3259         t3 = *(mlib_u32 *) ((mlib_u8 *) tab + s1 + 4);
3260         da[0] = t0;
3261         da[1] = t1;
3262         da[2] = t2;
3263         da[3] = t3;
3264 
3265         if (size & 1) {
3266           da[4] = tab[2 * sa[0]];
3267           da[5] = tab[2 * sa[0] + 1];
3268         }
3269 
3270       }
3271       else {
3272 
3273         t4 = tab[2 * sa[0]];
3274         t5 = tab[2 * sa[0] + 1];
3275 #ifdef _LITTLE_ENDIAN
3276         *dp++ = (mlib_u16) (t4);
3277 #else
3278         *dp++ = (mlib_u16) (t4 >> 16);
3279 #endif /* _LITTLE_ENDIAN */
3280         sa++;
3281         da = (mlib_u32 *) dp;
3282 #ifdef _LITTLE_ENDIAN
3283         *da++ = (t4 >> 16) + (t5 << 16);
3284 #else
3285         *da++ = (t4 << 16) + (t5 >> 16);
3286 #endif /* _LITTLE_ENDIAN */
3287         s0 = sa[0] << 3;
3288         s1 = sa[1] << 3;
3289         sa += 2;
3290 
3291 #ifdef __SUNPRO_C
3292 #pragma pipeloop(0)
3293 #endif /* __SUNPRO_C */
3294         for (i = 0; i < size - 4; i += 2, da += 4, sa += 2) {
3295           t0 = *(mlib_u32 *) ((mlib_u8 *) tab + s0);
3296           t1 = *(mlib_u32 *) ((mlib_u8 *) tab + s0 + 4);
3297           t2 = *(mlib_u32 *) ((mlib_u8 *) tab + s1);
3298           t3 = *(mlib_u32 *) ((mlib_u8 *) tab + s1 + 4);
3299 #ifdef _LITTLE_ENDIAN
3300           res1 = (t5 >> 16) + (t0 << 16);
3301           res2 = (t0 >> 16) + (t1 << 16);
3302           res3 = (t1 >> 16) + (t2 << 16);
3303           res4 = (t2 >> 16) + (t3 << 16);
3304 #else
3305           res1 = (t5 << 16) + (t0 >> 16);
3306           res2 = (t0 << 16) + (t1 >> 16);
3307           res3 = (t1 << 16) + (t2 >> 16);
3308           res4 = (t2 << 16) + (t3 >> 16);
3309 #endif /* _LITTLE_ENDIAN */
3310           s0 = sa[0] << 3;
3311           s1 = sa[1] << 3;
3312           da[0] = res1;
3313           da[1] = res2;
3314           da[2] = res3;
3315           da[3] = res4;
3316           t5 = t3;
3317         }
3318 
3319         t0 = *(mlib_u32 *) ((mlib_u8 *) tab + s0);
3320         t1 = *(mlib_u32 *) ((mlib_u8 *) tab + s0 + 4);
3321         t2 = *(mlib_u32 *) ((mlib_u8 *) tab + s1);
3322         t3 = *(mlib_u32 *) ((mlib_u8 *) tab + s1 + 4);
3323 #ifdef _LITTLE_ENDIAN
3324         res1 = (t5 >> 16) + (t0 << 16);
3325         res2 = (t0 >> 16) + (t1 << 16);
3326         res3 = (t1 >> 16) + (t2 << 16);
3327         res4 = (t2 >> 16) + (t3 << 16);
3328 #else
3329         res1 = (t5 << 16) + (t0 >> 16);
3330         res2 = (t0 << 16) + (t1 >> 16);
3331         res3 = (t1 << 16) + (t2 >> 16);
3332         res4 = (t2 << 16) + (t3 >> 16);
3333 #endif /* _LITTLE_ENDIAN */
3334         da[0] = res1;
3335         da[1] = res2;
3336         da[2] = res3;
3337         da[3] = res4;
3338         da += 4;
3339         dp = (mlib_u16 *) da;
3340 #ifdef _LITTLE_ENDIAN
3341         dp[0] = (mlib_u16) (t3 >> 16);
3342 #else
3343         dp[0] = (mlib_u16) t3;
3344 #endif /* _LITTLE_ENDIAN */
3345 
3346         if ((size & 1) == 0) {
3347           t0 = tab[2 * sa[0]];
3348 #ifdef _LITTLE_ENDIAN
3349           dp[2] = (mlib_u16) (t0 >> 16);
3350           dp[1] = (mlib_u16) t0;
3351 #else
3352           dp[1] = (mlib_u16) (t0 >> 16);
3353           dp[2] = (mlib_u16) t0;
3354 #endif /* _LITTLE_ENDIAN */
3355           t0 = tab[2 * sa[0] + 1];
3356 #ifdef _LITTLE_ENDIAN
3357           dp[4] = (mlib_u16) (t0 >> 16);
3358           dp[3] = (mlib_u16) t0;
3359 #else
3360           dp[3] = (mlib_u16) (t0 >> 16);
3361           dp[4] = (mlib_u16) t0;
3362 #endif /* _LITTLE_ENDIAN */
3363         }
3364       }
3365     }
3366   }
3367 }
3368 
3369 /***************************************************************/
3370 void mlib_c_ImageLookUpSI_S16_S16(const mlib_s16 *src,
3371                                   mlib_s32       slb,
3372                                   mlib_s16       *dst,
3373                                   mlib_s32       dlb,
3374                                   mlib_s32       xsize,
3375                                   mlib_s32       ysize,
3376                                   mlib_s32       csize,
3377                                   const mlib_s16 **table)
3378 {
3379   const mlib_s16 *table_base[4];
3380   mlib_s32 c;
3381 
3382   for (c = 0; c < csize; c++) {
3383     table_base[c] = &table[c][32768];
3384   }
3385 
3386 #ifdef __GNUC__
3387 #pragma GCC diagnostic push
3388 #pragma GCC diagnostic ignored "-Warray-bounds"
3389 #endif
3390   MLIB_C_IMAGELOOKUPSI(mlib_s16, mlib_s16, table_base);
3391 #ifdef __GNUC__
3392 #pragma GCC diagnostic pop
3393 #endif
3394 }
3395 
3396 /***************************************************************/
3397 void mlib_c_ImageLookUpSI_U16_S16(const mlib_u16 *src,
3398                                   mlib_s32       slb,
3399                                   mlib_s16       *dst,
3400                                   mlib_s32       dlb,
3401                                   mlib_s32       xsize,
3402                                   mlib_s32       ysize,
3403                                   mlib_s32       csize,
3404                                   const mlib_s16 **table)
3405 {
3406   const mlib_s16 *table_base[4];
3407   mlib_s32 c;
3408 
3409   for (c = 0; c < csize; c++) {
3410     table_base[c] = &table[c][0];
3411   }
3412 
3413   MLIB_C_IMAGELOOKUPSI(mlib_s16, mlib_u16, table_base);
3414 }
3415 
3416 /***************************************************************/
3417 void mlib_c_ImageLookUpSI_S32_S16(const mlib_s32 *src,
3418                                   mlib_s32       slb,
3419                                   mlib_s16       *dst,
3420                                   mlib_s32       dlb,
3421                                   mlib_s32       xsize,
3422                                   mlib_s32       ysize,
3423                                   mlib_s32       csize,
3424                                   const mlib_s16 **table)
3425 {
3426   const mlib_s16 *table_base[4];
3427   mlib_s32 c;
3428 
3429   for (c = 0; c < csize; c++) {
3430     table_base[c] = &table[c][TABLE_SHIFT_S32];
3431   }
3432 
3433 #ifdef __GNUC__
3434 #pragma GCC diagnostic push
3435 #pragma GCC diagnostic ignored "-Warray-bounds"
3436 #endif
3437   MLIB_C_IMAGELOOKUPSI(mlib_s16, mlib_s32, table_base);
3438 #ifdef __GNUC__
3439 #pragma GCC diagnostic pop
3440 #endif
3441 }
3442 
3443 /***************************************************************/
3444 void mlib_c_ImageLookUpSI_S16_U16(const mlib_s16 *src,
3445                                   mlib_s32       slb,
3446                                   mlib_u16       *dst,
3447                                   mlib_s32       dlb,
3448                                   mlib_s32       xsize,
3449                                   mlib_s32       ysize,
3450                                   mlib_s32       csize,
3451                                   const mlib_u16 **table)
3452 {
3453   const mlib_u16 *table_base[4];
3454   mlib_s32 c;
3455 
3456   for (c = 0; c < csize; c++) {
3457     table_base[c] = &table[c][32768];
3458   }
3459 
3460   MLIB_C_IMAGELOOKUPSI(mlib_u16, mlib_s16, table_base);
3461 }
3462 
3463 /***************************************************************/
3464 void mlib_c_ImageLookUpSI_U16_U16(const mlib_u16 *src,
3465                                   mlib_s32       slb,
3466                                   mlib_u16       *dst,
3467                                   mlib_s32       dlb,
3468                                   mlib_s32       xsize,
3469                                   mlib_s32       ysize,
3470                                   mlib_s32       csize,
3471                                   const mlib_u16 **table)
3472 {
3473   const mlib_u16 *table_base[4];
3474   mlib_s32 c;
3475 
3476   for (c = 0; c < csize; c++) {
3477     table_base[c] = &table[c][0];
3478   }
3479 
3480 #ifdef __GNUC__
3481 #pragma GCC diagnostic push
3482 #pragma GCC diagnostic ignored "-Warray-bounds"
3483 #endif
3484   MLIB_C_IMAGELOOKUPSI(mlib_u16, mlib_u16, table_base);
3485 #ifdef __GNUC__
3486 #pragma GCC diagnostic pop
3487 #endif
3488 }
3489 
3490 /***************************************************************/
3491 void mlib_c_ImageLookUpSI_S32_U16(const mlib_s32 *src,
3492                                   mlib_s32       slb,
3493                                   mlib_u16       *dst,
3494                                   mlib_s32       dlb,
3495                                   mlib_s32       xsize,
3496                                   mlib_s32       ysize,
3497                                   mlib_s32       csize,
3498                                   const mlib_u16 **table)
3499 {
3500   const mlib_u16 *table_base[4];
3501   mlib_s32 c;
3502 
3503   for (c = 0; c < csize; c++) {
3504     table_base[c] = &table[c][TABLE_SHIFT_S32];
3505   }
3506 
3507 #ifdef __GNUC__
3508 #pragma GCC diagnostic push
3509 #pragma GCC diagnostic ignored "-Warray-bounds"
3510 #endif
3511   MLIB_C_IMAGELOOKUPSI(mlib_u16, mlib_s32, table_base);
3512 #ifdef __GNUC__
3513 #pragma GCC diagnostic pop
3514 #endif
3515 }
3516 
3517 /***************************************************************/
3518 void mlib_c_ImageLookUpSI_U8_S32(const mlib_u8  *src,
3519                                  mlib_s32       slb,
3520                                  mlib_s32       *dst,
3521                                  mlib_s32       dlb,
3522                                  mlib_s32       xsize,
3523                                  mlib_s32       ysize,
3524                                  mlib_s32       csize,
3525                                  const mlib_s32 **table)
3526 {
3527 
3528   if (xsize < 7) {
3529     MLIB_C_IMAGELOOKUPSI(mlib_s32, mlib_u8, table);
3530   }
3531   else if (csize == 2) {
3532     mlib_s32 i, j;
3533 
3534     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
3535       mlib_u32 *sa;
3536       mlib_u32 *tab0 = (mlib_u32 *) table[0];
3537       mlib_u32 *tab1 = (mlib_u32 *) table[1];
3538       mlib_u32 s0, t0, t1, t2, t3;
3539       mlib_s32 off;
3540       mlib_s32 size = xsize;
3541       mlib_u32 *dp = (mlib_u32 *) dst;
3542       mlib_u8 *sp = (void *)src;
3543 
3544       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
3545 
3546       for (i = 0; i < off; i++, sp++) {
3547         *dp++ = tab0[sp[0]];
3548         *dp++ = tab1[sp[0]];
3549         size--;
3550       }
3551 
3552       sa = (mlib_u32 *) sp;
3553 
3554       s0 = sa[0];
3555       sa++;
3556 
3557 #ifdef __SUNPRO_C
3558 #pragma pipeloop(0)
3559 #endif /* __SUNPRO_C */
3560       for (i = 0; i < size - 7; i += 4, dp += 8, sa++) {
3561 #ifdef _LITTLE_ENDIAN
3562         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC));
3563         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC));
3564         t2 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC));
3565         t3 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC));
3566 #else
3567         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));
3568         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC));
3569         t2 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC));
3570         t3 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC));
3571 #endif /* _LITTLE_ENDIAN */
3572         dp[0] = t0;
3573         dp[1] = t1;
3574         dp[2] = t2;
3575         dp[3] = t3;
3576 #ifdef _LITTLE_ENDIAN
3577         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC));
3578         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC));
3579         t2 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));
3580         t3 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC));
3581 #else
3582         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC));
3583         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC));
3584         t2 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC));
3585         t3 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC));
3586 #endif /* _LITTLE_ENDIAN */
3587         s0 = sa[0];
3588         dp[4] = t0;
3589         dp[5] = t1;
3590         dp[6] = t2;
3591         dp[7] = t3;
3592       }
3593 
3594 #ifdef _LITTLE_ENDIAN
3595       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC));
3596       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC));
3597       t2 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC));
3598       t3 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC));
3599 #else
3600       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));
3601       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC));
3602       t2 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC));
3603       t3 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC));
3604 #endif /* _LITTLE_ENDIAN */
3605       dp[0] = t0;
3606       dp[1] = t1;
3607       dp[2] = t2;
3608       dp[3] = t3;
3609 #ifdef _LITTLE_ENDIAN
3610       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC));
3611       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC));
3612       t2 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));
3613       t3 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC));
3614 #else
3615       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC));
3616       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC));
3617       t2 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC));
3618       t3 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC));
3619 #endif /* _LITTLE_ENDIAN */
3620       dp[4] = t0;
3621       dp[5] = t1;
3622       dp[6] = t2;
3623       dp[7] = t3;
3624       dp += 8;
3625       sp = (mlib_u8 *) sa;
3626       i += 4;
3627 
3628       for (; i < size; i++, sp++) {
3629         *dp++ = tab0[sp[0]];
3630         *dp++ = tab1[sp[0]];
3631       }
3632     }
3633 
3634   }
3635   else if (csize == 3) {
3636     mlib_s32 i, j;
3637 
3638     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
3639       mlib_u32 *sa;
3640       mlib_u32 *tab0 = (mlib_u32 *) table[0];
3641       mlib_u32 *tab1 = (mlib_u32 *) table[1];
3642       mlib_u32 *tab2 = (mlib_u32 *) table[2];
3643       mlib_u32 s0, t0, t1, t2, t3, t4, t5;
3644       mlib_s32 off;
3645       mlib_s32 size = xsize;
3646       mlib_u32 *dp = (mlib_u32 *) dst;
3647       mlib_u8 *sp = (void *)src;
3648 
3649       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
3650 
3651       for (i = 0; i < off; i++, sp++) {
3652         *dp++ = tab0[sp[0]];
3653         *dp++ = tab1[sp[0]];
3654         *dp++ = tab2[sp[0]];
3655         size--;
3656       }
3657 
3658       sa = (mlib_u32 *) sp;
3659 
3660       s0 = sa[0];
3661       sa++;
3662 
3663 #ifdef __SUNPRO_C
3664 #pragma pipeloop(0)
3665 #endif /* __SUNPRO_C */
3666       for (i = 0; i < size - 7; i += 4, dp += 12, sa++) {
3667 #ifdef _LITTLE_ENDIAN
3668         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC));
3669         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC));
3670         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 << 2) & 0x3FC));
3671         t3 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC));
3672         t4 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC));
3673         t5 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 6) & 0x3FC));
3674 #else
3675         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));
3676         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC));
3677         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 22) & 0x3FC));
3678         t3 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC));
3679         t4 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC));
3680         t5 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 14) & 0x3FC));
3681 #endif /* _LITTLE_ENDIAN */
3682         dp[0] = t0;
3683         dp[1] = t1;
3684         dp[2] = t2;
3685         dp[3] = t3;
3686         dp[4] = t4;
3687         dp[5] = t5;
3688 #ifdef _LITTLE_ENDIAN
3689         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC));
3690         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC));
3691         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 14) & 0x3FC));
3692         t3 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));
3693         t4 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC));
3694         t5 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 22) & 0x3FC));
3695 #else
3696         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC));
3697         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC));
3698         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 6) & 0x3FC));
3699         t3 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC));
3700         t4 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC));
3701         t5 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 << 2) & 0x3FC));
3702 #endif /* _LITTLE_ENDIAN */
3703         s0 = sa[0];
3704         dp[6] = t0;
3705         dp[7] = t1;
3706         dp[8] = t2;
3707         dp[9] = t3;
3708         dp[10] = t4;
3709         dp[11] = t5;
3710       }
3711 
3712 #ifdef _LITTLE_ENDIAN
3713       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC));
3714       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC));
3715       t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 << 2) & 0x3FC));
3716       t3 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC));
3717       t4 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC));
3718       t5 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 6) & 0x3FC));
3719 #else
3720       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));
3721       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC));
3722       t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 22) & 0x3FC));
3723       t3 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC));
3724       t4 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC));
3725       t5 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 14) & 0x3FC));
3726 #endif /* _LITTLE_ENDIAN */
3727       dp[0] = t0;
3728       dp[1] = t1;
3729       dp[2] = t2;
3730       dp[3] = t3;
3731       dp[4] = t4;
3732       dp[5] = t5;
3733 #ifdef _LITTLE_ENDIAN
3734       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC));
3735       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC));
3736       t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 14) & 0x3FC));
3737       t3 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));
3738       t4 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC));
3739       t5 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 22) & 0x3FC));
3740 #else
3741       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC));
3742       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC));
3743       t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 6) & 0x3FC));
3744       t3 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC));
3745       t4 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC));
3746       t5 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 << 2) & 0x3FC));
3747 #endif /* _LITTLE_ENDIAN */
3748       dp[6] = t0;
3749       dp[7] = t1;
3750       dp[8] = t2;
3751       dp[9] = t3;
3752       dp[10] = t4;
3753       dp[11] = t5;
3754       dp += 12;
3755       sp = (mlib_u8 *) sa;
3756       i += 4;
3757 
3758       for (; i < size; i++, sp++) {
3759         *dp++ = tab0[sp[0]];
3760         *dp++ = tab1[sp[0]];
3761         *dp++ = tab2[sp[0]];
3762       }
3763     }
3764 
3765   }
3766   else if (csize == 4) {
3767     mlib_s32 i, j;
3768 
3769     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
3770       mlib_u32 *sa;
3771       mlib_u32 *tab0 = (mlib_u32 *) table[0];
3772       mlib_u32 *tab1 = (mlib_u32 *) table[1];
3773       mlib_u32 *tab2 = (mlib_u32 *) table[2];
3774       mlib_u32 *tab3 = (mlib_u32 *) table[3];
3775       mlib_u32 s0, t0, t1, t2, t3;
3776       mlib_s32 off;
3777       mlib_s32 size = xsize;
3778       mlib_u32 *dp = (mlib_u32 *) dst;
3779       mlib_u8 *sp = (void *)src;
3780 
3781       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
3782 
3783       for (i = 0; i < off; i++, sp++) {
3784         *dp++ = tab0[sp[0]];
3785         *dp++ = tab1[sp[0]];
3786         *dp++ = tab2[sp[0]];
3787         *dp++ = tab3[sp[0]];
3788         size--;
3789       }
3790 
3791       sa = (mlib_u32 *) sp;
3792 
3793       s0 = sa[0];
3794       sa++;
3795 
3796 #ifdef __SUNPRO_C
3797 #pragma pipeloop(0)
3798 #endif /* __SUNPRO_C */
3799       for (i = 0; i < size - 7; i += 4, dp += 16, sa++) {
3800 #ifdef _LITTLE_ENDIAN
3801         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC));
3802         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC));
3803         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 << 2) & 0x3FC));
3804         t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 << 2) & 0x3FC));
3805 #else
3806         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));
3807         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC));
3808         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 22) & 0x3FC));
3809         t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 22) & 0x3FC));
3810 #endif /* _LITTLE_ENDIAN */
3811         dp[0] = t0;
3812         dp[1] = t1;
3813         dp[2] = t2;
3814         dp[3] = t3;
3815 #ifdef _LITTLE_ENDIAN
3816         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC));
3817         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC));
3818         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 6) & 0x3FC));
3819         t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 6) & 0x3FC));
3820 #else
3821         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC));
3822         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC));
3823         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 14) & 0x3FC));
3824         t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 14) & 0x3FC));
3825 #endif /* _LITTLE_ENDIAN */
3826         dp[4] = t0;
3827         dp[5] = t1;
3828         dp[6] = t2;
3829         dp[7] = t3;
3830 #ifdef _LITTLE_ENDIAN
3831         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC));
3832         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC));
3833         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 14) & 0x3FC));
3834         t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 14) & 0x3FC));
3835 #else
3836         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC));
3837         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC));
3838         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 6) & 0x3FC));
3839         t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 6) & 0x3FC));
3840 #endif /* _LITTLE_ENDIAN */
3841         dp[8] = t0;
3842         dp[9] = t1;
3843         dp[10] = t2;
3844         dp[11] = t3;
3845 #ifdef _LITTLE_ENDIAN
3846         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));
3847         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC));
3848         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 22) & 0x3FC));
3849         t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 22) & 0x3FC));
3850 #else
3851         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC));
3852         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC));
3853         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 << 2) & 0x3FC));
3854         t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 << 2) & 0x3FC));
3855 #endif /* _LITTLE_ENDIAN */
3856         s0 = sa[0];
3857         dp[12] = t0;
3858         dp[13] = t1;
3859         dp[14] = t2;
3860         dp[15] = t3;
3861       }
3862 
3863 #ifdef _LITTLE_ENDIAN
3864       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC));
3865       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC));
3866       t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 << 2) & 0x3FC));
3867       t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 << 2) & 0x3FC));
3868 #else
3869       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));
3870       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC));
3871       t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 22) & 0x3FC));
3872       t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 22) & 0x3FC));
3873 #endif /* _LITTLE_ENDIAN */
3874       dp[0] = t0;
3875       dp[1] = t1;
3876       dp[2] = t2;
3877       dp[3] = t3;
3878 #ifdef _LITTLE_ENDIAN
3879       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC));
3880       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC));
3881       t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 6) & 0x3FC));
3882       t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 6) & 0x3FC));
3883 #else
3884       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC));
3885       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC));
3886       t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 14) & 0x3FC));
3887       t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 14) & 0x3FC));
3888 #endif /* _LITTLE_ENDIAN */
3889       dp[4] = t0;
3890       dp[5] = t1;
3891       dp[6] = t2;
3892       dp[7] = t3;
3893 #ifdef _LITTLE_ENDIAN
3894       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC));
3895       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC));
3896       t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 14) & 0x3FC));
3897       t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 14) & 0x3FC));
3898 #else
3899       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC));
3900       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC));
3901       t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 6) & 0x3FC));
3902       t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 6) & 0x3FC));
3903 #endif /* _LITTLE_ENDIAN */
3904       dp[8] = t0;
3905       dp[9] = t1;
3906       dp[10] = t2;
3907       dp[11] = t3;
3908 #ifdef _LITTLE_ENDIAN
3909       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));
3910       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC));
3911       t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 22) & 0x3FC));
3912       t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 22) & 0x3FC));
3913 #else
3914       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC));
3915       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC));
3916       t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 << 2) & 0x3FC));
3917       t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 << 2) & 0x3FC));
3918 #endif /* _LITTLE_ENDIAN */
3919       dp[12] = t0;
3920       dp[13] = t1;
3921       dp[14] = t2;
3922       dp[15] = t3;
3923       dp += 16;
3924       sp = (mlib_u8 *) sa;
3925       i += 4;
3926 
3927       for (; i < size; i++, sp++) {
3928         *dp++ = tab0[sp[0]];
3929         *dp++ = tab1[sp[0]];
3930         *dp++ = tab2[sp[0]];
3931         *dp++ = tab3[sp[0]];
3932       }
3933     }
3934   }
3935 }
3936 
3937 /***************************************************************/
3938 void mlib_c_ImageLookUpSI_S16_S32(const mlib_s16 *src,
3939                                   mlib_s32       slb,
3940                                   mlib_s32       *dst,
3941                                   mlib_s32       dlb,
3942                                   mlib_s32       xsize,
3943                                   mlib_s32       ysize,
3944                                   mlib_s32       csize,
3945                                   const mlib_s32 **table)
3946 {
3947   const mlib_s32 *table_base[4];
3948   mlib_s32 c;
3949 
3950   for (c = 0; c < csize; c++) {
3951     table_base[c] = &table[c][32768];
3952   }
3953 
3954 #ifdef __GNUC__
3955 #pragma GCC diagnostic push
3956 #pragma GCC diagnostic ignored "-Warray-bounds"
3957 #endif
3958   MLIB_C_IMAGELOOKUPSI(mlib_s32, mlib_s16, table_base);
3959 #ifdef __GNUC__
3960 #pragma GCC diagnostic pop
3961 #endif
3962 }
3963 
3964 /***************************************************************/
3965 void mlib_c_ImageLookUpSI_U16_S32(const mlib_u16 *src,
3966                                   mlib_s32       slb,
3967                                   mlib_s32       *dst,
3968                                   mlib_s32       dlb,
3969                                   mlib_s32       xsize,
3970                                   mlib_s32       ysize,
3971                                   mlib_s32       csize,
3972                                   const mlib_s32 **table)
3973 {
3974   const mlib_s32 *table_base[4];
3975   mlib_s32 c;
3976 
3977   for (c = 0; c < csize; c++) {
3978     table_base[c] = &table[c][0];
3979   }
3980 
3981 #ifdef __GNUC__
3982 #pragma GCC diagnostic push
3983 #pragma GCC diagnostic ignored "-Warray-bounds"
3984 #endif
3985   MLIB_C_IMAGELOOKUPSI(mlib_s32, mlib_u16, table_base);
3986 #ifdef __GNUC__
3987 #pragma GCC diagnostic pop
3988 #endif
3989 }
3990 
3991 /***************************************************************/
3992 void mlib_c_ImageLookUpSI_S32_S32(const mlib_s32 *src,
3993                                   mlib_s32       slb,
3994                                   mlib_s32       *dst,
3995                                   mlib_s32       dlb,
3996                                   mlib_s32       xsize,
3997                                   mlib_s32       ysize,
3998                                   mlib_s32       csize,
3999                                   const mlib_s32 **table)
4000 {
4001   const mlib_s32 *table_base[4];
4002   mlib_s32 c;
4003 
4004   for (c = 0; c < csize; c++) {
4005     table_base[c] = &table[c][TABLE_SHIFT_S32];
4006   }
4007 
4008   MLIB_C_IMAGELOOKUPSI(mlib_s32, mlib_s32, table_base);
4009 }
4010 
4011 /***************************************************************/