1 /*
   2  * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 
  27 #include "mlib_image.h"
  28 #include "mlib_ImageLookUp.h"
  29 #include "mlib_c_ImageLookUp.h"
  30 
  31 /***************************************************************/
  32 #define MLIB_C_IMAGELOOKUP(DTYPE, STYPE, TABLE)                         \
  33 {                                                                       \
  34   mlib_s32 i, j, k;                                                     \
  35                                                                         \
  36   if (xsize < 2) {                                                      \
  37     for(j = 0; j < ysize; j++, dst += dlb, src += slb){                 \
  38       for(k = 0; k < csize; k++) {                                      \
  39         DTYPE *da = dst + k;                                            \
  40         const STYPE *sa = src + k;                                      \
  41         DTYPE *tab = (DTYPE*) TABLE[k];                                 \
  42                                                                         \
  43         for(i = 0; i < xsize; i++, da += csize, sa += csize)            \
  44         *da=tab[*sa];                                                   \
  45       }                                                                 \
  46     }                                                                   \
  47   } else {                                                              \
  48     for(j = 0; j < ysize; j++, dst += dlb, src += slb) {                \
  49       for(k = 0; k < csize; k++) {                                      \
  50         DTYPE    *da = dst + k;                                         \
  51         const STYPE *sa = src + k;                                      \
  52         DTYPE *tab = (DTYPE*) TABLE[k];                                 \
  53         mlib_s32 s0, t0, s1, t1;                                        \
  54                                                                         \
  55         s0 = (mlib_s32)sa[0];                                           \
  56         s1 = (mlib_s32)sa[csize];                                       \
  57         sa += 2*csize;                                                  \
  58                                                                         \
  59         for(i = 0; i < xsize - 3; i+=2, da += 2*csize, sa += 2*csize) { \
  60           t0 = (mlib_s32)tab[s0];                                       \
  61           t1 = (mlib_s32)tab[s1];                                       \
  62           s0 = (mlib_s32)sa[0];                                         \
  63           s1 = (mlib_s32)sa[csize];                                     \
  64           da[0] = (DTYPE)t0;                                            \
  65           da[csize] = (DTYPE)t1;                                        \
  66         }                                                               \
  67         t0 = (mlib_s32)tab[s0];                                         \
  68         t1 = (mlib_s32)tab[s1];                                         \
  69         da[0] = (DTYPE)t0;                                              \
  70         da[csize] = (DTYPE)t1;                                          \
  71         if (xsize & 1) da[2*csize] = tab[sa[0]];                        \
  72       }                                                                 \
  73     }                                                                   \
  74   }                                                                     \
  75 }
  76 
  77 /***************************************************************/
  78 #define MLIB_C_IMAGELOOKUPSI(DTYPE, STYPE, TABLE)                 \
  79 {                                                                 \
  80   mlib_s32 i, j, k;                                               \
  81                                                                   \
  82   if (xsize < 2) {                                                \
  83     for(j = 0; j < ysize; j++, dst += dlb, src += slb){           \
  84       for(k = 0; k < csize; k++) {                                \
  85         DTYPE *da = dst + k;                                      \
  86         const STYPE *sa = (void *)src;                                    \
  87         DTYPE *tab = (DTYPE*) TABLE[k];                           \
  88                                                                   \
  89         for(i = 0; i < xsize; i++, da += csize, sa ++)            \
  90         *da=tab[*sa];                                             \
  91       }                                                           \
  92     }                                                             \
  93   } else {                                                        \
  94     for(j = 0; j < ysize; j++, dst += dlb, src += slb) {          \
  95       for(k = 0; k < csize; k++) {                                \
  96         DTYPE *da = dst + k;                                      \
  97         const STYPE *sa = (void *)src;                                    \
  98         DTYPE *tab = (DTYPE*) TABLE[k];                           \
  99         mlib_s32 s0, t0, s1, t1;                                  \
 100                                                                   \
 101         s0 = (mlib_s32)sa[0];                                     \
 102         s1 = (mlib_s32)sa[1];                                     \
 103         sa += 2;                                                  \
 104                                                                   \
 105         for(i = 0; i < xsize - 3; i+=2, da += 2*csize, sa += 2) { \
 106           t0 = (mlib_s32)tab[s0];                                 \
 107           t1 = (mlib_s32)tab[s1];                                 \
 108           s0 = (mlib_s32)sa[0];                                   \
 109           s1 = (mlib_s32)sa[1];                                   \
 110           da[0] = (DTYPE)t0;                                      \
 111           da[csize] = (DTYPE)t1;                                  \
 112         }                                                         \
 113         t0 = (mlib_s32)tab[s0];                                   \
 114         t1 = (mlib_s32)tab[s1];                                   \
 115         da[0] = (DTYPE)t0;                                        \
 116         da[csize] = (DTYPE)t1;                                    \
 117         if (xsize & 1) da[2*csize] = tab[sa[0]];                  \
 118       }                                                           \
 119     }                                                             \
 120   }                                                               \
 121 }
 122 
  123 #ifdef _LITTLE_ENDIAN
  124 
      /*
       * Lookup helpers for the word-at-a-time loops, little-endian variants.
       * None of these macros receives its data as arguments: each one reads
       * the expansion site's locals s0/s1/s2 (source bytes packed in a wider
       * word) and assigns the expansion site's locals t0..t3 (looked-up table
       * values).  Only the tables are passed in, one per extracted byte, in
       * the memory order of the source bytes they apply to.
       */
  125 /***************************************************************/
      /*
       * s0 and s1 each hold two source bytes (the callers in this file load
       * them through an mlib_u16 pointer, so ">> 8" needs no mask).  In memory
       * order the bytes are s0 low, s0 high, s1 low, s1 high; they index
       * table0..table3 respectively.  The results are assigned to t3, t2, t1,
       * t0 so that the caller's (t0 << 24) + (t1 << 16) + (t2 << 8) + t3 puts
       * the first byte's result in the least significant byte of the word.
       */
  126 #define READ_U8_U8_ALIGN(table0, table1, table2, table3)        \
  127   t3 = table0[s0 & 0xFF];                                       \
  128   t2 = table1[s0>>8];                                           \
  129   t1 = table2[s1 & 0xFF];                                       \
  130   t0 = table3[s1>>8]
  131 
  132 /***************************************************************/
      /*
       * Variant for an odd source address: the callers load from one byte
       * before the wanted data, so the low byte of s0 is skipped and the four
       * indices are s0 high, s1 low, s1 high, s2 low.  The high byte of s2 is
       * not consumed here.
       */
  133 #define READ_U8_U8_NOTALIGN(table0, table1, table2, table3)     \
  134   t3 = table0[s0 >> 8];                                         \
  135   t2 = table1[s1 & 0xFF];                                       \
  136   t1 = table2[s1 >> 8];                                         \
  137   t0 = table3[s2 & 0xFF]
  138 
  139 /***************************************************************/
      /*
       * s0 holds four source bytes.  Each shift-and-mask yields 2 * byte,
       * i.e. the byte offset of a 16-bit entry, which is why the table is
       * addressed through an mlib_u8 pointer.  Bytes 0..3 (least significant
       * first) index table0..table3; the results go to t1, t0, t3, t2 to
       * match the caller's (t0 << 16) + t1 and (t2 << 16) + t3 packing.
       */
  140 #define READ_U8_S16_ALIGN(table0, table1, table2, table3)       \
  141   t1 = *(mlib_u16*)((mlib_u8*)table0 + ((s0 << 1) & 0x1FE));    \
  142   t0 = *(mlib_u16*)((mlib_u8*)table1 + ((s0 >> 7) & 0x1FE));    \
  143   t3 = *(mlib_u16*)((mlib_u8*)table2 + ((s0 >> 15)  & 0x1FE));  \
  144   t2 = *(mlib_u16*)((mlib_u8*)table3 + ((s0 >> 23)  & 0x1FE))
  145 
  146 /***************************************************************/
      /*
       * Variant that skips byte 0 of s0: the indices are bytes 1, 2, 3 of s0
       * followed by byte 0 of s1.
       */
  147 #define READ_U8_S16_NOTALIGN(table0, table1, table2, table3)    \
  148   t1 = *(mlib_u16*)((mlib_u8*)table0 + ((s0 >> 7) & 0x1FE));    \
  149   t0 = *(mlib_u16*)((mlib_u8*)table1 + ((s0 >> 15)  & 0x1FE));  \
  150   t3 = *(mlib_u16*)((mlib_u8*)table2 + ((s0 >> 23)  & 0x1FE));  \
  151   t2 = *(mlib_u16*)((mlib_u8*)table3 + ((s1 << 1) & 0x1FE))
  152 
  153 /***************************************************************/
      /*
       * Looks up bytes 1, 2, 3 of s1 into t1, t0 and t2.  Only three values
       * are produced; t3 is not assigned.
       */
  154 #define ADD_READ_U8_S16_NOTALIGN(table0, table1, table2)        \
  155   t1 = *(mlib_u16*)((mlib_u8*)table0 + ((s1 >> 7) & 0x1FE));    \
  156   t0 = *(mlib_u16*)((mlib_u8*)table1 + ((s1 >> 15)  & 0x1FE));  \
  157   t2 = *(mlib_u16*)((mlib_u8*)table2 + ((s1 >> 23)  & 0x1FE))
  158 
  159 /***************************************************************/
      /*
       * Each shift-and-mask yields 4 * byte, the byte offset of a 32-bit
       * entry.  Bytes 0..3 of s0 (least significant first) index
       * table0..table3 and the results go to t0..t3 in that order.
       * NOTE(review): no caller is visible in this part of the file; s0 is
       * presumably a 32-bit load of four source bytes - confirm at call sites.
       */
  160 #define READ_U8_S32(table0, table1, table2, table3)             \
  161   t0 = *(mlib_u32*)((mlib_u8*)table0 + ((s0 << 2) & 0x3FC));    \
  162   t1 = *(mlib_u32*)((mlib_u8*)table1 + ((s0 >> 6) & 0x3FC));    \
  163   t2 = *(mlib_u32*)((mlib_u8*)table2 + ((s0 >> 14)  & 0x3FC));  \
  164   t3 = *(mlib_u32*)((mlib_u8*)table3 + ((s0 >> 22)  & 0x3FC))
  165 
  166 #else /* _LITTLE_ENDIAN */
  167 
      /*
       * Big-endian counterparts of the helpers above.  The most significant
       * byte of each loaded word is the first source byte in memory, so the
       * shifts are mirrored and the results are assigned to t0, t1, t2, t3 in
       * source order.  The macro names, arguments and the s0/s1/s2 -> t0..t3
       * convention are the same as in the little-endian branch.
       */
  168 /***************************************************************/
      /* Two bytes each in s0 and s1: s0 high, s0 low, s1 high, s1 low. */
  169 #define READ_U8_U8_ALIGN(table0, table1, table2, table3)        \
  170   t0 = table0[s0>>8];                                           \
  171   t1 = table1[s0 & 0xFF];                                       \
  172   t2 = table2[s1>>8];                                           \
  173   t3 = table3[s1 & 0xFF]
  174 
  175 /***************************************************************/
      /*
       * Odd source address: the high byte of s0 is skipped; the indices are
       * s0 low, s1 high, s1 low, s2 high.  The low byte of s2 is not consumed
       * here.
       */
  176 #define READ_U8_U8_NOTALIGN(table0, table1, table2, table3)     \
  177   t0 = table0[s0 & 0xFF];                                       \
  178   t1 = table1[s1 >> 8];                                         \
  179   t2 = table2[s1 & 0xFF];                                       \
  180   t3 = table3[s2 >> 8]
  181 
  182 /***************************************************************/
      /*
       * Four bytes in s0, most significant first; each shift-and-mask yields
       * 2 * byte, the byte offset of a 16-bit table entry.
       */
  183 #define READ_U8_S16_ALIGN(table0, table1, table2, table3)       \
  184   t0 = *(mlib_u16*)((mlib_u8*)table0 + ((s0 >> 23) & 0x1FE));   \
  185   t1 = *(mlib_u16*)((mlib_u8*)table1 + ((s0 >> 15) & 0x1FE));   \
  186   t2 = *(mlib_u16*)((mlib_u8*)table2 + ((s0 >> 7)  & 0x1FE));   \
  187   t3 = *(mlib_u16*)((mlib_u8*)table3 + ((s0 << 1)  & 0x1FE))
  188 
  189 /***************************************************************/
      /*
       * Skips the most significant byte of s0: the indices are the three
       * lower bytes of s0 followed by the most significant byte of s1.
       */
  190 #define READ_U8_S16_NOTALIGN(table0, table1, table2, table3)    \
  191   t0 = *(mlib_u16*)((mlib_u8*)table0 + ((s0 >> 15) & 0x1FE));   \
  192   t1 = *(mlib_u16*)((mlib_u8*)table1 + ((s0 >> 7)  & 0x1FE));   \
  193   t2 = *(mlib_u16*)((mlib_u8*)table2 + ((s0 << 1)  & 0x1FE));   \
  194   t3 = *(mlib_u16*)((mlib_u8*)table3 + ((s1 >> 23) & 0x1FE))
  195 
  196 /***************************************************************/
      /*
       * Looks up the three lower bytes of s1 into t0, t1 and t2; t3 is not
       * assigned.
       */
  197 #define ADD_READ_U8_S16_NOTALIGN(table0, table1, table2)        \
  198   t0 = *(mlib_u16*)((mlib_u8*)table0 + ((s1 >> 15) & 0x1FE));   \
  199   t1 = *(mlib_u16*)((mlib_u8*)table1 + ((s1 >> 7)  & 0x1FE));   \
  200   t2 = *(mlib_u16*)((mlib_u8*)table2 + ((s1 << 1)  & 0x1FE))
  201 
  202 /***************************************************************/
      /*
       * Four bytes in s0, most significant first; each shift-and-mask yields
       * 4 * byte, the byte offset of a 32-bit table entry.
       */
  203 #define READ_U8_S32(table0, table1, table2, table3)             \
  204   t0 = *(mlib_u32*)((mlib_u8*)table0 + ((s0 >> 22) & 0x3FC));   \
  205   t1 = *(mlib_u32*)((mlib_u8*)table1 + ((s0 >> 14) & 0x3FC));   \
  206   t2 = *(mlib_u32*)((mlib_u8*)table2 + ((s0 >> 6)  & 0x3FC));   \
  207   t3 = *(mlib_u32*)((mlib_u8*)table3 + ((s0 << 2)  & 0x3FC))
  208 
  209 #endif /* _LITTLE_ENDIAN */
 210 
  211 /***************************************************************/
      /*
       * 8-bit to 8-bit per-channel table lookup:
       *
       *     dst[x * csize + c] = table[c][src[x * csize + c]]
       *
       *   src, slb     - source data and the amount added to src after each row
       *   dst, dlb     - destination data and the amount added to dst after
       *                  each row (both pointers are byte pointers here)
       *   xsize, ysize - pixels per row and number of rows
       *   csize        - samples (channels) per pixel
       *   table        - table[c] is the 8-bit table for channel c
       *
       * Rows with fewer than 9 samples go through MLIB_C_IMAGELOOKUP.  Longer
       * rows are handled only for csize 1..4; there is no final else, so any
       * other csize falls through and writes nothing.
       *
       * The four csize branches share one scheme per row:
       *   1. head: samples are processed one at a time until dst is 4-byte
       *      aligned ("off" samples);
       *   2. body: four samples per step - source bytes are loaded as 16-bit
       *      words, looked up with READ_U8_U8_ALIGN / READ_U8_U8_NOTALIGN
       *      (chosen by whether the source address is even), packed into one
       *      32-bit word and stored; the loads for the next step are issued
       *      before the store of the current one;
       *   3. tail: the remaining samples are processed one at a time.
       * For csize > 1 the local tab0..tab3 pointers are permuted so that tab0
       * always belongs to the channel of the next sample to be processed.
       */
  212 void mlib_c_ImageLookUp_U8_U8(const mlib_u8 *src,
  213                               mlib_s32      slb,
  214                               mlib_u8       *dst,
  215                               mlib_s32      dlb,
  216                               mlib_s32      xsize,
  217                               mlib_s32      ysize,
  218                               mlib_s32      csize,
  219                               const mlib_u8 **table)
  220 {
  221 
        /* short rows: not worth the alignment work, use the generic macro */
  222   if (xsize * csize < 9) {
  223     MLIB_C_IMAGELOOKUP(mlib_u8, mlib_u8, table);
  224   }
  225   else if (csize == 1) {
  226     mlib_s32 i, j;
  227 
  228     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
  229       mlib_u32 *da;
  230       mlib_u16 *sa;
  231       mlib_u8 *tab = (mlib_u8 *) table[0];
  232       mlib_u32 s0, s1, s2, t0, t1, t2, t3, t;
  233       mlib_s32 off;
  234       mlib_s32 size = xsize;
  235       mlib_u8 *dp = dst, *sp = (void *)src;
  236 
            /* number of bytes (0..3) up to the next 4-byte boundary of dst */
  237       off = (mlib_s32) ((4 - ((mlib_addr) dst & 3)) & 3);
  238 
            /* head: one sample at a time until dp is word aligned */
  239       for (i = 0; i < off; i++, sp++) {
  240         *dp++ = tab[sp[0]];
  241         size--;
  242       }
  243 
  244       da = (mlib_u32 *) dp;
  245 
            /* even source address: the source can be read as 16-bit words */
  246       if (((mlib_addr) sp & 1) == 0) {
  247         sa = (mlib_u16 *) sp;
  248 
              /* preload the first four source bytes */
  249         s0 = sa[0];
  250         s1 = sa[1];
  251         sa += 2;
  252 
              /* look up the held four bytes, load the next four, store one word */
  253         for (i = 0; i < size - 7; i += 4, da++, sa += 2) {
  254           READ_U8_U8_ALIGN(tab, tab, tab, tab);
  255           t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
  256           s0 = sa[0];
  257           s1 = sa[1];
  258           da[0] = t;
  259         }
  260 
              /* drain: the four bytes still held in s0/s1 */
  261         READ_U8_U8_ALIGN(tab, tab, tab, tab);
  262         t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
  263         da[0] = t;
  264         da++;
  265         dp = (mlib_u8 *) da;
  266         sp = (mlib_u8 *) sa;
              /* account for the drained group, then finish byte by byte */
  267         i += 4;
  268         for (; i < size; i++, dp++, sp++)
  269           dp[0] = tab[sp[0]];
  270 
  271       }
  272       else {
              /*
               * Odd source address: load 16-bit words starting one byte before
               * sp.  That extra leading byte is loaded but never used as an
               * index (READ_U8_U8_NOTALIGN skips it).
               */
  273         sa = (mlib_u16 *) (sp - 1);
  274 
              /* preload: skipped byte plus the first five source bytes */
  275         s0 = sa[0];
  276         s1 = sa[1];
  277         s2 = sa[2];
  278         sa += 3;
  279 
              /* s2 carries one unconsumed byte over to the next step as s0 */
  280         for (i = 0; i < size - 8; i += 4, da++, sa += 2) {
  281           READ_U8_U8_NOTALIGN(tab, tab, tab, tab);
  282           t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
  283           s0 = s2;
  284           s1 = sa[0];
  285           s2 = sa[1];
  286           da[0] = t;
  287         }
  288 
              /* drain: four bytes as one word ... */
  289         READ_U8_U8_NOTALIGN(tab, tab, tab, tab);
  290         t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
  291         da[0] = t;
  292         da++;
  293         dp = (mlib_u8 *) da;
              /* ... plus the fifth byte, which is the unconsumed half of s2 */
  294 #ifdef _LITTLE_ENDIAN
  295         *dp++ = tab[s2 >> 8];
  296 #else
  297         *dp++ = tab[s2 & 0xFF];
  298 #endif /* _LITTLE_ENDIAN */
  299         sp = (mlib_u8 *) sa;
              /* five samples were drained; finish byte by byte */
  300         i += 5;
  301         for (; i < size; i++, dp++, sp++)
  302           dp[0] = tab[sp[0]];
  303       }
  304     }
  305 
  306   }
  307   else if (csize == 2) {
  308     mlib_s32 i, j;
  309 
  310     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
  311       mlib_u32 *da;
  312       mlib_u16 *sa;
  313       mlib_u8 *tab0 = (mlib_u8 *) table[0];
  314       mlib_u8 *tab1 = (mlib_u8 *) table[1];
  315       mlib_u8 *tab;
  316       mlib_u32 s0, s1, s2, t0, t1, t2, t3, t;
  317       mlib_s32 off;
  318       mlib_s32 size = xsize * 2;
  319       mlib_u8 *dp = dst, *sp = (void *)src;
  320 
            /* number of bytes (0..3) up to the next 4-byte boundary of dst */
  321       off = (mlib_s32) ((4 - ((mlib_addr) dst & 3)) & 3);
  322 
            /* head: whole pixels first (runs once when off is 2 or 3) */
  323       for (i = 0; i < off - 1; i += 2, sp += 2) {
  324         *dp++ = tab0[sp[0]];
  325         *dp++ = tab1[sp[1]];
  326         size -= 2;
  327       }
  328 
            /*
             * Odd head: one more sample of channel 0; the next sample then
             * belongs to channel 1, so the two tables swap roles for the rest
             * of the row.
             */
  329       if ((off & 1) != 0) {
  330         *dp++ = tab0[sp[0]];
  331         size--;
  332         sp++;
  333         tab = tab0;
  334         tab0 = tab1;
  335         tab1 = tab;
  336       }
  337 
  338       da = (mlib_u32 *) dp;
  339 
            /* even source address: aligned 16-bit loads */
  340       if (((mlib_addr) sp & 1) == 0) {
  341         sa = (mlib_u16 *) sp;
  342 
  343         s0 = sa[0];
  344         s1 = sa[1];
  345         sa += 2;
  346 
              /* four samples = two pixels per step, tables alternate */
  347         for (i = 0; i < size - 7; i += 4, da++, sa += 2) {
  348           READ_U8_U8_ALIGN(tab0, tab1, tab0, tab1);
  349           t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
  350           s0 = sa[0];
  351           s1 = sa[1];
  352           da[0] = t;
  353         }
  354 
              /* drain the group still held in s0/s1 */
  355         READ_U8_U8_ALIGN(tab0, tab1, tab0, tab1);
  356         t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
  357         da[0] = t;
  358         da++;
  359         dp = (mlib_u8 *) da;
  360         sp = (mlib_u8 *) sa;
  361         i += 4;
  362 
              /* tail: sample pairs, then possibly one last sample */
  363         for (; i < size - 1; i += 2, sp += 2) {
  364           *dp++ = tab0[sp[0]];
  365           *dp++ = tab1[sp[1]];
  366         }
  367 
  368         if (i < size)
  369           *dp = tab0[(*sp)];
  370 
  371       }
  372       else {
              /* odd source address: load from one byte earlier, skip that byte */
  373         sa = (mlib_u16 *) (sp - 1);
  374 
  375         s0 = sa[0];
  376         s1 = sa[1];
  377         s2 = sa[2];
  378         sa += 3;
  379 
  380         for (i = 0; i < size - 8; i += 4, da++, sa += 2) {
  381           READ_U8_U8_NOTALIGN(tab0, tab1, tab0, tab1);
  382           t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
  383           s0 = s2;
  384           s1 = sa[0];
  385           s2 = sa[1];
  386           da[0] = t;
  387         }
  388 
  389         READ_U8_U8_NOTALIGN(tab0, tab1, tab0, tab1);
  390         t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
  391         da[0] = t;
  392         da++;
  393         dp = (mlib_u8 *) da;
              /* fifth drained sample: the unconsumed half of s2, channel of tab0 */
  394 #ifdef _LITTLE_ENDIAN
  395         *dp++ = tab0[s2 >> 8];
  396 #else
  397         *dp++ = tab0[s2 & 0xFF];
  398 #endif /* _LITTLE_ENDIAN */
  399         sp = (mlib_u8 *) sa;
  400         i += 5;
  401 
              /* an odd number of samples was drained, so the tail starts on tab1 */
  402         for (; i < size - 1; i += 2, sp += 2) {
  403           *dp++ = tab1[sp[0]];
  404           *dp++ = tab0[sp[1]];
  405         }
  406 
  407         if (i < size)
  408           *dp = tab1[(*sp)];
  409       }
  410     }
  411 
  412   }
  413   else if (csize == 3) {
  414     mlib_s32 i, j;
  415 
  416     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
  417       mlib_u32 *da;
  418       mlib_u16 *sa;
  419       mlib_u8 *tab0 = (mlib_u8 *) table[0];
  420       mlib_u8 *tab1 = (mlib_u8 *) table[1];
  421       mlib_u8 *tab2 = (mlib_u8 *) table[2];
  422       mlib_u8 *tab;
  423       mlib_u32 s0, s1, s2, t0, t1, t2, t3, t;
  424       mlib_s32 off;
  425       mlib_s32 size = xsize * 3;
  426       mlib_u8 *dp = dst, *sp = (void *)src;
  427 
            /* number of bytes (0..3) up to the next 4-byte boundary of dst */
  428       off = (mlib_s32) ((4 - ((mlib_addr) dst & 3)) & 3);
  429 
            /*
             * Head: process "off" samples, then rotate tab0..tab2 so that tab0
             * is the table of the next sample's channel.  After three samples
             * a whole pixel is done and no rotation is needed.
             */
  430       if (off == 1) {
  431         *dp++ = tab0[sp[0]];
  432         tab = tab0;
  433         tab0 = tab1;
  434         tab1 = tab2;
  435         tab2 = tab;
  436         size--;
  437         sp++;
  438       }
  439       else if (off == 2) {
  440         *dp++ = tab0[sp[0]];
  441         *dp++ = tab1[sp[1]];
  442         tab = tab2;
  443         tab2 = tab1;
  444         tab1 = tab0;
  445         tab0 = tab;
  446         size -= 2;
  447         sp += 2;
  448       }
  449       else if (off == 3) {
  450         *dp++ = tab0[sp[0]];
  451         *dp++ = tab1[sp[1]];
  452         *dp++ = tab2[sp[2]];
  453         size -= 3;
  454         sp += 3;
  455       }
  456 
  457       da = (mlib_u32 *) dp;
  458 
            /* even source address: aligned 16-bit loads */
  459       if (((mlib_addr) sp & 1) == 0) {
  460         sa = (mlib_u16 *) sp;
  461 
  462         s0 = sa[0];
  463         s1 = sa[1];
  464         sa += 2;
  465 
              /*
               * Four samples per step cover one pixel plus one sample, so the
               * fourth lookup reuses tab0 and the tables are rotated by one
               * position after every step.
               */
  466         for (i = 0; i < size - 7; i += 4, da++, sa += 2) {
  467           READ_U8_U8_ALIGN(tab0, tab1, tab2, tab0);
  468           t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
  469           tab = tab0;
  470           tab0 = tab1;
  471           tab1 = tab2;
  472           tab2 = tab;
  473           s0 = sa[0];
  474           s1 = sa[1];
  475           da[0] = t;
  476         }
  477 
              /* drain; the tables are not rotated after this group ... */
  478         READ_U8_U8_ALIGN(tab0, tab1, tab2, tab0);
  479         t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
  480         da[0] = t;
  481         da++;
  482         dp = (mlib_u8 *) da;
  483         sp = (mlib_u8 *) sa;
  484         i += 4;
  485 
              /* ... so the tail continues with tab1, tab2, tab0 (up to 3 samples) */
  486         if (i < size) {
  487           *dp++ = tab1[(*sp)];
  488           i++;
  489           sp++;
  490         }
  491 
  492         if (i < size) {
  493           *dp++ = tab2[(*sp)];
  494           i++;
  495           sp++;
  496         }
  497 
  498         if (i < size) {
  499           *dp++ = tab0[(*sp)];
  500         }
  501 
  502       }
  503       else {
              /* odd source address: load from one byte earlier, skip that byte */
  504         sa = (mlib_u16 *) (sp - 1);
  505 
  506         s0 = sa[0];
  507         s1 = sa[1];
  508         s2 = sa[2];
  509         sa += 3;
  510 
              /* same one-position table rotation per step as the aligned loop */
  511         for (i = 0; i < size - 8; i += 4, da++, sa += 2) {
  512           READ_U8_U8_NOTALIGN(tab0, tab1, tab2, tab0);
  513           t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
  514           tab = tab0;
  515           tab0 = tab1;
  516           tab1 = tab2;
  517           tab2 = tab;
  518           s0 = s2;
  519           s1 = sa[0];
  520           s2 = sa[1];
  521           da[0] = t;
  522         }
  523 
  524         READ_U8_U8_NOTALIGN(tab0, tab1, tab2, tab0);
  525         t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
  526         da[0] = t;
  527         da++;
  528         dp = (mlib_u8 *) da;
              /* fifth drained sample (unconsumed half of s2) follows tab0, i.e. tab1 */
  529 #ifdef _LITTLE_ENDIAN
  530         *dp++ = tab1[s2 >> 8];
  531 #else
  532         *dp++ = tab1[s2 & 0xFF];
  533 #endif /* _LITTLE_ENDIAN */
  534         sp = (mlib_u8 *) sa;
  535         i += 5;
  536 
              /* tail continues the cycle: tab2, tab0, tab1 */
  537         if (i < size) {
  538           *dp++ = tab2[(*sp)];
  539           i++;
  540           sp++;
  541         }
  542 
  543         if (i < size) {
  544           *dp++ = tab0[(*sp)];
  545           i++;
  546           sp++;
  547         }
  548 
  549         if (i < size) {
  550           *dp = tab1[(*sp)];
  551         }
  552       }
  553     }
  554 
  555   }
  556   else if (csize == 4) {
  557     mlib_s32 i, j;
  558 
  559     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
  560       mlib_u32 *da;
  561       mlib_u16 *sa;
  562       mlib_u8 *tab0 = (mlib_u8 *) table[0];
  563       mlib_u8 *tab1 = (mlib_u8 *) table[1];
  564       mlib_u8 *tab2 = (mlib_u8 *) table[2];
  565       mlib_u8 *tab3 = (mlib_u8 *) table[3];
  566       mlib_u8 *tab;
  567       mlib_u32 s0, s1, s2, t0, t1, t2, t3, t;
  568       mlib_s32 off;
  569       mlib_s32 size = xsize * 4;
  570       mlib_u8 *dp = dst, *sp = (void *)src;
  571 
            /* number of bytes (0..3) up to the next 4-byte boundary of dst */
  572       off = (mlib_s32) ((4 - ((mlib_addr) dst & 3)) & 3);
  573 
            /*
             * Head: process "off" samples, then rotate tab0..tab3 by "off"
             * positions so that tab0 is the table of the next sample's
             * channel.  A step of four samples is exactly one pixel, so no
             * further rotation is needed inside the loops below.
             */
  574       if (off == 1) {
  575         *dp++ = tab0[sp[0]];
  576         tab = tab0;
  577         tab0 = tab1;
  578         tab1 = tab2;
  579         tab2 = tab3;
  580         tab3 = tab;
  581         size--;
  582         sp++;
  583       }
  584       else if (off == 2) {
  585         *dp++ = tab0[sp[0]];
  586         *dp++ = tab1[sp[1]];
              /* rotation by two: swap tab0 with tab2 and tab1 with tab3 */
  587         tab = tab0;
  588         tab0 = tab2;
  589         tab2 = tab;
  590         tab = tab1;
  591         tab1 = tab3;
  592         tab3 = tab;
  593         size -= 2;
  594         sp += 2;
  595       }
  596       else if (off == 3) {
  597         *dp++ = tab0[sp[0]];
  598         *dp++ = tab1[sp[1]];
  599         *dp++ = tab2[sp[2]];
  600         tab = tab3;
  601         tab3 = tab2;
  602         tab2 = tab1;
  603         tab1 = tab0;
  604         tab0 = tab;
  605         size -= 3;
  606         sp += 3;
  607       }
  608 
  609       da = (mlib_u32 *) dp;
  610 
            /* even source address: aligned 16-bit loads */
  611       if (((mlib_addr) sp & 1) == 0) {
  612         sa = (mlib_u16 *) sp;
  613 
  614         s0 = sa[0];
  615         s1 = sa[1];
  616         sa += 2;
  617 
              /* look up the held four bytes, load the next four, store one word */
  618         for (i = 0; i < size - 7; i += 4, da++, sa += 2) {
  619           READ_U8_U8_ALIGN(tab0, tab1, tab2, tab3);
  620           t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
  621           s0 = sa[0];
  622           s1 = sa[1];
  623           da[0] = t;
  624         }
  625 
              /* drain the group still held in s0/s1 */
  626         READ_U8_U8_ALIGN(tab0, tab1, tab2, tab3);
  627         t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
  628         da[0] = t;
  629         da++;
  630         dp = (mlib_u8 *) da;
  631         sp = (mlib_u8 *) sa;
  632         i += 4;
  633 
              /* tail: up to three samples, starting again at tab0 */
  634         if (i < size) {
  635           *dp++ = tab0[(*sp)];
  636           i++;
  637           sp++;
  638         }
  639 
  640         if (i < size) {
  641           *dp++ = tab1[(*sp)];
  642           i++;
  643           sp++;
  644         }
  645 
  646         if (i < size) {
  647           *dp = tab2[(*sp)];
  648         }
  649 
  650       }
  651       else {
              /* odd source address: load from one byte earlier, skip that byte */
  652         sa = (mlib_u16 *) (sp - 1);
  653 
  654         s0 = sa[0];
  655         s1 = sa[1];
  656         s2 = sa[2];
  657         sa += 3;
  658 
  659         for (i = 0; i < size - 8; i += 4, da++, sa += 2) {
  660           READ_U8_U8_NOTALIGN(tab0, tab1, tab2, tab3);
  661           t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
  662           s0 = s2;
  663           s1 = sa[0];
  664           s2 = sa[1];
  665           da[0] = t;
  666         }
  667 
  668         READ_U8_U8_NOTALIGN(tab0, tab1, tab2, tab3);
  669         t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
  670         da[0] = t;
  671         da++;
  672         dp = (mlib_u8 *) da;
              /* fifth drained sample: the unconsumed half of s2, channel of tab0 */
  673 #ifdef _LITTLE_ENDIAN
  674         *dp++ = tab0[s2 >> 8];
  675 #else
  676         *dp++ = tab0[s2 & 0xFF];
  677 #endif /* _LITTLE_ENDIAN */
  678         sp = (mlib_u8 *) sa;
  679         i += 5;
  680 
              /* tail continues the cycle: tab1, tab2, tab3 */
  681         if (i < size) {
  682           *dp++ = tab1[(*sp)];
  683           i++;
  684           sp++;
  685         }
  686 
  687         if (i < size) {
  688           *dp++ = tab2[(*sp)];
  689           i++;
  690           sp++;
  691         }
  692 
  693         if (i < size) {
  694           *dp = tab3[(*sp)];
  695         }
  696       }
  697     }
  698   }
        /* NOTE(review): csize outside 1..4 reaches this point without any output */
  699 }
 700 
 701 /***************************************************************/
 702 void mlib_c_ImageLookUp_S16_U8(const mlib_s16 *src,
 703                                mlib_s32       slb,
 704                                mlib_u8        *dst,
 705                                mlib_s32       dlb,
 706                                mlib_s32       xsize,
 707                                mlib_s32       ysize,
 708                                mlib_s32       csize,
 709                                const mlib_u8  **table)
 710 {
 711   const mlib_u8 *table_base[4];
 712   mlib_s32 c;
 713 
 714   for (c = 0; c < csize; c++) {
 715     table_base[c] = &table[c][32768];
 716   }
 717 
 718   MLIB_C_IMAGELOOKUP(mlib_u8, mlib_s16, table_base);
 719 }
 720 
 721 /***************************************************************/
 722 void mlib_c_ImageLookUp_U16_U8(const mlib_u16 *src,
 723                                mlib_s32       slb,
 724                                mlib_u8        *dst,
 725                                mlib_s32       dlb,
 726                                mlib_s32       xsize,
 727                                mlib_s32       ysize,
 728                                mlib_s32       csize,
 729                                const mlib_u8  **table)
 730 {
 731   const mlib_u8 *table_base[4];
 732   mlib_s32 c;
 733 
 734   for (c = 0; c < csize; c++) {
 735     table_base[c] = &table[c][0];
 736   }
 737 
 738   MLIB_C_IMAGELOOKUP(mlib_u8, mlib_u16, table_base);
 739 }
 740 
 741 /***************************************************************/
 742 void mlib_c_ImageLookUp_S32_U8(const mlib_s32 *src,
 743                                mlib_s32       slb,
 744                                mlib_u8        *dst,
 745                                mlib_s32       dlb,
 746                                mlib_s32       xsize,
 747                                mlib_s32       ysize,
 748                                mlib_s32       csize,
 749                                const mlib_u8  **table)
 750 {
 751   const mlib_u8 *table_base[4];
 752   mlib_s32 c;
 753 
 754   for (c = 0; c < csize; c++) {
 755     table_base[c] = &table[c][TABLE_SHIFT_S32];
 756   }
 757 
 758   MLIB_C_IMAGELOOKUP(mlib_u8, mlib_s32, table_base);
 759 }
 760 
 761 /***************************************************************/
 762 void mlib_c_ImageLookUp_U8_S16(const mlib_u8  *src,
 763                                mlib_s32       slb,
 764                                mlib_s16       *dst,
 765                                mlib_s32       dlb,
 766                                mlib_s32       xsize,
 767                                mlib_s32       ysize,
 768                                mlib_s32       csize,
 769                                const mlib_s16 **table)
 770 {
 771 
 772   if (xsize * csize < 12) {
 773     MLIB_C_IMAGELOOKUP(mlib_s16, mlib_u8, table);
 774   }
 775   else if (csize == 1) {
 776     mlib_s32 i, j;
 777 
 778     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
 779       mlib_u32 *sa;
 780       mlib_u32 *da;
 781       mlib_u16 *tab = (mlib_u16 *) table[0];
 782       mlib_u32 s0, s1, t0, t1, t2, t3;
 783       mlib_u32 res1, res2;
 784       mlib_s32 off;
 785       mlib_s32 size = xsize;
 786       mlib_u16 *dp = (mlib_u16 *) dst;
 787       mlib_u8 *sp = (void *)src;
 788 
 789       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
 790 
 791       for (i = 0; i < off; i++, sp++) {
 792         *dp++ = tab[sp[0]];
 793         size--;
 794       }
 795 
 796       sa = (mlib_u32 *) sp;
 797 
 798       if (((mlib_addr) dp & 3) == 0) {
 799         da = (mlib_u32 *) dp;
 800 
 801         s0 = sa[0];
 802         sa++;
 803 
 804         for (i = 0; i < size - 7; i += 4, da += 2, sa++) {
 805           READ_U8_S16_ALIGN(tab, tab, tab, tab);
 806           res1 = (t0 << 16) + t1;
 807           res2 = (t2 << 16) + t3;
 808           s0 = sa[0];
 809           da[0] = res1;
 810           da[1] = res2;
 811         }
 812 
 813         READ_U8_S16_ALIGN(tab, tab, tab, tab);
 814         res1 = (t0 << 16) + t1;
 815         res2 = (t2 << 16) + t3;
 816         da[0] = res1;
 817         da[1] = res2;
 818         da += 2;
 819         dp = (mlib_u16 *) da;
 820         sp = (mlib_u8 *) sa;
 821         i += 4;
 822         for (; i < size; i++, dp++, sp++)
 823           dp[0] = tab[sp[0]];
 824 
 825       }
 826       else {
 827 
 828         *dp++ = tab[(*sp)];
 829         size--;
 830         da = (mlib_u32 *) dp;
 831 
 832         s0 = sa[0];
 833         s1 = sa[1];
 834         sa += 2;
 835 
 836         for (i = 0; i < size - 10; i += 4, da += 2, sa++) {
 837           READ_U8_S16_NOTALIGN(tab, tab, tab, tab);
 838           s0 = s1;
 839           res1 = (t0 << 16) + t1;
 840           res2 = (t2 << 16) + t3;
 841           s1 = sa[0];
 842           da[0] = res1;
 843           da[1] = res2;
 844         }
 845 
 846         READ_U8_S16_NOTALIGN(tab, tab, tab, tab);
 847         res1 = (t0 << 16) + t1;
 848         res2 = (t2 << 16) + t3;
 849         da[0] = res1;
 850         da[1] = res2;
 851         ADD_READ_U8_S16_NOTALIGN(tab, tab, tab);
 852         res1 = (t0 << 16) + t1;
 853         da[2] = res1;
 854         da += 3;
 855         dp = (mlib_u16 *) da;
 856         *dp++ = (mlib_u16) t2;
 857         sp = (mlib_u8 *) sa;
 858         i += 7;
 859         for (; i < size; i++, dp++, sp++)
 860           dp[0] = tab[sp[0]];
 861       }
 862     }
 863 
 864   }
 865   else if (csize == 2) {
 866     mlib_s32 i, j;
 867 
 868     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
 869       mlib_u32 *sa;
 870       mlib_u32 *da;
 871       mlib_u16 *tab0 = (mlib_u16 *) table[0];
 872       mlib_u16 *tab1 = (mlib_u16 *) table[1];
 873       mlib_u16 *tab;
 874       mlib_u32 s0, s1, t0, t1, t2, t3;
 875       mlib_u32 res1, res2;
 876       mlib_s32 off;
 877       mlib_s32 size = xsize * 2;
 878       mlib_u16 *dp = (mlib_u16 *) dst;
 879       mlib_u8 *sp = (void *)src;
 880 
 881       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
 882 
 883       for (i = 0; i < off - 1; i += 2, sp += 2) {
 884         *dp++ = tab0[sp[0]];
 885         *dp++ = tab1[sp[1]];
 886         size -= 2;
 887       }
 888 
 889       if ((off & 1) != 0) {
 890         *dp++ = tab0[*sp];
 891         size--;
 892         sp++;
 893         tab = tab0;
 894         tab0 = tab1;
 895         tab1 = tab;
 896       }
 897 
 898       sa = (mlib_u32 *) sp;
 899 
 900       if (((mlib_addr) dp & 3) == 0) {
 901         da = (mlib_u32 *) dp;
 902 
 903         s0 = sa[0];
 904         sa++;
 905 
 906         for (i = 0; i < size - 7; i += 4, da += 2, sa++) {
 907           READ_U8_S16_ALIGN(tab0, tab1, tab0, tab1);
 908           res1 = (t0 << 16) + t1;
 909           res2 = (t2 << 16) + t3;
 910           s0 = sa[0];
 911           da[0] = res1;
 912           da[1] = res2;
 913         }
 914 
 915         READ_U8_S16_ALIGN(tab0, tab1, tab0, tab1);
 916         res1 = (t0 << 16) + t1;
 917         res2 = (t2 << 16) + t3;
 918         da[0] = res1;
 919         da[1] = res2;
 920         da += 2;
 921         dp = (mlib_u16 *) da;
 922         sp = (mlib_u8 *) sa;
 923         i += 4;
 924 
 925         for (; i < size - 1; i += 2, sp += 2) {
 926           *dp++ = tab0[sp[0]];
 927           *dp++ = tab1[sp[1]];
 928         }
 929 
 930         if (i < size)
 931           *dp = tab0[(*sp)];
 932 
 933       }
 934       else {
 935 
 936         *dp++ = tab0[(*sp)];
 937         size--;
 938         da = (mlib_u32 *) dp;
 939 
 940         s0 = sa[0];
 941         s1 = sa[1];
 942         sa += 2;
 943 
 944         for (i = 0; i < size - 10; i += 4, da += 2, sa++) {
 945           READ_U8_S16_NOTALIGN(tab1, tab0, tab1, tab0);
 946           s0 = s1;
 947           res1 = (t0 << 16) + t1;
 948           res2 = (t2 << 16) + t3;
 949           s1 = sa[0];
 950           da[0] = res1;
 951           da[1] = res2;
 952         }
 953 
 954         READ_U8_S16_NOTALIGN(tab1, tab0, tab1, tab0);
 955         res1 = (t0 << 16) + t1;
 956         res2 = (t2 << 16) + t3;
 957         da[0] = res1;
 958         da[1] = res2;
 959         ADD_READ_U8_S16_NOTALIGN(tab1, tab0, tab1);
 960         res1 = (t0 << 16) + t1;
 961         da[2] = res1;
 962         da += 3;
 963         dp = (mlib_u16 *) da;
 964         *dp++ = (mlib_u16) t2;
 965         sp = (mlib_u8 *) sa;
 966         i += 7;
 967 
 968         for (; i < size - 1; i += 2, sp += 2) {
 969           *dp++ = tab0[sp[0]];
 970           *dp++ = tab1[sp[1]];
 971         }
 972 
 973         if (i < size)
 974           *dp = tab0[(*sp)];
 975       }
 976     }
 977 
 978   }
 979   else if (csize == 3) {
 980     mlib_s32 i, j;
 981 
 982     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
 983       mlib_u32 *sa;
 984       mlib_u32 *da;
 985       mlib_u16 *tab0 = (mlib_u16 *) table[0];
 986       mlib_u16 *tab1 = (mlib_u16 *) table[1];
 987       mlib_u16 *tab2 = (mlib_u16 *) table[2];
 988       mlib_u16 *tab;
 989       mlib_u32 s0, s1, t0, t1, t2, t3;
 990       mlib_u32 res1, res2;
 991       mlib_s32 off;
 992       mlib_s32 size = xsize * 3;
 993       mlib_u16 *dp = (mlib_u16 *) dst;
 994       mlib_u8 *sp = (void *)src;
 995 
 996       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
 997 
 998       if (off == 1) {
 999         *dp++ = tab0[(*sp)];
1000         tab = tab0;
1001         tab0 = tab1;
1002         tab1 = tab2;
1003         tab2 = tab;
1004         size--;
1005         sp++;
1006       }
1007       else if (off == 2) {
1008         *dp++ = tab0[sp[0]];
1009         *dp++ = tab1[sp[1]];
1010         tab = tab2;
1011         tab2 = tab1;
1012         tab1 = tab0;
1013         tab0 = tab;
1014         size -= 2;
1015         sp += 2;
1016       }
1017       else if (off == 3) {
1018         *dp++ = tab0[sp[0]];
1019         *dp++ = tab1[sp[1]];
1020         *dp++ = tab2[sp[2]];
1021         size -= 3;
1022         sp += 3;
1023       }
1024 
1025       sa = (mlib_u32 *) sp;
1026 
1027       if (((mlib_addr) dp & 3) == 0) {
1028         da = (mlib_u32 *) dp;
1029 
1030         s0 = sa[0];
1031         sa++;
1032 
1033         for (i = 0; i < size - 7; i += 4, da += 2, sa++) {
1034           READ_U8_S16_ALIGN(tab0, tab1, tab2, tab0);
1035           res1 = (t0 << 16) + t1;
1036           res2 = (t2 << 16) + t3;
1037           tab = tab0;
1038           tab0 = tab1;
1039           tab1 = tab2;
1040           tab2 = tab;
1041           s0 = sa[0];
1042           da[0] = res1;
1043           da[1] = res2;
1044         }
1045 
1046         READ_U8_S16_ALIGN(tab0, tab1, tab2, tab0);
1047         res1 = (t0 << 16) + t1;
1048         res2 = (t2 << 16) + t3;
1049         da[0] = res1;
1050         da[1] = res2;
1051         da += 2;
1052         dp = (mlib_u16 *) da;
1053         sp = (mlib_u8 *) sa;
1054         i += 4;
1055 
1056         if (i < size) {
1057           *dp++ = tab1[(*sp)];
1058           i++;
1059           sp++;
1060         }
1061 
1062         if (i < size) {
1063           *dp++ = tab2[(*sp)];
1064           i++;
1065           sp++;
1066         }
1067 
1068         if (i < size) {
1069           *dp = tab0[(*sp)];
1070         }
1071 
1072       }
1073       else {
1074 
1075         *dp++ = tab0[(*sp)];
1076         size--;
1077         da = (mlib_u32 *) dp;
1078 
1079         s0 = sa[0];
1080         s1 = sa[1];
1081         sa += 2;
1082 
1083         for (i = 0; i < size - 10; i += 4, da += 2, sa++) {
1084           READ_U8_S16_NOTALIGN(tab1, tab2, tab0, tab1);
1085           s0 = s1;
1086           res1 = (t0 << 16) + t1;
1087           res2 = (t2 << 16) + t3;
1088           tab = tab0;
1089           tab0 = tab1;
1090           tab1 = tab2;
1091           tab2 = tab;
1092           s1 = sa[0];
1093           da[0] = res1;
1094           da[1] = res2;
1095         }
1096 
1097         READ_U8_S16_NOTALIGN(tab1, tab2, tab0, tab1);
1098         res1 = (t0 << 16) + t1;
1099         res2 = (t2 << 16) + t3;
1100         da[0] = res1;
1101         da[1] = res2;
1102         ADD_READ_U8_S16_NOTALIGN(tab2, tab0, tab1);
1103         res1 = (t0 << 16) + t1;
1104         da[2] = res1;
1105         da += 3;
1106         dp = (mlib_u16 *) da;
1107         *dp++ = (mlib_u16) t2;
1108         sp = (mlib_u8 *) sa;
1109         i += 7;
1110 
1111         if (i < size) {
1112           *dp++ = tab2[(*sp)];
1113           i++;
1114           sp++;
1115         }
1116 
1117         if (i < size) {
1118           *dp++ = tab0[(*sp)];
1119           i++;
1120           sp++;
1121         }
1122 
1123         if (i < size) {
1124           *dp = tab1[(*sp)];
1125         }
1126       }
1127     }
1128 
1129   }
1130   else if (csize == 4) {
1131     mlib_s32 i, j;
1132 
1133     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
1134       mlib_u32 *sa;
1135       mlib_u32 *da;
1136       mlib_u16 *tab0 = (mlib_u16 *) table[0];
1137       mlib_u16 *tab1 = (mlib_u16 *) table[1];
1138       mlib_u16 *tab2 = (mlib_u16 *) table[2];
1139       mlib_u16 *tab3 = (mlib_u16 *) table[3];
1140       mlib_u16 *tab;
1141       mlib_u32 s0, s1, t0, t1, t2, t3;
1142       mlib_u32 res1, res2;
1143       mlib_s32 off;
1144       mlib_s32 size = xsize * 4;
1145       mlib_u16 *dp = (mlib_u16 *) dst;
1146       mlib_u8 *sp = (void *)src;
1147 
1148       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
1149 
1150       if (off == 1) {
1151         *dp++ = tab0[(*sp)];
1152         tab = tab0;
1153         tab0 = tab1;
1154         tab1 = tab2;
1155         tab2 = tab3;
1156         tab3 = tab;
1157         size--;
1158         sp++;
1159       }
1160       else if (off == 2) {
1161         *dp++ = tab0[sp[0]];
1162         *dp++ = tab1[sp[1]];
1163         tab = tab0;
1164         tab0 = tab2;
1165         tab2 = tab;
1166         tab = tab1;
1167         tab1 = tab3;
1168         tab3 = tab;
1169         size -= 2;
1170         sp += 2;
1171       }
1172       else if (off == 3) {
1173         *dp++ = tab0[sp[0]];
1174         *dp++ = tab1[sp[1]];
1175         *dp++ = tab2[sp[2]];
1176         tab = tab3;
1177         tab3 = tab2;
1178         tab2 = tab1;
1179         tab1 = tab0;
1180         tab0 = tab;
1181         size -= 3;
1182         sp += 3;
1183       }
1184 
1185       sa = (mlib_u32 *) sp;
1186 
1187       if (((mlib_addr) dp & 3) == 0) {
1188         da = (mlib_u32 *) dp;
1189 
1190         s0 = sa[0];
1191         sa++;
1192 
1193         for (i = 0; i < size - 7; i += 4, da += 2, sa++) {
1194           READ_U8_S16_ALIGN(tab0, tab1, tab2, tab3);
1195           res1 = (t0 << 16) + t1;
1196           res2 = (t2 << 16) + t3;
1197           s0 = sa[0];
1198           da[0] = res1;
1199           da[1] = res2;
1200         }
1201 
1202         READ_U8_S16_ALIGN(tab0, tab1, tab2, tab3);
1203         res1 = (t0 << 16) + t1;
1204         res2 = (t2 << 16) + t3;
1205         da[0] = res1;
1206         da[1] = res2;
1207         da += 2;
1208         dp = (mlib_u16 *) da;
1209         sp = (mlib_u8 *) sa;
1210         i += 4;
1211 
1212         if (i < size) {
1213           *dp++ = tab0[(*sp)];
1214           i++;
1215           sp++;
1216         }
1217 
1218         if (i < size) {
1219           *dp++ = tab1[(*sp)];
1220           i++;
1221           sp++;
1222         }
1223 
1224         if (i < size) {
1225           *dp = tab2[(*sp)];
1226         }
1227 
1228       }
1229       else {
1230 
1231         *dp++ = tab0[(*sp)];
1232         size--;
1233         da = (mlib_u32 *) dp;
1234 
1235         s0 = sa[0];
1236         s1 = sa[1];
1237         sa += 2;
1238 
1239         for (i = 0; i < size - 10; i += 4, da += 2, sa++) {
1240           READ_U8_S16_NOTALIGN(tab1, tab2, tab3, tab0);
1241           s0 = s1;
1242           res1 = (t0 << 16) + t1;
1243           res2 = (t2 << 16) + t3;
1244           s1 = sa[0];
1245           da[0] = res1;
1246           da[1] = res2;
1247         }
1248 
1249         READ_U8_S16_NOTALIGN(tab1, tab2, tab3, tab0);
1250         res1 = (t0 << 16) + t1;
1251         res2 = (t2 << 16) + t3;
1252         da[0] = res1;
1253         da[1] = res2;
1254         ADD_READ_U8_S16_NOTALIGN(tab1, tab2, tab3);
1255         res1 = (t0 << 16) + t1;
1256         da[2] = res1;
1257         da += 3;
1258         dp = (mlib_u16 *) da;
1259         *dp++ = (mlib_u16) t2;
1260         sp = (mlib_u8 *) sa;
1261         i += 7;
1262 
1263         if (i < size) {
1264           *dp++ = tab0[(*sp)];
1265           i++;
1266           sp++;
1267         }
1268 
1269         if (i < size) {
1270           *dp++ = tab1[(*sp)];
1271           i++;
1272           sp++;
1273         }
1274 
1275         if (i < size) {
1276           *dp = tab2[(*sp)];
1277         }
1278       }
1279     }
1280   }
1281 }
1282 
1283 /***************************************************************/
1284 void mlib_c_ImageLookUp_S16_S16(const mlib_s16 *src,
1285                                 mlib_s32       slb,
1286                                 mlib_s16       *dst,
1287                                 mlib_s32       dlb,
1288                                 mlib_s32       xsize,
1289                                 mlib_s32       ysize,
1290                                 mlib_s32       csize,
1291                                 const mlib_s16 **table)
1292 {
1293   const mlib_s16 *table_base[4];
1294   mlib_s32 c;
1295 
1296   for (c = 0; c < csize; c++) {
1297     table_base[c] = &table[c][32768];
1298   }
1299 
1300   MLIB_C_IMAGELOOKUP(mlib_s16, mlib_s16, table_base);
1301 }
1302 
1303 /***************************************************************/
1304 void mlib_c_ImageLookUp_U16_S16(const mlib_u16 *src,
1305                                 mlib_s32       slb,
1306                                 mlib_s16       *dst,
1307                                 mlib_s32       dlb,
1308                                 mlib_s32       xsize,
1309                                 mlib_s32       ysize,
1310                                 mlib_s32       csize,
1311                                 const mlib_s16 **table)
1312 {
1313   const mlib_s16 *table_base[4];
1314   mlib_s32 c;
1315 
1316   for (c = 0; c < csize; c++) {
1317     table_base[c] = &table[c][0];
1318   }
1319 
1320   MLIB_C_IMAGELOOKUP(mlib_s16, mlib_u16, table_base);
1321 }
1322 
1323 /***************************************************************/
1324 void mlib_c_ImageLookUp_S32_S16(const mlib_s32 *src,
1325                                 mlib_s32       slb,
1326                                 mlib_s16       *dst,
1327                                 mlib_s32       dlb,
1328                                 mlib_s32       xsize,
1329                                 mlib_s32       ysize,
1330                                 mlib_s32       csize,
1331                                 const mlib_s16 **table)
1332 {
1333   const mlib_s16 *table_base[4];
1334   mlib_s32 c;
1335 
1336   for (c = 0; c < csize; c++) {
1337     table_base[c] = &table[c][TABLE_SHIFT_S32];
1338   }
1339 
1340   MLIB_C_IMAGELOOKUP(mlib_s16, mlib_s32, table_base);
1341 }
1342 
1343 /***************************************************************/
1344 void mlib_c_ImageLookUp_S16_U16(const mlib_s16 *src,
1345                                 mlib_s32       slb,
1346                                 mlib_u16       *dst,
1347                                 mlib_s32       dlb,
1348                                 mlib_s32       xsize,
1349                                 mlib_s32       ysize,
1350                                 mlib_s32       csize,
1351                                 const mlib_s16 **table)
1352 {
1353   const mlib_s16 *table_base[4];
1354   mlib_s32 c;
1355 
1356   for (c = 0; c < csize; c++) {
1357     table_base[c] = &table[c][32768];
1358   }
1359 
1360   MLIB_C_IMAGELOOKUP(mlib_u16, mlib_s16, table_base);
1361 }
1362 
1363 /***************************************************************/
1364 void mlib_c_ImageLookUp_U16_U16(const mlib_u16 *src,
1365                                 mlib_s32       slb,
1366                                 mlib_u16       *dst,
1367                                 mlib_s32       dlb,
1368                                 mlib_s32       xsize,
1369                                 mlib_s32       ysize,
1370                                 mlib_s32       csize,
1371                                 const mlib_s16 **table)
1372 {
1373   const mlib_s16 *table_base[4];
1374   mlib_s32 c;
1375 
1376   for (c = 0; c < csize; c++) {
1377     table_base[c] = &table[c][0];
1378   }
1379 
1380   MLIB_C_IMAGELOOKUP(mlib_u16, mlib_u16, table_base);
1381 }
1382 
1383 /***************************************************************/
1384 void mlib_c_ImageLookUp_S32_U16(const mlib_s32 *src,
1385                                 mlib_s32       slb,
1386                                 mlib_u16       *dst,
1387                                 mlib_s32       dlb,
1388                                 mlib_s32       xsize,
1389                                 mlib_s32       ysize,
1390                                 mlib_s32       csize,
1391                                 const mlib_s16 **table)
1392 {
1393   const mlib_s16 *table_base[4];
1394   mlib_s32 c;
1395 
1396   for (c = 0; c < csize; c++) {
1397     table_base[c] = &table[c][TABLE_SHIFT_S32];
1398   }
1399 
1400   MLIB_C_IMAGELOOKUP(mlib_u16, mlib_s32, table_base);
1401 }
1402 
1403 /***************************************************************/
/*
 * Table look-up from a U8 source image into an S32 destination image.
 *
 *   src, slb   - source row pointer and per-row increment.  slb is added to
 *                a mlib_u8 pointer, so it is counted in bytes.
 *   dst, dlb   - destination row pointer and per-row increment.  dlb is
 *                added to a mlib_s32 pointer, so it is counted in
 *                destination elements here.
 *   xsize      - row width in pixels.
 *   ysize      - number of rows.
 *   csize      - number of interleaved channels.
 *   table      - table[c] is the look-up table for channel c.
 *
 * When a row holds fewer than 7 samples (xsize * csize < 7) the generic
 * MLIB_C_IMAGELOOKUP macro is used.  Otherwise there is one specialised
 * path per channel count 1..4; any other csize writes nothing.
 *
 * Every specialised path follows the same scheme for each row:
 *   1. translate up to three leading samples one at a time until the
 *      source pointer is 4-byte aligned, rotating the tabN pointers so that
 *      tab0 always belongs to the channel of the next sample;
 *   2. read the source as 32-bit words: s0 holds the word being translated
 *      while the following word is loaded inside the loop;
 *   3. expand READ_U8_S32 (defined elsewhere in this file), which is
 *      expected to fill t0..t3 from the four bytes packed in s0 using the
 *      four tables given -- confirm against the macro definition;
 *   4. translate the last word outside the loop (no further pre-load) and
 *      finish the remaining 0..3 samples one at a time.
 */
void mlib_c_ImageLookUp_U8_S32(const mlib_u8  *src,
                               mlib_s32       slb,
                               mlib_s32       *dst,
                               mlib_s32       dlb,
                               mlib_s32       xsize,
                               mlib_s32       ysize,
                               mlib_s32       csize,
                               const mlib_s32 **table)
{

  /* Short rows: not enough samples for the word-at-a-time scheme below. */
  if (xsize * csize < 7) {
    MLIB_C_IMAGELOOKUP(mlib_s32, mlib_u8, table);
  }
  else if (csize == 1) {
    mlib_s32 i, j;

    for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
      mlib_u32 *sa;
      mlib_u32 *tab = (mlib_u32 *) table[0];
      mlib_u32 s0, t0, t1, t2, t3;
      mlib_s32 off;
      mlib_s32 size = xsize;
      mlib_u32 *dp = (mlib_u32 *) dst;
      mlib_u8 *sp = (void *)src;

      /* Number of leading bytes (0..3) before src is 4-byte aligned. */
      off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);

      for (i = 0; i < off; i++, sp++) {
        *dp++ = tab[sp[0]];
        size--;
      }

      sa = (mlib_u32 *) sp;

      /* Prime the pipeline with the first aligned source word. */
      s0 = sa[0];
      sa++;

      /*
       * s0 covers samples i..i+3; the body also loads the next word
       * (samples i+4..i+7), hence the bound i < size - 7.
       */
      for (i = 0; i < size - 7; i += 4, dp += 4, sa++) {
        READ_U8_S32(tab, tab, tab, tab);
        s0 = sa[0];
        dp[0] = t0;
        dp[1] = t1;
        dp[2] = t2;
        dp[3] = t3;
      }

      /* Last full word: translate it without loading another one. */
      READ_U8_S32(tab, tab, tab, tab);
      dp[0] = t0;
      dp[1] = t1;
      dp[2] = t2;
      dp[3] = t3;
      dp += 4;
      sp = (mlib_u8 *) sa;
      i += 4;
      /* Remaining 0..3 samples. */
      for (; i < size; i++, dp++, sp++)
        dp[0] = tab[sp[0]];
    }

  }
  else if (csize == 2) {
    mlib_s32 i, j;

    for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
      mlib_u32 *sa;
      mlib_u32 *tab0 = (mlib_u32 *) table[0];
      mlib_u32 *tab1 = (mlib_u32 *) table[1];
      mlib_u32 *tab;
      mlib_u32 s0, t0, t1, t2, t3;
      mlib_s32 off;
      mlib_s32 size = xsize * 2;
      mlib_u32 *dp = (mlib_u32 *) dst;
      mlib_u8 *sp = (void *)src;

      /* Number of leading bytes (0..3) before src is 4-byte aligned. */
      off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);

      /* Leading samples, one whole two-channel pixel at a time. */
      for (i = 0; i < off - 1; i += 2, sp += 2) {
        *dp++ = tab0[sp[0]];
        *dp++ = tab1[sp[1]];
        size -= 2;
      }

      /*
       * Odd number of leading bytes: one more channel-0 sample is consumed,
       * so the next sample is channel 1 -- swap the tables to keep tab0 in
       * step with the next sample.
       */
      if ((off & 1) != 0) {
        *dp++ = tab0[*sp];
        size--;
        sp++;
        tab = tab0;
        tab0 = tab1;
        tab1 = tab;
      }

      sa = (mlib_u32 *) sp;

      /* Prime the pipeline with the first aligned source word. */
      s0 = sa[0];
      sa++;

      /* Same pre-load bound as above: the body reads the following word. */
      for (i = 0; i < size - 7; i += 4, dp += 4, sa++) {
        READ_U8_S32(tab0, tab1, tab0, tab1);
        s0 = sa[0];
        dp[0] = t0;
        dp[1] = t1;
        dp[2] = t2;
        dp[3] = t3;
      }

      /* Last full word: translate it without loading another one. */
      READ_U8_S32(tab0, tab1, tab0, tab1);
      dp[0] = t0;
      dp[1] = t1;
      dp[2] = t2;
      dp[3] = t3;
      dp += 4;
      sp = (mlib_u8 *) sa;
      i += 4;

      /* Remaining samples: pairs first, then a possible single sample. */
      for (; i < size - 1; i += 2, sp += 2) {
        *dp++ = tab0[sp[0]];
        *dp++ = tab1[sp[1]];
      }

      if (i < size)
        *dp = tab0[(*sp)];
    }

  }
  else if (csize == 3) {
    mlib_s32 i, j;

    for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
      mlib_u32 *sa;
      mlib_u32 *tab0 = (mlib_u32 *) table[0];
      mlib_u32 *tab1 = (mlib_u32 *) table[1];
      mlib_u32 *tab2 = (mlib_u32 *) table[2];
      mlib_u32 *tab;
      mlib_u32 s0, t0, t1, t2, t3;
      mlib_s32 off;
      mlib_s32 size = xsize * 3;
      mlib_u32 *dp = (mlib_u32 *) dst;
      mlib_u8 *sp = (void *)src;

      /* Number of leading bytes (0..3) before src is 4-byte aligned. */
      off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);

      /*
       * Consume the leading samples and rotate the tables by the same
       * amount so that tab0 matches the next sample's channel.
       */
      if (off == 1) {
        *dp++ = tab0[(*sp)];
        tab = tab0;
        tab0 = tab1;
        tab1 = tab2;
        tab2 = tab;
        size--;
        sp++;
      }
      else if (off == 2) {
        *dp++ = tab0[sp[0]];
        *dp++ = tab1[sp[1]];
        tab = tab2;
        tab2 = tab1;
        tab1 = tab0;
        tab0 = tab;
        size -= 2;
        sp += 2;
      }
      else if (off == 3) {
        /* A whole three-channel pixel: table order is unchanged. */
        *dp++ = tab0[sp[0]];
        *dp++ = tab1[sp[1]];
        *dp++ = tab2[sp[2]];
        size -= 3;
        sp += 3;
      }

      sa = (mlib_u32 *) sp;

      /* Prime the pipeline with the first aligned source word. */
      s0 = sa[0];
      sa++;

      /*
       * Each word holds four samples of a three-channel stream, so the
       * channel phase advances by one per word: the fourth table is tab0
       * again, and the tables are rotated by one after every word.
       */
      for (i = 0; i < size - 7; i += 4, dp += 4, sa++) {
        READ_U8_S32(tab0, tab1, tab2, tab0);
        tab = tab0;
        tab0 = tab1;
        tab1 = tab2;
        tab2 = tab;
        s0 = sa[0];
        dp[0] = t0;
        dp[1] = t1;
        dp[2] = t2;
        dp[3] = t3;
      }

      /* Last full word: translate it without loading another one. */
      READ_U8_S32(tab0, tab1, tab2, tab0);
      dp[0] = t0;
      dp[1] = t1;
      dp[2] = t2;
      dp[3] = t3;
      dp += 4;
      sp = (mlib_u8 *) sa;
      i += 4;

      /*
       * No rotation was done after the last word, so the remaining samples
       * continue with tab1, tab2, tab0 in that order.
       */
      if (i < size) {
        *dp++ = tab1[(*sp)];
        i++;
        sp++;
      }

      if (i < size) {
        *dp++ = tab2[(*sp)];
        i++;
        sp++;
      }

      if (i < size) {
        *dp = tab0[(*sp)];
      }
    }

  }
  else if (csize == 4) {
    mlib_s32 i, j;

    for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
      mlib_u32 *sa;
      mlib_u32 *tab0 = (mlib_u32 *) table[0];
      mlib_u32 *tab1 = (mlib_u32 *) table[1];
      mlib_u32 *tab2 = (mlib_u32 *) table[2];
      mlib_u32 *tab3 = (mlib_u32 *) table[3];
      mlib_u32 *tab;
      mlib_u32 s0, t0, t1, t2, t3;
      mlib_s32 off;
      mlib_s32 size = xsize * 4;
      mlib_u32 *dp = (mlib_u32 *) dst;
      mlib_u8 *sp = (void *)src;

      /* Number of leading bytes (0..3) before src is 4-byte aligned. */
      off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);

      /*
       * Consume the leading samples and rotate the four tables by off
       * positions so that tab0 matches the next sample's channel.
       */
      if (off == 1) {
        *dp++ = tab0[(*sp)];
        tab = tab0;
        tab0 = tab1;
        tab1 = tab2;
        tab2 = tab3;
        tab3 = tab;
        size--;
        sp++;
      }
      else if (off == 2) {
        *dp++ = tab0[sp[0]];
        *dp++ = tab1[sp[1]];
        /* Rotate by two: swap tab0 with tab2 and tab1 with tab3. */
        tab = tab0;
        tab0 = tab2;
        tab2 = tab;
        tab = tab1;
        tab1 = tab3;
        tab3 = tab;
        size -= 2;
        sp += 2;
      }
      else if (off == 3) {
        *dp++ = tab0[sp[0]];
        *dp++ = tab1[sp[1]];
        *dp++ = tab2[sp[2]];
        tab = tab3;
        tab3 = tab2;
        tab2 = tab1;
        tab1 = tab0;
        tab0 = tab;
        size -= 3;
        sp += 3;
      }

      sa = (mlib_u32 *) sp;

      /* Prime the pipeline with the first aligned source word. */
      s0 = sa[0];
      sa++;

      /*
       * Four samples per word and four channels: the channel phase is the
       * same for every word, so no rotation is needed inside the loop.
       */
      for (i = 0; i < size - 7; i += 4, dp += 4, sa++) {
        READ_U8_S32(tab0, tab1, tab2, tab3);
        s0 = sa[0];
        dp[0] = t0;
        dp[1] = t1;
        dp[2] = t2;
        dp[3] = t3;
      }

      /* Last full word: translate it without loading another one. */
      READ_U8_S32(tab0, tab1, tab2, tab3);
      dp[0] = t0;
      dp[1] = t1;
      dp[2] = t2;
      dp[3] = t3;
      dp += 4;
      sp = (mlib_u8 *) sa;
      i += 4;

      /* Remaining 0..3 samples, continuing with tab0, tab1, tab2. */
      if (i < size) {
        *dp++ = tab0[(*sp)];
        i++;
        sp++;
      }

      if (i < size) {
        *dp++ = tab1[(*sp)];
        i++;
        sp++;
      }

      if (i < size) {
        *dp = tab2[(*sp)];
      }
    }
  }
}
1710 
1711 /***************************************************************/
1712 void mlib_c_ImageLookUp_S16_S32(const mlib_s16 *src,
1713                                 mlib_s32       slb,
1714                                 mlib_s32       *dst,
1715                                 mlib_s32       dlb,
1716                                 mlib_s32       xsize,
1717                                 mlib_s32       ysize,
1718                                 mlib_s32       csize,
1719                                 const mlib_s32 **table)
1720 {
1721   const mlib_s32 *table_base[4];
1722   mlib_s32 c;
1723 
1724   for (c = 0; c < csize; c++) {
1725     table_base[c] = &table[c][32768];
1726   }
1727 
1728   MLIB_C_IMAGELOOKUP(mlib_s32, mlib_s16, table_base);
1729 }
1730 
1731 /***************************************************************/
1732 void mlib_c_ImageLookUp_U16_S32(const mlib_u16 *src,
1733                                 mlib_s32       slb,
1734                                 mlib_s32       *dst,
1735                                 mlib_s32       dlb,
1736                                 mlib_s32       xsize,
1737                                 mlib_s32       ysize,
1738                                 mlib_s32       csize,
1739                                 const mlib_s32 **table)
1740 {
1741   const mlib_s32 *table_base[4];
1742   mlib_s32 c;
1743 
1744   for (c = 0; c < csize; c++) {
1745     table_base[c] = &table[c][0];
1746   }
1747 
1748   MLIB_C_IMAGELOOKUP(mlib_s32, mlib_u16, table_base);
1749 }
1750 
1751 /***************************************************************/
1752 void mlib_c_ImageLookUp_S32_S32(const mlib_s32 *src,
1753                                 mlib_s32       slb,
1754                                 mlib_s32       *dst,
1755                                 mlib_s32       dlb,
1756                                 mlib_s32       xsize,
1757                                 mlib_s32       ysize,
1758                                 mlib_s32       csize,
1759                                 const mlib_s32 **table)
1760 {
1761   const mlib_s32 *table_base[4];
1762   mlib_s32 c;
1763 
1764   for (c = 0; c < csize; c++) {
1765     table_base[c] = &table[c][TABLE_SHIFT_S32];
1766   }
1767 
1768   MLIB_C_IMAGELOOKUP(mlib_s32, mlib_s32, table_base);
1769 }
1770 
1771 /***************************************************************/
1772 void mlib_c_ImageLookUpSI_U8_U8(const mlib_u8 *src,
1773                                 mlib_s32      slb,
1774                                 mlib_u8       *dst,
1775                                 mlib_s32      dlb,
1776                                 mlib_s32      xsize,
1777                                 mlib_s32      ysize,
1778                                 mlib_s32      csize,
1779                                 const mlib_u8 **table)
1780 {
1781 
1782   if ((xsize < 8) || ((xsize * ysize) < 250)) {
1783     MLIB_C_IMAGELOOKUPSI(mlib_u8, mlib_u8, table);
1784   }
1785   else if (csize == 2) {
1786 
1787     mlib_u16 tab[256];
1788     const mlib_u8 *tab0 = table[0];
1789     const mlib_u8 *tab1 = table[1];
1790     mlib_s32 i, j, s0, s1, s2;
1791 
1792     s0 = tab0[0];
1793     s1 = tab1[0];
1794     for (i = 1; i < 256; i++) {
1795 #ifdef _LITTLE_ENDIAN
1796       s2 = (s1 << 8) + s0;
1797 #else
1798       s2 = (s0 << 8) + s1;
1799 #endif /* _LITTLE_ENDIAN */
1800       s0 = tab0[i];
1801       s1 = tab1[i];
1802       tab[i - 1] = (mlib_u16) s2;
1803     }
1804 
1805 #ifdef _LITTLE_ENDIAN
1806     s2 = (s1 << 8) + s0;
1807 #else
1808     s2 = (s0 << 8) + s1;
1809 #endif /* _LITTLE_ENDIAN */
1810     tab[255] = (mlib_u16) s2;
1811 
1812     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
1813       mlib_s32 *da;
1814       mlib_u8 *dp = dst;
1815       mlib_u8 *sa = (void *)src;
1816       mlib_s32 s0, t0, s1, t1, t, t2, off;
1817       mlib_s32 size = xsize;
1818 
1819       if (((mlib_addr) dp & 1) == 0) {
1820 
1821         if (((mlib_addr) dp & 3) != 0) {
1822           *((mlib_u16 *) dp) = tab[sa[0]];
1823           sa++;
1824           size--;
1825           dp += 2;
1826         }
1827 
1828         da = (mlib_s32 *) dp;
1829 
1830         s0 = sa[0];
1831         s1 = sa[1];
1832         sa += 2;
1833 
1834         for (i = 0; i < size - 3; i += 2, da++, sa += 2) {
1835           t0 = tab[s0];
1836           t1 = tab[s1];
1837 #ifdef _LITTLE_ENDIAN
1838           t = (t1 << 16) + t0;
1839 #else
1840           t = (t0 << 16) + t1;
1841 #endif /* _LITTLE_ENDIAN */
1842           s0 = sa[0];
1843           s1 = sa[1];
1844           da[0] = t;
1845         }
1846 
1847         t0 = tab[s0];
1848         t1 = tab[s1];
1849 #ifdef _LITTLE_ENDIAN
1850         t = (t1 << 16) + t0;
1851 #else
1852         t = (t0 << 16) + t1;
1853 #endif /* _LITTLE_ENDIAN */
1854         da[0] = t;
1855         da++;
1856 
1857         if (size & 1)
1858           *((mlib_u16 *) da) = tab[sa[0]];
1859 
1860       }
1861       else {
1862 
1863         off = (mlib_s32) (4 - ((mlib_addr) dp & 3));
1864 
1865         if (off > 1) {
1866           t0 = tab[sa[0]];
1867 #ifdef _LITTLE_ENDIAN
1868           dp[1] = (t0 >> 8);
1869           dp[0] = t0;
1870 #else
1871           dp[0] = (t0 >> 8);
1872           dp[1] = t0;
1873 #endif /* _LITTLE_ENDIAN */
1874           sa++;
1875           size--;
1876           dp += 2;
1877         }
1878 
1879         t0 = tab[sa[0]];
1880         sa++;
1881 #ifdef _LITTLE_ENDIAN
1882         *dp++ = t0;
1883 #else
1884         *dp++ = (t0 >> 8);
1885 #endif /* _LITTLE_ENDIAN */
1886 
1887         da = (mlib_s32 *) dp;
1888 
1889         s0 = sa[0];
1890         s1 = sa[1];
1891         sa += 2;
1892 
1893         for (i = 0; i < size - 4; i += 2, da++, sa += 2) {
1894           t1 = tab[s0];
1895           t2 = tab[s1];
1896 #ifdef _LITTLE_ENDIAN
1897           t = (t0 >> 8) + (t1 << 8) + (t2 << 24);
1898 #else
1899           t = (t0 << 24) + (t1 << 8) + (t2 >> 8);
1900 #endif /* _LITTLE_ENDIAN */
1901           t0 = t2;
1902           s0 = sa[0];
1903           s1 = sa[1];
1904           da[0] = t;
1905         }
1906 
1907         t1 = tab[s0];
1908         t2 = tab[s1];
1909 #ifdef _LITTLE_ENDIAN
1910         t = (t0 >> 8) + (t1 << 8) + (t2 << 24);
1911 #else
1912         t = (t0 << 24) + (t1 << 8) + (t2 >> 8);
1913 #endif /* _LITTLE_ENDIAN */
1914         da[0] = t;
1915         da++;
1916         dp = (mlib_u8 *) da;
1917 #ifdef _LITTLE_ENDIAN
1918         dp[0] = (t2 >> 8);
1919 #else
1920         dp[0] = t2;
1921 #endif /* _LITTLE_ENDIAN */
1922 
1923         if ((size & 1) == 0) {
1924           t0 = tab[sa[0]];
1925 #ifdef _LITTLE_ENDIAN
1926           dp[2] = (t0 >> 8);
1927           dp[1] = t0;
1928 #else
1929           dp[1] = (t0 >> 8);
1930           dp[2] = t0;
1931 #endif /* _LITTLE_ENDIAN */
1932         }
1933       }
1934     }
1935 
1936   }
1937   else if (csize == 3) {
1938     mlib_u32 tab[256];
1939     const mlib_u8 *tab0 = table[0];
1940     const mlib_u8 *tab1 = table[1];
1941     const mlib_u8 *tab2 = table[2];
1942     mlib_s32 i, j;
1943     mlib_u32 s0, s1, s2, s3;
1944 
1945     s0 = tab0[0];
1946     s1 = tab1[0];
1947     s2 = tab2[0];
1948     for (i = 1; i < 256; i++) {
1949 #ifdef _LITTLE_ENDIAN
1950       s3 = (s2 << 24) + (s1 << 16) + (s0 << 8);
1951 #else
1952       s3 = (s0 << 16) + (s1 << 8) + s2;
1953 #endif /* _LITTLE_ENDIAN */
1954       s0 = tab0[i];
1955       s1 = tab1[i];
1956       s2 = tab2[i];
1957       tab[i - 1] = s3;
1958     }
1959 
1960 #ifdef _LITTLE_ENDIAN
1961     s3 = (s2 << 24) + (s1 << 16) + (s0 << 8);
1962 #else
1963     s3 = (s0 << 16) + (s1 << 8) + s2;
1964 #endif /* _LITTLE_ENDIAN */
1965     tab[255] = s3;
1966 
1967     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
1968       mlib_u32 *da;
1969       mlib_u8 *dp = dst;
1970       mlib_u8 *sa = (void *)src, *ptr;
1971       mlib_u32 s0, s1, t0, t1;
1972       mlib_u32 res1, res2;
1973       mlib_s32 size = xsize, off;
1974 
1975       off = (mlib_s32) ((mlib_addr) dp & 3);
1976 
1977       for (i = 0; i < off; i++) {
1978         ptr = (mlib_u8 *) (tab + sa[0]);
1979         dp[0] = ptr[1];
1980         dp[1] = ptr[2];
1981         dp[2] = ptr[3];
1982         dp += 3;
1983         sa++;
1984       }
1985 
1986       size -= off;
1987       da = (mlib_u32 *) dp;
1988       s0 = sa[0];
1989       s1 = sa[1];
1990       sa += 2;
1991 
1992       for (i = 0; i < size - 7; i += 4, da += 3, sa += 4) {
1993         t0 = tab[s0];
1994         t1 = tab[s1];
1995 #ifdef _LITTLE_ENDIAN
1996         da[0] = (t0 >> 8) + (t1 << 16);
1997         res2 = (t1 >> 16);
1998 #else
1999         da[0] = (t0 << 8) + (t1 >> 16);
2000         res2 = (t1 << 16);
2001 #endif /* _LITTLE_ENDIAN */
2002         s0 = sa[0];
2003         s1 = sa[1];
2004         t0 = tab[s0];
2005         t1 = tab[s1];
2006 #ifdef _LITTLE_ENDIAN
2007         res2 += (t0 << 8);
2008         res1 = (t0 >> 24) + t1;
2009 #else
2010         res2 += (t0 >> 8);
2011         res1 = (t0 << 24) + t1;
2012 #endif /* _LITTLE_ENDIAN */
2013         s0 = sa[2];
2014         s1 = sa[3];
2015         da[1] = res2;
2016         da[2] = res1;
2017       }
2018 
2019       t0 = tab[s0];
2020       t1 = tab[s1];
2021 #ifdef _LITTLE_ENDIAN
2022       da[0] = (t0 >> 8) + (t1 << 16);
2023       res2 = (t1 >> 16);
2024 #else
2025       da[0] = (t0 << 8) + (t1 >> 16);
2026       res2 = (t1 << 16);
2027 #endif /* _LITTLE_ENDIAN */
2028       s0 = sa[0];
2029       s1 = sa[1];
2030       t0 = tab[s0];
2031       t1 = tab[s1];
2032 #ifdef _LITTLE_ENDIAN
2033       res2 += (t0 << 8);
2034       res1 = (t0 >> 24) + t1;
2035 #else
2036       res2 += (t0 >> 8);
2037       res1 = (t0 << 24) + t1;
2038 #endif /* _LITTLE_ENDIAN */
2039       da[1] = res2;
2040       da[2] = res1;
2041       da += 3;
2042       sa += 2;
2043       dp = (mlib_u8 *) da;
2044       i += 4;
2045 
2046       for (; i < size; i++) {
2047         ptr = (mlib_u8 *) (tab + sa[0]);
2048         dp[0] = ptr[1];
2049         dp[1] = ptr[2];
2050         dp[2] = ptr[3];
2051         dp += 3;
2052         sa++;
2053       }
2054     }
2055 
2056   }
2057   else if (csize == 4) {
2058     mlib_u32 tab[256];
2059     const mlib_u8 *tab0 = table[0];
2060     const mlib_u8 *tab1 = table[1];
2061     const mlib_u8 *tab2 = table[2];
2062     const mlib_u8 *tab3 = table[3];
2063     mlib_s32 i, j;
2064     mlib_u32 s0, s1, s2, s3, s4;
2065 
2066     s0 = tab0[0];
2067     s1 = tab1[0];
2068     s2 = tab2[0];
2069     s3 = tab3[0];
2070     for (i = 1; i < 256; i++) {
2071 #ifdef _LITTLE_ENDIAN
2072       s4 = (s3 << 24) + (s2 << 16) + (s1 << 8) + s0;
2073 #else
2074       s4 = (s0 << 24) + (s1 << 16) + (s2 << 8) + s3;
2075 #endif /* _LITTLE_ENDIAN */
2076       s0 = tab0[i];
2077       s1 = tab1[i];
2078       s2 = tab2[i];
2079       s3 = tab3[i];
2080       tab[i - 1] = s4;
2081     }
2082 
2083 #ifdef _LITTLE_ENDIAN
2084     s4 = (s3 << 24) + (s2 << 16) + (s1 << 8) + s0;
2085 #else
2086     s4 = (s0 << 24) + (s1 << 16) + (s2 << 8) + s3;
2087 #endif /* _LITTLE_ENDIAN */
2088     tab[255] = s4;
2089 
2090     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
2091       mlib_u32 *da;
2092       mlib_u8 *dp = dst;
2093       mlib_u8 *sa = (void *)src;
2094       mlib_u32 s0, t0, s1, t1, t2;
2095       mlib_s32 size = xsize, off;
2096       mlib_u32 shift, shift1, res1, res2;
2097 
2098       if (((mlib_addr) dp & 3) == 0) {
2099 
2100         da = (mlib_u32 *) dp;
2101 
2102         s0 = sa[0];
2103         s1 = sa[1];
2104         sa += 2;
2105 
2106         for (i = 0; i < size - 3; i += 2, da += 2, sa += 2) {
2107           t0 = tab[s0];
2108           t1 = tab[s1];
2109           s0 = sa[0];
2110           s1 = sa[1];
2111           da[0] = t0;
2112           da[1] = t1;
2113         }
2114 
2115         t0 = tab[s0];
2116         t1 = tab[s1];
2117         da[0] = t0;
2118         da[1] = t1;
2119 
2120         if (size & 1)
2121           da[2] = tab[sa[0]];
2122 
2123       }
2124       else {
2125 
2126         off = (mlib_s32) (4 - ((mlib_addr) dp & 3));
2127         shift = 8 * off;
2128         shift1 = 32 - shift;
2129 
2130         for (i = 0; i < off; i++) {
2131           dp[i] = table[i][sa[0]];
2132         }
2133 
2134         dp += i;
2135         t0 = tab[sa[0]];
2136         sa++;
2137 
2138         da = (mlib_u32 *) dp;
2139 
2140         s0 = sa[0];
2141         s1 = sa[1];
2142         sa += 2;
2143 
2144         for (i = 0; i < size - 4; i += 2, da += 2, sa += 2) {
2145           t1 = tab[s0];
2146           t2 = tab[s1];
2147 #ifdef _LITTLE_ENDIAN
2148           res1 = (t0 >> shift) + (t1 << shift1);
2149           res2 = (t1 >> shift) + (t2 << shift1);
2150 #else
2151           res1 = (t0 << shift) + (t1 >> shift1);
2152           res2 = (t1 << shift) + (t2 >> shift1);
2153 #endif /* _LITTLE_ENDIAN */
2154           t0 = t2;
2155           s0 = sa[0];
2156           s1 = sa[1];
2157           da[0] = res1;
2158           da[1] = res2;
2159         }
2160 
2161         t1 = tab[s0];
2162         t2 = tab[s1];
2163 #ifdef _LITTLE_ENDIAN
2164         res1 = (t0 >> shift) + (t1 << shift1);
2165         res2 = (t1 >> shift) + (t2 << shift1);
2166 #else
2167         res1 = (t0 << shift) + (t1 >> shift1);
2168         res2 = (t1 << shift) + (t2 >> shift1);
2169 #endif /* _LITTLE_ENDIAN */
2170         da[0] = res1;
2171         da[1] = res2;
2172 #ifdef _LITTLE_ENDIAN
2173         t0 = (da[2] >> shift1);
2174         da[2] = (t2 >> shift) + (t0 << shift1);
2175 #else
2176         t0 = (da[2] << shift1);
2177         da[2] = (t2 << shift) + (t0 >> shift1);
2178 #endif /* _LITTLE_ENDIAN */
2179         da += 2;
2180         dp = (mlib_u8 *) da + (4 - off);
2181 
2182         if ((size & 1) == 0) {
2183           t0 = tab[sa[0]];
2184 #ifdef _LITTLE_ENDIAN
2185           dp[3] = (mlib_u8) (t0 >> 24);
2186           dp[2] = (mlib_u8) (t0 >> 16);
2187           dp[1] = (mlib_u8) (t0 >> 8);
2188           dp[0] = (mlib_u8) t0;
2189 #else
2190           dp[0] = (mlib_u8) (t0 >> 24);
2191           dp[1] = (mlib_u8) (t0 >> 16);
2192           dp[2] = (mlib_u8) (t0 >> 8);
2193           dp[3] = (mlib_u8) t0;
2194 #endif /* _LITTLE_ENDIAN */
2195         }
2196       }
2197     }
2198   }
2199 }
2200 
2201 /***************************************************************/
2202 
2203 #ifdef _MSC_VER
2204 #pragma optimize("", off)
2205 #endif /* _MSC_VER */
2206 
2207 void mlib_c_ImageLookUpSI_S16_U8(const mlib_s16 *src,
2208                                  mlib_s32       slb,
2209                                  mlib_u8        *dst,
2210                                  mlib_s32       dlb,
2211                                  mlib_s32       xsize,
2212                                  mlib_s32       ysize,
2213                                  mlib_s32       csize,
2214                                  const mlib_u8  **table)
2215 {
2216   const mlib_u8 *table_base[4];
2217   mlib_s32 c;
2218 
2219   for (c = 0; c < csize; c++) {
2220     table_base[c] = &table[c][32768];
2221   }
2222 
2223   if ((xsize < 8) || (csize == 2)) {
2224     MLIB_C_IMAGELOOKUPSI(mlib_u8, mlib_s16, table_base);
2225   }
2226   else if (csize == 3) {
2227     mlib_s32 i, j;
2228 
2229     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
2230       mlib_u32 *da;
2231       mlib_u8 *dp = dst;
2232       mlib_s16 *sa = (void *)src;
2233       const mlib_u8 *tab0 = table_base[0];
2234       const mlib_u8 *tab1 = table_base[1];
2235       const mlib_u8 *tab2 = table_base[2];
2236       mlib_s32 s0, s1;
2237       mlib_u32 t0, t1, t2, t3, t4, t5;
2238       mlib_u32 res1, res2;
2239       mlib_s32 size = xsize, off;
2240 
2241       off = (mlib_s32) ((mlib_addr) dp & 3);
2242 
2243       for (i = 0; i < off; i++) {
2244         s0 = *sa++;
2245         dp[0] = tab0[s0];
2246         dp[1] = tab1[s0];
2247         dp[2] = tab2[s0];
2248         dp += 3;
2249       }
2250 
2251       size -= off;
2252       da = (mlib_u32 *) dp;
2253       s0 = sa[0];
2254       s1 = sa[1];
2255       sa += 2;
2256 
2257       for (i = 0; i < size - 7; i += 4, da += 3, sa += 4) {
2258         t0 = tab0[s0];
2259         t1 = tab1[s0];
2260         t2 = tab2[s0];
2261         t3 = tab0[s1];
2262         t4 = tab1[s1];
2263         t5 = tab2[s1];
2264 #ifdef _LITTLE_ENDIAN
2265         da[0] = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2266         res2 = (t5 << 8) + t4;
2267 #else
2268         da[0] = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2269         res2 = (t4 << 24) + (t5 << 16);
2270 #endif /* _LITTLE_ENDIAN */
2271         s0 = sa[0];
2272         s1 = sa[1];
2273         t0 = tab0[s0];
2274         t1 = tab1[s0];
2275         t2 = tab2[s0];
2276         t3 = tab0[s1];
2277         t4 = tab1[s1];
2278         t5 = tab2[s1];
2279 #ifdef _LITTLE_ENDIAN
2280         res2 += ((t1 << 24) + (t0 << 16));
2281         res1 = (t5 << 24) + (t4 << 16) + (t3 << 8) + t2;
2282 #else
2283         res2 += ((t0 << 8) + t1);
2284         res1 = (t2 << 24) + (t3 << 16) + (t4 << 8) + t5;
2285 #endif /* _LITTLE_ENDIAN */
2286         s0 = sa[2];
2287         s1 = sa[3];
2288         da[1] = res2;
2289         da[2] = res1;
2290       }
2291 
2292       t0 = tab0[s0];
2293       t1 = tab1[s0];
2294       t2 = tab2[s0];
2295       t3 = tab0[s1];
2296       t4 = tab1[s1];
2297       t5 = tab2[s1];
2298 #ifdef _LITTLE_ENDIAN
2299       da[0] = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2300       res2 = (t5 << 8) + t4;
2301 #else
2302       da[0] = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2303       res2 = (t4 << 24) + (t5 << 16);
2304 #endif /* _LITTLE_ENDIAN */
2305       s0 = sa[0];
2306       s1 = sa[1];
2307       t0 = tab0[s0];
2308       t1 = tab1[s0];
2309       t2 = tab2[s0];
2310       t3 = tab0[s1];
2311       t4 = tab1[s1];
2312       t5 = tab2[s1];
2313 #ifdef _LITTLE_ENDIAN
2314       res2 += ((t1 << 24) + (t0 << 16));
2315       res1 = (t5 << 24) + (t4 << 16) + (t3 << 8) + t2;
2316 #else
2317       res2 += ((t0 << 8) + t1);
2318       res1 = (t2 << 24) + (t3 << 16) + (t4 << 8) + t5;
2319 #endif /* _LITTLE_ENDIAN */
2320       da[1] = res2;
2321       da[2] = res1;
2322       da += 3;
2323       sa += 2;
2324       dp = (mlib_u8 *) da;
2325       i += 4;
2326 
2327       for (; i < size; i++) {
2328         s0 = *sa++;
2329         dp[0] = tab0[s0];
2330         dp[1] = tab1[s0];
2331         dp[2] = tab2[s0];
2332         dp += 3;
2333       }
2334     }
2335 
2336   }
2337   else if (csize == 4) {
2338     mlib_s32 i, j;
2339 
2340     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
2341       mlib_u32 *da;
2342       mlib_u8 *dp = dst;
2343       mlib_s16 *sa = (void *)src;
2344       const mlib_u8 *tab0 = table_base[0];
2345       const mlib_u8 *tab1 = table_base[1];
2346       const mlib_u8 *tab2 = table_base[2];
2347       const mlib_u8 *tab3 = table_base[3];
2348       mlib_s32 s0;
2349       mlib_u32 t0, t1, t2, t3;
2350       mlib_s32 size = xsize, off;
2351       mlib_u32 shift, shift1, res1, res2, res;
2352 
2353       if (((mlib_addr) dp & 3) == 0) {
2354 
2355         da = (mlib_u32 *) dp;
2356 
2357         s0 = sa[0];
2358         sa++;
2359 
2360         for (i = 0; i < size - 1; i++, da++, sa++) {
2361           t0 = tab0[s0];
2362           t1 = tab1[s0];
2363           t2 = tab2[s0];
2364           t3 = tab3[s0];
2365 #ifdef _LITTLE_ENDIAN
2366           res = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2367 #else
2368           res = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2369 #endif /* _LITTLE_ENDIAN */
2370           s0 = sa[0];
2371           da[0] = res;
2372         }
2373 
2374         t0 = tab0[s0];
2375         t1 = tab1[s0];
2376         t2 = tab2[s0];
2377         t3 = tab3[s0];
2378 #ifdef _LITTLE_ENDIAN
2379         res = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2380 #else
2381         res = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2382 #endif /* _LITTLE_ENDIAN */
2383         da[0] = res;
2384 
2385       }
2386       else {
2387 
2388         off = (mlib_s32) (4 - ((mlib_addr) dp & 3));
2389         shift = 8 * off;
2390         shift1 = 32 - shift;
2391 
2392         s0 = *sa++;
2393 
2394         for (i = 0; i < off; i++) {
2395           dp[i] = table_base[i][s0];
2396         }
2397 
2398         dp += i;
2399         da = (mlib_u32 *) dp;
2400 
2401         t0 = tab0[s0];
2402         t1 = tab1[s0];
2403         t2 = tab2[s0];
2404         t3 = tab3[s0];
2405 
2406 #ifdef _LITTLE_ENDIAN
2407         res1 = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2408 #else
2409         res1 = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2410 #endif /* _LITTLE_ENDIAN */
2411 
2412         s0 = sa[0];
2413         sa++;
2414 
2415         for (i = 0; i < size - 2; i++, da++, sa++) {
2416           t0 = tab0[s0];
2417           t1 = tab1[s0];
2418           t2 = tab2[s0];
2419           t3 = tab3[s0];
2420 #ifdef _LITTLE_ENDIAN
2421           res2 = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2422           res = (res1 >> shift) + (res2 << shift1);
2423 #else
2424           res2 = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2425           res = (res1 << shift) + (res2 >> shift1);
2426 #endif /* _LITTLE_ENDIAN */
2427           res1 = res2;
2428           s0 = sa[0];
2429           da[0] = res;
2430         }
2431 
2432         t0 = tab0[s0];
2433         t1 = tab1[s0];
2434         t2 = tab2[s0];
2435         t3 = tab3[s0];
2436 #ifdef _LITTLE_ENDIAN
2437         res2 = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2438         res = (res1 >> shift) + (res2 << shift1);
2439 #else
2440         res2 = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2441         res = (res1 << shift) + (res2 >> shift1);
2442 #endif /* _LITTLE_ENDIAN */
2443         da[0] = res;
2444 #ifdef _LITTLE_ENDIAN
2445         res1 = (da[1] >> shift1);
2446         da[1] = (res2 >> shift) + (res1 << shift1);
2447 #else
2448         res1 = (da[1] << shift1);
2449         da[1] = (res2 << shift) + (res1 >> shift1);
2450 #endif /* _LITTLE_ENDIAN */
2451       }
2452     }
2453   }
2454 }
2455 
2456 #ifdef _MSC_VER
2457 #pragma optimize("", on)
2458 #endif /* _MSC_VER */
2459 
2460 /***************************************************************/
2461 void mlib_c_ImageLookUpSI_U16_U8(const mlib_u16 *src,
2462                                  mlib_s32       slb,
2463                                  mlib_u8        *dst,
2464                                  mlib_s32       dlb,
2465                                  mlib_s32       xsize,
2466                                  mlib_s32       ysize,
2467                                  mlib_s32       csize,
2468                                  const mlib_u8  **table)
2469 {
2470   const mlib_u8 *table_base[4];
2471   mlib_s32 c;
2472 
2473   for (c = 0; c < csize; c++) {
2474     table_base[c] = &table[c][0];
2475   }
2476 
2477   if ((xsize < 8) || (csize == 2)) {
2478     MLIB_C_IMAGELOOKUPSI(mlib_u8, mlib_u16, table_base);
2479   }
2480   else if (csize == 3) {
2481     mlib_s32 i, j;
2482 
2483     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
2484       mlib_u32 *da;
2485       mlib_u8 *dp = dst;
2486       mlib_u16 *sa = (void *)src;
2487       const mlib_u8 *tab0 = table_base[0];
2488       const mlib_u8 *tab1 = table_base[1];
2489       const mlib_u8 *tab2 = table_base[2];
2490       mlib_s32 s0, s1;
2491       mlib_u32 t0, t1, t2, t3, t4, t5;
2492       mlib_u32 res1, res2;
2493       mlib_s32 size = xsize, off;
2494 
2495       off = (mlib_s32) ((mlib_addr) dp & 3);
2496 
2497       for (i = 0; i < off; i++) {
2498         s0 = *sa++;
2499         dp[0] = tab0[s0];
2500         dp[1] = tab1[s0];
2501         dp[2] = tab2[s0];
2502         dp += 3;
2503       }
2504 
2505       size -= off;
2506       da = (mlib_u32 *) dp;
2507       s0 = sa[0];
2508       s1 = sa[1];
2509       sa += 2;
2510 
2511       for (i = 0; i < size - 7; i += 4, da += 3, sa += 4) {
2512         t0 = tab0[s0];
2513         t1 = tab1[s0];
2514         t2 = tab2[s0];
2515         t3 = tab0[s1];
2516         t4 = tab1[s1];
2517         t5 = tab2[s1];
2518 #ifdef _LITTLE_ENDIAN
2519         da[0] = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2520         res2 = (t5 << 8) + t4;
2521 #else
2522         da[0] = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2523         res2 = (t4 << 24) + (t5 << 16);
2524 #endif /* _LITTLE_ENDIAN */
2525         s0 = sa[0];
2526         s1 = sa[1];
2527         t0 = tab0[s0];
2528         t1 = tab1[s0];
2529         t2 = tab2[s0];
2530         t3 = tab0[s1];
2531         t4 = tab1[s1];
2532         t5 = tab2[s1];
2533 #ifdef _LITTLE_ENDIAN
2534         res2 += ((t1 << 24) + (t0 << 16));
2535         res1 = (t5 << 24) + (t4 << 16) + (t3 << 8) + t2;
2536 #else
2537         res2 += ((t0 << 8) + t1);
2538         res1 = (t2 << 24) + (t3 << 16) + (t4 << 8) + t5;
2539 #endif /* _LITTLE_ENDIAN */
2540         s0 = sa[2];
2541         s1 = sa[3];
2542         da[1] = res2;
2543         da[2] = res1;
2544       }
2545 
2546       t0 = tab0[s0];
2547       t1 = tab1[s0];
2548       t2 = tab2[s0];
2549       t3 = tab0[s1];
2550       t4 = tab1[s1];
2551       t5 = tab2[s1];
2552 #ifdef _LITTLE_ENDIAN
2553       da[0] = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2554       res2 = (t5 << 8) + t4;
2555 #else
2556       da[0] = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2557       res2 = (t4 << 24) + (t5 << 16);
2558 #endif /* _LITTLE_ENDIAN */
2559       s0 = sa[0];
2560       s1 = sa[1];
2561       t0 = tab0[s0];
2562       t1 = tab1[s0];
2563       t2 = tab2[s0];
2564       t3 = tab0[s1];
2565       t4 = tab1[s1];
2566       t5 = tab2[s1];
2567 #ifdef _LITTLE_ENDIAN
2568       res2 += ((t1 << 24) + (t0 << 16));
2569       res1 = (t5 << 24) + (t4 << 16) + (t3 << 8) + t2;
2570 #else
2571       res2 += ((t0 << 8) + t1);
2572       res1 = (t2 << 24) + (t3 << 16) + (t4 << 8) + t5;
2573 #endif /* _LITTLE_ENDIAN */
2574       da[1] = res2;
2575       da[2] = res1;
2576       da += 3;
2577       sa += 2;
2578       dp = (mlib_u8 *) da;
2579       i += 4;
2580 
2581       for (; i < size; i++) {
2582         s0 = *sa++;
2583         dp[0] = tab0[s0];
2584         dp[1] = tab1[s0];
2585         dp[2] = tab2[s0];
2586         dp += 3;
2587       }
2588     }
2589 
2590   }
2591   else if (csize == 4) {
2592     mlib_s32 i, j;
2593 
2594     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
2595       mlib_u32 *da;
2596       mlib_u8 *dp = dst;
2597       mlib_u16 *sa = (void *)src;
2598       const mlib_u8 *tab0 = table_base[0];
2599       const mlib_u8 *tab1 = table_base[1];
2600       const mlib_u8 *tab2 = table_base[2];
2601       const mlib_u8 *tab3 = table_base[3];
2602       mlib_s32 s0;
2603       mlib_u32 t0, t1, t2, t3;
2604       mlib_s32 size = xsize, off;
2605       mlib_u32 shift, shift1, res1, res2, res;
2606 
2607       if (((mlib_addr) dp & 3) == 0) {
2608 
2609         da = (mlib_u32 *) dp;
2610 
2611         s0 = sa[0];
2612         sa++;
2613 
2614         for (i = 0; i < size - 1; i++, da++, sa++) {
2615           t0 = tab0[s0];
2616           t1 = tab1[s0];
2617           t2 = tab2[s0];
2618           t3 = tab3[s0];
2619 #ifdef _LITTLE_ENDIAN
2620           res = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2621 #else
2622           res = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2623 #endif /* _LITTLE_ENDIAN */
2624           s0 = sa[0];
2625           da[0] = res;
2626         }
2627 
2628         t0 = tab0[s0];
2629         t1 = tab1[s0];
2630         t2 = tab2[s0];
2631         t3 = tab3[s0];
2632 #ifdef _LITTLE_ENDIAN
2633         res = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2634 #else
2635         res = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2636 #endif /* _LITTLE_ENDIAN */
2637         da[0] = res;
2638 
2639       }
2640       else {
2641 
2642         off = (mlib_s32) (4 - ((mlib_addr) dp & 3));
2643         shift = 8 * off;
2644         shift1 = 32 - shift;
2645 
2646         s0 = *sa++;
2647 
2648         for (i = 0; i < off; i++) {
2649           dp[i] = table_base[i][s0];
2650         }
2651 
2652         dp += i;
2653         da = (mlib_u32 *) dp;
2654 
2655         t0 = tab0[s0];
2656         t1 = tab1[s0];
2657         t2 = tab2[s0];
2658         t3 = tab3[s0];
2659 
2660 #ifdef _LITTLE_ENDIAN
2661         res1 = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2662 #else
2663         res1 = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2664 #endif /* _LITTLE_ENDIAN */
2665 
2666         s0 = sa[0];
2667         sa++;
2668 
2669         for (i = 0; i < size - 2; i++, da++, sa++) {
2670           t0 = tab0[s0];
2671           t1 = tab1[s0];
2672           t2 = tab2[s0];
2673           t3 = tab3[s0];
2674 #ifdef _LITTLE_ENDIAN
2675           res2 = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2676           res = (res1 >> shift) + (res2 << shift1);
2677 #else
2678           res2 = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2679           res = (res1 << shift) + (res2 >> shift1);
2680 #endif /* _LITTLE_ENDIAN */
2681           res1 = res2;
2682           s0 = sa[0];
2683           da[0] = res;
2684         }
2685 
2686         t0 = tab0[s0];
2687         t1 = tab1[s0];
2688         t2 = tab2[s0];
2689         t3 = tab3[s0];
2690 #ifdef _LITTLE_ENDIAN
2691         res2 = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0;
2692         res = (res1 >> shift) + (res2 << shift1);
2693 #else
2694         res2 = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3;
2695         res = (res1 << shift) + (res2 >> shift1);
2696 #endif /* _LITTLE_ENDIAN */
2697         da[0] = res;
2698 #ifdef _LITTLE_ENDIAN
2699         res1 = (da[1] >> shift1);
2700         da[1] = (res2 >> shift) + (res1 << shift1);
2701 #else
2702         res1 = (da[1] << shift1);
2703         da[1] = (res2 << shift) + (res1 >> shift1);
2704 #endif /* _LITTLE_ENDIAN */
2705       }
2706     }
2707   }
2708 }
2709 
2710 /***************************************************************/
2711 void mlib_c_ImageLookUpSI_S32_U8(const mlib_s32 *src,
2712                                  mlib_s32       slb,
2713                                  mlib_u8        *dst,
2714                                  mlib_s32       dlb,
2715                                  mlib_s32       xsize,
2716                                  mlib_s32       ysize,
2717                                  mlib_s32       csize,
2718                                  const mlib_u8  **table)
2719 {
2720   const mlib_u8 *table_base[4];
2721   mlib_s32 c;
2722 
2723   for (c = 0; c < csize; c++) {
2724     table_base[c] = &table[c][TABLE_SHIFT_S32];
2725   }
2726 
2727   MLIB_C_IMAGELOOKUPSI(mlib_u8, mlib_s32, table_base);
2728 }
2729 
2730 /***************************************************************/
2731 void mlib_c_ImageLookUpSI_U8_S16(const mlib_u8  *src,
2732                                  mlib_s32       slb,
2733                                  mlib_s16       *dst,
2734                                  mlib_s32       dlb,
2735                                  mlib_s32       xsize,
2736                                  mlib_s32       ysize,
2737                                  mlib_s32       csize,
2738                                  const mlib_s16 **table)
2739 {
2740 
2741   if ((xsize < 4) || ((xsize * ysize) < 250)) {
2742     MLIB_C_IMAGELOOKUPSI(mlib_s16, mlib_u8, table);
2743 
2744   }
2745   else if (csize == 2) {
2746     mlib_u32 tab[256];
2747     mlib_u16 *tab0 = (mlib_u16 *) table[0];
2748     mlib_u16 *tab1 = (mlib_u16 *) table[1];
2749     mlib_s32 i, j;
2750     mlib_u32 s0, s1, s2;
2751 
2752     s0 = tab0[0];
2753     s1 = tab1[0];
2754     for (i = 1; i < 256; i++) {
2755 #ifdef _LITTLE_ENDIAN
2756       s2 = (s1 << 16) + s0;
2757 #else
2758       s2 = (s0 << 16) + s1;
2759 #endif /* _LITTLE_ENDIAN */
2760       s0 = tab0[i];
2761       s1 = tab1[i];
2762       tab[i - 1] = s2;
2763     }
2764 
2765 #ifdef _LITTLE_ENDIAN
2766     s2 = (s1 << 16) + s0;
2767 #else
2768     s2 = (s0 << 16) + s1;
2769 #endif /* _LITTLE_ENDIAN */
2770     tab[255] = s2;
2771 
2772     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
2773       mlib_u32 *da;
2774       mlib_u16 *dp = (mlib_u16 *) dst;
2775       mlib_u8 *sa = (void *)src;
2776       mlib_u32 s0, t0, s1, t1, t2;
2777       mlib_u32 res1, res2;
2778       mlib_s32 size = xsize;
2779 
2780       if (((mlib_addr) dp & 3) == 0) {
2781 
2782         da = (mlib_u32 *) dp;
2783         s0 = sa[0];
2784         s1 = sa[1];
2785         sa += 2;
2786 
2787         for (i = 0; i < size - 3; i += 2, da += 2, sa += 2) {
2788           t0 = tab[s0];
2789           t1 = tab[s1];
2790           s0 = sa[0];
2791           s1 = sa[1];
2792           da[0] = t0;
2793           da[1] = t1;
2794         }
2795 
2796         t0 = tab[s0];
2797         t1 = tab[s1];
2798         da[0] = t0;
2799         da[1] = t1;
2800 
2801         if (size & 1)
2802           da[2] = tab[sa[0]];
2803 
2804       }
2805       else {
2806 
2807         t0 = tab[*sa++];
2808 #ifdef _LITTLE_ENDIAN
2809         *dp++ = (mlib_u16) (t0);
2810 #else
2811         *dp++ = (mlib_u16) (t0 >> 16);
2812 #endif /* _LITTLE_ENDIAN */
2813         da = (mlib_u32 *) dp;
2814         s0 = sa[0];
2815         s1 = sa[1];
2816         sa += 2;
2817 
2818         for (i = 0; i < size - 4; i += 2, da += 2, sa += 2) {
2819           t1 = tab[s0];
2820           t2 = tab[s1];
2821 #ifdef _LITTLE_ENDIAN
2822           res1 = (t0 >> 16) + (t1 << 16);
2823           res2 = (t1 >> 16) + (t2 << 16);
2824 #else
2825           res1 = (t0 << 16) + (t1 >> 16);
2826           res2 = (t1 << 16) + (t2 >> 16);
2827 #endif /* _LITTLE_ENDIAN */
2828           t0 = t2;
2829           s0 = sa[0];
2830           s1 = sa[1];
2831           da[0] = res1;
2832           da[1] = res2;
2833         }
2834 
2835         t1 = tab[s0];
2836         t2 = tab[s1];
2837 #ifdef _LITTLE_ENDIAN
2838         res1 = (t0 >> 16) + (t1 << 16);
2839         res2 = (t1 >> 16) + (t2 << 16);
2840 #else
2841         res1 = (t0 << 16) + (t1 >> 16);
2842         res2 = (t1 << 16) + (t2 >> 16);
2843 #endif /* _LITTLE_ENDIAN */
2844         da[0] = res1;
2845         da[1] = res2;
2846         da += 2;
2847         dp = (mlib_u16 *) da;
2848 #ifdef _LITTLE_ENDIAN
2849         dp[0] = (mlib_u16) (t2 >> 16);
2850 #else
2851         dp[0] = (mlib_u16) t2;
2852 #endif /* _LITTLE_ENDIAN */
2853 
2854         if ((size & 1) == 0) {
2855           t0 = tab[sa[0]];
2856 #ifdef _LITTLE_ENDIAN
2857           dp[2] = (mlib_u16) (t0 >> 16);
2858           dp[1] = (mlib_u16) t0;
2859 #else
2860           dp[1] = (mlib_u16) (t0 >> 16);
2861           dp[2] = (mlib_u16) t0;
2862 #endif /* _LITTLE_ENDIAN */
2863         }
2864       }
2865     }
2866 
2867   }
2868   else if (csize == 3) {
2869     mlib_u32 tab[512];
2870     mlib_u16 *tab0 = (mlib_u16 *) table[0];
2871     mlib_u16 *tab1 = (mlib_u16 *) table[1];
2872     mlib_u16 *tab2 = (mlib_u16 *) table[2];
2873     mlib_s32 i, j;
2874     mlib_u32 s0, s1, s2, s3, s4;
2875 
2876     s0 = tab0[0];
2877     s1 = tab1[0];
2878     s2 = tab2[0];
2879     for (i = 1; i < 256; i++) {
2880 #ifdef _LITTLE_ENDIAN
2881       s3 = (s0 << 16);
2882       s4 = (s2 << 16) + s1;
2883 #else
2884       s3 = s0;
2885       s4 = (s1 << 16) + s2;
2886 #endif /* _LITTLE_ENDIAN */
2887       s0 = tab0[i];
2888       s1 = tab1[i];
2889       s2 = tab2[i];
2890       tab[2 * i - 2] = s3;
2891       tab[2 * i - 1] = s4;
2892     }
2893 
2894 #ifdef _LITTLE_ENDIAN
2895     s4 = (s2 << 16) + s1;
2896     tab[510] = s0 << 16;
2897 #else
2898     s4 = (s1 << 16) + s2;
2899     tab[510] = s0;
2900 #endif /* _LITTLE_ENDIAN */
2901     tab[511] = s4;
2902 
2903     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
2904       mlib_u32 *da;
2905       mlib_u16 *dp = (mlib_u16 *) dst, *ptr;
2906       mlib_u8 *sa = (void *)src;
2907       mlib_u32 s0, s1, t0, t1, t2, t3;
2908       mlib_u32 res1, res2;
2909       mlib_s32 size = xsize, off;
2910 
2911       off = (mlib_s32) ((mlib_addr) dp & 3);
2912 
2913       if (off != 0) {
2914         ptr = (mlib_u16 *) (tab + 2 * sa[0]);
2915         dp[0] = ptr[1];
2916         dp[1] = ptr[2];
2917         dp[2] = ptr[3];
2918         dp += 3;
2919         sa++;
2920         size--;
2921       }
2922 
2923       da = (mlib_u32 *) dp;
2924       s0 = sa[0] << 3;
2925       s1 = sa[1] << 3;
2926       sa += 2;
2927 
2928       for (i = 0; i < size - 3; i += 2, da += 3, sa += 2) {
2929         t0 = *(mlib_u32 *) ((mlib_u8 *) tab + s0);
2930         t1 = *(mlib_u32 *) ((mlib_u8 *) tab + s0 + 4);
2931         t2 = *(mlib_u32 *) ((mlib_u8 *) tab + s1);
2932         t3 = *(mlib_u32 *) ((mlib_u8 *) tab + s1 + 4);
2933 #ifdef _LITTLE_ENDIAN
2934         res1 = (t0 >> 16) + (t1 << 16);
2935         res2 = (t1 >> 16) + t2;
2936 #else
2937         res1 = (t0 << 16) + (t1 >> 16);
2938         res2 = (t1 << 16) + t2;
2939 #endif /* _LITTLE_ENDIAN */
2940         s0 = sa[0] << 3;
2941         s1 = sa[1] << 3;
2942         da[0] = res1;
2943         da[1] = res2;
2944         da[2] = t3;
2945       }
2946 
2947       t0 = *(mlib_u32 *) ((mlib_u8 *) tab + s0);
2948       t1 = *(mlib_u32 *) ((mlib_u8 *) tab + s0 + 4);
2949       t2 = *(mlib_u32 *) ((mlib_u8 *) tab + s1);
2950       t3 = *(mlib_u32 *) ((mlib_u8 *) tab + s1 + 4);
2951 #ifdef _LITTLE_ENDIAN
2952       res1 = (t0 >> 16) + (t1 << 16);
2953       res2 = (t1 >> 16) + t2;
2954 #else
2955       res1 = (t0 << 16) + (t1 >> 16);
2956       res2 = (t1 << 16) + t2;
2957 #endif /* _LITTLE_ENDIAN */
2958       da[0] = res1;
2959       da[1] = res2;
2960       da[2] = t3;
2961       da += 3;
2962       dp = (mlib_u16 *) da;
2963       i += 2;
2964 
2965       if (i < size) {
2966         ptr = (mlib_u16 *) (tab + 2 * sa[0]);
2967         dp[0] = ptr[1];
2968         dp[1] = ptr[2];
2969         dp[2] = ptr[3];
2970       }
2971     }
2972 
2973   }
2974   else if (csize == 4) {
2975     mlib_u32 tab[512];
2976     mlib_u16 *tab0 = (mlib_u16 *) table[0];
2977     mlib_u16 *tab1 = (mlib_u16 *) table[1];
2978     mlib_u16 *tab2 = (mlib_u16 *) table[2];
2979     mlib_u16 *tab3 = (mlib_u16 *) table[3];
2980     mlib_s32 i, j;
2981     mlib_u32 s0, s1, s2, s3, s4, s5;
2982 
2983     s0 = tab0[0];
2984     s1 = tab1[0];
2985     s2 = tab2[0];
2986     s3 = tab3[0];
2987     for (i = 1; i < 256; i++) {
2988 #ifdef _LITTLE_ENDIAN
2989       s4 = (s1 << 16) + s0;
2990       s5 = (s3 << 16) + s2;
2991 #else
2992       s4 = (s0 << 16) + s1;
2993       s5 = (s2 << 16) + s3;
2994 #endif /* _LITTLE_ENDIAN */
2995       s0 = tab0[i];
2996       s1 = tab1[i];
2997       s2 = tab2[i];
2998       s3 = tab3[i];
2999       tab[2 * i - 2] = s4;
3000       tab[2 * i - 1] = s5;
3001     }
3002 
3003 #ifdef _LITTLE_ENDIAN
3004     s4 = (s1 << 16) + s0;
3005     s5 = (s3 << 16) + s2;
3006 #else
3007     s4 = (s0 << 16) + s1;
3008     s5 = (s2 << 16) + s3;
3009 #endif /* _LITTLE_ENDIAN */
3010     tab[510] = s4;
3011     tab[511] = s5;
3012 
3013     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
3014       mlib_u32 *da;
3015       mlib_u16 *dp = (mlib_u16 *) dst;
3016       mlib_u8 *sa = (void *)src;
3017       mlib_u32 s0, t0, s1, t1, t2, t3, t4, t5;
3018       mlib_s32 size = xsize;
3019       mlib_u32 res1, res2, res3, res4;
3020 
3021       if (((mlib_addr) dp & 3) == 0) {
3022 
3023         da = (mlib_u32 *) dp;
3024 
3025         s0 = sa[0] << 3;
3026         s1 = sa[1] << 3;
3027         sa += 2;
3028 
3029         for (i = 0; i < size - 3; i += 2, da += 4, sa += 2) {
3030           t0 = *(mlib_u32 *) ((mlib_u8 *) tab + s0);
3031           t1 = *(mlib_u32 *) ((mlib_u8 *) tab + s0 + 4);
3032           t2 = *(mlib_u32 *) ((mlib_u8 *) tab + s1);
3033           t3 = *(mlib_u32 *) ((mlib_u8 *) tab + s1 + 4);
3034           s0 = sa[0] << 3;
3035           s1 = sa[1] << 3;
3036           da[0] = t0;
3037           da[1] = t1;
3038           da[2] = t2;
3039           da[3] = t3;
3040         }
3041 
3042         t0 = *(mlib_u32 *) ((mlib_u8 *) tab + s0);
3043         t1 = *(mlib_u32 *) ((mlib_u8 *) tab + s0 + 4);
3044         t2 = *(mlib_u32 *) ((mlib_u8 *) tab + s1);
3045         t3 = *(mlib_u32 *) ((mlib_u8 *) tab + s1 + 4);
3046         da[0] = t0;
3047         da[1] = t1;
3048         da[2] = t2;
3049         da[3] = t3;
3050 
3051         if (size & 1) {
3052           da[4] = tab[2 * sa[0]];
3053           da[5] = tab[2 * sa[0] + 1];
3054         }
3055 
3056       }
3057       else {
3058 
3059         t4 = tab[2 * sa[0]];
3060         t5 = tab[2 * sa[0] + 1];
3061 #ifdef _LITTLE_ENDIAN
3062         *dp++ = (mlib_u16) (t4);
3063 #else
3064         *dp++ = (mlib_u16) (t4 >> 16);
3065 #endif /* _LITTLE_ENDIAN */
3066         sa++;
3067         da = (mlib_u32 *) dp;
3068 #ifdef _LITTLE_ENDIAN
3069         *da++ = (t4 >> 16) + (t5 << 16);
3070 #else
3071         *da++ = (t4 << 16) + (t5 >> 16);
3072 #endif /* _LITTLE_ENDIAN */
3073         s0 = sa[0] << 3;
3074         s1 = sa[1] << 3;
3075         sa += 2;
3076 
3077         for (i = 0; i < size - 4; i += 2, da += 4, sa += 2) {
3078           t0 = *(mlib_u32 *) ((mlib_u8 *) tab + s0);
3079           t1 = *(mlib_u32 *) ((mlib_u8 *) tab + s0 + 4);
3080           t2 = *(mlib_u32 *) ((mlib_u8 *) tab + s1);
3081           t3 = *(mlib_u32 *) ((mlib_u8 *) tab + s1 + 4);
3082 #ifdef _LITTLE_ENDIAN
3083           res1 = (t5 >> 16) + (t0 << 16);
3084           res2 = (t0 >> 16) + (t1 << 16);
3085           res3 = (t1 >> 16) + (t2 << 16);
3086           res4 = (t2 >> 16) + (t3 << 16);
3087 #else
3088           res1 = (t5 << 16) + (t0 >> 16);
3089           res2 = (t0 << 16) + (t1 >> 16);
3090           res3 = (t1 << 16) + (t2 >> 16);
3091           res4 = (t2 << 16) + (t3 >> 16);
3092 #endif /* _LITTLE_ENDIAN */
3093           s0 = sa[0] << 3;
3094           s1 = sa[1] << 3;
3095           da[0] = res1;
3096           da[1] = res2;
3097           da[2] = res3;
3098           da[3] = res4;
3099           t5 = t3;
3100         }
3101 
3102         t0 = *(mlib_u32 *) ((mlib_u8 *) tab + s0);
3103         t1 = *(mlib_u32 *) ((mlib_u8 *) tab + s0 + 4);
3104         t2 = *(mlib_u32 *) ((mlib_u8 *) tab + s1);
3105         t3 = *(mlib_u32 *) ((mlib_u8 *) tab + s1 + 4);
3106 #ifdef _LITTLE_ENDIAN
3107         res1 = (t5 >> 16) + (t0 << 16);
3108         res2 = (t0 >> 16) + (t1 << 16);
3109         res3 = (t1 >> 16) + (t2 << 16);
3110         res4 = (t2 >> 16) + (t3 << 16);
3111 #else
3112         res1 = (t5 << 16) + (t0 >> 16);
3113         res2 = (t0 << 16) + (t1 >> 16);
3114         res3 = (t1 << 16) + (t2 >> 16);
3115         res4 = (t2 << 16) + (t3 >> 16);
3116 #endif /* _LITTLE_ENDIAN */
3117         da[0] = res1;
3118         da[1] = res2;
3119         da[2] = res3;
3120         da[3] = res4;
3121         da += 4;
3122         dp = (mlib_u16 *) da;
3123 #ifdef _LITTLE_ENDIAN
3124         dp[0] = (mlib_u16) (t3 >> 16);
3125 #else
3126         dp[0] = (mlib_u16) t3;
3127 #endif /* _LITTLE_ENDIAN */
3128 
3129         if ((size & 1) == 0) {
3130           t0 = tab[2 * sa[0]];
3131 #ifdef _LITTLE_ENDIAN
3132           dp[2] = (mlib_u16) (t0 >> 16);
3133           dp[1] = (mlib_u16) t0;
3134 #else
3135           dp[1] = (mlib_u16) (t0 >> 16);
3136           dp[2] = (mlib_u16) t0;
3137 #endif /* _LITTLE_ENDIAN */
3138           t0 = tab[2 * sa[0] + 1];
3139 #ifdef _LITTLE_ENDIAN
3140           dp[4] = (mlib_u16) (t0 >> 16);
3141           dp[3] = (mlib_u16) t0;
3142 #else
3143           dp[3] = (mlib_u16) (t0 >> 16);
3144           dp[4] = (mlib_u16) t0;
3145 #endif /* _LITTLE_ENDIAN */
3146         }
3147       }
3148     }
3149   }
3150 }
3151 
3152 /***************************************************************/
3153 void mlib_c_ImageLookUpSI_S16_S16(const mlib_s16 *src,
3154                                   mlib_s32       slb,
3155                                   mlib_s16       *dst,
3156                                   mlib_s32       dlb,
3157                                   mlib_s32       xsize,
3158                                   mlib_s32       ysize,
3159                                   mlib_s32       csize,
3160                                   const mlib_s16 **table)
3161 {
3162   const mlib_s16 *table_base[4];
3163   mlib_s32 c;
3164 
3165   for (c = 0; c < csize; c++) {
3166     table_base[c] = &table[c][32768];
3167   }
3168 
3169   MLIB_C_IMAGELOOKUPSI(mlib_s16, mlib_s16, table_base);
3170 }
3171 
3172 /***************************************************************/
3173 void mlib_c_ImageLookUpSI_U16_S16(const mlib_u16 *src,
3174                                   mlib_s32       slb,
3175                                   mlib_s16       *dst,
3176                                   mlib_s32       dlb,
3177                                   mlib_s32       xsize,
3178                                   mlib_s32       ysize,
3179                                   mlib_s32       csize,
3180                                   const mlib_s16 **table)
3181 {
3182   const mlib_s16 *table_base[4];
3183   mlib_s32 c;
3184 
3185   for (c = 0; c < csize; c++) {
3186     table_base[c] = &table[c][0];
3187   }
3188 
3189   MLIB_C_IMAGELOOKUPSI(mlib_s16, mlib_u16, table_base);
3190 }
3191 
3192 /***************************************************************/
3193 void mlib_c_ImageLookUpSI_S32_S16(const mlib_s32 *src,
3194                                   mlib_s32       slb,
3195                                   mlib_s16       *dst,
3196                                   mlib_s32       dlb,
3197                                   mlib_s32       xsize,
3198                                   mlib_s32       ysize,
3199                                   mlib_s32       csize,
3200                                   const mlib_s16 **table)
3201 {
3202   const mlib_s16 *table_base[4];
3203   mlib_s32 c;
3204 
3205   for (c = 0; c < csize; c++) {
3206     table_base[c] = &table[c][TABLE_SHIFT_S32];
3207   }
3208 
3209   MLIB_C_IMAGELOOKUPSI(mlib_s16, mlib_s32, table_base);
3210 }
3211 
3212 /***************************************************************/
3213 void mlib_c_ImageLookUpSI_S16_U16(const mlib_s16 *src,
3214                                   mlib_s32       slb,
3215                                   mlib_u16       *dst,
3216                                   mlib_s32       dlb,
3217                                   mlib_s32       xsize,
3218                                   mlib_s32       ysize,
3219                                   mlib_s32       csize,
3220                                   const mlib_u16 **table)
3221 {
3222   const mlib_u16 *table_base[4];
3223   mlib_s32 c;
3224 
3225   for (c = 0; c < csize; c++) {
3226     table_base[c] = &table[c][32768];
3227   }
3228 
3229   MLIB_C_IMAGELOOKUPSI(mlib_u16, mlib_s16, table_base);
3230 }
3231 
3232 /***************************************************************/
3233 void mlib_c_ImageLookUpSI_U16_U16(const mlib_u16 *src,
3234                                   mlib_s32       slb,
3235                                   mlib_u16       *dst,
3236                                   mlib_s32       dlb,
3237                                   mlib_s32       xsize,
3238                                   mlib_s32       ysize,
3239                                   mlib_s32       csize,
3240                                   const mlib_u16 **table)
3241 {
3242   const mlib_u16 *table_base[4];
3243   mlib_s32 c;
3244 
3245   for (c = 0; c < csize; c++) {
3246     table_base[c] = &table[c][0];
3247   }
3248 
3249   MLIB_C_IMAGELOOKUPSI(mlib_u16, mlib_u16, table_base);
3250 }
3251 
3252 /***************************************************************/
3253 void mlib_c_ImageLookUpSI_S32_U16(const mlib_s32 *src,
3254                                   mlib_s32       slb,
3255                                   mlib_u16       *dst,
3256                                   mlib_s32       dlb,
3257                                   mlib_s32       xsize,
3258                                   mlib_s32       ysize,
3259                                   mlib_s32       csize,
3260                                   const mlib_u16 **table)
3261 {
3262   const mlib_u16 *table_base[4];
3263   mlib_s32 c;
3264 
3265   for (c = 0; c < csize; c++) {
3266     table_base[c] = &table[c][TABLE_SHIFT_S32];
3267   }
3268 
3269   MLIB_C_IMAGELOOKUPSI(mlib_u16, mlib_s32, table_base);
3270 }
3271 
3272 /***************************************************************/
3273 void mlib_c_ImageLookUpSI_U8_S32(const mlib_u8  *src,
3274                                  mlib_s32       slb,
3275                                  mlib_s32       *dst,
3276                                  mlib_s32       dlb,
3277                                  mlib_s32       xsize,
3278                                  mlib_s32       ysize,
3279                                  mlib_s32       csize,
3280                                  const mlib_s32 **table)
3281 {
3282 
3283   if (xsize < 7) {
3284     MLIB_C_IMAGELOOKUPSI(mlib_s32, mlib_u8, table);
3285   }
3286   else if (csize == 2) {
3287     mlib_s32 i, j;
3288 
3289     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
3290       mlib_u32 *sa;
3291       mlib_u32 *tab0 = (mlib_u32 *) table[0];
3292       mlib_u32 *tab1 = (mlib_u32 *) table[1];
3293       mlib_u32 s0, t0, t1, t2, t3;
3294       mlib_s32 off;
3295       mlib_s32 size = xsize;
3296       mlib_u32 *dp = (mlib_u32 *) dst;
3297       mlib_u8 *sp = (void *)src;
3298 
3299       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
3300 
3301       for (i = 0; i < off; i++, sp++) {
3302         *dp++ = tab0[sp[0]];
3303         *dp++ = tab1[sp[0]];
3304         size--;
3305       }
3306 
3307       sa = (mlib_u32 *) sp;
3308 
3309       s0 = sa[0];
3310       sa++;
3311 
3312       for (i = 0; i < size - 7; i += 4, dp += 8, sa++) {
3313 #ifdef _LITTLE_ENDIAN
3314         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC));
3315         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC));
3316         t2 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC));
3317         t3 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC));
3318 #else
3319         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));
3320         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC));
3321         t2 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC));
3322         t3 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC));
3323 #endif /* _LITTLE_ENDIAN */
3324         dp[0] = t0;
3325         dp[1] = t1;
3326         dp[2] = t2;
3327         dp[3] = t3;
3328 #ifdef _LITTLE_ENDIAN
3329         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC));
3330         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC));
3331         t2 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));
3332         t3 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC));
3333 #else
3334         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC));
3335         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC));
3336         t2 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC));
3337         t3 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC));
3338 #endif /* _LITTLE_ENDIAN */
3339         s0 = sa[0];
3340         dp[4] = t0;
3341         dp[5] = t1;
3342         dp[6] = t2;
3343         dp[7] = t3;
3344       }
3345 
3346 #ifdef _LITTLE_ENDIAN
3347       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC));
3348       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC));
3349       t2 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC));
3350       t3 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC));
3351 #else
3352       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));
3353       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC));
3354       t2 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC));
3355       t3 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC));
3356 #endif /* _LITTLE_ENDIAN */
3357       dp[0] = t0;
3358       dp[1] = t1;
3359       dp[2] = t2;
3360       dp[3] = t3;
3361 #ifdef _LITTLE_ENDIAN
3362       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC));
3363       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC));
3364       t2 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));
3365       t3 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC));
3366 #else
3367       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC));
3368       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC));
3369       t2 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC));
3370       t3 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC));
3371 #endif /* _LITTLE_ENDIAN */
3372       dp[4] = t0;
3373       dp[5] = t1;
3374       dp[6] = t2;
3375       dp[7] = t3;
3376       dp += 8;
3377       sp = (mlib_u8 *) sa;
3378       i += 4;
3379 
3380       for (; i < size; i++, sp++) {
3381         *dp++ = tab0[sp[0]];
3382         *dp++ = tab1[sp[0]];
3383       }
3384     }
3385 
3386   }
3387   else if (csize == 3) {
3388     mlib_s32 i, j;
3389 
3390     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
3391       mlib_u32 *sa;
3392       mlib_u32 *tab0 = (mlib_u32 *) table[0];
3393       mlib_u32 *tab1 = (mlib_u32 *) table[1];
3394       mlib_u32 *tab2 = (mlib_u32 *) table[2];
3395       mlib_u32 s0, t0, t1, t2, t3, t4, t5;
3396       mlib_s32 off;
3397       mlib_s32 size = xsize;
3398       mlib_u32 *dp = (mlib_u32 *) dst;
3399       mlib_u8 *sp = (void *)src;
3400 
3401       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
3402 
3403       for (i = 0; i < off; i++, sp++) {
3404         *dp++ = tab0[sp[0]];
3405         *dp++ = tab1[sp[0]];
3406         *dp++ = tab2[sp[0]];
3407         size--;
3408       }
3409 
3410       sa = (mlib_u32 *) sp;
3411 
3412       s0 = sa[0];
3413       sa++;
3414 
3415       for (i = 0; i < size - 7; i += 4, dp += 12, sa++) {
3416 #ifdef _LITTLE_ENDIAN
3417         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC));
3418         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC));
3419         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 << 2) & 0x3FC));
3420         t3 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC));
3421         t4 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC));
3422         t5 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 6) & 0x3FC));
3423 #else
3424         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));
3425         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC));
3426         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 22) & 0x3FC));
3427         t3 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC));
3428         t4 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC));
3429         t5 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 14) & 0x3FC));
3430 #endif /* _LITTLE_ENDIAN */
3431         dp[0] = t0;
3432         dp[1] = t1;
3433         dp[2] = t2;
3434         dp[3] = t3;
3435         dp[4] = t4;
3436         dp[5] = t5;
3437 #ifdef _LITTLE_ENDIAN
3438         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC));
3439         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC));
3440         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 14) & 0x3FC));
3441         t3 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));
3442         t4 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC));
3443         t5 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 22) & 0x3FC));
3444 #else
3445         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC));
3446         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC));
3447         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 6) & 0x3FC));
3448         t3 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC));
3449         t4 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC));
3450         t5 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 << 2) & 0x3FC));
3451 #endif /* _LITTLE_ENDIAN */
3452         s0 = sa[0];
3453         dp[6] = t0;
3454         dp[7] = t1;
3455         dp[8] = t2;
3456         dp[9] = t3;
3457         dp[10] = t4;
3458         dp[11] = t5;
3459       }
3460 
3461 #ifdef _LITTLE_ENDIAN
3462       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC));
3463       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC));
3464       t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 << 2) & 0x3FC));
3465       t3 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC));
3466       t4 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC));
3467       t5 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 6) & 0x3FC));
3468 #else
3469       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));
3470       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC));
3471       t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 22) & 0x3FC));
3472       t3 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC));
3473       t4 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC));
3474       t5 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 14) & 0x3FC));
3475 #endif /* _LITTLE_ENDIAN */
3476       dp[0] = t0;
3477       dp[1] = t1;
3478       dp[2] = t2;
3479       dp[3] = t3;
3480       dp[4] = t4;
3481       dp[5] = t5;
3482 #ifdef _LITTLE_ENDIAN
3483       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC));
3484       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC));
3485       t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 14) & 0x3FC));
3486       t3 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));
3487       t4 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC));
3488       t5 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 22) & 0x3FC));
3489 #else
3490       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC));
3491       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC));
3492       t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 6) & 0x3FC));
3493       t3 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC));
3494       t4 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC));
3495       t5 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 << 2) & 0x3FC));
3496 #endif /* _LITTLE_ENDIAN */
3497       dp[6] = t0;
3498       dp[7] = t1;
3499       dp[8] = t2;
3500       dp[9] = t3;
3501       dp[10] = t4;
3502       dp[11] = t5;
3503       dp += 12;
3504       sp = (mlib_u8 *) sa;
3505       i += 4;
3506 
3507       for (; i < size; i++, sp++) {
3508         *dp++ = tab0[sp[0]];
3509         *dp++ = tab1[sp[0]];
3510         *dp++ = tab2[sp[0]];
3511       }
3512     }
3513 
3514   }
3515   else if (csize == 4) {
3516     mlib_s32 i, j;
3517 
3518     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
3519       mlib_u32 *sa;
3520       mlib_u32 *tab0 = (mlib_u32 *) table[0];
3521       mlib_u32 *tab1 = (mlib_u32 *) table[1];
3522       mlib_u32 *tab2 = (mlib_u32 *) table[2];
3523       mlib_u32 *tab3 = (mlib_u32 *) table[3];
3524       mlib_u32 s0, t0, t1, t2, t3;
3525       mlib_s32 off;
3526       mlib_s32 size = xsize;
3527       mlib_u32 *dp = (mlib_u32 *) dst;
3528       mlib_u8 *sp = (void *)src;
3529 
3530       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
3531 
3532       for (i = 0; i < off; i++, sp++) {
3533         *dp++ = tab0[sp[0]];
3534         *dp++ = tab1[sp[0]];
3535         *dp++ = tab2[sp[0]];
3536         *dp++ = tab3[sp[0]];
3537         size--;
3538       }
3539 
3540       sa = (mlib_u32 *) sp;
3541 
3542       s0 = sa[0];
3543       sa++;
3544 
3545       for (i = 0; i < size - 7; i += 4, dp += 16, sa++) {
3546 #ifdef _LITTLE_ENDIAN
3547         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC));
3548         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC));
3549         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 << 2) & 0x3FC));
3550         t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 << 2) & 0x3FC));
3551 #else
3552         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));
3553         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC));
3554         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 22) & 0x3FC));
3555         t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 22) & 0x3FC));
3556 #endif /* _LITTLE_ENDIAN */
3557         dp[0] = t0;
3558         dp[1] = t1;
3559         dp[2] = t2;
3560         dp[3] = t3;
3561 #ifdef _LITTLE_ENDIAN
3562         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC));
3563         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC));
3564         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 6) & 0x3FC));
3565         t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 6) & 0x3FC));
3566 #else
3567         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC));
3568         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC));
3569         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 14) & 0x3FC));
3570         t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 14) & 0x3FC));
3571 #endif /* _LITTLE_ENDIAN */
3572         dp[4] = t0;
3573         dp[5] = t1;
3574         dp[6] = t2;
3575         dp[7] = t3;
3576 #ifdef _LITTLE_ENDIAN
3577         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC));
3578         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC));
3579         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 14) & 0x3FC));
3580         t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 14) & 0x3FC));
3581 #else
3582         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC));
3583         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC));
3584         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 6) & 0x3FC));
3585         t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 6) & 0x3FC));
3586 #endif /* _LITTLE_ENDIAN */
3587         dp[8] = t0;
3588         dp[9] = t1;
3589         dp[10] = t2;
3590         dp[11] = t3;
3591 #ifdef _LITTLE_ENDIAN
3592         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));
3593         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC));
3594         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 22) & 0x3FC));
3595         t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 22) & 0x3FC));
3596 #else
3597         t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC));
3598         t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC));
3599         t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 << 2) & 0x3FC));
3600         t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 << 2) & 0x3FC));
3601 #endif /* _LITTLE_ENDIAN */
3602         s0 = sa[0];
3603         dp[12] = t0;
3604         dp[13] = t1;
3605         dp[14] = t2;
3606         dp[15] = t3;
3607       }
3608 
3609 #ifdef _LITTLE_ENDIAN
3610       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC));
3611       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC));
3612       t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 << 2) & 0x3FC));
3613       t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 << 2) & 0x3FC));
3614 #else
3615       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));
3616       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC));
3617       t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 22) & 0x3FC));
3618       t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 22) & 0x3FC));
3619 #endif /* _LITTLE_ENDIAN */
3620       dp[0] = t0;
3621       dp[1] = t1;
3622       dp[2] = t2;
3623       dp[3] = t3;
3624 #ifdef _LITTLE_ENDIAN
3625       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC));
3626       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC));
3627       t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 6) & 0x3FC));
3628       t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 6) & 0x3FC));
3629 #else
3630       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC));
3631       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC));
3632       t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 14) & 0x3FC));
3633       t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 14) & 0x3FC));
3634 #endif /* _LITTLE_ENDIAN */
3635       dp[4] = t0;
3636       dp[5] = t1;
3637       dp[6] = t2;
3638       dp[7] = t3;
3639 #ifdef _LITTLE_ENDIAN
3640       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC));
3641       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC));
3642       t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 14) & 0x3FC));
3643       t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 14) & 0x3FC));
3644 #else
3645       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC));
3646       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC));
3647       t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 6) & 0x3FC));
3648       t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 6) & 0x3FC));
3649 #endif /* _LITTLE_ENDIAN */
3650       dp[8] = t0;
3651       dp[9] = t1;
3652       dp[10] = t2;
3653       dp[11] = t3;
3654 #ifdef _LITTLE_ENDIAN
3655       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC));
3656       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC));
3657       t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 22) & 0x3FC));
3658       t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 22) & 0x3FC));
3659 #else
3660       t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC));
3661       t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC));
3662       t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 << 2) & 0x3FC));
3663       t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 << 2) & 0x3FC));
3664 #endif /* _LITTLE_ENDIAN */
3665       dp[12] = t0;
3666       dp[13] = t1;
3667       dp[14] = t2;
3668       dp[15] = t3;
3669       dp += 16;
3670       sp = (mlib_u8 *) sa;
3671       i += 4;
3672 
3673       for (; i < size; i++, sp++) {
3674         *dp++ = tab0[sp[0]];
3675         *dp++ = tab1[sp[0]];
3676         *dp++ = tab2[sp[0]];
3677         *dp++ = tab3[sp[0]];
3678       }
3679     }
3680   }
3681 }
3682 
3683 /***************************************************************/
3684 void mlib_c_ImageLookUpSI_S16_S32(const mlib_s16 *src,
3685                                   mlib_s32       slb,
3686                                   mlib_s32       *dst,
3687                                   mlib_s32       dlb,
3688                                   mlib_s32       xsize,
3689                                   mlib_s32       ysize,
3690                                   mlib_s32       csize,
3691                                   const mlib_s32 **table)
3692 {
3693   const mlib_s32 *table_base[4];
3694   mlib_s32 c;
3695 
3696   for (c = 0; c < csize; c++) {
3697     table_base[c] = &table[c][32768];
3698   }
3699 
3700   MLIB_C_IMAGELOOKUPSI(mlib_s32, mlib_s16, table_base);
3701 }
3702 
3703 /***************************************************************/
3704 void mlib_c_ImageLookUpSI_U16_S32(const mlib_u16 *src,
3705                                   mlib_s32       slb,
3706                                   mlib_s32       *dst,
3707                                   mlib_s32       dlb,
3708                                   mlib_s32       xsize,
3709                                   mlib_s32       ysize,
3710                                   mlib_s32       csize,
3711                                   const mlib_s32 **table)
3712 {
3713   const mlib_s32 *table_base[4];
3714   mlib_s32 c;
3715 
3716   for (c = 0; c < csize; c++) {
3717     table_base[c] = &table[c][0];
3718   }
3719 
3720   MLIB_C_IMAGELOOKUPSI(mlib_s32, mlib_u16, table_base);
3721 }
3722 
3723 /***************************************************************/
3724 void mlib_c_ImageLookUpSI_S32_S32(const mlib_s32 *src,
3725                                   mlib_s32       slb,
3726                                   mlib_s32       *dst,
3727                                   mlib_s32       dlb,
3728                                   mlib_s32       xsize,
3729                                   mlib_s32       ysize,
3730                                   mlib_s32       csize,
3731                                   const mlib_s32 **table)
3732 {
3733   const mlib_s32 *table_base[4];
3734   mlib_s32 c;
3735 
3736   for (c = 0; c < csize; c++) {
3737     table_base[c] = &table[c][TABLE_SHIFT_S32];
3738   }
3739 
3740   MLIB_C_IMAGELOOKUPSI(mlib_s32, mlib_s32, table_base);
3741 }
3742 
3743 /***************************************************************/