1 /*
   2  * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 
  27 /*
  28  * FUNCTION
   29  *      mlib_ImageLookUp_U8_D64 - table lookup
   30  *      mlib_ImageLookUp_S16_D64 - table lookup
   31  *      mlib_ImageLookUp_U16_D64 - table lookup
   32  *      mlib_ImageLookUp_S32_D64 - table lookup
  33  *
  34  * SYNOPSIS
  35  *      void mlib_ImageLookUp_U8_D64(src, slb,
  36  *                                   dst, dlb,
  37  *                                   xsize, ysize,
  38  *                                   csize, table)
  39  *
  40  *      void mlib_ImageLookUp_S16_D64(src, slb,
  41  *                                    dst, dlb,
  42  *                                    xsize, ysize,
  43  *                                    csize, table)
  44  *
  45  *      void mlib_ImageLookUp_U16_D64(src, slb,
  46  *                                    dst, dlb,
  47  *                                    xsize, ysize,
  48  *                                    csize, table)
  49  *
  50  *      void mlib_ImageLookUp_S32_D64(src, slb,
  51  *                                    dst, dlb,
  52  *                                    xsize, ysize,
  53  *                                    csize, table)
  54  *
  55  * ARGUMENT
  56  *      src     pointer to input image (BYTE, SHORT, USHORT, INT)
   57  *      slb     stride of input image (in elements of the source data type)
   58  *      dst     pointer to output image (DOUBLE)
   59  *      dlb     stride of output image (in DOUBLE elements)
  60  *      xsize   image width
  61  *      ysize   image height
  62  *      csize   number of channels
  63  *      table   lookup table
  64  *
  65  * DESCRIPTION
  66  *      dst = table[src] (c, vis version)
  67  */
  68 
  69 #include "mlib_image.h"
  70 #include "mlib_ImageLookUp.h"
  71 
  72 /***************************************************************/
  73 #define MLIB_C_IMAGELOOKUP(DTYPE, STYPE, TABLE)                 \
  74 {                                                               \
  75   mlib_s32 i, j, k;                                             \
  76                                                                 \
  77   if (xsize < 2) {                                              \
  78     for(j = 0; j < ysize; j++, dst += dlb, src += slb){         \
  79       for(k = 0; k < csize; k++) {                              \
  80         DTYPE *da = dst + k;                                    \
  81         const STYPE *sa = src + k;                              \
  82         DTYPE *tab = (DTYPE*) TABLE[k];                         \
  83                                                                 \
  84         for(i = 0; i < xsize; i++, da += csize, sa += csize)    \
  85         *da=tab[*sa];                                           \
  86       }                                                         \
  87     }                                                           \
  88   } else {                                                      \
  89     for(j = 0; j < ysize; j++, dst += dlb, src += slb) {        \
  90       for(k = 0; k < csize; k++) {                              \
  91         DTYPE *da = dst + k;                                    \
  92         const STYPE *sa = src + k;                              \
  93         DTYPE *tab = (DTYPE*) TABLE[k];                         \
  94         mlib_s32 s0, s1;                                        \
  95         DTYPE t0, t1;                                           \
  96                                                                 \
  97         s0 = (mlib_s32)sa[0];                                   \
  98         s1 = (mlib_s32)sa[csize];                               \
  99         sa += 2*csize;                                          \
 100                                                                 \
 101         for(i = 0;                                              \
 102             i < xsize - 3;                                      \
 103             i+=2, da += 2*csize, sa += 2*csize) {               \
 104           t0 = tab[s0];                                         \
 105           t1 = tab[s1];                                         \
 106           s0 = (mlib_s32)sa[0];                                 \
 107           s1 = (mlib_s32)sa[csize];                             \
 108           da[0] = (DTYPE)t0;                                    \
 109           da[csize] = (DTYPE)t1;                                \
 110         }                                                       \
 111         t0 = tab[s0];                                           \
 112         t1 = tab[s1];                                           \
 113         da[0] = (DTYPE)t0;                                      \
 114         da[csize] = (DTYPE)t1;                                  \
 115         if (xsize & 1) da[2*csize] = tab[sa[0]];                \
 116       }                                                         \
 117     }                                                           \
 118   }                                                             \
 119 }
 120 
 121 /***************************************************************/
 122 #define MLIB_C_IMAGELOOKUPSI(DTYPE, STYPE, TABLE)               \
 123 {                                                               \
 124   mlib_s32 i, j, k;                                             \
 125                                                                 \
 126   if (xsize < 2) {                                              \
 127     for(j = 0; j < ysize; j++, dst += dlb, src += slb){         \
 128       for(k = 0; k < csize; k++) {                              \
 129         DTYPE *da = dst + k;                                    \
 130         const STYPE *sa = (void *)src;                                  \
 131         DTYPE *tab = (DTYPE*) TABLE[k];                         \
 132                                                                 \
 133         for(i = 0; i < xsize; i++, da += csize, sa ++)          \
 134         *da=tab[*sa];                                           \
 135       }                                                         \
 136     }                                                           \
 137   } else {                                                      \
 138     for(j = 0; j < ysize; j++, dst += dlb, src += slb) {        \
 139       for(k = 0; k < csize; k++) {                              \
 140         DTYPE *da = dst + k;                                    \
 141         const STYPE *sa = (void *)src;                                  \
 142         DTYPE *tab = (DTYPE*) TABLE[k];                         \
 143         mlib_s32 s0, s1;                                        \
 144         DTYPE t0, t1;                                           \
 145                                                                 \
 146         s0 = (mlib_s32)sa[0];                                   \
 147         s1 = (mlib_s32)sa[1];                                   \
 148         sa += 2;                                                \
 149                                                                 \
 150         for(i = 0;                                              \
 151             i < xsize - 3;                                      \
 152             i+=2, da += 2*csize, sa += 2) {                     \
 153           t0 = tab[s0];                                         \
 154           t1 = tab[s1];                                         \
 155           s0 = (mlib_s32)sa[0];                                 \
 156           s1 = (mlib_s32)sa[1];                                 \
 157           da[0] = (DTYPE)t0;                                    \
 158           da[csize] = (DTYPE)t1;                                \
 159         }                                                       \
 160         t0 = tab[s0];                                           \
 161         t1 = tab[s1];                                           \
 162         da[0] = (DTYPE)t0;                                      \
 163         da[csize] = (DTYPE)t1;                                  \
 164         if (xsize & 1) da[2*csize] = tab[sa[0]];                \
 165       }                                                         \
 166     }                                                           \
 167   }                                                             \
 168 }
 169 
 170 /***************************************************************/
 171 #ifdef _LITTLE_ENDIAN
 172 
 173 #define READ_U8_D64(table0, table1, table2, table3)             \
 174   t0 = *(mlib_d64*)((mlib_u8*)table0 + ((s0 << 3) & 0x7F8));    \
 175   t1 = *(mlib_d64*)((mlib_u8*)table1 + ((s0 >> 5) & 0x7F8));    \
 176   t2 = *(mlib_d64*)((mlib_u8*)table2 + ((s0 >> 13)  & 0x7F8));  \
 177   t3 = *(mlib_d64*)((mlib_u8*)table3 + ((s0 >> 21)  & 0x7F8))
 178 
 179 #else
 180 
 181 #define READ_U8_D64(table0, table1, table2, table3)             \
 182   t0 = *(mlib_d64*)((mlib_u8*)table0 + ((s0 >> 21) & 0x7F8));   \
 183   t1 = *(mlib_d64*)((mlib_u8*)table1 + ((s0 >> 13) & 0x7F8));   \
 184   t2 = *(mlib_d64*)((mlib_u8*)table2 + ((s0 >> 5)  & 0x7F8));   \
 185   t3 = *(mlib_d64*)((mlib_u8*)table3 + ((s0 << 3)  & 0x7F8))
 186 
 187 #endif /* _LITTLE_ENDIAN */
 188 
 189 /***************************************************************/
 190 void mlib_ImageLookUp_U8_D64(const mlib_u8  *src,
 191                              mlib_s32       slb,
 192                              mlib_d64       *dst,
 193                              mlib_s32       dlb,
 194                              mlib_s32       xsize,
 195                              mlib_s32       ysize,
 196                              mlib_s32       csize,
 197                              const mlib_d64 **table)
 198 {
 199 
 200   if (xsize * csize < 7) {
 201     MLIB_C_IMAGELOOKUP(mlib_d64, mlib_u8, table);
 202   }
 203   else if (csize == 1) {
 204     mlib_s32 i, j;
 205 
 206     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
 207       mlib_u32 *sa;
 208       mlib_d64 *tab = (mlib_d64 *) table[0];
 209       mlib_u32 s0;
 210       mlib_d64 t0, t1, t2, t3;
 211       mlib_s32 off;
 212       mlib_s32 size = xsize;
 213       mlib_d64 *dp = (mlib_d64 *) dst;
 214       mlib_u8 *sp = (void *)src;
 215 
 216       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
 217 
 218       for (i = 0; i < off; i++, sp++) {
 219         *dp++ = tab[sp[0]];
 220         size--;
 221       }
 222 
 223       sa = (mlib_u32 *) sp;
 224 
 225       s0 = sa[0];
 226       sa++;
 227 
 228 #ifdef __SUNPRO_C
 229 #pragma pipeloop(0)
 230 #endif /* __SUNPRO_C */
 231       for (i = 0; i < size - 7; i += 4, dp += 4, sa++) {
 232         READ_U8_D64(tab, tab, tab, tab);
 233         s0 = sa[0];
 234         dp[0] = t0;
 235         dp[1] = t1;
 236         dp[2] = t2;
 237         dp[3] = t3;
 238       }
 239 
 240       READ_U8_D64(tab, tab, tab, tab);
 241       dp[0] = t0;
 242       dp[1] = t1;
 243       dp[2] = t2;
 244       dp[3] = t3;
 245       dp += 4;
 246       sp = (mlib_u8 *) sa;
 247       i += 4;
 248       for (; i < size; i++, dp++, sp++)
 249         dp[0] = tab[sp[0]];
 250     }
 251   }
 252   else if (csize == 2) {
 253     mlib_s32 i, j;
 254 
 255     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
 256       mlib_u32 *sa;
 257       mlib_d64 *tab0 = (mlib_d64 *) table[0];
 258       mlib_d64 *tab1 = (mlib_d64 *) table[1];
 259       mlib_d64 *tab;
 260       mlib_u32 s0;
 261       mlib_d64 t0, t1, t2, t3;
 262       mlib_s32 off;
 263       mlib_s32 size = xsize * 2;
 264       mlib_d64 *dp = (mlib_d64 *) dst;
 265       mlib_u8 *sp = (void *)src;
 266 
 267       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
 268 
 269       for (i = 0; i < off - 1; i += 2, sp += 2) {
 270         *dp++ = tab0[sp[0]];
 271         *dp++ = tab1[sp[1]];
 272         size -= 2;
 273       }
 274 
 275       if ((off & 1) != 0) {
 276         *dp++ = tab0[*sp];
 277         size--;
 278         sp++;
 279         tab = tab0;
 280         tab0 = tab1;
 281         tab1 = tab;
 282       }
 283 
 284       sa = (mlib_u32 *) sp;
 285 
 286       s0 = sa[0];
 287       sa++;
 288 
 289 #ifdef __SUNPRO_C
 290 #pragma pipeloop(0)
 291 #endif /* __SUNPRO_C */
 292       for (i = 0; i < size - 7; i += 4, dp += 4, sa++) {
 293         READ_U8_D64(tab0, tab1, tab0, tab1);
 294         s0 = sa[0];
 295         dp[0] = t0;
 296         dp[1] = t1;
 297         dp[2] = t2;
 298         dp[3] = t3;
 299       }
 300 
 301       READ_U8_D64(tab0, tab1, tab0, tab1);
 302       dp[0] = t0;
 303       dp[1] = t1;
 304       dp[2] = t2;
 305       dp[3] = t3;
 306       dp += 4;
 307       sp = (mlib_u8 *) sa;
 308       i += 4;
 309 
 310       for (; i < size - 1; i += 2, sp += 2) {
 311         *dp++ = tab0[sp[0]];
 312         *dp++ = tab1[sp[1]];
 313       }
 314 
 315       if (i < size)
 316         *dp = tab0[(*sp)];
 317     }
 318   }
 319   else if (csize == 3) {
 320     mlib_s32 i, j;
 321 
 322     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
 323       mlib_u32 *sa;
 324       mlib_d64 *tab0 = (mlib_d64 *) table[0];
 325       mlib_d64 *tab1 = (mlib_d64 *) table[1];
 326       mlib_d64 *tab2 = (mlib_d64 *) table[2];
 327       mlib_d64 *tab;
 328       mlib_u32 s0;
 329       mlib_d64 t0, t1, t2, t3;
 330       mlib_s32 off;
 331       mlib_s32 size = xsize * 3;
 332       mlib_d64 *dp = (mlib_d64 *) dst;
 333       mlib_u8 *sp = (void *)src;
 334 
 335       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
 336 
 337       if (off == 1) {
 338         *dp++ = tab0[(*sp)];
 339         tab = tab0;
 340         tab0 = tab1;
 341         tab1 = tab2;
 342         tab2 = tab;
 343         size--;
 344         sp++;
 345       }
 346       else if (off == 2) {
 347         *dp++ = tab0[sp[0]];
 348         *dp++ = tab1[sp[1]];
 349         tab = tab2;
 350         tab2 = tab1;
 351         tab1 = tab0;
 352         tab0 = tab;
 353         size -= 2;
 354         sp += 2;
 355       }
 356       else if (off == 3) {
 357         *dp++ = tab0[sp[0]];
 358         *dp++ = tab1[sp[1]];
 359         *dp++ = tab2[sp[2]];
 360         size -= 3;
 361         sp += 3;
 362       }
 363 
 364       sa = (mlib_u32 *) sp;
 365 
 366       s0 = sa[0];
 367       sa++;
 368 
 369 #ifdef __SUNPRO_C
 370 #pragma pipeloop(0)
 371 #endif /* __SUNPRO_C */
 372       for (i = 0; i < size - 7; i += 4, dp += 4, sa++) {
 373         READ_U8_D64(tab0, tab1, tab2, tab0);
 374         tab = tab0;
 375         tab0 = tab1;
 376         tab1 = tab2;
 377         tab2 = tab;
 378         s0 = sa[0];
 379         dp[0] = t0;
 380         dp[1] = t1;
 381         dp[2] = t2;
 382         dp[3] = t3;
 383       }
 384 
 385       READ_U8_D64(tab0, tab1, tab2, tab0);
 386       dp[0] = t0;
 387       dp[1] = t1;
 388       dp[2] = t2;
 389       dp[3] = t3;
 390       dp += 4;
 391       sp = (mlib_u8 *) sa;
 392       i += 4;
 393 
 394       if (i < size) {
 395         *dp++ = tab1[(*sp)];
 396         i++;
 397         sp++;
 398       }
 399 
 400       if (i < size) {
 401         *dp++ = tab2[(*sp)];
 402         i++;
 403         sp++;
 404       }
 405 
 406       if (i < size) {
 407         *dp = tab0[(*sp)];
 408       }
 409     }
 410   }
 411   else if (csize == 4) {
 412     mlib_s32 i, j;
 413 
 414     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
 415       mlib_u32 *sa;
 416       mlib_d64 *tab0 = (mlib_d64 *) table[0];
 417       mlib_d64 *tab1 = (mlib_d64 *) table[1];
 418       mlib_d64 *tab2 = (mlib_d64 *) table[2];
 419       mlib_d64 *tab3 = (mlib_d64 *) table[3];
 420       mlib_d64 *tab;
 421       mlib_u32 s0;
 422       mlib_d64 t0, t1, t2, t3;
 423       mlib_s32 off;
 424       mlib_s32 size = xsize * 4;
 425       mlib_d64 *dp = (mlib_d64 *) dst;
 426       mlib_u8 *sp = (void *)src;
 427 
 428       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
 429 
 430       if (off == 1) {
 431         *dp++ = tab0[(*sp)];
 432         tab = tab0;
 433         tab0 = tab1;
 434         tab1 = tab2;
 435         tab2 = tab3;
 436         tab3 = tab;
 437         size--;
 438         sp++;
 439       }
 440       else if (off == 2) {
 441         *dp++ = tab0[sp[0]];
 442         *dp++ = tab1[sp[1]];
 443         tab = tab0;
 444         tab0 = tab2;
 445         tab2 = tab;
 446         tab = tab1;
 447         tab1 = tab3;
 448         tab3 = tab;
 449         size -= 2;
 450         sp += 2;
 451       }
 452       else if (off == 3) {
 453         *dp++ = tab0[sp[0]];
 454         *dp++ = tab1[sp[1]];
 455         *dp++ = tab2[sp[2]];
 456         tab = tab3;
 457         tab3 = tab2;
 458         tab2 = tab1;
 459         tab1 = tab0;
 460         tab0 = tab;
 461         size -= 3;
 462         sp += 3;
 463       }
 464 
 465       sa = (mlib_u32 *) sp;
 466 
 467       s0 = sa[0];
 468       sa++;
 469 
 470 #ifdef __SUNPRO_C
 471 #pragma pipeloop(0)
 472 #endif /* __SUNPRO_C */
 473       for (i = 0; i < size - 7; i += 4, dp += 4, sa++) {
 474         READ_U8_D64(tab0, tab1, tab2, tab3);
 475         s0 = sa[0];
 476         dp[0] = t0;
 477         dp[1] = t1;
 478         dp[2] = t2;
 479         dp[3] = t3;
 480       }
 481 
 482       READ_U8_D64(tab0, tab1, tab2, tab3);
 483       dp[0] = t0;
 484       dp[1] = t1;
 485       dp[2] = t2;
 486       dp[3] = t3;
 487       dp += 4;
 488       sp = (mlib_u8 *) sa;
 489       i += 4;
 490 
 491       if (i < size) {
 492         *dp++ = tab0[(*sp)];
 493         i++;
 494         sp++;
 495       }
 496 
 497       if (i < size) {
 498         *dp++ = tab1[(*sp)];
 499         i++;
 500         sp++;
 501       }
 502 
 503       if (i < size) {
 504         *dp = tab2[(*sp)];
 505       }
 506     }
 507   }
 508 }
 509 
 510 /***************************************************************/
 511 void mlib_ImageLookUp_S16_D64(const mlib_s16 *src,
 512                               mlib_s32       slb,
 513                               mlib_d64       *dst,
 514                               mlib_s32       dlb,
 515                               mlib_s32       xsize,
 516                               mlib_s32       ysize,
 517                               mlib_s32       csize,
 518                               const mlib_d64 **table)
 519 {
 520   const mlib_d64 *table_base[4];
 521   mlib_s32 c;
 522 
 523   for (c = 0; c < csize; c++) {
 524     table_base[c] = &table[c][32768];
 525   }
 526 
 527   MLIB_C_IMAGELOOKUP(mlib_d64, mlib_s16, table_base);
 528 }
 529 
 530 /***************************************************************/
 531 void mlib_ImageLookUp_U16_D64(const mlib_u16 *src,
 532                               mlib_s32       slb,
 533                               mlib_d64       *dst,
 534                               mlib_s32       dlb,
 535                               mlib_s32       xsize,
 536                               mlib_s32       ysize,
 537                               mlib_s32       csize,
 538                               const mlib_d64 **table)
 539 {
 540   const mlib_d64 *table_base[4];
 541   mlib_s32 c;
 542 
 543   for (c = 0; c < csize; c++) {
 544     table_base[c] = &table[c][0];
 545   }
 546 
 547   MLIB_C_IMAGELOOKUP(mlib_d64, mlib_u16, table_base);
 548 }
 549 
 550 /***************************************************************/
 551 void mlib_ImageLookUp_S32_D64(const mlib_s32 *src,
 552                               mlib_s32       slb,
 553                               mlib_d64       *dst,
 554                               mlib_s32       dlb,
 555                               mlib_s32       xsize,
 556                               mlib_s32       ysize,
 557                               mlib_s32       csize,
 558                               const mlib_d64 **table)
 559 {
 560   const mlib_d64 *table_base[4];
 561   mlib_u32 shift = TABLE_SHIFT_S32;
 562   mlib_s32 c;
 563 
 564   for (c = 0; c < csize; c++) {
 565     table_base[c] = &table[c][shift];
 566   }
 567 
 568   MLIB_C_IMAGELOOKUP(mlib_d64, mlib_s32, table_base);
 569 }
 570 
 571 /***************************************************************/
 572 void mlib_ImageLookUpSI_U8_D64(const mlib_u8  *src,
 573                                mlib_s32       slb,
 574                                mlib_d64       *dst,
 575                                mlib_s32       dlb,
 576                                mlib_s32       xsize,
 577                                mlib_s32       ysize,
 578                                mlib_s32       csize,
 579                                const mlib_d64 **table)
 580 {
 581 
 582   if (xsize < 7) {
 583     MLIB_C_IMAGELOOKUPSI(mlib_d64, mlib_u8, table);
 584   }
 585   else if (csize == 2) {
 586     mlib_s32 i, j;
 587 
 588     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
 589       mlib_u32 *sa;
 590       mlib_d64 *tab0 = (mlib_d64 *) table[0];
 591       mlib_d64 *tab1 = (mlib_d64 *) table[1];
 592       mlib_u32 s0;
 593       mlib_d64 t0, t1, t2, t3;
 594       mlib_s32 off;
 595       mlib_s32 size = xsize;
 596       mlib_d64 *dp = (mlib_d64 *) dst;
 597       mlib_u8 *sp = (void *)src;
 598 
 599       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
 600 
 601       for (i = 0; i < off; i++, sp++) {
 602         *dp++ = tab0[sp[0]];
 603         *dp++ = tab1[sp[0]];
 604         size--;
 605       }
 606 
 607       sa = (mlib_u32 *) sp;
 608 
 609       s0 = sa[0];
 610       sa++;
 611 
 612 #ifdef __SUNPRO_C
 613 #pragma pipeloop(0)
 614 #endif /* __SUNPRO_C */
 615       for (i = 0; i < size - 7; i += 4, dp += 8, sa++) {
 616 #ifdef _LITTLE_ENDIAN
 617         t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 << 3) & 0x7F8));
 618         t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 << 3) & 0x7F8));
 619         t2 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 5) & 0x7F8));
 620         t3 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 5) & 0x7F8));
 621 #else
 622         t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 21) & 0x7F8));
 623         t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 21) & 0x7F8));
 624         t2 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 13) & 0x7F8));
 625         t3 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 13) & 0x7F8));
 626 #endif /* _LITTLE_ENDIAN */
 627         dp[0] = t0;
 628         dp[1] = t1;
 629         dp[2] = t2;
 630         dp[3] = t3;
 631 #ifdef _LITTLE_ENDIAN
 632         t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 13) & 0x7F8));
 633         t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 13) & 0x7F8));
 634         t2 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 21) & 0x7F8));
 635         t3 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 21) & 0x7F8));
 636 #else
 637         t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 5) & 0x7F8));
 638         t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 5) & 0x7F8));
 639         t2 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 << 3) & 0x7F8));
 640         t3 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 << 3) & 0x7F8));
 641 #endif /* _LITTLE_ENDIAN */
 642         s0 = sa[0];
 643         dp[4] = t0;
 644         dp[5] = t1;
 645         dp[6] = t2;
 646         dp[7] = t3;
 647       }
 648 
 649 #ifdef _LITTLE_ENDIAN
 650       t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 << 3) & 0x7F8));
 651       t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 << 3) & 0x7F8));
 652       t2 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 5) & 0x7F8));
 653       t3 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 5) & 0x7F8));
 654 #else
 655       t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 21) & 0x7F8));
 656       t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 21) & 0x7F8));
 657       t2 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 13) & 0x7F8));
 658       t3 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 13) & 0x7F8));
 659 #endif /* _LITTLE_ENDIAN */
 660       dp[0] = t0;
 661       dp[1] = t1;
 662       dp[2] = t2;
 663       dp[3] = t3;
 664 #ifdef _LITTLE_ENDIAN
 665       t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 13) & 0x7F8));
 666       t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 13) & 0x7F8));
 667       t2 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 21) & 0x7F8));
 668       t3 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 21) & 0x7F8));
 669 #else
 670       t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 5) & 0x7F8));
 671       t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 5) & 0x7F8));
 672       t2 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 << 3) & 0x7F8));
 673       t3 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 << 3) & 0x7F8));
 674 #endif /* _LITTLE_ENDIAN */
 675       dp[4] = t0;
 676       dp[5] = t1;
 677       dp[6] = t2;
 678       dp[7] = t3;
 679       dp += 8;
 680       sp = (mlib_u8 *) sa;
 681       i += 4;
 682 
 683       for (; i < size; i++, sp++) {
 684         *dp++ = tab0[sp[0]];
 685         *dp++ = tab1[sp[0]];
 686       }
 687     }
 688   }
 689   else if (csize == 3) {
 690     mlib_s32 i, j;
 691 
 692     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
 693       mlib_u32 *sa;
 694       mlib_d64 *tab0 = (mlib_d64 *) table[0];
 695       mlib_d64 *tab1 = (mlib_d64 *) table[1];
 696       mlib_d64 *tab2 = (mlib_d64 *) table[2];
 697       mlib_u32 s0;
 698       mlib_d64 t0, t1, t2, t3, t4, t5;
 699       mlib_s32 off;
 700       mlib_s32 size = xsize;
 701       mlib_d64 *dp = (mlib_d64 *) dst;
 702       mlib_u8 *sp = (void *)src;
 703 
 704       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
 705 
 706       for (i = 0; i < off; i++, sp++) {
 707         *dp++ = tab0[sp[0]];
 708         *dp++ = tab1[sp[0]];
 709         *dp++ = tab2[sp[0]];
 710         size--;
 711       }
 712 
 713       sa = (mlib_u32 *) sp;
 714 
 715       s0 = sa[0];
 716       sa++;
 717 
 718 #ifdef __SUNPRO_C
 719 #pragma pipeloop(0)
 720 #endif /* __SUNPRO_C */
 721       for (i = 0; i < size - 7; i += 4, dp += 12, sa++) {
 722 #ifdef _LITTLE_ENDIAN
 723         t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 << 3) & 0x7F8));
 724         t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 << 3) & 0x7F8));
 725         t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 << 3) & 0x7F8));
 726         t3 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 5) & 0x7F8));
 727         t4 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 5) & 0x7F8));
 728         t5 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 5) & 0x7F8));
 729 #else
 730         t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 21) & 0x7F8));
 731         t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 21) & 0x7F8));
 732         t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 21) & 0x7F8));
 733         t3 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 13) & 0x7F8));
 734         t4 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 13) & 0x7F8));
 735         t5 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 13) & 0x7F8));
 736 #endif /* _LITTLE_ENDIAN */
 737         dp[0] = t0;
 738         dp[1] = t1;
 739         dp[2] = t2;
 740         dp[3] = t3;
 741         dp[4] = t4;
 742         dp[5] = t5;
 743 #ifdef _LITTLE_ENDIAN
 744         t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 13) & 0x7F8));
 745         t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 13) & 0x7F8));
 746         t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 13) & 0x7F8));
 747         t3 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 21) & 0x7F8));
 748         t4 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 21) & 0x7F8));
 749         t5 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 21) & 0x7F8));
 750 #else
 751         t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 5) & 0x7F8));
 752         t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 5) & 0x7F8));
 753         t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 5) & 0x7F8));
 754         t3 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 << 3) & 0x7F8));
 755         t4 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 << 3) & 0x7F8));
 756         t5 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 << 3) & 0x7F8));
 757 #endif /* _LITTLE_ENDIAN */
 758         s0 = sa[0];
 759         dp[6] = t0;
 760         dp[7] = t1;
 761         dp[8] = t2;
 762         dp[9] = t3;
 763         dp[10] = t4;
 764         dp[11] = t5;
 765       }
 766 
 767 #ifdef _LITTLE_ENDIAN
 768       t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 << 3) & 0x7F8));
 769       t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 << 3) & 0x7F8));
 770       t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 << 3) & 0x7F8));
 771       t3 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 5) & 0x7F8));
 772       t4 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 5) & 0x7F8));
 773       t5 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 5) & 0x7F8));
 774 #else
 775       t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 21) & 0x7F8));
 776       t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 21) & 0x7F8));
 777       t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 21) & 0x7F8));
 778       t3 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 13) & 0x7F8));
 779       t4 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 13) & 0x7F8));
 780       t5 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 13) & 0x7F8));
 781 #endif /* _LITTLE_ENDIAN */
 782       dp[0] = t0;
 783       dp[1] = t1;
 784       dp[2] = t2;
 785       dp[3] = t3;
 786       dp[4] = t4;
 787       dp[5] = t5;
 788 #ifdef _LITTLE_ENDIAN
 789       t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 13) & 0x7F8));
 790       t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 13) & 0x7F8));
 791       t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 13) & 0x7F8));
 792       t3 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 21) & 0x7F8));
 793       t4 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 21) & 0x7F8));
 794       t5 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 21) & 0x7F8));
 795 #else
 796       t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 5) & 0x7F8));
 797       t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 5) & 0x7F8));
 798       t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 5) & 0x7F8));
 799       t3 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 << 3) & 0x7F8));
 800       t4 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 << 3) & 0x7F8));
 801       t5 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 << 3) & 0x7F8));
 802 #endif /* _LITTLE_ENDIAN */
 803       dp[6] = t0;
 804       dp[7] = t1;
 805       dp[8] = t2;
 806       dp[9] = t3;
 807       dp[10] = t4;
 808       dp[11] = t5;
 809       dp += 12;
 810       sp = (mlib_u8 *) sa;
 811       i += 4;
 812 
 813       for (; i < size; i++, sp++) {
 814         *dp++ = tab0[sp[0]];
 815         *dp++ = tab1[sp[0]];
 816         *dp++ = tab2[sp[0]];
 817       }
 818     }
 819   }
 820   else if (csize == 4) {
 821     mlib_s32 i, j;
 822 
 823     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
 824       mlib_u32 *sa;
 825       mlib_d64 *tab0 = (mlib_d64 *) table[0];
 826       mlib_d64 *tab1 = (mlib_d64 *) table[1];
 827       mlib_d64 *tab2 = (mlib_d64 *) table[2];
 828       mlib_d64 *tab3 = (mlib_d64 *) table[3];
 829       mlib_u32 s0;
 830       mlib_d64 t0, t1, t2, t3;
 831       mlib_s32 off;
 832       mlib_s32 size = xsize;
 833       mlib_d64 *dp = (mlib_d64 *) dst;
 834       mlib_u8 *sp = (void *)src;
 835 
 836       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
 837 
 838       for (i = 0; i < off; i++, sp++) {
 839         *dp++ = tab0[sp[0]];
 840         *dp++ = tab1[sp[0]];
 841         *dp++ = tab2[sp[0]];
 842         *dp++ = tab3[sp[0]];
 843         size--;
 844       }
 845 
 846       sa = (mlib_u32 *) sp;
 847 
 848       s0 = sa[0];
 849       sa++;
 850 
 851 #ifdef __SUNPRO_C
 852 #pragma pipeloop(0)
 853 #endif /* __SUNPRO_C */
 854       for (i = 0; i < size - 7; i += 4, dp += 16, sa++) {
 855 #ifdef _LITTLE_ENDIAN
 856         t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 << 3) & 0x7F8));
 857         t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 << 3) & 0x7F8));
 858         t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 << 3) & 0x7F8));
 859         t3 = *(mlib_d64 *) ((mlib_u8 *) tab3 + ((s0 << 3) & 0x7F8));
 860 #else
 861         t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 21) & 0x7F8));
 862         t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 21) & 0x7F8));
 863         t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 21) & 0x7F8));
 864         t3 = *(mlib_d64 *) ((mlib_u8 *) tab3 + ((s0 >> 21) & 0x7F8));
 865 #endif /* _LITTLE_ENDIAN */
 866         dp[0] = t0;
 867         dp[1] = t1;
 868         dp[2] = t2;
 869         dp[3] = t3;
 870 #ifdef _LITTLE_ENDIAN
 871         t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 5) & 0x7F8));
 872         t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 5) & 0x7F8));
 873         t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 5) & 0x7F8));
 874         t3 = *(mlib_d64 *) ((mlib_u8 *) tab3 + ((s0 >> 5) & 0x7F8));
 875 #else
 876         t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 13) & 0x7F8));
 877         t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 13) & 0x7F8));
 878         t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 13) & 0x7F8));
 879         t3 = *(mlib_d64 *) ((mlib_u8 *) tab3 + ((s0 >> 13) & 0x7F8));
 880 #endif /* _LITTLE_ENDIAN */
 881         dp[4] = t0;
 882         dp[5] = t1;
 883         dp[6] = t2;
 884         dp[7] = t3;
 885 #ifdef _LITTLE_ENDIAN
 886         t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 13) & 0x7F8));
 887         t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 13) & 0x7F8));
 888         t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 13) & 0x7F8));
 889         t3 = *(mlib_d64 *) ((mlib_u8 *) tab3 + ((s0 >> 13) & 0x7F8));
 890 #else
 891         t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 5) & 0x7F8));
 892         t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 5) & 0x7F8));
 893         t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 5) & 0x7F8));
 894         t3 = *(mlib_d64 *) ((mlib_u8 *) tab3 + ((s0 >> 5) & 0x7F8));
 895 #endif /* _LITTLE_ENDIAN */
 896         dp[8] = t0;
 897         dp[9] = t1;
 898         dp[10] = t2;
 899         dp[11] = t3;
 900 #ifdef _LITTLE_ENDIAN
 901         t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 21) & 0x7F8));
 902         t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 21) & 0x7F8));
 903         t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 21) & 0x7F8));
 904         t3 = *(mlib_d64 *) ((mlib_u8 *) tab3 + ((s0 >> 21) & 0x7F8));
 905 #else
 906         t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 << 3) & 0x7F8));
 907         t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 << 3) & 0x7F8));
 908         t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 << 3) & 0x7F8));
 909         t3 = *(mlib_d64 *) ((mlib_u8 *) tab3 + ((s0 << 3) & 0x7F8));
 910 #endif /* _LITTLE_ENDIAN */
 911         s0 = sa[0];
 912         dp[12] = t0;
 913         dp[13] = t1;
 914         dp[14] = t2;
 915         dp[15] = t3;
 916       }
 917 
 918 #ifdef _LITTLE_ENDIAN
 919       t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 << 3) & 0x7F8));
 920       t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 << 3) & 0x7F8));
 921       t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 << 3) & 0x7F8));
 922       t3 = *(mlib_d64 *) ((mlib_u8 *) tab3 + ((s0 << 3) & 0x7F8));
 923 #else
 924       t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 21) & 0x7F8));
 925       t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 21) & 0x7F8));
 926       t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 21) & 0x7F8));
 927       t3 = *(mlib_d64 *) ((mlib_u8 *) tab3 + ((s0 >> 21) & 0x7F8));
 928 #endif /* _LITTLE_ENDIAN */
 929       dp[0] = t0;
 930       dp[1] = t1;
 931       dp[2] = t2;
 932       dp[3] = t3;
 933 #ifdef _LITTLE_ENDIAN
 934       t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 5) & 0x7F8));
 935       t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 5) & 0x7F8));
 936       t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 5) & 0x7F8));
 937       t3 = *(mlib_d64 *) ((mlib_u8 *) tab3 + ((s0 >> 5) & 0x7F8));
 938 #else
 939       t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 13) & 0x7F8));
 940       t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 13) & 0x7F8));
 941       t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 13) & 0x7F8));
 942       t3 = *(mlib_d64 *) ((mlib_u8 *) tab3 + ((s0 >> 13) & 0x7F8));
 943 #endif /* _LITTLE_ENDIAN */
 944       dp[4] = t0;
 945       dp[5] = t1;
 946       dp[6] = t2;
 947       dp[7] = t3;
 948 #ifdef _LITTLE_ENDIAN
 949       t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 13) & 0x7F8));
 950       t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 13) & 0x7F8));
 951       t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 13) & 0x7F8));
 952       t3 = *(mlib_d64 *) ((mlib_u8 *) tab3 + ((s0 >> 13) & 0x7F8));
 953 #else
 954       t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 5) & 0x7F8));
 955       t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 5) & 0x7F8));
 956       t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 5) & 0x7F8));
 957       t3 = *(mlib_d64 *) ((mlib_u8 *) tab3 + ((s0 >> 5) & 0x7F8));
 958 #endif /* _LITTLE_ENDIAN */
 959       dp[8] = t0;
 960       dp[9] = t1;
 961       dp[10] = t2;
 962       dp[11] = t3;
 963 #ifdef _LITTLE_ENDIAN
 964       t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 21) & 0x7F8));
 965       t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 21) & 0x7F8));
 966       t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 21) & 0x7F8));
 967       t3 = *(mlib_d64 *) ((mlib_u8 *) tab3 + ((s0 >> 21) & 0x7F8));
 968 #else
 969       t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 << 3) & 0x7F8));
 970       t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 << 3) & 0x7F8));
 971       t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 << 3) & 0x7F8));
 972       t3 = *(mlib_d64 *) ((mlib_u8 *) tab3 + ((s0 << 3) & 0x7F8));
 973 #endif /* _LITTLE_ENDIAN */
 974       dp[12] = t0;
 975       dp[13] = t1;
 976       dp[14] = t2;
 977       dp[15] = t3;
 978       dp += 16;
 979       sp = (mlib_u8 *) sa;
 980       i += 4;
 981 
 982       for (; i < size; i++, sp++) {
 983         *dp++ = tab0[sp[0]];
 984         *dp++ = tab1[sp[0]];
 985         *dp++ = tab2[sp[0]];
 986         *dp++ = tab3[sp[0]];
 987       }
 988     }
 989   }
 990 }
 991 
 992 /***************************************************************/
 993 void mlib_ImageLookUpSI_S16_D64(const mlib_s16 *src,
 994                                 mlib_s32       slb,
 995                                 mlib_d64       *dst,
 996                                 mlib_s32       dlb,
 997                                 mlib_s32       xsize,
 998                                 mlib_s32       ysize,
 999                                 mlib_s32       csize,
1000                                 const mlib_d64 **table)
1001 {
1002   const mlib_d64 *table_base[4];
1003   mlib_s32 c;
1004 
1005   for (c = 0; c < csize; c++) {
1006     table_base[c] = &table[c][32768];
1007   }
1008 
1009   MLIB_C_IMAGELOOKUPSI(mlib_d64, mlib_s16, table_base);
1010 }
1011 
1012 /***************************************************************/
1013 void mlib_ImageLookUpSI_U16_D64(const mlib_u16 *src,
1014                                 mlib_s32       slb,
1015                                 mlib_d64       *dst,
1016                                 mlib_s32       dlb,
1017                                 mlib_s32       xsize,
1018                                 mlib_s32       ysize,
1019                                 mlib_s32       csize,
1020                                 const mlib_d64 **table)
1021 {
1022   const mlib_d64 *table_base[4];
1023   mlib_s32 c;
1024 
1025   for (c = 0; c < csize; c++) {
1026     table_base[c] = &table[c][0];
1027   }
1028 
1029   MLIB_C_IMAGELOOKUPSI(mlib_d64, mlib_u16, table_base);
1030 }
1031 
1032 /***************************************************************/
1033 void mlib_ImageLookUpSI_S32_D64(const mlib_s32 *src,
1034                                 mlib_s32       slb,
1035                                 mlib_d64       *dst,
1036                                 mlib_s32       dlb,
1037                                 mlib_s32       xsize,
1038                                 mlib_s32       ysize,
1039                                 mlib_s32       csize,
1040                                 const mlib_d64 **table)
1041 {
1042   const mlib_d64 *table_base[4];
1043   mlib_u32 shift = TABLE_SHIFT_S32;
1044   mlib_s32 c;
1045 
1046   for (c = 0; c < csize; c++) {
1047     table_base[c] = &table[c][shift];
1048   }
1049 
1050   MLIB_C_IMAGELOOKUPSI(mlib_d64, mlib_s32, table_base);
1051 }
1052 
1053 /***************************************************************/