1 /*
   2  * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 
  27 /*
  28  * FUNCTION
 *      mlib_ImageLookUp_U8_D64 - table lookup
 *      mlib_ImageLookUp_S16_D64 - table lookup
 *      mlib_ImageLookUp_U16_D64 - table lookup
 *      mlib_ImageLookUp_S32_D64 - table lookup
  33  *
  34  * SYNOPSIS
  35  *      void mlib_ImageLookUp_U8_D64(src, slb,
  36  *                                   dst, dlb,
  37  *                                   xsize, ysize,
  38  *                                   csize, table)
  39  *
  40  *      void mlib_ImageLookUp_S16_D64(src, slb,
  41  *                                    dst, dlb,
  42  *                                    xsize, ysize,
  43  *                                    csize, table)
  44  *
  45  *      void mlib_ImageLookUp_U16_D64(src, slb,
  46  *                                    dst, dlb,
  47  *                                    xsize, ysize,
  48  *                                    csize, table)
  49  *
  50  *      void mlib_ImageLookUp_S32_D64(src, slb,
  51  *                                    dst, dlb,
  52  *                                    xsize, ysize,
  53  *                                    csize, table)
  54  *
  55  * ARGUMENT
  56  *      src     pointer to input image (BYTE, SHORT, USHORT, INT)
 *      slb     stride of input image (in elements of the source type)
 *      dst     pointer to output image (DOUBLE)
 *      dlb     stride of output image (in DOUBLE elements)
  60  *      xsize   image width
  61  *      ysize   image height
  62  *      csize   number of channels
  63  *      table   lookup table
  64  *
  65  * DESCRIPTION
  66  *      dst = table[src] (c, vis version)
  67  */
  68 
  69 #include "mlib_image.h"
  70 #include "mlib_ImageLookUp.h"
  71 
  72 /***************************************************************/
  73 #define MLIB_C_IMAGELOOKUP(DTYPE, STYPE, TABLE)                 \
  74 {                                                               \
  75   mlib_s32 i, j, k;                                             \
  76                                                                 \
  77   if (xsize < 2) {                                              \
  78     for(j = 0; j < ysize; j++, dst += dlb, src += slb){         \
  79       for(k = 0; k < csize; k++) {                              \
  80         DTYPE *da = dst + k;                                    \
  81         const STYPE *sa = src + k;                              \
  82         DTYPE *tab = (DTYPE*) TABLE[k];                         \
  83                                                                 \
  84         for(i = 0; i < xsize; i++, da += csize, sa += csize)    \
  85         *da=tab[*sa];                                           \
  86       }                                                         \
  87     }                                                           \
  88   } else {                                                      \
  89     for(j = 0; j < ysize; j++, dst += dlb, src += slb) {        \
  90       for(k = 0; k < csize; k++) {                              \
  91         DTYPE *da = dst + k;                                    \
  92         const STYPE *sa = src + k;                              \
  93         DTYPE *tab = (DTYPE*) TABLE[k];                         \
  94         mlib_s32 s0, s1;                                        \
  95         DTYPE t0, t1;                                           \
  96                                                                 \
  97         s0 = (mlib_s32)sa[0];                                   \
  98         s1 = (mlib_s32)sa[csize];                               \
  99         sa += 2*csize;                                          \
 100                                                                 \
 101         for(i = 0;                                              \
 102             i < xsize - 3;                                      \
 103             i+=2, da += 2*csize, sa += 2*csize) {               \
 104           t0 = tab[s0];                                         \
 105           t1 = tab[s1];                                         \
 106           s0 = (mlib_s32)sa[0];                                 \
 107           s1 = (mlib_s32)sa[csize];                             \
 108           da[0] = (DTYPE)t0;                                    \
 109           da[csize] = (DTYPE)t1;                                \
 110         }                                                       \
 111         t0 = tab[s0];                                           \
 112         t1 = tab[s1];                                           \
 113         da[0] = (DTYPE)t0;                                      \
 114         da[csize] = (DTYPE)t1;                                  \
 115         if (xsize & 1) da[2*csize] = tab[sa[0]];                \
 116       }                                                         \
 117     }                                                           \
 118   }                                                             \
 119 }
 120 
 121 /***************************************************************/
 122 #define MLIB_C_IMAGELOOKUPSI(DTYPE, STYPE, TABLE)               \
 123 {                                                               \
 124   mlib_s32 i, j, k;                                             \
 125                                                                 \
 126   if (xsize < 2) {                                              \
 127     for(j = 0; j < ysize; j++, dst += dlb, src += slb){         \
 128       for(k = 0; k < csize; k++) {                              \
 129         DTYPE *da = dst + k;                                    \
 130         const STYPE *sa = (void *)src;                                  \
 131         DTYPE *tab = (DTYPE*) TABLE[k];                         \
 132                                                                 \
 133         for(i = 0; i < xsize; i++, da += csize, sa ++)          \
 134         *da=tab[*sa];                                           \
 135       }                                                         \
 136     }                                                           \
 137   } else {                                                      \
 138     for(j = 0; j < ysize; j++, dst += dlb, src += slb) {        \
 139       for(k = 0; k < csize; k++) {                              \
 140         DTYPE *da = dst + k;                                    \
 141         const STYPE *sa = (void *)src;                                  \
 142         DTYPE *tab = (DTYPE*) TABLE[k];                         \
 143         mlib_s32 s0, s1;                                        \
 144         DTYPE t0, t1;                                           \
 145                                                                 \
 146         s0 = (mlib_s32)sa[0];                                   \
 147         s1 = (mlib_s32)sa[1];                                   \
 148         sa += 2;                                                \
 149                                                                 \
 150         for(i = 0;                                              \
 151             i < xsize - 3;                                      \
 152             i+=2, da += 2*csize, sa += 2) {                     \
 153           t0 = tab[s0];                                         \
 154           t1 = tab[s1];                                         \
 155           s0 = (mlib_s32)sa[0];                                 \
 156           s1 = (mlib_s32)sa[1];                                 \
 157           da[0] = (DTYPE)t0;                                    \
 158           da[csize] = (DTYPE)t1;                                \
 159         }                                                       \
 160         t0 = tab[s0];                                           \
 161         t1 = tab[s1];                                           \
 162         da[0] = (DTYPE)t0;                                      \
 163         da[csize] = (DTYPE)t1;                                  \
 164         if (xsize & 1) da[2*csize] = tab[sa[0]];                \
 165       }                                                         \
 166     }                                                           \
 167   }                                                             \
 168 }
 169 
 170 /***************************************************************/
 171 #ifdef _LITTLE_ENDIAN
 172 
 173 #define READ_U8_D64(table0, table1, table2, table3)             \
 174   t0 = *(mlib_d64*)((mlib_u8*)table0 + ((s0 << 3) & 0x7F8));    \
 175   t1 = *(mlib_d64*)((mlib_u8*)table1 + ((s0 >> 5) & 0x7F8));    \
 176   t2 = *(mlib_d64*)((mlib_u8*)table2 + ((s0 >> 13)  & 0x7F8));  \
 177   t3 = *(mlib_d64*)((mlib_u8*)table3 + ((s0 >> 21)  & 0x7F8))
 178 
 179 #else
 180 
 181 #define READ_U8_D64(table0, table1, table2, table3)             \
 182   t0 = *(mlib_d64*)((mlib_u8*)table0 + ((s0 >> 21) & 0x7F8));   \
 183   t1 = *(mlib_d64*)((mlib_u8*)table1 + ((s0 >> 13) & 0x7F8));   \
 184   t2 = *(mlib_d64*)((mlib_u8*)table2 + ((s0 >> 5)  & 0x7F8));   \
 185   t3 = *(mlib_d64*)((mlib_u8*)table3 + ((s0 << 3)  & 0x7F8))
 186 
 187 #endif /* _LITTLE_ENDIAN */
 188 
 189 /***************************************************************/
 190 void mlib_ImageLookUp_U8_D64(const mlib_u8  *src,
 191                              mlib_s32       slb,
 192                              mlib_d64       *dst,
 193                              mlib_s32       dlb,
 194                              mlib_s32       xsize,
 195                              mlib_s32       ysize,
 196                              mlib_s32       csize,
 197                              const mlib_d64 **table)
 198 {
 199 
 200   if (xsize * csize < 7) {
 201     MLIB_C_IMAGELOOKUP(mlib_d64, mlib_u8, table);
 202   }
 203   else if (csize == 1) {
 204     mlib_s32 i, j;
 205 
 206     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
 207       mlib_u32 *sa;
 208       mlib_d64 *tab = (mlib_d64 *) table[0];
 209       mlib_u32 s0;
 210       mlib_d64 t0, t1, t2, t3;
 211       mlib_s32 off;
 212       mlib_s32 size = xsize;
 213       mlib_d64 *dp = (mlib_d64 *) dst;
 214       mlib_u8 *sp = (void *)src;
 215 
 216       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
 217 
 218       for (i = 0; i < off; i++, sp++) {
 219         *dp++ = tab[sp[0]];
 220         size--;
 221       }
 222 
 223       sa = (mlib_u32 *) sp;
 224 
 225       s0 = sa[0];
 226       sa++;
 227 
 228 #ifdef __SUNPRO_C
 229 #pragma pipeloop(0)
 230 #endif /* __SUNPRO_C */
 231       for (i = 0; i < size - 7; i += 4, dp += 4, sa++) {
 232         READ_U8_D64(tab, tab, tab, tab);
 233         s0 = sa[0];
 234         dp[0] = t0;
 235         dp[1] = t1;
 236         dp[2] = t2;
 237         dp[3] = t3;
 238       }
 239 
 240       READ_U8_D64(tab, tab, tab, tab);
 241       dp[0] = t0;
 242       dp[1] = t1;
 243       dp[2] = t2;
 244       dp[3] = t3;
 245       dp += 4;
 246       sp = (mlib_u8 *) sa;
 247       i += 4;
 248       for (; i < size; i++, dp++, sp++)
 249         dp[0] = tab[sp[0]];
 250     }
 251   }
 252   else if (csize == 2) {
 253     mlib_s32 i, j;
 254 
 255     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
 256       mlib_u32 *sa;
 257       mlib_d64 *tab0 = (mlib_d64 *) table[0];
 258       mlib_d64 *tab1 = (mlib_d64 *) table[1];
 259       mlib_d64 *tab;
 260       mlib_u32 s0;
 261       mlib_d64 t0, t1, t2, t3;
 262       mlib_s32 off;
 263       mlib_s32 size = xsize * 2;
 264       mlib_d64 *dp = (mlib_d64 *) dst;
 265       mlib_u8 *sp = (void *)src;
 266 
 267       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
 268 
 269       for (i = 0; i < off - 1; i += 2, sp += 2) {
 270         *dp++ = tab0[sp[0]];
 271         *dp++ = tab1[sp[1]];
 272         size -= 2;
 273       }
 274 
 275       if ((off & 1) != 0) {
 276         *dp++ = tab0[*sp];
 277         size--;
 278         sp++;
 279         tab = tab0;
 280         tab0 = tab1;
 281         tab1 = tab;
 282       }
 283 
 284       sa = (mlib_u32 *) sp;
 285 
 286       s0 = sa[0];
 287       sa++;
 288 
 289 #ifdef __SUNPRO_C
 290 #pragma pipeloop(0)
 291 #endif /* __SUNPRO_C */
 292       for (i = 0; i < size - 7; i += 4, dp += 4, sa++) {
 293         READ_U8_D64(tab0, tab1, tab0, tab1);
 294         s0 = sa[0];
 295         dp[0] = t0;
 296         dp[1] = t1;
 297         dp[2] = t2;
 298         dp[3] = t3;
 299       }
 300 
 301       READ_U8_D64(tab0, tab1, tab0, tab1);
 302       dp[0] = t0;
 303       dp[1] = t1;
 304       dp[2] = t2;
 305       dp[3] = t3;
 306       dp += 4;
 307       sp = (mlib_u8 *) sa;
 308       i += 4;
 309 
 310       for (; i < size - 1; i += 2, sp += 2) {
 311         *dp++ = tab0[sp[0]];
 312         *dp++ = tab1[sp[1]];
 313       }
 314 
 315       if (i < size)
 316         *dp = tab0[(*sp)];
 317     }
 318   }
 319   else if (csize == 3) {
 320     mlib_s32 i, j;
 321 
 322     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
 323       mlib_u32 *sa;
 324       mlib_d64 *tab0 = (mlib_d64 *) table[0];
 325       mlib_d64 *tab1 = (mlib_d64 *) table[1];
 326       mlib_d64 *tab2 = (mlib_d64 *) table[2];
 327       mlib_d64 *tab;
 328       mlib_u32 s0;
 329       mlib_d64 t0, t1, t2, t3;
 330       mlib_s32 off;
 331       mlib_s32 size = xsize * 3;
 332       mlib_d64 *dp = (mlib_d64 *) dst;
 333       mlib_u8 *sp = (void *)src;
 334 
 335       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
 336 
 337       if (off == 1) {
 338         *dp++ = tab0[(*sp)];
 339         tab = tab0;
 340         tab0 = tab1;
 341         tab1 = tab2;
 342         tab2 = tab;
 343         size--;
 344         sp++;
 345       }
 346       else if (off == 2) {
 347         *dp++ = tab0[sp[0]];
 348         *dp++ = tab1[sp[1]];
 349         tab = tab2;
 350         tab2 = tab1;
 351         tab1 = tab0;
 352         tab0 = tab;
 353         size -= 2;
 354         sp += 2;
 355       }
 356       else if (off == 3) {
 357         *dp++ = tab0[sp[0]];
 358         *dp++ = tab1[sp[1]];
 359         *dp++ = tab2[sp[2]];
 360         size -= 3;
 361         sp += 3;
 362       }
 363 
 364       sa = (mlib_u32 *) sp;
 365 
 366       s0 = sa[0];
 367       sa++;
 368 
 369 #ifdef __SUNPRO_C
 370 #pragma pipeloop(0)
 371 #endif /* __SUNPRO_C */
 372       for (i = 0; i < size - 7; i += 4, dp += 4, sa++) {
 373         READ_U8_D64(tab0, tab1, tab2, tab0);
 374         tab = tab0;
 375         tab0 = tab1;
 376         tab1 = tab2;
 377         tab2 = tab;
 378         s0 = sa[0];
 379         dp[0] = t0;
 380         dp[1] = t1;
 381         dp[2] = t2;
 382         dp[3] = t3;
 383       }
 384 
 385       READ_U8_D64(tab0, tab1, tab2, tab0);
 386       dp[0] = t0;
 387       dp[1] = t1;
 388       dp[2] = t2;
 389       dp[3] = t3;
 390       dp += 4;
 391       sp = (mlib_u8 *) sa;
 392       i += 4;
 393 
 394       if (i < size) {
 395         *dp++ = tab1[(*sp)];
 396         i++;
 397         sp++;
 398       }
 399 
 400       if (i < size) {
 401         *dp++ = tab2[(*sp)];
 402         i++;
 403         sp++;
 404       }
 405 
 406       if (i < size) {
 407         *dp = tab0[(*sp)];
 408       }
 409     }
 410   }
 411   else if (csize == 4) {
 412     mlib_s32 i, j;
 413 
 414     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
 415       mlib_u32 *sa;
 416       mlib_d64 *tab0 = (mlib_d64 *) table[0];
 417       mlib_d64 *tab1 = (mlib_d64 *) table[1];
 418       mlib_d64 *tab2 = (mlib_d64 *) table[2];
 419       mlib_d64 *tab3 = (mlib_d64 *) table[3];
 420       mlib_d64 *tab;
 421       mlib_u32 s0;
 422       mlib_d64 t0, t1, t2, t3;
 423       mlib_s32 off;
 424       mlib_s32 size = xsize * 4;
 425       mlib_d64 *dp = (mlib_d64 *) dst;
 426       mlib_u8 *sp = (void *)src;
 427 
 428       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
 429 
 430       if (off == 1) {
 431         *dp++ = tab0[(*sp)];
 432         tab = tab0;
 433         tab0 = tab1;
 434         tab1 = tab2;
 435         tab2 = tab3;
 436         tab3 = tab;
 437         size--;
 438         sp++;
 439       }
 440       else if (off == 2) {
 441         *dp++ = tab0[sp[0]];
 442         *dp++ = tab1[sp[1]];
 443         tab = tab0;
 444         tab0 = tab2;
 445         tab2 = tab;
 446         tab = tab1;
 447         tab1 = tab3;
 448         tab3 = tab;
 449         size -= 2;
 450         sp += 2;
 451       }
 452       else if (off == 3) {
 453         *dp++ = tab0[sp[0]];
 454         *dp++ = tab1[sp[1]];
 455         *dp++ = tab2[sp[2]];
 456         tab = tab3;
 457         tab3 = tab2;
 458         tab2 = tab1;
 459         tab1 = tab0;
 460         tab0 = tab;
 461         size -= 3;
 462         sp += 3;
 463       }
 464 
 465       sa = (mlib_u32 *) sp;
 466 
 467       s0 = sa[0];
 468       sa++;
 469 
 470 #ifdef __SUNPRO_C
 471 #pragma pipeloop(0)
 472 #endif /* __SUNPRO_C */
 473       for (i = 0; i < size - 7; i += 4, dp += 4, sa++) {
 474         READ_U8_D64(tab0, tab1, tab2, tab3);
 475         s0 = sa[0];
 476         dp[0] = t0;
 477         dp[1] = t1;
 478         dp[2] = t2;
 479         dp[3] = t3;
 480       }
 481 
 482       READ_U8_D64(tab0, tab1, tab2, tab3);
 483       dp[0] = t0;
 484       dp[1] = t1;
 485       dp[2] = t2;
 486       dp[3] = t3;
 487       dp += 4;
 488       sp = (mlib_u8 *) sa;
 489       i += 4;
 490 
 491       if (i < size) {
 492         *dp++ = tab0[(*sp)];
 493         i++;
 494         sp++;
 495       }
 496 
 497       if (i < size) {
 498         *dp++ = tab1[(*sp)];
 499         i++;
 500         sp++;
 501       }
 502 
 503       if (i < size) {
 504         *dp = tab2[(*sp)];
 505       }
 506     }
 507   }
 508 }
 509 
 510 /***************************************************************/
 511 void mlib_ImageLookUp_S16_D64(const mlib_s16 *src,
 512                               mlib_s32       slb,
 513                               mlib_d64       *dst,
 514                               mlib_s32       dlb,
 515                               mlib_s32       xsize,
 516                               mlib_s32       ysize,
 517                               mlib_s32       csize,
 518                               const mlib_d64 **table)
 519 {
 520   const mlib_d64 *table_base[4];
 521   mlib_s32 c;
 522 
 523   for (c = 0; c < csize; c++) {
 524     table_base[c] = &table[c][32768];
 525   }
 526 
 527 #ifdef __GNUC__
 528 #pragma GCC diagnostic push
 529 #pragma GCC diagnostic ignored "-Warray-bounds"
 530 #endif
 531   MLIB_C_IMAGELOOKUP(mlib_d64, mlib_s16, table_base);
 532 #ifdef __GNUC__
 533 #pragma GCC diagnostic pop
 534 #endif
 535 }
 536 
 537 /***************************************************************/
 538 void mlib_ImageLookUp_U16_D64(const mlib_u16 *src,
 539                               mlib_s32       slb,
 540                               mlib_d64       *dst,
 541                               mlib_s32       dlb,
 542                               mlib_s32       xsize,
 543                               mlib_s32       ysize,
 544                               mlib_s32       csize,
 545                               const mlib_d64 **table)
 546 {
 547   const mlib_d64 *table_base[4];
 548   mlib_s32 c;
 549 
 550   for (c = 0; c < csize; c++) {
 551     table_base[c] = &table[c][0];
 552   }
 553 
 554 #ifdef __GNUC__
 555 #pragma GCC diagnostic push
 556 #pragma GCC diagnostic ignored "-Warray-bounds"
 557 #endif
 558   MLIB_C_IMAGELOOKUP(mlib_d64, mlib_u16, table_base);
 559 #ifdef __GNUC__
 560 #pragma GCC diagnostic pop
 561 #endif
 562 }
 563 
 564 /***************************************************************/
 565 void mlib_ImageLookUp_S32_D64(const mlib_s32 *src,
 566                               mlib_s32       slb,
 567                               mlib_d64       *dst,
 568                               mlib_s32       dlb,
 569                               mlib_s32       xsize,
 570                               mlib_s32       ysize,
 571                               mlib_s32       csize,
 572                               const mlib_d64 **table)
 573 {
 574   const mlib_d64 *table_base[4];
 575   mlib_u32 shift = TABLE_SHIFT_S32;
 576   mlib_s32 c;
 577 
 578   for (c = 0; c < csize; c++) {
 579     table_base[c] = &table[c][shift];
 580   }
 581 
 582 #ifdef __GNUC__
 583 #pragma GCC diagnostic push
 584 #pragma GCC diagnostic ignored "-Warray-bounds"
 585 #endif
 586   MLIB_C_IMAGELOOKUP(mlib_d64, mlib_s32, table_base);
 587 #ifdef __GNUC__
 588 #pragma GCC diagnostic pop
 589 #endif
 590 }
 591 
 592 /***************************************************************/
 593 void mlib_ImageLookUpSI_U8_D64(const mlib_u8  *src,
 594                                mlib_s32       slb,
 595                                mlib_d64       *dst,
 596                                mlib_s32       dlb,
 597                                mlib_s32       xsize,
 598                                mlib_s32       ysize,
 599                                mlib_s32       csize,
 600                                const mlib_d64 **table)
 601 {
 602 
 603   if (xsize < 7) {
 604     MLIB_C_IMAGELOOKUPSI(mlib_d64, mlib_u8, table);
 605   }
 606   else if (csize == 2) {
 607     mlib_s32 i, j;
 608 
 609     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
 610       mlib_u32 *sa;
 611       mlib_d64 *tab0 = (mlib_d64 *) table[0];
 612       mlib_d64 *tab1 = (mlib_d64 *) table[1];
 613       mlib_u32 s0;
 614       mlib_d64 t0, t1, t2, t3;
 615       mlib_s32 off;
 616       mlib_s32 size = xsize;
 617       mlib_d64 *dp = (mlib_d64 *) dst;
 618       mlib_u8 *sp = (void *)src;
 619 
 620       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
 621 
 622       for (i = 0; i < off; i++, sp++) {
 623         *dp++ = tab0[sp[0]];
 624         *dp++ = tab1[sp[0]];
 625         size--;
 626       }
 627 
 628       sa = (mlib_u32 *) sp;
 629 
 630       s0 = sa[0];
 631       sa++;
 632 
 633 #ifdef __SUNPRO_C
 634 #pragma pipeloop(0)
 635 #endif /* __SUNPRO_C */
 636       for (i = 0; i < size - 7; i += 4, dp += 8, sa++) {
 637 #ifdef _LITTLE_ENDIAN
 638         t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 << 3) & 0x7F8));
 639         t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 << 3) & 0x7F8));
 640         t2 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 5) & 0x7F8));
 641         t3 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 5) & 0x7F8));
 642 #else
 643         t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 21) & 0x7F8));
 644         t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 21) & 0x7F8));
 645         t2 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 13) & 0x7F8));
 646         t3 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 13) & 0x7F8));
 647 #endif /* _LITTLE_ENDIAN */
 648         dp[0] = t0;
 649         dp[1] = t1;
 650         dp[2] = t2;
 651         dp[3] = t3;
 652 #ifdef _LITTLE_ENDIAN
 653         t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 13) & 0x7F8));
 654         t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 13) & 0x7F8));
 655         t2 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 21) & 0x7F8));
 656         t3 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 21) & 0x7F8));
 657 #else
 658         t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 5) & 0x7F8));
 659         t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 5) & 0x7F8));
 660         t2 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 << 3) & 0x7F8));
 661         t3 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 << 3) & 0x7F8));
 662 #endif /* _LITTLE_ENDIAN */
 663         s0 = sa[0];
 664         dp[4] = t0;
 665         dp[5] = t1;
 666         dp[6] = t2;
 667         dp[7] = t3;
 668       }
 669 
 670 #ifdef _LITTLE_ENDIAN
 671       t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 << 3) & 0x7F8));
 672       t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 << 3) & 0x7F8));
 673       t2 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 5) & 0x7F8));
 674       t3 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 5) & 0x7F8));
 675 #else
 676       t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 21) & 0x7F8));
 677       t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 21) & 0x7F8));
 678       t2 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 13) & 0x7F8));
 679       t3 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 13) & 0x7F8));
 680 #endif /* _LITTLE_ENDIAN */
 681       dp[0] = t0;
 682       dp[1] = t1;
 683       dp[2] = t2;
 684       dp[3] = t3;
 685 #ifdef _LITTLE_ENDIAN
 686       t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 13) & 0x7F8));
 687       t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 13) & 0x7F8));
 688       t2 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 21) & 0x7F8));
 689       t3 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 21) & 0x7F8));
 690 #else
 691       t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 5) & 0x7F8));
 692       t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 5) & 0x7F8));
 693       t2 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 << 3) & 0x7F8));
 694       t3 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 << 3) & 0x7F8));
 695 #endif /* _LITTLE_ENDIAN */
 696       dp[4] = t0;
 697       dp[5] = t1;
 698       dp[6] = t2;
 699       dp[7] = t3;
 700       dp += 8;
 701       sp = (mlib_u8 *) sa;
 702       i += 4;
 703 
 704       for (; i < size; i++, sp++) {
 705         *dp++ = tab0[sp[0]];
 706         *dp++ = tab1[sp[0]];
 707       }
 708     }
 709   }
 710   else if (csize == 3) {
 711     mlib_s32 i, j;
 712 
 713     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
 714       mlib_u32 *sa;
 715       mlib_d64 *tab0 = (mlib_d64 *) table[0];
 716       mlib_d64 *tab1 = (mlib_d64 *) table[1];
 717       mlib_d64 *tab2 = (mlib_d64 *) table[2];
 718       mlib_u32 s0;
 719       mlib_d64 t0, t1, t2, t3, t4, t5;
 720       mlib_s32 off;
 721       mlib_s32 size = xsize;
 722       mlib_d64 *dp = (mlib_d64 *) dst;
 723       mlib_u8 *sp = (void *)src;
 724 
 725       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
 726 
 727       for (i = 0; i < off; i++, sp++) {
 728         *dp++ = tab0[sp[0]];
 729         *dp++ = tab1[sp[0]];
 730         *dp++ = tab2[sp[0]];
 731         size--;
 732       }
 733 
 734       sa = (mlib_u32 *) sp;
 735 
 736       s0 = sa[0];
 737       sa++;
 738 
 739 #ifdef __SUNPRO_C
 740 #pragma pipeloop(0)
 741 #endif /* __SUNPRO_C */
 742       for (i = 0; i < size - 7; i += 4, dp += 12, sa++) {
 743 #ifdef _LITTLE_ENDIAN
 744         t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 << 3) & 0x7F8));
 745         t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 << 3) & 0x7F8));
 746         t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 << 3) & 0x7F8));
 747         t3 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 5) & 0x7F8));
 748         t4 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 5) & 0x7F8));
 749         t5 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 5) & 0x7F8));
 750 #else
 751         t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 21) & 0x7F8));
 752         t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 21) & 0x7F8));
 753         t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 21) & 0x7F8));
 754         t3 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 13) & 0x7F8));
 755         t4 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 13) & 0x7F8));
 756         t5 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 13) & 0x7F8));
 757 #endif /* _LITTLE_ENDIAN */
 758         dp[0] = t0;
 759         dp[1] = t1;
 760         dp[2] = t2;
 761         dp[3] = t3;
 762         dp[4] = t4;
 763         dp[5] = t5;
 764 #ifdef _LITTLE_ENDIAN
 765         t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 13) & 0x7F8));
 766         t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 13) & 0x7F8));
 767         t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 13) & 0x7F8));
 768         t3 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 21) & 0x7F8));
 769         t4 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 21) & 0x7F8));
 770         t5 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 21) & 0x7F8));
 771 #else
 772         t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 5) & 0x7F8));
 773         t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 5) & 0x7F8));
 774         t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 5) & 0x7F8));
 775         t3 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 << 3) & 0x7F8));
 776         t4 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 << 3) & 0x7F8));
 777         t5 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 << 3) & 0x7F8));
 778 #endif /* _LITTLE_ENDIAN */
 779         s0 = sa[0];
 780         dp[6] = t0;
 781         dp[7] = t1;
 782         dp[8] = t2;
 783         dp[9] = t3;
 784         dp[10] = t4;
 785         dp[11] = t5;
 786       }
 787 
 788 #ifdef _LITTLE_ENDIAN
 789       t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 << 3) & 0x7F8));
 790       t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 << 3) & 0x7F8));
 791       t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 << 3) & 0x7F8));
 792       t3 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 5) & 0x7F8));
 793       t4 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 5) & 0x7F8));
 794       t5 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 5) & 0x7F8));
 795 #else
 796       t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 21) & 0x7F8));
 797       t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 21) & 0x7F8));
 798       t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 21) & 0x7F8));
 799       t3 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 13) & 0x7F8));
 800       t4 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 13) & 0x7F8));
 801       t5 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 13) & 0x7F8));
 802 #endif /* _LITTLE_ENDIAN */
 803       dp[0] = t0;
 804       dp[1] = t1;
 805       dp[2] = t2;
 806       dp[3] = t3;
 807       dp[4] = t4;
 808       dp[5] = t5;
 809 #ifdef _LITTLE_ENDIAN
 810       t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 13) & 0x7F8));
 811       t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 13) & 0x7F8));
 812       t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 13) & 0x7F8));
 813       t3 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 21) & 0x7F8));
 814       t4 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 21) & 0x7F8));
 815       t5 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 21) & 0x7F8));
 816 #else
 817       t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 5) & 0x7F8));
 818       t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 5) & 0x7F8));
 819       t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 5) & 0x7F8));
 820       t3 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 << 3) & 0x7F8));
 821       t4 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 << 3) & 0x7F8));
 822       t5 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 << 3) & 0x7F8));
 823 #endif /* _LITTLE_ENDIAN */
 824       dp[6] = t0;
 825       dp[7] = t1;
 826       dp[8] = t2;
 827       dp[9] = t3;
 828       dp[10] = t4;
 829       dp[11] = t5;
 830       dp += 12;
 831       sp = (mlib_u8 *) sa;
 832       i += 4;
 833 
 834       for (; i < size; i++, sp++) {
 835         *dp++ = tab0[sp[0]];
 836         *dp++ = tab1[sp[0]];
 837         *dp++ = tab2[sp[0]];
 838       }
 839     }
 840   }
 841   else if (csize == 4) {
 842     mlib_s32 i, j;
 843 
 844     for (j = 0; j < ysize; j++, dst += dlb, src += slb) {
 845       mlib_u32 *sa;
 846       mlib_d64 *tab0 = (mlib_d64 *) table[0];
 847       mlib_d64 *tab1 = (mlib_d64 *) table[1];
 848       mlib_d64 *tab2 = (mlib_d64 *) table[2];
 849       mlib_d64 *tab3 = (mlib_d64 *) table[3];
 850       mlib_u32 s0;
 851       mlib_d64 t0, t1, t2, t3;
 852       mlib_s32 off;
 853       mlib_s32 size = xsize;
 854       mlib_d64 *dp = (mlib_d64 *) dst;
 855       mlib_u8 *sp = (void *)src;
 856 
 857       off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3);
 858 
 859       for (i = 0; i < off; i++, sp++) {
 860         *dp++ = tab0[sp[0]];
 861         *dp++ = tab1[sp[0]];
 862         *dp++ = tab2[sp[0]];
 863         *dp++ = tab3[sp[0]];
 864         size--;
 865       }
 866 
 867       sa = (mlib_u32 *) sp;
 868 
 869       s0 = sa[0];
 870       sa++;
 871 
 872 #ifdef __SUNPRO_C
 873 #pragma pipeloop(0)
 874 #endif /* __SUNPRO_C */
 875       for (i = 0; i < size - 7; i += 4, dp += 16, sa++) {
 876 #ifdef _LITTLE_ENDIAN
 877         t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 << 3) & 0x7F8));
 878         t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 << 3) & 0x7F8));
 879         t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 << 3) & 0x7F8));
 880         t3 = *(mlib_d64 *) ((mlib_u8 *) tab3 + ((s0 << 3) & 0x7F8));
 881 #else
 882         t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 21) & 0x7F8));
 883         t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 21) & 0x7F8));
 884         t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 21) & 0x7F8));
 885         t3 = *(mlib_d64 *) ((mlib_u8 *) tab3 + ((s0 >> 21) & 0x7F8));
 886 #endif /* _LITTLE_ENDIAN */
 887         dp[0] = t0;
 888         dp[1] = t1;
 889         dp[2] = t2;
 890         dp[3] = t3;
 891 #ifdef _LITTLE_ENDIAN
 892         t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 5) & 0x7F8));
 893         t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 5) & 0x7F8));
 894         t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 5) & 0x7F8));
 895         t3 = *(mlib_d64 *) ((mlib_u8 *) tab3 + ((s0 >> 5) & 0x7F8));
 896 #else
 897         t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 13) & 0x7F8));
 898         t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 13) & 0x7F8));
 899         t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 13) & 0x7F8));
 900         t3 = *(mlib_d64 *) ((mlib_u8 *) tab3 + ((s0 >> 13) & 0x7F8));
 901 #endif /* _LITTLE_ENDIAN */
 902         dp[4] = t0;
 903         dp[5] = t1;
 904         dp[6] = t2;
 905         dp[7] = t3;
 906 #ifdef _LITTLE_ENDIAN
 907         t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 13) & 0x7F8));
 908         t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 13) & 0x7F8));
 909         t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 13) & 0x7F8));
 910         t3 = *(mlib_d64 *) ((mlib_u8 *) tab3 + ((s0 >> 13) & 0x7F8));
 911 #else
 912         t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 5) & 0x7F8));
 913         t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 5) & 0x7F8));
 914         t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 5) & 0x7F8));
 915         t3 = *(mlib_d64 *) ((mlib_u8 *) tab3 + ((s0 >> 5) & 0x7F8));
 916 #endif /* _LITTLE_ENDIAN */
 917         dp[8] = t0;
 918         dp[9] = t1;
 919         dp[10] = t2;
 920         dp[11] = t3;
 921 #ifdef _LITTLE_ENDIAN
 922         t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 21) & 0x7F8));
 923         t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 21) & 0x7F8));
 924         t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 21) & 0x7F8));
 925         t3 = *(mlib_d64 *) ((mlib_u8 *) tab3 + ((s0 >> 21) & 0x7F8));
 926 #else
 927         t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 << 3) & 0x7F8));
 928         t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 << 3) & 0x7F8));
 929         t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 << 3) & 0x7F8));
 930         t3 = *(mlib_d64 *) ((mlib_u8 *) tab3 + ((s0 << 3) & 0x7F8));
 931 #endif /* _LITTLE_ENDIAN */
 932         s0 = sa[0];
 933         dp[12] = t0;
 934         dp[13] = t1;
 935         dp[14] = t2;
 936         dp[15] = t3;
 937       }
 938 
 939 #ifdef _LITTLE_ENDIAN
 940       t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 << 3) & 0x7F8));
 941       t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 << 3) & 0x7F8));
 942       t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 << 3) & 0x7F8));
 943       t3 = *(mlib_d64 *) ((mlib_u8 *) tab3 + ((s0 << 3) & 0x7F8));
 944 #else
 945       t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 21) & 0x7F8));
 946       t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 21) & 0x7F8));
 947       t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 21) & 0x7F8));
 948       t3 = *(mlib_d64 *) ((mlib_u8 *) tab3 + ((s0 >> 21) & 0x7F8));
 949 #endif /* _LITTLE_ENDIAN */
 950       dp[0] = t0;
 951       dp[1] = t1;
 952       dp[2] = t2;
 953       dp[3] = t3;
 954 #ifdef _LITTLE_ENDIAN
 955       t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 5) & 0x7F8));
 956       t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 5) & 0x7F8));
 957       t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 5) & 0x7F8));
 958       t3 = *(mlib_d64 *) ((mlib_u8 *) tab3 + ((s0 >> 5) & 0x7F8));
 959 #else
 960       t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 13) & 0x7F8));
 961       t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 13) & 0x7F8));
 962       t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 13) & 0x7F8));
 963       t3 = *(mlib_d64 *) ((mlib_u8 *) tab3 + ((s0 >> 13) & 0x7F8));
 964 #endif /* _LITTLE_ENDIAN */
 965       dp[4] = t0;
 966       dp[5] = t1;
 967       dp[6] = t2;
 968       dp[7] = t3;
 969 #ifdef _LITTLE_ENDIAN
 970       t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 13) & 0x7F8));
 971       t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 13) & 0x7F8));
 972       t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 13) & 0x7F8));
 973       t3 = *(mlib_d64 *) ((mlib_u8 *) tab3 + ((s0 >> 13) & 0x7F8));
 974 #else
 975       t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 5) & 0x7F8));
 976       t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 5) & 0x7F8));
 977       t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 5) & 0x7F8));
 978       t3 = *(mlib_d64 *) ((mlib_u8 *) tab3 + ((s0 >> 5) & 0x7F8));
 979 #endif /* _LITTLE_ENDIAN */
 980       dp[8] = t0;
 981       dp[9] = t1;
 982       dp[10] = t2;
 983       dp[11] = t3;
 984 #ifdef _LITTLE_ENDIAN
 985       t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 >> 21) & 0x7F8));
 986       t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 >> 21) & 0x7F8));
 987       t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 >> 21) & 0x7F8));
 988       t3 = *(mlib_d64 *) ((mlib_u8 *) tab3 + ((s0 >> 21) & 0x7F8));
 989 #else
 990       t0 = *(mlib_d64 *) ((mlib_u8 *) tab0 + ((s0 << 3) & 0x7F8));
 991       t1 = *(mlib_d64 *) ((mlib_u8 *) tab1 + ((s0 << 3) & 0x7F8));
 992       t2 = *(mlib_d64 *) ((mlib_u8 *) tab2 + ((s0 << 3) & 0x7F8));
 993       t3 = *(mlib_d64 *) ((mlib_u8 *) tab3 + ((s0 << 3) & 0x7F8));
 994 #endif /* _LITTLE_ENDIAN */
 995       dp[12] = t0;
 996       dp[13] = t1;
 997       dp[14] = t2;
 998       dp[15] = t3;
 999       dp += 16;
1000       sp = (mlib_u8 *) sa;
1001       i += 4;
1002 
1003       for (; i < size; i++, sp++) {
1004         *dp++ = tab0[sp[0]];
1005         *dp++ = tab1[sp[0]];
1006         *dp++ = tab2[sp[0]];
1007         *dp++ = tab3[sp[0]];
1008       }
1009     }
1010   }
1011 }
1012 
1013 /***************************************************************/
1014 void mlib_ImageLookUpSI_S16_D64(const mlib_s16 *src,
1015                                 mlib_s32       slb,
1016                                 mlib_d64       *dst,
1017                                 mlib_s32       dlb,
1018                                 mlib_s32       xsize,
1019                                 mlib_s32       ysize,
1020                                 mlib_s32       csize,
1021                                 const mlib_d64 **table)
1022 {
1023   const mlib_d64 *table_base[4];
1024   mlib_s32 c;
1025 
1026   for (c = 0; c < csize; c++) {
1027     table_base[c] = &table[c][32768];
1028   }
1029 
1030   MLIB_C_IMAGELOOKUPSI(mlib_d64, mlib_s16, table_base);
1031 }
1032 
1033 /***************************************************************/
1034 void mlib_ImageLookUpSI_U16_D64(const mlib_u16 *src,
1035                                 mlib_s32       slb,
1036                                 mlib_d64       *dst,
1037                                 mlib_s32       dlb,
1038                                 mlib_s32       xsize,
1039                                 mlib_s32       ysize,
1040                                 mlib_s32       csize,
1041                                 const mlib_d64 **table)
1042 {
1043   const mlib_d64 *table_base[4];
1044   mlib_s32 c;
1045 
1046   for (c = 0; c < csize; c++) {
1047     table_base[c] = &table[c][0];
1048   }
1049 
1050   MLIB_C_IMAGELOOKUPSI(mlib_d64, mlib_u16, table_base);
1051 }
1052 
1053 /***************************************************************/
1054 void mlib_ImageLookUpSI_S32_D64(const mlib_s32 *src,
1055                                 mlib_s32       slb,
1056                                 mlib_d64       *dst,
1057                                 mlib_s32       dlb,
1058                                 mlib_s32       xsize,
1059                                 mlib_s32       ysize,
1060                                 mlib_s32       csize,
1061                                 const mlib_d64 **table)
1062 {
1063   const mlib_d64 *table_base[4];
1064   mlib_u32 shift = TABLE_SHIFT_S32;
1065   mlib_s32 c;
1066 
1067   for (c = 0; c < csize; c++) {
1068     table_base[c] = &table[c][shift];
1069   }
1070 
1071   MLIB_C_IMAGELOOKUPSI(mlib_d64, mlib_s32, table_base);
1072 }
1073 
1074 /***************************************************************/