1 /*
   2  * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 
  27 /*
  28  * FUNCTION
  29  *      mlib_ImageLookUp_Bit_U8 - table lookup
  30  *
  31  * SYNOPSIS
  32  *      void mlib_ImageLookUp_Bit_U8(src, slb,
  33  *                                   dst, dlb,
  34  *                                   xsize, ysize,
  35  *                                   csize, table)
  36  *
  37  * ARGUMENT
  38  *      src     pointer to input image (BIT)
 *      slb     stride of input image (in bytes)
 *      dst     pointer to output image (BYTE)
 *      dlb     stride of output image (in bytes)
  42  *      xsize   image width
  43  *      ysize   image height
  44  *      csize   number of channels
  45  *      table   lookup table
  46  *
  47  * DESCRIPTION
  48  *      dst = table[src] (c, vis version)
  49  */
  50 
  51 #include "mlib_image.h"
  52 #include "mlib_ImageLookUp.h"
  53 
  54 /***************************************************************/
  55 #define MAX_WIDTH  512
  56 
  57 /***************************************************************/
  58 #ifdef i386 /* do not copy by double data type for x86 */
  59 
  60 typedef struct {
  61   mlib_u32 int0, int1;
  62 } two_uint;
  63 
  64 #define TYPE_64BIT two_uint
  65 #define TYPE_32BIT mlib_u32
  66 #define DTYPE      two_uint
  67 
  68 #elif defined(_NO_LONGLONG)
  69 
  70 #define TYPE_64BIT mlib_d64
  71 #define TYPE_32BIT mlib_f32
  72 #define DTYPE      mlib_d64
  73 
  74 #else
  75 
  76 #define TYPE_64BIT mlib_d64
  77 #define TYPE_32BIT mlib_f32
  78 #define DTYPE      mlib_u64
  79 
  80 #endif /* i386 ( do not copy by double data type for x86 ) */
  81 
  82 /***************************************************************/
  83 typedef union {
  84   TYPE_64BIT d64;
  85   struct {
  86     TYPE_32BIT f0, f1;
  87   } f32s;
  88 } d64_2_f32;
  89 
  90 /***************************************************************/
  91 #ifdef _LITTLE_ENDIAN
  92 
  93 static const mlib_u32 mlib_bit_mask[16] = {
  94   0x00000000u, 0xFF000000u, 0x00FF0000u, 0xFFFF0000u,
  95   0x0000FF00u, 0xFF00FF00u, 0x00FFFF00u, 0xFFFFFF00u,
  96   0x000000FFu, 0xFF0000FFu, 0x00FF00FFu, 0xFFFF00FFu,
  97   0x0000FFFFu, 0xFF00FFFFu, 0x00FFFFFFu, 0xFFFFFFFFu
  98 };
  99 
 100 static const mlib_u32 mlib_bit_mask_2[4] = {
 101   0x00000000u, 0xFFFF0000u, 0x0000FFFFu, 0xFFFFFFFFu
 102 };
 103 
 104 static const mlib_u32 mlib_bit_mask_3[3*4] = {
 105   0x00000000u, 0xFF000000u, 0x00FFFFFFu, 0xFFFFFFFFu,
 106   0x00000000u, 0xFFFF0000u, 0x0000FFFFu, 0xFFFFFFFFu,
 107   0x00000000u, 0xFFFFFF00u, 0x000000FFu, 0xFFFFFFFFu
 108 };
 109 
 110 #else
 111 
 112 static const mlib_u32 mlib_bit_mask[16] = {
 113   0x00000000u, 0x000000FFu, 0x0000FF00u, 0x0000FFFFu,
 114   0x00FF0000u, 0x00FF00FFu, 0x00FFFF00u, 0x00FFFFFFu,
 115   0xFF000000u, 0xFF0000FFu, 0xFF00FF00u, 0xFF00FFFFu,
 116   0xFFFF0000u, 0xFFFF00FFu, 0xFFFFFF00u, 0xFFFFFFFFu
 117 };
 118 
 119 static const mlib_u32 mlib_bit_mask_2[4] = {
 120   0x00000000u, 0x0000FFFFu, 0xFFFF0000u, 0xFFFFFFFFu
 121 };
 122 
 123 static const mlib_u32 mlib_bit_mask_3[3*4] = {
 124   0x00000000u, 0x000000FFu, 0xFFFFFF00u, 0xFFFFFFFFu,
 125   0x00000000u, 0x0000FFFFu, 0xFFFF0000u, 0xFFFFFFFFu,
 126   0x00000000u, 0x00FFFFFFu, 0xFF000000u, 0xFFFFFFFFu
 127 };
 128 
 129 #endif /* _LITTLE_ENDIAN */
 130 
 131 /***************************************************************/
 132 mlib_status mlib_ImageLookUp_Bit_U8_1(const mlib_u8 *src,
 133                                       mlib_s32      slb,
 134                                       mlib_u8       *dst,
 135                                       mlib_s32      dlb,
 136                                       mlib_s32      xsize,
 137                                       mlib_s32      ysize,
 138                                       mlib_s32      nchan,
 139                                       mlib_s32      bitoff,
 140                                       const mlib_u8 **table)
 141 {
 142   mlib_s32 i, j, n;
 143   TYPE_64BIT dd_array[256];
 144   mlib_u8  buff_lcl[MAX_WIDTH/8];
 145   mlib_u8  *buff = (mlib_u8*)buff_lcl;
 146   mlib_u32 val0, val1, *p_dd = (mlib_u32*)dd_array;
 147 
 148   if (xsize > MAX_WIDTH) {
 149     buff = mlib_malloc((xsize + 7)/8);
 150 
 151     if (buff == NULL) return MLIB_FAILURE;
 152   }
 153 
 154   val0 = table[0][0];
 155   val1 = table[0][1];
 156   val0 |= (val0 << 8);
 157   val1 |= (val1 << 8);
 158   val0 |= (val0 << 16);
 159   val1 |= (val1 << 16);
 160 
 161   /* calculate lookup table */
 162   for (i = 0; i < 16; i++) {
 163     mlib_u32 v, mask = mlib_bit_mask[i];
 164 
 165     v = (val0 &~ mask) | (val1 & mask);
 166 
 167 #ifdef __SUNPRO_C
 168 #pragma pipeloop(0)
 169 #endif /* __SUNPRO_C */
 170     for (j = 0; j < 16; j++) {
 171       p_dd[2*(16*i + j)] = v;
 172     }
 173 
 174 #ifdef __SUNPRO_C
 175 #pragma pipeloop(0)
 176 #endif /* __SUNPRO_C */
 177     for (j = 0; j < 16; j++) {
 178       p_dd[2*(i + 16*j) + 1] = v;
 179     }
 180   }
 181 
 182   for (j = 0; j < ysize; j++) {
 183     mlib_s32 s0, size = xsize;
 184     mlib_u8  *dp = dst;
 185     mlib_u8  *sp = (void *)src;
 186     mlib_u8  *sa;
 187     TYPE_64BIT *da;
 188     mlib_s32 doff, boff = bitoff;
 189 
 190     if ((mlib_addr)dp & 7) {
 191 
 192       /* result of (dp & 7) certainly fits into mlib_s32 */
 193       doff = 8 - ((mlib_s32) ((mlib_addr)dp & 7));
 194 
 195       if (doff > xsize) doff = xsize;
 196 
 197       for (n = 0; n < doff; n++) {
 198         dp[n] = table[0][(sp[0] >> (7 - boff)) & 0x1];
 199         boff++;
 200 
 201         if (boff >= 8) {
 202           sp++;
 203           boff -= 8;
 204         }
 205 
 206         size--;
 207       }
 208 
 209       dp += doff;
 210     }
 211 
 212     if (boff) {
 213       mlib_ImageCopy_bit_na(sp, buff, size, boff, 0);
 214       sp = buff;
 215     }
 216 
 217     sa = (mlib_u8*)sp;
 218     da = (TYPE_64BIT*)dp;
 219     i  = 0;
 220 
 221     if ((mlib_addr)sa & 1 && size >= 8) {
 222       *da++ = dd_array[*sa++];
 223       i += 8;
 224     }
 225 
 226 #ifdef __SUNPRO_C
 227 #pragma pipeloop(0)
 228 #endif /* __SUNPRO_C */
 229     for (; i <= (size - 16); i += 16) {
 230       s0 = *(mlib_u16*)sa;
 231 #ifdef _LITTLE_ENDIAN
 232       *da++ = dd_array[s0 & 0xFF];
 233       *da++ = dd_array[s0 >> 8];
 234 #else
 235       *da++ = dd_array[s0 >> 8];
 236       *da++ = dd_array[s0 & 0xFF];
 237 #endif /* _LITTLE_ENDIAN */
 238       sa += 2;
 239     }
 240 
 241     if (i <= (size - 8)) {
 242       *da++ = dd_array[*sa++];
 243       i += 8;
 244     }
 245 
 246     if (i < size) {
 247 
 248 #ifdef _NO_LONGLONG
 249 
 250       mlib_u32 emask;
 251       val0 = sa[0];
 252       val1 = p_dd[2*val0];
 253 
 254       if (i < (size - 4)) {
 255         ((mlib_u32*)da)[0] = val1;
 256         da = (TYPE_64BIT *) ((mlib_u8 *)da + 4);
 257         i += 4;
 258         val1 = p_dd[2*val0+1];
 259       }
 260 
 261 #ifdef _LITTLE_ENDIAN
 262       emask = (~(mlib_u32)0) >> ((4 - (size - i)) * 8);
 263 #else
 264       emask = (~(mlib_u32)0) << ((4 - (size - i)) * 8);
 265 #endif /* _LITTLE_ENDIAN */
 266       ((mlib_u32*)da)[0] = (val1 & emask) | (((mlib_u32*)da)[0] &~ emask);
 267 
 268 #else /* _NO_LONGLONG */
 269 
 270 #ifdef _LITTLE_ENDIAN
 271       mlib_u64 emask = (~(mlib_u64)0) >> ((8 - (size - i)) * 8);
 272 #else
 273       mlib_u64 emask = (~(mlib_u64)0) << ((8 - (size - i)) * 8);
 274 #endif /* _LITTLE_ENDIAN */
 275 
 276       ((mlib_u64*)da)[0] = (((mlib_u64*)dd_array)[sa[0]] & emask) | (((mlib_u64*)da)[0] &~ emask);
 277 
 278 #endif /* _NO_LONGLONG */
 279     }
 280 
 281     src += slb;
 282     dst += dlb;
 283   }
 284 
 285   if (buff != (mlib_u8*)buff_lcl) mlib_free(buff);
 286 
 287   return MLIB_SUCCESS;
 288 }
 289 
 290 /***************************************************************/
 291 mlib_status mlib_ImageLookUp_Bit_U8_2(const mlib_u8 *src,
 292                                       mlib_s32      slb,
 293                                       mlib_u8       *dst,
 294                                       mlib_s32      dlb,
 295                                       mlib_s32      xsize,
 296                                       mlib_s32      ysize,
 297                                       mlib_s32      nchan,
 298                                       mlib_s32      bitoff,
 299                                       const mlib_u8 **table)
 300 {
 301   mlib_s32 i, j;
 302   mlib_s32 s0, size;
 303 #ifdef _NO_LONGLONG
 304   mlib_u32 emask, dd1, dd2;
 305 #else /* _NO_LONGLONG */
 306   mlib_u64 emask, dd;
 307 #endif /* _NO_LONGLONG */
 308   DTYPE    dd_array[16];
 309   mlib_u32 *p_dd = (mlib_u32*)dd_array;
 310   mlib_d64 buff_lcl[(MAX_WIDTH + MAX_WIDTH/8)/8];
 311   mlib_u8  *buff = (mlib_u8*)buff_lcl, *buffs;
 312   mlib_u32 val0, val1;
 313 
 314   size = xsize * 2;
 315 
 316   if (size > MAX_WIDTH) {
 317     buff = mlib_malloc(size + (size + 7)/8);
 318 
 319     if (buff == NULL) return MLIB_FAILURE;
 320   }
 321 
 322   buffs = buff + size;
 323 
 324   val0 = table[0][0];
 325   val1 = table[0][1];
 326 #ifdef _LITTLE_ENDIAN
 327   val0 = val0 | (table[1][0] << 8);
 328   val1 = val1 | (table[1][1] << 8);
 329 #else
 330   val0 = (val0 << 8) | table[1][0];
 331   val1 = (val1 << 8) | table[1][1];
 332 #endif /* _LITTLE_ENDIAN */
 333   val0 |= (val0 << 16);
 334   val1 |= (val1 << 16);
 335 
 336   /* calculate lookup table */
 337   for (i = 0; i < 4; i++) {
 338     mlib_u32 v, mask = mlib_bit_mask_2[i];
 339 
 340     v = (val0 &~ mask) | (val1 & mask);
 341 
 342     for (j = 0; j < 4; j++) {
 343       p_dd[2*(4*i + j)] = v;
 344       p_dd[2*(i + 4*j) + 1] = v;
 345     }
 346   }
 347 
 348   for (j = 0; j < ysize; j++) {
 349     mlib_u8  *dp = dst;
 350     mlib_u8  *sp = (void *)src;
 351     mlib_u8  *sa;
 352     DTYPE    *da;
 353 
 354     if ((mlib_addr)dp & 7) dp = buff;
 355 
 356     if (bitoff) {
 357       mlib_ImageCopy_bit_na(sp, buffs, size, bitoff, 0);
 358       sp = buffs;
 359     }
 360 
 361     sa = (mlib_u8*)sp;
 362     da = (DTYPE*)dp;
 363 
 364 #ifdef __SUNPRO_C
 365 #pragma pipeloop(0)
 366 #endif /* __SUNPRO_C */
 367     for (i = 0; i <= (size - 16); i += 16) {
 368       s0 = *sa++;
 369       *da++ = dd_array[s0 >> 4];
 370       *da++ = dd_array[s0 & 0xF];
 371     }
 372 
 373     if (i < size) {
 374       s0 = *sa++;
 375 
 376 #ifdef _NO_LONGLONG
 377 
 378       dd1 = p_dd[2*(s0 >> 4)];
 379       dd2 = p_dd[2*(s0 >> 4)+1];
 380 
 381       if (i < (size - 8)) {
 382         ((mlib_u32*)da)[0] = dd1;
 383         ((mlib_u32*)da)[1] = dd2;
 384         da++;
 385         i += 8;
 386         dd1 = p_dd[2*(s0 & 0xf)];
 387         dd2 = p_dd[2*(s0 & 0xf)+1];
 388       }
 389 
 390       if (i < (size - 4)) {
 391         ((mlib_u32*)da)[0] = dd1;
 392         da = (DTYPE *) ((mlib_u8 *)da + 4);
 393         i += 4;
 394         dd1 = dd2;
 395       }
 396 
 397 #ifdef _LITTLE_ENDIAN
 398       emask = (~(mlib_u32)0) >> ((4 - (size - i)) * 8);
 399 #else
 400       emask = (~(mlib_u32)0) << ((4 - (size - i)) * 8);
 401 #endif /* _LITTLE_ENDIAN */
 402       ((mlib_u32*)da)[0] = (dd1 & emask) | (((mlib_u32*)da)[0] &~ emask);
 403 
 404 #else /* _NO_LONGLONG */
 405 
 406       dd = ((mlib_u64*)dd_array)[s0 >> 4];
 407 
 408       if (i < (size - 8)) {
 409         ((mlib_u64*)da)[0] = dd;
 410         da++;
 411         i += 8;
 412         dd = ((mlib_u64*)dd_array)[s0 & 0xf];
 413       }
 414 
 415 #ifdef _LITTLE_ENDIAN
 416       emask = (~(mlib_u64)0) >> ((8 - (size - i)) * 8);
 417 #else
 418       emask = (~(mlib_u64)0) << ((8 - (size - i)) * 8);
 419 #endif /* _LITTLE_ENDIAN */
 420       ((mlib_u64*)da)[0] = (dd & emask) | (((mlib_u64*)da)[0] &~ emask);
 421 
 422 #endif /* _NO_LONGLONG */
 423     }
 424 
 425     if (dp != dst) mlib_ImageCopy_na(dp, dst, size);
 426 
 427     src += slb;
 428     dst += dlb;
 429   }
 430 
 431   if (buff != (mlib_u8*)buff_lcl) mlib_free(buff);
 432 
 433   return MLIB_SUCCESS;
 434 }
 435 
 436 /***************************************************************/
 437 mlib_status mlib_ImageLookUp_Bit_U8_3(const mlib_u8 *src,
 438                                       mlib_s32      slb,
 439                                       mlib_u8       *dst,
 440                                       mlib_s32      dlb,
 441                                       mlib_s32      xsize,
 442                                       mlib_s32      ysize,
 443                                       mlib_s32      nchan,
 444                                       mlib_s32      bitoff,
 445                                       const mlib_u8 **table)
 446 {
 447   mlib_s32 i, j;
 448   mlib_s32 s0, size;
 449   mlib_u32 emask, dd;
 450   TYPE_64BIT d_array01[16], d_array12[16];
 451   TYPE_64BIT buff_lcl[(MAX_WIDTH + MAX_WIDTH/8)/8];
 452   mlib_u8  *buff = (mlib_u8*)buff_lcl, *buffs;
 453   mlib_u32 l0, h0, v0, l1, h1, v1, l2, h2, v2;
 454 
 455   size = 3 * xsize;
 456 
 457   if (size > MAX_WIDTH) {
 458     buff = mlib_malloc(size + (size + 7)/8);
 459 
 460     if (buff == NULL) return MLIB_FAILURE;
 461   }
 462 
 463   buffs = buff + size;
 464 
 465 #ifdef _LITTLE_ENDIAN
 466   l0 = (table[0][0] << 24) | (table[2][0] << 16) | (table[1][0] << 8) | (table[0][0]);
 467   h0 = (table[0][1] << 24) | (table[2][1] << 16) | (table[1][1] << 8) | (table[0][1]);
 468   l1 = (l0 >> 8); l1 |= (l1 << 24);
 469   h1 = (h0 >> 8); h1 |= (h1 << 24);
 470   l2 = (l1 >> 8); l2 |= (l2 << 24);
 471   h2 = (h1 >> 8); h2 |= (h2 << 24);
 472 #else
 473   l0 = (table[0][0] << 24) | (table[1][0] << 16) | (table[2][0] << 8) | (table[0][0]);
 474   h0 = (table[0][1] << 24) | (table[1][1] << 16) | (table[2][1] << 8) | (table[0][1]);
 475   l1 = (l0 << 8); l1 |= (l1 >> 24);
 476   h1 = (h0 << 8); h1 |= (h1 >> 24);
 477   l2 = (l1 << 8); l2 |= (l2 >> 24);
 478   h2 = (h1 << 8); h2 |= (h2 >> 24);
 479 #endif /* _LITTLE_ENDIAN */
 480 
 481   /* calculate lookup table */
 482 #ifdef __SUNPRO_C
 483 #pragma pipeloop(0)
 484 #endif /* __SUNPRO_C */
 485   for (i = 0; i < 16; i++) {
 486     mlib_u32 mask0 = mlib_bit_mask_3[i >> 2];
 487     mlib_u32 mask1 = mlib_bit_mask_3[4 + ((i >> 1) & 3)];
 488     mlib_u32 mask2 = mlib_bit_mask_3[8 + (i & 3)];
 489 
 490     v0 = (l0 &~ mask0) | (h0 & mask0);
 491     v1 = (l1 &~ mask1) | (h1 & mask1);
 492     v2 = (l2 &~ mask2) | (h2 & mask2);
 493 
 494     ((mlib_u32*)d_array01)[2*i    ] = v0;
 495     ((mlib_u32*)d_array01)[2*i + 1] = v1;
 496     ((mlib_u32*)d_array12)[2*i    ] = v1;
 497     ((mlib_u32*)d_array12)[2*i + 1] = v2;
 498   }
 499 
 500   for (j = 0; j < ysize; j++) {
 501     mlib_u8  *dp = dst;
 502     mlib_u8  *sp = (void *)src;
 503     mlib_u8  *sa;
 504     mlib_u32 *da;
 505 
 506     if ((mlib_addr)dp & 7) dp = buff;
 507 
 508     if (bitoff) {
 509       mlib_ImageCopy_bit_na(sp, buffs, size, bitoff, 0);
 510       sp = buffs;
 511     }
 512 
 513     sa = (mlib_u8*)sp;
 514     da = (mlib_u32*)dp;
 515 
 516 #ifdef __SUNPRO_C
 517 #pragma pipeloop(0)
 518 #endif /* __SUNPRO_C */
 519     for (i = 0; i <= (size - 24); i += 24) {
 520       d64_2_f32 dd;
 521       s0 = *sa++;
 522 
 523       ((TYPE_64BIT*)da)[0] = *(d_array01 + (s0 >> 4));
 524 
 525       dd.f32s.f0 = ((TYPE_32BIT*)(d_array12 + (s0 >> 4)))[1];
 526       dd.f32s.f1 = ((TYPE_32BIT*)(d_array01 + (s0 & 0xF)))[0];
 527       ((TYPE_64BIT*)da)[1] = dd.d64;
 528       ((TYPE_64BIT*)da)[2] = *(d_array12 + (s0 & 0xF));
 529 
 530       da += 6;
 531     }
 532 
 533     if (i < size) {
 534       s0 = *sa++;
 535       dd = ((mlib_u32*)(d_array01 + (s0 >> 4)))[0];
 536 
 537       if (i < (size - 4)) {
 538         *da++ = dd;
 539         i += 4;
 540         dd = ((mlib_u32*)(d_array12 + (s0 >> 4)))[0];
 541       }
 542 
 543       if (i < (size - 4)) {
 544         *da++ = dd;
 545         i += 4;
 546         dd = ((mlib_u32*)(d_array12 + (s0 >> 4)))[1];
 547       }
 548 
 549       if (i < (size - 4)) {
 550         *da++ = dd;
 551         i += 4;
 552         dd = ((mlib_u32*)(d_array01 + (s0 & 0xF)))[0];
 553       }
 554 
 555       if (i < (size - 4)) {
 556         *da++ = dd;
 557         i += 4;
 558         dd = ((mlib_u32*)(d_array12 + (s0 & 0xF)))[0];
 559       }
 560 
 561       if (i < (size - 4)) {
 562         *da++ = dd;
 563         i += 4;
 564         dd = ((mlib_u32*)(d_array12 + (s0 & 0xF)))[1];
 565       }
 566 
 567 #ifdef _LITTLE_ENDIAN
 568       emask = (~(mlib_u32)0) >> ((4 - (size - i)) * 8);
 569 #else
 570       emask = (~(mlib_u32)0) << ((4 - (size - i)) * 8);
 571 #endif /* _LITTLE_ENDIAN */
 572       da[0] = (dd & emask) | (da[0] &~ emask);
 573     }
 574 
 575     if (dp != dst) mlib_ImageCopy_na(dp, dst, size);
 576 
 577     src += slb;
 578     dst += dlb;
 579   }
 580 
 581   if (buff != (mlib_u8*)buff_lcl) mlib_free(buff);
 582 
 583   return MLIB_SUCCESS;
 584 }
 585 
 586 /***************************************************************/
 587 mlib_status mlib_ImageLookUp_Bit_U8_4(const mlib_u8 *src,
 588                                       mlib_s32      slb,
 589                                       mlib_u8       *dst,
 590                                       mlib_s32      dlb,
 591                                       mlib_s32      xsize,
 592                                       mlib_s32      ysize,
 593                                       mlib_s32      nchan,
 594                                       mlib_s32      bitoff,
 595                                       const mlib_u8 **table)
 596 {
 597   mlib_s32 i, j;
 598   mlib_s32 s0, size;
 599   DTYPE    dd_array0[16], dd_array1[16], lh[4], dd;
 600   mlib_d64 buff_lcl[(MAX_WIDTH + MAX_WIDTH/8)/8];
 601   mlib_u8  *buff = (mlib_u8*)buff_lcl, *buffs;
 602   mlib_u32 l, h;
 603 
 604   size = xsize * 4;
 605 
 606   if (size > MAX_WIDTH) {
 607     buff = mlib_malloc(size + (size + 7)/8);
 608 
 609     if (buff == NULL) return MLIB_FAILURE;
 610   }
 611 
 612   buffs = buff + size;
 613 
 614 #ifdef _LITTLE_ENDIAN
 615   l = (table[3][0] << 24) | (table[2][0] << 16) | (table[1][0] << 8) | (table[0][0]);
 616   h = (table[3][1] << 24) | (table[2][1] << 16) | (table[1][1] << 8) | (table[0][1]);
 617 #else
 618   l = (table[0][0] << 24) | (table[1][0] << 16) | (table[2][0] << 8) | (table[3][0]);
 619   h = (table[0][1] << 24) | (table[1][1] << 16) | (table[2][1] << 8) | (table[3][1]);
 620 #endif /* _LITTLE_ENDIAN */
 621 
 622   ((mlib_u32*)lh)[0] = l;  ((mlib_u32*)lh)[1] = l;
 623   ((mlib_u32*)lh)[2] = l;  ((mlib_u32*)lh)[3] = h;
 624   ((mlib_u32*)lh)[4] = h;  ((mlib_u32*)lh)[5] = l;
 625   ((mlib_u32*)lh)[6] = h;  ((mlib_u32*)lh)[7] = h;
 626 
 627   /* calculate lookup table */
 628   dd_array0[ 0] = lh[0];  dd_array1[ 0] = lh[0];
 629   dd_array0[ 1] = lh[0];  dd_array1[ 1] = lh[1];
 630   dd_array0[ 2] = lh[0];  dd_array1[ 2] = lh[2];
 631   dd_array0[ 3] = lh[0];  dd_array1[ 3] = lh[3];
 632   dd_array0[ 4] = lh[1];  dd_array1[ 4] = lh[0];
 633   dd_array0[ 5] = lh[1];  dd_array1[ 5] = lh[1];
 634   dd_array0[ 6] = lh[1];  dd_array1[ 6] = lh[2];
 635   dd_array0[ 7] = lh[1];  dd_array1[ 7] = lh[3];
 636   dd_array0[ 8] = lh[2];  dd_array1[ 8] = lh[0];
 637   dd_array0[ 9] = lh[2];  dd_array1[ 9] = lh[1];
 638   dd_array0[10] = lh[2];  dd_array1[10] = lh[2];
 639   dd_array0[11] = lh[2];  dd_array1[11] = lh[3];
 640   dd_array0[12] = lh[3];  dd_array1[12] = lh[0];
 641   dd_array0[13] = lh[3];  dd_array1[13] = lh[1];
 642   dd_array0[14] = lh[3];  dd_array1[14] = lh[2];
 643   dd_array0[15] = lh[3];  dd_array1[15] = lh[3];
 644 
 645   for (j = 0; j < ysize; j++) {
 646     mlib_u8  *dp = dst;
 647     mlib_u8  *sp = (void *)src;
 648     mlib_u8  *sa;
 649     DTYPE    *da;
 650 
 651     if ((mlib_addr)dp & 7) dp = buff;
 652 
 653     if (bitoff) {
 654       mlib_ImageCopy_bit_na(sp, buffs, size, bitoff, 0);
 655       sp = buffs;
 656     }
 657 
 658     sa = (mlib_u8*)sp;
 659     da = (DTYPE*)dp;
 660 
 661 #ifdef __SUNPRO_C
 662 #pragma pipeloop(0)
 663 #endif /* __SUNPRO_C */
 664     for (i = 0; i <= (size - 32); i += 32) {
 665       s0 = *sa++;
 666       *da++ = dd_array0[s0 >> 4];
 667       *da++ = dd_array1[s0 >> 4];
 668       *da++ = dd_array0[s0 & 0xF];
 669       *da++ = dd_array1[s0 & 0xF];
 670     }
 671 
 672     if (i < size) {
 673       s0 = *sa++;
 674       dd = dd_array0[s0 >> 4];
 675 
 676       if (i <= (size - 8)) {
 677         *da++ = dd;
 678         i += 8;
 679         dd = dd_array1[s0 >> 4];
 680       }
 681 
 682       if (i <= (size - 8)) {
 683         *da++ = dd;
 684         i += 8;
 685         dd = dd_array0[s0 & 0xF];
 686       }
 687 
 688       if (i <= (size - 8)) {
 689         *da++ = dd;
 690         i += 8;
 691         dd = dd_array1[s0 & 0xF];
 692       }
 693 
 694       if (i < size) {
 695         *(mlib_u32*)da = *(mlib_u32*) & dd;
 696       }
 697     }
 698 
 699     if (dp != dst) mlib_ImageCopy_na(dp, dst, size);
 700 
 701     src += slb;
 702     dst += dlb;
 703   }
 704 
 705   if (buff != (mlib_u8*)buff_lcl) mlib_free(buff);
 706 
 707   return MLIB_SUCCESS;
 708 }
 709 
 710 /***************************************************************/