1 /*
   2  * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 
  27 
  28 #include "vis_proto.h"
  29 #include "mlib_image.h"
  30 #include "mlib_v_ImageLookUpFunc.h"
  31 
  32 /***************************************************************/
  33 static void mlib_v_ImageLookUpSI_S32_S16_2_DstA8D1(const mlib_s32 *src,
  34                                                    mlib_s16       *dst,
  35                                                    mlib_s32       xsize,
  36                                                    const mlib_s16 **table);
  37 
  38 static void mlib_v_ImageLookUpSI_S32_S16_2_D1(const mlib_s32 *src,
  39                                               mlib_s16       *dst,
  40                                               mlib_s32       xsize,
  41                                               const mlib_s16 **table);
  42 
  43 static void mlib_v_ImageLookUpSI_S32_S16_3_D1(const mlib_s32 *src,
  44                                               mlib_s16       *dst,
  45                                               mlib_s32       xsize,
  46                                               const mlib_s16 **table);
  47 
  48 static void mlib_v_ImageLookUpSI_S32_S16_4_DstOff0_D1(const mlib_s32 *src,
  49                                                       mlib_s16       *dst,
  50                                                       mlib_s32       xsize,
  51                                                       const mlib_s16 **table);
  52 
  53 static void mlib_v_ImageLookUpSI_S32_S16_4_DstOff1_D1(const mlib_s32 *src,
  54                                                       mlib_s16       *dst,
  55                                                       mlib_s32       xsize,
  56                                                       const mlib_s16 **table);
  57 
  58 static void mlib_v_ImageLookUpSI_S32_S16_4_DstOff2_D1(const mlib_s32 *src,
  59                                                       mlib_s16       *dst,
  60                                                       mlib_s32       xsize,
  61                                                       const mlib_s16 **table);
  62 
  63 static void mlib_v_ImageLookUpSI_S32_S16_4_DstOff3_D1(const mlib_s32 *src,
  64                                                       mlib_s16       *dst,
  65                                                       mlib_s32       xsize,
  66                                                       const mlib_s16 **table);
  67 
  68 /***************************************************************/
  69 #define VIS_LD_U16_I(X, Y)      vis_ld_u16_i((void *)(X), (Y))
  70 
  71 /***************************************************************/
  72 void mlib_v_ImageLookUpSI_S32_S16_2_DstA8D1(const mlib_s32 *src,
  73                                             mlib_s16       *dst,
  74                                             mlib_s32       xsize,
  75                                             const mlib_s16 **table)
  76 {
  77   mlib_s32 *sp;                        /* pointer to source data */
  78   mlib_s32 s0, s1;                     /* source data */
  79   mlib_s16 *dl;                        /* pointer to start of destination */
  80   mlib_d64 *dp;                        /* aligned pointer to destination */
  81   mlib_d64 t0, t1, t2;                 /* destination data */
  82   mlib_d64 t3, acc;                    /* destination data */
  83   mlib_s32 i;                          /* loop variable */
  84   mlib_u32 shift = 2147483648u;
  85   const mlib_s16 *tab0 = &table[0][shift];
  86   const mlib_s16 *tab1 = &table[1][shift];
  87 
  88   sp = (void *)src;
  89   dl = dst;
  90   dp = (mlib_d64 *) dl;
  91 
  92   vis_alignaddr((void *)0, 6);
  93 
  94   if (xsize >= 2) {
  95 
  96     s0 = sp[0];
  97     s1 = sp[1];
  98     sp += 2;
  99 
 100 #pragma pipeloop(0)
 101     for (i = 0; i <= xsize - 4; i += 2, sp += 2) {
 102       t3 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s1));
 103       t2 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s1));
 104       t1 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s0));
 105       t0 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s0));
 106       acc = vis_faligndata(t3, acc);
 107       acc = vis_faligndata(t2, acc);
 108       acc = vis_faligndata(t1, acc);
 109       acc = vis_faligndata(t0, acc);
 110       s0 = sp[0];
 111       s1 = sp[1];
 112       *dp++ = acc;
 113     }
 114 
 115     t3 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s1));
 116     t2 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s1));
 117     t1 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s0));
 118     t0 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s0));
 119     acc = vis_faligndata(t3, acc);
 120     acc = vis_faligndata(t2, acc);
 121     acc = vis_faligndata(t1, acc);
 122     acc = vis_faligndata(t0, acc);
 123     *dp++ = acc;
 124   }
 125 
 126   if ((xsize & 1) != 0) {
 127     s0 = sp[0];
 128     t1 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s0));
 129     t0 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s0));
 130     acc = vis_faligndata(t1, acc);
 131     acc = vis_faligndata(t0, acc);
 132     *(mlib_f32 *) dp = vis_read_hi(acc);
 133   }
 134 }
 135 
 136 /***************************************************************/
 137 void mlib_v_ImageLookUpSI_S32_S16_2_D1(const mlib_s32 *src,
 138                                        mlib_s16       *dst,
 139                                        mlib_s32       xsize,
 140                                        const mlib_s16 **table)
 141 {
 142   mlib_s32 *sp;                        /* pointer to source data */
 143   mlib_s32 s0, s1, s2;                 /* source data */
 144   mlib_s16 *dl;                        /* pointer to start of destination */
 145   mlib_d64 *dp;                        /* aligned pointer to destination */
 146   mlib_d64 t0, t1, t2;                 /* destination data */
 147   mlib_d64 t3, acc;                    /* destination data */
 148   mlib_s32 i;                          /* loop variable */
 149   mlib_u32 shift = 2147483648u;
 150   const mlib_s16 *tab0 = &table[0][shift];
 151   const mlib_s16 *tab1 = &table[1][shift];
 152 
 153   sp = (void *)src;
 154   dl = dst;
 155 
 156   vis_alignaddr((void *)0, 6);
 157 
 158   s0 = *sp++;
 159   *dl++ = tab0[s0];
 160   dp = (mlib_d64 *) dl;
 161   xsize--;
 162 
 163   if (xsize >= 2) {
 164 
 165     s1 = sp[0];
 166     s2 = sp[1];
 167     sp += 2;
 168 
 169 #pragma pipeloop(0)
 170     for (i = 0; i <= xsize - 4; i += 2, sp += 2) {
 171       t3 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s2));
 172       t2 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s1));
 173       t1 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s1));
 174       t0 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s0));
 175       acc = vis_faligndata(t3, acc);
 176       acc = vis_faligndata(t2, acc);
 177       acc = vis_faligndata(t1, acc);
 178       acc = vis_faligndata(t0, acc);
 179       s0 = s2;
 180       s1 = sp[0];
 181       s2 = sp[1];
 182       *dp++ = acc;
 183     }
 184 
 185     t3 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s2));
 186     t2 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s1));
 187     t1 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s1));
 188     t0 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s0));
 189     acc = vis_faligndata(t3, acc);
 190     acc = vis_faligndata(t2, acc);
 191     acc = vis_faligndata(t1, acc);
 192     acc = vis_faligndata(t0, acc);
 193     s0 = s2;
 194     *dp++ = acc;
 195   }
 196 
 197   dl = (mlib_s16 *) dp;
 198 
 199   if ((xsize & 1) != 0) {
 200     s1 = sp[0];
 201     t1 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s1));
 202     t0 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s0));
 203     acc = vis_faligndata(t1, acc);
 204     acc = vis_faligndata(t0, acc);
 205     *(mlib_f32 *) dp = vis_read_hi(acc);
 206     s0 = s1;
 207     dl += 2;
 208   }
 209 
 210   *dl = tab1[s0];
 211 }
 212 
 213 /***************************************************************/
 214 void mlib_v_ImageLookUpSI_S32_S16_2(const mlib_s32 *src,
 215                                     mlib_s32       slb,
 216                                     mlib_s16       *dst,
 217                                     mlib_s32       dlb,
 218                                     mlib_s32       xsize,
 219                                     mlib_s32       ysize,
 220                                     const mlib_s16 **table)
 221 {
 222   mlib_s32 *sl;
 223   mlib_s16 *dl;
 224   mlib_s32 j;
 225   mlib_u32 shift = 2147483648u;
 226   const mlib_s16 *tab0 = &table[0][shift];
 227   const mlib_s16 *tab1 = &table[1][shift];
 228 
 229   sl = (void *)src;
 230   dl = dst;
 231 
 232   /* row loop */
 233   for (j = 0; j < ysize; j++) {
 234     mlib_s32 *sp = sl;
 235     mlib_s16 *dp = dl;
 236     mlib_s32 off, s0, size = xsize;
 237 
 238     off = (mlib_s32) (((8 - ((mlib_addr) dp & 7)) & 7));
 239 
 240     if ((off >= 4) && (size > 0)) {
 241       s0 = *sp++;
 242       *dp++ = tab0[s0];
 243       *dp++ = tab1[s0];
 244       size--;
 245     }
 246 
 247     if (size > 0) {
 248 
 249       if (((mlib_addr) dp & 7) == 0) {
 250         mlib_v_ImageLookUpSI_S32_S16_2_DstA8D1(sp, dp, size, table);
 251       }
 252       else {
 253         mlib_v_ImageLookUpSI_S32_S16_2_D1(sp, dp, size, table);
 254       }
 255     }
 256 
 257     sl = (mlib_s32 *) ((mlib_u8 *) sl + slb);
 258     dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
 259   }
 260 }
 261 
 262 /***************************************************************/
 263 void mlib_v_ImageLookUpSI_S32_S16_3_D1(const mlib_s32 *src,
 264                                        mlib_s16       *dst,
 265                                        mlib_s32       xsize,
 266                                        const mlib_s16 **table)
 267 {
 268   mlib_s32 *sp;                        /* pointer to source data */
 269   mlib_s16 *dl;                        /* pointer to start of destination */
 270   mlib_d64 *dp;                        /* aligned pointer to destination */
 271   mlib_d64 t0, t1, t2, t3;             /* destination data */
 272   mlib_d64 acc0, acc1, acc2;           /* destination data */
 273   mlib_s32 i;                          /* loop variable */
 274   mlib_u32 shift = 2147483648u;
 275   const mlib_s16 *tab0 = &table[0][shift];
 276   const mlib_s16 *tab1 = &table[1][shift];
 277   const mlib_s16 *tab2 = &table[2][shift];
 278   mlib_s32 s00, s01, s02, s03;
 279 
 280   sp = (void *)src;
 281   dl = dst;
 282   dp = (mlib_d64 *) dl;
 283 
 284   vis_alignaddr((void *)0, 6);
 285 
 286   i = 0;
 287 
 288   if (xsize >= 4) {
 289 
 290     s00 = sp[0];
 291     s01 = sp[1];
 292     s02 = sp[2];
 293     s03 = sp[3];
 294     sp += 4;
 295 
 296 #pragma pipeloop(0)
 297     for (i = 0; i <= xsize - 8; i += 4, sp += 4) {
 298       t3 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s01));
 299       t2 = VIS_LD_U16_I(tab2, ((mlib_addr) 2 * s00));
 300       t1 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s00));
 301       t0 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s00));
 302       acc0 = vis_faligndata(t3, acc0);
 303       acc0 = vis_faligndata(t2, acc0);
 304       acc0 = vis_faligndata(t1, acc0);
 305       acc0 = vis_faligndata(t0, acc0);
 306       t3 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s02));
 307       t2 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s02));
 308       t1 = VIS_LD_U16_I(tab2, ((mlib_addr) 2 * s01));
 309       t0 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s01));
 310       acc1 = vis_faligndata(t3, acc1);
 311       acc1 = vis_faligndata(t2, acc1);
 312       acc1 = vis_faligndata(t1, acc1);
 313       acc1 = vis_faligndata(t0, acc1);
 314       t3 = VIS_LD_U16_I(tab2, ((mlib_addr) 2 * s03));
 315       t2 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s03));
 316       t1 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s03));
 317       t0 = VIS_LD_U16_I(tab2, ((mlib_addr) 2 * s02));
 318       acc2 = vis_faligndata(t3, acc2);
 319       acc2 = vis_faligndata(t2, acc2);
 320       acc2 = vis_faligndata(t1, acc2);
 321       acc2 = vis_faligndata(t0, acc2);
 322       s00 = sp[0];
 323       s01 = sp[1];
 324       s02 = sp[2];
 325       s03 = sp[3];
 326       *dp++ = acc0;
 327       *dp++ = acc1;
 328       *dp++ = acc2;
 329     }
 330 
 331     t3 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s01));
 332     t2 = VIS_LD_U16_I(tab2, ((mlib_addr) 2 * s00));
 333     t1 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s00));
 334     t0 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s00));
 335     acc0 = vis_faligndata(t3, acc0);
 336     acc0 = vis_faligndata(t2, acc0);
 337     acc0 = vis_faligndata(t1, acc0);
 338     acc0 = vis_faligndata(t0, acc0);
 339     t3 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s02));
 340     t2 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s02));
 341     t1 = VIS_LD_U16_I(tab2, ((mlib_addr) 2 * s01));
 342     t0 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s01));
 343     acc1 = vis_faligndata(t3, acc1);
 344     acc1 = vis_faligndata(t2, acc1);
 345     acc1 = vis_faligndata(t1, acc1);
 346     acc1 = vis_faligndata(t0, acc1);
 347     t3 = VIS_LD_U16_I(tab2, ((mlib_addr) 2 * s03));
 348     t2 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s03));
 349     t1 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s03));
 350     t0 = VIS_LD_U16_I(tab2, ((mlib_addr) 2 * s02));
 351     acc2 = vis_faligndata(t3, acc2);
 352     acc2 = vis_faligndata(t2, acc2);
 353     acc2 = vis_faligndata(t1, acc2);
 354     acc2 = vis_faligndata(t0, acc2);
 355     *dp++ = acc0;
 356     *dp++ = acc1;
 357     *dp++ = acc2;
 358     i += 4;
 359   }
 360 
 361   dl = (mlib_s16 *) dp;
 362 
 363 #pragma pipeloop(0)
 364   for (; i < xsize; i++) {
 365     s00 = sp[0];
 366     dl[0] = tab0[s00];
 367     dl[1] = tab1[s00];
 368     dl[2] = tab2[s00];
 369     dl += 3;
 370     sp++;
 371   }
 372 }
 373 
 374 /***************************************************************/
 375 void mlib_v_ImageLookUpSI_S32_S16_3(const mlib_s32 *src,
 376                                     mlib_s32       slb,
 377                                     mlib_s16       *dst,
 378                                     mlib_s32       dlb,
 379                                     mlib_s32       xsize,
 380                                     mlib_s32       ysize,
 381                                     const mlib_s16 **table)
 382 {
 383   mlib_s32 *sl;
 384   mlib_s16 *dl;
 385   mlib_s32 i, j;
 386   mlib_u32 shift = 2147483648u;
 387   const mlib_s16 *tab0 = &table[0][shift];
 388   const mlib_s16 *tab1 = &table[1][shift];
 389   const mlib_s16 *tab2 = &table[2][shift];
 390 
 391   sl = (void *)src;
 392   dl = dst;
 393 
 394   /* row loop */
 395   for (j = 0; j < ysize; j++) {
 396     mlib_s32 *sp = sl;
 397     mlib_s16 *dp = dl;
 398     mlib_s32 off, s0, size = xsize;
 399 
 400     off = (mlib_s32) (((mlib_addr) dp & 7) >> 1);
 401     off = (off < size) ? off : size;
 402 
 403     for (i = 0; i < off; i++) {
 404       s0 = *sp++;
 405       *dp++ = tab0[s0];
 406       *dp++ = tab1[s0];
 407       *dp++ = tab2[s0];
 408       size--;
 409     }
 410 
 411     if (size > 0) {
 412       mlib_v_ImageLookUpSI_S32_S16_3_D1(sp, dp, size, table);
 413     }
 414 
 415     sl = (mlib_s32 *) ((mlib_u8 *) sl + slb);
 416     dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
 417   }
 418 }
 419 
 420 /***************************************************************/
 421 void mlib_v_ImageLookUpSI_S32_S16_4_DstOff0_D1(const mlib_s32 *src,
 422                                                mlib_s16       *dst,
 423                                                mlib_s32       xsize,
 424                                                const mlib_s16 **table)
 425 {
 426   mlib_s32 *sp;                        /* pointer to source data */
 427   mlib_s32 s0;                         /* source data */
 428   mlib_s16 *dl;                        /* pointer to start of destination */
 429   mlib_d64 *dp;                        /* aligned pointer to destination */
 430   mlib_d64 t0, t1, t2, t3;             /* destination data */
 431   mlib_d64 acc;                        /* destination data */
 432   mlib_s32 i;                          /* loop variable */
 433   mlib_u32 shift = 2147483648u;
 434   const mlib_s16 *tab0 = &table[0][shift];
 435   const mlib_s16 *tab1 = &table[1][shift];
 436   const mlib_s16 *tab2 = &table[2][shift];
 437   const mlib_s16 *tab3 = &table[3][shift];
 438 
 439   sp = (void *)src;
 440   dl = dst;
 441   dp = (mlib_d64 *) dl;
 442 
 443   vis_alignaddr((void *)0, 6);
 444 
 445   if (xsize >= 1) {
 446 
 447     s0 = *sp++;
 448 
 449 #pragma pipeloop(0)
 450     for (i = 0; i <= xsize - 2; i++) {
 451       t3 = VIS_LD_U16_I(tab3, ((mlib_addr) 2 * s0));
 452       t2 = VIS_LD_U16_I(tab2, ((mlib_addr) 2 * s0));
 453       t1 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s0));
 454       t0 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s0));
 455       acc = vis_faligndata(t3, acc);
 456       acc = vis_faligndata(t2, acc);
 457       acc = vis_faligndata(t1, acc);
 458       acc = vis_faligndata(t0, acc);
 459       s0 = *sp++;
 460       *dp++ = acc;
 461     }
 462 
 463     t3 = VIS_LD_U16_I(tab3, ((mlib_addr) 2 * s0));
 464     t2 = VIS_LD_U16_I(tab2, ((mlib_addr) 2 * s0));
 465     t1 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s0));
 466     t0 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s0));
 467     acc = vis_faligndata(t3, acc);
 468     acc = vis_faligndata(t2, acc);
 469     acc = vis_faligndata(t1, acc);
 470     acc = vis_faligndata(t0, acc);
 471     *dp++ = acc;
 472   }
 473 }
 474 
 475 /***************************************************************/
 476 void mlib_v_ImageLookUpSI_S32_S16_4_DstOff1_D1(const mlib_s32 *src,
 477                                                mlib_s16       *dst,
 478                                                mlib_s32       xsize,
 479                                                const mlib_s16 **table)
 480 {
 481   mlib_s32 *sp;                        /* pointer to source data */
 482   mlib_s32 s0, s1;                     /* source data */
 483   mlib_s16 *dl;                        /* pointer to start of destination */
 484   mlib_d64 *dp;                        /* aligned pointer to destination */
 485   mlib_d64 t0, t1, t2, t3;             /* destination data */
 486   mlib_d64 acc;                        /* destination data */
 487   mlib_s32 i;                          /* loop variable */
 488   mlib_u32 shift = 2147483648u;
 489   const mlib_s16 *tab0 = &table[0][shift];
 490   const mlib_s16 *tab1 = &table[1][shift];
 491   const mlib_s16 *tab2 = &table[2][shift];
 492   const mlib_s16 *tab3 = &table[3][shift];
 493 
 494   sp = (void *)src;
 495   dl = dst;
 496   dp = (mlib_d64 *) dl;
 497 
 498   vis_alignaddr((void *)0, 6);
 499 
 500   s0 = *sp++;
 501 
 502   if (xsize >= 1) {
 503 
 504     s1 = *sp++;
 505 
 506 #pragma pipeloop(0)
 507     for (i = 0; i <= xsize - 2; i++) {
 508       t3 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s1));
 509       t2 = VIS_LD_U16_I(tab3, ((mlib_addr) 2 * s0));
 510       t1 = VIS_LD_U16_I(tab2, ((mlib_addr) 2 * s0));
 511       t0 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s0));
 512       acc = vis_faligndata(t3, acc);
 513       acc = vis_faligndata(t2, acc);
 514       acc = vis_faligndata(t1, acc);
 515       acc = vis_faligndata(t0, acc);
 516       s0 = s1;
 517       s1 = *sp++;
 518       *dp++ = acc;
 519     }
 520 
 521     t3 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s1));
 522     t2 = VIS_LD_U16_I(tab3, ((mlib_addr) 2 * s0));
 523     t1 = VIS_LD_U16_I(tab2, ((mlib_addr) 2 * s0));
 524     t0 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s0));
 525     acc = vis_faligndata(t3, acc);
 526     acc = vis_faligndata(t2, acc);
 527     acc = vis_faligndata(t1, acc);
 528     acc = vis_faligndata(t0, acc);
 529     s0 = s1;
 530     *dp++ = acc;
 531   }
 532 
 533   dl = (mlib_s16 *) dp;
 534 
 535   dl[0] = tab1[s0];
 536   dl[1] = tab2[s0];
 537   dl[2] = tab3[s0];
 538 }
 539 
 540 /***************************************************************/
 541 void mlib_v_ImageLookUpSI_S32_S16_4_DstOff2_D1(const mlib_s32 *src,
 542                                                mlib_s16       *dst,
 543                                                mlib_s32       xsize,
 544                                                const mlib_s16 **table)
 545 {
 546   mlib_s32 *sp;                        /* pointer to source data */
 547   mlib_s32 s0, s1;                     /* source data */
 548   mlib_s16 *dl;                        /* pointer to start of destination */
 549   mlib_d64 *dp;                        /* aligned pointer to destination */
 550   mlib_d64 t0, t1, t2, t3;             /* destination data */
 551   mlib_d64 acc;                        /* destination data */
 552   mlib_s32 i;                          /* loop variable */
 553   mlib_u32 shift = 2147483648u;
 554   const mlib_s16 *tab0 = &table[0][shift];
 555   const mlib_s16 *tab1 = &table[1][shift];
 556   const mlib_s16 *tab2 = &table[2][shift];
 557   const mlib_s16 *tab3 = &table[3][shift];
 558 
 559   sp = (void *)src;
 560   dl = dst;
 561   dp = (mlib_d64 *) dl;
 562 
 563   vis_alignaddr((void *)0, 6);
 564 
 565   s0 = *sp++;
 566 
 567   if (xsize >= 1) {
 568 
 569     s1 = *sp++;
 570 
 571 #pragma pipeloop(0)
 572     for (i = 0; i <= xsize - 2; i++) {
 573       t3 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s1));
 574       t2 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s1));
 575       t1 = VIS_LD_U16_I(tab3, ((mlib_addr) 2 * s0));
 576       t0 = VIS_LD_U16_I(tab2, ((mlib_addr) 2 * s0));
 577       acc = vis_faligndata(t3, acc);
 578       acc = vis_faligndata(t2, acc);
 579       acc = vis_faligndata(t1, acc);
 580       acc = vis_faligndata(t0, acc);
 581       s0 = s1;
 582       s1 = *sp++;
 583       *dp++ = acc;
 584     }
 585 
 586     t3 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s1));
 587     t2 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s1));
 588     t1 = VIS_LD_U16_I(tab3, ((mlib_addr) 2 * s0));
 589     t0 = VIS_LD_U16_I(tab2, ((mlib_addr) 2 * s0));
 590     acc = vis_faligndata(t3, acc);
 591     acc = vis_faligndata(t2, acc);
 592     acc = vis_faligndata(t1, acc);
 593     acc = vis_faligndata(t0, acc);
 594     s0 = s1;
 595     *dp++ = acc;
 596   }
 597 
 598   dl = (mlib_s16 *) dp;
 599 
 600   dl[0] = tab2[s0];
 601   dl[1] = tab3[s0];
 602 }
 603 
 604 /***************************************************************/
 605 void mlib_v_ImageLookUpSI_S32_S16_4_DstOff3_D1(const mlib_s32 *src,
 606                                                mlib_s16       *dst,
 607                                                mlib_s32       xsize,
 608                                                const mlib_s16 **table)
 609 {
 610   mlib_s32 *sp;                        /* pointer to source data */
 611   mlib_s32 s0, s1;                     /* source data */
 612   mlib_s16 *dl;                        /* pointer to start of destination */
 613   mlib_d64 *dp;                        /* aligned pointer to destination */
 614   mlib_d64 t0, t1, t2, t3;             /* destination data */
 615   mlib_d64 acc;                        /* destination data */
 616   mlib_s32 i;                          /* loop variable */
 617   mlib_u32 shift = 2147483648u;
 618   const mlib_s16 *tab0 = &table[0][shift];
 619   const mlib_s16 *tab1 = &table[1][shift];
 620   const mlib_s16 *tab2 = &table[2][shift];
 621   const mlib_s16 *tab3 = &table[3][shift];
 622 
 623   sp = (void *)src;
 624   dl = dst;
 625   dp = (mlib_d64 *) dl;
 626 
 627   vis_alignaddr((void *)0, 6);
 628 
 629   s0 = *sp++;
 630 
 631   if (xsize >= 1) {
 632 
 633     s1 = *sp++;
 634 
 635 #pragma pipeloop(0)
 636     for (i = 0; i <= xsize - 2; i++) {
 637       t3 = VIS_LD_U16_I(tab2, ((mlib_addr) 2 * s1));
 638       t2 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s1));
 639       t1 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s1));
 640       t0 = VIS_LD_U16_I(tab3, ((mlib_addr) 2 * s0));
 641       acc = vis_faligndata(t3, acc);
 642       acc = vis_faligndata(t2, acc);
 643       acc = vis_faligndata(t1, acc);
 644       acc = vis_faligndata(t0, acc);
 645       s0 = s1;
 646       s1 = *sp++;
 647       *dp++ = acc;
 648     }
 649 
 650     t3 = VIS_LD_U16_I(tab2, ((mlib_addr) 2 * s1));
 651     t2 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s1));
 652     t1 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s1));
 653     t0 = VIS_LD_U16_I(tab3, ((mlib_addr) 2 * s0));
 654     acc = vis_faligndata(t3, acc);
 655     acc = vis_faligndata(t2, acc);
 656     acc = vis_faligndata(t1, acc);
 657     acc = vis_faligndata(t0, acc);
 658     s0 = s1;
 659     *dp++ = acc;
 660   }
 661 
 662   dl = (mlib_s16 *) dp;
 663 
 664   dl[0] = tab3[s0];
 665 }
 666 
 667 /***************************************************************/
 668 void mlib_v_ImageLookUpSI_S32_S16_4(const mlib_s32 *src,
 669                                     mlib_s32       slb,
 670                                     mlib_s16       *dst,
 671                                     mlib_s32       dlb,
 672                                     mlib_s32       xsize,
 673                                     mlib_s32       ysize,
 674                                     const mlib_s16 **table)
 675 {
 676   mlib_s32 *sl;
 677   mlib_s16 *dl;
 678   mlib_s32 j;
 679   mlib_u32 shift = 2147483648u;
 680   const mlib_s16 *tab0 = &table[0][shift];
 681   const mlib_s16 *tab1 = &table[1][shift];
 682   const mlib_s16 *tab2 = &table[2][shift];
 683 
 684   sl = (void *)src;
 685   dl = dst;
 686 
 687   /* row loop */
 688   for (j = 0; j < ysize; j++) {
 689     mlib_s32 *sp = sl;
 690     mlib_s16 *dp = dl;
 691     mlib_s32 off, s0, size = xsize;
 692 
 693     if (size > 0) {
 694       off = (mlib_s32) (((8 - ((mlib_addr) dp & 7)) & 7) >> 1);
 695 
 696       if (off == 0) {
 697         mlib_v_ImageLookUpSI_S32_S16_4_DstOff0_D1(sp, dp, size, table);
 698       }
 699       else if (off == 1) {
 700         s0 = *sp;
 701         *dp++ = tab0[s0];
 702         size--;
 703         mlib_v_ImageLookUpSI_S32_S16_4_DstOff1_D1(sp, dp, size, table);
 704       }
 705       else if (off == 2) {
 706         s0 = *sp;
 707         *dp++ = tab0[s0];
 708         *dp++ = tab1[s0];
 709         size--;
 710         mlib_v_ImageLookUpSI_S32_S16_4_DstOff2_D1(sp, dp, size, table);
 711       }
 712       else if (off == 3) {
 713         s0 = *sp;
 714         *dp++ = tab0[s0];
 715         *dp++ = tab1[s0];
 716         *dp++ = tab2[s0];
 717         size--;
 718         mlib_v_ImageLookUpSI_S32_S16_4_DstOff3_D1(sp, dp, size, table);
 719       }
 720     }
 721 
 722     sl = (mlib_s32 *) ((mlib_u8 *) sl + slb);
 723     dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
 724   }
 725 }
 726 
 727 /***************************************************************/