/*
 * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */



#include "vis_proto.h"
#include "mlib_image.h"
#include "mlib_v_ImageLookUpFunc.h"

/***************************************************************/
  33 static void mlib_v_ImageLookUp_S32_S16_124_D1(const mlib_s32 * src,
  34                                               mlib_s16 * dst,
  35                                               mlib_s32 xsize,
  36                                               const mlib_s16 * table0,
  37                                               const mlib_s16 * table1,
  38                                               const mlib_s16 * table2,
  39                                               const mlib_s16 * table3);
  40 
  41 static void mlib_v_ImageLookUp_S32_S16_3_D1(const mlib_s32 * src,
  42                                             mlib_s16 * dst,
  43                                             mlib_s32 xsize,
  44                                             const mlib_s16 * table0,
  45                                             const mlib_s16 * table1,
  46                                             const mlib_s16 * table2);

/***************************************************************/

  50 #define VIS_LD_U16_I(X, Y)      vis_ld_u16_i((void *)(X), (Y))

/***************************************************************/
  53 void mlib_v_ImageLookUp_S32_S16_124_D1(const mlib_s32 * src,
  54                                        mlib_s16 * dst,
  55                                        mlib_s32 xsize,
  56                                        const mlib_s16 * table0,
  57                                        const mlib_s16 * table1,
  58                                        const mlib_s16 * table2,
  59                                        const mlib_s16 * table3)
  60 {
  61   mlib_s32 *sp;                        /* pointer to source data */
  62   mlib_s32 s0, s1, s2, s3;             /* source data */
  63   mlib_s16 *dl;                        /* pointer to start of destination */
  64   mlib_s16 *dend;                      /* pointer to end of destination */
  65   mlib_d64 *dp;                        /* aligned pointer to destination */
  66   mlib_d64 t0, t1, t2;                 /* destination data */
  67   mlib_d64 t3, acc0;                   /* destination data */
  68   mlib_s32 emask;                      /* edge mask */
  69   mlib_s32 i, num;                     /* loop variable */
  70 
  71   dl = dst;
  72   sp = (void *)src;
  73   dp = (mlib_d64 *) dl;
  74   dend = dl + xsize - 1;
  75 
  76   vis_alignaddr((void *)0, 6);
  77 
  78   if (xsize >= 4) {
  79 
  80     s0 = sp[0];
  81     s1 = sp[1];
  82     s2 = sp[2];
  83     s3 = sp[3];
  84     sp += 4;
  85 
  86 #pragma pipeloop(0)
  87     for (i = 0; i <= xsize - 8; i += 4, sp += 4) {
  88       t3 = VIS_LD_U16_I(table3, ((mlib_addr) 2 * s3));
  89       t2 = VIS_LD_U16_I(table2, ((mlib_addr) 2 * s2));
  90       t1 = VIS_LD_U16_I(table1, ((mlib_addr) 2 * s1));
  91       t0 = VIS_LD_U16_I(table0, ((mlib_addr) 2 * s0));
  92       acc0 = vis_faligndata(t3, acc0);
  93       acc0 = vis_faligndata(t2, acc0);
  94       acc0 = vis_faligndata(t1, acc0);
  95       acc0 = vis_faligndata(t0, acc0);
  96       s0 = sp[0];
  97       s1 = sp[1];
  98       s2 = sp[2];
  99       s3 = sp[3];
 100       *dp++ = acc0;
 101     }
 102 
 103     t3 = VIS_LD_U16_I(table3, ((mlib_addr) 2 * s3));
 104     t2 = VIS_LD_U16_I(table2, ((mlib_addr) 2 * s2));
 105     t1 = VIS_LD_U16_I(table1, ((mlib_addr) 2 * s1));
 106     t0 = VIS_LD_U16_I(table0, ((mlib_addr) 2 * s0));
 107     acc0 = vis_faligndata(t3, acc0);
 108     acc0 = vis_faligndata(t2, acc0);
 109     acc0 = vis_faligndata(t1, acc0);
 110     acc0 = vis_faligndata(t0, acc0);
 111     *dp++ = acc0;
 112   }
 113 
 114   if ((mlib_addr) dp <= (mlib_addr) dend) {
 115 
 116     num = (mlib_s32) ((mlib_s16 *) dend - (mlib_s16 *) dp);
 117     sp += num;
 118     num++;
 119 
 120     if (num == 1) {
 121       s0 = *sp;
 122 
 123       t0 = VIS_LD_U16_I(table0, ((mlib_addr) 2 * s0));
 124       acc0 = vis_faligndata(t0, acc0);
 125     }
 126     else if (num == 2) {
 127       s0 = *sp;
 128       sp--;
 129 
 130       t0 = VIS_LD_U16_I(table1, ((mlib_addr) 2 * s0));
 131       acc0 = vis_faligndata(t0, acc0);
 132 
 133       s0 = *sp;
 134 
 135       t0 = VIS_LD_U16_I(table0, ((mlib_addr) 2 * s0));
 136       acc0 = vis_faligndata(t0, acc0);
 137     }
 138     else if (num == 3) {
 139       s0 = *sp;
 140       sp--;
 141 
 142       t0 = VIS_LD_U16_I(table2, ((mlib_addr) 2 * s0));
 143       acc0 = vis_faligndata(t0, acc0);
 144 
 145       s0 = *sp;
 146       sp--;
 147 
 148       t0 = VIS_LD_U16_I(table1, ((mlib_addr) 2 * s0));
 149       acc0 = vis_faligndata(t0, acc0);
 150 
 151       s0 = *sp;
 152 
 153       t0 = VIS_LD_U16_I(table0, ((mlib_addr) 2 * s0));
 154       acc0 = vis_faligndata(t0, acc0);
 155     }
 156 
 157     emask = vis_edge16(dp, dend);
 158     vis_pst_16(acc0, dp, emask);
 159   }
 160 }

/***************************************************************/
 163 void mlib_v_ImageLookUp_S32_S16_1(const mlib_s32 * src,
 164                                   mlib_s32 slb,
 165                                   mlib_s16 * dst,
 166                                   mlib_s32 dlb,
 167                                   mlib_s32 xsize,
 168                                   mlib_s32 ysize, const mlib_s16 ** table)
 169 {
 170   mlib_s32 *sl;
 171   mlib_s16 *dl;
 172   mlib_u32 shift = 2147483648u;
 173   const mlib_s16 *tab = &table[0][shift];
 174   mlib_s32 j, i;
 175 
 176   sl = (void *)src;
 177   dl = dst;
 178 
 179   /* row loop */
 180   for (j = 0; j < ysize; j++) {
 181     mlib_s32 *sp = sl;
 182     mlib_s16 *dp = dl;
 183     mlib_s32 off, size = xsize;
 184 
 185     off = (mlib_s32) (((8 - ((mlib_addr) dp & 7)) & 7) >> 1);
 186 
 187     off = (off < size) ? off : size;
 188 
 189     for (i = 0; i < off; i++, sp++) {
 190       *dp++ = tab[sp[0]];
 191       size--;
 192     }
 193 
 194     if (size > 0) {
 195       mlib_v_ImageLookUp_S32_S16_124_D1(sp, dp, size, tab, tab, tab, tab);
 196     }
 197 
 198     sl = (mlib_s32 *) ((mlib_u8 *) sl + slb);
 199     dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
 200   }
 201 }

/***************************************************************/
 204 void mlib_v_ImageLookUp_S32_S16_2(const mlib_s32 * src,
 205                                   mlib_s32 slb,
 206                                   mlib_s16 * dst,
 207                                   mlib_s32 dlb,
 208                                   mlib_s32 xsize,
 209                                   mlib_s32 ysize, const mlib_s16 ** table)
 210 {
 211   mlib_s32 *sl;
 212   mlib_s16 *dl;
 213   mlib_u32 shift = 2147483648u;
 214   const mlib_s16 *tab;
 215   mlib_s32 j, i;
 216 
 217   sl = (void *)src;
 218   dl = dst;
 219 
 220   /* row loop */
 221   for (j = 0; j < ysize; j++) {
 222     mlib_s32 *sp = sl;
 223     mlib_s16 *dp = dl;
 224     mlib_s32 off, size = xsize * 2;
 225     const mlib_s16 *tab0 = &table[0][shift];
 226     const mlib_s16 *tab1 = &table[1][shift];
 227 
 228     off = (mlib_s32) (((8 - ((mlib_addr) dp & 7)) & 7) >> 1);
 229 
 230     off = (off < size) ? off : size;
 231 
 232     for (i = 0; i < off - 1; i += 2, sp += 2) {
 233       *dp++ = tab0[sp[0]];
 234       *dp++ = tab1[sp[1]];
 235       size -= 2;
 236     }
 237 
 238     if ((off & 1) != 0) {
 239       *dp++ = tab0[sp[0]];
 240       size--;
 241       sp++;
 242       tab = tab0;
 243       tab0 = tab1;
 244       tab1 = tab;
 245     }
 246 
 247     if (size > 0) {
 248       mlib_v_ImageLookUp_S32_S16_124_D1(sp, dp, size, tab0, tab1, tab0, tab1);
 249     }
 250 
 251     sl = (mlib_s32 *) ((mlib_u8 *) sl + slb);
 252     dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
 253   }
 254 }

/***************************************************************/
 257 void mlib_v_ImageLookUp_S32_S16_4(const mlib_s32 * src,
 258                                   mlib_s32 slb,
 259                                   mlib_s16 * dst,
 260                                   mlib_s32 dlb,
 261                                   mlib_s32 xsize,
 262                                   mlib_s32 ysize, const mlib_s16 ** table)
 263 {
 264   mlib_s32 *sl;
 265   mlib_s16 *dl;
 266   mlib_u32 shift = 2147483648u;
 267   const mlib_s16 *tab;
 268   mlib_s32 j;
 269 
 270   sl = (void *)src;
 271   dl = dst;
 272 
 273   /* row loop */
 274   for (j = 0; j < ysize; j++) {
 275     mlib_s32 *sp = sl;
 276     mlib_s16 *dp = dl;
 277     const mlib_s16 *tab0 = &table[0][shift];
 278     const mlib_s16 *tab1 = &table[1][shift];
 279     const mlib_s16 *tab2 = &table[2][shift];
 280     const mlib_s16 *tab3 = &table[3][shift];
 281     mlib_s32 off, size = xsize * 4;
 282 
 283     off = (mlib_s32) (((8 - ((mlib_addr) dp & 7)) & 7) >> 1);
 284 
 285     off = (off < size) ? off : size;
 286 
 287     if (off == 1) {
 288       *dp++ = tab0[sp[0]];
 289       tab = tab0;
 290       tab0 = tab1;
 291       tab1 = tab2;
 292       tab2 = tab3;
 293       tab3 = tab;
 294       size--;
 295       sp++;
 296     }
 297     else if (off == 2) {
 298       *dp++ = tab0[sp[0]];
 299       *dp++ = tab1[sp[1]];
 300       tab = tab0;
 301       tab0 = tab2;
 302       tab2 = tab;
 303       tab = tab1;
 304       tab1 = tab3;
 305       tab3 = tab;
 306       size -= 2;
 307       sp += 2;
 308     }
 309     else if (off == 3) {
 310       *dp++ = tab0[sp[0]];
 311       *dp++ = tab1[sp[1]];
 312       *dp++ = tab2[sp[2]];
 313       tab = tab3;
 314       tab3 = tab2;
 315       tab2 = tab1;
 316       tab1 = tab0;
 317       tab0 = tab;
 318       size -= 3;
 319       sp += 3;
 320     }
 321 
 322     if (size > 0) {
 323       mlib_v_ImageLookUp_S32_S16_124_D1(sp, dp, size, tab0, tab1, tab2, tab3);
 324     }
 325 
 326     sl = (mlib_s32 *) ((mlib_u8 *) sl + slb);
 327     dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
 328   }
 329 }

/***************************************************************/
 332 void mlib_v_ImageLookUp_S32_S16_3_D1(const mlib_s32 * src,
 333                                      mlib_s16 * dst,
 334                                      mlib_s32 xsize,
 335                                      const mlib_s16 * table0,
 336                                      const mlib_s16 * table1,
 337                                      const mlib_s16 * table2)
 338 {
 339   mlib_s32 *sp;                        /* pointer to source data */
 340   mlib_s32 s0, s1, s2, s3;             /* source data */
 341   mlib_s16 *dl;                        /* pointer to start of destination */
 342   mlib_s16 *dend;                      /* pointer to end of destination */
 343   mlib_d64 *dp;                        /* aligned pointer to destination */
 344   mlib_d64 t0, t1, t2, t3;             /* destination data */
 345   mlib_d64 acc0;                       /* destination data */
 346   mlib_s32 emask;                      /* edge mask */
 347   mlib_s32 i, num;                     /* loop variable */
 348   const mlib_s16 *table;
 349 
 350   dl = dst;
 351   sp = (void *)src;
 352   dp = (mlib_d64 *) dl;
 353   dend = dl + xsize - 1;
 354 
 355   vis_alignaddr((void *)0, 6);
 356 
 357   if (xsize >= 4) {
 358 
 359     s0 = sp[0];
 360     s1 = sp[1];
 361     s2 = sp[2];
 362     s3 = sp[3];
 363     sp += 4;
 364 
 365 #pragma pipeloop(0)
 366     for (i = 0; i <= xsize - 8; i += 4, sp += 4) {
 367       t3 = VIS_LD_U16_I(table0, ((mlib_addr) 2 * s3));
 368       t2 = VIS_LD_U16_I(table2, ((mlib_addr) 2 * s2));
 369       t1 = VIS_LD_U16_I(table1, ((mlib_addr) 2 * s1));
 370       t0 = VIS_LD_U16_I(table0, ((mlib_addr) 2 * s0));
 371       acc0 = vis_faligndata(t3, acc0);
 372       acc0 = vis_faligndata(t2, acc0);
 373       acc0 = vis_faligndata(t1, acc0);
 374       acc0 = vis_faligndata(t0, acc0);
 375       s0 = sp[0];
 376       s1 = sp[1];
 377       s2 = sp[2];
 378       s3 = sp[3];
 379       *dp++ = acc0;
 380       table = table0;
 381       table0 = table1;
 382       table1 = table2;
 383       table2 = table;
 384     }
 385 
 386     t3 = VIS_LD_U16_I(table0, ((mlib_addr) 2 * s3));
 387     t2 = VIS_LD_U16_I(table2, ((mlib_addr) 2 * s2));
 388     t1 = VIS_LD_U16_I(table1, ((mlib_addr) 2 * s1));
 389     t0 = VIS_LD_U16_I(table0, ((mlib_addr) 2 * s0));
 390     acc0 = vis_faligndata(t3, acc0);
 391     acc0 = vis_faligndata(t2, acc0);
 392     acc0 = vis_faligndata(t1, acc0);
 393     acc0 = vis_faligndata(t0, acc0);
 394     *dp++ = acc0;
 395     table = table0;
 396     table0 = table1;
 397     table1 = table2;
 398     table2 = table;
 399   }
 400 
 401   if ((mlib_addr) dp <= (mlib_addr) dend) {
 402 
 403     num = (mlib_s32) ((mlib_s16 *) dend - (mlib_s16 *) dp);
 404     sp += num;
 405     num++;
 406 
 407     if (num == 1) {
 408       s0 = *sp;
 409 
 410       t0 = VIS_LD_U16_I(table0, ((mlib_addr) 2 * s0));
 411       acc0 = vis_faligndata(t0, acc0);
 412     }
 413     else if (num == 2) {
 414       s0 = *sp;
 415       sp--;
 416 
 417       t0 = VIS_LD_U16_I(table1, ((mlib_addr) 2 * s0));
 418       acc0 = vis_faligndata(t0, acc0);
 419 
 420       s0 = *sp;
 421 
 422       t0 = VIS_LD_U16_I(table0, ((mlib_addr) 2 * s0));
 423       acc0 = vis_faligndata(t0, acc0);
 424     }
 425     else if (num == 3) {
 426       s0 = *sp;
 427       sp--;
 428 
 429       t0 = VIS_LD_U16_I(table2, ((mlib_addr) 2 * s0));
 430       acc0 = vis_faligndata(t0, acc0);
 431 
 432       s0 = *sp;
 433       sp--;
 434 
 435       t0 = VIS_LD_U16_I(table1, ((mlib_addr) 2 * s0));
 436       acc0 = vis_faligndata(t0, acc0);
 437 
 438       s0 = *sp;
 439 
 440       t0 = VIS_LD_U16_I(table0, ((mlib_addr) 2 * s0));
 441       acc0 = vis_faligndata(t0, acc0);
 442     }
 443 
 444     emask = vis_edge16(dp, dend);
 445     vis_pst_16(acc0, dp, emask);
 446   }
 447 }

/***************************************************************/
 450 void mlib_v_ImageLookUp_S32_S16_3(const mlib_s32 * src,
 451                                   mlib_s32 slb,
 452                                   mlib_s16 * dst,
 453                                   mlib_s32 dlb,
 454                                   mlib_s32 xsize,
 455                                   mlib_s32 ysize, const mlib_s16 ** table)
 456 {
 457   mlib_s32 *sl;
 458   mlib_s16 *dl;
 459   mlib_u32 shift = 2147483648u;
 460   const mlib_s16 *tab;
 461   mlib_s32 j, i;
 462 
 463   sl = (void *)src;
 464   dl = dst;
 465 
 466   /* row loop */
 467   for (j = 0; j < ysize; j++) {
 468     mlib_s32 *sp = sl;
 469     mlib_s16 *dp = dl;
 470     const mlib_s16 *tab0 = &table[0][shift];
 471     const mlib_s16 *tab1 = &table[1][shift];
 472     const mlib_s16 *tab2 = &table[2][shift];
 473     mlib_s32 off, size = xsize * 3;
 474 
 475     off = (mlib_s32) (((8 - ((mlib_addr) dp & 7)) & 7) >> 1);
 476 
 477     off = (off < size) ? off : size;
 478 
 479     for (i = 0; i < off - 2; i += 3, sp += 3) {
 480       *dp++ = tab0[sp[0]];
 481       *dp++ = tab1[sp[1]];
 482       *dp++ = tab2[sp[2]];
 483       size -= 3;
 484     }
 485 
 486     off -= i;
 487 
 488     if (off == 1) {
 489       *dp++ = tab0[sp[0]];
 490       tab = tab0;
 491       tab0 = tab1;
 492       tab1 = tab2;
 493       tab2 = tab;
 494       size--;
 495       sp++;
 496     }
 497     else if (off == 2) {
 498       *dp++ = tab0[sp[0]];
 499       *dp++ = tab1[sp[1]];
 500       tab = tab2;
 501       tab2 = tab1;
 502       tab1 = tab0;
 503       tab0 = tab;
 504       size -= 2;
 505       sp += 2;
 506     }
 507 
 508     if (size > 0) {
 509       mlib_v_ImageLookUp_S32_S16_3_D1(sp, dp, size, tab0, tab1, tab2);
 510     }
 511 
 512     sl = (mlib_s32 *) ((mlib_u8 *) sl + slb);
 513     dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
 514   }
 515 }

/***************************************************************/