1 /*
   2  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 
  27 
  28 #include "vis_proto.h"
  29 #include "mlib_image.h"
  30 #include "mlib_v_ImageLookUpFunc.h"
  31 
  32 /***************************************************************/
  33 static void mlib_v_ImageLookUp_S32_U16_124_D1(const mlib_s32 *src,
  34                                               mlib_u16       *dst,
  35                                               mlib_s32       xsize,
  36                                               const mlib_u16 *table0,
  37                                               const mlib_u16 *table1,
  38                                               const mlib_u16 *table2,
  39                                               const mlib_u16 *table3);
  40 
  41 static void mlib_v_ImageLookUp_S32_U16_3_D1(const mlib_s32 *src,
  42                                             mlib_u16       *dst,
  43                                             mlib_s32       xsize,
  44                                             const mlib_u16 *table0,
  45                                             const mlib_u16 *table1,
  46                                             const mlib_u16 *table2);
  47 
  48 /***************************************************************/
  49 
  50 #define VIS_LD_U16_I(X, Y)      vis_ld_u16_i((void *)(X), (Y))
  51 
  52 /***************************************************************/
  53 void mlib_v_ImageLookUp_S32_U16_124_D1(const mlib_s32 *src,
  54                                        mlib_u16       *dst,
  55                                        mlib_s32       xsize,
  56                                        const mlib_u16 *table0,
  57                                        const mlib_u16 *table1,
  58                                        const mlib_u16 *table2,
  59                                        const mlib_u16 *table3)
  60 {
  61   mlib_s32 *sp;                        /* pointer to source data */
  62   mlib_s32 s0, s1, s2, s3;             /* source data */
  63   mlib_u16 *dl;                        /* pointer to start of destination */
  64   mlib_u16 *dend;                      /* pointer to end of destination */
  65   mlib_d64 *dp;                        /* aligned pointer to destination */
  66   mlib_d64 t0, t1, t2;                 /* destination data */
  67   mlib_d64 t3, acc0;                   /* destination data */
  68   mlib_s32 emask;                      /* edge mask */
  69   mlib_s32 i, num;                     /* loop variable */
  70 
  71   dl = dst;
  72   sp = (void *)src;
  73   dp = (mlib_d64 *) dl;
  74   dend = dl + xsize - 1;
  75 
  76   vis_alignaddr((void *)0, 6);
  77 
  78   if (xsize >= 4) {
  79 
  80     s0 = sp[0];
  81     s1 = sp[1];
  82     s2 = sp[2];
  83     s3 = sp[3];
  84     sp += 4;
  85 
  86 #pragma pipeloop(0)
  87     for (i = 0; i <= xsize - 8; i += 4, sp += 4) {
  88       t3 = VIS_LD_U16_I(table3, ((mlib_addr) 2 * s3));
  89       t2 = VIS_LD_U16_I(table2, ((mlib_addr) 2 * s2));
  90       t1 = VIS_LD_U16_I(table1, ((mlib_addr) 2 * s1));
  91       t0 = VIS_LD_U16_I(table0, ((mlib_addr) 2 * s0));
  92       acc0 = vis_faligndata(t3, acc0);
  93       acc0 = vis_faligndata(t2, acc0);
  94       acc0 = vis_faligndata(t1, acc0);
  95       acc0 = vis_faligndata(t0, acc0);
  96       s0 = sp[0];
  97       s1 = sp[1];
  98       s2 = sp[2];
  99       s3 = sp[3];
 100       *dp++ = acc0;
 101     }
 102 
 103     t3 = VIS_LD_U16_I(table3, ((mlib_addr) 2 * s3));
 104     t2 = VIS_LD_U16_I(table2, ((mlib_addr) 2 * s2));
 105     t1 = VIS_LD_U16_I(table1, ((mlib_addr) 2 * s1));
 106     t0 = VIS_LD_U16_I(table0, ((mlib_addr) 2 * s0));
 107     acc0 = vis_faligndata(t3, acc0);
 108     acc0 = vis_faligndata(t2, acc0);
 109     acc0 = vis_faligndata(t1, acc0);
 110     acc0 = vis_faligndata(t0, acc0);
 111     *dp++ = acc0;
 112   }
 113 
 114   if ((mlib_addr) dp <= (mlib_addr) dend) {
 115 
 116     num = (mlib_s32) ((mlib_u16 *) dend - (mlib_u16 *) dp);
 117     sp += num;
 118     num++;
 119 
 120     if (num == 1) {
 121       s0 = *sp;
 122 
 123       t0 = VIS_LD_U16_I(table0, ((mlib_addr) 2 * s0));
 124       acc0 = vis_faligndata(t0, acc0);
 125     }
 126     else if (num == 2) {
 127       s0 = *sp;
 128       sp--;
 129 
 130       t0 = VIS_LD_U16_I(table1, ((mlib_addr) 2 * s0));
 131       acc0 = vis_faligndata(t0, acc0);
 132 
 133       s0 = *sp;
 134 
 135       t0 = VIS_LD_U16_I(table0, ((mlib_addr) 2 * s0));
 136       acc0 = vis_faligndata(t0, acc0);
 137     }
 138     else if (num == 3) {
 139       s0 = *sp;
 140       sp--;
 141 
 142       t0 = VIS_LD_U16_I(table2, ((mlib_addr) 2 * s0));
 143       acc0 = vis_faligndata(t0, acc0);
 144 
 145       s0 = *sp;
 146       sp--;
 147 
 148       t0 = VIS_LD_U16_I(table1, ((mlib_addr) 2 * s0));
 149       acc0 = vis_faligndata(t0, acc0);
 150 
 151       s0 = *sp;
 152 
 153       t0 = VIS_LD_U16_I(table0, ((mlib_addr) 2 * s0));
 154       acc0 = vis_faligndata(t0, acc0);
 155     }
 156 
 157     emask = vis_edge16(dp, dend);
 158     vis_pst_16(acc0, dp, emask);
 159   }
 160 }
 161 
 162 /***************************************************************/
 163 void mlib_v_ImageLookUp_S32_U16_1(const mlib_s32 *src,
 164                                   mlib_s32       slb,
 165                                   mlib_u16       *dst,
 166                                   mlib_s32       dlb,
 167                                   mlib_s32       xsize,
 168                                   mlib_s32       ysize,
 169                                   const mlib_u16 **table)
 170 {
 171   mlib_s32 *sl;
 172   mlib_u16 *dl;
 173   mlib_u32 shift = 2147483648u;
 174   const mlib_u16 *tab = &table[0][shift];
 175   mlib_s32 j, i;
 176 
 177   sl = (void *)src;
 178   dl = dst;
 179 
 180   /* row loop */
 181   for (j = 0; j < ysize; j++) {
 182     mlib_s32 *sp = sl;
 183     mlib_u16 *dp = dl;
 184     mlib_s32 off, size = xsize;
 185 
 186     off = (mlib_s32) (((8 - ((mlib_addr) dp & 7)) & 7) >> 1);
 187 
 188     off = (off < size) ? off : size;
 189 
 190     for (i = 0; i < off; i++, sp++) {
 191       *dp++ = tab[sp[0]];
 192       size--;
 193     }
 194 
 195     if (size > 0) {
 196       mlib_v_ImageLookUp_S32_U16_124_D1(sp, dp, size, tab, tab, tab, tab);
 197     }
 198 
 199     sl = (mlib_s32 *) ((mlib_u8 *) sl + slb);
 200     dl = (mlib_u16 *) ((mlib_u8 *) dl + dlb);
 201   }
 202 }
 203 
 204 /***************************************************************/
 205 void mlib_v_ImageLookUp_S32_U16_2(const mlib_s32 *src,
 206                                   mlib_s32       slb,
 207                                   mlib_u16       *dst,
 208                                   mlib_s32       dlb,
 209                                   mlib_s32       xsize,
 210                                   mlib_s32       ysize,
 211                                   const mlib_u16 **table)
 212 {
 213   mlib_s32 *sl;
 214   mlib_u16 *dl;
 215   mlib_u32 shift = 2147483648u;
 216   const mlib_u16 *tab;
 217   mlib_s32 j, i;
 218 
 219   sl = (void *)src;
 220   dl = dst;
 221 
 222   /* row loop */
 223   for (j = 0; j < ysize; j++) {
 224     mlib_s32 *sp = sl;
 225     mlib_u16 *dp = dl;
 226     mlib_s32 off, size = xsize * 2;
 227     const mlib_u16 *tab0 = &table[0][shift];
 228     const mlib_u16 *tab1 = &table[1][shift];
 229 
 230     off = (mlib_s32) (((8 - ((mlib_addr) dp & 7)) & 7) >> 1);
 231 
 232     off = (off < size) ? off : size;
 233 
 234     for (i = 0; i < off - 1; i += 2, sp += 2) {
 235       *dp++ = tab0[sp[0]];
 236       *dp++ = tab1[sp[1]];
 237       size -= 2;
 238     }
 239 
 240     if ((off & 1) != 0) {
 241       *dp++ = tab0[sp[0]];
 242       size--;
 243       sp++;
 244       tab = tab0;
 245       tab0 = tab1;
 246       tab1 = tab;
 247     }
 248 
 249     if (size > 0) {
 250       mlib_v_ImageLookUp_S32_U16_124_D1(sp, dp, size, tab0, tab1, tab0, tab1);
 251     }
 252 
 253     sl = (mlib_s32 *) ((mlib_u8 *) sl + slb);
 254     dl = (mlib_u16 *) ((mlib_u8 *) dl + dlb);
 255   }
 256 }
 257 
 258 /***************************************************************/
 259 void mlib_v_ImageLookUp_S32_U16_4(const mlib_s32 *src,
 260                                   mlib_s32       slb,
 261                                   mlib_u16       *dst,
 262                                   mlib_s32       dlb,
 263                                   mlib_s32       xsize,
 264                                   mlib_s32       ysize,
 265                                   const mlib_u16 **table)
 266 {
 267   mlib_s32 *sl;
 268   mlib_u16 *dl;
 269   mlib_u32 shift = 2147483648u;
 270   const mlib_u16 *tab;
 271   mlib_s32 j;
 272 
 273   sl = (void *)src;
 274   dl = dst;
 275 
 276   /* row loop */
 277   for (j = 0; j < ysize; j++) {
 278     mlib_s32 *sp = sl;
 279     mlib_u16 *dp = dl;
 280     const mlib_u16 *tab0 = &table[0][shift];
 281     const mlib_u16 *tab1 = &table[1][shift];
 282     const mlib_u16 *tab2 = &table[2][shift];
 283     const mlib_u16 *tab3 = &table[3][shift];
 284     mlib_s32 off, size = xsize * 4;
 285 
 286     off = (mlib_s32) (((8 - ((mlib_addr) dp & 7)) & 7) >> 1);
 287 
 288     off = (off < size) ? off : size;
 289 
 290     if (off == 1) {
 291       *dp++ = tab0[sp[0]];
 292       tab = tab0;
 293       tab0 = tab1;
 294       tab1 = tab2;
 295       tab2 = tab3;
 296       tab3 = tab;
 297       size--;
 298       sp++;
 299     }
 300     else if (off == 2) {
 301       *dp++ = tab0[sp[0]];
 302       *dp++ = tab1[sp[1]];
 303       tab = tab0;
 304       tab0 = tab2;
 305       tab2 = tab;
 306       tab = tab1;
 307       tab1 = tab3;
 308       tab3 = tab;
 309       size -= 2;
 310       sp += 2;
 311     }
 312     else if (off == 3) {
 313       *dp++ = tab0[sp[0]];
 314       *dp++ = tab1[sp[1]];
 315       *dp++ = tab2[sp[2]];
 316       tab = tab3;
 317       tab3 = tab2;
 318       tab2 = tab1;
 319       tab1 = tab0;
 320       tab0 = tab;
 321       size -= 3;
 322       sp += 3;
 323     }
 324 
 325     if (size > 0) {
 326       mlib_v_ImageLookUp_S32_U16_124_D1(sp, dp, size, tab0, tab1, tab2, tab3);
 327     }
 328 
 329     sl = (mlib_s32 *) ((mlib_u8 *) sl + slb);
 330     dl = (mlib_u16 *) ((mlib_u8 *) dl + dlb);
 331   }
 332 }
 333 
 334 /***************************************************************/
 335 void mlib_v_ImageLookUp_S32_U16_3_D1(const mlib_s32 *src,
 336                                      mlib_u16       *dst,
 337                                      mlib_s32       xsize,
 338                                      const mlib_u16 *table0,
 339                                      const mlib_u16 *table1,
 340                                      const mlib_u16 *table2)
 341 {
 342   mlib_s32 *sp;                        /* pointer to source data */
 343   mlib_s32 s0, s1, s2, s3;             /* source data */
 344   mlib_u16 *dl;                        /* pointer to start of destination */
 345   mlib_u16 *dend;                      /* pointer to end of destination */
 346   mlib_d64 *dp;                        /* aligned pointer to destination */
 347   mlib_d64 t0, t1, t2, t3;             /* destination data */
 348   mlib_d64 acc0;                       /* destination data */
 349   mlib_s32 emask;                      /* edge mask */
 350   mlib_s32 i, num;                     /* loop variable */
 351   const mlib_u16 *table;
 352 
 353   dl = dst;
 354   sp = (void *)src;
 355   dp = (mlib_d64 *) dl;
 356   dend = dl + xsize - 1;
 357 
 358   vis_alignaddr((void *)0, 6);
 359 
 360   if (xsize >= 4) {
 361 
 362     s0 = sp[0];
 363     s1 = sp[1];
 364     s2 = sp[2];
 365     s3 = sp[3];
 366     sp += 4;
 367 
 368 #pragma pipeloop(0)
 369     for (i = 0; i <= xsize - 8; i += 4, sp += 4) {
 370       t3 = VIS_LD_U16_I(table0, ((mlib_addr) 2 * s3));
 371       t2 = VIS_LD_U16_I(table2, ((mlib_addr) 2 * s2));
 372       t1 = VIS_LD_U16_I(table1, ((mlib_addr) 2 * s1));
 373       t0 = VIS_LD_U16_I(table0, ((mlib_addr) 2 * s0));
 374       acc0 = vis_faligndata(t3, acc0);
 375       acc0 = vis_faligndata(t2, acc0);
 376       acc0 = vis_faligndata(t1, acc0);
 377       acc0 = vis_faligndata(t0, acc0);
 378       s0 = sp[0];
 379       s1 = sp[1];
 380       s2 = sp[2];
 381       s3 = sp[3];
 382       *dp++ = acc0;
 383       table = table0;
 384       table0 = table1;
 385       table1 = table2;
 386       table2 = table;
 387     }
 388 
 389     t3 = VIS_LD_U16_I(table0, ((mlib_addr) 2 * s3));
 390     t2 = VIS_LD_U16_I(table2, ((mlib_addr) 2 * s2));
 391     t1 = VIS_LD_U16_I(table1, ((mlib_addr) 2 * s1));
 392     t0 = VIS_LD_U16_I(table0, ((mlib_addr) 2 * s0));
 393     acc0 = vis_faligndata(t3, acc0);
 394     acc0 = vis_faligndata(t2, acc0);
 395     acc0 = vis_faligndata(t1, acc0);
 396     acc0 = vis_faligndata(t0, acc0);
 397     *dp++ = acc0;
 398     table = table0;
 399     table0 = table1;
 400     table1 = table2;
 401     table2 = table;
 402   }
 403 
 404   if ((mlib_addr) dp <= (mlib_addr) dend) {
 405 
 406     num = (mlib_s32) ((mlib_u16 *) dend - (mlib_u16 *) dp);
 407     sp += num;
 408     num++;
 409 
 410     if (num == 1) {
 411       s0 = *sp;
 412 
 413       t0 = VIS_LD_U16_I(table0, ((mlib_addr) 2 * s0));
 414       acc0 = vis_faligndata(t0, acc0);
 415     }
 416     else if (num == 2) {
 417       s0 = *sp;
 418       sp--;
 419 
 420       t0 = VIS_LD_U16_I(table1, ((mlib_addr) 2 * s0));
 421       acc0 = vis_faligndata(t0, acc0);
 422 
 423       s0 = *sp;
 424 
 425       t0 = VIS_LD_U16_I(table0, ((mlib_addr) 2 * s0));
 426       acc0 = vis_faligndata(t0, acc0);
 427     }
 428     else if (num == 3) {
 429       s0 = *sp;
 430       sp--;
 431 
 432       t0 = VIS_LD_U16_I(table2, ((mlib_addr) 2 * s0));
 433       acc0 = vis_faligndata(t0, acc0);
 434 
 435       s0 = *sp;
 436       sp--;
 437 
 438       t0 = VIS_LD_U16_I(table1, ((mlib_addr) 2 * s0));
 439       acc0 = vis_faligndata(t0, acc0);
 440 
 441       s0 = *sp;
 442 
 443       t0 = VIS_LD_U16_I(table0, ((mlib_addr) 2 * s0));
 444       acc0 = vis_faligndata(t0, acc0);
 445     }
 446 
 447     emask = vis_edge16(dp, dend);
 448     vis_pst_16(acc0, dp, emask);
 449   }
 450 }
 451 
 452 /***************************************************************/
 453 void mlib_v_ImageLookUp_S32_U16_3(const mlib_s32 *src,
 454                                   mlib_s32       slb,
 455                                   mlib_u16       *dst,
 456                                   mlib_s32       dlb,
 457                                   mlib_s32       xsize,
 458                                   mlib_s32       ysize,
 459                                   const mlib_u16 **table)
 460 {
 461   mlib_s32 *sl;
 462   mlib_u16 *dl;
 463   mlib_u32 shift = 2147483648u;
 464   const mlib_u16 *tab;
 465   mlib_s32 j, i;
 466 
 467   sl = (void *)src;
 468   dl = dst;
 469 
 470   /* row loop */
 471   for (j = 0; j < ysize; j++) {
 472     mlib_s32 *sp = sl;
 473     mlib_u16 *dp = dl;
 474     const mlib_u16 *tab0 = &table[0][shift];
 475     const mlib_u16 *tab1 = &table[1][shift];
 476     const mlib_u16 *tab2 = &table[2][shift];
 477     mlib_s32 off, size = xsize * 3;
 478 
 479     off = (mlib_s32) (((8 - ((mlib_addr) dp & 7)) & 7) >> 1);
 480 
 481     off = (off < size) ? off : size;
 482 
 483     for (i = 0; i < off - 2; i += 3, sp += 3) {
 484       *dp++ = tab0[sp[0]];
 485       *dp++ = tab1[sp[1]];
 486       *dp++ = tab2[sp[2]];
 487       size -= 3;
 488     }
 489 
 490     off -= i;
 491 
 492     if (off == 1) {
 493       *dp++ = tab0[sp[0]];
 494       tab = tab0;
 495       tab0 = tab1;
 496       tab1 = tab2;
 497       tab2 = tab;
 498       size--;
 499       sp++;
 500     }
 501     else if (off == 2) {
 502       *dp++ = tab0[sp[0]];
 503       *dp++ = tab1[sp[1]];
 504       tab = tab2;
 505       tab2 = tab1;
 506       tab1 = tab0;
 507       tab0 = tab;
 508       size -= 2;
 509       sp += 2;
 510     }
 511 
 512     if (size > 0) {
 513       mlib_v_ImageLookUp_S32_U16_3_D1(sp, dp, size, tab0, tab1, tab2);
 514     }
 515 
 516     sl = (mlib_s32 *) ((mlib_u8 *) sl + slb);
 517     dl = (mlib_u16 *) ((mlib_u8 *) dl + dlb);
 518   }
 519 }
 520 
 521 /***************************************************************/