1 /* 2 * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 27 28 #include "vis_proto.h" 29 #include "mlib_image.h" 30 #include "mlib_v_ImageLookUpFunc.h" 31 32 /***************************************************************/ 33 static void mlib_v_ImageLookUpSI_S32_S16_2_DstA8D1(const mlib_s32 *src, 34 mlib_s16 *dst, 35 mlib_s32 xsize, 36 const mlib_s16 **table); 37 38 static void mlib_v_ImageLookUpSI_S32_S16_2_D1(const mlib_s32 *src, 39 mlib_s16 *dst, 40 mlib_s32 xsize, 41 const mlib_s16 **table); 42 43 static void mlib_v_ImageLookUpSI_S32_S16_3_D1(const mlib_s32 *src, 44 mlib_s16 *dst, 45 mlib_s32 xsize, 46 const mlib_s16 **table); 47 48 static void mlib_v_ImageLookUpSI_S32_S16_4_DstOff0_D1(const mlib_s32 *src, 49 mlib_s16 *dst, 50 mlib_s32 xsize, 51 const mlib_s16 **table); 52 53 static void mlib_v_ImageLookUpSI_S32_S16_4_DstOff1_D1(const mlib_s32 *src, 54 mlib_s16 *dst, 55 mlib_s32 xsize, 56 const mlib_s16 **table); 57 58 static void mlib_v_ImageLookUpSI_S32_S16_4_DstOff2_D1(const mlib_s32 *src, 59 mlib_s16 *dst, 60 mlib_s32 xsize, 61 const mlib_s16 **table); 62 63 static void mlib_v_ImageLookUpSI_S32_S16_4_DstOff3_D1(const mlib_s32 *src, 64 mlib_s16 *dst, 65 mlib_s32 xsize, 66 const mlib_s16 **table); 67 68 /***************************************************************/ 69 #define VIS_LD_U16_I(X, Y) vis_ld_u16_i((void *)(X), (Y)) 70 71 /***************************************************************/ 72 void mlib_v_ImageLookUpSI_S32_S16_2_DstA8D1(const mlib_s32 *src, 73 mlib_s16 *dst, 74 mlib_s32 xsize, 75 const mlib_s16 **table) 76 { 77 mlib_s32 *sp; /* pointer to source data */ 78 mlib_s32 s0, s1; /* source data */ 79 mlib_s16 *dl; /* pointer to start of destination */ 80 mlib_d64 *dp; /* aligned pointer to destination */ 81 mlib_d64 t0, t1, t2; /* destination data */ 82 mlib_d64 t3, acc; /* destination data */ 83 mlib_s32 i; /* loop variable */ 84 mlib_u32 shift = 2147483648u; 85 const mlib_s16 *tab0 = &table[0][shift]; 86 const mlib_s16 *tab1 = &table[1][shift]; 87 88 sp = (void *)src; 89 dl = dst; 90 dp = (mlib_d64 *) dl; 91 92 vis_alignaddr((void *)0, 6); 93 94 if (xsize >= 2) { 95 96 s0 = sp[0]; 97 s1 = sp[1]; 98 sp += 2; 99 100 #pragma pipeloop(0) 101 for (i = 0; i <= xsize - 4; i += 2, sp += 2) { 102 t3 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s1)); 103 t2 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s1)); 104 t1 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s0)); 105 t0 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s0)); 106 acc = vis_faligndata(t3, acc); 107 acc = vis_faligndata(t2, acc); 108 acc = vis_faligndata(t1, acc); 109 acc = vis_faligndata(t0, acc); 110 s0 = sp[0]; 111 s1 = sp[1]; 112 *dp++ = acc; 113 } 114 115 t3 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s1)); 116 t2 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s1)); 117 t1 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s0)); 118 t0 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s0)); 119 acc = vis_faligndata(t3, acc); 120 acc = vis_faligndata(t2, acc); 121 acc = vis_faligndata(t1, acc); 122 acc = vis_faligndata(t0, acc); 123 *dp++ = acc; 124 } 125 126 if ((xsize & 1) != 0) { 127 s0 = sp[0]; 128 t1 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s0)); 129 t0 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s0)); 130 acc = vis_faligndata(t1, acc); 131 acc = vis_faligndata(t0, acc); 132 *(mlib_f32 *) dp = vis_read_hi(acc); 133 } 134 } 135 136 /***************************************************************/ 137 void mlib_v_ImageLookUpSI_S32_S16_2_D1(const mlib_s32 *src, 138 mlib_s16 *dst, 139 mlib_s32 xsize, 140 const mlib_s16 **table) 141 { 142 mlib_s32 *sp; /* pointer to source data */ 143 mlib_s32 s0, s1, s2; /* source data */ 144 mlib_s16 *dl; /* pointer to start of destination */ 145 mlib_d64 *dp; /* aligned pointer to destination */ 146 mlib_d64 t0, t1, t2; /* destination data */ 147 mlib_d64 t3, acc; /* destination data */ 148 mlib_s32 i; /* loop variable */ 149 mlib_u32 shift = 2147483648u; 150 const mlib_s16 *tab0 = &table[0][shift]; 151 const mlib_s16 *tab1 = &table[1][shift]; 152 153 sp = (void *)src; 154 dl = dst; 155 156 vis_alignaddr((void *)0, 6); 157 158 s0 = *sp++; 159 *dl++ = tab0[s0]; 160 dp = (mlib_d64 *) dl; 161 xsize--; 162 163 if (xsize >= 2) { 164 165 s1 = sp[0]; 166 s2 = sp[1]; 167 sp += 2; 168 169 #pragma pipeloop(0) 170 for (i = 0; i <= xsize - 4; i += 2, sp += 2) { 171 t3 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s2)); 172 t2 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s1)); 173 t1 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s1)); 174 t0 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s0)); 175 acc = vis_faligndata(t3, acc); 176 acc = vis_faligndata(t2, acc); 177 acc = vis_faligndata(t1, acc); 178 acc = vis_faligndata(t0, acc); 179 s0 = s2; 180 s1 = sp[0]; 181 s2 = sp[1]; 182 *dp++ = acc; 183 } 184 185 t3 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s2)); 186 t2 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s1)); 187 t1 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s1)); 188 t0 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s0)); 189 acc = vis_faligndata(t3, acc); 190 acc = vis_faligndata(t2, acc); 191 acc = vis_faligndata(t1, acc); 192 acc = vis_faligndata(t0, acc); 193 s0 = s2; 194 *dp++ = acc; 195 } 196 197 dl = (mlib_s16 *) dp; 198 199 if ((xsize & 1) != 0) { 200 s1 = sp[0]; 201 t1 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s1)); 202 t0 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s0)); 203 acc = vis_faligndata(t1, acc); 204 acc = vis_faligndata(t0, acc); 205 *(mlib_f32 *) dp = vis_read_hi(acc); 206 s0 = s1; 207 dl += 2; 208 } 209 210 *dl = tab1[s0]; 211 } 212 213 /***************************************************************/ 214 void mlib_v_ImageLookUpSI_S32_S16_2(const mlib_s32 *src, 215 mlib_s32 slb, 216 mlib_s16 *dst, 217 mlib_s32 dlb, 218 mlib_s32 xsize, 219 mlib_s32 ysize, 220 const mlib_s16 **table) 221 { 222 mlib_s32 *sl; 223 mlib_s16 *dl; 224 mlib_s32 j; 225 mlib_u32 shift = 2147483648u; 226 const mlib_s16 *tab0 = &table[0][shift]; 227 const mlib_s16 *tab1 = &table[1][shift]; 228 229 sl = (void *)src; 230 dl = dst; 231 232 /* row loop */ 233 for (j = 0; j < ysize; j++) { 234 mlib_s32 *sp = sl; 235 mlib_s16 *dp = dl; 236 mlib_s32 off, s0, size = xsize; 237 238 off = (mlib_s32) (((8 - ((mlib_addr) dp & 7)) & 7)); 239 240 if ((off >= 4) && (size > 0)) { 241 s0 = *sp++; 242 *dp++ = tab0[s0]; 243 *dp++ = tab1[s0]; 244 size--; 245 } 246 247 if (size > 0) { 248 249 if (((mlib_addr) dp & 7) == 0) { 250 mlib_v_ImageLookUpSI_S32_S16_2_DstA8D1(sp, dp, size, table); 251 } 252 else { 253 mlib_v_ImageLookUpSI_S32_S16_2_D1(sp, dp, size, table); 254 } 255 } 256 257 sl = (mlib_s32 *) ((mlib_u8 *) sl + slb); 258 dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb); 259 } 260 } 261 262 /***************************************************************/ 263 void mlib_v_ImageLookUpSI_S32_S16_3_D1(const mlib_s32 *src, 264 mlib_s16 *dst, 265 mlib_s32 xsize, 266 const mlib_s16 **table) 267 { 268 mlib_s32 *sp; /* pointer to source data */ 269 mlib_s16 *dl; /* pointer to start of destination */ 270 mlib_d64 *dp; /* aligned pointer to destination */ 271 mlib_d64 t0, t1, t2, t3; /* destination data */ 272 mlib_d64 acc0, acc1, acc2; /* destination data */ 273 mlib_s32 i; /* loop variable */ 274 mlib_u32 shift = 2147483648u; 275 const mlib_s16 *tab0 = &table[0][shift]; 276 const mlib_s16 *tab1 = &table[1][shift]; 277 const mlib_s16 *tab2 = &table[2][shift]; 278 mlib_s32 s00, s01, s02, s03; 279 280 sp = (void *)src; 281 dl = dst; 282 dp = (mlib_d64 *) dl; 283 284 vis_alignaddr((void *)0, 6); 285 286 i = 0; 287 288 if (xsize >= 4) { 289 290 s00 = sp[0]; 291 s01 = sp[1]; 292 s02 = sp[2]; 293 s03 = sp[3]; 294 sp += 4; 295 296 #pragma pipeloop(0) 297 for (i = 0; i <= xsize - 8; i += 4, sp += 4) { 298 t3 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s01)); 299 t2 = VIS_LD_U16_I(tab2, ((mlib_addr) 2 * s00)); 300 t1 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s00)); 301 t0 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s00)); 302 acc0 = vis_faligndata(t3, acc0); 303 acc0 = vis_faligndata(t2, acc0); 304 acc0 = vis_faligndata(t1, acc0); 305 acc0 = vis_faligndata(t0, acc0); 306 t3 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s02)); 307 t2 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s02)); 308 t1 = VIS_LD_U16_I(tab2, ((mlib_addr) 2 * s01)); 309 t0 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s01)); 310 acc1 = vis_faligndata(t3, acc1); 311 acc1 = vis_faligndata(t2, acc1); 312 acc1 = vis_faligndata(t1, acc1); 313 acc1 = vis_faligndata(t0, acc1); 314 t3 = VIS_LD_U16_I(tab2, ((mlib_addr) 2 * s03)); 315 t2 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s03)); 316 t1 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s03)); 317 t0 = VIS_LD_U16_I(tab2, ((mlib_addr) 2 * s02)); 318 acc2 = vis_faligndata(t3, acc2); 319 acc2 = vis_faligndata(t2, acc2); 320 acc2 = vis_faligndata(t1, acc2); 321 acc2 = vis_faligndata(t0, acc2); 322 s00 = sp[0]; 323 s01 = sp[1]; 324 s02 = sp[2]; 325 s03 = sp[3]; 326 *dp++ = acc0; 327 *dp++ = acc1; 328 *dp++ = acc2; 329 } 330 331 t3 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s01)); 332 t2 = VIS_LD_U16_I(tab2, ((mlib_addr) 2 * s00)); 333 t1 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s00)); 334 t0 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s00)); 335 acc0 = vis_faligndata(t3, acc0); 336 acc0 = vis_faligndata(t2, acc0); 337 acc0 = vis_faligndata(t1, acc0); 338 acc0 = vis_faligndata(t0, acc0); 339 t3 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s02)); 340 t2 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s02)); 341 t1 = VIS_LD_U16_I(tab2, ((mlib_addr) 2 * s01)); 342 t0 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s01)); 343 acc1 = vis_faligndata(t3, acc1); 344 acc1 = vis_faligndata(t2, acc1); 345 acc1 = vis_faligndata(t1, acc1); 346 acc1 = vis_faligndata(t0, acc1); 347 t3 = VIS_LD_U16_I(tab2, ((mlib_addr) 2 * s03)); 348 t2 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s03)); 349 t1 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s03)); 350 t0 = VIS_LD_U16_I(tab2, ((mlib_addr) 2 * s02)); 351 acc2 = vis_faligndata(t3, acc2); 352 acc2 = vis_faligndata(t2, acc2); 353 acc2 = vis_faligndata(t1, acc2); 354 acc2 = vis_faligndata(t0, acc2); 355 *dp++ = acc0; 356 *dp++ = acc1; 357 *dp++ = acc2; 358 i += 4; 359 } 360 361 dl = (mlib_s16 *) dp; 362 363 #pragma pipeloop(0) 364 for (; i < xsize; i++) { 365 s00 = sp[0]; 366 dl[0] = tab0[s00]; 367 dl[1] = tab1[s00]; 368 dl[2] = tab2[s00]; 369 dl += 3; 370 sp++; 371 } 372 } 373 374 /***************************************************************/ 375 void mlib_v_ImageLookUpSI_S32_S16_3(const mlib_s32 *src, 376 mlib_s32 slb, 377 mlib_s16 *dst, 378 mlib_s32 dlb, 379 mlib_s32 xsize, 380 mlib_s32 ysize, 381 const mlib_s16 **table) 382 { 383 mlib_s32 *sl; 384 mlib_s16 *dl; 385 mlib_s32 i, j; 386 mlib_u32 shift = 2147483648u; 387 const mlib_s16 *tab0 = &table[0][shift]; 388 const mlib_s16 *tab1 = &table[1][shift]; 389 const mlib_s16 *tab2 = &table[2][shift]; 390 391 sl = (void *)src; 392 dl = dst; 393 394 /* row loop */ 395 for (j = 0; j < ysize; j++) { 396 mlib_s32 *sp = sl; 397 mlib_s16 *dp = dl; 398 mlib_s32 off, s0, size = xsize; 399 400 off = (mlib_s32) (((mlib_addr) dp & 7) >> 1); 401 off = (off < size) ? off : size; 402 403 for (i = 0; i < off; i++) { 404 s0 = *sp++; 405 *dp++ = tab0[s0]; 406 *dp++ = tab1[s0]; 407 *dp++ = tab2[s0]; 408 size--; 409 } 410 411 if (size > 0) { 412 mlib_v_ImageLookUpSI_S32_S16_3_D1(sp, dp, size, table); 413 } 414 415 sl = (mlib_s32 *) ((mlib_u8 *) sl + slb); 416 dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb); 417 } 418 } 419 420 /***************************************************************/ 421 void mlib_v_ImageLookUpSI_S32_S16_4_DstOff0_D1(const mlib_s32 *src, 422 mlib_s16 *dst, 423 mlib_s32 xsize, 424 const mlib_s16 **table) 425 { 426 mlib_s32 *sp; /* pointer to source data */ 427 mlib_s32 s0; /* source data */ 428 mlib_s16 *dl; /* pointer to start of destination */ 429 mlib_d64 *dp; /* aligned pointer to destination */ 430 mlib_d64 t0, t1, t2, t3; /* destination data */ 431 mlib_d64 acc; /* destination data */ 432 mlib_s32 i; /* loop variable */ 433 mlib_u32 shift = 2147483648u; 434 const mlib_s16 *tab0 = &table[0][shift]; 435 const mlib_s16 *tab1 = &table[1][shift]; 436 const mlib_s16 *tab2 = &table[2][shift]; 437 const mlib_s16 *tab3 = &table[3][shift]; 438 439 sp = (void *)src; 440 dl = dst; 441 dp = (mlib_d64 *) dl; 442 443 vis_alignaddr((void *)0, 6); 444 445 if (xsize >= 1) { 446 447 s0 = *sp++; 448 449 #pragma pipeloop(0) 450 for (i = 0; i <= xsize - 2; i++) { 451 t3 = VIS_LD_U16_I(tab3, ((mlib_addr) 2 * s0)); 452 t2 = VIS_LD_U16_I(tab2, ((mlib_addr) 2 * s0)); 453 t1 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s0)); 454 t0 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s0)); 455 acc = vis_faligndata(t3, acc); 456 acc = vis_faligndata(t2, acc); 457 acc = vis_faligndata(t1, acc); 458 acc = vis_faligndata(t0, acc); 459 s0 = *sp++; 460 *dp++ = acc; 461 } 462 463 t3 = VIS_LD_U16_I(tab3, ((mlib_addr) 2 * s0)); 464 t2 = VIS_LD_U16_I(tab2, ((mlib_addr) 2 * s0)); 465 t1 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s0)); 466 t0 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s0)); 467 acc = vis_faligndata(t3, acc); 468 acc = vis_faligndata(t2, acc); 469 acc = vis_faligndata(t1, acc); 470 acc = vis_faligndata(t0, acc); 471 *dp++ = acc; 472 } 473 } 474 475 /***************************************************************/ 476 void mlib_v_ImageLookUpSI_S32_S16_4_DstOff1_D1(const mlib_s32 *src, 477 mlib_s16 *dst, 478 mlib_s32 xsize, 479 const mlib_s16 **table) 480 { 481 mlib_s32 *sp; /* pointer to source data */ 482 mlib_s32 s0, s1; /* source data */ 483 mlib_s16 *dl; /* pointer to start of destination */ 484 mlib_d64 *dp; /* aligned pointer to destination */ 485 mlib_d64 t0, t1, t2, t3; /* destination data */ 486 mlib_d64 acc; /* destination data */ 487 mlib_s32 i; /* loop variable */ 488 mlib_u32 shift = 2147483648u; 489 const mlib_s16 *tab0 = &table[0][shift]; 490 const mlib_s16 *tab1 = &table[1][shift]; 491 const mlib_s16 *tab2 = &table[2][shift]; 492 const mlib_s16 *tab3 = &table[3][shift]; 493 494 sp = (void *)src; 495 dl = dst; 496 dp = (mlib_d64 *) dl; 497 498 vis_alignaddr((void *)0, 6); 499 500 s0 = *sp++; 501 502 if (xsize >= 1) { 503 504 s1 = *sp++; 505 506 #pragma pipeloop(0) 507 for (i = 0; i <= xsize - 2; i++) { 508 t3 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s1)); 509 t2 = VIS_LD_U16_I(tab3, ((mlib_addr) 2 * s0)); 510 t1 = VIS_LD_U16_I(tab2, ((mlib_addr) 2 * s0)); 511 t0 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s0)); 512 acc = vis_faligndata(t3, acc); 513 acc = vis_faligndata(t2, acc); 514 acc = vis_faligndata(t1, acc); 515 acc = vis_faligndata(t0, acc); 516 s0 = s1; 517 s1 = *sp++; 518 *dp++ = acc; 519 } 520 521 t3 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s1)); 522 t2 = VIS_LD_U16_I(tab3, ((mlib_addr) 2 * s0)); 523 t1 = VIS_LD_U16_I(tab2, ((mlib_addr) 2 * s0)); 524 t0 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s0)); 525 acc = vis_faligndata(t3, acc); 526 acc = vis_faligndata(t2, acc); 527 acc = vis_faligndata(t1, acc); 528 acc = vis_faligndata(t0, acc); 529 s0 = s1; 530 *dp++ = acc; 531 } 532 533 dl = (mlib_s16 *) dp; 534 535 dl[0] = tab1[s0]; 536 dl[1] = tab2[s0]; 537 dl[2] = tab3[s0]; 538 } 539 540 /***************************************************************/ 541 void mlib_v_ImageLookUpSI_S32_S16_4_DstOff2_D1(const mlib_s32 *src, 542 mlib_s16 *dst, 543 mlib_s32 xsize, 544 const mlib_s16 **table) 545 { 546 mlib_s32 *sp; /* pointer to source data */ 547 mlib_s32 s0, s1; /* source data */ 548 mlib_s16 *dl; /* pointer to start of destination */ 549 mlib_d64 *dp; /* aligned pointer to destination */ 550 mlib_d64 t0, t1, t2, t3; /* destination data */ 551 mlib_d64 acc; /* destination data */ 552 mlib_s32 i; /* loop variable */ 553 mlib_u32 shift = 2147483648u; 554 const mlib_s16 *tab0 = &table[0][shift]; 555 const mlib_s16 *tab1 = &table[1][shift]; 556 const mlib_s16 *tab2 = &table[2][shift]; 557 const mlib_s16 *tab3 = &table[3][shift]; 558 559 sp = (void *)src; 560 dl = dst; 561 dp = (mlib_d64 *) dl; 562 563 vis_alignaddr((void *)0, 6); 564 565 s0 = *sp++; 566 567 if (xsize >= 1) { 568 569 s1 = *sp++; 570 571 #pragma pipeloop(0) 572 for (i = 0; i <= xsize - 2; i++) { 573 t3 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s1)); 574 t2 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s1)); 575 t1 = VIS_LD_U16_I(tab3, ((mlib_addr) 2 * s0)); 576 t0 = VIS_LD_U16_I(tab2, ((mlib_addr) 2 * s0)); 577 acc = vis_faligndata(t3, acc); 578 acc = vis_faligndata(t2, acc); 579 acc = vis_faligndata(t1, acc); 580 acc = vis_faligndata(t0, acc); 581 s0 = s1; 582 s1 = *sp++; 583 *dp++ = acc; 584 } 585 586 t3 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s1)); 587 t2 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s1)); 588 t1 = VIS_LD_U16_I(tab3, ((mlib_addr) 2 * s0)); 589 t0 = VIS_LD_U16_I(tab2, ((mlib_addr) 2 * s0)); 590 acc = vis_faligndata(t3, acc); 591 acc = vis_faligndata(t2, acc); 592 acc = vis_faligndata(t1, acc); 593 acc = vis_faligndata(t0, acc); 594 s0 = s1; 595 *dp++ = acc; 596 } 597 598 dl = (mlib_s16 *) dp; 599 600 dl[0] = tab2[s0]; 601 dl[1] = tab3[s0]; 602 } 603 604 /***************************************************************/ 605 void mlib_v_ImageLookUpSI_S32_S16_4_DstOff3_D1(const mlib_s32 *src, 606 mlib_s16 *dst, 607 mlib_s32 xsize, 608 const mlib_s16 **table) 609 { 610 mlib_s32 *sp; /* pointer to source data */ 611 mlib_s32 s0, s1; /* source data */ 612 mlib_s16 *dl; /* pointer to start of destination */ 613 mlib_d64 *dp; /* aligned pointer to destination */ 614 mlib_d64 t0, t1, t2, t3; /* destination data */ 615 mlib_d64 acc; /* destination data */ 616 mlib_s32 i; /* loop variable */ 617 mlib_u32 shift = 2147483648u; 618 const mlib_s16 *tab0 = &table[0][shift]; 619 const mlib_s16 *tab1 = &table[1][shift]; 620 const mlib_s16 *tab2 = &table[2][shift]; 621 const mlib_s16 *tab3 = &table[3][shift]; 622 623 sp = (void *)src; 624 dl = dst; 625 dp = (mlib_d64 *) dl; 626 627 vis_alignaddr((void *)0, 6); 628 629 s0 = *sp++; 630 631 if (xsize >= 1) { 632 633 s1 = *sp++; 634 635 #pragma pipeloop(0) 636 for (i = 0; i <= xsize - 2; i++) { 637 t3 = VIS_LD_U16_I(tab2, ((mlib_addr) 2 * s1)); 638 t2 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s1)); 639 t1 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s1)); 640 t0 = VIS_LD_U16_I(tab3, ((mlib_addr) 2 * s0)); 641 acc = vis_faligndata(t3, acc); 642 acc = vis_faligndata(t2, acc); 643 acc = vis_faligndata(t1, acc); 644 acc = vis_faligndata(t0, acc); 645 s0 = s1; 646 s1 = *sp++; 647 *dp++ = acc; 648 } 649 650 t3 = VIS_LD_U16_I(tab2, ((mlib_addr) 2 * s1)); 651 t2 = VIS_LD_U16_I(tab1, ((mlib_addr) 2 * s1)); 652 t1 = VIS_LD_U16_I(tab0, ((mlib_addr) 2 * s1)); 653 t0 = VIS_LD_U16_I(tab3, ((mlib_addr) 2 * s0)); 654 acc = vis_faligndata(t3, acc); 655 acc = vis_faligndata(t2, acc); 656 acc = vis_faligndata(t1, acc); 657 acc = vis_faligndata(t0, acc); 658 s0 = s1; 659 *dp++ = acc; 660 } 661 662 dl = (mlib_s16 *) dp; 663 664 dl[0] = tab3[s0]; 665 } 666 667 /***************************************************************/ 668 void mlib_v_ImageLookUpSI_S32_S16_4(const mlib_s32 *src, 669 mlib_s32 slb, 670 mlib_s16 *dst, 671 mlib_s32 dlb, 672 mlib_s32 xsize, 673 mlib_s32 ysize, 674 const mlib_s16 **table) 675 { 676 mlib_s32 *sl; 677 mlib_s16 *dl; 678 mlib_s32 j; 679 mlib_u32 shift = 2147483648u; 680 const mlib_s16 *tab0 = &table[0][shift]; 681 const mlib_s16 *tab1 = &table[1][shift]; 682 const mlib_s16 *tab2 = &table[2][shift]; 683 684 sl = (void *)src; 685 dl = dst; 686 687 /* row loop */ 688 for (j = 0; j < ysize; j++) { 689 mlib_s32 *sp = sl; 690 mlib_s16 *dp = dl; 691 mlib_s32 off, s0, size = xsize; 692 693 if (size > 0) { 694 off = (mlib_s32) (((8 - ((mlib_addr) dp & 7)) & 7) >> 1); 695 696 if (off == 0) { 697 mlib_v_ImageLookUpSI_S32_S16_4_DstOff0_D1(sp, dp, size, table); 698 } 699 else if (off == 1) { 700 s0 = *sp; 701 *dp++ = tab0[s0]; 702 size--; 703 mlib_v_ImageLookUpSI_S32_S16_4_DstOff1_D1(sp, dp, size, table); 704 } 705 else if (off == 2) { 706 s0 = *sp; 707 *dp++ = tab0[s0]; 708 *dp++ = tab1[s0]; 709 size--; 710 mlib_v_ImageLookUpSI_S32_S16_4_DstOff2_D1(sp, dp, size, table); 711 } 712 else if (off == 3) { 713 s0 = *sp; 714 *dp++ = tab0[s0]; 715 *dp++ = tab1[s0]; 716 *dp++ = tab2[s0]; 717 size--; 718 mlib_v_ImageLookUpSI_S32_S16_4_DstOff3_D1(sp, dp, size, table); 719 } 720 } 721 722 sl = (mlib_s32 *) ((mlib_u8 *) sl + slb); 723 dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb); 724 } 725 } 726 727 /***************************************************************/