1 /* 2 * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 27 28 /* 29 * The functions step along the lines from xLeft to xRight and apply 30 * the bilinear filtering. 31 * 32 */ 33 34 #include "vis_proto.h" 35 #include "mlib_image.h" 36 #include "mlib_ImageColormap.h" 37 #include "mlib_ImageCopy.h" 38 #include "mlib_ImageAffine.h" 39 #include "mlib_v_ImageFilters.h" 40 #include "mlib_v_ImageChannelExtract.h" 41 42 /*#define MLIB_VIS2*/ 43 44 /***************************************************************/ 45 #define DTYPE mlib_s16 46 47 #define FUN_NAME(CHAN) mlib_ImageAffine_s16_##CHAN##_bl 48 49 /***************************************************************/ 50 static mlib_status FUN_NAME(2ch_na)(mlib_affine_param *param); 51 static mlib_status FUN_NAME(4ch_na)(mlib_affine_param *param); 52 53 /***************************************************************/ 54 const mlib_u64 mlib_dmask_arr[] = { 55 0x0000000000000000, 0x000000000000FFFF, 0x00000000FFFF0000, 0x00000000FFFFFFFF, 56 0x0000FFFF00000000, 0x0000FFFF0000FFFF, 0x0000FFFFFFFF0000, 0x0000FFFFFFFFFFFF, 57 0xFFFF000000000000, 0xFFFF00000000FFFF, 0xFFFF0000FFFF0000, 0xFFFF0000FFFFFFFF, 58 0xFFFFFFFF00000000, 0xFFFFFFFF0000FFFF, 0xFFFFFFFFFFFF0000, 0xFFFFFFFFFFFFFFFF 59 }; 60 61 /***************************************************************/ 62 #define XOR_8000(x) 63 64 /***************************************************************/ 65 #ifdef MLIB_VIS2 66 #define MLIB_WRITE_BMASK(bmask) vis_write_bmask(bmask, 0) 67 #else 68 #define MLIB_WRITE_BMASK(bmask) 69 #endif 70 71 /***************************************************************/ 72 #undef DECLAREVAR 73 #define DECLAREVAR() \ 74 DECLAREVAR0(); \ 75 mlib_s32 *warp_tbl = param -> warp_tbl; \ 76 mlib_s32 srcYStride = param -> srcYStride; \ 77 mlib_u8 *dl; \ 78 mlib_s32 i, size; \ 79 /*mlib_d64 mask_8000 = vis_to_double_dup(0x80008000);*/ \ 80 mlib_d64 mask_7fff = vis_to_double_dup(0x7FFF7FFF); \ 81 mlib_d64 dx64, dy64, deltax, deltay, delta1_x, delta1_y; \ 82 mlib_d64 s0, s1, s2, s3; \ 83 mlib_d64 d0, d1, d2, d3, dd 84 85 /***************************************************************/ 86 87 /* arguments (x, y) are swapped to prevent overflow */ 88 #define FMUL_16x16(x, y) \ 89 vis_fpadd16(vis_fmul8sux16(y, x), \ 90 vis_fmul8ulx16(y, x)) 91 92 /***************************************************************/ 93 #define BUF_SIZE 512 94 95 /***************************************************************/ 96 #define DOUBLE_4U16(x0, x1, x2, x3) \ 97 vis_to_double(((((x0) & 0xFFFE) << 15) | (((x1) & 0xFFFE) >> 1)), \ 98 ((((x2) & 0xFFFE) << 15) | (((x3) & 0xFFFE) >> 1))) 99 100 /***************************************************************/ 101 #define BL_SUM() \ 102 XOR_8000(s0); \ 103 XOR_8000(s1); \ 104 XOR_8000(s2); \ 105 XOR_8000(s3); \ 106 \ 107 delta1_x = vis_fpsub16(mask_7fff, deltax); \ 108 delta1_y = vis_fpsub16(mask_7fff, deltay); \ 109 \ 110 d0 = FMUL_16x16(s0, delta1_x); \ 111 d1 = FMUL_16x16(s1, deltax); \ 112 d0 = vis_fpadd16(d0, d1); \ 113 d0 = vis_fpadd16(d0, d0); \ 114 d0 = FMUL_16x16(d0, delta1_y); \ 115 \ 116 d2 = FMUL_16x16(s2, delta1_x); \ 117 d3 = FMUL_16x16(s3, deltax); \ 118 d2 = vis_fpadd16(d2, d3); \ 119 d2 = vis_fpadd16(d2, d2); \ 120 d2 = FMUL_16x16(d2, deltay); \ 121 \ 122 dd = vis_fpadd16(d0, d2); \ 123 dd = vis_fpadd16(dd, dd); \ 124 XOR_8000(dd); \ 125 \ 126 deltax = vis_fpadd16(deltax, dx64); \ 127 deltay = vis_fpadd16(deltay, dy64); \ 128 deltax = vis_fand(deltax, mask_7fff); \ 129 deltay = vis_fand(deltay, mask_7fff) 130 131 /***************************************************************/ 132 #define BL_SUM_3CH() \ 133 XOR_8000(s0); \ 134 XOR_8000(s1); \ 135 XOR_8000(s2); \ 136 XOR_8000(s3); \ 137 \ 138 delta1_x = vis_fpsub16(mask_7fff, deltax); \ 139 delta1_y = vis_fpsub16(mask_7fff, deltay); \ 140 \ 141 d0 = FMUL_16x16(s0, delta1_y); \ 142 d2 = FMUL_16x16(s2, deltay); \ 143 d0 = vis_fpadd16(d0, d2); \ 144 d0 = vis_fpadd16(d0, d0); \ 145 d0 = FMUL_16x16(d0, delta1_x); \ 146 \ 147 d1 = FMUL_16x16(s1, delta1_y); \ 148 d3 = FMUL_16x16(s3, deltay); \ 149 d1 = vis_fpadd16(d1, d3); \ 150 d1 = vis_fpadd16(d1, d1); \ 151 d1 = FMUL_16x16(d1, deltax); \ 152 \ 153 vis_alignaddr((void*)0, 2); \ 154 d0 = vis_faligndata(d0, d0); \ 155 dd = vis_fpadd16(d0, d1); \ 156 dd = vis_fpadd16(dd, dd); \ 157 XOR_8000(dd); \ 158 \ 159 deltax = vis_fpadd16(deltax, dx64); \ 160 deltay = vis_fpadd16(deltay, dy64); \ 161 deltax = vis_fand(deltax, mask_7fff); \ 162 deltay = vis_fand(deltay, mask_7fff) 163 164 /***************************************************************/ 165 #define LD_U16(sp, ind) vis_ld_u16(sp + ind) 166 167 /***************************************************************/ 168 #ifndef MLIB_VIS2 169 170 #define LOAD_1CH() \ 171 s0 = vis_faligndata(LD_U16(sp3, 0), mask_7fff); \ 172 s1 = vis_faligndata(LD_U16(sp3, 2), mask_7fff); \ 173 s2 = vis_faligndata(LD_U16(sp3, srcYStride), mask_7fff); \ 174 s3 = vis_faligndata(LD_U16(sp3, srcYStride + 2), mask_7fff); \ 175 \ 176 s0 = vis_faligndata(LD_U16(sp2, 0), s0); \ 177 s1 = vis_faligndata(LD_U16(sp2, 2), s1); \ 178 s2 = vis_faligndata(LD_U16(sp2, srcYStride), s2); \ 179 s3 = vis_faligndata(LD_U16(sp2, srcYStride + 2), s3); \ 180 \ 181 s0 = vis_faligndata(LD_U16(sp1, 0), s0); \ 182 s1 = vis_faligndata(LD_U16(sp1, 2), s1); \ 183 s2 = vis_faligndata(LD_U16(sp1, srcYStride), s2); \ 184 s3 = vis_faligndata(LD_U16(sp1, srcYStride + 2), s3); \ 185 \ 186 s0 = vis_faligndata(LD_U16(sp0, 0), s0); \ 187 s1 = vis_faligndata(LD_U16(sp0, 2), s1); \ 188 s2 = vis_faligndata(LD_U16(sp0, srcYStride), s2); \ 189 s3 = vis_faligndata(LD_U16(sp0, srcYStride + 2), s3) 190 191 #else 192 193 #define LOAD_1CH() \ 194 s0 = vis_bshuffle(LD_U16(sp0, 0), LD_U16(sp2, 0)); \ 195 s1 = vis_bshuffle(LD_U16(sp0, 2), LD_U16(sp2, 2)); \ 196 s2 = vis_bshuffle(LD_U16(sp0, srcYStride), LD_U16(sp2, srcYStride)); \ 197 s3 = vis_bshuffle(LD_U16(sp0, srcYStride + 2), LD_U16(sp2, srcYStride + 2)); \ 198 \ 199 t0 = vis_bshuffle(LD_U16(sp1, 0), LD_U16(sp3, 0)); \ 200 t1 = vis_bshuffle(LD_U16(sp1, 2), LD_U16(sp3, 2)); \ 201 t2 = vis_bshuffle(LD_U16(sp1, srcYStride), LD_U16(sp3, srcYStride)); \ 202 t3 = vis_bshuffle(LD_U16(sp1, srcYStride + 2), LD_U16(sp3, srcYStride + 2)); \ 203 \ 204 s0 = vis_bshuffle(s0, t0); \ 205 s1 = vis_bshuffle(s1, t1); \ 206 s2 = vis_bshuffle(s2, t2); \ 207 s3 = vis_bshuffle(s3, t3) 208 209 #endif 210 211 /***************************************************************/ 212 #define GET_POINTER(sp) \ 213 sp = *(mlib_u8**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + 2*(X >> MLIB_SHIFT); \ 214 X += dX; \ 215 Y += dY 216 217 /***************************************************************/ 218 #undef PREPARE_DELTAS 219 #define PREPARE_DELTAS \ 220 if (warp_tbl != NULL) { \ 221 dX = warp_tbl[2*j ]; \ 222 dY = warp_tbl[2*j + 1]; \ 223 dx64 = vis_to_double_dup((((dX << 1) & 0xFFFF) << 16) | ((dX << 1) & 0xFFFF)); \ 224 dy64 = vis_to_double_dup((((dY << 1) & 0xFFFF) << 16) | ((dY << 1) & 0xFFFF)); \ 225 } 226 227 /***************************************************************/ 228 mlib_status FUN_NAME(1ch)(mlib_affine_param *param) 229 { 230 DECLAREVAR(); 231 mlib_s32 off; 232 mlib_s32 x0, x1, x2, x3, y0, y1, y2, y3; 233 #ifdef MLIB_VIS2 234 mlib_d64 t0, t1, t2, t3; 235 vis_write_bmask(0x45CD67EF, 0); 236 #else 237 vis_alignaddr((void*)0, 6); 238 #endif 239 240 dx64 = vis_to_double_dup((((dX << 1) & 0xFFFF) << 16) | ((dX << 1) & 0xFFFF)); 241 dy64 = vis_to_double_dup((((dY << 1) & 0xFFFF) << 16) | ((dY << 1) & 0xFFFF)); 242 243 for (j = yStart; j <= yFinish; j++) { 244 mlib_u8 *sp0, *sp1, *sp2, *sp3; 245 mlib_d64 *dp, dmask; 246 247 NEW_LINE(1); 248 249 off = (mlib_s32)dl & 7; 250 dp = (mlib_d64*)(dl - off); 251 off >>= 1; 252 253 x0 = X - off*dX; y0 = Y - off*dY; 254 x1 = x0 + dX; y1 = y0 + dY; 255 x2 = x1 + dX; y2 = y1 + dY; 256 x3 = x2 + dX; y3 = y2 + dY; 257 258 deltax = DOUBLE_4U16(x0, x1, x2, x3); 259 deltay = DOUBLE_4U16(y0, y1, y2, y3); 260 261 if (off) { 262 mlib_s32 emask = vis_edge16((void*)(2*off), (void*)(2*(off + size - 1))); 263 264 off = 4 - off; 265 GET_POINTER(sp3); 266 sp0 = sp1 = sp2 = sp3; 267 268 if (off > 1 && size > 1) { 269 GET_POINTER(sp3); 270 } 271 272 if (off > 2) { 273 sp2 = sp3; 274 275 if (size > 2) { 276 GET_POINTER(sp3); 277 } 278 } 279 280 LOAD_1CH(); 281 BL_SUM(); 282 283 dmask = ((mlib_d64*)mlib_dmask_arr)[emask]; 284 *dp++ = vis_for (vis_fand(dmask, dd), vis_fandnot(dmask, dp[0])); 285 286 size -= off; 287 288 if (size < 0) size = 0; 289 } 290 291 #pragma pipeloop(0) 292 for (i = 0; i < size/4; i++) { 293 GET_POINTER(sp0); 294 GET_POINTER(sp1); 295 GET_POINTER(sp2); 296 GET_POINTER(sp3); 297 298 LOAD_1CH(); 299 BL_SUM(); 300 301 dp[i] = dd; 302 } 303 304 off = size & 3; 305 306 if (off) { 307 GET_POINTER(sp0); 308 sp1 = sp2 = sp3 = sp0; 309 310 if (off > 1) { 311 GET_POINTER(sp1); 312 } 313 314 if (off > 2) { 315 GET_POINTER(sp2); 316 } 317 318 LOAD_1CH(); 319 BL_SUM(); 320 321 dmask = ((mlib_d64*)mlib_dmask_arr)[(0xF0 >> off) & 0x0F]; 322 dp[i] = vis_for (vis_fand(dmask, dd), vis_fandnot(dmask, dp[i])); 323 } 324 } 325 326 return MLIB_SUCCESS; 327 } 328 329 /***************************************************************/ 330 #undef GET_POINTER 331 #define GET_POINTER(sp) \ 332 sp = *(mlib_f32**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + (X >> MLIB_SHIFT); \ 333 X += dX; \ 334 Y += dY 335 336 /***************************************************************/ 337 #define LOAD_2CH() \ 338 s0 = vis_freg_pair(sp0[0], sp1[0]); \ 339 s1 = vis_freg_pair(sp0[1], sp1[1]); \ 340 s2 = vis_freg_pair(sp0[srcYStride], sp1[srcYStride]); \ 341 s3 = vis_freg_pair(sp0[srcYStride + 1], sp1[srcYStride + 1]) 342 343 /***************************************************************/ 344 #undef PREPARE_DELTAS 345 #define PREPARE_DELTAS \ 346 if (warp_tbl != NULL) { \ 347 dX = warp_tbl[2*j ]; \ 348 dY = warp_tbl[2*j + 1]; \ 349 dx64 = vis_to_double_dup(((dX & 0xFFFF) << 16) | (dX & 0xFFFF)); \ 350 dy64 = vis_to_double_dup(((dY & 0xFFFF) << 16) | (dY & 0xFFFF)); \ 351 } 352 353 /***************************************************************/ 354 mlib_status FUN_NAME(2ch)(mlib_affine_param *param) 355 { 356 DECLAREVAR(); 357 mlib_s32 off; 358 mlib_s32 x0, x1, y0, y1; 359 360 if (((mlib_s32)lineAddr[0] | (mlib_s32)dstData | srcYStride | dstYStride) & 3) { 361 return FUN_NAME(2ch_na)(param); 362 } 363 364 srcYStride >>= 2; 365 366 dx64 = vis_to_double_dup(((dX & 0xFFFF) << 16) | (dX & 0xFFFF)); 367 dy64 = vis_to_double_dup(((dY & 0xFFFF) << 16) | (dY & 0xFFFF)); 368 369 for (j = yStart; j <= yFinish; j++) { 370 mlib_f32 *sp0, *sp1; 371 mlib_d64 *dp; 372 373 NEW_LINE(2); 374 375 off = (mlib_s32)dl & 7; 376 dp = (mlib_d64*)(dl - off); 377 378 if (off) { 379 x0 = X - dX; y0 = Y - dY; 380 x1 = X; y1 = Y; 381 } else { 382 x0 = X; y0 = Y; 383 x1 = X + dX; y1 = Y + dY; 384 } 385 386 deltax = DOUBLE_4U16(x0, x0, x1, x1); 387 deltay = DOUBLE_4U16(y0, y0, y1, y1); 388 389 if (off) { 390 GET_POINTER(sp1); 391 sp0 = sp1; 392 LOAD_2CH(); 393 394 BL_SUM(); 395 396 ((mlib_f32*)dp)[1] = vis_read_lo(dd); 397 dp++; 398 size--; 399 } 400 401 #pragma pipeloop(0) 402 for (i = 0; i < size/2; i++) { 403 GET_POINTER(sp0); 404 GET_POINTER(sp1); 405 LOAD_2CH(); 406 407 BL_SUM(); 408 409 *dp++ = dd; 410 } 411 412 if (size & 1) { 413 GET_POINTER(sp0); 414 sp1 = sp0; 415 LOAD_2CH(); 416 417 BL_SUM(); 418 419 ((mlib_f32*)dp)[0] = vis_read_hi(dd); 420 } 421 } 422 423 return MLIB_SUCCESS; 424 } 425 426 /***************************************************************/ 427 #undef GET_POINTER 428 #define GET_POINTER(sp) \ 429 sp = *(mlib_u8**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + 4*(X >> MLIB_SHIFT); \ 430 X += dX; \ 431 Y += dY 432 433 /***************************************************************/ 434 #ifndef MLIB_VIS2 435 436 #define LOAD_2CH_NA() \ 437 s0 = vis_faligndata(LD_U16(sp1, 2), mask_7fff); \ 438 s1 = vis_faligndata(LD_U16(sp1, 6), mask_7fff); \ 439 s2 = vis_faligndata(LD_U16(sp1, srcYStride + 2), mask_7fff); \ 440 s3 = vis_faligndata(LD_U16(sp1, srcYStride + 6), mask_7fff); \ 441 \ 442 s0 = vis_faligndata(LD_U16(sp1, 0), s0); \ 443 s1 = vis_faligndata(LD_U16(sp1, 4), s1); \ 444 s2 = vis_faligndata(LD_U16(sp1, srcYStride), s2); \ 445 s3 = vis_faligndata(LD_U16(sp1, srcYStride + 4), s3); \ 446 \ 447 s0 = vis_faligndata(LD_U16(sp0, 2), s0); \ 448 s1 = vis_faligndata(LD_U16(sp0, 6), s1); \ 449 s2 = vis_faligndata(LD_U16(sp0, srcYStride + 2), s2); \ 450 s3 = vis_faligndata(LD_U16(sp0, srcYStride + 6), s3); \ 451 \ 452 s0 = vis_faligndata(LD_U16(sp0, 0), s0); \ 453 s1 = vis_faligndata(LD_U16(sp0, 4), s1); \ 454 s2 = vis_faligndata(LD_U16(sp0, srcYStride), s2); \ 455 s3 = vis_faligndata(LD_U16(sp0, srcYStride + 4), s3) 456 457 #else 458 459 #define LOAD_2CH_NA() \ 460 s0 = vis_bshuffle(LD_U16(sp0, 0), LD_U16(sp1, 0)); \ 461 s1 = vis_bshuffle(LD_U16(sp0, 4), LD_U16(sp1, 4)); \ 462 s2 = vis_bshuffle(LD_U16(sp0, srcYStride), LD_U16(sp1, srcYStride)); \ 463 s3 = vis_bshuffle(LD_U16(sp0, srcYStride + 4), LD_U16(sp1, srcYStride + 4)); \ 464 \ 465 t0 = vis_bshuffle(LD_U16(sp0, 2), LD_U16(sp1, 2)); \ 466 t1 = vis_bshuffle(LD_U16(sp0, 6), LD_U16(sp1, 6)); \ 467 t2 = vis_bshuffle(LD_U16(sp0, srcYStride + 2), LD_U16(sp1, srcYStride + 2)); \ 468 t3 = vis_bshuffle(LD_U16(sp0, srcYStride + 6), LD_U16(sp1, srcYStride + 6)); \ 469 \ 470 s0 = vis_bshuffle(s0, t0); \ 471 s1 = vis_bshuffle(s1, t1); \ 472 s2 = vis_bshuffle(s2, t2); \ 473 s3 = vis_bshuffle(s3, t3) 474 475 #endif 476 477 /***************************************************************/ 478 mlib_status FUN_NAME(2ch_na)(mlib_affine_param *param) 479 { 480 DECLAREVAR(); 481 mlib_s32 max_xsize = param -> max_xsize, bsize; 482 mlib_s32 x0, x1, y0, y1; 483 mlib_d64 buff[BUF_SIZE], *pbuff = buff; 484 #ifdef MLIB_VIS2 485 mlib_d64 t0, t1, t2, t3; 486 #endif 487 488 bsize = (max_xsize + 1)/2; 489 490 if (bsize > BUF_SIZE) { 491 pbuff = mlib_malloc(bsize*sizeof(mlib_d64)); 492 493 if (pbuff == NULL) return MLIB_FAILURE; 494 } 495 496 MLIB_WRITE_BMASK(0x45CD67EF); 497 498 dx64 = vis_to_double_dup(((dX & 0xFFFF) << 16) | (dX & 0xFFFF)); 499 dy64 = vis_to_double_dup(((dY & 0xFFFF) << 16) | (dY & 0xFFFF)); 500 501 for (j = yStart; j <= yFinish; j++) { 502 mlib_u8 *sp0, *sp1; 503 504 #ifndef MLIB_VIS2 505 vis_alignaddr((void*)0, 6); 506 #endif 507 508 NEW_LINE(2); 509 510 x0 = X; y0 = Y; 511 x1 = X + dX; y1 = Y + dY; 512 513 deltax = DOUBLE_4U16(x0, x0, x1, x1); 514 deltay = DOUBLE_4U16(y0, y0, y1, y1); 515 516 #pragma pipeloop(0) 517 for (i = 0; i < size/2; i++) { 518 GET_POINTER(sp0); 519 GET_POINTER(sp1); 520 LOAD_2CH_NA(); 521 522 BL_SUM(); 523 524 pbuff[i] = dd; 525 } 526 527 if (size & 1) { 528 GET_POINTER(sp0); 529 sp1 = sp0; 530 LOAD_2CH_NA(); 531 532 BL_SUM(); 533 534 pbuff[i] = dd; 535 } 536 537 mlib_ImageCopy_na((mlib_u8*)pbuff, dl, 4*size); 538 } 539 540 if (pbuff != buff) { 541 mlib_free(pbuff); 542 } 543 544 return MLIB_SUCCESS; 545 } 546 547 /***************************************************************/ 548 #undef PREPARE_DELTAS 549 #define PREPARE_DELTAS \ 550 if (warp_tbl != NULL) { \ 551 dX = warp_tbl[2*j ]; \ 552 dY = warp_tbl[2*j + 1]; \ 553 dX = (dX - (dX >> 31)) &~ 1; /* rounding towards ZERO */ \ 554 dY = (dY - (dY >> 31)) &~ 1; /* rounding towards ZERO */ \ 555 dx64 = vis_to_double_dup((((dX >> 1) & 0xFFFF) << 16) | ((dX >> 1) & 0xFFFF)); \ 556 dy64 = vis_to_double_dup((((dY >> 1) & 0xFFFF) << 16) | ((dY >> 1) & 0xFFFF)); \ 557 } 558 559 /***************************************************************/ 560 mlib_status FUN_NAME(3ch)(mlib_affine_param *param) 561 { 562 DECLAREVAR(); 563 mlib_s32 max_xsize = param -> max_xsize; 564 mlib_d64 buff[BUF_SIZE], *pbuff = buff; 565 566 if (max_xsize > BUF_SIZE) { 567 pbuff = mlib_malloc(max_xsize*sizeof(mlib_d64)); 568 569 if (pbuff == NULL) return MLIB_FAILURE; 570 } 571 572 dX = (dX - (dX >> 31)) &~ 1; /* rounding towards ZERO */ 573 dY = (dY - (dY >> 31)) &~ 1; /* rounding towards ZERO */ 574 dx64 = vis_to_double_dup((((dX >> 1) & 0xFFFF) << 16) | ((dX >> 1) & 0xFFFF)); 575 dy64 = vis_to_double_dup((((dY >> 1) & 0xFFFF) << 16) | ((dY >> 1) & 0xFFFF)); 576 577 for (j = yStart; j <= yFinish; j++) { 578 mlib_u8 *sp; 579 mlib_d64 *sp0, *sp1; 580 581 NEW_LINE(3); 582 583 deltax = DOUBLE_4U16(X, X, X, X); 584 deltay = DOUBLE_4U16(Y, Y, Y, Y); 585 586 #pragma pipeloop(0) 587 for (i = 0; i < size; i++) { 588 sp = *(mlib_u8**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + 6*(X >> MLIB_SHIFT) - 2; 589 590 vis_alignaddr(sp, 0); 591 sp0 = AL_ADDR(sp, 0); 592 s0 = vis_faligndata(sp0[0], sp0[1]); 593 s1 = vis_faligndata(sp0[1], sp0[2]); 594 595 vis_alignaddr(sp, srcYStride); 596 sp1 = AL_ADDR(sp, srcYStride); 597 s2 = vis_faligndata(sp1[0], sp1[1]); 598 s3 = vis_faligndata(sp1[1], sp1[2]); 599 600 BL_SUM_3CH(); 601 602 pbuff[i] = dd; 603 X += dX; 604 Y += dY; 605 } 606 607 mlib_v_ImageChannelExtract_S16_43L_D1((void *)pbuff, (void *)dl, size); 608 } 609 610 if (pbuff != buff) { 611 mlib_free(pbuff); 612 } 613 614 return MLIB_SUCCESS; 615 } 616 617 /***************************************************************/ 618 mlib_status FUN_NAME(4ch)(mlib_affine_param *param) 619 { 620 DECLAREVAR(); 621 622 if (((mlib_s32)lineAddr[0] | (mlib_s32)dstData | srcYStride | dstYStride) & 7) { 623 return FUN_NAME(4ch_na)(param); 624 } 625 626 srcYStride >>= 3; 627 628 dX = (dX - (dX >> 31)) &~ 1; /* rounding towards ZERO */ 629 dY = (dY - (dY >> 31)) &~ 1; /* rounding towards ZERO */ 630 dx64 = vis_to_double_dup((((dX >> 1) & 0xFFFF) << 16) | ((dX >> 1) & 0xFFFF)); 631 dy64 = vis_to_double_dup((((dY >> 1) & 0xFFFF) << 16) | ((dY >> 1) & 0xFFFF)); 632 633 for (j = yStart; j <= yFinish; j++) { 634 mlib_d64 *sp; 635 636 NEW_LINE(4); 637 638 deltax = DOUBLE_4U16(X, X, X, X); 639 deltay = DOUBLE_4U16(Y, Y, Y, Y); 640 641 #pragma pipeloop(0) 642 for (i = 0; i < size; i++) { 643 sp = *(mlib_d64**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + (X >> MLIB_SHIFT); 644 s0 = sp[0]; 645 s1 = sp[1]; 646 s2 = sp[srcYStride]; 647 s3 = sp[srcYStride + 1]; 648 649 BL_SUM(); 650 651 ((mlib_d64*)dl)[i] = dd; 652 X += dX; 653 Y += dY; 654 } 655 } 656 657 return MLIB_SUCCESS; 658 } 659 660 /***************************************************************/ 661 mlib_status FUN_NAME(4ch_na)(mlib_affine_param *param) 662 { 663 DECLAREVAR(); 664 mlib_s32 max_xsize = param -> max_xsize; 665 mlib_d64 buff[BUF_SIZE], *pbuff = buff; 666 667 if (max_xsize > BUF_SIZE) { 668 pbuff = mlib_malloc(max_xsize*sizeof(mlib_d64)); 669 670 if (pbuff == NULL) return MLIB_FAILURE; 671 } 672 673 dX = (dX - (dX >> 31)) &~ 1; /* rounding towards ZERO */ 674 dY = (dY - (dY >> 31)) &~ 1; /* rounding towards ZERO */ 675 dx64 = vis_to_double_dup((((dX >> 1) & 0xFFFF) << 16) | ((dX >> 1) & 0xFFFF)); 676 dy64 = vis_to_double_dup((((dY >> 1) & 0xFFFF) << 16) | ((dY >> 1) & 0xFFFF)); 677 678 for (j = yStart; j <= yFinish; j++) { 679 mlib_u8 *sp; 680 mlib_d64 *sp0, *sp1; 681 682 NEW_LINE(4); 683 684 deltax = DOUBLE_4U16(X, X, X, X); 685 deltay = DOUBLE_4U16(Y, Y, Y, Y); 686 687 #pragma pipeloop(0) 688 for (i = 0; i < size; i++) { 689 sp = *(mlib_u8**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + 8*(X >> MLIB_SHIFT); 690 691 vis_alignaddr(sp, 0); 692 sp0 = AL_ADDR(sp, 0); 693 s0 = vis_faligndata(sp0[0], sp0[1]); 694 s1 = vis_faligndata(sp0[1], sp0[2]); 695 696 vis_alignaddr(sp, srcYStride); 697 sp1 = AL_ADDR(sp, srcYStride); 698 s2 = vis_faligndata(sp1[0], sp1[1]); 699 s3 = vis_faligndata(sp1[1], sp1[2]); 700 701 BL_SUM(); 702 703 pbuff[i] = dd; 704 X += dX; 705 Y += dY; 706 } 707 708 mlib_ImageCopy_na((mlib_u8*)pbuff, dl, 8*size); 709 } 710 711 if (pbuff != buff) { 712 mlib_free(pbuff); 713 } 714 715 return MLIB_SUCCESS; 716 } 717 718 /***************************************************************/ 719 #define LUT(x) plut[x] 720 721 mlib_status FUN_NAME(s16_i)(mlib_affine_param *param, 722 const void *colormap) 723 { 724 DECLAREVAR(); 725 mlib_s32 nchan = mlib_ImageGetLutChannels(colormap); 726 mlib_s32 lut_off = mlib_ImageGetLutOffset(colormap); 727 mlib_d64 *plut = (mlib_d64*)mlib_ImageGetLutNormalTable(colormap) - lut_off; 728 mlib_s32 max_xsize = param -> max_xsize; 729 mlib_d64 buff[BUF_SIZE], *pbuff = buff; 730 731 srcYStride /= sizeof(DTYPE); 732 733 if (max_xsize > BUF_SIZE) { 734 pbuff = mlib_malloc(max_xsize*sizeof(mlib_d64)); 735 736 if (pbuff == NULL) return MLIB_FAILURE; 737 } 738 739 dX = (dX - (dX >> 31)) &~ 1; /* rounding towards ZERO */ 740 dY = (dY - (dY >> 31)) &~ 1; /* rounding towards ZERO */ 741 dx64 = vis_to_double_dup((((dX >> 1) & 0xFFFF) << 16) | ((dX >> 1) & 0xFFFF)); 742 dy64 = vis_to_double_dup((((dY >> 1) & 0xFFFF) << 16) | ((dY >> 1) & 0xFFFF)); 743 744 for (j = yStart; j <= yFinish; j++) { 745 DTYPE *sp; 746 747 NEW_LINE(1); 748 749 deltax = DOUBLE_4U16(X, X, X, X); 750 deltay = DOUBLE_4U16(Y, Y, Y, Y); 751 752 #pragma pipeloop(0) 753 for (i = 0; i < size; i++) { 754 sp = *(DTYPE**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + (X >> MLIB_SHIFT); 755 s0 = LUT(sp[0]); 756 s1 = LUT(sp[1]); 757 s2 = LUT(sp[srcYStride]); 758 s3 = LUT(sp[srcYStride + 1]); 759 760 BL_SUM(); 761 762 pbuff[i] = dd; 763 X += dX; 764 Y += dY; 765 } 766 767 if (nchan == 3) { 768 mlib_ImageColorTrue2IndexLine_S16_S16_3_in_4((void*)pbuff, (void*)dl, size, colormap); 769 } else { 770 mlib_ImageColorTrue2IndexLine_S16_S16_4((void*)pbuff, (void*)dl, size, colormap); 771 } 772 } 773 774 if (pbuff != buff) { 775 mlib_free(pbuff); 776 } 777 778 return MLIB_SUCCESS; 779 } 780 781 /***************************************************************/ 782 #undef DTYPE 783 #define DTYPE mlib_u8 784 785 mlib_status FUN_NAME(u8_i)(mlib_affine_param *param, 786 const void *colormap) 787 { 788 DECLAREVAR(); 789 mlib_s32 nchan = mlib_ImageGetLutChannels(colormap); 790 mlib_s32 lut_off = mlib_ImageGetLutOffset(colormap); 791 mlib_d64 *plut = (mlib_d64*)mlib_ImageGetLutNormalTable(colormap) - lut_off; 792 mlib_s32 max_xsize = param -> max_xsize; 793 mlib_d64 buff[BUF_SIZE], *pbuff = buff; 794 795 if (max_xsize > BUF_SIZE) { 796 pbuff = mlib_malloc(max_xsize*sizeof(mlib_d64)); 797 798 if (pbuff == NULL) return MLIB_FAILURE; 799 } 800 801 dX = (dX - (dX >> 31)) &~ 1; /* rounding towards ZERO */ 802 dY = (dY - (dY >> 31)) &~ 1; /* rounding towards ZERO */ 803 dx64 = vis_to_double_dup((((dX >> 1) & 0xFFFF) << 16) | ((dX >> 1) & 0xFFFF)); 804 dy64 = vis_to_double_dup((((dY >> 1) & 0xFFFF) << 16) | ((dY >> 1) & 0xFFFF)); 805 806 for (j = yStart; j <= yFinish; j++) { 807 DTYPE *sp; 808 809 NEW_LINE(1); 810 811 deltax = DOUBLE_4U16(X, X, X, X); 812 deltay = DOUBLE_4U16(Y, Y, Y, Y); 813 814 #pragma pipeloop(0) 815 for (i = 0; i < size; i++) { 816 sp = *(DTYPE**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + (X >> MLIB_SHIFT); 817 s0 = LUT(sp[0]); 818 s1 = LUT(sp[1]); 819 s2 = LUT(sp[srcYStride]); 820 s3 = LUT(sp[srcYStride + 1]); 821 822 BL_SUM(); 823 824 pbuff[i] = dd; 825 X += dX; 826 Y += dY; 827 } 828 829 if (nchan == 3) { 830 mlib_ImageColorTrue2IndexLine_S16_U8_3_in_4((void*)pbuff, (void*)dl, size, colormap); 831 } else { 832 mlib_ImageColorTrue2IndexLine_S16_U8_4((void*)pbuff, (void*)dl, size, colormap); 833 } 834 } 835 836 if (pbuff != buff) { 837 mlib_free(pbuff); 838 } 839 840 return MLIB_SUCCESS; 841 } 842 843 /***************************************************************/