1 /* 2 * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 27 28 /* 29 * The functions step along the lines from xLeft to xRight and apply 30 * the bilinear filtering. 31 * 32 */ 33 34 #include "vis_proto.h" 35 #include "mlib_image.h" 36 #include "mlib_ImageCopy.h" 37 #include "mlib_ImageAffine.h" 38 #include "mlib_v_ImageFilters.h" 39 #include "mlib_v_ImageChannelExtract.h" 40 #include "mlib_v_ImageAffine_BL_S16.h" 41 42 /*#define MLIB_VIS2*/ 43 44 /***************************************************************/ 45 #define DTYPE mlib_s16 46 47 #define FUN_NAME(CHAN) mlib_ImageAffine_u16_##CHAN##_bl 48 49 /***************************************************************/ 50 mlib_status FUN_NAME(2ch_na)(mlib_affine_param *param); 51 mlib_status FUN_NAME(4ch_na)(mlib_affine_param *param); 52 53 /***************************************************************/ 54 #define XOR_8000(x) x = vis_fxor(x, mask_8000) 55 56 /***************************************************************/ 57 #ifdef MLIB_VIS2 58 #define MLIB_WRITE_BMASK(bmask) vis_write_bmask(bmask, 0) 59 #else 60 #define MLIB_WRITE_BMASK(bmask) 61 #endif /* MLIB_VIS2 */ 62 63 /***************************************************************/ 64 #undef DECLAREVAR 65 #define DECLAREVAR() \ 66 DECLAREVAR0(); \ 67 mlib_s32 *warp_tbl = param -> warp_tbl; \ 68 mlib_s32 srcYStride = param -> srcYStride; \ 69 mlib_u8 *dl; \ 70 mlib_s32 i, size; \ 71 mlib_d64 mask_8000 = vis_to_double_dup(0x80008000); \ 72 mlib_d64 mask_7fff = vis_to_double_dup(0x7FFF7FFF); \ 73 mlib_d64 dx64, dy64, deltax, deltay, delta1_x, delta1_y; \ 74 mlib_d64 s0, s1, s2, s3; \ 75 mlib_d64 d0, d1, d2, d3, dd 76 77 /***************************************************************/ 78 79 /* arguments (x, y) are swapped to prevent overflow */ 80 #define FMUL_16x16(x, y) \ 81 vis_fpadd16(vis_fmul8sux16(y, x), \ 82 vis_fmul8ulx16(y, x)) 83 84 /***************************************************************/ 85 #define BUF_SIZE 512 86 87 /***************************************************************/ 88 #define DOUBLE_4U16(x0, x1, x2, x3) \ 89 vis_to_double(((((x0) & 0xFFFE) << 15) | (((x1) & 0xFFFE) >> 1)), \ 90 ((((x2) & 0xFFFE) << 15) | (((x3) & 0xFFFE) >> 1))) 91 92 /***************************************************************/ 93 #define BL_SUM() \ 94 XOR_8000(s0); \ 95 XOR_8000(s1); \ 96 XOR_8000(s2); \ 97 XOR_8000(s3); \ 98 \ 99 delta1_x = vis_fpsub16(mask_7fff, deltax); \ 100 delta1_y = vis_fpsub16(mask_7fff, deltay); \ 101 \ 102 d0 = FMUL_16x16(s0, delta1_x); \ 103 d1 = FMUL_16x16(s1, deltax); \ 104 d0 = vis_fpadd16(d0, d1); \ 105 d0 = vis_fpadd16(d0, d0); \ 106 d0 = FMUL_16x16(d0, delta1_y); \ 107 \ 108 d2 = FMUL_16x16(s2, delta1_x); \ 109 d3 = FMUL_16x16(s3, deltax); \ 110 d2 = vis_fpadd16(d2, d3); \ 111 d2 = vis_fpadd16(d2, d2); \ 112 d2 = FMUL_16x16(d2, deltay); \ 113 \ 114 dd = vis_fpadd16(d0, d2); \ 115 dd = vis_fpadd16(dd, dd); \ 116 XOR_8000(dd); \ 117 \ 118 deltax = vis_fpadd16(deltax, dx64); \ 119 deltay = vis_fpadd16(deltay, dy64); \ 120 deltax = vis_fand(deltax, mask_7fff); \ 121 deltay = vis_fand(deltay, mask_7fff) 122 123 /***************************************************************/ 124 #define BL_SUM_3CH() \ 125 XOR_8000(s0); \ 126 XOR_8000(s1); \ 127 XOR_8000(s2); \ 128 XOR_8000(s3); \ 129 \ 130 delta1_x = vis_fpsub16(mask_7fff, deltax); \ 131 delta1_y = vis_fpsub16(mask_7fff, deltay); \ 132 \ 133 d0 = FMUL_16x16(s0, delta1_y); \ 134 d2 = FMUL_16x16(s2, deltay); \ 135 d0 = vis_fpadd16(d0, d2); \ 136 d0 = vis_fpadd16(d0, d0); \ 137 d0 = FMUL_16x16(d0, delta1_x); \ 138 \ 139 d1 = FMUL_16x16(s1, delta1_y); \ 140 d3 = FMUL_16x16(s3, deltay); \ 141 d1 = vis_fpadd16(d1, d3); \ 142 d1 = vis_fpadd16(d1, d1); \ 143 d1 = FMUL_16x16(d1, deltax); \ 144 \ 145 vis_alignaddr((void*)0, 2); \ 146 d0 = vis_faligndata(d0, d0); \ 147 dd = vis_fpadd16(d0, d1); \ 148 dd = vis_fpadd16(dd, dd); \ 149 XOR_8000(dd); \ 150 \ 151 deltax = vis_fpadd16(deltax, dx64); \ 152 deltay = vis_fpadd16(deltay, dy64); \ 153 deltax = vis_fand(deltax, mask_7fff); \ 154 deltay = vis_fand(deltay, mask_7fff) 155 156 /***************************************************************/ 157 #define LD_U16(sp, ind) vis_ld_u16(sp + ind) 158 159 /***************************************************************/ 160 #ifndef MLIB_VIS2 161 162 #define LOAD_1CH() \ 163 s0 = vis_faligndata(LD_U16(sp3, 0), mask_7fff); \ 164 s1 = vis_faligndata(LD_U16(sp3, 2), mask_7fff); \ 165 s2 = vis_faligndata(LD_U16(sp3, srcYStride), mask_7fff); \ 166 s3 = vis_faligndata(LD_U16(sp3, srcYStride + 2), mask_7fff); \ 167 \ 168 s0 = vis_faligndata(LD_U16(sp2, 0), s0); \ 169 s1 = vis_faligndata(LD_U16(sp2, 2), s1); \ 170 s2 = vis_faligndata(LD_U16(sp2, srcYStride), s2); \ 171 s3 = vis_faligndata(LD_U16(sp2, srcYStride + 2), s3); \ 172 \ 173 s0 = vis_faligndata(LD_U16(sp1, 0), s0); \ 174 s1 = vis_faligndata(LD_U16(sp1, 2), s1); \ 175 s2 = vis_faligndata(LD_U16(sp1, srcYStride), s2); \ 176 s3 = vis_faligndata(LD_U16(sp1, srcYStride + 2), s3); \ 177 \ 178 s0 = vis_faligndata(LD_U16(sp0, 0), s0); \ 179 s1 = vis_faligndata(LD_U16(sp0, 2), s1); \ 180 s2 = vis_faligndata(LD_U16(sp0, srcYStride), s2); \ 181 s3 = vis_faligndata(LD_U16(sp0, srcYStride + 2), s3) 182 183 #else 184 185 #define LOAD_1CH() \ 186 s0 = vis_bshuffle(LD_U16(sp0, 0), LD_U16(sp2, 0)); \ 187 s1 = vis_bshuffle(LD_U16(sp0, 2), LD_U16(sp2, 2)); \ 188 s2 = vis_bshuffle(LD_U16(sp0, srcYStride), LD_U16(sp2, srcYStride)); \ 189 s3 = vis_bshuffle(LD_U16(sp0, srcYStride + 2), LD_U16(sp2, srcYStride + 2)); \ 190 \ 191 t0 = vis_bshuffle(LD_U16(sp1, 0), LD_U16(sp3, 0)); \ 192 t1 = vis_bshuffle(LD_U16(sp1, 2), LD_U16(sp3, 2)); \ 193 t2 = vis_bshuffle(LD_U16(sp1, srcYStride), LD_U16(sp3, srcYStride)); \ 194 t3 = vis_bshuffle(LD_U16(sp1, srcYStride + 2), LD_U16(sp3, srcYStride + 2)); \ 195 \ 196 s0 = vis_bshuffle(s0, t0); \ 197 s1 = vis_bshuffle(s1, t1); \ 198 s2 = vis_bshuffle(s2, t2); \ 199 s3 = vis_bshuffle(s3, t3) 200 201 #endif /* MLIB_VIS2 */ 202 203 /***************************************************************/ 204 #define GET_POINTER(sp) \ 205 sp = *(mlib_u8**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + 2*(X >> MLIB_SHIFT); \ 206 X += dX; \ 207 Y += dY 208 209 /***************************************************************/ 210 #undef PREPARE_DELTAS 211 #define PREPARE_DELTAS \ 212 if (warp_tbl != NULL) { \ 213 dX = warp_tbl[2*j ]; \ 214 dY = warp_tbl[2*j + 1]; \ 215 dx64 = vis_to_double_dup((((dX << 1) & 0xFFFF) << 16) | ((dX << 1) & 0xFFFF)); \ 216 dy64 = vis_to_double_dup((((dY << 1) & 0xFFFF) << 16) | ((dY << 1) & 0xFFFF)); \ 217 } 218 219 /***************************************************************/ 220 mlib_status FUN_NAME(1ch)(mlib_affine_param *param) 221 { 222 DECLAREVAR(); 223 mlib_s32 off; 224 mlib_s32 x0, x1, x2, x3, y0, y1, y2, y3; 225 #ifdef MLIB_VIS2 226 mlib_d64 t0, t1, t2, t3; 227 vis_write_bmask(0x45CD67EF, 0); 228 #else 229 vis_alignaddr((void*)0, 6); 230 #endif /* MLIB_VIS2 */ 231 232 dx64 = vis_to_double_dup((((dX << 1) & 0xFFFF) << 16) | ((dX << 1) & 0xFFFF)); 233 dy64 = vis_to_double_dup((((dY << 1) & 0xFFFF) << 16) | ((dY << 1) & 0xFFFF)); 234 235 for (j = yStart; j <= yFinish; j++) { 236 mlib_u8 *sp0, *sp1, *sp2, *sp3; 237 mlib_d64 *dp, dmask; 238 239 NEW_LINE(1); 240 241 off = (mlib_s32)dl & 7; 242 dp = (mlib_d64*)(dl - off); 243 off >>= 1; 244 245 x0 = X - off*dX; y0 = Y - off*dY; 246 x1 = x0 + dX; y1 = y0 + dY; 247 x2 = x1 + dX; y2 = y1 + dY; 248 x3 = x2 + dX; y3 = y2 + dY; 249 250 deltax = DOUBLE_4U16(x0, x1, x2, x3); 251 deltay = DOUBLE_4U16(y0, y1, y2, y3); 252 253 if (off) { 254 mlib_s32 emask = vis_edge16((void*)(2*off), (void*)(2*(off + size - 1))); 255 256 off = 4 - off; 257 GET_POINTER(sp3); 258 sp0 = sp1 = sp2 = sp3; 259 260 if (off > 1 && size > 1) { 261 GET_POINTER(sp3); 262 } 263 264 if (off > 2) { 265 sp2 = sp3; 266 267 if (size > 2) { 268 GET_POINTER(sp3); 269 } 270 } 271 272 LOAD_1CH(); 273 BL_SUM(); 274 275 dmask = ((mlib_d64*)mlib_dmask_arr)[emask]; 276 *dp++ = vis_for (vis_fand(dmask, dd), vis_fandnot(dmask, dp[0])); 277 278 size -= off; 279 280 if (size < 0) size = 0; 281 } 282 283 #pragma pipeloop(0) 284 for (i = 0; i < size/4; i++) { 285 GET_POINTER(sp0); 286 GET_POINTER(sp1); 287 GET_POINTER(sp2); 288 GET_POINTER(sp3); 289 290 LOAD_1CH(); 291 BL_SUM(); 292 293 dp[i] = dd; 294 } 295 296 off = size & 3; 297 298 if (off) { 299 GET_POINTER(sp0); 300 sp1 = sp2 = sp3 = sp0; 301 302 if (off > 1) { 303 GET_POINTER(sp1); 304 } 305 306 if (off > 2) { 307 GET_POINTER(sp2); 308 } 309 310 LOAD_1CH(); 311 BL_SUM(); 312 313 dmask = ((mlib_d64*)mlib_dmask_arr)[(0xF0 >> off) & 0x0F]; 314 dp[i] = vis_for (vis_fand(dmask, dd), vis_fandnot(dmask, dp[i])); 315 } 316 } 317 318 return MLIB_SUCCESS; 319 } 320 321 /***************************************************************/ 322 #undef GET_POINTER 323 #define GET_POINTER(sp) \ 324 sp = *(mlib_f32**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + (X >> MLIB_SHIFT); \ 325 X += dX; \ 326 Y += dY 327 328 /***************************************************************/ 329 #define LOAD_2CH() \ 330 s0 = vis_freg_pair(sp0[0], sp1[0]); \ 331 s1 = vis_freg_pair(sp0[1], sp1[1]); \ 332 s2 = vis_freg_pair(sp0[srcYStride], sp1[srcYStride]); \ 333 s3 = vis_freg_pair(sp0[srcYStride + 1], sp1[srcYStride + 1]) 334 335 /***************************************************************/ 336 #undef PREPARE_DELTAS 337 #define PREPARE_DELTAS \ 338 if (warp_tbl != NULL) { \ 339 dX = warp_tbl[2*j ]; \ 340 dY = warp_tbl[2*j + 1]; \ 341 dx64 = vis_to_double_dup(((dX & 0xFFFF) << 16) | (dX & 0xFFFF)); \ 342 dy64 = vis_to_double_dup(((dY & 0xFFFF) << 16) | (dY & 0xFFFF)); \ 343 } 344 345 /***************************************************************/ 346 mlib_status FUN_NAME(2ch)(mlib_affine_param *param) 347 { 348 DECLAREVAR(); 349 mlib_s32 off; 350 mlib_s32 x0, x1, y0, y1; 351 352 if (((mlib_s32)lineAddr[0] | (mlib_s32)dstData | srcYStride | dstYStride) & 3) { 353 return FUN_NAME(2ch_na)(param); 354 } 355 356 srcYStride >>= 2; 357 358 dx64 = vis_to_double_dup(((dX & 0xFFFF) << 16) | (dX & 0xFFFF)); 359 dy64 = vis_to_double_dup(((dY & 0xFFFF) << 16) | (dY & 0xFFFF)); 360 361 for (j = yStart; j <= yFinish; j++) { 362 mlib_f32 *sp0, *sp1; 363 mlib_d64 *dp; 364 365 NEW_LINE(2); 366 367 off = (mlib_s32)dl & 7; 368 dp = (mlib_d64*)(dl - off); 369 370 if (off) { 371 x0 = X - dX; y0 = Y - dY; 372 x1 = X; y1 = Y; 373 } else { 374 x0 = X; y0 = Y; 375 x1 = X + dX; y1 = Y + dY; 376 } 377 378 deltax = DOUBLE_4U16(x0, x0, x1, x1); 379 deltay = DOUBLE_4U16(y0, y0, y1, y1); 380 381 if (off) { 382 GET_POINTER(sp1); 383 sp0 = sp1; 384 LOAD_2CH(); 385 386 BL_SUM(); 387 388 ((mlib_f32*)dp)[1] = vis_read_lo(dd); 389 dp++; 390 size--; 391 } 392 393 #pragma pipeloop(0) 394 for (i = 0; i < size/2; i++) { 395 GET_POINTER(sp0); 396 GET_POINTER(sp1); 397 LOAD_2CH(); 398 399 BL_SUM(); 400 401 *dp++ = dd; 402 } 403 404 if (size & 1) { 405 GET_POINTER(sp0); 406 sp1 = sp0; 407 LOAD_2CH(); 408 409 BL_SUM(); 410 411 ((mlib_f32*)dp)[0] = vis_read_hi(dd); 412 } 413 } 414 415 return MLIB_SUCCESS; 416 } 417 418 /***************************************************************/ 419 #undef GET_POINTER 420 #define GET_POINTER(sp) \ 421 sp = *(mlib_u8**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + 4*(X >> MLIB_SHIFT); \ 422 X += dX; \ 423 Y += dY 424 425 /***************************************************************/ 426 #ifndef MLIB_VIS2 427 428 #define LOAD_2CH_NA() \ 429 s0 = vis_faligndata(LD_U16(sp1, 2), mask_7fff); \ 430 s1 = vis_faligndata(LD_U16(sp1, 6), mask_7fff); \ 431 s2 = vis_faligndata(LD_U16(sp1, srcYStride + 2), mask_7fff); \ 432 s3 = vis_faligndata(LD_U16(sp1, srcYStride + 6), mask_7fff); \ 433 \ 434 s0 = vis_faligndata(LD_U16(sp1, 0), s0); \ 435 s1 = vis_faligndata(LD_U16(sp1, 4), s1); \ 436 s2 = vis_faligndata(LD_U16(sp1, srcYStride), s2); \ 437 s3 = vis_faligndata(LD_U16(sp1, srcYStride + 4), s3); \ 438 \ 439 s0 = vis_faligndata(LD_U16(sp0, 2), s0); \ 440 s1 = vis_faligndata(LD_U16(sp0, 6), s1); \ 441 s2 = vis_faligndata(LD_U16(sp0, srcYStride + 2), s2); \ 442 s3 = vis_faligndata(LD_U16(sp0, srcYStride + 6), s3); \ 443 \ 444 s0 = vis_faligndata(LD_U16(sp0, 0), s0); \ 445 s1 = vis_faligndata(LD_U16(sp0, 4), s1); \ 446 s2 = vis_faligndata(LD_U16(sp0, srcYStride), s2); \ 447 s3 = vis_faligndata(LD_U16(sp0, srcYStride + 4), s3) 448 449 #else 450 451 #define LOAD_2CH_NA() \ 452 s0 = vis_bshuffle(LD_U16(sp0, 0), LD_U16(sp1, 0)); \ 453 s1 = vis_bshuffle(LD_U16(sp0, 4), LD_U16(sp1, 4)); \ 454 s2 = vis_bshuffle(LD_U16(sp0, srcYStride), LD_U16(sp1, srcYStride)); \ 455 s3 = vis_bshuffle(LD_U16(sp0, srcYStride + 4), LD_U16(sp1, srcYStride + 4)); \ 456 \ 457 t0 = vis_bshuffle(LD_U16(sp0, 2), LD_U16(sp1, 2)); \ 458 t1 = vis_bshuffle(LD_U16(sp0, 6), LD_U16(sp1, 6)); \ 459 t2 = vis_bshuffle(LD_U16(sp0, srcYStride + 2), LD_U16(sp1, srcYStride + 2)); \ 460 t3 = vis_bshuffle(LD_U16(sp0, srcYStride + 6), LD_U16(sp1, srcYStride + 6)); \ 461 \ 462 s0 = vis_bshuffle(s0, t0); \ 463 s1 = vis_bshuffle(s1, t1); \ 464 s2 = vis_bshuffle(s2, t2); \ 465 s3 = vis_bshuffle(s3, t3) 466 467 #endif /* MLIB_VIS2 */ 468 469 /***************************************************************/ 470 mlib_status FUN_NAME(2ch_na)(mlib_affine_param *param) 471 { 472 DECLAREVAR(); 473 mlib_s32 max_xsize = param -> max_xsize, bsize; 474 mlib_s32 x0, x1, y0, y1; 475 mlib_d64 buff[BUF_SIZE], *pbuff = buff; 476 #ifdef MLIB_VIS2 477 mlib_d64 t0, t1, t2, t3; 478 #endif /* MLIB_VIS2 */ 479 480 bsize = (max_xsize + 1)/2; 481 482 if (bsize > BUF_SIZE) { 483 pbuff = mlib_malloc(bsize*sizeof(mlib_d64)); 484 485 if (pbuff == NULL) return MLIB_FAILURE; 486 } 487 488 MLIB_WRITE_BMASK(0x45CD67EF); 489 490 dx64 = vis_to_double_dup(((dX & 0xFFFF) << 16) | (dX & 0xFFFF)); 491 dy64 = vis_to_double_dup(((dY & 0xFFFF) << 16) | (dY & 0xFFFF)); 492 493 for (j = yStart; j <= yFinish; j++) { 494 mlib_u8 *sp0, *sp1; 495 496 #ifndef MLIB_VIS2 497 vis_alignaddr((void*)0, 6); 498 #endif /* MLIB_VIS2 */ 499 500 NEW_LINE(2); 501 502 x0 = X; y0 = Y; 503 x1 = X + dX; y1 = Y + dY; 504 505 deltax = DOUBLE_4U16(x0, x0, x1, x1); 506 deltay = DOUBLE_4U16(y0, y0, y1, y1); 507 508 #pragma pipeloop(0) 509 for (i = 0; i < size/2; i++) { 510 GET_POINTER(sp0); 511 GET_POINTER(sp1); 512 LOAD_2CH_NA(); 513 514 BL_SUM(); 515 516 pbuff[i] = dd; 517 } 518 519 if (size & 1) { 520 GET_POINTER(sp0); 521 sp1 = sp0; 522 LOAD_2CH_NA(); 523 524 BL_SUM(); 525 526 pbuff[i] = dd; 527 } 528 529 mlib_ImageCopy_na((mlib_u8*)pbuff, dl, 4*size); 530 } 531 532 if (pbuff != buff) { 533 mlib_free(pbuff); 534 } 535 536 return MLIB_SUCCESS; 537 } 538 539 /***************************************************************/ 540 #undef PREPARE_DELTAS 541 #define PREPARE_DELTAS \ 542 if (warp_tbl != NULL) { \ 543 dX = warp_tbl[2*j ]; \ 544 dY = warp_tbl[2*j + 1]; \ 545 dX = (dX - (dX >> 31)) &~ 1; /* rounding towards ZERO */ \ 546 dY = (dY - (dY >> 31)) &~ 1; /* rounding towards ZERO */ \ 547 dx64 = vis_to_double_dup((((dX >> 1) & 0xFFFF) << 16) | ((dX >> 1) & 0xFFFF)); \ 548 dy64 = vis_to_double_dup((((dY >> 1) & 0xFFFF) << 16) | ((dY >> 1) & 0xFFFF)); \ 549 } 550 551 /***************************************************************/ 552 mlib_status FUN_NAME(3ch)(mlib_affine_param *param) 553 { 554 DECLAREVAR(); 555 mlib_s32 max_xsize = param -> max_xsize; 556 mlib_d64 buff[BUF_SIZE], *pbuff = buff; 557 558 if (max_xsize > BUF_SIZE) { 559 pbuff = mlib_malloc(max_xsize*sizeof(mlib_d64)); 560 561 if (pbuff == NULL) return MLIB_FAILURE; 562 } 563 564 dX = (dX - (dX >> 31)) &~ 1; /* rounding towards ZERO */ 565 dY = (dY - (dY >> 31)) &~ 1; /* rounding towards ZERO */ 566 dx64 = vis_to_double_dup((((dX >> 1) & 0xFFFF) << 16) | ((dX >> 1) & 0xFFFF)); 567 dy64 = vis_to_double_dup((((dY >> 1) & 0xFFFF) << 16) | ((dY >> 1) & 0xFFFF)); 568 569 for (j = yStart; j <= yFinish; j++) { 570 mlib_u8 *sp; 571 mlib_d64 *sp0, *sp1; 572 573 NEW_LINE(3); 574 575 deltax = DOUBLE_4U16(X, X, X, X); 576 deltay = DOUBLE_4U16(Y, Y, Y, Y); 577 578 #pragma pipeloop(0) 579 for (i = 0; i < size; i++) { 580 sp = *(mlib_u8**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + 6*(X >> MLIB_SHIFT) - 2; 581 582 vis_alignaddr(sp, 0); 583 sp0 = AL_ADDR(sp, 0); 584 s0 = vis_faligndata(sp0[0], sp0[1]); 585 s1 = vis_faligndata(sp0[1], sp0[2]); 586 587 vis_alignaddr(sp, srcYStride); 588 sp1 = AL_ADDR(sp, srcYStride); 589 s2 = vis_faligndata(sp1[0], sp1[1]); 590 s3 = vis_faligndata(sp1[1], sp1[2]); 591 592 BL_SUM_3CH(); 593 594 pbuff[i] = dd; 595 X += dX; 596 Y += dY; 597 } 598 599 mlib_v_ImageChannelExtract_S16_43L_D1((void *)pbuff, (void *)dl, size); 600 } 601 602 if (pbuff != buff) { 603 mlib_free(pbuff); 604 } 605 606 return MLIB_SUCCESS; 607 } 608 609 /***************************************************************/ 610 mlib_status FUN_NAME(4ch)(mlib_affine_param *param) 611 { 612 DECLAREVAR(); 613 614 if (((mlib_s32)lineAddr[0] | (mlib_s32)dstData | srcYStride | dstYStride) & 7) { 615 return FUN_NAME(4ch_na)(param); 616 } 617 618 srcYStride >>= 3; 619 620 dX = (dX - (dX >> 31)) &~ 1; /* rounding towards ZERO */ 621 dY = (dY - (dY >> 31)) &~ 1; /* rounding towards ZERO */ 622 dx64 = vis_to_double_dup((((dX >> 1) & 0xFFFF) << 16) | ((dX >> 1) & 0xFFFF)); 623 dy64 = vis_to_double_dup((((dY >> 1) & 0xFFFF) << 16) | ((dY >> 1) & 0xFFFF)); 624 625 for (j = yStart; j <= yFinish; j++) { 626 mlib_d64 *sp; 627 628 NEW_LINE(4); 629 630 deltax = DOUBLE_4U16(X, X, X, X); 631 deltay = DOUBLE_4U16(Y, Y, Y, Y); 632 633 #pragma pipeloop(0) 634 for (i = 0; i < size; i++) { 635 sp = *(mlib_d64**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + (X >> MLIB_SHIFT); 636 s0 = sp[0]; 637 s1 = sp[1]; 638 s2 = sp[srcYStride]; 639 s3 = sp[srcYStride + 1]; 640 641 BL_SUM(); 642 643 ((mlib_d64*)dl)[i] = dd; 644 X += dX; 645 Y += dY; 646 } 647 } 648 649 return MLIB_SUCCESS; 650 } 651 652 /***************************************************************/ 653 mlib_status FUN_NAME(4ch_na)(mlib_affine_param *param) 654 { 655 DECLAREVAR(); 656 mlib_s32 max_xsize = param -> max_xsize; 657 mlib_d64 buff[BUF_SIZE], *pbuff = buff; 658 659 if (max_xsize > BUF_SIZE) { 660 pbuff = mlib_malloc(max_xsize*sizeof(mlib_d64)); 661 662 if (pbuff == NULL) return MLIB_FAILURE; 663 } 664 665 dX = (dX - (dX >> 31)) &~ 1; /* rounding towards ZERO */ 666 dY = (dY - (dY >> 31)) &~ 1; /* rounding towards ZERO */ 667 dx64 = vis_to_double_dup((((dX >> 1) & 0xFFFF) << 16) | ((dX >> 1) & 0xFFFF)); 668 dy64 = vis_to_double_dup((((dY >> 1) & 0xFFFF) << 16) | ((dY >> 1) & 0xFFFF)); 669 670 for (j = yStart; j <= yFinish; j++) { 671 mlib_u8 *sp; 672 mlib_d64 *sp0, *sp1; 673 674 NEW_LINE(4); 675 676 deltax = DOUBLE_4U16(X, X, X, X); 677 deltay = DOUBLE_4U16(Y, Y, Y, Y); 678 679 #pragma pipeloop(0) 680 for (i = 0; i < size; i++) { 681 sp = *(mlib_u8**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + 8*(X >> MLIB_SHIFT); 682 683 vis_alignaddr(sp, 0); 684 sp0 = AL_ADDR(sp, 0); 685 s0 = vis_faligndata(sp0[0], sp0[1]); 686 s1 = vis_faligndata(sp0[1], sp0[2]); 687 688 vis_alignaddr(sp, srcYStride); 689 sp1 = AL_ADDR(sp, srcYStride); 690 s2 = vis_faligndata(sp1[0], sp1[1]); 691 s3 = vis_faligndata(sp1[1], sp1[2]); 692 693 BL_SUM(); 694 695 pbuff[i] = dd; 696 X += dX; 697 Y += dY; 698 } 699 700 mlib_ImageCopy_na((mlib_u8*)pbuff, dl, 8*size); 701 } 702 703 if (pbuff != buff) { 704 mlib_free(pbuff); 705 } 706 707 return MLIB_SUCCESS; 708 } 709 710 /***************************************************************/