/*
 * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */



/*
 * The functions step along the lines from xLeft to xRight and apply
 * the bilinear filtering.
31 * 32 */ 33 34 #include "vis_proto.h" 35 #include "mlib_image.h" 36 #include "mlib_ImageCopy.h" 37 #include "mlib_ImageAffine.h" 38 #include "mlib_v_ImageFilters.h" 39 #include "mlib_v_ImageChannelExtract.h" 40 41 /*#define MLIB_VIS2*/ 42 43 /***************************************************************/ 44 #define DTYPE mlib_s16 45 46 #define FUN_NAME(CHAN) mlib_ImageAffine_s16_##CHAN##_bl 47 48 /***************************************************************/ 49 static mlib_status FUN_NAME(2ch_na)(mlib_affine_param *param); 50 static mlib_status FUN_NAME(4ch_na)(mlib_affine_param *param); 51 52 /***************************************************************/ 53 const mlib_u64 mlib_dmask_arr[] = { 54 0x0000000000000000, 0x000000000000FFFF, 0x00000000FFFF0000, 0x00000000FFFFFFFF, 55 0x0000FFFF00000000, 0x0000FFFF0000FFFF, 0x0000FFFFFFFF0000, 0x0000FFFFFFFFFFFF, 56 0xFFFF000000000000, 0xFFFF00000000FFFF, 0xFFFF0000FFFF0000, 0xFFFF0000FFFFFFFF, 57 0xFFFFFFFF00000000, 0xFFFFFFFF0000FFFF, 0xFFFFFFFFFFFF0000, 0xFFFFFFFFFFFFFFFF 58 }; 59 60 /***************************************************************/ 61 #define XOR_8000(x) 62 63 /***************************************************************/ 64 #ifdef MLIB_VIS2 65 #define MLIB_WRITE_BMASK(bmask) vis_write_bmask(bmask, 0) 66 #else 67 #define MLIB_WRITE_BMASK(bmask) 68 #endif 69 70 /***************************************************************/ 71 #undef DECLAREVAR 72 #define DECLAREVAR() \ 73 DECLAREVAR0(); \ 74 mlib_s32 *warp_tbl = param -> warp_tbl; \ 75 mlib_s32 srcYStride = param -> srcYStride; \ 76 mlib_u8 *dl; \ 77 mlib_s32 i, size; \ 78 /*mlib_d64 mask_8000 = vis_to_double_dup(0x80008000);*/ \ 79 mlib_d64 mask_7fff = vis_to_double_dup(0x7FFF7FFF); \ 80 mlib_d64 dx64, dy64, deltax, deltay, delta1_x, delta1_y; \ 81 mlib_d64 s0, s1, s2, s3; \ 82 mlib_d64 d0, d1, d2, d3, dd 83 84 /***************************************************************/ 85 86 /* arguments (x, y) are swapped to prevent overflow */ 
87 #define FMUL_16x16(x, y) \ 88 vis_fpadd16(vis_fmul8sux16(y, x), \ 89 vis_fmul8ulx16(y, x)) 90 91 /***************************************************************/ 92 #define BUF_SIZE 512 93 94 /***************************************************************/ 95 #define DOUBLE_4U16(x0, x1, x2, x3) \ 96 vis_to_double(((((x0) & 0xFFFE) << 15) | (((x1) & 0xFFFE) >> 1)), \ 97 ((((x2) & 0xFFFE) << 15) | (((x3) & 0xFFFE) >> 1))) 98 99 /***************************************************************/ 100 #define BL_SUM() \ 101 XOR_8000(s0); \ 102 XOR_8000(s1); \ 103 XOR_8000(s2); \ 104 XOR_8000(s3); \ 105 \ 106 delta1_x = vis_fpsub16(mask_7fff, deltax); \ 107 delta1_y = vis_fpsub16(mask_7fff, deltay); \ 108 \ 109 d0 = FMUL_16x16(s0, delta1_x); \ 110 d1 = FMUL_16x16(s1, deltax); \ 111 d0 = vis_fpadd16(d0, d1); \ 112 d0 = vis_fpadd16(d0, d0); \ 113 d0 = FMUL_16x16(d0, delta1_y); \ 114 \ 115 d2 = FMUL_16x16(s2, delta1_x); \ 116 d3 = FMUL_16x16(s3, deltax); \ 117 d2 = vis_fpadd16(d2, d3); \ 118 d2 = vis_fpadd16(d2, d2); \ 119 d2 = FMUL_16x16(d2, deltay); \ 120 \ 121 dd = vis_fpadd16(d0, d2); \ 122 dd = vis_fpadd16(dd, dd); \ 123 XOR_8000(dd); \ 124 \ 125 deltax = vis_fpadd16(deltax, dx64); \ 126 deltay = vis_fpadd16(deltay, dy64); \ 127 deltax = vis_fand(deltax, mask_7fff); \ 128 deltay = vis_fand(deltay, mask_7fff) 129 130 /***************************************************************/ 131 #define BL_SUM_3CH() \ 132 XOR_8000(s0); \ 133 XOR_8000(s1); \ 134 XOR_8000(s2); \ 135 XOR_8000(s3); \ 136 \ 137 delta1_x = vis_fpsub16(mask_7fff, deltax); \ 138 delta1_y = vis_fpsub16(mask_7fff, deltay); \ 139 \ 140 d0 = FMUL_16x16(s0, delta1_y); \ 141 d2 = FMUL_16x16(s2, deltay); \ 142 d0 = vis_fpadd16(d0, d2); \ 143 d0 = vis_fpadd16(d0, d0); \ 144 d0 = FMUL_16x16(d0, delta1_x); \ 145 \ 146 d1 = FMUL_16x16(s1, delta1_y); \ 147 d3 = FMUL_16x16(s3, deltay); \ 148 d1 = vis_fpadd16(d1, d3); \ 149 d1 = vis_fpadd16(d1, d1); \ 150 d1 = FMUL_16x16(d1, deltax); \ 151 \ 152 
vis_alignaddr((void*)0, 2); \ 153 d0 = vis_faligndata(d0, d0); \ 154 dd = vis_fpadd16(d0, d1); \ 155 dd = vis_fpadd16(dd, dd); \ 156 XOR_8000(dd); \ 157 \ 158 deltax = vis_fpadd16(deltax, dx64); \ 159 deltay = vis_fpadd16(deltay, dy64); \ 160 deltax = vis_fand(deltax, mask_7fff); \ 161 deltay = vis_fand(deltay, mask_7fff) 162 163 /***************************************************************/ 164 #define LD_U16(sp, ind) vis_ld_u16(sp + ind) 165 166 /***************************************************************/ 167 #ifndef MLIB_VIS2 168 169 #define LOAD_1CH() \ 170 s0 = vis_faligndata(LD_U16(sp3, 0), mask_7fff); \ 171 s1 = vis_faligndata(LD_U16(sp3, 2), mask_7fff); \ 172 s2 = vis_faligndata(LD_U16(sp3, srcYStride), mask_7fff); \ 173 s3 = vis_faligndata(LD_U16(sp3, srcYStride + 2), mask_7fff); \ 174 \ 175 s0 = vis_faligndata(LD_U16(sp2, 0), s0); \ 176 s1 = vis_faligndata(LD_U16(sp2, 2), s1); \ 177 s2 = vis_faligndata(LD_U16(sp2, srcYStride), s2); \ 178 s3 = vis_faligndata(LD_U16(sp2, srcYStride + 2), s3); \ 179 \ 180 s0 = vis_faligndata(LD_U16(sp1, 0), s0); \ 181 s1 = vis_faligndata(LD_U16(sp1, 2), s1); \ 182 s2 = vis_faligndata(LD_U16(sp1, srcYStride), s2); \ 183 s3 = vis_faligndata(LD_U16(sp1, srcYStride + 2), s3); \ 184 \ 185 s0 = vis_faligndata(LD_U16(sp0, 0), s0); \ 186 s1 = vis_faligndata(LD_U16(sp0, 2), s1); \ 187 s2 = vis_faligndata(LD_U16(sp0, srcYStride), s2); \ 188 s3 = vis_faligndata(LD_U16(sp0, srcYStride + 2), s3) 189 190 #else 191 192 #define LOAD_1CH() \ 193 s0 = vis_bshuffle(LD_U16(sp0, 0), LD_U16(sp2, 0)); \ 194 s1 = vis_bshuffle(LD_U16(sp0, 2), LD_U16(sp2, 2)); \ 195 s2 = vis_bshuffle(LD_U16(sp0, srcYStride), LD_U16(sp2, srcYStride)); \ 196 s3 = vis_bshuffle(LD_U16(sp0, srcYStride + 2), LD_U16(sp2, srcYStride + 2)); \ 197 \ 198 t0 = vis_bshuffle(LD_U16(sp1, 0), LD_U16(sp3, 0)); \ 199 t1 = vis_bshuffle(LD_U16(sp1, 2), LD_U16(sp3, 2)); \ 200 t2 = vis_bshuffle(LD_U16(sp1, srcYStride), LD_U16(sp3, srcYStride)); \ 201 t3 = vis_bshuffle(LD_U16(sp1, 
srcYStride + 2), LD_U16(sp3, srcYStride + 2)); \ 202 \ 203 s0 = vis_bshuffle(s0, t0); \ 204 s1 = vis_bshuffle(s1, t1); \ 205 s2 = vis_bshuffle(s2, t2); \ 206 s3 = vis_bshuffle(s3, t3) 207 208 #endif 209 210 /***************************************************************/ 211 #define GET_POINTER(sp) \ 212 sp = *(mlib_u8**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + 2*(X >> MLIB_SHIFT); \ 213 X += dX; \ 214 Y += dY 215 216 /***************************************************************/ 217 #undef PREPARE_DELTAS 218 #define PREPARE_DELTAS \ 219 if (warp_tbl != NULL) { \ 220 dX = warp_tbl[2*j ]; \ 221 dY = warp_tbl[2*j + 1]; \ 222 dx64 = vis_to_double_dup((((dX << 1) & 0xFFFF) << 16) | ((dX << 1) & 0xFFFF)); \ 223 dy64 = vis_to_double_dup((((dY << 1) & 0xFFFF) << 16) | ((dY << 1) & 0xFFFF)); \ 224 } 225 226 /***************************************************************/ 227 mlib_status FUN_NAME(1ch)(mlib_affine_param *param) 228 { 229 DECLAREVAR(); 230 mlib_s32 off; 231 mlib_s32 x0, x1, x2, x3, y0, y1, y2, y3; 232 #ifdef MLIB_VIS2 233 mlib_d64 t0, t1, t2, t3; 234 vis_write_bmask(0x45CD67EF, 0); 235 #else 236 vis_alignaddr((void*)0, 6); 237 #endif 238 239 dx64 = vis_to_double_dup((((dX << 1) & 0xFFFF) << 16) | ((dX << 1) & 0xFFFF)); 240 dy64 = vis_to_double_dup((((dY << 1) & 0xFFFF) << 16) | ((dY << 1) & 0xFFFF)); 241 242 for (j = yStart; j <= yFinish; j++) { 243 mlib_u8 *sp0, *sp1, *sp2, *sp3; 244 mlib_d64 *dp, dmask; 245 246 NEW_LINE(1); 247 248 off = (mlib_s32)dl & 7; 249 dp = (mlib_d64*)(dl - off); 250 off >>= 1; 251 252 x0 = X - off*dX; y0 = Y - off*dY; 253 x1 = x0 + dX; y1 = y0 + dY; 254 x2 = x1 + dX; y2 = y1 + dY; 255 x3 = x2 + dX; y3 = y2 + dY; 256 257 deltax = DOUBLE_4U16(x0, x1, x2, x3); 258 deltay = DOUBLE_4U16(y0, y1, y2, y3); 259 260 if (off) { 261 mlib_s32 emask = vis_edge16((void*)(2*off), (void*)(2*(off + size - 1))); 262 263 off = 4 - off; 264 GET_POINTER(sp3); 265 sp0 = sp1 = sp2 = sp3; 266 267 if (off > 1 && size > 1) { 268 GET_POINTER(sp3); 269 } 270 
271 if (off > 2) { 272 sp2 = sp3; 273 274 if (size > 2) { 275 GET_POINTER(sp3); 276 } 277 } 278 279 LOAD_1CH(); 280 BL_SUM(); 281 282 dmask = ((mlib_d64*)mlib_dmask_arr)[emask]; 283 *dp++ = vis_for (vis_fand(dmask, dd), vis_fandnot(dmask, dp[0])); 284 285 size -= off; 286 287 if (size < 0) size = 0; 288 } 289 290 #pragma pipeloop(0) 291 for (i = 0; i < size/4; i++) { 292 GET_POINTER(sp0); 293 GET_POINTER(sp1); 294 GET_POINTER(sp2); 295 GET_POINTER(sp3); 296 297 LOAD_1CH(); 298 BL_SUM(); 299 300 dp[i] = dd; 301 } 302 303 off = size & 3; 304 305 if (off) { 306 GET_POINTER(sp0); 307 sp1 = sp2 = sp3 = sp0; 308 309 if (off > 1) { 310 GET_POINTER(sp1); 311 } 312 313 if (off > 2) { 314 GET_POINTER(sp2); 315 } 316 317 LOAD_1CH(); 318 BL_SUM(); 319 320 dmask = ((mlib_d64*)mlib_dmask_arr)[(0xF0 >> off) & 0x0F]; 321 dp[i] = vis_for (vis_fand(dmask, dd), vis_fandnot(dmask, dp[i])); 322 } 323 } 324 325 return MLIB_SUCCESS; 326 } 327 328 /***************************************************************/ 329 #undef GET_POINTER 330 #define GET_POINTER(sp) \ 331 sp = *(mlib_f32**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + (X >> MLIB_SHIFT); \ 332 X += dX; \ 333 Y += dY 334 335 /***************************************************************/ 336 #define LOAD_2CH() \ 337 s0 = vis_freg_pair(sp0[0], sp1[0]); \ 338 s1 = vis_freg_pair(sp0[1], sp1[1]); \ 339 s2 = vis_freg_pair(sp0[srcYStride], sp1[srcYStride]); \ 340 s3 = vis_freg_pair(sp0[srcYStride + 1], sp1[srcYStride + 1]) 341 342 /***************************************************************/ 343 #undef PREPARE_DELTAS 344 #define PREPARE_DELTAS \ 345 if (warp_tbl != NULL) { \ 346 dX = warp_tbl[2*j ]; \ 347 dY = warp_tbl[2*j + 1]; \ 348 dx64 = vis_to_double_dup(((dX & 0xFFFF) << 16) | (dX & 0xFFFF)); \ 349 dy64 = vis_to_double_dup(((dY & 0xFFFF) << 16) | (dY & 0xFFFF)); \ 350 } 351 352 /***************************************************************/ 353 mlib_status FUN_NAME(2ch)(mlib_affine_param *param) 354 { 355 DECLAREVAR(); 356 
mlib_s32 off; 357 mlib_s32 x0, x1, y0, y1; 358 359 if (((mlib_s32)lineAddr[0] | (mlib_s32)dstData | srcYStride | dstYStride) & 3) { 360 return FUN_NAME(2ch_na)(param); 361 } 362 363 srcYStride >>= 2; 364 365 dx64 = vis_to_double_dup(((dX & 0xFFFF) << 16) | (dX & 0xFFFF)); 366 dy64 = vis_to_double_dup(((dY & 0xFFFF) << 16) | (dY & 0xFFFF)); 367 368 for (j = yStart; j <= yFinish; j++) { 369 mlib_f32 *sp0, *sp1; 370 mlib_d64 *dp; 371 372 NEW_LINE(2); 373 374 off = (mlib_s32)dl & 7; 375 dp = (mlib_d64*)(dl - off); 376 377 if (off) { 378 x0 = X - dX; y0 = Y - dY; 379 x1 = X; y1 = Y; 380 } else { 381 x0 = X; y0 = Y; 382 x1 = X + dX; y1 = Y + dY; 383 } 384 385 deltax = DOUBLE_4U16(x0, x0, x1, x1); 386 deltay = DOUBLE_4U16(y0, y0, y1, y1); 387 388 if (off) { 389 GET_POINTER(sp1); 390 sp0 = sp1; 391 LOAD_2CH(); 392 393 BL_SUM(); 394 395 ((mlib_f32*)dp)[1] = vis_read_lo(dd); 396 dp++; 397 size--; 398 } 399 400 #pragma pipeloop(0) 401 for (i = 0; i < size/2; i++) { 402 GET_POINTER(sp0); 403 GET_POINTER(sp1); 404 LOAD_2CH(); 405 406 BL_SUM(); 407 408 *dp++ = dd; 409 } 410 411 if (size & 1) { 412 GET_POINTER(sp0); 413 sp1 = sp0; 414 LOAD_2CH(); 415 416 BL_SUM(); 417 418 ((mlib_f32*)dp)[0] = vis_read_hi(dd); 419 } 420 } 421 422 return MLIB_SUCCESS; 423 } 424 425 /***************************************************************/ 426 #undef GET_POINTER 427 #define GET_POINTER(sp) \ 428 sp = *(mlib_u8**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + 4*(X >> MLIB_SHIFT); \ 429 X += dX; \ 430 Y += dY 431 432 /***************************************************************/ 433 #ifndef MLIB_VIS2 434 435 #define LOAD_2CH_NA() \ 436 s0 = vis_faligndata(LD_U16(sp1, 2), mask_7fff); \ 437 s1 = vis_faligndata(LD_U16(sp1, 6), mask_7fff); \ 438 s2 = vis_faligndata(LD_U16(sp1, srcYStride + 2), mask_7fff); \ 439 s3 = vis_faligndata(LD_U16(sp1, srcYStride + 6), mask_7fff); \ 440 \ 441 s0 = vis_faligndata(LD_U16(sp1, 0), s0); \ 442 s1 = vis_faligndata(LD_U16(sp1, 4), s1); \ 443 s2 = 
vis_faligndata(LD_U16(sp1, srcYStride), s2); \ 444 s3 = vis_faligndata(LD_U16(sp1, srcYStride + 4), s3); \ 445 \ 446 s0 = vis_faligndata(LD_U16(sp0, 2), s0); \ 447 s1 = vis_faligndata(LD_U16(sp0, 6), s1); \ 448 s2 = vis_faligndata(LD_U16(sp0, srcYStride + 2), s2); \ 449 s3 = vis_faligndata(LD_U16(sp0, srcYStride + 6), s3); \ 450 \ 451 s0 = vis_faligndata(LD_U16(sp0, 0), s0); \ 452 s1 = vis_faligndata(LD_U16(sp0, 4), s1); \ 453 s2 = vis_faligndata(LD_U16(sp0, srcYStride), s2); \ 454 s3 = vis_faligndata(LD_U16(sp0, srcYStride + 4), s3) 455 456 #else 457 458 #define LOAD_2CH_NA() \ 459 s0 = vis_bshuffle(LD_U16(sp0, 0), LD_U16(sp1, 0)); \ 460 s1 = vis_bshuffle(LD_U16(sp0, 4), LD_U16(sp1, 4)); \ 461 s2 = vis_bshuffle(LD_U16(sp0, srcYStride), LD_U16(sp1, srcYStride)); \ 462 s3 = vis_bshuffle(LD_U16(sp0, srcYStride + 4), LD_U16(sp1, srcYStride + 4)); \ 463 \ 464 t0 = vis_bshuffle(LD_U16(sp0, 2), LD_U16(sp1, 2)); \ 465 t1 = vis_bshuffle(LD_U16(sp0, 6), LD_U16(sp1, 6)); \ 466 t2 = vis_bshuffle(LD_U16(sp0, srcYStride + 2), LD_U16(sp1, srcYStride + 2)); \ 467 t3 = vis_bshuffle(LD_U16(sp0, srcYStride + 6), LD_U16(sp1, srcYStride + 6)); \ 468 \ 469 s0 = vis_bshuffle(s0, t0); \ 470 s1 = vis_bshuffle(s1, t1); \ 471 s2 = vis_bshuffle(s2, t2); \ 472 s3 = vis_bshuffle(s3, t3) 473 474 #endif 475 476 /***************************************************************/ 477 mlib_status FUN_NAME(2ch_na)(mlib_affine_param *param) 478 { 479 DECLAREVAR(); 480 mlib_s32 max_xsize = param -> max_xsize, bsize; 481 mlib_s32 x0, x1, y0, y1; 482 mlib_d64 buff[BUF_SIZE], *pbuff = buff; 483 #ifdef MLIB_VIS2 484 mlib_d64 t0, t1, t2, t3; 485 #endif 486 487 bsize = (max_xsize + 1)/2; 488 489 if (bsize > BUF_SIZE) { 490 pbuff = mlib_malloc(bsize*sizeof(mlib_d64)); 491 492 if (pbuff == NULL) return MLIB_FAILURE; 493 } 494 495 MLIB_WRITE_BMASK(0x45CD67EF); 496 497 dx64 = vis_to_double_dup(((dX & 0xFFFF) << 16) | (dX & 0xFFFF)); 498 dy64 = vis_to_double_dup(((dY & 0xFFFF) << 16) | (dY & 0xFFFF)); 499 500 for 
(j = yStart; j <= yFinish; j++) { 501 mlib_u8 *sp0, *sp1; 502 503 #ifndef MLIB_VIS2 504 vis_alignaddr((void*)0, 6); 505 #endif 506 507 NEW_LINE(2); 508 509 x0 = X; y0 = Y; 510 x1 = X + dX; y1 = Y + dY; 511 512 deltax = DOUBLE_4U16(x0, x0, x1, x1); 513 deltay = DOUBLE_4U16(y0, y0, y1, y1); 514 515 #pragma pipeloop(0) 516 for (i = 0; i < size/2; i++) { 517 GET_POINTER(sp0); 518 GET_POINTER(sp1); 519 LOAD_2CH_NA(); 520 521 BL_SUM(); 522 523 pbuff[i] = dd; 524 } 525 526 if (size & 1) { 527 GET_POINTER(sp0); 528 sp1 = sp0; 529 LOAD_2CH_NA(); 530 531 BL_SUM(); 532 533 pbuff[i] = dd; 534 } 535 536 mlib_ImageCopy_na((mlib_u8*)pbuff, dl, 4*size); 537 } 538 539 if (pbuff != buff) { 540 mlib_free(pbuff); 541 } 542 543 return MLIB_SUCCESS; 544 } 545 546 /***************************************************************/ 547 #undef PREPARE_DELTAS 548 #define PREPARE_DELTAS \ 549 if (warp_tbl != NULL) { \ 550 dX = warp_tbl[2*j ]; \ 551 dY = warp_tbl[2*j + 1]; \ 552 dX = (dX - (dX >> 31)) &~ 1; /* rounding towards ZERO */ \ 553 dY = (dY - (dY >> 31)) &~ 1; /* rounding towards ZERO */ \ 554 dx64 = vis_to_double_dup((((dX >> 1) & 0xFFFF) << 16) | ((dX >> 1) & 0xFFFF)); \ 555 dy64 = vis_to_double_dup((((dY >> 1) & 0xFFFF) << 16) | ((dY >> 1) & 0xFFFF)); \ 556 } 557 558 /***************************************************************/ 559 mlib_status FUN_NAME(3ch)(mlib_affine_param *param) 560 { 561 DECLAREVAR(); 562 mlib_s32 max_xsize = param -> max_xsize; 563 mlib_d64 buff[BUF_SIZE], *pbuff = buff; 564 565 if (max_xsize > BUF_SIZE) { 566 pbuff = mlib_malloc(max_xsize*sizeof(mlib_d64)); 567 568 if (pbuff == NULL) return MLIB_FAILURE; 569 } 570 571 dX = (dX - (dX >> 31)) &~ 1; /* rounding towards ZERO */ 572 dY = (dY - (dY >> 31)) &~ 1; /* rounding towards ZERO */ 573 dx64 = vis_to_double_dup((((dX >> 1) & 0xFFFF) << 16) | ((dX >> 1) & 0xFFFF)); 574 dy64 = vis_to_double_dup((((dY >> 1) & 0xFFFF) << 16) | ((dY >> 1) & 0xFFFF)); 575 576 for (j = yStart; j <= yFinish; j++) { 577 mlib_u8 
*sp; 578 mlib_d64 *sp0, *sp1; 579 580 NEW_LINE(3); 581 582 deltax = DOUBLE_4U16(X, X, X, X); 583 deltay = DOUBLE_4U16(Y, Y, Y, Y); 584 585 #pragma pipeloop(0) 586 for (i = 0; i < size; i++) { 587 sp = *(mlib_u8**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + 6*(X >> MLIB_SHIFT) - 2; 588 589 vis_alignaddr(sp, 0); 590 sp0 = AL_ADDR(sp, 0); 591 s0 = vis_faligndata(sp0[0], sp0[1]); 592 s1 = vis_faligndata(sp0[1], sp0[2]); 593 594 vis_alignaddr(sp, srcYStride); 595 sp1 = AL_ADDR(sp, srcYStride); 596 s2 = vis_faligndata(sp1[0], sp1[1]); 597 s3 = vis_faligndata(sp1[1], sp1[2]); 598 599 BL_SUM_3CH(); 600 601 pbuff[i] = dd; 602 X += dX; 603 Y += dY; 604 } 605 606 mlib_v_ImageChannelExtract_S16_43L_D1((void *)pbuff, (void *)dl, size); 607 } 608 609 if (pbuff != buff) { 610 mlib_free(pbuff); 611 } 612 613 return MLIB_SUCCESS; 614 } 615 616 /***************************************************************/ 617 mlib_status FUN_NAME(4ch)(mlib_affine_param *param) 618 { 619 DECLAREVAR(); 620 621 if (((mlib_s32)lineAddr[0] | (mlib_s32)dstData | srcYStride | dstYStride) & 7) { 622 return FUN_NAME(4ch_na)(param); 623 } 624 625 srcYStride >>= 3; 626 627 dX = (dX - (dX >> 31)) &~ 1; /* rounding towards ZERO */ 628 dY = (dY - (dY >> 31)) &~ 1; /* rounding towards ZERO */ 629 dx64 = vis_to_double_dup((((dX >> 1) & 0xFFFF) << 16) | ((dX >> 1) & 0xFFFF)); 630 dy64 = vis_to_double_dup((((dY >> 1) & 0xFFFF) << 16) | ((dY >> 1) & 0xFFFF)); 631 632 for (j = yStart; j <= yFinish; j++) { 633 mlib_d64 *sp; 634 635 NEW_LINE(4); 636 637 deltax = DOUBLE_4U16(X, X, X, X); 638 deltay = DOUBLE_4U16(Y, Y, Y, Y); 639 640 #pragma pipeloop(0) 641 for (i = 0; i < size; i++) { 642 sp = *(mlib_d64**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + (X >> MLIB_SHIFT); 643 s0 = sp[0]; 644 s1 = sp[1]; 645 s2 = sp[srcYStride]; 646 s3 = sp[srcYStride + 1]; 647 648 BL_SUM(); 649 650 ((mlib_d64*)dl)[i] = dd; 651 X += dX; 652 Y += dY; 653 } 654 } 655 656 return MLIB_SUCCESS; 657 } 658 659 
/***************************************************************/ 660 mlib_status FUN_NAME(4ch_na)(mlib_affine_param *param) 661 { 662 DECLAREVAR(); 663 mlib_s32 max_xsize = param -> max_xsize; 664 mlib_d64 buff[BUF_SIZE], *pbuff = buff; 665 666 if (max_xsize > BUF_SIZE) { 667 pbuff = mlib_malloc(max_xsize*sizeof(mlib_d64)); 668 669 if (pbuff == NULL) return MLIB_FAILURE; 670 } 671 672 dX = (dX - (dX >> 31)) &~ 1; /* rounding towards ZERO */ 673 dY = (dY - (dY >> 31)) &~ 1; /* rounding towards ZERO */ 674 dx64 = vis_to_double_dup((((dX >> 1) & 0xFFFF) << 16) | ((dX >> 1) & 0xFFFF)); 675 dy64 = vis_to_double_dup((((dY >> 1) & 0xFFFF) << 16) | ((dY >> 1) & 0xFFFF)); 676 677 for (j = yStart; j <= yFinish; j++) { 678 mlib_u8 *sp; 679 mlib_d64 *sp0, *sp1; 680 681 NEW_LINE(4); 682 683 deltax = DOUBLE_4U16(X, X, X, X); 684 deltay = DOUBLE_4U16(Y, Y, Y, Y); 685 686 #pragma pipeloop(0) 687 for (i = 0; i < size; i++) { 688 sp = *(mlib_u8**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + 8*(X >> MLIB_SHIFT); 689 690 vis_alignaddr(sp, 0); 691 sp0 = AL_ADDR(sp, 0); 692 s0 = vis_faligndata(sp0[0], sp0[1]); 693 s1 = vis_faligndata(sp0[1], sp0[2]); 694 695 vis_alignaddr(sp, srcYStride); 696 sp1 = AL_ADDR(sp, srcYStride); 697 s2 = vis_faligndata(sp1[0], sp1[1]); 698 s3 = vis_faligndata(sp1[1], sp1[2]); 699 700 BL_SUM(); 701 702 pbuff[i] = dd; 703 X += dX; 704 Y += dY; 705 } 706 707 mlib_ImageCopy_na((mlib_u8*)pbuff, dl, 8*size); 708 } 709 710 if (pbuff != buff) { 711 mlib_free(pbuff); 712 } 713 714 return MLIB_SUCCESS; 715 } 716 717 /***************************************************************/