1 /* 2 * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 27 /* 28 * FUNCTION 29 * Image affine transformation with Bicubic filtering 30 * SYNOPSIS 31 * mlib_status mlib_ImageAffine_[u8|s16|u16]_?ch_bc(mlib_s32 *leftEdges, 32 * mlib_s32 *rightEdges, 33 * mlib_s32 *xStarts, 34 * mlib_s32 *yStarts, 35 * mlib_s32 *sides, 36 * mlib_u8 *dstData, 37 * mlib_u8 **lineAddr, 38 * mlib_s32 dstYStride, 39 * mlib_s32 is_affine, 40 * mlib_s32 srcYStride, 41 * mlib_filter filter) 42 * 43 * ARGUMENTS 44 * leftEdges array[dstHeight] of xLeft coordinates 45 * RightEdges array[dstHeight] of xRight coordinates 46 * xStarts array[dstHeight] of xStart * 65536 coordinates 47 * yStarts array[dstHeight] of yStart * 65536 coordinates 48 * sides output array[4]. sides[0] is yStart, sides[1] is yFinish, 49 * sides[2] is dx * 65536, sides[3] is dy * 65536 50 * dstData pointer to the first pixel on (yStart - 1) line 51 * lineAddr array[srcHeight] of pointers to the first pixel on 52 * the corresponding lines 53 * dstYStride stride of destination image 54 * is_affine indicator (Affine - GridWarp) 55 * srcYStride stride of source image 56 * filter type of resampling filter 57 * 58 * DESCRIPTION 59 * The functions step along the lines from xLeft to xRight and apply 60 * the bicubic filtering. 61 * 62 */ 63 64 #include "mlib_ImageAffine.h" 65 66 #define DTYPE mlib_s16 67 #define FILTER_BITS 9 68 #define FUN_NAME(CHAN) mlib_ImageAffine_s16_##CHAN##_bc 69 70 /***************************************************************/ 71 #ifdef __sparc /* for SPARC, using floating-point multiplies is faster */ 72 73 #undef FILTER_ELEM_BITS 74 #define FILTER_ELEM_BITS 4 75 76 #ifdef MLIB_USE_FTOI_CLAMPING 77 78 #define SAT16(DST) \ 79 DST = ((mlib_s32)val0) >> 16 80 81 #else 82 83 #define SAT16(DST) \ 84 if (val0 >= MLIB_S32_MAX) \ 85 DST = MLIB_S16_MAX; \ 86 else if (val0 <= MLIB_S32_MIN) \ 87 DST = MLIB_S16_MIN; \ 88 else \ 89 DST = ((mlib_s32)val0) >> 16 90 91 #endif /* MLIB_USE_FTOI_CLAMPING */ 92 93 mlib_status FUN_NAME(1ch)(mlib_affine_param *param) 94 { 95 DECLAREVAR_BC(); 96 DTYPE *dstLineEnd; 97 const mlib_f32 *mlib_filters_table; 98 99 if (filter == MLIB_BICUBIC) { 100 mlib_filters_table = mlib_filters_s16f_bc; 101 } 102 else { 103 mlib_filters_table = mlib_filters_s16f_bc2; 104 } 105 106 for (j = yStart; j <= yFinish; j++) { 107 mlib_d64 xf0, xf1, xf2, xf3; 108 mlib_d64 yf0, yf1, yf2, yf3; 109 mlib_d64 c0, c1, c2, c3, val0; 110 mlib_s32 filterpos; 111 mlib_f32 *fptr; 112 mlib_s32 s0, s1, s2, s3; 113 mlib_s32 s4, s5, s6, s7; 114 115 CLIP(1); 116 dstLineEnd = (DTYPE *) dstData + xRight; 117 118 filterpos = (X >> FILTER_SHIFT) & FILTER_MASK; 119 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); 120 121 xf0 = fptr[0]; 122 xf1 = fptr[1]; 123 xf2 = fptr[2]; 124 xf3 = fptr[3]; 125 126 filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK; 127 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); 128 129 yf0 = fptr[0]; 130 yf1 = fptr[1]; 131 yf2 = fptr[2]; 132 yf3 = fptr[3]; 133 134 xSrc = (X >> MLIB_SHIFT) - 1; 135 ySrc = (Y >> MLIB_SHIFT) - 1; 136 137 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc; 138 s0 = srcPixelPtr[0]; 139 s1 = srcPixelPtr[1]; 140 s2 = srcPixelPtr[2]; 141 s3 = srcPixelPtr[3]; 142 143 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 144 s4 = srcPixelPtr[0]; 145 s5 = srcPixelPtr[1]; 146 s6 = srcPixelPtr[2]; 147 s7 = srcPixelPtr[3]; 148 149 for (; dstPixelPtr <= (dstLineEnd - 1); dstPixelPtr++) { 150 151 X += dX; 152 Y += dY; 153 154 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3); 155 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3); 156 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 157 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 + 158 srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3); 159 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 160 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 + 161 srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3); 162 163 filterpos = (X >> FILTER_SHIFT) & FILTER_MASK; 164 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); 165 166 xf0 = fptr[0]; 167 xf1 = fptr[1]; 168 xf2 = fptr[2]; 169 xf3 = fptr[3]; 170 171 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3); 172 173 filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK; 174 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); 175 176 yf0 = fptr[0]; 177 yf1 = fptr[1]; 178 yf2 = fptr[2]; 179 yf3 = fptr[3]; 180 181 SAT16(dstPixelPtr[0]); 182 183 xSrc = (X >> MLIB_SHIFT) - 1; 184 ySrc = (Y >> MLIB_SHIFT) - 1; 185 186 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc; 187 s0 = srcPixelPtr[0]; 188 s1 = srcPixelPtr[1]; 189 s2 = srcPixelPtr[2]; 190 s3 = srcPixelPtr[3]; 191 192 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 193 s4 = srcPixelPtr[0]; 194 s5 = srcPixelPtr[1]; 195 s6 = srcPixelPtr[2]; 196 s7 = srcPixelPtr[3]; 197 } 198 199 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3); 200 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3); 201 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 202 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 + 203 srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3); 204 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 205 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 + 206 srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3); 207 208 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3); 209 SAT16(dstPixelPtr[0]); 210 } 211 212 return MLIB_SUCCESS; 213 } 214 215 mlib_status FUN_NAME(2ch)(mlib_affine_param *param) 216 { 217 DECLAREVAR_BC(); 218 DTYPE *dstLineEnd; 219 const mlib_f32 *mlib_filters_table; 220 221 if (filter == MLIB_BICUBIC) { 222 mlib_filters_table = mlib_filters_s16f_bc; 223 } 224 else { 225 mlib_filters_table = mlib_filters_s16f_bc2; 226 } 227 228 for (j = yStart; j <= yFinish; j++) { 229 mlib_d64 xf0, xf1, xf2, xf3; 230 mlib_d64 yf0, yf1, yf2, yf3; 231 mlib_d64 c0, c1, c2, c3, val0; 232 mlib_s32 filterpos, k; 233 mlib_f32 *fptr; 234 mlib_s32 s0, s1, s2, s3; 235 mlib_s32 s4, s5, s6, s7; 236 237 CLIP(2); 238 dstLineEnd = (DTYPE *) dstData + 2 * xRight; 239 240 for (k = 0; k < 2; k++) { 241 mlib_s32 X1 = X; 242 mlib_s32 Y1 = Y; 243 DTYPE *dPtr = dstPixelPtr + k; 244 245 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; 246 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); 247 248 xf0 = fptr[0]; 249 xf1 = fptr[1]; 250 xf2 = fptr[2]; 251 xf3 = fptr[3]; 252 253 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; 254 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); 255 256 yf0 = fptr[0]; 257 yf1 = fptr[1]; 258 yf2 = fptr[2]; 259 yf3 = fptr[3]; 260 261 xSrc = (X1 >> MLIB_SHIFT) - 1; 262 ySrc = (Y1 >> MLIB_SHIFT) - 1; 263 264 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k; 265 s0 = srcPixelPtr[0]; 266 s1 = srcPixelPtr[2]; 267 s2 = srcPixelPtr[4]; 268 s3 = srcPixelPtr[6]; 269 270 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 271 s4 = srcPixelPtr[0]; 272 s5 = srcPixelPtr[2]; 273 s6 = srcPixelPtr[4]; 274 s7 = srcPixelPtr[6]; 275 276 for (; dPtr <= (dstLineEnd - 1); dPtr += 2) { 277 278 X1 += dX; 279 Y1 += dY; 280 281 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3); 282 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3); 283 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 284 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 + 285 srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3); 286 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 287 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 + 288 srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3); 289 290 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; 291 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); 292 293 xf0 = fptr[0]; 294 xf1 = fptr[1]; 295 xf2 = fptr[2]; 296 xf3 = fptr[3]; 297 298 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3); 299 300 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; 301 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); 302 303 yf0 = fptr[0]; 304 yf1 = fptr[1]; 305 yf2 = fptr[2]; 306 yf3 = fptr[3]; 307 308 SAT16(dPtr[0]); 309 310 xSrc = (X1 >> MLIB_SHIFT) - 1; 311 ySrc = (Y1 >> MLIB_SHIFT) - 1; 312 313 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k; 314 s0 = srcPixelPtr[0]; 315 s1 = srcPixelPtr[2]; 316 s2 = srcPixelPtr[4]; 317 s3 = srcPixelPtr[6]; 318 319 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 320 s4 = srcPixelPtr[0]; 321 s5 = srcPixelPtr[2]; 322 s6 = srcPixelPtr[4]; 323 s7 = srcPixelPtr[6]; 324 } 325 326 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3); 327 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3); 328 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 329 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 + 330 srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3); 331 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 332 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 + 333 srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3); 334 335 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3); 336 SAT16(dPtr[0]); 337 } 338 } 339 340 return MLIB_SUCCESS; 341 } 342 343 mlib_status FUN_NAME(3ch)(mlib_affine_param *param) 344 { 345 DECLAREVAR_BC(); 346 DTYPE *dstLineEnd; 347 const mlib_f32 *mlib_filters_table; 348 349 if (filter == MLIB_BICUBIC) { 350 mlib_filters_table = mlib_filters_s16f_bc; 351 } 352 else { 353 mlib_filters_table = mlib_filters_s16f_bc2; 354 } 355 356 for (j = yStart; j <= yFinish; j++) { 357 mlib_d64 xf0, xf1, xf2, xf3; 358 mlib_d64 yf0, yf1, yf2, yf3; 359 mlib_d64 c0, c1, c2, c3, val0; 360 mlib_s32 filterpos, k; 361 mlib_f32 *fptr; 362 mlib_s32 s0, s1, s2, s3; 363 mlib_s32 s4, s5, s6, s7; 364 365 CLIP(3); 366 dstLineEnd = (DTYPE *) dstData + 3 * xRight; 367 368 for (k = 0; k < 3; k++) { 369 mlib_s32 X1 = X; 370 mlib_s32 Y1 = Y; 371 DTYPE *dPtr = dstPixelPtr + k; 372 373 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; 374 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); 375 376 xf0 = fptr[0]; 377 xf1 = fptr[1]; 378 xf2 = fptr[2]; 379 xf3 = fptr[3]; 380 381 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; 382 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); 383 384 yf0 = fptr[0]; 385 yf1 = fptr[1]; 386 yf2 = fptr[2]; 387 yf3 = fptr[3]; 388 389 xSrc = (X1 >> MLIB_SHIFT) - 1; 390 ySrc = (Y1 >> MLIB_SHIFT) - 1; 391 392 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k; 393 s0 = srcPixelPtr[0]; 394 s1 = srcPixelPtr[3]; 395 s2 = srcPixelPtr[6]; 396 s3 = srcPixelPtr[9]; 397 398 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 399 s4 = srcPixelPtr[0]; 400 s5 = srcPixelPtr[3]; 401 s6 = srcPixelPtr[6]; 402 s7 = srcPixelPtr[9]; 403 404 for (; dPtr <= (dstLineEnd - 1); dPtr += 3) { 405 406 X1 += dX; 407 Y1 += dY; 408 409 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3); 410 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3); 411 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 412 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 + 413 srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3); 414 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 415 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 + 416 srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3); 417 418 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; 419 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); 420 421 xf0 = fptr[0]; 422 xf1 = fptr[1]; 423 xf2 = fptr[2]; 424 xf3 = fptr[3]; 425 426 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3); 427 428 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; 429 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); 430 431 yf0 = fptr[0]; 432 yf1 = fptr[1]; 433 yf2 = fptr[2]; 434 yf3 = fptr[3]; 435 436 SAT16(dPtr[0]); 437 438 xSrc = (X1 >> MLIB_SHIFT) - 1; 439 ySrc = (Y1 >> MLIB_SHIFT) - 1; 440 441 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k; 442 s0 = srcPixelPtr[0]; 443 s1 = srcPixelPtr[3]; 444 s2 = srcPixelPtr[6]; 445 s3 = srcPixelPtr[9]; 446 447 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 448 s4 = srcPixelPtr[0]; 449 s5 = srcPixelPtr[3]; 450 s6 = srcPixelPtr[6]; 451 s7 = srcPixelPtr[9]; 452 } 453 454 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3); 455 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3); 456 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 457 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 + 458 srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3); 459 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 460 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 + 461 srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3); 462 463 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3); 464 SAT16(dPtr[0]); 465 } 466 } 467 468 return MLIB_SUCCESS; 469 } 470 471 mlib_status FUN_NAME(4ch)(mlib_affine_param *param) 472 { 473 DECLAREVAR_BC(); 474 DTYPE *dstLineEnd; 475 const mlib_f32 *mlib_filters_table; 476 477 if (filter == MLIB_BICUBIC) { 478 mlib_filters_table = mlib_filters_s16f_bc; 479 } 480 else { 481 mlib_filters_table = mlib_filters_s16f_bc2; 482 } 483 484 for (j = yStart; j <= yFinish; j++) { 485 mlib_d64 xf0, xf1, xf2, xf3; 486 mlib_d64 yf0, yf1, yf2, yf3; 487 mlib_d64 c0, c1, c2, c3, val0; 488 mlib_s32 filterpos, k; 489 mlib_f32 *fptr; 490 mlib_s32 s0, s1, s2, s3; 491 mlib_s32 s4, s5, s6, s7; 492 493 CLIP(4); 494 dstLineEnd = (DTYPE *) dstData + 4 * xRight; 495 496 for (k = 0; k < 4; k++) { 497 mlib_s32 X1 = X; 498 mlib_s32 Y1 = Y; 499 DTYPE *dPtr = dstPixelPtr + k; 500 501 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; 502 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); 503 504 xf0 = fptr[0]; 505 xf1 = fptr[1]; 506 xf2 = fptr[2]; 507 xf3 = fptr[3]; 508 509 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; 510 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); 511 512 yf0 = fptr[0]; 513 yf1 = fptr[1]; 514 yf2 = fptr[2]; 515 yf3 = fptr[3]; 516 517 xSrc = (X1 >> MLIB_SHIFT) - 1; 518 ySrc = (Y1 >> MLIB_SHIFT) - 1; 519 520 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k; 521 s0 = srcPixelPtr[0]; 522 s1 = srcPixelPtr[4]; 523 s2 = srcPixelPtr[8]; 524 s3 = srcPixelPtr[12]; 525 526 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 527 s4 = srcPixelPtr[0]; 528 s5 = srcPixelPtr[4]; 529 s6 = srcPixelPtr[8]; 530 s7 = srcPixelPtr[12]; 531 532 for (; dPtr <= (dstLineEnd - 1); dPtr += 4) { 533 534 X1 += dX; 535 Y1 += dY; 536 537 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3); 538 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3); 539 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 540 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 + 541 srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3); 542 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 543 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 + 544 srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3); 545 546 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; 547 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); 548 549 xf0 = fptr[0]; 550 xf1 = fptr[1]; 551 xf2 = fptr[2]; 552 xf3 = fptr[3]; 553 554 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3); 555 556 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; 557 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); 558 559 yf0 = fptr[0]; 560 yf1 = fptr[1]; 561 yf2 = fptr[2]; 562 yf3 = fptr[3]; 563 564 SAT16(dPtr[0]); 565 566 xSrc = (X1 >> MLIB_SHIFT) - 1; 567 ySrc = (Y1 >> MLIB_SHIFT) - 1; 568 569 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k; 570 s0 = srcPixelPtr[0]; 571 s1 = srcPixelPtr[4]; 572 s2 = srcPixelPtr[8]; 573 s3 = srcPixelPtr[12]; 574 575 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 576 s4 = srcPixelPtr[0]; 577 s5 = srcPixelPtr[4]; 578 s6 = srcPixelPtr[8]; 579 s7 = srcPixelPtr[12]; 580 } 581 582 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3); 583 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3); 584 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 585 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 + 586 srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3); 587 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 588 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 + 589 srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3); 590 591 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3); 592 SAT16(dPtr[0]); 593 } 594 } 595 596 return MLIB_SUCCESS; 597 } 598 599 #else /* for x86, using integer multiplies is faster */ 600 601 #define SHIFT_X 15 602 #define ROUND_X 0 /* (1 << (SHIFT_X - 1)) */ 603 604 #define SHIFT_Y (15 + 15 - SHIFT_X) 605 #define ROUND_Y (1 << (SHIFT_Y - 1)) 606 607 #define S32_TO_S16_SAT(DST) \ 608 if (val0 >= MLIB_S16_MAX) \ 609 DST = MLIB_S16_MAX; \ 610 else if (val0 <= MLIB_S16_MIN) \ 611 DST = MLIB_S16_MIN; \ 612 else \ 613 DST = (mlib_s16)val0 614 615 mlib_status FUN_NAME(1ch)(mlib_affine_param *param) 616 { 617 DECLAREVAR_BC(); 618 DTYPE *dstLineEnd; 619 const mlib_s16 *mlib_filters_table; 620 621 if (filter == MLIB_BICUBIC) { 622 mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc; 623 } 624 else { 625 mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2; 626 } 627 628 for (j = yStart; j <= yFinish; j++) { 629 mlib_s32 xf0, xf1, xf2, xf3; 630 mlib_s32 yf0, yf1, yf2, yf3; 631 mlib_s32 c0, c1, c2, c3, val0; 632 mlib_s32 filterpos; 633 mlib_s16 *fptr; 634 mlib_s32 s0, s1, s2, s3; 635 mlib_s32 s4, s5, s6, s7; 636 637 CLIP(1); 638 dstLineEnd = (DTYPE *) dstData + xRight; 639 640 filterpos = (X >> FILTER_SHIFT) & FILTER_MASK; 641 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); 642 643 xf0 = fptr[0]; 644 xf1 = fptr[1]; 645 xf2 = fptr[2]; 646 xf3 = fptr[3]; 647 648 filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK; 649 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); 650 651 yf0 = fptr[0]; 652 yf1 = fptr[1]; 653 yf2 = fptr[2]; 654 yf3 = fptr[3]; 655 656 xSrc = (X >> MLIB_SHIFT) - 1; 657 ySrc = (Y >> MLIB_SHIFT) - 1; 658 659 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc; 660 s0 = srcPixelPtr[0]; 661 s1 = srcPixelPtr[1]; 662 s2 = srcPixelPtr[2]; 663 s3 = srcPixelPtr[3]; 664 665 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 666 s4 = srcPixelPtr[0]; 667 s5 = srcPixelPtr[1]; 668 s6 = srcPixelPtr[2]; 669 s7 = srcPixelPtr[3]; 670 671 for (; dstPixelPtr <= (dstLineEnd - 1); dstPixelPtr++) { 672 673 X += dX; 674 Y += dY; 675 676 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X; 677 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X; 678 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 679 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 + 680 srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X; 681 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 682 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 + 683 srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X; 684 685 filterpos = (X >> FILTER_SHIFT) & FILTER_MASK; 686 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); 687 688 xf0 = fptr[0]; 689 xf1 = fptr[1]; 690 xf2 = fptr[2]; 691 xf3 = fptr[3]; 692 693 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y; 694 695 filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK; 696 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); 697 698 yf0 = fptr[0]; 699 yf1 = fptr[1]; 700 yf2 = fptr[2]; 701 yf3 = fptr[3]; 702 703 S32_TO_S16_SAT(dstPixelPtr[0]); 704 705 xSrc = (X >> MLIB_SHIFT) - 1; 706 ySrc = (Y >> MLIB_SHIFT) - 1; 707 708 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc; 709 s0 = srcPixelPtr[0]; 710 s1 = srcPixelPtr[1]; 711 s2 = srcPixelPtr[2]; 712 s3 = srcPixelPtr[3]; 713 714 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 715 s4 = srcPixelPtr[0]; 716 s5 = srcPixelPtr[1]; 717 s6 = srcPixelPtr[2]; 718 s7 = srcPixelPtr[3]; 719 } 720 721 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X; 722 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X; 723 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 724 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 + 725 srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X; 726 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 727 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 + 728 srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X; 729 730 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y; 731 S32_TO_S16_SAT(dstPixelPtr[0]); 732 } 733 734 return MLIB_SUCCESS; 735 } 736 737 mlib_status FUN_NAME(2ch)(mlib_affine_param *param) 738 { 739 DECLAREVAR_BC(); 740 DTYPE *dstLineEnd; 741 const mlib_s16 *mlib_filters_table; 742 743 if (filter == MLIB_BICUBIC) { 744 mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc; 745 } 746 else { 747 mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2; 748 } 749 750 for (j = yStart; j <= yFinish; j++) { 751 mlib_s32 xf0, xf1, xf2, xf3; 752 mlib_s32 yf0, yf1, yf2, yf3; 753 mlib_s32 c0, c1, c2, c3, val0; 754 mlib_s32 filterpos, k; 755 mlib_s16 *fptr; 756 mlib_s32 s0, s1, s2, s3; 757 mlib_s32 s4, s5, s6, s7; 758 759 CLIP(2); 760 dstLineEnd = (DTYPE *) dstData + 2 * xRight; 761 762 for (k = 0; k < 2; k++) { 763 mlib_s32 X1 = X; 764 mlib_s32 Y1 = Y; 765 DTYPE *dPtr = dstPixelPtr + k; 766 767 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; 768 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); 769 770 xf0 = fptr[0]; 771 xf1 = fptr[1]; 772 xf2 = fptr[2]; 773 xf3 = fptr[3]; 774 775 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; 776 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); 777 778 yf0 = fptr[0]; 779 yf1 = fptr[1]; 780 yf2 = fptr[2]; 781 yf3 = fptr[3]; 782 783 xSrc = (X1 >> MLIB_SHIFT) - 1; 784 ySrc = (Y1 >> MLIB_SHIFT) - 1; 785 786 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k; 787 s0 = srcPixelPtr[0]; 788 s1 = srcPixelPtr[2]; 789 s2 = srcPixelPtr[4]; 790 s3 = srcPixelPtr[6]; 791 792 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 793 s4 = srcPixelPtr[0]; 794 s5 = srcPixelPtr[2]; 795 s6 = srcPixelPtr[4]; 796 s7 = srcPixelPtr[6]; 797 798 for (; dPtr <= (dstLineEnd - 1); dPtr += 2) { 799 800 X1 += dX; 801 Y1 += dY; 802 803 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X; 804 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X; 805 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 806 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 + 807 srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X; 808 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 809 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 + 810 srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X; 811 812 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; 813 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); 814 815 xf0 = fptr[0]; 816 xf1 = fptr[1]; 817 xf2 = fptr[2]; 818 xf3 = fptr[3]; 819 820 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y; 821 822 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; 823 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); 824 825 yf0 = fptr[0]; 826 yf1 = fptr[1]; 827 yf2 = fptr[2]; 828 yf3 = fptr[3]; 829 830 S32_TO_S16_SAT(dPtr[0]); 831 832 xSrc = (X1 >> MLIB_SHIFT) - 1; 833 ySrc = (Y1 >> MLIB_SHIFT) - 1; 834 835 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k; 836 s0 = srcPixelPtr[0]; 837 s1 = srcPixelPtr[2]; 838 s2 = srcPixelPtr[4]; 839 s3 = srcPixelPtr[6]; 840 841 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 842 s4 = srcPixelPtr[0]; 843 s5 = srcPixelPtr[2]; 844 s6 = srcPixelPtr[4]; 845 s7 = srcPixelPtr[6]; 846 } 847 848 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X; 849 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X; 850 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 851 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 + 852 srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X; 853 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 854 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 + 855 srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X; 856 857 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y; 858 S32_TO_S16_SAT(dPtr[0]); 859 } 860 } 861 862 return MLIB_SUCCESS; 863 } 864 865 mlib_status FUN_NAME(3ch)(mlib_affine_param *param) 866 { 867 DECLAREVAR_BC(); 868 DTYPE *dstLineEnd; 869 const mlib_s16 *mlib_filters_table; 870 871 if (filter == MLIB_BICUBIC) { 872 mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc; 873 } 874 else { 875 mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2; 876 } 877 878 for (j = yStart; j <= yFinish; j++) { 879 mlib_s32 xf0, xf1, xf2, xf3; 880 mlib_s32 yf0, yf1, yf2, yf3; 881 mlib_s32 c0, c1, c2, c3, val0; 882 mlib_s32 filterpos, k; 883 mlib_s16 *fptr; 884 mlib_s32 s0, s1, s2, s3; 885 mlib_s32 s4, s5, s6, s7; 886 887 CLIP(3); 888 dstLineEnd = (DTYPE *) dstData + 3 * xRight; 889 890 for (k = 0; k < 3; k++) { 891 mlib_s32 X1 = X; 892 mlib_s32 Y1 = Y; 893 DTYPE *dPtr = dstPixelPtr + k; 894 895 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; 896 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); 897 898 xf0 = fptr[0]; 899 xf1 = fptr[1]; 900 xf2 = fptr[2]; 901 xf3 = fptr[3]; 902 903 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; 904 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); 905 906 yf0 = fptr[0]; 907 yf1 = fptr[1]; 908 yf2 = fptr[2]; 909 yf3 = fptr[3]; 910 911 xSrc = (X1 >> MLIB_SHIFT) - 1; 912 ySrc = (Y1 >> MLIB_SHIFT) - 1; 913 914 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k; 915 s0 = srcPixelPtr[0]; 916 s1 = srcPixelPtr[3]; 917 s2 = srcPixelPtr[6]; 918 s3 = srcPixelPtr[9]; 919 920 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 921 s4 = srcPixelPtr[0]; 922 s5 = srcPixelPtr[3]; 923 s6 = srcPixelPtr[6]; 924 s7 = srcPixelPtr[9]; 925 926 for (; dPtr <= (dstLineEnd - 1); dPtr += 3) { 927 928 X1 += dX; 929 Y1 += dY; 930 931 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X; 932 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X; 933 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 934 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 + 935 srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X; 936 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 937 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 + 938 srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X; 939 940 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; 941 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); 942 943 xf0 = fptr[0]; 944 xf1 = fptr[1]; 945 xf2 = fptr[2]; 946 xf3 = fptr[3]; 947 948 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y; 949 950 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; 951 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); 952 953 yf0 = fptr[0]; 954 yf1 = fptr[1]; 955 yf2 = fptr[2]; 956 yf3 = fptr[3]; 957 958 S32_TO_S16_SAT(dPtr[0]); 959 960 xSrc = (X1 >> MLIB_SHIFT) - 1; 961 ySrc = (Y1 >> MLIB_SHIFT) - 1; 962 963 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k; 964 s0 = srcPixelPtr[0]; 965 s1 = srcPixelPtr[3]; 966 s2 = srcPixelPtr[6]; 967 s3 = srcPixelPtr[9]; 968 969 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 970 s4 = srcPixelPtr[0]; 971 s5 = srcPixelPtr[3]; 972 s6 = srcPixelPtr[6]; 973 s7 = srcPixelPtr[9]; 974 } 975 976 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X; 977 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X; 978 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 979 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 + 980 srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X; 981 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 982 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 + 983 srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X; 984 985 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y; 986 S32_TO_S16_SAT(dPtr[0]); 987 } 988 } 989 990 return MLIB_SUCCESS; 991 } 992 993 mlib_status FUN_NAME(4ch)(mlib_affine_param *param) 994 { 995 DECLAREVAR_BC(); 996 DTYPE *dstLineEnd; 997 const mlib_s16 *mlib_filters_table; 998 999 if (filter == MLIB_BICUBIC) { 1000 mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc; 1001 } 1002 else { 1003 mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2; 1004 } 1005 1006 for (j = yStart; j <= yFinish; j++) { 1007 mlib_s32 xf0, xf1, xf2, xf3; 1008 mlib_s32 yf0, yf1, yf2, yf3; 1009 mlib_s32 c0, c1, c2, c3, val0; 1010 mlib_s32 filterpos, k; 1011 mlib_s16 *fptr; 1012 mlib_s32 s0, s1, s2, s3; 1013 mlib_s32 s4, s5, s6, s7; 1014 1015 CLIP(4); 1016 dstLineEnd = (DTYPE *) dstData + 4 * xRight; 1017 1018 for (k = 0; k < 4; k++) { 1019 mlib_s32 X1 = X; 1020 mlib_s32 Y1 = Y; 1021 DTYPE *dPtr = dstPixelPtr + k; 1022 1023 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; 1024 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); 1025 1026 xf0 = fptr[0]; 1027 xf1 = fptr[1]; 1028 xf2 = fptr[2]; 1029 xf3 = fptr[3]; 1030 1031 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; 1032 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); 1033 1034 yf0 = fptr[0]; 1035 yf1 = fptr[1]; 1036 yf2 = fptr[2]; 1037 yf3 = fptr[3]; 1038 1039 xSrc = (X1 >> MLIB_SHIFT) - 1; 1040 ySrc = (Y1 >> MLIB_SHIFT) - 1; 1041 1042 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k; 1043 s0 = srcPixelPtr[0]; 1044 s1 = srcPixelPtr[4]; 1045 s2 = srcPixelPtr[8]; 1046 s3 = srcPixelPtr[12]; 1047 1048 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 1049 s4 = srcPixelPtr[0]; 1050 s5 = srcPixelPtr[4]; 1051 s6 = srcPixelPtr[8]; 1052 s7 = srcPixelPtr[12]; 1053 1054 for (; dPtr <= (dstLineEnd - 1); dPtr += 4) { 1055 1056 X1 += dX; 1057 Y1 += dY; 1058 1059 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X; 1060 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X; 1061 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 1062 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 + 1063 srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X; 1064 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 1065 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 + 1066 srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X; 1067 1068 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; 1069 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); 1070 1071 xf0 = fptr[0]; 1072 xf1 = fptr[1]; 1073 xf2 = fptr[2]; 1074 xf3 = fptr[3]; 1075 1076 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y; 1077 1078 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; 1079 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); 1080 1081 yf0 = fptr[0]; 1082 yf1 = fptr[1]; 1083 yf2 = fptr[2]; 1084 yf3 = fptr[3]; 1085 1086 S32_TO_S16_SAT(dPtr[0]); 1087 1088 xSrc = (X1 >> MLIB_SHIFT) - 1; 1089 ySrc = (Y1 >> MLIB_SHIFT) - 1; 1090 1091 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k; 1092 s0 = srcPixelPtr[0]; 1093 s1 = srcPixelPtr[4]; 1094 s2 = srcPixelPtr[8]; 1095 s3 = srcPixelPtr[12]; 1096 1097 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 1098 s4 = srcPixelPtr[0]; 1099 s5 = srcPixelPtr[4]; 1100 s6 = srcPixelPtr[8]; 1101 s7 = srcPixelPtr[12]; 1102 } 1103 1104 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X; 1105 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X; 1106 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 1107 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 + 1108 srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X; 1109 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 1110 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 + 1111 srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X; 1112 1113 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y; 1114 S32_TO_S16_SAT(dPtr[0]); 1115 } 1116 } 1117 1118 return MLIB_SUCCESS; 1119 } 1120 1121 #endif /* __sparc ( for SPARC, using floating-point multiplies is faster ) */ 1122 1123 /***************************************************************/