1 /* 2 * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 27 /* 28 * FUNCTION 29 * Image affine transformation with Bicubic filtering 30 * SYNOPSIS 31 * mlib_status mlib_ImageAffine_[u8|s16|u16]_?ch_bc(mlib_s32 *leftEdges, 32 * mlib_s32 *rightEdges, 33 * mlib_s32 *xStarts, 34 * mlib_s32 *yStarts, 35 * mlib_s32 *sides, 36 * mlib_u8 *dstData, 37 * mlib_u8 **lineAddr, 38 * mlib_s32 dstYStride, 39 * mlib_s32 is_affine, 40 * mlib_s32 srcYStride, 41 * mlib_filter filter) 42 * 43 * 44 * ARGUMENTS 45 * leftEdges array[dstHeight] of xLeft coordinates 46 * RightEdges array[dstHeight] of xRight coordinates 47 * xStarts array[dstHeight] of xStart * 65536 coordinates 48 * yStarts array[dstHeight] of yStart * 65536 coordinates 49 * sides output array[4]. sides[0] is yStart, sides[1] is yFinish, 50 * sides[2] is dx * 65536, sides[3] is dy * 65536 51 * dstData pointer to the first pixel on (yStart - 1) line 52 * lineAddr array[srcHeight] of pointers to the first pixel on 53 * the corresponding lines 54 * dstYStride stride of destination image 55 * is_affine indicator (Affine - GridWarp) 56 * srcYStride stride of source image 57 * filter type of resampling filter 58 * 59 * DESCRIPTION 60 * The functions step along the lines from xLeft to xRight and apply 61 * the bicubic filtering. 62 * 63 */ 64 65 #include "mlib_ImageAffine.h" 66 67 #define DTYPE mlib_u8 68 69 #define FUN_NAME(CHAN) mlib_ImageAffine_u8_##CHAN##_bc 70 71 #define FILTER_BITS 8 72 73 /***************************************************************/ 74 #ifdef __sparc /* for SPARC, using floating-point multiplies is faster */ 75 76 #undef FILTER_ELEM_BITS 77 #define FILTER_ELEM_BITS 4 78 79 #ifdef MLIB_USE_FTOI_CLAMPING 80 81 #define SAT8(DST) \ 82 DST = ((mlib_s32)(val0 - sat) >> 24) ^ 0x80 83 84 #else 85 86 #define SAT8(DST) \ 87 val0 -= sat; \ 88 if (val0 >= MLIB_S32_MAX) \ 89 DST = MLIB_U8_MAX; \ 90 else if (val0 <= MLIB_S32_MIN) \ 91 DST = MLIB_U8_MIN; \ 92 else \ 93 DST = ((mlib_s32)val0 >> 24) ^ 0x80 94 95 #endif /* MLIB_USE_FTOI_CLAMPING */ 96 97 /***************************************************************/ 98 mlib_status FUN_NAME(1ch)(mlib_affine_param *param) 99 { 100 DECLAREVAR_BC(); 101 DTYPE *dstLineEnd; 102 mlib_d64 sat = (mlib_d64) 0x7F800000; 103 const mlib_f32 *mlib_filters_table; 104 105 if (filter == MLIB_BICUBIC) { 106 mlib_filters_table = mlib_filters_u8f_bc; 107 } 108 else { 109 mlib_filters_table = mlib_filters_u8f_bc2; 110 } 111 112 for (j = yStart; j <= yFinish; j++) { 113 mlib_d64 xf0, xf1, xf2, xf3; 114 mlib_d64 yf0, yf1, yf2, yf3; 115 mlib_d64 c0, c1, c2, c3, val0; 116 mlib_s32 filterpos; 117 mlib_f32 *fptr; 118 mlib_u8 s0, s1, s2, s3; 119 120 CLIP(1); 121 dstLineEnd = (DTYPE *) dstData + xRight; 122 123 filterpos = (X >> FILTER_SHIFT) & FILTER_MASK; 124 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); 125 126 xf0 = fptr[0]; 127 xf1 = fptr[1]; 128 xf2 = fptr[2]; 129 xf3 = fptr[3]; 130 131 filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK; 132 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); 133 134 yf0 = fptr[0]; 135 yf1 = fptr[1]; 136 yf2 = fptr[2]; 137 yf3 = fptr[3]; 138 139 xSrc = (X >> MLIB_SHIFT) - 1; 140 ySrc = (Y >> MLIB_SHIFT) - 1; 141 142 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc; 143 s0 = srcPixelPtr[0]; 144 s1 = srcPixelPtr[1]; 145 s2 = srcPixelPtr[2]; 146 s3 = srcPixelPtr[3]; 147 148 #ifdef __SUNPRO_C 149 #pragma pipeloop(0) 150 #endif /* __SUNPRO_C */ 151 for (; dstPixelPtr <= (dstLineEnd - 1); dstPixelPtr++) { 152 X += dX; 153 Y += dY; 154 155 c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 + 156 mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3); 157 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 158 c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[1]] * xf1 + 159 mlib_U82D64[srcPixelPtr[2]] * xf2 + mlib_U82D64[srcPixelPtr[3]] * xf3); 160 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 161 c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[1]] * xf1 + 162 mlib_U82D64[srcPixelPtr[2]] * xf2 + mlib_U82D64[srcPixelPtr[3]] * xf3); 163 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 164 c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[1]] * xf1 + 165 mlib_U82D64[srcPixelPtr[2]] * xf2 + mlib_U82D64[srcPixelPtr[3]] * xf3); 166 167 filterpos = (X >> FILTER_SHIFT) & FILTER_MASK; 168 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); 169 170 xf0 = fptr[0]; 171 xf1 = fptr[1]; 172 xf2 = fptr[2]; 173 xf3 = fptr[3]; 174 175 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3); 176 177 filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK; 178 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); 179 180 yf0 = fptr[0]; 181 yf1 = fptr[1]; 182 yf2 = fptr[2]; 183 yf3 = fptr[3]; 184 185 SAT8(dstPixelPtr[0]); 186 187 xSrc = (X >> MLIB_SHIFT) - 1; 188 ySrc = (Y >> MLIB_SHIFT) - 1; 189 190 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc; 191 s0 = srcPixelPtr[0]; 192 s1 = srcPixelPtr[1]; 193 s2 = srcPixelPtr[2]; 194 s3 = srcPixelPtr[3]; 195 } 196 197 c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 + 198 mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3); 199 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 200 c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[1]] * xf1 + 201 mlib_U82D64[srcPixelPtr[2]] * xf2 + mlib_U82D64[srcPixelPtr[3]] * xf3); 202 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 203 c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[1]] * xf1 + 204 mlib_U82D64[srcPixelPtr[2]] * xf2 + mlib_U82D64[srcPixelPtr[3]] * xf3); 205 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 206 c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[1]] * xf1 + 207 mlib_U82D64[srcPixelPtr[2]] * xf2 + mlib_U82D64[srcPixelPtr[3]] * xf3); 208 209 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3); 210 211 SAT8(dstPixelPtr[0]); 212 } 213 214 return MLIB_SUCCESS; 215 } 216 217 /***************************************************************/ 218 mlib_status FUN_NAME(2ch)(mlib_affine_param *param) 219 { 220 DECLAREVAR_BC(); 221 DTYPE *dstLineEnd; 222 mlib_d64 sat = (mlib_d64) 0x7F800000; 223 const mlib_f32 *mlib_filters_table; 224 225 if (filter == MLIB_BICUBIC) { 226 mlib_filters_table = mlib_filters_u8f_bc; 227 } 228 else { 229 mlib_filters_table = mlib_filters_u8f_bc2; 230 } 231 232 for (j = yStart; j <= yFinish; j++) { 233 mlib_d64 xf0, xf1, xf2, xf3; 234 mlib_d64 yf0, yf1, yf2, yf3; 235 mlib_d64 c0, c1, c2, c3, val0; 236 mlib_s32 filterpos, k; 237 mlib_f32 *fptr; 238 mlib_u8 s0, s1, s2, s3; 239 240 CLIP(2); 241 dstLineEnd = (DTYPE *) dstData + 2 * xRight; 242 243 for (k = 0; k < 2; k++) { 244 mlib_s32 X1 = X; 245 mlib_s32 Y1 = Y; 246 DTYPE *dPtr = dstPixelPtr + k; 247 248 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; 249 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); 250 251 xf0 = fptr[0]; 252 xf1 = fptr[1]; 253 xf2 = fptr[2]; 254 xf3 = fptr[3]; 255 256 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; 257 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); 258 259 yf0 = fptr[0]; 260 yf1 = fptr[1]; 261 yf2 = fptr[2]; 262 yf3 = fptr[3]; 263 264 xSrc = (X1 >> MLIB_SHIFT) - 1; 265 ySrc = (Y1 >> MLIB_SHIFT) - 1; 266 267 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k; 268 s0 = srcPixelPtr[0]; 269 s1 = srcPixelPtr[2]; 270 s2 = srcPixelPtr[4]; 271 s3 = srcPixelPtr[6]; 272 273 #ifdef __SUNPRO_C 274 #pragma pipeloop(0) 275 #endif /* __SUNPRO_C */ 276 for (; dPtr <= (dstLineEnd - 1); dPtr += 2) { 277 X1 += dX; 278 Y1 += dY; 279 280 c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 + 281 mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3); 282 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 283 c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[2]] * xf1 + 284 mlib_U82D64[srcPixelPtr[4]] * xf2 + mlib_U82D64[srcPixelPtr[6]] * xf3); 285 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 286 c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[2]] * xf1 + 287 mlib_U82D64[srcPixelPtr[4]] * xf2 + mlib_U82D64[srcPixelPtr[6]] * xf3); 288 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 289 c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[2]] * xf1 + 290 mlib_U82D64[srcPixelPtr[4]] * xf2 + mlib_U82D64[srcPixelPtr[6]] * xf3); 291 292 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; 293 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); 294 295 xf0 = fptr[0]; 296 xf1 = fptr[1]; 297 xf2 = fptr[2]; 298 xf3 = fptr[3]; 299 300 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3); 301 302 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; 303 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); 304 305 yf0 = fptr[0]; 306 yf1 = fptr[1]; 307 yf2 = fptr[2]; 308 yf3 = fptr[3]; 309 310 SAT8(dPtr[0]); 311 312 xSrc = (X1 >> MLIB_SHIFT) - 1; 313 ySrc = (Y1 >> MLIB_SHIFT) - 1; 314 315 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k; 316 s0 = srcPixelPtr[0]; 317 s1 = srcPixelPtr[2]; 318 s2 = srcPixelPtr[4]; 319 s3 = srcPixelPtr[6]; 320 } 321 322 c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 + 323 mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3); 324 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 325 c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[2]] * xf1 + 326 mlib_U82D64[srcPixelPtr[4]] * xf2 + mlib_U82D64[srcPixelPtr[6]] * xf3); 327 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 328 c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[2]] * xf1 + 329 mlib_U82D64[srcPixelPtr[4]] * xf2 + mlib_U82D64[srcPixelPtr[6]] * xf3); 330 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 331 c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[2]] * xf1 + 332 mlib_U82D64[srcPixelPtr[4]] * xf2 + mlib_U82D64[srcPixelPtr[6]] * xf3); 333 334 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3); 335 336 SAT8(dPtr[0]); 337 } 338 } 339 340 return MLIB_SUCCESS; 341 } 342 343 /***************************************************************/ 344 mlib_status FUN_NAME(3ch)(mlib_affine_param *param) 345 { 346 DECLAREVAR_BC(); 347 DTYPE *dstLineEnd; 348 mlib_d64 sat = (mlib_d64) 0x7F800000; 349 const mlib_f32 *mlib_filters_table; 350 351 if (filter == MLIB_BICUBIC) { 352 mlib_filters_table = mlib_filters_u8f_bc; 353 } 354 else { 355 mlib_filters_table = mlib_filters_u8f_bc2; 356 } 357 358 for (j = yStart; j <= yFinish; j++) { 359 mlib_d64 xf0, xf1, xf2, xf3; 360 mlib_d64 yf0, yf1, yf2, yf3; 361 mlib_d64 c0, c1, c2, c3, val0; 362 mlib_s32 filterpos, k; 363 mlib_f32 *fptr; 364 mlib_u8 s0, s1, s2, s3; 365 366 CLIP(3); 367 dstLineEnd = (DTYPE *) dstData + 3 * xRight; 368 369 for (k = 0; k < 3; k++) { 370 mlib_s32 X1 = X; 371 mlib_s32 Y1 = Y; 372 DTYPE *dPtr = dstPixelPtr + k; 373 374 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; 375 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); 376 377 xf0 = fptr[0]; 378 xf1 = fptr[1]; 379 xf2 = fptr[2]; 380 xf3 = fptr[3]; 381 382 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; 383 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); 384 385 yf0 = fptr[0]; 386 yf1 = fptr[1]; 387 yf2 = fptr[2]; 388 yf3 = fptr[3]; 389 390 xSrc = (X1 >> MLIB_SHIFT) - 1; 391 ySrc = (Y1 >> MLIB_SHIFT) - 1; 392 393 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k; 394 s0 = srcPixelPtr[0]; 395 s1 = srcPixelPtr[3]; 396 s2 = srcPixelPtr[6]; 397 s3 = srcPixelPtr[9]; 398 399 #ifdef __SUNPRO_C 400 #pragma pipeloop(0) 401 #endif /* __SUNPRO_C */ 402 for (; dPtr <= (dstLineEnd - 1); dPtr += 3) { 403 X1 += dX; 404 Y1 += dY; 405 406 c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 + 407 mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3); 408 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 409 c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[3]] * xf1 + 410 mlib_U82D64[srcPixelPtr[6]] * xf2 + mlib_U82D64[srcPixelPtr[9]] * xf3); 411 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 412 c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[3]] * xf1 + 413 mlib_U82D64[srcPixelPtr[6]] * xf2 + mlib_U82D64[srcPixelPtr[9]] * xf3); 414 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 415 c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[3]] * xf1 + 416 mlib_U82D64[srcPixelPtr[6]] * xf2 + mlib_U82D64[srcPixelPtr[9]] * xf3); 417 418 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; 419 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); 420 421 xf0 = fptr[0]; 422 xf1 = fptr[1]; 423 xf2 = fptr[2]; 424 xf3 = fptr[3]; 425 426 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3); 427 428 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; 429 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); 430 431 yf0 = fptr[0]; 432 yf1 = fptr[1]; 433 yf2 = fptr[2]; 434 yf3 = fptr[3]; 435 436 SAT8(dPtr[0]); 437 438 xSrc = (X1 >> MLIB_SHIFT) - 1; 439 ySrc = (Y1 >> MLIB_SHIFT) - 1; 440 441 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k; 442 s0 = srcPixelPtr[0]; 443 s1 = srcPixelPtr[3]; 444 s2 = srcPixelPtr[6]; 445 s3 = srcPixelPtr[9]; 446 } 447 448 c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 + 449 mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3); 450 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 451 c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[3]] * xf1 + 452 mlib_U82D64[srcPixelPtr[6]] * xf2 + mlib_U82D64[srcPixelPtr[9]] * xf3); 453 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 454 c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[3]] * xf1 + 455 mlib_U82D64[srcPixelPtr[6]] * xf2 + mlib_U82D64[srcPixelPtr[9]] * xf3); 456 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 457 c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[3]] * xf1 + 458 mlib_U82D64[srcPixelPtr[6]] * xf2 + mlib_U82D64[srcPixelPtr[9]] * xf3); 459 460 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3); 461 462 SAT8(dPtr[0]); 463 } 464 } 465 466 return MLIB_SUCCESS; 467 } 468 469 /***************************************************************/ 470 mlib_status FUN_NAME(4ch)(mlib_affine_param *param) 471 { 472 DECLAREVAR_BC(); 473 DTYPE *dstLineEnd; 474 mlib_d64 sat = (mlib_d64) 0x7F800000; 475 const mlib_f32 *mlib_filters_table; 476 477 if (filter == MLIB_BICUBIC) { 478 mlib_filters_table = mlib_filters_u8f_bc; 479 } 480 else { 481 mlib_filters_table = mlib_filters_u8f_bc2; 482 } 483 484 for (j = yStart; j <= yFinish; j++) { 485 mlib_d64 xf0, xf1, xf2, xf3; 486 mlib_d64 yf0, yf1, yf2, yf3; 487 mlib_d64 c0, c1, c2, c3, val0; 488 mlib_s32 filterpos, k; 489 mlib_f32 *fptr; 490 mlib_u8 s0, s1, s2, s3; 491 492 CLIP(4); 493 dstLineEnd = (DTYPE *) dstData + 4 * xRight; 494 495 for (k = 0; k < 4; k++) { 496 mlib_s32 X1 = X; 497 mlib_s32 Y1 = Y; 498 DTYPE *dPtr = dstPixelPtr + k; 499 500 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; 501 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); 502 503 xf0 = fptr[0]; 504 xf1 = fptr[1]; 505 xf2 = fptr[2]; 506 xf3 = fptr[3]; 507 508 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; 509 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); 510 511 yf0 = fptr[0]; 512 yf1 = fptr[1]; 513 yf2 = fptr[2]; 514 yf3 = fptr[3]; 515 516 xSrc = (X1 >> MLIB_SHIFT) - 1; 517 ySrc = (Y1 >> MLIB_SHIFT) - 1; 518 519 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k; 520 s0 = srcPixelPtr[0]; 521 s1 = srcPixelPtr[4]; 522 s2 = srcPixelPtr[8]; 523 s3 = srcPixelPtr[12]; 524 525 #ifdef __SUNPRO_C 526 #pragma pipeloop(0) 527 #endif /* __SUNPRO_C */ 528 for (; dPtr <= (dstLineEnd - 1); dPtr += 4) { 529 X1 += dX; 530 Y1 += dY; 531 532 c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 + 533 mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3); 534 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 535 c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[4]] * xf1 + 536 mlib_U82D64[srcPixelPtr[8]] * xf2 + mlib_U82D64[srcPixelPtr[12]] * xf3); 537 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 538 c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[4]] * xf1 + 539 mlib_U82D64[srcPixelPtr[8]] * xf2 + mlib_U82D64[srcPixelPtr[12]] * xf3); 540 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 541 c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[4]] * xf1 + 542 mlib_U82D64[srcPixelPtr[8]] * xf2 + mlib_U82D64[srcPixelPtr[12]] * xf3); 543 544 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; 545 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); 546 547 xf0 = fptr[0]; 548 xf1 = fptr[1]; 549 xf2 = fptr[2]; 550 xf3 = fptr[3]; 551 552 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3); 553 554 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; 555 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); 556 557 yf0 = fptr[0]; 558 yf1 = fptr[1]; 559 yf2 = fptr[2]; 560 yf3 = fptr[3]; 561 562 SAT8(dPtr[0]); 563 564 xSrc = (X1 >> MLIB_SHIFT) - 1; 565 ySrc = (Y1 >> MLIB_SHIFT) - 1; 566 567 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k; 568 s0 = srcPixelPtr[0]; 569 s1 = srcPixelPtr[4]; 570 s2 = srcPixelPtr[8]; 571 s3 = srcPixelPtr[12]; 572 } 573 574 c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 + 575 mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3); 576 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 577 c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[4]] * xf1 + 578 mlib_U82D64[srcPixelPtr[8]] * xf2 + mlib_U82D64[srcPixelPtr[12]] * xf3); 579 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 580 c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[4]] * xf1 + 581 mlib_U82D64[srcPixelPtr[8]] * xf2 + mlib_U82D64[srcPixelPtr[12]] * xf3); 582 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 583 c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[4]] * xf1 + 584 mlib_U82D64[srcPixelPtr[8]] * xf2 + mlib_U82D64[srcPixelPtr[12]] * xf3); 585 586 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3); 587 588 SAT8(dPtr[0]); 589 } 590 } 591 592 return MLIB_SUCCESS; 593 } 594 595 #else /* for x86, using integer multiplies is faster */ 596 597 #define SHIFT_X 12 598 #define ROUND_X 0 /* (1 << (SHIFT_X - 1)) */ 599 600 #define SHIFT_Y (14 + 14 - SHIFT_X) 601 #define ROUND_Y (1 << (SHIFT_Y - 1)) 602 603 /***************************************************************/ 604 /* Test for the presence of any "1" bit in bits 605 8 to 31 of val. If present, then val is either 606 negative or >255. If over/underflows of 8 bits 607 are uncommon, then this technique can be a win, 608 since only a single test, rather than two, is 609 necessary to determine if clamping is needed. 610 On the other hand, if over/underflows are common, 611 it adds an extra test. 612 */ 613 #define S32_TO_U8_SAT(DST) \ 614 if (val0 & 0xffffff00) { \ 615 if (val0 < MLIB_U8_MIN) \ 616 DST = MLIB_U8_MIN; \ 617 else \ 618 DST = MLIB_U8_MAX; \ 619 } else { \ 620 DST = (mlib_u8)val0; \ 621 } 622 623 /***************************************************************/ 624 mlib_status FUN_NAME(1ch)(mlib_affine_param *param) 625 { 626 DECLAREVAR_BC(); 627 DTYPE *dstLineEnd; 628 const mlib_s16 *mlib_filters_table; 629 630 if (filter == MLIB_BICUBIC) { 631 mlib_filters_table = (mlib_s16 *) mlib_filters_u8_bc; 632 } 633 else { 634 mlib_filters_table = (mlib_s16 *) mlib_filters_u8_bc2; 635 } 636 637 for (j = yStart; j <= yFinish; j++) { 638 mlib_s32 xf0, xf1, xf2, xf3; 639 mlib_s32 yf0, yf1, yf2, yf3; 640 mlib_s32 c0, c1, c2, c3, val0; 641 mlib_s32 filterpos; 642 mlib_s16 *fptr; 643 mlib_u8 s0, s1, s2, s3; 644 645 CLIP(1); 646 dstLineEnd = (DTYPE *) dstData + xRight; 647 648 filterpos = (X >> FILTER_SHIFT) & FILTER_MASK; 649 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); 650 651 xf0 = fptr[0]; 652 xf1 = fptr[1]; 653 xf2 = fptr[2]; 654 xf3 = fptr[3]; 655 656 filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK; 657 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); 658 659 yf0 = fptr[0]; 660 yf1 = fptr[1]; 661 yf2 = fptr[2]; 662 yf3 = fptr[3]; 663 664 xSrc = (X >> MLIB_SHIFT) - 1; 665 ySrc = (Y >> MLIB_SHIFT) - 1; 666 667 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc; 668 s0 = srcPixelPtr[0]; 669 s1 = srcPixelPtr[1]; 670 s2 = srcPixelPtr[2]; 671 s3 = srcPixelPtr[3]; 672 673 #ifdef __SUNPRO_C 674 #pragma pipeloop(0) 675 #endif /* __SUNPRO_C */ 676 for (; dstPixelPtr <= (dstLineEnd - 1); dstPixelPtr++) { 677 X += dX; 678 Y += dY; 679 680 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X; 681 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 682 c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 + 683 srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X; 684 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 685 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 + 686 srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X; 687 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 688 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 + 689 srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X; 690 691 filterpos = (X >> FILTER_SHIFT) & FILTER_MASK; 692 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); 693 694 xf0 = fptr[0]; 695 xf1 = fptr[1]; 696 xf2 = fptr[2]; 697 xf3 = fptr[3]; 698 699 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y; 700 701 filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK; 702 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); 703 704 yf0 = fptr[0]; 705 yf1 = fptr[1]; 706 yf2 = fptr[2]; 707 yf3 = fptr[3]; 708 709 S32_TO_U8_SAT(dstPixelPtr[0]); 710 711 xSrc = (X >> MLIB_SHIFT) - 1; 712 ySrc = (Y >> MLIB_SHIFT) - 1; 713 714 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc; 715 s0 = srcPixelPtr[0]; 716 s1 = srcPixelPtr[1]; 717 s2 = srcPixelPtr[2]; 718 s3 = srcPixelPtr[3]; 719 } 720 721 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X; 722 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 723 c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 + 724 srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X; 725 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 726 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 + 727 srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X; 728 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 729 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 + 730 srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X; 731 732 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y; 733 734 S32_TO_U8_SAT(dstPixelPtr[0]); 735 } 736 737 return MLIB_SUCCESS; 738 } 739 740 /***************************************************************/ 741 mlib_status FUN_NAME(2ch)(mlib_affine_param *param) 742 { 743 DECLAREVAR_BC(); 744 DTYPE *dstLineEnd; 745 const mlib_s16 *mlib_filters_table; 746 747 if (filter == MLIB_BICUBIC) { 748 mlib_filters_table = (mlib_s16 *) mlib_filters_u8_bc; 749 } 750 else { 751 mlib_filters_table = (mlib_s16 *) mlib_filters_u8_bc2; 752 } 753 754 for (j = yStart; j <= yFinish; j++) { 755 mlib_s32 xf0, xf1, xf2, xf3; 756 mlib_s32 yf0, yf1, yf2, yf3; 757 mlib_s32 c0, c1, c2, c3, val0; 758 mlib_s32 filterpos, k; 759 mlib_s16 *fptr; 760 mlib_u8 s0, s1, s2, s3; 761 762 CLIP(2); 763 dstLineEnd = (DTYPE *) dstData + 2 * xRight; 764 765 for (k = 0; k < 2; k++) { 766 mlib_s32 X1 = X; 767 mlib_s32 Y1 = Y; 768 DTYPE *dPtr = dstPixelPtr + k; 769 770 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; 771 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); 772 773 xf0 = fptr[0]; 774 xf1 = fptr[1]; 775 xf2 = fptr[2]; 776 xf3 = fptr[3]; 777 778 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; 779 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); 780 781 yf0 = fptr[0]; 782 yf1 = fptr[1]; 783 yf2 = fptr[2]; 784 yf3 = fptr[3]; 785 786 xSrc = (X1 >> MLIB_SHIFT) - 1; 787 ySrc = (Y1 >> MLIB_SHIFT) - 1; 788 789 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k; 790 s0 = srcPixelPtr[0]; 791 s1 = srcPixelPtr[2]; 792 s2 = srcPixelPtr[4]; 793 s3 = srcPixelPtr[6]; 794 795 #ifdef __SUNPRO_C 796 #pragma pipeloop(0) 797 #endif /* __SUNPRO_C */ 798 for (; dPtr <= (dstLineEnd - 1); dPtr += 2) { 799 X1 += dX; 800 Y1 += dY; 801 802 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X; 803 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 804 c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 + 805 srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X; 806 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 807 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 + 808 srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X; 809 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 810 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 + 811 srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X; 812 813 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; 814 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); 815 816 xf0 = fptr[0]; 817 xf1 = fptr[1]; 818 xf2 = fptr[2]; 819 xf3 = fptr[3]; 820 821 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y; 822 823 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; 824 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); 825 826 yf0 = fptr[0]; 827 yf1 = fptr[1]; 828 yf2 = fptr[2]; 829 yf3 = fptr[3]; 830 831 S32_TO_U8_SAT(dPtr[0]); 832 833 xSrc = (X1 >> MLIB_SHIFT) - 1; 834 ySrc = (Y1 >> MLIB_SHIFT) - 1; 835 836 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k; 837 s0 = srcPixelPtr[0]; 838 s1 = srcPixelPtr[2]; 839 s2 = srcPixelPtr[4]; 840 s3 = srcPixelPtr[6]; 841 } 842 843 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X; 844 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 845 c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 + 846 srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X; 847 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 848 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 + 849 srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X; 850 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 851 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 + 852 srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X; 853 854 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y; 855 856 S32_TO_U8_SAT(dPtr[0]); 857 } 858 } 859 860 return MLIB_SUCCESS; 861 } 862 863 /***************************************************************/ 864 mlib_status FUN_NAME(3ch)(mlib_affine_param *param) 865 { 866 DECLAREVAR_BC(); 867 DTYPE *dstLineEnd; 868 const mlib_s16 *mlib_filters_table; 869 870 if (filter == MLIB_BICUBIC) { 871 mlib_filters_table = (mlib_s16 *) mlib_filters_u8_bc; 872 } 873 else { 874 mlib_filters_table = (mlib_s16 *) mlib_filters_u8_bc2; 875 } 876 877 for (j = yStart; j <= yFinish; j++) { 878 mlib_s32 xf0, xf1, xf2, xf3; 879 mlib_s32 yf0, yf1, yf2, yf3; 880 mlib_s32 c0, c1, c2, c3, val0; 881 mlib_s32 filterpos, k; 882 mlib_s16 *fptr; 883 mlib_u8 s0, s1, s2, s3; 884 885 CLIP(3); 886 dstLineEnd = (DTYPE *) dstData + 3 * xRight; 887 888 for (k = 0; k < 3; k++) { 889 mlib_s32 X1 = X; 890 mlib_s32 Y1 = Y; 891 DTYPE *dPtr = dstPixelPtr + k; 892 893 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; 894 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); 895 896 xf0 = fptr[0]; 897 xf1 = fptr[1]; 898 xf2 = fptr[2]; 899 xf3 = fptr[3]; 900 901 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; 902 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); 903 904 yf0 = fptr[0]; 905 yf1 = fptr[1]; 906 yf2 = fptr[2]; 907 yf3 = fptr[3]; 908 909 xSrc = (X1 >> MLIB_SHIFT) - 1; 910 ySrc = (Y1 >> MLIB_SHIFT) - 1; 911 912 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k; 913 s0 = srcPixelPtr[0]; 914 s1 = srcPixelPtr[3]; 915 s2 = srcPixelPtr[6]; 916 s3 = srcPixelPtr[9]; 917 918 #ifdef __SUNPRO_C 919 #pragma pipeloop(0) 920 #endif /* __SUNPRO_C */ 921 for (; dPtr <= (dstLineEnd - 1); dPtr += 3) { 922 X1 += dX; 923 Y1 += dY; 924 925 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X; 926 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 927 c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 + 928 srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X; 929 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 930 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 + 931 srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X; 932 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 933 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 + 934 srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X; 935 936 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; 937 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); 938 939 xf0 = fptr[0]; 940 xf1 = fptr[1]; 941 xf2 = fptr[2]; 942 xf3 = fptr[3]; 943 944 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y; 945 946 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; 947 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); 948 949 yf0 = fptr[0]; 950 yf1 = fptr[1]; 951 yf2 = fptr[2]; 952 yf3 = fptr[3]; 953 954 S32_TO_U8_SAT(dPtr[0]); 955 956 xSrc = (X1 >> MLIB_SHIFT) - 1; 957 ySrc = (Y1 >> MLIB_SHIFT) - 1; 958 959 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k; 960 s0 = srcPixelPtr[0]; 961 s1 = srcPixelPtr[3]; 962 s2 = srcPixelPtr[6]; 963 s3 = srcPixelPtr[9]; 964 } 965 966 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X; 967 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 968 c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 + 969 srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X; 970 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 971 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 + 972 srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X; 973 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 974 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 + 975 srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X; 976 977 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y; 978 979 S32_TO_U8_SAT(dPtr[0]); 980 } 981 } 982 983 return MLIB_SUCCESS; 984 } 985 986 /***************************************************************/ 987 mlib_status FUN_NAME(4ch)(mlib_affine_param *param) 988 { 989 DECLAREVAR_BC(); 990 DTYPE *dstLineEnd; 991 const mlib_s16 *mlib_filters_table; 992 993 if (filter == MLIB_BICUBIC) { 994 mlib_filters_table = (mlib_s16 *) mlib_filters_u8_bc; 995 } 996 else { 997 mlib_filters_table = (mlib_s16 *) mlib_filters_u8_bc2; 998 } 999 1000 for (j = yStart; j <= yFinish; j++) { 1001 mlib_s32 xf0, xf1, xf2, xf3; 1002 mlib_s32 yf0, yf1, yf2, yf3; 1003 mlib_s32 c0, c1, c2, c3, val0; 1004 mlib_s32 filterpos, k; 1005 mlib_s16 *fptr; 1006 mlib_u8 s0, s1, s2, s3; 1007 1008 CLIP(4); 1009 dstLineEnd = (DTYPE *) dstData + 4 * xRight; 1010 1011 for (k = 0; k < 4; k++) { 1012 mlib_s32 X1 = X; 1013 mlib_s32 Y1 = Y; 1014 DTYPE *dPtr = dstPixelPtr + k; 1015 1016 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; 1017 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); 1018 1019 xf0 = fptr[0]; 1020 xf1 = fptr[1]; 1021 xf2 = fptr[2]; 1022 xf3 = fptr[3]; 1023 1024 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; 1025 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); 1026 1027 yf0 = fptr[0]; 1028 yf1 = fptr[1]; 1029 yf2 = fptr[2]; 1030 yf3 = fptr[3]; 1031 1032 xSrc = (X1 >> MLIB_SHIFT) - 1; 1033 ySrc = (Y1 >> MLIB_SHIFT) - 1; 1034 1035 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k; 1036 s0 = srcPixelPtr[0]; 1037 s1 = srcPixelPtr[4]; 1038 s2 = srcPixelPtr[8]; 1039 s3 = srcPixelPtr[12]; 1040 1041 #ifdef __SUNPRO_C 1042 #pragma pipeloop(0) 1043 #endif /* __SUNPRO_C */ 1044 for (; dPtr <= (dstLineEnd - 1); dPtr += 4) { 1045 X1 += dX; 1046 Y1 += dY; 1047 1048 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X; 1049 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 1050 c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 + 1051 srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X; 1052 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 1053 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 + 1054 srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X; 1055 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 1056 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 + 1057 srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X; 1058 1059 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; 1060 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); 1061 1062 xf0 = fptr[0]; 1063 xf1 = fptr[1]; 1064 xf2 = fptr[2]; 1065 xf3 = fptr[3]; 1066 1067 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y; 1068 1069 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; 1070 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); 1071 1072 yf0 = fptr[0]; 1073 yf1 = fptr[1]; 1074 yf2 = fptr[2]; 1075 yf3 = fptr[3]; 1076 1077 S32_TO_U8_SAT(dPtr[0]); 1078 1079 xSrc = (X1 >> MLIB_SHIFT) - 1; 1080 ySrc = (Y1 >> MLIB_SHIFT) - 1; 1081 1082 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k; 1083 s0 = srcPixelPtr[0]; 1084 s1 = srcPixelPtr[4]; 1085 s2 = srcPixelPtr[8]; 1086 s3 = srcPixelPtr[12]; 1087 } 1088 1089 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X; 1090 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 1091 c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 + 1092 srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X; 1093 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 1094 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 + 1095 srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X; 1096 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); 1097 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 + 1098 srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X; 1099 1100 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y; 1101 1102 S32_TO_U8_SAT(dPtr[0]); 1103 } 1104 } 1105 1106 return MLIB_SUCCESS; 1107 } 1108 1109 #endif /* __sparc ( for SPARC, using floating-point multiplies is faster ) */ 1110 1111 /***************************************************************/