54 * dstYStride stride of destination image
55 * is_affine indicator (Affine - GridWarp)
56 * srcYStride stride of source image
57 * filter type of resampling filter
58 *
59 * DESCRIPTION
60 * The functions step along the lines from xLeft to xRight and apply
61 * the bicubic filtering.
62 *
63 */
64
65 #include "mlib_ImageAffine.h"
66
/* Element type behind the source and destination pixel pointers used by
   the routines in this file. */
67 #define DTYPE mlib_u8
68
/* Builds the name of one per-channel-count entry point by token pasting,
   e.g. FUN_NAME(1ch) -> mlib_ImageAffine_u8_1ch_bc. */
69 #define FUN_NAME(CHAN) mlib_ImageAffine_u8_##CHAN##_bc
70
/* NOTE(review): not referenced directly in the visible code; presumably
   consumed by the FILTER_SHIFT / FILTER_MASK definitions in
   mlib_ImageAffine.h - confirm. */
71 #define FILTER_BITS 8
72
73 /***************************************************************/
74 #ifdef __sparc /* for SPARC, using floating-point multiplies is faster */
75
/* Overrides any earlier FILTER_ELEM_BITS for the floating-point branch.
   NOTE(review): presumably log2 of the byte size of one filter-table entry
   (four mlib_f32 taps = 16 bytes) - confirm against mlib_ImageAffine.h. */
76 #undef FILTER_ELEM_BITS
77 #define FILTER_ELEM_BITS 4
78
/*
 * SAT8(DST): convert the filtered value held in the caller's local `val0`
 * to an 8-bit result and store it in DST.
 *
 * Both variants rely on two locals of the expanding function:
 *   val0 - the floating-point filter result (modified by the second variant)
 *   sat  - the bias that is subtracted before the conversion
 * After the bias is removed the value is converted to mlib_s32, its top
 * byte is taken (>> 24) and the sign bit of that byte is flipped (^ 0x80).
 *
 * Each variant expands to a single statement / expression so that it can be
 * used safely as the body of an unbraced if / else.
 */
#ifdef MLIB_USE_FTOI_CLAMPING

/* The float -> int conversion itself is relied upon to clamp out-of-range
   values, so no explicit range test is needed here. */
#define SAT8(DST)                                                       \
  ((DST) = (((mlib_s32)(val0 - sat) >> 24) ^ 0x80))

#else

/* Explicit clamping: values at or beyond the mlib_s32 range are stored as
   MLIB_U8_MAX / MLIB_U8_MIN instead of being converted. */
#define SAT8(DST)                                                       \
  do {                                                                  \
    val0 -= sat;                                                        \
    if (val0 >= MLIB_S32_MAX)                                           \
      (DST) = MLIB_U8_MAX;                                              \
    else if (val0 <= MLIB_S32_MIN)                                      \
      (DST) = MLIB_U8_MIN;                                              \
    else                                                                \
      (DST) = ((mlib_s32)val0 >> 24) ^ 0x80;                            \
  } while (0)

#endif /* MLIB_USE_FTOI_CLAMPING */
96
97 /***************************************************************/
98 mlib_status FUN_NAME(1ch)(mlib_affine_param *param)
99 {
100 DECLAREVAR_BC();
101 DTYPE *dstLineEnd;
102 mlib_d64 sat = (mlib_d64) 0x7F800000;
103 const mlib_f32 *mlib_filters_table;
104
105 if (filter == MLIB_BICUBIC) {
106 mlib_filters_table = mlib_filters_u8f_bc;
107 }
108 else {
109 mlib_filters_table = mlib_filters_u8f_bc2;
110 }
111
112 for (j = yStart; j <= yFinish; j++) {
113 mlib_d64 xf0, xf1, xf2, xf3;
114 mlib_d64 yf0, yf1, yf2, yf3;
115 mlib_d64 c0, c1, c2, c3, val0;
116 mlib_s32 filterpos;
117 mlib_f32 *fptr;
118 mlib_u8 s0, s1, s2, s3;
119
120 CLIP(1);
121 dstLineEnd = (DTYPE *) dstData + xRight;
122
123 filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
124 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
125
126 xf0 = fptr[0];
127 xf1 = fptr[1];
128 xf2 = fptr[2];
129 xf3 = fptr[3];
130
131 filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
132 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
133
134 yf0 = fptr[0];
135 yf1 = fptr[1];
136 yf2 = fptr[2];
137 yf3 = fptr[3];
138
139 xSrc = (X >> MLIB_SHIFT) - 1;
140 ySrc = (Y >> MLIB_SHIFT) - 1;
141
142 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
143 s0 = srcPixelPtr[0];
144 s1 = srcPixelPtr[1];
145 s2 = srcPixelPtr[2];
146 s3 = srcPixelPtr[3];
147
148 #ifdef __SUNPRO_C
149 #pragma pipeloop(0)
150 #endif /* __SUNPRO_C */
151 for (; dstPixelPtr <= (dstLineEnd - 1); dstPixelPtr++) {
152 X += dX;
153 Y += dY;
154
155 c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 +
156 mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3);
157 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
158 c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[1]] * xf1 +
159 mlib_U82D64[srcPixelPtr[2]] * xf2 + mlib_U82D64[srcPixelPtr[3]] * xf3);
160 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
161 c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[1]] * xf1 +
162 mlib_U82D64[srcPixelPtr[2]] * xf2 + mlib_U82D64[srcPixelPtr[3]] * xf3);
163 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
164 c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[1]] * xf1 +
165 mlib_U82D64[srcPixelPtr[2]] * xf2 + mlib_U82D64[srcPixelPtr[3]] * xf3);
166
167 filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
168 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
169
170 xf0 = fptr[0];
171 xf1 = fptr[1];
172 xf2 = fptr[2];
173 xf3 = fptr[3];
174
175 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
176
177 filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
178 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
179
180 yf0 = fptr[0];
181 yf1 = fptr[1];
182 yf2 = fptr[2];
183 yf3 = fptr[3];
184
185 SAT8(dstPixelPtr[0]);
186
187 xSrc = (X >> MLIB_SHIFT) - 1;
188 ySrc = (Y >> MLIB_SHIFT) - 1;
189
190 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
191 s0 = srcPixelPtr[0];
192 s1 = srcPixelPtr[1];
193 s2 = srcPixelPtr[2];
194 s3 = srcPixelPtr[3];
195 }
196
197 c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 +
198 mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3);
199 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
200 c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[1]] * xf1 +
201 mlib_U82D64[srcPixelPtr[2]] * xf2 + mlib_U82D64[srcPixelPtr[3]] * xf3);
202 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
203 c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[1]] * xf1 +
204 mlib_U82D64[srcPixelPtr[2]] * xf2 + mlib_U82D64[srcPixelPtr[3]] * xf3);
205 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
206 c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[1]] * xf1 +
207 mlib_U82D64[srcPixelPtr[2]] * xf2 + mlib_U82D64[srcPixelPtr[3]] * xf3);
208
209 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
210
211 SAT8(dstPixelPtr[0]);
212 }
213
214 return MLIB_SUCCESS;
215 }
216
217 /***************************************************************/
218 mlib_status FUN_NAME(2ch)(mlib_affine_param *param)
219 {
220 DECLAREVAR_BC();
221 DTYPE *dstLineEnd;
222 mlib_d64 sat = (mlib_d64) 0x7F800000;
223 const mlib_f32 *mlib_filters_table;
224
225 if (filter == MLIB_BICUBIC) {
226 mlib_filters_table = mlib_filters_u8f_bc;
227 }
228 else {
229 mlib_filters_table = mlib_filters_u8f_bc2;
230 }
231
232 for (j = yStart; j <= yFinish; j++) {
233 mlib_d64 xf0, xf1, xf2, xf3;
234 mlib_d64 yf0, yf1, yf2, yf3;
235 mlib_d64 c0, c1, c2, c3, val0;
236 mlib_s32 filterpos, k;
237 mlib_f32 *fptr;
238 mlib_u8 s0, s1, s2, s3;
239
240 CLIP(2);
241 dstLineEnd = (DTYPE *) dstData + 2 * xRight;
242
243 for (k = 0; k < 2; k++) {
244 mlib_s32 X1 = X;
245 mlib_s32 Y1 = Y;
246 DTYPE *dPtr = dstPixelPtr + k;
247
248 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
249 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
250
251 xf0 = fptr[0];
252 xf1 = fptr[1];
253 xf2 = fptr[2];
254 xf3 = fptr[3];
255
256 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
257 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
258
259 yf0 = fptr[0];
260 yf1 = fptr[1];
261 yf2 = fptr[2];
262 yf3 = fptr[3];
263
264 xSrc = (X1 >> MLIB_SHIFT) - 1;
265 ySrc = (Y1 >> MLIB_SHIFT) - 1;
266
267 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
268 s0 = srcPixelPtr[0];
269 s1 = srcPixelPtr[2];
270 s2 = srcPixelPtr[4];
271 s3 = srcPixelPtr[6];
272
273 #ifdef __SUNPRO_C
274 #pragma pipeloop(0)
275 #endif /* __SUNPRO_C */
276 for (; dPtr <= (dstLineEnd - 1); dPtr += 2) {
277 X1 += dX;
278 Y1 += dY;
279
280 c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 +
281 mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3);
282 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
283 c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[2]] * xf1 +
284 mlib_U82D64[srcPixelPtr[4]] * xf2 + mlib_U82D64[srcPixelPtr[6]] * xf3);
285 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
286 c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[2]] * xf1 +
287 mlib_U82D64[srcPixelPtr[4]] * xf2 + mlib_U82D64[srcPixelPtr[6]] * xf3);
288 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
289 c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[2]] * xf1 +
290 mlib_U82D64[srcPixelPtr[4]] * xf2 + mlib_U82D64[srcPixelPtr[6]] * xf3);
291
292 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
293 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
294
295 xf0 = fptr[0];
296 xf1 = fptr[1];
297 xf2 = fptr[2];
298 xf3 = fptr[3];
299
300 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
301
302 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
303 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
304
305 yf0 = fptr[0];
306 yf1 = fptr[1];
307 yf2 = fptr[2];
308 yf3 = fptr[3];
309
310 SAT8(dPtr[0]);
311
312 xSrc = (X1 >> MLIB_SHIFT) - 1;
313 ySrc = (Y1 >> MLIB_SHIFT) - 1;
314
315 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
316 s0 = srcPixelPtr[0];
317 s1 = srcPixelPtr[2];
318 s2 = srcPixelPtr[4];
319 s3 = srcPixelPtr[6];
320 }
321
322 c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 +
323 mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3);
324 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
325 c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[2]] * xf1 +
326 mlib_U82D64[srcPixelPtr[4]] * xf2 + mlib_U82D64[srcPixelPtr[6]] * xf3);
327 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
328 c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[2]] * xf1 +
329 mlib_U82D64[srcPixelPtr[4]] * xf2 + mlib_U82D64[srcPixelPtr[6]] * xf3);
330 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
331 c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[2]] * xf1 +
332 mlib_U82D64[srcPixelPtr[4]] * xf2 + mlib_U82D64[srcPixelPtr[6]] * xf3);
333
334 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
335
336 SAT8(dPtr[0]);
337 }
338 }
339
340 return MLIB_SUCCESS;
341 }
342
343 /***************************************************************/
344 mlib_status FUN_NAME(3ch)(mlib_affine_param *param)
345 {
346 DECLAREVAR_BC();
347 DTYPE *dstLineEnd;
348 mlib_d64 sat = (mlib_d64) 0x7F800000;
349 const mlib_f32 *mlib_filters_table;
350
351 if (filter == MLIB_BICUBIC) {
352 mlib_filters_table = mlib_filters_u8f_bc;
353 }
354 else {
355 mlib_filters_table = mlib_filters_u8f_bc2;
356 }
357
358 for (j = yStart; j <= yFinish; j++) {
359 mlib_d64 xf0, xf1, xf2, xf3;
360 mlib_d64 yf0, yf1, yf2, yf3;
361 mlib_d64 c0, c1, c2, c3, val0;
362 mlib_s32 filterpos, k;
363 mlib_f32 *fptr;
364 mlib_u8 s0, s1, s2, s3;
365
366 CLIP(3);
367 dstLineEnd = (DTYPE *) dstData + 3 * xRight;
368
369 for (k = 0; k < 3; k++) {
370 mlib_s32 X1 = X;
371 mlib_s32 Y1 = Y;
372 DTYPE *dPtr = dstPixelPtr + k;
373
374 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
375 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
376
377 xf0 = fptr[0];
378 xf1 = fptr[1];
379 xf2 = fptr[2];
380 xf3 = fptr[3];
381
382 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
383 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
384
385 yf0 = fptr[0];
386 yf1 = fptr[1];
387 yf2 = fptr[2];
388 yf3 = fptr[3];
389
390 xSrc = (X1 >> MLIB_SHIFT) - 1;
391 ySrc = (Y1 >> MLIB_SHIFT) - 1;
392
393 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
394 s0 = srcPixelPtr[0];
395 s1 = srcPixelPtr[3];
396 s2 = srcPixelPtr[6];
397 s3 = srcPixelPtr[9];
398
399 #ifdef __SUNPRO_C
400 #pragma pipeloop(0)
401 #endif /* __SUNPRO_C */
402 for (; dPtr <= (dstLineEnd - 1); dPtr += 3) {
403 X1 += dX;
404 Y1 += dY;
405
406 c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 +
407 mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3);
408 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
409 c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[3]] * xf1 +
410 mlib_U82D64[srcPixelPtr[6]] * xf2 + mlib_U82D64[srcPixelPtr[9]] * xf3);
411 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
412 c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[3]] * xf1 +
413 mlib_U82D64[srcPixelPtr[6]] * xf2 + mlib_U82D64[srcPixelPtr[9]] * xf3);
414 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
415 c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[3]] * xf1 +
416 mlib_U82D64[srcPixelPtr[6]] * xf2 + mlib_U82D64[srcPixelPtr[9]] * xf3);
417
418 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
419 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
420
421 xf0 = fptr[0];
422 xf1 = fptr[1];
423 xf2 = fptr[2];
424 xf3 = fptr[3];
425
426 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
427
428 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
429 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
430
431 yf0 = fptr[0];
432 yf1 = fptr[1];
433 yf2 = fptr[2];
434 yf3 = fptr[3];
435
436 SAT8(dPtr[0]);
437
438 xSrc = (X1 >> MLIB_SHIFT) - 1;
439 ySrc = (Y1 >> MLIB_SHIFT) - 1;
440
441 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
442 s0 = srcPixelPtr[0];
443 s1 = srcPixelPtr[3];
444 s2 = srcPixelPtr[6];
445 s3 = srcPixelPtr[9];
446 }
447
448 c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 +
449 mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3);
450 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
451 c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[3]] * xf1 +
452 mlib_U82D64[srcPixelPtr[6]] * xf2 + mlib_U82D64[srcPixelPtr[9]] * xf3);
453 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
454 c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[3]] * xf1 +
455 mlib_U82D64[srcPixelPtr[6]] * xf2 + mlib_U82D64[srcPixelPtr[9]] * xf3);
456 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
457 c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[3]] * xf1 +
458 mlib_U82D64[srcPixelPtr[6]] * xf2 + mlib_U82D64[srcPixelPtr[9]] * xf3);
459
460 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
461
462 SAT8(dPtr[0]);
463 }
464 }
465
466 return MLIB_SUCCESS;
467 }
468
469 /***************************************************************/
470 mlib_status FUN_NAME(4ch)(mlib_affine_param *param)
471 {
472 DECLAREVAR_BC();
473 DTYPE *dstLineEnd;
474 mlib_d64 sat = (mlib_d64) 0x7F800000;
475 const mlib_f32 *mlib_filters_table;
476
477 if (filter == MLIB_BICUBIC) {
478 mlib_filters_table = mlib_filters_u8f_bc;
479 }
480 else {
481 mlib_filters_table = mlib_filters_u8f_bc2;
482 }
483
484 for (j = yStart; j <= yFinish; j++) {
485 mlib_d64 xf0, xf1, xf2, xf3;
486 mlib_d64 yf0, yf1, yf2, yf3;
487 mlib_d64 c0, c1, c2, c3, val0;
488 mlib_s32 filterpos, k;
489 mlib_f32 *fptr;
490 mlib_u8 s0, s1, s2, s3;
491
492 CLIP(4);
493 dstLineEnd = (DTYPE *) dstData + 4 * xRight;
494
495 for (k = 0; k < 4; k++) {
496 mlib_s32 X1 = X;
497 mlib_s32 Y1 = Y;
498 DTYPE *dPtr = dstPixelPtr + k;
499
500 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
501 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
502
503 xf0 = fptr[0];
504 xf1 = fptr[1];
505 xf2 = fptr[2];
506 xf3 = fptr[3];
507
508 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
509 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
510
511 yf0 = fptr[0];
512 yf1 = fptr[1];
513 yf2 = fptr[2];
514 yf3 = fptr[3];
515
516 xSrc = (X1 >> MLIB_SHIFT) - 1;
517 ySrc = (Y1 >> MLIB_SHIFT) - 1;
518
519 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
520 s0 = srcPixelPtr[0];
521 s1 = srcPixelPtr[4];
522 s2 = srcPixelPtr[8];
523 s3 = srcPixelPtr[12];
524
525 #ifdef __SUNPRO_C
526 #pragma pipeloop(0)
527 #endif /* __SUNPRO_C */
528 for (; dPtr <= (dstLineEnd - 1); dPtr += 4) {
529 X1 += dX;
530 Y1 += dY;
531
532 c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 +
533 mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3);
534 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
535 c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[4]] * xf1 +
536 mlib_U82D64[srcPixelPtr[8]] * xf2 + mlib_U82D64[srcPixelPtr[12]] * xf3);
537 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
538 c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[4]] * xf1 +
539 mlib_U82D64[srcPixelPtr[8]] * xf2 + mlib_U82D64[srcPixelPtr[12]] * xf3);
540 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
541 c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[4]] * xf1 +
542 mlib_U82D64[srcPixelPtr[8]] * xf2 + mlib_U82D64[srcPixelPtr[12]] * xf3);
543
544 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
545 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
546
547 xf0 = fptr[0];
548 xf1 = fptr[1];
549 xf2 = fptr[2];
550 xf3 = fptr[3];
551
552 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
553
554 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
555 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
556
557 yf0 = fptr[0];
558 yf1 = fptr[1];
559 yf2 = fptr[2];
560 yf3 = fptr[3];
561
562 SAT8(dPtr[0]);
563
564 xSrc = (X1 >> MLIB_SHIFT) - 1;
565 ySrc = (Y1 >> MLIB_SHIFT) - 1;
566
567 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
568 s0 = srcPixelPtr[0];
569 s1 = srcPixelPtr[4];
570 s2 = srcPixelPtr[8];
571 s3 = srcPixelPtr[12];
572 }
573
574 c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 +
575 mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3);
576 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
577 c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[4]] * xf1 +
578 mlib_U82D64[srcPixelPtr[8]] * xf2 + mlib_U82D64[srcPixelPtr[12]] * xf3);
579 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
580 c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[4]] * xf1 +
581 mlib_U82D64[srcPixelPtr[8]] * xf2 + mlib_U82D64[srcPixelPtr[12]] * xf3);
582 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
583 c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[4]] * xf1 +
584 mlib_U82D64[srcPixelPtr[8]] * xf2 + mlib_U82D64[srcPixelPtr[12]] * xf3);
585
586 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
587
588 SAT8(dPtr[0]);
589 }
590 }
591
592 return MLIB_SUCCESS;
593 }
594
595 #else /* for x86, using integer multiplies is faster */
596
/* Right shift applied to every horizontal 4-tap sum (c0..c3) in the
   integer code path. */
597 #define SHIFT_X 12
/* Bias added to a horizontal sum before the SHIFT_X shift.  It is 0, so the
   horizontal pass truncates; the value kept in the trailing comment is the
   half-unit that would make the shift round to nearest instead. */
598 #define ROUND_X 0 /* (1 << (SHIFT_X - 1)) */
599
/* Right shift applied to the vertical sum that produces val0; evaluates to
   16 with SHIFT_X == 12.  NOTE(review): the 14 + 14 presumably reflects 14
   fractional bits in each of the two filter taps multiplied together -
   confirm against the integer filter tables. */
600 #define SHIFT_Y (14 + 14 - SHIFT_X)
/* Half of 1 << SHIFT_Y, added before the final shift so that the vertical
   pass rounds to nearest. */
601 #define ROUND_Y (1 << (SHIFT_Y - 1))
602
603 /***************************************************************/
604 /* Test for the presence of any "1" bit in bits
605 8 to 31 of val. If present, then val is either
606 negative or >255. If over/underflows of 8 bits
607 are uncommon, then this technique can be a win,
608 since only a single test, rather than two, is
609 necessary to determine if clamping is needed.
610 On the other hand, if over/underflows are common,
611 it adds an extra test.
612 */
613 #define S32_TO_U8_SAT(DST) \
614 if (val0 & 0xffffff00) { \
615 if (val0 < MLIB_U8_MIN) \
1088
1089 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
1090 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1091 c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
1092 srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
1093 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1094 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
1095 srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
1096 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1097 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
1098 srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
1099
1100 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
1101
1102 S32_TO_U8_SAT(dPtr[0]);
1103 }
1104 }
1105
1106 return MLIB_SUCCESS;
1107 }
1108
1109 #endif /* __sparc ( for SPARC, using floating-point multiplies is faster ) */
1110
1111 /***************************************************************/
|
54 * dstYStride stride of destination image
55 * is_affine indicator (Affine - GridWarp)
56 * srcYStride stride of source image
57 * filter type of resampling filter
58 *
59 * DESCRIPTION
60 * The functions step along the lines from xLeft to xRight and apply
61 * the bicubic filtering.
62 *
63 */
64
65 #include "mlib_ImageAffine.h"
66
/* Element type behind the source pixel pointers used by the routines in
   this file. */
67 #define DTYPE mlib_u8
68
/* Builds the name of one per-channel-count entry point by token pasting,
   e.g. FUN_NAME(4ch) -> mlib_ImageAffine_u8_4ch_bc. */
69 #define FUN_NAME(CHAN) mlib_ImageAffine_u8_##CHAN##_bc
70
/* NOTE(review): not referenced directly in the visible code; presumably
   consumed by the FILTER_SHIFT / FILTER_MASK definitions in
   mlib_ImageAffine.h - confirm. */
71 #define FILTER_BITS 8
72
73 /***************************************************************/
74 /* for x86, using integer multiplies is faster */
75
/* Right shift applied to every horizontal 4-tap sum (c0..c3). */
76 #define SHIFT_X 12
/* Bias added to a horizontal sum before the SHIFT_X shift.  It is 0, so the
   horizontal pass truncates; the value kept in the trailing comment is the
   half-unit that would make the shift round to nearest instead. */
77 #define ROUND_X 0 /* (1 << (SHIFT_X - 1)) */
78
/* Right shift applied to the vertical sum that produces val0; evaluates to
   16 with SHIFT_X == 12.  NOTE(review): the 14 + 14 presumably reflects 14
   fractional bits in each of the two filter taps multiplied together -
   confirm against the integer filter tables. */
79 #define SHIFT_Y (14 + 14 - SHIFT_X)
/* Half of 1 << SHIFT_Y, added before the final shift so that the vertical
   pass rounds to nearest. */
80 #define ROUND_Y (1 << (SHIFT_Y - 1))
81
82 /***************************************************************/
83 /* Test for the presence of any "1" bit in bits
84 8 to 31 of val. If present, then val is either
85 negative or >255. If over/underflows of 8 bits
86 are uncommon, then this technique can be a win,
87 since only a single test, rather than two, is
88 necessary to determine if clamping is needed.
89 On the other hand, if over/underflows are common,
90 it adds an extra test.
91 */
92 #define S32_TO_U8_SAT(DST) \
93 if (val0 & 0xffffff00) { \
94 if (val0 < MLIB_U8_MIN) \
567
568 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
569 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
570 c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
571 srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
572 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
573 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
574 srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
575 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
576 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
577 srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
578
579 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
580
581 S32_TO_U8_SAT(dPtr[0]);
582 }
583 }
584
585 return MLIB_SUCCESS;
586 }
587
588 /***************************************************************/
|