/*
 * LOAD_BUFF(buff): combine two source samples, sp[0] and sp[chan1], into one
 * 64-bit value (sp[0] shifted into the upper 32 bits, S64TOS32 of sp[chan1]
 * OR-ed in) and store it at buff + i.  Uses `i`, `sp` and `chan1` from the
 * expansion site.
 * NOTE(review): this is one branch of a _NO_LONGLONG / _LITTLE_ENDIAN
 * conditional whose opening lines are not visible here -- confirm which.
 */
127 #define LOAD_BUFF(buff) \
128 *(mlib_s64*)(buff + i) = (((mlib_s64)sp[0]) << 32) | S64TOS32((mlib_s64)sp[chan1])
129
130 #endif /* _LITTLE_ENDIAN */
131 #endif /* _NO_LONGLONG */
132
133 /***************************************************************/
/*
 * d64_2x32: union that overlays one mlib_d64 with two pairs of mlib_s32
 * fields, so the same storage can be accessed either as a single d64 value
 * or as two 32-bit integers (i32s.i0/i1 or f32s.f0/f1).  Note that the
 * f32s members are declared as mlib_s32, not as a floating-point type.
 */
134 typedef union {
135 mlib_d64 d64;
136 struct {
137 mlib_s32 i0;
138 mlib_s32 i1;
139 } i32s;
140 struct {
141 mlib_s32 f0;
142 mlib_s32 f1;
143 } f32s;
144 } d64_2x32;
145
146 /***************************************************************/
/*
 * BUFF_LINE: number of elements per line in the on-stack scratch buffers of
 * the convolution routines; they switch to mlib_malloc when the image width
 * exceeds this value.
 */
147 #define BUFF_LINE 256
148
149 /***************************************************************/
/*
 * DEF_VARS(type): declares the locals shared by the convolution routines:
 *   adr_src/adr_dst  image data base pointers
 *   sl/dl            per-line source/destination pointers
 *   sp/dp            per-pixel source/destination pointers (start as NULL)
 *   pbuff            scratch buffer pointer, initialised to the local `buff`
 *   wid, hgt, sll, dll, nchannel, chan1, i, j, c  sizes, strides, counters
 * The expansion site must already declare `buff` and must supply the
 * terminating semicolon.
 */
150 #define DEF_VARS(type) \
151 type *adr_src, *sl, *sp = NULL; \
152 type *adr_dst, *dl, *dp = NULL; \
153 FTYPE *pbuff = buff; \
154 mlib_s32 wid, hgt, sll, dll; \
155 mlib_s32 nchannel, chan1; \
156 mlib_s32 i, j, c
157
158 /***************************************************************/
/*
 * LOAD_KERNEL3(): declares scalef (initialised to DSCALE), the nine kernel
 * coefficients k0..k8 and the pixel temporaries p00..p23, then fills k0..k8
 * with scalef * kern[0..8].
 *
 * scalef is first divided by 2^scalef_expon; the division is done in steps
 * of 2^30 while scalef_expon > 30 so every shift count stays at or below 30.
 * Side effect: scalef_expon is decremented by the macro.
 * Requires `kern` and `scalef_expon` in scope; the expansion site supplies
 * the final semicolon.
 */
159 #define LOAD_KERNEL3() \
160 FTYPE scalef = DSCALE; \
161 FTYPE k0, k1, k2, k3, k4, k5, k6, k7, k8; \
162 FTYPE p00, p01, p02, p03, \
163 p10, p11, p12, p13, \
164 p20, p21, p22, p23; \
165 \
166 while (scalef_expon > 30) { \
167 scalef /= (1 << 30); \
168 scalef_expon -= 30; \
169 } \
170 \
171 scalef /= (1 << scalef_expon); \
172 \
173 /* keep kernel in regs */ \
174 k0 = scalef * kern[0]; k1 = scalef * kern[1]; k2 = scalef * kern[2]; \
175 k3 = scalef * kern[3]; k4 = scalef * kern[4]; k5 = scalef * kern[5]; \
176 k6 = scalef * kern[6]; k7 = scalef * kern[7]; k8 = scalef * kern[8]
177
178 /***************************************************************/
/*
 * LOAD_KERNEL(SIZE): declares scalef (initialised to DSCALE), divides it by
 * 2^scalef_expon (in steps of 2^30 while scalef_expon > 30), and stores
 * scalef * kern[j] into k[j] for j = 0 .. SIZE-1.
 *
 * Side effect: scalef_expon is decremented by the macro.
 * Requires `kern`, `scalef_expon`, the array `k` and the counter `j` in
 * scope; the expansion site supplies the final semicolon.
 */
179 #define LOAD_KERNEL(SIZE) \
180 FTYPE scalef = DSCALE; \
181 \
182 while (scalef_expon > 30) { \
183 scalef /= (1 << 30); \
184 scalef_expon -= 30; \
185 } \
186 \
187 scalef /= (1 << scalef_expon); \
188 \
189 for (j = 0; j < SIZE; j++) k[j] = scalef * kern[j]
190
191 /***************************************************************/
/*
 * GET_SRC_DST_PARAMETERS(type): fills the DEF_VARS locals from the images.
 * Height, width and channel count are read from `src` only; sll/dll are the
 * src/dst strides divided by sizeof(type) (presumably converting a byte
 * stride into an element stride -- confirm against mlib_ImageGetStride);
 * adr_src/adr_dst are the data pointers cast to type *.
 * The expansion site supplies the final semicolon.
 */
192 #define GET_SRC_DST_PARAMETERS(type) \
193 hgt = mlib_ImageGetHeight(src); \
194 wid = mlib_ImageGetWidth(src); \
195 nchannel = mlib_ImageGetChannels(src); \
196 sll = mlib_ImageGetStride(src) / sizeof(type); \
197 dll = mlib_ImageGetStride(dst) / sizeof(type); \
198 adr_src = (type *)mlib_ImageGetData(src); \
199 adr_dst = (type *)mlib_ImageGetData(dst)
200
201 /***************************************************************/
202 #ifndef __sparc
203
204 #if IMG_TYPE == 1
205
206 /* Test for the presence of any "1" bit in bits
207 8 to 31 of val. If present, then val is either
208 negative or >255. If over/underflows of 8 bits
209 are uncommon, then this technique can be a win,
210 since only a single test, rather than two, is
211 necessary to determine if clamping is needed.
/*
 * CLAMP_STORE(dst, val): store val into dst, saturated to the range
 * [MLIB_S16_MIN, MLIB_S16_MAX].
 *
 * The expansion is wrapped in do { } while (0) so it is a single statement
 * wherever it is used, and both arguments are parenthesized so that an
 * expression (for example a conditional expression) can be passed as val
 * without being re-associated by the comparisons or the cast.
 * val is evaluated more than once: do not pass an argument with side
 * effects.  The call site supplies the terminating semicolon.
 */
#define CLAMP_STORE(dst, val)                                   \
  do {                                                          \
    if ((val) >= MLIB_S16_MAX)                                  \
      (dst) = MLIB_S16_MAX;                                     \
    else if ((val) <= MLIB_S16_MIN)                             \
      (dst) = MLIB_S16_MIN;                                     \
    else                                                        \
      (dst) = (mlib_s16)(val);                                  \
  } while (0)
234
235 #elif IMG_TYPE == 3
236
/*
 * CLAMP_STORE(dst, val): store val into dst, saturated to the range
 * [MLIB_U16_MIN, MLIB_U16_MAX].
 *
 * The expansion is wrapped in do { } while (0) so it is a single statement
 * wherever it is used, and both arguments are parenthesized so that an
 * expression (for example a conditional expression) can be passed as val
 * without being re-associated by the comparisons or the cast.
 * val is evaluated more than once: do not pass an argument with side
 * effects.  The call site supplies the terminating semicolon.
 */
#define CLAMP_STORE(dst, val)                                   \
  do {                                                          \
    if ((val) >= MLIB_U16_MAX)                                  \
      (dst) = MLIB_U16_MAX;                                     \
    else if ((val) <= MLIB_U16_MIN)                             \
      (dst) = MLIB_U16_MIN;                                     \
    else                                                        \
      (dst) = (mlib_u16)(val);                                  \
  } while (0)
244
245 #endif /* IMG_TYPE == 1 */
246 #endif /* __sparc */
247
248 /***************************************************************/
/*
 * CONV_FUNC(3x3): 3x3 convolution of src into dst, computed in FTYPE
 * arithmetic on row buffers.
 *
 *   dst, src      destination and source images; width, height and channel
 *                 count are taken from src
 *   kern          nine integer kernel coefficients (scaled by LOAD_KERNEL3)
 *   scalef_expon  power-of-two exponent the coefficients are divided by
 *   cmask         channel mask; channel c is processed only when bit
 *                 (nchannel - 1 - c) is set
 *
 * Produces (wid - 2) x (hgt - 2) outputs; the destination pointer is offset
 * by one line and one pixel, so the one-pixel border of dst is not written.
 * Returns MLIB_FAILURE when the scratch buffer cannot be allocated,
 * MLIB_SUCCESS otherwise.
 */
249 #define KSIZE 3
250
251 mlib_status CONV_FUNC(3x3)(mlib_image *dst,
252 const mlib_image *src,
253 const mlib_s32 *kern,
254 mlib_s32 scalef_expon,
255 mlib_s32 cmask)
256 {
257 FTYPE buff[(KSIZE + 2)*BUFF_LINE], *buff0, *buff1, *buff2, *buff3, *buffT;
258 DEF_VARS(DTYPE);
259 DTYPE *sl1;
260 mlib_s32 chan2;
261 mlib_s32 *buffo, *buffi;
262 DTYPE *sl2;
263 #ifndef __sparc
264 mlib_s32 d0, d1;
265 #endif /* __sparc */
266 LOAD_KERNEL3();
267 GET_SRC_DST_PARAMETERS(DTYPE);
268
/* The on-stack buffer holds KSIZE + 2 lines of BUFF_LINE elements; wider
   images get a heap buffer of the same number of lines. */
269 if (wid > BUFF_LINE) {
270 pbuff = mlib_malloc((KSIZE + 2)*sizeof(FTYPE)*wid);
271
272 if (pbuff == NULL) return MLIB_FAILURE;
273 }
274
/* Carve the scratch area: buff0..buff2 hold the three source lines in use,
   buff3 receives the next source line, and the last line is split into the
   two mlib_s32 areas buffo and buffi. */
275 buff0 = pbuff;
276 buff1 = buff0 + wid;
277 buff2 = buff1 + wid;
278 buff3 = buff2 + wid;
279 buffo = (mlib_s32*)(buff3 + wid);
280 buffi = buffo + (wid &~ 1);
281
282 chan1 = nchannel;
283 chan2 = chan1 + chan1;
284
/* From here on wid/hgt are the output dimensions. */
285 wid -= (KSIZE - 1);
286 hgt -= (KSIZE - 1);
287
/* First output pixel is one line down and one pixel right of the origin. */
288 adr_dst += ((KSIZE - 1)/2)*(dll + chan1);
289
290 for (c = 0; c < nchannel; c++) {
291 if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
292
293 sl = adr_src + c;
294 dl = adr_dst + c;
295
296 sl1 = sl + sll;
297 sl2 = sl1 + sll;
/* Preload the first three source lines of this channel, converted to FTYPE. */
298 #ifdef __SUNPRO_C
299 #pragma pipeloop(0)
300 #endif /* __SUNPRO_C */
301 for (i = 0; i < wid + (KSIZE - 1); i++) {
302 buff0[i] = (FTYPE)sl[i*chan1];
303 buff1[i] = (FTYPE)sl1[i*chan1];
304 buff2[i] = (FTYPE)sl2[i*chan1];
305 }
306
/* sl now addresses the source line that is loaded while the first output
   line is being computed. */
307 sl += KSIZE*sll;
308
309 for (j = 0; j < hgt; j++) {
310 FTYPE s0, s1;
311
312 p02 = buff0[0];
313 p12 = buff1[0];
314 p22 = buff2[0];
315
316 p03 = buff0[1];
317 p13 = buff1[1];
318 p23 = buff2[1];
319
/* s0/s1 carry the partial sums of two adjacent outputs: s0 covers kernel
   columns 0 and 1 of the first output, s1 kernel column 0 of the second. */
320 s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
321 s1 = p03 * k0 + p13 * k3 + p23 * k6;
322
323 sp = sl;
324 dp = dl;
325
/* Main loop: two outputs per iteration.  Each iteration also fetches two
   samples of the next source line through LOAD_BUFF and stores them,
   converted to FTYPE, in buff3. */
326 #ifdef __SUNPRO_C
327 #pragma pipeloop(0)
328 #endif /* __SUNPRO_C */
329 for (i = 0; i <= (wid - 2); i += 2) {
330 #ifdef __sparc
331 #ifdef _NO_LONGLONG
332 mlib_s32 o64_1, o64_2;
333 #else /* _NO_LONGLONG */
334 mlib_s64 o64;
335 #endif /* _NO_LONGLONG */
336 #endif /* __sparc */
337 d64_2x32 dd;
338
339 p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2];
340 p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3];
341
342 LOAD_BUFF(buffi);
343
/* Read the packed pair back through the union and split it into two ints. */
344 dd.d64 = *(FTYPE *)(buffi + i);
345 buff3[i ] = (FTYPE)dd.i32s.i0;
346 buff3[i + 1] = (FTYPE)dd.i32s.i1;
347
348 #ifndef __sparc
/* Complete both sums with the newly loaded columns, then start the partial
   sums of the next pair before storing. */
349 d0 = D2I(s0 + p02 * k2 + p12 * k5 + p22 * k8);
350 d1 = D2I(s1 + p02 * k1 + p03 * k2 + p12 * k4 + p13 * k5 + p22 * k7 + p23 * k8);
351
352 s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
353 s1 = p03 * k0 + p13 * k3 + p23 * k6;
354
355 dp[0 ] = FROM_S32(d0);
356 dp[chan1] = FROM_S32(d1);
357
358 #else /* __sparc */
359
/* sparc path: the two results are written to buffo as one d64 store and
   read back as integers before being stored with STORE2. */
360 dd.i32s.i0 = D2I(s0 + p02 * k2 + p12 * k5 + p22 * k8);
361 dd.i32s.i1 = D2I(s1 + p02 * k1 + p03 * k2 + p12 * k4 + p13 * k5 + p22 * k7 + p23 * k8);
362 *(FTYPE *)(buffo + i) = dd.d64;
363
364 s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
365 s1 = p03 * k0 + p13 * k3 + p23 * k6;
366
367 #ifdef _NO_LONGLONG
368
369 o64_1 = buffo[i];
370 o64_2 = buffo[i+1];
371 #if IMG_TYPE != 1
372 STORE2(FROM_S32(o64_1), FROM_S32(o64_2));
373 #else
374 STORE2(o64_1 >> 24, o64_2 >> 24);
375 #endif /* IMG_TYPE != 1 */
376
377 #else /* _NO_LONGLONG */
378
379 o64 = *(mlib_s64*)(buffo + i);
380 #if IMG_TYPE != 1
381 STORE2(FROM_S32(o64 >> 32), FROM_S32(o64));
382 #else
383 STORE2(o64 >> 56, o64 >> 24);
384 #endif /* IMG_TYPE != 1 */
385 #endif /* _NO_LONGLONG */
386 #endif /* __sparc */
387
388 sp += chan2;
389 dp += chan2;
390 }
391
/* Remaining output (odd width): full nine-term sum, one pixel at a time. */
392 for (; i < wid; i++) {
393 p00 = buff0[i]; p10 = buff1[i]; p20 = buff2[i];
394 p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1];
395 p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2];
396
397 buffi[i] = (mlib_s32)sp[0];
398 buff3[i] = (FTYPE)buffi[i];
399
400 #ifndef __sparc
401
402 d0 = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p10 * k3 + p11 * k4 +
403 p12 * k5 + p20 * k6 + p21 * k7 + p22 * k8);
404
405 dp[0] = FROM_S32(d0);
406
407 #else /* __sparc */
408
409 buffo[i] = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p10 * k3 + p11 * k4 +
410 p12 * k5 + p20 * k6 + p21 * k7 + p22 * k8);
411 #if IMG_TYPE != 1
412 dp[0] = FROM_S32(buffo[i]);
413 #else
414 dp[0] = buffo[i] >> 24;
415 #endif /* IMG_TYPE != 1 */
416 #endif /* __sparc */
417
418 sp += chan1;
419 dp += chan1;
420 }
421
/* The loops above loaded wid samples of the next line; fetch the last
   KSIZE - 1 samples so buff3 holds the complete line. */
422 buffi[wid] = (mlib_s32)sp[0];
423 buff3[wid] = (FTYPE)buffi[wid];
424 buffi[wid + 1] = (mlib_s32)sp[chan1];
425 buff3[wid + 1] = (FTYPE)buffi[wid + 1];
426
427 sl += sll;
428 dl += dll;
429
/* Rotate the line buffers: the oldest line becomes the next load target. */
430 buffT = buff0;
431 buff0 = buff1;
432 buff1 = buff2;
433 buff2 = buff3;
434 buff3 = buffT;
435 }
436 }
437
/* sparc, IMG_TYPE == 1 only: post-process the written area with the
   mlib_ImageXor80 helpers (the _aa variant when every channel is selected).
   NOTE(review): helper semantics are defined outside this excerpt. */
438 #ifdef __sparc
439 #if IMG_TYPE == 1
440 {
441 mlib_s32 amask = (1 << nchannel) - 1;
442
443 if ((cmask & amask) != amask) {
444 mlib_ImageXor80(adr_dst, wid, hgt, dll, nchannel, cmask);
445 } else {
446 mlib_ImageXor80_aa(adr_dst, wid*nchannel, hgt, dll);
447 }
448 }
449
450 #endif /* IMG_TYPE == 1 */
451 #endif /* __sparc */
452
/* Free the scratch buffer only when it was heap-allocated. */
453 if (pbuff != buff) mlib_free(pbuff);
454
455 return MLIB_SUCCESS;
456 }
457
458 /***************************************************************/
/*
 * CONV_FUNC_I(3x3): integer-arithmetic 3x3 convolution (compiled only when
 * __sparc is not defined).
 *
 *   dst, src      destination and source images; dimensions from src
 *   kern          nine integer kernel coefficients
 *   scalef_expon  total right-shift applied to the result, split into
 *                 shift1 (applied to the coefficients up front) and
 *                 shift2 = scalef_expon - shift1 (applied to each sum)
 *   cmask         channel mask; channel c is processed only when bit
 *                 (chan1 - 1 - c) is set
 *
 * Reads the three source lines directly (no row buffers), produces
 * (wid - 2) x (hgt - 2) outputs stored through CLAMP_STORE, and always
 * returns MLIB_SUCCESS.
 * NOTE(review): shift2 is used as a shift count, so callers presumably
 * guarantee scalef_expon >= shift1 -- confirm.
 */
459 #ifndef __sparc /* for x86, using integer multiplies is faster */
460
461 mlib_status CONV_FUNC_I(3x3)(mlib_image *dst,
462 const mlib_image *src,
463 const mlib_s32 *kern,
464 mlib_s32 scalef_expon,
465 mlib_s32 cmask)
466 {
467 DTYPE *adr_src, *sl, *sp0, *sp1, *sp2;
468 DTYPE *adr_dst, *dl, *dp;
469 mlib_s32 wid, hgt, sll, dll;
470 mlib_s32 nchannel, chan1, chan2;
471 mlib_s32 i, j, c;
472 mlib_s32 shift1, shift2;
473 mlib_s32 k0, k1, k2, k3, k4, k5, k6, k7, k8;
474 mlib_s32 p02, p03,
475 p12, p13,
476 p22, p23;
477
/* shift1 is 8 when IMG_TYPE == 1 and 16 otherwise. */
478 #if IMG_TYPE != 1
479 shift1 = 16;
480 #else
481 shift1 = 8;
482 #endif /* IMG_TYPE != 1 */
483
484 shift2 = scalef_expon - shift1;
485
486 /* keep kernel in regs */
487 k0 = kern[0] >> shift1; k1 = kern[1] >> shift1; k2 = kern[2] >> shift1;
488 k3 = kern[3] >> shift1; k4 = kern[4] >> shift1; k5 = kern[5] >> shift1;
489 k6 = kern[6] >> shift1; k7 = kern[7] >> shift1; k8 = kern[8] >> shift1;
490
491 GET_SRC_DST_PARAMETERS(DTYPE);
492
493 chan1 = nchannel;
494 chan2 = chan1 + chan1;
495
/* From here on wid/hgt are the output dimensions. */
496 wid -= (KSIZE - 1);
497 hgt -= (KSIZE - 1);
498
/* First output pixel is one line down and one pixel right of the origin. */
499 adr_dst += ((KSIZE - 1)/2)*(dll + chan1);
500
501 for (c = 0; c < chan1; c++) {
502 if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
503
504 sl = adr_src + c;
505 dl = adr_dst + c;
506
507 for (j = 0; j < hgt; j++) {
508 mlib_s32 s0, s1;
509 mlib_s32 pix0, pix1;
510
/* sp0..sp2 walk the three source lines that feed output line j. */
511 dp = dl;
512 sp0 = sl;
513 sp1 = sp0 + sll;
514 sp2 = sp1 + sll;
515
516 p02 = sp0[0];
517 p12 = sp1[0];
518 p22 = sp2[0];
519
520 p03 = sp0[chan1];
521 p13 = sp1[chan1];
522 p23 = sp2[chan1];
523
/* Partial sums of the first two outputs, built from source columns 0 and 1. */
524 s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
525 s1 = p03 * k0 + p13 * k3 + p23 * k6;
526
527 sp0 += chan2;
528 sp1 += chan2;
529 sp2 += chan2;
530
/* Two outputs per iteration: load two new columns, finish both sums, then
   start the partial sums of the next pair. */
531 #ifdef __SUNPRO_C
532 #pragma pipeloop(0)
533 #endif /* __SUNPRO_C */
534 for (i = 0; i <= (wid - 2); i += 2) {
535 p02 = sp0[0]; p12 = sp1[0]; p22 = sp2[0];
536 p03 = sp0[chan1]; p13 = sp1[chan1]; p23 = sp2[chan1];
537
538 pix0 = (s0 + p02 * k2 + p12 * k5 + p22 * k8) >> shift2;
539 pix1 = (s1 + p02 * k1 + p03 * k2 + p12 * k4 +
540 p13 * k5 + p22 * k7 + p23 * k8) >> shift2;
541
542 CLAMP_STORE(dp[0], pix0);
543 CLAMP_STORE(dp[chan1], pix1);
544
545 s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
546 s1 = p03 * k0 + p13 * k3 + p23 * k6;
547
548 sp0 += chan2;
549 sp1 += chan2;
550 sp2 += chan2;
551 dp += chan2;
552 }
553
/* Odd output width: finish the last pixel from the carried partial sum s0. */
554 if (wid & 1) {
555 p02 = sp0[0]; p12 = sp1[0]; p22 = sp2[0];
556 pix0 = (s0 + p02 * k2 + p12 * k5 + p22 * k8) >> shift2;
557 CLAMP_STORE(dp[0], pix0);
558 }
559
560 sl += sll;
561 dl += dll;
562 }
563 }
564
565 return MLIB_SUCCESS;
566 }
567
568 #endif /* __sparc ( for x86, using integer multiplies is faster ) */
569
570 /***************************************************************/
/*
 * CONV_FUNC(4x4): 4x4 convolution of src into dst, computed in FTYPE
 * arithmetic on row buffers.
 *
 *   dst, src      destination and source images; dimensions from src
 *   kern          16 integer kernel coefficients (scaled by LOAD_KERNEL)
 *   scalef_expon  power-of-two exponent the coefficients are divided by
 *   cmask         channel mask; channel c is processed only when bit
 *                 (nchannel - 1 - c) is set
 *
 * Each output line is built in two passes over the row buffers: kernel rows
 * 0-1 are accumulated into buffd, then kernel rows 2-3 are added and the
 * result is stored.  Produces (wid - 3) x (hgt - 3) outputs, written
 * starting one line down and one pixel right of the dst origin.
 * Returns MLIB_FAILURE when the scratch buffer cannot be allocated,
 * MLIB_SUCCESS otherwise.
 */
571 #undef KSIZE
572 #define KSIZE 4
573
574 mlib_status CONV_FUNC(4x4)(mlib_image *dst,
575 const mlib_image *src,
576 const mlib_s32 *kern,
577 mlib_s32 scalef_expon,
578 mlib_s32 cmask)
579 {
580 FTYPE buff[(KSIZE + 3)*BUFF_LINE];
581 FTYPE *buff0, *buff1, *buff2, *buff3, *buff4, *buffd, *buffT;
582 FTYPE k[KSIZE*KSIZE];
583 mlib_s32 d0, d1;
584 FTYPE k0, k1, k2, k3, k4, k5, k6, k7;
585 FTYPE p00, p01, p02, p03, p04,
586 p10, p11, p12, p13, p14,
587 p20, p21, p22, p23,
588 p30, p31, p32, p33;
589 DEF_VARS(DTYPE);
590 DTYPE *sl1;
591 mlib_s32 chan2;
592 mlib_s32 *buffo, *buffi;
593 DTYPE *sl2, *sl3;
594 LOAD_KERNEL(KSIZE*KSIZE);
595 GET_SRC_DST_PARAMETERS(DTYPE);
596
/* The on-stack buffer holds KSIZE + 3 lines of BUFF_LINE elements; wider
   images get a heap buffer of the same number of lines. */
597 if (wid > BUFF_LINE) {
598 pbuff = mlib_malloc((KSIZE + 3)*sizeof(FTYPE)*wid);
599
600 if (pbuff == NULL) return MLIB_FAILURE;
601 }
602
/* buff0..buff3: the four source lines in use; buff4: next source line;
   buffd: partial sums; the last line is split into buffo and buffi. */
603 buff0 = pbuff;
604 buff1 = buff0 + wid;
605 buff2 = buff1 + wid;
606 buff3 = buff2 + wid;
607 buff4 = buff3 + wid;
608 buffd = buff4 + wid;
609 buffo = (mlib_s32*)(buffd + wid);
610 buffi = buffo + (wid &~ 1);
611
612 chan1 = nchannel;
613 chan2 = chan1 + chan1;
614
/* From here on wid/hgt are the output dimensions. */
615 wid -= (KSIZE - 1);
616 hgt -= (KSIZE - 1);
617
/* (KSIZE - 1)/2 is 1 here: start one line down and one pixel right. */
618 adr_dst += ((KSIZE - 1)/2)*(dll + chan1);
619
620 for (c = 0; c < nchannel; c++) {
621 if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
622
623 sl = adr_src + c;
624 dl = adr_dst + c;
625
626 sl1 = sl + sll;
627 sl2 = sl1 + sll;
628 sl3 = sl2 + sll;
/* Preload the first four source lines of this channel, converted to FTYPE. */
629 #ifdef __SUNPRO_C
630 #pragma pipeloop(0)
631 #endif /* __SUNPRO_C */
632 for (i = 0; i < wid + (KSIZE - 1); i++) {
633 buff0[i] = (FTYPE)sl[i*chan1];
634 buff1[i] = (FTYPE)sl1[i*chan1];
635 buff2[i] = (FTYPE)sl2[i*chan1];
636 buff3[i] = (FTYPE)sl3[i*chan1];
637 }
638
/* sl now addresses the source line loaded during the first output line. */
639 sl += KSIZE*sll;
640
641 for (j = 0; j < hgt; j++) {
642 d64_2x32 dd;
643
644 /*
645 * First loop on two first lines of kernel
646 */
647 k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3];
648 k4 = k[4]; k5 = k[5]; k6 = k[6]; k7 = k[7];
649
650 sp = sl;
651 dp = dl;
652
653 p02 = buff0[0];
654 p12 = buff1[0];
655 p03 = buff0[1];
656 p13 = buff1[1];
657 p04 = buff0[2];
658
/* Pass 1: partial sums for two outputs per iteration go to buffd; the same
   iteration also fetches two samples of the next source line into buff4. */
659 #ifdef __SUNPRO_C
660 #pragma pipeloop(0)
661 #endif /* __SUNPRO_C */
662 for (i = 0; i <= (wid - 2); i += 2) {
663 p00 = p02; p10 = p12;
664 p01 = p03; p11 = p13;
665 p02 = p04; p12 = buff1[i + 2];
666 p03 = buff0[i + 3]; p13 = buff1[i + 3];
667 p04 = buff0[i + 4]; p14 = buff1[i + 4];
668
669 LOAD_BUFF(buffi);
670
671 dd.d64 = *(FTYPE *)(buffi + i);
672 buff4[i ] = (FTYPE)dd.i32s.i0;
673 buff4[i + 1] = (FTYPE)dd.i32s.i1;
674
675 buffd[i ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 +
676 p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7);
677 buffd[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 +
678 p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7);
679
680 sp += chan2;
681 dp += chan2;
682 }
683
684 /*
685 * Second loop on two last lines of kernel
686 */
687 k0 = k[ 8]; k1 = k[ 9]; k2 = k[10]; k3 = k[11];
688 k4 = k[12]; k5 = k[13]; k6 = k[14]; k7 = k[15];
689
690 sp = sl;
691 dp = dl;
692
693 p02 = buff2[0];
694 p12 = buff3[0];
695 p03 = buff2[1];
696 p13 = buff3[1];
697 p04 = buff2[2];
698
/* Pass 2: add kernel rows 2-3 to the buffd partial sums and store. */
699 #ifdef __SUNPRO_C
700 #pragma pipeloop(0)
701 #endif /* __SUNPRO_C */
702 for (i = 0; i <= (wid - 2); i += 2) {
703 p00 = p02; p10 = p12;
704 p01 = p03; p11 = p13;
705 p02 = p04; p12 = buff3[i + 2];
706 p03 = buff2[i + 3]; p13 = buff3[i + 3];
707 p04 = buff2[i + 4]; p14 = buff3[i + 4];
708
709 d0 = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 +
710 p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7 + buffd[i]);
711 d1 = D2I(p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 +
712 p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7 + buffd[i + 1]);
713
714 dp[0 ] = FROM_S32(d0);
715 dp[chan1] = FROM_S32(d1);
716
717 sp += chan2;
718 dp += chan2;
719 }
720
721 /* last pixels */
/* Odd output width: full 16-term sum; also loads one next-line sample. */
722 for (; i < wid; i++) {
723 p00 = buff0[i]; p10 = buff1[i]; p20 = buff2[i]; p30 = buff3[i];
724 p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1]; p31 = buff3[i + 1];
725 p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; p32 = buff3[i + 2];
726 p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3]; p33 = buff3[i + 3];
727
728 buff4[i] = (FTYPE)sp[0];
729
730 buffo[i] = D2I(p00 * k[0] + p01 * k[1] + p02 * k[2] + p03 * k[3] +
731 p10 * k[4] + p11 * k[5] + p12 * k[6] + p13 * k[7] +
732 p20 * k[ 8] + p21 * k[ 9] + p22 * k[10] + p23 * k[11] +
733 p30 * k[12] + p31 * k[13] + p32 * k[14] + p33 * k[15]);
734
735 dp[0] = FROM_S32(buffo[i]);
736
737 sp += chan1;
738 dp += chan1;
739 }
740
/* Fetch the last KSIZE - 1 samples so buff4 holds the complete next line. */
741 buff4[wid ] = (FTYPE)sp[0];
742 buff4[wid + 1] = (FTYPE)sp[chan1];
743 buff4[wid + 2] = (FTYPE)sp[chan2];
744
745 /* next line */
746 sl += sll;
747 dl += dll;
748
/* Rotate the line buffers: the oldest line becomes the next load target. */
749 buffT = buff0;
750 buff0 = buff1;
751 buff1 = buff2;
752 buff2 = buff3;
753 buff3 = buff4;
754 buff4 = buffT;
755 }
756 }
757
/* Free the scratch buffer only when it was heap-allocated. */
758 if (pbuff != buff) mlib_free(pbuff);
759
760 return MLIB_SUCCESS;
761 }
762
763 /***************************************************************/
/*
 * CONV_FUNC(5x5): 5x5 convolution of src into dst, computed in FTYPE
 * arithmetic on row buffers.
 *
 *   dst, src      destination and source images; dimensions from src
 *   kern          25 integer kernel coefficients (scaled by LOAD_KERNEL)
 *   scalef_expon  power-of-two exponent the coefficients are divided by
 *   cmask         channel mask; channel c is processed only when bit
 *                 (nchannel - 1 - c) is set
 *
 * Each output line is built in three passes: kernel rows 0-1 into buffd,
 * kernel rows 2-3 added to buffd, then kernel row 4 added and the result
 * stored.  Produces (wid - 4) x (hgt - 4) outputs, written starting two
 * lines down and two pixels right of the dst origin.
 * Returns MLIB_FAILURE when the scratch buffer cannot be allocated,
 * MLIB_SUCCESS otherwise.
 */
764 #undef KSIZE
765 #define KSIZE 5
766
767 mlib_status CONV_FUNC(5x5)(mlib_image *dst,
768 const mlib_image *src,
769 const mlib_s32 *kern,
770 mlib_s32 scalef_expon,
771 mlib_s32 cmask)
772 {
773 FTYPE buff[(KSIZE + 3)*BUFF_LINE];
774 FTYPE *buff0, *buff1, *buff2, *buff3, *buff4, *buff5, *buffd, *buffT;
775 FTYPE k[KSIZE*KSIZE];
776 mlib_s32 d0, d1;
777 FTYPE k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
778 FTYPE p00, p01, p02, p03, p04, p05,
779 p10, p11, p12, p13, p14, p15,
780 p20, p21, p22, p23, p24,
781 p30, p31, p32, p33, p34,
782 p40, p41, p42, p43, p44;
783 DEF_VARS(DTYPE);
784 DTYPE *sl1;
785 mlib_s32 chan2;
786 mlib_s32 *buffo, *buffi;
787 DTYPE *sl2, *sl3, *sl4;
788 LOAD_KERNEL(KSIZE*KSIZE);
789 GET_SRC_DST_PARAMETERS(DTYPE);
790
/* The on-stack buffer holds KSIZE + 3 lines of BUFF_LINE elements; wider
   images get a heap buffer of the same number of lines. */
791 if (wid > BUFF_LINE) {
792 pbuff = mlib_malloc((KSIZE + 3)*sizeof(FTYPE)*wid);
793
794 if (pbuff == NULL) return MLIB_FAILURE;
795 }
796
/* buff0..buff4: the five source lines in use; buff5: next source line;
   buffd: partial sums; the last line is split into buffo and buffi. */
797 buff0 = pbuff;
798 buff1 = buff0 + wid;
799 buff2 = buff1 + wid;
800 buff3 = buff2 + wid;
801 buff4 = buff3 + wid;
802 buff5 = buff4 + wid;
803 buffd = buff5 + wid;
804 buffo = (mlib_s32*)(buffd + wid);
805 buffi = buffo + (wid &~ 1);
806
807 chan1 = nchannel;
808 chan2 = chan1 + chan1;
809
/* From here on wid/hgt are the output dimensions. */
810 wid -= (KSIZE - 1);
811 hgt -= (KSIZE - 1);
812
/* Start two lines down and two pixels right of the dst origin. */
813 adr_dst += ((KSIZE - 1)/2)*(dll + chan1);
814
815 for (c = 0; c < nchannel; c++) {
816 if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
817
818 sl = adr_src + c;
819 dl = adr_dst + c;
820
821 sl1 = sl + sll;
822 sl2 = sl1 + sll;
823 sl3 = sl2 + sll;
824 sl4 = sl3 + sll;
/* Preload the first five source lines of this channel, converted to FTYPE. */
825 #ifdef __SUNPRO_C
826 #pragma pipeloop(0)
827 #endif /* __SUNPRO_C */
828 for (i = 0; i < wid + (KSIZE - 1); i++) {
829 buff0[i] = (FTYPE)sl[i*chan1];
830 buff1[i] = (FTYPE)sl1[i*chan1];
831 buff2[i] = (FTYPE)sl2[i*chan1];
832 buff3[i] = (FTYPE)sl3[i*chan1];
833 buff4[i] = (FTYPE)sl4[i*chan1];
834 }
835
/* sl now addresses the source line loaded during the first output line. */
836 sl += KSIZE*sll;
837
838 for (j = 0; j < hgt; j++) {
839 d64_2x32 dd;
840
841 /*
842 * First loop
843 */
/* Pass 1: kernel rows 0-1 (k[0..9]) over buff0/buff1 into buffd; the next
   source line is fetched into buffi as packed pairs by LOAD_BUFF. */
844 k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3]; k4 = k[4];
845 k5 = k[5]; k6 = k[6]; k7 = k[7]; k8 = k[8]; k9 = k[9];
846
847 sp = sl;
848 dp = dl;
849
850 p02 = buff0[0];
851 p12 = buff1[0];
852 p03 = buff0[1];
853 p13 = buff1[1];
854 p04 = buff0[2];
855 p14 = buff1[2];
856
857 #ifdef __SUNPRO_C
858 #pragma pipeloop(0)
859 #endif /* __SUNPRO_C */
860 for (i = 0; i <= (wid - 2); i += 2) {
861 p00 = p02; p10 = p12;
862 p01 = p03; p11 = p13;
863 p02 = p04; p12 = p14;
864
865 LOAD_BUFF(buffi);
866
867 p03 = buff0[i + 3]; p13 = buff1[i + 3];
868 p04 = buff0[i + 4]; p14 = buff1[i + 4];
869 p05 = buff0[i + 5]; p15 = buff1[i + 5];
870
871 buffd[i ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
872 p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
873 buffd[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 +
874 p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9);
875
876 sp += chan2;
877 dp += chan2;
878 }
879
880 /*
881 * Second loop
882 */
/* Pass 2: kernel rows 2-3 (k[10..19]) over buff2/buff3 added to buffd; the
   pairs stored in buffi by pass 1 are converted to FTYPE into buff5. */
883 k0 = k[10]; k1 = k[11]; k2 = k[12]; k3 = k[13]; k4 = k[14];
884 k5 = k[15]; k6 = k[16]; k7 = k[17]; k8 = k[18]; k9 = k[19];
885
886 sp = sl;
887 dp = dl;
888
889 p02 = buff2[0];
890 p12 = buff3[0];
891 p03 = buff2[1];
892 p13 = buff3[1];
893 p04 = buff2[2];
894 p14 = buff3[2];
895
896 #ifdef __SUNPRO_C
897 #pragma pipeloop(0)
898 #endif /* __SUNPRO_C */
899 for (i = 0; i <= (wid - 2); i += 2) {
900 p00 = p02; p10 = p12;
901 p01 = p03; p11 = p13;
902
903 p02 = buff2[i + 2]; p12 = buff3[i + 2];
904 p03 = buff2[i + 3]; p13 = buff3[i + 3];
905 p04 = buff2[i + 4]; p14 = buff3[i + 4];
906 p05 = buff2[i + 5]; p15 = buff3[i + 5];
907
908 dd.d64 = *(FTYPE *)(buffi + i);
909 buff5[i ] = (FTYPE)dd.i32s.i0;
910 buff5[i + 1] = (FTYPE)dd.i32s.i1;
911
912 buffd[i ] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
913 p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
914 buffd[i + 1] += (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 +
915 p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9);
916
917 sp += chan2;
918 dp += chan2;
919 }
920
921 /*
922 * 3 loop
923 */
/* Pass 3: kernel row 4 (k[20..24]) over buff4 added to buffd, then store. */
924 k0 = k[20]; k1 = k[21]; k2 = k[22]; k3 = k[23]; k4 = k[24];
925
926 sp = sl;
927 dp = dl;
928
929 p02 = buff4[0];
930 p03 = buff4[1];
931 p04 = buff4[2];
932 p05 = buff4[3];
933
934 #ifdef __SUNPRO_C
935 #pragma pipeloop(0)
936 #endif /* __SUNPRO_C */
937 for (i = 0; i <= (wid - 2); i += 2) {
938 p00 = p02; p01 = p03; p02 = p04; p03 = p05;
939
940 p04 = buff4[i + 4]; p05 = buff4[i + 5];
941
942 d0 = D2I(p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + buffd[i]);
943 d1 = D2I(p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + buffd[i + 1]);
944
945 dp[0 ] = FROM_S32(d0);
946 dp[chan1] = FROM_S32(d1);
947
948 sp += chan2;
949 dp += chan2;
950 }
951
952 /* last pixels */
/* Odd output width: full 25-term sum; also loads one next-line sample. */
953 for (; i < wid; i++) {
954 p00 = buff0[i]; p10 = buff1[i]; p20 = buff2[i]; p30 = buff3[i];
955 p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1]; p31 = buff3[i + 1];
956 p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; p32 = buff3[i + 2];
957 p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3]; p33 = buff3[i + 3];
958 p04 = buff0[i + 4]; p14 = buff1[i + 4]; p24 = buff2[i + 4]; p34 = buff3[i + 4];
959
960 p40 = buff4[i]; p41 = buff4[i + 1]; p42 = buff4[i + 2];
961 p43 = buff4[i + 3]; p44 = buff4[i + 4];
962
963 buff5[i] = (FTYPE)sp[0];
964
965 buffo[i] = D2I(p00 * k[0] + p01 * k[1] + p02 * k[2] + p03 * k[3] + p04 * k[4] +
966 p10 * k[5] + p11 * k[6] + p12 * k[7] + p13 * k[8] + p14 * k[9] +
967 p20 * k[10] + p21 * k[11] + p22 * k[12] + p23 * k[13] + p24 * k[14] +
968 p30 * k[15] + p31 * k[16] + p32 * k[17] + p33 * k[18] + p34 * k[19] +
969 p40 * k[20] + p41 * k[21] + p42 * k[22] + p43 * k[23] + p44 * k[24]);
970
971 dp[0] = FROM_S32(buffo[i]);
972
973 sp += chan1;
974 dp += chan1;
975 }
976
/* Fetch the last KSIZE - 1 samples so buff5 holds the complete next line. */
977 buff5[wid ] = (FTYPE)sp[0];
978 buff5[wid + 1] = (FTYPE)sp[chan1];
979 buff5[wid + 2] = (FTYPE)sp[chan2];
980 buff5[wid + 3] = (FTYPE)sp[chan2 + chan1];
981
982 /* next line */
983 sl += sll;
984 dl += dll;
985
/* Rotate the line buffers: the oldest line becomes the next load target. */
986 buffT = buff0;
987 buff0 = buff1;
988 buff1 = buff2;
989 buff2 = buff3;
990 buff3 = buff4;
991 buff4 = buff5;
992 buff5 = buffT;
993 }
994 }
995
/* Free the scratch buffer only when it was heap-allocated. */
996 if (pbuff != buff) mlib_free(pbuff);
997
998 return MLIB_SUCCESS;
999 }
1000
1001 /***************************************************************/
/*
 * CONV_FUNC_I(5x5): integer-arithmetic 5x5 convolution (compiled only when
 * __sparc is not defined).
 *
 *   dst, src      destination and source images; dimensions from src
 *   kern          25 integer kernel coefficients
 *   scalef_expon  total right-shift applied to the result, split into
 *                 shift1 (applied to the coefficients up front) and
 *                 shift2 = scalef_expon - shift1 (applied to each sum)
 *   cmask         channel mask; channel c is processed only when bit
 *                 (chan1 - 1 - c) is set
 *
 * Source lines are read directly.  Each output line is built in three
 * passes (kernel rows 0-1, rows 2-3, row 4) that accumulate in the mlib_s32
 * line buffer buffd; the last pass shifts and stores through CLAMP_STORE.
 * Produces (wid - 4) x (hgt - 4) outputs starting two lines down and two
 * pixels right of the dst origin.
 * Returns MLIB_FAILURE when the line buffer cannot be allocated,
 * MLIB_SUCCESS otherwise.
 * NOTE(review): shift2 is used as a shift count, so callers presumably
 * guarantee scalef_expon >= shift1 -- confirm.
 */
1002 #ifndef __sparc /* for x86, using integer multiplies is faster */
1003
1004 mlib_status CONV_FUNC_I(5x5)(mlib_image *dst,
1005 const mlib_image *src,
1006 const mlib_s32 *kern,
1007 mlib_s32 scalef_expon,
1008 mlib_s32 cmask)
1009 {
1010 mlib_s32 buff[BUFF_LINE];
1011 mlib_s32 *buffd;
1012 mlib_s32 k[KSIZE*KSIZE];
1013 mlib_s32 shift1, shift2;
1014 mlib_s32 k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
1015 mlib_s32 p00, p01, p02, p03, p04, p05,
1016 p10, p11, p12, p13, p14, p15;
1017 DTYPE *adr_src, *sl, *sp0, *sp1;
1018 DTYPE *adr_dst, *dl, *dp;
1019 mlib_s32 *pbuff = buff;
1020 mlib_s32 wid, hgt, sll, dll;
1021 mlib_s32 nchannel, chan1, chan2, chan3, chan4;
1022 mlib_s32 i, j, c;
1023
/* shift1 is 8 when IMG_TYPE == 1 and 16 otherwise. */
1024 #if IMG_TYPE != 1
1025 shift1 = 16;
1026 #else
1027 shift1 = 8;
1028 #endif /* IMG_TYPE != 1 */
1029
1030 shift2 = scalef_expon - shift1;
1031
1032 for (j = 0; j < KSIZE*KSIZE; j++) k[j] = kern[j] >> shift1;
1033
1034 GET_SRC_DST_PARAMETERS(DTYPE);
1035
/* One line of partial sums: on the stack up to BUFF_LINE, else on the heap. */
1036 if (wid > BUFF_LINE) {
1037 pbuff = mlib_malloc(sizeof(mlib_s32)*wid);
1038
1039 if (pbuff == NULL) return MLIB_FAILURE;
1040 }
1041
1042 buffd = pbuff;
1043
1044 chan1 = nchannel;
1045 chan2 = chan1 + chan1;
1046 chan3 = chan2 + chan1;
1047 chan4 = chan3 + chan1;
1048
/* From here on wid/hgt are the output dimensions. */
1049 wid -= (KSIZE - 1);
1050 hgt -= (KSIZE - 1);
1051
/* Start two lines down and two pixels right of the dst origin. */
1052 adr_dst += ((KSIZE - 1)/2)*(dll + chan1);
1053
1054 for (c = 0; c < chan1; c++) {
1055 if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
1056
1057 sl = adr_src + c;
1058 dl = adr_dst + c;
1059
1060 for (j = 0; j < hgt; j++) {
1061 mlib_s32 pix0, pix1;
1062 /*
1063 * First loop
1064 */
/* Pass 1: source lines 0 and 1 with k[0..9]; results overwrite buffd. */
1065 sp0 = sl;
1066 sp1 = sp0 + sll;
1067 dp = dl;
1068
1069 k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3]; k4 = k[4];
1070 k5 = k[5]; k6 = k[6]; k7 = k[7]; k8 = k[8]; k9 = k[9];
1071
1072 p02 = sp0[0]; p12 = sp1[0];
1073 p03 = sp0[chan1]; p13 = sp1[chan1];
1074 p04 = sp0[chan2]; p14 = sp1[chan2];
1075 p05 = sp0[chan3]; p15 = sp1[chan3];
1076
1077 sp0 += chan4;
1078 sp1 += chan4;
1079
1080 #ifdef __SUNPRO_C
1081 #pragma pipeloop(0)
1082 #endif /* __SUNPRO_C */
1083 for (i = 0; i <= (wid - 2); i += 2) {
1084 p00 = p02; p10 = p12;
1085 p01 = p03; p11 = p13;
1086 p02 = p04; p12 = p14;
1087 p03 = p05; p13 = p15;
1088
1089 p04 = sp0[0]; p14 = sp1[0];
1090 p05 = sp0[chan1]; p15 = sp1[chan1];
1091
1092 buffd[i ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
1093 p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
1094 buffd[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 +
1095 p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9);
1096
1097 sp0 += chan2;
1098 sp1 += chan2;
1099 dp += chan2;
1100 }
1101
/* Odd output width: one more partial sum for the last pixel. */
1102 if (wid & 1) {
1103 p00 = p02; p10 = p12;
1104 p01 = p03; p11 = p13;
1105 p02 = p04; p12 = p14;
1106 p03 = p05; p13 = p15;
1107
1108 p04 = sp0[0]; p14 = sp1[0];
1109
1110 buffd[i] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
1111 p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
1112 }
1113
1114 /*
1115 * Second loop
1116 */
/* Pass 2: source lines 2 and 3 with k[10..19]; results are added to buffd. */
1117 sp0 = sl + 2*sll;
1118 sp1 = sp0 + sll;
1119 dp = dl;
1120
1121 k0 = k[10]; k1 = k[11]; k2 = k[12]; k3 = k[13]; k4 = k[14];
1122 k5 = k[15]; k6 = k[16]; k7 = k[17]; k8 = k[18]; k9 = k[19];
1123
1124 p02 = sp0[0]; p12 = sp1[0];
1125 p03 = sp0[chan1]; p13 = sp1[chan1];
1126 p04 = sp0[chan2]; p14 = sp1[chan2];
1127 p05 = sp0[chan3]; p15 = sp1[chan3];
1128
1129 sp0 += chan4;
1130 sp1 += chan4;
1131
1132 #ifdef __SUNPRO_C
1133 #pragma pipeloop(0)
1134 #endif /* __SUNPRO_C */
1135 for (i = 0; i <= (wid - 2); i += 2) {
1136 p00 = p02; p10 = p12;
1137 p01 = p03; p11 = p13;
1138 p02 = p04; p12 = p14;
1139 p03 = p05; p13 = p15;
1140
1141 p04 = sp0[0]; p14 = sp1[0];
1142 p05 = sp0[chan1]; p15 = sp1[chan1];
1143
1144 buffd[i ] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
1145 p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
1146 buffd[i + 1] += (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 +
1147 p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9);
1148
1149 sp0 += chan2;
1150 sp1 += chan2;
1151 dp += chan2;
1152 }
1153
1154 if (wid & 1) {
1155 p00 = p02; p10 = p12;
1156 p01 = p03; p11 = p13;
1157 p02 = p04; p12 = p14;
1158 p03 = p05; p13 = p15;
1159
1160 p04 = sp0[0]; p14 = sp1[0];
1161
1162 buffd[i] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
1163 p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
1164 }
1165
1166 /*
1167 * 3 loop
1168 */
/* Pass 3: source line 4 with k[20..24] added to buffd, shifted right by
   shift2 and stored with saturation. */
1169 dp = dl;
1170 sp0 = sl + 4*sll;
1171
1172 k0 = k[20]; k1 = k[21]; k2 = k[22]; k3 = k[23]; k4 = k[24];
1173
1174 p02 = sp0[0];
1175 p03 = sp0[chan1];
1176 p04 = sp0[chan2];
1177 p05 = sp0[chan3];
1178
1179 sp0 += chan2 + chan2;
1180
1181 #ifdef __SUNPRO_C
1182 #pragma pipeloop(0)
1183 #endif /* __SUNPRO_C */
1184 for (i = 0; i <= (wid - 2); i += 2) {
1185 p00 = p02; p01 = p03; p02 = p04; p03 = p05;
1186
1187 p04 = sp0[0]; p05 = sp0[chan1];
1188
1189 pix0 = (buffd[i ] + p00 * k0 + p01 * k1 + p02 * k2 +
1190 p03 * k3 + p04 * k4) >> shift2;
1191 pix1 = (buffd[i + 1] + p01 * k0 + p02 * k1 + p03 * k2 +
1192 p04 * k3 + p05 * k4) >> shift2;
1193
1194 CLAMP_STORE(dp[0], pix0);
1195 CLAMP_STORE(dp[chan1], pix1);
1196
1197 dp += chan2;
1198 sp0 += chan2;
1199 }
1200
1201 if (wid & 1) {
1202 p00 = p02; p01 = p03; p02 = p04; p03 = p05;
1203
1204 p04 = sp0[0];
1205
1206 pix0 = (buffd[i ] + p00 * k0 + p01 * k1 + p02 * k2 +
1207 p03 * k3 + p04 * k4) >> shift2;
1208 CLAMP_STORE(dp[0], pix0);
1209 }
1210
1211 /* next line */
1212 sl += sll;
1213 dl += dll;
1214 }
1215 }
1216
/* Free the line buffer only when it was heap-allocated. */
1217 if (pbuff != buff) mlib_free(pbuff);
1218
1219 return MLIB_SUCCESS;
1220 }
1221
1222 #endif /* __sparc ( for x86, using integer multiplies is faster ) */
1223
1224 /***************************************************************/
/*
 * CONV_FUNC(7x7): 7x7 convolution of src into dst, computed in FTYPE
 * arithmetic on row buffers (compiled only when IMG_TYPE == 1).
 *
 *   dst, src      destination and source images; dimensions from src
 *   kern          49 integer kernel coefficients (scaled by LOAD_KERNEL)
 *   scalef_expon  power-of-two exponent the coefficients are divided by
 *   cmask         channel mask; channel c is processed only when bit
 *                 (nchannel - 1 - c) is set
 *
 * KSIZE + 1 line buffers are used as a ring (seven lines in use plus the
 * line being loaded).  For each output line the seven kernel rows are
 * applied one after another, accumulating in buffd; the last kernel row
 * also stores the result, clears buffd and loads the next source line.
 * Produces (wid - 6) x (hgt - 6) outputs starting three lines down and
 * three pixels right of the dst origin.
 * Returns MLIB_FAILURE when the scratch buffer cannot be allocated,
 * MLIB_SUCCESS otherwise.
 */
1225 #if IMG_TYPE == 1
1226
1227 #undef KSIZE
1228 #define KSIZE 7
1229
1230 mlib_status CONV_FUNC(7x7)(mlib_image *dst,
1231 const mlib_image *src,
1232 const mlib_s32 *kern,
1233 mlib_s32 scalef_expon,
1234 mlib_s32 cmask)
1235 {
1236 FTYPE buff[(KSIZE + 3)*BUFF_LINE], *buffs[2*(KSIZE + 1)], *buffd;
1237 FTYPE k[KSIZE*KSIZE];
1238 mlib_s32 l, m, buff_ind;
1239 mlib_s32 d0, d1;
1240 FTYPE k0, k1, k2, k3, k4, k5, k6;
1241 FTYPE p0, p1, p2, p3, p4, p5, p6, p7;
1242 DTYPE *sl2, *sl3, *sl4, *sl5, *sl6;
1243 DEF_VARS(DTYPE);
1244 DTYPE *sl1;
1245 mlib_s32 chan2;
1246 mlib_s32 *buffo, *buffi;
1247 LOAD_KERNEL(KSIZE*KSIZE);
1248 GET_SRC_DST_PARAMETERS(DTYPE);
1249
/* The on-stack buffer holds KSIZE + 3 lines of BUFF_LINE elements; wider
   images get a heap buffer of the same number of lines. */
1250 if (wid > BUFF_LINE) {
1251 pbuff = mlib_malloc((KSIZE + 3)*sizeof(FTYPE)*wid);
1252
1253 if (pbuff == NULL) return MLIB_FAILURE;
1254 }
1255
/* buffs[0..KSIZE] point at the KSIZE + 1 line buffers; the second half of
   buffs repeats the first so that (buffs + buff_ind)[0..KSIZE] never needs
   a wrap-around test. */
1256 for (l = 0; l < KSIZE + 1; l++) buffs[l] = pbuff + l*wid;
1257 for (l = 0; l < KSIZE + 1; l++) buffs[l + (KSIZE + 1)] = buffs[l];
1258 buffd = buffs[KSIZE] + wid;
1259 buffo = (mlib_s32*)(buffd + wid);
1260 buffi = buffo + (wid &~ 1);
1261
1262 chan1 = nchannel;
1263 chan2 = chan1 + chan1;
1264
/* From here on wid/hgt are the output dimensions. */
1265 wid -= (KSIZE - 1);
1266 hgt -= (KSIZE - 1);
1267
/* Start three lines down and three pixels right of the dst origin. */
1268 adr_dst += ((KSIZE - 1)/2)*(dll + chan1);
1269
1270 for (c = 0; c < nchannel; c++) {
1271 if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
1272
1273 sl = adr_src + c;
1274 dl = adr_dst + c;
1275
1276 sl1 = sl + sll;
1277 sl2 = sl1 + sll;
1278 sl3 = sl2 + sll;
1279 sl4 = sl3 + sll;
1280 sl5 = sl4 + sll;
1281 sl6 = sl5 + sll;
/* Preload the first seven source lines of this channel, converted to FTYPE. */
1282 #ifdef __SUNPRO_C
1283 #pragma pipeloop(0)
1284 #endif /* __SUNPRO_C */
1285 for (i = 0; i < wid + (KSIZE - 1); i++) {
1286 buffs[0][i] = (FTYPE)sl[i*chan1];
1287 buffs[1][i] = (FTYPE)sl1[i*chan1];
1288 buffs[2][i] = (FTYPE)sl2[i*chan1];
1289 buffs[3][i] = (FTYPE)sl3[i*chan1];
1290 buffs[4][i] = (FTYPE)sl4[i*chan1];
1291 buffs[5][i] = (FTYPE)sl5[i*chan1];
1292 buffs[6][i] = (FTYPE)sl6[i*chan1];
1293 }
1294
1295 buff_ind = 0;
1296
/* The accumulator line must start at zero; it is re-zeroed after every
   store in the last-kernel-row loop below. */
1297 #ifdef __SUNPRO_C
1298 #pragma pipeloop(0)
1299 #endif /* __SUNPRO_C */
1300 for (i = 0; i < wid; i++) buffd[i] = 0.0;
1301
/* sl now addresses the source line loaded during the first output line. */
1302 sl += KSIZE*sll;
1303
1304 for (j = 0; j < hgt; j++) {
/* buffc: the seven lines in use for this output line, oldest first;
   buffn: the ring slot that receives the next source line. */
1305 FTYPE **buffc = buffs + buff_ind;
1306 FTYPE *buffn = buffc[KSIZE];
1307 FTYPE *pk = k;
1308
1309 for (l = 0; l < KSIZE; l++) {
/* Note: this local `buff` shadows the function-level array `buff`. */
1310 FTYPE *buff = buffc[l];
1311 d64_2x32 dd;
1312
1313 sp = sl;
1314 dp = dl;
1315
1316 p2 = buff[0]; p3 = buff[1]; p4 = buff[2];
1317 p5 = buff[3]; p6 = buff[4]; p7 = buff[5];
1318
/* Coefficients of kernel row l. */
1319 k0 = *pk++; k1 = *pk++; k2 = *pk++; k3 = *pk++;
1320 k4 = *pk++; k5 = *pk++; k6 = *pk++;
1321
1322 if (l < (KSIZE - 1)) {
/* Kernel rows 0..5: accumulate two outputs per iteration into buffd. */
1323 #ifdef __SUNPRO_C
1324 #pragma pipeloop(0)
1325 #endif /* __SUNPRO_C */
1326 for (i = 0; i <= (wid - 2); i += 2) {
1327 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
1328
1329 p6 = buff[i + 6]; p7 = buff[i + 7];
1330
1331 buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6;
1332 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6;
1333 }
1334
1335 } else {
/* Last kernel row: finish the sums, store, clear buffd for the next output
   line, and fetch two samples of the next source line into buffn. */
1336 #ifdef __SUNPRO_C
1337 #pragma pipeloop(0)
1338 #endif /* __SUNPRO_C */
1339 for (i = 0; i <= (wid - 2); i += 2) {
1340 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
1341
1342 p6 = buff[i + 6]; p7 = buff[i + 7];
1343
1344 LOAD_BUFF(buffi);
1345
1346 dd.d64 = *(FTYPE *)(buffi + i);
1347 buffn[i ] = (FTYPE)dd.i32s.i0;
1348 buffn[i + 1] = (FTYPE)dd.i32s.i1;
1349
1350 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i ]);
1351 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1]);
1352
1353 dp[0 ] = FROM_S32(d0);
1354 dp[chan1] = FROM_S32(d1);
1355
1356 buffd[i ] = 0.0;
1357 buffd[i + 1] = 0.0;
1358
1359 sp += chan2;
1360 dp += chan2;
1361 }
1362 }
1363 }
1364
1365 /* last pixels */
/* Odd output width: i, sp and dp continue from the last-kernel-row loop;
   the full 49-term sum is recomputed without using buffd.  The locals pk
   and d0 declared here shadow the outer ones. */
1366 for (; i < wid; i++) {
1367 FTYPE *pk = k, s = 0;
1368 mlib_s32 d0;
1369
1370 for (l = 0; l < KSIZE; l++) {
1371 FTYPE *buff = buffc[l] + i;
1372
1373 for (m = 0; m < KSIZE; m++) s += buff[m] * (*pk++);
1374 }
1375
1376 d0 = D2I(s);
1377 dp[0] = FROM_S32(d0);
1378
1379 buffn[i] = (FTYPE)sp[0];
1380
1381 sp += chan1;
1382 dp += chan1;
1383 }
1384
/* Fetch the last KSIZE - 1 samples so buffn holds the complete next line. */
1385 for (l = 0; l < (KSIZE - 1); l++) buffn[wid + l] = sp[l*chan1];
1386
1387 /* next line */
1388 sl += sll;
1389 dl += dll;
1390
/* Advance the ring by one line, wrapping after KSIZE + 1 slots. */
1391 buff_ind++;
1392
1393 if (buff_ind >= KSIZE + 1) buff_ind = 0;
1394 }
1395 }
1396
/* Free the scratch buffer only when it was heap-allocated. */
1397 if (pbuff != buff) mlib_free(pbuff);
1398
1399 return MLIB_SUCCESS;
1400 }
1401
1402 #endif /* IMG_TYPE == 1 */
1403
1404 /***************************************************************/
/*
 * Size limits used by the code that follows.
 * NOTE(review): the consumers of MAX_KER and MAX_N are outside this
 * excerpt; presumably kernel-size bounds -- confirm at the use sites.
 */
1405 #define MAX_KER 7
1406 #define MAX_N 15
1407
1408 static mlib_status mlib_ImageConv1xN(mlib_image *dst,
1409 const mlib_image *src,
1410 const mlib_d64 *k,
1411 mlib_s32 n,
1412 mlib_s32 dn,
1413 mlib_s32 cmask)
1414 {
1415 FTYPE buff[BUFF_SIZE];
1416 mlib_s32 off, kh;
1417 mlib_s32 d0, d1;
1418 const FTYPE *pk;
1419 FTYPE k0, k1, k2, k3;
1420 FTYPE p0, p1, p2, p3, p4;
1421 DEF_VARS(DTYPE);
1422 DTYPE *sl_c, *dl_c, *sl0;
|
/*
 * LOAD_BUFF(buff): combine two source samples, sp[0] and sp[chan1], into one
 * 64-bit value (sp[0] shifted into the upper 32 bits, S64TOS32 of sp[chan1]
 * OR-ed in) and store it at buff + i.  Uses `i`, `sp` and `chan1` from the
 * expansion site.
 * NOTE(review): this is one branch of a _NO_LONGLONG / _LITTLE_ENDIAN
 * conditional whose opening lines are not visible here -- confirm which.
 */
127 #define LOAD_BUFF(buff) \
128 *(mlib_s64*)(buff + i) = (((mlib_s64)sp[0]) << 32) | S64TOS32((mlib_s64)sp[chan1])
129
130 #endif /* _LITTLE_ENDIAN */
131 #endif /* _NO_LONGLONG */
132
133 /***************************************************************/
/*
 * d64_2x32: union that overlays one mlib_d64 with two pairs of mlib_s32
 * fields, so the same storage can be accessed either as a single d64 value
 * or as two 32-bit integers (i32s.i0/i1 or f32s.f0/f1).  Note that the
 * f32s members are declared as mlib_s32, not as a floating-point type.
 */
134 typedef union {
135 mlib_d64 d64;
136 struct {
137 mlib_s32 i0;
138 mlib_s32 i1;
139 } i32s;
140 struct {
141 mlib_s32 f0;
142 mlib_s32 f1;
143 } f32s;
144 } d64_2x32;
145
146 /***************************************************************/
/*
 * DEF_VARS(type): declares the locals shared by the convolution routines:
 *   adr_src/adr_dst  image data base pointers
 *   sl/dl, sp/dp     source/destination working pointers (sp and dp start
 *                    as NULL)
 *   pbuff            scratch buffer pointer, initialised to the local `buff`
 *   wid, hgt, sll, dll, nchannel, chan1, i, j, c  sizes, strides, counters
 * The expansion site must already declare `buff` and must supply the
 * terminating semicolon.
 */
147 #define DEF_VARS(type) \
148 type *adr_src, *sl, *sp = NULL; \
149 type *adr_dst, *dl, *dp = NULL; \
150 FTYPE *pbuff = buff; \
151 mlib_s32 wid, hgt, sll, dll; \
152 mlib_s32 nchannel, chan1; \
153 mlib_s32 i, j, c
154
155 /***************************************************************/
/*
 * GET_SRC_DST_PARAMETERS(type): fills the DEF_VARS locals from the images.
 * Height, width and channel count are read from `src` only; sll/dll are the
 * src/dst strides divided by sizeof(type) (presumably converting a byte
 * stride into an element stride -- confirm against mlib_ImageGetStride);
 * adr_src/adr_dst are the data pointers cast to type *.
 * The expansion site supplies the final semicolon.
 */
156 #define GET_SRC_DST_PARAMETERS(type) \
157 hgt = mlib_ImageGetHeight(src); \
158 wid = mlib_ImageGetWidth(src); \
159 nchannel = mlib_ImageGetChannels(src); \
160 sll = mlib_ImageGetStride(src) / sizeof(type); \
161 dll = mlib_ImageGetStride(dst) / sizeof(type); \
162 adr_src = (type *)mlib_ImageGetData(src); \
163 adr_dst = (type *)mlib_ImageGetData(dst)
164
165 /***************************************************************/
166 #ifndef __sparc
167
168 #if IMG_TYPE == 1
169
170 /* Test for the presence of any "1" bit in bits
171 8 to 31 of val. If present, then val is either
172 negative or >255. If over/underflows of 8 bits
173 are uncommon, then this technique can be a win,
174 since only a single test, rather than two, is
175 necessary to determine if clamping is needed.
/*
 * CLAMP_STORE(dst, val): store val into dst, saturated to the range
 * [MLIB_S16_MIN, MLIB_S16_MAX].
 *
 * The expansion is wrapped in do { } while (0) so it is a single statement
 * wherever it is used, and both arguments are parenthesized so that an
 * expression (for example a conditional expression) can be passed as val
 * without being re-associated by the comparisons or the cast.
 * val is evaluated more than once: do not pass an argument with side
 * effects.  The call site supplies the terminating semicolon.
 */
#define CLAMP_STORE(dst, val)                                   \
  do {                                                          \
    if ((val) >= MLIB_S16_MAX)                                  \
      (dst) = MLIB_S16_MAX;                                     \
    else if ((val) <= MLIB_S16_MIN)                             \
      (dst) = MLIB_S16_MIN;                                     \
    else                                                        \
      (dst) = (mlib_s16)(val);                                  \
  } while (0)
198
199 #elif IMG_TYPE == 3
200
/*
 * CLAMP_STORE(dst, val): store val into dst, saturated to the range
 * [MLIB_U16_MIN, MLIB_U16_MAX].
 *
 * The expansion is wrapped in do { } while (0) so it is a single statement
 * wherever it is used, and both arguments are parenthesized so that an
 * expression (for example a conditional expression) can be passed as val
 * without being re-associated by the comparisons or the cast.
 * val is evaluated more than once: do not pass an argument with side
 * effects.  The call site supplies the terminating semicolon.
 */
#define CLAMP_STORE(dst, val)                                   \
  do {                                                          \
    if ((val) >= MLIB_U16_MAX)                                  \
      (dst) = MLIB_U16_MAX;                                     \
    else if ((val) <= MLIB_U16_MIN)                             \
      (dst) = MLIB_U16_MIN;                                     \
    else                                                        \
      (dst) = (mlib_u16)(val);                                  \
  } while (0)
208
209 #endif /* IMG_TYPE == 1 */
210 #endif /* __sparc */
211
212 /***************************************************************/
/*
 * Size limits used by the code that follows.
 * NOTE(review): the consumers of MAX_KER and MAX_N are outside this
 * excerpt; presumably kernel-size bounds -- confirm at the use sites.
 */
213 #define MAX_KER 7
214 #define MAX_N 15
215
216 static mlib_status mlib_ImageConv1xN(mlib_image *dst,
217 const mlib_image *src,
218 const mlib_d64 *k,
219 mlib_s32 n,
220 mlib_s32 dn,
221 mlib_s32 cmask)
222 {
223 FTYPE buff[BUFF_SIZE];
224 mlib_s32 off, kh;
225 mlib_s32 d0, d1;
226 const FTYPE *pk;
227 FTYPE k0, k1, k2, k3;
228 FTYPE p0, p1, p2, p3, p4;
229 DEF_VARS(DTYPE);
230 DTYPE *sl_c, *dl_c, *sl0;
|