< prev index next >
src/java.desktop/share/native/libmlib_image/mlib_ImageConv_F32nw.c
Print this page
rev 14293 : remove ImageConv
*** 73,571 ****
type *sl; \
type *dl, *dp = NULL; \
mlib_s32 i, j, c
/***************************************************************/
- #undef KSIZE
- #define KSIZE 2
-
- mlib_status CONV_FUNC(2x2)(mlib_image *dst,
- const mlib_image *src,
- const mlib_d64 *kern,
- mlib_s32 cmask)
- {
- DEF_VARS(DTYPE);
- DTYPE *sp0, *sp1;
- mlib_s32 chan2 = chan1 + chan1;
- mlib_s32 chan3 = chan1 + chan2;
- mlib_s32 chan4 = chan3 + chan1;
- DTYPE k0, k1, k2, k3;
- DTYPE p00, p01, p02, p03, p04,
- p10, p11, p12, p13, p14;
-
- /* keep kernel in regs */
- k0 = (DTYPE)kern[0]; k1 = (DTYPE)kern[1];
- k2 = (DTYPE)kern[2]; k3 = (DTYPE)kern[3];
-
- wid -= (KSIZE - 1);
- hgt -= (KSIZE - 1);
-
- for (c = 0; c < chan1; c++) {
- if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
-
- dl = adr_dst + c;
- sl = adr_src + c;
-
- for (j = 0; j < hgt; j++) {
- dp = dl;
- sp0 = sl;
- sp1 = sp0 + sll;
-
- p04 = sp0[0];
- p14 = sp1[0];
-
- sp0 += chan1;
- sp1 += chan1;
-
- #ifdef __SUNPRO_C
- #pragma pipeloop(0)
- #endif /* __SUNPRO_C */
- for (i = 0; i <= (wid - 4); i += 4) {
- p00 = p04; p10 = p14;
-
- p01 = sp0[0]; p11 = sp1[0];
- p02 = sp0[chan1]; p12 = sp1[chan1];
- p03 = sp0[chan2]; p13 = sp1[chan2];
- p04 = sp0[chan3]; p14 = sp1[chan3];
-
- dp[0 ] = p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3;
- dp[chan1] = p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3;
- dp[chan2] = p02 * k0 + p03 * k1 + p12 * k2 + p13 * k3;
- dp[chan3] = p03 * k0 + p04 * k1 + p13 * k2 + p14 * k3;
-
- dp += chan4;
- sp0 += chan4;
- sp1 += chan4;
- }
-
- if (i < wid) {
- p00 = p04; p10 = p14;
- p01 = sp0[0]; p11 = sp1[0];
- dp[0] = p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3;
-
- if ((i + 1) < wid) {
- p02 = sp0[chan1]; p12 = sp1[chan1];
- dp[chan1] = p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3;
-
- if ((i + 2) < wid) {
- p03 = sp0[chan2]; p13 = sp1[chan2];
- dp[chan2] = p02 * k0 + p03 * k1 + p12 * k2 + p13 * k3;
- }
- }
- }
-
- sl += sll;
- dl += dll;
- }
- }
-
- return MLIB_SUCCESS;
- }
-
- /***************************************************************/
- #undef KSIZE
- #define KSIZE 3
-
- mlib_status CONV_FUNC(3x3)(mlib_image *dst,
- const mlib_image *src,
- const mlib_d64 *kern,
- mlib_s32 cmask)
- {
- DEF_VARS(DTYPE);
- mlib_s32 chan2 = chan1 + chan1;
- DTYPE *sp0, *sp1;
- DTYPE *sp2;
- DTYPE k0, k1, k2, k3, k4, k5, k6, k7, k8;
- DTYPE p02, p03, p12, p13, p22, p23;
-
- /* keep kernel in regs */
- k0 = (DTYPE)kern[0]; k1 = (DTYPE)kern[1]; k2 = (DTYPE)kern[2];
- k3 = (DTYPE)kern[3]; k4 = (DTYPE)kern[4]; k5 = (DTYPE)kern[5];
- k6 = (DTYPE)kern[6]; k7 = (DTYPE)kern[7]; k8 = (DTYPE)kern[8];
-
- wid -= (KSIZE - 1);
- hgt -= (KSIZE - 1);
-
- adr_dst += ((KSIZE - 1)/2)*(dll + chan1);
-
- for (c = 0; c < chan1; c++) {
- if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
-
- sl = adr_src + c;
- dl = adr_dst + c;
-
- for (j = 0; j < hgt; j++) {
- DTYPE s0, s1;
-
- dp = dl;
- sp0 = sl;
- sp1 = sp0 + sll;
- sp2 = sp1 + sll;
-
- p02 = sp0[0];
- p12 = sp1[0];
- p22 = sp2[0];
-
- p03 = sp0[chan1];
- p13 = sp1[chan1];
- p23 = sp2[chan1];
-
- s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
- s1 = p03 * k0 + p13 * k3 + p23 * k6;
-
- sp0 += chan2;
- sp1 += chan2;
- sp2 += chan2;
-
- #ifdef __SUNPRO_C
- #pragma pipeloop(0)
- #endif /* __SUNPRO_C */
- for (i = 0; i <= (wid - 2); i += 2) {
- p02 = sp0[0]; p12 = sp1[0]; p22 = sp2[0];
- p03 = sp0[chan1]; p13 = sp1[chan1]; p23 = sp2[chan1];
-
- dp[0 ] = s0 + p02 * k2 + p12 * k5 + p22 * k8;
- dp[chan1] = s1 + p02 * k1 + p03 * k2 + p12 * k4 + p13 * k5 + p22 * k7 + p23 * k8;
-
- s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
- s1 = p03 * k0 + p13 * k3 + p23 * k6;
-
- sp0 += chan2;
- sp1 += chan2;
- sp2 += chan2;
- dp += chan2;
- }
-
- if (wid & 1) {
- p02 = sp0[0]; p12 = sp1[0]; p22 = sp2[0];
- dp[0] = s0 + p02 * k2 + p12 * k5 + p22 * k8;
- }
-
- sl += sll;
- dl += dll;
- }
- }
-
- return MLIB_SUCCESS;
- }
-
- /***************************************************************/
- #undef KSIZE
- #define KSIZE 4
-
- mlib_status CONV_FUNC(4x4)(mlib_image *dst,
- const mlib_image *src,
- const mlib_d64 *k,
- mlib_s32 cmask)
- {
- DTYPE k0, k1, k2, k3, k4, k5, k6, k7;
- DTYPE p00, p01, p02, p03, p04,
- p10, p11, p12, p13, p14;
- DEF_VARS(DTYPE);
- DTYPE *sp0, *sp1;
- mlib_s32 chan2 = chan1 + chan1;
- mlib_s32 chan3 = chan1 + chan2;
-
- wid -= (KSIZE - 1);
- hgt -= (KSIZE - 1);
-
- adr_dst += ((KSIZE - 1)/2)*(dll + chan1);
-
- for (c = 0; c < chan1; c++) {
- if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
-
- sl = adr_src + c;
- dl = adr_dst + c;
-
- for (j = 0; j < hgt; j++) {
- /*
- * First loop on two first lines of kernel
- */
- sp0 = sl;
- sp1 = sp0 + sll;
- dp = dl;
-
- k0 = (DTYPE)k[0]; k1 = (DTYPE)k[1]; k2 = (DTYPE)k[2]; k3 = (DTYPE)k[3];
- k4 = (DTYPE)k[4]; k5 = (DTYPE)k[5]; k6 = (DTYPE)k[6]; k7 = (DTYPE)k[7];
-
- p02 = sp0[0]; p12 = sp1[0];
- p03 = sp0[chan1]; p13 = sp1[chan1];
- p04 = sp0[chan2]; p14 = sp1[chan2];
-
- sp0 += chan3;
- sp1 += chan3;
-
- #ifdef __SUNPRO_C
- #pragma pipeloop(0)
- #endif /* __SUNPRO_C */
- for (i = 0; i <= (wid - 2); i += 2) {
- p00 = p02; p10 = p12;
- p01 = p03; p11 = p13;
- p02 = p04; p12 = p14;
-
- p03 = sp0[0]; p13 = sp1[0];
- p04 = sp0[chan1]; p14 = sp1[chan1];
-
- dp[0 ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 +
- p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7);
- dp[chan1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 +
- p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7);
-
- sp0 += chan2;
- sp1 += chan2;
- dp += chan2;
- }
-
- if (wid & 1) {
- p00 = p02; p10 = p12;
- p01 = p03; p11 = p13;
- p02 = p04; p12 = p14;
- p03 = sp0[0]; p13 = sp1[0];
-
- dp[0] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 +
- p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7);
- }
-
- /*
- * Second loop on two last lines of kernel
- */
- sp0 = sl + 2*sll;
- sp1 = sp0 + sll;
- dp = dl;
-
- k0 = (DTYPE)k[ 8]; k1 = (DTYPE)k[ 9]; k2 = (DTYPE)k[10]; k3 = (DTYPE)k[11];
- k4 = (DTYPE)k[12]; k5 = (DTYPE)k[13]; k6 = (DTYPE)k[14]; k7 = (DTYPE)k[15];
-
- p02 = sp0[0]; p12 = sp1[0];
- p03 = sp0[chan1]; p13 = sp1[chan1];
- p04 = sp0[chan2]; p14 = sp1[chan2];
-
- sp0 += chan3;
- sp1 += chan3;
-
- #ifdef __SUNPRO_C
- #pragma pipeloop(0)
- #endif /* __SUNPRO_C */
- for (i = 0; i <= (wid - 2); i += 2) {
- p00 = p02; p10 = p12;
- p01 = p03; p11 = p13;
- p02 = p04; p12 = p14;
-
- p03 = sp0[0]; p13 = sp1[0];
- p04 = sp0[chan1]; p14 = sp1[chan1];
-
- dp[0 ] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 +
- p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7);
- dp[chan1] += (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 +
- p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7);
-
- sp0 += chan2;
- sp1 += chan2;
- dp += chan2;
- }
-
- if (wid & 1) {
- p00 = p02; p10 = p12;
- p01 = p03; p11 = p13;
- p02 = p04; p12 = p14;
- p03 = sp0[0]; p13 = sp1[0];
-
- dp[0] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 +
- p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7);
- }
-
- /* next line */
- sl += sll;
- dl += dll;
- }
- }
-
- return MLIB_SUCCESS;
- }
-
- /***************************************************************/
- #undef KSIZE
- #define KSIZE 5
-
- mlib_status CONV_FUNC(5x5)(mlib_image *dst,
- const mlib_image *src,
- const mlib_d64 *k,
- mlib_s32 cmask)
- {
- DTYPE k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
- DTYPE p00, p01, p02, p03, p04, p05,
- p10, p11, p12, p13, p14, p15;
- DEF_VARS(DTYPE);
- DTYPE *sp0, *sp1;
- mlib_s32 chan2 = chan1 + chan1;
- mlib_s32 chan3 = chan1 + chan2;
- mlib_s32 chan4 = chan3 + chan1;
-
- wid -= (KSIZE - 1);
- hgt -= (KSIZE - 1);
-
- adr_dst += ((KSIZE - 1)/2)*(dll + chan1);
-
- for (c = 0; c < chan1; c++) {
- if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
-
- sl = adr_src + c;
- dl = adr_dst + c;
-
- for (j = 0; j < hgt; j++) {
- /*
- * First loop
- */
- sp0 = sl;
- sp1 = sp0 + sll;
- dp = dl;
-
- k0 = (DTYPE)k[0]; k1 = (DTYPE)k[1]; k2 = (DTYPE)k[2]; k3 = (DTYPE)k[3]; k4 = (DTYPE)k[4];
- k5 = (DTYPE)k[5]; k6 = (DTYPE)k[6]; k7 = (DTYPE)k[7]; k8 = (DTYPE)k[8]; k9 = (DTYPE)k[9];
-
- p02 = sp0[0]; p12 = sp1[0];
- p03 = sp0[chan1]; p13 = sp1[chan1];
- p04 = sp0[chan2]; p14 = sp1[chan2];
- p05 = sp0[chan3]; p15 = sp1[chan3];
-
- sp0 += chan4;
- sp1 += chan4;
-
- #ifdef __SUNPRO_C
- #pragma pipeloop(0)
- #endif /* __SUNPRO_C */
- for (i = 0; i <= (wid - 2); i += 2) {
- p00 = p02; p10 = p12;
- p01 = p03; p11 = p13;
- p02 = p04; p12 = p14;
- p03 = p05; p13 = p15;
-
- p04 = sp0[0]; p14 = sp1[0];
- p05 = sp0[chan1]; p15 = sp1[chan1];
-
- dp[ 0] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
- p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
- dp[chan1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 +
- p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9);
-
- sp0 += chan2;
- sp1 += chan2;
- dp += chan2;
- }
-
- if (wid & 1) {
- p00 = p02; p10 = p12;
- p01 = p03; p11 = p13;
- p02 = p04; p12 = p14;
- p03 = p05; p13 = p15;
-
- p04 = sp0[0]; p14 = sp1[0];
-
- dp[0] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
- p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
- }
-
- /*
- * Second loop
- */
- sp0 = sl + 2*sll;
- sp1 = sp0 + sll;
- dp = dl;
-
- k0 = (DTYPE)k[10]; k1 = (DTYPE)k[11]; k2 = (DTYPE)k[12]; k3 = (DTYPE)k[13]; k4 = (DTYPE)k[14];
- k5 = (DTYPE)k[15]; k6 = (DTYPE)k[16]; k7 = (DTYPE)k[17]; k8 = (DTYPE)k[18]; k9 = (DTYPE)k[19];
-
- p02 = sp0[0]; p12 = sp1[0];
- p03 = sp0[chan1]; p13 = sp1[chan1];
- p04 = sp0[chan2]; p14 = sp1[chan2];
- p05 = sp0[chan3]; p15 = sp1[chan3];
-
- sp0 += chan4;
- sp1 += chan4;
-
- #ifdef __SUNPRO_C
- #pragma pipeloop(0)
- #endif /* __SUNPRO_C */
- for (i = 0; i <= (wid - 2); i += 2) {
- p00 = p02; p10 = p12;
- p01 = p03; p11 = p13;
- p02 = p04; p12 = p14;
- p03 = p05; p13 = p15;
-
- p04 = sp0[0]; p14 = sp1[0];
- p05 = sp0[chan1]; p15 = sp1[chan1];
-
- dp[ 0] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
- p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
- dp[chan1] += (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 +
- p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9);
-
- sp0 += chan2;
- sp1 += chan2;
- dp += chan2;
- }
-
- if (wid & 1) {
- p00 = p02; p10 = p12;
- p01 = p03; p11 = p13;
- p02 = p04; p12 = p14;
- p03 = p05; p13 = p15;
-
- p04 = sp0[0]; p14 = sp1[0];
-
- dp[0] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
- p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
- }
-
- /*
- * 3 loop
- */
- dp = dl;
- sp0 = sl + 4*sll;
-
- k0 = (DTYPE)k[20]; k1 = (DTYPE)k[21]; k2 = (DTYPE)k[22]; k3 = (DTYPE)k[23]; k4 = (DTYPE)k[24];
-
- p02 = sp0[0];
- p03 = sp0[chan1];
- p04 = sp0[chan2];
- p05 = sp0[chan3];
-
- sp0 += chan2 + chan2;
-
- #ifdef __SUNPRO_C
- #pragma pipeloop(0)
- #endif /* __SUNPRO_C */
- for (i = 0; i <= (wid - 2); i += 2) {
- p00 = p02; p01 = p03; p02 = p04; p03 = p05;
-
- p04 = sp0[0]; p05 = sp0[chan1];
-
- dp[0 ] += p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4;
- dp[chan1] += p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4;
-
- dp += chan2;
- sp0 += chan2;
- }
-
- if (wid & 1) {
- p00 = p02; p01 = p03; p02 = p04; p03 = p05;
-
- p04 = sp0[0];
-
- dp[0] += p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4;
- }
-
- /* next line */
- sl += sll;
- dl += dll;
- }
- }
-
- return MLIB_SUCCESS;
- }
-
- /***************************************************************/
#define BUFF_SIZE 1600
#define CACHE_SIZE (64*1024)
static mlib_status mlib_ImageConv1xN(mlib_image *dst,
--- 73,82 ----
< prev index next >