< prev index next >
src/java.desktop/unix/native/libmlib_image/mlib_v_ImageChannelInsert_1.c
Print this page
rev 14294 : remove Channels
@@ -25,38 +25,13 @@
/*
* FUNCTIONS
- * mlib_v_ImageChannelInsert_U8
- * mlib_v_ImageChannelInsert_U8_12_A8D1X8
- * mlib_v_ImageChannelInsert_U8_12_A8D2X8
* mlib_v_ImageChannelInsert_U8_12_D1
- * mlib_v_ImageChannelInsert_U8_12
- * mlib_v_ImageChannelInsert_U8_13_A8D1X8
- * mlib_v_ImageChannelInsert_U8_13_A8D2X8
* mlib_v_ImageChannelInsert_U8_13_D1
- * mlib_v_ImageChannelInsert_U8_13
- * mlib_v_ImageChannelInsert_U8_14_A8D1X8
- * mlib_v_ImageChannelInsert_U8_14_A8D2X8
* mlib_v_ImageChannelInsert_U8_14_D1
- * mlib_v_ImageChannelInsert_U8_14
- * mlib_v_ImageChannelInsert_S16
- * mlib_v_ImageChannelInsert_S16_12_A8D1X4
- * mlib_v_ImageChannelInsert_S16_12_A8D2X4
- * mlib_v_ImageChannelInsert_S16_12_D1
- * mlib_v_ImageChannelInsert_S16_12
- * mlib_v_ImageChannelInsert_S16_13_A8D1X4
- * mlib_v_ImageChannelInsert_S16_13_A8D2X4
- * mlib_v_ImageChannelInsert_S16_13_D1
- * mlib_v_ImageChannelInsert_S16_13
- * mlib_v_ImageChannelInsert_S16_14_A8D1X4
- * mlib_v_ImageChannelInsert_S16_14_A8D2X4
- * mlib_v_ImageChannelInsert_S16_14_D1
- * mlib_v_ImageChannelInsert_S16_14
- * mlib_v_ImageChannelInsert_S32
- * mlib_v_ImageChannelInsert_D64
*
* ARGUMENT
* src pointer to source image data
* dst pointer to destination image data
* slb source image line stride in bytes
@@ -78,428 +53,16 @@
#include "vis_proto.h"
#include "mlib_image.h"
#include "mlib_v_ImageChannelInsert.h"
/***************************************************************/
-/* general channel insertion: slower due to the inner loop */
-void mlib_v_ImageChannelInsert_U8(const mlib_u8 *src,
- mlib_s32 slb,
- mlib_u8 *dst,
- mlib_s32 dlb,
- mlib_s32 channels,
- mlib_s32 channeld,
- mlib_s32 width,
- mlib_s32 height,
- mlib_s32 cmask)
-{
- mlib_u8 *sp; /* pointer for pixel in src */
- mlib_u8 *sl; /* pointer for line in src */
- mlib_u8 *dp; /* pointer for pixel in dst */
- mlib_u8 *dl; /* pointer for line in dst */
- mlib_s32 i, j, k; /* indices for x, y, channel */
- mlib_s32 deltac[5] = { 0, 1, 1, 1, 1 };
- mlib_s32 inc0, inc1, inc2;
- mlib_u8 s0, s1, s2;
-
- deltac[channels] = 1;
- for (i = (channeld - 1), k = 0; i >= 0; i--) {
- if ((cmask & (1 << i)) == 0)
- deltac[k]++;
- else
- k++;
- }
-
- deltac[channels] = channeld;
- for (i = 1; i < channels; i++) {
- deltac[channels] -= deltac[i];
- }
-
- sp = sl = (void *)src;
- dp = dl = dst + deltac[0];
-
- if (channels == 2) {
- inc0 = deltac[1];
- inc1 = deltac[2] + inc0;
- for (j = 0; j < height; j++) {
-#pragma pipeloop(0)
- for (i = 0; i < width; i++) {
- s0 = sp[0];
- s1 = sp[1];
- dp[0] = s0;
- dp[inc0] = s1;
- dp += inc1;
- sp += 2;
- }
-
- sp = sl += slb;
- dp = dl += dlb;
- }
- }
- else if (channels == 3) {
- inc0 = deltac[1];
- inc1 = deltac[2] + inc0;
- inc2 = deltac[3] + inc1;
- for (j = 0; j < height; j++) {
-#pragma pipeloop(0)
- for (i = 0; i < width; i++) {
- s0 = sp[0];
- s1 = sp[1];
- s2 = sp[2];
- dp[0] = s0;
- dp[inc0] = s1;
- dp[inc1] = s2;
- dp += inc2;
- sp += 3;
- }
-
- sp = sl += slb;
- dp = dl += dlb;
- }
- }
-}
-
-/***************************************************************/
-/* general channel insertion: slower due to the inner loop */
-void mlib_v_ImageChannelInsert_D64(const mlib_d64 *src,
- mlib_s32 slb,
- mlib_d64 *dst,
- mlib_s32 dlb,
- mlib_s32 channels,
- mlib_s32 channeld,
- mlib_s32 width,
- mlib_s32 height,
- mlib_s32 cmask)
-{
- mlib_d64 *sp; /* pointer for pixel in src */
- mlib_d64 *sl; /* pointer for line in src */
- mlib_d64 *dp; /* pointer for pixel in dst */
- mlib_d64 *dl; /* pointer for line in dst */
- mlib_s32 i, j, k; /* indices for x, y, channel */
- mlib_s32 deltac[5] = { 0, 1, 1, 1, 1 };
- mlib_s32 inc0, inc1, inc2;
- mlib_d64 s0, s1, s2;
-
- deltac[channels] = 1;
- for (i = (channeld - 1), k = 0; i >= 0; i--) {
- if ((cmask & (1 << i)) == 0)
- deltac[k]++;
- else
- k++;
- }
-
- deltac[channels] = channeld;
- for (i = 1; i < channels; i++) {
- deltac[channels] -= deltac[i];
- }
-
- sp = sl = (void *)src;
- dp = dl = dst + deltac[0];
-
- if (channels == 1) {
- for (j = 0; j < height; j++) {
-#pragma pipeloop(0)
- for (i = 0; i < width; i++) {
- s0 = sp[0];
- dp[0] = s0;
- dp += channeld;
- sp++;
- }
-
- sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
- dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb);
- }
- }
- else if (channels == 2) {
- inc0 = deltac[1];
- inc1 = deltac[2] + inc0;
- for (j = 0; j < height; j++) {
-#pragma pipeloop(0)
- for (i = 0; i < width; i++) {
- s0 = sp[0];
- s1 = sp[1];
- dp[0] = s0;
- dp[inc0] = s1;
- dp += inc1;
- sp += 2;
- }
-
- sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
- dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb);
- }
- }
- else if (channels == 3) {
- inc0 = deltac[1];
- inc1 = deltac[2] + inc0;
- inc2 = deltac[3] + inc1;
- for (j = 0; j < height; j++) {
-#pragma pipeloop(0)
- for (i = 0; i < width; i++) {
- s0 = sp[0];
- s1 = sp[1];
- s2 = sp[2];
- dp[0] = s0;
- dp[inc0] = s1;
- dp[inc1] = s2;
- dp += inc2;
- sp += 3;
- }
-
- sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
- dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb);
- }
- }
-}
-
-/***************************************************************/
-/* general channel insertion: slower due to the inner loop */
-void mlib_v_ImageChannelInsert_S16(const mlib_s16 *src,
- mlib_s32 slb,
- mlib_s16 *dst,
- mlib_s32 dlb,
- mlib_s32 channels,
- mlib_s32 channeld,
- mlib_s32 width,
- mlib_s32 height,
- mlib_s32 cmask)
-{
- mlib_s16 *sp; /* pointer for pixel in src */
- mlib_s16 *sl; /* pointer for line in src */
- mlib_s16 *dp; /* pointer for pixel in dst */
- mlib_s16 *dl; /* pointer for line in dst */
- mlib_s32 i, j, k; /* indices for x, y, channel */
- mlib_s32 deltac[5] = { 0, 1, 1, 1, 1 };
- mlib_s32 inc0, inc1, inc2;
- mlib_s16 s0, s1, s2;
-
- deltac[channels] = 1;
- for (i = (channeld - 1), k = 0; i >= 0; i--) {
- if ((cmask & (1 << i)) == 0)
- deltac[k]++;
- else
- k++;
- }
-
- deltac[channels] = channeld;
- for (i = 1; i < channels; i++) {
- deltac[channels] -= deltac[i];
- }
-
- sp = sl = (void *)src;
- dp = dl = dst + deltac[0];
-
- if (channels == 2) {
- inc0 = deltac[1];
- inc1 = deltac[2] + inc0;
- for (j = 0; j < height; j++) {
-#pragma pipeloop(0)
- for (i = 0; i < width; i++) {
- s0 = sp[0];
- s1 = sp[1];
- dp[0] = s0;
- dp[inc0] = s1;
- dp += inc1;
- sp += 2;
- }
-
- sp = sl = (mlib_s16 *) ((mlib_u8 *) sl + slb);
- dp = dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
- }
- }
- else if (channels == 3) {
- inc0 = deltac[1];
- inc1 = deltac[2] + inc0;
- inc2 = deltac[3] + inc1;
- for (j = 0; j < height; j++) {
-#pragma pipeloop(0)
- for (i = 0; i < width; i++) {
- s0 = sp[0];
- s1 = sp[1];
- s2 = sp[2];
- dp[0] = s0;
- dp[inc0] = s1;
- dp[inc1] = s2;
- dp += inc2;
- sp += 3;
- }
-
- sp = sl = (mlib_s16 *) ((mlib_u8 *) sl + slb);
- dp = dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
- }
- }
-}
-
-/***************************************************************/
-/* general channel insertion: slower due to the inner loop */
-
-void mlib_v_ImageChannelInsert_S32(const mlib_s32 *src,
- mlib_s32 slb,
- mlib_s32 *dst,
- mlib_s32 dlb,
- mlib_s32 channels,
- mlib_s32 channeld,
- mlib_s32 width,
- mlib_s32 height,
- mlib_s32 cmask)
-{
- mlib_s32 *sp; /* pointer for pixel in src */
- mlib_s32 *sl; /* pointer for line in src */
- mlib_s32 *dp; /* pointer for pixel in dst */
- mlib_s32 *dl; /* pointer for line in dst */
- mlib_s32 i, j, k; /* indices for x, y, channel */
- mlib_s32 deltac[5] = { 0, 1, 1, 1, 1 };
- mlib_s32 inc0, inc1, inc2;
- mlib_s32 s0, s1, s2;
-
- deltac[channels] = 1;
- for (i = (channeld - 1), k = 0; i >= 0; i--) {
- if ((cmask & (1 << i)) == 0)
- deltac[k]++;
- else
- k++;
- }
-
- deltac[channels] = channeld;
- for (i = 1; i < channels; i++) {
- deltac[channels] -= deltac[i];
- }
-
- sp = sl = (void *)src;
- dp = dl = dst + deltac[0];
-
- if (channels == 1) {
- for (j = 0; j < height; j++) {
-#pragma pipeloop(0)
- for (i = 0; i < width; i++) {
- s0 = sp[0];
- dp[0] = s0;
- dp += channeld;
- sp++;
- }
-
- sp = sl = (mlib_s32 *) ((mlib_u8 *) sl + slb);
- dp = dl = (mlib_s32 *) ((mlib_u8 *) dl + dlb);
- }
- }
- else if (channels == 2) {
- inc0 = deltac[1];
- inc1 = deltac[2] + inc0;
- for (j = 0; j < height; j++) {
-#pragma pipeloop(0)
- for (i = 0; i < width; i++) {
- s0 = sp[0];
- s1 = sp[1];
- dp[0] = s0;
- dp[inc0] = s1;
- dp += inc1;
- sp += 2;
- }
-
- sp = sl = (mlib_s32 *) ((mlib_u8 *) sl + slb);
- dp = dl = (mlib_s32 *) ((mlib_u8 *) dl + dlb);
- }
- }
- else if (channels == 3) {
- inc0 = deltac[1];
- inc1 = deltac[2] + inc0;
- inc2 = deltac[3] + inc1;
- for (j = 0; j < height; j++) {
-#pragma pipeloop(0)
- for (i = 0; i < width; i++) {
- s0 = sp[0];
- s1 = sp[1];
- s2 = sp[2];
- dp[0] = s0;
- dp[inc0] = s1;
- dp[inc1] = s2;
- dp += inc2;
- sp += 3;
- }
-
- sp = sl = (mlib_s32 *) ((mlib_u8 *) sl + slb);
- dp = dl = (mlib_s32 *) ((mlib_u8 *) dl + dlb);
- }
- }
-}
-
-/***************************************************************/
#define INSERT_U8_12(sd0, dd0, dd1) /* channel duplicate */ \
dd0 = vis_fpmerge(vis_read_hi(sd0), vis_read_hi(sd0)); \
dd1 = vis_fpmerge(vis_read_lo(sd0), vis_read_lo(sd0))
/***************************************************************/
/* insert one channel to a 2-channel image.
- * both source and destination image data are 8-byte aligned.
- * dsize is multiple of 8.
- */
-
-void mlib_v_ImageChannelInsert_U8_12_A8D1X8(const mlib_u8 *src,
- mlib_u8 *dst,
- mlib_s32 dsize,
- mlib_s32 cmask)
-{
- mlib_d64 *sp, *dp;
- mlib_d64 sd0;
- mlib_d64 dd0, dd1;
- mlib_s32 bmask;
- mlib_s32 i;
-
- bmask = cmask | (cmask << 2) | (cmask << 4) | (cmask << 6);
-
- sp = (mlib_d64 *) src;
- dp = (mlib_d64 *) dst;
-
-#pragma pipeloop(0)
- for (i = 0; i < dsize / 8; i++) {
- sd0 = *sp++;
- INSERT_U8_12(sd0, dd0, dd1);
- vis_pst_8(dd0, dp++, bmask);
- vis_pst_8(dd1, dp++, bmask);
- }
-}
-
-/***************************************************************/
-/* insert one channel to a 2-channel image.
- * both source and destination image data are 8-byte aligned.
- * xsize is multiple of 8.
- */
-
-void mlib_v_ImageChannelInsert_U8_12_A8D2X8(const mlib_u8 *src,
- mlib_s32 slb,
- mlib_u8 *dst,
- mlib_s32 dlb,
- mlib_s32 xsize,
- mlib_s32 ysize,
- mlib_s32 cmask)
-{
- mlib_d64 *sp, *dp;
- mlib_d64 *sl, *dl;
- mlib_d64 sd0;
- mlib_d64 dd0, dd1;
- mlib_s32 bmask;
- mlib_s32 i, j;
-
- bmask = cmask | (cmask << 2) | (cmask << 4) | (cmask << 6);
-
- sp = sl = (mlib_d64 *) src;
- dp = dl = (mlib_d64 *) dst;
-
- for (j = 0; j < ysize; j++) {
-#pragma pipeloop(0)
- for (i = 0; i < xsize / 8; i++) {
- sd0 = *sp++;
- INSERT_U8_12(sd0, dd0, dd1);
- vis_pst_8(dd0, dp++, bmask);
- vis_pst_8(dd1, dp++, bmask);
- }
-
- sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
- dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb);
- }
-}
-
-/***************************************************************/
-/* insert one channel to a 2-channel image.
*/
void mlib_v_ImageChannelInsert_U8_12_D1(const mlib_u8 *src,
mlib_u8 *dst,
mlib_s32 dsize,
@@ -722,60 +285,10 @@
}
}
}
/***************************************************************/
-/* insert one channel to a 2-channel image.
- */
-
-void mlib_v_ImageChannelInsert_U8_12(const mlib_u8 *src,
- mlib_s32 slb,
- mlib_u8 *dst,
- mlib_s32 dlb,
- mlib_s32 xsize,
- mlib_s32 ysize,
- mlib_s32 cmask)
-{
- mlib_u8 *sa, *da;
- mlib_u8 *sl, *dl;
- mlib_s32 j;
-
- sa = sl = (void *)src;
- da = dl = dst;
-
-#pragma pipeloop(0)
- for (j = 0; j < ysize; j++) {
- mlib_v_ImageChannelInsert_U8_12_D1(sa, da, xsize, cmask);
- sa = sl += slb;
- da = dl += dlb;
- }
-}
-
-/***************************************************************/
-#define INSERT_U8_13(sd0, dd0, dd1, dd2) \
- sda = vis_fpmerge(vis_read_hi(sd0), vis_read_lo(sd0)); \
- sdb = vis_fpmerge(vis_read_hi(sda), vis_read_lo(sda)); \
- sdc = vis_fpmerge(vis_read_hi(sdb), vis_read_hi(sdb)); \
- sdd = vis_fpmerge(vis_read_lo(sdb), vis_read_lo(sdb)); \
- dd0 = vis_fpmerge(vis_read_hi(sdc), vis_read_hi(sdd)); \
- sde = vis_fpmerge(vis_read_lo(sdc), vis_read_lo(sdd)); \
- dd1 = vis_freg_pair(vis_read_lo(dd0), vis_read_hi(sde)); \
- dd2 = vis_freg_pair(vis_read_lo(sde), vis_read_lo(sde))
-
-/***************************************************************/
-#define LOAD_INSERT_STORE_U8_A8(channeld) \
- sd = *sp++; \
- vis_st_u8(sd = vis_faligndata(sd, sd), da); da += channeld; \
- vis_st_u8(sd = vis_faligndata(sd, sd), da); da += channeld; \
- vis_st_u8(sd = vis_faligndata(sd, sd), da); da += channeld; \
- vis_st_u8(sd = vis_faligndata(sd, sd), da); da += channeld; \
- vis_st_u8(sd = vis_faligndata(sd, sd), da); da += channeld; \
- vis_st_u8(sd = vis_faligndata(sd, sd), da); da += channeld; \
- vis_st_u8(sd = vis_faligndata(sd, sd), da); da += channeld; \
- vis_st_u8(sd = vis_faligndata(sd, sd), da); da += channeld
-
-/***************************************************************/
#define LOAD_INSERT_STORE_U8(channeld) \
vis_alignaddr((void *)0, off); \
sd0 = sd1; \
sd1 = *sp++; \
sd = vis_faligndata(sd0, sd1); \
@@ -788,62 +301,10 @@
vis_st_u8(sd = vis_faligndata(sd, sd), da); da += channeld; \
vis_st_u8(sd = vis_faligndata(sd, sd), da); da += channeld; \
vis_st_u8(sd = vis_faligndata(sd, sd), da); da += channeld
/***************************************************************/
-void mlib_v_ImageChannelInsert_U8_13_A8D1X8(const mlib_u8 *src,
- mlib_u8 *dst,
- mlib_s32 dsize,
- mlib_s32 cmask)
-{
- mlib_u8 *da;
- mlib_d64 *sp;
- mlib_d64 sd;
- mlib_s32 i;
-
- vis_alignaddr((void *)0, 1); /* for 1-byte left shift */
-
- sp = (mlib_d64 *) src;
- da = dst + (2 / cmask); /* 4,2,1 -> 0,1,2 */
-
-#pragma pipeloop(0)
- for (i = 0; i < dsize / 8; i++) {
- LOAD_INSERT_STORE_U8_A8(3);
- }
-}
-
-/***************************************************************/
-void mlib_v_ImageChannelInsert_U8_13_A8D2X8(const mlib_u8 *src,
- mlib_s32 slb,
- mlib_u8 *dst,
- mlib_s32 dlb,
- mlib_s32 xsize,
- mlib_s32 ysize,
- mlib_s32 cmask)
-{
- mlib_u8 *da, *dl;
- mlib_d64 *sp, *sl;
- mlib_d64 sd;
- mlib_s32 i, j;
-
- vis_alignaddr((void *)0, 1);
-
- sp = sl = (mlib_d64 *) src;
- da = dl = dst + (2 / cmask); /* 4,2,1 -> 0,1,2 */
-
- for (j = 0; j < ysize; j++) {
-#pragma pipeloop(0)
- for (i = 0; i < xsize / 8; i++) {
- LOAD_INSERT_STORE_U8_A8(3);
- }
-
- sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
- da = dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb);
- }
-}
-
-/***************************************************************/
void mlib_v_ImageChannelInsert_U8_13_D1(const mlib_u8 *src,
mlib_u8 *dst,
mlib_s32 dsize,
mlib_s32 cmask)
{
@@ -906,110 +367,19 @@
}
}
}
/***************************************************************/
-void mlib_v_ImageChannelInsert_U8_13(const mlib_u8 *src,
- mlib_s32 slb,
- mlib_u8 *dst,
- mlib_s32 dlb,
- mlib_s32 xsize,
- mlib_s32 ysize,
- mlib_s32 cmask)
-{
- mlib_u8 *sa, *da;
- mlib_u8 *sl, *dl;
- mlib_s32 j;
-
- sa = sl = (void *)src;
- da = dl = dst;
-
-#pragma pipeloop(0)
- for (j = 0; j < ysize; j++) {
- mlib_v_ImageChannelInsert_U8_13_D1(sa, da, xsize, cmask);
- sa = sl += slb;
- da = dl += dlb;
- }
-}
-
-/***************************************************************/
#define INSERT_U8_14(sd0, dd0, dd1, dd2, dd3) \
sda = vis_fpmerge(vis_read_hi(sd0), vis_read_hi(sd0)); \
sdb = vis_fpmerge(vis_read_lo(sd0), vis_read_lo(sd0)); \
dd0 = vis_fpmerge(vis_read_hi(sda), vis_read_hi(sda)); \
dd1 = vis_fpmerge(vis_read_lo(sda), vis_read_lo(sda)); \
dd2 = vis_fpmerge(vis_read_hi(sdb), vis_read_hi(sdb)); \
dd3 = vis_fpmerge(vis_read_lo(sdb), vis_read_lo(sdb))
/***************************************************************/
-void mlib_v_ImageChannelInsert_U8_14_A8D1X8(const mlib_u8 *src,
- mlib_u8 *dst,
- mlib_s32 dsize,
- mlib_s32 cmask)
-{
- mlib_d64 *sp, *dp;
- mlib_d64 sd0;
- mlib_d64 sda, sdb;
- mlib_d64 dd0, dd1, dd2, dd3;
- mlib_s32 bmask;
- mlib_s32 i;
-
- bmask = cmask | (cmask << 4);
-
- sp = (mlib_d64 *) src;
- dp = (mlib_d64 *) dst;
-
-#pragma pipeloop(0)
- for (i = 0; i < dsize / 8; i++) {
- sd0 = *sp++;
- INSERT_U8_14(sd0, dd0, dd1, dd2, dd3);
- vis_pst_8(dd0, dp++, bmask);
- vis_pst_8(dd1, dp++, bmask);
- vis_pst_8(dd2, dp++, bmask);
- vis_pst_8(dd3, dp++, bmask);
- }
-}
-
-/***************************************************************/
-void mlib_v_ImageChannelInsert_U8_14_A8D2X8(const mlib_u8 *src,
- mlib_s32 slb,
- mlib_u8 *dst,
- mlib_s32 dlb,
- mlib_s32 xsize,
- mlib_s32 ysize,
- mlib_s32 cmask)
-{
- mlib_d64 *sp, *dp;
- mlib_d64 *sl, *dl;
- mlib_d64 sd0;
- mlib_d64 sda, sdb;
- mlib_d64 dd0, dd1, dd2, dd3;
- mlib_s32 bmask;
- mlib_s32 i, j;
-
- bmask = cmask | (cmask << 4);
-
- sp = sl = (mlib_d64 *) src;
- dp = dl = (mlib_d64 *) dst;
-
- for (j = 0; j < ysize; j++) {
-#pragma pipeloop(0)
- for (i = 0; i < xsize / 8; i++) {
- sd0 = *sp++;
- INSERT_U8_14(sd0, dd0, dd1, dd2, dd3);
- vis_pst_8(dd0, dp++, bmask);
- vis_pst_8(dd1, dp++, bmask);
- vis_pst_8(dd2, dp++, bmask);
- vis_pst_8(dd3, dp++, bmask);
- }
-
- sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
- dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb);
- }
-}
-
-/***************************************************************/
void mlib_v_ImageChannelInsert_U8_14_D1(const mlib_u8 *src,
mlib_u8 *dst,
mlib_s32 dsize,
mlib_s32 cmask)
{
@@ -1186,447 +556,7 @@
}
}
}
}
-/***************************************************************/
-void mlib_v_ImageChannelInsert_U8_14(const mlib_u8 *src,
- mlib_s32 slb,
- mlib_u8 *dst,
- mlib_s32 dlb,
- mlib_s32 xsize,
- mlib_s32 ysize,
- mlib_s32 cmask)
-{
- mlib_u8 *sa, *da;
- mlib_u8 *sl, *dl;
- mlib_s32 j;
-
- sa = sl = (void *)src;
- da = dl = dst;
-
-#pragma pipeloop(0)
- for (j = 0; j < ysize; j++) {
- mlib_v_ImageChannelInsert_U8_14_D1(sa, da, xsize, cmask);
- sa = sl += slb;
- da = dl += dlb;
- }
-}
-
-/***************************************************************/
-#define LOAD_INSERT_STORE_S16_1X_A8(channeld) \
- sd = *sp++; \
- vis_st_u16(sd = vis_faligndata(sd, sd), da); da += channeld; \
- vis_st_u16(sd = vis_faligndata(sd, sd), da); da += channeld; \
- vis_st_u16(sd = vis_faligndata(sd, sd), da); da += channeld; \
- vis_st_u16(sd = vis_faligndata(sd, sd), da); da += channeld
-
-/***************************************************************/
-#define LOAD_INSERT_STORE_S16_1X(channeld) \
- vis_alignaddr((void *)0, off); \
- sd0 = sd1; \
- sd1 = *sp++; \
- sd = vis_faligndata(sd0, sd1); \
- vis_alignaddr((void *)0, 2); \
- vis_st_u16(sd = vis_faligndata(sd, sd), da); da += channeld; \
- vis_st_u16(sd = vis_faligndata(sd, sd), da); da += channeld; \
- vis_st_u16(sd = vis_faligndata(sd, sd), da); da += channeld; \
- vis_st_u16(sd = vis_faligndata(sd, sd), da); da += channeld
-
-/***************************************************************/
-void mlib_v_ImageChannelInsert_S16_12_A8D1X4(const mlib_s16 *src,
- mlib_s16 *dst,
- mlib_s32 dsize,
- mlib_s32 cmask)
-{
- mlib_s16 *da;
- mlib_d64 *sp;
- mlib_d64 sd;
- mlib_s32 i;
-
- sp = (mlib_d64 *) src;
- da = dst + (2 - cmask); /* 2,1 -> 0,1 */
-
- vis_alignaddr((void *)0, 2);
-
-#pragma pipeloop(0)
- for (i = 0; i < dsize / 4; i++) {
- LOAD_INSERT_STORE_S16_1X_A8(2);
- }
-}
-
-/***************************************************************/
-void mlib_v_ImageChannelInsert_S16_12_A8D2X4(const mlib_s16 *src,
- mlib_s32 slb,
- mlib_s16 *dst,
- mlib_s32 dlb,
- mlib_s32 xsize,
- mlib_s32 ysize,
- mlib_s32 cmask)
-{
- mlib_s16 *da, *dl;
- mlib_d64 *sp, *sl;
- mlib_d64 sd;
- mlib_s32 i, j;
-
- sp = sl = (mlib_d64 *) src;
- da = dl = dst + (2 - cmask); /* 2,1 -> 0,1 */
-
- vis_alignaddr((void *)0, 2);
-
- for (j = 0; j < ysize; j++) {
-#pragma pipeloop(0)
- for (i = 0; i < xsize / 4; i++) {
- LOAD_INSERT_STORE_S16_1X_A8(2);
- }
-
- sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
- da = dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
- }
-}
-
-/***************************************************************/
-void mlib_v_ImageChannelInsert_S16_12_D1(const mlib_s16 *src,
- mlib_s16 *dst,
- mlib_s32 dsize,
- mlib_s32 cmask)
-{
- mlib_s16 *sa, *da;
- mlib_s16 *dend; /* end point in destination */
- mlib_d64 *sp; /* 8-byte aligned start points in src */
- mlib_d64 sd0, sd1, sd; /* 8-byte registers for source data */
- mlib_s32 off; /* offset of address alignment in src */
- mlib_s32 i;
-
- sa = (void *)src;
- da = dst + (2 - cmask); /* 2,1 -> 0,1 */
-
- /* prepare the src address */
- sp = (mlib_d64 *) ((mlib_addr) sa & (~7));
- off = (mlib_addr) sa & 7;
-
- dend = da + dsize * 2 - 1;
-
- sd1 = *sp++;
-
-#pragma pipeloop(0)
- for (i = 0; i < dsize / 4; i++) {
- LOAD_INSERT_STORE_S16_1X(2);
- }
-
- /* right end handling */
- if ((mlib_addr) da <= (mlib_addr) dend) {
-
- vis_alignaddr((void *)0, off);
- sd0 = sd1;
- sd1 = *sp++;
- sd = vis_faligndata(sd0, sd1);
-
- vis_alignaddr((void *)0, 2);
- vis_st_u16(sd = vis_faligndata(sd, sd), da);
- da += 2;
- if ((mlib_addr) da <= (mlib_addr) dend) {
- vis_st_u16(sd = vis_faligndata(sd, sd), da);
- da += 2;
- if ((mlib_addr) da <= (mlib_addr) dend) {
- vis_st_u16(sd = vis_faligndata(sd, sd), da);
- }
- }
- }
-}
-
-/***************************************************************/
-void mlib_v_ImageChannelInsert_S16_12(const mlib_s16 *src,
- mlib_s32 slb,
- mlib_s16 *dst,
- mlib_s32 dlb,
- mlib_s32 xsize,
- mlib_s32 ysize,
- mlib_s32 cmask)
-{
- mlib_s16 *sa, *da;
- mlib_s16 *sl, *dl;
- mlib_s32 j;
-
- sa = sl = (void *)src;
- da = dl = dst;
-
-#pragma pipeloop(0)
- for (j = 0; j < ysize; j++) {
- mlib_v_ImageChannelInsert_S16_12_D1(sa, da, xsize, cmask);
- sa = sl = (mlib_s16 *) ((mlib_u8 *) sl + slb);
- da = dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
- }
-}
-
-/***************************************************************/
-void mlib_v_ImageChannelInsert_S16_13_A8D1X4(const mlib_s16 *src,
- mlib_s16 *dst,
- mlib_s32 dsize,
- mlib_s32 cmask)
-{
- mlib_s16 *da;
- mlib_d64 *sp;
- mlib_d64 sd;
- mlib_s32 i;
-
- sp = (mlib_d64 *) src;
- da = dst + (2 / cmask); /* 4,2,1 -> 0,1,2 */
-
- vis_alignaddr((void *)0, 2);
-
-#pragma pipeloop(0)
- for (i = 0; i < dsize / 4; i++) {
- LOAD_INSERT_STORE_S16_1X_A8(3);
- }
-}
-
-/***************************************************************/
-void mlib_v_ImageChannelInsert_S16_13_A8D2X4(const mlib_s16 *src,
- mlib_s32 slb,
- mlib_s16 *dst,
- mlib_s32 dlb,
- mlib_s32 xsize,
- mlib_s32 ysize,
- mlib_s32 cmask)
-{
- mlib_s16 *da, *dl;
- mlib_d64 *sp, *sl;
- mlib_d64 sd;
- mlib_s32 i, j;
-
- sp = sl = (mlib_d64 *) src;
- da = dl = dst + (2 / cmask); /* 4,2,1 -> 0,1,2 */
-
- vis_alignaddr((void *)0, 2);
-
- for (j = 0; j < ysize; j++) {
-#pragma pipeloop(0)
- for (i = 0; i < xsize / 4; i++) {
- LOAD_INSERT_STORE_S16_1X_A8(3);
- }
-
- sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
- da = dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
- }
-}
-
-/***************************************************************/
-void mlib_v_ImageChannelInsert_S16_13_D1(const mlib_s16 *src,
- mlib_s16 *dst,
- mlib_s32 dsize,
- mlib_s32 cmask)
-{
- mlib_s16 *sa, *da;
- mlib_s16 *dend; /* end point in destination */
- mlib_d64 *sp; /* 8-byte aligned start points in src */
- mlib_d64 sd0, sd1, sd; /* 8-byte registers for source data */
- mlib_s32 off; /* offset of address alignment in src */
- mlib_s32 i;
-
- sa = (void *)src;
- da = dst + (2 / cmask); /* 4,2,1 -> 0,1,2 */
-
- /* prepare the src address */
- sp = (mlib_d64 *) ((mlib_addr) sa & (~7));
- off = (mlib_addr) sa & 7;
-
- dend = da + dsize * 3 - 1;
-
- sd1 = *sp++;
-
-#pragma pipeloop(0)
- for (i = 0; i < dsize / 4; i++) {
- LOAD_INSERT_STORE_S16_1X(3);
- }
-
- /* right end handling */
- if ((mlib_addr) da <= (mlib_addr) dend) {
-
- vis_alignaddr((void *)0, off);
- sd0 = sd1;
- sd1 = *sp++;
- sd = vis_faligndata(sd0, sd1);
-
- vis_alignaddr((void *)0, 2);
- vis_st_u16(sd = vis_faligndata(sd, sd), da);
- da += 3;
- if ((mlib_addr) da <= (mlib_addr) dend) {
- vis_st_u16(sd = vis_faligndata(sd, sd), da);
- da += 3;
- if ((mlib_addr) da <= (mlib_addr) dend) {
- vis_st_u16(sd = vis_faligndata(sd, sd), da);
- }
- }
- }
-}
-
-/***************************************************************/
-void mlib_v_ImageChannelInsert_S16_13(const mlib_s16 *src,
- mlib_s32 slb,
- mlib_s16 *dst,
- mlib_s32 dlb,
- mlib_s32 xsize,
- mlib_s32 ysize,
- mlib_s32 cmask)
-{
- mlib_s16 *sa, *da;
- mlib_s16 *sl, *dl;
- mlib_s32 j;
-
- sa = sl = (void *)src;
- da = dl = dst;
-
-#pragma pipeloop(0)
- for (j = 0; j < ysize; j++) {
- mlib_v_ImageChannelInsert_S16_13_D1(sa, da, xsize, cmask);
- sa = sl = (mlib_s16 *) ((mlib_u8 *) sl + slb);
- da = dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
- }
-}
-
-/***************************************************************/
-#define INSERT_S16_14(sp, dp, bmask) /* channel duplicate */ \
- /* obsolete: it is slower than the vis_st_u16() version*/ \
- sd0 = *sp++; \
- sda = vis_fpmerge(vis_read_hi(sd0), vis_read_hi(sd0)); \
- sdb = vis_fpmerge(vis_read_lo(sd0), vis_read_lo(sd0)); \
- sdc = vis_fpmerge(vis_read_hi(sda), vis_read_hi(sda)); \
- sdd = vis_fpmerge(vis_read_lo(sda), vis_read_lo(sda)); \
- sde = vis_fpmerge(vis_read_hi(sdb), vis_read_hi(sdb)); \
- sdf = vis_fpmerge(vis_read_lo(sdb), vis_read_lo(sdb)); \
- dd0 = vis_fpmerge(vis_read_hi(sdc), vis_read_lo(sdc)); \
- dd1 = vis_fpmerge(vis_read_hi(sdd), vis_read_lo(sdd)); \
- dd2 = vis_fpmerge(vis_read_hi(sde), vis_read_lo(sde)); \
- dd3 = vis_fpmerge(vis_read_hi(sdf), vis_read_lo(sdf)); \
- vis_pst_16(dd0, dp++, bmask); \
- vis_pst_16(dd1, dp++, bmask); \
- vis_pst_16(dd2, dp++, bmask); \
- vis_pst_16(dd3, dp++, bmask)
-
-/***************************************************************/
-void mlib_v_ImageChannelInsert_S16_14_A8D1X4(const mlib_s16 *src,
- mlib_s16 *dst,
- mlib_s32 dsize,
- mlib_s32 cmask)
-{
- mlib_s16 *da;
- mlib_d64 *sp;
- mlib_d64 sd;
- mlib_s32 i;
-
- sp = (mlib_d64 *) src;
- da = dst + (6 / cmask + 1) / 2; /* 8,4,2,1 -> 0,1,2,3 */
-
- vis_alignaddr((void *)0, 2);
-
-#pragma pipeloop(0)
- for (i = 0; i < dsize / 4; i++) {
- LOAD_INSERT_STORE_S16_1X_A8(4);
- }
-}
-
-/***************************************************************/
-void mlib_v_ImageChannelInsert_S16_14_A8D2X4(const mlib_s16 *src,
- mlib_s32 slb,
- mlib_s16 *dst,
- mlib_s32 dlb,
- mlib_s32 xsize,
- mlib_s32 ysize,
- mlib_s32 cmask)
-{
- mlib_s16 *da, *dl;
- mlib_d64 *sp, *sl;
- mlib_d64 sd;
- mlib_s32 i, j;
-
- sp = sl = (mlib_d64 *) src;
- da = dl = dst + (6 / cmask + 1) / 2; /* 8,4,2,1 -> 0,1,2,3 */
-
- vis_alignaddr((void *)0, 2);
-
- for (j = 0; j < ysize; j++) {
-#pragma pipeloop(0)
- for (i = 0; i < xsize / 4; i++) {
- LOAD_INSERT_STORE_S16_1X_A8(4);
- }
-
- sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
- da = dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
- }
-}
-
-/***************************************************************/
-void mlib_v_ImageChannelInsert_S16_14_D1(const mlib_s16 *src,
- mlib_s16 *dst,
- mlib_s32 dsize,
- mlib_s32 cmask)
-{
- mlib_s16 *sa, *da;
- mlib_s16 *dend; /* end point in destination */
- mlib_d64 *sp; /* 8-byte aligned start points in src */
- mlib_d64 sd0, sd1, sd; /* 8-byte registers for source data */
- mlib_s32 off; /* offset of address alignment in src */
- mlib_s32 i;
-
- sa = (void *)src;
- da = dst + (6 / cmask + 1) / 2; /* 8,4,2,1 -> 0,1,2,3 */
-
- /* prepare the src address */
- sp = (mlib_d64 *) ((mlib_addr) sa & (~7));
- off = (mlib_addr) sa & 7;
-
- dend = da + dsize * 4 - 1;
-
- sd1 = *sp++;
-
-#pragma pipeloop(0)
- for (i = 0; i < dsize / 4; i++) {
- LOAD_INSERT_STORE_S16_1X(4);
- }
-
- /* right end handling */
- if ((mlib_addr) da <= (mlib_addr) dend) {
-
- vis_alignaddr((void *)0, off);
- sd0 = sd1;
- sd1 = *sp++;
- sd = vis_faligndata(sd0, sd1);
-
- vis_alignaddr((void *)0, 2);
- vis_st_u16(sd = vis_faligndata(sd, sd), da);
- da += 4;
- if ((mlib_addr) da <= (mlib_addr) dend) {
- vis_st_u16(sd = vis_faligndata(sd, sd), da);
- da += 4;
- if ((mlib_addr) da <= (mlib_addr) dend) {
- vis_st_u16(sd = vis_faligndata(sd, sd), da);
- }
- }
- }
-}
-
-/***************************************************************/
-void mlib_v_ImageChannelInsert_S16_14(const mlib_s16 *src,
- mlib_s32 slb,
- mlib_s16 *dst,
- mlib_s32 dlb,
- mlib_s32 xsize,
- mlib_s32 ysize,
- mlib_s32 cmask)
-{
- mlib_s16 *sa, *da;
- mlib_s16 *sl, *dl;
- mlib_s32 j;
-
- sa = sl = (void *)src;
- da = dl = dst;
-
-#pragma pipeloop(0)
- for (j = 0; j < ysize; j++) {
- mlib_v_ImageChannelInsert_S16_14_D1(sa, da, xsize, cmask);
- sa = sl = (mlib_s16 *) ((mlib_u8 *) sl + slb);
- da = dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
- }
-}
/***************************************************************/
< prev index next >