12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26
27
28 /*
29 * FILENAME: mlib_v_ImageChannelExtract_43.c
30 *
31 * FUNCTIONS
32 * mlib_v_ImageChannelExtract_U8_43R_A8D1X8
33 * mlib_v_ImageChannelExtract_U8_43R_A8D2X8
34 * mlib_v_ImageChannelExtract_U8_43R_D1
35 * mlib_v_ImageChannelExtract_U8_43R
36 * mlib_v_ImageChannelExtract_S16_43R_A8D1X4
37 * mlib_v_ImageChannelExtract_S16_43R_A8D2X4
38 * mlib_v_ImageChannelExtract_S16_43R_D1
39 * mlib_v_ImageChannelExtract_S16_43R
40 * mlib_v_ImageChannelExtract_U8_43L_A8D1X8
41 * mlib_v_ImageChannelExtract_U8_43L_A8D2X8
42 * mlib_v_ImageChannelExtract_U8_43L_D1
43 * mlib_v_ImageChannelExtract_U8_43L
44 * mlib_v_ImageChannelExtract_S16_43L_A8D1X4
45 * mlib_v_ImageChannelExtract_S16_43L_A8D2X4
46 * mlib_v_ImageChannelExtract_S16_43L_D1
47 * mlib_v_ImageChannelExtract_S16_43L
48 *
49 * SYNOPSIS
50 *
51 * ARGUMENT
52 * src pointer to source image data
53 * dst pointer to destination image data
54 * slb source image line stride in bytes
55 * dlb destination image line stride in bytes
56 * dsize image data size in pixels
57 * xsize image width in pixels
58 * ysize image height in lines
59 * cmask channel mask
60 *
61 * DESCRIPTION
62 * extract the right or left 3 channels of a 4-channel image to
63 * a 3-channel image -- VIS version low level functions.
64 *
65 * ABGR => BGR (43R), or RGBA => RGB (43L)
66 *
67 * NOTE
68 * These functions are separated from mlib_v_ImageChannelExtract.c
69 * for loop unrolling and structure clarity.
70 */
71
72 #include "vis_proto.h"
73 #include "mlib_image.h"
74 #include "mlib_v_ImageChannelExtract.h"
75
76 /***************************************************************/
/*
 * EXTRACT_U8_43R_old -- legacy one-byte-per-step form of EXTRACT_U8_43R.
 *
 * Works on the caller's locals: sd0..sd3 hold four source words
 * (--b g r --b g r) and dd0..dd2 receive the three packed result words.
 * The macro never touches GSR.align itself: the caller has to issue
 * vis_alignaddr((void *)0, 7) first, so that vis_faligndata(x, y) puts
 * the last byte of x in front of y and vis_faligndata(x, x) rotates x
 * by one byte.  The source words are rotated in place, i.e. destroyed.
 */
#define EXTRACT_U8_43R_old                                              \
  /* dd2: pixels 7 and 6 out of sd3 */                                  \
  dd2 = vis_faligndata(sd3, dd2);       /* r7-------------- */          \
  sd3 = vis_faligndata(sd3, sd3);                                       \
  dd2 = vis_faligndata(sd3, dd2);       /* g7r7------------ */          \
  sd3 = vis_faligndata(sd3, sd3);                                       \
  dd2 = vis_faligndata(sd3, dd2);       /* b7g7r7---------- */          \
  sd3 = vis_faligndata(sd3, sd3);                                       \
  sd3 = vis_faligndata(sd3, sd3);       /* skip unused channel */       \
  dd2 = vis_faligndata(sd3, dd2);       /* r6b7g7r7-------- */          \
  sd3 = vis_faligndata(sd3, sd3);                                       \
  dd2 = vis_faligndata(sd3, dd2);       /* g6r6b7g7r7------ */          \
  sd3 = vis_faligndata(sd3, sd3);                                       \
  dd2 = vis_faligndata(sd3, dd2);       /* b6g6r6b7g7r7---- */          \
                                                                        \
  /* dd2 is completed with r5, g5 out of sd2 */                         \
  dd2 = vis_faligndata(sd2, dd2);       /* r5b6g6r6b7g7r7-- */          \
  sd2 = vis_faligndata(sd2, sd2);                                       \
  dd2 = vis_faligndata(sd2, dd2);       /* g5r5b6g6r6b7g7r7 */          \
                                                                        \
  /* dd1: b5 and pixel 4 out of sd2 */                                  \
  sd2 = vis_faligndata(sd2, sd2);                                       \
  dd1 = vis_faligndata(sd2, dd1);       /* b5-------------- */          \
  sd2 = vis_faligndata(sd2, sd2);                                       \
  sd2 = vis_faligndata(sd2, sd2);       /* skip unused channel */       \
  dd1 = vis_faligndata(sd2, dd1);       /* r4b5------------ */          \
  sd2 = vis_faligndata(sd2, sd2);                                       \
  dd1 = vis_faligndata(sd2, dd1);       /* g4r4b5---------- */          \
  sd2 = vis_faligndata(sd2, sd2);                                       \
  dd1 = vis_faligndata(sd2, dd1);       /* b4g4r4b5-------- */          \
                                                                        \
  /* dd1: pixel 3 and r2 out of sd1 */                                  \
  dd1 = vis_faligndata(sd1, dd1);       /* r3b4g4r4b5------ */          \
  sd1 = vis_faligndata(sd1, sd1);                                       \
  dd1 = vis_faligndata(sd1, dd1);       /* g3r3b4g4r4b5---- */          \
  sd1 = vis_faligndata(sd1, sd1);                                       \
  dd1 = vis_faligndata(sd1, dd1);       /* b3g3r3b4g4r4b5-- */          \
  sd1 = vis_faligndata(sd1, sd1);                                       \
  sd1 = vis_faligndata(sd1, sd1);       /* skip unused channel */       \
  dd1 = vis_faligndata(sd1, dd1);       /* r2b3g3r3b4g4r4b5 */          \
                                                                        \
  /* dd0: g2, b2 out of sd1 */                                          \
  sd1 = vis_faligndata(sd1, sd1);                                       \
  dd0 = vis_faligndata(sd1, dd0);       /* g2-------------- */          \
  sd1 = vis_faligndata(sd1, sd1);                                       \
  dd0 = vis_faligndata(sd1, dd0);       /* b2g2------------ */          \
                                                                        \
  /* dd0: pixels 1 and 0 out of sd0 */                                  \
  dd0 = vis_faligndata(sd0, dd0);       /* r1b2g2---------- */          \
  sd0 = vis_faligndata(sd0, sd0);                                       \
  dd0 = vis_faligndata(sd0, dd0);       /* g1r1b2g2-------- */          \
  sd0 = vis_faligndata(sd0, sd0);                                       \
  dd0 = vis_faligndata(sd0, dd0);       /* b1g1r1b2g2------ */          \
  sd0 = vis_faligndata(sd0, sd0);                                       \
  sd0 = vis_faligndata(sd0, sd0);       /* skip unused channel */       \
  dd0 = vis_faligndata(sd0, dd0);       /* r0b1g1r1b2g2---- */          \
  sd0 = vis_faligndata(sd0, sd0);                                       \
  dd0 = vis_faligndata(sd0, dd0);       /* g0r0b1g1r1b2g2-- */          \
  sd0 = vis_faligndata(sd0, sd0);                                       \
  dd0 = vis_faligndata(sd0, dd0);       /* b0g0r0b1g1r1b2g2 */
131
132 /***************************************************************/
/*
 * EXTRACT_U8_43R -- pack eight 4-channel U8 pixels (sd0..sd3, layout
 * --b g r --b g r) into 24 bytes of 3-channel data (dd0..dd2), dropping
 * the first channel of every pixel.
 *
 * Uses the caller's locals sd0..sd3, dd0..dd2 and the scratch word sda.
 * The result words are built back to front: each vis_faligndata() pushes
 * the trailing (8 - GSR.align) bytes of its first operand in front of the
 * word being assembled.  GSR.align is rewritten several times, so the
 * caller must re-issue vis_alignaddr() before relying on it afterwards.
 */
#define EXTRACT_U8_43R                                                  \
  /* ---- dd2 ---- */                                                   \
  vis_alignaddr((void *)0, 5);          /* take 3 bytes per step */     \
  dd2 = vis_faligndata(sd3, dd2);       /* b7g7r7---------- */          \
  sda = vis_freg_pair(vis_read_hi(sd3), vis_read_hi(sd3));              \
  dd2 = vis_faligndata(sda, dd2);       /* b6g6r6b7g7r7---- */          \
                                                                        \
  vis_alignaddr((void *)0, 6);          /* take 2 bytes */              \
  dd2 = vis_faligndata(sd2, dd2);       /* g5r5b6g6r6b7g7r7 */          \
                                                                        \
  /* ---- dd1 ---- */                                                   \
  vis_alignaddr((void *)0, 5);                                          \
  dd1 = vis_faligndata(sd2, dd1);       /* b5g5r5---------- */          \
  sda = vis_freg_pair(vis_read_hi(sd2), vis_read_hi(sd2));              \
  dd1 = vis_faligndata(sda, dd1);       /* b4g4r4b5g5r5---- */          \
  dd1 = vis_faligndata(sd1, dd1);       /* b3g3r3b4g4r4b5g5 */          \
  sda = vis_freg_pair(vis_read_hi(sd1), vis_read_hi(sd1));              \
  vis_alignaddr((void *)0, 7);          /* take 1 byte */               \
  dd1 = vis_faligndata(sda, dd1);       /* r2b3g3r3b4g4r4b5 */          \
                                                                        \
  /* ---- dd0 (sda still holds pixel 2) ---- */                         \
  vis_alignaddr((void *)0, 5);                                          \
  dd0 = vis_faligndata(sda, dd0);       /* b2g2r2---------- */          \
  dd0 = vis_faligndata(sd0, dd0);       /* b1g1r1b2g2r2---- */          \
  sda = vis_freg_pair(vis_read_hi(sd0), vis_read_hi(sd0));              \
  dd0 = vis_faligndata(sda, dd0);       /* b0g0r0b1g1r1b2g2 */
156
157 /***************************************************************/
/*
 * LOAD_EXTRACT_U8_43R_STORE -- one fully aligned step: read 32 source
 * bytes (8 pixels) through sp, convert with EXTRACT_U8_43R and write the
 * 24 result bytes through dp.  Advances sp by 4 and dp by 3 words.
 */
#define LOAD_EXTRACT_U8_43R_STORE                                       \
  sd0 = sp[0];                          /* --b0g0r0--b1g1r1 */          \
  sd1 = sp[1];                          /* --b2g2r2--b3g3r3 */          \
  sd2 = sp[2];                          /* --b4g4r4--b5g5r5 */          \
  sd3 = sp[3];                          /* --b6g6r6--b7g7r7 */          \
  sp += 4;                                                              \
  EXTRACT_U8_43R;                                                       \
  dp[0] = dd0;                          /* b0g0r0b1g1r1b2g2 */          \
  dp[1] = dd1;                          /* r2b3g3r3b4g4r4b5 */          \
  dp[2] = dd2;                          /* g5r5b6g6r6b7g7r7 */          \
  dp += 3;
167
168 /***************************************************************/
/*
 * LOAD_EXTRACT_U8_43R -- one step for an arbitrarily aligned source.
 *
 * s4 carries the last aligned word fetched by the previous step; four
 * more aligned words are read (sp[1]..sp[4]) and each neighbouring pair
 * is realigned by soff bytes into sd0..sd3.  The previous dd2 is saved in
 * dd2old (needed when the destination is misaligned too) before
 * EXTRACT_U8_43R produces the new dd0..dd2.  Advances sp by 4 words and
 * leaves GSR.align as set by EXTRACT_U8_43R.
 */
#define LOAD_EXTRACT_U8_43R                                             \
  vis_alignaddr((void *)soff, 0);                                       \
  s0 = s4;                                                              \
  s1 = sp[1];                                                           \
  sd0 = vis_faligndata(s0, s1);                                         \
  s2 = sp[2];                                                           \
  sd1 = vis_faligndata(s1, s2);                                         \
  s3 = sp[3];                                                           \
  sd2 = vis_faligndata(s2, s3);                                         \
  s4 = sp[4];                                                           \
  sd3 = vis_faligndata(s3, s4);                                         \
  sp += 4;                                                              \
  dd2old = dd2;                                                         \
  EXTRACT_U8_43R
183
184 /***************************************************************/
185 /*
186 * Both source and destination image data are 1-d vectors and
187 * 8-byte aligned. And dsize is multiple of 8.
188 */
189
190 void mlib_v_ImageChannelExtract_U8_43R_A8D1X8(const mlib_u8 *src,
191 mlib_u8 *dst,
192 mlib_s32 dsize)
193 {
194 mlib_d64 *sp, *dp;
195 mlib_d64 sd0, sd1, sd2, sd3; /* source data */
196 mlib_d64 dd0, dd1, dd2; /* dst data */
197 mlib_d64 sda;
198 mlib_s32 i;
199
200 sp = (mlib_d64 *) src;
201 dp = (mlib_d64 *) dst;
202
203 /* set GSR.offset for vis_faligndata() */
204 /* vis_alignaddr((void *)0, 7); *//* only for _old */
205
206 #pragma pipeloop(0)
207 for (i = 0; i < dsize / 8; i++) {
208 LOAD_EXTRACT_U8_43R_STORE;
209 }
210 }
211
212 /***************************************************************/
213 /*
214 * Either source or destination image data are not 1-d vectors, but
215 * they are 8-byte aligned. And slb and dlb are multiple of 8.
216 * The xsize is multiple of 8.
217 */
218
219 void mlib_v_ImageChannelExtract_U8_43R_A8D2X8(const mlib_u8 *src,
220 mlib_s32 slb,
221 mlib_u8 *dst,
222 mlib_s32 dlb,
223 mlib_s32 xsize,
224 mlib_s32 ysize)
225 {
226 mlib_d64 *sp, *dp; /* 8-byte aligned pointer for pixel */
227 mlib_d64 *sl, *dl; /* 8-byte aligned pointer for line */
228 mlib_d64 sd0, sd1, sd2, sd3; /* source data */
229 mlib_d64 dd0, dd1, dd2; /* dst data */
230 mlib_d64 sda;
231 mlib_s32 i, j; /* indices for x, y */
232
233 /* set GSR.offset for vis_faligndata() */
234 /* vis_alignaddr((void *)0, 7); *//* only for _old */
235
236 sp = sl = (mlib_d64 *) src;
237 dp = dl = (mlib_d64 *) dst;
238
239 /* row loop */
240 for (j = 0; j < ysize; j++) {
241 /* 8-byte column loop */
242 #pragma pipeloop(0)
243 for (i = 0; i < xsize / 8; i++) {
244 LOAD_EXTRACT_U8_43R_STORE;
245 }
246
247 sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
248 dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb);
249 }
250 }
251
252 /***************************************************************/
253 /*
254 * Either source or destination data are not 8-byte aligned.
255 * And dsize is in pixels.
256 */
257
258 void mlib_v_ImageChannelExtract_U8_43R_D1(const mlib_u8 *src,
259 mlib_u8 *dst,
260 mlib_s32 dsize)
261 {
262 mlib_u8 *sa, *da;
263 mlib_u8 *dend, *dend2; /* end points in dst */
264 mlib_d64 *dp; /* 8-byte aligned start points in dst */
265 mlib_d64 *sp; /* 8-byte aligned start point in src */
266 mlib_d64 s0, s1, s2, s3, s4; /* 8-byte source row data */
267 mlib_d64 sd0, sd1, sd2, sd3; /* 8-byte source data */
268 mlib_d64 dd0, dd1, dd2; /* dst data */
269 mlib_d64 dd2old; /* the last datum of the last step */
270 mlib_d64 sda;
271 mlib_s32 soff; /* offset of address in src */
272 mlib_s32 doff; /* offset of address in dst */
273 mlib_s32 emask; /* edge mask */
274 mlib_s32 i, n;
275
276 sa = (void *)src;
277 da = dst;
278
279 /* prepare the source address */
280 sp = (mlib_d64 *) ((mlib_addr) sa & (~7));
281 soff = ((mlib_addr) sa & 7);
282
283 /* prepare the destination addresses */
284 dp = (mlib_d64 *) ((mlib_addr) da & (~7));
285 dend = da + dsize * 3 - 1;
286 dend2 = dend - 23;
287 doff = 8 - ((mlib_addr) da & 7);
288
289 /* generate edge mask for the start point */
290 emask = vis_edge8(da, dend);
291
292 /* load 32 byte, convert, store 24 bytes */
293 s4 = sp[0]; /* initial value */
294 LOAD_EXTRACT_U8_43R;
295
296 if (dsize >= 8) {
297 if (doff == 8) {
298 vis_pst_8(dd0, dp++, emask);
299 *dp++ = dd1;
300 *dp++ = dd2;
301 }
302 else {
303 vis_alignaddr((void *)doff, 0);
304 vis_pst_8(vis_faligndata(dd0, dd0), dp++, emask);
305 *dp++ = vis_faligndata(dd0, dd1);
306 *dp++ = vis_faligndata(dd1, dd2);
307 }
308 }
309 else { /* for very small size */
310 if (doff == 8) {
311 vis_pst_8(dd0, dp++, emask);
312 if ((mlib_addr) dp <= (mlib_addr) dend) {
313 emask = vis_edge8(dp, dend);
314 vis_pst_8(dd1, dp++, emask);
315 if ((mlib_addr) dp <= (mlib_addr) dend) {
316 emask = vis_edge8(dp, dend);
317 vis_pst_8(dd2, dp++, emask);
318 }
319 }
320 }
321 else {
322 vis_alignaddr((void *)doff, 0);
323 vis_pst_8(vis_faligndata(dd0, dd0), dp++, emask);
324 if ((mlib_addr) dp <= (mlib_addr) dend) {
325 emask = vis_edge8(dp, dend);
326 vis_pst_8(vis_faligndata(dd0, dd1), dp++, emask);
327 if ((mlib_addr) dp <= (mlib_addr) dend) {
328 emask = vis_edge8(dp, dend);
329 vis_pst_8(vis_faligndata(dd1, dd2), dp++, emask);
330 if ((mlib_addr) dp <= (mlib_addr) dend) {
331 emask = vis_edge8(dp, dend);
332 vis_pst_8(vis_faligndata(dd2, dd2), dp++, emask);
333 }
334 }
335 }
336 }
337 }
338
339 /* no edge handling is needed in the loop */
340 if (doff == 8) {
341 if ((mlib_addr) dp <= (mlib_addr) dend2) {
342 n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 24 + 1;
343 #pragma pipeloop(0)
344 for (i = 0; i < n; i++) {
345 LOAD_EXTRACT_U8_43R;
346 *dp++ = dd0;
347 *dp++ = dd1;
348 *dp++ = dd2;
349 }
350 }
351 }
352 else {
353 if ((mlib_addr) dp <= (mlib_addr) dend2) {
354 n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 24 + 1;
355 #pragma pipeloop(0)
356 for (i = 0; i < n; i++) {
357 LOAD_EXTRACT_U8_43R;
358 vis_alignaddr((void *)doff, 0);
359 *dp++ = vis_faligndata(dd2old, dd0);
360 *dp++ = vis_faligndata(dd0, dd1);
361 *dp++ = vis_faligndata(dd1, dd2);
362 }
363 }
364 }
365
366 if ((mlib_addr) dp <= (mlib_addr) dend) {
367 LOAD_EXTRACT_U8_43R;
368 emask = vis_edge8(dp, dend);
369 if (doff == 8) {
370 vis_pst_8(dd0, dp++, emask);
371 if ((mlib_addr) dp <= (mlib_addr) dend) {
372 emask = vis_edge8(dp, dend);
373 vis_pst_8(dd1, dp++, emask);
374 if ((mlib_addr) dp <= (mlib_addr) dend) {
375 emask = vis_edge8(dp, dend);
376 vis_pst_8(dd2, dp++, emask);
377 }
378 }
379 }
380 else {
381 vis_alignaddr((void *)doff, 0);
382 vis_pst_8(vis_faligndata(dd2old, dd0), dp++, emask);
383 if ((mlib_addr) dp <= (mlib_addr) dend) {
384 emask = vis_edge8(dp, dend);
385 vis_pst_8(vis_faligndata(dd0, dd1), dp++, emask);
386 if ((mlib_addr) dp <= (mlib_addr) dend) {
387 emask = vis_edge8(dp, dend);
388 vis_pst_8(vis_faligndata(dd1, dd2), dp++, emask);
389 }
390 }
391 }
392 }
393 }
394
395 /***************************************************************/
396 void mlib_v_ImageChannelExtract_U8_43R(const mlib_u8 *src,
397 mlib_s32 slb,
398 mlib_u8 *dst,
399 mlib_s32 dlb,
400 mlib_s32 xsize,
401 mlib_s32 ysize)
402 {
403 mlib_u8 *sa, *da;
404 mlib_u8 *sl, *dl;
405 mlib_s32 j;
406
407 sa = sl = (void *)src;
408 da = dl = dst;
409
410 for (j = 0; j < ysize; j++) {
411 mlib_v_ImageChannelExtract_U8_43R_D1(sa, da, xsize);
412 sa = sl += slb;
413 da = dl += dlb;
414 }
415 }
416
417 /***************************************************************/
/*
 * EXTRACT_S16_43R_old -- legacy one-element-per-step form of
 * EXTRACT_S16_43R.
 *
 * Works on the caller's locals sd0..sd3 (one --b g r pixel each) and
 * dd0..dd2.  GSR.align is not set here: the caller has to issue
 * vis_alignaddr((void *)0, 6) first, so that vis_faligndata(x, y) puts
 * the last 16-bit element of x in front of y and vis_faligndata(x, x)
 * rotates x by one element.  The source words are rotated in place.
 */
#define EXTRACT_S16_43R_old                                             \
  /* dd2: pixel 3 and r2 */                                             \
  dd2 = vis_faligndata(sd3, dd2);       /* r3------ */                  \
  sd3 = vis_faligndata(sd3, sd3);                                       \
  dd2 = vis_faligndata(sd3, dd2);       /* g3r3---- */                  \
  sd3 = vis_faligndata(sd3, sd3);                                       \
  dd2 = vis_faligndata(sd3, dd2);       /* b3g3r3-- */                  \
                                                                        \
  dd2 = vis_faligndata(sd2, dd2);       /* r2b3g3r3 */                  \
  /* dd1: g2, b2 and r1, g1 */                                          \
  sd2 = vis_faligndata(sd2, sd2);                                       \
  dd1 = vis_faligndata(sd2, dd1);       /* g2------ */                  \
  sd2 = vis_faligndata(sd2, sd2);                                       \
  dd1 = vis_faligndata(sd2, dd1);       /* b2g2---- */                  \
                                                                        \
  dd1 = vis_faligndata(sd1, dd1);       /* r1b2g2-- */                  \
  sd1 = vis_faligndata(sd1, sd1);                                       \
  dd1 = vis_faligndata(sd1, dd1);       /* g1r1b2g2 */                  \
  /* dd0: b1 and pixel 0 */                                             \
  sd1 = vis_faligndata(sd1, sd1);                                       \
  dd0 = vis_faligndata(sd1, dd0);       /* b1------ */                  \
                                                                        \
  dd0 = vis_faligndata(sd0, dd0);       /* r0b1---- */                  \
  sd0 = vis_faligndata(sd0, sd0);                                       \
  dd0 = vis_faligndata(sd0, dd0);       /* g0r0b1-- */                  \
  sd0 = vis_faligndata(sd0, sd0);                                       \
  dd0 = vis_faligndata(sd0, dd0);       /* b0g0r0b1 */
443
444 /***************************************************************/
/*
 * EXTRACT_S16_43R -- pack four 4-channel S16 pixels (sd0..sd3, one
 * --b g r pixel per word) into three words of 3-channel data
 * (dd0..dd2), dropping the first channel of every pixel.
 *
 * Uses the caller's locals sd0..sd3 and dd0..dd2.  Each
 * vis_faligndata() pushes the trailing (8 - GSR.align) bytes of its
 * first operand in front of the word being assembled.  GSR.align is
 * left at 2 on exit.
 */
#define EXTRACT_S16_43R                                                 \
  /* ---- dd2 ---- */                                                   \
  vis_alignaddr((void *)0, 2);          /* take 3 elements */           \
  dd2 = vis_faligndata(sd3, dd2);       /* b3g3r3-- */                  \
                                                                        \
  vis_alignaddr((void *)0, 6);          /* take 1 element */            \
  dd2 = vis_faligndata(sd2, dd2);       /* r2b3g3r3 */                  \
                                                                        \
  /* ---- dd1 ---- */                                                   \
  vis_alignaddr((void *)0, 2);                                          \
  dd1 = vis_faligndata(sd2, dd1);       /* b2g2r2-- */                  \
                                                                        \
  vis_alignaddr((void *)0, 4);          /* take 2 elements */           \
  dd1 = vis_faligndata(sd1, dd1);       /* g1r1b2g2 */                  \
                                                                        \
  /* ---- dd0 ---- */                                                   \
  vis_alignaddr((void *)0, 2);                                          \
  dd0 = vis_faligndata(sd1, dd0);       /* b1g1r1-- */                  \
  dd0 = vis_faligndata(sd0, dd0);       /* b0g0r0b1 */
460
461 /***************************************************************/
/*
 * LOAD_EXTRACT_S16_43R_STORE -- one fully aligned step: read four source
 * words (4 pixels) through sp, convert with EXTRACT_S16_43R and write
 * the three result words through dp.  Advances sp by 4 and dp by 3 words.
 */
#define LOAD_EXTRACT_S16_43R_STORE                                      \
  sd0 = sp[0];                          /* --b0g0r0 */                  \
  sd1 = sp[1];                          /* --b1g1r1 */                  \
  sd2 = sp[2];                          /* --b2g2r2 */                  \
  sd3 = sp[3];                          /* --b3g3r3 */                  \
  sp += 4;                                                              \
                                                                        \
  EXTRACT_S16_43R;                                                      \
                                                                        \
  dp[0] = dd0;                          /* b0g0r0b1 */                  \
  dp[1] = dd1;                          /* g1r1b2g2 */                  \
  dp[2] = dd2;                          /* r2b3g3r3 */                  \
  dp += 3;
474
475 /***************************************************************/
/*
 * LOAD_EXTRACT_S16_43R -- one step for an arbitrarily aligned source.
 *
 * s4 carries the last aligned word fetched by the previous step; four
 * more aligned words are read (sp[1]..sp[4]) and each neighbouring pair
 * is realigned by soff bytes into sd0..sd3.  The previous dd2 is saved in
 * dd2old before EXTRACT_S16_43R produces the new dd0..dd2.  Advances sp
 * by 4 words and leaves GSR.align as set by EXTRACT_S16_43R.
 */
#define LOAD_EXTRACT_S16_43R                                            \
  vis_alignaddr((void *)soff, 0);                                       \
  s0 = s4;                                                              \
  s1 = sp[1];                                                           \
  sd0 = vis_faligndata(s0, s1);                                         \
  s2 = sp[2];                                                           \
  sd1 = vis_faligndata(s1, s2);                                         \
  s3 = sp[3];                                                           \
  sd2 = vis_faligndata(s2, s3);                                         \
  s4 = sp[4];                                                           \
  sd3 = vis_faligndata(s3, s4);                                         \
  sp += 4;                                                              \
  dd2old = dd2;                                                         \
  EXTRACT_S16_43R
491
492 /***************************************************************/
493 /*
494 * Both source and destination image data are 1-d vectors and
495 * 8-byte aligned. And size is in 4-pixels.
496 */
497
498 void mlib_v_ImageChannelExtract_S16_43R_A8D1X4(const mlib_s16 *src,
499 mlib_s16 *dst,
500 mlib_s32 dsize)
501 {
502 mlib_d64 *sp, *dp; /* 8-byte aligned pointer for pixel */
503 mlib_d64 sd0, sd1, sd2, sd3; /* source data */
504 mlib_d64 dd0, dd1, dd2; /* dst data */
505 mlib_s32 i;
506
507 sp = (mlib_d64 *) src;
508 dp = (mlib_d64 *) dst;
509
510 /* set GSR.offset for vis_faligndata() */
511 /* vis_alignaddr((void *)0, 6); *//* only for _old */
512
513 #pragma pipeloop(0)
514 for (i = 0; i < dsize / 4; i++) {
515 LOAD_EXTRACT_S16_43R_STORE;
516 }
517 }
518
519 /***************************************************************/
520 /*
521 * Either source or destination image data are not 1-d vectors, but
522 * they are 8-byte aligned. The xsize is multiple of 8.
523 * slb and dlb are multiple of 8.
524 */
525
526 void mlib_v_ImageChannelExtract_S16_43R_A8D2X4(const mlib_s16 *src,
527 mlib_s32 slb,
528 mlib_s16 *dst,
529 mlib_s32 dlb,
530 mlib_s32 xsize,
531 mlib_s32 ysize)
532 {
533 mlib_d64 *sp, *dp; /* 8-byte aligned pointer for pixel */
534 mlib_d64 *sl, *dl; /* 8-byte aligned pointer for line */
535 mlib_d64 sd0, sd1, sd2, sd3; /* source data */
536 mlib_d64 dd0, dd1, dd2; /* dst data */
537 mlib_s32 i, j; /* indices for x, y */
538
539 /* set GSR.offset for vis_faligndata() */
540 /* vis_alignaddr((void *)0, 6); *//* only for _old */
541
542 sp = sl = (mlib_d64 *) src;
543 dp = dl = (mlib_d64 *) dst;
544
545 /* row loop */
546 for (j = 0; j < ysize; j++) {
547 /* 4-pixel column loop */
548 #pragma pipeloop(0)
549 for (i = 0; i < xsize / 4; i++) {
550 LOAD_EXTRACT_S16_43R_STORE;
551 }
552
553 sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
554 dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb);
555 }
556 }
557
558 /***************************************************************/
559 /*
560 * Either source or destination data are not 8-byte aligned.
561 * And dsize is multiple of 8.
562 */
563
564 void mlib_v_ImageChannelExtract_S16_43R_D1(const mlib_s16 *src,
565 mlib_s16 *dst,
566 mlib_s32 dsize)
567 {
568 mlib_s16 *sa, *da; /* pointer for pixel */
569 mlib_s16 *dend, *dend2; /* end points in dst */
570 mlib_d64 *dp; /* 8-byte aligned start points in dst */
571 mlib_d64 *sp; /* 8-byte aligned start point in src */
572 mlib_d64 s0, s1, s2, s3, s4; /* 8-byte source row data */
573 mlib_d64 sd0, sd1, sd2, sd3; /* 8-byte source data */
574 mlib_d64 dd0, dd1, dd2; /* dst data */
575 mlib_d64 dd2old; /* the last datum of the last step */
576 mlib_s32 soff; /* offset of address in src */
577 mlib_s32 doff; /* offset of address in dst */
578 mlib_s32 emask; /* edge mask */
579 mlib_s32 i, n;
580
581 sa = (void *)src;
582 da = dst;
583
584 /* prepare the source address */
585 sp = (mlib_d64 *) ((mlib_addr) sa & (~7));
586 soff = ((mlib_addr) sa & 7);
587
588 /* prepare the destination addresses */
589 dp = (mlib_d64 *) ((mlib_addr) da & (~7));
590 dend = da + dsize * 3 - 1;
591 dend2 = dend - 11;
592 doff = 8 - ((mlib_addr) da & 7);
593
594 /* generate edge mask for the start point */
595 emask = vis_edge16(da, dend);
596
597 /* load 32 byte, convert, store 24 bytes */
598 s4 = sp[0]; /* initial value */
599 LOAD_EXTRACT_S16_43R;
600
601 if (dsize >= 4) {
602 if (doff == 8) {
603 vis_pst_16(dd0, dp++, emask);
604 *dp++ = dd1;
605 *dp++ = dd2;
606 }
607 else {
608 vis_alignaddr((void *)doff, 0);
609 vis_pst_16(vis_faligndata(dd0, dd0), dp++, emask);
610 *dp++ = vis_faligndata(dd0, dd1);
611 *dp++ = vis_faligndata(dd1, dd2);
612 }
613 }
614 else { /* for very small size */
615 if (doff == 8) {
616 vis_pst_16(dd0, dp++, emask);
617 if ((mlib_addr) dp <= (mlib_addr) dend) {
618 emask = vis_edge16(dp, dend);
619 vis_pst_16(dd1, dp++, emask);
620 if ((mlib_addr) dp <= (mlib_addr) dend) {
621 emask = vis_edge16(dp, dend);
622 vis_pst_16(dd2, dp++, emask);
623 }
624 }
625 }
626 else {
627 vis_alignaddr((void *)doff, 0);
628 vis_pst_16(vis_faligndata(dd0, dd0), dp++, emask);
629 if ((mlib_addr) dp <= (mlib_addr) dend) {
630 emask = vis_edge16(dp, dend);
631 vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask);
632 if ((mlib_addr) dp <= (mlib_addr) dend) {
633 emask = vis_edge16(dp, dend);
634 vis_pst_16(vis_faligndata(dd1, dd2), dp++, emask);
635 }
636 }
637 }
638 }
639
640 /* no edge handling is needed in the loop */
641 if (doff == 8) {
642 if ((mlib_addr) dp <= (mlib_addr) dend2) {
643 n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 24 + 1;
644 #pragma pipeloop(0)
645 for (i = 0; i < n; i++) {
646 LOAD_EXTRACT_S16_43R;
647 *dp++ = dd0;
648 *dp++ = dd1;
649 *dp++ = dd2;
650 }
651 }
652 }
653 else {
654 if ((mlib_addr) dp <= (mlib_addr) dend2) {
655 n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 24 + 1;
656 #pragma pipeloop(0)
657 for (i = 0; i < n; i++) {
658 LOAD_EXTRACT_S16_43R;
659 vis_alignaddr((void *)doff, 0);
660 *dp++ = vis_faligndata(dd2old, dd0);
661 *dp++ = vis_faligndata(dd0, dd1);
662 *dp++ = vis_faligndata(dd1, dd2);
663 }
664 }
665 }
666
667 if ((mlib_addr) dp <= (mlib_addr) dend) {
668 LOAD_EXTRACT_S16_43R;
669 emask = vis_edge16(dp, dend);
670 if (doff == 8) {
671 vis_pst_16(dd0, dp++, emask);
672 if ((mlib_addr) dp <= (mlib_addr) dend) {
673 emask = vis_edge16(dp, dend);
674 vis_pst_16(dd1, dp++, emask);
675 if ((mlib_addr) dp <= (mlib_addr) dend) {
676 emask = vis_edge16(dp, dend);
677 vis_pst_16(dd2, dp++, emask);
678 }
679 }
680 }
681 else {
682 vis_alignaddr((void *)doff, 0);
683 vis_pst_16(vis_faligndata(dd2old, dd0), dp++, emask);
684 if ((mlib_addr) dp <= (mlib_addr) dend) {
685 emask = vis_edge16(dp, dend);
686 vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask);
687 if ((mlib_addr) dp <= (mlib_addr) dend) {
688 emask = vis_edge16(dp, dend);
689 vis_pst_16(vis_faligndata(dd1, dd2), dp++, emask);
690 }
691 }
692 }
693 }
694 }
695
696 /***************************************************************/
697 void mlib_v_ImageChannelExtract_S16_43R(const mlib_s16 *src,
698 mlib_s32 slb,
699 mlib_s16 *dst,
700 mlib_s32 dlb,
701 mlib_s32 xsize,
702 mlib_s32 ysize)
703 {
704 mlib_s16 *sa, *da;
705 mlib_s16 *sl, *dl;
706 mlib_s32 j;
707
708 sa = sl = (void *)src;
709 da = dl = dst;
710
711 for (j = 0; j < ysize; j++) {
712 mlib_v_ImageChannelExtract_S16_43R_D1(sa, da, xsize);
713 sa = sl = (mlib_s16 *) ((mlib_u8 *) sl + slb);
714 da = dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
715 }
716 }
717
718 /***************************************************************/
/*
 * EXTRACT_U8_43L_old -- legacy one-byte-per-step form of EXTRACT_U8_43L.
 *
 * Works on the caller's locals: sd0..sd3 hold four source words
 * (r g b -- r g b --) and dd0..dd2 receive the three packed result
 * words.  GSR.align is not set here: the caller has to issue
 * vis_alignaddr((void *)0, 1) first, so that vis_faligndata(y, x)
 * appends the first byte of x to y and vis_faligndata(x, x) rotates x
 * by one byte.  The source words are rotated in place, i.e. destroyed.
 */
#define EXTRACT_U8_43L_old                                              \
  /* dd0: pixels 0 and 1 out of sd0 */                                  \
  dd0 = vis_faligndata(dd0, sd0);       /* --------------r0 */          \
  sd0 = vis_faligndata(sd0, sd0);                                       \
  dd0 = vis_faligndata(dd0, sd0);       /* ------------r0g0 */          \
  sd0 = vis_faligndata(sd0, sd0);                                       \
  dd0 = vis_faligndata(dd0, sd0);       /* ----------r0g0b0 */          \
  sd0 = vis_faligndata(sd0, sd0);                                       \
  sd0 = vis_faligndata(sd0, sd0);       /* skip unused channel */       \
  dd0 = vis_faligndata(dd0, sd0);       /* --------r0g0b0r1 */          \
  sd0 = vis_faligndata(sd0, sd0);                                       \
  dd0 = vis_faligndata(dd0, sd0);       /* ------r0g0b0r1g1 */          \
  sd0 = vis_faligndata(sd0, sd0);                                       \
  dd0 = vis_faligndata(dd0, sd0);       /* ----r0g0b0r1g1b1 */          \
                                                                        \
  /* dd0 is completed with r2, g2 out of sd1 */                         \
  dd0 = vis_faligndata(dd0, sd1);       /* --r0g0b0r1g1b1r2 */          \
  sd1 = vis_faligndata(sd1, sd1);                                       \
  dd0 = vis_faligndata(dd0, sd1);       /* r0g0b0r1g1b1r2g2 */          \
                                                                        \
  /* dd1: b2 and pixel 3 out of sd1 */                                  \
  sd1 = vis_faligndata(sd1, sd1);                                       \
  dd1 = vis_faligndata(dd1, sd1);       /* --------------b2 */          \
  sd1 = vis_faligndata(sd1, sd1);                                       \
  sd1 = vis_faligndata(sd1, sd1);       /* skip unused channel */       \
  dd1 = vis_faligndata(dd1, sd1);       /* ------------b2r3 */          \
  sd1 = vis_faligndata(sd1, sd1);                                       \
  dd1 = vis_faligndata(dd1, sd1);       /* ----------b2r3g3 */          \
  sd1 = vis_faligndata(sd1, sd1);                                       \
  dd1 = vis_faligndata(dd1, sd1);       /* --------b2r3g3b3 */          \
                                                                        \
  /* dd1: pixel 4 and r5 out of sd2 */                                  \
  dd1 = vis_faligndata(dd1, sd2);       /* ------b2r3g3b3r4 */          \
  sd2 = vis_faligndata(sd2, sd2);                                       \
  dd1 = vis_faligndata(dd1, sd2);       /* ----b2r3g3b3r4g4 */          \
  sd2 = vis_faligndata(sd2, sd2);                                       \
  dd1 = vis_faligndata(dd1, sd2);       /* --b2r3g3b3r4g4b4 */          \
  sd2 = vis_faligndata(sd2, sd2);                                       \
  sd2 = vis_faligndata(sd2, sd2);       /* skip unused channel */       \
  dd1 = vis_faligndata(dd1, sd2);       /* b2r3g3b3r4g4b4r5 */          \
                                                                        \
  /* dd2: g5, b5 out of sd2 */                                          \
  sd2 = vis_faligndata(sd2, sd2);                                       \
  dd2 = vis_faligndata(dd2, sd2);       /* --------------g5 */          \
  sd2 = vis_faligndata(sd2, sd2);                                       \
  dd2 = vis_faligndata(dd2, sd2);       /* ------------g5b5 */          \
                                                                        \
  /* dd2: pixels 6 and 7 out of sd3 */                                  \
  dd2 = vis_faligndata(dd2, sd3);       /* ----------g5b5r6 */          \
  sd3 = vis_faligndata(sd3, sd3);                                       \
  dd2 = vis_faligndata(dd2, sd3);       /* --------g5b5r6g6 */          \
  sd3 = vis_faligndata(sd3, sd3);                                       \
  dd2 = vis_faligndata(dd2, sd3);       /* ------g5b5r6g6b6 */          \
  sd3 = vis_faligndata(sd3, sd3);                                       \
  sd3 = vis_faligndata(sd3, sd3);       /* skip unused channel */       \
  dd2 = vis_faligndata(dd2, sd3);       /* ----g5b5r6g6b6r7 */          \
  sd3 = vis_faligndata(sd3, sd3);                                       \
  dd2 = vis_faligndata(dd2, sd3);       /* --g5b5r6g6b6r7g7 */          \
  sd3 = vis_faligndata(sd3, sd3);                                       \
  dd2 = vis_faligndata(dd2, sd3);       /* g5b5r6g6b6r7g7b7 */
774
775 /***************************************************************/
/*
 * EXTRACT_U8_43L -- pack eight 4-channel U8 pixels (sd0..sd3, layout
 * r g b -- r g b --) into 24 bytes of 3-channel data (dd0..dd2),
 * dropping the last channel of every pixel.
 *
 * Uses the caller's locals sd0..sd3, dd0..dd2 and the scratch word sda.
 * The result words are built front to back: each vis_faligndata()
 * appends the leading GSR.align bytes of its second operand to the word
 * being assembled.  vis_freg_pair(lo, hi) swaps the two pixels of a
 * source word so that the second pixel comes first.  GSR.align is
 * rewritten several times and is left at 3 on exit.
 */
#define EXTRACT_U8_43L                                                  \
  /* ---- dd0 ---- */                                                   \
  vis_alignaddr((void *)0, 3);          /* append 3 bytes per step */   \
  dd0 = vis_faligndata(dd0, sd0);       /* ----------r0g0b0 */          \
  sda = vis_freg_pair(vis_read_lo(sd0), vis_read_hi(sd0));              \
  dd0 = vis_faligndata(dd0, sda);       /* ----r0g0b0r1g1b1 */          \
                                                                        \
  vis_alignaddr((void *)0, 2);          /* append 2 bytes */            \
  dd0 = vis_faligndata(dd0, sd1);       /* r0g0b0r1g1b1r2g2 */          \
                                                                        \
  /* ---- dd1 ---- */                                                   \
  vis_alignaddr((void *)0, 3);                                          \
  dd1 = vis_faligndata(dd1, sd1);       /* ----------r2g2b2 */          \
  sda = vis_freg_pair(vis_read_lo(sd1), vis_read_hi(sd1));              \
  dd1 = vis_faligndata(dd1, sda);       /* ----r2g2b2r3g3b3 */          \
  dd1 = vis_faligndata(dd1, sd2);       /* g2b2r3g3b3r4g4b4 */          \
                                                                        \
  sda = vis_freg_pair(vis_read_lo(sd2), vis_read_hi(sd2));              \
  vis_alignaddr((void *)0, 1);          /* append 1 byte */             \
  dd1 = vis_faligndata(dd1, sda);       /* b2r3g3b3r4g4b4r5 */          \
                                                                        \
  /* ---- dd2 (sda still starts with pixel 5) ---- */                   \
  vis_alignaddr((void *)0, 3);                                          \
  dd2 = vis_faligndata(dd2, sda);       /* ----------r5g5b5 */          \
                                                                        \
  dd2 = vis_faligndata(dd2, sd3);       /* ----r5g5b5r6g6b6 */          \
  sda = vis_freg_pair(vis_read_lo(sd3), vis_read_hi(sd3));              \
  dd2 = vis_faligndata(dd2, sda);       /* g5b5r6g6b6r7g7b7 */
802
803 /***************************************************************/
/*
 * LOAD_EXTRACT_U8_43L_STORE -- one fully aligned step: read 32 source
 * bytes (8 pixels) through sp, convert with EXTRACT_U8_43L and write the
 * 24 result bytes through dp.  Advances sp by 4 and dp by 3 words.
 */
#define LOAD_EXTRACT_U8_43L_STORE                                       \
  sd0 = sp[0];                          /* r0g0b0--r1g1b1-- */          \
  sd1 = sp[1];                          /* r2g2b2--r3g3b3-- */          \
  sd2 = sp[2];                          /* r4g4b4--r5g5b5-- */          \
  sd3 = sp[3];                          /* r6g6b6--r7g7b7-- */          \
  sp += 4;                                                              \
                                                                        \
  EXTRACT_U8_43L;                                                       \
                                                                        \
  dp[0] = dd0;                          /* r0g0b0r1g1b1r2g2 */          \
  dp[1] = dd1;                          /* b2r3g3b3r4g4b4r5 */          \
  dp[2] = dd2;                          /* g5b5r6g6b6r7g7b7 */          \
  dp += 3;
816
817 /***************************************************************/
/*
 * LOAD_EXTRACT_U8_43L -- one step for an arbitrarily aligned source.
 *
 * s4 carries the last aligned word fetched by the previous step; four
 * more aligned words are read (sp[1]..sp[4]) and each neighbouring pair
 * is realigned by soff bytes, so that sd0..sd3 start on a pixel
 * boundary.  The previous dd2 is saved in dd2old before EXTRACT_U8_43L
 * produces the new dd0..dd2.  Advances sp by 4 words and leaves
 * GSR.align as set by EXTRACT_U8_43L.
 */
#define LOAD_EXTRACT_U8_43L                                             \
  vis_alignaddr((void *)soff, 0);                                       \
  s0 = s4;                                                              \
  s1 = sp[1];                                                           \
  sd0 = vis_faligndata(s0, s1);         /* now pixel aligned */         \
  s2 = sp[2];                                                           \
  sd1 = vis_faligndata(s1, s2);                                         \
  s3 = sp[3];                                                           \
  sd2 = vis_faligndata(s2, s3);                                         \
  s4 = sp[4];                                                           \
  sd3 = vis_faligndata(s3, s4);                                         \
  sp += 4;                                                              \
                                                                        \
  dd2old = dd2;                                                         \
  EXTRACT_U8_43L
835
836 /***************************************************************/
837 /*
838 * Both source and destination image data are 1-d vectors and
839 * 8-byte aligned. And dsize is multiple of 8.
840 */
841
842 void mlib_v_ImageChannelExtract_U8_43L_A8D1X8(const mlib_u8 *src,
843 mlib_u8 *dst,
844 mlib_s32 dsize)
845 {
846 mlib_d64 *sp, *dp;
847 mlib_d64 sd0, sd1, sd2, sd3; /* source data */
848 mlib_d64 dd0, dd1, dd2; /* dst data */
849 mlib_d64 sda;
850 mlib_s32 i;
851
852 sp = (mlib_d64 *) src;
853 dp = (mlib_d64 *) dst;
854
855 /* set GSR.offset for vis_faligndata() */
856 /* vis_alignaddr((void *)0, 1); *//* for _old only */
857
858 #pragma pipeloop(0)
859 for (i = 0; i < dsize / 8; i++) {
860 LOAD_EXTRACT_U8_43L_STORE;
861 }
862 }
863
864 /***************************************************************/
865 /*
866 * Either source or destination image data are not 1-d vectors, but
867 * they are 8-byte aligned. And slb and dlb are multiple of 8.
868 * The xsize is multiple of 8.
869 */
870
871 void mlib_v_ImageChannelExtract_U8_43L_A8D2X8(const mlib_u8 *src,
872 mlib_s32 slb,
873 mlib_u8 *dst,
874 mlib_s32 dlb,
875 mlib_s32 xsize,
876 mlib_s32 ysize)
877 {
878 mlib_d64 *sp, *dp; /* 8-byte aligned pointer for pixel */
879 mlib_d64 *sl, *dl; /* 8-byte aligned pointer for line */
880 mlib_d64 sd0, sd1, sd2, sd3; /* source data */
881 mlib_d64 dd0, dd1, dd2; /* dst data */
882 mlib_d64 sda;
883 mlib_s32 i, j; /* indices for x, y */
884
885 /* set GSR.offset for vis_faligndata() */
886 /* vis_alignaddr((void *)0, 1); *//* for _old only */
887
888 sp = sl = (mlib_d64 *) src;
889 dp = dl = (mlib_d64 *) dst;
890
891 /* row loop */
892 for (j = 0; j < ysize; j++) {
893 /* 8-byte column loop */
894 #pragma pipeloop(0)
895 for (i = 0; i < xsize / 8; i++) {
896 LOAD_EXTRACT_U8_43L_STORE;
897 }
898
899 sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
900 dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb);
901 }
902 }
903
904 /***************************************************************/
/*
 * Either source or destination data are not 8-byte aligned.
 * And dsize is in pixels.
 */
909
910 void mlib_v_ImageChannelExtract_U8_43L_D1(const mlib_u8 *src,
911 mlib_u8 *dst,
912 mlib_s32 dsize)
913 {
914 mlib_u8 *sa, *da;
915 mlib_u8 *dend, *dend2; /* end points in dst */
916 mlib_d64 *dp; /* 8-byte aligned start points in dst */
917 mlib_d64 *sp; /* 8-byte aligned start point in src */
918 mlib_d64 s0, s1, s2, s3, s4; /* 8-byte source row data */
919 mlib_d64 sd0, sd1, sd2, sd3; /* 8-byte source data */
920 mlib_d64 dd0, dd1, dd2; /* dst data */
921 mlib_d64 dd2old; /* the last datum of the last step */
922 mlib_d64 sda;
923 mlib_s32 soff; /* offset of address in src */
924 mlib_s32 doff; /* offset of address in dst */
925 mlib_s32 emask; /* edge mask */
1028 vis_pst_8(dd2, dp++, emask);
1029 }
1030 }
1031 }
1032 else {
1033 vis_alignaddr((void *)doff, 0);
1034 vis_pst_8(vis_faligndata(dd2old, dd0), dp++, emask);
1035 if ((mlib_addr) dp <= (mlib_addr) dend) {
1036 emask = vis_edge8(dp, dend);
1037 vis_pst_8(vis_faligndata(dd0, dd1), dp++, emask);
1038 if ((mlib_addr) dp <= (mlib_addr) dend) {
1039 emask = vis_edge8(dp, dend);
1040 vis_pst_8(vis_faligndata(dd1, dd2), dp++, emask);
1041 }
1042 }
1043 }
1044 }
1045 }
1046
1047 /***************************************************************/
1048 void mlib_v_ImageChannelExtract_U8_43L(const mlib_u8 *src,
1049 mlib_s32 slb,
1050 mlib_u8 *dst,
1051 mlib_s32 dlb,
1052 mlib_s32 xsize,
1053 mlib_s32 ysize)
1054 {
1055 mlib_u8 *sa, *da;
1056 mlib_u8 *sl, *dl;
1057 mlib_s32 j;
1058
1059 sa = sl = (void *)src;
1060 da = dl = dst;
1061
1062 for (j = 0; j < ysize; j++) {
1063 mlib_v_ImageChannelExtract_U8_43L_D1(sa, da, xsize);
1064 sa = sl += slb;
1065 da = dl += dlb;
1066 }
1067 }
1068
1069 /***************************************************************/
/*
 * EXTRACT_S16_43L -- pack four 4-channel S16 pixels (sd0..sd3, one
 * r g b -- pixel per word) into three words of 3-channel data
 * (dd0..dd2), dropping the last channel of every pixel.
 *
 * Uses the caller's locals sd0..sd3 and dd0..dd2.  Each
 * vis_faligndata() appends the leading GSR.align bytes of its second
 * operand to the word being assembled.  GSR.align is left at 6 on exit.
 */
#define EXTRACT_S16_43L                                                 \
  /* ---- dd0 ---- */                                                   \
  vis_alignaddr((void *)0, 6);          /* append 3 elements */         \
  dd0 = vis_faligndata(dd0, sd0);       /* --r0g0b0 */                  \
  vis_alignaddr((void *)0, 2);          /* append 1 element */          \
  dd0 = vis_faligndata(dd0, sd1);       /* r0g0b0r1 */                  \
                                                                        \
  /* ---- dd1 ---- */                                                   \
  vis_alignaddr((void *)0, 6);                                          \
  dd1 = vis_faligndata(dd1, sd1);       /* --r1g1b1 */                  \
  vis_alignaddr((void *)0, 4);          /* append 2 elements */         \
  dd1 = vis_faligndata(dd1, sd2);       /* g1b1r2g2 */                  \
                                                                        \
  /* ---- dd2 ---- */                                                   \
  vis_alignaddr((void *)0, 6);                                          \
  dd2 = vis_faligndata(dd2, sd2);       /* --r2g2b2 */                  \
  dd2 = vis_faligndata(dd2, sd3);       /* b2r3g3b3 */
1084
1085 /***************************************************************/
/*
 * LOAD_EXTRACT_S16_43L_STORE -- one fully aligned step: read four source
 * words (4 pixels) through sp, convert with EXTRACT_S16_43L and write
 * the three result words through dp.  Advances sp by 4 and dp by 3 words.
 */
#define LOAD_EXTRACT_S16_43L_STORE                                      \
  sd0 = sp[0];                          /* r0g0b0-- */                  \
  sd1 = sp[1];                          /* r1g1b1-- */                  \
  sd2 = sp[2];                          /* r2g2b2-- */                  \
  sd3 = sp[3];                          /* r3g3b3-- */                  \
  sp += 4;                                                              \
                                                                        \
  EXTRACT_S16_43L;                                                      \
                                                                        \
  dp[0] = dd0;                          /* r0g0b0r1 */                  \
  dp[1] = dd1;                          /* g1b1r2g2 */                  \
  dp[2] = dd2;                          /* b2r3g3b3 */                  \
  dp += 3;
1098
1099 /***************************************************************/
/*
 * LOAD_EXTRACT_S16_43L -- one step for an arbitrarily aligned source.
 *
 * s4 carries the last aligned word fetched by the previous step; four
 * more aligned words are read (sp[1]..sp[4]) and each neighbouring pair
 * is realigned by soff bytes into sd0..sd3.  The previous dd2 is saved in
 * dd2old before EXTRACT_S16_43L produces the new dd0..dd2.  Advances sp
 * by 4 words and leaves GSR.align as set by EXTRACT_S16_43L.
 */
#define LOAD_EXTRACT_S16_43L                                            \
  vis_alignaddr((void *)soff, 0);                                       \
  s0 = s4;                                                              \
  s1 = sp[1];                                                           \
  sd0 = vis_faligndata(s0, s1);                                         \
  s2 = sp[2];                                                           \
  sd1 = vis_faligndata(s1, s2);                                         \
  s3 = sp[3];                                                           \
  sd2 = vis_faligndata(s2, s3);                                         \
  s4 = sp[4];                                                           \
  sd3 = vis_faligndata(s3, s4);                                         \
  sp += 4;                                                              \
  dd2old = dd2;                                                         \
  EXTRACT_S16_43L
1115
1116 /***************************************************************/
1117 /*
1118 * Both source and destination image data are 1-d vectors and
1119 * 8-byte aligned. And dsize is multiple of 4.
1120 */
1121
1122 void mlib_v_ImageChannelExtract_S16_43L_A8D1X4(const mlib_s16 *src,
1123 mlib_s16 *dst,
1124 mlib_s32 dsize)
1125 {
1126 mlib_d64 *sp, *dp; /* 8-byte aligned pointer for pixel */
1127 mlib_d64 sd0, sd1, sd2, sd3; /* source data */
1128 mlib_d64 dd0, dd1, dd2; /* dst data */
1129 mlib_s32 i;
1130
1131 sp = (mlib_d64 *) src;
1132 dp = (mlib_d64 *) dst;
1133
1134 /* set GSR.offset for vis_faligndata() */
1135 /* vis_alignaddr((void *)0, 2); *//* only for _old */
1136
1137 #pragma pipeloop(0)
1138 for (i = 0; i < dsize / 4; i++) {
1139 LOAD_EXTRACT_S16_43L_STORE;
1140 }
1141 }
1142
1143 /***************************************************************/
1144 /*
1145 * Either source or destination image data are not 1-d vectors, but
1146 * they are 8-byte aligned. The xsize is multiple of 4.
1147 * And slb and dlb are multiple of 8.
1148 */
1149
1150 void mlib_v_ImageChannelExtract_S16_43L_A8D2X4(const mlib_s16 *src,
1151 mlib_s32 slb,
1152 mlib_s16 *dst,
1153 mlib_s32 dlb,
1154 mlib_s32 xsize,
1155 mlib_s32 ysize)
1156 {
1157 mlib_d64 *sp, *dp; /* 8-byte aligned pointer for pixel */
1158 mlib_d64 *sl, *dl; /* 8-byte aligned pointer for line */
1159 mlib_d64 sd0, sd1, sd2, sd3; /* source data */
1160 mlib_d64 dd0, dd1, dd2; /* dst data */
1161 mlib_s32 i, j; /* indices for x, y */
1162
1163 /* set GSR.offset for vis_faligndata() */
1164 /* vis_alignaddr((void *)0, 2); *//* only for _old */
1165
1166 sp = sl = (mlib_d64 *) src;
1167 dp = dl = (mlib_d64 *) dst;
1168
1169 /* row loop */
1170 for (j = 0; j < ysize; j++) {
1171 /* 4-pixel column loop */
1172 #pragma pipeloop(0)
1173 for (i = 0; i < xsize / 4; i++) {
1174 LOAD_EXTRACT_S16_43L_STORE;
1175 }
1176
1177 sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
1178 dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb);
1179 }
1180 }
1181
1182 /***************************************************************/
1183 /*
1184 * Either source or destination data are not 8-byte aligned.
1185 * And size is in pixels.
1186 */
1187
1188 void mlib_v_ImageChannelExtract_S16_43L_D1(const mlib_s16 *src,
1189 mlib_s16 *dst,
1190 mlib_s32 dsize)
1191 {
1192 mlib_s16 *sa, *da; /* pointer for pixel */
1193 mlib_s16 *dend, *dend2; /* end points in dst */
1194 mlib_d64 *dp; /* 8-byte aligned start points in dst */
1195 mlib_d64 *sp; /* 8-byte aligned start point in src */
1196 mlib_d64 s0, s1, s2, s3, s4; /* 8-byte source row data */
1197 mlib_d64 sd0, sd1, sd2, sd3; /* 8-byte source data */
1198 mlib_d64 dd0, dd1, dd2; /* dst data */
1199 mlib_d64 dd2old; /* the last datum of the last step */
1200 mlib_s32 soff; /* offset of address in src */
1201 mlib_s32 doff; /* offset of address in dst */
1202 mlib_s32 emask; /* edge mask */
1203 mlib_s32 i, n;
1297 emask = vis_edge16(dp, dend);
1298 vis_pst_16(dd1, dp++, emask);
1299 if ((mlib_addr) dp <= (mlib_addr) dend) {
1300 emask = vis_edge16(dp, dend);
1301 vis_pst_16(dd2, dp++, emask);
1302 }
1303 }
1304 }
1305 else {
1306 vis_alignaddr((void *)doff, 0);
1307 vis_pst_16(vis_faligndata(dd2old, dd0), dp++, emask);
1308 if ((mlib_addr) dp <= (mlib_addr) dend) {
1309 emask = vis_edge16(dp, dend);
1310 vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask);
1311 if ((mlib_addr) dp <= (mlib_addr) dend) {
1312 emask = vis_edge16(dp, dend);
1313 vis_pst_16(vis_faligndata(dd1, dd2), dp++, emask);
1314 }
1315 }
1316 }
1317 }
1318 }
1319
1320 /***************************************************************/
1321 void mlib_v_ImageChannelExtract_S16_43L(const mlib_s16 *src,
1322 mlib_s32 slb,
1323 mlib_s16 *dst,
1324 mlib_s32 dlb,
1325 mlib_s32 xsize,
1326 mlib_s32 ysize)
1327 {
1328 mlib_s16 *sa, *da;
1329 mlib_s16 *sl, *dl;
1330 mlib_s32 j;
1331
1332 sa = sl = (void *)src;
1333 da = dl = dst;
1334
1335 for (j = 0; j < ysize; j++) {
1336 mlib_v_ImageChannelExtract_S16_43L_D1(sa, da, xsize);
1337 sa = sl = (mlib_s16 *) ((mlib_u8 *) sl + slb);
1338 da = dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
1339 }
1340 }
1341
1342 /***************************************************************/
|
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26
27
28 /*
29 * FILENAME: mlib_v_ImageChannelExtract_43.c
30 *
31 * FUNCTIONS
32 * mlib_v_ImageChannelExtract_U8_43L_D1
33 * mlib_v_ImageChannelExtract_S16_43L_D1
34 *
35 * SYNOPSIS
36 *
37 * ARGUMENT
38 * src pointer to source image data
39 * dst pointer to destination image data
40 * slb source image line stride in bytes
41 * dlb destination image line stride in bytes
42 * dsize image data size in pixels
43 * xsize image width in pixels
44 * ysize image height in lines
45 * cmask channel mask
46 *
47 * DESCRIPTION
48 * extract the right or left 3 channels of a 4-channel image to
49 * a 3-channel image -- VIS version low level functions.
50 *
51 * ABGR => BGR (43R), or RGBA => RGB (43L)
52 *
53 * NOTE
54 * These functions are separated from mlib_v_ImageChannelExtract.c
55 * for loop unrolling and structure clarity.
56 */
57
58 #include "vis_proto.h"
59 #include "mlib_image.h"
60 #include "mlib_v_ImageChannelExtract.h"
61
62 /***************************************************************/
/*
 * EXTRACT_U8_43L: pack the first three bytes (R, G, B) of eight U8
 * pixels into three output doublewords, dropping the fourth byte of
 * every pixel.
 *
 *   in       sd0..sd3   two 4-byte pixels per doubleword
 *   out      dd0..dd2   24 bytes r0g0b0 ... r7g7b7
 *   scratch  sda        a source doubleword with its two 32-bit halves
 *                       exchanged, so its second pixel comes first
 *
 * vis_alignaddr((void *)0, k) selects the byte offset k that the following
 * vis_faligndata(a, b) calls use; as the byte diagrams show, each call
 * keeps the tail of a and appends the head of b.  The statements are
 * therefore order-dependent and must not be rearranged.  "--" marks a
 * byte holding a don't-care leftover of the previous ddN value; it is
 * shifted out before ddN is complete.
 *
 * Side effect: the alignment offset is left at 3 when the macro ends.
 * The expansion already ends with ';'.
 */
63 #define EXTRACT_U8_43L /* shift left */ \
64 \
65 vis_alignaddr((void *)0, 3); \
66 dd0 = vis_faligndata(dd0, sd0); /* ----------r0g0b0 */ \
67 sda = vis_freg_pair(vis_read_lo(sd0), vis_read_hi(sd0)); /* halves swapped: pixel 1 first */ \
68 dd0 = vis_faligndata(dd0, sda); /* ----r0g0b0r1g1b1 */ \
69 \
70 vis_alignaddr((void *)0, 2); \
71 dd0 = vis_faligndata(dd0, sd1); /* r0g0b0r1g1b1r2g2 */ \
72 \
73 vis_alignaddr((void *)0, 3); \
74 dd1 = vis_faligndata(dd1, sd1); /* ----------r2g2b2 */ \
75 sda = vis_freg_pair(vis_read_lo(sd1), vis_read_hi(sd1)); /* halves swapped: pixel 3 first */ \
76 dd1 = vis_faligndata(dd1, sda); /* ----r2g2b2r3g3b3 */ \
77 dd1 = vis_faligndata(dd1, sd2); /* g2b2r3g3b3r4g4b4 */ \
78 \
79 sda = vis_freg_pair(vis_read_lo(sd2), vis_read_hi(sd2)); /* halves swapped: pixel 5 first */ \
80 vis_alignaddr((void *)0, 1); \
81 dd1 = vis_faligndata(dd1, sda); /* b2r3g3b3r4g4b4r5 */ \
82 \
83 vis_alignaddr((void *)0, 3); \
84 dd2 = vis_faligndata(dd2, sda); /* ----------r5g5b5 */ \
85 \
86 dd2 = vis_faligndata(dd2, sd3); /* ----r5g5b5r6g6b6 */ \
87 sda = vis_freg_pair(vis_read_lo(sd3), vis_read_hi(sd3)); /* halves swapped: pixel 7 first */ \
88 dd2 = vis_faligndata(dd2, sda); /* g5b5r6g6b6r7g7b7 */
89
90 /***************************************************************/
/*
 * LOAD_EXTRACT_U8_43L: one step of the U8 path for a source that is not
 * 8-byte aligned.
 *
 *   - sets the alignment offset from soff,
 *   - carries the previous s4 over into s0 and loads sp[1]..sp[4],
 *   - realigns neighbouring doublewords into sd0..sd3,
 *   - advances sp by 4 doublewords,
 *   - saves the previous dd2 in dd2old, then expands EXTRACT_U8_43L
 *     to produce the new dd0..dd2.
 *
 * On entry s4 must hold the doubleword at sp[0]; after a step that is
 * true again for the next step.  NOTE(review): the first use presumably
 * relies on the caller priming s4 -- confirm at the call site.
 * The expansion ends with the last statement of EXTRACT_U8_43L.
 */
91 #define LOAD_EXTRACT_U8_43L \
92 \
93 vis_alignaddr((void *)soff, 0); /* alignment offset taken from soff */ \
94 s0 = s4; /* reuse the last doubleword of the previous step */ \
95 s1 = sp[1]; \
96 s2 = sp[2]; \
97 s3 = sp[3]; \
98 s4 = sp[4]; /* becomes s0 of the next step */ \
99 sd0 = vis_faligndata(s0, s1); /* the intermediate is pixel (RGBA) aligned */ \
100 sd1 = vis_faligndata(s1, s2); \
101 sd2 = vis_faligndata(s2, s3); \
102 sd3 = vis_faligndata(s3, s4); \
103 sp += 4; \
104 \
105 /* vis_alignaddr((void *)0, 1); */ /* for _old only */ \
106 dd2old = dd2; /* keep the last output of the previous step */ \
107 EXTRACT_U8_43L
108
109 /***************************************************************/
110 /*
111 * Either source or destination data are not 8-byte aligned.
112 * And ssize is multiple of 8.
113 */
114
115 void mlib_v_ImageChannelExtract_U8_43L_D1(const mlib_u8 *src,
116 mlib_u8 *dst,
117 mlib_s32 dsize)
118 {
119 mlib_u8 *sa, *da;
120 mlib_u8 *dend, *dend2; /* end points in dst */
121 mlib_d64 *dp; /* 8-byte aligned start points in dst */
122 mlib_d64 *sp; /* 8-byte aligned start point in src */
123 mlib_d64 s0, s1, s2, s3, s4; /* 8-byte source row data */
124 mlib_d64 sd0, sd1, sd2, sd3; /* 8-byte source data */
125 mlib_d64 dd0, dd1, dd2; /* dst data */
126 mlib_d64 dd2old; /* the last datum of the last step */
127 mlib_d64 sda;
128 mlib_s32 soff; /* offset of address in src */
129 mlib_s32 doff; /* offset of address in dst */
130 mlib_s32 emask; /* edge mask */
233 vis_pst_8(dd2, dp++, emask);
234 }
235 }
236 }
237 else {
238 vis_alignaddr((void *)doff, 0);
239 vis_pst_8(vis_faligndata(dd2old, dd0), dp++, emask);
240 if ((mlib_addr) dp <= (mlib_addr) dend) {
241 emask = vis_edge8(dp, dend);
242 vis_pst_8(vis_faligndata(dd0, dd1), dp++, emask);
243 if ((mlib_addr) dp <= (mlib_addr) dend) {
244 emask = vis_edge8(dp, dend);
245 vis_pst_8(vis_faligndata(dd1, dd2), dp++, emask);
246 }
247 }
248 }
249 }
250 }
251
252 /***************************************************************/
/*
 * EXTRACT_S16_43L: pack the first three 16-bit lanes (R, G, B) of four
 * S16 pixels into three output doublewords, dropping the fourth lane of
 * every pixel.
 *
 *   in    sd0..sd3   one 4-lane pixel per doubleword
 *   out   dd0..dd2   r0g0b0r1 / g1b1r2g2 / b2r3g3b3
 *
 * vis_alignaddr((void *)0, k) selects the byte offset k that the following
 * vis_faligndata(a, b) calls use; as the lane diagrams show, each call
 * keeps the tail of a and appends the head of b.  The statements are
 * therefore order-dependent and must not be rearranged.  "--" marks a
 * lane holding a don't-care leftover of the previous ddN value; it is
 * shifted out before ddN is complete.
 *
 * Side effect: the alignment offset is left at 6 when the macro ends.
 * The expansion already ends with ';'.
 */
253 #define EXTRACT_S16_43L /* shift left */ \
254 vis_alignaddr((void *)0, 6); \
255 dd0 = vis_faligndata(dd0, sd0); /* --r0g0b0 */ \
256 vis_alignaddr((void *)0, 2); \
257 dd0 = vis_faligndata(dd0, sd1); /* r0g0b0r1 */ \
258 \
259 vis_alignaddr((void *)0, 6); \
260 dd1 = vis_faligndata(dd1, sd1); /* --r1g1b1 */ \
261 vis_alignaddr((void *)0, 4); \
262 dd1 = vis_faligndata(dd1, sd2); /* g1b1r2g2 */ \
263 \
264 vis_alignaddr((void *)0, 6); \
265 dd2 = vis_faligndata(dd2, sd2); /* --r2g2b2 */ \
266 dd2 = vis_faligndata(dd2, sd3); /* b2r3g3b3 (offset is still 6) */
267
268 /***************************************************************/
/*
 * LOAD_EXTRACT_S16_43L: one step of the S16 path for a source that is
 * not 8-byte aligned.
 *
 *   - sets the alignment offset from soff,
 *   - carries the previous s4 over into s0 and loads sp[1]..sp[4],
 *   - realigns neighbouring doublewords into sd0..sd3,
 *   - advances sp by 4 doublewords,
 *   - saves the previous dd2 in dd2old, then expands EXTRACT_S16_43L
 *     to produce the new dd0..dd2.
 *
 * On entry s4 must hold the doubleword at sp[0]; after a step that is
 * true again for the next step.  NOTE(review): the first use presumably
 * relies on the caller priming s4 -- confirm at the call site.
 * The expansion ends with the last statement of EXTRACT_S16_43L.
 */
269 #define LOAD_EXTRACT_S16_43L \
270 \
271 vis_alignaddr((void *)soff, 0); /* alignment offset taken from soff */ \
272 s0 = s4; /* reuse the last doubleword of the previous step */ \
273 s1 = sp[1]; \
274 s2 = sp[2]; \
275 s3 = sp[3]; \
276 s4 = sp[4]; /* becomes s0 of the next step */ \
277 sd0 = vis_faligndata(s0, s1); \
278 sd1 = vis_faligndata(s1, s2); \
279 sd2 = vis_faligndata(s2, s3); \
280 sd3 = vis_faligndata(s3, s4); \
281 sp += 4; \
282 dd2old = dd2; /* keep the last output of the previous step */ \
283 EXTRACT_S16_43L
284
285 /***************************************************************/
286 /*
287 * Either source or destination data are not 8-byte aligned.
288 * And size is in pixels.
289 */
290
291 void mlib_v_ImageChannelExtract_S16_43L_D1(const mlib_s16 *src,
292 mlib_s16 *dst,
293 mlib_s32 dsize)
294 {
295 mlib_s16 *sa, *da; /* pointer for pixel */
296 mlib_s16 *dend, *dend2; /* end points in dst */
297 mlib_d64 *dp; /* 8-byte aligned start points in dst */
298 mlib_d64 *sp; /* 8-byte aligned start point in src */
299 mlib_d64 s0, s1, s2, s3, s4; /* 8-byte source row data */
300 mlib_d64 sd0, sd1, sd2, sd3; /* 8-byte source data */
301 mlib_d64 dd0, dd1, dd2; /* dst data */
302 mlib_d64 dd2old; /* the last datum of the last step */
303 mlib_s32 soff; /* offset of address in src */
304 mlib_s32 doff; /* offset of address in dst */
305 mlib_s32 emask; /* edge mask */
306 mlib_s32 i, n;
400 emask = vis_edge16(dp, dend);
401 vis_pst_16(dd1, dp++, emask);
402 if ((mlib_addr) dp <= (mlib_addr) dend) {
403 emask = vis_edge16(dp, dend);
404 vis_pst_16(dd2, dp++, emask);
405 }
406 }
407 }
408 else {
409 vis_alignaddr((void *)doff, 0);
410 vis_pst_16(vis_faligndata(dd2old, dd0), dp++, emask);
411 if ((mlib_addr) dp <= (mlib_addr) dend) {
412 emask = vis_edge16(dp, dend);
413 vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask);
414 if ((mlib_addr) dp <= (mlib_addr) dend) {
415 emask = vis_edge16(dp, dend);
416 vis_pst_16(vis_faligndata(dd1, dd2), dp++, emask);
417 }
418 }
419 }
420 }
421 }
422
423 /***************************************************************/
|