/*
 * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */



/*
 * FILENAME: mlib_v_ImageChannelExtract_43.c
 *
 * FUNCTIONS
 *      mlib_v_ImageChannelExtract_U8_43R_A8D1X8
 *      mlib_v_ImageChannelExtract_U8_43R_A8D2X8
 *      mlib_v_ImageChannelExtract_U8_43R_D1
 *      mlib_v_ImageChannelExtract_U8_43R
 *      mlib_v_ImageChannelExtract_S16_43R_A8D1X4
 *      mlib_v_ImageChannelExtract_S16_43R_A8D2X4
 *      mlib_v_ImageChannelExtract_S16_43R_D1
 *      mlib_v_ImageChannelExtract_S16_43R
 *      mlib_v_ImageChannelExtract_U8_43L_A8D1X8
 *      mlib_v_ImageChannelExtract_U8_43L_A8D2X8
 *      mlib_v_ImageChannelExtract_U8_43L_D1
 *      mlib_v_ImageChannelExtract_U8_43L
 *      mlib_v_ImageChannelExtract_S16_43L_A8D1X4
 *      mlib_v_ImageChannelExtract_S16_43L_A8D2X4
 *      mlib_v_ImageChannelExtract_S16_43L_D1
 *      mlib_v_ImageChannelExtract_S16_43L
 *
 * SYNOPSIS
 *
 * ARGUMENT
 *      src    pointer to source image data
 *      dst    pointer to destination image data
 *      slb    source image line stride in bytes
 *      dlb    destination image line stride in bytes
 *      dsize  image data size in pixels
 *      xsize  image width in pixels
 *      ysize  image height in lines
 *      cmask  channel mask
 *
 * DESCRIPTION
 *      extract the right or left 3 channels of a 4-channel image to
 *      a 3-channel image -- VIS version low level functions.
 *
 *      ABGR => BGR (43R), or RGBA => RGB (43L)
 *
 * NOTE
 *      These functions are separated from mlib_v_ImageChannelExtract.c
 *      for loop unrolling and structure clarity.
70 */ 71 72 #include "vis_proto.h" 73 #include "mlib_image.h" 74 #include "mlib_v_ImageChannelExtract.h" 75 76 /***************************************************************/ 77 #define EXTRACT_U8_43R_old /* shift right */ \ 78 dd2 = vis_faligndata(sd3, dd2); /* r7-------------- */ \ 79 sd3 = vis_faligndata(sd3, sd3); \ 80 dd2 = vis_faligndata(sd3, dd2); /* g7r7------------ */ \ 81 sd3 = vis_faligndata(sd3, sd3); \ 82 dd2 = vis_faligndata(sd3, dd2); /* b7g7r7---------- */ \ 83 sd3 = vis_faligndata(sd3, sd3); \ 84 sd3 = vis_faligndata(sd3, sd3); \ 85 dd2 = vis_faligndata(sd3, dd2); /* r6b7g7r7-------- */ \ 86 sd3 = vis_faligndata(sd3, sd3); \ 87 dd2 = vis_faligndata(sd3, dd2); /* g6r6b7g7r7------ */ \ 88 sd3 = vis_faligndata(sd3, sd3); \ 89 dd2 = vis_faligndata(sd3, dd2); /* b6g6r6b7g7r7---- */ \ 90 \ 91 dd2 = vis_faligndata(sd2, dd2); /* r5b6g6r6b7g7r7-- */ \ 92 sd2 = vis_faligndata(sd2, sd2); \ 93 dd2 = vis_faligndata(sd2, dd2); /* g5r5b6g6r6b7g7r7 */ \ 94 \ 95 sd2 = vis_faligndata(sd2, sd2); \ 96 dd1 = vis_faligndata(sd2, dd1); /* b5-------------- */ \ 97 sd2 = vis_faligndata(sd2, sd2); \ 98 sd2 = vis_faligndata(sd2, sd2); \ 99 dd1 = vis_faligndata(sd2, dd1); /* r4b5------------ */ \ 100 sd2 = vis_faligndata(sd2, sd2); \ 101 dd1 = vis_faligndata(sd2, dd1); /* g4r4b5---------- */ \ 102 sd2 = vis_faligndata(sd2, sd2); \ 103 dd1 = vis_faligndata(sd2, dd1); /* b4g4r4b5-------- */ \ 104 \ 105 dd1 = vis_faligndata(sd1, dd1); /* r3b4g4r4b5------ */ \ 106 sd1 = vis_faligndata(sd1, sd1); \ 107 dd1 = vis_faligndata(sd1, dd1); /* g3r3b4g4r4b5---- */ \ 108 sd1 = vis_faligndata(sd1, sd1); \ 109 dd1 = vis_faligndata(sd1, dd1); /* b3g3r3b4g4r4b5-- */ \ 110 sd1 = vis_faligndata(sd1, sd1); \ 111 sd1 = vis_faligndata(sd1, sd1); \ 112 dd1 = vis_faligndata(sd1, dd1); /* r2b3g3r3b4g4r4b5 */ \ 113 \ 114 sd1 = vis_faligndata(sd1, sd1); \ 115 dd0 = vis_faligndata(sd1, dd0); /* g2-------------- */ \ 116 sd1 = vis_faligndata(sd1, sd1); \ 117 dd0 = vis_faligndata(sd1, dd0); /* 
b2g2------------ */ \ 118 \ 119 dd0 = vis_faligndata(sd0, dd0); /* r1b2g2---------- */ \ 120 sd0 = vis_faligndata(sd0, sd0); \ 121 dd0 = vis_faligndata(sd0, dd0); /* g1r1b2g2-------- */ \ 122 sd0 = vis_faligndata(sd0, sd0); \ 123 dd0 = vis_faligndata(sd0, dd0); /* b1g1r1b2g2------ */ \ 124 sd0 = vis_faligndata(sd0, sd0); \ 125 sd0 = vis_faligndata(sd0, sd0); \ 126 dd0 = vis_faligndata(sd0, dd0); /* r0b1g1r1b2g2---- */ \ 127 sd0 = vis_faligndata(sd0, sd0); \ 128 dd0 = vis_faligndata(sd0, dd0); /* g0r0b1g1r1b2g2-- */ \ 129 sd0 = vis_faligndata(sd0, sd0); \ 130 dd0 = vis_faligndata(sd0, dd0); /* b0g0r0b1g1r1b2g2 */ 131 132 /***************************************************************/ 133 #define EXTRACT_U8_43R /* shift right */ \ 134 vis_alignaddr((void *)0, 5); \ 135 dd2 = vis_faligndata(sd3, dd2); /* b7g7r7---------- */ \ 136 sda = vis_freg_pair(vis_read_hi(sd3), vis_read_hi(sd3)); \ 137 dd2 = vis_faligndata(sda, dd2); /* b6g6r6b7g7r7---- */ \ 138 \ 139 vis_alignaddr((void *)0, 6); \ 140 dd2 = vis_faligndata(sd2, dd2); /* g5r5b6g6r6b7g7r7 */ \ 141 \ 142 vis_alignaddr((void *)0, 5); \ 143 dd1 = vis_faligndata(sd2, dd1); /* b5g5r5---------- */ \ 144 sda = vis_freg_pair(vis_read_hi(sd2), vis_read_hi(sd2)); \ 145 dd1 = vis_faligndata(sda, dd1); /* b4g4r4b5g5r5---- */ \ 146 dd1 = vis_faligndata(sd1, dd1); /* b3g3r3b4g4r4b5g5 */ \ 147 sda = vis_freg_pair(vis_read_hi(sd1), vis_read_hi(sd1)); \ 148 vis_alignaddr((void *)0, 7); \ 149 dd1 = vis_faligndata(sda, dd1); /* r2b3g3r3b4g4r4b5 */ \ 150 \ 151 vis_alignaddr((void *)0, 5); \ 152 dd0 = vis_faligndata(sda, dd0); /* b2g2r2---------- */ \ 153 dd0 = vis_faligndata(sd0, dd0); /* b1g1r1b2g2r2---- */ \ 154 sda = vis_freg_pair(vis_read_hi(sd0), vis_read_hi(sd0)); \ 155 dd0 = vis_faligndata(sda, dd0); /* b0g0r0b1g1r1b2g2 */ 156 157 /***************************************************************/ 158 #define LOAD_EXTRACT_U8_43R_STORE \ 159 sd0 = *sp++; /* --b0g0r0--b1g1r1 */ \ 160 sd1 = *sp++; /* --b2g2r2--b3g3r3 */ \ 161 sd2 
= *sp++; /* --b4g4r4--b5g5r5 */ \ 162 sd3 = *sp++; /* --b6g6r6--b7g7r7 */ \ 163 EXTRACT_U8_43R; \ 164 *dp++ = dd0; /* b0g0r0b1g1r1b2g2 */ \ 165 *dp++ = dd1; /* r2b3g3r3b4g4r4b5 */ \ 166 *dp++ = dd2; /* g5r5b6g6r6b7g7r7 */ 167 168 /***************************************************************/ 169 #define LOAD_EXTRACT_U8_43R \ 170 vis_alignaddr((void *)soff, 0); \ 171 s0 = s4; \ 172 s1 = sp[1]; \ 173 s2 = sp[2]; \ 174 s3 = sp[3]; \ 175 s4 = sp[4]; \ 176 sd0 = vis_faligndata(s0, s1); \ 177 sd1 = vis_faligndata(s1, s2); \ 178 sd2 = vis_faligndata(s2, s3); \ 179 sd3 = vis_faligndata(s3, s4); \ 180 sp += 4; \ 181 dd2old = dd2; \ 182 EXTRACT_U8_43R 183 184 /***************************************************************/ 185 /* 186 * Both source and destination image data are 1-d vectors and 187 * 8-byte aligned. And dsize is multiple of 8. 188 */ 189 190 void mlib_v_ImageChannelExtract_U8_43R_A8D1X8(const mlib_u8 *src, 191 mlib_u8 *dst, 192 mlib_s32 dsize) 193 { 194 mlib_d64 *sp, *dp; 195 mlib_d64 sd0, sd1, sd2, sd3; /* source data */ 196 mlib_d64 dd0, dd1, dd2; /* dst data */ 197 mlib_d64 sda; 198 mlib_s32 i; 199 200 sp = (mlib_d64 *) src; 201 dp = (mlib_d64 *) dst; 202 203 /* set GSR.offset for vis_faligndata() */ 204 /* vis_alignaddr((void *)0, 7); *//* only for _old */ 205 206 #pragma pipeloop(0) 207 for (i = 0; i < dsize / 8; i++) { 208 LOAD_EXTRACT_U8_43R_STORE; 209 } 210 } 211 212 /***************************************************************/ 213 /* 214 * Either source or destination image data are not 1-d vectors, but 215 * they are 8-byte aligned. And slb and dlb are multiple of 8. 216 * The xsize is multiple of 8. 
217 */ 218 219 void mlib_v_ImageChannelExtract_U8_43R_A8D2X8(const mlib_u8 *src, 220 mlib_s32 slb, 221 mlib_u8 *dst, 222 mlib_s32 dlb, 223 mlib_s32 xsize, 224 mlib_s32 ysize) 225 { 226 mlib_d64 *sp, *dp; /* 8-byte aligned pointer for pixel */ 227 mlib_d64 *sl, *dl; /* 8-byte aligned pointer for line */ 228 mlib_d64 sd0, sd1, sd2, sd3; /* source data */ 229 mlib_d64 dd0, dd1, dd2; /* dst data */ 230 mlib_d64 sda; 231 mlib_s32 i, j; /* indices for x, y */ 232 233 /* set GSR.offset for vis_faligndata() */ 234 /* vis_alignaddr((void *)0, 7); *//* only for _old */ 235 236 sp = sl = (mlib_d64 *) src; 237 dp = dl = (mlib_d64 *) dst; 238 239 /* row loop */ 240 for (j = 0; j < ysize; j++) { 241 /* 8-byte column loop */ 242 #pragma pipeloop(0) 243 for (i = 0; i < xsize / 8; i++) { 244 LOAD_EXTRACT_U8_43R_STORE; 245 } 246 247 sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb); 248 dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb); 249 } 250 } 251 252 /***************************************************************/ 253 /* 254 * Either source or destination data are not 8-byte aligned. 255 * And dsize is in pixels. 
256 */ 257 258 void mlib_v_ImageChannelExtract_U8_43R_D1(const mlib_u8 *src, 259 mlib_u8 *dst, 260 mlib_s32 dsize) 261 { 262 mlib_u8 *sa, *da; 263 mlib_u8 *dend, *dend2; /* end points in dst */ 264 mlib_d64 *dp; /* 8-byte aligned start points in dst */ 265 mlib_d64 *sp; /* 8-byte aligned start point in src */ 266 mlib_d64 s0, s1, s2, s3, s4; /* 8-byte source row data */ 267 mlib_d64 sd0, sd1, sd2, sd3; /* 8-byte source data */ 268 mlib_d64 dd0, dd1, dd2; /* dst data */ 269 mlib_d64 dd2old; /* the last datum of the last step */ 270 mlib_d64 sda; 271 mlib_s32 soff; /* offset of address in src */ 272 mlib_s32 doff; /* offset of address in dst */ 273 mlib_s32 emask; /* edge mask */ 274 mlib_s32 i, n; 275 276 sa = (void *)src; 277 da = dst; 278 279 /* prepare the source address */ 280 sp = (mlib_d64 *) ((mlib_addr) sa & (~7)); 281 soff = ((mlib_addr) sa & 7); 282 283 /* prepare the destination addresses */ 284 dp = (mlib_d64 *) ((mlib_addr) da & (~7)); 285 dend = da + dsize * 3 - 1; 286 dend2 = dend - 23; 287 doff = 8 - ((mlib_addr) da & 7); 288 289 /* generate edge mask for the start point */ 290 emask = vis_edge8(da, dend); 291 292 /* load 32 byte, convert, store 24 bytes */ 293 s4 = sp[0]; /* initial value */ 294 LOAD_EXTRACT_U8_43R; 295 296 if (dsize >= 8) { 297 if (doff == 8) { 298 vis_pst_8(dd0, dp++, emask); 299 *dp++ = dd1; 300 *dp++ = dd2; 301 } 302 else { 303 vis_alignaddr((void *)doff, 0); 304 vis_pst_8(vis_faligndata(dd0, dd0), dp++, emask); 305 *dp++ = vis_faligndata(dd0, dd1); 306 *dp++ = vis_faligndata(dd1, dd2); 307 } 308 } 309 else { /* for very small size */ 310 if (doff == 8) { 311 vis_pst_8(dd0, dp++, emask); 312 if ((mlib_addr) dp <= (mlib_addr) dend) { 313 emask = vis_edge8(dp, dend); 314 vis_pst_8(dd1, dp++, emask); 315 if ((mlib_addr) dp <= (mlib_addr) dend) { 316 emask = vis_edge8(dp, dend); 317 vis_pst_8(dd2, dp++, emask); 318 } 319 } 320 } 321 else { 322 vis_alignaddr((void *)doff, 0); 323 vis_pst_8(vis_faligndata(dd0, dd0), dp++, emask); 324 
if ((mlib_addr) dp <= (mlib_addr) dend) { 325 emask = vis_edge8(dp, dend); 326 vis_pst_8(vis_faligndata(dd0, dd1), dp++, emask); 327 if ((mlib_addr) dp <= (mlib_addr) dend) { 328 emask = vis_edge8(dp, dend); 329 vis_pst_8(vis_faligndata(dd1, dd2), dp++, emask); 330 if ((mlib_addr) dp <= (mlib_addr) dend) { 331 emask = vis_edge8(dp, dend); 332 vis_pst_8(vis_faligndata(dd2, dd2), dp++, emask); 333 } 334 } 335 } 336 } 337 } 338 339 /* no edge handling is needed in the loop */ 340 if (doff == 8) { 341 if ((mlib_addr) dp <= (mlib_addr) dend2) { 342 n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 24 + 1; 343 #pragma pipeloop(0) 344 for (i = 0; i < n; i++) { 345 LOAD_EXTRACT_U8_43R; 346 *dp++ = dd0; 347 *dp++ = dd1; 348 *dp++ = dd2; 349 } 350 } 351 } 352 else { 353 if ((mlib_addr) dp <= (mlib_addr) dend2) { 354 n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 24 + 1; 355 #pragma pipeloop(0) 356 for (i = 0; i < n; i++) { 357 LOAD_EXTRACT_U8_43R; 358 vis_alignaddr((void *)doff, 0); 359 *dp++ = vis_faligndata(dd2old, dd0); 360 *dp++ = vis_faligndata(dd0, dd1); 361 *dp++ = vis_faligndata(dd1, dd2); 362 } 363 } 364 } 365 366 if ((mlib_addr) dp <= (mlib_addr) dend) { 367 LOAD_EXTRACT_U8_43R; 368 emask = vis_edge8(dp, dend); 369 if (doff == 8) { 370 vis_pst_8(dd0, dp++, emask); 371 if ((mlib_addr) dp <= (mlib_addr) dend) { 372 emask = vis_edge8(dp, dend); 373 vis_pst_8(dd1, dp++, emask); 374 if ((mlib_addr) dp <= (mlib_addr) dend) { 375 emask = vis_edge8(dp, dend); 376 vis_pst_8(dd2, dp++, emask); 377 } 378 } 379 } 380 else { 381 vis_alignaddr((void *)doff, 0); 382 vis_pst_8(vis_faligndata(dd2old, dd0), dp++, emask); 383 if ((mlib_addr) dp <= (mlib_addr) dend) { 384 emask = vis_edge8(dp, dend); 385 vis_pst_8(vis_faligndata(dd0, dd1), dp++, emask); 386 if ((mlib_addr) dp <= (mlib_addr) dend) { 387 emask = vis_edge8(dp, dend); 388 vis_pst_8(vis_faligndata(dd1, dd2), dp++, emask); 389 } 390 } 391 } 392 } 393 } 394 395 /***************************************************************/ 396 void 
mlib_v_ImageChannelExtract_U8_43R(const mlib_u8 *src, 397 mlib_s32 slb, 398 mlib_u8 *dst, 399 mlib_s32 dlb, 400 mlib_s32 xsize, 401 mlib_s32 ysize) 402 { 403 mlib_u8 *sa, *da; 404 mlib_u8 *sl, *dl; 405 mlib_s32 j; 406 407 sa = sl = (void *)src; 408 da = dl = dst; 409 410 for (j = 0; j < ysize; j++) { 411 mlib_v_ImageChannelExtract_U8_43R_D1(sa, da, xsize); 412 sa = sl += slb; 413 da = dl += dlb; 414 } 415 } 416 417 /***************************************************************/ 418 #define EXTRACT_S16_43R_old /* shift right */ \ 419 \ 420 dd2 = vis_faligndata(sd3, dd2); /* r3------ */ \ 421 sd3 = vis_faligndata(sd3, sd3); \ 422 dd2 = vis_faligndata(sd3, dd2); /* g3r3---- */ \ 423 sd3 = vis_faligndata(sd3, sd3); \ 424 dd2 = vis_faligndata(sd3, dd2); /* b3g3r3-- */ \ 425 \ 426 dd2 = vis_faligndata(sd2, dd2); /* r2b3g3r3 */ \ 427 sd2 = vis_faligndata(sd2, sd2); \ 428 dd1 = vis_faligndata(sd2, dd1); /* g2------ */ \ 429 sd2 = vis_faligndata(sd2, sd2); \ 430 dd1 = vis_faligndata(sd2, dd1); /* b2g2---- */ \ 431 \ 432 dd1 = vis_faligndata(sd1, dd1); /* r1b2g2-- */ \ 433 sd1 = vis_faligndata(sd1, sd1); \ 434 dd1 = vis_faligndata(sd1, dd1); /* g1r1b2g2 */ \ 435 sd1 = vis_faligndata(sd1, sd1); \ 436 dd0 = vis_faligndata(sd1, dd0); /* b1------ */ \ 437 \ 438 dd0 = vis_faligndata(sd0, dd0); /* r0b1---- */ \ 439 sd0 = vis_faligndata(sd0, sd0); \ 440 dd0 = vis_faligndata(sd0, dd0); /* g0r0b1-- */ \ 441 sd0 = vis_faligndata(sd0, sd0); \ 442 dd0 = vis_faligndata(sd0, dd0); /* b0g0r0b1 */ 443 444 /***************************************************************/ 445 #define EXTRACT_S16_43R /* shift right */ \ 446 \ 447 vis_alignaddr((void *)0, 2); \ 448 dd2 = vis_faligndata(sd3, dd2); /* b3g3r3-- */ \ 449 \ 450 vis_alignaddr((void *)0, 6); \ 451 dd2 = vis_faligndata(sd2, dd2); /* r2b3g3r3 */ \ 452 vis_alignaddr((void *)0, 2); \ 453 dd1 = vis_faligndata(sd2, dd1); /* b2g2r2-- */ \ 454 \ 455 vis_alignaddr((void *)0, 4); \ 456 dd1 = vis_faligndata(sd1, dd1); /* g1r1b2g2 */ \ 457 
vis_alignaddr((void *)0, 2); \ 458 dd0 = vis_faligndata(sd1, dd0); /* b1g1r1-- */ \ 459 dd0 = vis_faligndata(sd0, dd0); /* b0g0r0b1 */ 460 461 /***************************************************************/ 462 #define LOAD_EXTRACT_S16_43R_STORE \ 463 \ 464 sd0 = *sp++; /* --b0g0r0 */ \ 465 sd1 = *sp++; /* --b1g1r1 */ \ 466 sd2 = *sp++; /* --b2g2r2 */ \ 467 sd3 = *sp++; /* --b3g3r3 */ \ 468 \ 469 EXTRACT_S16_43R; \ 470 \ 471 *dp++ = dd0; /* b0g0r0b1 */ \ 472 *dp++ = dd1; /* g1r1b2g2 */ \ 473 *dp++ = dd2; /* r2b3g3r3 */ 474 475 /***************************************************************/ 476 #define LOAD_EXTRACT_S16_43R \ 477 \ 478 vis_alignaddr((void *)soff, 0); \ 479 s0 = s4; \ 480 s1 = sp[1]; \ 481 s2 = sp[2]; \ 482 s3 = sp[3]; \ 483 s4 = sp[4]; \ 484 sd0 = vis_faligndata(s0, s1); \ 485 sd1 = vis_faligndata(s1, s2); \ 486 sd2 = vis_faligndata(s2, s3); \ 487 sd3 = vis_faligndata(s3, s4); \ 488 sp += 4; \ 489 dd2old = dd2; \ 490 EXTRACT_S16_43R 491 492 /***************************************************************/ 493 /* 494 * Both source and destination image data are 1-d vectors and 495 * 8-byte aligned. And size is in 4-pixels. 496 */ 497 498 void mlib_v_ImageChannelExtract_S16_43R_A8D1X4(const mlib_s16 *src, 499 mlib_s16 *dst, 500 mlib_s32 dsize) 501 { 502 mlib_d64 *sp, *dp; /* 8-byte aligned pointer for pixel */ 503 mlib_d64 sd0, sd1, sd2, sd3; /* source data */ 504 mlib_d64 dd0, dd1, dd2; /* dst data */ 505 mlib_s32 i; 506 507 sp = (mlib_d64 *) src; 508 dp = (mlib_d64 *) dst; 509 510 /* set GSR.offset for vis_faligndata() */ 511 /* vis_alignaddr((void *)0, 6); *//* only for _old */ 512 513 #pragma pipeloop(0) 514 for (i = 0; i < dsize / 4; i++) { 515 LOAD_EXTRACT_S16_43R_STORE; 516 } 517 } 518 519 /***************************************************************/ 520 /* 521 * Either source or destination image data are not 1-d vectors, but 522 * they are 8-byte aligned. The xsize is multiple of 8. 523 * slb and dlb are multiple of 8. 
524 */ 525 526 void mlib_v_ImageChannelExtract_S16_43R_A8D2X4(const mlib_s16 *src, 527 mlib_s32 slb, 528 mlib_s16 *dst, 529 mlib_s32 dlb, 530 mlib_s32 xsize, 531 mlib_s32 ysize) 532 { 533 mlib_d64 *sp, *dp; /* 8-byte aligned pointer for pixel */ 534 mlib_d64 *sl, *dl; /* 8-byte aligned pointer for line */ 535 mlib_d64 sd0, sd1, sd2, sd3; /* source data */ 536 mlib_d64 dd0, dd1, dd2; /* dst data */ 537 mlib_s32 i, j; /* indices for x, y */ 538 539 /* set GSR.offset for vis_faligndata() */ 540 /* vis_alignaddr((void *)0, 6); *//* only for _old */ 541 542 sp = sl = (mlib_d64 *) src; 543 dp = dl = (mlib_d64 *) dst; 544 545 /* row loop */ 546 for (j = 0; j < ysize; j++) { 547 /* 4-pixel column loop */ 548 #pragma pipeloop(0) 549 for (i = 0; i < xsize / 4; i++) { 550 LOAD_EXTRACT_S16_43R_STORE; 551 } 552 553 sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb); 554 dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb); 555 } 556 } 557 558 /***************************************************************/ 559 /* 560 * Either source or destination data are not 8-byte aligned. 561 * And dsize is multiple of 8. 
562 */ 563 564 void mlib_v_ImageChannelExtract_S16_43R_D1(const mlib_s16 *src, 565 mlib_s16 *dst, 566 mlib_s32 dsize) 567 { 568 mlib_s16 *sa, *da; /* pointer for pixel */ 569 mlib_s16 *dend, *dend2; /* end points in dst */ 570 mlib_d64 *dp; /* 8-byte aligned start points in dst */ 571 mlib_d64 *sp; /* 8-byte aligned start point in src */ 572 mlib_d64 s0, s1, s2, s3, s4; /* 8-byte source row data */ 573 mlib_d64 sd0, sd1, sd2, sd3; /* 8-byte source data */ 574 mlib_d64 dd0, dd1, dd2; /* dst data */ 575 mlib_d64 dd2old; /* the last datum of the last step */ 576 mlib_s32 soff; /* offset of address in src */ 577 mlib_s32 doff; /* offset of address in dst */ 578 mlib_s32 emask; /* edge mask */ 579 mlib_s32 i, n; 580 581 sa = (void *)src; 582 da = dst; 583 584 /* prepare the source address */ 585 sp = (mlib_d64 *) ((mlib_addr) sa & (~7)); 586 soff = ((mlib_addr) sa & 7); 587 588 /* prepare the destination addresses */ 589 dp = (mlib_d64 *) ((mlib_addr) da & (~7)); 590 dend = da + dsize * 3 - 1; 591 dend2 = dend - 11; 592 doff = 8 - ((mlib_addr) da & 7); 593 594 /* generate edge mask for the start point */ 595 emask = vis_edge16(da, dend); 596 597 /* load 32 byte, convert, store 24 bytes */ 598 s4 = sp[0]; /* initial value */ 599 LOAD_EXTRACT_S16_43R; 600 601 if (dsize >= 4) { 602 if (doff == 8) { 603 vis_pst_16(dd0, dp++, emask); 604 *dp++ = dd1; 605 *dp++ = dd2; 606 } 607 else { 608 vis_alignaddr((void *)doff, 0); 609 vis_pst_16(vis_faligndata(dd0, dd0), dp++, emask); 610 *dp++ = vis_faligndata(dd0, dd1); 611 *dp++ = vis_faligndata(dd1, dd2); 612 } 613 } 614 else { /* for very small size */ 615 if (doff == 8) { 616 vis_pst_16(dd0, dp++, emask); 617 if ((mlib_addr) dp <= (mlib_addr) dend) { 618 emask = vis_edge16(dp, dend); 619 vis_pst_16(dd1, dp++, emask); 620 if ((mlib_addr) dp <= (mlib_addr) dend) { 621 emask = vis_edge16(dp, dend); 622 vis_pst_16(dd2, dp++, emask); 623 } 624 } 625 } 626 else { 627 vis_alignaddr((void *)doff, 0); 628 vis_pst_16(vis_faligndata(dd0, 
dd0), dp++, emask); 629 if ((mlib_addr) dp <= (mlib_addr) dend) { 630 emask = vis_edge16(dp, dend); 631 vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask); 632 if ((mlib_addr) dp <= (mlib_addr) dend) { 633 emask = vis_edge16(dp, dend); 634 vis_pst_16(vis_faligndata(dd1, dd2), dp++, emask); 635 } 636 } 637 } 638 } 639 640 /* no edge handling is needed in the loop */ 641 if (doff == 8) { 642 if ((mlib_addr) dp <= (mlib_addr) dend2) { 643 n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 24 + 1; 644 #pragma pipeloop(0) 645 for (i = 0; i < n; i++) { 646 LOAD_EXTRACT_S16_43R; 647 *dp++ = dd0; 648 *dp++ = dd1; 649 *dp++ = dd2; 650 } 651 } 652 } 653 else { 654 if ((mlib_addr) dp <= (mlib_addr) dend2) { 655 n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 24 + 1; 656 #pragma pipeloop(0) 657 for (i = 0; i < n; i++) { 658 LOAD_EXTRACT_S16_43R; 659 vis_alignaddr((void *)doff, 0); 660 *dp++ = vis_faligndata(dd2old, dd0); 661 *dp++ = vis_faligndata(dd0, dd1); 662 *dp++ = vis_faligndata(dd1, dd2); 663 } 664 } 665 } 666 667 if ((mlib_addr) dp <= (mlib_addr) dend) { 668 LOAD_EXTRACT_S16_43R; 669 emask = vis_edge16(dp, dend); 670 if (doff == 8) { 671 vis_pst_16(dd0, dp++, emask); 672 if ((mlib_addr) dp <= (mlib_addr) dend) { 673 emask = vis_edge16(dp, dend); 674 vis_pst_16(dd1, dp++, emask); 675 if ((mlib_addr) dp <= (mlib_addr) dend) { 676 emask = vis_edge16(dp, dend); 677 vis_pst_16(dd2, dp++, emask); 678 } 679 } 680 } 681 else { 682 vis_alignaddr((void *)doff, 0); 683 vis_pst_16(vis_faligndata(dd2old, dd0), dp++, emask); 684 if ((mlib_addr) dp <= (mlib_addr) dend) { 685 emask = vis_edge16(dp, dend); 686 vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask); 687 if ((mlib_addr) dp <= (mlib_addr) dend) { 688 emask = vis_edge16(dp, dend); 689 vis_pst_16(vis_faligndata(dd1, dd2), dp++, emask); 690 } 691 } 692 } 693 } 694 } 695 696 /***************************************************************/ 697 void mlib_v_ImageChannelExtract_S16_43R(const mlib_s16 *src, 698 mlib_s32 slb, 699 mlib_s16 *dst, 700 
mlib_s32 dlb, 701 mlib_s32 xsize, 702 mlib_s32 ysize) 703 { 704 mlib_s16 *sa, *da; 705 mlib_s16 *sl, *dl; 706 mlib_s32 j; 707 708 sa = sl = (void *)src; 709 da = dl = dst; 710 711 for (j = 0; j < ysize; j++) { 712 mlib_v_ImageChannelExtract_S16_43R_D1(sa, da, xsize); 713 sa = sl = (mlib_s16 *) ((mlib_u8 *) sl + slb); 714 da = dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb); 715 } 716 } 717 718 /***************************************************************/ 719 #define EXTRACT_U8_43L_old /* shift left */ \ 720 \ 721 dd0 = vis_faligndata(dd0, sd0); /* --------------r0 */ \ 722 sd0 = vis_faligndata(sd0, sd0); \ 723 dd0 = vis_faligndata(dd0, sd0); /* ------------r0g0 */ \ 724 sd0 = vis_faligndata(sd0, sd0); \ 725 dd0 = vis_faligndata(dd0, sd0); /* ----------r0g0b0 */ \ 726 sd0 = vis_faligndata(sd0, sd0); \ 727 sd0 = vis_faligndata(sd0, sd0); \ 728 dd0 = vis_faligndata(dd0, sd0); /* --------r0g0b0r1 */ \ 729 sd0 = vis_faligndata(sd0, sd0); \ 730 dd0 = vis_faligndata(dd0, sd0); /* ------r0g0b0r1g1 */ \ 731 sd0 = vis_faligndata(sd0, sd0); \ 732 dd0 = vis_faligndata(dd0, sd0); /* ----r0g0b0r1g1b1 */ \ 733 \ 734 dd0 = vis_faligndata(dd0, sd1); /* --r0g0b0r1g1b1r2 */ \ 735 sd1 = vis_faligndata(sd1, sd1); \ 736 dd0 = vis_faligndata(dd0, sd1); /* r0g0b0r1g1b1r2g2 */ \ 737 \ 738 sd1 = vis_faligndata(sd1, sd1); \ 739 dd1 = vis_faligndata(dd1, sd1); /* --------------b2 */ \ 740 sd1 = vis_faligndata(sd1, sd1); \ 741 sd1 = vis_faligndata(sd1, sd1); \ 742 dd1 = vis_faligndata(dd1, sd1); /* ------------b2r3 */ \ 743 sd1 = vis_faligndata(sd1, sd1); \ 744 dd1 = vis_faligndata(dd1, sd1); /* ----------b2r3g3 */ \ 745 sd1 = vis_faligndata(sd1, sd1); \ 746 dd1 = vis_faligndata(dd1, sd1); /* --------b2r3g3b3 */ \ 747 \ 748 dd1 = vis_faligndata(dd1, sd2); /* ------b2r3g3b3r4 */ \ 749 sd2 = vis_faligndata(sd2, sd2); \ 750 dd1 = vis_faligndata(dd1, sd2); /* ----b2r3g3b3r4g4 */ \ 751 sd2 = vis_faligndata(sd2, sd2); \ 752 dd1 = vis_faligndata(dd1, sd2); /* --b2r3g3b3r4g4b4 */ \ 753 sd2 = 
vis_faligndata(sd2, sd2); \ 754 sd2 = vis_faligndata(sd2, sd2); \ 755 dd1 = vis_faligndata(dd1, sd2); /* b2r3g3b3r4g4b4r5 */ \ 756 \ 757 sd2 = vis_faligndata(sd2, sd2); \ 758 dd2 = vis_faligndata(dd2, sd2); /* --------------g5 */ \ 759 sd2 = vis_faligndata(sd2, sd2); \ 760 dd2 = vis_faligndata(dd2, sd2); /* ------------g5b5 */ \ 761 \ 762 dd2 = vis_faligndata(dd2, sd3); /* ----------g5b5r6 */ \ 763 sd3 = vis_faligndata(sd3, sd3); \ 764 dd2 = vis_faligndata(dd2, sd3); /* --------g5b5r6g6 */ \ 765 sd3 = vis_faligndata(sd3, sd3); \ 766 dd2 = vis_faligndata(dd2, sd3); /* ------g5b5r6g6b6 */ \ 767 sd3 = vis_faligndata(sd3, sd3); \ 768 sd3 = vis_faligndata(sd3, sd3); \ 769 dd2 = vis_faligndata(dd2, sd3); /* ----g5b5r6g6b6r7 */ \ 770 sd3 = vis_faligndata(sd3, sd3); \ 771 dd2 = vis_faligndata(dd2, sd3); /* --g5b5r6g6b6r7g7 */ \ 772 sd3 = vis_faligndata(sd3, sd3); \ 773 dd2 = vis_faligndata(dd2, sd3); /* g5b5r6g6b6r7g7b7 */ 774 775 /***************************************************************/ 776 #define EXTRACT_U8_43L /* shift left */ \ 777 \ 778 vis_alignaddr((void *)0, 3); \ 779 dd0 = vis_faligndata(dd0, sd0); /* ----------r0g0b0 */ \ 780 sda = vis_freg_pair(vis_read_lo(sd0), vis_read_hi(sd0)); \ 781 dd0 = vis_faligndata(dd0, sda); /* ----r0g0b0r1g1b1 */ \ 782 \ 783 vis_alignaddr((void *)0, 2); \ 784 dd0 = vis_faligndata(dd0, sd1); /* r0g0b0r1g1b1r2g2 */ \ 785 \ 786 vis_alignaddr((void *)0, 3); \ 787 dd1 = vis_faligndata(dd1, sd1); /* ----------r2g2b2 */ \ 788 sda = vis_freg_pair(vis_read_lo(sd1), vis_read_hi(sd1)); \ 789 dd1 = vis_faligndata(dd1, sda); /* ----r2g2b2r3g3b3 */ \ 790 dd1 = vis_faligndata(dd1, sd2); /* g2b2r3g3b3r4g4b4 */ \ 791 \ 792 sda = vis_freg_pair(vis_read_lo(sd2), vis_read_hi(sd2)); \ 793 vis_alignaddr((void *)0, 1); \ 794 dd1 = vis_faligndata(dd1, sda); /* b2r3g3b3r4g4b4r5 */ \ 795 \ 796 vis_alignaddr((void *)0, 3); \ 797 dd2 = vis_faligndata(dd2, sda); /* ----------r5g5b5 */ \ 798 \ 799 dd2 = vis_faligndata(dd2, sd3); /* ----r5g5b5r6g6b6 */ \ 
800 sda = vis_freg_pair(vis_read_lo(sd3), vis_read_hi(sd3)); \ 801 dd2 = vis_faligndata(dd2, sda); /* g5b5r6g6b6r7g7b7 */ 802 803 /***************************************************************/ 804 #define LOAD_EXTRACT_U8_43L_STORE \ 805 \ 806 sd0 = *sp++; /* r0g0b0--r1g1b1-- */ \ 807 sd1 = *sp++; /* r2g2b2--r3g3b3-- */ \ 808 sd2 = *sp++; /* r4g4b4--r5g5b5-- */ \ 809 sd3 = *sp++; /* r6g6b6--r7g7b7-- */ \ 810 \ 811 EXTRACT_U8_43L; \ 812 \ 813 *dp++ = dd0; /* r0g0b0r1g1b1r2g2 */ \ 814 *dp++ = dd1; /* b2r3g3b3r4g4b4r5 */ \ 815 *dp++ = dd2; /* g5b5r6g6b6r7g7b7 */ 816 817 /***************************************************************/ 818 #define LOAD_EXTRACT_U8_43L \ 819 \ 820 vis_alignaddr((void *)soff, 0); \ 821 s0 = s4; \ 822 s1 = sp[1]; \ 823 s2 = sp[2]; \ 824 s3 = sp[3]; \ 825 s4 = sp[4]; \ 826 sd0 = vis_faligndata(s0, s1); /* the intermediate is ABGR aligned */ \ 827 sd1 = vis_faligndata(s1, s2); \ 828 sd2 = vis_faligndata(s2, s3); \ 829 sd3 = vis_faligndata(s3, s4); \ 830 sp += 4; \ 831 \ 832 /* vis_alignaddr((void *)0, 1); */ /* for _old only */ \ 833 dd2old = dd2; \ 834 EXTRACT_U8_43L 835 836 /***************************************************************/ 837 /* 838 * Both source and destination image data are 1-d vectors and 839 * 8-byte aligned. And dsize is multiple of 8. 
840 */ 841 842 void mlib_v_ImageChannelExtract_U8_43L_A8D1X8(const mlib_u8 *src, 843 mlib_u8 *dst, 844 mlib_s32 dsize) 845 { 846 mlib_d64 *sp, *dp; 847 mlib_d64 sd0, sd1, sd2, sd3; /* source data */ 848 mlib_d64 dd0, dd1, dd2; /* dst data */ 849 mlib_d64 sda; 850 mlib_s32 i; 851 852 sp = (mlib_d64 *) src; 853 dp = (mlib_d64 *) dst; 854 855 /* set GSR.offset for vis_faligndata() */ 856 /* vis_alignaddr((void *)0, 1); *//* for _old only */ 857 858 #pragma pipeloop(0) 859 for (i = 0; i < dsize / 8; i++) { 860 LOAD_EXTRACT_U8_43L_STORE; 861 } 862 } 863 864 /***************************************************************/ 865 /* 866 * Either source or destination image data are not 1-d vectors, but 867 * they are 8-byte aligned. And slb and dlb are multiple of 8. 868 * The xsize is multiple of 8. 869 */ 870 871 void mlib_v_ImageChannelExtract_U8_43L_A8D2X8(const mlib_u8 *src, 872 mlib_s32 slb, 873 mlib_u8 *dst, 874 mlib_s32 dlb, 875 mlib_s32 xsize, 876 mlib_s32 ysize) 877 { 878 mlib_d64 *sp, *dp; /* 8-byte aligned pointer for pixel */ 879 mlib_d64 *sl, *dl; /* 8-byte aligned pointer for line */ 880 mlib_d64 sd0, sd1, sd2, sd3; /* source data */ 881 mlib_d64 dd0, dd1, dd2; /* dst data */ 882 mlib_d64 sda; 883 mlib_s32 i, j; /* indices for x, y */ 884 885 /* set GSR.offset for vis_faligndata() */ 886 /* vis_alignaddr((void *)0, 1); *//* for _old only */ 887 888 sp = sl = (mlib_d64 *) src; 889 dp = dl = (mlib_d64 *) dst; 890 891 /* row loop */ 892 for (j = 0; j < ysize; j++) { 893 /* 8-byte column loop */ 894 #pragma pipeloop(0) 895 for (i = 0; i < xsize / 8; i++) { 896 LOAD_EXTRACT_U8_43L_STORE; 897 } 898 899 sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb); 900 dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb); 901 } 902 } 903 904 /***************************************************************/ 905 /* 906 * Either source or destination data are not 8-byte aligned. 907 * And ssize is multiple of 8. 
908 */ 909 910 void mlib_v_ImageChannelExtract_U8_43L_D1(const mlib_u8 *src, 911 mlib_u8 *dst, 912 mlib_s32 dsize) 913 { 914 mlib_u8 *sa, *da; 915 mlib_u8 *dend, *dend2; /* end points in dst */ 916 mlib_d64 *dp; /* 8-byte aligned start points in dst */ 917 mlib_d64 *sp; /* 8-byte aligned start point in src */ 918 mlib_d64 s0, s1, s2, s3, s4; /* 8-byte source row data */ 919 mlib_d64 sd0, sd1, sd2, sd3; /* 8-byte source data */ 920 mlib_d64 dd0, dd1, dd2; /* dst data */ 921 mlib_d64 dd2old; /* the last datum of the last step */ 922 mlib_d64 sda; 923 mlib_s32 soff; /* offset of address in src */ 924 mlib_s32 doff; /* offset of address in dst */ 925 mlib_s32 emask; /* edge mask */ 926 mlib_s32 i, n; 927 928 sa = (void *)src; 929 da = dst; 930 931 /* prepare the source address */ 932 sp = (mlib_d64 *) ((mlib_addr) sa & (~7)); 933 soff = ((mlib_addr) sa & 7); 934 935 /* prepare the destination addresses */ 936 dp = (mlib_d64 *) ((mlib_addr) da & (~7)); 937 dend = da + dsize * 3 - 1; 938 dend2 = dend - 23; 939 doff = 8 - ((mlib_addr) da & 7); 940 941 /* generate edge mask for the start point */ 942 emask = vis_edge8(da, dend); 943 944 /* load 32 byte, convert, store 24 bytes */ 945 s4 = sp[0]; /* initial value */ 946 LOAD_EXTRACT_U8_43L; 947 948 if (dsize >= 8) { 949 if (doff == 8) { 950 vis_pst_8(dd0, dp++, emask); 951 *dp++ = dd1; 952 *dp++ = dd2; 953 } 954 else { 955 vis_alignaddr((void *)doff, 0); 956 vis_pst_8(vis_faligndata(dd0, dd0), dp++, emask); 957 *dp++ = vis_faligndata(dd0, dd1); 958 *dp++ = vis_faligndata(dd1, dd2); 959 } 960 } 961 else { /* for very small size */ 962 if (doff == 8) { 963 vis_pst_8(dd0, dp++, emask); 964 if ((mlib_addr) dp <= (mlib_addr) dend) { 965 emask = vis_edge8(dp, dend); 966 vis_pst_8(dd1, dp++, emask); 967 if ((mlib_addr) dp <= (mlib_addr) dend) { 968 emask = vis_edge8(dp, dend); 969 vis_pst_8(dd2, dp++, emask); 970 } 971 } 972 } 973 else { 974 vis_alignaddr((void *)doff, 0); 975 vis_pst_8(vis_faligndata(dd0, dd0), dp++, emask); 976 
if ((mlib_addr) dp <= (mlib_addr) dend) { 977 emask = vis_edge8(dp, dend); 978 vis_pst_8(vis_faligndata(dd0, dd1), dp++, emask); 979 if ((mlib_addr) dp <= (mlib_addr) dend) { 980 emask = vis_edge8(dp, dend); 981 vis_pst_8(vis_faligndata(dd1, dd2), dp++, emask); 982 if ((mlib_addr) dp <= (mlib_addr) dend) { 983 emask = vis_edge8(dp, dend); 984 vis_pst_8(vis_faligndata(dd2, dd2), dp++, emask); 985 } 986 } 987 } 988 } 989 } 990 991 /* no edge handling is needed in the loop */ 992 if (doff == 8) { 993 if ((mlib_addr) dp <= (mlib_addr) dend2) { 994 n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 24 + 1; 995 #pragma pipeloop(0) 996 for (i = 0; i < n; i++) { 997 LOAD_EXTRACT_U8_43L; 998 *dp++ = dd0; 999 *dp++ = dd1; 1000 *dp++ = dd2; 1001 } 1002 } 1003 } 1004 else { 1005 if ((mlib_addr) dp <= (mlib_addr) dend2) { 1006 n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 24 + 1; 1007 #pragma pipeloop(0) 1008 for (i = 0; i < n; i++) { 1009 LOAD_EXTRACT_U8_43L; 1010 vis_alignaddr((void *)doff, 0); 1011 *dp++ = vis_faligndata(dd2old, dd0); 1012 *dp++ = vis_faligndata(dd0, dd1); 1013 *dp++ = vis_faligndata(dd1, dd2); 1014 } 1015 } 1016 } 1017 1018 if ((mlib_addr) dp <= (mlib_addr) dend) { 1019 LOAD_EXTRACT_U8_43L; 1020 emask = vis_edge8(dp, dend); 1021 if (doff == 8) { 1022 vis_pst_8(dd0, dp++, emask); 1023 if ((mlib_addr) dp <= (mlib_addr) dend) { 1024 emask = vis_edge8(dp, dend); 1025 vis_pst_8(dd1, dp++, emask); 1026 if ((mlib_addr) dp <= (mlib_addr) dend) { 1027 emask = vis_edge8(dp, dend); 1028 vis_pst_8(dd2, dp++, emask); 1029 } 1030 } 1031 } 1032 else { 1033 vis_alignaddr((void *)doff, 0); 1034 vis_pst_8(vis_faligndata(dd2old, dd0), dp++, emask); 1035 if ((mlib_addr) dp <= (mlib_addr) dend) { 1036 emask = vis_edge8(dp, dend); 1037 vis_pst_8(vis_faligndata(dd0, dd1), dp++, emask); 1038 if ((mlib_addr) dp <= (mlib_addr) dend) { 1039 emask = vis_edge8(dp, dend); 1040 vis_pst_8(vis_faligndata(dd1, dd2), dp++, emask); 1041 } 1042 } 1043 } 1044 } 1045 } 1046 1047 
/***************************************************************/ 1048 void mlib_v_ImageChannelExtract_U8_43L(const mlib_u8 *src, 1049 mlib_s32 slb, 1050 mlib_u8 *dst, 1051 mlib_s32 dlb, 1052 mlib_s32 xsize, 1053 mlib_s32 ysize) 1054 { 1055 mlib_u8 *sa, *da; 1056 mlib_u8 *sl, *dl; 1057 mlib_s32 j; 1058 1059 sa = sl = (void *)src; 1060 da = dl = dst; 1061 1062 for (j = 0; j < ysize; j++) { 1063 mlib_v_ImageChannelExtract_U8_43L_D1(sa, da, xsize); 1064 sa = sl += slb; 1065 da = dl += dlb; 1066 } 1067 } 1068 1069 /***************************************************************/ 1070 #define EXTRACT_S16_43L /* shift left */ \ 1071 vis_alignaddr((void *)0, 6); \ 1072 dd0 = vis_faligndata(dd0, sd0); /* --r0g0b0 */ \ 1073 vis_alignaddr((void *)0, 2); \ 1074 dd0 = vis_faligndata(dd0, sd1); /* r0g0b0r1 */ \ 1075 \ 1076 vis_alignaddr((void *)0, 6); \ 1077 dd1 = vis_faligndata(dd1, sd1); /* --r1g1b1 */ \ 1078 vis_alignaddr((void *)0, 4); \ 1079 dd1 = vis_faligndata(dd1, sd2); /* g1b1r2g2 */ \ 1080 \ 1081 vis_alignaddr((void *)0, 6); \ 1082 dd2 = vis_faligndata(dd2, sd2); /* --r2g2b2 */ \ 1083 dd2 = vis_faligndata(dd2, sd3); /* b2r3g3b3 */ 1084 1085 /***************************************************************/ 1086 #define LOAD_EXTRACT_S16_43L_STORE \ 1087 \ 1088 sd0 = *sp++; /* r0g0b0-- */ \ 1089 sd1 = *sp++; /* r1g1b1-- */ \ 1090 sd2 = *sp++; /* r2g2b2-- */ \ 1091 sd3 = *sp++; /* r3g3b3-- */ \ 1092 \ 1093 EXTRACT_S16_43L; \ 1094 \ 1095 *dp++ = dd0; /* r0g0b0r1 */ \ 1096 *dp++ = dd1; /* g1b1r2g2 */ \ 1097 *dp++ = dd2; /* b2r3g3b3 */ 1098 1099 /***************************************************************/ 1100 #define LOAD_EXTRACT_S16_43L \ 1101 \ 1102 vis_alignaddr((void *)soff, 0); \ 1103 s0 = s4; \ 1104 s1 = sp[1]; \ 1105 s2 = sp[2]; \ 1106 s3 = sp[3]; \ 1107 s4 = sp[4]; \ 1108 sd0 = vis_faligndata(s0, s1); \ 1109 sd1 = vis_faligndata(s1, s2); \ 1110 sd2 = vis_faligndata(s2, s3); \ 1111 sd3 = vis_faligndata(s3, s4); \ 1112 sp += 4; \ 1113 dd2old = dd2; \ 1114 
EXTRACT_S16_43L 1115 1116 /***************************************************************/ 1117 /* 1118 * Both source and destination image data are 1-d vectors and 1119 * 8-byte aligned. And dsize is multiple of 4. 1120 */ 1121 1122 void mlib_v_ImageChannelExtract_S16_43L_A8D1X4(const mlib_s16 *src, 1123 mlib_s16 *dst, 1124 mlib_s32 dsize) 1125 { 1126 mlib_d64 *sp, *dp; /* 8-byte aligned pointer for pixel */ 1127 mlib_d64 sd0, sd1, sd2, sd3; /* source data */ 1128 mlib_d64 dd0, dd1, dd2; /* dst data */ 1129 mlib_s32 i; 1130 1131 sp = (mlib_d64 *) src; 1132 dp = (mlib_d64 *) dst; 1133 1134 /* set GSR.offset for vis_faligndata() */ 1135 /* vis_alignaddr((void *)0, 2); *//* only for _old */ 1136 1137 #pragma pipeloop(0) 1138 for (i = 0; i < dsize / 4; i++) { 1139 LOAD_EXTRACT_S16_43L_STORE; 1140 } 1141 } 1142 1143 /***************************************************************/ 1144 /* 1145 * Either source or destination image data are not 1-d vectors, but 1146 * they are 8-byte aligned. The xsize is multiple of 4. 1147 * And slb and dlb are multiple of 8. 
1148 */ 1149 1150 void mlib_v_ImageChannelExtract_S16_43L_A8D2X4(const mlib_s16 *src, 1151 mlib_s32 slb, 1152 mlib_s16 *dst, 1153 mlib_s32 dlb, 1154 mlib_s32 xsize, 1155 mlib_s32 ysize) 1156 { 1157 mlib_d64 *sp, *dp; /* 8-byte aligned pointer for pixel */ 1158 mlib_d64 *sl, *dl; /* 8-byte aligned pointer for line */ 1159 mlib_d64 sd0, sd1, sd2, sd3; /* source data */ 1160 mlib_d64 dd0, dd1, dd2; /* dst data */ 1161 mlib_s32 i, j; /* indices for x, y */ 1162 1163 /* set GSR.offset for vis_faligndata() */ 1164 /* vis_alignaddr((void *)0, 2); *//* only for _old */ 1165 1166 sp = sl = (mlib_d64 *) src; 1167 dp = dl = (mlib_d64 *) dst; 1168 1169 /* row loop */ 1170 for (j = 0; j < ysize; j++) { 1171 /* 4-pixel column loop */ 1172 #pragma pipeloop(0) 1173 for (i = 0; i < xsize / 4; i++) { 1174 LOAD_EXTRACT_S16_43L_STORE; 1175 } 1176 1177 sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb); 1178 dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb); 1179 } 1180 } 1181 1182 /***************************************************************/ 1183 /* 1184 * Either source or destination data are not 8-byte aligned. 1185 * And size is in pixels. 
1186 */ 1187 1188 void mlib_v_ImageChannelExtract_S16_43L_D1(const mlib_s16 *src, 1189 mlib_s16 *dst, 1190 mlib_s32 dsize) 1191 { 1192 mlib_s16 *sa, *da; /* pointer for pixel */ 1193 mlib_s16 *dend, *dend2; /* end points in dst */ 1194 mlib_d64 *dp; /* 8-byte aligned start points in dst */ 1195 mlib_d64 *sp; /* 8-byte aligned start point in src */ 1196 mlib_d64 s0, s1, s2, s3, s4; /* 8-byte source row data */ 1197 mlib_d64 sd0, sd1, sd2, sd3; /* 8-byte source data */ 1198 mlib_d64 dd0, dd1, dd2; /* dst data */ 1199 mlib_d64 dd2old; /* the last datum of the last step */ 1200 mlib_s32 soff; /* offset of address in src */ 1201 mlib_s32 doff; /* offset of address in dst */ 1202 mlib_s32 emask; /* edge mask */ 1203 mlib_s32 i, n; 1204 1205 sa = (void *)src; 1206 da = dst; 1207 1208 /* prepare the source address */ 1209 sp = (mlib_d64 *) ((mlib_addr) sa & (~7)); 1210 soff = ((mlib_addr) sa & 7); 1211 1212 /* prepare the destination addresses */ 1213 dp = (mlib_d64 *) ((mlib_addr) da & (~7)); 1214 dend = da + dsize * 3 - 1; 1215 dend2 = dend - 11; 1216 doff = 8 - ((mlib_addr) da & 7); 1217 1218 /* generate edge mask for the start point */ 1219 emask = vis_edge16(da, dend); 1220 1221 /* load 32 byte, convert, store 24 bytes */ 1222 s4 = sp[0]; /* initial value */ 1223 LOAD_EXTRACT_S16_43L; 1224 1225 if (dsize >= 4) { 1226 if (doff == 8) { 1227 vis_pst_16(dd0, dp++, emask); 1228 *dp++ = dd1; 1229 *dp++ = dd2; 1230 } 1231 else { 1232 vis_alignaddr((void *)doff, 0); 1233 vis_pst_16(vis_faligndata(dd0, dd0), dp++, emask); 1234 *dp++ = vis_faligndata(dd0, dd1); 1235 *dp++ = vis_faligndata(dd1, dd2); 1236 } 1237 } 1238 else { /* for very small size */ 1239 if (doff == 8) { 1240 vis_pst_16(dd0, dp++, emask); 1241 if ((mlib_addr) dp <= (mlib_addr) dend) { 1242 emask = vis_edge16(dp, dend); 1243 vis_pst_16(dd1, dp++, emask); 1244 if ((mlib_addr) dp <= (mlib_addr) dend) { 1245 emask = vis_edge16(dp, dend); 1246 vis_pst_16(dd2, dp++, emask); 1247 } 1248 } 1249 } 1250 else { 1251 
vis_alignaddr((void *)doff, 0); 1252 vis_pst_16(vis_faligndata(dd0, dd0), dp++, emask); 1253 if ((mlib_addr) dp <= (mlib_addr) dend) { 1254 emask = vis_edge16(dp, dend); 1255 vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask); 1256 if ((mlib_addr) dp <= (mlib_addr) dend) { 1257 emask = vis_edge16(dp, dend); 1258 vis_pst_16(vis_faligndata(dd1, dd2), dp++, emask); 1259 } 1260 } 1261 } 1262 } 1263 1264 /* no edge handling is needed in the loop */ 1265 if (doff == 8) { 1266 if ((mlib_addr) dp <= (mlib_addr) dend2) { 1267 n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 24 + 1; 1268 #pragma pipeloop(0) 1269 for (i = 0; i < n; i++) { 1270 LOAD_EXTRACT_S16_43L; 1271 *dp++ = dd0; 1272 *dp++ = dd1; 1273 *dp++ = dd2; 1274 } 1275 } 1276 } 1277 else { 1278 if ((mlib_addr) dp <= (mlib_addr) dend2) { 1279 n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 24 + 1; 1280 #pragma pipeloop(0) 1281 for (i = 0; i < n; i++) { 1282 LOAD_EXTRACT_S16_43L; 1283 vis_alignaddr((void *)doff, 0); 1284 *dp++ = vis_faligndata(dd2old, dd0); 1285 *dp++ = vis_faligndata(dd0, dd1); 1286 *dp++ = vis_faligndata(dd1, dd2); 1287 } 1288 } 1289 } 1290 1291 if ((mlib_addr) dp <= (mlib_addr) dend) { 1292 LOAD_EXTRACT_S16_43L; 1293 emask = vis_edge16(dp, dend); 1294 if (doff == 8) { 1295 vis_pst_16(dd0, dp++, emask); 1296 if ((mlib_addr) dp <= (mlib_addr) dend) { 1297 emask = vis_edge16(dp, dend); 1298 vis_pst_16(dd1, dp++, emask); 1299 if ((mlib_addr) dp <= (mlib_addr) dend) { 1300 emask = vis_edge16(dp, dend); 1301 vis_pst_16(dd2, dp++, emask); 1302 } 1303 } 1304 } 1305 else { 1306 vis_alignaddr((void *)doff, 0); 1307 vis_pst_16(vis_faligndata(dd2old, dd0), dp++, emask); 1308 if ((mlib_addr) dp <= (mlib_addr) dend) { 1309 emask = vis_edge16(dp, dend); 1310 vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask); 1311 if ((mlib_addr) dp <= (mlib_addr) dend) { 1312 emask = vis_edge16(dp, dend); 1313 vis_pst_16(vis_faligndata(dd1, dd2), dp++, emask); 1314 } 1315 } 1316 } 1317 } 1318 } 1319 1320 
/***************************************************************/ 1321 void mlib_v_ImageChannelExtract_S16_43L(const mlib_s16 *src, 1322 mlib_s32 slb, 1323 mlib_s16 *dst, 1324 mlib_s32 dlb, 1325 mlib_s32 xsize, 1326 mlib_s32 ysize) 1327 { 1328 mlib_s16 *sa, *da; 1329 mlib_s16 *sl, *dl; 1330 mlib_s32 j; 1331 1332 sa = sl = (void *)src; 1333 da = dl = dst; 1334 1335 for (j = 0; j < ysize; j++) { 1336 mlib_v_ImageChannelExtract_S16_43L_D1(sa, da, xsize); 1337 sa = sl = (mlib_s16 *) ((mlib_u8 *) sl + slb); 1338 da = dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb); 1339 } 1340 } 1341 1342 /***************************************************************/