/*
 * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */



/*
 * FILENAME: mlib_v_ImageChannelExtract_43.c
 *
 * FUNCTIONS
 *      mlib_v_ImageChannelExtract_U8_43L_D1
 *      mlib_v_ImageChannelExtract_S16_43L_D1
 *
 * SYNOPSIS
 *      void mlib_v_ImageChannelExtract_U8_43L_D1(const mlib_u8 *src,
 *                                                mlib_u8       *dst,
 *                                                mlib_s32      dsize);
 *      void mlib_v_ImageChannelExtract_S16_43L_D1(const mlib_s16 *src,
 *                                                 mlib_s16       *dst,
 *                                                 mlib_s32       dsize);
 *
 * ARGUMENT
 *      src     pointer to source image data
 *      dst     pointer to destination image data
 *      slb     source image line stride in bytes
 *      dlb     destination image line stride in bytes
 *      dsize   image data size in pixels
 *      xsize   image width in pixels
 *      ysize   image height in lines
 *      cmask   channel mask
 *
 *      Only src, dst and dsize are parameters of the two functions
 *      defined in this file; slb, dlb, xsize, ysize and cmask are listed
 *      for the channel-extract family in general.
 *
 * DESCRIPTION
 *      extract the right or left 3 channels of a 4-channel image to
 *      a 3-channel image -- VIS version low level functions.
 *
 *      ABGR => BGR   (43R), or  RGBA => RGB  (43L)
 *
 *      Only the 43L ("left three channels") variants appear in this file.
 *
 * NOTE
 *      These functions are separated from mlib_v_ImageChannelExtract.c
 *      for loop unrolling and structure clarity.
 */

#include "vis_proto.h"
#include "mlib_image.h"
#include "mlib_v_ImageChannelExtract.h"

/***************************************************************/
/*
 * EXTRACT_U8_43L
 *
 * Packs the first three bytes of each of eight 4-byte pixels into
 * 24 output bytes.
 *
 * Reads from the enclosing scope:
 *   sd0..sd3   four 8-byte words, two source pixels each
 *   dd0..dd2   previous contents; they are operands of the first
 *              vis_faligndata of each group, but the trailing lane
 *              comments show none of those bytes surviving
 * Writes in the enclosing scope:
 *   dd0..dd2   the 24 packed bytes (lane layout in trailing comments;
 *              each two-character group appears to denote one byte and
 *              "--" a byte that is still don't-care)
 *   sda        scratch: the current source word with its two 32-bit
 *              halves swapped, so the second pixel can be shifted in
 *
 * Each vis_alignaddr((void *)0, k) selects the byte offset k used by the
 * vis_faligndata calls that follow it, so statement order is significant.
 * The alignment offset is left at 3 on exit.
 *
 * This is a bare statement sequence (not wrapped in do/while), so it must
 * only be expanded where a statement list is valid.
 */
#define EXTRACT_U8_43L                              /* shift left */    \
                                                                        \
  vis_alignaddr((void *)0, 3);                                          \
  dd0 = vis_faligndata(dd0, sd0);       /* ----------r0g0b0 */          \
  sda = vis_freg_pair(vis_read_lo(sd0), vis_read_hi(sd0));              \
  dd0 = vis_faligndata(dd0, sda);       /* ----r0g0b0r1g1b1 */          \
                                                                        \
  vis_alignaddr((void *)0, 2);                                          \
  dd0 = vis_faligndata(dd0, sd1);       /* r0g0b0r1g1b1r2g2 */          \
                                                                        \
  vis_alignaddr((void *)0, 3);                                          \
  dd1 = vis_faligndata(dd1, sd1);       /* ----------r2g2b2 */          \
  sda = vis_freg_pair(vis_read_lo(sd1), vis_read_hi(sd1));              \
  dd1 = vis_faligndata(dd1, sda);       /* ----r2g2b2r3g3b3 */          \
  dd1 = vis_faligndata(dd1, sd2);       /* g2b2r3g3b3r4g4b4 */          \
                                                                        \
  sda = vis_freg_pair(vis_read_lo(sd2), vis_read_hi(sd2));              \
  vis_alignaddr((void *)0, 1);                                          \
  dd1 = vis_faligndata(dd1, sda);       /* b2r3g3b3r4g4b4r5 */          \
                                                                        \
  vis_alignaddr((void *)0, 3);                                          \
  dd2 = vis_faligndata(dd2, sda);       /* ----------r5g5b5 */          \
                                                                        \
  dd2 = vis_faligndata(dd2, sd3);       /* ----r5g5b5r6g6b6 */          \
  sda = vis_freg_pair(vis_read_lo(sd3), vis_read_hi(sd3));              \
  dd2 = vis_faligndata(dd2, sda);       /* g5b5r6g6b6r7g7b7 */

/***************************************************************/
/*
 * LOAD_EXTRACT_U8_43L
 *
 * Loads the next 32 source bytes and converts them with EXTRACT_U8_43L.
 *
 * Uses from the enclosing scope: sp, soff, s0..s4, sd0..sd3, dd0..dd2,
 * dd2old, sda.
 *
 *   - s4 must already hold the word at sp[0]; it is carried over from
 *     the previous expansion (or preloaded before the first one).
 *   - sp[1]..sp[4] are read and sp is advanced by 4 words (32 bytes).
 *   - vis_alignaddr(soff, 0) plus vis_faligndata(sN, sN+1) realign the
 *     aligned words so that sd0..sd3 start at the original, possibly
 *     unaligned, source address.
 *   - dd2old receives the dd2 of the previous expansion before dd2 is
 *     overwritten; callers with an unaligned destination merge it with
 *     the new dd0.
 */
#define LOAD_EXTRACT_U8_43L                                             \
                                                                        \
  vis_alignaddr((void *)soff, 0);                                       \
  s0 = s4;                                                              \
  s1 = sp[1];                                                           \
  s2 = sp[2];                                                           \
  s3 = sp[3];                                                           \
  s4 = sp[4];                                                           \
  sd0 = vis_faligndata(s0, s1);  /* sd0..sd3 start on a pixel boundary */ \
  sd1 = vis_faligndata(s1, s2);                                         \
  sd2 = vis_faligndata(s2, s3);                                         \
  sd3 = vis_faligndata(s3, s4);                                         \
  sp += 4;                                                              \
                                                                        \
/*  vis_alignaddr((void *)0, 1); */         /* for _old only */         \
  dd2old = dd2;                                                         \
  EXTRACT_U8_43L

/***************************************************************/ 110 /* 111 * Either source or destination data are not 8-byte aligned. 112 * And ssize is multiple of 8. 113 */ 114 115 void mlib_v_ImageChannelExtract_U8_43L_D1(const mlib_u8 *src, 116 mlib_u8 *dst, 117 mlib_s32 dsize) 118 { 119 mlib_u8 *sa, *da; 120 mlib_u8 *dend, *dend2; /* end points in dst */ 121 mlib_d64 *dp; /* 8-byte aligned start points in dst */ 122 mlib_d64 *sp; /* 8-byte aligned start point in src */ 123 mlib_d64 s0, s1, s2, s3, s4; /* 8-byte source row data */ 124 mlib_d64 sd0, sd1, sd2, sd3; /* 8-byte source data */ 125 mlib_d64 dd0, dd1, dd2; /* dst data */ 126 mlib_d64 dd2old; /* the last datum of the last step */ 127 mlib_d64 sda; 128 mlib_s32 soff; /* offset of address in src */ 129 mlib_s32 doff; /* offset of address in dst */ 130 mlib_s32 emask; /* edge mask */ 131 mlib_s32 i, n; 132 133 sa = (void *)src; 134 da = dst; 135 136 /* prepare the source address */ 137 sp = (mlib_d64 *) ((mlib_addr) sa & (~7)); 138 soff = ((mlib_addr) sa & 7); 139 140 /* prepare the destination addresses */ 141 dp = (mlib_d64 *) ((mlib_addr) da & (~7)); 142 dend = da + dsize * 3 - 1; 143 dend2 = dend - 23; 144 doff = 8 - ((mlib_addr) da & 7); 145 146 /* generate edge mask for the start point */ 147 emask = vis_edge8(da, dend); 148 149 /* load 32 byte, convert, store 24 bytes */ 150 s4 = sp[0]; /* initial value */ 151 LOAD_EXTRACT_U8_43L; 152 153 if (dsize >= 8) { 154 if (doff == 8) { 155 vis_pst_8(dd0, dp++, emask); 156 *dp++ = dd1; 157 *dp++ = dd2; 158 } 159 else { 160 vis_alignaddr((void *)doff, 0); 161 vis_pst_8(vis_faligndata(dd0, dd0), dp++, emask); 162 *dp++ = vis_faligndata(dd0, dd1); 163 *dp++ = vis_faligndata(dd1, dd2); 164 } 165 } 166 else { /* for very small size */ 167 if (doff == 8) { 168 vis_pst_8(dd0, dp++, emask); 169 if ((mlib_addr) dp <= (mlib_addr) dend) { 170 emask = vis_edge8(dp, dend); 171 vis_pst_8(dd1, dp++, emask); 172 if ((mlib_addr) dp <= (mlib_addr) dend) { 173 emask = 
vis_edge8(dp, dend); 174 vis_pst_8(dd2, dp++, emask); 175 } 176 } 177 } 178 else { 179 vis_alignaddr((void *)doff, 0); 180 vis_pst_8(vis_faligndata(dd0, dd0), dp++, emask); 181 if ((mlib_addr) dp <= (mlib_addr) dend) { 182 emask = vis_edge8(dp, dend); 183 vis_pst_8(vis_faligndata(dd0, dd1), dp++, emask); 184 if ((mlib_addr) dp <= (mlib_addr) dend) { 185 emask = vis_edge8(dp, dend); 186 vis_pst_8(vis_faligndata(dd1, dd2), dp++, emask); 187 if ((mlib_addr) dp <= (mlib_addr) dend) { 188 emask = vis_edge8(dp, dend); 189 vis_pst_8(vis_faligndata(dd2, dd2), dp++, emask); 190 } 191 } 192 } 193 } 194 } 195 196 /* no edge handling is needed in the loop */ 197 if (doff == 8) { 198 if ((mlib_addr) dp <= (mlib_addr) dend2) { 199 n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 24 + 1; 200 #pragma pipeloop(0) 201 for (i = 0; i < n; i++) { 202 LOAD_EXTRACT_U8_43L; 203 *dp++ = dd0; 204 *dp++ = dd1; 205 *dp++ = dd2; 206 } 207 } 208 } 209 else { 210 if ((mlib_addr) dp <= (mlib_addr) dend2) { 211 n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 24 + 1; 212 #pragma pipeloop(0) 213 for (i = 0; i < n; i++) { 214 LOAD_EXTRACT_U8_43L; 215 vis_alignaddr((void *)doff, 0); 216 *dp++ = vis_faligndata(dd2old, dd0); 217 *dp++ = vis_faligndata(dd0, dd1); 218 *dp++ = vis_faligndata(dd1, dd2); 219 } 220 } 221 } 222 223 if ((mlib_addr) dp <= (mlib_addr) dend) { 224 LOAD_EXTRACT_U8_43L; 225 emask = vis_edge8(dp, dend); 226 if (doff == 8) { 227 vis_pst_8(dd0, dp++, emask); 228 if ((mlib_addr) dp <= (mlib_addr) dend) { 229 emask = vis_edge8(dp, dend); 230 vis_pst_8(dd1, dp++, emask); 231 if ((mlib_addr) dp <= (mlib_addr) dend) { 232 emask = vis_edge8(dp, dend); 233 vis_pst_8(dd2, dp++, emask); 234 } 235 } 236 } 237 else { 238 vis_alignaddr((void *)doff, 0); 239 vis_pst_8(vis_faligndata(dd2old, dd0), dp++, emask); 240 if ((mlib_addr) dp <= (mlib_addr) dend) { 241 emask = vis_edge8(dp, dend); 242 vis_pst_8(vis_faligndata(dd0, dd1), dp++, emask); 243 if ((mlib_addr) dp <= (mlib_addr) dend) { 244 emask = 
vis_edge8(dp, dend); 245 vis_pst_8(vis_faligndata(dd1, dd2), dp++, emask); 246 } 247 } 248 } 249 } 250 } 251 252 /***************************************************************/ 253 #define EXTRACT_S16_43L /* shift left */ \ 254 vis_alignaddr((void *)0, 6); \ 255 dd0 = vis_faligndata(dd0, sd0); /* --r0g0b0 */ \ 256 vis_alignaddr((void *)0, 2); \ 257 dd0 = vis_faligndata(dd0, sd1); /* r0g0b0r1 */ \ 258 \ 259 vis_alignaddr((void *)0, 6); \ 260 dd1 = vis_faligndata(dd1, sd1); /* --r1g1b1 */ \ 261 vis_alignaddr((void *)0, 4); \ 262 dd1 = vis_faligndata(dd1, sd2); /* g1b1r2g2 */ \ 263 \ 264 vis_alignaddr((void *)0, 6); \ 265 dd2 = vis_faligndata(dd2, sd2); /* --r2g2b2 */ \ 266 dd2 = vis_faligndata(dd2, sd3); /* b2r3g3b3 */ 267 268 /***************************************************************/ 269 #define LOAD_EXTRACT_S16_43L \ 270 \ 271 vis_alignaddr((void *)soff, 0); \ 272 s0 = s4; \ 273 s1 = sp[1]; \ 274 s2 = sp[2]; \ 275 s3 = sp[3]; \ 276 s4 = sp[4]; \ 277 sd0 = vis_faligndata(s0, s1); \ 278 sd1 = vis_faligndata(s1, s2); \ 279 sd2 = vis_faligndata(s2, s3); \ 280 sd3 = vis_faligndata(s3, s4); \ 281 sp += 4; \ 282 dd2old = dd2; \ 283 EXTRACT_S16_43L 284 285 /***************************************************************/ 286 /* 287 * Either source or destination data are not 8-byte aligned. 288 * And size is in pixels. 
289 */ 290 291 void mlib_v_ImageChannelExtract_S16_43L_D1(const mlib_s16 *src, 292 mlib_s16 *dst, 293 mlib_s32 dsize) 294 { 295 mlib_s16 *sa, *da; /* pointer for pixel */ 296 mlib_s16 *dend, *dend2; /* end points in dst */ 297 mlib_d64 *dp; /* 8-byte aligned start points in dst */ 298 mlib_d64 *sp; /* 8-byte aligned start point in src */ 299 mlib_d64 s0, s1, s2, s3, s4; /* 8-byte source row data */ 300 mlib_d64 sd0, sd1, sd2, sd3; /* 8-byte source data */ 301 mlib_d64 dd0, dd1, dd2; /* dst data */ 302 mlib_d64 dd2old; /* the last datum of the last step */ 303 mlib_s32 soff; /* offset of address in src */ 304 mlib_s32 doff; /* offset of address in dst */ 305 mlib_s32 emask; /* edge mask */ 306 mlib_s32 i, n; 307 308 sa = (void *)src; 309 da = dst; 310 311 /* prepare the source address */ 312 sp = (mlib_d64 *) ((mlib_addr) sa & (~7)); 313 soff = ((mlib_addr) sa & 7); 314 315 /* prepare the destination addresses */ 316 dp = (mlib_d64 *) ((mlib_addr) da & (~7)); 317 dend = da + dsize * 3 - 1; 318 dend2 = dend - 11; 319 doff = 8 - ((mlib_addr) da & 7); 320 321 /* generate edge mask for the start point */ 322 emask = vis_edge16(da, dend); 323 324 /* load 32 byte, convert, store 24 bytes */ 325 s4 = sp[0]; /* initial value */ 326 LOAD_EXTRACT_S16_43L; 327 328 if (dsize >= 4) { 329 if (doff == 8) { 330 vis_pst_16(dd0, dp++, emask); 331 *dp++ = dd1; 332 *dp++ = dd2; 333 } 334 else { 335 vis_alignaddr((void *)doff, 0); 336 vis_pst_16(vis_faligndata(dd0, dd0), dp++, emask); 337 *dp++ = vis_faligndata(dd0, dd1); 338 *dp++ = vis_faligndata(dd1, dd2); 339 } 340 } 341 else { /* for very small size */ 342 if (doff == 8) { 343 vis_pst_16(dd0, dp++, emask); 344 if ((mlib_addr) dp <= (mlib_addr) dend) { 345 emask = vis_edge16(dp, dend); 346 vis_pst_16(dd1, dp++, emask); 347 if ((mlib_addr) dp <= (mlib_addr) dend) { 348 emask = vis_edge16(dp, dend); 349 vis_pst_16(dd2, dp++, emask); 350 } 351 } 352 } 353 else { 354 vis_alignaddr((void *)doff, 0); 355 vis_pst_16(vis_faligndata(dd0, 
dd0), dp++, emask); 356 if ((mlib_addr) dp <= (mlib_addr) dend) { 357 emask = vis_edge16(dp, dend); 358 vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask); 359 if ((mlib_addr) dp <= (mlib_addr) dend) { 360 emask = vis_edge16(dp, dend); 361 vis_pst_16(vis_faligndata(dd1, dd2), dp++, emask); 362 } 363 } 364 } 365 } 366 367 /* no edge handling is needed in the loop */ 368 if (doff == 8) { 369 if ((mlib_addr) dp <= (mlib_addr) dend2) { 370 n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 24 + 1; 371 #pragma pipeloop(0) 372 for (i = 0; i < n; i++) { 373 LOAD_EXTRACT_S16_43L; 374 *dp++ = dd0; 375 *dp++ = dd1; 376 *dp++ = dd2; 377 } 378 } 379 } 380 else { 381 if ((mlib_addr) dp <= (mlib_addr) dend2) { 382 n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 24 + 1; 383 #pragma pipeloop(0) 384 for (i = 0; i < n; i++) { 385 LOAD_EXTRACT_S16_43L; 386 vis_alignaddr((void *)doff, 0); 387 *dp++ = vis_faligndata(dd2old, dd0); 388 *dp++ = vis_faligndata(dd0, dd1); 389 *dp++ = vis_faligndata(dd1, dd2); 390 } 391 } 392 } 393 394 if ((mlib_addr) dp <= (mlib_addr) dend) { 395 LOAD_EXTRACT_S16_43L; 396 emask = vis_edge16(dp, dend); 397 if (doff == 8) { 398 vis_pst_16(dd0, dp++, emask); 399 if ((mlib_addr) dp <= (mlib_addr) dend) { 400 emask = vis_edge16(dp, dend); 401 vis_pst_16(dd1, dp++, emask); 402 if ((mlib_addr) dp <= (mlib_addr) dend) { 403 emask = vis_edge16(dp, dend); 404 vis_pst_16(dd2, dp++, emask); 405 } 406 } 407 } 408 else { 409 vis_alignaddr((void *)doff, 0); 410 vis_pst_16(vis_faligndata(dd2old, dd0), dp++, emask); 411 if ((mlib_addr) dp <= (mlib_addr) dend) { 412 emask = vis_edge16(dp, dend); 413 vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask); 414 if ((mlib_addr) dp <= (mlib_addr) dend) { 415 emask = vis_edge16(dp, dend); 416 vis_pst_16(vis_faligndata(dd1, dd2), dp++, emask); 417 } 418 } 419 } 420 } 421 } 422 423 /***************************************************************/