1 /*
   2  * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 
  27 
/*
 * FILENAME: mlib_v_ImageChannelExtract_43.c
 *
 * FUNCTIONS
 *      mlib_v_ImageChannelExtract_U8_43L_D1
 *      mlib_v_ImageChannelExtract_S16_43L_D1
 *
 * SYNOPSIS
 *      void mlib_v_ImageChannelExtract_U8_43L_D1(const mlib_u8 *src,
 *                                                mlib_u8       *dst,
 *                                                mlib_s32      dsize);
 *
 *      void mlib_v_ImageChannelExtract_S16_43L_D1(const mlib_s16 *src,
 *                                                 mlib_s16       *dst,
 *                                                 mlib_s32       dsize);
 *
 * ARGUMENT
 *      src    pointer to source image data
 *      dst    pointer to destination image data
 *      slb    source image line stride in bytes
 *      dlb    destination image line stride in bytes
 *      dsize  image data size in pixels
 *      xsize  image width in pixels
 *      ysize  image height in lines
 *      cmask  channel mask
 *
 *      The two functions listed above take only src, dst and dsize; the
 *      remaining arguments are not parameters of these functions.
 *
 * DESCRIPTION
 *      Extract the right or left 3 channels of a 4-channel image to
 *      a 3-channel image -- VIS version low level functions.
 *
 *      ABGR => BGR   (43R), or  RGBA => RGB  (43L)
 *
 *      The functions listed above are the 43L variants: they keep the
 *      first three channels of every pixel and drop the fourth.
 *
 * NOTE
 *      These functions are separated from mlib_v_ImageChannelExtract.c
 *      for loop unrolling and structure clarity.
 */
  57 
  58 #include "vis_proto.h"
  59 #include "mlib_image.h"
  60 #include "mlib_v_ImageChannelExtract.h"
  61 
  62 /***************************************************************/
  63 #define EXTRACT_U8_43L        /* shift left */                  \
  64                                                                 \
  65   vis_alignaddr((void *)0, 3);                                  \
  66   dd0 = vis_faligndata(dd0, sd0);    /* ----------r0g0b0 */     \
  67   sda = vis_freg_pair(vis_read_lo(sd0), vis_read_hi(sd0));      \
  68   dd0 = vis_faligndata(dd0, sda);    /* ----r0g0b0r1g1b1 */     \
  69                                                                 \
  70   vis_alignaddr((void *)0, 2);                                  \
  71   dd0 = vis_faligndata(dd0, sd1);    /* r0g0b0r1g1b1r2g2 */     \
  72                                                                 \
  73   vis_alignaddr((void *)0, 3);                                  \
  74   dd1 = vis_faligndata(dd1, sd1);    /* ----------r2g2b2 */     \
  75   sda = vis_freg_pair(vis_read_lo(sd1), vis_read_hi(sd1));      \
  76   dd1 = vis_faligndata(dd1, sda);    /* ----r2g2b2r3g3b3 */     \
  77   dd1 = vis_faligndata(dd1, sd2);    /* g2b2r3g3b3r4g4b4 */     \
  78                                                                 \
  79   sda = vis_freg_pair(vis_read_lo(sd2), vis_read_hi(sd2));      \
  80   vis_alignaddr((void *)0, 1);                                  \
  81   dd1 = vis_faligndata(dd1, sda);    /* b2r3g3b3r4g4b4r5 */     \
  82                                                                 \
  83   vis_alignaddr((void *)0, 3);                                  \
  84   dd2 = vis_faligndata(dd2, sda);    /* ----------r5g5b5 */     \
  85                                                                 \
  86   dd2 = vis_faligndata(dd2, sd3);    /* ----r5g5b5r6g6b6 */     \
  87   sda = vis_freg_pair(vis_read_lo(sd3), vis_read_hi(sd3));      \
  88   dd2 = vis_faligndata(dd2, sda);           /* g5b5r6g6b6r7g7b7 */
  89 
  90 /***************************************************************/
  91 #define LOAD_EXTRACT_U8_43L                                             \
  92                                                                         \
  93   vis_alignaddr((void *)soff, 0);                                       \
  94   s0 = s4;                                                              \
  95   s1 = sp[1];                                                           \
  96   s2 = sp[2];                                                           \
  97   s3 = sp[3];                                                           \
  98   s4 = sp[4];                                                           \
  99   sd0 = vis_faligndata(s0, s1);  /* the intermediate is ABGR aligned */ \
 100   sd1 = vis_faligndata(s1, s2);                                         \
 101   sd2 = vis_faligndata(s2, s3);                                         \
 102   sd3 = vis_faligndata(s3, s4);                                         \
 103   sp += 4;                                                              \
 104                                                                         \
 105 /*  vis_alignaddr((void *)0, 1); */    /* for _old only */              \
 106   dd2old = dd2;                                                         \
 107   EXTRACT_U8_43L
 108 
 109 /***************************************************************/
 110 /*
 111  * Either source or destination data are not 8-byte aligned.
 112  * And ssize is multiple of 8.
 113  */
 114 
 115 void mlib_v_ImageChannelExtract_U8_43L_D1(const mlib_u8 *src,
 116                                           mlib_u8       *dst,
 117                                           mlib_s32      dsize)
 118 {
 119   mlib_u8 *sa, *da;
 120   mlib_u8 *dend, *dend2;                              /* end points in dst */
 121   mlib_d64 *dp;                                       /* 8-byte aligned start points in dst */
 122   mlib_d64 *sp;                                       /* 8-byte aligned start point in src */
 123   mlib_d64 s0, s1, s2, s3, s4;                        /* 8-byte source row data */
 124   mlib_d64 sd0, sd1, sd2, sd3;                        /* 8-byte source data */
 125   mlib_d64 dd0, dd1, dd2;                             /* dst data */
 126   mlib_d64 dd2old;                                    /* the last datum of the last step */
 127   mlib_d64 sda;
 128   mlib_s32 soff;                                      /* offset of address in src */
 129   mlib_s32 doff;                                      /* offset of address in dst */
 130   mlib_s32 emask;                                     /* edge mask */
 131   mlib_s32 i, n;
 132 
 133   sa = (void *)src;
 134   da = dst;
 135 
 136   /* prepare the source address */
 137   sp = (mlib_d64 *) ((mlib_addr) sa & (~7));
 138   soff = ((mlib_addr) sa & 7);
 139 
 140   /* prepare the destination addresses */
 141   dp = (mlib_d64 *) ((mlib_addr) da & (~7));
 142   dend = da + dsize * 3 - 1;
 143   dend2 = dend - 23;
 144   doff = 8 - ((mlib_addr) da & 7);
 145 
 146   /* generate edge mask for the start point */
 147   emask = vis_edge8(da, dend);
 148 
 149   /* load 32 byte, convert, store 24 bytes */
 150   s4 = sp[0];                               /* initial value */
 151   LOAD_EXTRACT_U8_43L;
 152 
 153   if (dsize >= 8) {
 154     if (doff == 8) {
 155       vis_pst_8(dd0, dp++, emask);
 156       *dp++ = dd1;
 157       *dp++ = dd2;
 158     }
 159     else {
 160       vis_alignaddr((void *)doff, 0);
 161       vis_pst_8(vis_faligndata(dd0, dd0), dp++, emask);
 162       *dp++ = vis_faligndata(dd0, dd1);
 163       *dp++ = vis_faligndata(dd1, dd2);
 164     }
 165   }
 166   else {                                    /* for very small size */
 167     if (doff == 8) {
 168       vis_pst_8(dd0, dp++, emask);
 169       if ((mlib_addr) dp <= (mlib_addr) dend) {
 170         emask = vis_edge8(dp, dend);
 171         vis_pst_8(dd1, dp++, emask);
 172         if ((mlib_addr) dp <= (mlib_addr) dend) {
 173           emask = vis_edge8(dp, dend);
 174           vis_pst_8(dd2, dp++, emask);
 175         }
 176       }
 177     }
 178     else {
 179       vis_alignaddr((void *)doff, 0);
 180       vis_pst_8(vis_faligndata(dd0, dd0), dp++, emask);
 181       if ((mlib_addr) dp <= (mlib_addr) dend) {
 182         emask = vis_edge8(dp, dend);
 183         vis_pst_8(vis_faligndata(dd0, dd1), dp++, emask);
 184         if ((mlib_addr) dp <= (mlib_addr) dend) {
 185           emask = vis_edge8(dp, dend);
 186           vis_pst_8(vis_faligndata(dd1, dd2), dp++, emask);
 187           if ((mlib_addr) dp <= (mlib_addr) dend) {
 188             emask = vis_edge8(dp, dend);
 189             vis_pst_8(vis_faligndata(dd2, dd2), dp++, emask);
 190           }
 191         }
 192       }
 193     }
 194   }
 195 
 196   /* no edge handling is needed in the loop */
 197   if (doff == 8) {
 198     if ((mlib_addr) dp <= (mlib_addr) dend2) {
 199       n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 24 + 1;
 200 #pragma pipeloop(0)
 201       for (i = 0; i < n; i++) {
 202         LOAD_EXTRACT_U8_43L;
 203         *dp++ = dd0;
 204         *dp++ = dd1;
 205         *dp++ = dd2;
 206       }
 207     }
 208   }
 209   else {
 210     if ((mlib_addr) dp <= (mlib_addr) dend2) {
 211       n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 24 + 1;
 212 #pragma pipeloop(0)
 213       for (i = 0; i < n; i++) {
 214         LOAD_EXTRACT_U8_43L;
 215         vis_alignaddr((void *)doff, 0);
 216         *dp++ = vis_faligndata(dd2old, dd0);
 217         *dp++ = vis_faligndata(dd0, dd1);
 218         *dp++ = vis_faligndata(dd1, dd2);
 219       }
 220     }
 221   }
 222 
 223   if ((mlib_addr) dp <= (mlib_addr) dend) {
 224     LOAD_EXTRACT_U8_43L;
 225     emask = vis_edge8(dp, dend);
 226     if (doff == 8) {
 227       vis_pst_8(dd0, dp++, emask);
 228       if ((mlib_addr) dp <= (mlib_addr) dend) {
 229         emask = vis_edge8(dp, dend);
 230         vis_pst_8(dd1, dp++, emask);
 231         if ((mlib_addr) dp <= (mlib_addr) dend) {
 232           emask = vis_edge8(dp, dend);
 233           vis_pst_8(dd2, dp++, emask);
 234         }
 235       }
 236     }
 237     else {
 238       vis_alignaddr((void *)doff, 0);
 239       vis_pst_8(vis_faligndata(dd2old, dd0), dp++, emask);
 240       if ((mlib_addr) dp <= (mlib_addr) dend) {
 241         emask = vis_edge8(dp, dend);
 242         vis_pst_8(vis_faligndata(dd0, dd1), dp++, emask);
 243         if ((mlib_addr) dp <= (mlib_addr) dend) {
 244           emask = vis_edge8(dp, dend);
 245           vis_pst_8(vis_faligndata(dd1, dd2), dp++, emask);
 246         }
 247       }
 248     }
 249   }
 250 }
 251 
 252 /***************************************************************/
 253 #define EXTRACT_S16_43L              /* shift left */           \
 254   vis_alignaddr((void *)0, 6);                                  \
 255   dd0 = vis_faligndata(dd0, sd0);    /* --r0g0b0 */             \
 256   vis_alignaddr((void *)0, 2);                                  \
 257   dd0 = vis_faligndata(dd0, sd1);    /* r0g0b0r1 */             \
 258                                                                 \
 259   vis_alignaddr((void *)0, 6);                                  \
 260   dd1 = vis_faligndata(dd1, sd1);    /* --r1g1b1 */             \
 261   vis_alignaddr((void *)0, 4);                                  \
 262   dd1 = vis_faligndata(dd1, sd2);    /* g1b1r2g2 */             \
 263                                                                 \
 264   vis_alignaddr((void *)0, 6);                                  \
 265   dd2 = vis_faligndata(dd2, sd2);    /* --r2g2b2 */             \
 266   dd2 = vis_faligndata(dd2, sd3);           /* b2r3g3b3 */
 267 
 268 /***************************************************************/
 269 #define LOAD_EXTRACT_S16_43L                                    \
 270                                                                 \
 271   vis_alignaddr((void *)soff, 0);                               \
 272   s0 = s4;                                                      \
 273   s1 = sp[1];                                                   \
 274   s2 = sp[2];                                                   \
 275   s3 = sp[3];                                                   \
 276   s4 = sp[4];                                                   \
 277   sd0 = vis_faligndata(s0, s1);                                 \
 278   sd1 = vis_faligndata(s1, s2);                                 \
 279   sd2 = vis_faligndata(s2, s3);                                 \
 280   sd3 = vis_faligndata(s3, s4);                                 \
 281   sp += 4;                                                      \
 282   dd2old = dd2;                                                 \
 283   EXTRACT_S16_43L
 284 
 285 /***************************************************************/
 286 /*
 287  * Either source or destination data are not 8-byte aligned.
 288  * And size is in pixels.
 289  */
 290 
 291 void mlib_v_ImageChannelExtract_S16_43L_D1(const mlib_s16 *src,
 292                                            mlib_s16       *dst,
 293                                            mlib_s32       dsize)
 294 {
 295   mlib_s16 *sa, *da;                                  /* pointer for pixel */
 296   mlib_s16 *dend, *dend2;                             /* end points in dst */
 297   mlib_d64 *dp;                                       /* 8-byte aligned start points in dst */
 298   mlib_d64 *sp;                                       /* 8-byte aligned start point in src */
 299   mlib_d64 s0, s1, s2, s3, s4;                        /* 8-byte source row data */
 300   mlib_d64 sd0, sd1, sd2, sd3;                        /* 8-byte source data */
 301   mlib_d64 dd0, dd1, dd2;                             /* dst data */
 302   mlib_d64 dd2old;                                    /* the last datum of the last step */
 303   mlib_s32 soff;                                      /* offset of address in src */
 304   mlib_s32 doff;                                      /* offset of address in dst */
 305   mlib_s32 emask;                                     /* edge mask */
 306   mlib_s32 i, n;
 307 
 308   sa = (void *)src;
 309   da = dst;
 310 
 311   /* prepare the source address */
 312   sp = (mlib_d64 *) ((mlib_addr) sa & (~7));
 313   soff = ((mlib_addr) sa & 7);
 314 
 315   /* prepare the destination addresses */
 316   dp = (mlib_d64 *) ((mlib_addr) da & (~7));
 317   dend = da + dsize * 3 - 1;
 318   dend2 = dend - 11;
 319   doff = 8 - ((mlib_addr) da & 7);
 320 
 321   /* generate edge mask for the start point */
 322   emask = vis_edge16(da, dend);
 323 
 324   /* load 32 byte, convert, store 24 bytes */
 325   s4 = sp[0];                               /* initial value */
 326   LOAD_EXTRACT_S16_43L;
 327 
 328   if (dsize >= 4) {
 329     if (doff == 8) {
 330       vis_pst_16(dd0, dp++, emask);
 331       *dp++ = dd1;
 332       *dp++ = dd2;
 333     }
 334     else {
 335       vis_alignaddr((void *)doff, 0);
 336       vis_pst_16(vis_faligndata(dd0, dd0), dp++, emask);
 337       *dp++ = vis_faligndata(dd0, dd1);
 338       *dp++ = vis_faligndata(dd1, dd2);
 339     }
 340   }
 341   else {                                    /* for very small size */
 342     if (doff == 8) {
 343       vis_pst_16(dd0, dp++, emask);
 344       if ((mlib_addr) dp <= (mlib_addr) dend) {
 345         emask = vis_edge16(dp, dend);
 346         vis_pst_16(dd1, dp++, emask);
 347         if ((mlib_addr) dp <= (mlib_addr) dend) {
 348           emask = vis_edge16(dp, dend);
 349           vis_pst_16(dd2, dp++, emask);
 350         }
 351       }
 352     }
 353     else {
 354       vis_alignaddr((void *)doff, 0);
 355       vis_pst_16(vis_faligndata(dd0, dd0), dp++, emask);
 356       if ((mlib_addr) dp <= (mlib_addr) dend) {
 357         emask = vis_edge16(dp, dend);
 358         vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask);
 359         if ((mlib_addr) dp <= (mlib_addr) dend) {
 360           emask = vis_edge16(dp, dend);
 361           vis_pst_16(vis_faligndata(dd1, dd2), dp++, emask);
 362         }
 363       }
 364     }
 365   }
 366 
 367   /* no edge handling is needed in the loop */
 368   if (doff == 8) {
 369     if ((mlib_addr) dp <= (mlib_addr) dend2) {
 370       n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 24 + 1;
 371 #pragma pipeloop(0)
 372       for (i = 0; i < n; i++) {
 373         LOAD_EXTRACT_S16_43L;
 374         *dp++ = dd0;
 375         *dp++ = dd1;
 376         *dp++ = dd2;
 377       }
 378     }
 379   }
 380   else {
 381     if ((mlib_addr) dp <= (mlib_addr) dend2) {
 382       n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 24 + 1;
 383 #pragma pipeloop(0)
 384       for (i = 0; i < n; i++) {
 385         LOAD_EXTRACT_S16_43L;
 386         vis_alignaddr((void *)doff, 0);
 387         *dp++ = vis_faligndata(dd2old, dd0);
 388         *dp++ = vis_faligndata(dd0, dd1);
 389         *dp++ = vis_faligndata(dd1, dd2);
 390       }
 391     }
 392   }
 393 
 394   if ((mlib_addr) dp <= (mlib_addr) dend) {
 395     LOAD_EXTRACT_S16_43L;
 396     emask = vis_edge16(dp, dend);
 397     if (doff == 8) {
 398       vis_pst_16(dd0, dp++, emask);
 399       if ((mlib_addr) dp <= (mlib_addr) dend) {
 400         emask = vis_edge16(dp, dend);
 401         vis_pst_16(dd1, dp++, emask);
 402         if ((mlib_addr) dp <= (mlib_addr) dend) {
 403           emask = vis_edge16(dp, dend);
 404           vis_pst_16(dd2, dp++, emask);
 405         }
 406       }
 407     }
 408     else {
 409       vis_alignaddr((void *)doff, 0);
 410       vis_pst_16(vis_faligndata(dd2old, dd0), dp++, emask);
 411       if ((mlib_addr) dp <= (mlib_addr) dend) {
 412         emask = vis_edge16(dp, dend);
 413         vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask);
 414         if ((mlib_addr) dp <= (mlib_addr) dend) {
 415           emask = vis_edge16(dp, dend);
 416           vis_pst_16(vis_faligndata(dd1, dd2), dp++, emask);
 417         }
 418       }
 419     }
 420   }
 421 }
 422 
 423 /***************************************************************/