1 /*
   2  * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 
  27 
  28 /*
  29  * FUNCTIONS
  30  *      mlib_v_ImageChannelInsert_U8_12_D1
  31  *      mlib_v_ImageChannelInsert_U8_13_D1
  32  *      mlib_v_ImageChannelInsert_U8_14_D1
  33  *
  34  * ARGUMENT
  35  *      src     pointer to source image data
  36  *      dst     pointer to destination image data
  37  *      slb     source image line stride in bytes
  38  *      dlb     destination image line stride in bytes
  39  *      dsize   image data size in pixels
  40  *      xsize   image width in pixels
  41  *      ysize   image height in lines
  42  *      cmask   channel mask
  43  *
  44  * DESCRIPTION
  45  *      Copy the 1-channel source image into the selected channel
  46  *      of the destination image -- VIS version low level functions.
  47  *
  48  * NOTE
  49  *      These functions are separated from mlib_v_ImageChannelInsert.c
  50  *      for loop unrolling and structure clarity.
  51  */
  52 
  53 #include "vis_proto.h"
  54 #include "mlib_image.h"
  55 #include "mlib_v_ImageChannelInsert.h"
  56 
  57 /***************************************************************/
  58 #define INSERT_U8_12(sd0, dd0, dd1)     /* channel duplicate */ \
  59   dd0 = vis_fpmerge(vis_read_hi(sd0), vis_read_hi(sd0));        \
  60   dd1 = vis_fpmerge(vis_read_lo(sd0), vis_read_lo(sd0))
  61 
  62 /***************************************************************/
  63 /* insert one channel to a 2-channel image.
  64  */
  65 
  66 void mlib_v_ImageChannelInsert_U8_12_D1(const mlib_u8 *src,
  67                                         mlib_u8       *dst,
  68                                         mlib_s32      dsize,
  69                                         mlib_s32      cmask)
  70 {
  71   mlib_u8 *sa, *da;
  72   mlib_u8 *dend, *dend2;                              /* end points in dst */
  73   mlib_d64 *dp;                                       /* 8-byte aligned start points in dst */
  74   mlib_d64 *sp;                                       /* 8-byte aligned start point in src */
  75   mlib_d64 sd0, sd1;                                  /* 8-byte source data */
  76   mlib_d64 dd0, dd1, dd2, dd3;                        /* 8-byte destination data */
  77   mlib_s32 soff;                                      /* offset of address in src */
  78   mlib_s32 doff;                                      /* offset of address in dst */
  79   mlib_s32 off;                                       /* offset of src over dst */
  80   mlib_s32 emask;                                     /* edge mask */
  81   mlib_s32 bmask;                                     /* channel mask */
  82   mlib_s32 i, n;
  83 
  84   bmask = cmask | (cmask << 2) | (cmask << 4) | (cmask << 6);
  85 
  86   sa = (void *)src;
  87   da = dst;
  88 
  89   /* prepare the source address */
  90   sp = (mlib_d64 *) ((mlib_addr) sa & (~7));
  91   soff = ((mlib_addr) sa & 7);
  92 
  93   /* prepare the destination addresses */
  94   dp = (mlib_d64 *) ((mlib_addr) da & (~7));
  95   doff = ((mlib_addr) da & 7);
  96   dend = da + dsize * 2 - 1;
  97   dend2 = dend - 15;
  98 
  99   /* calculate the src's offset over dst */
 100   off = soff * 2 - doff;
 101 
 102   if (doff % 2 != 0) {
 103     bmask = (~bmask) & 0xff;
 104   }
 105 
 106   if (off == 0) {                           /* src and dst have same alignment */
 107 
 108     /* load 8 bytes */
 109     sd0 = *sp++;
 110 
 111     /* insert, including some garbage at the start point */
 112     INSERT_U8_12(sd0, dd0, dd1);
 113 
 114     /* store 16 bytes result */
 115     emask = vis_edge8(da, dend);
 116     vis_pst_8(dd0, dp++, emask & bmask);
 117     if ((mlib_addr) dp <= (mlib_addr) dend) {
 118       emask = vis_edge8(dp, dend);
 119       vis_pst_8(dd1, dp++, emask & bmask);
 120     }
 121 
 122     if ((mlib_addr) dp <= (mlib_addr) dend2) {
 123       n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 16 + 1;
 124 
 125       /* 8-pixel column loop, emask not needed */
 126 #pragma pipeloop(0)
 127       for (i = 0; i < n; i++) {
 128         sd0 = *sp++;
 129         INSERT_U8_12(sd0, dd0, dd1);
 130         vis_pst_8(dd0, dp++, bmask);
 131         vis_pst_8(dd1, dp++, bmask);
 132       }
 133     }
 134 
 135     /* end point handling */
 136     if ((mlib_addr) dp <= (mlib_addr) dend) {
 137       sd0 = *sp++;
 138       INSERT_U8_12(sd0, dd0, dd1);
 139       emask = vis_edge8(dp, dend);
 140       vis_pst_8(dd0, dp++, emask & bmask);
 141       if ((mlib_addr) dp <= (mlib_addr) dend) {
 142         emask = vis_edge8(dp, dend);
 143         vis_pst_8(dd1, dp++, emask & bmask);
 144       }
 145     }
 146   }
 147   else if (off < 0) {
 148     vis_alignaddr((void *)0, off);
 149 
 150     /* generate edge mask for the start point */
 151     emask = vis_edge8(da, dend);
 152 
 153     /* load 8 bytes */
 154     sd0 = *sp++;
 155 
 156     /* insert and store 16 bytes */
 157     INSERT_U8_12(sd0, dd0, dd1);
 158     vis_pst_8(vis_faligndata(dd0, dd0), dp++, emask & bmask);
 159     if ((mlib_addr) dp <= (mlib_addr) dend) {
 160       emask = vis_edge8(dp, dend);
 161       vis_pst_8(vis_faligndata(dd0, dd1), dp++, emask & bmask);
 162     }
 163 
 164     if ((mlib_addr) dp <= (mlib_addr) dend2) {
 165       n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 16 + 1;
 166 
 167       /* 8-pixel column loop, emask not needed */
 168 #pragma pipeloop(0)
 169       for (i = 0; i < n; i++) {
 170         dd2 = dd1;
 171         sd0 = *sp++;
 172         INSERT_U8_12(sd0, dd0, dd1);
 173         vis_pst_8(vis_faligndata(dd2, dd0), dp++, bmask);
 174         vis_pst_8(vis_faligndata(dd0, dd1), dp++, bmask);
 175       }
 176     }
 177 
 178     /* end point handling */
 179     if ((mlib_addr) dp <= (mlib_addr) dend) {
 180       emask = vis_edge8(dp, dend);
 181       dd2 = dd1;
 182       sd0 = *sp++;
 183       INSERT_U8_12(sd0, dd0, dd1);
 184       vis_pst_8(vis_faligndata(dd2, dd0), dp++, emask & bmask);
 185       if ((mlib_addr) dp <= (mlib_addr) dend) {
 186         emask = vis_edge8(dp, dend);
 187         vis_pst_8(vis_faligndata(dd0, dd1), dp++, emask & bmask);
 188       }
 189     }
 190   }
 191   else if (off < 8) {
 192     vis_alignaddr((void *)0, off);
 193 
 194     /* generate edge mask for the start point */
 195     emask = vis_edge8(da, dend);
 196 
 197     /* load 16 bytes */
 198     sd0 = *sp++;
 199     sd1 = *sp++;
 200 
 201     /* insert and store 16 bytes */
 202     INSERT_U8_12(sd0, dd0, dd1);
 203     INSERT_U8_12(sd1, dd2, dd3);
 204     vis_pst_8(vis_faligndata(dd0, dd1), dp++, emask & bmask);
 205     if ((mlib_addr) dp <= (mlib_addr) dend) {
 206       emask = vis_edge8(dp, dend);
 207       vis_pst_8(vis_faligndata(dd1, dd2), dp++, emask & bmask);
 208     }
 209 
 210     if ((mlib_addr) dp <= (mlib_addr) dend2) {
 211       n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 16 + 1;
 212 
 213       /* 8-pixel column loop, emask not needed */
 214 #pragma pipeloop(0)
 215       for (i = 0; i < n; i++) {
 216         dd0 = dd2;
 217         dd1 = dd3;
 218         sd1 = *sp++;
 219         INSERT_U8_12(sd1, dd2, dd3);
 220         vis_pst_8(vis_faligndata(dd0, dd1), dp++, bmask);
 221         vis_pst_8(vis_faligndata(dd1, dd2), dp++, bmask);
 222       }
 223     }
 224 
 225     /* end point handling */
 226     if ((mlib_addr) dp <= (mlib_addr) dend) {
 227       emask = vis_edge8(dp, dend);
 228       dd0 = dd2;
 229       dd1 = dd3;
 230       sd1 = *sp++;
 231       INSERT_U8_12(sd1, dd2, dd3);
 232       vis_pst_8(vis_faligndata(dd0, dd1), dp++, emask & bmask);
 233       if ((mlib_addr) dp <= (mlib_addr) dend) {
 234         emask = vis_edge8(dp, dend);
 235         vis_pst_8(vis_faligndata(dd1, dd2), dp++, emask & bmask);
 236       }
 237     }
 238   }
 239   else {                                    /* (off >= 8) */
 240     vis_alignaddr((void *)0, off);
 241 
 242     /* generate edge mask for the start point */
 243     emask = vis_edge8(da, dend);
 244 
 245     /* load 16 bytes */
 246     sd0 = *sp++;
 247     sd1 = *sp++;
 248 
 249     /* insert and store 16 bytes */
 250     INSERT_U8_12(sd0, dd0, dd1);
 251     INSERT_U8_12(sd1, dd2, dd3);
 252     vis_pst_8(vis_faligndata(dd1, dd2), dp++, emask & bmask);
 253     if ((mlib_addr) dp <= (mlib_addr) dend) {
 254       emask = vis_edge8(dp, dend);
 255       vis_pst_8(vis_faligndata(dd2, dd3), dp++, emask & bmask);
 256     }
 257 
 258     if ((mlib_addr) dp <= (mlib_addr) dend2) {
 259       n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 16 + 1;
 260 
 261       /* 8-pixel column loop, emask not needed */
 262 #pragma pipeloop(0)
 263       for (i = 0; i < n; i++) {
 264         dd0 = dd2;
 265         dd1 = dd3;
 266         sd1 = *sp++;
 267         INSERT_U8_12(sd1, dd2, dd3);
 268         vis_pst_8(vis_faligndata(dd1, dd2), dp++, bmask);
 269         vis_pst_8(vis_faligndata(dd2, dd3), dp++, bmask);
 270       }
 271     }
 272 
 273     /* end point handling */
 274     if ((mlib_addr) dp <= (mlib_addr) dend) {
 275       emask = vis_edge8(dp, dend);
 276       dd0 = dd2;
 277       dd1 = dd3;
 278       sd1 = *sp++;
 279       INSERT_U8_12(sd1, dd2, dd3);
 280       vis_pst_8(vis_faligndata(dd1, dd2), dp++, emask & bmask);
 281       if ((mlib_addr) dp <= (mlib_addr) dend) {
 282         emask = vis_edge8(dp, dend);
 283         vis_pst_8(vis_faligndata(dd2, dd3), dp++, emask & bmask);
 284       }
 285     }
 286   }
 287 }
 288 
 289 /***************************************************************/
 290 #define LOAD_INSERT_STORE_U8(channeld)                          \
 291   vis_alignaddr((void *)0, off);                                \
 292   sd0 = sd1;                                                    \
 293   sd1 = *sp++;                                                  \
 294   sd  = vis_faligndata(sd0, sd1);                               \
 295   vis_alignaddr((void *)0, 1);                                  \
 296   vis_st_u8(sd = vis_faligndata(sd, sd), da); da += channeld;   \
 297   vis_st_u8(sd = vis_faligndata(sd, sd), da); da += channeld;   \
 298   vis_st_u8(sd = vis_faligndata(sd, sd), da); da += channeld;   \
 299   vis_st_u8(sd = vis_faligndata(sd, sd), da); da += channeld;   \
 300   vis_st_u8(sd = vis_faligndata(sd, sd), da); da += channeld;   \
 301   vis_st_u8(sd = vis_faligndata(sd, sd), da); da += channeld;   \
 302   vis_st_u8(sd = vis_faligndata(sd, sd), da); da += channeld;   \
 303   vis_st_u8(sd = vis_faligndata(sd, sd), da); da += channeld
 304 
 305 /***************************************************************/
 306 void mlib_v_ImageChannelInsert_U8_13_D1(const mlib_u8 *src,
 307                                         mlib_u8       *dst,
 308                                         mlib_s32      dsize,
 309                                         mlib_s32      cmask)
 310 {
 311   mlib_u8 *sa, *da;
 312   mlib_u8 *dend;                                      /* end point in destination */
 313   mlib_d64 *sp;                                       /* 8-byte aligned start points in src */
 314   mlib_d64 sd0, sd1, sd;                              /* 8-byte registers for source data */
 315   mlib_s32 off;                                       /* offset of address alignment in src */
 316   mlib_s32 i;
 317 
 318   /* prepare the src address */
 319   sa = (void *)src;
 320   sp = (mlib_d64 *) ((mlib_addr) sa & (~7));
 321   off = (mlib_addr) sa & 7;
 322 
 323   /* prepare the dst address */
 324   da = dst + (2 / cmask);                   /* 4,2,1 -> 0,1,2 */
 325   dend = da + dsize * 3 - 1;
 326 
 327   sd1 = *sp++;
 328 
 329 #pragma pipeloop(0)
 330   for (i = 0; i < dsize / 8; i++) {
 331     LOAD_INSERT_STORE_U8(3);
 332   }
 333 
 334   /* right end handling */
 335   if ((mlib_addr) da <= (mlib_addr) dend) {
 336 
 337     vis_alignaddr((void *)0, off);
 338     sd0 = sd1;
 339     sd1 = *sp++;
 340     sd = vis_faligndata(sd0, sd1);
 341 
 342     vis_alignaddr((void *)0, 1);
 343     vis_st_u8(sd = vis_faligndata(sd, sd), da);
 344     da += 3;
 345     if ((mlib_addr) da <= (mlib_addr) dend) {
 346       vis_st_u8(sd = vis_faligndata(sd, sd), da);
 347       da += 3;
 348       if ((mlib_addr) da <= (mlib_addr) dend) {
 349         vis_st_u8(sd = vis_faligndata(sd, sd), da);
 350         da += 3;
 351         if ((mlib_addr) da <= (mlib_addr) dend) {
 352           vis_st_u8(sd = vis_faligndata(sd, sd), da);
 353           da += 3;
 354           if ((mlib_addr) da <= (mlib_addr) dend) {
 355             vis_st_u8(sd = vis_faligndata(sd, sd), da);
 356             da += 3;
 357             if ((mlib_addr) da <= (mlib_addr) dend) {
 358               vis_st_u8(sd = vis_faligndata(sd, sd), da);
 359               da += 3;
 360               if ((mlib_addr) da <= (mlib_addr) dend) {
 361                 vis_st_u8(sd = vis_faligndata(sd, sd), da);
 362               }
 363             }
 364           }
 365         }
 366       }
 367     }
 368   }
 369 }
 370 
 371 /***************************************************************/
 372 #define INSERT_U8_14(sd0, dd0, dd1, dd2, dd3)                   \
 373   sda = vis_fpmerge(vis_read_hi(sd0), vis_read_hi(sd0));        \
 374   sdb = vis_fpmerge(vis_read_lo(sd0), vis_read_lo(sd0));        \
 375   dd0 = vis_fpmerge(vis_read_hi(sda), vis_read_hi(sda));        \
 376   dd1 = vis_fpmerge(vis_read_lo(sda), vis_read_lo(sda));        \
 377   dd2 = vis_fpmerge(vis_read_hi(sdb), vis_read_hi(sdb));        \
 378   dd3 = vis_fpmerge(vis_read_lo(sdb), vis_read_lo(sdb))
 379 
 380 /***************************************************************/
 381 void mlib_v_ImageChannelInsert_U8_14_D1(const mlib_u8 *src,
 382                                         mlib_u8       *dst,
 383                                         mlib_s32      dsize,
 384                                         mlib_s32      cmask)
 385 {
 386   mlib_u8 *sa, *da;
 387   mlib_u8 *dend, *dend2;                              /* end points in dst */
 388   mlib_d64 *dp;                                       /* 8-byte aligned start points in dst */
 389   mlib_d64 *sp;                                       /* 8-byte aligned start point in src */
 390   mlib_d64 sd0, sd1, sd;                              /* 8-byte source data */
 391   mlib_d64 sda, sdb;
 392   mlib_d64 dd0, dd1, dd2, dd3, dd4;
 393   mlib_s32 soff;                                      /* offset of address in src */
 394   mlib_s32 doff;                                      /* offset of address in dst */
 395   mlib_s32 emask;                                     /* edge mask */
 396   mlib_s32 bmask;                                     /* channel mask */
 397   mlib_s32 i, n;
 398 
 399   sa = (void *)src;
 400   da = dst;
 401 
 402   bmask = cmask | (cmask << 4) | (cmask << 8);
 403 
 404   /* prepare the source address */
 405   sp = (mlib_d64 *) ((mlib_addr) sa & (~7));
 406   soff = ((mlib_addr) sa & 7);
 407 
 408   /* prepare the destination addresses */
 409   dp = (mlib_d64 *) ((mlib_addr) da & (~7));
 410   doff = ((mlib_addr) da & 7);
 411   dend = da + dsize * 4 - 1;
 412   dend2 = dend - 31;
 413 
 414   bmask = (bmask >> (doff % 4)) & 0xff;
 415 
 416   if (doff == 0) {                          /* dst is 8-byte aligned */
 417 
 418     vis_alignaddr((void *)0, soff);
 419     sd0 = *sp++;
 420     sd1 = *sp++;
 421     sd = vis_faligndata(sd0, sd1);          /* the intermediate is aligned */
 422 
 423     INSERT_U8_14(sd, dd0, dd1, dd2, dd3);
 424 
 425     emask = vis_edge8(da, dend);
 426     vis_pst_8(dd0, dp++, emask & bmask);
 427     if ((mlib_addr) dp <= (mlib_addr) dend) { /* for very small size */
 428       emask = vis_edge8(dp, dend);
 429       vis_pst_8(dd1, dp++, emask & bmask);
 430       if ((mlib_addr) dp <= (mlib_addr) dend) {
 431         emask = vis_edge8(dp, dend);
 432         vis_pst_8(dd2, dp++, emask & bmask);
 433         if ((mlib_addr) dp <= (mlib_addr) dend) {
 434           emask = vis_edge8(dp, dend);
 435           vis_pst_8(dd3, dp++, emask & bmask);
 436         }
 437       }
 438     }
 439 
 440     if ((mlib_addr) dp <= (mlib_addr) dend2) {
 441       n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 32 + 1;
 442 
 443       /* 8-pixel column loop, emask not needed */
 444 #pragma pipeloop(0)
 445       for (i = 0; i < n; i++) {
 446         sd0 = sd1;
 447         sd1 = *sp++;
 448         sd = vis_faligndata(sd0, sd1);
 449 
 450         INSERT_U8_14(sd, dd0, dd1, dd2, dd3);
 451 
 452         vis_pst_8(dd0, dp++, bmask);
 453         vis_pst_8(dd1, dp++, bmask);
 454         vis_pst_8(dd2, dp++, bmask);
 455         vis_pst_8(dd3, dp++, bmask);
 456       }
 457     }
 458 
 459     /* end point handling */
 460     if ((mlib_addr) dp <= (mlib_addr) dend) {
 461       sd0 = sd1;
 462       sd1 = *sp++;
 463       sd = vis_faligndata(sd0, sd1);
 464 
 465       INSERT_U8_14(sd, dd0, dd1, dd2, dd3);
 466 
 467       emask = vis_edge8(dp, dend);
 468       vis_pst_8(dd0, dp++, emask & bmask);
 469       if ((mlib_addr) dp <= (mlib_addr) dend) {
 470         emask = vis_edge8(dp, dend);
 471         vis_pst_8(dd1, dp++, emask & bmask);
 472         if ((mlib_addr) dp <= (mlib_addr) dend) {
 473           emask = vis_edge8(dp, dend);
 474           vis_pst_8(dd2, dp++, emask & bmask);
 475           if ((mlib_addr) dp <= (mlib_addr) dend) {
 476             emask = vis_edge8(dp, dend);
 477             vis_pst_8(dd3, dp++, emask & bmask);
 478           }
 479         }
 480       }
 481     }
 482   }
 483   else {                                    /* dst is not 8-byte aligned */
 484     vis_alignaddr((void *)0, soff);
 485     sd0 = *sp++;
 486     sd1 = *sp++;
 487     sd = vis_faligndata(sd0, sd1);          /* the intermediate is aligned */
 488 
 489     INSERT_U8_14(sd, dd0, dd1, dd2, dd3);
 490 
 491     vis_alignaddr((void *)0, -doff);
 492 
 493     emask = vis_edge8(da, dend);
 494     vis_pst_8(vis_faligndata(dd0, dd0), dp++, emask & bmask);
 495     if ((mlib_addr) dp <= (mlib_addr) dend) { /* for very small size */
 496       emask = vis_edge8(dp, dend);
 497       vis_pst_8(vis_faligndata(dd0, dd1), dp++, emask & bmask);
 498       if ((mlib_addr) dp <= (mlib_addr) dend) {
 499         emask = vis_edge8(dp, dend);
 500         vis_pst_8(vis_faligndata(dd1, dd2), dp++, emask & bmask);
 501         if ((mlib_addr) dp <= (mlib_addr) dend) {
 502           emask = vis_edge8(dp, dend);
 503           vis_pst_8(vis_faligndata(dd2, dd3), dp++, emask & bmask);
 504         }
 505       }
 506     }
 507 
 508     if ((mlib_addr) dp <= (mlib_addr) dend2) {
 509       n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 32 + 1;
 510 
 511       /* 8-pixel column loop, emask not needed */
 512 #pragma pipeloop(0)
 513       for (i = 0; i < n; i++) {
 514         dd4 = dd3;
 515 
 516         vis_alignaddr((void *)0, soff);
 517         sd0 = sd1;
 518         sd1 = *sp++;
 519         sd = vis_faligndata(sd0, sd1);
 520 
 521         INSERT_U8_14(sd, dd0, dd1, dd2, dd3);
 522 
 523         vis_alignaddr((void *)0, -doff);
 524         vis_pst_8(vis_faligndata(dd4, dd0), dp++, bmask);
 525         vis_pst_8(vis_faligndata(dd0, dd1), dp++, bmask);
 526         vis_pst_8(vis_faligndata(dd1, dd2), dp++, bmask);
 527         vis_pst_8(vis_faligndata(dd2, dd3), dp++, bmask);
 528       }
 529     }
 530 
 531     /* end point handling */
 532     if ((mlib_addr) dp <= (mlib_addr) dend) {
 533       dd4 = dd3;
 534 
 535       vis_alignaddr((void *)0, soff);
 536       sd0 = sd1;
 537       sd1 = *sp++;
 538       sd = vis_faligndata(sd0, sd1);
 539 
 540       INSERT_U8_14(sd, dd0, dd1, dd2, dd3);
 541 
 542       vis_alignaddr((void *)0, -doff);
 543       emask = vis_edge8(dp, dend);
 544       vis_pst_8(vis_faligndata(dd4, dd0), dp++, emask & bmask);
 545       if ((mlib_addr) dp <= (mlib_addr) dend) {
 546         emask = vis_edge8(dp, dend);
 547         vis_pst_8(vis_faligndata(dd0, dd1), dp++, emask & bmask);
 548         if ((mlib_addr) dp <= (mlib_addr) dend) {
 549           emask = vis_edge8(dp, dend);
 550           vis_pst_8(vis_faligndata(dd1, dd2), dp++, emask & bmask);
 551           if ((mlib_addr) dp <= (mlib_addr) dend) {
 552             emask = vis_edge8(dp, dend);
 553             vis_pst_8(vis_faligndata(dd2, dd3), dp++, emask & bmask);
 554           }
 555         }
 556       }
 557     }
 558   }
 559 }
 560 
 561 
 562 /***************************************************************/