1 /*
   2  * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 
  27 /*
  28  * FUNCTIONS
  29  *      mlib_ImageCopy_bit_na     - BIT, non-aligned
  30  *      mlib_ImageCopy_bit_na_r   - BIT, non-aligned, reverse
  31  *
  32  * SYNOPSIS
  33  *
  34  *      void mlib_ImageCopy_bit_na(const mlib_u8 *sa,
  35  *                                 mlib_u8       *da,
  36  *                                 mlib_s32      size,
  37  *                                 mlib_s32      s_offset,
  38  *                                 mlib_s32      d_offset);
  39  *      void mlib_ImageCopy_bit_na_r(const mlib_u8 *sa,
  40  *                                   mlib_u8       *da,
  41  *                                   mlib_s32      size,
  42  *                                   mlib_s32      s_offset,
  43  *                                   mlib_s32      d_offset);
  44  * ARGUMENT
   45  *      sa       pointer to source image data
   46  *      da       pointer to destination image data
   47  *      size     size in bits
  48  *      width    image width in 8-bytes
  49  *      height   image height in lines
  50  *      stride   source image line stride in 8-bytes
  51  *      dstride  destination image line stride in 8-bytes
  52  *      s_offset source image line bit offset
  53  *      d_offset destination image line bit offset
  54  *
  55  * DESCRIPTION
  56  *      Direct copy from one image to another -- C version low level
  57  *      functions.
  58  */
  59 
  60 #include <stdlib.h>
  61 #include "mlib_image.h"
  62 #include "mlib_ImageCopy.h"
  63 
  64 /***************************************************************/
  65 /*
   66  * Bit offsets of source and destination are not the same
  67  */
  68 
  69 void mlib_ImageCopy_bit_na(const mlib_u8 *sa,
  70                            mlib_u8       *da,
  71                            mlib_s32      size,
  72                            mlib_s32      s_offset,
  73                            mlib_s32      d_offset)
  74 {
  75 #ifdef _NO_LONGLONG
  76 
  77   mlib_u32 *dp;          /* 4-byte aligned start points in dst */
  78   mlib_u32 *sp;          /* 4-byte aligned start point in src */
  79   mlib_s32 j;            /* offset of address in dst */
  80   mlib_u32 mask0 = 0xFFFFFFFF;
  81   mlib_u32 dmask;
  82   mlib_u32 src, src0, src1, dst;
  83   mlib_s32 ls_offset, ld_offset, shift;
  84 
  85   if (size <= 0) return;
  86 
  87   /* prepare the destination addresses */
  88   dp = (mlib_u32 *)((mlib_addr)da & (~3));
  89   sp = (mlib_u32 *)((mlib_addr)sa & (~3));
  90   ld_offset = (((mlib_addr)da & 3) << 3) + d_offset;     /* bit d_offset to first mlib_s32 */
  91   ls_offset = (((mlib_addr)sa & 3) << 3) + s_offset;     /* bit d_offset to first mlib_s32 */
  92 
  93   if (ld_offset > ls_offset) {
  94     src0 = sp[0];
  95     dst = dp[0];
  96     if (ld_offset + size < 32) {
  97       dmask = (mask0 << (32 - size)) >> ld_offset;
  98 #ifdef _LITTLE_ENDIAN
  99       src0 = (src0 << 24) | ((src0 & 0xFF00) << 8) | ((src0 >> 8) & 0xFF00) | (src0 >> 24);
 100       src = (src0 >> (ld_offset - ls_offset));
 101       dst = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24);
 102       dst = (dst & (~dmask)) | (src & dmask);
 103       dp[0] = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24);
 104 #else
 105       src = (src0 >> (ld_offset - ls_offset));
 106       dp[0] = (dst & (~dmask)) | (src & dmask);
 107 #endif /* _LITTLE_ENDIAN */
 108       return;
 109     }
 110 
 111     dmask = mask0 >> ld_offset;
 112 #ifdef _LITTLE_ENDIAN
 113     src0 = (src0 << 24) | ((src0 & 0xFF00) << 8) | ((src0 >> 8) & 0xFF00) | (src0 >> 24);
 114     src = (src0 >> (ld_offset - ls_offset));
 115     dst = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24);
 116     dst = (dst & ~dmask) | (src & dmask);
 117     dp[0] = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24);
 118 #else
 119     src = (src0 >> (ld_offset - ls_offset));
 120     dp[0] = (dst & ~dmask) | (src & dmask);
 121 #endif /* _LITTLE_ENDIAN */
 122     j = 32 - ld_offset;
 123     dp++;
 124     ls_offset += j;
 125   } else {
 126 
 127     shift = ls_offset - ld_offset;
 128     src0 = sp[0];
 129     if (ls_offset + size > 32) src1 = sp[1];
 130     dst = dp[0];
 131 
 132     if (ld_offset + size < 32) {
 133       dmask = (mask0 << (32 - size)) >> ld_offset;
 134 #ifdef _LITTLE_ENDIAN
 135       src0 = (src0 << 24) | ((src0 & 0xFF00) << 8) | ((src0 >> 8) & 0xFF00) | (src0 >> 24);
 136       src1 = (src1 << 24) | ((src1 & 0xFF00) << 8) | ((src1 >> 8) & 0xFF00) | (src1 >> 24);
 137       src = (src0 << shift) | (src1 >> (32 - shift));
 138       dst = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24);
 139       dst = (dst & ~dmask) | (src & dmask);
 140       dp[0] = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24);
 141 #else
 142       src = (src0 << shift) | (src1 >> (32 - shift));
 143       dp[0] = (dst & ~dmask) | (src & dmask);
 144 #endif /* _LITTLE_ENDIAN */
 145       return;
 146     }
 147 
 148     dmask = mask0 >> ld_offset;
 149 #ifdef _LITTLE_ENDIAN
 150     src0 = (src0 << 24) | ((src0 & 0xFF00) << 8) | ((src0 >> 8) & 0xFF00) | (src0 >> 24);
 151     src1 = (src1 << 24) | ((src1 & 0xFF00) << 8) | ((src1 >> 8) & 0xFF00) | (src1 >> 24);
 152     src = (src0 << shift) | (src1 >> (32 - shift));
 153     dst = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24);
 154     dst = (dst & ~dmask) | (src & dmask);
 155     dp[0] = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24);
 156 #else
 157     src = (src0 << shift) | (src1 >> (32 - shift));
 158     dp[0] = (dst & ~dmask) | (src & dmask);
 159 #endif /* _LITTLE_ENDIAN */
 160     j = 32 - ld_offset;
 161     dp++;
 162     sp++;
 163     ls_offset = ls_offset + j - 32;
 164   }
 165 
 166   if (j < size) src1 = sp[0];
 167 #ifdef _LITTLE_ENDIAN
 168   src1 = (src1 << 24) | ((src1 & 0xFF00) << 8) | ((src1 >> 8) & 0xFF00) | (src1 >> 24);
 169 #endif /* _LITTLE_ENDIAN */
 170   for (; j <= size - 32; j += 32) {
 171     src0 = src1;
 172     src1 = sp[1];
 173 #ifdef _LITTLE_ENDIAN
 174     src1 = (src1 << 24) | ((src1 & 0xFF00) << 8) | ((src1 >> 8) & 0xFF00) | (src1 >> 24);
 175     src = (src0 << ls_offset) | (src1 >> (32 - ls_offset));
 176     dp[0] = (src << 24) | ((src & 0xFF00) << 8) | ((src >> 8) & 0xFF00) | (src >> 24);
 177 #else
 178     dp[0] = (src0 << ls_offset) | (src1 >> (32 - ls_offset));
 179 #endif /* _LITTLE_ENDIAN */
 180     sp++;
 181     dp++;
 182   }
 183 
 184   if (j < size) {
 185     j = size - j;
 186     src0 = src1;
 187     if (ls_offset + j > 32) src1 = sp[1];
 188     dst = dp[0];
 189     dmask = mask0 << (32 - j);
 190 #ifdef _LITTLE_ENDIAN
 191     src1 = (src1 << 24) | ((src1 & 0xFF00) << 8) | ((src1 >> 8) & 0xFF00) | (src1 >> 24);
 192     src = (src0 << ls_offset) | (src1 >> (32 - ls_offset));
 193     dst = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24);
 194     dst = (dst & ~dmask) | (src & dmask);
 195     dp[0] = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24);
 196 #else
 197     src = (src0 << ls_offset) | (src1 >> (32 - ls_offset));
 198     dp[0] = (dst & ~dmask) | (src & dmask);
 199 #endif /* _LITTLE_ENDIAN */
 200   }
 201 
 202 #else /* _LONGLONG */
 203 
 204   mlib_u64 *dp;          /* 8-byte aligned start points in dst */
 205   mlib_u64 *sp;          /* 8-byte aligned start point in src */
 206   mlib_s32 j;            /* offset of address in dst */
 207   mlib_u64 lmask0 = 0xFFFFFFFFFFFFFFFFULL;
 208   mlib_u64 dmask;
 209   mlib_u64 lsrc, lsrc0, lsrc1 = 0ULL, ldst;
 210   mlib_s32 ls_offset, ld_offset, shift;
 211 
 212   if (size <= 0) return;
 213 
 214   /* prepare the destination addresses */
 215   dp = (mlib_u64 *)((mlib_addr)da & (~7));
 216   sp = (mlib_u64 *)((mlib_addr)sa & (~7));
 217   /* we can explicitly cast ro mlib_s32 here because value is in [0,64] range */
 218   ld_offset = (((mlib_s32) ((mlib_addr)da & 7)) << 3) + d_offset;     /* bit d_offset to first mlib_d64 */
 219   ls_offset = (((mlib_s32) ((mlib_addr)sa & 7)) << 3) + s_offset;     /* bit d_offset to first mlib_d64 */
 220 
 221   if (ld_offset > ls_offset) {
 222     lsrc0 = sp[0];
 223     ldst = dp[0];
 224     lsrc = (lsrc0 >> (ld_offset - ls_offset));
 225     if (ld_offset + size < 64) {
 226       dmask = (lmask0 << (64 - size)) >> ld_offset;
 227       dp[0] = (ldst & (~dmask)) | (lsrc & dmask);
 228       return;
 229     }
 230 
 231     dmask = lmask0 >> ld_offset;
 232     dp[0] = (ldst & ~dmask) | (lsrc & dmask);
 233     j = 64 - ld_offset;
 234     dp++;
 235     ls_offset += j;
 236   } else {
 237 
 238     shift = ls_offset - ld_offset;
 239     lsrc0 = sp[0];
 240     if (ls_offset + size > 64) lsrc1 = sp[1];
 241     ldst = dp[0];
 242     lsrc = (lsrc0 << shift) | (lsrc1 >> (64 - shift));
 243 
 244     if (ld_offset + size < 64) {
 245       dmask = (lmask0 << (64 - size)) >> ld_offset;
 246       dp[0] = (ldst & ~dmask) | (lsrc & dmask);
 247       return;
 248     }
 249 
 250     dmask = lmask0 >> ld_offset;
 251     dp[0] = (ldst & ~dmask) | (lsrc & dmask);
 252     j = 64 - ld_offset;
 253     dp++;
 254     sp++;
 255     ls_offset = ls_offset + j - 64;
 256   }
 257 
 258   if (j < size) lsrc1 = sp[0];
 259 #ifdef __SUNPRO_C
 260 #pragma pipeloop(0)
 261 #endif /* __SUNPRO_C */
 262   for (; j <= size - 64; j += 64) {
 263     lsrc0 = lsrc1;
 264     lsrc1 = sp[1];
 265     lsrc = (lsrc0 << ls_offset) | (lsrc1 >> (64 - ls_offset));
 266     dp[0] = lsrc;
 267     sp++;
 268     dp++;
 269   }
 270 
 271   if (j < size) {
 272     j = size - j;
 273     lsrc0 = lsrc1;
 274     if (ls_offset + j > 64) lsrc1 = sp[1];
 275     ldst = dp[0];
 276     dmask = lmask0 << (64 - j);
 277     lsrc = (lsrc0 << ls_offset) | (lsrc1 >> (64 - ls_offset));
 278     dp[0] = (ldst & ~dmask) | (lsrc & dmask);
 279   }
 280 #endif /* _NO_LONGLONG */
 281 }
 282 
 283 /***************************************************************/
 284 /*
  285  * Bit offsets of source and destination are not the same
 286  * This function is both for C and VIS version (LONGLONG case)
 287  */
 288 
 289 void mlib_ImageCopy_bit_na_r(const mlib_u8 *sa,
 290                              mlib_u8       *da,
 291                              mlib_s32      size,
 292                              mlib_s32      s_offset,
 293                              mlib_s32      d_offset)
 294 {
 295 #ifdef _NO_LONGLONG
 296 
 297   mlib_u32 *dp;          /* 4-byte aligned start points in dst */
 298   mlib_u32 *sp;          /* 4-byte aligned start point in src */
 299   mlib_s32 j;            /* offset of address in dst */
 300   mlib_u32 lmask0 = 0xFFFFFFFF;
 301   mlib_u32 dmask;
 302   mlib_u32 src, src0, src1, dst;
 303   mlib_s32 ls_offset, ld_offset, shift;
 304 
 305   if (size <= 0) return;
 306 
 307   /* prepare the destination addresses */
 308   dp = (mlib_u32 *)((mlib_addr)da & (~3));
 309   sp = (mlib_u32 *)((mlib_addr)sa & (~3));
 310   ld_offset = (((mlib_addr)da & 3) << 3) + d_offset;     /* bit d_offset to first mlib_s32 */
 311   ls_offset = (((mlib_addr)sa & 3) << 3) + s_offset;     /* bit d_offset to first mlib_s32 */
 312 
 313   if (ld_offset < ls_offset) {
 314     src0 = sp[0];
 315     dst = dp[0];
 316     if (ld_offset >= size) {
 317       dmask = (lmask0 << (32 - size)) >> (ld_offset - size);
 318 #ifdef _LITTLE_ENDIAN
 319       src0 = (src0 << 24) | ((src0 & 0xFF00) << 8) | ((src0 >> 8) & 0xFF00) | (src0 >> 24);
 320       src = (src0 << (ls_offset - ld_offset));
 321       dst = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24);
 322       dst = (dst & (~dmask)) | (src & dmask);
 323       dp[0] = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24);
 324 #else
 325       src = (src0 << (ls_offset - ld_offset));
 326       dp[0] = (dst & (~dmask)) | (src & dmask);
 327 #endif /* _LITTLE_ENDIAN */
 328       return;
 329     }
 330 
 331     dmask = lmask0 << (32 - ld_offset);
 332 #ifdef _LITTLE_ENDIAN
 333     src0 = (src0 << 24) | ((src0 & 0xFF00) << 8) | ((src0 >> 8) & 0xFF00) | (src0 >> 24);
 334     src = (src0 << (ls_offset - ld_offset));
 335     dst = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24);
 336     dst = (dst & ~dmask) | (src & dmask);
 337     dp[0] = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24);
 338 #else
 339     src = (src0 << (ls_offset - ld_offset));
 340     dp[0] = (dst & ~dmask) | (src & dmask);
 341 #endif /* _LITTLE_ENDIAN */
 342     j = ld_offset;
 343     dp--;
 344     ls_offset -= j;
 345   } else {
 346 
 347     shift = ld_offset - ls_offset;
 348     src0 = sp[0];
 349     if (ls_offset < size) src1 = sp[-1];
 350     dst = dp[0];
 351 
 352     if (ld_offset >= size) {
 353       dmask = (lmask0 << (32 - size)) >> (ld_offset - size);
 354 #ifdef _LITTLE_ENDIAN
 355       src0 = (src0 << 24) | ((src0 & 0xFF00) << 8) | ((src0 >> 8) & 0xFF00) | (src0 >> 24);
 356       src1 = (src1 << 24) | ((src1 & 0xFF00) << 8) | ((src1 >> 8) & 0xFF00) | (src1 >> 24);
 357       src = (src0 >> shift) | (src1 << (32 - shift));
 358       dst = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24);
 359       dst = (dst & ~dmask) | (src & dmask);
 360       dp[0] = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24);
 361 #else
 362       src = (src0 >> shift) | (src1 << (32 - shift));
 363       dp[0] = (dst & ~dmask) | (src & dmask);
 364 #endif /* _LITTLE_ENDIAN */
 365       return;
 366     }
 367 
 368     dmask = lmask0 << (32 - ld_offset);
 369 #ifdef _LITTLE_ENDIAN
 370     src0 = (src0 << 24) | ((src0 & 0xFF00) << 8) | ((src0 >> 8) & 0xFF00) | (src0 >> 24);
 371     src1 = (src1 << 24) | ((src1 & 0xFF00) << 8) | ((src1 >> 8) & 0xFF00) | (src1 >> 24);
 372     src = (src0 >> shift) | (src1 << (32 - shift));
 373     dst = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24);
 374     dst = (dst & ~dmask) | (src & dmask);
 375     dp[0] = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24);
 376 #else
 377     src = (src0 >> shift) | (src1 << (32 - shift));
 378     dp[0] = (dst & ~dmask) | (src & dmask);
 379 #endif /* _LITTLE_ENDIAN */
 380     j = ld_offset;
 381     dp--;
 382     sp--;
 383     ls_offset = ls_offset - j + 32;
 384   }
 385 
 386   if (j < size) src1 = sp[0];
 387 #ifdef _LITTLE_ENDIAN
 388   src1 = (src1 << 24) | ((src1 & 0xFF00) << 8) | ((src1 >> 8) & 0xFF00) | (src1 >> 24);
 389 #endif /* _LITTLE_ENDIAN */
 390 #ifdef __SUNPRO_C
 391 #pragma pipeloop(0)
 392 #endif /* __SUNPRO_C */
 393   for (; j <= size - 32; j += 32) {
 394     src0 = src1;
 395     src1 = sp[-1];
 396 #ifdef _LITTLE_ENDIAN
 397     src1 = (src1 << 24) | ((src1 & 0xFF00) << 8) | ((src1 >> 8) & 0xFF00) | (src1 >> 24);
 398     src = (src0 >> (32 - ls_offset)) | (src1 << ls_offset);
 399     dp[0] = (src << 24) | ((src & 0xFF00) << 8) | ((src >> 8) & 0xFF00) | (src >> 24);
 400 #else
 401     dp[0] = (src0 >> (32 - ls_offset)) | (src1 << ls_offset);
 402 #endif /* _LITTLE_ENDIAN */
 403     sp--;
 404     dp--;
 405   }
 406 
 407   if (j < size) {
 408     j = size - j;
 409     src0 = src1;
 410     if (ls_offset < j) src1 = sp[-1];
 411     dst = dp[0];
 412     dmask = lmask0 >> (32 - j);
 413 #ifdef _LITTLE_ENDIAN
 414     src1 = (src1 << 24) | ((src1 & 0xFF00) << 8) | ((src1 >> 8) & 0xFF00) | (src1 >> 24);
 415     src = (src0 >> (32 - ls_offset)) | (src1 << ls_offset);
 416     dst = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24);
 417     dst = (dst & ~dmask) | (src & dmask);
 418     dp[0] = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24);
 419 #else
 420     src = (src0 >> (32 - ls_offset)) | (src1 << ls_offset);
 421     dp[0] = (dst & ~dmask) | (src & dmask);
 422 #endif /* _LITTLE_ENDIAN */
 423   }
 424 
 425 #else  /* _LONGLONG */
 426 
 427   mlib_u64 *dp;          /* 8-byte aligned start points in dst */
 428   mlib_u64 *sp;          /* 8-byte aligned start point in src */
 429   mlib_s32 j;            /* offset of address in dst */
 430   mlib_u64 lmask0 = 0xFFFFFFFFFFFFFFFFULL;
 431   mlib_u64 dmask;
 432   mlib_u64 lsrc, lsrc0, lsrc1 = 0ULL, ldst;
 433   mlib_s32 ls_offset, ld_offset, shift;
 434 
 435   if (size <= 0) return;
 436 
 437   /* prepare the destination addresses */
 438   dp = (mlib_u64 *)((mlib_addr)da & (~7));
 439   sp = (mlib_u64 *)((mlib_addr)sa & (~7));
 440   /* we can explicitly cast ro mlib_s32 here because value is in [0,64] range */
 441   ld_offset = (((mlib_s32) ((mlib_addr)da & 7)) << 3) + d_offset;     /* bit d_offset to first mlib_d64 */
 442   ls_offset = (((mlib_s32) ((mlib_addr)sa & 7)) << 3) + s_offset;     /* bit d_offset to first mlib_d64 */
 443 
 444   if (ld_offset < ls_offset) {
 445     lsrc0 = sp[0];
 446     ldst = dp[0];
 447     lsrc = (lsrc0 << (ls_offset - ld_offset));
 448     if (ld_offset >= size) {
 449       dmask = (lmask0 << (64 - size)) >> (ld_offset - size);
 450       dp[0] = (ldst & (~dmask)) | (lsrc & dmask);
 451       return;
 452     }
 453 
 454     dmask = lmask0 << (64 - ld_offset);
 455     dp[0] = (ldst & ~dmask) | (lsrc & dmask);
 456     j = ld_offset;
 457     dp--;
 458     ls_offset -= j;
 459   } else {
 460 
 461     shift = ld_offset - ls_offset;
 462     lsrc0 = sp[0];
 463     if (ls_offset < size) lsrc1 = sp[-1];
 464     ldst = dp[0];
 465     lsrc = (lsrc0 >> shift) | (lsrc1 << (64 - shift));
 466     if (ld_offset >= size) {
 467       dmask = (lmask0 << (64 - size)) >> (ld_offset - size);
 468       dp[0] = (ldst & ~dmask) | (lsrc & dmask);
 469       return;
 470     }
 471 
 472     dmask = lmask0 << (64 - ld_offset);
 473     dp[0] = (ldst & ~dmask) | (lsrc & dmask);
 474     j = ld_offset;
 475     dp--;
 476     sp--;
 477     ls_offset = ls_offset - j + 64;
 478   }
 479 
 480   if (j < size) lsrc1 = sp[0];
 481 #ifdef __SUNPRO_C
 482 #pragma pipeloop(0)
 483 #endif /* __SUNPRO_C */
 484   for (; j <= size - 64; j += 64) {
 485     lsrc0 = lsrc1;
 486     lsrc1 = sp[-1];
 487     dp[0] = (lsrc0 >> (64 - ls_offset)) | (lsrc1 << ls_offset);
 488     sp--;
 489     dp--;
 490   }
 491 
 492   if (j < size) {
 493     j = size - j;
 494     lsrc0 = lsrc1;
 495     if (ls_offset < j) lsrc1 = sp[-1];
 496     ldst = dp[0];
 497     dmask = lmask0 >> (64 - j);
 498     lsrc = (lsrc0 >> (64 - ls_offset)) | (lsrc1 << ls_offset);
 499     dp[0] = (ldst & ~dmask) | (lsrc & dmask);
 500   }
 501 #endif /* _NO_LONGLONG */
 502 }
 503 
 504 /***************************************************************/