1 /*
   2  * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 
  27 /*
  28  * FUNCTIONS
  29  *      mlib_ImageCopy - Direct copy from one image to another.
  30  *
  31  * SYNOPSIS
  32  *      mlib_status mlib_ImageCopy(mlib_image       *dst,
  33  *                                 const mlib_image *src);
  34  *
  35  * ARGUMENT
  36  *      dst     pointer to output or destination image
  37  *      src     pointer to input or source image
  38  *
  39  * RESTRICTION
  40  *      src and dst must have the same size, type and number of channels.
  41  *      They can have 1, 2, 3 or 4 channels of MLIB_BIT, MLIB_BYTE, MLIB_SHORT,
  42  *      MLIB_USHORT, MLIB_INT, MLIB_FLOAT or MLIB_DOUBLE data type.
  43  *
  44  * DESCRIPTION
  45  *      Direct copy from one image to another
  46  */
  47 
  48 #include <stdlib.h>
  49 #include "mlib_image.h"
  50 #include "mlib_ImageCheck.h"
  51 #include "mlib_ImageCopy.h"
  52 
  53 /***************************************************************/
  54 #ifdef _MSC_VER
  55 #pragma optimize("", off)                   /* Fix bug 4195132 */
  56 #endif /* _MSC_VER */
  57 
  58 /***************************************************************/
/*
 * TYPE_64BIT is the element type used by the 8-byte-at-a-time copy loops in
 * this file.
 *
 * Do not perform the copying through the mlib_d64 data type on x86: when
 * the i386 macro is defined, a struct holding two mlib_s32 fields is used
 * instead; every other build uses mlib_d64.
 */
#ifdef i386

typedef struct {
  mlib_s32 int0, int1;
} two_int;

#define TYPE_64BIT two_int

#else /* i386 */

#define TYPE_64BIT mlib_d64
#endif /* i386 */
  72 
  73 /***************************************************************/
  74 static void mlib_c_ImageCopy_u8(const mlib_image *src,
  75                                 mlib_image       *dst);
  76 static void mlib_c_ImageCopy_s16(const mlib_image *src,
  77                                  mlib_image       *dst);
  78 static void mlib_c_ImageCopy_s32(const mlib_image *src,
  79                                  mlib_image       *dst);
  80 static void mlib_c_ImageCopy_d64(const mlib_image *src,
  81                                  mlib_image       *dst);
  82 static void mlib_c_ImageCopy_a1(const TYPE_64BIT *sp,
  83                                 TYPE_64BIT       *dp,
  84                                 mlib_s32         size);
  85 
  86 /***************************************************************/
  87 mlib_status mlib_ImageCopy(mlib_image       *dst,
  88                            const mlib_image *src)
  89 {
  90   mlib_s32 s_offset, d_offset;
  91   mlib_s32 size, s_stride, d_stride;
  92   mlib_s32 width;                                     /* width in bytes of src and dst */
  93   mlib_s32 height;                                    /* height in lines of src and dst */
  94   mlib_u8 *sa, *da;
  95   mlib_s32 j;
  96 
  97   MLIB_IMAGE_CHECK(src);
  98   MLIB_IMAGE_CHECK(dst);
  99   MLIB_IMAGE_TYPE_EQUAL(src, dst);
 100   MLIB_IMAGE_CHAN_EQUAL(src, dst);
 101   MLIB_IMAGE_SIZE_EQUAL(src, dst);
 102 
 103   switch (mlib_ImageGetType(dst)) {
 104     case MLIB_BIT:
 105       width = mlib_ImageGetWidth(dst) * mlib_ImageGetChannels(dst); /* size in bits */
 106       height = mlib_ImageGetHeight(src);
 107       sa = (mlib_u8 *) mlib_ImageGetData(src);
 108       da = (mlib_u8 *) mlib_ImageGetData(dst);
 109 
 110       if (!mlib_ImageIsNotOneDvector(src) && !mlib_ImageIsNotOneDvector(dst)) {
 111         size = height * (width >> 3);
 112         if (!mlib_ImageIsNotAligned8(src) && !mlib_ImageIsNotAligned8(dst) && ((size & 7) == 0)) {
 113 
 114           mlib_c_ImageCopy_a1((TYPE_64BIT *) sa, (TYPE_64BIT *) da, size >> 3);
 115         }
 116         else {
 117 
 118           mlib_ImageCopy_na(sa, da, size);
 119         }
 120       }
 121       else {
 122         s_stride = mlib_ImageGetStride(src);
 123         d_stride = mlib_ImageGetStride(dst);
 124         s_offset = mlib_ImageGetBitOffset(src); /* in bits */
 125         d_offset = mlib_ImageGetBitOffset(dst); /* in bits */
 126         if (s_offset == d_offset) {
 127           for (j = 0; j < height; j++) {
 128             mlib_ImageCopy_bit_al(sa, da, width, s_offset);
 129             sa += s_stride;
 130             da += d_stride;
 131           }
 132         }
 133         else {
 134           for (j = 0; j < height; j++) {
 135             mlib_ImageCopy_bit_na(sa, da, width, s_offset, d_offset);
 136             sa += s_stride;
 137             da += d_stride;
 138           }
 139         }
 140       }
 141 
 142       break;
 143     case MLIB_BYTE:
 144       mlib_c_ImageCopy_u8(src, dst);
 145       break;
 146     case MLIB_SHORT:
 147     case MLIB_USHORT:
 148       mlib_c_ImageCopy_s16(src, dst);
 149       break;
 150     case MLIB_INT:
 151     case MLIB_FLOAT:
 152       mlib_c_ImageCopy_s32(src, dst);
 153       break;
 154     case MLIB_DOUBLE:
 155       mlib_c_ImageCopy_d64(src, dst);
 156       break;
 157     default:
 158       return MLIB_FAILURE;                  /* MLIB_BIT is not supported here */
 159   }
 160 
 161   return MLIB_SUCCESS;
 162 }
 163 
 164 /***************************************************************/
/*
 * PREPAREVARS(type) - common prologue of the typed copy routines.
 *
 * Expects `src` and `dst` image pointers in the enclosing scope and declares:
 *   psrc, pdst              data pointers of src and dst, cast to `type *`
 *   src_height              height of src
 *   src_width               width of src multiplied by the channel count
 *   src_stride, dst_stride  image strides divided by sizeof(type)
 *   chan                    channel count of dst
 *   i, j                    loop counters for the caller
 *
 * When src_width equals both strides, the image is folded into a single
 * row of src_width * src_height elements (src_height becomes 1).
 *
 * The expansion ends with statements, so it must follow all other
 * declarations of the enclosing block when declarations-first rules apply.
 */
#define PREPAREVARS(type)                                        \
  type *psrc = (type *) mlib_ImageGetData(src);                  \
  type *pdst = (type *) mlib_ImageGetData(dst);                  \
  mlib_s32 src_height = mlib_ImageGetHeight(src);                \
  mlib_s32 src_width  = mlib_ImageGetWidth(src);                 \
  mlib_s32 src_stride = mlib_ImageGetStride(src) / sizeof(type); \
  mlib_s32 dst_stride = mlib_ImageGetStride(dst) / sizeof(type); \
  mlib_s32 chan = mlib_ImageGetChannels(dst);                    \
  mlib_s32 i, j;                                                 \
                                                                 \
  src_width *= chan;                                             \
  if (src_width == src_stride && src_width == dst_stride) {      \
    src_width *= src_height;                                     \
    src_height = 1;                                              \
  }
 180 
 181 /***************************************************************/
/*
 * STRIP(pd, ps, w, h, data_type) - element-wise copy of h rows of w
 * elements from ps to pd.
 *
 * Uses i, j, src_stride and dst_stride from the enclosing scope (as set up
 * by PREPAREVARS). For an odd w the first element of the row is copied on
 * its own (j starts at 1); the remaining elements are copied two at a time.
 * The arguments are expanded without extra parentheses, so pass plain
 * identifiers.
 */
#define STRIP(pd, ps, w, h, data_type) {                        \
  data_type s0, s1;                                             \
  for ( i = 0; i < h; i++ ) {                                   \
    if ((j = (w & 1)))                                          \
      pd[i * dst_stride] = ps[i * src_stride];                  \
    for (; j < w; j += 2) {                                     \
      s0 = ps[i * src_stride + j];                              \
      s1 = ps[i * src_stride + j + 1];                          \
      pd[i * dst_stride + j]   = s0;                            \
      pd[i * dst_stride + j + 1] = s1;                          \
    }                                                           \
  }                                                             \
}
 195 
 196 /***************************************************************/
 197 /*
 198  * Both bit offsets of source and distination are the same
 199  */
 200 
 201 void mlib_ImageCopy_bit_al(const mlib_u8 *sa,
 202                            mlib_u8       *da,
 203                            mlib_s32      size,
 204                            mlib_s32      offset)
 205 {
 206   mlib_s32 b_size, i, j;
 207   TYPE_64BIT *sp, *dp;
 208   mlib_u8 mask0 = 0xFF;
 209   mlib_u8 src, mask;
 210 
 211   if (size <= 0) return;
 212 
 213   if (size <= (8 - offset)) {
 214     mask = mask0 << (8 - size);
 215     mask >>= offset;
 216     src = da[0];
 217     da[0] = (src & (~mask)) | (sa[0] & mask);
 218     return;
 219   }
 220 
 221   mask = mask0 >> offset;
 222   src = da[0];
 223   da[0] = (src & (~mask)) | (sa[0] & mask);
 224   da++;
 225   sa++;
 226   size = size - 8 + offset;
 227   b_size = size >> 3;                       /* size in bytes */
 228 
 229   for (j = 0; (j < b_size) && (((mlib_addr) da & 7) != 0); j++)
 230     *da++ = *sa++;
 231 
 232   if ((((mlib_addr) sa ^ (mlib_addr) da) & 7) == 0) {
 233     sp = (TYPE_64BIT *) sa;
 234     dp = (TYPE_64BIT *) da;
 235     for (i = 0; j <= (b_size - 8); j += 8, i++) {
 236       dp[i] = sp[i];
 237     }
 238 
 239     sa += i << 3;
 240     da += i << 3;
 241   }
 242   else {
 243 #ifdef _NO_LONGLONG
 244     if ((((mlib_addr) sa ^ (mlib_addr) da) & 3) == 0) {
 245       mlib_u32 *pws, *pwd;
 246 
 247       pws = (mlib_u32 *) sa;
 248       pwd = (mlib_u32 *) da;
 249       for (i = 0; j <= (b_size - 4); j += 4, i++) {
 250         pwd[i] = pws[i];
 251       }
 252 
 253       sa += i << 2;
 254       da += i << 2;
 255     }
 256     else {
 257       mlib_u32 *pws, *pwd, src0, src1;
 258       mlib_s32 lshift = (mlib_addr) sa & 3, rshift;
 259 
 260       pwd = (mlib_u32 *) da;
 261       pws = (mlib_u32 *) (sa - lshift);
 262       lshift <<= 3;
 263       rshift = 32 - lshift;
 264 
 265       src1 = pws[0];
 266       for (i = 0; j <= (b_size - 4); j += 4, i++) {
 267         src0 = src1;
 268         src1 = pws[i + 1];
 269 #ifdef _LITTLE_ENDIAN
 270         pwd[i] = (src0 >> lshift) | (src1 << rshift);
 271 #else
 272         pwd[i] = (src0 << lshift) | (src1 >> rshift);
 273 #endif /* _LITTLE_ENDIAN */
 274       }
 275 
 276       sa += i << 2;
 277       da += i << 2;
 278     }
 279 
 280 #else
 281     mlib_u64 *pws, *pwd, src0, src1;
 282     mlib_s32 lshift = (mlib_s32) ((mlib_addr) sa & 7), rshift;
 283 
 284     pwd = (mlib_u64 *) da;
 285     pws = (mlib_u64 *) (sa - lshift);
 286     lshift <<= 3;
 287     rshift = 64 - lshift;
 288 
 289     src1 = pws[0];
 290     for (i = 0; j <= (b_size - 8); j += 8, i++) {
 291       src0 = src1;
 292       src1 = pws[i + 1];
 293       pwd[i] = (src0 << lshift) | (src1 >> rshift);
 294     }
 295 
 296     sa += i << 3;
 297     da += i << 3;
 298 #endif /* _NO_LONGLONG */
 299   }
 300 
 301   for (; j < b_size; j++)
 302     *da++ = *sa++;
 303 
 304   j = size & 7;
 305 
 306   if (j > 0) {
 307     mask = mask0 << (8 - j);
 308     src = da[0];
 309     da[0] = (src & (~mask)) | (sa[0] & mask);
 310   }
 311 }
 312 
 313 /***************************************************************/
 314 void mlib_c_ImageCopy_u8(const mlib_image *src,
 315                          mlib_image       *dst)
 316 {
 317   PREPAREVARS(mlib_u8);
 318   if (src_width < 16) {
 319     STRIP(pdst, psrc, src_width, src_height, mlib_u8);
 320     return;
 321   }
 322 
 323   for (i = 0; i < src_height; i++) {
 324     mlib_u8 *psrc_row = psrc + i * src_stride, *pdst_row = pdst + i * dst_stride;
 325 
 326     if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 7)) {
 327       for (j = 0; j < (mlib_s32) ((8 - (mlib_addr) psrc_row) & 7); j++) {
 328         pdst_row[j] = psrc_row[j];
 329       }
 330 
 331       for (; j <= (src_width - 8); j += 8) {
 332         TYPE_64BIT dsrc0 = *((TYPE_64BIT *) (psrc_row + j));
 333 
 334         *((TYPE_64BIT *) (pdst_row + j)) = dsrc0;
 335       }
 336     }
 337     else {
 338 
 339 #ifdef _NO_LONGLONG
 340 
 341       for (j = 0; j < (mlib_s32) ((4 - (mlib_addr) pdst_row) & 3); j++) {
 342         pdst_row[j] = psrc_row[j];
 343       }
 344 
 345       if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 3)) {
 346         for (; j <= (src_width - 4); j += 4) {
 347           *((mlib_s32 *) (pdst_row + j)) = *((mlib_s32 *) (psrc_row + j));
 348         }
 349       }
 350       else {
 351         mlib_u32 *ps, shl, shr, src0, src1;
 352 
 353         ps = (mlib_u32 *) (psrc_row + j);
 354         shl = (mlib_addr) ps & 3;
 355         ps = (mlib_u32 *) ((mlib_addr) ps - shl);
 356         shl <<= 3;
 357         shr = 32 - shl;
 358 
 359         src1 = ps[0];
 360         for (; j <= (src_width - 4); j += 4) {
 361           src0 = src1;
 362           src1 = ps[1];
 363 #ifdef _LITTLE_ENDIAN
 364           *((mlib_s32 *) (pdst_row + j)) = (src0 >> shl) | (src1 << shr);
 365 #else
 366           *((mlib_s32 *) (pdst_row + j)) = (src0 << shl) | (src1 >> shr);
 367 #endif /* _LITTLE_ENDIAN */
 368           ps++;
 369         }
 370       }
 371 
 372 #else
 373 
 374       for (j = 0; j < (mlib_s32) ((8 - (mlib_addr) pdst_row) & 7); j++) {
 375         pdst_row[j] = psrc_row[j];
 376       }
 377 
 378       {
 379         mlib_s32 shl, shr;
 380         mlib_u64 *ps, src0, src1;
 381 
 382         ps = (mlib_u64 *) (psrc_row + j);
 383         /* shl and shr are in range [0, 64] */
 384         shl = (mlib_s32) ((mlib_addr) ps & 7);
 385         ps = (mlib_u64 *) ((mlib_addr) ps - shl);
 386         shl <<= 3;
 387         shr = 64 - shl;
 388 
 389         src1 = ps[0];
 390         for (; j <= (src_width - 8); j += 8) {
 391           src0 = src1;
 392           src1 = ps[1];
 393 #ifdef _LITTLE_ENDIAN
 394           *((mlib_s64 *) (pdst_row + j)) = (src0 >> shl) | (src1 << shr);
 395 #else
 396           *((mlib_s64 *) (pdst_row + j)) = (src0 << shl) | (src1 >> shr);
 397 #endif /* _LITTLE_ENDIAN */
 398           ps++;
 399         }
 400       }
 401 #endif /* _NO_LONGLONG */
 402     }
 403 
 404     for (; j < src_width; j++)
 405       pdst_row[j] = psrc_row[j];
 406   }
 407 }
 408 
 409 /***************************************************************/
 410 void mlib_c_ImageCopy_s16(const mlib_image       *src,
 411                           mlib_image *dst)
 412 {
 413   PREPAREVARS(mlib_u16);
 414   if (src_width < 8) {
 415     STRIP(pdst, psrc, src_width, src_height, mlib_u16);
 416     return;
 417   }
 418 
 419   for (i = 0; i < src_height; i++) {
 420     mlib_u16 *psrc_row = psrc + i * src_stride, *pdst_row = pdst + i * dst_stride;
 421 
 422     if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 7)) {
 423       for (j = 0; j < (mlib_s32) (((8 - (mlib_addr) psrc_row) & 7) >> 1); j++) {
 424         pdst_row[j] = psrc_row[j];
 425       }
 426 
 427       for (; j <= (src_width - 4); j += 4) {
 428         TYPE_64BIT dsrc0 = *((TYPE_64BIT *) (psrc_row + j));
 429 
 430         *((TYPE_64BIT *) (pdst_row + j)) = dsrc0;
 431       }
 432     }
 433     else {
 434 
 435 #ifdef _NO_LONGLONG
 436 
 437       if (j = (((mlib_addr) pdst_row & 2) != 0)) {
 438         pdst_row[0] = psrc_row[0];
 439       }
 440 
 441       if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 3)) {
 442         for (; j <= (src_width - 2); j += 2) {
 443           *((mlib_s32 *) (pdst_row + j)) = *((mlib_s32 *) (psrc_row + j));
 444         }
 445       }
 446       else {
 447         mlib_u32 *ps, src0, src1;
 448 
 449         ps = (mlib_u32 *) (psrc_row + j - 1);
 450         src1 = ps[0];
 451         for (; j <= (src_width - 2); j += 2) {
 452           src0 = src1;
 453           src1 = ps[1];
 454 #ifdef _LITTLE_ENDIAN
 455           *((mlib_s32 *) (pdst_row + j)) = (src0 >> 16) | (src1 << 16);
 456 #else
 457           *((mlib_s32 *) (pdst_row + j)) = (src0 << 16) | (src1 >> 16);
 458 #endif /* _LITTLE_ENDIAN */
 459           ps++;
 460         }
 461       }
 462 
 463 #else
 464 
 465       for (j = 0; j < (mlib_s32) (((8 - (mlib_addr) pdst_row) & 7) >> 1); j++) {
 466         pdst_row[j] = psrc_row[j];
 467       }
 468 
 469       {
 470         mlib_s32 shl, shr;
 471         mlib_u64 *ps, src0, src1;
 472 
 473         ps = (mlib_u64 *) (psrc_row + j);
 474         shl = (mlib_s32) ((mlib_addr) ps & 7);
 475         ps = (mlib_u64 *) ((mlib_addr) ps - shl);
 476         shl <<= 3;
 477         shr = 64 - shl;
 478 
 479         src1 = ps[0];
 480         for (; j <= (src_width - 4); j += 4) {
 481           src0 = src1;
 482           src1 = ps[1];
 483 #ifdef _LITTLE_ENDIAN
 484           *((mlib_s64 *) (pdst_row + j)) = (src0 >> shl) | (src1 << shr);
 485 #else
 486           *((mlib_s64 *) (pdst_row + j)) = (src0 << shl) | (src1 >> shr);
 487 #endif /* _LITTLE_ENDIAN */
 488           ps++;
 489         }
 490       }
 491 #endif /* _NO_LONGLONG */
 492     }
 493 
 494     for (; j < src_width; j++)
 495       pdst_row[j] = psrc_row[j];
 496   }
 497 }
 498 
 499 /***************************************************************/
 500 void mlib_c_ImageCopy_s32(const mlib_image       *src,
 501                           mlib_image *dst)
 502 {
 503   PREPAREVARS(mlib_u32);
 504   if (src_width < 4) {
 505     STRIP(pdst, psrc, src_width, src_height, mlib_u32);
 506     return;
 507   }
 508 
 509   for (i = 0; i < src_height; i++) {
 510     mlib_u32 *psrc_row = psrc + i * src_stride, *pdst_row = pdst + i * dst_stride;
 511 
 512     if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 7)) {
 513       j = (mlib_s32) ((mlib_addr) psrc_row & 4) >> 2;
 514       if (j != 0) {
 515         pdst_row[0] = psrc_row[0];
 516       }
 517 
 518       for (; j <= (src_width - 2); j += 2) {
 519         TYPE_64BIT dsrc0 = *((TYPE_64BIT *) (psrc_row + j));
 520 
 521         *((TYPE_64BIT *) (pdst_row + j)) = dsrc0;
 522       }
 523     }
 524     else {
 525 
 526 #ifdef _NO_LONGLONG
 527 
 528       for (j = 0; j <= (src_width - 1); j++) {
 529         *((mlib_s32 *) (pdst_row + j)) = *((mlib_s32 *) (psrc_row + j));
 530       }
 531 
 532 #else
 533 
 534       {
 535         mlib_u64 *ps, src0, src1;
 536 
 537         j = (mlib_s32) ((mlib_addr) pdst_row & 4) >> 2;
 538         if (j != 0) {
 539           pdst_row[0] = psrc_row[0];
 540         }
 541         ps = (mlib_u64 *) (psrc_row + j - 1);
 542         src1 = ps[0];
 543         for (; j <= (src_width - 2); j += 2) {
 544           src0 = src1;
 545           src1 = ps[1];
 546 #ifdef _LITTLE_ENDIAN
 547           *((mlib_s64 *) (pdst_row + j)) = (src0 >> 32) | (src1 << 32);
 548 #else
 549           *((mlib_s64 *) (pdst_row + j)) = (src0 << 32) | (src1 >> 32);
 550 #endif /* _LITTLE_ENDIAN */
 551           ps++;
 552         }
 553       }
 554 #endif /* _NO_LONGLONG */
 555     }
 556 
 557     for (; j < src_width; j++)
 558       pdst_row[j] = psrc_row[j];
 559   }
 560 }
 561 
 562 /***************************************************************/
 563 void mlib_c_ImageCopy_d64(const mlib_image       *src,
 564                           mlib_image *dst)
 565 {
 566   PREPAREVARS(mlib_d64);
 567   for (i = 0; i < src_height; i++) {
 568     mlib_d64 *psrc_row = psrc + i * src_stride, *pdst_row = pdst + i * dst_stride;
 569 
 570     for (j = 0; j < src_width; j++)
 571       *((mlib_d64 *) (pdst_row + j)) = *((mlib_d64 *) (psrc_row + j));
 572   }
 573 }
 574 
 575 /***************************************************************/
 576 /*
 577  * Both source and destination image data are 1 - d vectors and
 578  * 8 - byte aligned. And size is in 8 - bytes.
 579  */
 580 
 581 void mlib_c_ImageCopy_a1(const TYPE_64BIT *sp,
 582                          TYPE_64BIT       *dp,
 583                          mlib_s32         size)
 584 {
 585   mlib_s32 i;
 586 
 587   for (i = 0; i < size; i++) {
 588     *dp++ = *sp++;
 589   }
 590 }
 591 
 592 /***************************************************************/
/*
 * Word type used by the shift-and-merge loop of mlib_ImageCopy_na:
 *   TYPE   unsigned integer word type
 *   BSIZE  width of TYPE in bits
 *   SIZE   width of TYPE in bytes
 * 64-bit words are used unless _NO_LONGLONG is defined, in which case
 * 32-bit words are used.
 */
#ifndef _NO_LONGLONG
#define TYPE    mlib_u64
#define BSIZE   64
#define SIZE    8
#else
#define TYPE    mlib_u32
#define BSIZE   32
#define SIZE    4
#endif /* _NO_LONGLONG */
 602 
 603 /***************************************************************/
 604 void mlib_ImageCopy_na(const mlib_u8 *sp,
 605                        mlib_u8       *dp,
 606                        mlib_s32      n)
 607 {
 608   mlib_s32 shr, shl;
 609   TYPE *tmp, s0, s1;
 610 
 611   if (((mlib_addr) sp ^ (mlib_addr) dp) & 7) {
 612 
 613     for (; (n > 0) && (mlib_addr) dp & (SIZE - 1); n--)
 614       *dp++ = *sp++;
 615 
 616 #ifdef _NO_LONGLONG
 617 
 618     if (((mlib_addr) sp & (SIZE - 1)) == 0) {
 619       for (; n > SIZE; n -= SIZE) {
 620         *(TYPE *) dp = *(TYPE *) sp;
 621         dp += SIZE;
 622         sp += SIZE;
 623       }
 624     }
 625     else
 626 #endif /* _NO_LONGLONG */
 627     {
 628       tmp = (TYPE *) ((mlib_addr) sp & ~(SIZE - 1));
 629       /* shl and shr do not exceed 64 here */
 630       shl = (mlib_s32) (((mlib_addr) sp & (SIZE - 1)) << 3);
 631       shr = BSIZE - shl;
 632       s0 = *tmp++;
 633 
 634       for (; n > SIZE; n -= SIZE) {
 635         s1 = *tmp++;
 636 #ifdef _LITTLE_ENDIAN
 637         *(TYPE *) dp = (s0 >> shl) | (s1 << shr);
 638 #else
 639         *(TYPE *) dp = (s0 << shl) | (s1 >> shr);
 640 #endif /* _LITTLE_ENDIAN */
 641         s0 = s1;
 642         dp += SIZE;
 643         sp += SIZE;
 644       }
 645     }
 646   }
 647   else {
 648     for (; (n > 0) && (mlib_addr) dp & 7; n--)
 649       *dp++ = *sp++;
 650 
 651     for (; n > 8; n -= 8) {
 652       *(TYPE_64BIT *) dp = *(TYPE_64BIT *) sp;
 653       dp += 8;
 654       sp += 8;
 655     }
 656   }
 657 
 658   for (; n > 0; n--)
 659     *dp++ = *sp++;
 660 }
 661 
 662 /***************************************************************/
 663 #ifdef _MSC_VER
 664 #pragma optimize("", on)
 665 #endif /* _MSC_VER */
 666 
 667 /***************************************************************/