1 /*
   2  * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 
  27 /*
  28  * FUNCTIONS
  29  *      mlib_ImageCopy - Direct copy from one image to another.
  30  *
  31  * SYNOPSIS
  32  *      mlib_status mlib_ImageCopy(mlib_image       *dst,
  33  *                                 const mlib_image *src);
  34  *
  35  * ARGUMENT
  36  *      dst     pointer to output or destination image
  37  *      src     pointer to input or source image
  38  *
  39  * RESTRICTION
  40  *      src and dst must have the same size, type and number of channels.
  41  *      They can have 1, 2, 3 or 4 channels of MLIB_BIT, MLIB_BYTE, MLIB_SHORT,
  42  *      MLIB_USHORT, MLIB_INT, MLIB_FLOAT or MLIB_DOUBLE data type.
  43  *
  44  * DESCRIPTION
  45  *      Direct copy from one image to another
  46  */
  47 
  48 #include <stdlib.h>
  49 #include "mlib_image.h"
  50 #include "mlib_ImageCheck.h"
  51 #include "mlib_ImageCopy.h"
  52 
  53 /***************************************************************/
  54 #ifdef _MSC_VER
  55 #pragma optimize("", off)                   /* Fix bug 4195132 */
  56 #endif /* _MSC_VER */
  57 
  58 /***************************************************************/
  59 /* do not perform the coping by mlib_d64 data type for x86 */
  60 #ifdef i386
  61 
  62 typedef struct {
  63   mlib_s32 int0, int1;
  64 } two_int;
  65 
  66 #define TYPE_64BIT two_int
  67 
  68 #else /* i386 */
  69 
  70 #define TYPE_64BIT mlib_d64
  71 #endif /* i386 */
  72 
  73 /***************************************************************/
  74 static void mlib_c_ImageCopy_u8(const mlib_image *src,
  75                                 mlib_image       *dst);
  76 static void mlib_c_ImageCopy_s16(const mlib_image *src,
  77                                  mlib_image       *dst);
  78 static void mlib_c_ImageCopy_s32(const mlib_image *src,
  79                                  mlib_image       *dst);
  80 static void mlib_c_ImageCopy_d64(const mlib_image *src,
  81                                  mlib_image       *dst);
  82 static void mlib_c_ImageCopy_a1(const TYPE_64BIT *sp,
  83                                 TYPE_64BIT       *dp,
  84                                 mlib_s32         size);
  85 
  86 /***************************************************************/
  87 mlib_status mlib_ImageCopy(mlib_image       *dst,
  88                            const mlib_image *src)
  89 {
  90   mlib_s32 s_offset, d_offset;
  91   mlib_s32 size, s_stride, d_stride;
  92   mlib_s32 width;                                     /* width in bytes of src and dst */
  93   mlib_s32 height;                                    /* height in lines of src and dst */
  94   mlib_u8 *sa, *da;
  95   mlib_s32 j;
  96 
  97   MLIB_IMAGE_CHECK(src);
  98   MLIB_IMAGE_CHECK(dst);
  99   MLIB_IMAGE_TYPE_EQUAL(src, dst);
 100   MLIB_IMAGE_CHAN_EQUAL(src, dst);
 101   MLIB_IMAGE_SIZE_EQUAL(src, dst);
 102 
 103   switch (mlib_ImageGetType(dst)) {
 104     case MLIB_BIT:
 105       width = mlib_ImageGetWidth(dst) * mlib_ImageGetChannels(dst); /* size in bits */
 106       height = mlib_ImageGetHeight(src);
 107       sa = (mlib_u8 *) mlib_ImageGetData(src);
 108       da = (mlib_u8 *) mlib_ImageGetData(dst);
 109 
 110       if (!mlib_ImageIsNotOneDvector(src) && !mlib_ImageIsNotOneDvector(dst)) {
 111         size = height * (width >> 3);
 112         if (!mlib_ImageIsNotAligned8(src) && !mlib_ImageIsNotAligned8(dst) && ((size & 7) == 0)) {
 113 
 114           mlib_c_ImageCopy_a1((TYPE_64BIT *) sa, (TYPE_64BIT *) da, size >> 3);
 115         }
 116         else {
 117 
 118           mlib_ImageCopy_na(sa, da, size);
 119         }
 120       }
 121       else {
 122         s_stride = mlib_ImageGetStride(src);
 123         d_stride = mlib_ImageGetStride(dst);
 124         s_offset = mlib_ImageGetBitOffset(src); /* in bits */
 125         d_offset = mlib_ImageGetBitOffset(dst); /* in bits */
 126         if (s_offset == d_offset) {
 127           for (j = 0; j < height; j++) {
 128             mlib_ImageCopy_bit_al(sa, da, width, s_offset);
 129             sa += s_stride;
 130             da += d_stride;
 131           }
 132         }
 133         else {
 134           for (j = 0; j < height; j++) {
 135             mlib_ImageCopy_bit_na(sa, da, width, s_offset, d_offset);
 136             sa += s_stride;
 137             da += d_stride;
 138           }
 139         }
 140       }
 141 
 142       break;
 143     case MLIB_BYTE:
 144       mlib_c_ImageCopy_u8(src, dst);
 145       break;
 146     case MLIB_SHORT:
 147     case MLIB_USHORT:
 148       mlib_c_ImageCopy_s16(src, dst);
 149       break;
 150     case MLIB_INT:
 151     case MLIB_FLOAT:
 152       mlib_c_ImageCopy_s32(src, dst);
 153       break;
 154     case MLIB_DOUBLE:
 155       mlib_c_ImageCopy_d64(src, dst);
 156       break;
 157     default:
 158       return MLIB_FAILURE;                  /* MLIB_BIT is not supported here */
 159   }
 160 
 161   return MLIB_SUCCESS;
 162 }
 163 
 164 /***************************************************************/
 165 #define PREPAREVARS(type)                                        \
 166   type *psrc = (type *) mlib_ImageGetData(src);                  \
 167   type *pdst = (type *) mlib_ImageGetData(dst);                  \
 168   mlib_s32 src_height = mlib_ImageGetHeight(src);                \
 169   mlib_s32 src_width  = mlib_ImageGetWidth(src);                 \
 170   mlib_s32 src_stride = mlib_ImageGetStride(src) / sizeof(type); \
 171   mlib_s32 dst_stride = mlib_ImageGetStride(dst) / sizeof(type); \
 172   mlib_s32 chan = mlib_ImageGetChannels(dst);                    \
 173   mlib_s32 i, j;                                                 \
 174                                                                  \
 175   src_width *= chan;                                             \
 176   if (src_width == src_stride && src_width == dst_stride) {      \
 177     src_width *= src_height;                                     \
 178     src_height = 1;                                              \
 179   }
 180 
 181 /***************************************************************/
 182 #define STRIP(pd, ps, w, h, data_type) {                        \
 183   data_type s0, s1;                                             \
 184   for ( i = 0; i < h; i++ ) {                                   \
 185     if ((j = (w & 1)))                                              \
 186       pd[i * dst_stride] = ps[i * src_stride];                  \
 187     for (; j < w; j += 2) {                                     \
 188       s0 = ps[i * src_stride + j];                              \
 189       s1 = ps[i * src_stride + j + 1];                          \
 190       pd[i * dst_stride + j]   = s0;                            \
 191       pd[i * dst_stride + j + 1] = s1;                          \
 192     }                                                           \
 193   }                                                             \
 194 }
 195 
 196 /***************************************************************/
 197 /*
 198  * Both bit offsets of source and distination are the same
 199  */
 200 
 201 void mlib_ImageCopy_bit_al(const mlib_u8 *sa,
 202                            mlib_u8       *da,
 203                            mlib_s32      size,
 204                            mlib_s32      offset)
 205 {
 206   mlib_s32 b_size, i, j;
 207   TYPE_64BIT *sp, *dp;
 208   mlib_u8 mask0 = 0xFF;
 209   mlib_u8 src, mask;
 210 
 211   if (size <= 0) return;
 212 
 213   if (size <= (8 - offset)) {
 214     mask = mask0 << (8 - size);
 215     mask >>= offset;
 216     src = da[0];
 217     da[0] = (src & (~mask)) | (sa[0] & mask);
 218     return;
 219   }
 220 
 221   mask = mask0 >> offset;
 222   src = da[0];
 223   da[0] = (src & (~mask)) | (sa[0] & mask);
 224   da++;
 225   sa++;
 226   size = size - 8 + offset;
 227   b_size = size >> 3;                       /* size in bytes */
 228 
 229   for (j = 0; (j < b_size) && (((mlib_addr) da & 7) != 0); j++)
 230     *da++ = *sa++;
 231 
 232   if ((((mlib_addr) sa ^ (mlib_addr) da) & 7) == 0) {
 233     sp = (TYPE_64BIT *) sa;
 234     dp = (TYPE_64BIT *) da;
 235 #ifdef __SUNPRO_C
 236 #pragma pipeloop(0)
 237 #endif /* __SUNPRO_C */
 238     for (i = 0; j <= (b_size - 8); j += 8, i++) {
 239       dp[i] = sp[i];
 240     }
 241 
 242     sa += i << 3;
 243     da += i << 3;
 244   }
 245   else {
 246 #ifdef _NO_LONGLONG
 247     if ((((mlib_addr) sa ^ (mlib_addr) da) & 3) == 0) {
 248       mlib_u32 *pws, *pwd;
 249 
 250       pws = (mlib_u32 *) sa;
 251       pwd = (mlib_u32 *) da;
 252 #ifdef __SUNPRO_C
 253 #pragma pipeloop(0)
 254 #endif /* __SUNPRO_C */
 255       for (i = 0; j <= (b_size - 4); j += 4, i++) {
 256         pwd[i] = pws[i];
 257       }
 258 
 259       sa += i << 2;
 260       da += i << 2;
 261     }
 262     else {
 263       mlib_u32 *pws, *pwd, src0, src1;
 264       mlib_s32 lshift = (mlib_addr) sa & 3, rshift;
 265 
 266       pwd = (mlib_u32 *) da;
 267       pws = (mlib_u32 *) (sa - lshift);
 268       lshift <<= 3;
 269       rshift = 32 - lshift;
 270 
 271       src1 = pws[0];
 272 #ifdef __SUNPRO_C
 273 #pragma pipeloop(0)
 274 #endif /* __SUNPRO_C */
 275       for (i = 0; j <= (b_size - 4); j += 4, i++) {
 276         src0 = src1;
 277         src1 = pws[i + 1];
 278 #ifdef _LITTLE_ENDIAN
 279         pwd[i] = (src0 >> lshift) | (src1 << rshift);
 280 #else
 281         pwd[i] = (src0 << lshift) | (src1 >> rshift);
 282 #endif /* _LITTLE_ENDIAN */
 283       }
 284 
 285       sa += i << 2;
 286       da += i << 2;
 287     }
 288 
 289 #else
 290     mlib_u64 *pws, *pwd, src0, src1;
 291     mlib_s32 lshift = (mlib_s32) ((mlib_addr) sa & 7), rshift;
 292 
 293     pwd = (mlib_u64 *) da;
 294     pws = (mlib_u64 *) (sa - lshift);
 295     lshift <<= 3;
 296     rshift = 64 - lshift;
 297 
 298     src1 = pws[0];
 299 #ifdef __SUNPRO_C
 300 #pragma pipeloop(0)
 301 #endif /* __SUNPRO_C */
 302     for (i = 0; j <= (b_size - 8); j += 8, i++) {
 303       src0 = src1;
 304       src1 = pws[i + 1];
 305       pwd[i] = (src0 << lshift) | (src1 >> rshift);
 306     }
 307 
 308     sa += i << 3;
 309     da += i << 3;
 310 #endif /* _NO_LONGLONG */
 311   }
 312 
 313   for (; j < b_size; j++)
 314     *da++ = *sa++;
 315 
 316   j = size & 7;
 317 
 318   if (j > 0) {
 319     mask = mask0 << (8 - j);
 320     src = da[0];
 321     da[0] = (src & (~mask)) | (sa[0] & mask);
 322   }
 323 }
 324 
 325 /***************************************************************/
 326 void mlib_c_ImageCopy_u8(const mlib_image *src,
 327                          mlib_image       *dst)
 328 {
 329   PREPAREVARS(mlib_u8);
 330   if (src_width < 16) {
 331     STRIP(pdst, psrc, src_width, src_height, mlib_u8);
 332     return;
 333   }
 334 
 335   for (i = 0; i < src_height; i++) {
 336     mlib_u8 *psrc_row = psrc + i * src_stride, *pdst_row = pdst + i * dst_stride;
 337 
 338     if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 7)) {
 339       for (j = 0; j < (mlib_s32) ((8 - (mlib_addr) psrc_row) & 7); j++) {
 340         pdst_row[j] = psrc_row[j];
 341       }
 342 
 343 #ifdef __SUNPRO_C
 344 #pragma pipeloop(0)
 345 #endif /* __SUNPRO_C */
 346       for (; j <= (src_width - 8); j += 8) {
 347         TYPE_64BIT dsrc0 = *((TYPE_64BIT *) (psrc_row + j));
 348 
 349         *((TYPE_64BIT *) (pdst_row + j)) = dsrc0;
 350       }
 351     }
 352     else {
 353 
 354 #ifdef _NO_LONGLONG
 355 
 356       for (j = 0; j < (mlib_s32) ((4 - (mlib_addr) pdst_row) & 3); j++) {
 357         pdst_row[j] = psrc_row[j];
 358       }
 359 
 360       if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 3)) {
 361 #ifdef __SUNPRO_C
 362 #pragma pipeloop(0)
 363 #endif /* __SUNPRO_C */
 364         for (; j <= (src_width - 4); j += 4) {
 365           *((mlib_s32 *) (pdst_row + j)) = *((mlib_s32 *) (psrc_row + j));
 366         }
 367       }
 368       else {
 369         mlib_u32 *ps, shl, shr, src0, src1;
 370 
 371         ps = (mlib_u32 *) (psrc_row + j);
 372         shl = (mlib_addr) ps & 3;
 373         ps = (mlib_u32 *) ((mlib_addr) ps - shl);
 374         shl <<= 3;
 375         shr = 32 - shl;
 376 
 377         src1 = ps[0];
 378 #ifdef __SUNPRO_C
 379 #pragma pipeloop(0)
 380 #endif /* __SUNPRO_C */
 381         for (; j <= (src_width - 4); j += 4) {
 382           src0 = src1;
 383           src1 = ps[1];
 384 #ifdef _LITTLE_ENDIAN
 385           *((mlib_s32 *) (pdst_row + j)) = (src0 >> shl) | (src1 << shr);
 386 #else
 387           *((mlib_s32 *) (pdst_row + j)) = (src0 << shl) | (src1 >> shr);
 388 #endif /* _LITTLE_ENDIAN */
 389           ps++;
 390         }
 391       }
 392 
 393 #else
 394 
 395       for (j = 0; j < (mlib_s32) ((8 - (mlib_addr) pdst_row) & 7); j++) {
 396         pdst_row[j] = psrc_row[j];
 397       }
 398 
 399       {
 400         mlib_s32 shl, shr;
 401         mlib_u64 *ps, src0, src1;
 402 
 403         ps = (mlib_u64 *) (psrc_row + j);
 404         /* shl and shr are in range [0, 64] */
 405         shl = (mlib_s32) ((mlib_addr) ps & 7);
 406         ps = (mlib_u64 *) ((mlib_addr) ps - shl);
 407         shl <<= 3;
 408         shr = 64 - shl;
 409 
 410         src1 = ps[0];
 411 #ifdef __SUNPRO_C
 412 #pragma pipeloop(0)
 413 #endif /* __SUNPRO_C */
 414         for (; j <= (src_width - 8); j += 8) {
 415           src0 = src1;
 416           src1 = ps[1];
 417 #ifdef _LITTLE_ENDIAN
 418           *((mlib_s64 *) (pdst_row + j)) = (src0 >> shl) | (src1 << shr);
 419 #else
 420           *((mlib_s64 *) (pdst_row + j)) = (src0 << shl) | (src1 >> shr);
 421 #endif /* _LITTLE_ENDIAN */
 422           ps++;
 423         }
 424       }
 425 #endif /* _NO_LONGLONG */
 426     }
 427 
 428     for (; j < src_width; j++)
 429       pdst_row[j] = psrc_row[j];
 430   }
 431 }
 432 
 433 /***************************************************************/
 434 void mlib_c_ImageCopy_s16(const mlib_image       *src,
 435                           mlib_image *dst)
 436 {
 437   PREPAREVARS(mlib_u16);
 438   if (src_width < 8) {
 439     STRIP(pdst, psrc, src_width, src_height, mlib_u16);
 440     return;
 441   }
 442 
 443   for (i = 0; i < src_height; i++) {
 444     mlib_u16 *psrc_row = psrc + i * src_stride, *pdst_row = pdst + i * dst_stride;
 445 
 446     if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 7)) {
 447       for (j = 0; j < (mlib_s32) (((8 - (mlib_addr) psrc_row) & 7) >> 1); j++) {
 448         pdst_row[j] = psrc_row[j];
 449       }
 450 
 451 #ifdef __SUNPRO_C
 452 #pragma pipeloop(0)
 453 #endif /* __SUNPRO_C */
 454       for (; j <= (src_width - 4); j += 4) {
 455         TYPE_64BIT dsrc0 = *((TYPE_64BIT *) (psrc_row + j));
 456 
 457         *((TYPE_64BIT *) (pdst_row + j)) = dsrc0;
 458       }
 459     }
 460     else {
 461 
 462 #ifdef _NO_LONGLONG
 463 
 464       if (j = (((mlib_addr) pdst_row & 2) != 0)) {
 465         pdst_row[0] = psrc_row[0];
 466       }
 467 
 468       if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 3)) {
 469 #ifdef __SUNPRO_C
 470 #pragma pipeloop(0)
 471 #endif /* __SUNPRO_C */
 472         for (; j <= (src_width - 2); j += 2) {
 473           *((mlib_s32 *) (pdst_row + j)) = *((mlib_s32 *) (psrc_row + j));
 474         }
 475       }
 476       else {
 477         mlib_u32 *ps, src0, src1;
 478 
 479         ps = (mlib_u32 *) (psrc_row + j - 1);
 480         src1 = ps[0];
 481 #ifdef __SUNPRO_C
 482 #pragma pipeloop(0)
 483 #endif /* __SUNPRO_C */
 484         for (; j <= (src_width - 2); j += 2) {
 485           src0 = src1;
 486           src1 = ps[1];
 487 #ifdef _LITTLE_ENDIAN
 488           *((mlib_s32 *) (pdst_row + j)) = (src0 >> 16) | (src1 << 16);
 489 #else
 490           *((mlib_s32 *) (pdst_row + j)) = (src0 << 16) | (src1 >> 16);
 491 #endif /* _LITTLE_ENDIAN */
 492           ps++;
 493         }
 494       }
 495 
 496 #else
 497 
 498       for (j = 0; j < (mlib_s32) (((8 - (mlib_addr) pdst_row) & 7) >> 1); j++) {
 499         pdst_row[j] = psrc_row[j];
 500       }
 501 
 502       {
 503         mlib_s32 shl, shr;
 504         mlib_u64 *ps, src0, src1;
 505 
 506         ps = (mlib_u64 *) (psrc_row + j);
 507         shl = (mlib_s32) ((mlib_addr) ps & 7);
 508         ps = (mlib_u64 *) ((mlib_addr) ps - shl);
 509         shl <<= 3;
 510         shr = 64 - shl;
 511 
 512         src1 = ps[0];
 513 #ifdef __SUNPRO_C
 514 #pragma pipeloop(0)
 515 #endif /* __SUNPRO_C */
 516         for (; j <= (src_width - 4); j += 4) {
 517           src0 = src1;
 518           src1 = ps[1];
 519 #ifdef _LITTLE_ENDIAN
 520           *((mlib_s64 *) (pdst_row + j)) = (src0 >> shl) | (src1 << shr);
 521 #else
 522           *((mlib_s64 *) (pdst_row + j)) = (src0 << shl) | (src1 >> shr);
 523 #endif /* _LITTLE_ENDIAN */
 524           ps++;
 525         }
 526       }
 527 #endif /* _NO_LONGLONG */
 528     }
 529 
 530     for (; j < src_width; j++)
 531       pdst_row[j] = psrc_row[j];
 532   }
 533 }
 534 
 535 /***************************************************************/
 536 void mlib_c_ImageCopy_s32(const mlib_image       *src,
 537                           mlib_image *dst)
 538 {
 539   PREPAREVARS(mlib_u32);
 540   if (src_width < 4) {
 541     STRIP(pdst, psrc, src_width, src_height, mlib_u32);
 542     return;
 543   }
 544 
 545   for (i = 0; i < src_height; i++) {
 546     mlib_u32 *psrc_row = psrc + i * src_stride, *pdst_row = pdst + i * dst_stride;
 547 
 548     if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 7)) {
 549       if ((j = ((mlib_s32) ((mlib_addr) psrc_row & 4) >> 2))) {
 550         pdst_row[0] = psrc_row[0];
 551       }
 552 
 553 #ifdef __SUNPRO_C
 554 #pragma pipeloop(0)
 555 #endif /* __SUNPRO_C */
 556       for (; j <= (src_width - 2); j += 2) {
 557         TYPE_64BIT dsrc0 = *((TYPE_64BIT *) (psrc_row + j));
 558 
 559         *((TYPE_64BIT *) (pdst_row + j)) = dsrc0;
 560       }
 561     }
 562     else {
 563 
 564 #ifdef _NO_LONGLONG
 565 
 566 #ifdef __SUNPRO_C
 567 #pragma pipeloop(0)
 568 #endif /* __SUNPRO_C */
 569       for (j = 0; j <= (src_width - 1); j++) {
 570         *((mlib_s32 *) (pdst_row + j)) = *((mlib_s32 *) (psrc_row + j));
 571       }
 572 
 573 #else
 574 
 575       {
 576         mlib_u64 *ps, src0, src1;
 577 
 578         if ((j = ((mlib_s32) ((mlib_addr) pdst_row & 4) >> 2)))
 579           pdst_row[0] = psrc_row[0];
 580         ps = (mlib_u64 *) (psrc_row + j - 1);
 581         src1 = ps[0];
 582 #ifdef __SUNPRO_C
 583 #pragma pipeloop(0)
 584 #endif /* __SUNPRO_C */
 585         for (; j <= (src_width - 2); j += 2) {
 586           src0 = src1;
 587           src1 = ps[1];
 588 #ifdef _LITTLE_ENDIAN
 589           *((mlib_s64 *) (pdst_row + j)) = (src0 >> 32) | (src1 << 32);
 590 #else
 591           *((mlib_s64 *) (pdst_row + j)) = (src0 << 32) | (src1 >> 32);
 592 #endif /* _LITTLE_ENDIAN */
 593           ps++;
 594         }
 595       }
 596 #endif /* _NO_LONGLONG */
 597     }
 598 
 599     for (; j < src_width; j++)
 600       pdst_row[j] = psrc_row[j];
 601   }
 602 }
 603 
 604 /***************************************************************/
 605 void mlib_c_ImageCopy_d64(const mlib_image       *src,
 606                           mlib_image *dst)
 607 {
 608   PREPAREVARS(mlib_d64);
 609   for (i = 0; i < src_height; i++) {
 610     mlib_d64 *psrc_row = psrc + i * src_stride, *pdst_row = pdst + i * dst_stride;
 611 
 612 #ifdef __SUNPRO_C
 613 #pragma pipeloop(0)
 614 #endif /* __SUNPRO_C */
 615     for (j = 0; j < src_width; j++)
 616       *((mlib_d64 *) (pdst_row + j)) = *((mlib_d64 *) (psrc_row + j));
 617   }
 618 }
 619 
 620 /***************************************************************/
 621 /*
 622  * Both source and destination image data are 1 - d vectors and
 623  * 8 - byte aligned. And size is in 8 - bytes.
 624  */
 625 
 626 void mlib_c_ImageCopy_a1(const TYPE_64BIT *sp,
 627                          TYPE_64BIT       *dp,
 628                          mlib_s32         size)
 629 {
 630   mlib_s32 i;
 631 
 632 #ifdef __SUNPRO_C
 633 #pragma pipeloop(0)
 634 #endif /* __SUNPRO_C */
 635   for (i = 0; i < size; i++) {
 636     *dp++ = *sp++;
 637   }
 638 }
 639 
 640 /***************************************************************/
 641 #ifndef _NO_LONGLONG
 642 #define TYPE    mlib_u64
 643 #define BSIZE   64
 644 #define SIZE    8
 645 #else
 646 #define TYPE    mlib_u32
 647 #define BSIZE   32
 648 #define SIZE    4
 649 #endif /* _NO_LONGLONG */
 650 
 651 /***************************************************************/
 652 void mlib_ImageCopy_na(const mlib_u8 *sp,
 653                        mlib_u8       *dp,
 654                        mlib_s32      n)
 655 {
 656   mlib_s32 shr, shl;
 657   TYPE *tmp, s0, s1;
 658 
 659   if (((mlib_addr) sp ^ (mlib_addr) dp) & 7) {
 660 
 661 #ifdef __SUNPRO_C
 662 #pragma pipeloop(0)
 663 #endif /* __SUNPRO_C */
 664     for (; (n > 0) && (mlib_addr) dp & (SIZE - 1); n--)
 665       *dp++ = *sp++;
 666 
 667 #ifdef _NO_LONGLONG
 668 
 669     if (((mlib_addr) sp & (SIZE - 1)) == 0) {
 670       for (; n > SIZE; n -= SIZE) {
 671         *(TYPE *) dp = *(TYPE *) sp;
 672         dp += SIZE;
 673         sp += SIZE;
 674       }
 675     }
 676     else
 677 #endif /* _NO_LONGLONG */
 678     {
 679       tmp = (TYPE *) ((mlib_addr) sp & ~(SIZE - 1));
 680       /* shl and shr do not exceed 64 here */
 681       shl = (mlib_s32) (((mlib_addr) sp & (SIZE - 1)) << 3);
 682       shr = BSIZE - shl;
 683       s0 = *tmp++;
 684 
 685 #ifdef __SUNPRO_C
 686 #pragma pipeloop(0)
 687 #endif /* __SUNPRO_C */
 688       for (; n > SIZE; n -= SIZE) {
 689         s1 = *tmp++;
 690 #ifdef _LITTLE_ENDIAN
 691         *(TYPE *) dp = (s0 >> shl) | (s1 << shr);
 692 #else
 693         *(TYPE *) dp = (s0 << shl) | (s1 >> shr);
 694 #endif /* _LITTLE_ENDIAN */
 695         s0 = s1;
 696         dp += SIZE;
 697         sp += SIZE;
 698       }
 699     }
 700   }
 701   else {
 702 #ifdef __SUNPRO_C
 703 #pragma pipeloop(0)
 704 #endif /* __SUNPRO_C */
 705     for (; (n > 0) && (mlib_addr) dp & 7; n--)
 706       *dp++ = *sp++;
 707 
 708 #ifdef __SUNPRO_C
 709 #pragma pipeloop(0)
 710 #endif /* __SUNPRO_C */
 711     for (; n > 8; n -= 8) {
 712       *(TYPE_64BIT *) dp = *(TYPE_64BIT *) sp;
 713       dp += 8;
 714       sp += 8;
 715     }
 716   }
 717 
 718 #ifdef __SUNPRO_C
 719 #pragma pipeloop(0)
 720 #endif /* __SUNPRO_C */
 721   for (; n > 0; n--)
 722     *dp++ = *sp++;
 723 }
 724 
 725 /***************************************************************/
 726 #ifdef _MSC_VER
 727 #pragma optimize("", on)
 728 #endif /* _MSC_VER */
 729 
 730 /***************************************************************/