1 /* 2 * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 27 /* 28 * FUNCTIONS 29 * mlib_ImageCopy - Direct copy from one image to another. 30 * 31 * SYNOPSIS 32 * mlib_status mlib_ImageCopy(mlib_image *dst, 33 * const mlib_image *src); 34 * 35 * ARGUMENT 36 * dst pointer to output or destination image 37 * src pointer to input or source image 38 * 39 * RESTRICTION 40 * src and dst must have the same size, type and number of channels. 41 * They can have 1, 2, 3 or 4 channels of MLIB_BIT, MLIB_BYTE, MLIB_SHORT, 42 * MLIB_USHORT, MLIB_INT, MLIB_FLOAT or MLIB_DOUBLE data type. 43 * 44 * DESCRIPTION 45 * Direct copy from one image to another 46 */ 47 48 #include <stdlib.h> 49 #include "mlib_image.h" 50 #include "mlib_ImageCheck.h" 51 #include "mlib_ImageCopy.h" 52 53 /***************************************************************/ 54 #ifdef _MSC_VER 55 #pragma optimize("", off) /* Fix bug 4195132 */ 56 #endif /* _MSC_VER */ 57 58 /***************************************************************/ 59 /* do not perform the coping by mlib_d64 data type for x86 */ 60 #ifdef i386 61 62 typedef struct { 63 mlib_s32 int0, int1; 64 } two_int; 65 66 #define TYPE_64BIT two_int 67 68 #else /* i386 */ 69 70 #define TYPE_64BIT mlib_d64 71 #endif /* i386 */ 72 73 /***************************************************************/ 74 static void mlib_c_ImageCopy_u8(const mlib_image *src, 75 mlib_image *dst); 76 static void mlib_c_ImageCopy_s16(const mlib_image *src, 77 mlib_image *dst); 78 static void mlib_c_ImageCopy_s32(const mlib_image *src, 79 mlib_image *dst); 80 static void mlib_c_ImageCopy_d64(const mlib_image *src, 81 mlib_image *dst); 82 static void mlib_c_ImageCopy_a1(const TYPE_64BIT *sp, 83 TYPE_64BIT *dp, 84 mlib_s32 size); 85 86 /***************************************************************/ 87 mlib_status mlib_ImageCopy(mlib_image *dst, 88 const mlib_image *src) 89 { 90 mlib_s32 s_offset, d_offset; 91 mlib_s32 size, s_stride, d_stride; 92 mlib_s32 width; /* width in bytes of src and dst */ 93 mlib_s32 height; /* height in lines of src and dst */ 94 mlib_u8 *sa, *da; 95 mlib_s32 j; 96 97 MLIB_IMAGE_CHECK(src); 98 MLIB_IMAGE_CHECK(dst); 99 MLIB_IMAGE_TYPE_EQUAL(src, dst); 100 MLIB_IMAGE_CHAN_EQUAL(src, dst); 101 MLIB_IMAGE_SIZE_EQUAL(src, dst); 102 103 switch (mlib_ImageGetType(dst)) { 104 case MLIB_BIT: 105 width = mlib_ImageGetWidth(dst) * mlib_ImageGetChannels(dst); /* size in bits */ 106 height = mlib_ImageGetHeight(src); 107 sa = (mlib_u8 *) mlib_ImageGetData(src); 108 da = (mlib_u8 *) mlib_ImageGetData(dst); 109 110 if (!mlib_ImageIsNotOneDvector(src) && !mlib_ImageIsNotOneDvector(dst)) { 111 size = height * (width >> 3); 112 if (!mlib_ImageIsNotAligned8(src) && !mlib_ImageIsNotAligned8(dst) && ((size & 7) == 0)) { 113 114 mlib_c_ImageCopy_a1((TYPE_64BIT *) sa, (TYPE_64BIT *) da, size >> 3); 115 } 116 else { 117 118 mlib_ImageCopy_na(sa, da, size); 119 } 120 } 121 else { 122 s_stride = mlib_ImageGetStride(src); 123 d_stride = mlib_ImageGetStride(dst); 124 s_offset = mlib_ImageGetBitOffset(src); /* in bits */ 125 d_offset = mlib_ImageGetBitOffset(dst); /* in bits */ 126 if (s_offset == d_offset) { 127 for (j = 0; j < height; j++) { 128 mlib_ImageCopy_bit_al(sa, da, width, s_offset); 129 sa += s_stride; 130 da += d_stride; 131 } 132 } 133 else { 134 for (j = 0; j < height; j++) { 135 mlib_ImageCopy_bit_na(sa, da, width, s_offset, d_offset); 136 sa += s_stride; 137 da += d_stride; 138 } 139 } 140 } 141 142 break; 143 case MLIB_BYTE: 144 mlib_c_ImageCopy_u8(src, dst); 145 break; 146 case MLIB_SHORT: 147 case MLIB_USHORT: 148 mlib_c_ImageCopy_s16(src, dst); 149 break; 150 case MLIB_INT: 151 case MLIB_FLOAT: 152 mlib_c_ImageCopy_s32(src, dst); 153 break; 154 case MLIB_DOUBLE: 155 mlib_c_ImageCopy_d64(src, dst); 156 break; 157 default: 158 return MLIB_FAILURE; /* MLIB_BIT is not supported here */ 159 } 160 161 return MLIB_SUCCESS; 162 } 163 164 /***************************************************************/ 165 #define PREPAREVARS(type) \ 166 type *psrc = (type *) mlib_ImageGetData(src); \ 167 type *pdst = (type *) mlib_ImageGetData(dst); \ 168 mlib_s32 src_height = mlib_ImageGetHeight(src); \ 169 mlib_s32 src_width = mlib_ImageGetWidth(src); \ 170 mlib_s32 src_stride = mlib_ImageGetStride(src) / sizeof(type); \ 171 mlib_s32 dst_stride = mlib_ImageGetStride(dst) / sizeof(type); \ 172 mlib_s32 chan = mlib_ImageGetChannels(dst); \ 173 mlib_s32 i, j; \ 174 \ 175 src_width *= chan; \ 176 if (src_width == src_stride && src_width == dst_stride) { \ 177 src_width *= src_height; \ 178 src_height = 1; \ 179 } 180 181 /***************************************************************/ 182 #define STRIP(pd, ps, w, h, data_type) { \ 183 data_type s0, s1; \ 184 for ( i = 0; i < h; i++ ) { \ 185 if ((j = (w & 1))) \ 186 pd[i * dst_stride] = ps[i * src_stride]; \ 187 for (; j < w; j += 2) { \ 188 s0 = ps[i * src_stride + j]; \ 189 s1 = ps[i * src_stride + j + 1]; \ 190 pd[i * dst_stride + j] = s0; \ 191 pd[i * dst_stride + j + 1] = s1; \ 192 } \ 193 } \ 194 } 195 196 /***************************************************************/ 197 /* 198 * Both bit offsets of source and distination are the same 199 */ 200 201 void mlib_ImageCopy_bit_al(const mlib_u8 *sa, 202 mlib_u8 *da, 203 mlib_s32 size, 204 mlib_s32 offset) 205 { 206 mlib_s32 b_size, i, j; 207 TYPE_64BIT *sp, *dp; 208 mlib_u8 mask0 = 0xFF; 209 mlib_u8 src, mask; 210 211 if (size <= 0) return; 212 213 if (size <= (8 - offset)) { 214 mask = mask0 << (8 - size); 215 mask >>= offset; 216 src = da[0]; 217 da[0] = (src & (~mask)) | (sa[0] & mask); 218 return; 219 } 220 221 mask = mask0 >> offset; 222 src = da[0]; 223 da[0] = (src & (~mask)) | (sa[0] & mask); 224 da++; 225 sa++; 226 size = size - 8 + offset; 227 b_size = size >> 3; /* size in bytes */ 228 229 for (j = 0; (j < b_size) && (((mlib_addr) da & 7) != 0); j++) 230 *da++ = *sa++; 231 232 if ((((mlib_addr) sa ^ (mlib_addr) da) & 7) == 0) { 233 sp = (TYPE_64BIT *) sa; 234 dp = (TYPE_64BIT *) da; 235 #ifdef __SUNPRO_C 236 #pragma pipeloop(0) 237 #endif /* __SUNPRO_C */ 238 for (i = 0; j <= (b_size - 8); j += 8, i++) { 239 dp[i] = sp[i]; 240 } 241 242 sa += i << 3; 243 da += i << 3; 244 } 245 else { 246 #ifdef _NO_LONGLONG 247 if ((((mlib_addr) sa ^ (mlib_addr) da) & 3) == 0) { 248 mlib_u32 *pws, *pwd; 249 250 pws = (mlib_u32 *) sa; 251 pwd = (mlib_u32 *) da; 252 #ifdef __SUNPRO_C 253 #pragma pipeloop(0) 254 #endif /* __SUNPRO_C */ 255 for (i = 0; j <= (b_size - 4); j += 4, i++) { 256 pwd[i] = pws[i]; 257 } 258 259 sa += i << 2; 260 da += i << 2; 261 } 262 else { 263 mlib_u32 *pws, *pwd, src0, src1; 264 mlib_s32 lshift = (mlib_addr) sa & 3, rshift; 265 266 pwd = (mlib_u32 *) da; 267 pws = (mlib_u32 *) (sa - lshift); 268 lshift <<= 3; 269 rshift = 32 - lshift; 270 271 src1 = pws[0]; 272 #ifdef __SUNPRO_C 273 #pragma pipeloop(0) 274 #endif /* __SUNPRO_C */ 275 for (i = 0; j <= (b_size - 4); j += 4, i++) { 276 src0 = src1; 277 src1 = pws[i + 1]; 278 #ifdef _LITTLE_ENDIAN 279 pwd[i] = (src0 >> lshift) | (src1 << rshift); 280 #else 281 pwd[i] = (src0 << lshift) | (src1 >> rshift); 282 #endif /* _LITTLE_ENDIAN */ 283 } 284 285 sa += i << 2; 286 da += i << 2; 287 } 288 289 #else 290 mlib_u64 *pws, *pwd, src0, src1; 291 mlib_s32 lshift = (mlib_s32) ((mlib_addr) sa & 7), rshift; 292 293 pwd = (mlib_u64 *) da; 294 pws = (mlib_u64 *) (sa - lshift); 295 lshift <<= 3; 296 rshift = 64 - lshift; 297 298 src1 = pws[0]; 299 #ifdef __SUNPRO_C 300 #pragma pipeloop(0) 301 #endif /* __SUNPRO_C */ 302 for (i = 0; j <= (b_size - 8); j += 8, i++) { 303 src0 = src1; 304 src1 = pws[i + 1]; 305 pwd[i] = (src0 << lshift) | (src1 >> rshift); 306 } 307 308 sa += i << 3; 309 da += i << 3; 310 #endif /* _NO_LONGLONG */ 311 } 312 313 for (; j < b_size; j++) 314 *da++ = *sa++; 315 316 j = size & 7; 317 318 if (j > 0) { 319 mask = mask0 << (8 - j); 320 src = da[0]; 321 da[0] = (src & (~mask)) | (sa[0] & mask); 322 } 323 } 324 325 /***************************************************************/ 326 void mlib_c_ImageCopy_u8(const mlib_image *src, 327 mlib_image *dst) 328 { 329 PREPAREVARS(mlib_u8); 330 if (src_width < 16) { 331 STRIP(pdst, psrc, src_width, src_height, mlib_u8); 332 return; 333 } 334 335 for (i = 0; i < src_height; i++) { 336 mlib_u8 *psrc_row = psrc + i * src_stride, *pdst_row = pdst + i * dst_stride; 337 338 if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 7)) { 339 for (j = 0; j < (mlib_s32) ((8 - (mlib_addr) psrc_row) & 7); j++) { 340 pdst_row[j] = psrc_row[j]; 341 } 342 343 #ifdef __SUNPRO_C 344 #pragma pipeloop(0) 345 #endif /* __SUNPRO_C */ 346 for (; j <= (src_width - 8); j += 8) { 347 TYPE_64BIT dsrc0 = *((TYPE_64BIT *) (psrc_row + j)); 348 349 *((TYPE_64BIT *) (pdst_row + j)) = dsrc0; 350 } 351 } 352 else { 353 354 #ifdef _NO_LONGLONG 355 356 for (j = 0; j < (mlib_s32) ((4 - (mlib_addr) pdst_row) & 3); j++) { 357 pdst_row[j] = psrc_row[j]; 358 } 359 360 if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 3)) { 361 #ifdef __SUNPRO_C 362 #pragma pipeloop(0) 363 #endif /* __SUNPRO_C */ 364 for (; j <= (src_width - 4); j += 4) { 365 *((mlib_s32 *) (pdst_row + j)) = *((mlib_s32 *) (psrc_row + j)); 366 } 367 } 368 else { 369 mlib_u32 *ps, shl, shr, src0, src1; 370 371 ps = (mlib_u32 *) (psrc_row + j); 372 shl = (mlib_addr) ps & 3; 373 ps = (mlib_u32 *) ((mlib_addr) ps - shl); 374 shl <<= 3; 375 shr = 32 - shl; 376 377 src1 = ps[0]; 378 #ifdef __SUNPRO_C 379 #pragma pipeloop(0) 380 #endif /* __SUNPRO_C */ 381 for (; j <= (src_width - 4); j += 4) { 382 src0 = src1; 383 src1 = ps[1]; 384 #ifdef _LITTLE_ENDIAN 385 *((mlib_s32 *) (pdst_row + j)) = (src0 >> shl) | (src1 << shr); 386 #else 387 *((mlib_s32 *) (pdst_row + j)) = (src0 << shl) | (src1 >> shr); 388 #endif /* _LITTLE_ENDIAN */ 389 ps++; 390 } 391 } 392 393 #else 394 395 for (j = 0; j < (mlib_s32) ((8 - (mlib_addr) pdst_row) & 7); j++) { 396 pdst_row[j] = psrc_row[j]; 397 } 398 399 { 400 mlib_s32 shl, shr; 401 mlib_u64 *ps, src0, src1; 402 403 ps = (mlib_u64 *) (psrc_row + j); 404 /* shl and shr are in range [0, 64] */ 405 shl = (mlib_s32) ((mlib_addr) ps & 7); 406 ps = (mlib_u64 *) ((mlib_addr) ps - shl); 407 shl <<= 3; 408 shr = 64 - shl; 409 410 src1 = ps[0]; 411 #ifdef __SUNPRO_C 412 #pragma pipeloop(0) 413 #endif /* __SUNPRO_C */ 414 for (; j <= (src_width - 8); j += 8) { 415 src0 = src1; 416 src1 = ps[1]; 417 #ifdef _LITTLE_ENDIAN 418 *((mlib_s64 *) (pdst_row + j)) = (src0 >> shl) | (src1 << shr); 419 #else 420 *((mlib_s64 *) (pdst_row + j)) = (src0 << shl) | (src1 >> shr); 421 #endif /* _LITTLE_ENDIAN */ 422 ps++; 423 } 424 } 425 #endif /* _NO_LONGLONG */ 426 } 427 428 for (; j < src_width; j++) 429 pdst_row[j] = psrc_row[j]; 430 } 431 } 432 433 /***************************************************************/ 434 void mlib_c_ImageCopy_s16(const mlib_image *src, 435 mlib_image *dst) 436 { 437 PREPAREVARS(mlib_u16); 438 if (src_width < 8) { 439 STRIP(pdst, psrc, src_width, src_height, mlib_u16); 440 return; 441 } 442 443 for (i = 0; i < src_height; i++) { 444 mlib_u16 *psrc_row = psrc + i * src_stride, *pdst_row = pdst + i * dst_stride; 445 446 if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 7)) { 447 for (j = 0; j < (mlib_s32) (((8 - (mlib_addr) psrc_row) & 7) >> 1); j++) { 448 pdst_row[j] = psrc_row[j]; 449 } 450 451 #ifdef __SUNPRO_C 452 #pragma pipeloop(0) 453 #endif /* __SUNPRO_C */ 454 for (; j <= (src_width - 4); j += 4) { 455 TYPE_64BIT dsrc0 = *((TYPE_64BIT *) (psrc_row + j)); 456 457 *((TYPE_64BIT *) (pdst_row + j)) = dsrc0; 458 } 459 } 460 else { 461 462 #ifdef _NO_LONGLONG 463 464 if (j = (((mlib_addr) pdst_row & 2) != 0)) { 465 pdst_row[0] = psrc_row[0]; 466 } 467 468 if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 3)) { 469 #ifdef __SUNPRO_C 470 #pragma pipeloop(0) 471 #endif /* __SUNPRO_C */ 472 for (; j <= (src_width - 2); j += 2) { 473 *((mlib_s32 *) (pdst_row + j)) = *((mlib_s32 *) (psrc_row + j)); 474 } 475 } 476 else { 477 mlib_u32 *ps, src0, src1; 478 479 ps = (mlib_u32 *) (psrc_row + j - 1); 480 src1 = ps[0]; 481 #ifdef __SUNPRO_C 482 #pragma pipeloop(0) 483 #endif /* __SUNPRO_C */ 484 for (; j <= (src_width - 2); j += 2) { 485 src0 = src1; 486 src1 = ps[1]; 487 #ifdef _LITTLE_ENDIAN 488 *((mlib_s32 *) (pdst_row + j)) = (src0 >> 16) | (src1 << 16); 489 #else 490 *((mlib_s32 *) (pdst_row + j)) = (src0 << 16) | (src1 >> 16); 491 #endif /* _LITTLE_ENDIAN */ 492 ps++; 493 } 494 } 495 496 #else 497 498 for (j = 0; j < (mlib_s32) (((8 - (mlib_addr) pdst_row) & 7) >> 1); j++) { 499 pdst_row[j] = psrc_row[j]; 500 } 501 502 { 503 mlib_s32 shl, shr; 504 mlib_u64 *ps, src0, src1; 505 506 ps = (mlib_u64 *) (psrc_row + j); 507 shl = (mlib_s32) ((mlib_addr) ps & 7); 508 ps = (mlib_u64 *) ((mlib_addr) ps - shl); 509 shl <<= 3; 510 shr = 64 - shl; 511 512 src1 = ps[0]; 513 #ifdef __SUNPRO_C 514 #pragma pipeloop(0) 515 #endif /* __SUNPRO_C */ 516 for (; j <= (src_width - 4); j += 4) { 517 src0 = src1; 518 src1 = ps[1]; 519 #ifdef _LITTLE_ENDIAN 520 *((mlib_s64 *) (pdst_row + j)) = (src0 >> shl) | (src1 << shr); 521 #else 522 *((mlib_s64 *) (pdst_row + j)) = (src0 << shl) | (src1 >> shr); 523 #endif /* _LITTLE_ENDIAN */ 524 ps++; 525 } 526 } 527 #endif /* _NO_LONGLONG */ 528 } 529 530 for (; j < src_width; j++) 531 pdst_row[j] = psrc_row[j]; 532 } 533 } 534 535 /***************************************************************/ 536 void mlib_c_ImageCopy_s32(const mlib_image *src, 537 mlib_image *dst) 538 { 539 PREPAREVARS(mlib_u32); 540 if (src_width < 4) { 541 STRIP(pdst, psrc, src_width, src_height, mlib_u32); 542 return; 543 } 544 545 for (i = 0; i < src_height; i++) { 546 mlib_u32 *psrc_row = psrc + i * src_stride, *pdst_row = pdst + i * dst_stride; 547 548 if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 7)) { 549 j = (mlib_s32) ((mlib_addr) psrc_row & 4) >> 2; 550 if (j != 0) { 551 pdst_row[0] = psrc_row[0]; 552 } 553 554 #ifdef __SUNPRO_C 555 #pragma pipeloop(0) 556 #endif /* __SUNPRO_C */ 557 for (; j <= (src_width - 2); j += 2) { 558 TYPE_64BIT dsrc0 = *((TYPE_64BIT *) (psrc_row + j)); 559 560 *((TYPE_64BIT *) (pdst_row + j)) = dsrc0; 561 } 562 } 563 else { 564 565 #ifdef _NO_LONGLONG 566 567 #ifdef __SUNPRO_C 568 #pragma pipeloop(0) 569 #endif /* __SUNPRO_C */ 570 for (j = 0; j <= (src_width - 1); j++) { 571 *((mlib_s32 *) (pdst_row + j)) = *((mlib_s32 *) (psrc_row + j)); 572 } 573 574 #else 575 576 { 577 mlib_u64 *ps, src0, src1; 578 579 j = (mlib_s32) ((mlib_addr) pdst_row & 4) >> 2; 580 if (j != 0) { 581 pdst_row[0] = psrc_row[0]; 582 } 583 ps = (mlib_u64 *) (psrc_row + j - 1); 584 src1 = ps[0]; 585 #ifdef __SUNPRO_C 586 #pragma pipeloop(0) 587 #endif /* __SUNPRO_C */ 588 for (; j <= (src_width - 2); j += 2) { 589 src0 = src1; 590 src1 = ps[1]; 591 #ifdef _LITTLE_ENDIAN 592 *((mlib_s64 *) (pdst_row + j)) = (src0 >> 32) | (src1 << 32); 593 #else 594 *((mlib_s64 *) (pdst_row + j)) = (src0 << 32) | (src1 >> 32); 595 #endif /* _LITTLE_ENDIAN */ 596 ps++; 597 } 598 } 599 #endif /* _NO_LONGLONG */ 600 } 601 602 for (; j < src_width; j++) 603 pdst_row[j] = psrc_row[j]; 604 } 605 } 606 607 /***************************************************************/ 608 void mlib_c_ImageCopy_d64(const mlib_image *src, 609 mlib_image *dst) 610 { 611 PREPAREVARS(mlib_d64); 612 for (i = 0; i < src_height; i++) { 613 mlib_d64 *psrc_row = psrc + i * src_stride, *pdst_row = pdst + i * dst_stride; 614 615 #ifdef __SUNPRO_C 616 #pragma pipeloop(0) 617 #endif /* __SUNPRO_C */ 618 for (j = 0; j < src_width; j++) 619 *((mlib_d64 *) (pdst_row + j)) = *((mlib_d64 *) (psrc_row + j)); 620 } 621 } 622 623 /***************************************************************/ 624 /* 625 * Both source and destination image data are 1 - d vectors and 626 * 8 - byte aligned. And size is in 8 - bytes. 627 */ 628 629 void mlib_c_ImageCopy_a1(const TYPE_64BIT *sp, 630 TYPE_64BIT *dp, 631 mlib_s32 size) 632 { 633 mlib_s32 i; 634 635 #ifdef __SUNPRO_C 636 #pragma pipeloop(0) 637 #endif /* __SUNPRO_C */ 638 for (i = 0; i < size; i++) { 639 *dp++ = *sp++; 640 } 641 } 642 643 /***************************************************************/ 644 #ifndef _NO_LONGLONG 645 #define TYPE mlib_u64 646 #define BSIZE 64 647 #define SIZE 8 648 #else 649 #define TYPE mlib_u32 650 #define BSIZE 32 651 #define SIZE 4 652 #endif /* _NO_LONGLONG */ 653 654 /***************************************************************/ 655 void mlib_ImageCopy_na(const mlib_u8 *sp, 656 mlib_u8 *dp, 657 mlib_s32 n) 658 { 659 mlib_s32 shr, shl; 660 TYPE *tmp, s0, s1; 661 662 if (((mlib_addr) sp ^ (mlib_addr) dp) & 7) { 663 664 #ifdef __SUNPRO_C 665 #pragma pipeloop(0) 666 #endif /* __SUNPRO_C */ 667 for (; (n > 0) && (mlib_addr) dp & (SIZE - 1); n--) 668 *dp++ = *sp++; 669 670 #ifdef _NO_LONGLONG 671 672 if (((mlib_addr) sp & (SIZE - 1)) == 0) { 673 for (; n > SIZE; n -= SIZE) { 674 *(TYPE *) dp = *(TYPE *) sp; 675 dp += SIZE; 676 sp += SIZE; 677 } 678 } 679 else 680 #endif /* _NO_LONGLONG */ 681 { 682 tmp = (TYPE *) ((mlib_addr) sp & ~(SIZE - 1)); 683 /* shl and shr do not exceed 64 here */ 684 shl = (mlib_s32) (((mlib_addr) sp & (SIZE - 1)) << 3); 685 shr = BSIZE - shl; 686 s0 = *tmp++; 687 688 #ifdef __SUNPRO_C 689 #pragma pipeloop(0) 690 #endif /* __SUNPRO_C */ 691 for (; n > SIZE; n -= SIZE) { 692 s1 = *tmp++; 693 #ifdef _LITTLE_ENDIAN 694 *(TYPE *) dp = (s0 >> shl) | (s1 << shr); 695 #else 696 *(TYPE *) dp = (s0 << shl) | (s1 >> shr); 697 #endif /* _LITTLE_ENDIAN */ 698 s0 = s1; 699 dp += SIZE; 700 sp += SIZE; 701 } 702 } 703 } 704 else { 705 #ifdef __SUNPRO_C 706 #pragma pipeloop(0) 707 #endif /* __SUNPRO_C */ 708 for (; (n > 0) && (mlib_addr) dp & 7; n--) 709 *dp++ = *sp++; 710 711 #ifdef __SUNPRO_C 712 #pragma pipeloop(0) 713 #endif /* __SUNPRO_C */ 714 for (; n > 8; n -= 8) { 715 *(TYPE_64BIT *) dp = *(TYPE_64BIT *) sp; 716 dp += 8; 717 sp += 8; 718 } 719 } 720 721 #ifdef __SUNPRO_C 722 #pragma pipeloop(0) 723 #endif /* __SUNPRO_C */ 724 for (; n > 0; n--) 725 *dp++ = *sp++; 726 } 727 728 /***************************************************************/ 729 #ifdef _MSC_VER 730 #pragma optimize("", on) 731 #endif /* _MSC_VER */ 732 733 /***************************************************************/