< prev index next >

src/java.desktop/share/native/libmlib_image/mlib_c_ImageCopy.c

Print this page
rev 59383 : [mq]: final
   1 /*
   2  * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any


 215     mask >>= offset;
 216     src = da[0];
 217     da[0] = (src & (~mask)) | (sa[0] & mask);
 218     return;
 219   }
 220 
 221   mask = mask0 >> offset;
 222   src = da[0];
 223   da[0] = (src & (~mask)) | (sa[0] & mask);
 224   da++;
 225   sa++;
 226   size = size - 8 + offset;
 227   b_size = size >> 3;                       /* size in bytes */
 228 
 229   for (j = 0; (j < b_size) && (((mlib_addr) da & 7) != 0); j++)
 230     *da++ = *sa++;
 231 
 232   if ((((mlib_addr) sa ^ (mlib_addr) da) & 7) == 0) {
 233     sp = (TYPE_64BIT *) sa;
 234     dp = (TYPE_64BIT *) da;
 235 #ifdef __SUNPRO_C
 236 #pragma pipeloop(0)
 237 #endif /* __SUNPRO_C */
 238     for (i = 0; j <= (b_size - 8); j += 8, i++) {
 239       dp[i] = sp[i];
 240     }
 241 
 242     sa += i << 3;
 243     da += i << 3;
 244   }
 245   else {
 246 #ifdef _NO_LONGLONG
 247     if ((((mlib_addr) sa ^ (mlib_addr) da) & 3) == 0) {
 248       mlib_u32 *pws, *pwd;
 249 
 250       pws = (mlib_u32 *) sa;
 251       pwd = (mlib_u32 *) da;
 252 #ifdef __SUNPRO_C
 253 #pragma pipeloop(0)
 254 #endif /* __SUNPRO_C */
 255       for (i = 0; j <= (b_size - 4); j += 4, i++) {
 256         pwd[i] = pws[i];
 257       }
 258 
 259       sa += i << 2;
 260       da += i << 2;
 261     }
 262     else {
 263       mlib_u32 *pws, *pwd, src0, src1;
 264       mlib_s32 lshift = (mlib_addr) sa & 3, rshift;
 265 
 266       pwd = (mlib_u32 *) da;
 267       pws = (mlib_u32 *) (sa - lshift);
 268       lshift <<= 3;
 269       rshift = 32 - lshift;
 270 
 271       src1 = pws[0];
 272 #ifdef __SUNPRO_C
 273 #pragma pipeloop(0)
 274 #endif /* __SUNPRO_C */
 275       for (i = 0; j <= (b_size - 4); j += 4, i++) {
 276         src0 = src1;
 277         src1 = pws[i + 1];
 278 #ifdef _LITTLE_ENDIAN
 279         pwd[i] = (src0 >> lshift) | (src1 << rshift);
 280 #else
 281         pwd[i] = (src0 << lshift) | (src1 >> rshift);
 282 #endif /* _LITTLE_ENDIAN */
 283       }
 284 
 285       sa += i << 2;
 286       da += i << 2;
 287     }
 288 
 289 #else
 290     mlib_u64 *pws, *pwd, src0, src1;
 291     mlib_s32 lshift = (mlib_s32) ((mlib_addr) sa & 7), rshift;
 292 
 293     pwd = (mlib_u64 *) da;
 294     pws = (mlib_u64 *) (sa - lshift);
 295     lshift <<= 3;
 296     rshift = 64 - lshift;
 297 
 298     src1 = pws[0];
 299 #ifdef __SUNPRO_C
 300 #pragma pipeloop(0)
 301 #endif /* __SUNPRO_C */
 302     for (i = 0; j <= (b_size - 8); j += 8, i++) {
 303       src0 = src1;
 304       src1 = pws[i + 1];
 305       pwd[i] = (src0 << lshift) | (src1 >> rshift);
 306     }
 307 
 308     sa += i << 3;
 309     da += i << 3;
 310 #endif /* _NO_LONGLONG */
 311   }
 312 
 313   for (; j < b_size; j++)
 314     *da++ = *sa++;
 315 
 316   j = size & 7;
 317 
 318   if (j > 0) {
 319     mask = mask0 << (8 - j);
 320     src = da[0];
 321     da[0] = (src & (~mask)) | (sa[0] & mask);


 323 }
 324 
 325 /***************************************************************/
     /*
      * Copies the image data of src into dst row by row, handling the data
      * as mlib_u8 elements.
      *
      * NOTE(review): PREPAREVARS and STRIP are macros defined outside this
      * view.  PREPAREVARS presumably declares psrc, pdst, src_width,
      * src_height, src_stride, dst_stride, i and j, all of which are used
      * below -- confirm against the macro definition.
      *
      * Rows narrower than 16 elements are handed to STRIP.  For wider rows
      * the strategy depends on the alignment of the two row pointers:
      *   - equal offsets modulo 8: single bytes are copied until the source
      *     is 8-byte aligned, then TYPE_64BIT values are copied;
      *   - otherwise: single bytes are copied until the destination is word
      *     aligned, then whole destination words are stored (32-bit words
      *     when _NO_LONGLONG is defined, 64-bit words otherwise), each one
      *     assembled from aligned source words where necessary.
      * The remainder of every row is copied byte by byte.
      */
 326 void mlib_c_ImageCopy_u8(const mlib_image *src,
 327                          mlib_image       *dst)
 328 {
 329   PREPAREVARS(mlib_u8);
 330   if (src_width < 16) {
 331     STRIP(pdst, psrc, src_width, src_height, mlib_u8);
 332     return;
 333   }
 334 
 335   for (i = 0; i < src_height; i++) {
 336     mlib_u8 *psrc_row = psrc + i * src_stride, *pdst_row = pdst + i * dst_stride;
 337 
       /* low three address bits agree: both rows become 8-byte aligned together */
 338     if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 7)) {
         /* leading bytes up to the next 8-byte boundary of the source */
 339       for (j = 0; j < (mlib_s32) ((8 - (mlib_addr) psrc_row) & 7); j++) {
 340         pdst_row[j] = psrc_row[j];
 341       }
 342 
 343 #ifdef __SUNPRO_C
 344 #pragma pipeloop(0)
 345 #endif /* __SUNPRO_C */
 346       for (; j <= (src_width - 8); j += 8) {
 347         TYPE_64BIT dsrc0 = *((TYPE_64BIT *) (psrc_row + j));
 348 
 349         *((TYPE_64BIT *) (pdst_row + j)) = dsrc0;
 350       }
 351     }
 352     else {
 353 
 354 #ifdef _NO_LONGLONG
 355 
         /* leading bytes up to the next 4-byte boundary of the destination */
 356       for (j = 0; j < (mlib_s32) ((4 - (mlib_addr) pdst_row) & 3); j++) {
 357         pdst_row[j] = psrc_row[j];
 358       }
 359 
         /* offsets agree modulo 4: plain 32-bit word copy */
 360       if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 3)) {
 361 #ifdef __SUNPRO_C
 362 #pragma pipeloop(0)
 363 #endif /* __SUNPRO_C */
 364         for (; j <= (src_width - 4); j += 4) {
 365           *((mlib_s32 *) (pdst_row + j)) = *((mlib_s32 *) (psrc_row + j));
 366         }
 367       }
 368       else {
 369         mlib_u32 *ps, shl, shr, src0, src1;
 370 
           /* round the source pointer down to a 4-byte boundary; shl keeps
            * the discarded byte offset, converted to a bit count below */
 371         ps = (mlib_u32 *) (psrc_row + j);
 372         shl = (mlib_addr) ps & 3;
 373         ps = (mlib_u32 *) ((mlib_addr) ps - shl);
 374         shl <<= 3;
 375         shr = 32 - shl;
 376 
 377         src1 = ps[0];
 378 #ifdef __SUNPRO_C
 379 #pragma pipeloop(0)
 380 #endif /* __SUNPRO_C */
 381         for (; j <= (src_width - 4); j += 4) {
             /* each destination word combines two adjacent aligned source
              * words; the shift directions depend on the byte order */
 382           src0 = src1;
 383           src1 = ps[1];
 384 #ifdef _LITTLE_ENDIAN
 385           *((mlib_s32 *) (pdst_row + j)) = (src0 >> shl) | (src1 << shr);
 386 #else
 387           *((mlib_s32 *) (pdst_row + j)) = (src0 << shl) | (src1 >> shr);
 388 #endif /* _LITTLE_ENDIAN */
 389           ps++;
 390         }
 391       }
 392 
 393 #else
 394 
         /* leading bytes up to the next 8-byte boundary of the destination */
 395       for (j = 0; j < (mlib_s32) ((8 - (mlib_addr) pdst_row) & 7); j++) {
 396         pdst_row[j] = psrc_row[j];
 397       }
 398 
 399       {
 400         mlib_s32 shl, shr;
 401         mlib_u64 *ps, src0, src1;
 402 
           /* round the source pointer down to an 8-byte boundary; shl keeps
            * the discarded byte offset, converted to a bit count below */
 403         ps = (mlib_u64 *) (psrc_row + j);
 404         /* shl and shr are in range [0, 64] */
 405         shl = (mlib_s32) ((mlib_addr) ps & 7);
 406         ps = (mlib_u64 *) ((mlib_addr) ps - shl);
 407         shl <<= 3;
 408         shr = 64 - shl;
 409 
 410         src1 = ps[0];
 411 #ifdef __SUNPRO_C
 412 #pragma pipeloop(0)
 413 #endif /* __SUNPRO_C */
 414         for (; j <= (src_width - 8); j += 8) {
             /* each destination word combines two adjacent aligned source
              * words; the shift directions depend on the byte order */
 415           src0 = src1;
 416           src1 = ps[1];
 417 #ifdef _LITTLE_ENDIAN
 418           *((mlib_s64 *) (pdst_row + j)) = (src0 >> shl) | (src1 << shr);
 419 #else
 420           *((mlib_s64 *) (pdst_row + j)) = (src0 << shl) | (src1 >> shr);
 421 #endif /* _LITTLE_ENDIAN */
 422           ps++;
 423         }
 424       }
 425 #endif /* _NO_LONGLONG */
 426     }
 427 
       /* tail: elements not covered by the word loops above */
 428     for (; j < src_width; j++)
 429       pdst_row[j] = psrc_row[j];
 430   }
 431 }
 432 
 433 /***************************************************************/
     /*
      * Copies the image data of src into dst row by row, handling the data
      * as mlib_u16 elements (j and src_width count elements, not bytes).
      *
      * NOTE(review): PREPAREVARS and STRIP are macros defined outside this
      * view.  PREPAREVARS presumably declares psrc, pdst, src_width,
      * src_height, src_stride, dst_stride, i and j -- confirm.
      *
      * Rows narrower than 8 elements are handed to STRIP.  Wider rows are
      * copied with 64-bit accesses when both row pointers share the same
      * offset modulo 8; otherwise destination-aligned words are stored
      * (32-bit when _NO_LONGLONG is defined, 64-bit otherwise), assembled
      * from aligned source words where necessary.  The rest of each row is
      * copied element by element.
      */
 434 void mlib_c_ImageCopy_s16(const mlib_image       *src,
 435                           mlib_image *dst)
 436 {
 437   PREPAREVARS(mlib_u16);
 438   if (src_width < 8) {
 439     STRIP(pdst, psrc, src_width, src_height, mlib_u16);
 440     return;
 441   }
 442 
 443   for (i = 0; i < src_height; i++) {
 444     mlib_u16 *psrc_row = psrc + i * src_stride, *pdst_row = pdst + i * dst_stride;
 445 
       /* low three address bits agree: both rows become 8-byte aligned together */
 446     if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 7)) {
         /* leading elements; the byte distance to the boundary is halved
          * to obtain an element count */
 447       for (j = 0; j < (mlib_s32) (((8 - (mlib_addr) psrc_row) & 7) >> 1); j++) {
 448         pdst_row[j] = psrc_row[j];
 449       }
 450 
 451 #ifdef __SUNPRO_C
 452 #pragma pipeloop(0)
 453 #endif /* __SUNPRO_C */
 454       for (; j <= (src_width - 4); j += 4) {
 455         TYPE_64BIT dsrc0 = *((TYPE_64BIT *) (psrc_row + j));
 456 
 457         *((TYPE_64BIT *) (pdst_row + j)) = dsrc0;
 458       }
 459     }
 460     else {
 461 
 462 #ifdef _NO_LONGLONG
 463 
         /* j becomes 1 when bit 1 of the destination address is set, and one
          * element is then copied first; the assignment inside the condition
          * is intentional */
 464       if (j = (((mlib_addr) pdst_row & 2) != 0)) {
 465         pdst_row[0] = psrc_row[0];
 466       }
 467 
         /* offsets agree modulo 4: plain 32-bit word copy (two elements) */
 468       if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 3)) {
 469 #ifdef __SUNPRO_C
 470 #pragma pipeloop(0)
 471 #endif /* __SUNPRO_C */
 472         for (; j <= (src_width - 2); j += 2) {
 473           *((mlib_s32 *) (pdst_row + j)) = *((mlib_s32 *) (psrc_row + j));
 474         }
 475       }
 476       else {
 477         mlib_u32 *ps, src0, src1;
 478 
           /* source words are read starting one element before psrc_row + j.
            * NOTE(review): for j == 0 that word begins before the row start
            * -- confirm it is always readable */
 479         ps = (mlib_u32 *) (psrc_row + j - 1);
 480         src1 = ps[0];
 481 #ifdef __SUNPRO_C
 482 #pragma pipeloop(0)
 483 #endif /* __SUNPRO_C */
 484         for (; j <= (src_width - 2); j += 2) {
             /* one half of each of two adjacent source words forms a
              * destination word */
 485           src0 = src1;
 486           src1 = ps[1];
 487 #ifdef _LITTLE_ENDIAN
 488           *((mlib_s32 *) (pdst_row + j)) = (src0 >> 16) | (src1 << 16);
 489 #else
 490           *((mlib_s32 *) (pdst_row + j)) = (src0 << 16) | (src1 >> 16);
 491 #endif /* _LITTLE_ENDIAN */
 492           ps++;
 493         }
 494       }
 495 
 496 #else
 497 
         /* leading elements up to the next 8-byte boundary of the destination */
 498       for (j = 0; j < (mlib_s32) (((8 - (mlib_addr) pdst_row) & 7) >> 1); j++) {
 499         pdst_row[j] = psrc_row[j];
 500       }
 501 
 502       {
 503         mlib_s32 shl, shr;
 504         mlib_u64 *ps, src0, src1;
 505 
           /* round the source pointer down to an 8-byte boundary; shl keeps
            * the discarded byte offset, converted to a bit count below */
 506         ps = (mlib_u64 *) (psrc_row + j);
 507         shl = (mlib_s32) ((mlib_addr) ps & 7);
 508         ps = (mlib_u64 *) ((mlib_addr) ps - shl);
 509         shl <<= 3;
 510         shr = 64 - shl;
 511 
 512         src1 = ps[0];
 513 #ifdef __SUNPRO_C
 514 #pragma pipeloop(0)
 515 #endif /* __SUNPRO_C */
 516         for (; j <= (src_width - 4); j += 4) {
             /* each destination word combines two adjacent aligned source
              * words; the shift directions depend on the byte order */
 517           src0 = src1;
 518           src1 = ps[1];
 519 #ifdef _LITTLE_ENDIAN
 520           *((mlib_s64 *) (pdst_row + j)) = (src0 >> shl) | (src1 << shr);
 521 #else
 522           *((mlib_s64 *) (pdst_row + j)) = (src0 << shl) | (src1 >> shr);
 523 #endif /* _LITTLE_ENDIAN */
 524           ps++;
 525         }
 526       }
 527 #endif /* _NO_LONGLONG */
 528     }
 529 
       /* tail: elements not covered by the word loops above */
 530     for (; j < src_width; j++)
 531       pdst_row[j] = psrc_row[j];
 532   }
 533 }
 534 
 535 /***************************************************************/
     /*
      * Copies the image data of src into dst row by row, handling the data
      * as mlib_u32 elements (j and src_width count elements, not bytes).
      *
      * NOTE(review): PREPAREVARS and STRIP are macros defined outside this
      * view.  PREPAREVARS presumably declares psrc, pdst, src_width,
      * src_height, src_stride, dst_stride, i and j -- confirm.
      *
      * Rows narrower than 4 elements are handed to STRIP.  Wider rows are
      * copied with 64-bit accesses when both row pointers share the same
      * offset modulo 8.  Otherwise the row is copied element by element
      * when _NO_LONGLONG is defined, or with 64-bit stores assembled from
      * two adjacent 64-bit source words when it is not.
      */
 536 void mlib_c_ImageCopy_s32(const mlib_image       *src,
 537                           mlib_image *dst)
 538 {
 539   PREPAREVARS(mlib_u32);
 540   if (src_width < 4) {
 541     STRIP(pdst, psrc, src_width, src_height, mlib_u32);
 542     return;
 543   }
 544 
 545   for (i = 0; i < src_height; i++) {
 546     mlib_u32 *psrc_row = psrc + i * src_stride, *pdst_row = pdst + i * dst_stride;
 547 
       /* low three address bits agree: both rows become 8-byte aligned together */
 548     if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 7)) {
         /* j is 1 when bit 2 of the source address is set; one element is
          * then copied on its own before the 64-bit loop */
 549       j = (mlib_s32) ((mlib_addr) psrc_row & 4) >> 2;
 550       if (j != 0) {
 551         pdst_row[0] = psrc_row[0];
 552       }
 553 
 554 #ifdef __SUNPRO_C
 555 #pragma pipeloop(0)
 556 #endif /* __SUNPRO_C */
 557       for (; j <= (src_width - 2); j += 2) {
 558         TYPE_64BIT dsrc0 = *((TYPE_64BIT *) (psrc_row + j));
 559 
 560         *((TYPE_64BIT *) (pdst_row + j)) = dsrc0;
 561       }
 562     }
 563     else {
 564 
 565 #ifdef _NO_LONGLONG
 566 
         /* no 64-bit integer type: copy the whole row one element at a time */
 567 #ifdef __SUNPRO_C
 568 #pragma pipeloop(0)
 569 #endif /* __SUNPRO_C */
 570       for (j = 0; j <= (src_width - 1); j++) {
 571         *((mlib_s32 *) (pdst_row + j)) = *((mlib_s32 *) (psrc_row + j));
 572       }
 573 
 574 #else
 575 
 576       {
 577         mlib_u64 *ps, src0, src1;
 578 
           /* j is 1 when bit 2 of the destination address is set; one
            * element is then copied on its own first */
 579         j = (mlib_s32) ((mlib_addr) pdst_row & 4) >> 2;
 580         if (j != 0) {
 581           pdst_row[0] = psrc_row[0];
 582         }
           /* source words are read starting one element before psrc_row + j.
            * NOTE(review): for j == 0 that word begins before the row start
            * -- confirm it is always readable */
 583         ps = (mlib_u64 *) (psrc_row + j - 1);
 584         src1 = ps[0];
 585 #ifdef __SUNPRO_C
 586 #pragma pipeloop(0)
 587 #endif /* __SUNPRO_C */
 588         for (; j <= (src_width - 2); j += 2) {
             /* one half of each of two adjacent source words forms a
              * destination word */
 589           src0 = src1;
 590           src1 = ps[1];
 591 #ifdef _LITTLE_ENDIAN
 592           *((mlib_s64 *) (pdst_row + j)) = (src0 >> 32) | (src1 << 32);
 593 #else
 594           *((mlib_s64 *) (pdst_row + j)) = (src0 << 32) | (src1 >> 32);
 595 #endif /* _LITTLE_ENDIAN */
 596           ps++;
 597         }
 598       }
 599 #endif /* _NO_LONGLONG */
 600     }
 601 
       /* tail: elements not covered by the loops above */
 602     for (; j < src_width; j++)
 603       pdst_row[j] = psrc_row[j];
 604   }
 605 }
 606 
 607 /***************************************************************/
 608 void mlib_c_ImageCopy_d64(const mlib_image       *src,
 609                           mlib_image *dst)
 610 {
 611   PREPAREVARS(mlib_d64);
 612   for (i = 0; i < src_height; i++) {
 613     mlib_d64 *psrc_row = psrc + i * src_stride, *pdst_row = pdst + i * dst_stride;
 614 
 615 #ifdef __SUNPRO_C
 616 #pragma pipeloop(0)
 617 #endif /* __SUNPRO_C */
 618     for (j = 0; j < src_width; j++)
 619       *((mlib_d64 *) (pdst_row + j)) = *((mlib_d64 *) (psrc_row + j));
 620   }
 621 }
 622 
 623 /***************************************************************/
 624 /*
 625  * Both source and destination image data are 1 - d vectors and
 626  * 8 - byte aligned. And size is in 8 - bytes.
 627  */
 628 
 629 void mlib_c_ImageCopy_a1(const TYPE_64BIT *sp,
 630                          TYPE_64BIT       *dp,
 631                          mlib_s32         size)
 632 {
 633   mlib_s32 i;
 634 
 635 #ifdef __SUNPRO_C
 636 #pragma pipeloop(0)
 637 #endif /* __SUNPRO_C */
 638   for (i = 0; i < size; i++) {
 639     *dp++ = *sp++;
 640   }
 641 }
 642 
 643 /***************************************************************/
     /*
      * Word type used by mlib_ImageCopy_na for its word-at-a-time copy:
      *   TYPE  - the unsigned integer word type,
      *   BSIZE - width of TYPE in bits,
      *   SIZE  - width of TYPE in bytes.
      * 64-bit words are used unless _NO_LONGLONG is defined, in which case
      * 32-bit words are used.
      */
 644 #ifndef _NO_LONGLONG
 645 #define TYPE    mlib_u64
 646 #define BSIZE   64
 647 #define SIZE    8
 648 #else
 649 #define TYPE    mlib_u32
 650 #define BSIZE   32
 651 #define SIZE    4
 652 #endif /* _NO_LONGLONG */
 653 
 654 /***************************************************************/
 655 void mlib_ImageCopy_na(const mlib_u8 *sp,
 656                        mlib_u8       *dp,
 657                        mlib_s32      n)
 658 {
 659   mlib_s32 shr, shl;
 660   TYPE *tmp, s0, s1;
 661 
 662   if (((mlib_addr) sp ^ (mlib_addr) dp) & 7) {
 663 
 664 #ifdef __SUNPRO_C
 665 #pragma pipeloop(0)
 666 #endif /* __SUNPRO_C */
 667     for (; (n > 0) && (mlib_addr) dp & (SIZE - 1); n--)
 668       *dp++ = *sp++;
 669 
 670 #ifdef _NO_LONGLONG
 671 
 672     if (((mlib_addr) sp & (SIZE - 1)) == 0) {
 673       for (; n > SIZE; n -= SIZE) {
 674         *(TYPE *) dp = *(TYPE *) sp;
 675         dp += SIZE;
 676         sp += SIZE;
 677       }
 678     }
 679     else
 680 #endif /* _NO_LONGLONG */
 681     {
 682       tmp = (TYPE *) ((mlib_addr) sp & ~(SIZE - 1));
 683       /* shl and shr do not exceed 64 here */
 684       shl = (mlib_s32) (((mlib_addr) sp & (SIZE - 1)) << 3);
 685       shr = BSIZE - shl;
 686       s0 = *tmp++;
 687 
 688 #ifdef __SUNPRO_C
 689 #pragma pipeloop(0)
 690 #endif /* __SUNPRO_C */
 691       for (; n > SIZE; n -= SIZE) {
 692         s1 = *tmp++;
 693 #ifdef _LITTLE_ENDIAN
 694         *(TYPE *) dp = (s0 >> shl) | (s1 << shr);
 695 #else
 696         *(TYPE *) dp = (s0 << shl) | (s1 >> shr);
 697 #endif /* _LITTLE_ENDIAN */
 698         s0 = s1;
 699         dp += SIZE;
 700         sp += SIZE;
 701       }
 702     }
 703   }
 704   else {
 705 #ifdef __SUNPRO_C
 706 #pragma pipeloop(0)
 707 #endif /* __SUNPRO_C */
 708     for (; (n > 0) && (mlib_addr) dp & 7; n--)
 709       *dp++ = *sp++;
 710 
 711 #ifdef __SUNPRO_C
 712 #pragma pipeloop(0)
 713 #endif /* __SUNPRO_C */
 714     for (; n > 8; n -= 8) {
 715       *(TYPE_64BIT *) dp = *(TYPE_64BIT *) sp;
 716       dp += 8;
 717       sp += 8;
 718     }
 719   }
 720 
 721 #ifdef __SUNPRO_C
 722 #pragma pipeloop(0)
 723 #endif /* __SUNPRO_C */
 724   for (; n > 0; n--)
 725     *dp++ = *sp++;
 726 }
 727 
 728 /***************************************************************/
     /*
      * With the Microsoft compiler, switch optimization back on for the code
      * that follows.
      * NOTE(review): presumably pairs with a '#pragma optimize("", off)'
      * earlier in the file, outside the visible part -- confirm.
      */
 729 #ifdef _MSC_VER
 730 #pragma optimize("", on)
 731 #endif /* _MSC_VER */
 732 
 733 /***************************************************************/
   1 /*
   2  * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any


 215     mask >>= offset;
 216     src = da[0];
 217     da[0] = (src & (~mask)) | (sa[0] & mask);
 218     return;
 219   }
 220 
 221   mask = mask0 >> offset;
 222   src = da[0];
 223   da[0] = (src & (~mask)) | (sa[0] & mask);
 224   da++;
 225   sa++;
 226   size = size - 8 + offset;
 227   b_size = size >> 3;                       /* size in bytes */
 228 
 229   for (j = 0; (j < b_size) && (((mlib_addr) da & 7) != 0); j++)
 230     *da++ = *sa++;
 231 
 232   if ((((mlib_addr) sa ^ (mlib_addr) da) & 7) == 0) {
 233     sp = (TYPE_64BIT *) sa;
 234     dp = (TYPE_64BIT *) da;



 235     for (i = 0; j <= (b_size - 8); j += 8, i++) {
 236       dp[i] = sp[i];
 237     }
 238 
 239     sa += i << 3;
 240     da += i << 3;
 241   }
 242   else {
 243 #ifdef _NO_LONGLONG
 244     if ((((mlib_addr) sa ^ (mlib_addr) da) & 3) == 0) {
 245       mlib_u32 *pws, *pwd;
 246 
 247       pws = (mlib_u32 *) sa;
 248       pwd = (mlib_u32 *) da;



 249       for (i = 0; j <= (b_size - 4); j += 4, i++) {
 250         pwd[i] = pws[i];
 251       }
 252 
 253       sa += i << 2;
 254       da += i << 2;
 255     }
 256     else {
 257       mlib_u32 *pws, *pwd, src0, src1;
 258       mlib_s32 lshift = (mlib_addr) sa & 3, rshift;
 259 
 260       pwd = (mlib_u32 *) da;
 261       pws = (mlib_u32 *) (sa - lshift);
 262       lshift <<= 3;
 263       rshift = 32 - lshift;
 264 
 265       src1 = pws[0];



 266       for (i = 0; j <= (b_size - 4); j += 4, i++) {
 267         src0 = src1;
 268         src1 = pws[i + 1];
 269 #ifdef _LITTLE_ENDIAN
 270         pwd[i] = (src0 >> lshift) | (src1 << rshift);
 271 #else
 272         pwd[i] = (src0 << lshift) | (src1 >> rshift);
 273 #endif /* _LITTLE_ENDIAN */
 274       }
 275 
 276       sa += i << 2;
 277       da += i << 2;
 278     }
 279 
 280 #else
 281     mlib_u64 *pws, *pwd, src0, src1;
 282     mlib_s32 lshift = (mlib_s32) ((mlib_addr) sa & 7), rshift;
 283 
 284     pwd = (mlib_u64 *) da;
 285     pws = (mlib_u64 *) (sa - lshift);
 286     lshift <<= 3;
 287     rshift = 64 - lshift;
 288 
 289     src1 = pws[0];



 290     for (i = 0; j <= (b_size - 8); j += 8, i++) {
 291       src0 = src1;
 292       src1 = pws[i + 1];
 293       pwd[i] = (src0 << lshift) | (src1 >> rshift);
 294     }
 295 
 296     sa += i << 3;
 297     da += i << 3;
 298 #endif /* _NO_LONGLONG */
 299   }
 300 
 301   for (; j < b_size; j++)
 302     *da++ = *sa++;
 303 
 304   j = size & 7;
 305 
 306   if (j > 0) {
 307     mask = mask0 << (8 - j);
 308     src = da[0];
 309     da[0] = (src & (~mask)) | (sa[0] & mask);


 311 }
 312 
 313 /***************************************************************/
     /*
      * Copies the image data of src into dst row by row, handling the data
      * as mlib_u8 elements.
      *
      * NOTE(review): PREPAREVARS and STRIP are macros defined outside this
      * view.  PREPAREVARS presumably declares psrc, pdst, src_width,
      * src_height, src_stride, dst_stride, i and j, all of which are used
      * below -- confirm against the macro definition.
      *
      * Rows narrower than 16 elements are handed to STRIP.  For wider rows
      * the strategy depends on the alignment of the two row pointers:
      *   - equal offsets modulo 8: single bytes are copied until the source
      *     is 8-byte aligned, then TYPE_64BIT values are copied;
      *   - otherwise: single bytes are copied until the destination is word
      *     aligned, then whole destination words are stored (32-bit words
      *     when _NO_LONGLONG is defined, 64-bit words otherwise), each one
      *     assembled from aligned source words where necessary.
      * The remainder of every row is copied byte by byte.
      */
 314 void mlib_c_ImageCopy_u8(const mlib_image *src,
 315                          mlib_image       *dst)
 316 {
 317   PREPAREVARS(mlib_u8);
 318   if (src_width < 16) {
 319     STRIP(pdst, psrc, src_width, src_height, mlib_u8);
 320     return;
 321   }
 322 
 323   for (i = 0; i < src_height; i++) {
 324     mlib_u8 *psrc_row = psrc + i * src_stride, *pdst_row = pdst + i * dst_stride;
 325 
       /* low three address bits agree: both rows become 8-byte aligned together */
 326     if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 7)) {
         /* leading bytes up to the next 8-byte boundary of the source */
 327       for (j = 0; j < (mlib_s32) ((8 - (mlib_addr) psrc_row) & 7); j++) {
 328         pdst_row[j] = psrc_row[j];
 329       }
 330 



 331       for (; j <= (src_width - 8); j += 8) {
 332         TYPE_64BIT dsrc0 = *((TYPE_64BIT *) (psrc_row + j));
 333 
 334         *((TYPE_64BIT *) (pdst_row + j)) = dsrc0;
 335       }
 336     }
 337     else {
 338 
 339 #ifdef _NO_LONGLONG
 340 
         /* leading bytes up to the next 4-byte boundary of the destination */
 341       for (j = 0; j < (mlib_s32) ((4 - (mlib_addr) pdst_row) & 3); j++) {
 342         pdst_row[j] = psrc_row[j];
 343       }
 344 
         /* offsets agree modulo 4: plain 32-bit word copy */
 345       if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 3)) {



 346         for (; j <= (src_width - 4); j += 4) {
 347           *((mlib_s32 *) (pdst_row + j)) = *((mlib_s32 *) (psrc_row + j));
 348         }
 349       }
 350       else {
 351         mlib_u32 *ps, shl, shr, src0, src1;
 352 
           /* round the source pointer down to a 4-byte boundary; shl keeps
            * the discarded byte offset, converted to a bit count below */
 353         ps = (mlib_u32 *) (psrc_row + j);
 354         shl = (mlib_addr) ps & 3;
 355         ps = (mlib_u32 *) ((mlib_addr) ps - shl);
 356         shl <<= 3;
 357         shr = 32 - shl;
 358 
 359         src1 = ps[0];



 360         for (; j <= (src_width - 4); j += 4) {
             /* each destination word combines two adjacent aligned source
              * words; the shift directions depend on the byte order */
 361           src0 = src1;
 362           src1 = ps[1];
 363 #ifdef _LITTLE_ENDIAN
 364           *((mlib_s32 *) (pdst_row + j)) = (src0 >> shl) | (src1 << shr);
 365 #else
 366           *((mlib_s32 *) (pdst_row + j)) = (src0 << shl) | (src1 >> shr);
 367 #endif /* _LITTLE_ENDIAN */
 368           ps++;
 369         }
 370       }
 371 
 372 #else
 373 
         /* leading bytes up to the next 8-byte boundary of the destination */
 374       for (j = 0; j < (mlib_s32) ((8 - (mlib_addr) pdst_row) & 7); j++) {
 375         pdst_row[j] = psrc_row[j];
 376       }
 377 
 378       {
 379         mlib_s32 shl, shr;
 380         mlib_u64 *ps, src0, src1;
 381 
           /* round the source pointer down to an 8-byte boundary; shl keeps
            * the discarded byte offset, converted to a bit count below */
 382         ps = (mlib_u64 *) (psrc_row + j);
 383         /* shl and shr are in range [0, 64] */
 384         shl = (mlib_s32) ((mlib_addr) ps & 7);
 385         ps = (mlib_u64 *) ((mlib_addr) ps - shl);
 386         shl <<= 3;
 387         shr = 64 - shl;
 388 
 389         src1 = ps[0];



 390         for (; j <= (src_width - 8); j += 8) {
             /* each destination word combines two adjacent aligned source
              * words; the shift directions depend on the byte order */
 391           src0 = src1;
 392           src1 = ps[1];
 393 #ifdef _LITTLE_ENDIAN
 394           *((mlib_s64 *) (pdst_row + j)) = (src0 >> shl) | (src1 << shr);
 395 #else
 396           *((mlib_s64 *) (pdst_row + j)) = (src0 << shl) | (src1 >> shr);
 397 #endif /* _LITTLE_ENDIAN */
 398           ps++;
 399         }
 400       }
 401 #endif /* _NO_LONGLONG */
 402     }
 403 
       /* tail: elements not covered by the word loops above */
 404     for (; j < src_width; j++)
 405       pdst_row[j] = psrc_row[j];
 406   }
 407 }
 408 
 409 /***************************************************************/
     /*
      * Copies the image data of src into dst row by row, handling the data
      * as mlib_u16 elements (j and src_width count elements, not bytes).
      *
      * NOTE(review): PREPAREVARS and STRIP are macros defined outside this
      * view.  PREPAREVARS presumably declares psrc, pdst, src_width,
      * src_height, src_stride, dst_stride, i and j -- confirm.
      *
      * Rows narrower than 8 elements are handed to STRIP.  Wider rows are
      * copied with 64-bit accesses when both row pointers share the same
      * offset modulo 8; otherwise destination-aligned words are stored
      * (32-bit when _NO_LONGLONG is defined, 64-bit otherwise), assembled
      * from aligned source words where necessary.  The rest of each row is
      * copied element by element.
      */
 410 void mlib_c_ImageCopy_s16(const mlib_image       *src,
 411                           mlib_image *dst)
 412 {
 413   PREPAREVARS(mlib_u16);
 414   if (src_width < 8) {
 415     STRIP(pdst, psrc, src_width, src_height, mlib_u16);
 416     return;
 417   }
 418 
 419   for (i = 0; i < src_height; i++) {
 420     mlib_u16 *psrc_row = psrc + i * src_stride, *pdst_row = pdst + i * dst_stride;
 421 
       /* low three address bits agree: both rows become 8-byte aligned together */
 422     if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 7)) {
         /* leading elements; the byte distance to the boundary is halved
          * to obtain an element count */
 423       for (j = 0; j < (mlib_s32) (((8 - (mlib_addr) psrc_row) & 7) >> 1); j++) {
 424         pdst_row[j] = psrc_row[j];
 425       }
 426 



 427       for (; j <= (src_width - 4); j += 4) {
 428         TYPE_64BIT dsrc0 = *((TYPE_64BIT *) (psrc_row + j));
 429 
 430         *((TYPE_64BIT *) (pdst_row + j)) = dsrc0;
 431       }
 432     }
 433     else {
 434 
 435 #ifdef _NO_LONGLONG
 436 
         /* j becomes 1 when bit 1 of the destination address is set, and one
          * element is then copied first; the assignment inside the condition
          * is intentional */
 437       if (j = (((mlib_addr) pdst_row & 2) != 0)) {
 438         pdst_row[0] = psrc_row[0];
 439       }
 440 
         /* offsets agree modulo 4: plain 32-bit word copy (two elements) */
 441       if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 3)) {



 442         for (; j <= (src_width - 2); j += 2) {
 443           *((mlib_s32 *) (pdst_row + j)) = *((mlib_s32 *) (psrc_row + j));
 444         }
 445       }
 446       else {
 447         mlib_u32 *ps, src0, src1;
 448 
           /* source words are read starting one element before psrc_row + j.
            * NOTE(review): for j == 0 that word begins before the row start
            * -- confirm it is always readable */
 449         ps = (mlib_u32 *) (psrc_row + j - 1);
 450         src1 = ps[0];



 451         for (; j <= (src_width - 2); j += 2) {
             /* one half of each of two adjacent source words forms a
              * destination word */
 452           src0 = src1;
 453           src1 = ps[1];
 454 #ifdef _LITTLE_ENDIAN
 455           *((mlib_s32 *) (pdst_row + j)) = (src0 >> 16) | (src1 << 16);
 456 #else
 457           *((mlib_s32 *) (pdst_row + j)) = (src0 << 16) | (src1 >> 16);
 458 #endif /* _LITTLE_ENDIAN */
 459           ps++;
 460         }
 461       }
 462 
 463 #else
 464 
         /* leading elements up to the next 8-byte boundary of the destination */
 465       for (j = 0; j < (mlib_s32) (((8 - (mlib_addr) pdst_row) & 7) >> 1); j++) {
 466         pdst_row[j] = psrc_row[j];
 467       }
 468 
 469       {
 470         mlib_s32 shl, shr;
 471         mlib_u64 *ps, src0, src1;
 472 
           /* round the source pointer down to an 8-byte boundary; shl keeps
            * the discarded byte offset, converted to a bit count below */
 473         ps = (mlib_u64 *) (psrc_row + j);
 474         shl = (mlib_s32) ((mlib_addr) ps & 7);
 475         ps = (mlib_u64 *) ((mlib_addr) ps - shl);
 476         shl <<= 3;
 477         shr = 64 - shl;
 478 
 479         src1 = ps[0];



 480         for (; j <= (src_width - 4); j += 4) {
             /* each destination word combines two adjacent aligned source
              * words; the shift directions depend on the byte order */
 481           src0 = src1;
 482           src1 = ps[1];
 483 #ifdef _LITTLE_ENDIAN
 484           *((mlib_s64 *) (pdst_row + j)) = (src0 >> shl) | (src1 << shr);
 485 #else
 486           *((mlib_s64 *) (pdst_row + j)) = (src0 << shl) | (src1 >> shr);
 487 #endif /* _LITTLE_ENDIAN */
 488           ps++;
 489         }
 490       }
 491 #endif /* _NO_LONGLONG */
 492     }
 493 
       /* tail: elements not covered by the word loops above */
 494     for (; j < src_width; j++)
 495       pdst_row[j] = psrc_row[j];
 496   }
 497 }
 498 
 499 /***************************************************************/
     /*
      * Copies the image data of src into dst row by row, handling the data
      * as mlib_u32 elements (j and src_width count elements, not bytes).
      *
      * NOTE(review): PREPAREVARS and STRIP are macros defined outside this
      * view.  PREPAREVARS presumably declares psrc, pdst, src_width,
      * src_height, src_stride, dst_stride, i and j -- confirm.
      *
      * Rows narrower than 4 elements are handed to STRIP.  Wider rows are
      * copied with 64-bit accesses when both row pointers share the same
      * offset modulo 8.  Otherwise the row is copied element by element
      * when _NO_LONGLONG is defined, or with 64-bit stores assembled from
      * two adjacent 64-bit source words when it is not.
      */
 500 void mlib_c_ImageCopy_s32(const mlib_image       *src,
 501                           mlib_image *dst)
 502 {
 503   PREPAREVARS(mlib_u32);
 504   if (src_width < 4) {
 505     STRIP(pdst, psrc, src_width, src_height, mlib_u32);
 506     return;
 507   }
 508 
 509   for (i = 0; i < src_height; i++) {
 510     mlib_u32 *psrc_row = psrc + i * src_stride, *pdst_row = pdst + i * dst_stride;
 511 
       /* low three address bits agree: both rows become 8-byte aligned together */
 512     if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 7)) {
         /* j is 1 when bit 2 of the source address is set; one element is
          * then copied on its own before the 64-bit loop */
 513       j = (mlib_s32) ((mlib_addr) psrc_row & 4) >> 2;
 514       if (j != 0) {
 515         pdst_row[0] = psrc_row[0];
 516       }
 517 



 518       for (; j <= (src_width - 2); j += 2) {
 519         TYPE_64BIT dsrc0 = *((TYPE_64BIT *) (psrc_row + j));
 520 
 521         *((TYPE_64BIT *) (pdst_row + j)) = dsrc0;
 522       }
 523     }
 524     else {
 525 
 526 #ifdef _NO_LONGLONG
 527 
         /* no 64-bit integer type: copy the whole row one element at a time */



 528       for (j = 0; j <= (src_width - 1); j++) {
 529         *((mlib_s32 *) (pdst_row + j)) = *((mlib_s32 *) (psrc_row + j));
 530       }
 531 
 532 #else
 533 
 534       {
 535         mlib_u64 *ps, src0, src1;
 536 
           /* j is 1 when bit 2 of the destination address is set; one
            * element is then copied on its own first */
 537         j = (mlib_s32) ((mlib_addr) pdst_row & 4) >> 2;
 538         if (j != 0) {
 539           pdst_row[0] = psrc_row[0];
 540         }
           /* source words are read starting one element before psrc_row + j.
            * NOTE(review): for j == 0 that word begins before the row start
            * -- confirm it is always readable */
 541         ps = (mlib_u64 *) (psrc_row + j - 1);
 542         src1 = ps[0];



 543         for (; j <= (src_width - 2); j += 2) {
             /* one half of each of two adjacent source words forms a
              * destination word */
 544           src0 = src1;
 545           src1 = ps[1];
 546 #ifdef _LITTLE_ENDIAN
 547           *((mlib_s64 *) (pdst_row + j)) = (src0 >> 32) | (src1 << 32);
 548 #else
 549           *((mlib_s64 *) (pdst_row + j)) = (src0 << 32) | (src1 >> 32);
 550 #endif /* _LITTLE_ENDIAN */
 551           ps++;
 552         }
 553       }
 554 #endif /* _NO_LONGLONG */
 555     }
 556 
       /* tail: elements not covered by the loops above */
 557     for (; j < src_width; j++)
 558       pdst_row[j] = psrc_row[j];
 559   }
 560 }
 561 
 562 /***************************************************************/
 563 void mlib_c_ImageCopy_d64(const mlib_image       *src,
 564                           mlib_image *dst)
 565 {
 566   PREPAREVARS(mlib_d64);
 567   for (i = 0; i < src_height; i++) {
 568     mlib_d64 *psrc_row = psrc + i * src_stride, *pdst_row = pdst + i * dst_stride;
 569 



 570     for (j = 0; j < src_width; j++)
 571       *((mlib_d64 *) (pdst_row + j)) = *((mlib_d64 *) (psrc_row + j));
 572   }
 573 }
 574 
 575 /***************************************************************/
 576 /*
 577  * Both source and destination image data are 1 - d vectors and
 578  * 8 - byte aligned. And size is in 8 - bytes.
 579  */
 580 
 581 void mlib_c_ImageCopy_a1(const TYPE_64BIT *sp,
 582                          TYPE_64BIT       *dp,
 583                          mlib_s32         size)
 584 {
 585   mlib_s32 i;
 586 



 587   for (i = 0; i < size; i++) {
 588     *dp++ = *sp++;
 589   }
 590 }
 591 
 592 /***************************************************************/
     /*
      * Word type used by mlib_ImageCopy_na for its word-at-a-time copy:
      *   TYPE  - the unsigned integer word type,
      *   BSIZE - width of TYPE in bits,
      *   SIZE  - width of TYPE in bytes.
      * 64-bit words are used unless _NO_LONGLONG is defined, in which case
      * 32-bit words are used.
      */
 593 #ifndef _NO_LONGLONG
 594 #define TYPE    mlib_u64
 595 #define BSIZE   64
 596 #define SIZE    8
 597 #else
 598 #define TYPE    mlib_u32
 599 #define BSIZE   32
 600 #define SIZE    4
 601 #endif /* _NO_LONGLONG */
 602 
 603 /***************************************************************/
 604 void mlib_ImageCopy_na(const mlib_u8 *sp,
 605                        mlib_u8       *dp,
 606                        mlib_s32      n)
 607 {
 608   mlib_s32 shr, shl;
 609   TYPE *tmp, s0, s1;
 610 
 611   if (((mlib_addr) sp ^ (mlib_addr) dp) & 7) {
 612 



 613     for (; (n > 0) && (mlib_addr) dp & (SIZE - 1); n--)
 614       *dp++ = *sp++;
 615 
 616 #ifdef _NO_LONGLONG
 617 
 618     if (((mlib_addr) sp & (SIZE - 1)) == 0) {
 619       for (; n > SIZE; n -= SIZE) {
 620         *(TYPE *) dp = *(TYPE *) sp;
 621         dp += SIZE;
 622         sp += SIZE;
 623       }
 624     }
 625     else
 626 #endif /* _NO_LONGLONG */
 627     {
 628       tmp = (TYPE *) ((mlib_addr) sp & ~(SIZE - 1));
 629       /* shl and shr do not exceed 64 here */
 630       shl = (mlib_s32) (((mlib_addr) sp & (SIZE - 1)) << 3);
 631       shr = BSIZE - shl;
 632       s0 = *tmp++;
 633 



 634       for (; n > SIZE; n -= SIZE) {
 635         s1 = *tmp++;
 636 #ifdef _LITTLE_ENDIAN
 637         *(TYPE *) dp = (s0 >> shl) | (s1 << shr);
 638 #else
 639         *(TYPE *) dp = (s0 << shl) | (s1 >> shr);
 640 #endif /* _LITTLE_ENDIAN */
 641         s0 = s1;
 642         dp += SIZE;
 643         sp += SIZE;
 644       }
 645     }
 646   }
 647   else {



 648     for (; (n > 0) && (mlib_addr) dp & 7; n--)
 649       *dp++ = *sp++;
 650 



 651     for (; n > 8; n -= 8) {
 652       *(TYPE_64BIT *) dp = *(TYPE_64BIT *) sp;
 653       dp += 8;
 654       sp += 8;
 655     }
 656   }
 657 



 658   for (; n > 0; n--)
 659     *dp++ = *sp++;
 660 }
 661 
 662 /***************************************************************/
     /*
      * With the Microsoft compiler, switch optimization back on for the code
      * that follows.
      * NOTE(review): presumably pairs with a '#pragma optimize("", off)'
      * earlier in the file, outside the visible part -- confirm.
      */
 663 #ifdef _MSC_VER
 664 #pragma optimize("", on)
 665 #endif /* _MSC_VER */
 666 
 667 /***************************************************************/
< prev index next >