1 /*
2 * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
215 mask >>= offset;
216 src = da[0];
217 da[0] = (src & (~mask)) | (sa[0] & mask);
218 return;
219 }
220
221 mask = mask0 >> offset;
222 src = da[0];
223 da[0] = (src & (~mask)) | (sa[0] & mask);
224 da++;
225 sa++;
226 size = size - 8 + offset;
227 b_size = size >> 3; /* size in bytes */
228
229 for (j = 0; (j < b_size) && (((mlib_addr) da & 7) != 0); j++)
230 *da++ = *sa++;
231
232 if ((((mlib_addr) sa ^ (mlib_addr) da) & 7) == 0) {
233 sp = (TYPE_64BIT *) sa;
234 dp = (TYPE_64BIT *) da;
235 #ifdef __SUNPRO_C
236 #pragma pipeloop(0)
237 #endif /* __SUNPRO_C */
238 for (i = 0; j <= (b_size - 8); j += 8, i++) {
239 dp[i] = sp[i];
240 }
241
242 sa += i << 3;
243 da += i << 3;
244 }
245 else {
246 #ifdef _NO_LONGLONG
247 if ((((mlib_addr) sa ^ (mlib_addr) da) & 3) == 0) {
248 mlib_u32 *pws, *pwd;
249
250 pws = (mlib_u32 *) sa;
251 pwd = (mlib_u32 *) da;
252 #ifdef __SUNPRO_C
253 #pragma pipeloop(0)
254 #endif /* __SUNPRO_C */
255 for (i = 0; j <= (b_size - 4); j += 4, i++) {
256 pwd[i] = pws[i];
257 }
258
259 sa += i << 2;
260 da += i << 2;
261 }
262 else {
263 mlib_u32 *pws, *pwd, src0, src1;
264 mlib_s32 lshift = (mlib_addr) sa & 3, rshift;
265
266 pwd = (mlib_u32 *) da;
267 pws = (mlib_u32 *) (sa - lshift);
268 lshift <<= 3;
269 rshift = 32 - lshift;
270
271 src1 = pws[0];
272 #ifdef __SUNPRO_C
273 #pragma pipeloop(0)
274 #endif /* __SUNPRO_C */
275 for (i = 0; j <= (b_size - 4); j += 4, i++) {
276 src0 = src1;
277 src1 = pws[i + 1];
278 #ifdef _LITTLE_ENDIAN
279 pwd[i] = (src0 >> lshift) | (src1 << rshift);
280 #else
281 pwd[i] = (src0 << lshift) | (src1 >> rshift);
282 #endif /* _LITTLE_ENDIAN */
283 }
284
285 sa += i << 2;
286 da += i << 2;
287 }
288
289 #else
290 mlib_u64 *pws, *pwd, src0, src1;
291 mlib_s32 lshift = (mlib_s32) ((mlib_addr) sa & 7), rshift;
292
293 pwd = (mlib_u64 *) da;
294 pws = (mlib_u64 *) (sa - lshift);
295 lshift <<= 3;
296 rshift = 64 - lshift;
297
298 src1 = pws[0];
299 #ifdef __SUNPRO_C
300 #pragma pipeloop(0)
301 #endif /* __SUNPRO_C */
302 for (i = 0; j <= (b_size - 8); j += 8, i++) {
303 src0 = src1;
304 src1 = pws[i + 1];
305 pwd[i] = (src0 << lshift) | (src1 >> rshift);
306 }
307
308 sa += i << 3;
309 da += i << 3;
310 #endif /* _NO_LONGLONG */
311 }
312
313 for (; j < b_size; j++)
314 *da++ = *sa++;
315
316 j = size & 7;
317
318 if (j > 0) {
319 mask = mask0 << (8 - j);
320 src = da[0];
321 da[0] = (src & (~mask)) | (sa[0] & mask);
323 }
324
325 /***************************************************************/
326 void mlib_c_ImageCopy_u8(const mlib_image *src,
327 mlib_image *dst)
328 {
329 PREPAREVARS(mlib_u8);
330 if (src_width < 16) {
331 STRIP(pdst, psrc, src_width, src_height, mlib_u8);
332 return;
333 }
334
335 for (i = 0; i < src_height; i++) {
336 mlib_u8 *psrc_row = psrc + i * src_stride, *pdst_row = pdst + i * dst_stride;
337
338 if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 7)) {
339 for (j = 0; j < (mlib_s32) ((8 - (mlib_addr) psrc_row) & 7); j++) {
340 pdst_row[j] = psrc_row[j];
341 }
342
343 #ifdef __SUNPRO_C
344 #pragma pipeloop(0)
345 #endif /* __SUNPRO_C */
346 for (; j <= (src_width - 8); j += 8) {
347 TYPE_64BIT dsrc0 = *((TYPE_64BIT *) (psrc_row + j));
348
349 *((TYPE_64BIT *) (pdst_row + j)) = dsrc0;
350 }
351 }
352 else {
353
354 #ifdef _NO_LONGLONG
355
356 for (j = 0; j < (mlib_s32) ((4 - (mlib_addr) pdst_row) & 3); j++) {
357 pdst_row[j] = psrc_row[j];
358 }
359
360 if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 3)) {
361 #ifdef __SUNPRO_C
362 #pragma pipeloop(0)
363 #endif /* __SUNPRO_C */
364 for (; j <= (src_width - 4); j += 4) {
365 *((mlib_s32 *) (pdst_row + j)) = *((mlib_s32 *) (psrc_row + j));
366 }
367 }
368 else {
369 mlib_u32 *ps, shl, shr, src0, src1;
370
371 ps = (mlib_u32 *) (psrc_row + j);
372 shl = (mlib_addr) ps & 3;
373 ps = (mlib_u32 *) ((mlib_addr) ps - shl);
374 shl <<= 3;
375 shr = 32 - shl;
376
377 src1 = ps[0];
378 #ifdef __SUNPRO_C
379 #pragma pipeloop(0)
380 #endif /* __SUNPRO_C */
381 for (; j <= (src_width - 4); j += 4) {
382 src0 = src1;
383 src1 = ps[1];
384 #ifdef _LITTLE_ENDIAN
385 *((mlib_s32 *) (pdst_row + j)) = (src0 >> shl) | (src1 << shr);
386 #else
387 *((mlib_s32 *) (pdst_row + j)) = (src0 << shl) | (src1 >> shr);
388 #endif /* _LITTLE_ENDIAN */
389 ps++;
390 }
391 }
392
393 #else
394
395 for (j = 0; j < (mlib_s32) ((8 - (mlib_addr) pdst_row) & 7); j++) {
396 pdst_row[j] = psrc_row[j];
397 }
398
399 {
400 mlib_s32 shl, shr;
401 mlib_u64 *ps, src0, src1;
402
403 ps = (mlib_u64 *) (psrc_row + j);
404 /* shl and shr are in range [0, 64] */
405 shl = (mlib_s32) ((mlib_addr) ps & 7);
406 ps = (mlib_u64 *) ((mlib_addr) ps - shl);
407 shl <<= 3;
408 shr = 64 - shl;
409
410 src1 = ps[0];
411 #ifdef __SUNPRO_C
412 #pragma pipeloop(0)
413 #endif /* __SUNPRO_C */
414 for (; j <= (src_width - 8); j += 8) {
415 src0 = src1;
416 src1 = ps[1];
417 #ifdef _LITTLE_ENDIAN
418 *((mlib_s64 *) (pdst_row + j)) = (src0 >> shl) | (src1 << shr);
419 #else
420 *((mlib_s64 *) (pdst_row + j)) = (src0 << shl) | (src1 >> shr);
421 #endif /* _LITTLE_ENDIAN */
422 ps++;
423 }
424 }
425 #endif /* _NO_LONGLONG */
426 }
427
428 for (; j < src_width; j++)
429 pdst_row[j] = psrc_row[j];
430 }
431 }
432
433 /***************************************************************/
434 void mlib_c_ImageCopy_s16(const mlib_image *src,
435 mlib_image *dst)
436 {
437 PREPAREVARS(mlib_u16);
438 if (src_width < 8) {
439 STRIP(pdst, psrc, src_width, src_height, mlib_u16);
440 return;
441 }
442
443 for (i = 0; i < src_height; i++) {
444 mlib_u16 *psrc_row = psrc + i * src_stride, *pdst_row = pdst + i * dst_stride;
445
446 if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 7)) {
447 for (j = 0; j < (mlib_s32) (((8 - (mlib_addr) psrc_row) & 7) >> 1); j++) {
448 pdst_row[j] = psrc_row[j];
449 }
450
451 #ifdef __SUNPRO_C
452 #pragma pipeloop(0)
453 #endif /* __SUNPRO_C */
454 for (; j <= (src_width - 4); j += 4) {
455 TYPE_64BIT dsrc0 = *((TYPE_64BIT *) (psrc_row + j));
456
457 *((TYPE_64BIT *) (pdst_row + j)) = dsrc0;
458 }
459 }
460 else {
461
462 #ifdef _NO_LONGLONG
463
464 if (j = (((mlib_addr) pdst_row & 2) != 0)) {
465 pdst_row[0] = psrc_row[0];
466 }
467
468 if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 3)) {
469 #ifdef __SUNPRO_C
470 #pragma pipeloop(0)
471 #endif /* __SUNPRO_C */
472 for (; j <= (src_width - 2); j += 2) {
473 *((mlib_s32 *) (pdst_row + j)) = *((mlib_s32 *) (psrc_row + j));
474 }
475 }
476 else {
477 mlib_u32 *ps, src0, src1;
478
479 ps = (mlib_u32 *) (psrc_row + j - 1);
480 src1 = ps[0];
481 #ifdef __SUNPRO_C
482 #pragma pipeloop(0)
483 #endif /* __SUNPRO_C */
484 for (; j <= (src_width - 2); j += 2) {
485 src0 = src1;
486 src1 = ps[1];
487 #ifdef _LITTLE_ENDIAN
488 *((mlib_s32 *) (pdst_row + j)) = (src0 >> 16) | (src1 << 16);
489 #else
490 *((mlib_s32 *) (pdst_row + j)) = (src0 << 16) | (src1 >> 16);
491 #endif /* _LITTLE_ENDIAN */
492 ps++;
493 }
494 }
495
496 #else
497
498 for (j = 0; j < (mlib_s32) (((8 - (mlib_addr) pdst_row) & 7) >> 1); j++) {
499 pdst_row[j] = psrc_row[j];
500 }
501
502 {
503 mlib_s32 shl, shr;
504 mlib_u64 *ps, src0, src1;
505
506 ps = (mlib_u64 *) (psrc_row + j);
507 shl = (mlib_s32) ((mlib_addr) ps & 7);
508 ps = (mlib_u64 *) ((mlib_addr) ps - shl);
509 shl <<= 3;
510 shr = 64 - shl;
511
512 src1 = ps[0];
513 #ifdef __SUNPRO_C
514 #pragma pipeloop(0)
515 #endif /* __SUNPRO_C */
516 for (; j <= (src_width - 4); j += 4) {
517 src0 = src1;
518 src1 = ps[1];
519 #ifdef _LITTLE_ENDIAN
520 *((mlib_s64 *) (pdst_row + j)) = (src0 >> shl) | (src1 << shr);
521 #else
522 *((mlib_s64 *) (pdst_row + j)) = (src0 << shl) | (src1 >> shr);
523 #endif /* _LITTLE_ENDIAN */
524 ps++;
525 }
526 }
527 #endif /* _NO_LONGLONG */
528 }
529
530 for (; j < src_width; j++)
531 pdst_row[j] = psrc_row[j];
532 }
533 }
534
535 /***************************************************************/
536 void mlib_c_ImageCopy_s32(const mlib_image *src,
537 mlib_image *dst)
538 {
539 PREPAREVARS(mlib_u32);
540 if (src_width < 4) {
541 STRIP(pdst, psrc, src_width, src_height, mlib_u32);
542 return;
543 }
544
545 for (i = 0; i < src_height; i++) {
546 mlib_u32 *psrc_row = psrc + i * src_stride, *pdst_row = pdst + i * dst_stride;
547
548 if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 7)) {
549 j = (mlib_s32) ((mlib_addr) psrc_row & 4) >> 2;
550 if (j != 0) {
551 pdst_row[0] = psrc_row[0];
552 }
553
554 #ifdef __SUNPRO_C
555 #pragma pipeloop(0)
556 #endif /* __SUNPRO_C */
557 for (; j <= (src_width - 2); j += 2) {
558 TYPE_64BIT dsrc0 = *((TYPE_64BIT *) (psrc_row + j));
559
560 *((TYPE_64BIT *) (pdst_row + j)) = dsrc0;
561 }
562 }
563 else {
564
565 #ifdef _NO_LONGLONG
566
567 #ifdef __SUNPRO_C
568 #pragma pipeloop(0)
569 #endif /* __SUNPRO_C */
570 for (j = 0; j <= (src_width - 1); j++) {
571 *((mlib_s32 *) (pdst_row + j)) = *((mlib_s32 *) (psrc_row + j));
572 }
573
574 #else
575
576 {
577 mlib_u64 *ps, src0, src1;
578
579 j = (mlib_s32) ((mlib_addr) pdst_row & 4) >> 2;
580 if (j != 0) {
581 pdst_row[0] = psrc_row[0];
582 }
583 ps = (mlib_u64 *) (psrc_row + j - 1);
584 src1 = ps[0];
585 #ifdef __SUNPRO_C
586 #pragma pipeloop(0)
587 #endif /* __SUNPRO_C */
588 for (; j <= (src_width - 2); j += 2) {
589 src0 = src1;
590 src1 = ps[1];
591 #ifdef _LITTLE_ENDIAN
592 *((mlib_s64 *) (pdst_row + j)) = (src0 >> 32) | (src1 << 32);
593 #else
594 *((mlib_s64 *) (pdst_row + j)) = (src0 << 32) | (src1 >> 32);
595 #endif /* _LITTLE_ENDIAN */
596 ps++;
597 }
598 }
599 #endif /* _NO_LONGLONG */
600 }
601
602 for (; j < src_width; j++)
603 pdst_row[j] = psrc_row[j];
604 }
605 }
606
607 /***************************************************************/
608 void mlib_c_ImageCopy_d64(const mlib_image *src,
609 mlib_image *dst)
610 {
611 PREPAREVARS(mlib_d64);
612 for (i = 0; i < src_height; i++) {
613 mlib_d64 *psrc_row = psrc + i * src_stride, *pdst_row = pdst + i * dst_stride;
614
615 #ifdef __SUNPRO_C
616 #pragma pipeloop(0)
617 #endif /* __SUNPRO_C */
618 for (j = 0; j < src_width; j++)
619 *((mlib_d64 *) (pdst_row + j)) = *((mlib_d64 *) (psrc_row + j));
620 }
621 }
622
623 /***************************************************************/
624 /*
625 * Both source and destination image data are 1 - d vectors and
626 * 8 - byte aligned. And size is in 8 - bytes.
627 */
628
629 void mlib_c_ImageCopy_a1(const TYPE_64BIT *sp,
630 TYPE_64BIT *dp,
631 mlib_s32 size)
632 {
633 mlib_s32 i;
634
635 #ifdef __SUNPRO_C
636 #pragma pipeloop(0)
637 #endif /* __SUNPRO_C */
638 for (i = 0; i < size; i++) {
639 *dp++ = *sp++;
640 }
641 }
642
643 /***************************************************************/
/*
 * Machine word used by the word-wise copy code that follows:
 *   TYPE  - unsigned integer type of one word
 *   BSIZE - width of that word in bits
 *   SIZE  - width of that word in bytes
 */
#ifdef _NO_LONGLONG
#define TYPE   mlib_u32
#define BSIZE  32
#define SIZE   4
#else
#define TYPE   mlib_u64
#define BSIZE  64
#define SIZE   8
#endif /* _NO_LONGLONG */
653
654 /***************************************************************/
655 void mlib_ImageCopy_na(const mlib_u8 *sp,
656 mlib_u8 *dp,
657 mlib_s32 n)
658 {
659 mlib_s32 shr, shl;
660 TYPE *tmp, s0, s1;
661
662 if (((mlib_addr) sp ^ (mlib_addr) dp) & 7) {
663
664 #ifdef __SUNPRO_C
665 #pragma pipeloop(0)
666 #endif /* __SUNPRO_C */
667 for (; (n > 0) && (mlib_addr) dp & (SIZE - 1); n--)
668 *dp++ = *sp++;
669
670 #ifdef _NO_LONGLONG
671
672 if (((mlib_addr) sp & (SIZE - 1)) == 0) {
673 for (; n > SIZE; n -= SIZE) {
674 *(TYPE *) dp = *(TYPE *) sp;
675 dp += SIZE;
676 sp += SIZE;
677 }
678 }
679 else
680 #endif /* _NO_LONGLONG */
681 {
682 tmp = (TYPE *) ((mlib_addr) sp & ~(SIZE - 1));
683 /* shl and shr do not exceed 64 here */
684 shl = (mlib_s32) (((mlib_addr) sp & (SIZE - 1)) << 3);
685 shr = BSIZE - shl;
686 s0 = *tmp++;
687
688 #ifdef __SUNPRO_C
689 #pragma pipeloop(0)
690 #endif /* __SUNPRO_C */
691 for (; n > SIZE; n -= SIZE) {
692 s1 = *tmp++;
693 #ifdef _LITTLE_ENDIAN
694 *(TYPE *) dp = (s0 >> shl) | (s1 << shr);
695 #else
696 *(TYPE *) dp = (s0 << shl) | (s1 >> shr);
697 #endif /* _LITTLE_ENDIAN */
698 s0 = s1;
699 dp += SIZE;
700 sp += SIZE;
701 }
702 }
703 }
704 else {
705 #ifdef __SUNPRO_C
706 #pragma pipeloop(0)
707 #endif /* __SUNPRO_C */
708 for (; (n > 0) && (mlib_addr) dp & 7; n--)
709 *dp++ = *sp++;
710
711 #ifdef __SUNPRO_C
712 #pragma pipeloop(0)
713 #endif /* __SUNPRO_C */
714 for (; n > 8; n -= 8) {
715 *(TYPE_64BIT *) dp = *(TYPE_64BIT *) sp;
716 dp += 8;
717 sp += 8;
718 }
719 }
720
721 #ifdef __SUNPRO_C
722 #pragma pipeloop(0)
723 #endif /* __SUNPRO_C */
724 for (; n > 0; n--)
725 *dp++ = *sp++;
726 }
727
728 /***************************************************************/
/* MSVC only: switch the optimizer back on (empty list + "on").
 * NOTE(review): presumably pairs with an earlier "off" pragma outside this view. */
#if defined(_MSC_VER)
#pragma optimize("", on)
#endif /* _MSC_VER */
732
733 /***************************************************************/
|
1 /*
2 * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
215 mask >>= offset;
216 src = da[0];
217 da[0] = (src & (~mask)) | (sa[0] & mask);
218 return;
219 }
220
221 mask = mask0 >> offset;
222 src = da[0];
223 da[0] = (src & (~mask)) | (sa[0] & mask);
224 da++;
225 sa++;
226 size = size - 8 + offset;
227 b_size = size >> 3; /* size in bytes */
228
229 for (j = 0; (j < b_size) && (((mlib_addr) da & 7) != 0); j++)
230 *da++ = *sa++;
231
232 if ((((mlib_addr) sa ^ (mlib_addr) da) & 7) == 0) {
233 sp = (TYPE_64BIT *) sa;
234 dp = (TYPE_64BIT *) da;
235 for (i = 0; j <= (b_size - 8); j += 8, i++) {
236 dp[i] = sp[i];
237 }
238
239 sa += i << 3;
240 da += i << 3;
241 }
242 else {
243 #ifdef _NO_LONGLONG
244 if ((((mlib_addr) sa ^ (mlib_addr) da) & 3) == 0) {
245 mlib_u32 *pws, *pwd;
246
247 pws = (mlib_u32 *) sa;
248 pwd = (mlib_u32 *) da;
249 for (i = 0; j <= (b_size - 4); j += 4, i++) {
250 pwd[i] = pws[i];
251 }
252
253 sa += i << 2;
254 da += i << 2;
255 }
256 else {
257 mlib_u32 *pws, *pwd, src0, src1;
258 mlib_s32 lshift = (mlib_addr) sa & 3, rshift;
259
260 pwd = (mlib_u32 *) da;
261 pws = (mlib_u32 *) (sa - lshift);
262 lshift <<= 3;
263 rshift = 32 - lshift;
264
265 src1 = pws[0];
266 for (i = 0; j <= (b_size - 4); j += 4, i++) {
267 src0 = src1;
268 src1 = pws[i + 1];
269 #ifdef _LITTLE_ENDIAN
270 pwd[i] = (src0 >> lshift) | (src1 << rshift);
271 #else
272 pwd[i] = (src0 << lshift) | (src1 >> rshift);
273 #endif /* _LITTLE_ENDIAN */
274 }
275
276 sa += i << 2;
277 da += i << 2;
278 }
279
280 #else
281 mlib_u64 *pws, *pwd, src0, src1;
282 mlib_s32 lshift = (mlib_s32) ((mlib_addr) sa & 7), rshift;
283
284 pwd = (mlib_u64 *) da;
285 pws = (mlib_u64 *) (sa - lshift);
286 lshift <<= 3;
287 rshift = 64 - lshift;
288
289 src1 = pws[0];
290 for (i = 0; j <= (b_size - 8); j += 8, i++) {
291 src0 = src1;
292 src1 = pws[i + 1];
293 pwd[i] = (src0 << lshift) | (src1 >> rshift);
294 }
295
296 sa += i << 3;
297 da += i << 3;
298 #endif /* _NO_LONGLONG */
299 }
300
301 for (; j < b_size; j++)
302 *da++ = *sa++;
303
304 j = size & 7;
305
306 if (j > 0) {
307 mask = mask0 << (8 - j);
308 src = da[0];
309 da[0] = (src & (~mask)) | (sa[0] & mask);
311 }
312
313 /***************************************************************/
314 void mlib_c_ImageCopy_u8(const mlib_image *src,
315 mlib_image *dst)
316 {
317 PREPAREVARS(mlib_u8);
318 if (src_width < 16) {
319 STRIP(pdst, psrc, src_width, src_height, mlib_u8);
320 return;
321 }
322
323 for (i = 0; i < src_height; i++) {
324 mlib_u8 *psrc_row = psrc + i * src_stride, *pdst_row = pdst + i * dst_stride;
325
326 if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 7)) {
327 for (j = 0; j < (mlib_s32) ((8 - (mlib_addr) psrc_row) & 7); j++) {
328 pdst_row[j] = psrc_row[j];
329 }
330
331 for (; j <= (src_width - 8); j += 8) {
332 TYPE_64BIT dsrc0 = *((TYPE_64BIT *) (psrc_row + j));
333
334 *((TYPE_64BIT *) (pdst_row + j)) = dsrc0;
335 }
336 }
337 else {
338
339 #ifdef _NO_LONGLONG
340
341 for (j = 0; j < (mlib_s32) ((4 - (mlib_addr) pdst_row) & 3); j++) {
342 pdst_row[j] = psrc_row[j];
343 }
344
345 if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 3)) {
346 for (; j <= (src_width - 4); j += 4) {
347 *((mlib_s32 *) (pdst_row + j)) = *((mlib_s32 *) (psrc_row + j));
348 }
349 }
350 else {
351 mlib_u32 *ps, shl, shr, src0, src1;
352
353 ps = (mlib_u32 *) (psrc_row + j);
354 shl = (mlib_addr) ps & 3;
355 ps = (mlib_u32 *) ((mlib_addr) ps - shl);
356 shl <<= 3;
357 shr = 32 - shl;
358
359 src1 = ps[0];
360 for (; j <= (src_width - 4); j += 4) {
361 src0 = src1;
362 src1 = ps[1];
363 #ifdef _LITTLE_ENDIAN
364 *((mlib_s32 *) (pdst_row + j)) = (src0 >> shl) | (src1 << shr);
365 #else
366 *((mlib_s32 *) (pdst_row + j)) = (src0 << shl) | (src1 >> shr);
367 #endif /* _LITTLE_ENDIAN */
368 ps++;
369 }
370 }
371
372 #else
373
374 for (j = 0; j < (mlib_s32) ((8 - (mlib_addr) pdst_row) & 7); j++) {
375 pdst_row[j] = psrc_row[j];
376 }
377
378 {
379 mlib_s32 shl, shr;
380 mlib_u64 *ps, src0, src1;
381
382 ps = (mlib_u64 *) (psrc_row + j);
383 /* shl and shr are in range [0, 64] */
384 shl = (mlib_s32) ((mlib_addr) ps & 7);
385 ps = (mlib_u64 *) ((mlib_addr) ps - shl);
386 shl <<= 3;
387 shr = 64 - shl;
388
389 src1 = ps[0];
390 for (; j <= (src_width - 8); j += 8) {
391 src0 = src1;
392 src1 = ps[1];
393 #ifdef _LITTLE_ENDIAN
394 *((mlib_s64 *) (pdst_row + j)) = (src0 >> shl) | (src1 << shr);
395 #else
396 *((mlib_s64 *) (pdst_row + j)) = (src0 << shl) | (src1 >> shr);
397 #endif /* _LITTLE_ENDIAN */
398 ps++;
399 }
400 }
401 #endif /* _NO_LONGLONG */
402 }
403
404 for (; j < src_width; j++)
405 pdst_row[j] = psrc_row[j];
406 }
407 }
408
409 /***************************************************************/
410 void mlib_c_ImageCopy_s16(const mlib_image *src,
411 mlib_image *dst)
412 {
413 PREPAREVARS(mlib_u16);
414 if (src_width < 8) {
415 STRIP(pdst, psrc, src_width, src_height, mlib_u16);
416 return;
417 }
418
419 for (i = 0; i < src_height; i++) {
420 mlib_u16 *psrc_row = psrc + i * src_stride, *pdst_row = pdst + i * dst_stride;
421
422 if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 7)) {
423 for (j = 0; j < (mlib_s32) (((8 - (mlib_addr) psrc_row) & 7) >> 1); j++) {
424 pdst_row[j] = psrc_row[j];
425 }
426
427 for (; j <= (src_width - 4); j += 4) {
428 TYPE_64BIT dsrc0 = *((TYPE_64BIT *) (psrc_row + j));
429
430 *((TYPE_64BIT *) (pdst_row + j)) = dsrc0;
431 }
432 }
433 else {
434
435 #ifdef _NO_LONGLONG
436
437 if (j = (((mlib_addr) pdst_row & 2) != 0)) {
438 pdst_row[0] = psrc_row[0];
439 }
440
441 if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 3)) {
442 for (; j <= (src_width - 2); j += 2) {
443 *((mlib_s32 *) (pdst_row + j)) = *((mlib_s32 *) (psrc_row + j));
444 }
445 }
446 else {
447 mlib_u32 *ps, src0, src1;
448
449 ps = (mlib_u32 *) (psrc_row + j - 1);
450 src1 = ps[0];
451 for (; j <= (src_width - 2); j += 2) {
452 src0 = src1;
453 src1 = ps[1];
454 #ifdef _LITTLE_ENDIAN
455 *((mlib_s32 *) (pdst_row + j)) = (src0 >> 16) | (src1 << 16);
456 #else
457 *((mlib_s32 *) (pdst_row + j)) = (src0 << 16) | (src1 >> 16);
458 #endif /* _LITTLE_ENDIAN */
459 ps++;
460 }
461 }
462
463 #else
464
465 for (j = 0; j < (mlib_s32) (((8 - (mlib_addr) pdst_row) & 7) >> 1); j++) {
466 pdst_row[j] = psrc_row[j];
467 }
468
469 {
470 mlib_s32 shl, shr;
471 mlib_u64 *ps, src0, src1;
472
473 ps = (mlib_u64 *) (psrc_row + j);
474 shl = (mlib_s32) ((mlib_addr) ps & 7);
475 ps = (mlib_u64 *) ((mlib_addr) ps - shl);
476 shl <<= 3;
477 shr = 64 - shl;
478
479 src1 = ps[0];
480 for (; j <= (src_width - 4); j += 4) {
481 src0 = src1;
482 src1 = ps[1];
483 #ifdef _LITTLE_ENDIAN
484 *((mlib_s64 *) (pdst_row + j)) = (src0 >> shl) | (src1 << shr);
485 #else
486 *((mlib_s64 *) (pdst_row + j)) = (src0 << shl) | (src1 >> shr);
487 #endif /* _LITTLE_ENDIAN */
488 ps++;
489 }
490 }
491 #endif /* _NO_LONGLONG */
492 }
493
494 for (; j < src_width; j++)
495 pdst_row[j] = psrc_row[j];
496 }
497 }
498
499 /***************************************************************/
500 void mlib_c_ImageCopy_s32(const mlib_image *src,
501 mlib_image *dst)
502 {
503 PREPAREVARS(mlib_u32);
504 if (src_width < 4) {
505 STRIP(pdst, psrc, src_width, src_height, mlib_u32);
506 return;
507 }
508
509 for (i = 0; i < src_height; i++) {
510 mlib_u32 *psrc_row = psrc + i * src_stride, *pdst_row = pdst + i * dst_stride;
511
512 if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 7)) {
513 j = (mlib_s32) ((mlib_addr) psrc_row & 4) >> 2;
514 if (j != 0) {
515 pdst_row[0] = psrc_row[0];
516 }
517
518 for (; j <= (src_width - 2); j += 2) {
519 TYPE_64BIT dsrc0 = *((TYPE_64BIT *) (psrc_row + j));
520
521 *((TYPE_64BIT *) (pdst_row + j)) = dsrc0;
522 }
523 }
524 else {
525
526 #ifdef _NO_LONGLONG
527
528 for (j = 0; j <= (src_width - 1); j++) {
529 *((mlib_s32 *) (pdst_row + j)) = *((mlib_s32 *) (psrc_row + j));
530 }
531
532 #else
533
534 {
535 mlib_u64 *ps, src0, src1;
536
537 j = (mlib_s32) ((mlib_addr) pdst_row & 4) >> 2;
538 if (j != 0) {
539 pdst_row[0] = psrc_row[0];
540 }
541 ps = (mlib_u64 *) (psrc_row + j - 1);
542 src1 = ps[0];
543 for (; j <= (src_width - 2); j += 2) {
544 src0 = src1;
545 src1 = ps[1];
546 #ifdef _LITTLE_ENDIAN
547 *((mlib_s64 *) (pdst_row + j)) = (src0 >> 32) | (src1 << 32);
548 #else
549 *((mlib_s64 *) (pdst_row + j)) = (src0 << 32) | (src1 >> 32);
550 #endif /* _LITTLE_ENDIAN */
551 ps++;
552 }
553 }
554 #endif /* _NO_LONGLONG */
555 }
556
557 for (; j < src_width; j++)
558 pdst_row[j] = psrc_row[j];
559 }
560 }
561
562 /***************************************************************/
563 void mlib_c_ImageCopy_d64(const mlib_image *src,
564 mlib_image *dst)
565 {
566 PREPAREVARS(mlib_d64);
567 for (i = 0; i < src_height; i++) {
568 mlib_d64 *psrc_row = psrc + i * src_stride, *pdst_row = pdst + i * dst_stride;
569
570 for (j = 0; j < src_width; j++)
571 *((mlib_d64 *) (pdst_row + j)) = *((mlib_d64 *) (psrc_row + j));
572 }
573 }
574
575 /***************************************************************/
576 /*
577 * Both source and destination image data are 1 - d vectors and
578 * 8 - byte aligned. And size is in 8 - bytes.
579 */
580
581 void mlib_c_ImageCopy_a1(const TYPE_64BIT *sp,
582 TYPE_64BIT *dp,
583 mlib_s32 size)
584 {
585 mlib_s32 i;
586
587 for (i = 0; i < size; i++) {
588 *dp++ = *sp++;
589 }
590 }
591
592 /***************************************************************/
/*
 * Machine word used by the word-wise copy code that follows:
 *   TYPE  - unsigned integer type of one word
 *   BSIZE - width of that word in bits
 *   SIZE  - width of that word in bytes
 */
#ifdef _NO_LONGLONG
#define TYPE   mlib_u32
#define BSIZE  32
#define SIZE   4
#else
#define TYPE   mlib_u64
#define BSIZE  64
#define SIZE   8
#endif /* _NO_LONGLONG */
602
603 /***************************************************************/
604 void mlib_ImageCopy_na(const mlib_u8 *sp,
605 mlib_u8 *dp,
606 mlib_s32 n)
607 {
608 mlib_s32 shr, shl;
609 TYPE *tmp, s0, s1;
610
611 if (((mlib_addr) sp ^ (mlib_addr) dp) & 7) {
612
613 for (; (n > 0) && (mlib_addr) dp & (SIZE - 1); n--)
614 *dp++ = *sp++;
615
616 #ifdef _NO_LONGLONG
617
618 if (((mlib_addr) sp & (SIZE - 1)) == 0) {
619 for (; n > SIZE; n -= SIZE) {
620 *(TYPE *) dp = *(TYPE *) sp;
621 dp += SIZE;
622 sp += SIZE;
623 }
624 }
625 else
626 #endif /* _NO_LONGLONG */
627 {
628 tmp = (TYPE *) ((mlib_addr) sp & ~(SIZE - 1));
629 /* shl and shr do not exceed 64 here */
630 shl = (mlib_s32) (((mlib_addr) sp & (SIZE - 1)) << 3);
631 shr = BSIZE - shl;
632 s0 = *tmp++;
633
634 for (; n > SIZE; n -= SIZE) {
635 s1 = *tmp++;
636 #ifdef _LITTLE_ENDIAN
637 *(TYPE *) dp = (s0 >> shl) | (s1 << shr);
638 #else
639 *(TYPE *) dp = (s0 << shl) | (s1 >> shr);
640 #endif /* _LITTLE_ENDIAN */
641 s0 = s1;
642 dp += SIZE;
643 sp += SIZE;
644 }
645 }
646 }
647 else {
648 for (; (n > 0) && (mlib_addr) dp & 7; n--)
649 *dp++ = *sp++;
650
651 for (; n > 8; n -= 8) {
652 *(TYPE_64BIT *) dp = *(TYPE_64BIT *) sp;
653 dp += 8;
654 sp += 8;
655 }
656 }
657
658 for (; n > 0; n--)
659 *dp++ = *sp++;
660 }
661
662 /***************************************************************/
/* MSVC only: switch the optimizer back on (empty list + "on").
 * NOTE(review): presumably pairs with an earlier "off" pragma outside this view. */
#if defined(_MSC_VER)
#pragma optimize("", on)
#endif /* _MSC_VER */
666
667 /***************************************************************/
|