1 /* 2 * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 27 /* 28 * FUNCTION 29 * Internal functions for mlib_ImageConv2x2 on U8/S16/U16 types 30 * and MLIB_EDGE_DST_NO_WRITE mask. 31 */ 32 33 #include "mlib_image.h" 34 #include "mlib_ImageConv.h" 35 #include "mlib_c_ImageConv.h" 36 37 /***************************************************************/ 38 #ifdef i386 /* do not copy by mlib_d64 data type for x86 */ 39 40 typedef struct { 41 mlib_s32 int0, int1; 42 } two_int; 43 44 #define TYPE_64BIT two_int 45 46 #else /* i386 */ 47 48 #define TYPE_64BIT mlib_d64 49 50 #endif /* i386 ( do not copy by mlib_d64 data type for x86 ) */ 51 52 /***************************************************************/ 53 #define LOAD_KERNEL_INTO_DOUBLE() \ 54 while (scalef_expon > 30) { \ 55 scalef /= (1 << 30); \ 56 scalef_expon -= 30; \ 57 } \ 58 \ 59 scalef /= (1 << scalef_expon); \ 60 \ 61 /* keep kernel in regs */ \ 62 k0 = scalef * kern[0]; k1 = scalef * kern[1]; k2 = scalef * kern[2]; \ 63 k3 = scalef * kern[3] 64 65 /***************************************************************/ 66 #define GET_SRC_DST_PARAMETERS(type) \ 67 hgt = mlib_ImageGetHeight(src); \ 68 wid = mlib_ImageGetWidth(src); \ 69 nchannel = mlib_ImageGetChannels(src); \ 70 sll = mlib_ImageGetStride(src) / sizeof(type); \ 71 dll = mlib_ImageGetStride(dst) / sizeof(type); \ 72 adr_src = (type *)mlib_ImageGetData(src); \ 73 adr_dst = (type *)mlib_ImageGetData(dst) 74 75 /***************************************************************/ 76 #ifndef MLIB_USE_FTOI_CLAMPING 77 78 #define CLAMP_S32(x) \ 79 (((x) <= MLIB_S32_MIN) ? MLIB_S32_MIN : \ 80 (((x) >= MLIB_S32_MAX) ? MLIB_S32_MAX : (mlib_s32)(x))) 81 82 #else 83 84 #define CLAMP_S32(x) ((mlib_s32)(x)) 85 86 #endif /* MLIB_USE_FTOI_CLAMPING */ 87 88 /***************************************************************/ 89 #if defined(_LITTLE_ENDIAN) && !defined(_NO_LONGLONG) 90 91 /* NB: Explicit cast to DTYPE is necessary to avoid warning from Microsoft VC compiler. 92 And we need to explicitly define cast behavior if source exceeds destination range. 93 (it is undefined according to C99 spec). We use mask here because this macro is typically 94 used to extract bit regions. */ 95 96 #define STORE2(res0, res1) \ 97 dp[0 ] = (DTYPE) ((res1) & DTYPE_MASK); \ 98 dp[chan1] = (DTYPE) ((res0) & DTYPE_MASK) 99 100 #else 101 102 #define STORE2(res0, res1) \ 103 dp[0 ] = (DTYPE) ((res0) & DTYPE_MASK); \ 104 dp[chan1] = (DTYPE) ((res1) & DTYPE_MASK) 105 106 #endif /* defined(_LITTLE_ENDIAN) && !defined(_NO_LONGLONG) */ 107 108 /***************************************************************/ 109 #ifdef _NO_LONGLONG 110 111 #define LOAD_BUFF(buff) \ 112 buff[i ] = sp[0]; \ 113 buff[i + 1] = sp[chan1] 114 115 #else /* _NO_LONGLONG */ 116 117 #ifdef _LITTLE_ENDIAN 118 119 #define LOAD_BUFF(buff) \ 120 *(mlib_s64*)(buff + i) = (((mlib_s64)sp[chan1]) << 32) | ((mlib_s64)sp[0] & 0xffffffff) 121 122 #else /* _LITTLE_ENDIAN */ 123 124 #define LOAD_BUFF(buff) \ 125 *(mlib_s64*)(buff + i) = (((mlib_s64)sp[0]) << 32) | ((mlib_s64)sp[chan1] & 0xffffffff) 126 127 #endif /* _LITTLE_ENDIAN */ 128 129 #endif /* _NO_LONGLONG */ 130 131 /***************************************************************/ 132 typedef union { 133 TYPE_64BIT d64; 134 struct { 135 mlib_s32 i0, i1; 136 } i32s; 137 } d64_2x32; 138 139 /***************************************************************/ 140 #define D_KER 1 141 142 #define BUFF_LINE 256 143 144 /***************************************************************/ 145 #define XOR_80(x) x ^= 0x80 146 147 void mlib_ImageXor80_aa(mlib_u8 *dl, 148 mlib_s32 wid, 149 mlib_s32 hgt, 150 mlib_s32 str) 151 { 152 mlib_u8 *dp, *dend; 153 #ifdef _NO_LONGLONG 154 mlib_u32 cadd = 0x80808080; 155 #else /* _NO_LONGLONG */ 156 mlib_u64 cadd = MLIB_U64_CONST(0x8080808080808080); 157 #endif /* _NO_LONGLONG */ 158 mlib_s32 j; 159 160 if (wid == str) { 161 wid *= hgt; 162 hgt = 1; 163 } 164 165 for (j = 0; j < hgt; j++) { 166 dend = dl + wid; 167 168 for (dp = dl; ((mlib_addr)dp & 7) && (dp < dend); dp++) XOR_80(dp[0]); 169 170 #ifdef __SUNPRO_C 171 #pragma pipeloop(0) 172 #endif /* __SUNPRO_C */ 173 for (; dp <= (dend - 8); dp += 8) { 174 #ifdef _NO_LONGLONG 175 *((mlib_s32*)dp) ^= cadd; 176 *((mlib_s32*)dp+1) ^= cadd; 177 #else /* _NO_LONGLONG */ 178 *((mlib_u64*)dp) ^= cadd; 179 #endif /* _NO_LONGLONG */ 180 } 181 182 for (; (dp < dend); dp++) XOR_80(dp[0]); 183 184 dl += str; 185 } 186 } 187 188 /***************************************************************/ 189 void mlib_ImageXor80(mlib_u8 *dl, 190 mlib_s32 wid, 191 mlib_s32 hgt, 192 mlib_s32 str, 193 mlib_s32 nchan, 194 mlib_s32 cmask) 195 { 196 mlib_s32 i, j, c; 197 198 for (j = 0; j < hgt; j++) { 199 for (c = 0; c < nchan; c++) { 200 if (cmask & (1 << (nchan - 1 - c))) { 201 mlib_u8 *dp = dl + c; 202 203 #ifdef __SUNPRO_C 204 #pragma pipeloop(0) 205 #endif /* __SUNPRO_C */ 206 for (i = 0; i < wid; i++) XOR_80(dp[i*nchan]); 207 } 208 } 209 210 dl += str; 211 } 212 } 213 214 /***************************************************************/ 215 #define DTYPE mlib_s16 216 #define DTYPE_MASK 0xffff 217 218 mlib_status mlib_c_conv2x2nw_s16(mlib_image *dst, 219 const mlib_image *src, 220 const mlib_s32 *kern, 221 mlib_s32 scalef_expon, 222 mlib_s32 cmask) 223 { 224 mlib_d64 buff_arr[2*BUFF_LINE]; 225 mlib_s32 *pbuff = (mlib_s32*)buff_arr, *buffo, *buff0, *buff1, *buff2, *buffT; 226 DTYPE *adr_src, *sl, *sp, *sl1; 227 DTYPE *adr_dst, *dl, *dp; 228 mlib_d64 k0, k1, k2, k3, scalef = 65536.0; 229 mlib_d64 p00, p01, p02, 230 p10, p11, p12; 231 mlib_s32 wid, hgt, sll, dll, wid1; 232 mlib_s32 nchannel, chan1, chan2; 233 mlib_s32 i, j, c; 234 LOAD_KERNEL_INTO_DOUBLE(); 235 GET_SRC_DST_PARAMETERS(DTYPE); 236 237 wid1 = (wid + 1) &~ 1; 238 239 if (wid1 > BUFF_LINE) { 240 pbuff = mlib_malloc(4*sizeof(mlib_s32)*wid1); 241 242 if (pbuff == NULL) return MLIB_FAILURE; 243 } 244 245 buffo = pbuff; 246 buff0 = buffo + wid1; 247 buff1 = buff0 + wid1; 248 buff2 = buff1 + wid1; 249 250 chan1 = nchannel; 251 chan2 = chan1 + chan1; 252 253 wid -= D_KER; 254 hgt -= D_KER; 255 256 for (c = 0; c < nchannel; c++) { 257 if (!(cmask & (1 << (nchannel - 1 - c)))) continue; 258 259 sl = adr_src + c; 260 dl = adr_dst + c; 261 262 sl1 = sl + sll; 263 #ifdef __SUNPRO_C 264 #pragma pipeloop(0) 265 #endif /* __SUNPRO_C */ 266 for (i = 0; i < wid + D_KER; i++) { 267 buff0[i - 1] = (mlib_s32)sl[i*chan1]; 268 buff1[i - 1] = (mlib_s32)sl1[i*chan1]; 269 } 270 271 sl += (D_KER + 1)*sll; 272 273 for (j = 0; j < hgt; j++) { 274 sp = sl; 275 dp = dl; 276 277 buff2[-1] = (mlib_s32)sp[0]; 278 sp += chan1; 279 280 p02 = buff0[-1]; 281 p12 = buff1[-1]; 282 283 #ifdef __SUNPRO_C 284 #pragma pipeloop(0) 285 #endif /* __SUNPRO_C */ 286 for (i = 0; i <= (wid - 2); i += 2) { 287 #ifdef _NO_LONGLONG 288 mlib_s32 o64_1, o64_2; 289 #else /* _NO_LONGLONG */ 290 mlib_s64 o64; 291 #endif /* _NO_LONGLONG */ 292 d64_2x32 sd0, sd1, dd; 293 294 p00 = p02; p10 = p12; 295 296 sd0.d64 = *(TYPE_64BIT*)(buff0 + i); 297 sd1.d64 = *(TYPE_64BIT*)(buff1 + i); 298 p01 = (mlib_d64)sd0.i32s.i0; 299 p02 = (mlib_d64)sd0.i32s.i1; 300 p11 = (mlib_d64)sd1.i32s.i0; 301 p12 = (mlib_d64)sd1.i32s.i1; 302 303 LOAD_BUFF(buff2); 304 305 dd.i32s.i0 = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3); 306 dd.i32s.i1 = CLAMP_S32(p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3); 307 *(TYPE_64BIT*)(buffo + i) = dd.d64; 308 309 #ifdef _NO_LONGLONG 310 311 o64_1 = buffo[i]; 312 o64_2 = buffo[i+1]; 313 STORE2(o64_1 >> 16, o64_2 >> 16); 314 315 #else /* _NO_LONGLONG */ 316 317 o64 = *(mlib_s64*)(buffo + i); 318 STORE2(o64 >> 48, o64 >> 16); 319 320 #endif /* _NO_LONGLONG */ 321 322 sp += chan2; 323 dp += chan2; 324 } 325 326 for (; i < wid; i++) { 327 p00 = buff0[i - 1]; p10 = buff1[i - 1]; 328 p01 = buff0[i]; p11 = buff1[i]; 329 330 buff2[i] = (mlib_s32)sp[0]; 331 332 buffo[i] = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3); 333 dp[0] = buffo[i] >> 16; 334 335 sp += chan1; 336 dp += chan1; 337 } 338 339 sl += sll; 340 dl += dll; 341 342 buffT = buff0; 343 buff0 = buff1; 344 buff1 = buff2; 345 buff2 = buffT; 346 } 347 } 348 349 if (pbuff != (mlib_s32*)buff_arr) mlib_free(pbuff); 350 351 return MLIB_SUCCESS; 352 } 353 354 /***************************************************************/ 355 mlib_status mlib_c_conv2x2ext_s16(mlib_image *dst, 356 const mlib_image *src, 357 mlib_s32 dx_l, 358 mlib_s32 dx_r, 359 mlib_s32 dy_t, 360 mlib_s32 dy_b, 361 const mlib_s32 *kern, 362 mlib_s32 scalef_expon, 363 mlib_s32 cmask) 364 { 365 mlib_d64 buff_arr[2*BUFF_LINE]; 366 mlib_s32 *pbuff = (mlib_s32*)buff_arr, *buffo, *buff0, *buff1, *buff2, *buffT; 367 DTYPE *adr_src, *sl, *sp, *sl1; 368 DTYPE *adr_dst, *dl, *dp; 369 mlib_d64 k0, k1, k2, k3, scalef = 65536.0; 370 mlib_d64 p00, p01, p02, 371 p10, p11, p12; 372 mlib_s32 wid, hgt, sll, dll, wid1; 373 mlib_s32 nchannel, chan1, chan2; 374 mlib_s32 i, j, c, swid; 375 LOAD_KERNEL_INTO_DOUBLE(); 376 GET_SRC_DST_PARAMETERS(DTYPE); 377 378 swid = wid + D_KER; 379 380 wid1 = (swid + 1) &~ 1; 381 382 if (wid1 > BUFF_LINE) { 383 pbuff = mlib_malloc(4*sizeof(mlib_s32)*wid1); 384 385 if (pbuff == NULL) return MLIB_FAILURE; 386 } 387 388 buffo = pbuff; 389 buff0 = buffo + wid1; 390 buff1 = buff0 + wid1; 391 buff2 = buff1 + wid1; 392 393 swid -= dx_r; 394 395 chan1 = nchannel; 396 chan2 = chan1 + chan1; 397 398 for (c = 0; c < nchannel; c++) { 399 if (!(cmask & (1 << (nchannel - 1 - c)))) continue; 400 401 sl = adr_src + c; 402 dl = adr_dst + c; 403 404 if ((hgt - dy_b) > 0) sl1 = sl + sll; 405 else sl1 = sl; 406 407 #ifdef __SUNPRO_C 408 #pragma pipeloop(0) 409 #endif /* __SUNPRO_C */ 410 for (i = 0; i < swid; i++) { 411 buff0[i - 1] = (mlib_s32)sl[i*chan1]; 412 buff1[i - 1] = (mlib_s32)sl1[i*chan1]; 413 } 414 415 if (dx_r != 0) { 416 buff0[swid - 1] = buff0[swid - 2]; 417 buff1[swid - 1] = buff1[swid - 2]; 418 } 419 420 if ((hgt - dy_b) > 1) sl = sl1 + sll; 421 else sl = sl1; 422 423 for (j = 0; j < hgt; j++) { 424 sp = sl; 425 dp = dl; 426 427 buff2[-1] = (mlib_s32)sp[0]; 428 sp += chan1; 429 430 p02 = buff0[-1]; 431 p12 = buff1[-1]; 432 433 #ifdef __SUNPRO_C 434 #pragma pipeloop(0) 435 #endif /* __SUNPRO_C */ 436 for (i = 0; i <= (wid - 2); i += 2) { 437 #ifdef _NO_LONGLONG 438 mlib_s32 o64_1, o64_2; 439 #else /* _NO_LONGLONG */ 440 mlib_s64 o64; 441 #endif /* _NO_LONGLONG */ 442 d64_2x32 sd0, sd1, dd; 443 444 p00 = p02; p10 = p12; 445 446 sd0.d64 = *(TYPE_64BIT*)(buff0 + i); 447 sd1.d64 = *(TYPE_64BIT*)(buff1 + i); 448 p01 = (mlib_d64)sd0.i32s.i0; 449 p02 = (mlib_d64)sd0.i32s.i1; 450 p11 = (mlib_d64)sd1.i32s.i0; 451 p12 = (mlib_d64)sd1.i32s.i1; 452 453 LOAD_BUFF(buff2); 454 455 dd.i32s.i0 = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3); 456 dd.i32s.i1 = CLAMP_S32(p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3); 457 *(TYPE_64BIT*)(buffo + i) = dd.d64; 458 459 #ifdef _NO_LONGLONG 460 461 o64_1 = buffo[i]; 462 o64_2 = buffo[i+1]; 463 STORE2(o64_1 >> 16, o64_2 >> 16); 464 465 #else /* _NO_LONGLONG */ 466 467 o64 = *(mlib_s64*)(buffo + i); 468 STORE2(o64 >> 48, o64 >> 16); 469 470 #endif /* _NO_LONGLONG */ 471 472 sp += chan2; 473 dp += chan2; 474 } 475 476 for (; i < wid; i++) { 477 p00 = buff0[i - 1]; p10 = buff1[i - 1]; 478 p01 = buff0[i]; p11 = buff1[i]; 479 480 buff2[i] = (mlib_s32)sp[0]; 481 482 buffo[i] = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3); 483 dp[0] = buffo[i] >> 16; 484 485 sp += chan1; 486 dp += chan1; 487 } 488 489 if (dx_r != 0) buff2[swid - 1] = buff2[swid - 2]; 490 491 if (j < hgt - dy_b - 2) sl += sll; 492 dl += dll; 493 494 buffT = buff0; 495 buff0 = buff1; 496 buff1 = buff2; 497 buff2 = buffT; 498 } 499 } 500 501 if (pbuff != (mlib_s32*)buff_arr) mlib_free(pbuff); 502 503 return MLIB_SUCCESS; 504 } 505 506 /***************************************************************/ 507 #undef DTYPE 508 #define DTYPE mlib_u16 509 510 mlib_status mlib_c_conv2x2nw_u16(mlib_image *dst, 511 const mlib_image *src, 512 const mlib_s32 *kern, 513 mlib_s32 scalef_expon, 514 mlib_s32 cmask) 515 { 516 mlib_d64 buff_arr[2*BUFF_LINE]; 517 mlib_s32 *pbuff = (mlib_s32*)buff_arr, *buffo, *buff0, *buff1, *buff2, *buffT; 518 DTYPE *adr_src, *sl, *sp, *sl1; 519 DTYPE *adr_dst, *dl, *dp; 520 mlib_d64 k0, k1, k2, k3, scalef = 65536.0; 521 mlib_d64 p00, p01, p02, 522 p10, p11, p12; 523 mlib_s32 wid, hgt, sll, dll, wid1; 524 mlib_s32 nchannel, chan1, chan2; 525 mlib_s32 i, j, c; 526 mlib_d64 doff = 0x7FFF8000; 527 LOAD_KERNEL_INTO_DOUBLE(); 528 GET_SRC_DST_PARAMETERS(DTYPE); 529 530 wid1 = (wid + 1) &~ 1; 531 532 if (wid1 > BUFF_LINE) { 533 pbuff = mlib_malloc(4*sizeof(mlib_s32)*wid1); 534 535 if (pbuff == NULL) return MLIB_FAILURE; 536 } 537 538 buffo = pbuff; 539 buff0 = buffo + wid1; 540 buff1 = buff0 + wid1; 541 buff2 = buff1 + wid1; 542 543 chan1 = nchannel; 544 chan2 = chan1 + chan1; 545 546 wid -= D_KER; 547 hgt -= D_KER; 548 549 for (c = 0; c < nchannel; c++) { 550 if (!(cmask & (1 << (nchannel - 1 - c)))) continue; 551 552 sl = adr_src + c; 553 dl = adr_dst + c; 554 555 sl1 = sl + sll; 556 #ifdef __SUNPRO_C 557 #pragma pipeloop(0) 558 #endif /* __SUNPRO_C */ 559 for (i = 0; i < wid + D_KER; i++) { 560 buff0[i - 1] = (mlib_s32)sl[i*chan1]; 561 buff1[i - 1] = (mlib_s32)sl1[i*chan1]; 562 } 563 564 sl += (D_KER + 1)*sll; 565 566 for (j = 0; j < hgt; j++) { 567 sp = sl; 568 dp = dl; 569 570 buff2[-1] = (mlib_s32)sp[0]; 571 sp += chan1; 572 573 p02 = buff0[-1]; 574 p12 = buff1[-1]; 575 576 #ifdef __SUNPRO_C 577 #pragma pipeloop(0) 578 #endif /* __SUNPRO_C */ 579 for (i = 0; i <= (wid - 2); i += 2) { 580 #ifdef _NO_LONGLONG 581 mlib_s32 o64_1, o64_2; 582 #else /* _NO_LONGLONG */ 583 mlib_s64 o64; 584 #endif /* _NO_LONGLONG */ 585 d64_2x32 sd0, sd1, dd; 586 587 p00 = p02; p10 = p12; 588 589 sd0.d64 = *(TYPE_64BIT*)(buff0 + i); 590 sd1.d64 = *(TYPE_64BIT*)(buff1 + i); 591 p01 = (mlib_d64)sd0.i32s.i0; 592 p02 = (mlib_d64)sd0.i32s.i1; 593 p11 = (mlib_d64)sd1.i32s.i0; 594 p12 = (mlib_d64)sd1.i32s.i1; 595 596 LOAD_BUFF(buff2); 597 598 dd.i32s.i0 = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3 - doff); 599 dd.i32s.i1 = CLAMP_S32(p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3 - doff); 600 *(TYPE_64BIT*)(buffo + i) = dd.d64; 601 602 #ifdef _NO_LONGLONG 603 604 o64_1 = buffo[i]; 605 o64_2 = buffo[i+1]; 606 o64_1 = o64_1 ^ 0x80000000U; 607 o64_2 = o64_2 ^ 0x80000000U; 608 STORE2(o64_1 >> 16, o64_2 >> 16); 609 610 #else /* _NO_LONGLONG */ 611 612 o64 = *(mlib_s64*)(buffo + i); 613 o64 = o64 ^ MLIB_U64_CONST(0x8000000080000000); 614 STORE2(o64 >> 48, o64 >> 16); 615 616 #endif /* _NO_LONGLONG */ 617 618 sp += chan2; 619 dp += chan2; 620 } 621 622 for (; i < wid; i++) { 623 p00 = buff0[i - 1]; p10 = buff1[i - 1]; 624 p01 = buff0[i]; p11 = buff1[i]; 625 626 buff2[i] = (mlib_s32)sp[0]; 627 628 buffo[i] = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3 - doff); 629 dp[0] = (buffo[i] >> 16) ^ 0x8000; 630 631 sp += chan1; 632 dp += chan1; 633 } 634 635 sl += sll; 636 dl += dll; 637 638 buffT = buff0; 639 buff0 = buff1; 640 buff1 = buff2; 641 buff2 = buffT; 642 } 643 } 644 645 if (pbuff != (mlib_s32*)buff_arr) mlib_free(pbuff); 646 647 return MLIB_SUCCESS; 648 } 649 650 /***************************************************************/ 651 mlib_status mlib_c_conv2x2ext_u16(mlib_image *dst, 652 const mlib_image *src, 653 mlib_s32 dx_l, 654 mlib_s32 dx_r, 655 mlib_s32 dy_t, 656 mlib_s32 dy_b, 657 const mlib_s32 *kern, 658 mlib_s32 scalef_expon, 659 mlib_s32 cmask) 660 { 661 mlib_d64 buff_arr[2*BUFF_LINE]; 662 mlib_s32 *pbuff = (mlib_s32*)buff_arr, *buffo, *buff0, *buff1, *buff2, *buffT; 663 DTYPE *adr_src, *sl, *sp, *sl1; 664 DTYPE *adr_dst, *dl, *dp; 665 mlib_d64 k0, k1, k2, k3, scalef = 65536.0; 666 mlib_d64 p00, p01, p02, 667 p10, p11, p12; 668 mlib_s32 wid, hgt, sll, dll, wid1; 669 mlib_s32 nchannel, chan1, chan2; 670 mlib_s32 i, j, c, swid; 671 mlib_d64 doff = 0x7FFF8000; 672 LOAD_KERNEL_INTO_DOUBLE(); 673 GET_SRC_DST_PARAMETERS(DTYPE); 674 675 swid = wid + D_KER; 676 677 wid1 = (swid + 1) &~ 1; 678 679 if (wid1 > BUFF_LINE) { 680 pbuff = mlib_malloc(4*sizeof(mlib_s32)*wid1); 681 682 if (pbuff == NULL) return MLIB_FAILURE; 683 } 684 685 buffo = pbuff; 686 buff0 = buffo + wid1; 687 buff1 = buff0 + wid1; 688 buff2 = buff1 + wid1; 689 690 swid -= dx_r; 691 692 chan1 = nchannel; 693 chan2 = chan1 + chan1; 694 695 for (c = 0; c < nchannel; c++) { 696 if (!(cmask & (1 << (nchannel - 1 - c)))) continue; 697 698 sl = adr_src + c; 699 dl = adr_dst + c; 700 701 if ((hgt - dy_b) > 0) sl1 = sl + sll; 702 else sl1 = sl; 703 704 #ifdef __SUNPRO_C 705 #pragma pipeloop(0) 706 #endif /* __SUNPRO_C */ 707 for (i = 0; i < swid; i++) { 708 buff0[i - 1] = (mlib_s32)sl[i*chan1]; 709 buff1[i - 1] = (mlib_s32)sl1[i*chan1]; 710 } 711 712 if (dx_r != 0) { 713 buff0[swid - 1] = buff0[swid - 2]; 714 buff1[swid - 1] = buff1[swid - 2]; 715 } 716 717 if ((hgt - dy_b) > 1) sl = sl1 + sll; 718 else sl = sl1; 719 720 for (j = 0; j < hgt; j++) { 721 sp = sl; 722 dp = dl; 723 724 buff2[-1] = (mlib_s32)sp[0]; 725 sp += chan1; 726 727 p02 = buff0[-1]; 728 p12 = buff1[-1]; 729 730 #ifdef __SUNPRO_C 731 #pragma pipeloop(0) 732 #endif /* __SUNPRO_C */ 733 for (i = 0; i <= (wid - 2); i += 2) { 734 #ifdef _NO_LONGLONG 735 mlib_s32 o64_1, o64_2; 736 #else /* _NO_LONGLONG */ 737 mlib_s64 o64; 738 #endif /* _NO_LONGLONG */ 739 d64_2x32 sd0, sd1, dd; 740 741 p00 = p02; p10 = p12; 742 743 sd0.d64 = *(TYPE_64BIT*)(buff0 + i); 744 sd1.d64 = *(TYPE_64BIT*)(buff1 + i); 745 p01 = (mlib_d64)sd0.i32s.i0; 746 p02 = (mlib_d64)sd0.i32s.i1; 747 p11 = (mlib_d64)sd1.i32s.i0; 748 p12 = (mlib_d64)sd1.i32s.i1; 749 750 LOAD_BUFF(buff2); 751 752 dd.i32s.i0 = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3 - doff); 753 dd.i32s.i1 = CLAMP_S32(p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3 - doff); 754 *(TYPE_64BIT*)(buffo + i) = dd.d64; 755 756 #ifdef _NO_LONGLONG 757 758 o64_1 = buffo[i]; 759 o64_2 = buffo[i+1]; 760 o64_1 = o64_1 ^ 0x80000000U; 761 o64_2 = o64_2 ^ 0x80000000U; 762 STORE2(o64_1 >> 16, o64_2 >> 16); 763 764 #else /* _NO_LONGLONG */ 765 766 o64 = *(mlib_s64*)(buffo + i); 767 o64 = o64 ^ MLIB_U64_CONST(0x8000000080000000); 768 STORE2(o64 >> 48, o64 >> 16); 769 770 #endif /* _NO_LONGLONG */ 771 772 sp += chan2; 773 dp += chan2; 774 } 775 776 for (; i < wid; i++) { 777 p00 = buff0[i - 1]; p10 = buff1[i - 1]; 778 p01 = buff0[i]; p11 = buff1[i]; 779 780 buff2[i] = (mlib_s32)sp[0]; 781 782 buffo[i] = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3 - doff); 783 dp[0] = (buffo[i] >> 16) ^ 0x8000; 784 785 sp += chan1; 786 dp += chan1; 787 } 788 789 if (dx_r != 0) buff2[swid - 1] = buff2[swid - 2]; 790 791 if (j < hgt - dy_b - 2) sl += sll; 792 dl += dll; 793 794 buffT = buff0; 795 buff0 = buff1; 796 buff1 = buff2; 797 buff2 = buffT; 798 } 799 } 800 801 if (pbuff != (mlib_s32*)buff_arr) mlib_free(pbuff); 802 803 return MLIB_SUCCESS; 804 } 805 806 /***************************************************************/ 807 #undef DTYPE 808 #define DTYPE mlib_u8 809 810 mlib_status mlib_c_conv2x2nw_u8(mlib_image *dst, 811 const mlib_image *src, 812 const mlib_s32 *kern, 813 mlib_s32 scalef_expon, 814 mlib_s32 cmask) 815 { 816 mlib_d64 buff_arr[2*BUFF_LINE]; 817 mlib_s32 *pbuff = (mlib_s32*)buff_arr, *buffo, *buff0, *buff1, *buff2, *buffT; 818 DTYPE *adr_src, *sl, *sp, *sl1; 819 DTYPE *adr_dst, *dl, *dp; 820 mlib_d64 k0, k1, k2, k3, scalef = (1 << 24); 821 mlib_d64 p00, p01, p02, 822 p10, p11, p12; 823 mlib_s32 wid, hgt, sll, dll, wid1; 824 mlib_s32 nchannel, chan1, chan2; 825 mlib_s32 i, j, c; 826 LOAD_KERNEL_INTO_DOUBLE(); 827 GET_SRC_DST_PARAMETERS(DTYPE); 828 829 wid1 = (wid + 1) &~ 1; 830 831 if (wid1 > BUFF_LINE) { 832 pbuff = mlib_malloc(4*sizeof(mlib_s32)*wid1); 833 834 if (pbuff == NULL) return MLIB_FAILURE; 835 } 836 837 buffo = pbuff; 838 buff0 = buffo + wid1; 839 buff1 = buff0 + wid1; 840 buff2 = buff1 + wid1; 841 842 chan1 = nchannel; 843 chan2 = chan1 + chan1; 844 845 wid -= D_KER; 846 hgt -= D_KER; 847 848 for (c = 0; c < nchannel; c++) { 849 if (!(cmask & (1 << (nchannel - 1 - c)))) continue; 850 851 sl = adr_src + c; 852 dl = adr_dst + c; 853 854 sl1 = sl + sll; 855 #ifdef __SUNPRO_C 856 #pragma pipeloop(0) 857 #endif /* __SUNPRO_C */ 858 for (i = 0; i < wid + D_KER; i++) { 859 buff0[i - 1] = (mlib_s32)sl[i*chan1]; 860 buff1[i - 1] = (mlib_s32)sl1[i*chan1]; 861 } 862 863 sl += (D_KER + 1)*sll; 864 865 for (j = 0; j < hgt; j++) { 866 sp = sl; 867 dp = dl; 868 869 buff2[-1] = (mlib_s32)sp[0]; 870 sp += chan1; 871 872 p02 = buff0[-1]; 873 p12 = buff1[-1]; 874 875 #ifdef __SUNPRO_C 876 #pragma pipeloop(0) 877 #endif /* __SUNPRO_C */ 878 for (i = 0; i <= (wid - 2); i += 2) { 879 #ifdef _NO_LONGLONG 880 mlib_s32 o64_1, o64_2; 881 #else /* _NO_LONGLONG */ 882 mlib_s64 o64; 883 #endif /* _NO_LONGLONG */ 884 d64_2x32 sd0, sd1, dd; 885 886 p00 = p02; p10 = p12; 887 888 sd0.d64 = *(TYPE_64BIT*)(buff0 + i); 889 sd1.d64 = *(TYPE_64BIT*)(buff1 + i); 890 p01 = (mlib_d64)sd0.i32s.i0; 891 p02 = (mlib_d64)sd0.i32s.i1; 892 p11 = (mlib_d64)sd1.i32s.i0; 893 p12 = (mlib_d64)sd1.i32s.i1; 894 895 LOAD_BUFF(buff2); 896 897 dd.i32s.i0 = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3 - (1u << 31)); 898 dd.i32s.i1 = CLAMP_S32(p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3 - (1u << 31)); 899 *(TYPE_64BIT*)(buffo + i) = dd.d64; 900 901 #ifdef _NO_LONGLONG 902 903 o64_1 = buffo[i]; 904 o64_2 = buffo[i+1]; 905 STORE2(o64_1 >> 24, o64_2 >> 24); 906 907 #else /* _NO_LONGLONG */ 908 909 o64 = *(mlib_s64*)(buffo + i); 910 STORE2(o64 >> 56, o64 >> 24); 911 912 #endif /* _NO_LONGLONG */ 913 914 sp += chan2; 915 dp += chan2; 916 } 917 918 for (; i < wid; i++) { 919 p00 = buff0[i - 1]; p10 = buff1[i - 1]; 920 p01 = buff0[i]; p11 = buff1[i]; 921 922 buff2[i] = (mlib_s32)sp[0]; 923 924 buffo[i] = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3 - (1u << 31)); 925 dp[0] = (buffo[i] >> 24); 926 927 sp += chan1; 928 dp += chan1; 929 } 930 931 sl += sll; 932 dl += dll; 933 934 buffT = buff0; 935 buff0 = buff1; 936 buff1 = buff2; 937 buff2 = buffT; 938 } 939 } 940 941 { 942 mlib_s32 amask = (1 << nchannel) - 1; 943 944 if ((cmask & amask) != amask) { 945 mlib_ImageXor80(adr_dst, wid, hgt, dll, nchannel, cmask); 946 } else { 947 mlib_ImageXor80_aa(adr_dst, wid*nchannel, hgt, dll); 948 } 949 } 950 951 if (pbuff != (mlib_s32*)buff_arr) mlib_free(pbuff); 952 953 return MLIB_SUCCESS; 954 } 955 956 /***************************************************************/ 957 mlib_status mlib_c_conv2x2ext_u8(mlib_image *dst, 958 const mlib_image *src, 959 mlib_s32 dx_l, 960 mlib_s32 dx_r, 961 mlib_s32 dy_t, 962 mlib_s32 dy_b, 963 const mlib_s32 *kern, 964 mlib_s32 scalef_expon, 965 mlib_s32 cmask) 966 { 967 mlib_d64 buff_arr[4*BUFF_LINE]; 968 mlib_s32 *pbuff = (mlib_s32*)buff_arr, *buffo, *buff0, *buff1, *buff2, *buffT; 969 DTYPE *adr_src, *sl, *sp, *sl1; 970 DTYPE *adr_dst, *dl, *dp; 971 mlib_d64 k0, k1, k2, k3, scalef = (1 << 24); 972 mlib_d64 p00, p01, p02, 973 p10, p11, p12; 974 mlib_s32 wid, hgt, sll, dll, wid1; 975 mlib_s32 nchannel, chan1, chan2; 976 mlib_s32 i, j, c, swid; 977 LOAD_KERNEL_INTO_DOUBLE(); 978 GET_SRC_DST_PARAMETERS(DTYPE); 979 980 swid = wid + D_KER; 981 982 wid1 = (swid + 1) &~ 1; 983 984 if (wid1 > BUFF_LINE) { 985 pbuff = mlib_malloc(4*sizeof(mlib_s32)*wid1); 986 987 if (pbuff == NULL) return MLIB_FAILURE; 988 } 989 990 buffo = pbuff; 991 buff0 = buffo + wid1; 992 buff1 = buff0 + wid1; 993 buff2 = buff1 + wid1; 994 995 chan1 = nchannel; 996 chan2 = chan1 + chan1; 997 998 swid -= dx_r; 999 1000 for (c = 0; c < nchannel; c++) { 1001 if (!(cmask & (1 << (nchannel - 1 - c)))) continue; 1002 1003 sl = adr_src + c; 1004 dl = adr_dst + c; 1005 1006 if ((hgt - dy_b) > 0) sl1 = sl + sll; 1007 else sl1 = sl; 1008 1009 #ifdef __SUNPRO_C 1010 #pragma pipeloop(0) 1011 #endif /* __SUNPRO_C */ 1012 for (i = 0; i < swid; i++) { 1013 buff0[i - 1] = (mlib_s32)sl[i*chan1]; 1014 buff1[i - 1] = (mlib_s32)sl1[i*chan1]; 1015 } 1016 1017 if (dx_r != 0) { 1018 buff0[swid - 1] = buff0[swid - 2]; 1019 buff1[swid - 1] = buff1[swid - 2]; 1020 } 1021 1022 if ((hgt - dy_b) > 1) sl = sl1 + sll; 1023 else sl = sl1; 1024 1025 for (j = 0; j < hgt; j++) { 1026 sp = sl; 1027 dp = dl; 1028 1029 buff2[-1] = (mlib_s32)sp[0]; 1030 sp += chan1; 1031 1032 p02 = buff0[-1]; 1033 p12 = buff1[-1]; 1034 1035 #ifdef __SUNPRO_C 1036 #pragma pipeloop(0) 1037 #endif /* __SUNPRO_C */ 1038 for (i = 0; i <= (wid - 2); i += 2) { 1039 #ifdef _NO_LONGLONG 1040 mlib_s32 o64_1, o64_2; 1041 #else /* _NO_LONGLONG */ 1042 mlib_s64 o64; 1043 #endif /* _NO_LONGLONG */ 1044 d64_2x32 sd0, sd1, dd; 1045 1046 p00 = p02; p10 = p12; 1047 1048 sd0.d64 = *(TYPE_64BIT*)(buff0 + i); 1049 sd1.d64 = *(TYPE_64BIT*)(buff1 + i); 1050 p01 = (mlib_d64)sd0.i32s.i0; 1051 p02 = (mlib_d64)sd0.i32s.i1; 1052 p11 = (mlib_d64)sd1.i32s.i0; 1053 p12 = (mlib_d64)sd1.i32s.i1; 1054 1055 LOAD_BUFF(buff2); 1056 1057 dd.i32s.i0 = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3 - (1u << 31)); 1058 dd.i32s.i1 = CLAMP_S32(p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3 - (1u << 31)); 1059 *(TYPE_64BIT*)(buffo + i) = dd.d64; 1060 1061 #ifdef _NO_LONGLONG 1062 1063 o64_1 = buffo[i]; 1064 o64_2 = buffo[i+1]; 1065 STORE2(o64_1 >> 24, o64_2 >> 24); 1066 1067 #else /* _NO_LONGLONG */ 1068 1069 o64 = *(mlib_s64*)(buffo + i); 1070 STORE2(o64 >> 56, o64 >> 24); 1071 1072 #endif /* _NO_LONGLONG */ 1073 1074 sp += chan2; 1075 dp += chan2; 1076 } 1077 1078 for (; i < wid; i++) { 1079 p00 = buff0[i - 1]; p10 = buff1[i - 1]; 1080 p01 = buff0[i]; p11 = buff1[i]; 1081 1082 buff2[i] = (mlib_s32)sp[0]; 1083 1084 buffo[i] = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3 - (1u << 31)); 1085 dp[0] = (buffo[i] >> 24); 1086 1087 sp += chan1; 1088 dp += chan1; 1089 } 1090 1091 if (dx_r != 0) buff2[swid - 1] = buff2[swid - 2]; 1092 1093 if (j < hgt - dy_b - 2) sl += sll; 1094 dl += dll; 1095 1096 buffT = buff0; 1097 buff0 = buff1; 1098 buff1 = buff2; 1099 buff2 = buffT; 1100 } 1101 } 1102 1103 { 1104 mlib_s32 amask = (1 << nchannel) - 1; 1105 1106 if ((cmask & amask) != amask) { 1107 mlib_ImageXor80(adr_dst, wid, hgt, dll, nchannel, cmask); 1108 } else { 1109 mlib_ImageXor80_aa(adr_dst, wid*nchannel, hgt, dll); 1110 } 1111 } 1112 1113 if (pbuff != (mlib_s32*)buff_arr) mlib_free(pbuff); 1114 1115 return MLIB_SUCCESS; 1116 } 1117 1118 /***************************************************************/