1 /* 2 * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 27 /* 28 * FUNCTION 29 * Internal functions for mlib_ImageConv* on U8/S16/U16 type and 30 * MLIB_EDGE_SRC_EXTEND mask 31 */ 32 33 #include "mlib_image.h" 34 #include "mlib_ImageConv.h" 35 #include "mlib_c_ImageConv.h" 36 37 /* 38 * This define switches between functions of different data types 39 */ 40 41 #define IMG_TYPE 3 42 43 /***************************************************************/ 44 #if IMG_TYPE == 1 45 46 #define DTYPE mlib_u8 47 #define CONV_FUNC(KERN) mlib_c_conv##KERN##ext_u8(PARAM) 48 #define CONV_FUNC_MxN mlib_c_convMxNext_u8(PARAM_MxN) 49 #define CONV_FUNC_I(KERN) mlib_i_conv##KERN##ext_u8(PARAM) 50 #define CONV_FUNC_MxN_I mlib_i_convMxNext_u8(PARAM_MxN) 51 #define DSCALE (1 << 24) 52 #define FROM_S32(x) (((x) >> 24) ^ 128) 53 #define S64TOS32(x) (x) 54 #define SAT_OFF -(1u << 31) 55 56 #elif IMG_TYPE == 2 57 58 #define DTYPE mlib_s16 59 #define CONV_FUNC(KERN) mlib_conv##KERN##ext_s16(PARAM) 60 #define CONV_FUNC_MxN mlib_convMxNext_s16(PARAM_MxN) 61 #define CONV_FUNC_I(KERN) mlib_i_conv##KERN##ext_s16(PARAM) 62 #define CONV_FUNC_MxN_I mlib_i_convMxNext_s16(PARAM_MxN) 63 #define DSCALE 65536.0 64 #define FROM_S32(x) ((x) >> 16) 65 #define S64TOS32(x) ((x) & 0xffffffff) 66 #define SAT_OFF 67 68 #elif IMG_TYPE == 3 69 70 #define DTYPE mlib_u16 71 #define CONV_FUNC(KERN) mlib_conv##KERN##ext_u16(PARAM) 72 #define CONV_FUNC_MxN mlib_convMxNext_u16(PARAM_MxN) 73 #define CONV_FUNC_I(KERN) mlib_i_conv##KERN##ext_u16(PARAM) 74 #define CONV_FUNC_MxN_I mlib_i_convMxNext_u16(PARAM_MxN) 75 #define DSCALE 65536.0 76 #define FROM_S32(x) (((x) >> 16) ^ 0x8000) 77 #define S64TOS32(x) (x) 78 #define SAT_OFF -(1u << 31) 79 80 #endif /* IMG_TYPE == 1 */ 81 82 /***************************************************************/ 83 #define PARAM \ 84 mlib_image *dst, \ 85 const mlib_image *src, \ 86 mlib_s32 dx_l, \ 87 mlib_s32 dx_r, \ 88 mlib_s32 dy_t, \ 89 mlib_s32 dy_b, \ 90 const mlib_s32 *kern, \ 91 mlib_s32 scalef_expon, \ 92 mlib_s32 cmask 93 94 /***************************************************************/ 95 #define PARAM_MxN \ 96 mlib_image *dst, \ 97 const mlib_image *src, \ 98 const mlib_s32 *kernel, \ 99 mlib_s32 m, \ 100 mlib_s32 n, \ 101 mlib_s32 dx_l, \ 102 mlib_s32 dx_r, \ 103 mlib_s32 dy_t, \ 104 mlib_s32 dy_b, \ 105 mlib_s32 scale, \ 106 mlib_s32 cmask 107 108 /***************************************************************/ 109 #define FTYPE mlib_d64 110 111 #ifndef MLIB_USE_FTOI_CLAMPING 112 113 #define CLAMP_S32(x) \ 114 (((x) <= MLIB_S32_MIN) ? MLIB_S32_MIN : (((x) >= MLIB_S32_MAX) ? MLIB_S32_MAX : (mlib_s32)(x))) 115 116 #else 117 118 #define CLAMP_S32(x) ((mlib_s32)(x)) 119 120 #endif /* MLIB_USE_FTOI_CLAMPING */ 121 122 /***************************************************************/ 123 #define D2I(x) CLAMP_S32((x) SAT_OFF) 124 125 /***************************************************************/ 126 #ifdef _LITTLE_ENDIAN 127 128 #define STORE2(res0, res1) \ 129 dp[0 ] = res1; \ 130 dp[chan1] = res0 131 132 #else 133 134 #define STORE2(res0, res1) \ 135 dp[0 ] = res0; \ 136 dp[chan1] = res1 137 138 #endif /* _LITTLE_ENDIAN */ 139 140 /***************************************************************/ 141 #ifdef _NO_LONGLONG 142 143 #define LOAD_BUFF(buff) \ 144 buff[i ] = sp[0]; \ 145 buff[i + 1] = sp[chan1] 146 147 #else /* _NO_LONGLONG */ 148 149 #ifdef _LITTLE_ENDIAN 150 151 #define LOAD_BUFF(buff) \ 152 *(mlib_s64*)(buff + i) = (((mlib_s64)sp[chan1]) << 32) | S64TOS32((mlib_s64)sp[0]) 153 154 #else /* _LITTLE_ENDIAN */ 155 156 #define LOAD_BUFF(buff) \ 157 *(mlib_s64*)(buff + i) = (((mlib_s64)sp[0]) << 32) | S64TOS32((mlib_s64)sp[chan1]) 158 159 #endif /* _LITTLE_ENDIAN */ 160 #endif /* _NO_LONGLONG */ 161 162 /***************************************************************/ 163 typedef union { 164 mlib_d64 d64; 165 struct { 166 mlib_s32 i0; 167 mlib_s32 i1; 168 } i32s; 169 } d64_2x32; 170 171 /***************************************************************/ 172 #define DEF_VARS(type) \ 173 type *adr_src, *sl, *sp, *sl1; \ 174 type *adr_dst, *dl, *dp; \ 175 FTYPE *pbuff = buff; \ 176 mlib_s32 *buffi, *buffo; \ 177 mlib_s32 wid, hgt, sll, dll; \ 178 mlib_s32 nchannel, chan1, chan2; \ 179 mlib_s32 i, j, c, swid 180 181 /***************************************************************/ 182 #define GET_SRC_DST_PARAMETERS(type) \ 183 hgt = mlib_ImageGetHeight(src); \ 184 wid = mlib_ImageGetWidth(src); \ 185 nchannel = mlib_ImageGetChannels(src); \ 186 sll = mlib_ImageGetStride(src) / sizeof(type); \ 187 dll = mlib_ImageGetStride(dst) / sizeof(type); \ 188 adr_src = (type *)mlib_ImageGetData(src); \ 189 adr_dst = (type *)mlib_ImageGetData(dst) 190 191 /***************************************************************/ 192 #if IMG_TYPE == 1 193 194 /* 195 * Test for the presence of any "1" bit in bits 196 8 to 31 of val. If present, then val is either 197 negative or >255. If over/underflows of 8 bits 198 are uncommon, then this technique can be a win, 199 since only a single test, rather than two, is 200 necessary to determine if clamping is needed. 201 On the other hand, if over/underflows are common, 202 it adds an extra test. 203 */ 204 #define CLAMP_STORE(dst, val) \ 205 if (val & 0xffffff00) { \ 206 if (val < MLIB_U8_MIN) \ 207 dst = MLIB_U8_MIN; \ 208 else \ 209 dst = MLIB_U8_MAX; \ 210 } else { \ 211 dst = (mlib_u8)val; \ 212 } 213 214 #elif IMG_TYPE == 2 215 216 #define CLAMP_STORE(dst, val) \ 217 if (val >= MLIB_S16_MAX) \ 218 dst = MLIB_S16_MAX; \ 219 else if (val <= MLIB_S16_MIN) \ 220 dst = MLIB_S16_MIN; \ 221 else \ 222 dst = (mlib_s16)val 223 224 #elif IMG_TYPE == 3 225 226 #define CLAMP_STORE(dst, val) \ 227 if (val >= MLIB_U16_MAX) \ 228 dst = MLIB_U16_MAX; \ 229 else if (val <= MLIB_U16_MIN) \ 230 dst = MLIB_U16_MIN; \ 231 else \ 232 dst = (mlib_u16)val 233 234 #endif /* IMG_TYPE == 1 */ 235 236 /***************************************************************/ 237 #define MAX_KER 7 238 #define MAX_N 15 239 #define BUFF_SIZE 1600 240 #define CACHE_SIZE (64*1024) 241 242 static mlib_status mlib_ImageConv1xN_ext(mlib_image *dst, 243 const mlib_image *src, 244 const mlib_d64 *k, 245 mlib_s32 n, 246 mlib_s32 dy_t, 247 mlib_s32 dy_b, 248 mlib_s32 cmask) 249 { 250 DTYPE *adr_src, *sl; 251 DTYPE *adr_dst, *dl, *dp; 252 FTYPE buff[BUFF_SIZE]; 253 FTYPE *buffd; 254 FTYPE *pbuff = buff; 255 const FTYPE *pk; 256 FTYPE k0, k1, k2, k3; 257 FTYPE p0, p1, p2, p3, p4; 258 FTYPE *sbuff; 259 mlib_s32 l, k_off, off, bsize; 260 mlib_s32 max_hsize, smax_hsize, shgt, hsize, kh; 261 mlib_s32 d0, d1, ii; 262 mlib_s32 wid, hgt, sll, dll; 263 mlib_s32 nchannel; 264 mlib_s32 i, j, c; 265 GET_SRC_DST_PARAMETERS(DTYPE); 266 267 max_hsize = ((CACHE_SIZE/sizeof(DTYPE))/sll) - (n - 1); 268 269 if (max_hsize < 1) max_hsize = 1; 270 if (max_hsize > hgt) max_hsize = hgt; 271 272 shgt = hgt + (n - 1); 273 smax_hsize = max_hsize + (n - 1); 274 275 bsize = 2 * (smax_hsize + 1); 276 277 if (bsize > BUFF_SIZE) { 278 pbuff = mlib_malloc(sizeof(FTYPE)*bsize); 279 280 if (pbuff == NULL) return MLIB_FAILURE; 281 } 282 283 sbuff = pbuff; 284 buffd = sbuff + smax_hsize; 285 286 shgt -= (dy_t + dy_b); 287 k_off = 0; 288 289 for (l = 0; l < hgt; l += hsize) { 290 hsize = hgt - l; 291 292 if (hsize > max_hsize) hsize = max_hsize; 293 294 smax_hsize = hsize + (n - 1); 295 296 for (c = 0; c < nchannel; c++) { 297 if (!(cmask & (1 << (nchannel - 1 - c)))) continue; 298 299 sl = adr_src + c; 300 dl = adr_dst + c; 301 302 #ifdef __SUNPRO_C 303 #pragma pipeloop(0) 304 #endif /* __SUNPRO_C */ 305 for (i = 0; i < hsize; i++) buffd[i] = 0.0; 306 307 for (j = 0; j < wid; j++) { 308 FTYPE *buff = sbuff; 309 310 for (i = k_off, ii = 0; (i < dy_t) && (ii < smax_hsize); i++, ii++) { 311 sbuff[i - k_off] = (FTYPE)sl[0]; 312 } 313 314 #ifdef __SUNPRO_C 315 #pragma pipeloop(0) 316 #endif /* __SUNPRO_C */ 317 for (; (i < shgt + dy_t) && (ii < smax_hsize); i++, ii++) { 318 sbuff[i - k_off] = (FTYPE)sl[(i - dy_t)*sll]; 319 } 320 321 for (; (i < shgt + dy_t + dy_b) && (ii < smax_hsize); i++, ii++) { 322 sbuff[i - k_off] = (FTYPE)sl[(shgt - 1)*sll]; 323 } 324 325 pk = k; 326 327 for (off = 0; off < (n - 4); off += 4) { 328 329 p2 = buff[0]; p3 = buff[1]; p4 = buff[2]; 330 k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3]; 331 332 #ifdef __SUNPRO_C 333 #pragma pipeloop(0) 334 #endif /* __SUNPRO_C */ 335 for (i = 0; i < hsize; i += 2) { 336 p0 = p2; p1 = p3; p2 = p4; 337 338 p3 = buff[i + 3]; p4 = buff[i + 4]; 339 340 buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3; 341 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3; 342 } 343 344 pk += 4; 345 buff += 4; 346 } 347 348 dp = dl; 349 kh = n - off; 350 351 if (kh == 4) { 352 p2 = buff[0]; p3 = buff[1]; p4 = buff[2]; 353 k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3]; 354 355 #ifdef __SUNPRO_C 356 #pragma pipeloop(0) 357 #endif /* __SUNPRO_C */ 358 for (i = 0; i <= (hsize - 2); i += 2) { 359 p0 = p2; p1 = p3; p2 = p4; 360 361 p3 = buff[i + 3]; p4 = buff[i + 4]; 362 363 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + buffd[i ]); 364 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + buffd[i + 1]); 365 366 dp[0 ] = FROM_S32(d0); 367 dp[dll] = FROM_S32(d1); 368 369 buffd[i ] = 0.0; 370 buffd[i + 1] = 0.0; 371 372 dp += 2*dll; 373 } 374 375 if (i < hsize) { 376 p0 = p2; p1 = p3; p2 = p4; 377 p3 = buff[i + 3]; 378 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + buffd[i]); 379 dp[0] = FROM_S32(d0); 380 buffd[i] = 0.0; 381 } 382 383 } else if (kh == 3) { 384 385 p2 = buff[0]; p3 = buff[1]; 386 k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; 387 388 #ifdef __SUNPRO_C 389 #pragma pipeloop(0) 390 #endif /* __SUNPRO_C */ 391 for (i = 0; i <= (hsize - 2); i += 2) { 392 p0 = p2; p1 = p3; 393 394 p2 = buff[i + 2]; p3 = buff[i + 3]; 395 396 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + buffd[i ]); 397 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + buffd[i + 1]); 398 399 dp[0 ] = FROM_S32(d0); 400 dp[dll] = FROM_S32(d1); 401 402 buffd[i ] = 0.0; 403 buffd[i + 1] = 0.0; 404 405 dp += 2*dll; 406 } 407 408 if (i < hsize) { 409 p0 = p2; p1 = p3; 410 p2 = buff[i + 2]; 411 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + buffd[i]); 412 dp[0] = FROM_S32(d0); 413 414 buffd[i] = 0.0; 415 } 416 417 } else if (kh == 2) { 418 419 p2 = buff[0]; 420 k0 = pk[0]; k1 = pk[1]; 421 422 #ifdef __SUNPRO_C 423 #pragma pipeloop(0) 424 #endif /* __SUNPRO_C */ 425 for (i = 0; i <= (hsize - 2); i += 2) { 426 p0 = p2; 427 428 p1 = buff[i + 1]; p2 = buff[i + 2]; 429 430 d0 = D2I(p0*k0 + p1*k1 + buffd[i ]); 431 d1 = D2I(p1*k0 + p2*k1 + buffd[i + 1]); 432 433 dp[0 ] = FROM_S32(d0); 434 dp[dll] = FROM_S32(d1); 435 436 buffd[i ] = 0.0; 437 buffd[i + 1] = 0.0; 438 439 dp += 2*dll; 440 } 441 442 if (i < hsize) { 443 p0 = p2; 444 p1 = buff[i + 1]; 445 d0 = D2I(p0*k0 + p1*k1 + buffd[i]); 446 dp[0] = FROM_S32(d0); 447 448 buffd[i] = 0.0; 449 } 450 451 } else /* kh == 1 */{ 452 453 k0 = pk[0]; 454 455 #ifdef __SUNPRO_C 456 #pragma pipeloop(0) 457 #endif /* __SUNPRO_C */ 458 for (i = 0; i <= (hsize - 2); i += 2) { 459 p0 = buff[i]; p1 = buff[i + 1]; 460 461 d0 = D2I(p0*k0 + buffd[i ]); 462 d1 = D2I(p1*k0 + buffd[i + 1]); 463 464 dp[0 ] = FROM_S32(d0); 465 dp[dll] = FROM_S32(d1); 466 467 buffd[i ] = 0.0; 468 buffd[i + 1] = 0.0; 469 470 dp += 2*dll; 471 } 472 473 if (i < hsize) { 474 p0 = buff[i]; 475 d0 = D2I(p0*k0 + buffd[i]); 476 dp[0] = FROM_S32(d0); 477 478 buffd[i] = 0.0; 479 } 480 } 481 482 /* next line */ 483 sl += nchannel; 484 dl += nchannel; 485 } 486 } 487 488 k_off += max_hsize; 489 adr_dst += max_hsize*dll; 490 } 491 492 if (pbuff != buff) mlib_free(pbuff); 493 494 return MLIB_SUCCESS; 495 } 496 497 /***************************************************************/ 498 mlib_status CONV_FUNC_MxN 499 { 500 DTYPE *adr_src, *sl, *sp = NULL; 501 DTYPE *adr_dst, *dl, *dp = NULL; 502 FTYPE buff[BUFF_SIZE], *buffs_arr[2*(MAX_N + 1)]; 503 FTYPE **buffs = buffs_arr, *buffd; 504 FTYPE akernel[256], *k = akernel, fscale = DSCALE; 505 FTYPE *pbuff = buff; 506 FTYPE k0, k1, k2, k3, k4, k5, k6; 507 FTYPE p0, p1, p2, p3, p4, p5, p6, p7; 508 mlib_s32 *buffi; 509 mlib_s32 mn, l, off, kw, bsize, buff_ind; 510 mlib_s32 d0, d1; 511 mlib_s32 wid, hgt, sll, dll; 512 mlib_s32 nchannel, chan1, chan2; 513 mlib_s32 i, j, c, swid; 514 d64_2x32 dd; 515 mlib_status status = MLIB_SUCCESS; 516 517 GET_SRC_DST_PARAMETERS(DTYPE); 518 519 if (scale > 30) { 520 fscale *= 1.0/(1 << 30); 521 scale -= 30; 522 } 523 524 fscale /= (1 << scale); 525 526 mn = m*n; 527 528 if (mn > 256) { 529 k = mlib_malloc(mn*sizeof(mlib_d64)); 530 531 if (k == NULL) return MLIB_FAILURE; 532 } 533 534 for (i = 0; i < mn; i++) { 535 k[i] = kernel[i]*fscale; 536 } 537 538 if (m == 1) { 539 status = mlib_ImageConv1xN_ext(dst, src, k, n, dy_t, dy_b, cmask); 540 FREE_AND_RETURN_STATUS; 541 } 542 543 swid = wid + (m - 1); 544 545 bsize = (n + 3)*swid; 546 547 if ((bsize > BUFF_SIZE) || (n > MAX_N)) { 548 pbuff = mlib_malloc(sizeof(FTYPE)*bsize + sizeof(FTYPE *)*2*(n + 1)); 549 550 if (pbuff == NULL) { 551 status = MLIB_FAILURE; 552 FREE_AND_RETURN_STATUS; 553 } 554 buffs = (FTYPE **)(pbuff + bsize); 555 } 556 557 for (l = 0; l < (n + 1); l++) buffs[l] = pbuff + l*swid; 558 for (l = 0; l < (n + 1); l++) buffs[l + (n + 1)] = buffs[l]; 559 buffd = buffs[n] + swid; 560 buffi = (mlib_s32*)(buffd + swid); 561 562 chan1 = nchannel; 563 chan2 = chan1 + chan1; 564 565 swid -= (dx_l + dx_r); 566 567 for (c = 0; c < nchannel; c++) { 568 if (!(cmask & (1 << (chan1 - 1 - c)))) continue; 569 570 sl = adr_src + c; 571 dl = adr_dst + c; 572 573 for (l = 0; l < n; l++) { 574 FTYPE *buff = buffs[l]; 575 576 for (i = 0; i < dx_l; i++) { 577 buff[i] = (FTYPE)sl[0]; 578 } 579 580 #ifdef __SUNPRO_C 581 #pragma pipeloop(0) 582 #endif /* __SUNPRO_C */ 583 for (i = 0; i < swid; i++) { 584 buff[i + dx_l] = (FTYPE)sl[i*chan1]; 585 } 586 587 for (i = 0; i < dx_r; i++) { 588 buff[swid + dx_l + i] = buff[swid + dx_l - 1]; 589 } 590 591 if ((l >= dy_t) && (l < hgt + n - dy_b - 2)) sl += sll; 592 } 593 594 buff_ind = 0; 595 596 #ifdef __SUNPRO_C 597 #pragma pipeloop(0) 598 #endif /* __SUNPRO_C */ 599 for (i = 0; i < wid; i++) buffd[i] = 0.0; 600 601 for (j = 0; j < hgt; j++) { 602 FTYPE **buffc = buffs + buff_ind; 603 FTYPE *buffn = buffc[n]; 604 FTYPE *pk = k; 605 606 for (l = 0; l < n; l++) { 607 FTYPE *buff_l = buffc[l]; 608 609 for (off = 0; off < m;) { 610 FTYPE *buff = buff_l + off; 611 612 kw = m - off; 613 614 if (kw > 2*MAX_KER) kw = MAX_KER; else 615 if (kw > MAX_KER) kw = kw/2; 616 off += kw; 617 618 sp = sl; 619 dp = dl; 620 621 if (kw == 7) { 622 623 p2 = buff[0]; p3 = buff[1]; p4 = buff[2]; 624 p5 = buff[3]; p6 = buff[4]; p7 = buff[5]; 625 626 k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3]; 627 k4 = pk[4]; k5 = pk[5]; k6 = pk[6]; 628 629 if (l < (n - 1) || off < m) { 630 #ifdef __SUNPRO_C 631 #pragma pipeloop(0) 632 #endif /* __SUNPRO_C */ 633 for (i = 0; i <= (wid - 2); i += 2) { 634 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7; 635 636 p6 = buff[i + 6]; p7 = buff[i + 7]; 637 638 buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6; 639 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6; 640 } 641 642 } else { 643 #ifdef __SUNPRO_C 644 #pragma pipeloop(0) 645 #endif /* __SUNPRO_C */ 646 for (i = 0; i <= (wid - 2); i += 2) { 647 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7; 648 649 p6 = buff[i + 6]; p7 = buff[i + 7]; 650 651 LOAD_BUFF(buffi); 652 653 dd.d64 = *(FTYPE *)(buffi + i); 654 buffn[i + dx_l ] = (FTYPE)dd.i32s.i0; 655 buffn[i + dx_l + 1] = (FTYPE)dd.i32s.i1; 656 657 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i ]); 658 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1]); 659 660 dp[0 ] = FROM_S32(d0); 661 dp[chan1] = FROM_S32(d1); 662 663 buffd[i ] = 0.0; 664 buffd[i + 1] = 0.0; 665 666 sp += chan2; 667 dp += chan2; 668 } 669 } 670 671 } else if (kw == 6) { 672 673 p2 = buff[0]; p3 = buff[1]; p4 = buff[2]; 674 p5 = buff[3]; p6 = buff[4]; 675 676 k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3]; 677 k4 = pk[4]; k5 = pk[5]; 678 679 if (l < (n - 1) || off < m) { 680 #ifdef __SUNPRO_C 681 #pragma pipeloop(0) 682 #endif /* __SUNPRO_C */ 683 for (i = 0; i <= (wid - 2); i += 2) { 684 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; 685 686 p5 = buff[i + 5]; p6 = buff[i + 6]; 687 688 buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5; 689 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5; 690 } 691 692 } else { 693 #ifdef __SUNPRO_C 694 #pragma pipeloop(0) 695 #endif /* __SUNPRO_C */ 696 for (i = 0; i <= (wid - 2); i += 2) { 697 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; 698 699 p5 = buff[i + 5]; p6 = buff[i + 6]; 700 701 LOAD_BUFF(buffi); 702 703 dd.d64 = *(FTYPE *)(buffi + i); 704 buffn[i + dx_l ] = (FTYPE)dd.i32s.i0; 705 buffn[i + dx_l + 1] = (FTYPE)dd.i32s.i1; 706 707 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + buffd[i ]); 708 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + buffd[i + 1]); 709 710 dp[0 ] = FROM_S32(d0); 711 dp[chan1] = FROM_S32(d1); 712 713 buffd[i ] = 0.0; 714 buffd[i + 1] = 0.0; 715 716 sp += chan2; 717 dp += chan2; 718 } 719 } 720 721 } else if (kw == 5) { 722 723 p2 = buff[0]; p3 = buff[1]; p4 = buff[2]; 724 p5 = buff[3]; 725 726 k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3]; 727 k4 = pk[4]; 728 729 if (l < (n - 1) || off < m) { 730 #ifdef __SUNPRO_C 731 #pragma pipeloop(0) 732 #endif /* __SUNPRO_C */ 733 for (i = 0; i <= (wid - 2); i += 2) { 734 p0 = p2; p1 = p3; p2 = p4; p3 = p5; 735 736 p4 = buff[i + 4]; p5 = buff[i + 5]; 737 738 buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4; 739 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4; 740 } 741 742 } else { 743 #ifdef __SUNPRO_C 744 #pragma pipeloop(0) 745 #endif /* __SUNPRO_C */ 746 for (i = 0; i <= (wid - 2); i += 2) { 747 p0 = p2; p1 = p3; p2 = p4; p3 = p5; 748 749 p4 = buff[i + 4]; p5 = buff[i + 5]; 750 751 LOAD_BUFF(buffi); 752 753 dd.d64 = *(FTYPE *)(buffi + i); 754 buffn[i + dx_l ] = (FTYPE)dd.i32s.i0; 755 buffn[i + dx_l + 1] = (FTYPE)dd.i32s.i1; 756 757 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + buffd[i ]); 758 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + buffd[i + 1]); 759 760 dp[0 ] = FROM_S32(d0); 761 dp[chan1] = FROM_S32(d1); 762 763 buffd[i ] = 0.0; 764 buffd[i + 1] = 0.0; 765 766 sp += chan2; 767 dp += chan2; 768 } 769 } 770 771 } else if (kw == 4) { 772 773 p2 = buff[0]; p3 = buff[1]; p4 = buff[2]; 774 775 k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3]; 776 777 if (l < (n - 1) || off < m) { 778 #ifdef __SUNPRO_C 779 #pragma pipeloop(0) 780 #endif /* __SUNPRO_C */ 781 for (i = 0; i <= (wid - 2); i += 2) { 782 p0 = p2; p1 = p3; p2 = p4; 783 784 p3 = buff[i + 3]; p4 = buff[i + 4]; 785 786 buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3; 787 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3; 788 } 789 790 } else { 791 #ifdef __SUNPRO_C 792 #pragma pipeloop(0) 793 #endif /* __SUNPRO_C */ 794 for (i = 0; i <= (wid - 2); i += 2) { 795 p0 = p2; p1 = p3; p2 = p4; 796 797 p3 = buff[i + 3]; p4 = buff[i + 4]; 798 799 LOAD_BUFF(buffi); 800 801 dd.d64 = *(FTYPE *)(buffi + i); 802 buffn[i + dx_l ] = (FTYPE)dd.i32s.i0; 803 buffn[i + dx_l + 1] = (FTYPE)dd.i32s.i1; 804 805 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + buffd[i ]); 806 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + buffd[i + 1]); 807 808 dp[0 ] = FROM_S32(d0); 809 dp[chan1] = FROM_S32(d1); 810 811 buffd[i ] = 0.0; 812 buffd[i + 1] = 0.0; 813 814 sp += chan2; 815 dp += chan2; 816 } 817 } 818 819 } else if (kw == 3) { 820 821 p2 = buff[0]; p3 = buff[1]; 822 k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; 823 824 if (l < (n - 1) || off < m) { 825 #ifdef __SUNPRO_C 826 #pragma pipeloop(0) 827 #endif /* __SUNPRO_C */ 828 for (i = 0; i <= (wid - 2); i += 2) { 829 p0 = p2; p1 = p3; 830 831 p2 = buff[i + 2]; p3 = buff[i + 3]; 832 833 buffd[i ] += p0*k0 + p1*k1 + p2*k2; 834 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2; 835 } 836 837 } else { 838 #ifdef __SUNPRO_C 839 #pragma pipeloop(0) 840 #endif /* __SUNPRO_C */ 841 for (i = 0; i <= (wid - 2); i += 2) { 842 p0 = p2; p1 = p3; 843 844 p2 = buff[i + 2]; p3 = buff[i + 3]; 845 846 LOAD_BUFF(buffi); 847 848 dd.d64 = *(FTYPE *)(buffi + i); 849 buffn[i + dx_l ] = (FTYPE)dd.i32s.i0; 850 buffn[i + dx_l + 1] = (FTYPE)dd.i32s.i1; 851 852 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + buffd[i ]); 853 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + buffd[i + 1]); 854 855 dp[0 ] = FROM_S32(d0); 856 dp[chan1] = FROM_S32(d1); 857 858 buffd[i ] = 0.0; 859 buffd[i + 1] = 0.0; 860 861 sp += chan2; 862 dp += chan2; 863 } 864 } 865 866 } else /* if (kw == 2) */ { 867 868 p2 = buff[0]; 869 k0 = pk[0]; k1 = pk[1]; 870 871 if (l < (n - 1) || off < m) { 872 #ifdef __SUNPRO_C 873 #pragma pipeloop(0) 874 #endif /* __SUNPRO_C */ 875 for (i = 0; i <= (wid - 2); i += 2) { 876 p0 = p2; 877 878 p1 = buff[i + 1]; p2 = buff[i + 2]; 879 880 buffd[i ] += p0*k0 + p1*k1; 881 buffd[i + 1] += p1*k0 + p2*k1; 882 } 883 884 } else { 885 #ifdef __SUNPRO_C 886 #pragma pipeloop(0) 887 #endif /* __SUNPRO_C */ 888 for (i = 0; i <= (wid - 2); i += 2) { 889 p0 = p2; 890 891 p1 = buff[i + 1]; p2 = buff[i + 2]; 892 893 LOAD_BUFF(buffi); 894 895 dd.d64 = *(FTYPE *)(buffi + i); 896 buffn[i + dx_l ] = (FTYPE)dd.i32s.i0; 897 buffn[i + dx_l + 1] = (FTYPE)dd.i32s.i1; 898 899 d0 = D2I(p0*k0 + p1*k1 + buffd[i ]); 900 d1 = D2I(p1*k0 + p2*k1 + buffd[i + 1]); 901 902 dp[0 ] = FROM_S32(d0); 903 dp[chan1] = FROM_S32(d1); 904 905 buffd[i ] = 0.0; 906 buffd[i + 1] = 0.0; 907 908 sp += chan2; 909 dp += chan2; 910 } 911 } 912 } 913 914 pk += kw; 915 } 916 } 917 918 /* last pixels */ 919 for (; i < wid; i++) { 920 FTYPE *pk = k, s = 0; 921 mlib_s32 x, d0; 922 923 for (l = 0; l < n; l++) { 924 FTYPE *buff = buffc[l] + i; 925 926 for (x = 0; x < m; x++) s += buff[x] * (*pk++); 927 } 928 929 d0 = D2I(s); 930 dp[0] = FROM_S32(d0); 931 932 buffn[i + dx_l] = (FTYPE)sp[0]; 933 934 sp += chan1; 935 dp += chan1; 936 } 937 938 for (; i < swid; i++) { 939 buffn[i + dx_l] = (FTYPE)sp[0]; 940 sp += chan1; 941 } 942 943 for (i = 0; i < dx_l; i++) buffn[i] = buffn[dx_l]; 944 for (i = 0; i < dx_r; i++) buffn[swid + dx_l + i] = buffn[swid + dx_l - 1]; 945 946 /* next line */ 947 948 if (j < hgt - dy_b - 2) sl += sll; 949 dl += dll; 950 951 buff_ind++; 952 953 if (buff_ind >= n + 1) buff_ind = 0; 954 } 955 } 956 957 FREE_AND_RETURN_STATUS; 958 } 959 960 /***************************************************************/ 961 #define STORE_RES(res, x) \ 962 x >>= shift2; \ 963 CLAMP_STORE(res, x) 964 965 mlib_status CONV_FUNC_MxN_I 966 { 967 DTYPE *adr_src, *sl, *sp = NULL; 968 DTYPE *adr_dst, *dl, *dp = NULL; 969 mlib_s32 buff[BUFF_SIZE], *buffs_arr[2*(MAX_N + 1)]; 970 mlib_s32 *pbuff = buff; 971 mlib_s32 **buffs = buffs_arr, *buffd; 972 mlib_s32 l, off, kw, bsize, buff_ind; 973 mlib_s32 d0, d1, shift1, shift2; 974 mlib_s32 k0, k1, k2, k3, k4, k5, k6; 975 mlib_s32 p0, p1, p2, p3, p4, p5, p6, p7; 976 mlib_s32 wid, hgt, sll, dll; 977 mlib_s32 nchannel, chan1; 978 mlib_s32 i, j, c, swid; 979 mlib_s32 chan2; 980 mlib_s32 k_locl[MAX_N*MAX_N], *k = k_locl; 981 GET_SRC_DST_PARAMETERS(DTYPE); 982 983 #if IMG_TYPE != 1 984 shift1 = 16; 985 #else 986 shift1 = 8; 987 #endif /* IMG_TYPE != 1 */ 988 shift2 = scale - shift1; 989 990 chan1 = nchannel; 991 chan2 = chan1 + chan1; 992 993 swid = wid + (m - 1); 994 995 bsize = (n + 2)*swid; 996 997 if ((bsize > BUFF_SIZE) || (n > MAX_N)) { 998 pbuff = mlib_malloc(sizeof(mlib_s32)*bsize + sizeof(mlib_s32 *)*2*(n + 1)); 999 1000 if (pbuff == NULL) return MLIB_FAILURE; 1001 buffs = (mlib_s32 **)(pbuff + bsize); 1002 } 1003 1004 for (l = 0; l < (n + 1); l++) buffs[l] = pbuff + l*swid; 1005 for (l = 0; l < (n + 1); l++) buffs[l + (n + 1)] = buffs[l]; 1006 buffd = buffs[n] + swid; 1007 1008 if (m*n > MAX_N*MAX_N) { 1009 k = mlib_malloc(sizeof(mlib_s32)*(m*n)); 1010 1011 if (k == NULL) { 1012 if (pbuff != buff) mlib_free(pbuff); 1013 return MLIB_FAILURE; 1014 } 1015 } 1016 1017 for (i = 0; i < m*n; i++) { 1018 k[i] = kernel[i] >> shift1; 1019 } 1020 1021 swid -= (dx_l + dx_r); 1022 1023 for (c = 0; c < nchannel; c++) { 1024 if (!(cmask & (1 << (nchannel - 1 - c)))) continue; 1025 1026 sl = adr_src + c; 1027 dl = adr_dst + c; 1028 1029 for (l = 0; l < n; l++) { 1030 mlib_s32 *buff = buffs[l]; 1031 1032 for (i = 0; i < dx_l; i++) { 1033 buff[i] = (mlib_s32)sl[0]; 1034 } 1035 1036 #ifdef __SUNPRO_C 1037 #pragma pipeloop(0) 1038 #endif /* __SUNPRO_C */ 1039 for (i = 0; i < swid; i++) { 1040 buff[i + dx_l] = (mlib_s32)sl[i*chan1]; 1041 } 1042 1043 for (i = 0; i < dx_r; i++) { 1044 buff[swid + dx_l + i] = buff[swid + dx_l - 1]; 1045 } 1046 1047 if ((l >= dy_t) && (l < hgt + n - dy_b - 2)) sl += sll; 1048 } 1049 1050 buff_ind = 0; 1051 1052 #ifdef __SUNPRO_C 1053 #pragma pipeloop(0) 1054 #endif /* __SUNPRO_C */ 1055 for (i = 0; i < wid; i++) buffd[i] = 0; 1056 1057 for (j = 0; j < hgt; j++) { 1058 mlib_s32 **buffc = buffs + buff_ind; 1059 mlib_s32 *buffn = buffc[n]; 1060 mlib_s32 *pk = k; 1061 1062 for (l = 0; l < n; l++) { 1063 mlib_s32 *buff_l = buffc[l]; 1064 1065 for (off = 0; off < m;) { 1066 mlib_s32 *buff = buff_l + off; 1067 1068 sp = sl; 1069 dp = dl; 1070 1071 kw = m - off; 1072 1073 if (kw > 2*MAX_KER) kw = MAX_KER; else 1074 if (kw > MAX_KER) kw = kw/2; 1075 off += kw; 1076 1077 if (kw == 7) { 1078 1079 p2 = buff[0]; p3 = buff[1]; p4 = buff[2]; 1080 p5 = buff[3]; p6 = buff[4]; p7 = buff[5]; 1081 1082 k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3]; 1083 k4 = pk[4]; k5 = pk[5]; k6 = pk[6]; 1084 1085 if (l < (n - 1) || off < m) { 1086 #ifdef __SUNPRO_C 1087 #pragma pipeloop(0) 1088 #endif /* __SUNPRO_C */ 1089 for (i = 0; i <= (wid - 2); i += 2) { 1090 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7; 1091 1092 p6 = buff[i + 6]; p7 = buff[i + 7]; 1093 1094 buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6; 1095 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6; 1096 } 1097 1098 } else { 1099 #ifdef __SUNPRO_C 1100 #pragma pipeloop(0) 1101 #endif /* __SUNPRO_C */ 1102 for (i = 0; i <= (wid - 2); i += 2) { 1103 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7; 1104 1105 p6 = buff[i + 6]; p7 = buff[i + 7]; 1106 1107 buffn[i + dx_l ] = (mlib_s32)sp[0]; 1108 buffn[i + dx_l + 1] = (mlib_s32)sp[chan1]; 1109 1110 d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i ]); 1111 d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1]); 1112 1113 STORE_RES(dp[0 ], d0); 1114 STORE_RES(dp[chan1], d1); 1115 1116 buffd[i ] = 0; 1117 buffd[i + 1] = 0; 1118 1119 sp += chan2; 1120 dp += chan2; 1121 } 1122 } 1123 1124 } else if (kw == 6) { 1125 1126 p2 = buff[0]; p3 = buff[1]; p4 = buff[2]; 1127 p5 = buff[3]; p6 = buff[4]; 1128 1129 k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3]; 1130 k4 = pk[4]; k5 = pk[5]; 1131 1132 if (l < (n - 1) || off < m) { 1133 #ifdef __SUNPRO_C 1134 #pragma pipeloop(0) 1135 #endif /* __SUNPRO_C */ 1136 for (i = 0; i <= (wid - 2); i += 2) { 1137 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; 1138 1139 p5 = buff[i + 5]; p6 = buff[i + 6]; 1140 1141 buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5; 1142 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5; 1143 } 1144 1145 } else { 1146 #ifdef __SUNPRO_C 1147 #pragma pipeloop(0) 1148 #endif /* __SUNPRO_C */ 1149 for (i = 0; i <= (wid - 2); i += 2) { 1150 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; 1151 1152 p5 = buff[i + 5]; p6 = buff[i + 6]; 1153 1154 buffn[i + dx_l ] = (mlib_s32)sp[0]; 1155 buffn[i + dx_l + 1] = (mlib_s32)sp[chan1]; 1156 1157 d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + buffd[i ]); 1158 d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + buffd[i + 1]); 1159 1160 STORE_RES(dp[0 ], d0); 1161 STORE_RES(dp[chan1], d1); 1162 1163 buffd[i ] = 0; 1164 buffd[i + 1] = 0; 1165 1166 sp += chan2; 1167 dp += chan2; 1168 } 1169 } 1170 1171 } else if (kw == 5) { 1172 1173 p2 = buff[0]; p3 = buff[1]; p4 = buff[2]; 1174 p5 = buff[3]; 1175 1176 k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3]; 1177 k4 = pk[4]; 1178 1179 if (l < (n - 1) || off < m) { 1180 #ifdef __SUNPRO_C 1181 #pragma pipeloop(0) 1182 #endif /* __SUNPRO_C */ 1183 for (i = 0; i <= (wid - 2); i += 2) { 1184 p0 = p2; p1 = p3; p2 = p4; p3 = p5; 1185 1186 p4 = buff[i + 4]; p5 = buff[i + 5]; 1187 1188 buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4; 1189 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4; 1190 } 1191 1192 } else { 1193 #ifdef __SUNPRO_C 1194 #pragma pipeloop(0) 1195 #endif /* __SUNPRO_C */ 1196 for (i = 0; i <= (wid - 2); i += 2) { 1197 p0 = p2; p1 = p3; p2 = p4; p3 = p5; 1198 1199 p4 = buff[i + 4]; p5 = buff[i + 5]; 1200 1201 buffn[i + dx_l ] = (mlib_s32)sp[0]; 1202 buffn[i + dx_l + 1] = (mlib_s32)sp[chan1]; 1203 1204 d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + buffd[i ]); 1205 d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + buffd[i + 1]); 1206 1207 STORE_RES(dp[0 ], d0); 1208 STORE_RES(dp[chan1], d1); 1209 1210 buffd[i ] = 0; 1211 buffd[i + 1] = 0; 1212 1213 sp += chan2; 1214 dp += chan2; 1215 } 1216 } 1217 1218 } else if (kw == 4) { 1219 1220 p2 = buff[0]; p3 = buff[1]; p4 = buff[2]; 1221 1222 k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3]; 1223 1224 if (l < (n - 1) || off < m) { 1225 #ifdef __SUNPRO_C 1226 #pragma pipeloop(0) 1227 #endif /* __SUNPRO_C */ 1228 for (i = 0; i <= (wid - 2); i += 2) { 1229 p0 = p2; p1 = p3; p2 = p4; 1230 1231 p3 = buff[i + 3]; p4 = buff[i + 4]; 1232 1233 buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3; 1234 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3; 1235 } 1236 1237 } else { 1238 #ifdef __SUNPRO_C 1239 #pragma pipeloop(0) 1240 #endif /* __SUNPRO_C */ 1241 for (i = 0; i <= (wid - 2); i += 2) { 1242 p0 = p2; p1 = p3; p2 = p4; 1243 1244 p3 = buff[i + 3]; p4 = buff[i + 4]; 1245 1246 buffn[i + dx_l ] = (mlib_s32)sp[0]; 1247 buffn[i + dx_l + 1] = (mlib_s32)sp[chan1]; 1248 1249 d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + buffd[i ]); 1250 d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + buffd[i + 1]); 1251 1252 STORE_RES(dp[0 ], d0); 1253 STORE_RES(dp[chan1], d1); 1254 1255 buffd[i ] = 0; 1256 buffd[i + 1] = 0; 1257 1258 sp += chan2; 1259 dp += chan2; 1260 } 1261 } 1262 1263 } else if (kw == 3) { 1264 1265 p2 = buff[0]; p3 = buff[1]; 1266 k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; 1267 1268 if (l < (n - 1) || off < m) { 1269 #ifdef __SUNPRO_C 1270 #pragma pipeloop(0) 1271 #endif /* __SUNPRO_C */ 1272 for (i = 0; i <= (wid - 2); i += 2) { 1273 p0 = p2; p1 = p3; 1274 1275 p2 = buff[i + 2]; p3 = buff[i + 3]; 1276 1277 buffd[i ] += p0*k0 + p1*k1 + p2*k2; 1278 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2; 1279 } 1280 1281 } else { 1282 #ifdef __SUNPRO_C 1283 #pragma pipeloop(0) 1284 #endif /* __SUNPRO_C */ 1285 for (i = 0; i <= (wid - 2); i += 2) { 1286 p0 = p2; p1 = p3; 1287 1288 p2 = buff[i + 2]; p3 = buff[i + 3]; 1289 1290 buffn[i + dx_l ] = (mlib_s32)sp[0]; 1291 buffn[i + dx_l + 1] = (mlib_s32)sp[chan1]; 1292 1293 d0 = (p0*k0 + p1*k1 + p2*k2 + buffd[i ]); 1294 d1 = (p1*k0 + p2*k1 + p3*k2 + buffd[i + 1]); 1295 1296 STORE_RES(dp[0 ], d0); 1297 STORE_RES(dp[chan1], d1); 1298 1299 buffd[i ] = 0; 1300 buffd[i + 1] = 0; 1301 1302 sp += chan2; 1303 dp += chan2; 1304 } 1305 } 1306 1307 } else if (kw == 2) { 1308 1309 p2 = buff[0]; 1310 k0 = pk[0]; k1 = pk[1]; 1311 1312 if (l < (n - 1) || off < m) { 1313 #ifdef __SUNPRO_C 1314 #pragma pipeloop(0) 1315 #endif /* __SUNPRO_C */ 1316 for (i = 0; i <= (wid - 2); i += 2) { 1317 p0 = p2; 1318 1319 p1 = buff[i + 1]; p2 = buff[i + 2]; 1320 1321 buffd[i ] += p0*k0 + p1*k1; 1322 buffd[i + 1] += p1*k0 + p2*k1; 1323 } 1324 1325 } else { 1326 #ifdef __SUNPRO_C 1327 #pragma pipeloop(0) 1328 #endif /* __SUNPRO_C */ 1329 for (i = 0; i <= (wid - 2); i += 2) { 1330 p0 = p2; 1331 1332 p1 = buff[i + 1]; p2 = buff[i + 2]; 1333 1334 buffn[i + dx_l ] = (mlib_s32)sp[0]; 1335 buffn[i + dx_l + 1] = (mlib_s32)sp[chan1]; 1336 1337 d0 = (p0*k0 + p1*k1 + buffd[i ]); 1338 d1 = (p1*k0 + p2*k1 + buffd[i + 1]); 1339 1340 STORE_RES(dp[0 ], d0); 1341 STORE_RES(dp[chan1], d1); 1342 1343 buffd[i ] = 0; 1344 buffd[i + 1] = 0; 1345 1346 sp += chan2; 1347 dp += chan2; 1348 } 1349 } 1350 1351 } else /* kw == 1 */{ 1352 1353 k0 = pk[0]; 1354 1355 if (l < (n - 1) || off < m) { 1356 #ifdef __SUNPRO_C 1357 #pragma pipeloop(0) 1358 #endif /* __SUNPRO_C */ 1359 for (i = 0; i <= (wid - 2); i += 2) { 1360 p0 = buff[i]; p1 = buff[i + 1]; 1361 1362 buffd[i ] += p0*k0; 1363 buffd[i + 1] += p1*k0; 1364 } 1365 1366 } else { 1367 #ifdef __SUNPRO_C 1368 #pragma pipeloop(0) 1369 #endif /* __SUNPRO_C */ 1370 for (i = 0; i <= (wid - 2); i += 2) { 1371 p0 = buff[i]; p1 = buff[i + 1]; 1372 1373 buffn[i + dx_l ] = (mlib_s32)sp[0]; 1374 buffn[i + dx_l + 1] = (mlib_s32)sp[chan1]; 1375 1376 d0 = (p0*k0 + buffd[i ]); 1377 d1 = (p1*k0 + buffd[i + 1]); 1378 1379 STORE_RES(dp[0 ], d0); 1380 STORE_RES(dp[chan1], d1); 1381 1382 buffd[i ] = 0; 1383 buffd[i + 1] = 0; 1384 1385 sp += chan2; 1386 dp += chan2; 1387 } 1388 } 1389 } 1390 1391 pk += kw; 1392 } 1393 } 1394 1395 /* last pixels */ 1396 for (; i < wid; i++) { 1397 mlib_s32 *pk = k, x, s = 0; 1398 1399 for (l = 0; l < n; l++) { 1400 mlib_s32 *buff = buffc[l] + i; 1401 1402 for (x = 0; x < m; x++) s += buff[x] * (*pk++); 1403 } 1404 1405 STORE_RES(dp[0], s); 1406 1407 buffn[i + dx_l] = (mlib_s32)sp[0]; 1408 1409 sp += chan1; 1410 dp += chan1; 1411 } 1412 1413 for (; i < swid; i++) { 1414 buffn[i + dx_l] = (mlib_s32)sp[0]; 1415 sp += chan1; 1416 } 1417 1418 for (i = 0; i < dx_l; i++) buffn[i] = buffn[dx_l]; 1419 for (i = 0; i < dx_r; i++) buffn[swid + dx_l + i] = buffn[swid + dx_l - 1]; 1420 1421 /* next line */ 1422 1423 if (j < hgt - dy_b - 2) sl += sll; 1424 dl += dll; 1425 1426 buff_ind++; 1427 1428 if (buff_ind >= n + 1) buff_ind = 0; 1429 } 1430 } 1431 1432 if (pbuff != buff) mlib_free(pbuff); 1433 if (k != k_locl) mlib_free(k); 1434 1435 return MLIB_SUCCESS; 1436 } 1437 1438 /***************************************************************/