1 /* 2 * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 27 /* 28 * FUNCTION 29 * Internal functions for mlib_ImageConv* on S32 type and 30 * MLIB_EDGE_DST_NO_WRITE mask 31 * 32 */ 33 34 #include "mlib_image.h" 35 #include "mlib_ImageConv.h" 36 37 /***************************************************************/ 38 #define BUFF_LINE 256 39 40 #define CACHE_SIZE (64*1024) 41 42 /***************************************************************/ 43 #define CONV_FUNC(KERN) mlib_conv##KERN##nw_s32 44 45 /***************************************************************/ 46 #ifndef MLIB_USE_FTOI_CLAMPING 47 48 #define CLAMP_S32(dst, src) \ 49 if (src > (mlib_d64)MLIB_S32_MAX) src = (mlib_d64)MLIB_S32_MAX; \ 50 if (src < (mlib_d64)MLIB_S32_MIN) src = (mlib_d64)MLIB_S32_MIN; \ 51 dst = (mlib_s32)src 52 53 #else 54 55 #define CLAMP_S32(dst, src) dst = (mlib_s32)(src) 56 57 #endif /* MLIB_USE_FTOI_CLAMPING */ 58 59 /***************************************************************/ 60 #define GET_SRC_DST_PARAMETERS(type) \ 61 mlib_s32 hgt = mlib_ImageGetHeight(src); \ 62 mlib_s32 wid = mlib_ImageGetWidth(src); \ 63 mlib_s32 sll = mlib_ImageGetStride(src) / sizeof(type); \ 64 mlib_s32 dll = mlib_ImageGetStride(dst) / sizeof(type); \ 65 type* adr_src = mlib_ImageGetData(src); \ 66 type* adr_dst = mlib_ImageGetData(dst); \ 67 mlib_s32 chan1 = mlib_ImageGetChannels(src) 68 /* mlib_s32 chan2 = chan1 + chan1 */ 69 70 /***************************************************************/ 71 #define DEF_VARS(type) \ 72 GET_SRC_DST_PARAMETERS(type); \ 73 type *sl, *sp, *sl1, *dl, *dp; \ 74 mlib_d64 *pbuff = buff, *buff0, *buff1, *buff2, *buffT; \ 75 mlib_s32 i, j, c; \ 76 mlib_d64 scalef, d0, d1 77 78 /***************************************************************/ 79 #define DEF_VARS_MxN(type) \ 80 GET_SRC_DST_PARAMETERS(type); \ 81 type *sl, *sp = NULL, *dl, *dp = NULL; \ 82 mlib_d64 *pbuff = buff; \ 83 mlib_s32 i, j, c 84 85 /***************************************************************/ 86 #define CALC_SCALE() \ 87 scalef = 1.0; \ 88 while (scalef_expon > 30) { \ 89 scalef /= (1 << 30); \ 90 scalef_expon -= 30; \ 91 } \ 92 \ 93 scalef /= (1 << scalef_expon) 94 95 /***************************************************************/ 96 #undef KSIZE 97 #define KSIZE 2 98 99 mlib_status CONV_FUNC(2x2)(mlib_image *dst, 100 const mlib_image *src, 101 const mlib_s32 *kern, 102 mlib_s32 scalef_expon, 103 mlib_s32 cmask) 104 { 105 mlib_d64 buff[(KSIZE + 1)*BUFF_LINE]; 106 mlib_d64 k0, k1, k2, k3; 107 mlib_d64 p00, p01, p02, p03, 108 p10, p11, p12, p13; 109 mlib_d64 d2; 110 DEF_VARS(mlib_s32); 111 mlib_s32 chan2 = chan1 + chan1; 112 mlib_s32 chan3 = chan1 + chan2; 113 114 if (wid > BUFF_LINE) { 115 pbuff = mlib_malloc((KSIZE + 1)*sizeof(mlib_d64)*wid); 116 117 if (pbuff == NULL) return MLIB_FAILURE; 118 } 119 120 buff0 = pbuff; 121 buff1 = buff0 + wid; 122 buff2 = buff1 + wid; 123 124 wid -= (KSIZE - 1); 125 hgt -= (KSIZE - 1); 126 127 /* keep kernel in regs */ 128 CALC_SCALE(); 129 k0 = scalef * kern[0]; k1 = scalef * kern[1]; 130 k2 = scalef * kern[2]; k3 = scalef * kern[3]; 131 132 for (c = 0; c < chan1; c++) { 133 if (!(cmask & (1 << (chan1 - 1 - c)))) continue; 134 135 sl = adr_src + c; 136 dl = adr_dst + c; 137 138 sl1 = sl + sll; 139 #ifdef __SUNPRO_C 140 #pragma pipeloop(0) 141 #endif /* __SUNPRO_C */ 142 for (i = 0; i < wid + (KSIZE - 1); i++) { 143 buff0[i] = (mlib_d64)sl[i*chan1]; 144 buff1[i] = (mlib_d64)sl1[i*chan1]; 145 } 146 147 sl += KSIZE*sll; 148 149 for (j = 0; j < hgt; j++) { 150 p03 = buff0[0]; 151 p13 = buff1[0]; 152 153 sp = sl; 154 dp = dl; 155 156 #ifdef __SUNPRO_C 157 #pragma pipeloop(0) 158 #endif /* __SUNPRO_C */ 159 for (i = 0; i <= (wid - 3); i += 3) { 160 161 p00 = p03; p10 = p13; 162 163 p01 = buff0[i + 1]; p11 = buff1[i + 1]; 164 p02 = buff0[i + 2]; p12 = buff1[i + 2]; 165 p03 = buff0[i + 3]; p13 = buff1[i + 3]; 166 167 buff2[i ] = (mlib_d64)sp[0]; 168 buff2[i + 1] = (mlib_d64)sp[chan1]; 169 buff2[i + 2] = (mlib_d64)sp[chan2]; 170 171 d0 = p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3; 172 d1 = p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3; 173 d2 = p02 * k0 + p03 * k1 + p12 * k2 + p13 * k3; 174 175 CLAMP_S32(dp[0 ], d0); 176 CLAMP_S32(dp[chan1], d1); 177 CLAMP_S32(dp[chan2], d2); 178 179 sp += chan3; 180 dp += chan3; 181 } 182 183 for (; i < wid; i++) { 184 p00 = buff0[i]; p10 = buff1[i]; 185 p01 = buff0[i + 1]; p11 = buff1[i + 1]; 186 187 buff2[i] = (mlib_d64)sp[0]; 188 189 d0 = p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3; 190 CLAMP_S32(dp[0], d0); 191 192 sp += chan1; 193 dp += chan1; 194 } 195 196 buff2[wid] = (mlib_d64)sp[0]; 197 198 sl += sll; 199 dl += dll; 200 201 buffT = buff0; 202 buff0 = buff1; 203 buff1 = buff2; 204 buff2 = buffT; 205 } 206 } 207 208 if (pbuff != buff) mlib_free(pbuff); 209 210 return MLIB_SUCCESS; 211 } 212 213 /***************************************************************/ 214 #undef KSIZE 215 #define KSIZE 3 216 217 mlib_status CONV_FUNC(3x3)(mlib_image *dst, 218 const mlib_image *src, 219 const mlib_s32 *kern, 220 mlib_s32 scalef_expon, 221 mlib_s32 cmask) 222 { 223 mlib_d64 buff[(KSIZE + 1)*BUFF_LINE], *buff3; 224 mlib_d64 k0, k1, k2, k3, k4, k5, k6, k7, k8; 225 mlib_d64 p00, p01, p02, p03, 226 p10, p11, p12, p13, 227 p20, p21, p22, p23; 228 mlib_s32 *sl2; 229 DEF_VARS(mlib_s32); 230 mlib_s32 chan2 = chan1 + chan1; 231 232 if (wid > BUFF_LINE) { 233 pbuff = mlib_malloc((KSIZE + 1)*sizeof(mlib_d64)*wid); 234 235 if (pbuff == NULL) return MLIB_FAILURE; 236 } 237 238 buff0 = pbuff; 239 buff1 = buff0 + wid; 240 buff2 = buff1 + wid; 241 buff3 = buff2 + wid; 242 243 wid -= (KSIZE - 1); 244 hgt -= (KSIZE - 1); 245 246 adr_dst += ((KSIZE - 1)/2)*(dll + chan1); 247 248 CALC_SCALE(); 249 k0 = scalef * kern[0]; k1 = scalef * kern[1]; k2 = scalef * kern[2]; 250 k3 = scalef * kern[3]; k4 = scalef * kern[4]; k5 = scalef * kern[5]; 251 k6 = scalef * kern[6]; k7 = scalef * kern[7]; k8 = scalef * kern[8]; 252 253 for (c = 0; c < chan1; c++) { 254 if (!(cmask & (1 << (chan1 - 1 - c)))) continue; 255 256 sl = adr_src + c; 257 dl = adr_dst + c; 258 259 sl1 = sl + sll; 260 sl2 = sl1 + sll; 261 #ifdef __SUNPRO_C 262 #pragma pipeloop(0) 263 #endif /* __SUNPRO_C */ 264 for (i = 0; i < wid + (KSIZE - 1); i++) { 265 buff0[i] = (mlib_d64)sl[i*chan1]; 266 buff1[i] = (mlib_d64)sl1[i*chan1]; 267 buff2[i] = (mlib_d64)sl2[i*chan1]; 268 } 269 270 sl += KSIZE*sll; 271 272 for (j = 0; j < hgt; j++) { 273 mlib_d64 s0, s1; 274 275 p02 = buff0[0]; 276 p12 = buff1[0]; 277 p22 = buff2[0]; 278 279 p03 = buff0[1]; 280 p13 = buff1[1]; 281 p23 = buff2[1]; 282 283 sp = sl; 284 dp = dl; 285 286 s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; 287 s1 = p03 * k0 + p13 * k3 + p23 * k6; 288 289 #ifdef __SUNPRO_C 290 #pragma pipeloop(0) 291 #endif /* __SUNPRO_C */ 292 for (i = 0; i <= (wid - 2); i += 2) { 293 p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; 294 p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3]; 295 296 buff3[i ] = (mlib_d64)sp[0]; 297 buff3[i + 1] = (mlib_d64)sp[chan1]; 298 299 d0 = s0 + p02 * k2 + p12 * k5 + p22 * k8; 300 d1 = s1 + p02 * k1 + p03 * k2 + p12 * k4 + p13 * k5 + p22 * k7 + p23 * k8; 301 302 CLAMP_S32(dp[0 ], d0); 303 CLAMP_S32(dp[chan1], d1); 304 305 s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7; 306 s1 = p03 * k0 + p13 * k3 + p23 * k6; 307 308 sp += chan2; 309 dp += chan2; 310 } 311 312 for (; i < wid; i++) { 313 p00 = buff0[i]; p10 = buff1[i]; p20 = buff2[i]; 314 p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1]; 315 p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; 316 317 buff3[i] = (mlib_d64)sp[0]; 318 319 d0 = (p00 * k0 + p01 * k1 + p02 * k2 + p10 * k3 + p11 * k4 + 320 p12 * k5 + p20 * k6 + p21 * k7 + p22 * k8); 321 322 CLAMP_S32(dp[0], d0); 323 324 sp += chan1; 325 dp += chan1; 326 } 327 328 buff3[wid ] = (mlib_d64)sp[0]; 329 buff3[wid + 1] = (mlib_d64)sp[chan1]; 330 331 sl += sll; 332 dl += dll; 333 334 buffT = buff0; 335 buff0 = buff1; 336 buff1 = buff2; 337 buff2 = buff3; 338 buff3 = buffT; 339 } 340 } 341 342 if (pbuff != buff) mlib_free(pbuff); 343 344 return MLIB_SUCCESS; 345 } 346 347 /***************************************************************/ 348 #undef KSIZE 349 #define KSIZE 4 350 351 mlib_status CONV_FUNC(4x4)(mlib_image *dst, 352 const mlib_image *src, 353 const mlib_s32 *kern, 354 mlib_s32 scalef_expon, 355 mlib_s32 cmask) 356 { 357 mlib_d64 buff[(KSIZE + 2)*BUFF_LINE], *buff3, *buff4, *buff5; 358 mlib_d64 k[KSIZE*KSIZE]; 359 mlib_d64 k0, k1, k2, k3, k4, k5, k6, k7; 360 mlib_d64 p00, p01, p02, p03, p04, 361 p10, p11, p12, p13, p14, 362 p20, p21, p22, p23, 363 p30, p31, p32, p33; 364 mlib_s32 *sl2, *sl3; 365 DEF_VARS(mlib_s32); 366 mlib_s32 chan2 = chan1 + chan1; 367 368 if (wid > BUFF_LINE) { 369 pbuff = mlib_malloc((KSIZE + 2)*sizeof(mlib_d64)*wid); 370 371 if (pbuff == NULL) return MLIB_FAILURE; 372 } 373 374 buff0 = pbuff; 375 buff1 = buff0 + wid; 376 buff2 = buff1 + wid; 377 buff3 = buff2 + wid; 378 buff4 = buff3 + wid; 379 buff5 = buff4 + wid; 380 381 wid -= (KSIZE - 1); 382 hgt -= (KSIZE - 1); 383 384 adr_dst += ((KSIZE - 1)/2)*(dll + chan1); 385 386 CALC_SCALE(); 387 for (j = 0; j < 16; j++) k[j] = scalef * kern[j]; 388 389 for (c = 0; c < chan1; c++) { 390 if (!(cmask & (1 << (chan1 - 1 - c)))) continue; 391 392 sl = adr_src + c; 393 dl = adr_dst + c; 394 395 sl1 = sl + sll; 396 sl2 = sl1 + sll; 397 sl3 = sl2 + sll; 398 #ifdef __SUNPRO_C 399 #pragma pipeloop(0) 400 #endif /* __SUNPRO_C */ 401 for (i = 0; i < wid + (KSIZE - 1); i++) { 402 buff0[i] = (mlib_d64)sl[i*chan1]; 403 buff1[i] = (mlib_d64)sl1[i*chan1]; 404 buff2[i] = (mlib_d64)sl2[i*chan1]; 405 buff3[i] = (mlib_d64)sl3[i*chan1]; 406 } 407 408 sl += KSIZE*sll; 409 410 for (j = 0; j < hgt; j++) { 411 /* 412 * First loop on two first lines of kernel 413 */ 414 k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3]; 415 k4 = k[4]; k5 = k[5]; k6 = k[6]; k7 = k[7]; 416 417 sp = sl; 418 dp = dl; 419 420 p02 = buff0[0]; 421 p12 = buff1[0]; 422 p03 = buff0[1]; 423 p13 = buff1[1]; 424 p04 = buff0[2]; 425 426 #ifdef __SUNPRO_C 427 #pragma pipeloop(0) 428 #endif /* __SUNPRO_C */ 429 for (i = 0; i <= (wid - 2); i += 2) { 430 p00 = p02; p10 = p12; 431 p01 = p03; p11 = p13; 432 p02 = p04; p12 = buff1[i + 2]; 433 p03 = buff0[i + 3]; p13 = buff1[i + 3]; 434 p04 = buff0[i + 4]; p14 = buff1[i + 4]; 435 436 buff4[i] = (mlib_d64)sp[0]; 437 buff4[i + 1] = (mlib_d64)sp[chan1]; 438 439 buff5[i ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + 440 p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7); 441 buff5[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + 442 p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7); 443 444 sp += chan2; 445 dp += chan2; 446 } 447 448 /* 449 * Second loop on two last lines of kernel 450 */ 451 k0 = k[ 8]; k1 = k[ 9]; k2 = k[10]; k3 = k[11]; 452 k4 = k[12]; k5 = k[13]; k6 = k[14]; k7 = k[15]; 453 454 sp = sl; 455 dp = dl; 456 457 p02 = buff2[0]; 458 p12 = buff3[0]; 459 p03 = buff2[1]; 460 p13 = buff3[1]; 461 p04 = buff2[2]; 462 463 #ifdef __SUNPRO_C 464 #pragma pipeloop(0) 465 #endif /* __SUNPRO_C */ 466 for (i = 0; i <= (wid - 2); i += 2) { 467 p00 = p02; p10 = p12; 468 p01 = p03; p11 = p13; 469 p02 = p04; p12 = buff3[i + 2]; 470 p03 = buff2[i + 3]; p13 = buff3[i + 3]; 471 p04 = buff2[i + 4]; p14 = buff3[i + 4]; 472 473 d0 = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + 474 p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7 + buff5[i]); 475 d1 = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + 476 p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7 + buff5[i + 1]); 477 478 CLAMP_S32(dp[0 ], d0); 479 CLAMP_S32(dp[chan1], d1); 480 481 sp += chan2; 482 dp += chan2; 483 } 484 485 /* last pixels */ 486 for (; i < wid; i++) { 487 p00 = buff0[i]; p10 = buff1[i]; p20 = buff2[i]; p30 = buff3[i]; 488 p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1]; p31 = buff3[i + 1]; 489 p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; p32 = buff3[i + 2]; 490 p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3]; p33 = buff3[i + 3]; 491 492 buff4[i] = (mlib_d64)sp[0]; 493 494 d0 = (p00 * k[0] + p01 * k[1] + p02 * k[2] + p03 * k[3] + 495 p10 * k[4] + p11 * k[5] + p12 * k[6] + p13 * k[7] + 496 p20 * k[ 8] + p21 * k[ 9] + p22 * k[10] + p23 * k[11] + 497 p30 * k[12] + p31 * k[13] + p32 * k[14] + p33 * k[15]); 498 499 CLAMP_S32(dp[0], d0); 500 501 sp += chan1; 502 dp += chan1; 503 } 504 505 buff4[wid ] = (mlib_d64)sp[0]; 506 buff4[wid + 1] = (mlib_d64)sp[chan1]; 507 buff4[wid + 2] = (mlib_d64)sp[chan2]; 508 509 /* next line */ 510 sl += sll; 511 dl += dll; 512 513 buffT = buff0; 514 buff0 = buff1; 515 buff1 = buff2; 516 buff2 = buff3; 517 buff3 = buff4; 518 buff4 = buffT; 519 } 520 } 521 522 if (pbuff != buff) mlib_free(pbuff); 523 524 return MLIB_SUCCESS; 525 } 526 527 /***************************************************************/ 528 #undef KSIZE 529 #define KSIZE 5 530 531 mlib_status CONV_FUNC(5x5)(mlib_image *dst, 532 const mlib_image *src, 533 const mlib_s32 *kern, 534 mlib_s32 scalef_expon, 535 mlib_s32 cmask) 536 { 537 mlib_d64 buff[(KSIZE + 2)*BUFF_LINE], *buff3, *buff4, *buff5, *buff6; 538 mlib_d64 k[KSIZE*KSIZE]; 539 mlib_d64 k0, k1, k2, k3, k4, k5, k6, k7, k8, k9; 540 mlib_d64 p00, p01, p02, p03, p04, p05, 541 p10, p11, p12, p13, p14, p15, 542 p20, p21, p22, p23, p24, 543 p30, p31, p32, p33, p34, 544 p40, p41, p42, p43, p44; 545 mlib_s32 *sl2, *sl3, *sl4; 546 DEF_VARS(mlib_s32); 547 mlib_s32 chan2 = chan1 + chan1; 548 mlib_s32 chan3 = chan1 + chan2; 549 550 if (wid > BUFF_LINE) { 551 pbuff = mlib_malloc((KSIZE + 2)*sizeof(mlib_d64)*wid); 552 553 if (pbuff == NULL) return MLIB_FAILURE; 554 } 555 556 buff0 = pbuff; 557 buff1 = buff0 + wid; 558 buff2 = buff1 + wid; 559 buff3 = buff2 + wid; 560 buff4 = buff3 + wid; 561 buff5 = buff4 + wid; 562 buff6 = buff5 + wid; 563 564 wid -= (KSIZE - 1); 565 hgt -= (KSIZE - 1); 566 567 adr_dst += ((KSIZE - 1)/2)*(dll + chan1); 568 569 CALC_SCALE(); 570 for (j = 0; j < 25; j++) k[j] = scalef * kern[j]; 571 572 for (c = 0; c < chan1; c++) { 573 if (!(cmask & (1 << (chan1 - 1 - c)))) continue; 574 575 sl = adr_src + c; 576 dl = adr_dst + c; 577 578 sl1 = sl + sll; 579 sl2 = sl1 + sll; 580 sl3 = sl2 + sll; 581 sl4 = sl3 + sll; 582 #ifdef __SUNPRO_C 583 #pragma pipeloop(0) 584 #endif /* __SUNPRO_C */ 585 for (i = 0; i < wid + (KSIZE - 1); i++) { 586 buff0[i] = (mlib_d64)sl[i*chan1]; 587 buff1[i] = (mlib_d64)sl1[i*chan1]; 588 buff2[i] = (mlib_d64)sl2[i*chan1]; 589 buff3[i] = (mlib_d64)sl3[i*chan1]; 590 buff4[i] = (mlib_d64)sl4[i*chan1]; 591 } 592 593 sl += KSIZE*sll; 594 595 for (j = 0; j < hgt; j++) { 596 /* 597 * First loop 598 */ 599 k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3]; k4 = k[4]; 600 k5 = k[5]; k6 = k[6]; k7 = k[7]; k8 = k[8]; k9 = k[9]; 601 602 sp = sl; 603 dp = dl; 604 605 p02 = buff0[0]; 606 p12 = buff1[0]; 607 p03 = buff0[1]; 608 p13 = buff1[1]; 609 p04 = buff0[2]; 610 p14 = buff1[2]; 611 612 #ifdef __SUNPRO_C 613 #pragma pipeloop(0) 614 #endif /* __SUNPRO_C */ 615 for (i = 0; i <= (wid - 2); i += 2) { 616 p00 = p02; p10 = p12; 617 p01 = p03; p11 = p13; 618 p02 = p04; p12 = p14; 619 620 p03 = buff0[i + 3]; p13 = buff1[i + 3]; 621 p04 = buff0[i + 4]; p14 = buff1[i + 4]; 622 p05 = buff0[i + 5]; p15 = buff1[i + 5]; 623 624 buff6[i ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + 625 p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); 626 buff6[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + 627 p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9); 628 629 sp += chan2; 630 dp += chan2; 631 } 632 633 /* 634 * Second loop 635 */ 636 k0 = k[10]; k1 = k[11]; k2 = k[12]; k3 = k[13]; k4 = k[14]; 637 k5 = k[15]; k6 = k[16]; k7 = k[17]; k8 = k[18]; k9 = k[19]; 638 639 sp = sl; 640 dp = dl; 641 642 p02 = buff2[0]; 643 p12 = buff3[0]; 644 p03 = buff2[1]; 645 p13 = buff3[1]; 646 647 #ifdef __SUNPRO_C 648 #pragma pipeloop(0) 649 #endif /* __SUNPRO_C */ 650 for (i = 0; i <= (wid - 2); i += 2) { 651 p00 = p02; p10 = p12; 652 p01 = p03; p11 = p13; 653 654 p02 = buff2[i + 2]; p12 = buff3[i + 2]; 655 p03 = buff2[i + 3]; p13 = buff3[i + 3]; 656 p04 = buff2[i + 4]; p14 = buff3[i + 4]; 657 p05 = buff2[i + 5]; p15 = buff3[i + 5]; 658 659 buff6[i ] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + 660 p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9); 661 buff6[i + 1] += (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + 662 p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9); 663 664 sp += chan2; 665 dp += chan2; 666 } 667 668 /* 669 * 3 loop 670 */ 671 k0 = k[20]; k1 = k[21]; k2 = k[22]; k3 = k[23]; k4 = k[24]; 672 673 sp = sl; 674 dp = dl; 675 676 p02 = buff4[0]; 677 p03 = buff4[1]; 678 p04 = buff4[2]; 679 p05 = buff4[3]; 680 681 #ifdef __SUNPRO_C 682 #pragma pipeloop(0) 683 #endif /* __SUNPRO_C */ 684 for (i = 0; i <= (wid - 2); i += 2) { 685 p00 = p02; p01 = p03; p02 = p04; p03 = p05; 686 687 p04 = buff4[i + 4]; p05 = buff4[i + 5]; 688 689 buff5[i ] = (mlib_d64)sp[0]; 690 buff5[i + 1] = (mlib_d64)sp[chan1]; 691 692 d0 = p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + buff6[i]; 693 d1 = p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + buff6[i + 1]; 694 695 CLAMP_S32(dp[0 ], d0); 696 CLAMP_S32(dp[chan1], d1); 697 698 sp += chan2; 699 dp += chan2; 700 } 701 702 /* last pixels */ 703 for (; i < wid; i++) { 704 p00 = buff0[i]; p10 = buff1[i]; p20 = buff2[i]; p30 = buff3[i]; 705 p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1]; p31 = buff3[i + 1]; 706 p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; p32 = buff3[i + 2]; 707 p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3]; p33 = buff3[i + 3]; 708 p04 = buff0[i + 4]; p14 = buff1[i + 4]; p24 = buff2[i + 4]; p34 = buff3[i + 4]; 709 710 p40 = buff4[i]; p41 = buff4[i + 1]; p42 = buff4[i + 2]; 711 p43 = buff4[i + 3]; p44 = buff4[i + 4]; 712 713 buff5[i] = (mlib_d64)sp[0]; 714 715 d0 = (p00 * k[0] + p01 * k[1] + p02 * k[2] + p03 * k[3] + p04 * k[4] + 716 p10 * k[5] + p11 * k[6] + p12 * k[7] + p13 * k[8] + p14 * k[9] + 717 p20 * k[10] + p21 * k[11] + p22 * k[12] + p23 * k[13] + p24 * k[14] + 718 p30 * k[15] + p31 * k[16] + p32 * k[17] + p33 * k[18] + p34 * k[19] + 719 p40 * k[20] + p41 * k[21] + p42 * k[22] + p43 * k[23] + p44 * k[24]); 720 721 CLAMP_S32(dp[0], d0); 722 723 sp += chan1; 724 dp += chan1; 725 } 726 727 buff5[wid ] = (mlib_d64)sp[0]; 728 buff5[wid + 1] = (mlib_d64)sp[chan1]; 729 buff5[wid + 2] = (mlib_d64)sp[chan2]; 730 buff5[wid + 3] = (mlib_d64)sp[chan3]; 731 732 /* next line */ 733 sl += sll; 734 dl += dll; 735 736 buffT = buff0; 737 buff0 = buff1; 738 buff1 = buff2; 739 buff2 = buff3; 740 buff3 = buff4; 741 buff4 = buff5; 742 buff5 = buffT; 743 } 744 } 745 746 if (pbuff != buff) mlib_free(pbuff); 747 748 return MLIB_SUCCESS; 749 } 750 751 /***************************************************************/ 752 #undef KSIZE 753 #define KSIZE 7 754 755 mlib_status CONV_FUNC(7x7)(mlib_image *dst, 756 const mlib_image *src, 757 const mlib_s32 *kern, 758 mlib_s32 scalef_expon, 759 mlib_s32 cmask) 760 { 761 mlib_d64 buff[(KSIZE + 2)*BUFF_LINE], *buffs[2*(KSIZE + 1)], *buffd; 762 mlib_d64 k[KSIZE*KSIZE]; 763 mlib_d64 k0, k1, k2, k3, k4, k5, k6; 764 mlib_d64 p0, p1, p2, p3, p4, p5, p6, p7; 765 mlib_d64 d0, d1; 766 mlib_s32 l, m, buff_ind, *sl2, *sl3, *sl4, *sl5, *sl6; 767 mlib_d64 scalef; 768 DEF_VARS_MxN(mlib_s32); 769 mlib_s32 chan2 = chan1 + chan1; 770 mlib_s32 *sl1; 771 772 if (wid > BUFF_LINE) { 773 pbuff = mlib_malloc((KSIZE + 2)*sizeof(mlib_d64)*wid); 774 775 if (pbuff == NULL) return MLIB_FAILURE; 776 } 777 778 for (l = 0; l < KSIZE + 1; l++) buffs[l] = pbuff + l*wid; 779 for (l = 0; l < KSIZE + 1; l++) buffs[l + (KSIZE + 1)] = buffs[l]; 780 buffd = buffs[KSIZE] + wid; 781 782 wid -= (KSIZE - 1); 783 hgt -= (KSIZE - 1); 784 785 adr_dst += ((KSIZE - 1)/2)*(dll + chan1); 786 787 CALC_SCALE(); 788 for (j = 0; j < 49; j++) k[j] = scalef * kern[j]; 789 790 for (c = 0; c < chan1; c++) { 791 if (!(cmask & (1 << (chan1 - 1 - c)))) continue; 792 793 sl = adr_src + c; 794 dl = adr_dst + c; 795 796 sl1 = sl + sll; 797 sl2 = sl1 + sll; 798 sl3 = sl2 + sll; 799 sl4 = sl3 + sll; 800 sl5 = sl4 + sll; 801 sl6 = sl5 + sll; 802 #ifdef __SUNPRO_C 803 #pragma pipeloop(0) 804 #endif /* __SUNPRO_C */ 805 for (i = 0; i < wid + (KSIZE - 1); i++) { 806 buffs[0][i] = (mlib_d64)sl[i*chan1]; 807 buffs[1][i] = (mlib_d64)sl1[i*chan1]; 808 buffs[2][i] = (mlib_d64)sl2[i*chan1]; 809 buffs[3][i] = (mlib_d64)sl3[i*chan1]; 810 buffs[4][i] = (mlib_d64)sl4[i*chan1]; 811 buffs[5][i] = (mlib_d64)sl5[i*chan1]; 812 buffs[6][i] = (mlib_d64)sl6[i*chan1]; 813 } 814 815 buff_ind = 0; 816 817 #ifdef __SUNPRO_C 818 #pragma pipeloop(0) 819 #endif /* __SUNPRO_C */ 820 for (i = 0; i < wid; i++) buffd[i] = 0.0; 821 822 sl += KSIZE*sll; 823 824 for (j = 0; j < hgt; j++) { 825 mlib_d64 **buffc = buffs + buff_ind; 826 mlib_d64 *buffn = buffc[KSIZE]; 827 mlib_d64 *pk = k; 828 829 for (l = 0; l < KSIZE; l++) { 830 mlib_d64 *buff = buffc[l]; 831 832 sp = sl; 833 dp = dl; 834 835 p2 = buff[0]; p3 = buff[1]; p4 = buff[2]; 836 p5 = buff[3]; p6 = buff[4]; p7 = buff[5]; 837 838 k0 = *pk++; k1 = *pk++; k2 = *pk++; k3 = *pk++; 839 k4 = *pk++; k5 = *pk++; k6 = *pk++; 840 841 if (l < (KSIZE - 1)) { 842 #ifdef __SUNPRO_C 843 #pragma pipeloop(0) 844 #endif /* __SUNPRO_C */ 845 for (i = 0; i <= (wid - 2); i += 2) { 846 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7; 847 848 p6 = buff[i + 6]; p7 = buff[i + 7]; 849 850 buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6; 851 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6; 852 } 853 854 } else { 855 #ifdef __SUNPRO_C 856 #pragma pipeloop(0) 857 #endif /* __SUNPRO_C */ 858 for (i = 0; i <= (wid - 2); i += 2) { 859 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7; 860 861 p6 = buff[i + 6]; p7 = buff[i + 7]; 862 863 buffn[i ] = (mlib_d64)sp[0]; 864 buffn[i + 1] = (mlib_d64)sp[chan1]; 865 866 d0 = p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i ]; 867 d1 = p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1]; 868 869 CLAMP_S32(dp[0 ], d0); 870 CLAMP_S32(dp[chan1], d1); 871 872 buffd[i ] = 0.0; 873 buffd[i + 1] = 0.0; 874 875 sp += chan2; 876 dp += chan2; 877 } 878 } 879 } 880 881 /* last pixels */ 882 for (; i < wid; i++) { 883 mlib_d64 *pk = k, s = 0; 884 885 for (l = 0; l < KSIZE; l++) { 886 mlib_d64 *buff = buffc[l] + i; 887 888 for (m = 0; m < KSIZE; m++) s += buff[m] * (*pk++); 889 } 890 891 CLAMP_S32(dp[0], s); 892 893 buffn[i] = (mlib_d64)sp[0]; 894 895 sp += chan1; 896 dp += chan1; 897 } 898 899 for (l = 0; l < (KSIZE - 1); l++) buffn[wid + l] = sp[l*chan1]; 900 901 /* next line */ 902 sl += sll; 903 dl += dll; 904 905 buff_ind++; 906 907 if (buff_ind >= KSIZE + 1) buff_ind = 0; 908 } 909 } 910 911 if (pbuff != buff) mlib_free(pbuff); 912 913 return MLIB_SUCCESS; 914 } 915 916 /***************************************************************/ 917 #define FTYPE mlib_d64 918 #define DTYPE mlib_s32 919 920 #define BUFF_SIZE 1600 921 922 static mlib_status mlib_ImageConv1xN(mlib_image *dst, 923 const mlib_image *src, 924 const mlib_d64 *k, 925 mlib_s32 n, 926 mlib_s32 dn, 927 mlib_s32 cmask) 928 { 929 FTYPE buff[BUFF_SIZE]; 930 mlib_s32 off, kh; 931 const FTYPE *pk; 932 FTYPE k0, k1, k2, k3, d0, d1; 933 FTYPE p0, p1, p2, p3, p4; 934 DTYPE *sl_c, *dl_c, *sl0; 935 mlib_s32 l, hsize, max_hsize; 936 DEF_VARS_MxN(DTYPE); 937 938 hgt -= (n - 1); 939 adr_dst += dn*dll; 940 941 max_hsize = (CACHE_SIZE/sizeof(DTYPE))/sll; 942 943 if (!max_hsize) max_hsize = 1; 944 945 if (max_hsize > BUFF_SIZE) { 946 pbuff = mlib_malloc(sizeof(FTYPE)*max_hsize); 947 } 948 949 sl_c = adr_src; 950 dl_c = adr_dst; 951 952 for (l = 0; l < hgt; l += hsize) { 953 hsize = hgt - l; 954 955 if (hsize > max_hsize) hsize = max_hsize; 956 957 for (c = 0; c < chan1; c++) { 958 if (!(cmask & (1 << (chan1 - 1 - c)))) continue; 959 960 sl = sl_c + c; 961 dl = dl_c + c; 962 963 #ifdef __SUNPRO_C 964 #pragma pipeloop(0) 965 #endif /* __SUNPRO_C */ 966 for (j = 0; j < hsize; j++) pbuff[j] = 0.0; 967 968 for (i = 0; i < wid; i++) { 969 sl0 = sl; 970 971 for (off = 0; off < (n - 4); off += 4) { 972 pk = k + off; 973 sp = sl0; 974 975 k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3]; 976 p2 = sp[0]; p3 = sp[sll]; p4 = sp[2*sll]; 977 sp += 3*sll; 978 979 #ifdef __SUNPRO_C 980 #pragma pipeloop(0) 981 #endif /* __SUNPRO_C */ 982 for (j = 0; j < hsize; j += 2) { 983 p0 = p2; p1 = p3; p2 = p4; 984 p3 = sp[0]; 985 p4 = sp[sll]; 986 987 pbuff[j ] += p0*k0 + p1*k1 + p2*k2 + p3*k3; 988 pbuff[j + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3; 989 990 sp += 2*sll; 991 } 992 993 sl0 += 4*sll; 994 } 995 996 pk = k + off; 997 sp = sl0; 998 999 k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3]; 1000 p2 = sp[0]; p3 = sp[sll]; p4 = sp[2*sll]; 1001 1002 dp = dl; 1003 kh = n - off; 1004 1005 if (kh == 4) { 1006 sp += 3*sll; 1007 1008 #ifdef __SUNPRO_C 1009 #pragma pipeloop(0) 1010 #endif /* __SUNPRO_C */ 1011 for (j = 0; j <= (hsize - 2); j += 2) { 1012 p0 = p2; p1 = p3; p2 = p4; 1013 p3 = sp[0]; 1014 p4 = sp[sll]; 1015 1016 d0 = p0*k0 + p1*k1 + p2*k2 + p3*k3 + pbuff[j]; 1017 d1 = p1*k0 + p2*k1 + p3*k2 + p4*k3 + pbuff[j + 1]; 1018 CLAMP_S32(dp[0 ], d0); 1019 CLAMP_S32(dp[dll], d1); 1020 1021 pbuff[j] = 0; 1022 pbuff[j + 1] = 0; 1023 1024 sp += 2*sll; 1025 dp += 2*dll; 1026 } 1027 1028 if (j < hsize) { 1029 p0 = p2; p1 = p3; p2 = p4; 1030 p3 = sp[0]; 1031 1032 d0 = p0*k0 + p1*k1 + p2*k2 + p3*k3 + pbuff[j]; 1033 CLAMP_S32(dp[0], d0); 1034 1035 pbuff[j] = 0; 1036 } 1037 1038 } else if (kh == 3) { 1039 sp += 2*sll; 1040 1041 #ifdef __SUNPRO_C 1042 #pragma pipeloop(0) 1043 #endif /* __SUNPRO_C */ 1044 for (j = 0; j <= (hsize - 2); j += 2) { 1045 p0 = p2; p1 = p3; 1046 p2 = sp[0]; 1047 p3 = sp[sll]; 1048 1049 d0 = p0*k0 + p1*k1 + p2*k2 + pbuff[j]; 1050 d1 = p1*k0 + p2*k1 + p3*k2 + pbuff[j + 1]; 1051 CLAMP_S32(dp[0 ], d0); 1052 CLAMP_S32(dp[dll], d1); 1053 1054 pbuff[j] = 0; 1055 pbuff[j + 1] = 0; 1056 1057 sp += 2*sll; 1058 dp += 2*dll; 1059 } 1060 1061 if (j < hsize) { 1062 p0 = p2; p1 = p3; 1063 p2 = sp[0]; 1064 1065 d0 = p0*k0 + p1*k1 + p2*k2 + pbuff[j]; 1066 CLAMP_S32(dp[0], d0); 1067 1068 pbuff[j] = 0; 1069 } 1070 1071 } else if (kh == 2) { 1072 sp += sll; 1073 1074 #ifdef __SUNPRO_C 1075 #pragma pipeloop(0) 1076 #endif /* __SUNPRO_C */ 1077 for (j = 0; j <= (hsize - 2); j += 2) { 1078 p0 = p2; 1079 p1 = sp[0]; 1080 p2 = sp[sll]; 1081 1082 d0 = p0*k0 + p1*k1 + pbuff[j]; 1083 d1 = p1*k0 + p2*k1 + pbuff[j + 1]; 1084 CLAMP_S32(dp[0 ], d0); 1085 CLAMP_S32(dp[dll], d1); 1086 1087 pbuff[j] = 0; 1088 pbuff[j + 1] = 0; 1089 1090 sp += 2*sll; 1091 dp += 2*dll; 1092 } 1093 1094 if (j < hsize) { 1095 p0 = p2; 1096 p1 = sp[0]; 1097 1098 d0 = p0*k0 + p1*k1 + pbuff[j]; 1099 CLAMP_S32(dp[0], d0); 1100 1101 pbuff[j] = 0; 1102 } 1103 1104 } else /* if (kh == 1) */ { 1105 #ifdef __SUNPRO_C 1106 #pragma pipeloop(0) 1107 #endif /* __SUNPRO_C */ 1108 for (j = 0; j < hsize; j++) { 1109 p0 = sp[0]; 1110 1111 d0 = p0*k0 + pbuff[j]; 1112 CLAMP_S32(dp[0], d0); 1113 1114 pbuff[j] = 0; 1115 1116 sp += sll; 1117 dp += dll; 1118 } 1119 } 1120 1121 sl += chan1; 1122 dl += chan1; 1123 } 1124 } 1125 1126 sl_c += max_hsize*sll; 1127 dl_c += max_hsize*dll; 1128 } 1129 1130 if (pbuff != buff) mlib_free(pbuff); 1131 1132 return MLIB_SUCCESS; 1133 } 1134 1135 /***************************************************************/ 1136 #define MAX_KER 7 1137 1138 #define MAX_N 15 1139 1140 #undef BUFF_SIZE 1141 #define BUFF_SIZE 1500 1142 1143 mlib_status CONV_FUNC(MxN)(mlib_image *dst, 1144 const mlib_image *src, 1145 const mlib_s32 *kernel, 1146 mlib_s32 m, 1147 mlib_s32 n, 1148 mlib_s32 dm, 1149 mlib_s32 dn, 1150 mlib_s32 scale, 1151 mlib_s32 cmask) 1152 { 1153 mlib_d64 buff[BUFF_SIZE], *buffs_arr[2*(MAX_N + 1)]; 1154 mlib_d64 **buffs = buffs_arr, *buffd; 1155 mlib_d64 akernel[256], *k = akernel, fscale = 1.0; 1156 mlib_s32 l, off, kw, bsize, buff_ind, mn; 1157 mlib_d64 d0, d1; 1158 mlib_d64 k0, k1, k2, k3, k4, k5, k6; 1159 mlib_d64 p0, p1, p2, p3, p4, p5, p6, p7; 1160 DEF_VARS_MxN(mlib_s32); 1161 mlib_s32 chan2 = chan1 + chan1; 1162 1163 mlib_status status = MLIB_SUCCESS; 1164 1165 if (scale > 30) { 1166 fscale *= 1.0/(1 << 30); 1167 scale -= 30; 1168 } 1169 1170 fscale /= (1 << scale); 1171 1172 mn = m*n; 1173 1174 if (mn > 256) { 1175 k = mlib_malloc(mn*sizeof(mlib_d64)); 1176 1177 if (k == NULL) return MLIB_FAILURE; 1178 } 1179 1180 for (i = 0; i < mn; i++) { 1181 k[i] = kernel[i]*fscale; 1182 } 1183 1184 if (m == 1) { 1185 status = mlib_ImageConv1xN(dst, src, k, n, dn, cmask); 1186 FREE_AND_RETURN_STATUS; 1187 } 1188 1189 bsize = (n + 2)*wid; 1190 1191 if ((bsize > BUFF_SIZE) || (n > MAX_N)) { 1192 pbuff = mlib_malloc(sizeof(mlib_d64)*bsize + sizeof(mlib_d64*)*2*(n + 1)); 1193 1194 if (pbuff == NULL) { 1195 status = MLIB_FAILURE; 1196 FREE_AND_RETURN_STATUS; 1197 } 1198 buffs = (mlib_d64**)(pbuff + bsize); 1199 } 1200 1201 for (l = 0; l < (n + 1); l++) buffs[l] = pbuff + l*wid; 1202 for (l = 0; l < (n + 1); l++) buffs[l + (n + 1)] = buffs[l]; 1203 buffd = buffs[n] + wid; 1204 1205 wid -= (m - 1); 1206 hgt -= (n - 1); 1207 adr_dst += dn*dll + dm*chan1; 1208 1209 for (c = 0; c < chan1; c++) { 1210 if (!(cmask & (1 << (chan1 - 1 - c)))) continue; 1211 1212 sl = adr_src + c; 1213 dl = adr_dst + c; 1214 1215 for (l = 0; l < n; l++) { 1216 mlib_d64 *buff = buffs[l]; 1217 1218 #ifdef __SUNPRO_C 1219 #pragma pipeloop(0) 1220 #endif /* __SUNPRO_C */ 1221 for (i = 0; i < wid + (m - 1); i++) { 1222 buff[i] = (mlib_d64)sl[i*chan1]; 1223 } 1224 1225 sl += sll; 1226 } 1227 1228 buff_ind = 0; 1229 1230 #ifdef __SUNPRO_C 1231 #pragma pipeloop(0) 1232 #endif /* __SUNPRO_C */ 1233 for (i = 0; i < wid; i++) buffd[i] = 0.0; 1234 1235 for (j = 0; j < hgt; j++) { 1236 mlib_d64 **buffc = buffs + buff_ind; 1237 mlib_d64 *buffn = buffc[n]; 1238 mlib_d64 *pk = k; 1239 1240 for (l = 0; l < n; l++) { 1241 mlib_d64 *buff_l = buffc[l]; 1242 1243 for (off = 0; off < m;) { 1244 mlib_d64 *buff = buff_l + off; 1245 1246 kw = m - off; 1247 1248 if (kw > 2*MAX_KER) kw = MAX_KER; else 1249 if (kw > MAX_KER) kw = kw/2; 1250 off += kw; 1251 1252 sp = sl; 1253 dp = dl; 1254 1255 p2 = buff[0]; p3 = buff[1]; p4 = buff[2]; 1256 p5 = buff[3]; p6 = buff[4]; p7 = buff[5]; 1257 1258 k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3]; 1259 k4 = pk[4]; k5 = pk[5]; k6 = pk[6]; 1260 pk += kw; 1261 1262 if (kw == 7) { 1263 1264 if (l < (n - 1) || off < m) { 1265 #ifdef __SUNPRO_C 1266 #pragma pipeloop(0) 1267 #endif /* __SUNPRO_C */ 1268 for (i = 0; i <= (wid - 2); i += 2) { 1269 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7; 1270 1271 p6 = buff[i + 6]; p7 = buff[i + 7]; 1272 1273 buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6; 1274 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6; 1275 } 1276 1277 } else { 1278 #ifdef __SUNPRO_C 1279 #pragma pipeloop(0) 1280 #endif /* __SUNPRO_C */ 1281 for (i = 0; i <= (wid - 2); i += 2) { 1282 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7; 1283 1284 p6 = buff[i + 6]; p7 = buff[i + 7]; 1285 1286 buffn[i ] = (mlib_d64)sp[0]; 1287 buffn[i + 1] = (mlib_d64)sp[chan1]; 1288 1289 d0 = p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i ]; 1290 d1 = p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1]; 1291 1292 CLAMP_S32(dp[0], d0); 1293 CLAMP_S32(dp[chan1], d1); 1294 1295 buffd[i ] = 0.0; 1296 buffd[i + 1] = 0.0; 1297 1298 sp += chan2; 1299 dp += chan2; 1300 } 1301 } 1302 1303 } else if (kw == 6) { 1304 1305 if (l < (n - 1) || off < m) { 1306 #ifdef __SUNPRO_C 1307 #pragma pipeloop(0) 1308 #endif /* __SUNPRO_C */ 1309 for (i = 0; i <= (wid - 2); i += 2) { 1310 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; 1311 1312 p5 = buff[i + 5]; p6 = buff[i + 6]; 1313 1314 buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5; 1315 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5; 1316 } 1317 1318 } else { 1319 #ifdef __SUNPRO_C 1320 #pragma pipeloop(0) 1321 #endif /* __SUNPRO_C */ 1322 for (i = 0; i <= (wid - 2); i += 2) { 1323 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; 1324 1325 p5 = buff[i + 5]; p6 = buff[i + 6]; 1326 1327 buffn[i ] = (mlib_d64)sp[0]; 1328 buffn[i + 1] = (mlib_d64)sp[chan1]; 1329 1330 d0 = p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + buffd[i ]; 1331 d1 = p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + buffd[i + 1]; 1332 1333 CLAMP_S32(dp[0], d0); 1334 CLAMP_S32(dp[chan1], d1); 1335 1336 buffd[i ] = 0.0; 1337 buffd[i + 1] = 0.0; 1338 1339 sp += chan2; 1340 dp += chan2; 1341 } 1342 } 1343 1344 } else if (kw == 5) { 1345 1346 if (l < (n - 1) || off < m) { 1347 #ifdef __SUNPRO_C 1348 #pragma pipeloop(0) 1349 #endif /* __SUNPRO_C */ 1350 for (i = 0; i <= (wid - 2); i += 2) { 1351 p0 = p2; p1 = p3; p2 = p4; p3 = p5; 1352 1353 p4 = buff[i + 4]; p5 = buff[i + 5]; 1354 1355 buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4; 1356 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4; 1357 } 1358 1359 } else { 1360 #ifdef __SUNPRO_C 1361 #pragma pipeloop(0) 1362 #endif /* __SUNPRO_C */ 1363 for (i = 0; i <= (wid - 2); i += 2) { 1364 p0 = p2; p1 = p3; p2 = p4; p3 = p5; 1365 1366 p4 = buff[i + 4]; p5 = buff[i + 5]; 1367 1368 buffn[i ] = (mlib_d64)sp[0]; 1369 buffn[i + 1] = (mlib_d64)sp[chan1]; 1370 1371 d0 = p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + buffd[i ]; 1372 d1 = p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + buffd[i + 1]; 1373 1374 CLAMP_S32(dp[0], d0); 1375 CLAMP_S32(dp[chan1], d1); 1376 1377 buffd[i ] = 0.0; 1378 buffd[i + 1] = 0.0; 1379 1380 sp += chan2; 1381 dp += chan2; 1382 } 1383 } 1384 1385 } else if (kw == 4) { 1386 1387 if (l < (n - 1) || off < m) { 1388 #ifdef __SUNPRO_C 1389 #pragma pipeloop(0) 1390 #endif /* __SUNPRO_C */ 1391 for (i = 0; i <= (wid - 2); i += 2) { 1392 p0 = p2; p1 = p3; p2 = p4; 1393 1394 p3 = buff[i + 3]; p4 = buff[i + 4]; 1395 1396 buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3; 1397 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3; 1398 } 1399 1400 } else { 1401 #ifdef __SUNPRO_C 1402 #pragma pipeloop(0) 1403 #endif /* __SUNPRO_C */ 1404 for (i = 0; i <= (wid - 2); i += 2) { 1405 p0 = p2; p1 = p3; p2 = p4; 1406 1407 p3 = buff[i + 3]; p4 = buff[i + 4]; 1408 1409 buffn[i ] = (mlib_d64)sp[0]; 1410 buffn[i + 1] = (mlib_d64)sp[chan1]; 1411 1412 d0 = p0*k0 + p1*k1 + p2*k2 + p3*k3 + buffd[i ]; 1413 d1 = p1*k0 + p2*k1 + p3*k2 + p4*k3 + buffd[i + 1]; 1414 1415 CLAMP_S32(dp[0], d0); 1416 CLAMP_S32(dp[chan1], d1); 1417 1418 buffd[i ] = 0.0; 1419 buffd[i + 1] = 0.0; 1420 1421 sp += chan2; 1422 dp += chan2; 1423 } 1424 } 1425 1426 } else if (kw == 3) { 1427 1428 if (l < (n - 1) || off < m) { 1429 #ifdef __SUNPRO_C 1430 #pragma pipeloop(0) 1431 #endif /* __SUNPRO_C */ 1432 for (i = 0; i <= (wid - 2); i += 2) { 1433 p0 = p2; p1 = p3; 1434 1435 p2 = buff[i + 2]; p3 = buff[i + 3]; 1436 1437 buffd[i ] += p0*k0 + p1*k1 + p2*k2; 1438 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2; 1439 } 1440 1441 } else { 1442 #ifdef __SUNPRO_C 1443 #pragma pipeloop(0) 1444 #endif /* __SUNPRO_C */ 1445 for (i = 0; i <= (wid - 2); i += 2) { 1446 p0 = p2; p1 = p3; 1447 1448 p2 = buff[i + 2]; p3 = buff[i + 3]; 1449 1450 buffn[i ] = (mlib_d64)sp[0]; 1451 buffn[i + 1] = (mlib_d64)sp[chan1]; 1452 1453 d0 = p0*k0 + p1*k1 + p2*k2 + buffd[i ]; 1454 d1 = p1*k0 + p2*k1 + p3*k2 + buffd[i + 1]; 1455 1456 CLAMP_S32(dp[0], d0); 1457 CLAMP_S32(dp[chan1], d1); 1458 1459 buffd[i ] = 0.0; 1460 buffd[i + 1] = 0.0; 1461 1462 sp += chan2; 1463 dp += chan2; 1464 } 1465 } 1466 1467 } else { /* kw == 2 */ 1468 1469 if (l < (n - 1) || off < m) { 1470 #ifdef __SUNPRO_C 1471 #pragma pipeloop(0) 1472 #endif /* __SUNPRO_C */ 1473 for (i = 0; i <= (wid - 2); i += 2) { 1474 p0 = p2; 1475 1476 p1 = buff[i + 1]; p2 = buff[i + 2]; 1477 1478 buffd[i ] += p0*k0 + p1*k1; 1479 buffd[i + 1] += p1*k0 + p2*k1; 1480 } 1481 1482 } else { 1483 #ifdef __SUNPRO_C 1484 #pragma pipeloop(0) 1485 #endif /* __SUNPRO_C */ 1486 for (i = 0; i <= (wid - 2); i += 2) { 1487 p0 = p2; 1488 1489 p1 = buff[i + 1]; p2 = buff[i + 2]; 1490 1491 buffn[i ] = (mlib_d64)sp[0]; 1492 buffn[i + 1] = (mlib_d64)sp[chan1]; 1493 1494 d0 = p0*k0 + p1*k1 + buffd[i ]; 1495 d1 = p1*k0 + p2*k1 + buffd[i + 1]; 1496 1497 CLAMP_S32(dp[0], d0); 1498 CLAMP_S32(dp[chan1], d1); 1499 1500 buffd[i ] = 0.0; 1501 buffd[i + 1] = 0.0; 1502 1503 sp += chan2; 1504 dp += chan2; 1505 } 1506 } 1507 } 1508 } 1509 } 1510 1511 /* last pixels */ 1512 for (; i < wid; i++) { 1513 mlib_d64 *pk = k, s = 0; 1514 mlib_s32 x; 1515 1516 for (l = 0; l < n; l++) { 1517 mlib_d64 *buff = buffc[l] + i; 1518 1519 for (x = 0; x < m; x++) s += buff[x] * (*pk++); 1520 } 1521 1522 CLAMP_S32(dp[0], s); 1523 1524 buffn[i] = (mlib_d64)sp[0]; 1525 1526 sp += chan1; 1527 dp += chan1; 1528 } 1529 1530 for (l = 0; l < (m - 1); l++) buffn[wid + l] = sp[l*chan1]; 1531 1532 /* next line */ 1533 sl += sll; 1534 dl += dll; 1535 1536 buff_ind++; 1537 1538 if (buff_ind >= n + 1) buff_ind = 0; 1539 } 1540 } 1541 1542 FREE_AND_RETURN_STATUS; 1543 } 1544 1545 /***************************************************************/