1 /* 2 * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 27 /* 28 * FUNCTION 29 * mlib_ImageConvMxN_Fp - image convolution with edge condition 30 * 31 * SYNOPSIS 32 * mlib_status mlib_ImageConvMxN_Fp(mlib_image *dst, 33 * const mlib_image *src, 34 * const mlib_d64 *kernel, 35 * mlib_s32 m, 36 * mlib_s32 n, 37 * mlib_s32 dm, 38 * mlib_s32 dn, 39 * mlib_s32 cmask, 40 * mlib_edge edge) 41 * 42 * ARGUMENTS 43 * dst Pointer to destination image. 44 * src Pointer to source image. 45 * m Kernel width (m must be not less than 1). 46 * n Kernel height (n must be not less than 1). 47 * dm, dn Position of key element in convolution kernel. 48 * kernel Pointer to convolution kernel. 49 * cmask Channel mask to indicate the channels to be convolved. 50 * Each bit of which represents a channel in the image. The 51 * channels corresponded to 1 bits are those to be processed. 52 * edge Type of edge condition. 53 * 54 * DESCRIPTION 55 * 2-D convolution, MxN kernel. 56 * 57 * The center of the source image is mapped to the center of the 58 * destination image. 59 * The unselected channels are not overwritten. If both src and dst have 60 * just one channel, cmask is ignored. 61 * 62 * The edge condition can be one of the following: 63 * MLIB_EDGE_DST_NO_WRITE (default) 64 * MLIB_EDGE_DST_FILL_ZERO 65 * MLIB_EDGE_DST_COPY_SRC 66 * MLIB_EDGE_SRC_EXTEND 67 * 68 * RESTRICTION 69 * The src and the dst must be the same type and have same number 70 * of channels (1, 2, 3, or 4). 71 * m >= 1, n >= 1, 72 * 0 <= dm < m, 0 <= dn < n. 73 */ 74 75 #include "mlib_image.h" 76 #include "mlib_ImageCheck.h" 77 #include "mlib_SysMath.h" 78 #include "mlib_ImageConv.h" 79 80 /***************************************************************/ 81 static void mlib_ImageConvMxNMulAdd_F32(mlib_f32 *dst, 82 const mlib_f32 *src, 83 const mlib_d64 *kernel, 84 mlib_s32 n, 85 mlib_s32 m, 86 mlib_s32 nch, 87 mlib_s32 dnch); 88 89 static void mlib_ImageConvMxNF322F32_ext(mlib_f32 *dst, 90 const mlib_f32 *src, 91 mlib_s32 n, 92 mlib_s32 nch, 93 mlib_s32 dx_l, 94 mlib_s32 dx_r); 95 96 static void mlib_ImageConvMxNMulAdd_D64(mlib_d64 *dst, 97 const mlib_d64 *src, 98 const mlib_d64 *kernel, 99 mlib_s32 n, 100 mlib_s32 m, 101 mlib_s32 nch, 102 mlib_s32 dnch); 103 104 static void mlib_ImageConvMxND642D64_ext(mlib_d64 *dst, 105 const mlib_d64 *src, 106 mlib_s32 n, 107 mlib_s32 nch, 108 mlib_s32 dx_l, 109 mlib_s32 dx_r); 110 111 /***************************************************************/ 112 #if 0 113 static void mlib_ImageConvMxNMulAdd2_F32(mlib_f32 *hdst, 114 mlib_f32 *vdst, 115 const mlib_f32 *src, 116 const mlib_d64 *hfilter, 117 const mlib_d64 *vfilter, 118 mlib_s32 n, 119 mlib_s32 m, 120 mlib_s32 nch, 121 mlib_s32 dnch); 122 123 static void mlib_ImageConvMxNMulAdd2_D64(mlib_d64 *hdst, 124 mlib_d64 *vdst, 125 const mlib_d64 *src, 126 const mlib_d64 *hfilter, 127 const mlib_d64 *vfilter, 128 mlib_s32 n, 129 mlib_s32 m, 130 mlib_s32 nch, 131 mlib_s32 dnch); 132 #endif /* 0 */ 133 134 /***************************************************************/ 135 mlib_status mlib_ImageConvMxN_Fp(mlib_image *dst, 136 const mlib_image *src, 137 const mlib_d64 *kernel, 138 mlib_s32 m, 139 mlib_s32 n, 140 mlib_s32 dm, 141 mlib_s32 dn, 142 mlib_s32 cmask, 143 mlib_edge edge) 144 { 145 mlib_type type; 146 147 MLIB_IMAGE_CHECK(dst); 148 type = mlib_ImageGetType(dst); 149 150 if (type != MLIB_FLOAT && type != MLIB_DOUBLE) 151 return MLIB_FAILURE; 152 153 return mlib_ImageConvMxN_f(dst, src, kernel, m, n, dm, dn, 0, cmask, edge); 154 } 155 156 /***************************************************************/ 157 void mlib_ImageConvMxNMulAdd_F32(mlib_f32 *dst, 158 const mlib_f32 *src, 159 const mlib_d64 *kernel, 160 mlib_s32 n, 161 mlib_s32 m, 162 mlib_s32 nch, 163 mlib_s32 dnch) 164 { 165 mlib_f32 *hdst1 = dst + dnch; 166 mlib_s32 i, j; 167 168 for (j = 0; j < m - 2; j += 3, src += 3 * nch, kernel += 3) { 169 const mlib_f32 *src2 = src + 2 * nch; 170 mlib_f32 hval0 = (mlib_f32) kernel[0]; 171 mlib_f32 hval1 = (mlib_f32) kernel[1]; 172 mlib_f32 hval2 = (mlib_f32) kernel[2]; 173 mlib_f32 val0 = src[0]; 174 mlib_f32 val1 = src[nch]; 175 mlib_f32 hdvl = dst[0]; 176 177 #ifdef __SUNPRO_C 178 #pragma pipeloop(0) 179 #endif /* __SUNPRO_C */ 180 for (i = 0; i < n; i++) { 181 mlib_f32 hdvl0 = val0 * hval0 + hdvl; 182 mlib_f32 val2 = src2[i * nch]; 183 184 hdvl = hdst1[i * dnch]; 185 hdvl0 += val1 * hval1; 186 hdvl0 += val2 * hval2; 187 val0 = val1; 188 val1 = val2; 189 190 dst[i * dnch] = hdvl0; 191 } 192 } 193 194 if (j < m - 1) { 195 const mlib_f32 *src2 = src + 2 * nch; 196 mlib_f32 hval0 = (mlib_f32) kernel[0]; 197 mlib_f32 hval1 = (mlib_f32) kernel[1]; 198 mlib_f32 val0 = src[0]; 199 mlib_f32 val1 = src[nch]; 200 mlib_f32 hdvl = dst[0]; 201 #ifdef __SUNPRO_C 202 #pragma pipeloop(0) 203 #endif /* __SUNPRO_C */ 204 for (i = 0; i < n; i++) { 205 mlib_f32 hdvl0 = val0 * hval0 + hdvl; 206 mlib_f32 val2 = src2[i * nch]; 207 208 hdvl = hdst1[i * dnch]; 209 hdvl0 += val1 * hval1; 210 val0 = val1; 211 val1 = val2; 212 213 dst[i * dnch] = hdvl0; 214 } 215 216 } 217 else if (j < m) { 218 const mlib_f32 *src2 = src + 2 * nch; 219 mlib_f32 hval0 = (mlib_f32) kernel[0]; 220 mlib_f32 val0 = src[0]; 221 mlib_f32 val1 = src[nch]; 222 mlib_f32 hdvl = dst[0]; 223 224 #ifdef __SUNPRO_C 225 #pragma pipeloop(0) 226 #endif /* __SUNPRO_C */ 227 for (i = 0; i < n; i++) { 228 mlib_f32 hdvl0 = val0 * hval0 + hdvl; 229 mlib_f32 val2 = src2[i * nch]; 230 231 hdvl = hdst1[i * dnch]; 232 val0 = val1; 233 val1 = val2; 234 235 dst[i * dnch] = hdvl0; 236 } 237 } 238 } 239 240 /***************************************************************/ 241 void mlib_ImageConvMxNF322F32_ext(mlib_f32 *dst, 242 const mlib_f32 *src, 243 mlib_s32 n, 244 mlib_s32 nch, 245 mlib_s32 dx_l, 246 mlib_s32 dx_r) 247 { 248 mlib_s32 i; 249 mlib_f32 val = src[0]; 250 251 for (i = 0; i < dx_l; i++) 252 dst[i] = val; 253 #ifdef __SUNPRO_C 254 #pragma pipeloop(0) 255 #endif /* __SUNPRO_C */ 256 for (; i < n - dx_r; i++) 257 dst[i] = src[nch * (i - dx_l)]; 258 val = dst[n - dx_r - 1]; 259 for (; i < n; i++) 260 dst[i] = val; 261 } 262 263 /***************************************************************/ 264 mlib_status mlib_convMxNext_f32(mlib_image *dst, 265 const mlib_image *src, 266 const mlib_d64 *kernel, 267 mlib_s32 m, 268 mlib_s32 n, 269 mlib_s32 dx_l, 270 mlib_s32 dx_r, 271 mlib_s32 dy_t, 272 mlib_s32 dy_b, 273 mlib_s32 cmask) 274 { 275 mlib_d64 dspace[1024], *dsa = dspace; 276 mlib_s32 wid_e = mlib_ImageGetWidth(src); 277 mlib_f32 *fsa; 278 mlib_f32 *da = mlib_ImageGetData(dst); 279 mlib_f32 *sa = mlib_ImageGetData(src); 280 mlib_s32 dlb = mlib_ImageGetStride(dst) >> 2; 281 mlib_s32 slb = mlib_ImageGetStride(src) >> 2; 282 mlib_s32 dw = mlib_ImageGetWidth(dst); 283 mlib_s32 dh = mlib_ImageGetHeight(dst); 284 mlib_s32 nch = mlib_ImageGetChannels(dst); 285 mlib_s32 i, j, j1, k; 286 287 if (3 * wid_e + m > 1024) { 288 dsa = mlib_malloc((3 * wid_e + m) * sizeof(mlib_d64)); 289 290 if (dsa == NULL) 291 return MLIB_FAILURE; 292 } 293 294 fsa = (mlib_f32 *) dsa; 295 296 for (j = 0; j < dh; j++, da += dlb) { 297 for (k = 0; k < nch; k++) 298 if (cmask & (1 << (nch - 1 - k))) { 299 const mlib_f32 *sa1 = sa + k; 300 mlib_f32 *da1 = da + k; 301 const mlib_d64 *kernel1 = kernel; 302 303 for (i = 0; i < dw; i++) 304 da1[i * nch] = 0.f; 305 for (j1 = 0; j1 < n; j1++, kernel1 += m) { 306 mlib_ImageConvMxNF322F32_ext(fsa, sa1, dw + m - 1, nch, dx_l, dx_r); 307 mlib_ImageConvMxNMulAdd_F32(da1, fsa, kernel1, dw, m, 1, nch); 308 309 if ((j + j1 >= dy_t) && (j + j1 < dh + n - dy_b - 2)) 310 sa1 += slb; 311 } 312 } 313 314 if ((j >= dy_t) && (j < dh + n - dy_b - 2)) 315 sa += slb; 316 } 317 318 if (dsa != dspace) 319 mlib_free(dsa); 320 return MLIB_SUCCESS; 321 } 322 323 /***************************************************************/ 324 #if 0 325 326 void mlib_ImageConvMxNMulAdd2_F32(mlib_f32 *hdst, 327 mlib_f32 *vdst, 328 const mlib_f32 *src, 329 const mlib_d64 *hfilter, 330 const mlib_d64 *vfilter, 331 mlib_s32 n, 332 mlib_s32 m, 333 mlib_s32 nch, 334 mlib_s32 dnch) 335 { 336 mlib_f32 *hdst1 = hdst + dnch, *vdst1 = vdst + dnch; 337 mlib_s32 i, j; 338 339 for (j = 0; j < m - 2; j += 3, src += 3 * nch, hfilter += 3, vfilter += 3) { 340 mlib_f32 *src2 = src + 2 * nch; 341 mlib_f32 hval0 = (mlib_f32) hfilter[0]; 342 mlib_f32 vval0 = (mlib_f32) vfilter[0]; 343 mlib_f32 hval1 = (mlib_f32) hfilter[1]; 344 mlib_f32 vval1 = (mlib_f32) vfilter[1]; 345 mlib_f32 hval2 = (mlib_f32) hfilter[2]; 346 mlib_f32 vval2 = (mlib_f32) vfilter[2]; 347 mlib_f32 val0 = src[0]; 348 mlib_f32 val1 = src[nch]; 349 mlib_f32 hdvl = hdst[0]; 350 mlib_f32 vdvl = vdst[0]; 351 352 #ifdef __SUNPRO_C 353 #pragma pipeloop(0) 354 #endif /* __SUNPRO_C */ 355 for (i = 0; i < n; i++) { 356 mlib_f32 hdvl0 = val0 * hval0 + hdvl; 357 mlib_f32 vdvl0 = val0 * vval0 + vdvl; 358 mlib_f32 val2 = src2[i * nch]; 359 360 hdvl = hdst1[i * dnch]; 361 vdvl = vdst1[i * dnch]; 362 hdvl0 += val1 * hval1; 363 vdvl0 += val1 * vval1; 364 hdvl0 += val2 * hval2; 365 vdvl0 += val2 * vval2; 366 val0 = val1; 367 val1 = val2; 368 369 hdst[i * dnch] = hdvl0; 370 vdst[i * dnch] = vdvl0; 371 } 372 } 373 374 if (j < m - 1) { 375 mlib_f32 *src2 = src + 2 * nch; 376 mlib_f32 hval0 = (mlib_f32) hfilter[0]; 377 mlib_f32 vval0 = (mlib_f32) vfilter[0]; 378 mlib_f32 hval1 = (mlib_f32) hfilter[1]; 379 mlib_f32 vval1 = (mlib_f32) vfilter[1]; 380 mlib_f32 val0 = src[0]; 381 mlib_f32 val1 = src[nch]; 382 mlib_f32 hdvl = hdst[0]; 383 mlib_f32 vdvl = vdst[0]; 384 385 #ifdef __SUNPRO_C 386 #pragma pipeloop(0) 387 #endif /* __SUNPRO_C */ 388 for (i = 0; i < n; i++) { 389 mlib_f32 hdvl0 = val0 * hval0 + hdvl; 390 mlib_f32 vdvl0 = val0 * vval0 + vdvl; 391 mlib_f32 val2 = src2[i * nch]; 392 393 hdvl = hdst1[i * dnch]; 394 vdvl = vdst1[i * dnch]; 395 hdvl0 += val1 * hval1; 396 vdvl0 += val1 * vval1; 397 val0 = val1; 398 val1 = val2; 399 400 hdst[i * dnch] = hdvl0; 401 vdst[i * dnch] = vdvl0; 402 } 403 404 } 405 else if (j < m) { 406 mlib_f32 *src2 = src + 2 * nch; 407 mlib_f32 hval0 = (mlib_f32) hfilter[0]; 408 mlib_f32 vval0 = (mlib_f32) vfilter[0]; 409 mlib_f32 val0 = src[0]; 410 mlib_f32 val1 = src[nch]; 411 mlib_f32 hdvl = hdst[0]; 412 mlib_f32 vdvl = vdst[0]; 413 414 #ifdef __SUNPRO_C 415 #pragma pipeloop(0) 416 #endif /* __SUNPRO_C */ 417 for (i = 0; i < n; i++) { 418 mlib_f32 hdvl0 = val0 * hval0 + hdvl; 419 mlib_f32 vdvl0 = val0 * vval0 + vdvl; 420 mlib_f32 val2 = src2[i * nch]; 421 422 hdvl = hdst1[i * dnch]; 423 vdvl = vdst1[i * dnch]; 424 val0 = val1; 425 val1 = val2; 426 427 hdst[i * dnch] = hdvl0; 428 vdst[i * dnch] = vdvl0; 429 } 430 } 431 } 432 433 /***************************************************************/ 434 void mlib_ImageConvMxNMulAdd2_D64(mlib_d64 *hdst, 435 mlib_d64 *vdst, 436 const mlib_d64 *src, 437 const mlib_d64 *hfilter, 438 const mlib_d64 *vfilter, 439 mlib_s32 n, 440 mlib_s32 m, 441 mlib_s32 nch, 442 mlib_s32 dnch) 443 { 444 mlib_d64 *hdst1 = hdst + dnch, *vdst1 = vdst + dnch; 445 mlib_s32 i, j; 446 447 for (j = 0; j < m - 2; j += 3, src += 3 * nch, hfilter += 3, vfilter += 3) { 448 mlib_d64 *src2 = src + 2 * nch; 449 mlib_d64 hval0 = hfilter[0]; 450 mlib_d64 vval0 = vfilter[0]; 451 mlib_d64 hval1 = hfilter[1]; 452 mlib_d64 vval1 = vfilter[1]; 453 mlib_d64 hval2 = hfilter[2]; 454 mlib_d64 vval2 = vfilter[2]; 455 mlib_d64 val0 = src[0]; 456 mlib_d64 val1 = src[nch]; 457 mlib_d64 hdvl = hdst[0]; 458 mlib_d64 vdvl = vdst[0]; 459 460 #ifdef __SUNPRO_C 461 #pragma pipeloop(0) 462 #endif /* __SUNPRO_C */ 463 for (i = 0; i < n; i++) { 464 mlib_d64 hdvl0 = val0 * hval0 + hdvl; 465 mlib_d64 vdvl0 = val0 * vval0 + vdvl; 466 mlib_d64 val2 = src2[i * nch]; 467 468 hdvl = hdst1[i * dnch]; 469 vdvl = vdst1[i * dnch]; 470 hdvl0 += val1 * hval1; 471 vdvl0 += val1 * vval1; 472 hdvl0 += val2 * hval2; 473 vdvl0 += val2 * vval2; 474 val0 = val1; 475 val1 = val2; 476 477 hdst[i * dnch] = hdvl0; 478 vdst[i * dnch] = vdvl0; 479 } 480 } 481 482 if (j < m - 1) { 483 mlib_d64 *src2 = src + 2 * nch; 484 mlib_d64 hval0 = hfilter[0]; 485 mlib_d64 vval0 = vfilter[0]; 486 mlib_d64 hval1 = hfilter[1]; 487 mlib_d64 vval1 = vfilter[1]; 488 mlib_d64 val0 = src[0]; 489 mlib_d64 val1 = src[nch]; 490 mlib_d64 hdvl = hdst[0]; 491 mlib_d64 vdvl = vdst[0]; 492 493 #ifdef __SUNPRO_C 494 #pragma pipeloop(0) 495 #endif /* __SUNPRO_C */ 496 for (i = 0; i < n; i++) { 497 mlib_d64 hdvl0 = val0 * hval0 + hdvl; 498 mlib_d64 vdvl0 = val0 * vval0 + vdvl; 499 mlib_d64 val2 = src2[i * nch]; 500 501 hdvl = hdst1[i * dnch]; 502 vdvl = vdst1[i * dnch]; 503 hdvl0 += val1 * hval1; 504 vdvl0 += val1 * vval1; 505 val0 = val1; 506 val1 = val2; 507 508 hdst[i * dnch] = hdvl0; 509 vdst[i * dnch] = vdvl0; 510 } 511 512 } 513 else if (j < m) { 514 mlib_d64 *src2 = src + 2 * nch; 515 mlib_d64 hval0 = hfilter[0]; 516 mlib_d64 vval0 = vfilter[0]; 517 mlib_d64 val0 = src[0]; 518 mlib_d64 val1 = src[nch]; 519 mlib_d64 hdvl = hdst[0]; 520 mlib_d64 vdvl = vdst[0]; 521 522 #ifdef __SUNPRO_C 523 #pragma pipeloop(0) 524 #endif /* __SUNPRO_C */ 525 for (i = 0; i < n; i++) { 526 mlib_d64 hdvl0 = val0 * hval0 + hdvl; 527 mlib_d64 vdvl0 = val0 * vval0 + vdvl; 528 mlib_d64 val2 = src2[i * nch]; 529 530 hdvl = hdst1[i * dnch]; 531 vdvl = vdst1[i * dnch]; 532 val0 = val1; 533 val1 = val2; 534 535 hdst[i * dnch] = hdvl0; 536 vdst[i * dnch] = vdvl0; 537 } 538 } 539 } 540 541 #endif /* 0 */ 542 543 /***************************************************************/ 544 void mlib_ImageConvMxNMulAdd_D64(mlib_d64 *dst, 545 const mlib_d64 *src, 546 const mlib_d64 *kernel, 547 mlib_s32 n, 548 mlib_s32 m, 549 mlib_s32 nch, 550 mlib_s32 dnch) 551 { 552 mlib_d64 *hdst1 = dst + dnch; 553 mlib_s32 i, j; 554 555 for (j = 0; j < m - 2; j += 3, src += 3 * nch, kernel += 3) { 556 const mlib_d64 *src2 = src + 2 * nch; 557 mlib_d64 hval0 = kernel[0]; 558 mlib_d64 hval1 = kernel[1]; 559 mlib_d64 hval2 = kernel[2]; 560 mlib_d64 val0 = src[0]; 561 mlib_d64 val1 = src[nch]; 562 mlib_d64 hdvl = dst[0]; 563 564 #ifdef __SUNPRO_C 565 #pragma pipeloop(0) 566 #endif /* __SUNPRO_C */ 567 for (i = 0; i < n; i++) { 568 mlib_d64 hdvl0 = val0 * hval0 + hdvl; 569 mlib_d64 val2 = src2[i * nch]; 570 571 hdvl = hdst1[i * dnch]; 572 hdvl0 += val1 * hval1; 573 hdvl0 += val2 * hval2; 574 val0 = val1; 575 val1 = val2; 576 577 dst[i * dnch] = hdvl0; 578 } 579 } 580 581 if (j < m - 1) { 582 const mlib_d64 *src2 = src + 2 * nch; 583 mlib_d64 hval0 = kernel[0]; 584 mlib_d64 hval1 = kernel[1]; 585 mlib_d64 val0 = src[0]; 586 mlib_d64 val1 = src[nch]; 587 mlib_d64 hdvl = dst[0]; 588 589 #ifdef __SUNPRO_C 590 #pragma pipeloop(0) 591 #endif /* __SUNPRO_C */ 592 for (i = 0; i < n; i++) { 593 mlib_d64 hdvl0 = val0 * hval0 + hdvl; 594 mlib_d64 val2 = src2[i * nch]; 595 596 hdvl = hdst1[i * dnch]; 597 hdvl0 += val1 * hval1; 598 val0 = val1; 599 val1 = val2; 600 601 dst[i * dnch] = hdvl0; 602 } 603 604 } 605 else if (j < m) { 606 const mlib_d64 *src2 = src + 2 * nch; 607 mlib_d64 hval0 = kernel[0]; 608 mlib_d64 val0 = src[0]; 609 mlib_d64 val1 = src[nch]; 610 mlib_d64 hdvl = dst[0]; 611 612 #ifdef __SUNPRO_C 613 #pragma pipeloop(0) 614 #endif /* __SUNPRO_C */ 615 for (i = 0; i < n; i++) { 616 mlib_d64 hdvl0 = val0 * hval0 + hdvl; 617 mlib_d64 val2 = src2[i * nch]; 618 619 hdvl = hdst1[i * dnch]; 620 val0 = val1; 621 val1 = val2; 622 623 dst[i * dnch] = hdvl0; 624 } 625 } 626 } 627 628 /***************************************************************/ 629 void mlib_ImageConvMxND642D64_ext(mlib_d64 *dst, 630 const mlib_d64 *src, 631 mlib_s32 n, 632 mlib_s32 nch, 633 mlib_s32 dx_l, 634 mlib_s32 dx_r) 635 { 636 mlib_s32 i; 637 mlib_d64 val = src[0]; 638 639 for (i = 0; i < dx_l; i++) 640 dst[i] = val; 641 #ifdef __SUNPRO_C 642 #pragma pipeloop(0) 643 #endif /* __SUNPRO_C */ 644 for (; i < n - dx_r; i++) 645 dst[i] = src[nch * (i - dx_l)]; 646 val = dst[n - dx_r - 1]; 647 for (; i < n; i++) 648 dst[i] = val; 649 } 650 651 /***************************************************************/ 652 mlib_status mlib_convMxNext_d64(mlib_image *dst, 653 const mlib_image *src, 654 const mlib_d64 *kernel, 655 mlib_s32 m, 656 mlib_s32 n, 657 mlib_s32 dx_l, 658 mlib_s32 dx_r, 659 mlib_s32 dy_t, 660 mlib_s32 dy_b, 661 mlib_s32 cmask) 662 { 663 mlib_d64 dspace[1024], *dsa = dspace; 664 mlib_s32 wid_e = mlib_ImageGetWidth(src); 665 mlib_d64 *da = mlib_ImageGetData(dst); 666 mlib_d64 *sa = mlib_ImageGetData(src); 667 mlib_s32 dlb = mlib_ImageGetStride(dst) >> 3; 668 mlib_s32 slb = mlib_ImageGetStride(src) >> 3; 669 mlib_s32 dw = mlib_ImageGetWidth(dst); 670 mlib_s32 dh = mlib_ImageGetHeight(dst); 671 mlib_s32 nch = mlib_ImageGetChannels(dst); 672 mlib_s32 i, j, j1, k; 673 674 if (3 * wid_e + m > 1024) { 675 dsa = mlib_malloc((3 * wid_e + m) * sizeof(mlib_d64)); 676 677 if (dsa == NULL) 678 return MLIB_FAILURE; 679 } 680 681 for (j = 0; j < dh; j++, da += dlb) { 682 for (k = 0; k < nch; k++) 683 if (cmask & (1 << (nch - 1 - k))) { 684 mlib_d64 *sa1 = sa + k; 685 mlib_d64 *da1 = da + k; 686 const mlib_d64 *kernel1 = kernel; 687 688 for (i = 0; i < dw; i++) 689 da1[i * nch] = 0.; 690 for (j1 = 0; j1 < n; j1++, kernel1 += m) { 691 mlib_ImageConvMxND642D64_ext(dsa, sa1, dw + m - 1, nch, dx_l, dx_r); 692 mlib_ImageConvMxNMulAdd_D64(da1, dsa, kernel1, dw, m, 1, nch); 693 694 if ((j + j1 >= dy_t) && (j + j1 < dh + n - dy_b - 2)) 695 sa1 += slb; 696 } 697 } 698 699 if ((j >= dy_t) && (j < dh + n - dy_b - 2)) 700 sa += slb; 701 } 702 703 if (dsa != dspace) 704 mlib_free(dsa); 705 return MLIB_SUCCESS; 706 } 707 708 /***************************************************************/