1 /* 2 * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 27 /* 28 * FUNCTION 29 * mlib_ImageConvMxN_Fp - image convolution with edge condition 30 * 31 * SYNOPSIS 32 * mlib_status mlib_ImageConvMxN_Fp(mlib_image *dst, 33 * const mlib_image *src, 34 * const mlib_d64 *kernel, 35 * mlib_s32 m, 36 * mlib_s32 n, 37 * mlib_s32 dm, 38 * mlib_s32 dn, 39 * mlib_s32 cmask, 40 * mlib_edge edge) 41 * 42 * ARGUMENTS 43 * dst Pointer to destination image. 44 * src Pointer to source image. 45 * m Kernel width (m must be not less than 1). 46 * n Kernel height (n must be not less than 1). 47 * dm, dn Position of key element in convolution kernel. 48 * kernel Pointer to convolution kernel. 49 * cmask Channel mask to indicate the channels to be convolved. 50 * Each bit of which represents a channel in the image. The 51 * channels corresponded to 1 bits are those to be processed. 52 * edge Type of edge condition. 53 * 54 * DESCRIPTION 55 * 2-D convolution, MxN kernel. 56 * 57 * The center of the source image is mapped to the center of the 58 * destination image. 59 * The unselected channels are not overwritten. If both src and dst have 60 * just one channel, cmask is ignored. 61 * 62 * The edge condition can be one of the following: 63 * MLIB_EDGE_DST_NO_WRITE (default) 64 * MLIB_EDGE_DST_FILL_ZERO 65 * MLIB_EDGE_DST_COPY_SRC 66 * MLIB_EDGE_SRC_EXTEND 67 * 68 * RESTRICTION 69 * The src and the dst must be the same type and have same number 70 * of channels (1, 2, 3, or 4). 71 * m >= 1, n >= 1, 72 * 0 <= dm < m, 0 <= dn < n. 73 */ 74 75 #include "mlib_image.h" 76 #include "mlib_ImageCheck.h" 77 #include "mlib_SysMath.h" 78 #include "mlib_ImageConv.h" 79 80 /***************************************************************/ 81 static void mlib_ImageConvMxNMulAdd_F32(mlib_f32 *dst, 82 const mlib_f32 *src, 83 const mlib_d64 *kernel, 84 mlib_s32 n, 85 mlib_s32 m, 86 mlib_s32 nch, 87 mlib_s32 dnch); 88 89 static void mlib_ImageConvMxNF322F32_ext(mlib_f32 *dst, 90 const mlib_f32 *src, 91 mlib_s32 n, 92 mlib_s32 nch, 93 mlib_s32 dx_l, 94 mlib_s32 dx_r); 95 96 static void mlib_ImageConvMxNMulAdd_D64(mlib_d64 *dst, 97 const mlib_d64 *src, 98 const mlib_d64 *kernel, 99 mlib_s32 n, 100 mlib_s32 m, 101 mlib_s32 nch, 102 mlib_s32 dnch); 103 104 static void mlib_ImageConvMxND642D64_ext(mlib_d64 *dst, 105 const mlib_d64 *src, 106 mlib_s32 n, 107 mlib_s32 nch, 108 mlib_s32 dx_l, 109 mlib_s32 dx_r); 110 111 /***************************************************************/ 112 #if 0 113 static void mlib_ImageConvMxNMulAdd2_F32(mlib_f32 *hdst, 114 mlib_f32 *vdst, 115 const mlib_f32 *src, 116 const mlib_d64 *hfilter, 117 const mlib_d64 *vfilter, 118 mlib_s32 n, 119 mlib_s32 m, 120 mlib_s32 nch, 121 mlib_s32 dnch); 122 123 static void mlib_ImageConvMxNMulAdd2_D64(mlib_d64 *hdst, 124 mlib_d64 *vdst, 125 const mlib_d64 *src, 126 const mlib_d64 *hfilter, 127 const mlib_d64 *vfilter, 128 mlib_s32 n, 129 mlib_s32 m, 130 mlib_s32 nch, 131 mlib_s32 dnch); 132 #endif /* 0 */ 133 134 /***************************************************************/ 135 mlib_status mlib_ImageConvMxN_Fp(mlib_image *dst, 136 const mlib_image *src, 137 const mlib_d64 *kernel, 138 mlib_s32 m, 139 mlib_s32 n, 140 mlib_s32 dm, 141 mlib_s32 dn, 142 mlib_s32 cmask, 143 mlib_edge edge) 144 { 145 mlib_type type; 146 147 MLIB_IMAGE_CHECK(dst); 148 type = mlib_ImageGetType(dst); 149 150 if (type != MLIB_FLOAT && type != MLIB_DOUBLE) 151 return MLIB_FAILURE; 152 153 return mlib_ImageConvMxN_f(dst, src, kernel, m, n, dm, dn, 0, cmask, edge); 154 } 155 156 /***************************************************************/ 157 void mlib_ImageConvMxNMulAdd_F32(mlib_f32 *dst, 158 const mlib_f32 *src, 159 const mlib_d64 *kernel, 160 mlib_s32 n, 161 mlib_s32 m, 162 mlib_s32 nch, 163 mlib_s32 dnch) 164 { 165 mlib_f32 *hdst1 = dst + dnch; 166 mlib_s32 i, j; 167 168 for (j = 0; j < m - 2; j += 3, src += 3 * nch, kernel += 3) { 169 const mlib_f32 *src2 = src + 2 * nch; 170 mlib_f32 hval0 = (mlib_f32) kernel[0]; 171 mlib_f32 hval1 = (mlib_f32) kernel[1]; 172 mlib_f32 hval2 = (mlib_f32) kernel[2]; 173 mlib_f32 val0 = src[0]; 174 mlib_f32 val1 = src[nch]; 175 mlib_f32 hdvl = dst[0]; 176 177 for (i = 0; i < n; i++) { 178 mlib_f32 hdvl0 = val0 * hval0 + hdvl; 179 mlib_f32 val2 = src2[i * nch]; 180 181 hdvl = hdst1[i * dnch]; 182 hdvl0 += val1 * hval1; 183 hdvl0 += val2 * hval2; 184 val0 = val1; 185 val1 = val2; 186 187 dst[i * dnch] = hdvl0; 188 } 189 } 190 191 if (j < m - 1) { 192 const mlib_f32 *src2 = src + 2 * nch; 193 mlib_f32 hval0 = (mlib_f32) kernel[0]; 194 mlib_f32 hval1 = (mlib_f32) kernel[1]; 195 mlib_f32 val0 = src[0]; 196 mlib_f32 val1 = src[nch]; 197 mlib_f32 hdvl = dst[0]; 198 for (i = 0; i < n; i++) { 199 mlib_f32 hdvl0 = val0 * hval0 + hdvl; 200 mlib_f32 val2 = src2[i * nch]; 201 202 hdvl = hdst1[i * dnch]; 203 hdvl0 += val1 * hval1; 204 val0 = val1; 205 val1 = val2; 206 207 dst[i * dnch] = hdvl0; 208 } 209 210 } 211 else if (j < m) { 212 const mlib_f32 *src2 = src + 2 * nch; 213 mlib_f32 hval0 = (mlib_f32) kernel[0]; 214 mlib_f32 val0 = src[0]; 215 mlib_f32 val1 = src[nch]; 216 mlib_f32 hdvl = dst[0]; 217 218 for (i = 0; i < n; i++) { 219 mlib_f32 hdvl0 = val0 * hval0 + hdvl; 220 mlib_f32 val2 = src2[i * nch]; 221 222 hdvl = hdst1[i * dnch]; 223 val0 = val1; 224 val1 = val2; 225 226 dst[i * dnch] = hdvl0; 227 } 228 } 229 } 230 231 /***************************************************************/ 232 void mlib_ImageConvMxNF322F32_ext(mlib_f32 *dst, 233 const mlib_f32 *src, 234 mlib_s32 n, 235 mlib_s32 nch, 236 mlib_s32 dx_l, 237 mlib_s32 dx_r) 238 { 239 mlib_s32 i; 240 mlib_f32 val = src[0]; 241 242 for (i = 0; i < dx_l; i++) 243 dst[i] = val; 244 for (; i < n - dx_r; i++) 245 dst[i] = src[nch * (i - dx_l)]; 246 val = dst[n - dx_r - 1]; 247 for (; i < n; i++) 248 dst[i] = val; 249 } 250 251 /***************************************************************/ 252 mlib_status mlib_convMxNext_f32(mlib_image *dst, 253 const mlib_image *src, 254 const mlib_d64 *kernel, 255 mlib_s32 m, 256 mlib_s32 n, 257 mlib_s32 dx_l, 258 mlib_s32 dx_r, 259 mlib_s32 dy_t, 260 mlib_s32 dy_b, 261 mlib_s32 cmask) 262 { 263 mlib_d64 dspace[1024], *dsa = dspace; 264 mlib_s32 wid_e = mlib_ImageGetWidth(src); 265 mlib_f32 *fsa; 266 mlib_f32 *da = mlib_ImageGetData(dst); 267 mlib_f32 *sa = mlib_ImageGetData(src); 268 mlib_s32 dlb = mlib_ImageGetStride(dst) >> 2; 269 mlib_s32 slb = mlib_ImageGetStride(src) >> 2; 270 mlib_s32 dw = mlib_ImageGetWidth(dst); 271 mlib_s32 dh = mlib_ImageGetHeight(dst); 272 mlib_s32 nch = mlib_ImageGetChannels(dst); 273 mlib_s32 i, j, j1, k; 274 275 if (3 * wid_e + m > 1024) { 276 dsa = mlib_malloc((3 * wid_e + m) * sizeof(mlib_d64)); 277 278 if (dsa == NULL) 279 return MLIB_FAILURE; 280 } 281 282 fsa = (mlib_f32 *) dsa; 283 284 for (j = 0; j < dh; j++, da += dlb) { 285 for (k = 0; k < nch; k++) 286 if (cmask & (1 << (nch - 1 - k))) { 287 const mlib_f32 *sa1 = sa + k; 288 mlib_f32 *da1 = da + k; 289 const mlib_d64 *kernel1 = kernel; 290 291 for (i = 0; i < dw; i++) 292 da1[i * nch] = 0.f; 293 for (j1 = 0; j1 < n; j1++, kernel1 += m) { 294 mlib_ImageConvMxNF322F32_ext(fsa, sa1, dw + m - 1, nch, dx_l, dx_r); 295 mlib_ImageConvMxNMulAdd_F32(da1, fsa, kernel1, dw, m, 1, nch); 296 297 if ((j + j1 >= dy_t) && (j + j1 < dh + n - dy_b - 2)) 298 sa1 += slb; 299 } 300 } 301 302 if ((j >= dy_t) && (j < dh + n - dy_b - 2)) 303 sa += slb; 304 } 305 306 if (dsa != dspace) 307 mlib_free(dsa); 308 return MLIB_SUCCESS; 309 } 310 311 /***************************************************************/ 312 #if 0 313 314 void mlib_ImageConvMxNMulAdd2_F32(mlib_f32 *hdst, 315 mlib_f32 *vdst, 316 const mlib_f32 *src, 317 const mlib_d64 *hfilter, 318 const mlib_d64 *vfilter, 319 mlib_s32 n, 320 mlib_s32 m, 321 mlib_s32 nch, 322 mlib_s32 dnch) 323 { 324 mlib_f32 *hdst1 = hdst + dnch, *vdst1 = vdst + dnch; 325 mlib_s32 i, j; 326 327 for (j = 0; j < m - 2; j += 3, src += 3 * nch, hfilter += 3, vfilter += 3) { 328 mlib_f32 *src2 = src + 2 * nch; 329 mlib_f32 hval0 = (mlib_f32) hfilter[0]; 330 mlib_f32 vval0 = (mlib_f32) vfilter[0]; 331 mlib_f32 hval1 = (mlib_f32) hfilter[1]; 332 mlib_f32 vval1 = (mlib_f32) vfilter[1]; 333 mlib_f32 hval2 = (mlib_f32) hfilter[2]; 334 mlib_f32 vval2 = (mlib_f32) vfilter[2]; 335 mlib_f32 val0 = src[0]; 336 mlib_f32 val1 = src[nch]; 337 mlib_f32 hdvl = hdst[0]; 338 mlib_f32 vdvl = vdst[0]; 339 340 for (i = 0; i < n; i++) { 341 mlib_f32 hdvl0 = val0 * hval0 + hdvl; 342 mlib_f32 vdvl0 = val0 * vval0 + vdvl; 343 mlib_f32 val2 = src2[i * nch]; 344 345 hdvl = hdst1[i * dnch]; 346 vdvl = vdst1[i * dnch]; 347 hdvl0 += val1 * hval1; 348 vdvl0 += val1 * vval1; 349 hdvl0 += val2 * hval2; 350 vdvl0 += val2 * vval2; 351 val0 = val1; 352 val1 = val2; 353 354 hdst[i * dnch] = hdvl0; 355 vdst[i * dnch] = vdvl0; 356 } 357 } 358 359 if (j < m - 1) { 360 mlib_f32 *src2 = src + 2 * nch; 361 mlib_f32 hval0 = (mlib_f32) hfilter[0]; 362 mlib_f32 vval0 = (mlib_f32) vfilter[0]; 363 mlib_f32 hval1 = (mlib_f32) hfilter[1]; 364 mlib_f32 vval1 = (mlib_f32) vfilter[1]; 365 mlib_f32 val0 = src[0]; 366 mlib_f32 val1 = src[nch]; 367 mlib_f32 hdvl = hdst[0]; 368 mlib_f32 vdvl = vdst[0]; 369 370 for (i = 0; i < n; i++) { 371 mlib_f32 hdvl0 = val0 * hval0 + hdvl; 372 mlib_f32 vdvl0 = val0 * vval0 + vdvl; 373 mlib_f32 val2 = src2[i * nch]; 374 375 hdvl = hdst1[i * dnch]; 376 vdvl = vdst1[i * dnch]; 377 hdvl0 += val1 * hval1; 378 vdvl0 += val1 * vval1; 379 val0 = val1; 380 val1 = val2; 381 382 hdst[i * dnch] = hdvl0; 383 vdst[i * dnch] = vdvl0; 384 } 385 386 } 387 else if (j < m) { 388 mlib_f32 *src2 = src + 2 * nch; 389 mlib_f32 hval0 = (mlib_f32) hfilter[0]; 390 mlib_f32 vval0 = (mlib_f32) vfilter[0]; 391 mlib_f32 val0 = src[0]; 392 mlib_f32 val1 = src[nch]; 393 mlib_f32 hdvl = hdst[0]; 394 mlib_f32 vdvl = vdst[0]; 395 396 for (i = 0; i < n; i++) { 397 mlib_f32 hdvl0 = val0 * hval0 + hdvl; 398 mlib_f32 vdvl0 = val0 * vval0 + vdvl; 399 mlib_f32 val2 = src2[i * nch]; 400 401 hdvl = hdst1[i * dnch]; 402 vdvl = vdst1[i * dnch]; 403 val0 = val1; 404 val1 = val2; 405 406 hdst[i * dnch] = hdvl0; 407 vdst[i * dnch] = vdvl0; 408 } 409 } 410 } 411 412 /***************************************************************/ 413 void mlib_ImageConvMxNMulAdd2_D64(mlib_d64 *hdst, 414 mlib_d64 *vdst, 415 const mlib_d64 *src, 416 const mlib_d64 *hfilter, 417 const mlib_d64 *vfilter, 418 mlib_s32 n, 419 mlib_s32 m, 420 mlib_s32 nch, 421 mlib_s32 dnch) 422 { 423 mlib_d64 *hdst1 = hdst + dnch, *vdst1 = vdst + dnch; 424 mlib_s32 i, j; 425 426 for (j = 0; j < m - 2; j += 3, src += 3 * nch, hfilter += 3, vfilter += 3) { 427 mlib_d64 *src2 = src + 2 * nch; 428 mlib_d64 hval0 = hfilter[0]; 429 mlib_d64 vval0 = vfilter[0]; 430 mlib_d64 hval1 = hfilter[1]; 431 mlib_d64 vval1 = vfilter[1]; 432 mlib_d64 hval2 = hfilter[2]; 433 mlib_d64 vval2 = vfilter[2]; 434 mlib_d64 val0 = src[0]; 435 mlib_d64 val1 = src[nch]; 436 mlib_d64 hdvl = hdst[0]; 437 mlib_d64 vdvl = vdst[0]; 438 439 for (i = 0; i < n; i++) { 440 mlib_d64 hdvl0 = val0 * hval0 + hdvl; 441 mlib_d64 vdvl0 = val0 * vval0 + vdvl; 442 mlib_d64 val2 = src2[i * nch]; 443 444 hdvl = hdst1[i * dnch]; 445 vdvl = vdst1[i * dnch]; 446 hdvl0 += val1 * hval1; 447 vdvl0 += val1 * vval1; 448 hdvl0 += val2 * hval2; 449 vdvl0 += val2 * vval2; 450 val0 = val1; 451 val1 = val2; 452 453 hdst[i * dnch] = hdvl0; 454 vdst[i * dnch] = vdvl0; 455 } 456 } 457 458 if (j < m - 1) { 459 mlib_d64 *src2 = src + 2 * nch; 460 mlib_d64 hval0 = hfilter[0]; 461 mlib_d64 vval0 = vfilter[0]; 462 mlib_d64 hval1 = hfilter[1]; 463 mlib_d64 vval1 = vfilter[1]; 464 mlib_d64 val0 = src[0]; 465 mlib_d64 val1 = src[nch]; 466 mlib_d64 hdvl = hdst[0]; 467 mlib_d64 vdvl = vdst[0]; 468 469 for (i = 0; i < n; i++) { 470 mlib_d64 hdvl0 = val0 * hval0 + hdvl; 471 mlib_d64 vdvl0 = val0 * vval0 + vdvl; 472 mlib_d64 val2 = src2[i * nch]; 473 474 hdvl = hdst1[i * dnch]; 475 vdvl = vdst1[i * dnch]; 476 hdvl0 += val1 * hval1; 477 vdvl0 += val1 * vval1; 478 val0 = val1; 479 val1 = val2; 480 481 hdst[i * dnch] = hdvl0; 482 vdst[i * dnch] = vdvl0; 483 } 484 485 } 486 else if (j < m) { 487 mlib_d64 *src2 = src + 2 * nch; 488 mlib_d64 hval0 = hfilter[0]; 489 mlib_d64 vval0 = vfilter[0]; 490 mlib_d64 val0 = src[0]; 491 mlib_d64 val1 = src[nch]; 492 mlib_d64 hdvl = hdst[0]; 493 mlib_d64 vdvl = vdst[0]; 494 495 for (i = 0; i < n; i++) { 496 mlib_d64 hdvl0 = val0 * hval0 + hdvl; 497 mlib_d64 vdvl0 = val0 * vval0 + vdvl; 498 mlib_d64 val2 = src2[i * nch]; 499 500 hdvl = hdst1[i * dnch]; 501 vdvl = vdst1[i * dnch]; 502 val0 = val1; 503 val1 = val2; 504 505 hdst[i * dnch] = hdvl0; 506 vdst[i * dnch] = vdvl0; 507 } 508 } 509 } 510 511 #endif /* 0 */ 512 513 /***************************************************************/ 514 void mlib_ImageConvMxNMulAdd_D64(mlib_d64 *dst, 515 const mlib_d64 *src, 516 const mlib_d64 *kernel, 517 mlib_s32 n, 518 mlib_s32 m, 519 mlib_s32 nch, 520 mlib_s32 dnch) 521 { 522 mlib_d64 *hdst1 = dst + dnch; 523 mlib_s32 i, j; 524 525 for (j = 0; j < m - 2; j += 3, src += 3 * nch, kernel += 3) { 526 const mlib_d64 *src2 = src + 2 * nch; 527 mlib_d64 hval0 = kernel[0]; 528 mlib_d64 hval1 = kernel[1]; 529 mlib_d64 hval2 = kernel[2]; 530 mlib_d64 val0 = src[0]; 531 mlib_d64 val1 = src[nch]; 532 mlib_d64 hdvl = dst[0]; 533 534 for (i = 0; i < n; i++) { 535 mlib_d64 hdvl0 = val0 * hval0 + hdvl; 536 mlib_d64 val2 = src2[i * nch]; 537 538 hdvl = hdst1[i * dnch]; 539 hdvl0 += val1 * hval1; 540 hdvl0 += val2 * hval2; 541 val0 = val1; 542 val1 = val2; 543 544 dst[i * dnch] = hdvl0; 545 } 546 } 547 548 if (j < m - 1) { 549 const mlib_d64 *src2 = src + 2 * nch; 550 mlib_d64 hval0 = kernel[0]; 551 mlib_d64 hval1 = kernel[1]; 552 mlib_d64 val0 = src[0]; 553 mlib_d64 val1 = src[nch]; 554 mlib_d64 hdvl = dst[0]; 555 556 for (i = 0; i < n; i++) { 557 mlib_d64 hdvl0 = val0 * hval0 + hdvl; 558 mlib_d64 val2 = src2[i * nch]; 559 560 hdvl = hdst1[i * dnch]; 561 hdvl0 += val1 * hval1; 562 val0 = val1; 563 val1 = val2; 564 565 dst[i * dnch] = hdvl0; 566 } 567 568 } 569 else if (j < m) { 570 const mlib_d64 *src2 = src + 2 * nch; 571 mlib_d64 hval0 = kernel[0]; 572 mlib_d64 val0 = src[0]; 573 mlib_d64 val1 = src[nch]; 574 mlib_d64 hdvl = dst[0]; 575 576 for (i = 0; i < n; i++) { 577 mlib_d64 hdvl0 = val0 * hval0 + hdvl; 578 mlib_d64 val2 = src2[i * nch]; 579 580 hdvl = hdst1[i * dnch]; 581 val0 = val1; 582 val1 = val2; 583 584 dst[i * dnch] = hdvl0; 585 } 586 } 587 } 588 589 /***************************************************************/ 590 void mlib_ImageConvMxND642D64_ext(mlib_d64 *dst, 591 const mlib_d64 *src, 592 mlib_s32 n, 593 mlib_s32 nch, 594 mlib_s32 dx_l, 595 mlib_s32 dx_r) 596 { 597 mlib_s32 i; 598 mlib_d64 val = src[0]; 599 600 for (i = 0; i < dx_l; i++) 601 dst[i] = val; 602 for (; i < n - dx_r; i++) 603 dst[i] = src[nch * (i - dx_l)]; 604 val = dst[n - dx_r - 1]; 605 for (; i < n; i++) 606 dst[i] = val; 607 } 608 609 /***************************************************************/ 610 mlib_status mlib_convMxNext_d64(mlib_image *dst, 611 const mlib_image *src, 612 const mlib_d64 *kernel, 613 mlib_s32 m, 614 mlib_s32 n, 615 mlib_s32 dx_l, 616 mlib_s32 dx_r, 617 mlib_s32 dy_t, 618 mlib_s32 dy_b, 619 mlib_s32 cmask) 620 { 621 mlib_d64 dspace[1024], *dsa = dspace; 622 mlib_s32 wid_e = mlib_ImageGetWidth(src); 623 mlib_d64 *da = mlib_ImageGetData(dst); 624 mlib_d64 *sa = mlib_ImageGetData(src); 625 mlib_s32 dlb = mlib_ImageGetStride(dst) >> 3; 626 mlib_s32 slb = mlib_ImageGetStride(src) >> 3; 627 mlib_s32 dw = mlib_ImageGetWidth(dst); 628 mlib_s32 dh = mlib_ImageGetHeight(dst); 629 mlib_s32 nch = mlib_ImageGetChannels(dst); 630 mlib_s32 i, j, j1, k; 631 632 if (3 * wid_e + m > 1024) { 633 dsa = mlib_malloc((3 * wid_e + m) * sizeof(mlib_d64)); 634 635 if (dsa == NULL) 636 return MLIB_FAILURE; 637 } 638 639 for (j = 0; j < dh; j++, da += dlb) { 640 for (k = 0; k < nch; k++) 641 if (cmask & (1 << (nch - 1 - k))) { 642 mlib_d64 *sa1 = sa + k; 643 mlib_d64 *da1 = da + k; 644 const mlib_d64 *kernel1 = kernel; 645 646 for (i = 0; i < dw; i++) 647 da1[i * nch] = 0.; 648 for (j1 = 0; j1 < n; j1++, kernel1 += m) { 649 mlib_ImageConvMxND642D64_ext(dsa, sa1, dw + m - 1, nch, dx_l, dx_r); 650 mlib_ImageConvMxNMulAdd_D64(da1, dsa, kernel1, dw, m, 1, nch); 651 652 if ((j + j1 >= dy_t) && (j + j1 < dh + n - dy_b - 2)) 653 sa1 += slb; 654 } 655 } 656 657 if ((j >= dy_t) && (j < dh + n - dy_b - 2)) 658 sa += slb; 659 } 660 661 if (dsa != dspace) 662 mlib_free(dsa); 663 return MLIB_SUCCESS; 664 } 665 666 /***************************************************************/