1 /*
   2  * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 
  27 /*
  28  * FUNCTION
  29  *      mlib_ImageConvMxN_Fp - image convolution with edge condition
  30  *
  31  * SYNOPSIS
  32  *      mlib_status mlib_ImageConvMxN_Fp(mlib_image       *dst,
  33  *                                       const mlib_image *src,
  34  *                                       const mlib_d64   *kernel,
  35  *                                       mlib_s32         m,
  36  *                                       mlib_s32         n,
  37  *                                       mlib_s32         dm,
  38  *                                       mlib_s32         dn,
  39  *                                       mlib_s32         cmask,
  40  *                                       mlib_edge        edge)
  41  *
  42  * ARGUMENTS
  43  *      dst       Pointer to destination image.
  44  *      src       Pointer to source image.
  45  *      m         Kernel width (m must be not less than 1).
  46  *      n         Kernel height (n must be not less than 1).
  47  *      dm, dn    Position of key element in convolution kernel.
  48  *      kernel    Pointer to convolution kernel.
  49  *      cmask     Channel mask to indicate the channels to be convolved.
 *                Each bit represents a channel in the image; the channels
 *                corresponding to 1 bits are the ones processed.
  52  *      edge      Type of edge condition.
  53  *
  54  * DESCRIPTION
  55  *      2-D convolution, MxN kernel.
  56  *
  57  *      The center of the source image is mapped to the center of the
  58  *      destination image.
  59  *      The unselected channels are not overwritten. If both src and dst have
  60  *      just one channel, cmask is ignored.
  61  *
  62  *      The edge condition can be one of the following:
  63  *              MLIB_EDGE_DST_NO_WRITE  (default)
  64  *              MLIB_EDGE_DST_FILL_ZERO
  65  *              MLIB_EDGE_DST_COPY_SRC
  66  *              MLIB_EDGE_SRC_EXTEND
  67  *
  68  * RESTRICTION
  69  *      The src and the dst must be the same type and have same number
  70  *      of channels (1, 2, 3, or 4).
  71  *      m >= 1, n >= 1,
  72  *      0 <= dm < m, 0 <= dn < n.
  73  */
  74 
  75 #include "mlib_image.h"
  76 #include "mlib_ImageCheck.h"
  77 #include "mlib_SysMath.h"
  78 #include "mlib_ImageConv.h"
  79 
  80 /***************************************************************/
  81 static void mlib_ImageConvMxNMulAdd_F32(mlib_f32       *dst,
  82                                         const mlib_f32 *src,
  83                                         const mlib_d64 *kernel,
  84                                         mlib_s32       n,
  85                                         mlib_s32       m,
  86                                         mlib_s32       nch,
  87                                         mlib_s32       dnch);
  88 
  89 static void mlib_ImageConvMxNF322F32_ext(mlib_f32       *dst,
  90                                          const mlib_f32 *src,
  91                                          mlib_s32       n,
  92                                          mlib_s32       nch,
  93                                          mlib_s32       dx_l,
  94                                          mlib_s32       dx_r);
  95 
  96 static void mlib_ImageConvMxNMulAdd_D64(mlib_d64       *dst,
  97                                         const mlib_d64 *src,
  98                                         const mlib_d64 *kernel,
  99                                         mlib_s32       n,
 100                                         mlib_s32       m,
 101                                         mlib_s32       nch,
 102                                         mlib_s32       dnch);
 103 
 104 static void mlib_ImageConvMxND642D64_ext(mlib_d64       *dst,
 105                                          const mlib_d64 *src,
 106                                          mlib_s32       n,
 107                                          mlib_s32       nch,
 108                                          mlib_s32       dx_l,
 109                                          mlib_s32       dx_r);
 110 
 111 /***************************************************************/
 112 #if 0
 113 static void mlib_ImageConvMxNMulAdd2_F32(mlib_f32       *hdst,
 114                                          mlib_f32       *vdst,
 115                                          const mlib_f32 *src,
 116                                          const mlib_d64 *hfilter,
 117                                          const mlib_d64 *vfilter,
 118                                          mlib_s32       n,
 119                                          mlib_s32       m,
 120                                          mlib_s32       nch,
 121                                          mlib_s32       dnch);
 122 
 123 static void mlib_ImageConvMxNMulAdd2_D64(mlib_d64       *hdst,
 124                                          mlib_d64       *vdst,
 125                                          const mlib_d64 *src,
 126                                          const mlib_d64 *hfilter,
 127                                          const mlib_d64 *vfilter,
 128                                          mlib_s32       n,
 129                                          mlib_s32       m,
 130                                          mlib_s32       nch,
 131                                          mlib_s32       dnch);
 132 #endif /* 0 */
 133 
 134 /***************************************************************/
 135 mlib_status mlib_ImageConvMxN_Fp(mlib_image       *dst,
 136                                  const mlib_image *src,
 137                                  const mlib_d64   *kernel,
 138                                  mlib_s32         m,
 139                                  mlib_s32         n,
 140                                  mlib_s32         dm,
 141                                  mlib_s32         dn,
 142                                  mlib_s32         cmask,
 143                                  mlib_edge        edge)
 144 {
 145   mlib_type type;
 146 
 147   MLIB_IMAGE_CHECK(dst);
 148   type = mlib_ImageGetType(dst);
 149 
 150   if (type != MLIB_FLOAT && type != MLIB_DOUBLE)
 151     return MLIB_FAILURE;
 152 
 153   return mlib_ImageConvMxN_f(dst, src, kernel, m, n, dm, dn, 0, cmask, edge);
 154 }
 155 
 156 /***************************************************************/
 157 void mlib_ImageConvMxNMulAdd_F32(mlib_f32       *dst,
 158                                  const mlib_f32 *src,
 159                                  const mlib_d64 *kernel,
 160                                  mlib_s32       n,
 161                                  mlib_s32       m,
 162                                  mlib_s32       nch,
 163                                  mlib_s32       dnch)
 164 {
 165   mlib_f32 *hdst1 = dst + dnch;
 166   mlib_s32 i, j;
 167 
 168   for (j = 0; j < m - 2; j += 3, src += 3 * nch, kernel += 3) {
 169     const mlib_f32 *src2 = src + 2 * nch;
 170     mlib_f32 hval0 = (mlib_f32) kernel[0];
 171     mlib_f32 hval1 = (mlib_f32) kernel[1];
 172     mlib_f32 hval2 = (mlib_f32) kernel[2];
 173     mlib_f32 val0 = src[0];
 174     mlib_f32 val1 = src[nch];
 175     mlib_f32 hdvl = dst[0];
 176 
 177     for (i = 0; i < n; i++) {
 178       mlib_f32 hdvl0 = val0 * hval0 + hdvl;
 179       mlib_f32 val2 = src2[i * nch];
 180 
 181       hdvl = hdst1[i * dnch];
 182       hdvl0 += val1 * hval1;
 183       hdvl0 += val2 * hval2;
 184       val0 = val1;
 185       val1 = val2;
 186 
 187       dst[i * dnch] = hdvl0;
 188     }
 189   }
 190 
 191   if (j < m - 1) {
 192     const mlib_f32 *src2 = src + 2 * nch;
 193     mlib_f32 hval0 = (mlib_f32) kernel[0];
 194     mlib_f32 hval1 = (mlib_f32) kernel[1];
 195     mlib_f32 val0 = src[0];
 196     mlib_f32 val1 = src[nch];
 197     mlib_f32 hdvl = dst[0];
 198     for (i = 0; i < n; i++) {
 199       mlib_f32 hdvl0 = val0 * hval0 + hdvl;
 200       mlib_f32 val2 = src2[i * nch];
 201 
 202       hdvl = hdst1[i * dnch];
 203       hdvl0 += val1 * hval1;
 204       val0 = val1;
 205       val1 = val2;
 206 
 207       dst[i * dnch] = hdvl0;
 208     }
 209 
 210   }
 211   else if (j < m) {
 212     const mlib_f32 *src2 = src + 2 * nch;
 213     mlib_f32 hval0 = (mlib_f32) kernel[0];
 214     mlib_f32 val0 = src[0];
 215     mlib_f32 val1 = src[nch];
 216     mlib_f32 hdvl = dst[0];
 217 
 218     for (i = 0; i < n; i++) {
 219       mlib_f32 hdvl0 = val0 * hval0 + hdvl;
 220       mlib_f32 val2 = src2[i * nch];
 221 
 222       hdvl = hdst1[i * dnch];
 223       val0 = val1;
 224       val1 = val2;
 225 
 226       dst[i * dnch] = hdvl0;
 227     }
 228   }
 229 }
 230 
 231 /***************************************************************/
 232 void mlib_ImageConvMxNF322F32_ext(mlib_f32       *dst,
 233                                   const mlib_f32 *src,
 234                                   mlib_s32       n,
 235                                   mlib_s32       nch,
 236                                   mlib_s32       dx_l,
 237                                   mlib_s32       dx_r)
 238 {
 239   mlib_s32 i;
 240   mlib_f32 val = src[0];
 241 
 242   for (i = 0; i < dx_l; i++)
 243     dst[i] = val;
 244   for (; i < n - dx_r; i++)
 245     dst[i] = src[nch * (i - dx_l)];
 246   val = dst[n - dx_r - 1];
 247   for (; i < n; i++)
 248     dst[i] = val;
 249 }
 250 
 251 /***************************************************************/
 252 mlib_status mlib_convMxNext_f32(mlib_image       *dst,
 253                                 const mlib_image *src,
 254                                 const mlib_d64   *kernel,
 255                                 mlib_s32         m,
 256                                 mlib_s32         n,
 257                                 mlib_s32         dx_l,
 258                                 mlib_s32         dx_r,
 259                                 mlib_s32         dy_t,
 260                                 mlib_s32         dy_b,
 261                                 mlib_s32         cmask)
 262 {
 263   mlib_d64 dspace[1024], *dsa = dspace;
 264   mlib_s32 wid_e = mlib_ImageGetWidth(src);
 265   mlib_f32 *fsa;
 266   mlib_f32 *da = mlib_ImageGetData(dst);
 267   mlib_f32 *sa = mlib_ImageGetData(src);
 268   mlib_s32 dlb = mlib_ImageGetStride(dst) >> 2;
 269   mlib_s32 slb = mlib_ImageGetStride(src) >> 2;
 270   mlib_s32 dw = mlib_ImageGetWidth(dst);
 271   mlib_s32 dh = mlib_ImageGetHeight(dst);
 272   mlib_s32 nch = mlib_ImageGetChannels(dst);
 273   mlib_s32 i, j, j1, k;
 274 
 275   if (3 * wid_e + m > 1024) {
 276     dsa = mlib_malloc((3 * wid_e + m) * sizeof(mlib_d64));
 277 
 278     if (dsa == NULL)
 279       return MLIB_FAILURE;
 280   }
 281 
 282   fsa = (mlib_f32 *) dsa;
 283 
 284   for (j = 0; j < dh; j++, da += dlb) {
 285     for (k = 0; k < nch; k++)
 286       if (cmask & (1 << (nch - 1 - k))) {
 287         const mlib_f32 *sa1 = sa + k;
 288         mlib_f32 *da1 = da + k;
 289         const mlib_d64 *kernel1 = kernel;
 290 
 291         for (i = 0; i < dw; i++)
 292           da1[i * nch] = 0.f;
 293         for (j1 = 0; j1 < n; j1++, kernel1 += m) {
 294           mlib_ImageConvMxNF322F32_ext(fsa, sa1, dw + m - 1, nch, dx_l, dx_r);
 295           mlib_ImageConvMxNMulAdd_F32(da1, fsa, kernel1, dw, m, 1, nch);
 296 
 297           if ((j + j1 >= dy_t) && (j + j1 < dh + n - dy_b - 2))
 298             sa1 += slb;
 299         }
 300       }
 301 
 302     if ((j >= dy_t) && (j < dh + n - dy_b - 2))
 303       sa += slb;
 304   }
 305 
 306   if (dsa != dspace)
 307     mlib_free(dsa);
 308   return MLIB_SUCCESS;
 309 }
 310 
 311 /***************************************************************/
 312 #if 0
 313 
 314 void mlib_ImageConvMxNMulAdd2_F32(mlib_f32       *hdst,
 315                                   mlib_f32       *vdst,
 316                                   const mlib_f32 *src,
 317                                   const mlib_d64 *hfilter,
 318                                   const mlib_d64 *vfilter,
 319                                   mlib_s32       n,
 320                                   mlib_s32       m,
 321                                   mlib_s32       nch,
 322                                   mlib_s32       dnch)
 323 {
 324   mlib_f32 *hdst1 = hdst + dnch, *vdst1 = vdst + dnch;
 325   mlib_s32 i, j;
 326 
 327   for (j = 0; j < m - 2; j += 3, src += 3 * nch, hfilter += 3, vfilter += 3) {
 328     mlib_f32 *src2 = src + 2 * nch;
 329     mlib_f32 hval0 = (mlib_f32) hfilter[0];
 330     mlib_f32 vval0 = (mlib_f32) vfilter[0];
 331     mlib_f32 hval1 = (mlib_f32) hfilter[1];
 332     mlib_f32 vval1 = (mlib_f32) vfilter[1];
 333     mlib_f32 hval2 = (mlib_f32) hfilter[2];
 334     mlib_f32 vval2 = (mlib_f32) vfilter[2];
 335     mlib_f32 val0 = src[0];
 336     mlib_f32 val1 = src[nch];
 337     mlib_f32 hdvl = hdst[0];
 338     mlib_f32 vdvl = vdst[0];
 339 
 340     for (i = 0; i < n; i++) {
 341       mlib_f32 hdvl0 = val0 * hval0 + hdvl;
 342       mlib_f32 vdvl0 = val0 * vval0 + vdvl;
 343       mlib_f32 val2 = src2[i * nch];
 344 
 345       hdvl = hdst1[i * dnch];
 346       vdvl = vdst1[i * dnch];
 347       hdvl0 += val1 * hval1;
 348       vdvl0 += val1 * vval1;
 349       hdvl0 += val2 * hval2;
 350       vdvl0 += val2 * vval2;
 351       val0 = val1;
 352       val1 = val2;
 353 
 354       hdst[i * dnch] = hdvl0;
 355       vdst[i * dnch] = vdvl0;
 356     }
 357   }
 358 
 359   if (j < m - 1) {
 360     mlib_f32 *src2 = src + 2 * nch;
 361     mlib_f32 hval0 = (mlib_f32) hfilter[0];
 362     mlib_f32 vval0 = (mlib_f32) vfilter[0];
 363     mlib_f32 hval1 = (mlib_f32) hfilter[1];
 364     mlib_f32 vval1 = (mlib_f32) vfilter[1];
 365     mlib_f32 val0 = src[0];
 366     mlib_f32 val1 = src[nch];
 367     mlib_f32 hdvl = hdst[0];
 368     mlib_f32 vdvl = vdst[0];
 369 
 370     for (i = 0; i < n; i++) {
 371       mlib_f32 hdvl0 = val0 * hval0 + hdvl;
 372       mlib_f32 vdvl0 = val0 * vval0 + vdvl;
 373       mlib_f32 val2 = src2[i * nch];
 374 
 375       hdvl = hdst1[i * dnch];
 376       vdvl = vdst1[i * dnch];
 377       hdvl0 += val1 * hval1;
 378       vdvl0 += val1 * vval1;
 379       val0 = val1;
 380       val1 = val2;
 381 
 382       hdst[i * dnch] = hdvl0;
 383       vdst[i * dnch] = vdvl0;
 384     }
 385 
 386   }
 387   else if (j < m) {
 388     mlib_f32 *src2 = src + 2 * nch;
 389     mlib_f32 hval0 = (mlib_f32) hfilter[0];
 390     mlib_f32 vval0 = (mlib_f32) vfilter[0];
 391     mlib_f32 val0 = src[0];
 392     mlib_f32 val1 = src[nch];
 393     mlib_f32 hdvl = hdst[0];
 394     mlib_f32 vdvl = vdst[0];
 395 
 396     for (i = 0; i < n; i++) {
 397       mlib_f32 hdvl0 = val0 * hval0 + hdvl;
 398       mlib_f32 vdvl0 = val0 * vval0 + vdvl;
 399       mlib_f32 val2 = src2[i * nch];
 400 
 401       hdvl = hdst1[i * dnch];
 402       vdvl = vdst1[i * dnch];
 403       val0 = val1;
 404       val1 = val2;
 405 
 406       hdst[i * dnch] = hdvl0;
 407       vdst[i * dnch] = vdvl0;
 408     }
 409   }
 410 }
 411 
 412 /***************************************************************/
 413 void mlib_ImageConvMxNMulAdd2_D64(mlib_d64       *hdst,
 414                                   mlib_d64       *vdst,
 415                                   const mlib_d64 *src,
 416                                   const mlib_d64 *hfilter,
 417                                   const mlib_d64 *vfilter,
 418                                   mlib_s32       n,
 419                                   mlib_s32       m,
 420                                   mlib_s32       nch,
 421                                   mlib_s32       dnch)
 422 {
 423   mlib_d64 *hdst1 = hdst + dnch, *vdst1 = vdst + dnch;
 424   mlib_s32 i, j;
 425 
 426   for (j = 0; j < m - 2; j += 3, src += 3 * nch, hfilter += 3, vfilter += 3) {
 427     mlib_d64 *src2 = src + 2 * nch;
 428     mlib_d64 hval0 = hfilter[0];
 429     mlib_d64 vval0 = vfilter[0];
 430     mlib_d64 hval1 = hfilter[1];
 431     mlib_d64 vval1 = vfilter[1];
 432     mlib_d64 hval2 = hfilter[2];
 433     mlib_d64 vval2 = vfilter[2];
 434     mlib_d64 val0 = src[0];
 435     mlib_d64 val1 = src[nch];
 436     mlib_d64 hdvl = hdst[0];
 437     mlib_d64 vdvl = vdst[0];
 438 
 439     for (i = 0; i < n; i++) {
 440       mlib_d64 hdvl0 = val0 * hval0 + hdvl;
 441       mlib_d64 vdvl0 = val0 * vval0 + vdvl;
 442       mlib_d64 val2 = src2[i * nch];
 443 
 444       hdvl = hdst1[i * dnch];
 445       vdvl = vdst1[i * dnch];
 446       hdvl0 += val1 * hval1;
 447       vdvl0 += val1 * vval1;
 448       hdvl0 += val2 * hval2;
 449       vdvl0 += val2 * vval2;
 450       val0 = val1;
 451       val1 = val2;
 452 
 453       hdst[i * dnch] = hdvl0;
 454       vdst[i * dnch] = vdvl0;
 455     }
 456   }
 457 
 458   if (j < m - 1) {
 459     mlib_d64 *src2 = src + 2 * nch;
 460     mlib_d64 hval0 = hfilter[0];
 461     mlib_d64 vval0 = vfilter[0];
 462     mlib_d64 hval1 = hfilter[1];
 463     mlib_d64 vval1 = vfilter[1];
 464     mlib_d64 val0 = src[0];
 465     mlib_d64 val1 = src[nch];
 466     mlib_d64 hdvl = hdst[0];
 467     mlib_d64 vdvl = vdst[0];
 468 
 469     for (i = 0; i < n; i++) {
 470       mlib_d64 hdvl0 = val0 * hval0 + hdvl;
 471       mlib_d64 vdvl0 = val0 * vval0 + vdvl;
 472       mlib_d64 val2 = src2[i * nch];
 473 
 474       hdvl = hdst1[i * dnch];
 475       vdvl = vdst1[i * dnch];
 476       hdvl0 += val1 * hval1;
 477       vdvl0 += val1 * vval1;
 478       val0 = val1;
 479       val1 = val2;
 480 
 481       hdst[i * dnch] = hdvl0;
 482       vdst[i * dnch] = vdvl0;
 483     }
 484 
 485   }
 486   else if (j < m) {
 487     mlib_d64 *src2 = src + 2 * nch;
 488     mlib_d64 hval0 = hfilter[0];
 489     mlib_d64 vval0 = vfilter[0];
 490     mlib_d64 val0 = src[0];
 491     mlib_d64 val1 = src[nch];
 492     mlib_d64 hdvl = hdst[0];
 493     mlib_d64 vdvl = vdst[0];
 494 
 495     for (i = 0; i < n; i++) {
 496       mlib_d64 hdvl0 = val0 * hval0 + hdvl;
 497       mlib_d64 vdvl0 = val0 * vval0 + vdvl;
 498       mlib_d64 val2 = src2[i * nch];
 499 
 500       hdvl = hdst1[i * dnch];
 501       vdvl = vdst1[i * dnch];
 502       val0 = val1;
 503       val1 = val2;
 504 
 505       hdst[i * dnch] = hdvl0;
 506       vdst[i * dnch] = vdvl0;
 507     }
 508   }
 509 }
 510 
 511 #endif /* 0 */
 512 
 513 /***************************************************************/
 514 void mlib_ImageConvMxNMulAdd_D64(mlib_d64       *dst,
 515                                  const mlib_d64 *src,
 516                                  const mlib_d64 *kernel,
 517                                  mlib_s32       n,
 518                                  mlib_s32       m,
 519                                  mlib_s32       nch,
 520                                  mlib_s32       dnch)
 521 {
 522   mlib_d64 *hdst1 = dst + dnch;
 523   mlib_s32 i, j;
 524 
 525   for (j = 0; j < m - 2; j += 3, src += 3 * nch, kernel += 3) {
 526     const mlib_d64 *src2 = src + 2 * nch;
 527     mlib_d64 hval0 = kernel[0];
 528     mlib_d64 hval1 = kernel[1];
 529     mlib_d64 hval2 = kernel[2];
 530     mlib_d64 val0 = src[0];
 531     mlib_d64 val1 = src[nch];
 532     mlib_d64 hdvl = dst[0];
 533 
 534     for (i = 0; i < n; i++) {
 535       mlib_d64 hdvl0 = val0 * hval0 + hdvl;
 536       mlib_d64 val2 = src2[i * nch];
 537 
 538       hdvl = hdst1[i * dnch];
 539       hdvl0 += val1 * hval1;
 540       hdvl0 += val2 * hval2;
 541       val0 = val1;
 542       val1 = val2;
 543 
 544       dst[i * dnch] = hdvl0;
 545     }
 546   }
 547 
 548   if (j < m - 1) {
 549     const mlib_d64 *src2 = src + 2 * nch;
 550     mlib_d64 hval0 = kernel[0];
 551     mlib_d64 hval1 = kernel[1];
 552     mlib_d64 val0 = src[0];
 553     mlib_d64 val1 = src[nch];
 554     mlib_d64 hdvl = dst[0];
 555 
 556     for (i = 0; i < n; i++) {
 557       mlib_d64 hdvl0 = val0 * hval0 + hdvl;
 558       mlib_d64 val2 = src2[i * nch];
 559 
 560       hdvl = hdst1[i * dnch];
 561       hdvl0 += val1 * hval1;
 562       val0 = val1;
 563       val1 = val2;
 564 
 565       dst[i * dnch] = hdvl0;
 566     }
 567 
 568   }
 569   else if (j < m) {
 570     const mlib_d64 *src2 = src + 2 * nch;
 571     mlib_d64 hval0 = kernel[0];
 572     mlib_d64 val0 = src[0];
 573     mlib_d64 val1 = src[nch];
 574     mlib_d64 hdvl = dst[0];
 575 
 576     for (i = 0; i < n; i++) {
 577       mlib_d64 hdvl0 = val0 * hval0 + hdvl;
 578       mlib_d64 val2 = src2[i * nch];
 579 
 580       hdvl = hdst1[i * dnch];
 581       val0 = val1;
 582       val1 = val2;
 583 
 584       dst[i * dnch] = hdvl0;
 585     }
 586   }
 587 }
 588 
 589 /***************************************************************/
 590 void mlib_ImageConvMxND642D64_ext(mlib_d64       *dst,
 591                                   const mlib_d64 *src,
 592                                   mlib_s32       n,
 593                                   mlib_s32       nch,
 594                                   mlib_s32       dx_l,
 595                                   mlib_s32       dx_r)
 596 {
 597   mlib_s32 i;
 598   mlib_d64 val = src[0];
 599 
 600   for (i = 0; i < dx_l; i++)
 601     dst[i] = val;
 602   for (; i < n - dx_r; i++)
 603     dst[i] = src[nch * (i - dx_l)];
 604   val = dst[n - dx_r - 1];
 605   for (; i < n; i++)
 606     dst[i] = val;
 607 }
 608 
 609 /***************************************************************/
 610 mlib_status mlib_convMxNext_d64(mlib_image       *dst,
 611                                 const mlib_image *src,
 612                                 const mlib_d64   *kernel,
 613                                 mlib_s32         m,
 614                                 mlib_s32         n,
 615                                 mlib_s32         dx_l,
 616                                 mlib_s32         dx_r,
 617                                 mlib_s32         dy_t,
 618                                 mlib_s32         dy_b,
 619                                 mlib_s32         cmask)
 620 {
 621   mlib_d64 dspace[1024], *dsa = dspace;
 622   mlib_s32 wid_e = mlib_ImageGetWidth(src);
 623   mlib_d64 *da = mlib_ImageGetData(dst);
 624   mlib_d64 *sa = mlib_ImageGetData(src);
 625   mlib_s32 dlb = mlib_ImageGetStride(dst) >> 3;
 626   mlib_s32 slb = mlib_ImageGetStride(src) >> 3;
 627   mlib_s32 dw = mlib_ImageGetWidth(dst);
 628   mlib_s32 dh = mlib_ImageGetHeight(dst);
 629   mlib_s32 nch = mlib_ImageGetChannels(dst);
 630   mlib_s32 i, j, j1, k;
 631 
 632   if (3 * wid_e + m > 1024) {
 633     dsa = mlib_malloc((3 * wid_e + m) * sizeof(mlib_d64));
 634 
 635     if (dsa == NULL)
 636       return MLIB_FAILURE;
 637   }
 638 
 639   for (j = 0; j < dh; j++, da += dlb) {
 640     for (k = 0; k < nch; k++)
 641       if (cmask & (1 << (nch - 1 - k))) {
 642         mlib_d64 *sa1 = sa + k;
 643         mlib_d64 *da1 = da + k;
 644         const mlib_d64 *kernel1 = kernel;
 645 
 646         for (i = 0; i < dw; i++)
 647           da1[i * nch] = 0.;
 648         for (j1 = 0; j1 < n; j1++, kernel1 += m) {
 649           mlib_ImageConvMxND642D64_ext(dsa, sa1, dw + m - 1, nch, dx_l, dx_r);
 650           mlib_ImageConvMxNMulAdd_D64(da1, dsa, kernel1, dw, m, 1, nch);
 651 
 652           if ((j + j1 >= dy_t) && (j + j1 < dh + n - dy_b - 2))
 653             sa1 += slb;
 654         }
 655       }
 656 
 657     if ((j >= dy_t) && (j < dh + n - dy_b - 2))
 658       sa += slb;
 659   }
 660 
 661   if (dsa != dspace)
 662     mlib_free(dsa);
 663   return MLIB_SUCCESS;
 664 }
 665 
 666 /***************************************************************/