1 /*
   2  * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 
  27 /*
  28  * FUNCTION
  29  *      mlib_ImageConvMxN_Fp - image convolution with edge condition
  30  *
  31  * SYNOPSIS
  32  *      mlib_status mlib_ImageConvMxN_Fp(mlib_image       *dst,
  33  *                                       const mlib_image *src,
  34  *                                       const mlib_d64   *kernel,
  35  *                                       mlib_s32         m,
  36  *                                       mlib_s32         n,
  37  *                                       mlib_s32         dm,
  38  *                                       mlib_s32         dn,
  39  *                                       mlib_s32         cmask,
  40  *                                       mlib_edge        edge)
  41  *
  42  * ARGUMENTS
  43  *      dst       Pointer to destination image.
  44  *      src       Pointer to source image.
  45  *      m         Kernel width (m must be not less than 1).
  46  *      n         Kernel height (n must be not less than 1).
  47  *      dm, dn    Position of key element in convolution kernel.
  48  *      kernel    Pointer to convolution kernel.
 *      cmask     Channel mask to indicate the channels to be convolved.
 *                Each bit represents a channel in the image. The channels
 *                corresponding to 1 bits are those to be processed.
  52  *      edge      Type of edge condition.
  53  *
  54  * DESCRIPTION
  55  *      2-D convolution, MxN kernel.
  56  *
  57  *      The center of the source image is mapped to the center of the
  58  *      destination image.
  59  *      The unselected channels are not overwritten. If both src and dst have
  60  *      just one channel, cmask is ignored.
  61  *
  62  *      The edge condition can be one of the following:
  63  *              MLIB_EDGE_DST_NO_WRITE  (default)
  64  *              MLIB_EDGE_DST_FILL_ZERO
  65  *              MLIB_EDGE_DST_COPY_SRC
  66  *              MLIB_EDGE_SRC_EXTEND
  67  *
  68  * RESTRICTION
  69  *      The src and the dst must be the same type and have same number
  70  *      of channels (1, 2, 3, or 4).
  71  *      m >= 1, n >= 1,
  72  *      0 <= dm < m, 0 <= dn < n.
  73  */
  74 
  75 #include "mlib_image.h"
  76 #include "mlib_ImageCheck.h"
  77 #include "mlib_SysMath.h"
  78 #include "mlib_ImageConv.h"
  79 
  80 /***************************************************************/
/*
 * Forward declarations of the file-local helpers.
 *
 * The *MulAdd* helpers accumulate one row of kernel taps into an output row;
 * in their signatures n is the number of output samples and m is the number
 * of kernel taps in the row, nch is the source sample stride and dnch is the
 * destination sample stride.
 */
static void mlib_ImageConvMxNMulAdd_F32(mlib_f32       *dst,
                                        const mlib_f32 *src,
                                        const mlib_d64 *kernel,
                                        mlib_s32       n,
                                        mlib_s32       m,
                                        mlib_s32       nch,
                                        mlib_s32       dnch);

/*
 * The *_ext helpers copy one channel of a source row into a contiguous
 * buffer of n samples, replicating the first sample dx_l times on the left
 * and the last copied sample dx_r times on the right.
 */
static void mlib_ImageConvMxNF322F32_ext(mlib_f32       *dst,
                                         const mlib_f32 *src,
                                         mlib_s32       n,
                                         mlib_s32       nch,
                                         mlib_s32       dx_l,
                                         mlib_s32       dx_r);

/* mlib_d64 counterpart of mlib_ImageConvMxNMulAdd_F32. */
static void mlib_ImageConvMxNMulAdd_D64(mlib_d64       *dst,
                                        const mlib_d64 *src,
                                        const mlib_d64 *kernel,
                                        mlib_s32       n,
                                        mlib_s32       m,
                                        mlib_s32       nch,
                                        mlib_s32       dnch);

/* mlib_d64 counterpart of mlib_ImageConvMxNF322F32_ext. */
static void mlib_ImageConvMxND642D64_ext(mlib_d64       *dst,
                                         const mlib_d64 *src,
                                         mlib_s32       n,
                                         mlib_s32       nch,
                                         mlib_s32       dx_l,
                                         mlib_s32       dx_r);
 110 
 111 /***************************************************************/
#if 0
/*
 * Disabled (compiled out): prototypes for the dual-output variants of the
 * MulAdd helpers, which apply two filters (hfilter, vfilter) to the same
 * source row and accumulate into two destinations (hdst, vdst).
 * The matching definitions in this file are also under "#if 0".
 */
static void mlib_ImageConvMxNMulAdd2_F32(mlib_f32       *hdst,
                                         mlib_f32       *vdst,
                                         const mlib_f32 *src,
                                         const mlib_d64 *hfilter,
                                         const mlib_d64 *vfilter,
                                         mlib_s32       n,
                                         mlib_s32       m,
                                         mlib_s32       nch,
                                         mlib_s32       dnch);

static void mlib_ImageConvMxNMulAdd2_D64(mlib_d64       *hdst,
                                         mlib_d64       *vdst,
                                         const mlib_d64 *src,
                                         const mlib_d64 *hfilter,
                                         const mlib_d64 *vfilter,
                                         mlib_s32       n,
                                         mlib_s32       m,
                                         mlib_s32       nch,
                                         mlib_s32       dnch);
#endif /* 0 */
 133 
 134 /***************************************************************/
 135 mlib_status mlib_ImageConvMxN_Fp(mlib_image       *dst,
 136                                  const mlib_image *src,
 137                                  const mlib_d64   *kernel,
 138                                  mlib_s32         m,
 139                                  mlib_s32         n,
 140                                  mlib_s32         dm,
 141                                  mlib_s32         dn,
 142                                  mlib_s32         cmask,
 143                                  mlib_edge        edge)
 144 {
 145   mlib_type type;
 146 
 147   MLIB_IMAGE_CHECK(dst);
 148   type = mlib_ImageGetType(dst);
 149 
 150   if (type != MLIB_FLOAT && type != MLIB_DOUBLE)
 151     return MLIB_FAILURE;
 152 
 153   return mlib_ImageConvMxN_f(dst, src, kernel, m, n, dm, dn, 0, cmask, edge);
 154 }
 155 
 156 /***************************************************************/
 157 void mlib_ImageConvMxNMulAdd_F32(mlib_f32       *dst,
 158                                  const mlib_f32 *src,
 159                                  const mlib_d64 *kernel,
 160                                  mlib_s32       n,
 161                                  mlib_s32       m,
 162                                  mlib_s32       nch,
 163                                  mlib_s32       dnch)
 164 {
 165   mlib_f32 *hdst1 = dst + dnch;
 166   mlib_s32 i, j;
 167 
 168   for (j = 0; j < m - 2; j += 3, src += 3 * nch, kernel += 3) {
 169     const mlib_f32 *src2 = src + 2 * nch;
 170     mlib_f32 hval0 = (mlib_f32) kernel[0];
 171     mlib_f32 hval1 = (mlib_f32) kernel[1];
 172     mlib_f32 hval2 = (mlib_f32) kernel[2];
 173     mlib_f32 val0 = src[0];
 174     mlib_f32 val1 = src[nch];
 175     mlib_f32 hdvl = dst[0];
 176 
 177 #ifdef __SUNPRO_C
 178 #pragma pipeloop(0)
 179 #endif /* __SUNPRO_C */
 180     for (i = 0; i < n; i++) {
 181       mlib_f32 hdvl0 = val0 * hval0 + hdvl;
 182       mlib_f32 val2 = src2[i * nch];
 183 
 184       hdvl = hdst1[i * dnch];
 185       hdvl0 += val1 * hval1;
 186       hdvl0 += val2 * hval2;
 187       val0 = val1;
 188       val1 = val2;
 189 
 190       dst[i * dnch] = hdvl0;
 191     }
 192   }
 193 
 194   if (j < m - 1) {
 195     const mlib_f32 *src2 = src + 2 * nch;
 196     mlib_f32 hval0 = (mlib_f32) kernel[0];
 197     mlib_f32 hval1 = (mlib_f32) kernel[1];
 198     mlib_f32 val0 = src[0];
 199     mlib_f32 val1 = src[nch];
 200     mlib_f32 hdvl = dst[0];
 201 #ifdef __SUNPRO_C
 202 #pragma pipeloop(0)
 203 #endif /* __SUNPRO_C */
 204     for (i = 0; i < n; i++) {
 205       mlib_f32 hdvl0 = val0 * hval0 + hdvl;
 206       mlib_f32 val2 = src2[i * nch];
 207 
 208       hdvl = hdst1[i * dnch];
 209       hdvl0 += val1 * hval1;
 210       val0 = val1;
 211       val1 = val2;
 212 
 213       dst[i * dnch] = hdvl0;
 214     }
 215 
 216   }
 217   else if (j < m) {
 218     const mlib_f32 *src2 = src + 2 * nch;
 219     mlib_f32 hval0 = (mlib_f32) kernel[0];
 220     mlib_f32 val0 = src[0];
 221     mlib_f32 val1 = src[nch];
 222     mlib_f32 hdvl = dst[0];
 223 
 224 #ifdef __SUNPRO_C
 225 #pragma pipeloop(0)
 226 #endif /* __SUNPRO_C */
 227     for (i = 0; i < n; i++) {
 228       mlib_f32 hdvl0 = val0 * hval0 + hdvl;
 229       mlib_f32 val2 = src2[i * nch];
 230 
 231       hdvl = hdst1[i * dnch];
 232       val0 = val1;
 233       val1 = val2;
 234 
 235       dst[i * dnch] = hdvl0;
 236     }
 237   }
 238 }
 239 
 240 /***************************************************************/
 241 void mlib_ImageConvMxNF322F32_ext(mlib_f32       *dst,
 242                                   const mlib_f32 *src,
 243                                   mlib_s32       n,
 244                                   mlib_s32       nch,
 245                                   mlib_s32       dx_l,
 246                                   mlib_s32       dx_r)
 247 {
 248   mlib_s32 i;
 249   mlib_f32 val = src[0];
 250 
 251   for (i = 0; i < dx_l; i++)
 252     dst[i] = val;
 253 #ifdef __SUNPRO_C
 254 #pragma pipeloop(0)
 255 #endif /* __SUNPRO_C */
 256   for (; i < n - dx_r; i++)
 257     dst[i] = src[nch * (i - dx_l)];
 258   val = dst[n - dx_r - 1];
 259   for (; i < n; i++)
 260     dst[i] = val;
 261 }
 262 
 263 /***************************************************************/
 264 mlib_status mlib_convMxNext_f32(mlib_image       *dst,
 265                                 const mlib_image *src,
 266                                 const mlib_d64   *kernel,
 267                                 mlib_s32         m,
 268                                 mlib_s32         n,
 269                                 mlib_s32         dx_l,
 270                                 mlib_s32         dx_r,
 271                                 mlib_s32         dy_t,
 272                                 mlib_s32         dy_b,
 273                                 mlib_s32         cmask)
 274 {
 275   mlib_d64 dspace[1024], *dsa = dspace;
 276   mlib_s32 wid_e = mlib_ImageGetWidth(src);
 277   mlib_f32 *fsa;
 278   mlib_f32 *da = mlib_ImageGetData(dst);
 279   mlib_f32 *sa = mlib_ImageGetData(src);
 280   mlib_s32 dlb = mlib_ImageGetStride(dst) >> 2;
 281   mlib_s32 slb = mlib_ImageGetStride(src) >> 2;
 282   mlib_s32 dw = mlib_ImageGetWidth(dst);
 283   mlib_s32 dh = mlib_ImageGetHeight(dst);
 284   mlib_s32 nch = mlib_ImageGetChannels(dst);
 285   mlib_s32 i, j, j1, k;
 286 
 287   if (3 * wid_e + m > 1024) {
 288     dsa = mlib_malloc((3 * wid_e + m) * sizeof(mlib_d64));
 289 
 290     if (dsa == NULL)
 291       return MLIB_FAILURE;
 292   }
 293 
 294   fsa = (mlib_f32 *) dsa;
 295 
 296   for (j = 0; j < dh; j++, da += dlb) {
 297     for (k = 0; k < nch; k++)
 298       if (cmask & (1 << (nch - 1 - k))) {
 299         const mlib_f32 *sa1 = sa + k;
 300         mlib_f32 *da1 = da + k;
 301         const mlib_d64 *kernel1 = kernel;
 302 
 303         for (i = 0; i < dw; i++)
 304           da1[i * nch] = 0.f;
 305         for (j1 = 0; j1 < n; j1++, kernel1 += m) {
 306           mlib_ImageConvMxNF322F32_ext(fsa, sa1, dw + m - 1, nch, dx_l, dx_r);
 307           mlib_ImageConvMxNMulAdd_F32(da1, fsa, kernel1, dw, m, 1, nch);
 308 
 309           if ((j + j1 >= dy_t) && (j + j1 < dh + n - dy_b - 2))
 310             sa1 += slb;
 311         }
 312       }
 313 
 314     if ((j >= dy_t) && (j < dh + n - dy_b - 2))
 315       sa += slb;
 316   }
 317 
 318   if (dsa != dspace)
 319     mlib_free(dsa);
 320   return MLIB_SUCCESS;
 321 }
 322 
 323 /***************************************************************/
 324 #if 0
 325 
 326 void mlib_ImageConvMxNMulAdd2_F32(mlib_f32       *hdst,
 327                                   mlib_f32       *vdst,
 328                                   const mlib_f32 *src,
 329                                   const mlib_d64 *hfilter,
 330                                   const mlib_d64 *vfilter,
 331                                   mlib_s32       n,
 332                                   mlib_s32       m,
 333                                   mlib_s32       nch,
 334                                   mlib_s32       dnch)
 335 {
 336   mlib_f32 *hdst1 = hdst + dnch, *vdst1 = vdst + dnch;
 337   mlib_s32 i, j;
 338 
 339   for (j = 0; j < m - 2; j += 3, src += 3 * nch, hfilter += 3, vfilter += 3) {
 340     mlib_f32 *src2 = src + 2 * nch;
 341     mlib_f32 hval0 = (mlib_f32) hfilter[0];
 342     mlib_f32 vval0 = (mlib_f32) vfilter[0];
 343     mlib_f32 hval1 = (mlib_f32) hfilter[1];
 344     mlib_f32 vval1 = (mlib_f32) vfilter[1];
 345     mlib_f32 hval2 = (mlib_f32) hfilter[2];
 346     mlib_f32 vval2 = (mlib_f32) vfilter[2];
 347     mlib_f32 val0 = src[0];
 348     mlib_f32 val1 = src[nch];
 349     mlib_f32 hdvl = hdst[0];
 350     mlib_f32 vdvl = vdst[0];
 351 
 352 #ifdef __SUNPRO_C
 353 #pragma pipeloop(0)
 354 #endif /* __SUNPRO_C */
 355     for (i = 0; i < n; i++) {
 356       mlib_f32 hdvl0 = val0 * hval0 + hdvl;
 357       mlib_f32 vdvl0 = val0 * vval0 + vdvl;
 358       mlib_f32 val2 = src2[i * nch];
 359 
 360       hdvl = hdst1[i * dnch];
 361       vdvl = vdst1[i * dnch];
 362       hdvl0 += val1 * hval1;
 363       vdvl0 += val1 * vval1;
 364       hdvl0 += val2 * hval2;
 365       vdvl0 += val2 * vval2;
 366       val0 = val1;
 367       val1 = val2;
 368 
 369       hdst[i * dnch] = hdvl0;
 370       vdst[i * dnch] = vdvl0;
 371     }
 372   }
 373 
 374   if (j < m - 1) {
 375     mlib_f32 *src2 = src + 2 * nch;
 376     mlib_f32 hval0 = (mlib_f32) hfilter[0];
 377     mlib_f32 vval0 = (mlib_f32) vfilter[0];
 378     mlib_f32 hval1 = (mlib_f32) hfilter[1];
 379     mlib_f32 vval1 = (mlib_f32) vfilter[1];
 380     mlib_f32 val0 = src[0];
 381     mlib_f32 val1 = src[nch];
 382     mlib_f32 hdvl = hdst[0];
 383     mlib_f32 vdvl = vdst[0];
 384 
 385 #ifdef __SUNPRO_C
 386 #pragma pipeloop(0)
 387 #endif /* __SUNPRO_C */
 388     for (i = 0; i < n; i++) {
 389       mlib_f32 hdvl0 = val0 * hval0 + hdvl;
 390       mlib_f32 vdvl0 = val0 * vval0 + vdvl;
 391       mlib_f32 val2 = src2[i * nch];
 392 
 393       hdvl = hdst1[i * dnch];
 394       vdvl = vdst1[i * dnch];
 395       hdvl0 += val1 * hval1;
 396       vdvl0 += val1 * vval1;
 397       val0 = val1;
 398       val1 = val2;
 399 
 400       hdst[i * dnch] = hdvl0;
 401       vdst[i * dnch] = vdvl0;
 402     }
 403 
 404   }
 405   else if (j < m) {
 406     mlib_f32 *src2 = src + 2 * nch;
 407     mlib_f32 hval0 = (mlib_f32) hfilter[0];
 408     mlib_f32 vval0 = (mlib_f32) vfilter[0];
 409     mlib_f32 val0 = src[0];
 410     mlib_f32 val1 = src[nch];
 411     mlib_f32 hdvl = hdst[0];
 412     mlib_f32 vdvl = vdst[0];
 413 
 414 #ifdef __SUNPRO_C
 415 #pragma pipeloop(0)
 416 #endif /* __SUNPRO_C */
 417     for (i = 0; i < n; i++) {
 418       mlib_f32 hdvl0 = val0 * hval0 + hdvl;
 419       mlib_f32 vdvl0 = val0 * vval0 + vdvl;
 420       mlib_f32 val2 = src2[i * nch];
 421 
 422       hdvl = hdst1[i * dnch];
 423       vdvl = vdst1[i * dnch];
 424       val0 = val1;
 425       val1 = val2;
 426 
 427       hdst[i * dnch] = hdvl0;
 428       vdst[i * dnch] = vdvl0;
 429     }
 430   }
 431 }
 432 
 433 /***************************************************************/
 434 void mlib_ImageConvMxNMulAdd2_D64(mlib_d64       *hdst,
 435                                   mlib_d64       *vdst,
 436                                   const mlib_d64 *src,
 437                                   const mlib_d64 *hfilter,
 438                                   const mlib_d64 *vfilter,
 439                                   mlib_s32       n,
 440                                   mlib_s32       m,
 441                                   mlib_s32       nch,
 442                                   mlib_s32       dnch)
 443 {
 444   mlib_d64 *hdst1 = hdst + dnch, *vdst1 = vdst + dnch;
 445   mlib_s32 i, j;
 446 
 447   for (j = 0; j < m - 2; j += 3, src += 3 * nch, hfilter += 3, vfilter += 3) {
 448     mlib_d64 *src2 = src + 2 * nch;
 449     mlib_d64 hval0 = hfilter[0];
 450     mlib_d64 vval0 = vfilter[0];
 451     mlib_d64 hval1 = hfilter[1];
 452     mlib_d64 vval1 = vfilter[1];
 453     mlib_d64 hval2 = hfilter[2];
 454     mlib_d64 vval2 = vfilter[2];
 455     mlib_d64 val0 = src[0];
 456     mlib_d64 val1 = src[nch];
 457     mlib_d64 hdvl = hdst[0];
 458     mlib_d64 vdvl = vdst[0];
 459 
 460 #ifdef __SUNPRO_C
 461 #pragma pipeloop(0)
 462 #endif /* __SUNPRO_C */
 463     for (i = 0; i < n; i++) {
 464       mlib_d64 hdvl0 = val0 * hval0 + hdvl;
 465       mlib_d64 vdvl0 = val0 * vval0 + vdvl;
 466       mlib_d64 val2 = src2[i * nch];
 467 
 468       hdvl = hdst1[i * dnch];
 469       vdvl = vdst1[i * dnch];
 470       hdvl0 += val1 * hval1;
 471       vdvl0 += val1 * vval1;
 472       hdvl0 += val2 * hval2;
 473       vdvl0 += val2 * vval2;
 474       val0 = val1;
 475       val1 = val2;
 476 
 477       hdst[i * dnch] = hdvl0;
 478       vdst[i * dnch] = vdvl0;
 479     }
 480   }
 481 
 482   if (j < m - 1) {
 483     mlib_d64 *src2 = src + 2 * nch;
 484     mlib_d64 hval0 = hfilter[0];
 485     mlib_d64 vval0 = vfilter[0];
 486     mlib_d64 hval1 = hfilter[1];
 487     mlib_d64 vval1 = vfilter[1];
 488     mlib_d64 val0 = src[0];
 489     mlib_d64 val1 = src[nch];
 490     mlib_d64 hdvl = hdst[0];
 491     mlib_d64 vdvl = vdst[0];
 492 
 493 #ifdef __SUNPRO_C
 494 #pragma pipeloop(0)
 495 #endif /* __SUNPRO_C */
 496     for (i = 0; i < n; i++) {
 497       mlib_d64 hdvl0 = val0 * hval0 + hdvl;
 498       mlib_d64 vdvl0 = val0 * vval0 + vdvl;
 499       mlib_d64 val2 = src2[i * nch];
 500 
 501       hdvl = hdst1[i * dnch];
 502       vdvl = vdst1[i * dnch];
 503       hdvl0 += val1 * hval1;
 504       vdvl0 += val1 * vval1;
 505       val0 = val1;
 506       val1 = val2;
 507 
 508       hdst[i * dnch] = hdvl0;
 509       vdst[i * dnch] = vdvl0;
 510     }
 511 
 512   }
 513   else if (j < m) {
 514     mlib_d64 *src2 = src + 2 * nch;
 515     mlib_d64 hval0 = hfilter[0];
 516     mlib_d64 vval0 = vfilter[0];
 517     mlib_d64 val0 = src[0];
 518     mlib_d64 val1 = src[nch];
 519     mlib_d64 hdvl = hdst[0];
 520     mlib_d64 vdvl = vdst[0];
 521 
 522 #ifdef __SUNPRO_C
 523 #pragma pipeloop(0)
 524 #endif /* __SUNPRO_C */
 525     for (i = 0; i < n; i++) {
 526       mlib_d64 hdvl0 = val0 * hval0 + hdvl;
 527       mlib_d64 vdvl0 = val0 * vval0 + vdvl;
 528       mlib_d64 val2 = src2[i * nch];
 529 
 530       hdvl = hdst1[i * dnch];
 531       vdvl = vdst1[i * dnch];
 532       val0 = val1;
 533       val1 = val2;
 534 
 535       hdst[i * dnch] = hdvl0;
 536       vdst[i * dnch] = vdvl0;
 537     }
 538   }
 539 }
 540 
 541 #endif /* 0 */
 542 
 543 /***************************************************************/
 544 void mlib_ImageConvMxNMulAdd_D64(mlib_d64       *dst,
 545                                  const mlib_d64 *src,
 546                                  const mlib_d64 *kernel,
 547                                  mlib_s32       n,
 548                                  mlib_s32       m,
 549                                  mlib_s32       nch,
 550                                  mlib_s32       dnch)
 551 {
 552   mlib_d64 *hdst1 = dst + dnch;
 553   mlib_s32 i, j;
 554 
 555   for (j = 0; j < m - 2; j += 3, src += 3 * nch, kernel += 3) {
 556     const mlib_d64 *src2 = src + 2 * nch;
 557     mlib_d64 hval0 = kernel[0];
 558     mlib_d64 hval1 = kernel[1];
 559     mlib_d64 hval2 = kernel[2];
 560     mlib_d64 val0 = src[0];
 561     mlib_d64 val1 = src[nch];
 562     mlib_d64 hdvl = dst[0];
 563 
 564 #ifdef __SUNPRO_C
 565 #pragma pipeloop(0)
 566 #endif /* __SUNPRO_C */
 567     for (i = 0; i < n; i++) {
 568       mlib_d64 hdvl0 = val0 * hval0 + hdvl;
 569       mlib_d64 val2 = src2[i * nch];
 570 
 571       hdvl = hdst1[i * dnch];
 572       hdvl0 += val1 * hval1;
 573       hdvl0 += val2 * hval2;
 574       val0 = val1;
 575       val1 = val2;
 576 
 577       dst[i * dnch] = hdvl0;
 578     }
 579   }
 580 
 581   if (j < m - 1) {
 582     const mlib_d64 *src2 = src + 2 * nch;
 583     mlib_d64 hval0 = kernel[0];
 584     mlib_d64 hval1 = kernel[1];
 585     mlib_d64 val0 = src[0];
 586     mlib_d64 val1 = src[nch];
 587     mlib_d64 hdvl = dst[0];
 588 
 589 #ifdef __SUNPRO_C
 590 #pragma pipeloop(0)
 591 #endif /* __SUNPRO_C */
 592     for (i = 0; i < n; i++) {
 593       mlib_d64 hdvl0 = val0 * hval0 + hdvl;
 594       mlib_d64 val2 = src2[i * nch];
 595 
 596       hdvl = hdst1[i * dnch];
 597       hdvl0 += val1 * hval1;
 598       val0 = val1;
 599       val1 = val2;
 600 
 601       dst[i * dnch] = hdvl0;
 602     }
 603 
 604   }
 605   else if (j < m) {
 606     const mlib_d64 *src2 = src + 2 * nch;
 607     mlib_d64 hval0 = kernel[0];
 608     mlib_d64 val0 = src[0];
 609     mlib_d64 val1 = src[nch];
 610     mlib_d64 hdvl = dst[0];
 611 
 612 #ifdef __SUNPRO_C
 613 #pragma pipeloop(0)
 614 #endif /* __SUNPRO_C */
 615     for (i = 0; i < n; i++) {
 616       mlib_d64 hdvl0 = val0 * hval0 + hdvl;
 617       mlib_d64 val2 = src2[i * nch];
 618 
 619       hdvl = hdst1[i * dnch];
 620       val0 = val1;
 621       val1 = val2;
 622 
 623       dst[i * dnch] = hdvl0;
 624     }
 625   }
 626 }
 627 
 628 /***************************************************************/
 629 void mlib_ImageConvMxND642D64_ext(mlib_d64       *dst,
 630                                   const mlib_d64 *src,
 631                                   mlib_s32       n,
 632                                   mlib_s32       nch,
 633                                   mlib_s32       dx_l,
 634                                   mlib_s32       dx_r)
 635 {
 636   mlib_s32 i;
 637   mlib_d64 val = src[0];
 638 
 639   for (i = 0; i < dx_l; i++)
 640     dst[i] = val;
 641 #ifdef __SUNPRO_C
 642 #pragma pipeloop(0)
 643 #endif /* __SUNPRO_C */
 644   for (; i < n - dx_r; i++)
 645     dst[i] = src[nch * (i - dx_l)];
 646   val = dst[n - dx_r - 1];
 647   for (; i < n; i++)
 648     dst[i] = val;
 649 }
 650 
 651 /***************************************************************/
 652 mlib_status mlib_convMxNext_d64(mlib_image       *dst,
 653                                 const mlib_image *src,
 654                                 const mlib_d64   *kernel,
 655                                 mlib_s32         m,
 656                                 mlib_s32         n,
 657                                 mlib_s32         dx_l,
 658                                 mlib_s32         dx_r,
 659                                 mlib_s32         dy_t,
 660                                 mlib_s32         dy_b,
 661                                 mlib_s32         cmask)
 662 {
 663   mlib_d64 dspace[1024], *dsa = dspace;
 664   mlib_s32 wid_e = mlib_ImageGetWidth(src);
 665   mlib_d64 *da = mlib_ImageGetData(dst);
 666   mlib_d64 *sa = mlib_ImageGetData(src);
 667   mlib_s32 dlb = mlib_ImageGetStride(dst) >> 3;
 668   mlib_s32 slb = mlib_ImageGetStride(src) >> 3;
 669   mlib_s32 dw = mlib_ImageGetWidth(dst);
 670   mlib_s32 dh = mlib_ImageGetHeight(dst);
 671   mlib_s32 nch = mlib_ImageGetChannels(dst);
 672   mlib_s32 i, j, j1, k;
 673 
 674   if (3 * wid_e + m > 1024) {
 675     dsa = mlib_malloc((3 * wid_e + m) * sizeof(mlib_d64));
 676 
 677     if (dsa == NULL)
 678       return MLIB_FAILURE;
 679   }
 680 
 681   for (j = 0; j < dh; j++, da += dlb) {
 682     for (k = 0; k < nch; k++)
 683       if (cmask & (1 << (nch - 1 - k))) {
 684         mlib_d64 *sa1 = sa + k;
 685         mlib_d64 *da1 = da + k;
 686         const mlib_d64 *kernel1 = kernel;
 687 
 688         for (i = 0; i < dw; i++)
 689           da1[i * nch] = 0.;
 690         for (j1 = 0; j1 < n; j1++, kernel1 += m) {
 691           mlib_ImageConvMxND642D64_ext(dsa, sa1, dw + m - 1, nch, dx_l, dx_r);
 692           mlib_ImageConvMxNMulAdd_D64(da1, dsa, kernel1, dw, m, 1, nch);
 693 
 694           if ((j + j1 >= dy_t) && (j + j1 < dh + n - dy_b - 2))
 695             sa1 += slb;
 696         }
 697       }
 698 
 699     if ((j >= dy_t) && (j < dh + n - dy_b - 2))
 700       sa += slb;
 701   }
 702 
 703   if (dsa != dspace)
 704     mlib_free(dsa);
 705   return MLIB_SUCCESS;
 706 }
 707 
 708 /***************************************************************/