1 /*
   2  * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 
  27 /*
  28  * FUNCTION
  29  *   Internal functions for mlib_ImageConv* on U8/S16/U16 types and
  30  *   MLIB_EDGE_DST_NO_WRITE mask
  31  */
  32 
  33 #include "mlib_image.h"
  34 #include "mlib_ImageConv.h"
  35 #include "mlib_c_ImageConv.h"
  36 
/*
  This define switches between functions of different data types:
    1 selects mlib_u8, 2 selects mlib_s16, 3 selects mlib_u16
    (see the #if chain below).
*/
#define IMG_TYPE 1

/***************************************************************/
/*
 * Per-type configuration selected by IMG_TYPE:
 *
 *   DTYPE        pixel element type of the source and destination images
 *   CONV_FUNC    name of the floating-point convolution entry point
 *   CONV_FUNC_I  name of the integer convolution entry point
 *   DSCALE       initial value of the kernel scale factor (fscale)
 *   FROM_S32     converts the clamped 32-bit result of D2I back to a pixel
 *                value by shifting it right and, for the unsigned types,
 *                XOR-ing the top bit
 *   S64TOS32     applied to the value placed in the low half of the 64-bit
 *                word built by LOAD_BUFF; for mlib_s16 it masks to 32 bits
 *   SAT_OFF      text appended to the argument inside D2I; for the unsigned
 *                types it subtracts 2^31 before clamping, and FROM_S32
 *                compensates with the XOR. It is empty for mlib_s16.
 *
 * NOTE: SAT_OFF is deliberately not parenthesised: it is pasted after an
 * expression as "(x) SAT_OFF", so it must start with a binary operator.
 */
#if IMG_TYPE == 1

#define DTYPE             mlib_u8
#define CONV_FUNC(KERN)   mlib_c_conv##KERN##nw_u8
#define CONV_FUNC_I(KERN) mlib_i_conv##KERN##nw_u8
#define DSCALE            (1 << 24)
#define FROM_S32(x)       (((x) >> 24) ^ 128)
#define S64TOS32(x)       (x)
#define SAT_OFF           -(1u << 31)

#elif IMG_TYPE == 2

#define DTYPE             mlib_s16
#define CONV_FUNC(KERN)   mlib_conv##KERN##nw_s16
#define CONV_FUNC_I(KERN) mlib_i_conv##KERN##nw_s16
#define DSCALE            65536.0
#define FROM_S32(x)       ((x) >> 16)
#define S64TOS32(x)       ((x) & 0xffffffff)
#define SAT_OFF

#elif IMG_TYPE == 3

#define DTYPE             mlib_u16
#define CONV_FUNC(KERN)   mlib_conv##KERN##nw_u16
#define CONV_FUNC_I(KERN) mlib_i_conv##KERN##nw_u16
#define DSCALE            65536.0
#define FROM_S32(x)       (((x) >> 16) ^ 0x8000)
#define S64TOS32(x)       (x)
#define SAT_OFF           -(1u << 31)

#endif /* IMG_TYPE == 1 */
  74 
  75 /***************************************************************/
/* Number of elements in the on-stack work buffers declared by the
   convolution functions; larger requirements fall back to mlib_malloc. */
#define BUFF_SIZE   1600

/* Byte budget used by mlib_ImageConv1xN to choose how many rows are
   processed per strip (max_hsize). */
#define CACHE_SIZE  (64*1024)

/***************************************************************/
/* Floating-point type used for the kernel and the accumulation buffers. */
#define FTYPE mlib_d64

#ifndef MLIB_USE_FTOI_CLAMPING

/* Convert x to mlib_s32, saturating at MLIB_S32_MIN / MLIB_S32_MAX.
   NOTE: x is evaluated more than once. */
#define CLAMP_S32(x)                                            \
  (((x) <= MLIB_S32_MIN) ? MLIB_S32_MIN : (((x) >= MLIB_S32_MAX) ? MLIB_S32_MAX : (mlib_s32)(x)))

#else

/* Plain cast; presumably the platform's float-to-int conversion already
   saturates when MLIB_USE_FTOI_CLAMPING is defined -- TODO confirm. */
#define CLAMP_S32(x) ((mlib_s32)(x))

#endif /* MLIB_USE_FTOI_CLAMPING */

/***************************************************************/
/* Apply the per-type offset SAT_OFF to x, then clamp to mlib_s32. */
#define D2I(x) CLAMP_S32((x) SAT_OFF)
  96 
  97 /***************************************************************/
/*
 * STORE2(res0, res1): store two results to dp[0] and dp[chan1]; the two
 * values are swapped when _LITTLE_ENDIAN is defined.
 *
 * NOTE: expands to two statements and relies on `dp` and `chan1` being in
 * scope at the point of use; only use it inside a braced block.
 */
#ifdef _LITTLE_ENDIAN

#define STORE2(res0, res1)                                      \
  dp[0    ] = res1;                                             \
  dp[chan1] = res0

#else

#define STORE2(res0, res1)                                      \
  dp[0    ] = res0;                                             \
  dp[chan1] = res1

#endif /* _LITTLE_ENDIAN */

/***************************************************************/
/*
 * LOAD_BUFF(buff): copy the two source samples sp[0] and sp[chan1] into
 * buff at index i. Relies on `i`, `sp` and `chan1` being in scope.
 *
 * Without 64-bit integer support (_NO_LONGLONG) this is two separate element
 * stores (and therefore two statements). Otherwise both samples are packed
 * into one mlib_s64 and written with a single store through a cast pointer.
 * The packing order depends on _LITTLE_ENDIAN; assuming buff points to
 * 32-bit elements and the macro matches the target's byte order, sp[0] ends
 * up in buff[i] and sp[chan1] in buff[i + 1].
 *
 * NOTE(review): the 64-bit form writes through a pointer of a different type
 * than buff and needs (buff + i) to be suitably aligned for mlib_s64.
 */
#ifdef _NO_LONGLONG

#define LOAD_BUFF(buff)                                         \
  buff[i    ] = sp[0];                                          \
  buff[i + 1] = sp[chan1]

#else /* _NO_LONGLONG */

#ifdef _LITTLE_ENDIAN

#define LOAD_BUFF(buff)                                         \
  *(mlib_s64*)(buff + i) = (((mlib_s64)sp[chan1]) << 32) | S64TOS32((mlib_s64)sp[0])

#else /* _LITTLE_ENDIAN */

#define LOAD_BUFF(buff)                                         \
  *(mlib_s64*)(buff + i) = (((mlib_s64)sp[0]) << 32) | S64TOS32((mlib_s64)sp[chan1])

#endif /* _LITTLE_ENDIAN */
#endif /* _NO_LONGLONG */
 133 
 134 /***************************************************************/
/*
 * Overlay of one mlib_d64 with two 32-bit integers, used to pull a pair of
 * 32-bit values out of a 64-bit load. Both inner structs have the same
 * layout (two mlib_s32 members); they differ only in member names.
 */
typedef union {
  mlib_d64 d64;      /* the 64-bit value as loaded */
  struct {
    mlib_s32 i0;     /* first 32-bit word in memory order */
    mlib_s32 i1;     /* second 32-bit word in memory order */
  } i32s;
  struct {
    mlib_s32 f0;
    mlib_s32 f1;
  } f32s;
} d64_2x32;
 146 
 147 /***************************************************************/
/*
 * DEF_VARS(type): declare the local variables shared by the convolution
 * functions: source/destination base, line and pixel pointers, the work
 * buffer pointer, image geometry and loop counters.
 *
 * Requires an array named `buff` to be declared beforehand, because pbuff
 * is initialised to it. Must be used in the declaration part of a block and
 * followed by a semicolon.
 */
#define DEF_VARS(type)                                          \
  type     *adr_src, *sl, *sp = NULL;                           \
  type     *adr_dst, *dl, *dp = NULL;                           \
  FTYPE    *pbuff = buff;                                       \
  mlib_s32 wid, hgt, sll, dll;                                  \
  mlib_s32 nchannel, chan1;                                     \
  mlib_s32 i, j, c

/***************************************************************/
/*
 * GET_SRC_DST_PARAMETERS(type): fill the variables declared by DEF_VARS from
 * the images `src` and `dst`, which must be in scope.
 *
 * Width, height and channel count are taken from src only. sll and dll are
 * the image strides divided by sizeof(type), i.e. line steps expressed in
 * elements (assuming mlib_ImageGetStride returns bytes -- TODO confirm).
 */
#define GET_SRC_DST_PARAMETERS(type)                            \
  hgt = mlib_ImageGetHeight(src);                               \
  wid = mlib_ImageGetWidth(src);                                \
  nchannel = mlib_ImageGetChannels(src);                        \
  sll = mlib_ImageGetStride(src) / sizeof(type);                \
  dll = mlib_ImageGetStride(dst) / sizeof(type);                \
  adr_src = (type *)mlib_ImageGetData(src);                     \
  adr_dst = (type *)mlib_ImageGetData(dst)
 165 
 166 /***************************************************************/
 167 #if IMG_TYPE == 1
 168 
 169 /* Test for the presence of any "1" bit in bits
 170    8 to 31 of val. If present, then val is either
 171    negative or >255. If over/underflows of 8 bits
 172    are uncommon, then this technique can be a win,
 173    since only a single test, rather than two, is
 174    necessary to determine if clamping is needed.
 175    On the other hand, if over/underflows are common,
 176    it adds an extra test.
 177 */
 178 #define CLAMP_STORE(dst, val)                                   \
 179   if (val & 0xffffff00) {                                       \
 180     if (val < MLIB_U8_MIN)                                      \
 181       dst = MLIB_U8_MIN;                                        \
 182     else                                                        \
 183       dst = MLIB_U8_MAX;                                        \
 184   } else {                                                      \
 185     dst = (mlib_u8)val;                                         \
 186   }
 187 
 188 #elif IMG_TYPE == 2
 189 
 190 #define CLAMP_STORE(dst, val)                                   \
 191   if (val >= MLIB_S16_MAX)                                      \
 192     dst = MLIB_S16_MAX;                                         \
 193   else if (val <= MLIB_S16_MIN)                                 \
 194     dst = MLIB_S16_MIN;                                         \
 195   else                                                          \
 196     dst = (mlib_s16)val
 197 
 198 #elif IMG_TYPE == 3
 199 
 200 #define CLAMP_STORE(dst, val)                                   \
 201   if (val >= MLIB_U16_MAX)                                      \
 202     dst = MLIB_U16_MAX;                                         \
 203   else if (val <= MLIB_U16_MIN)                                 \
 204     dst = MLIB_U16_MIN;                                         \
 205   else                                                          \
 206     dst = (mlib_u16)val
 207 
 208 #endif /* IMG_TYPE == 1 */
 209 
 210 /***************************************************************/
/* Largest number of kernel columns handled by one unrolled inner loop;
   wider kernel rows are processed in several segments of at most MAX_KER. */
#define MAX_KER   7
/* Kernel size up to which the on-stack tables (row-pointer array, integer
   kernel copy) are large enough; larger kernels use heap storage. */
#define MAX_N    15
 213 
 214 static mlib_status mlib_ImageConv1xN(mlib_image       *dst,
 215                                      const mlib_image *src,
 216                                      const mlib_d64   *k,
 217                                      mlib_s32         n,
 218                                      mlib_s32         dn,
 219                                      mlib_s32         cmask)
 220 {
 221   FTYPE    buff[BUFF_SIZE];
 222   mlib_s32 off, kh;
 223   mlib_s32 d0, d1;
 224   const FTYPE    *pk;
 225   FTYPE    k0, k1, k2, k3;
 226   FTYPE    p0, p1, p2, p3, p4;
 227   DEF_VARS(DTYPE);
 228   DTYPE    *sl_c, *dl_c, *sl0;
 229   mlib_s32 l, hsize, max_hsize;
 230   GET_SRC_DST_PARAMETERS(DTYPE);
 231 
 232   hgt -= (n - 1);
 233   adr_dst += dn*dll;
 234 
 235   max_hsize = (CACHE_SIZE/sizeof(DTYPE))/sll;
 236 
 237   if (!max_hsize) max_hsize = 1;
 238 
 239   if (max_hsize > BUFF_SIZE) {
 240     pbuff = mlib_malloc(sizeof(FTYPE)*max_hsize);
 241   }
 242 
 243   chan1 = nchannel;
 244 
 245   sl_c = adr_src;
 246   dl_c = adr_dst;
 247 
 248   for (l = 0; l < hgt; l += hsize) {
 249     hsize = hgt - l;
 250 
 251     if (hsize > max_hsize) hsize = max_hsize;
 252 
 253     for (c = 0; c < nchannel; c++) {
 254       if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
 255 
 256       sl = sl_c + c;
 257       dl = dl_c + c;
 258 
 259 #ifdef __SUNPRO_C
 260 #pragma pipeloop(0)
 261 #endif /* __SUNPRO_C */
 262       for (j = 0; j < hsize; j++) pbuff[j] = 0.0;
 263 
 264       for (i = 0; i < wid; i++) {
 265         sl0 = sl;
 266 
 267         for (off = 0; off < (n - 4); off += 4) {
 268           pk = k + off;
 269           sp = sl0;
 270 
 271           k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
 272           p2 = sp[0]; p3 = sp[sll]; p4 = sp[2*sll];
 273           sp += 3*sll;
 274 
 275 #ifdef __SUNPRO_C
 276 #pragma pipeloop(0)
 277 #endif /* __SUNPRO_C */
 278           for (j = 0; j < hsize; j += 2) {
 279             p0 = p2; p1 = p3; p2 = p4;
 280             p3 = sp[0];
 281             p4 = sp[sll];
 282 
 283             pbuff[j    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3;
 284             pbuff[j + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3;
 285 
 286             sp += 2*sll;
 287           }
 288 
 289           sl0 += 4*sll;
 290         }
 291 
 292         pk = k + off;
 293         sp = sl0;
 294 
 295         k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
 296         p2 = sp[0]; p3 = sp[sll]; p4 = sp[2*sll];
 297 
 298         dp = dl;
 299         kh = n - off;
 300 
 301         if (kh == 4) {
 302           sp += 3*sll;
 303 
 304 #ifdef __SUNPRO_C
 305 #pragma pipeloop(0)
 306 #endif /* __SUNPRO_C */
 307           for (j = 0; j <= (hsize - 2); j += 2) {
 308             p0 = p2; p1 = p3; p2 = p4;
 309             p3 = sp[0];
 310             p4 = sp[sll];
 311 
 312             d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + pbuff[j]);
 313             d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + pbuff[j + 1]);
 314 
 315             dp[0  ] = FROM_S32(d0);
 316             dp[dll] = FROM_S32(d1);
 317 
 318             pbuff[j] = 0;
 319             pbuff[j + 1] = 0;
 320 
 321             sp += 2*sll;
 322             dp += 2*dll;
 323           }
 324 
 325           if (j < hsize) {
 326             p0 = p2; p1 = p3; p2 = p4;
 327             p3 = sp[0];
 328 
 329             d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + pbuff[j]);
 330 
 331             pbuff[j] = 0;
 332 
 333             dp[0] = FROM_S32(d0);
 334           }
 335 
 336         } else if (kh == 3) {
 337           sp += 2*sll;
 338 
 339 #ifdef __SUNPRO_C
 340 #pragma pipeloop(0)
 341 #endif /* __SUNPRO_C */
 342           for (j = 0; j <= (hsize - 2); j += 2) {
 343             p0 = p2; p1 = p3;
 344             p2 = sp[0];
 345             p3 = sp[sll];
 346 
 347             d0 = D2I(p0*k0 + p1*k1 + p2*k2 + pbuff[j]);
 348             d1 = D2I(p1*k0 + p2*k1 + p3*k2 + pbuff[j + 1]);
 349 
 350             dp[0  ] = FROM_S32(d0);
 351             dp[dll] = FROM_S32(d1);
 352 
 353             pbuff[j] = 0;
 354             pbuff[j + 1] = 0;
 355 
 356             sp += 2*sll;
 357             dp += 2*dll;
 358           }
 359 
 360           if (j < hsize) {
 361             p0 = p2; p1 = p3;
 362             p2 = sp[0];
 363 
 364             d0 = D2I(p0*k0 + p1*k1 + p2*k2 + pbuff[j]);
 365 
 366             pbuff[j] = 0;
 367 
 368             dp[0] = FROM_S32(d0);
 369           }
 370 
 371         } else if (kh == 2) {
 372           sp += sll;
 373 
 374 #ifdef __SUNPRO_C
 375 #pragma pipeloop(0)
 376 #endif /* __SUNPRO_C */
 377           for (j = 0; j <= (hsize - 2); j += 2) {
 378             p0 = p2;
 379             p1 = sp[0];
 380             p2 = sp[sll];
 381 
 382             d0 = D2I(p0*k0 + p1*k1 + pbuff[j]);
 383             d1 = D2I(p1*k0 + p2*k1 + pbuff[j + 1]);
 384 
 385             dp[0  ] = FROM_S32(d0);
 386             dp[dll] = FROM_S32(d1);
 387 
 388             pbuff[j] = 0;
 389             pbuff[j + 1] = 0;
 390 
 391             sp += 2*sll;
 392             dp += 2*dll;
 393           }
 394 
 395           if (j < hsize) {
 396             p0 = p2;
 397             p1 = sp[0];
 398 
 399             d0 = D2I(p0*k0 + p1*k1 + pbuff[j]);
 400 
 401             pbuff[j] = 0;
 402 
 403             dp[0] = FROM_S32(d0);
 404           }
 405 
 406         } else /* if (kh == 1) */ {
 407 #ifdef __SUNPRO_C
 408 #pragma pipeloop(0)
 409 #endif /* __SUNPRO_C */
 410           for (j = 0; j < hsize; j++) {
 411             p0 = sp[0];
 412 
 413             d0 = D2I(p0*k0 + pbuff[j]);
 414 
 415             dp[0] = FROM_S32(d0);
 416 
 417             pbuff[j] = 0;
 418 
 419             sp += sll;
 420             dp += dll;
 421           }
 422         }
 423 
 424         sl += chan1;
 425         dl += chan1;
 426       }
 427     }
 428 
 429     sl_c += max_hsize*sll;
 430     dl_c += max_hsize*dll;
 431   }
 432 
 433   if (pbuff != buff) mlib_free(pbuff);
 434 
 435   return MLIB_SUCCESS;
 436 }
 437 
 438 /***************************************************************/
 439 mlib_status CONV_FUNC(MxN)(mlib_image       *dst,
 440                            const mlib_image *src,
 441                            const mlib_s32   *kernel,
 442                            mlib_s32         m,
 443                            mlib_s32         n,
 444                            mlib_s32         dm,
 445                            mlib_s32         dn,
 446                            mlib_s32         scale,
 447                            mlib_s32         cmask)
 448 {
 449   FTYPE    buff[BUFF_SIZE], *buffs_arr[2*(MAX_N + 1)];
 450   FTYPE    **buffs = buffs_arr, *buffd;
 451   FTYPE    akernel[256], *k = akernel, fscale = DSCALE;
 452   mlib_s32 mn, l, off, kw, bsize, buff_ind;
 453   mlib_s32 d0, d1;
 454   FTYPE    k0, k1, k2, k3, k4, k5, k6;
 455   FTYPE    p0, p1, p2, p3, p4, p5, p6, p7;
 456   d64_2x32 dd;
 457   DEF_VARS(DTYPE);
 458   mlib_s32 chan2;
 459   mlib_s32 *buffo, *buffi;
 460   mlib_status status = MLIB_SUCCESS;
 461 
 462   GET_SRC_DST_PARAMETERS(DTYPE);
 463 
 464   if (scale > 30) {
 465     fscale *= 1.0/(1 << 30);
 466     scale -= 30;
 467   }
 468 
 469   fscale /= (1 << scale);
 470 
 471   mn = m*n;
 472 
 473   if (mn > 256) {
 474     k = mlib_malloc(mn*sizeof(mlib_d64));
 475 
 476     if (k == NULL) return MLIB_FAILURE;
 477   }
 478 
 479   for (i = 0; i < mn; i++) {
 480     k[i] = kernel[i]*fscale;
 481   }
 482 
 483   if (m == 1) {
 484     status = mlib_ImageConv1xN(dst, src, k, n, dn, cmask);
 485     FREE_AND_RETURN_STATUS;
 486   }
 487 
 488   bsize = (n + 3)*wid;
 489 
 490   if ((bsize > BUFF_SIZE) || (n > MAX_N)) {
 491     pbuff = mlib_malloc(sizeof(FTYPE)*bsize + sizeof(FTYPE *)*2*(n + 1));
 492 
 493     if (pbuff == NULL) {
 494       status = MLIB_FAILURE;
 495       FREE_AND_RETURN_STATUS;
 496     }
 497     buffs = (FTYPE   **)(pbuff + bsize);
 498   }
 499 
 500   for (l = 0; l < (n + 1); l++) buffs[l] = pbuff + l*wid;
 501   for (l = 0; l < (n + 1); l++) buffs[l + (n + 1)] = buffs[l];
 502   buffd = buffs[n] + wid;
 503   buffo = (mlib_s32*)(buffd + wid);
 504   buffi = buffo + (wid &~ 1);
 505 
 506   chan1 = nchannel;
 507   chan2 = chan1 + chan1;
 508 
 509   wid -= (m - 1);
 510   hgt -= (n - 1);
 511   adr_dst += dn*dll + dm*nchannel;
 512 
 513   for (c = 0; c < nchannel; c++) {
 514     if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
 515 
 516     sl = adr_src + c;
 517     dl = adr_dst + c;
 518 
 519     for (l = 0; l < n; l++) {
 520       FTYPE    *buff = buffs[l];
 521 
 522 #ifdef __SUNPRO_C
 523 #pragma pipeloop(0)
 524 #endif /* __SUNPRO_C */
 525       for (i = 0; i < wid + (m - 1); i++) {
 526         buff[i] = (FTYPE)sl[i*chan1];
 527       }
 528 
 529       sl += sll;
 530     }
 531 
 532     buff_ind = 0;
 533 
 534 #ifdef __SUNPRO_C
 535 #pragma pipeloop(0)
 536 #endif /* __SUNPRO_C */
 537     for (i = 0; i < wid; i++) buffd[i] = 0.0;
 538 
 539     for (j = 0; j < hgt; j++) {
 540       FTYPE    **buffc = buffs + buff_ind;
 541       FTYPE    *buffn = buffc[n];
 542       FTYPE    *pk = k;
 543 
 544       for (l = 0; l < n; l++) {
 545         FTYPE    *buff_l = buffc[l];
 546 
 547         for (off = 0; off < m;) {
 548           FTYPE    *buff = buff_l + off;
 549 
 550           kw = m - off;
 551 
 552           if (kw > 2*MAX_KER) kw = MAX_KER; else
 553             if (kw > MAX_KER) kw = kw/2;
 554           off += kw;
 555 
 556           sp = sl;
 557           dp = dl;
 558 
 559           p2 = buff[0]; p3 = buff[1]; p4 = buff[2];
 560           p5 = buff[3]; p6 = buff[4]; p7 = buff[5];
 561 
 562           k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
 563           k4 = pk[4]; k5 = pk[5]; k6 = pk[6];
 564           pk += kw;
 565 
 566           if (kw == 7) {
 567 
 568             if (l < (n - 1) || off < m) {
 569 #ifdef __SUNPRO_C
 570 #pragma pipeloop(0)
 571 #endif /* __SUNPRO_C */
 572               for (i = 0; i <= (wid - 2); i += 2) {
 573                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
 574 
 575                 p6 = buff[i + 6]; p7 = buff[i + 7];
 576 
 577                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6;
 578                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6;
 579               }
 580 
 581             } else {
 582 #ifdef __SUNPRO_C
 583 #pragma pipeloop(0)
 584 #endif /* __SUNPRO_C */
 585               for (i = 0; i <= (wid - 2); i += 2) {
 586                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
 587 
 588                 p6 = buff[i + 6]; p7 = buff[i + 7];
 589 
 590                 LOAD_BUFF(buffi);
 591 
 592                 dd.d64 = *(FTYPE   *)(buffi + i);
 593                 buffn[i    ] = (FTYPE)dd.i32s.i0;
 594                 buffn[i + 1] = (FTYPE)dd.i32s.i1;
 595 
 596                 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i    ]);
 597                 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1]);
 598 
 599                 dp[0    ] = FROM_S32(d0);
 600                 dp[chan1] = FROM_S32(d1);
 601 
 602                 buffd[i    ] = 0.0;
 603                 buffd[i + 1] = 0.0;
 604 
 605                 sp += chan2;
 606                 dp += chan2;
 607               }
 608             }
 609 
 610           } else if (kw == 6) {
 611 
 612             if (l < (n - 1) || off < m) {
 613 #ifdef __SUNPRO_C
 614 #pragma pipeloop(0)
 615 #endif /* __SUNPRO_C */
 616               for (i = 0; i <= (wid - 2); i += 2) {
 617                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6;
 618 
 619                 p5 = buff[i + 5]; p6 = buff[i + 6];
 620 
 621                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5;
 622                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5;
 623               }
 624 
 625             } else {
 626 #ifdef __SUNPRO_C
 627 #pragma pipeloop(0)
 628 #endif /* __SUNPRO_C */
 629               for (i = 0; i <= (wid - 2); i += 2) {
 630                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6;
 631 
 632                 p5 = buff[i + 5]; p6 = buff[i + 6];
 633 
 634                 buffn[i    ] = (FTYPE)sp[0];
 635                 buffn[i + 1] = (FTYPE)sp[chan1];
 636 
 637                 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + buffd[i    ]);
 638                 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + buffd[i + 1]);
 639 
 640                 dp[0    ] = FROM_S32(d0);
 641                 dp[chan1] = FROM_S32(d1);
 642 
 643                 buffd[i    ] = 0.0;
 644                 buffd[i + 1] = 0.0;
 645 
 646                 sp += chan2;
 647                 dp += chan2;
 648               }
 649             }
 650 
 651           } else if (kw == 5) {
 652 
 653             if (l < (n - 1) || off < m) {
 654 #ifdef __SUNPRO_C
 655 #pragma pipeloop(0)
 656 #endif /* __SUNPRO_C */
 657               for (i = 0; i <= (wid - 2); i += 2) {
 658                 p0 = p2; p1 = p3; p2 = p4; p3 = p5;
 659 
 660                 p4 = buff[i + 4]; p5 = buff[i + 5];
 661 
 662                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4;
 663                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4;
 664               }
 665 
 666             } else {
 667 #ifdef __SUNPRO_C
 668 #pragma pipeloop(0)
 669 #endif /* __SUNPRO_C */
 670               for (i = 0; i <= (wid - 2); i += 2) {
 671                 p0 = p2; p1 = p3; p2 = p4; p3 = p5;
 672 
 673                 p4 = buff[i + 4]; p5 = buff[i + 5];
 674 
 675                 buffn[i    ] = (FTYPE)sp[0];
 676                 buffn[i + 1] = (FTYPE)sp[chan1];
 677 
 678                 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + buffd[i    ]);
 679                 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + buffd[i + 1]);
 680 
 681                 dp[0    ] = FROM_S32(d0);
 682                 dp[chan1] = FROM_S32(d1);
 683 
 684                 buffd[i    ] = 0.0;
 685                 buffd[i + 1] = 0.0;
 686 
 687                 sp += chan2;
 688                 dp += chan2;
 689               }
 690             }
 691 
 692           } else if (kw == 4) {
 693 
 694             if (l < (n - 1) || off < m) {
 695 #ifdef __SUNPRO_C
 696 #pragma pipeloop(0)
 697 #endif /* __SUNPRO_C */
 698               for (i = 0; i <= (wid - 2); i += 2) {
 699                 p0 = p2; p1 = p3; p2 = p4;
 700 
 701                 p3 = buff[i + 3]; p4 = buff[i + 4];
 702 
 703                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3;
 704                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3;
 705               }
 706 
 707             } else {
 708 #ifdef __SUNPRO_C
 709 #pragma pipeloop(0)
 710 #endif /* __SUNPRO_C */
 711               for (i = 0; i <= (wid - 2); i += 2) {
 712                 p0 = p2; p1 = p3; p2 = p4;
 713 
 714                 p3 = buff[i + 3]; p4 = buff[i + 4];
 715 
 716                 buffn[i    ] = (FTYPE)sp[0];
 717                 buffn[i + 1] = (FTYPE)sp[chan1];
 718 
 719                 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + buffd[i    ]);
 720                 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + buffd[i + 1]);
 721 
 722                 dp[0    ] = FROM_S32(d0);
 723                 dp[chan1] = FROM_S32(d1);
 724 
 725                 buffd[i    ] = 0.0;
 726                 buffd[i + 1] = 0.0;
 727 
 728                 sp += chan2;
 729                 dp += chan2;
 730               }
 731             }
 732 
 733           } else if (kw == 3) {
 734 
 735             if (l < (n - 1) || off < m) {
 736 #ifdef __SUNPRO_C
 737 #pragma pipeloop(0)
 738 #endif /* __SUNPRO_C */
 739               for (i = 0; i <= (wid - 2); i += 2) {
 740                 p0 = p2; p1 = p3;
 741 
 742                 p2 = buff[i + 2]; p3 = buff[i + 3];
 743 
 744                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2;
 745                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2;
 746               }
 747 
 748             } else {
 749 #ifdef __SUNPRO_C
 750 #pragma pipeloop(0)
 751 #endif /* __SUNPRO_C */
 752               for (i = 0; i <= (wid - 2); i += 2) {
 753                 p0 = p2; p1 = p3;
 754 
 755                 p2 = buff[i + 2]; p3 = buff[i + 3];
 756 
 757                 buffn[i    ] = (FTYPE)sp[0];
 758                 buffn[i + 1] = (FTYPE)sp[chan1];
 759 
 760                 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + buffd[i    ]);
 761                 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + buffd[i + 1]);
 762 
 763                 dp[0    ] = FROM_S32(d0);
 764                 dp[chan1] = FROM_S32(d1);
 765 
 766                 buffd[i    ] = 0.0;
 767                 buffd[i + 1] = 0.0;
 768 
 769                 sp += chan2;
 770                 dp += chan2;
 771               }
 772             }
 773 
 774           } else /*if (kw == 2)*/ {
 775 
 776             if (l < (n - 1) || off < m) {
 777 #ifdef __SUNPRO_C
 778 #pragma pipeloop(0)
 779 #endif /* __SUNPRO_C */
 780               for (i = 0; i <= (wid - 2); i += 2) {
 781                 p0 = p2;
 782 
 783                 p1 = buff[i + 1]; p2 = buff[i + 2];
 784 
 785                 buffd[i    ] += p0*k0 + p1*k1;
 786                 buffd[i + 1] += p1*k0 + p2*k1;
 787               }
 788 
 789             } else {
 790 #ifdef __SUNPRO_C
 791 #pragma pipeloop(0)
 792 #endif /* __SUNPRO_C */
 793               for (i = 0; i <= (wid - 2); i += 2) {
 794                 p0 = p2;
 795 
 796                 p1 = buff[i + 1]; p2 = buff[i + 2];
 797 
 798                 buffn[i    ] = (FTYPE)sp[0];
 799                 buffn[i + 1] = (FTYPE)sp[chan1];
 800 
 801                 d0 = D2I(p0*k0 + p1*k1 + buffd[i    ]);
 802                 d1 = D2I(p1*k0 + p2*k1 + buffd[i + 1]);
 803 
 804                 dp[0    ] = FROM_S32(d0);
 805                 dp[chan1] = FROM_S32(d1);
 806 
 807                 buffd[i    ] = 0.0;
 808                 buffd[i + 1] = 0.0;
 809 
 810                 sp += chan2;
 811                 dp += chan2;
 812               }
 813             }
 814           }
 815         }
 816       }
 817 
 818       /* last pixels */
 819       for (; i < wid; i++) {
 820         FTYPE    *pk = k, s = 0;
 821         mlib_s32 x, d0;
 822 
 823         for (l = 0; l < n; l++) {
 824           FTYPE    *buff = buffc[l] + i;
 825 
 826           for (x = 0; x < m; x++) s += buff[x] * (*pk++);
 827         }
 828 
 829         d0 = D2I(s);
 830         dp[0] = FROM_S32(d0);
 831 
 832         buffn[i] = (FTYPE)sp[0];
 833 
 834         sp += chan1;
 835         dp += chan1;
 836       }
 837 
 838       for (l = 0; l < (m - 1); l++) buffn[wid + l] = sp[l*chan1];
 839 
 840       /* next line */
 841       sl += sll;
 842       dl += dll;
 843 
 844       buff_ind++;
 845 
 846       if (buff_ind >= n + 1) buff_ind = 0;
 847     }
 848   }
 849 
 850   FREE_AND_RETURN_STATUS;
 851 }
 852 
 853 /***************************************************************/
 854 /* for x86, using integer multiplies is faster */
 855 
 856 #define STORE_RES(res, x)                                       \
 857   x >>= shift2;                                                 \
 858   CLAMP_STORE(res, x)
 859 
 860 mlib_status CONV_FUNC_I(MxN)(mlib_image       *dst,
 861                              const mlib_image *src,
 862                              const mlib_s32   *kernel,
 863                              mlib_s32         m,
 864                              mlib_s32         n,
 865                              mlib_s32         dm,
 866                              mlib_s32         dn,
 867                              mlib_s32         scale,
 868                              mlib_s32         cmask)
 869 {
 870   mlib_s32 buff[BUFF_SIZE], *buffd = buff;
 871   mlib_s32 l, off, kw;
 872   mlib_s32 d0, d1, shift1, shift2;
 873   mlib_s32 k0, k1, k2, k3, k4, k5, k6;
 874   mlib_s32 p0, p1, p2, p3, p4, p5, p6, p7;
 875   DTYPE    *adr_src, *sl, *sp = NULL;
 876   DTYPE    *adr_dst, *dl, *dp = NULL;
 877   mlib_s32 wid, hgt, sll, dll;
 878   mlib_s32 nchannel, chan1;
 879   mlib_s32 i, j, c;
 880   mlib_s32 chan2;
 881   mlib_s32 k_locl[MAX_N*MAX_N], *k = k_locl;
 882   GET_SRC_DST_PARAMETERS(DTYPE);
 883 
 884 #if IMG_TYPE != 1
 885   shift1 = 16;
 886 #else
 887   shift1 = 8;
 888 #endif /* IMG_TYPE != 1 */
 889   shift2 = scale - shift1;
 890 
 891   chan1 = nchannel;
 892   chan2 = chan1 + chan1;
 893 
 894   wid -= (m - 1);
 895   hgt -= (n - 1);
 896   adr_dst += dn*dll + dm*nchannel;
 897 
 898   if (wid > BUFF_SIZE) {
 899     buffd = mlib_malloc(sizeof(mlib_s32)*wid);
 900 
 901     if (buffd == NULL) return MLIB_FAILURE;
 902   }
 903 
 904   if (m*n > MAX_N*MAX_N) {
 905     k = mlib_malloc(sizeof(mlib_s32)*(m*n));
 906 
 907     if (k == NULL) {
 908       if (buffd != buff) mlib_free(buffd);
 909       return MLIB_FAILURE;
 910     }
 911   }
 912 
 913   for (i = 0; i < m*n; i++) {
 914     k[i] = kernel[i] >> shift1;
 915   }
 916 
 917   for (c = 0; c < nchannel; c++) {
 918     if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
 919 
 920     sl = adr_src + c;
 921     dl = adr_dst + c;
 922 
 923 #ifdef __SUNPRO_C
 924 #pragma pipeloop(0)
 925 #endif /* __SUNPRO_C */
 926     for (i = 0; i < wid; i++) buffd[i] = 0;
 927 
 928     for (j = 0; j < hgt; j++) {
 929       mlib_s32 *pk = k;
 930 
 931       for (l = 0; l < n; l++) {
 932         DTYPE *sp0 = sl + l*sll;
 933 
 934         for (off = 0; off < m;) {
 935           sp = sp0 + off*chan1;
 936           dp = dl;
 937 
 938           kw = m - off;
 939 
 940           if (kw > 2*MAX_KER) kw = MAX_KER; else
 941             if (kw > MAX_KER) kw = kw/2;
 942           off += kw;
 943 
 944           p2 = sp[0]; p3 = sp[chan1]; p4 = sp[chan2];
 945           p5 = sp[chan2 + chan1]; p6 = sp[chan2 + chan2]; p7 = sp[5*chan1];
 946 
 947           k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
 948           k4 = pk[4]; k5 = pk[5]; k6 = pk[6];
 949           pk += kw;
 950 
 951           sp += (kw - 1)*chan1;
 952 
 953           if (kw == 7) {
 954 
 955             if (l < (n - 1) || off < m) {
 956 #ifdef __SUNPRO_C
 957 #pragma pipeloop(0)
 958 #endif /* __SUNPRO_C */
 959               for (i = 0; i <= (wid - 2); i += 2) {
 960                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
 961                 p6 = sp[0];
 962                 p7 = sp[chan1];
 963 
 964                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6;
 965                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6;
 966 
 967                 sp += chan2;
 968               }
 969 
 970             } else {
 971 #ifdef __SUNPRO_C
 972 #pragma pipeloop(0)
 973 #endif /* __SUNPRO_C */
 974               for (i = 0; i <= (wid - 2); i += 2) {
 975                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
 976                 p6 = sp[0];
 977                 p7 = sp[chan1];
 978 
 979                 d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i    ]);
 980                 d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1]);
 981 
 982                 STORE_RES(dp[0    ], d0);
 983                 STORE_RES(dp[chan1], d1);
 984 
 985                 buffd[i    ] = 0;
 986                 buffd[i + 1] = 0;
 987 
 988                 sp += chan2;
 989                 dp += chan2;
 990               }
 991             }
 992 
 993           } else if (kw == 6) {
 994 
 995             if (l < (n - 1) || off < m) {
 996 #ifdef __SUNPRO_C
 997 #pragma pipeloop(0)
 998 #endif /* __SUNPRO_C */
 999               for (i = 0; i <= (wid - 2); i += 2) {
1000                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6;
1001                 p5 = sp[0];
1002                 p6 = sp[chan1];
1003 
1004                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5;
1005                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5;
1006 
1007                 sp += chan2;
1008               }
1009 
1010             } else {
1011 #ifdef __SUNPRO_C
1012 #pragma pipeloop(0)
1013 #endif /* __SUNPRO_C */
1014               for (i = 0; i <= (wid - 2); i += 2) {
1015                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6;
1016                 p5 = sp[0];
1017                 p6 = sp[chan1];
1018 
1019                 d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + buffd[i    ]);
1020                 d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + buffd[i + 1]);
1021 
1022                 STORE_RES(dp[0    ], d0);
1023                 STORE_RES(dp[chan1], d1);
1024 
1025                 buffd[i    ] = 0;
1026                 buffd[i + 1] = 0;
1027 
1028                 sp += chan2;
1029                 dp += chan2;
1030               }
1031             }
1032 
1033           } else if (kw == 5) {
1034 
1035             if (l < (n - 1) || off < m) {
1036 #ifdef __SUNPRO_C
1037 #pragma pipeloop(0)
1038 #endif /* __SUNPRO_C */
1039               for (i = 0; i <= (wid - 2); i += 2) {
1040                 p0 = p2; p1 = p3; p2 = p4; p3 = p5;
1041                 p4 = sp[0];
1042                 p5 = sp[chan1];
1043 
1044                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4;
1045                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4;
1046 
1047                 sp += chan2;
1048               }
1049 
1050             } else {
1051 #ifdef __SUNPRO_C
1052 #pragma pipeloop(0)
1053 #endif /* __SUNPRO_C */
1054               for (i = 0; i <= (wid - 2); i += 2) {
1055                 p0 = p2; p1 = p3; p2 = p4; p3 = p5;
1056                 p4 = sp[0];
1057                 p5 = sp[chan1];
1058 
1059                 d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + buffd[i    ]);
1060                 d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + buffd[i + 1]);
1061 
1062                 STORE_RES(dp[0    ], d0);
1063                 STORE_RES(dp[chan1], d1);
1064 
1065                 buffd[i    ] = 0;
1066                 buffd[i + 1] = 0;
1067 
1068                 sp += chan2;
1069                 dp += chan2;
1070               }
1071             }
1072 
1073           } else if (kw == 4) {
1074 
1075             if (l < (n - 1) || off < m) {
1076 #ifdef __SUNPRO_C
1077 #pragma pipeloop(0)
1078 #endif /* __SUNPRO_C */
1079               for (i = 0; i <= (wid - 2); i += 2) {
1080                 p0 = p2; p1 = p3; p2 = p4;
1081                 p3 = sp[0];
1082                 p4 = sp[chan1];
1083 
1084                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3;
1085                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3;
1086 
1087                 sp += chan2;
1088               }
1089 
1090             } else {
1091 #ifdef __SUNPRO_C
1092 #pragma pipeloop(0)
1093 #endif /* __SUNPRO_C */
1094               for (i = 0; i <= (wid - 2); i += 2) {
1095                 p0 = p2; p1 = p3; p2 = p4;
1096                 p3 = sp[0];
1097                 p4 = sp[chan1];
1098 
1099                 d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + buffd[i    ]);
1100                 d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + buffd[i + 1]);
1101 
1102                 STORE_RES(dp[0    ], d0);
1103                 STORE_RES(dp[chan1], d1);
1104 
1105                 buffd[i    ] = 0;
1106                 buffd[i + 1] = 0;
1107 
1108                 sp += chan2;
1109                 dp += chan2;
1110               }
1111             }
1112 
1113           } else if (kw == 3) {
1114 
1115             if (l < (n - 1) || off < m) {
1116 #ifdef __SUNPRO_C
1117 #pragma pipeloop(0)
1118 #endif /* __SUNPRO_C */
1119               for (i = 0; i <= (wid - 2); i += 2) {
1120                 p0 = p2; p1 = p3;
1121                 p2 = sp[0];
1122                 p3 = sp[chan1];
1123 
1124                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2;
1125                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2;
1126 
1127                 sp += chan2;
1128               }
1129 
1130             } else {
1131 #ifdef __SUNPRO_C
1132 #pragma pipeloop(0)
1133 #endif /* __SUNPRO_C */
1134               for (i = 0; i <= (wid - 2); i += 2) {
1135                 p0 = p2; p1 = p3;
1136                 p2 = sp[0];
1137                 p3 = sp[chan1];
1138 
1139                 d0 = (p0*k0 + p1*k1 + p2*k2 + buffd[i    ]);
1140                 d1 = (p1*k0 + p2*k1 + p3*k2 + buffd[i + 1]);
1141 
1142                 STORE_RES(dp[0    ], d0);
1143                 STORE_RES(dp[chan1], d1);
1144 
1145                 buffd[i    ] = 0;
1146                 buffd[i + 1] = 0;
1147 
1148                 sp += chan2;
1149                 dp += chan2;
1150               }
1151             }
1152 
1153           } else if (kw == 2) {
1154 
1155             if (l < (n - 1) || off < m) {
1156 #ifdef __SUNPRO_C
1157 #pragma pipeloop(0)
1158 #endif /* __SUNPRO_C */
1159               for (i = 0; i <= (wid - 2); i += 2) {
1160                 p0 = p2;
1161                 p1 = sp[0];
1162                 p2 = sp[chan1];
1163 
1164                 buffd[i    ] += p0*k0 + p1*k1;
1165                 buffd[i + 1] += p1*k0 + p2*k1;
1166 
1167                 sp += chan2;
1168               }
1169 
1170             } else {
1171 #ifdef __SUNPRO_C
1172 #pragma pipeloop(0)
1173 #endif /* __SUNPRO_C */
1174               for (i = 0; i <= (wid - 2); i += 2) {
1175                 p0 = p2;
1176                 p1 = sp[0];
1177                 p2 = sp[chan1];
1178 
1179                 d0 = (p0*k0 + p1*k1 + buffd[i    ]);
1180                 d1 = (p1*k0 + p2*k1 + buffd[i + 1]);
1181 
1182                 STORE_RES(dp[0    ], d0);
1183                 STORE_RES(dp[chan1], d1);
1184 
1185                 buffd[i    ] = 0;
1186                 buffd[i + 1] = 0;
1187 
1188                 sp += chan2;
1189                 dp += chan2;
1190               }
1191             }
1192 
1193           } else /*if (kw == 1)*/ {
1194 
1195             if (l < (n - 1) || off < m) {
1196 #ifdef __SUNPRO_C
1197 #pragma pipeloop(0)
1198 #endif /* __SUNPRO_C */
1199               for (i = 0; i <= (wid - 2); i += 2) {
1200                 p0 = sp[0];
1201                 p1 = sp[chan1];
1202 
1203                 buffd[i    ] += p0*k0;
1204                 buffd[i + 1] += p1*k0;
1205 
1206                 sp += chan2;
1207               }
1208 
1209             } else {
1210 #ifdef __SUNPRO_C
1211 #pragma pipeloop(0)
1212 #endif /* __SUNPRO_C */
1213               for (i = 0; i <= (wid - 2); i += 2) {
1214                 p0 = sp[0];
1215                 p1 = sp[chan1];
1216 
1217                 d0 = (p0*k0 + buffd[i    ]);
1218                 d1 = (p1*k0 + buffd[i + 1]);
1219 
1220                 STORE_RES(dp[0    ], d0);
1221                 STORE_RES(dp[chan1], d1);
1222 
1223                 buffd[i    ] = 0;
1224                 buffd[i + 1] = 0;
1225 
1226                 sp += chan2;
1227                 dp += chan2;
1228               }
1229             }
1230           }
1231         }
1232       }
1233 
1234       /* last pixels */
1235       for (; i < wid; i++) {
1236         mlib_s32 *pk = k, s = 0;
1237         mlib_s32 x;
1238 
1239         for (l = 0; l < n; l++) {
1240           sp = sl + l*sll + i*chan1;
1241 
1242           for (x = 0; x < m; x++) {
1243             s += sp[0] * pk[0];
1244             sp += chan1;
1245             pk ++;
1246           }
1247         }
1248 
1249         STORE_RES(dp[0], s);
1250 
1251         sp += chan1;
1252         dp += chan1;
1253       }
1254 
1255       sl += sll;
1256       dl += dll;
1257     }
1258   }
1259 
1260   if (buffd != buff) mlib_free(buffd);
1261   if (k != k_locl) mlib_free(k);
1262 
1263   return MLIB_SUCCESS;
1264 }
1265 
1266 /***************************************************************/