1 /*
   2  * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 
  27 /*
  28  * FUNCTION
  29  *   Internal functions for mlib_ImageConv* on U8/S16/U16 types and
  30  *   MLIB_EDGE_DST_NO_WRITE mask
  31  */
  32 
  33 #include "mlib_image.h"
  34 #include "mlib_c_ImageConv.h"
  35 
/*
  This define switches between functions of different data types.
  The #if chain below maps the value to a pixel type:
    1 - mlib_u8, 2 - mlib_s16, 3 - mlib_u16.
*/
#define IMG_TYPE 2

/***************************************************************/
/*
 * Per-type configuration selected by IMG_TYPE:
 *   DTYPE        pixel element type
 *   CONV_FUNC    builds the name of the floating-point convolution entry
 *   CONV_FUNC_I  builds the name of the integer-arithmetic convolution entry
 *   DSCALE       initial value of the kernel scale factor in CONV_FUNC(MxN)
 *   FROM_S32     turns the clamped 32-bit result into the stored pixel value
 *   S64TOS32     applied by LOAD_BUFF to the value placed in the low 32 bits
 *                of a packed 64-bit word
 *   SAT_OFF      text spliced right after the operand inside D2I; it is
 *                intentionally NOT parenthesized so that the leading '-'
 *                acts as a binary minus there (empty for mlib_s16)
 */
#if IMG_TYPE == 1

#define DTYPE             mlib_u8
#define CONV_FUNC(KERN)   mlib_c_conv##KERN##nw_u8
#define CONV_FUNC_I(KERN) mlib_i_conv##KERN##nw_u8
#define DSCALE            (1 << 24)
#define FROM_S32(x)       (((x) >> 24) ^ 128)
#define S64TOS32(x)       (x)
#define SAT_OFF           -(1u << 31)

#elif IMG_TYPE == 2

#define DTYPE             mlib_s16
#define CONV_FUNC(KERN)   mlib_conv##KERN##nw_s16
#define CONV_FUNC_I(KERN) mlib_i_conv##KERN##nw_s16
#define DSCALE            65536.0
#define FROM_S32(x)       ((x) >> 16)
/* mask off the sign extension so it cannot leak into the high 32 bits */
#define S64TOS32(x)       ((x) & 0xffffffff)
#define SAT_OFF

#elif IMG_TYPE == 3

#define DTYPE             mlib_u16
#define CONV_FUNC(KERN)   mlib_conv##KERN##nw_u16
#define CONV_FUNC_I(KERN) mlib_i_conv##KERN##nw_u16
#define DSCALE            65536.0
#define FROM_S32(x)       (((x) >> 16) ^ 0x8000)
#define S64TOS32(x)       (x)
#define SAT_OFF           -(1u << 31)

#endif /* IMG_TYPE == 1 */
  73 
  74 /***************************************************************/
/* Element count of the on-stack scratch arrays declared by the convolution
   functions; larger requirements are satisfied with mlib_malloc instead. */
#define BUFF_SIZE   1600

/* Byte budget from which mlib_ImageConv1xN derives the number of rows it
   processes per strip: (CACHE_SIZE / sizeof(DTYPE)) / source stride. */
#define CACHE_SIZE  (64*1024)

/***************************************************************/
/* Floating-point type used for scaled kernel coefficients and accumulators. */
#define FTYPE mlib_d64

#ifndef MLIB_USE_FTOI_CLAMPING

/* Saturating conversion of a floating-point value to mlib_s32.
   The argument is evaluated more than once. */
#define CLAMP_S32(x)                                            \
  (((x) <= MLIB_S32_MIN) ? MLIB_S32_MIN : (((x) >= MLIB_S32_MAX) ? MLIB_S32_MAX : (mlib_s32)(x)))

#else

/* Plain cast variant.
   NOTE(review): presumably selected when the platform's float-to-int
   conversion already saturates - confirm against the build configuration. */
#define CLAMP_S32(x) ((mlib_s32)(x))

#endif /* MLIB_USE_FTOI_CLAMPING */

/***************************************************************/
/* Convert an accumulated sum to mlib_s32: SAT_OFF is appended textually
   after the operand (a subtraction for the unsigned types, nothing for
   mlib_s16) and the result is passed through CLAMP_S32. */
#define D2I(x) CLAMP_S32((x) SAT_OFF)
  95 
  96 /***************************************************************/
/*
 * STORE2(res0, res1): store two results at dp[0] and dp[chan1].
 * Relies on `dp` and `chan1` being in scope at the expansion site.
 * On little-endian builds the two values are written in swapped order.
 * NOTE(review): not referenced in the visible part of this file.
 */
#ifdef _LITTLE_ENDIAN

#define STORE2(res0, res1)                                      \
  dp[0    ] = res1;                                             \
  dp[chan1] = res0

#else

#define STORE2(res0, res1)                                      \
  dp[0    ] = res0;                                             \
  dp[chan1] = res1

#endif /* _LITTLE_ENDIAN */

/***************************************************************/
/*
 * LOAD_BUFF(buff): copy the two source pixels sp[0] and sp[chan1] into
 * buff[i] and buff[i + 1].  Relies on `sp`, `chan1` and `i` being in scope
 * at the expansion site.
 *
 * Without 64-bit integers (_NO_LONGLONG) two separate stores are used.
 * Otherwise both pixels are packed into one mlib_s64 and written with a
 * single store through a cast pointer; the halves are ordered by endianness
 * so that the element at the lower address holds sp[0].  S64TOS32 is applied
 * to the value that lands in the low 32 bits.
 * NOTE(review): the 64-bit store requires buff + i to be suitably aligned
 * and type-puns through a pointer cast - confirm both are acceptable for
 * the compilers in use.
 */
#ifdef _NO_LONGLONG

#define LOAD_BUFF(buff)                                         \
  buff[i    ] = sp[0];                                          \
  buff[i + 1] = sp[chan1]

#else /* _NO_LONGLONG */

#ifdef _LITTLE_ENDIAN

#define LOAD_BUFF(buff)                                         \
  *(mlib_s64*)(buff + i) = (((mlib_s64)sp[chan1]) << 32) | S64TOS32((mlib_s64)sp[0])

#else /* _LITTLE_ENDIAN */

#define LOAD_BUFF(buff)                                         \
  *(mlib_s64*)(buff + i) = (((mlib_s64)sp[0]) << 32) | S64TOS32((mlib_s64)sp[chan1])

#endif /* _LITTLE_ENDIAN */
#endif /* _NO_LONGLONG */
 132 
 133 /***************************************************************/
/*
 * Overlay of one 64-bit mlib_d64 with a pair of 32-bit integers.
 * CONV_FUNC(MxN) reads a 64-bit word previously written by LOAD_BUFF
 * through `d64` and then picks the two packed pixels out of i32s.i0/i1.
 * The f32s view has the same layout; its members are declared mlib_s32
 * as well.
 */
typedef union {
  mlib_d64 d64;
  struct {
    mlib_s32 i0;
    mlib_s32 i1;
  } i32s;
  struct {
    mlib_s32 f0;
    mlib_s32 f1;
  } f32s;
} d64_2x32;
 145 
 146 /***************************************************************/
/*
 * DEF_VARS(type): declare the locals shared by the convolution functions:
 * source/destination base, line and pixel pointers, the scratch pointer
 * `pbuff`, image geometry and the loop counters.
 * Requires an array named `buff` to be declared before the expansion,
 * because pbuff is initialized from it.
 */
#define DEF_VARS(type)                                          \
  type     *adr_src, *sl, *sp = NULL;                           \
  type     *adr_dst, *dl, *dp = NULL;                           \
  FTYPE    *pbuff = buff;                                       \
  mlib_s32 wid, hgt, sll, dll;                                  \
  mlib_s32 nchannel, chan1;                                     \
  mlib_s32 i, j, c

/***************************************************************/
/*
 * GET_SRC_DST_PARAMETERS(type): fill the geometry locals from the images
 * named `src` and `dst` at the expansion site.  Width, height and channel
 * count are taken from src only.  sll/dll are the strides divided by
 * sizeof(type), i.e. line steps counted in elements (assuming the stride
 * getter reports bytes - confirm).
 */
#define GET_SRC_DST_PARAMETERS(type)                            \
  hgt = mlib_ImageGetHeight(src);                               \
  wid = mlib_ImageGetWidth(src);                                \
  nchannel = mlib_ImageGetChannels(src);                        \
  sll = mlib_ImageGetStride(src) / sizeof(type);                \
  dll = mlib_ImageGetStride(dst) / sizeof(type);                \
  adr_src = (type *)mlib_ImageGetData(src);                     \
  adr_dst = (type *)mlib_ImageGetData(dst)
 164 
 165 /***************************************************************/
 166 #if IMG_TYPE == 1
 167 
 168 /* Test for the presence of any "1" bit in bits
 169    8 to 31 of val. If present, then val is either
 170    negative or >255. If over/underflows of 8 bits
 171    are uncommon, then this technique can be a win,
 172    since only a single test, rather than two, is
 173    necessary to determine if clamping is needed.
 174    On the other hand, if over/underflows are common,
 175    it adds an extra test.
 176 */
 177 #define CLAMP_STORE(dst, val)                                   \
 178   if (val & 0xffffff00) {                                       \
 179     if (val < MLIB_U8_MIN)                                      \
 180       dst = MLIB_U8_MIN;                                        \
 181     else                                                        \
 182       dst = MLIB_U8_MAX;                                        \
 183   } else {                                                      \
 184     dst = (mlib_u8)val;                                         \
 185   }
 186 
 187 #elif IMG_TYPE == 2
 188 
 189 #define CLAMP_STORE(dst, val)                                   \
 190   if (val >= MLIB_S16_MAX)                                      \
 191     dst = MLIB_S16_MAX;                                         \
 192   else if (val <= MLIB_S16_MIN)                                 \
 193     dst = MLIB_S16_MIN;                                         \
 194   else                                                          \
 195     dst = (mlib_s16)val
 196 
 197 #elif IMG_TYPE == 3
 198 
 199 #define CLAMP_STORE(dst, val)                                   \
 200   if (val >= MLIB_U16_MAX)                                      \
 201     dst = MLIB_U16_MAX;                                         \
 202   else if (val <= MLIB_U16_MIN)                                 \
 203     dst = MLIB_U16_MIN;                                         \
 204   else                                                          \
 205     dst = (mlib_u16)val
 206 
 207 #endif /* IMG_TYPE == 1 */
 208 
 209 /***************************************************************/
/* Widest kernel slice handled by one unrolled inner loop of the MxN
   functions; wider kernel rows are split into several slices. */
#define MAX_KER   7
/* Sizes the on-stack tables of the MxN functions (row-buffer pointer table,
   local integer kernel copy); larger kernels use heap storage instead. */
#define MAX_N    15
 212 
 213 static mlib_status mlib_ImageConv1xN(mlib_image       *dst,
 214                                      const mlib_image *src,
 215                                      const mlib_d64   *k,
 216                                      mlib_s32         n,
 217                                      mlib_s32         dn,
 218                                      mlib_s32         cmask)
 219 {
 220   FTYPE    buff[BUFF_SIZE];
 221   mlib_s32 off, kh;
 222   mlib_s32 d0, d1;
 223   const FTYPE    *pk;
 224   FTYPE    k0, k1, k2, k3;
 225   FTYPE    p0, p1, p2, p3, p4;
 226   DEF_VARS(DTYPE);
 227   DTYPE    *sl_c, *dl_c, *sl0;
 228   mlib_s32 l, hsize, max_hsize;
 229   GET_SRC_DST_PARAMETERS(DTYPE);
 230 
 231   hgt -= (n - 1);
 232   adr_dst += dn*dll;
 233 
 234   max_hsize = (CACHE_SIZE/sizeof(DTYPE))/sll;
 235 
 236   if (!max_hsize) max_hsize = 1;
 237 
 238   if (max_hsize > BUFF_SIZE) {
 239     pbuff = mlib_malloc(sizeof(FTYPE)*max_hsize);
 240   }
 241 
 242   chan1 = nchannel;
 243 
 244   sl_c = adr_src;
 245   dl_c = adr_dst;
 246 
 247   for (l = 0; l < hgt; l += hsize) {
 248     hsize = hgt - l;
 249 
 250     if (hsize > max_hsize) hsize = max_hsize;
 251 
 252     for (c = 0; c < nchannel; c++) {
 253       if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
 254 
 255       sl = sl_c + c;
 256       dl = dl_c + c;
 257 
 258 #ifdef __SUNPRO_C
 259 #pragma pipeloop(0)
 260 #endif /* __SUNPRO_C */
 261       for (j = 0; j < hsize; j++) pbuff[j] = 0.0;
 262 
 263       for (i = 0; i < wid; i++) {
 264         sl0 = sl;
 265 
 266         for (off = 0; off < (n - 4); off += 4) {
 267           pk = k + off;
 268           sp = sl0;
 269 
 270           k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
 271           p2 = sp[0]; p3 = sp[sll]; p4 = sp[2*sll];
 272           sp += 3*sll;
 273 
 274 #ifdef __SUNPRO_C
 275 #pragma pipeloop(0)
 276 #endif /* __SUNPRO_C */
 277           for (j = 0; j < hsize; j += 2) {
 278             p0 = p2; p1 = p3; p2 = p4;
 279             p3 = sp[0];
 280             p4 = sp[sll];
 281 
 282             pbuff[j    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3;
 283             pbuff[j + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3;
 284 
 285             sp += 2*sll;
 286           }
 287 
 288           sl0 += 4*sll;
 289         }
 290 
 291         pk = k + off;
 292         sp = sl0;
 293 
 294         k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
 295         p2 = sp[0]; p3 = sp[sll]; p4 = sp[2*sll];
 296 
 297         dp = dl;
 298         kh = n - off;
 299 
 300         if (kh == 4) {
 301           sp += 3*sll;
 302 
 303 #ifdef __SUNPRO_C
 304 #pragma pipeloop(0)
 305 #endif /* __SUNPRO_C */
 306           for (j = 0; j <= (hsize - 2); j += 2) {
 307             p0 = p2; p1 = p3; p2 = p4;
 308             p3 = sp[0];
 309             p4 = sp[sll];
 310 
 311             d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + pbuff[j]);
 312             d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + pbuff[j + 1]);
 313 
 314             dp[0  ] = FROM_S32(d0);
 315             dp[dll] = FROM_S32(d1);
 316 
 317             pbuff[j] = 0;
 318             pbuff[j + 1] = 0;
 319 
 320             sp += 2*sll;
 321             dp += 2*dll;
 322           }
 323 
 324           if (j < hsize) {
 325             p0 = p2; p1 = p3; p2 = p4;
 326             p3 = sp[0];
 327 
 328             d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + pbuff[j]);
 329 
 330             pbuff[j] = 0;
 331 
 332             dp[0] = FROM_S32(d0);
 333           }
 334 
 335         } else if (kh == 3) {
 336           sp += 2*sll;
 337 
 338 #ifdef __SUNPRO_C
 339 #pragma pipeloop(0)
 340 #endif /* __SUNPRO_C */
 341           for (j = 0; j <= (hsize - 2); j += 2) {
 342             p0 = p2; p1 = p3;
 343             p2 = sp[0];
 344             p3 = sp[sll];
 345 
 346             d0 = D2I(p0*k0 + p1*k1 + p2*k2 + pbuff[j]);
 347             d1 = D2I(p1*k0 + p2*k1 + p3*k2 + pbuff[j + 1]);
 348 
 349             dp[0  ] = FROM_S32(d0);
 350             dp[dll] = FROM_S32(d1);
 351 
 352             pbuff[j] = 0;
 353             pbuff[j + 1] = 0;
 354 
 355             sp += 2*sll;
 356             dp += 2*dll;
 357           }
 358 
 359           if (j < hsize) {
 360             p0 = p2; p1 = p3;
 361             p2 = sp[0];
 362 
 363             d0 = D2I(p0*k0 + p1*k1 + p2*k2 + pbuff[j]);
 364 
 365             pbuff[j] = 0;
 366 
 367             dp[0] = FROM_S32(d0);
 368           }
 369 
 370         } else if (kh == 2) {
 371           sp += sll;
 372 
 373 #ifdef __SUNPRO_C
 374 #pragma pipeloop(0)
 375 #endif /* __SUNPRO_C */
 376           for (j = 0; j <= (hsize - 2); j += 2) {
 377             p0 = p2;
 378             p1 = sp[0];
 379             p2 = sp[sll];
 380 
 381             d0 = D2I(p0*k0 + p1*k1 + pbuff[j]);
 382             d1 = D2I(p1*k0 + p2*k1 + pbuff[j + 1]);
 383 
 384             dp[0  ] = FROM_S32(d0);
 385             dp[dll] = FROM_S32(d1);
 386 
 387             pbuff[j] = 0;
 388             pbuff[j + 1] = 0;
 389 
 390             sp += 2*sll;
 391             dp += 2*dll;
 392           }
 393 
 394           if (j < hsize) {
 395             p0 = p2;
 396             p1 = sp[0];
 397 
 398             d0 = D2I(p0*k0 + p1*k1 + pbuff[j]);
 399 
 400             pbuff[j] = 0;
 401 
 402             dp[0] = FROM_S32(d0);
 403           }
 404 
 405         } else /* if (kh == 1) */ {
 406 #ifdef __SUNPRO_C
 407 #pragma pipeloop(0)
 408 #endif /* __SUNPRO_C */
 409           for (j = 0; j < hsize; j++) {
 410             p0 = sp[0];
 411 
 412             d0 = D2I(p0*k0 + pbuff[j]);
 413 
 414             dp[0] = FROM_S32(d0);
 415 
 416             pbuff[j] = 0;
 417 
 418             sp += sll;
 419             dp += dll;
 420           }
 421         }
 422 
 423         sl += chan1;
 424         dl += chan1;
 425       }
 426     }
 427 
 428     sl_c += max_hsize*sll;
 429     dl_c += max_hsize*dll;
 430   }
 431 
 432   if (pbuff != buff) mlib_free(pbuff);
 433 
 434   return MLIB_SUCCESS;
 435 }
 436 
 437 /***************************************************************/
/*
 * General m x n convolution (floating-point arithmetic) for pixel type
 * DTYPE; only pixels whose whole kernel footprint lies inside the source
 * are written.
 *
 *   dst     destination image; output starts dm pixels / dn rows in
 *   src     source image
 *   kernel  m*n integer coefficients, consumed row by row (m per row)
 *   m, n    kernel width and height
 *   dm, dn  column / row offset applied to the destination
 *   scale   kernel scaling exponent: coefficients are multiplied by
 *           DSCALE / 2^scale before use
 *   cmask   channel mask; bit (nchannel - 1 - c) enables channel c
 *
 * Returns `status` through FREE_AND_RETURN_STATUS, or MLIB_FAILURE directly
 * if the kernel copy cannot be allocated.
 * NOTE(review): FREE_AND_RETURN_STATUS is not defined in the visible part
 * of this file; presumably it releases pbuff / k when they were heap
 * allocated and returns `status` - confirm.
 */
mlib_status CONV_FUNC(MxN)(mlib_image       *dst,
                           const mlib_image *src,
                           const mlib_s32   *kernel,
                           mlib_s32         m,
                           mlib_s32         n,
                           mlib_s32         dm,
                           mlib_s32         dn,
                           mlib_s32         scale,
                           mlib_s32         cmask)
{
  FTYPE    buff[BUFF_SIZE], *buffs_arr[2*(MAX_N + 1)];
  FTYPE    **buffs = buffs_arr, *buffd;
  FTYPE    akernel[256], *k = akernel, fscale = DSCALE;
  mlib_s32 mn, l, off, kw, bsize, buff_ind;
  mlib_s32 d0, d1;
  FTYPE    k0, k1, k2, k3, k4, k5, k6;
  FTYPE    p0, p1, p2, p3, p4, p5, p6, p7;
  d64_2x32 dd;
  DEF_VARS(DTYPE);
  mlib_s32 chan2;
  mlib_s32 *buffo, *buffi;
  mlib_status status = MLIB_SUCCESS;

  GET_SRC_DST_PARAMETERS(DTYPE);

  /* split the scaling so that the shift below never exceeds 30 bits
     for scale values up to 60 */
  if (scale > 30) {
    fscale *= 1.0/(1 << 30);
    scale -= 30;
  }

  fscale /= (1 << scale);

  mn = m*n;

  /* kernels that do not fit the on-stack copy go to the heap */
  if (mn > 256) {
    k = mlib_malloc(mn*sizeof(mlib_d64));

    if (k == NULL) return MLIB_FAILURE;
  }

  /* floating-point copy of the kernel with the scale folded in */
  for (i = 0; i < mn; i++) {
    k[i] = kernel[i]*fscale;
  }

  /* single-column kernels have a dedicated implementation */
  if (m == 1) {
    status = mlib_ImageConv1xN(dst, src, k, n, dn, cmask);
    FREE_AND_RETURN_STATUS;
  }

  /*
   * Scratch layout, in units of `wid` FTYPE elements:
   *   n + 1 source row buffers, 1 accumulator row (buffd) and 1 row that
   *   is viewed as mlib_s32 storage (buffo / buffi).
   * When heap storage is used, the row-pointer table is placed after it.
   */
  bsize = (n + 3)*wid;

  if ((bsize > BUFF_SIZE) || (n > MAX_N)) {
    pbuff = mlib_malloc(sizeof(FTYPE)*bsize + sizeof(FTYPE *)*2*(n + 1));

    if (pbuff == NULL) {
      status = MLIB_FAILURE;
      FREE_AND_RETURN_STATUS;
    }
    buffs = (FTYPE   **)(pbuff + bsize);
  }

  /* the pointer table is stored twice so that buffs + buff_ind can be
     indexed with 0..n without wrapping */
  for (l = 0; l < (n + 1); l++) buffs[l] = pbuff + l*wid;
  for (l = 0; l < (n + 1); l++) buffs[l + (n + 1)] = buffs[l];
  buffd = buffs[n] + wid;
  buffo = (mlib_s32*)(buffd + wid);
  buffi = buffo + (wid &~ 1);

  chan1 = nchannel;
  chan2 = chan1 + chan1;

  /* from here on wid / hgt are the dimensions of the produced area */
  wid -= (m - 1);
  hgt -= (n - 1);
  adr_dst += dn*dll + dm*nchannel;

  for (c = 0; c < nchannel; c++) {
    if (!(cmask & (1 << (chan1 - 1 - c)))) continue;

    sl = adr_src + c;
    dl = adr_dst + c;

    /* preload the first n source rows of this channel as FTYPE */
    for (l = 0; l < n; l++) {
      FTYPE    *buff = buffs[l];

#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
      for (i = 0; i < wid + (m - 1); i++) {
        buff[i] = (FTYPE)sl[i*chan1];
      }

      sl += sll;
    }

    buff_ind = 0;

#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
    for (i = 0; i < wid; i++) buffd[i] = 0.0;

    for (j = 0; j < hgt; j++) {
      /* buffc[0..n-1]: rows under the kernel; buffn: receives the row
         that follows them (read through sp, which starts at sl) */
      FTYPE    **buffc = buffs + buff_ind;
      FTYPE    *buffn = buffc[n];
      FTYPE    *pk = k;

      for (l = 0; l < n; l++) {
        FTYPE    *buff_l = buffc[l];

        for (off = 0; off < m;) {
          FTYPE    *buff = buff_l + off;

          /* slice the kernel row into pieces of at most MAX_KER taps;
             a remainder between MAX_KER + 1 and 2*MAX_KER is halved */
          kw = m - off;

          if (kw > 2*MAX_KER) kw = MAX_KER; else
            if (kw > MAX_KER) kw = kw/2;
          off += kw;

          sp = sl;
          dp = dl;

          /* NOTE(review): six samples and seven coefficients are fetched
             regardless of kw; for kw < 7 this appears to read past the
             current slice (and possibly past the end of k) - confirm the
             surrounding storage always covers these reads. */
          p2 = buff[0]; p3 = buff[1]; p4 = buff[2];
          p5 = buff[3]; p6 = buff[4]; p7 = buff[5];

          k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
          k4 = pk[4]; k5 = pk[5]; k6 = pk[6];
          pk += kw;

          /*
           * In every kw branch below:
           *   - if this is not the last slice of the last kernel row, the
           *     products are only accumulated into buffd;
           *   - otherwise the sum is completed, clamped, stored to dst,
           *     buffd is cleared, and the next source row is loaded into
           *     buffn.
           * Two output pixels are produced per iteration.
           */
          if (kw == 7) {

            if (l < (n - 1) || off < m) {
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
              for (i = 0; i <= (wid - 2); i += 2) {
                p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;

                p6 = buff[i + 6]; p7 = buff[i + 7];

                buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6;
                buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6;
              }

            } else {
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
              for (i = 0; i <= (wid - 2); i += 2) {
                p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;

                p6 = buff[i + 6]; p7 = buff[i + 7];

                /* this branch loads the next-row pixels via the packed
                   integer scratch instead of two direct conversions */
                LOAD_BUFF(buffi);

                dd.d64 = *(FTYPE   *)(buffi + i);
                buffn[i    ] = (FTYPE)dd.i32s.i0;
                buffn[i + 1] = (FTYPE)dd.i32s.i1;

                d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i    ]);
                d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1]);

                dp[0    ] = FROM_S32(d0);
                dp[chan1] = FROM_S32(d1);

                buffd[i    ] = 0.0;
                buffd[i + 1] = 0.0;

                sp += chan2;
                dp += chan2;
              }
            }

          } else if (kw == 6) {

            if (l < (n - 1) || off < m) {
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
              for (i = 0; i <= (wid - 2); i += 2) {
                p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6;

                p5 = buff[i + 5]; p6 = buff[i + 6];

                buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5;
                buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5;
              }

            } else {
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
              for (i = 0; i <= (wid - 2); i += 2) {
                p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6;

                p5 = buff[i + 5]; p6 = buff[i + 6];

                buffn[i    ] = (FTYPE)sp[0];
                buffn[i + 1] = (FTYPE)sp[chan1];

                d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + buffd[i    ]);
                d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + buffd[i + 1]);

                dp[0    ] = FROM_S32(d0);
                dp[chan1] = FROM_S32(d1);

                buffd[i    ] = 0.0;
                buffd[i + 1] = 0.0;

                sp += chan2;
                dp += chan2;
              }
            }

          } else if (kw == 5) {

            if (l < (n - 1) || off < m) {
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
              for (i = 0; i <= (wid - 2); i += 2) {
                p0 = p2; p1 = p3; p2 = p4; p3 = p5;

                p4 = buff[i + 4]; p5 = buff[i + 5];

                buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4;
                buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4;
              }

            } else {
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
              for (i = 0; i <= (wid - 2); i += 2) {
                p0 = p2; p1 = p3; p2 = p4; p3 = p5;

                p4 = buff[i + 4]; p5 = buff[i + 5];

                buffn[i    ] = (FTYPE)sp[0];
                buffn[i + 1] = (FTYPE)sp[chan1];

                d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + buffd[i    ]);
                d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + buffd[i + 1]);

                dp[0    ] = FROM_S32(d0);
                dp[chan1] = FROM_S32(d1);

                buffd[i    ] = 0.0;
                buffd[i + 1] = 0.0;

                sp += chan2;
                dp += chan2;
              }
            }

          } else if (kw == 4) {

            if (l < (n - 1) || off < m) {
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
              for (i = 0; i <= (wid - 2); i += 2) {
                p0 = p2; p1 = p3; p2 = p4;

                p3 = buff[i + 3]; p4 = buff[i + 4];

                buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3;
                buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3;
              }

            } else {
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
              for (i = 0; i <= (wid - 2); i += 2) {
                p0 = p2; p1 = p3; p2 = p4;

                p3 = buff[i + 3]; p4 = buff[i + 4];

                buffn[i    ] = (FTYPE)sp[0];
                buffn[i + 1] = (FTYPE)sp[chan1];

                d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + buffd[i    ]);
                d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + buffd[i + 1]);

                dp[0    ] = FROM_S32(d0);
                dp[chan1] = FROM_S32(d1);

                buffd[i    ] = 0.0;
                buffd[i + 1] = 0.0;

                sp += chan2;
                dp += chan2;
              }
            }

          } else if (kw == 3) {

            if (l < (n - 1) || off < m) {
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
              for (i = 0; i <= (wid - 2); i += 2) {
                p0 = p2; p1 = p3;

                p2 = buff[i + 2]; p3 = buff[i + 3];

                buffd[i    ] += p0*k0 + p1*k1 + p2*k2;
                buffd[i + 1] += p1*k0 + p2*k1 + p3*k2;
              }

            } else {
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
              for (i = 0; i <= (wid - 2); i += 2) {
                p0 = p2; p1 = p3;

                p2 = buff[i + 2]; p3 = buff[i + 3];

                buffn[i    ] = (FTYPE)sp[0];
                buffn[i + 1] = (FTYPE)sp[chan1];

                d0 = D2I(p0*k0 + p1*k1 + p2*k2 + buffd[i    ]);
                d1 = D2I(p1*k0 + p2*k1 + p3*k2 + buffd[i + 1]);

                dp[0    ] = FROM_S32(d0);
                dp[chan1] = FROM_S32(d1);

                buffd[i    ] = 0.0;
                buffd[i + 1] = 0.0;

                sp += chan2;
                dp += chan2;
              }
            }

          } else /*if (kw == 2)*/ {

            if (l < (n - 1) || off < m) {
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
              for (i = 0; i <= (wid - 2); i += 2) {
                p0 = p2;

                p1 = buff[i + 1]; p2 = buff[i + 2];

                buffd[i    ] += p0*k0 + p1*k1;
                buffd[i + 1] += p1*k0 + p2*k1;
              }

            } else {
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
              for (i = 0; i <= (wid - 2); i += 2) {
                p0 = p2;

                p1 = buff[i + 1]; p2 = buff[i + 2];

                buffn[i    ] = (FTYPE)sp[0];
                buffn[i + 1] = (FTYPE)sp[chan1];

                d0 = D2I(p0*k0 + p1*k1 + buffd[i    ]);
                d1 = D2I(p1*k0 + p2*k1 + buffd[i + 1]);

                dp[0    ] = FROM_S32(d0);
                dp[chan1] = FROM_S32(d1);

                buffd[i    ] = 0.0;
                buffd[i + 1] = 0.0;

                sp += chan2;
                dp += chan2;
              }
            }
          }
        }
      }

      /* last pixels */
      /* `i`, `sp` and `dp` continue from the final store loop above; an odd
         trailing column is computed directly from the row buffers */
      for (; i < wid; i++) {
        FTYPE    *pk = k, s = 0;
        mlib_s32 x, d0;

        for (l = 0; l < n; l++) {
          FTYPE    *buff = buffc[l] + i;

          for (x = 0; x < m; x++) s += buff[x] * (*pk++);
        }

        d0 = D2I(s);
        dp[0] = FROM_S32(d0);

        buffn[i] = (FTYPE)sp[0];

        sp += chan1;
        dp += chan1;
      }

      /* finish loading the next source row: the m - 1 columns to the
         right of the produced area.
         NOTE(review): on the final iteration of j the row read through sp
         appears to lie one row below the last row the kernel needs -
         confirm the source buffer makes that read safe. */
      for (l = 0; l < (m - 1); l++) buffn[wid + l] = sp[l*chan1];

      /* next line */
      sl += sll;
      dl += dll;

      /* rotate the ring of n + 1 row buffers */
      buff_ind++;

      if (buff_ind >= n + 1) buff_ind = 0;
    }
  }

  FREE_AND_RETURN_STATUS;
}
 851 
 852 /***************************************************************/
 853 /* for x86, using integer multiplies is faster */
 854 
 855 #define STORE_RES(res, x)                                       \
 856   x >>= shift2;                                                 \
 857   CLAMP_STORE(res, x)
 858 
 859 mlib_status CONV_FUNC_I(MxN)(mlib_image       *dst,
 860                              const mlib_image *src,
 861                              const mlib_s32   *kernel,
 862                              mlib_s32         m,
 863                              mlib_s32         n,
 864                              mlib_s32         dm,
 865                              mlib_s32         dn,
 866                              mlib_s32         scale,
 867                              mlib_s32         cmask)
 868 {
 869   mlib_s32 buff[BUFF_SIZE], *buffd = buff;
 870   mlib_s32 l, off, kw;
 871   mlib_s32 d0, d1, shift1, shift2;
 872   mlib_s32 k0, k1, k2, k3, k4, k5, k6;
 873   mlib_s32 p0, p1, p2, p3, p4, p5, p6, p7;
 874   DTYPE    *adr_src, *sl, *sp = NULL;
 875   DTYPE    *adr_dst, *dl, *dp = NULL;
 876   mlib_s32 wid, hgt, sll, dll;
 877   mlib_s32 nchannel, chan1;
 878   mlib_s32 i, j, c;
 879   mlib_s32 chan2;
 880   mlib_s32 k_locl[MAX_N*MAX_N], *k = k_locl;
 881   GET_SRC_DST_PARAMETERS(DTYPE);
 882 
 883 #if IMG_TYPE != 1
 884   shift1 = 16;
 885 #else
 886   shift1 = 8;
 887 #endif /* IMG_TYPE != 1 */
 888   shift2 = scale - shift1;
 889 
 890   chan1 = nchannel;
 891   chan2 = chan1 + chan1;
 892 
 893   wid -= (m - 1);
 894   hgt -= (n - 1);
 895   adr_dst += dn*dll + dm*nchannel;
 896 
 897   if (wid > BUFF_SIZE) {
 898     buffd = mlib_malloc(sizeof(mlib_s32)*wid);
 899 
 900     if (buffd == NULL) return MLIB_FAILURE;
 901   }
 902 
 903   if (m*n > MAX_N*MAX_N) {
 904     k = mlib_malloc(sizeof(mlib_s32)*(m*n));
 905 
 906     if (k == NULL) {
 907       if (buffd != buff) mlib_free(buffd);
 908       return MLIB_FAILURE;
 909     }
 910   }
 911 
 912   for (i = 0; i < m*n; i++) {
 913     k[i] = kernel[i] >> shift1;
 914   }
 915 
 916   for (c = 0; c < nchannel; c++) {
 917     if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
 918 
 919     sl = adr_src + c;
 920     dl = adr_dst + c;
 921 
 922 #ifdef __SUNPRO_C
 923 #pragma pipeloop(0)
 924 #endif /* __SUNPRO_C */
 925     for (i = 0; i < wid; i++) buffd[i] = 0;
 926 
 927     for (j = 0; j < hgt; j++) {
 928       mlib_s32 *pk = k;
 929 
 930       for (l = 0; l < n; l++) {
 931         DTYPE *sp0 = sl + l*sll;
 932 
 933         for (off = 0; off < m;) {
 934           sp = sp0 + off*chan1;
 935           dp = dl;
 936 
 937           kw = m - off;
 938 
 939           if (kw > 2*MAX_KER) kw = MAX_KER; else
 940             if (kw > MAX_KER) kw = kw/2;
 941           off += kw;
 942 
 943           p2 = sp[0]; p3 = sp[chan1]; p4 = sp[chan2];
 944           p5 = sp[chan2 + chan1]; p6 = sp[chan2 + chan2]; p7 = sp[5*chan1];
 945 
 946           k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
 947           k4 = pk[4]; k5 = pk[5]; k6 = pk[6];
 948           pk += kw;
 949 
 950           sp += (kw - 1)*chan1;
 951 
 952           if (kw == 7) {
 953 
 954             if (l < (n - 1) || off < m) {
 955 #ifdef __SUNPRO_C
 956 #pragma pipeloop(0)
 957 #endif /* __SUNPRO_C */
 958               for (i = 0; i <= (wid - 2); i += 2) {
 959                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
 960                 p6 = sp[0];
 961                 p7 = sp[chan1];
 962 
 963                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6;
 964                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6;
 965 
 966                 sp += chan2;
 967               }
 968 
 969             } else {
 970 #ifdef __SUNPRO_C
 971 #pragma pipeloop(0)
 972 #endif /* __SUNPRO_C */
 973               for (i = 0; i <= (wid - 2); i += 2) {
 974                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
 975                 p6 = sp[0];
 976                 p7 = sp[chan1];
 977 
 978                 d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i    ]);
 979                 d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1]);
 980 
 981                 STORE_RES(dp[0    ], d0);
 982                 STORE_RES(dp[chan1], d1);
 983 
 984                 buffd[i    ] = 0;
 985                 buffd[i + 1] = 0;
 986 
 987                 sp += chan2;
 988                 dp += chan2;
 989               }
 990             }
 991 
 992           } else if (kw == 6) {
 993 
 994             if (l < (n - 1) || off < m) {
 995 #ifdef __SUNPRO_C
 996 #pragma pipeloop(0)
 997 #endif /* __SUNPRO_C */
 998               for (i = 0; i <= (wid - 2); i += 2) {
 999                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6;
1000                 p5 = sp[0];
1001                 p6 = sp[chan1];
1002 
1003                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5;
1004                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5;
1005 
1006                 sp += chan2;
1007               }
1008 
1009             } else {
1010 #ifdef __SUNPRO_C
1011 #pragma pipeloop(0)
1012 #endif /* __SUNPRO_C */
1013               for (i = 0; i <= (wid - 2); i += 2) {
1014                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6;
1015                 p5 = sp[0];
1016                 p6 = sp[chan1];
1017 
1018                 d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + buffd[i    ]);
1019                 d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + buffd[i + 1]);
1020 
1021                 STORE_RES(dp[0    ], d0);
1022                 STORE_RES(dp[chan1], d1);
1023 
1024                 buffd[i    ] = 0;
1025                 buffd[i + 1] = 0;
1026 
1027                 sp += chan2;
1028                 dp += chan2;
1029               }
1030             }
1031 
1032           } else if (kw == 5) {
1033 
1034             if (l < (n - 1) || off < m) {
1035 #ifdef __SUNPRO_C
1036 #pragma pipeloop(0)
1037 #endif /* __SUNPRO_C */
1038               for (i = 0; i <= (wid - 2); i += 2) {
1039                 p0 = p2; p1 = p3; p2 = p4; p3 = p5;
1040                 p4 = sp[0];
1041                 p5 = sp[chan1];
1042 
1043                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4;
1044                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4;
1045 
1046                 sp += chan2;
1047               }
1048 
1049             } else {
1050 #ifdef __SUNPRO_C
1051 #pragma pipeloop(0)
1052 #endif /* __SUNPRO_C */
1053               for (i = 0; i <= (wid - 2); i += 2) {
1054                 p0 = p2; p1 = p3; p2 = p4; p3 = p5;
1055                 p4 = sp[0];
1056                 p5 = sp[chan1];
1057 
1058                 d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + buffd[i    ]);
1059                 d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + buffd[i + 1]);
1060 
1061                 STORE_RES(dp[0    ], d0);
1062                 STORE_RES(dp[chan1], d1);
1063 
1064                 buffd[i    ] = 0;
1065                 buffd[i + 1] = 0;
1066 
1067                 sp += chan2;
1068                 dp += chan2;
1069               }
1070             }
1071 
1072           } else if (kw == 4) {
1073 
1074             if (l < (n - 1) || off < m) {
1075 #ifdef __SUNPRO_C
1076 #pragma pipeloop(0)
1077 #endif /* __SUNPRO_C */
1078               for (i = 0; i <= (wid - 2); i += 2) {
1079                 p0 = p2; p1 = p3; p2 = p4;
1080                 p3 = sp[0];
1081                 p4 = sp[chan1];
1082 
1083                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3;
1084                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3;
1085 
1086                 sp += chan2;
1087               }
1088 
1089             } else {
1090 #ifdef __SUNPRO_C
1091 #pragma pipeloop(0)
1092 #endif /* __SUNPRO_C */
1093               for (i = 0; i <= (wid - 2); i += 2) {
1094                 p0 = p2; p1 = p3; p2 = p4;
1095                 p3 = sp[0];
1096                 p4 = sp[chan1];
1097 
1098                 d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + buffd[i    ]);
1099                 d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + buffd[i + 1]);
1100 
1101                 STORE_RES(dp[0    ], d0);
1102                 STORE_RES(dp[chan1], d1);
1103 
1104                 buffd[i    ] = 0;
1105                 buffd[i + 1] = 0;
1106 
1107                 sp += chan2;
1108                 dp += chan2;
1109               }
1110             }
1111 
1112           } else if (kw == 3) {
1113 
1114             if (l < (n - 1) || off < m) {
1115 #ifdef __SUNPRO_C
1116 #pragma pipeloop(0)
1117 #endif /* __SUNPRO_C */
1118               for (i = 0; i <= (wid - 2); i += 2) {
1119                 p0 = p2; p1 = p3;
1120                 p2 = sp[0];
1121                 p3 = sp[chan1];
1122 
1123                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2;
1124                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2;
1125 
1126                 sp += chan2;
1127               }
1128 
1129             } else {
1130 #ifdef __SUNPRO_C
1131 #pragma pipeloop(0)
1132 #endif /* __SUNPRO_C */
1133               for (i = 0; i <= (wid - 2); i += 2) {
1134                 p0 = p2; p1 = p3;
1135                 p2 = sp[0];
1136                 p3 = sp[chan1];
1137 
1138                 d0 = (p0*k0 + p1*k1 + p2*k2 + buffd[i    ]);
1139                 d1 = (p1*k0 + p2*k1 + p3*k2 + buffd[i + 1]);
1140 
1141                 STORE_RES(dp[0    ], d0);
1142                 STORE_RES(dp[chan1], d1);
1143 
1144                 buffd[i    ] = 0;
1145                 buffd[i + 1] = 0;
1146 
1147                 sp += chan2;
1148                 dp += chan2;
1149               }
1150             }
1151 
1152           } else if (kw == 2) {
1153 
1154             if (l < (n - 1) || off < m) {
1155 #ifdef __SUNPRO_C
1156 #pragma pipeloop(0)
1157 #endif /* __SUNPRO_C */
1158               for (i = 0; i <= (wid - 2); i += 2) {
1159                 p0 = p2;
1160                 p1 = sp[0];
1161                 p2 = sp[chan1];
1162 
1163                 buffd[i    ] += p0*k0 + p1*k1;
1164                 buffd[i + 1] += p1*k0 + p2*k1;
1165 
1166                 sp += chan2;
1167               }
1168 
1169             } else {
1170 #ifdef __SUNPRO_C
1171 #pragma pipeloop(0)
1172 #endif /* __SUNPRO_C */
1173               for (i = 0; i <= (wid - 2); i += 2) {
1174                 p0 = p2;
1175                 p1 = sp[0];
1176                 p2 = sp[chan1];
1177 
1178                 d0 = (p0*k0 + p1*k1 + buffd[i    ]);
1179                 d1 = (p1*k0 + p2*k1 + buffd[i + 1]);
1180 
1181                 STORE_RES(dp[0    ], d0);
1182                 STORE_RES(dp[chan1], d1);
1183 
1184                 buffd[i    ] = 0;
1185                 buffd[i + 1] = 0;
1186 
1187                 sp += chan2;
1188                 dp += chan2;
1189               }
1190             }
1191 
1192           } else /*if (kw == 1)*/ {
1193 
1194             if (l < (n - 1) || off < m) {
1195 #ifdef __SUNPRO_C
1196 #pragma pipeloop(0)
1197 #endif /* __SUNPRO_C */
1198               for (i = 0; i <= (wid - 2); i += 2) {
1199                 p0 = sp[0];
1200                 p1 = sp[chan1];
1201 
1202                 buffd[i    ] += p0*k0;
1203                 buffd[i + 1] += p1*k0;
1204 
1205                 sp += chan2;
1206               }
1207 
1208             } else {
1209 #ifdef __SUNPRO_C
1210 #pragma pipeloop(0)
1211 #endif /* __SUNPRO_C */
1212               for (i = 0; i <= (wid - 2); i += 2) {
1213                 p0 = sp[0];
1214                 p1 = sp[chan1];
1215 
1216                 d0 = (p0*k0 + buffd[i    ]);
1217                 d1 = (p1*k0 + buffd[i + 1]);
1218 
1219                 STORE_RES(dp[0    ], d0);
1220                 STORE_RES(dp[chan1], d1);
1221 
1222                 buffd[i    ] = 0;
1223                 buffd[i + 1] = 0;
1224 
1225                 sp += chan2;
1226                 dp += chan2;
1227               }
1228             }
1229           }
1230         }
1231       }
1232 
1233       /* last pixels */
1234       for (; i < wid; i++) {
1235         mlib_s32 *pk = k, s = 0;
1236         mlib_s32 x;
1237 
1238         for (l = 0; l < n; l++) {
1239           sp = sl + l*sll + i*chan1;
1240 
1241           for (x = 0; x < m; x++) {
1242             s += sp[0] * pk[0];
1243             sp += chan1;
1244             pk ++;
1245           }
1246         }
1247 
1248         STORE_RES(dp[0], s);
1249 
1250         sp += chan1;
1251         dp += chan1;
1252       }
1253 
1254       sl += sll;
1255       dl += dll;
1256     }
1257   }
1258 
1259   if (buffd != buff) mlib_free(buffd);
1260   if (k != k_locl) mlib_free(k);
1261 
1262   return MLIB_SUCCESS;
1263 }
1264 
1265 /***************************************************************/