1 /*
   2  * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 
  27 /*
  28  * FUNCTION
  29  *   Internal functions for mlib_ImageConv* on U8/S16/U16 types and
  30  *   MLIB_EDGE_DST_NO_WRITE mask
  31  */
  32 
  33 #include "mlib_image.h"
  34 #include "mlib_c_ImageConv.h"
  35 
  36 /*
 * IMG_TYPE selects the pixel type this file is compiled for:
 *   1 - mlib_u8, 2 - mlib_s16, 3 - mlib_u16.
 *
 * Each branch below defines the same set of names:
 *   DTYPE        pixel element type
 *   CONV_FUNC    name of the floating-point convolution entry point
 *   CONV_FUNC_I  name of the integer convolution entry point
 *   DSCALE       initial value of the kernel scale factor (fscale)
 *   FROM_S32     turns the 32-bit result of D2I into a pixel value
 *   S64TOS32     applied by LOAD_BUFF to the pixel that lands in the
 *                low half of the packed 64-bit word
 *   SAT_OFF      text pasted after the argument of CLAMP_S32 by D2I
 */
  39 #define IMG_TYPE 3
  40 
  41 /***************************************************************/
  42 #if IMG_TYPE == 1
  43 
/* mlib_u8: D2I subtracts 2^31 (SAT_OFF) before clamping and FROM_S32
   flips bit 7 of the value shifted down by 24, which adds it back. */
  44 #define DTYPE             mlib_u8
  45 #define CONV_FUNC(KERN)   mlib_c_conv##KERN##nw_u8
  46 #define CONV_FUNC_I(KERN) mlib_i_conv##KERN##nw_u8
  47 #define DSCALE            (1 << 24)
  48 #define FROM_S32(x)       (((x) >> 24) ^ 128)
  49 #define S64TOS32(x)       (x)
  50 #define SAT_OFF           -(1u << 31)
  51 
  52 #elif IMG_TYPE == 2
  53 
/* mlib_s16: no offset (SAT_OFF is empty); the low 32 bits are masked
   in S64TOS32 because a sign-extended value is being packed. */
  54 #define DTYPE             mlib_s16
  55 #define CONV_FUNC(KERN)   mlib_conv##KERN##nw_s16
  56 #define CONV_FUNC_I(KERN) mlib_i_conv##KERN##nw_s16
  57 #define DSCALE            65536.0
  58 #define FROM_S32(x)       ((x) >> 16)
  59 #define S64TOS32(x)       ((x) & 0xffffffff)
  60 #define SAT_OFF
  61 
  62 #elif IMG_TYPE == 3
  63 
/* mlib_u16: same offset scheme as mlib_u8, with a 16-bit shift and
   bit 15 flipped by FROM_S32. */
  64 #define DTYPE             mlib_u16
  65 #define CONV_FUNC(KERN)   mlib_conv##KERN##nw_u16
  66 #define CONV_FUNC_I(KERN) mlib_i_conv##KERN##nw_u16
  67 #define DSCALE            65536.0
  68 #define FROM_S32(x)       (((x) >> 16) ^ 0x8000)
  69 #define S64TOS32(x)       (x)
  70 #define SAT_OFF           -(1u << 31)
  71 
  72 #endif /* IMG_TYPE == 1 */
  73 
  74 /***************************************************************/
/* Element count of the on-stack scratch arrays declared as
   buff[BUFF_SIZE] in the convolution routines. */
  75 #define BUFF_SIZE   1600
  76 
/* Byte budget from which mlib_ImageConv1xN derives the number of
   rows it handles per pass (max_hsize). */
  77 #define CACHE_SIZE  (64*1024)
  78 
  79 /***************************************************************/
/* Floating-point type used for scaled kernels and accumulators. */
  80 #define FTYPE mlib_d64
  81 
/* CLAMP_S32(x): convert x to mlib_s32. Without
   MLIB_USE_FTOI_CLAMPING the value is saturated explicitly to
   [MLIB_S32_MIN, MLIB_S32_MAX]; with it a plain cast is used
   (presumably because the target's conversion already saturates -
   TODO confirm). Note that x is evaluated more than once in the
   first form. */
  82 #ifndef MLIB_USE_FTOI_CLAMPING
  83 
  84 #define CLAMP_S32(x)                                            \
  85   (((x) <= MLIB_S32_MIN) ? MLIB_S32_MIN : (((x) >= MLIB_S32_MAX) ? MLIB_S32_MAX : (mlib_s32)(x)))
  86 
  87 #else
  88 
  89 #define CLAMP_S32(x) ((mlib_s32)(x))
  90 
  91 #endif /* MLIB_USE_FTOI_CLAMPING */
  92 
  93 /***************************************************************/
/* D2I(x): apply the per-type offset SAT_OFF to x (textual paste, so
   "(x) -(1u << 31)" for the unsigned types, nothing for mlib_s16)
   and convert the result with CLAMP_S32. */
  94 #define D2I(x) CLAMP_S32((x) SAT_OFF)
  95 
  96 /***************************************************************/
/* STORE2(res0, res1): write two results to dp[0] and dp[chan1]; the
   two arguments are swapped on little-endian builds. Expands to two
   statements and relies on dp and chan1 being in scope. */
  97 #ifdef _LITTLE_ENDIAN
  98 
  99 #define STORE2(res0, res1)                                      \
 100   dp[0    ] = res1;                                             \
 101   dp[chan1] = res0
 102 
 103 #else
 104 
 105 #define STORE2(res0, res1)                                      \
 106   dp[0    ] = res0;                                             \
 107   dp[chan1] = res1
 108 
 109 #endif /* _LITTLE_ENDIAN */
 110 
 111 /***************************************************************/
/* LOAD_BUFF(buff): copy the two pixels sp[0] and sp[chan1] into
   buff[i] and buff[i + 1]. With _NO_LONGLONG this is two element
   stores; otherwise both pixels are packed into one mlib_s64 (order
   chosen by endianness so that sp[0] ends up at the lower address)
   and written with a single 64-bit store through a cast pointer.
   Relies on i, sp and chan1 being in scope; the 64-bit form assumes
   buff + i is suitably aligned for mlib_s64. */
 112 #ifdef _NO_LONGLONG
 113 
 114 #define LOAD_BUFF(buff)                                         \
 115   buff[i    ] = sp[0];                                          \
 116   buff[i + 1] = sp[chan1]
 117 
 118 #else /* _NO_LONGLONG */
 119 
 120 #ifdef _LITTLE_ENDIAN
 121 
 122 #define LOAD_BUFF(buff)                                         \
 123   *(mlib_s64*)(buff + i) = (((mlib_s64)sp[chan1]) << 32) | S64TOS32((mlib_s64)sp[0])
 124 
 125 #else /* _LITTLE_ENDIAN */
 126 
 127 #define LOAD_BUFF(buff)                                         \
 128   *(mlib_s64*)(buff + i) = (((mlib_s64)sp[0]) << 32) | S64TOS32((mlib_s64)sp[chan1])
 129 
 130 #endif /* _LITTLE_ENDIAN */
 131 #endif /* _NO_LONGLONG */
 132 
 133 /***************************************************************/
/* Overlay of one mlib_d64 with two 32-bit integers. The MxN routine
   uses it to read back, as i32s.i0 / i32s.i1, the pair of values
   that LOAD_BUFF stored as one 64-bit word. The f32s members are
   declared as mlib_s32 as well, despite their names. */
 134 typedef union {
 135   mlib_d64 d64;
 136   struct {
 137     mlib_s32 i0;
 138     mlib_s32 i1;
 139   } i32s;
 140   struct {
 141     mlib_s32 f0;
 142     mlib_s32 f1;
 143   } f32s;
 144 } d64_2x32;
 145 
 146 /***************************************************************/
/* DEF_VARS(type): declare the locals shared by the convolution
   routines - source/destination base, line and pixel pointers, the
   scratch pointer pbuff, image geometry and loop counters. pbuff is
   initialised to an array named buff, which must therefore be
   declared before this macro is used. */
 147 #define DEF_VARS(type)                                          \
 148   type     *adr_src, *sl, *sp = NULL;                           \
 149   type     *adr_dst, *dl, *dp = NULL;                           \
 150   FTYPE    *pbuff = buff;                                       \
 151   mlib_s32 wid, hgt, sll, dll;                                  \
 152   mlib_s32 nchannel, chan1;                                     \
 153   mlib_s32 i, j, c
 154 
 155 /***************************************************************/
/* GET_SRC_DST_PARAMETERS(type): fill the DEF_VARS locals from the
   images named src and dst. Width, height and channel count are all
   taken from src. Both strides are divided by sizeof(type), i.e.
   turned into element counts (assuming mlib_ImageGetStride reports
   bytes - TODO confirm). */
 156 #define GET_SRC_DST_PARAMETERS(type)                            \
 157   hgt = mlib_ImageGetHeight(src);                               \
 158   wid = mlib_ImageGetWidth(src);                                \
 159   nchannel = mlib_ImageGetChannels(src);                        \
 160   sll = mlib_ImageGetStride(src) / sizeof(type);                \
 161   dll = mlib_ImageGetStride(dst) / sizeof(type);                \
 162   adr_src = (type *)mlib_ImageGetData(src);                     \
 163   adr_dst = (type *)mlib_ImageGetData(dst)
 164 
 165 /***************************************************************/
/* CLAMP_STORE(dst, val): store the integer val into the pixel dst,
   saturated to the range of the current pixel type. Only defined
   when __sparc is not; used by STORE_RES in the integer routine.
   Each variant expands to an if statement (not an expression), and
   val is evaluated more than once. */
 166 #ifndef __sparc
 167 
 168 #if IMG_TYPE == 1
 169 
 170 /* Test for the presence of any "1" bit in bits
   8 to 31 of val. If present, then val is either
   negative or >255. If over/underflows of 8 bits
   are uncommon, then this technique can be a win,
   since only a single test, rather than two, is
   necessary to determine if clamping is needed.
   On the other hand, if over/underflows are common,
   it adds an extra test.
*/
 179 #define CLAMP_STORE(dst, val)                                   \
 180   if (val & 0xffffff00) {                                       \
 181     if (val < MLIB_U8_MIN)                                      \
 182       dst = MLIB_U8_MIN;                                        \
 183     else                                                        \
 184       dst = MLIB_U8_MAX;                                        \
 185   } else {                                                      \
 186     dst = (mlib_u8)val;                                         \
 187   }
 188 
 189 #elif IMG_TYPE == 2
 190 
/* mlib_s16: two-sided compare against MLIB_S16_MAX / MLIB_S16_MIN. */
 191 #define CLAMP_STORE(dst, val)                                   \
 192   if (val >= MLIB_S16_MAX)                                      \
 193     dst = MLIB_S16_MAX;                                         \
 194   else if (val <= MLIB_S16_MIN)                                 \
 195     dst = MLIB_S16_MIN;                                         \
 196   else                                                          \
 197     dst = (mlib_s16)val
 198 
 199 #elif IMG_TYPE == 3
 200 
/* mlib_u16: two-sided compare against MLIB_U16_MAX / MLIB_U16_MIN. */
 201 #define CLAMP_STORE(dst, val)                                   \
 202   if (val >= MLIB_U16_MAX)                                      \
 203     dst = MLIB_U16_MAX;                                         \
 204   else if (val <= MLIB_U16_MIN)                                 \
 205     dst = MLIB_U16_MIN;                                         \
 206   else                                                          \
 207     dst = (mlib_u16)val
 208 
 209 #endif /* IMG_TYPE == 1 */
 210 #endif /* __sparc */
 211 
 212 /***************************************************************/
/* MAX_KER: widest horizontal slice of a kernel row handled by one
   unrolled pass (wider rows are split into slices of at most
   MAX_KER taps).
   MAX_N: sizes the fixed tables - 2*(MAX_N + 1) row pointers in the
   floating-point routine and MAX_N*MAX_N kernel entries in the
   integer routine; larger kernels use heap storage instead. */
 213 #define MAX_KER   7
 214 #define MAX_N    15
 215 
 216 static mlib_status mlib_ImageConv1xN(mlib_image       *dst,
 217                                      const mlib_image *src,
 218                                      const mlib_d64   *k,
 219                                      mlib_s32         n,
 220                                      mlib_s32         dn,
 221                                      mlib_s32         cmask)
 222 {
 223   FTYPE    buff[BUFF_SIZE];
 224   mlib_s32 off, kh;
 225   mlib_s32 d0, d1;
 226   const FTYPE    *pk;
 227   FTYPE    k0, k1, k2, k3;
 228   FTYPE    p0, p1, p2, p3, p4;
 229   DEF_VARS(DTYPE);
 230   DTYPE    *sl_c, *dl_c, *sl0;
 231   mlib_s32 l, hsize, max_hsize;
 232   GET_SRC_DST_PARAMETERS(DTYPE);
 233 
 234   hgt -= (n - 1);
 235   adr_dst += dn*dll;
 236 
 237   max_hsize = (CACHE_SIZE/sizeof(DTYPE))/sll;
 238 
 239   if (!max_hsize) max_hsize = 1;
 240 
 241   if (max_hsize > BUFF_SIZE) {
 242     pbuff = mlib_malloc(sizeof(FTYPE)*max_hsize);
 243   }
 244 
 245   chan1 = nchannel;
 246 
 247   sl_c = adr_src;
 248   dl_c = adr_dst;
 249 
 250   for (l = 0; l < hgt; l += hsize) {
 251     hsize = hgt - l;
 252 
 253     if (hsize > max_hsize) hsize = max_hsize;
 254 
 255     for (c = 0; c < nchannel; c++) {
 256       if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
 257 
 258       sl = sl_c + c;
 259       dl = dl_c + c;
 260 
 261 #ifdef __SUNPRO_C
 262 #pragma pipeloop(0)
 263 #endif /* __SUNPRO_C */
 264       for (j = 0; j < hsize; j++) pbuff[j] = 0.0;
 265 
 266       for (i = 0; i < wid; i++) {
 267         sl0 = sl;
 268 
 269         for (off = 0; off < (n - 4); off += 4) {
 270           pk = k + off;
 271           sp = sl0;
 272 
 273           k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
 274           p2 = sp[0]; p3 = sp[sll]; p4 = sp[2*sll];
 275           sp += 3*sll;
 276 
 277 #ifdef __SUNPRO_C
 278 #pragma pipeloop(0)
 279 #endif /* __SUNPRO_C */
 280           for (j = 0; j < hsize; j += 2) {
 281             p0 = p2; p1 = p3; p2 = p4;
 282             p3 = sp[0];
 283             p4 = sp[sll];
 284 
 285             pbuff[j    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3;
 286             pbuff[j + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3;
 287 
 288             sp += 2*sll;
 289           }
 290 
 291           sl0 += 4*sll;
 292         }
 293 
 294         pk = k + off;
 295         sp = sl0;
 296 
 297         k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
 298         p2 = sp[0]; p3 = sp[sll]; p4 = sp[2*sll];
 299 
 300         dp = dl;
 301         kh = n - off;
 302 
 303         if (kh == 4) {
 304           sp += 3*sll;
 305 
 306 #ifdef __SUNPRO_C
 307 #pragma pipeloop(0)
 308 #endif /* __SUNPRO_C */
 309           for (j = 0; j <= (hsize - 2); j += 2) {
 310             p0 = p2; p1 = p3; p2 = p4;
 311             p3 = sp[0];
 312             p4 = sp[sll];
 313 
 314             d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + pbuff[j]);
 315             d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + pbuff[j + 1]);
 316 
 317             dp[0  ] = FROM_S32(d0);
 318             dp[dll] = FROM_S32(d1);
 319 
 320             pbuff[j] = 0;
 321             pbuff[j + 1] = 0;
 322 
 323             sp += 2*sll;
 324             dp += 2*dll;
 325           }
 326 
 327           if (j < hsize) {
 328             p0 = p2; p1 = p3; p2 = p4;
 329             p3 = sp[0];
 330 
 331             d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + pbuff[j]);
 332 
 333             pbuff[j] = 0;
 334 
 335             dp[0] = FROM_S32(d0);
 336           }
 337 
 338         } else if (kh == 3) {
 339           sp += 2*sll;
 340 
 341 #ifdef __SUNPRO_C
 342 #pragma pipeloop(0)
 343 #endif /* __SUNPRO_C */
 344           for (j = 0; j <= (hsize - 2); j += 2) {
 345             p0 = p2; p1 = p3;
 346             p2 = sp[0];
 347             p3 = sp[sll];
 348 
 349             d0 = D2I(p0*k0 + p1*k1 + p2*k2 + pbuff[j]);
 350             d1 = D2I(p1*k0 + p2*k1 + p3*k2 + pbuff[j + 1]);
 351 
 352             dp[0  ] = FROM_S32(d0);
 353             dp[dll] = FROM_S32(d1);
 354 
 355             pbuff[j] = 0;
 356             pbuff[j + 1] = 0;
 357 
 358             sp += 2*sll;
 359             dp += 2*dll;
 360           }
 361 
 362           if (j < hsize) {
 363             p0 = p2; p1 = p3;
 364             p2 = sp[0];
 365 
 366             d0 = D2I(p0*k0 + p1*k1 + p2*k2 + pbuff[j]);
 367 
 368             pbuff[j] = 0;
 369 
 370             dp[0] = FROM_S32(d0);
 371           }
 372 
 373         } else if (kh == 2) {
 374           sp += sll;
 375 
 376 #ifdef __SUNPRO_C
 377 #pragma pipeloop(0)
 378 #endif /* __SUNPRO_C */
 379           for (j = 0; j <= (hsize - 2); j += 2) {
 380             p0 = p2;
 381             p1 = sp[0];
 382             p2 = sp[sll];
 383 
 384             d0 = D2I(p0*k0 + p1*k1 + pbuff[j]);
 385             d1 = D2I(p1*k0 + p2*k1 + pbuff[j + 1]);
 386 
 387             dp[0  ] = FROM_S32(d0);
 388             dp[dll] = FROM_S32(d1);
 389 
 390             pbuff[j] = 0;
 391             pbuff[j + 1] = 0;
 392 
 393             sp += 2*sll;
 394             dp += 2*dll;
 395           }
 396 
 397           if (j < hsize) {
 398             p0 = p2;
 399             p1 = sp[0];
 400 
 401             d0 = D2I(p0*k0 + p1*k1 + pbuff[j]);
 402 
 403             pbuff[j] = 0;
 404 
 405             dp[0] = FROM_S32(d0);
 406           }
 407 
 408         } else /* if (kh == 1) */ {
 409 #ifdef __SUNPRO_C
 410 #pragma pipeloop(0)
 411 #endif /* __SUNPRO_C */
 412           for (j = 0; j < hsize; j++) {
 413             p0 = sp[0];
 414 
 415             d0 = D2I(p0*k0 + pbuff[j]);
 416 
 417             dp[0] = FROM_S32(d0);
 418 
 419             pbuff[j] = 0;
 420 
 421             sp += sll;
 422             dp += dll;
 423           }
 424         }
 425 
 426         sl += chan1;
 427         dl += chan1;
 428       }
 429     }
 430 
 431     sl_c += max_hsize*sll;
 432     dl_c += max_hsize*dll;
 433   }
 434 
 435   if (pbuff != buff) mlib_free(pbuff);
 436 
 437   return MLIB_SUCCESS;
 438 }
 439 
 440 /***************************************************************/
 441 mlib_status CONV_FUNC(MxN)(mlib_image       *dst,
 442                            const mlib_image *src,
 443                            const mlib_s32   *kernel,
 444                            mlib_s32         m,
 445                            mlib_s32         n,
 446                            mlib_s32         dm,
 447                            mlib_s32         dn,
 448                            mlib_s32         scale,
 449                            mlib_s32         cmask)
 450 {
 451   FTYPE    buff[BUFF_SIZE], *buffs_arr[2*(MAX_N + 1)];
 452   FTYPE    **buffs = buffs_arr, *buffd;
 453   FTYPE    akernel[256], *k = akernel, fscale = DSCALE;
 454   mlib_s32 mn, l, off, kw, bsize, buff_ind;
 455   mlib_s32 d0, d1;
 456   FTYPE    k0, k1, k2, k3, k4, k5, k6;
 457   FTYPE    p0, p1, p2, p3, p4, p5, p6, p7;
 458   d64_2x32 dd;
 459   DEF_VARS(DTYPE);
 460   mlib_s32 chan2;
 461   mlib_s32 *buffo, *buffi;
 462   mlib_status status = MLIB_SUCCESS;
 463 
 464   GET_SRC_DST_PARAMETERS(DTYPE);
 465 
 466   if (scale > 30) {
 467     fscale *= 1.0/(1 << 30);
 468     scale -= 30;
 469   }
 470 
 471   fscale /= (1 << scale);
 472 
 473   mn = m*n;
 474 
 475   if (mn > 256) {
 476     k = mlib_malloc(mn*sizeof(mlib_d64));
 477 
 478     if (k == NULL) return MLIB_FAILURE;
 479   }
 480 
 481   for (i = 0; i < mn; i++) {
 482     k[i] = kernel[i]*fscale;
 483   }
 484 
 485   if (m == 1) {
 486     status = mlib_ImageConv1xN(dst, src, k, n, dn, cmask);
 487     FREE_AND_RETURN_STATUS;
 488   }
 489 
 490   bsize = (n + 3)*wid;
 491 
 492   if ((bsize > BUFF_SIZE) || (n > MAX_N)) {
 493     pbuff = mlib_malloc(sizeof(FTYPE)*bsize + sizeof(FTYPE *)*2*(n + 1));
 494 
 495     if (pbuff == NULL) {
 496       status = MLIB_FAILURE;
 497       FREE_AND_RETURN_STATUS;
 498     }
 499     buffs = (FTYPE   **)(pbuff + bsize);
 500   }
 501 
 502   for (l = 0; l < (n + 1); l++) buffs[l] = pbuff + l*wid;
 503   for (l = 0; l < (n + 1); l++) buffs[l + (n + 1)] = buffs[l];
 504   buffd = buffs[n] + wid;
 505   buffo = (mlib_s32*)(buffd + wid);
 506   buffi = buffo + (wid &~ 1);
 507 
 508   chan1 = nchannel;
 509   chan2 = chan1 + chan1;
 510 
 511   wid -= (m - 1);
 512   hgt -= (n - 1);
 513   adr_dst += dn*dll + dm*nchannel;
 514 
 515   for (c = 0; c < nchannel; c++) {
 516     if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
 517 
 518     sl = adr_src + c;
 519     dl = adr_dst + c;
 520 
 521     for (l = 0; l < n; l++) {
 522       FTYPE    *buff = buffs[l];
 523 
 524 #ifdef __SUNPRO_C
 525 #pragma pipeloop(0)
 526 #endif /* __SUNPRO_C */
 527       for (i = 0; i < wid + (m - 1); i++) {
 528         buff[i] = (FTYPE)sl[i*chan1];
 529       }
 530 
 531       sl += sll;
 532     }
 533 
 534     buff_ind = 0;
 535 
 536 #ifdef __SUNPRO_C
 537 #pragma pipeloop(0)
 538 #endif /* __SUNPRO_C */
 539     for (i = 0; i < wid; i++) buffd[i] = 0.0;
 540 
 541     for (j = 0; j < hgt; j++) {
 542       FTYPE    **buffc = buffs + buff_ind;
 543       FTYPE    *buffn = buffc[n];
 544       FTYPE    *pk = k;
 545 
 546       for (l = 0; l < n; l++) {
 547         FTYPE    *buff_l = buffc[l];
 548 
 549         for (off = 0; off < m;) {
 550           FTYPE    *buff = buff_l + off;
 551 
 552           kw = m - off;
 553 
 554           if (kw > 2*MAX_KER) kw = MAX_KER; else
 555             if (kw > MAX_KER) kw = kw/2;
 556           off += kw;
 557 
 558           sp = sl;
 559           dp = dl;
 560 
 561           p2 = buff[0]; p3 = buff[1]; p4 = buff[2];
 562           p5 = buff[3]; p6 = buff[4]; p7 = buff[5];
 563 
 564           k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
 565           k4 = pk[4]; k5 = pk[5]; k6 = pk[6];
 566           pk += kw;
 567 
 568           if (kw == 7) {
 569 
 570             if (l < (n - 1) || off < m) {
 571 #ifdef __SUNPRO_C
 572 #pragma pipeloop(0)
 573 #endif /* __SUNPRO_C */
 574               for (i = 0; i <= (wid - 2); i += 2) {
 575                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
 576 
 577                 p6 = buff[i + 6]; p7 = buff[i + 7];
 578 
 579                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6;
 580                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6;
 581               }
 582 
 583             } else {
 584 #ifdef __SUNPRO_C
 585 #pragma pipeloop(0)
 586 #endif /* __SUNPRO_C */
 587               for (i = 0; i <= (wid - 2); i += 2) {
 588                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
 589 
 590                 p6 = buff[i + 6]; p7 = buff[i + 7];
 591 
 592                 LOAD_BUFF(buffi);
 593 
 594                 dd.d64 = *(FTYPE   *)(buffi + i);
 595                 buffn[i    ] = (FTYPE)dd.i32s.i0;
 596                 buffn[i + 1] = (FTYPE)dd.i32s.i1;
 597 
 598                 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i    ]);
 599                 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1]);
 600 
 601                 dp[0    ] = FROM_S32(d0);
 602                 dp[chan1] = FROM_S32(d1);
 603 
 604                 buffd[i    ] = 0.0;
 605                 buffd[i + 1] = 0.0;
 606 
 607                 sp += chan2;
 608                 dp += chan2;
 609               }
 610             }
 611 
 612           } else if (kw == 6) {
 613 
 614             if (l < (n - 1) || off < m) {
 615 #ifdef __SUNPRO_C
 616 #pragma pipeloop(0)
 617 #endif /* __SUNPRO_C */
 618               for (i = 0; i <= (wid - 2); i += 2) {
 619                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6;
 620 
 621                 p5 = buff[i + 5]; p6 = buff[i + 6];
 622 
 623                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5;
 624                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5;
 625               }
 626 
 627             } else {
 628 #ifdef __SUNPRO_C
 629 #pragma pipeloop(0)
 630 #endif /* __SUNPRO_C */
 631               for (i = 0; i <= (wid - 2); i += 2) {
 632                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6;
 633 
 634                 p5 = buff[i + 5]; p6 = buff[i + 6];
 635 
 636                 buffn[i    ] = (FTYPE)sp[0];
 637                 buffn[i + 1] = (FTYPE)sp[chan1];
 638 
 639                 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + buffd[i    ]);
 640                 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + buffd[i + 1]);
 641 
 642                 dp[0    ] = FROM_S32(d0);
 643                 dp[chan1] = FROM_S32(d1);
 644 
 645                 buffd[i    ] = 0.0;
 646                 buffd[i + 1] = 0.0;
 647 
 648                 sp += chan2;
 649                 dp += chan2;
 650               }
 651             }
 652 
 653           } else if (kw == 5) {
 654 
 655             if (l < (n - 1) || off < m) {
 656 #ifdef __SUNPRO_C
 657 #pragma pipeloop(0)
 658 #endif /* __SUNPRO_C */
 659               for (i = 0; i <= (wid - 2); i += 2) {
 660                 p0 = p2; p1 = p3; p2 = p4; p3 = p5;
 661 
 662                 p4 = buff[i + 4]; p5 = buff[i + 5];
 663 
 664                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4;
 665                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4;
 666               }
 667 
 668             } else {
 669 #ifdef __SUNPRO_C
 670 #pragma pipeloop(0)
 671 #endif /* __SUNPRO_C */
 672               for (i = 0; i <= (wid - 2); i += 2) {
 673                 p0 = p2; p1 = p3; p2 = p4; p3 = p5;
 674 
 675                 p4 = buff[i + 4]; p5 = buff[i + 5];
 676 
 677                 buffn[i    ] = (FTYPE)sp[0];
 678                 buffn[i + 1] = (FTYPE)sp[chan1];
 679 
 680                 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + buffd[i    ]);
 681                 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + buffd[i + 1]);
 682 
 683                 dp[0    ] = FROM_S32(d0);
 684                 dp[chan1] = FROM_S32(d1);
 685 
 686                 buffd[i    ] = 0.0;
 687                 buffd[i + 1] = 0.0;
 688 
 689                 sp += chan2;
 690                 dp += chan2;
 691               }
 692             }
 693 
 694           } else if (kw == 4) {
 695 
 696             if (l < (n - 1) || off < m) {
 697 #ifdef __SUNPRO_C
 698 #pragma pipeloop(0)
 699 #endif /* __SUNPRO_C */
 700               for (i = 0; i <= (wid - 2); i += 2) {
 701                 p0 = p2; p1 = p3; p2 = p4;
 702 
 703                 p3 = buff[i + 3]; p4 = buff[i + 4];
 704 
 705                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3;
 706                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3;
 707               }
 708 
 709             } else {
 710 #ifdef __SUNPRO_C
 711 #pragma pipeloop(0)
 712 #endif /* __SUNPRO_C */
 713               for (i = 0; i <= (wid - 2); i += 2) {
 714                 p0 = p2; p1 = p3; p2 = p4;
 715 
 716                 p3 = buff[i + 3]; p4 = buff[i + 4];
 717 
 718                 buffn[i    ] = (FTYPE)sp[0];
 719                 buffn[i + 1] = (FTYPE)sp[chan1];
 720 
 721                 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + buffd[i    ]);
 722                 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + buffd[i + 1]);
 723 
 724                 dp[0    ] = FROM_S32(d0);
 725                 dp[chan1] = FROM_S32(d1);
 726 
 727                 buffd[i    ] = 0.0;
 728                 buffd[i + 1] = 0.0;
 729 
 730                 sp += chan2;
 731                 dp += chan2;
 732               }
 733             }
 734 
 735           } else if (kw == 3) {
 736 
 737             if (l < (n - 1) || off < m) {
 738 #ifdef __SUNPRO_C
 739 #pragma pipeloop(0)
 740 #endif /* __SUNPRO_C */
 741               for (i = 0; i <= (wid - 2); i += 2) {
 742                 p0 = p2; p1 = p3;
 743 
 744                 p2 = buff[i + 2]; p3 = buff[i + 3];
 745 
 746                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2;
 747                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2;
 748               }
 749 
 750             } else {
 751 #ifdef __SUNPRO_C
 752 #pragma pipeloop(0)
 753 #endif /* __SUNPRO_C */
 754               for (i = 0; i <= (wid - 2); i += 2) {
 755                 p0 = p2; p1 = p3;
 756 
 757                 p2 = buff[i + 2]; p3 = buff[i + 3];
 758 
 759                 buffn[i    ] = (FTYPE)sp[0];
 760                 buffn[i + 1] = (FTYPE)sp[chan1];
 761 
 762                 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + buffd[i    ]);
 763                 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + buffd[i + 1]);
 764 
 765                 dp[0    ] = FROM_S32(d0);
 766                 dp[chan1] = FROM_S32(d1);
 767 
 768                 buffd[i    ] = 0.0;
 769                 buffd[i + 1] = 0.0;
 770 
 771                 sp += chan2;
 772                 dp += chan2;
 773               }
 774             }
 775 
 776           } else /*if (kw == 2)*/ {
 777 
 778             if (l < (n - 1) || off < m) {
 779 #ifdef __SUNPRO_C
 780 #pragma pipeloop(0)
 781 #endif /* __SUNPRO_C */
 782               for (i = 0; i <= (wid - 2); i += 2) {
 783                 p0 = p2;
 784 
 785                 p1 = buff[i + 1]; p2 = buff[i + 2];
 786 
 787                 buffd[i    ] += p0*k0 + p1*k1;
 788                 buffd[i + 1] += p1*k0 + p2*k1;
 789               }
 790 
 791             } else {
 792 #ifdef __SUNPRO_C
 793 #pragma pipeloop(0)
 794 #endif /* __SUNPRO_C */
 795               for (i = 0; i <= (wid - 2); i += 2) {
 796                 p0 = p2;
 797 
 798                 p1 = buff[i + 1]; p2 = buff[i + 2];
 799 
 800                 buffn[i    ] = (FTYPE)sp[0];
 801                 buffn[i + 1] = (FTYPE)sp[chan1];
 802 
 803                 d0 = D2I(p0*k0 + p1*k1 + buffd[i    ]);
 804                 d1 = D2I(p1*k0 + p2*k1 + buffd[i + 1]);
 805 
 806                 dp[0    ] = FROM_S32(d0);
 807                 dp[chan1] = FROM_S32(d1);
 808 
 809                 buffd[i    ] = 0.0;
 810                 buffd[i + 1] = 0.0;
 811 
 812                 sp += chan2;
 813                 dp += chan2;
 814               }
 815             }
 816           }
 817         }
 818       }
 819 
 820       /* last pixels */
 821       for (; i < wid; i++) {
 822         FTYPE    *pk = k, s = 0;
 823         mlib_s32 x, d0;
 824 
 825         for (l = 0; l < n; l++) {
 826           FTYPE    *buff = buffc[l] + i;
 827 
 828           for (x = 0; x < m; x++) s += buff[x] * (*pk++);
 829         }
 830 
 831         d0 = D2I(s);
 832         dp[0] = FROM_S32(d0);
 833 
 834         buffn[i] = (FTYPE)sp[0];
 835 
 836         sp += chan1;
 837         dp += chan1;
 838       }
 839 
 840       for (l = 0; l < (m - 1); l++) buffn[wid + l] = sp[l*chan1];
 841 
 842       /* next line */
 843       sl += sll;
 844       dl += dll;
 845 
 846       buff_ind++;
 847 
 848       if (buff_ind >= n + 1) buff_ind = 0;
 849     }
 850   }
 851 
 852   FREE_AND_RETURN_STATUS;
 853 }
 854 
 855 /***************************************************************/
 856 #ifndef __sparc /* for x86, using integer multiplies is faster */
 857 
/* STORE_RES(res, x): shift the integer sum x right by shift2 (in
   place, so x must be a modifiable variable) and store it into the
   pixel res through CLAMP_STORE. Expands to more than one statement
   and relies on a local named shift2. */
 858 #define STORE_RES(res, x)                                       \
 859   x >>= shift2;                                                 \
 860   CLAMP_STORE(res, x)
 861 
 862 mlib_status CONV_FUNC_I(MxN)(mlib_image       *dst,
 863                              const mlib_image *src,
 864                              const mlib_s32   *kernel,
 865                              mlib_s32         m,
 866                              mlib_s32         n,
 867                              mlib_s32         dm,
 868                              mlib_s32         dn,
 869                              mlib_s32         scale,
 870                              mlib_s32         cmask)
 871 {
 872   mlib_s32 buff[BUFF_SIZE], *buffd = buff;
 873   mlib_s32 l, off, kw;
 874   mlib_s32 d0, d1, shift1, shift2;
 875   mlib_s32 k0, k1, k2, k3, k4, k5, k6;
 876   mlib_s32 p0, p1, p2, p3, p4, p5, p6, p7;
 877   DTYPE    *adr_src, *sl, *sp = NULL;
 878   DTYPE    *adr_dst, *dl, *dp = NULL;
 879   mlib_s32 wid, hgt, sll, dll;
 880   mlib_s32 nchannel, chan1;
 881   mlib_s32 i, j, c;
 882   mlib_s32 chan2;
 883   mlib_s32 k_locl[MAX_N*MAX_N], *k = k_locl;
 884   GET_SRC_DST_PARAMETERS(DTYPE);
 885 
 886 #if IMG_TYPE != 1
 887   shift1 = 16;
 888 #else
 889   shift1 = 8;
 890 #endif /* IMG_TYPE != 1 */
 891   shift2 = scale - shift1;
 892 
 893   chan1 = nchannel;
 894   chan2 = chan1 + chan1;
 895 
 896   wid -= (m - 1);
 897   hgt -= (n - 1);
 898   adr_dst += dn*dll + dm*nchannel;
 899 
 900   if (wid > BUFF_SIZE) {
 901     buffd = mlib_malloc(sizeof(mlib_s32)*wid);
 902 
 903     if (buffd == NULL) return MLIB_FAILURE;
 904   }
 905 
 906   if (m*n > MAX_N*MAX_N) {
 907     k = mlib_malloc(sizeof(mlib_s32)*(m*n));
 908 
 909     if (k == NULL) {
 910       if (buffd != buff) mlib_free(buffd);
 911       return MLIB_FAILURE;
 912     }
 913   }
 914 
 915   for (i = 0; i < m*n; i++) {
 916     k[i] = kernel[i] >> shift1;
 917   }
 918 
 919   for (c = 0; c < nchannel; c++) {
 920     if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
 921 
 922     sl = adr_src + c;
 923     dl = adr_dst + c;
 924 
 925 #ifdef __SUNPRO_C
 926 #pragma pipeloop(0)
 927 #endif /* __SUNPRO_C */
 928     for (i = 0; i < wid; i++) buffd[i] = 0;
 929 
 930     for (j = 0; j < hgt; j++) {
 931       mlib_s32 *pk = k;
 932 
 933       for (l = 0; l < n; l++) {
 934         DTYPE *sp0 = sl + l*sll;
 935 
 936         for (off = 0; off < m;) {
 937           sp = sp0 + off*chan1;
 938           dp = dl;
 939 
 940           kw = m - off;
 941 
 942           if (kw > 2*MAX_KER) kw = MAX_KER; else
 943             if (kw > MAX_KER) kw = kw/2;
 944           off += kw;
 945 
 946           p2 = sp[0]; p3 = sp[chan1]; p4 = sp[chan2];
 947           p5 = sp[chan2 + chan1]; p6 = sp[chan2 + chan2]; p7 = sp[5*chan1];
 948 
 949           k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
 950           k4 = pk[4]; k5 = pk[5]; k6 = pk[6];
 951           pk += kw;
 952 
 953           sp += (kw - 1)*chan1;
 954 
 955           if (kw == 7) {
 956 
 957             if (l < (n - 1) || off < m) {
 958 #ifdef __SUNPRO_C
 959 #pragma pipeloop(0)
 960 #endif /* __SUNPRO_C */
 961               for (i = 0; i <= (wid - 2); i += 2) {
 962                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
 963                 p6 = sp[0];
 964                 p7 = sp[chan1];
 965 
 966                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6;
 967                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6;
 968 
 969                 sp += chan2;
 970               }
 971 
 972             } else {
 973 #ifdef __SUNPRO_C
 974 #pragma pipeloop(0)
 975 #endif /* __SUNPRO_C */
 976               for (i = 0; i <= (wid - 2); i += 2) {
 977                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
 978                 p6 = sp[0];
 979                 p7 = sp[chan1];
 980 
 981                 d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i    ]);
 982                 d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1]);
 983 
 984                 STORE_RES(dp[0    ], d0);
 985                 STORE_RES(dp[chan1], d1);
 986 
 987                 buffd[i    ] = 0;
 988                 buffd[i + 1] = 0;
 989 
 990                 sp += chan2;
 991                 dp += chan2;
 992               }
 993             }
 994 
 995           } else if (kw == 6) {
 996 
 997             if (l < (n - 1) || off < m) {
 998 #ifdef __SUNPRO_C
 999 #pragma pipeloop(0)
1000 #endif /* __SUNPRO_C */
1001               for (i = 0; i <= (wid - 2); i += 2) {
1002                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6;
1003                 p5 = sp[0];
1004                 p6 = sp[chan1];
1005 
1006                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5;
1007                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5;
1008 
1009                 sp += chan2;
1010               }
1011 
1012             } else {
1013 #ifdef __SUNPRO_C
1014 #pragma pipeloop(0)
1015 #endif /* __SUNPRO_C */
1016               for (i = 0; i <= (wid - 2); i += 2) {
1017                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6;
1018                 p5 = sp[0];
1019                 p6 = sp[chan1];
1020 
1021                 d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + buffd[i    ]);
1022                 d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + buffd[i + 1]);
1023 
1024                 STORE_RES(dp[0    ], d0);
1025                 STORE_RES(dp[chan1], d1);
1026 
1027                 buffd[i    ] = 0;
1028                 buffd[i + 1] = 0;
1029 
1030                 sp += chan2;
1031                 dp += chan2;
1032               }
1033             }
1034 
1035           } else if (kw == 5) {
1036 
1037             if (l < (n - 1) || off < m) {
1038 #ifdef __SUNPRO_C
1039 #pragma pipeloop(0)
1040 #endif /* __SUNPRO_C */
1041               for (i = 0; i <= (wid - 2); i += 2) {
1042                 p0 = p2; p1 = p3; p2 = p4; p3 = p5;
1043                 p4 = sp[0];
1044                 p5 = sp[chan1];
1045 
1046                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4;
1047                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4;
1048 
1049                 sp += chan2;
1050               }
1051 
1052             } else {
1053 #ifdef __SUNPRO_C
1054 #pragma pipeloop(0)
1055 #endif /* __SUNPRO_C */
1056               for (i = 0; i <= (wid - 2); i += 2) {
1057                 p0 = p2; p1 = p3; p2 = p4; p3 = p5;
1058                 p4 = sp[0];
1059                 p5 = sp[chan1];
1060 
1061                 d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + buffd[i    ]);
1062                 d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + buffd[i + 1]);
1063 
1064                 STORE_RES(dp[0    ], d0);
1065                 STORE_RES(dp[chan1], d1);
1066 
1067                 buffd[i    ] = 0;
1068                 buffd[i + 1] = 0;
1069 
1070                 sp += chan2;
1071                 dp += chan2;
1072               }
1073             }
1074 
1075           } else if (kw == 4) {
1076 
1077             if (l < (n - 1) || off < m) {
1078 #ifdef __SUNPRO_C
1079 #pragma pipeloop(0)
1080 #endif /* __SUNPRO_C */
1081               for (i = 0; i <= (wid - 2); i += 2) {
1082                 p0 = p2; p1 = p3; p2 = p4;
1083                 p3 = sp[0];
1084                 p4 = sp[chan1];
1085 
1086                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3;
1087                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3;
1088 
1089                 sp += chan2;
1090               }
1091 
1092             } else {
1093 #ifdef __SUNPRO_C
1094 #pragma pipeloop(0)
1095 #endif /* __SUNPRO_C */
1096               for (i = 0; i <= (wid - 2); i += 2) {
1097                 p0 = p2; p1 = p3; p2 = p4;
1098                 p3 = sp[0];
1099                 p4 = sp[chan1];
1100 
1101                 d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + buffd[i    ]);
1102                 d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + buffd[i + 1]);
1103 
1104                 STORE_RES(dp[0    ], d0);
1105                 STORE_RES(dp[chan1], d1);
1106 
1107                 buffd[i    ] = 0;
1108                 buffd[i + 1] = 0;
1109 
1110                 sp += chan2;
1111                 dp += chan2;
1112               }
1113             }
1114 
1115           } else if (kw == 3) {
1116 
1117             if (l < (n - 1) || off < m) {
1118 #ifdef __SUNPRO_C
1119 #pragma pipeloop(0)
1120 #endif /* __SUNPRO_C */
1121               for (i = 0; i <= (wid - 2); i += 2) {
1122                 p0 = p2; p1 = p3;
1123                 p2 = sp[0];
1124                 p3 = sp[chan1];
1125 
1126                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2;
1127                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2;
1128 
1129                 sp += chan2;
1130               }
1131 
1132             } else {
1133 #ifdef __SUNPRO_C
1134 #pragma pipeloop(0)
1135 #endif /* __SUNPRO_C */
1136               for (i = 0; i <= (wid - 2); i += 2) {
1137                 p0 = p2; p1 = p3;
1138                 p2 = sp[0];
1139                 p3 = sp[chan1];
1140 
1141                 d0 = (p0*k0 + p1*k1 + p2*k2 + buffd[i    ]);
1142                 d1 = (p1*k0 + p2*k1 + p3*k2 + buffd[i + 1]);
1143 
1144                 STORE_RES(dp[0    ], d0);
1145                 STORE_RES(dp[chan1], d1);
1146 
1147                 buffd[i    ] = 0;
1148                 buffd[i + 1] = 0;
1149 
1150                 sp += chan2;
1151                 dp += chan2;
1152               }
1153             }
1154 
1155           } else if (kw == 2) {
1156 
1157             if (l < (n - 1) || off < m) {
1158 #ifdef __SUNPRO_C
1159 #pragma pipeloop(0)
1160 #endif /* __SUNPRO_C */
1161               for (i = 0; i <= (wid - 2); i += 2) {
1162                 p0 = p2;
1163                 p1 = sp[0];
1164                 p2 = sp[chan1];
1165 
1166                 buffd[i    ] += p0*k0 + p1*k1;
1167                 buffd[i + 1] += p1*k0 + p2*k1;
1168 
1169                 sp += chan2;
1170               }
1171 
1172             } else {
1173 #ifdef __SUNPRO_C
1174 #pragma pipeloop(0)
1175 #endif /* __SUNPRO_C */
1176               for (i = 0; i <= (wid - 2); i += 2) {
1177                 p0 = p2;
1178                 p1 = sp[0];
1179                 p2 = sp[chan1];
1180 
1181                 d0 = (p0*k0 + p1*k1 + buffd[i    ]);
1182                 d1 = (p1*k0 + p2*k1 + buffd[i + 1]);
1183 
1184                 STORE_RES(dp[0    ], d0);
1185                 STORE_RES(dp[chan1], d1);
1186 
1187                 buffd[i    ] = 0;
1188                 buffd[i + 1] = 0;
1189 
1190                 sp += chan2;
1191                 dp += chan2;
1192               }
1193             }
1194 
1195           } else /*if (kw == 1)*/ {
1196 
1197             if (l < (n - 1) || off < m) {
1198 #ifdef __SUNPRO_C
1199 #pragma pipeloop(0)
1200 #endif /* __SUNPRO_C */
1201               for (i = 0; i <= (wid - 2); i += 2) {
1202                 p0 = sp[0];
1203                 p1 = sp[chan1];
1204 
1205                 buffd[i    ] += p0*k0;
1206                 buffd[i + 1] += p1*k0;
1207 
1208                 sp += chan2;
1209               }
1210 
1211             } else {
1212 #ifdef __SUNPRO_C
1213 #pragma pipeloop(0)
1214 #endif /* __SUNPRO_C */
1215               for (i = 0; i <= (wid - 2); i += 2) {
1216                 p0 = sp[0];
1217                 p1 = sp[chan1];
1218 
1219                 d0 = (p0*k0 + buffd[i    ]);
1220                 d1 = (p1*k0 + buffd[i + 1]);
1221 
1222                 STORE_RES(dp[0    ], d0);
1223                 STORE_RES(dp[chan1], d1);
1224 
1225                 buffd[i    ] = 0;
1226                 buffd[i + 1] = 0;
1227 
1228                 sp += chan2;
1229                 dp += chan2;
1230               }
1231             }
1232           }
1233         }
1234       }
1235 
1236       /* last pixel of the row (left over only when wid is odd): direct sum over all n rows x m taps */
1237       for (; i < wid; i++) {
1238         mlib_s32 *pk = k, s = 0;
1239         mlib_s32 x;
1240 
1241         for (l = 0; l < n; l++) {
1242           sp = sl + l*sll + i*chan1;
1243 
1244           for (x = 0; x < m; x++) {
1245             s += sp[0] * pk[0];
1246             sp += chan1;
1247             pk ++;
1248           }
1249         }
1250 
1251         STORE_RES(dp[0], s);
1252 
1253         sp += chan1;
1254         dp += chan1;
1255       }
1256 
1257       sl += sll;
1258       dl += dll;
1259     }
1260   }
1261 
1262   if (buffd != buff) mlib_free(buffd);
1263   if (k != k_locl) mlib_free(k);
1264 
1265   return MLIB_SUCCESS;
1266 }
1267 
1268 /***************************************************************/
1269 #endif /* __sparc ( for x86, using integer multiplies is faster ) */
1270 
1271 /***************************************************************/