1 /*
   2  * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 
  27 /*
  28  * FUNCTION
  29  *   Internal functions for mlib_ImageConv* on U8/S16/U16 types and
  30  *   MLIB_EDGE_DST_NO_WRITE mask
  31  */
  32 
  33 #include "mlib_image.h"
  34 #include "mlib_ImageConv.h"
  35 #include "mlib_c_ImageConv.h"
  36 
  37 /*
  38   This define switches between functions of different data types
       (1: mlib_u8, 2: mlib_s16, 3: mlib_u16 -- see the #if chain below).
       It is fixed to 1 in this file.
  39 */
  40 #define IMG_TYPE 1
  41 
  42 /***************************************************************/
     /*
      * Per-type configuration selected by IMG_TYPE:
      *   DTYPE        pixel element type of the source and destination images
      *   CONV_FUNC    builds the name of the floating-point entry point for
      *                the kernel shape KERN (e.g. CONV_FUNC(MxN))
      *   CONV_FUNC_I  builds the name of the integer-arithmetic entry point
      *   DSCALE       initial value of the kernel scale factor (fscale) in
      *                CONV_FUNC(MxN)
      *   FROM_S32     turns the clamped 32-bit value produced by D2I into the
      *                value stored in a DTYPE pixel
      *   S64TOS32     applied to the low-word operand when LOAD_BUFF packs two
      *                pixels into one mlib_s64; only the mlib_s16 variant masks
      *                to 32 bits
      *   SAT_OFF      raw text appended to the argument inside D2I(); for the
      *                unsigned types it subtracts 2^31 before clamping, and the
      *                XOR in FROM_S32 appears to undo that offset once the
      *                result is truncated to DTYPE. It is empty for mlib_s16.
      */
  43 #if IMG_TYPE == 1
  44 
  45 #define DTYPE             mlib_u8
  46 #define CONV_FUNC(KERN)   mlib_c_conv##KERN##nw_u8
  47 #define CONV_FUNC_I(KERN) mlib_i_conv##KERN##nw_u8
  48 #define DSCALE            (1 << 24)
  49 #define FROM_S32(x)       (((x) >> 24) ^ 128)
  50 #define S64TOS32(x)       (x)
  51 #define SAT_OFF           -(1u << 31)
  52 
  53 #elif IMG_TYPE == 2
  54 
  55 #define DTYPE             mlib_s16
  56 #define CONV_FUNC(KERN)   mlib_conv##KERN##nw_s16
  57 #define CONV_FUNC_I(KERN) mlib_i_conv##KERN##nw_s16
  58 #define DSCALE            65536.0
  59 #define FROM_S32(x)       ((x) >> 16)
  60 #define S64TOS32(x)       ((x) & 0xffffffff)
  61 #define SAT_OFF
  62 
  63 #elif IMG_TYPE == 3
  64 
  65 #define DTYPE             mlib_u16
  66 #define CONV_FUNC(KERN)   mlib_conv##KERN##nw_u16
  67 #define CONV_FUNC_I(KERN) mlib_i_conv##KERN##nw_u16
  68 #define DSCALE            65536.0
  69 #define FROM_S32(x)       (((x) >> 16) ^ 0x8000)
  70 #define S64TOS32(x)       (x)
  71 #define SAT_OFF           -(1u << 31)
  72 
  73 #endif /* IMG_TYPE == 1 */
  74 
  75 /***************************************************************/
     /*
      * BUFF_SIZE is the element count of the on-stack scratch arrays
      * (buff[BUFF_SIZE]) declared by the convolution functions below; when
      * more room is needed they switch to mlib_malloc.
      */
  76 #define BUFF_SIZE   1600
  77 
     /*
      * Byte budget used by mlib_ImageConv1xN to limit how many rows it
      * processes per strip.
      */
  78 #define CACHE_SIZE  (64*1024)
  79 
  80 /***************************************************************/
     /* Floating type used for scaled kernel coefficients and accumulators. */
  81 #define FTYPE mlib_d64
  82 
     /*
      * CLAMP_S32(x) converts x to mlib_s32.
      * Without MLIB_USE_FTOI_CLAMPING the value is saturated explicitly to
      * [MLIB_S32_MIN, MLIB_S32_MAX]; note that x is evaluated more than once,
      * so the argument must be free of side effects.
      * With MLIB_USE_FTOI_CLAMPING it is a plain cast (presumably the
      * platform's float-to-int conversion already saturates -- confirm).
      */
  83 #ifndef MLIB_USE_FTOI_CLAMPING
  84 
  85 #define CLAMP_S32(x)                                            \
  86   (((x) <= MLIB_S32_MIN) ? MLIB_S32_MIN : (((x) >= MLIB_S32_MAX) ? MLIB_S32_MAX : (mlib_s32)(x)))
  87 
  88 #else
  89 
  90 #define CLAMP_S32(x) ((mlib_s32)(x))
  91 
  92 #endif /* MLIB_USE_FTOI_CLAMPING */
  93 
  94 /***************************************************************/
     /*
      * D2I(x) pastes SAT_OFF directly after x (an offset for the unsigned
      * pixel types, nothing for mlib_s16) and clamps the result to mlib_s32.
      */
  95 #define D2I(x) CLAMP_S32((x) SAT_OFF)
  96 
  97 /***************************************************************/
     /*
      * STORE2(res0, res1) writes two results to dp[0] and dp[chan1]; under
      * _LITTLE_ENDIAN the two values are swapped. It relies on `dp` and
      * `chan1` being in scope and expands to two statements, so it must not be
      * used as the unbraced body of an if/for.
      */
  98 #ifdef _LITTLE_ENDIAN
  99 
 100 #define STORE2(res0, res1)                                      \
 101   dp[0    ] = res1;                                             \
 102   dp[chan1] = res0
 103 
 104 #else
 105 
 106 #define STORE2(res0, res1)                                      \
 107   dp[0    ] = res0;                                             \
 108   dp[chan1] = res1
 109 
 110 #endif /* _LITTLE_ENDIAN */
 111 
 112 /***************************************************************/
     /*
      * LOAD_BUFF(buff) copies the two source pixels sp[0] and sp[chan1] into
      * buff[i] and buff[i + 1]; it relies on `sp`, `chan1` and `i` being in
      * scope.
      * With _NO_LONGLONG it uses two element stores (two statements).
      * Otherwise it packs both pixels into a single mlib_s64 and stores it
      * through a cast pointer; the word order is chosen per _LITTLE_ENDIAN so
      * that the 32-bit word at the lower address is intended to hold sp[0].
      * NOTE(review): the 64-bit form assumes buff + i is suitably aligned and
      * accesses the buffer through an mlib_s64 lvalue regardless of the
      * buffer's declared element type.
      */
 113 #ifdef _NO_LONGLONG
 114 
 115 #define LOAD_BUFF(buff)                                         \
 116   buff[i    ] = sp[0];                                          \
 117   buff[i + 1] = sp[chan1]
 118 
 119 #else /* _NO_LONGLONG */
 120 
 121 #ifdef _LITTLE_ENDIAN
 122 
 123 #define LOAD_BUFF(buff)                                         \
 124   *(mlib_s64*)(buff + i) = (((mlib_s64)sp[chan1]) << 32) | S64TOS32((mlib_s64)sp[0])
 125 
 126 #else /* _LITTLE_ENDIAN */
 127 
 128 #define LOAD_BUFF(buff)                                         \
 129   *(mlib_s64*)(buff + i) = (((mlib_s64)sp[0]) << 32) | S64TOS32((mlib_s64)sp[chan1])
 130 
 131 #endif /* _LITTLE_ENDIAN */
 132 #endif /* _NO_LONGLONG */
 133 
 134 /***************************************************************/
     /*
      * Overlays one mlib_d64 with two pairs of 32-bit integers so that the two
      * halves of a 64-bit value can be read individually. CONV_FUNC(MxN) uses
      * the i32s view to read back the pair of pixels written by LOAD_BUFF.
      * NOTE(review): the f32s members are declared mlib_s32 although their
      * names suggest floats; f32s is not referenced in the visible code.
      */
 135 typedef union {
 136   mlib_d64 d64;
 137   struct {
 138     mlib_s32 i0;
 139     mlib_s32 i1;
 140   } i32s;
 141   struct {
 142     mlib_s32 f0;
 143     mlib_s32 f1;
 144   } f32s;
 145 } d64_2x32;
 146 
 147 /***************************************************************/
     /*
      * DEF_VARS(type) declares the locals shared by the convolution functions:
      * source/destination base, line and pixel pointers, the scratch pointer
      * pbuff (initialised to the caller's `buff` array, which must already be
      * declared), image geometry, channel counts and loop counters.
      * The expansion ends without a semicolon; the caller supplies it.
      */
 148 #define DEF_VARS(type)                                          \
 149   type     *adr_src, *sl, *sp = NULL;                           \
 150   type     *adr_dst, *dl, *dp = NULL;                           \
 151   FTYPE    *pbuff = buff;                                       \
 152   mlib_s32 wid, hgt, sll, dll;                                  \
 153   mlib_s32 nchannel, chan1;                                     \
 154   mlib_s32 i, j, c
 155 
 156 /***************************************************************/
     /*
      * GET_SRC_DST_PARAMETERS(type) fills the variables declared by DEF_VARS
      * from the images named `src` and `dst` in the calling function.
      * Width, height and channel count are taken from `src` only.
      * The strides are divided by sizeof(type), so sll/dll count elements
      * (assuming mlib_ImageGetStride reports bytes -- confirm).
      */
 157 #define GET_SRC_DST_PARAMETERS(type)                            \
 158   hgt = mlib_ImageGetHeight(src);                               \
 159   wid = mlib_ImageGetWidth(src);                                \
 160   nchannel = mlib_ImageGetChannels(src);                        \
 161   sll = mlib_ImageGetStride(src) / sizeof(type);                \
 162   dll = mlib_ImageGetStride(dst) / sizeof(type);                \
 163   adr_src = (type *)mlib_ImageGetData(src);                     \
 164   adr_dst = (type *)mlib_ImageGetData(dst)
 165 
 166 /***************************************************************/
     /*
      * CLAMP_STORE(dst, val) saturates the integer `val` to the range of the
      * pixel type and stores it in `dst`. It is only defined when __sparc is
      * not defined, and is used by STORE_RES in the integer code path.
      * The arguments are pasted unparenthesised and `val` is evaluated more
      * than once, so pass plain lvalues/variables only.
      * The mlib_u8 variant expands to a complete braced if/else statement; the
      * other two expand to an if/else-if/else chain without a trailing
      * semicolon.
      */
 167 #ifndef __sparc
 168 
 169 #if IMG_TYPE == 1
 170 
 171 /* Test for the presence of any "1" bit in bits
 172    8 to 31 of val. If present, then val is either
 173    negative or >255. If over/underflows of 8 bits
 174    are uncommon, then this technique can be a win,
 175    since only a single test, rather than two, is
 176    necessary to determine if clamping is needed.
 177    On the other hand, if over/underflows are common,
 178    it adds an extra test.
 179 */
 180 #define CLAMP_STORE(dst, val)                                   \
 181   if (val & 0xffffff00) {                                       \
 182     if (val < MLIB_U8_MIN)                                      \
 183       dst = MLIB_U8_MIN;                                        \
 184     else                                                        \
 185       dst = MLIB_U8_MAX;                                        \
 186   } else {                                                      \
 187     dst = (mlib_u8)val;                                         \
 188   }
 189 
 190 #elif IMG_TYPE == 2
 191 
 192 #define CLAMP_STORE(dst, val)                                   \
 193   if (val >= MLIB_S16_MAX)                                      \
 194     dst = MLIB_S16_MAX;                                         \
 195   else if (val <= MLIB_S16_MIN)                                 \
 196     dst = MLIB_S16_MIN;                                         \
 197   else                                                          \
 198     dst = (mlib_s16)val
 199 
 200 #elif IMG_TYPE == 3
 201 
 202 #define CLAMP_STORE(dst, val)                                   \
 203   if (val >= MLIB_U16_MAX)                                      \
 204     dst = MLIB_U16_MAX;                                         \
 205   else if (val <= MLIB_U16_MIN)                                 \
 206     dst = MLIB_U16_MIN;                                         \
 207   else                                                          \
 208     dst = (mlib_u16)val
 209 
 210 #endif /* IMG_TYPE == 1 */
 211 #endif /* __sparc */
 212 
 213 /***************************************************************/
 214 #define MAX_KER   7
 215 #define MAX_N    15
 216 
 217 static mlib_status mlib_ImageConv1xN(mlib_image       *dst,
 218                                      const mlib_image *src,
 219                                      const mlib_d64   *k,
 220                                      mlib_s32         n,
 221                                      mlib_s32         dn,
 222                                      mlib_s32         cmask)
 223 {
 224   FTYPE    buff[BUFF_SIZE];
 225   mlib_s32 off, kh;
 226   mlib_s32 d0, d1;
 227   const FTYPE    *pk;
 228   FTYPE    k0, k1, k2, k3;
 229   FTYPE    p0, p1, p2, p3, p4;
 230   DEF_VARS(DTYPE);
 231   DTYPE    *sl_c, *dl_c, *sl0;
 232   mlib_s32 l, hsize, max_hsize;
 233   GET_SRC_DST_PARAMETERS(DTYPE);
 234 
 235   hgt -= (n - 1);
 236   adr_dst += dn*dll;
 237 
 238   max_hsize = (CACHE_SIZE/sizeof(DTYPE))/sll;
 239 
 240   if (!max_hsize) max_hsize = 1;
 241 
 242   if (max_hsize > BUFF_SIZE) {
 243     pbuff = mlib_malloc(sizeof(FTYPE)*max_hsize);
 244   }
 245 
 246   chan1 = nchannel;
 247 
 248   sl_c = adr_src;
 249   dl_c = adr_dst;
 250 
 251   for (l = 0; l < hgt; l += hsize) {
 252     hsize = hgt - l;
 253 
 254     if (hsize > max_hsize) hsize = max_hsize;
 255 
 256     for (c = 0; c < nchannel; c++) {
 257       if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
 258 
 259       sl = sl_c + c;
 260       dl = dl_c + c;
 261 
 262 #ifdef __SUNPRO_C
 263 #pragma pipeloop(0)
 264 #endif /* __SUNPRO_C */
 265       for (j = 0; j < hsize; j++) pbuff[j] = 0.0;
 266 
 267       for (i = 0; i < wid; i++) {
 268         sl0 = sl;
 269 
 270         for (off = 0; off < (n - 4); off += 4) {
 271           pk = k + off;
 272           sp = sl0;
 273 
 274           k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
 275           p2 = sp[0]; p3 = sp[sll]; p4 = sp[2*sll];
 276           sp += 3*sll;
 277 
 278 #ifdef __SUNPRO_C
 279 #pragma pipeloop(0)
 280 #endif /* __SUNPRO_C */
 281           for (j = 0; j < hsize; j += 2) {
 282             p0 = p2; p1 = p3; p2 = p4;
 283             p3 = sp[0];
 284             p4 = sp[sll];
 285 
 286             pbuff[j    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3;
 287             pbuff[j + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3;
 288 
 289             sp += 2*sll;
 290           }
 291 
 292           sl0 += 4*sll;
 293         }
 294 
 295         pk = k + off;
 296         sp = sl0;
 297 
 298         k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
 299         p2 = sp[0]; p3 = sp[sll]; p4 = sp[2*sll];
 300 
 301         dp = dl;
 302         kh = n - off;
 303 
 304         if (kh == 4) {
 305           sp += 3*sll;
 306 
 307 #ifdef __SUNPRO_C
 308 #pragma pipeloop(0)
 309 #endif /* __SUNPRO_C */
 310           for (j = 0; j <= (hsize - 2); j += 2) {
 311             p0 = p2; p1 = p3; p2 = p4;
 312             p3 = sp[0];
 313             p4 = sp[sll];
 314 
 315             d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + pbuff[j]);
 316             d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + pbuff[j + 1]);
 317 
 318             dp[0  ] = FROM_S32(d0);
 319             dp[dll] = FROM_S32(d1);
 320 
 321             pbuff[j] = 0;
 322             pbuff[j + 1] = 0;
 323 
 324             sp += 2*sll;
 325             dp += 2*dll;
 326           }
 327 
 328           if (j < hsize) {
 329             p0 = p2; p1 = p3; p2 = p4;
 330             p3 = sp[0];
 331 
 332             d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + pbuff[j]);
 333 
 334             pbuff[j] = 0;
 335 
 336             dp[0] = FROM_S32(d0);
 337           }
 338 
 339         } else if (kh == 3) {
 340           sp += 2*sll;
 341 
 342 #ifdef __SUNPRO_C
 343 #pragma pipeloop(0)
 344 #endif /* __SUNPRO_C */
 345           for (j = 0; j <= (hsize - 2); j += 2) {
 346             p0 = p2; p1 = p3;
 347             p2 = sp[0];
 348             p3 = sp[sll];
 349 
 350             d0 = D2I(p0*k0 + p1*k1 + p2*k2 + pbuff[j]);
 351             d1 = D2I(p1*k0 + p2*k1 + p3*k2 + pbuff[j + 1]);
 352 
 353             dp[0  ] = FROM_S32(d0);
 354             dp[dll] = FROM_S32(d1);
 355 
 356             pbuff[j] = 0;
 357             pbuff[j + 1] = 0;
 358 
 359             sp += 2*sll;
 360             dp += 2*dll;
 361           }
 362 
 363           if (j < hsize) {
 364             p0 = p2; p1 = p3;
 365             p2 = sp[0];
 366 
 367             d0 = D2I(p0*k0 + p1*k1 + p2*k2 + pbuff[j]);
 368 
 369             pbuff[j] = 0;
 370 
 371             dp[0] = FROM_S32(d0);
 372           }
 373 
 374         } else if (kh == 2) {
 375           sp += sll;
 376 
 377 #ifdef __SUNPRO_C
 378 #pragma pipeloop(0)
 379 #endif /* __SUNPRO_C */
 380           for (j = 0; j <= (hsize - 2); j += 2) {
 381             p0 = p2;
 382             p1 = sp[0];
 383             p2 = sp[sll];
 384 
 385             d0 = D2I(p0*k0 + p1*k1 + pbuff[j]);
 386             d1 = D2I(p1*k0 + p2*k1 + pbuff[j + 1]);
 387 
 388             dp[0  ] = FROM_S32(d0);
 389             dp[dll] = FROM_S32(d1);
 390 
 391             pbuff[j] = 0;
 392             pbuff[j + 1] = 0;
 393 
 394             sp += 2*sll;
 395             dp += 2*dll;
 396           }
 397 
 398           if (j < hsize) {
 399             p0 = p2;
 400             p1 = sp[0];
 401 
 402             d0 = D2I(p0*k0 + p1*k1 + pbuff[j]);
 403 
 404             pbuff[j] = 0;
 405 
 406             dp[0] = FROM_S32(d0);
 407           }
 408 
 409         } else /* if (kh == 1) */ {
 410 #ifdef __SUNPRO_C
 411 #pragma pipeloop(0)
 412 #endif /* __SUNPRO_C */
 413           for (j = 0; j < hsize; j++) {
 414             p0 = sp[0];
 415 
 416             d0 = D2I(p0*k0 + pbuff[j]);
 417 
 418             dp[0] = FROM_S32(d0);
 419 
 420             pbuff[j] = 0;
 421 
 422             sp += sll;
 423             dp += dll;
 424           }
 425         }
 426 
 427         sl += chan1;
 428         dl += chan1;
 429       }
 430     }
 431 
 432     sl_c += max_hsize*sll;
 433     dl_c += max_hsize*dll;
 434   }
 435 
 436   if (pbuff != buff) mlib_free(pbuff);
 437 
 438   return MLIB_SUCCESS;
 439 }
 440 
 441 /***************************************************************/
 442 mlib_status CONV_FUNC(MxN)(mlib_image       *dst,
 443                            const mlib_image *src,
 444                            const mlib_s32   *kernel,
 445                            mlib_s32         m,
 446                            mlib_s32         n,
 447                            mlib_s32         dm,
 448                            mlib_s32         dn,
 449                            mlib_s32         scale,
 450                            mlib_s32         cmask)
 451 {
 452   FTYPE    buff[BUFF_SIZE], *buffs_arr[2*(MAX_N + 1)];
 453   FTYPE    **buffs = buffs_arr, *buffd;
 454   FTYPE    akernel[256], *k = akernel, fscale = DSCALE;
 455   mlib_s32 mn, l, off, kw, bsize, buff_ind;
 456   mlib_s32 d0, d1;
 457   FTYPE    k0, k1, k2, k3, k4, k5, k6;
 458   FTYPE    p0, p1, p2, p3, p4, p5, p6, p7;
 459   d64_2x32 dd;
 460   DEF_VARS(DTYPE);
 461   mlib_s32 chan2;
 462   mlib_s32 *buffo, *buffi;
 463   mlib_status status = MLIB_SUCCESS;
 464 
 465   GET_SRC_DST_PARAMETERS(DTYPE);
 466 
 467   if (scale > 30) {
 468     fscale *= 1.0/(1 << 30);
 469     scale -= 30;
 470   }
 471 
 472   fscale /= (1 << scale);
 473 
 474   mn = m*n;
 475 
 476   if (mn > 256) {
 477     k = mlib_malloc(mn*sizeof(mlib_d64));
 478 
 479     if (k == NULL) return MLIB_FAILURE;
 480   }
 481 
 482   for (i = 0; i < mn; i++) {
 483     k[i] = kernel[i]*fscale;
 484   }
 485 
 486   if (m == 1) {
 487     status = mlib_ImageConv1xN(dst, src, k, n, dn, cmask);
 488     FREE_AND_RETURN_STATUS;
 489   }
 490 
 491   bsize = (n + 3)*wid;
 492 
 493   if ((bsize > BUFF_SIZE) || (n > MAX_N)) {
 494     pbuff = mlib_malloc(sizeof(FTYPE)*bsize + sizeof(FTYPE *)*2*(n + 1));
 495 
 496     if (pbuff == NULL) {
 497       status = MLIB_FAILURE;
 498       FREE_AND_RETURN_STATUS;
 499     }
 500     buffs = (FTYPE   **)(pbuff + bsize);
 501   }
 502 
 503   for (l = 0; l < (n + 1); l++) buffs[l] = pbuff + l*wid;
 504   for (l = 0; l < (n + 1); l++) buffs[l + (n + 1)] = buffs[l];
 505   buffd = buffs[n] + wid;
 506   buffo = (mlib_s32*)(buffd + wid);
 507   buffi = buffo + (wid &~ 1);
 508 
 509   chan1 = nchannel;
 510   chan2 = chan1 + chan1;
 511 
 512   wid -= (m - 1);
 513   hgt -= (n - 1);
 514   adr_dst += dn*dll + dm*nchannel;
 515 
 516   for (c = 0; c < nchannel; c++) {
 517     if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
 518 
 519     sl = adr_src + c;
 520     dl = adr_dst + c;
 521 
 522     for (l = 0; l < n; l++) {
 523       FTYPE    *buff = buffs[l];
 524 
 525 #ifdef __SUNPRO_C
 526 #pragma pipeloop(0)
 527 #endif /* __SUNPRO_C */
 528       for (i = 0; i < wid + (m - 1); i++) {
 529         buff[i] = (FTYPE)sl[i*chan1];
 530       }
 531 
 532       sl += sll;
 533     }
 534 
 535     buff_ind = 0;
 536 
 537 #ifdef __SUNPRO_C
 538 #pragma pipeloop(0)
 539 #endif /* __SUNPRO_C */
 540     for (i = 0; i < wid; i++) buffd[i] = 0.0;
 541 
 542     for (j = 0; j < hgt; j++) {
 543       FTYPE    **buffc = buffs + buff_ind;
 544       FTYPE    *buffn = buffc[n];
 545       FTYPE    *pk = k;
 546 
 547       for (l = 0; l < n; l++) {
 548         FTYPE    *buff_l = buffc[l];
 549 
 550         for (off = 0; off < m;) {
 551           FTYPE    *buff = buff_l + off;
 552 
 553           kw = m - off;
 554 
 555           if (kw > 2*MAX_KER) kw = MAX_KER; else
 556             if (kw > MAX_KER) kw = kw/2;
 557           off += kw;
 558 
 559           sp = sl;
 560           dp = dl;
 561 
 562           p2 = buff[0]; p3 = buff[1]; p4 = buff[2];
 563           p5 = buff[3]; p6 = buff[4]; p7 = buff[5];
 564 
 565           k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
 566           k4 = pk[4]; k5 = pk[5]; k6 = pk[6];
 567           pk += kw;
 568 
 569           if (kw == 7) {
 570 
 571             if (l < (n - 1) || off < m) {
 572 #ifdef __SUNPRO_C
 573 #pragma pipeloop(0)
 574 #endif /* __SUNPRO_C */
 575               for (i = 0; i <= (wid - 2); i += 2) {
 576                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
 577 
 578                 p6 = buff[i + 6]; p7 = buff[i + 7];
 579 
 580                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6;
 581                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6;
 582               }
 583 
 584             } else {
 585 #ifdef __SUNPRO_C
 586 #pragma pipeloop(0)
 587 #endif /* __SUNPRO_C */
 588               for (i = 0; i <= (wid - 2); i += 2) {
 589                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
 590 
 591                 p6 = buff[i + 6]; p7 = buff[i + 7];
 592 
 593                 LOAD_BUFF(buffi);
 594 
 595                 dd.d64 = *(FTYPE   *)(buffi + i);
 596                 buffn[i    ] = (FTYPE)dd.i32s.i0;
 597                 buffn[i + 1] = (FTYPE)dd.i32s.i1;
 598 
 599                 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i    ]);
 600                 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1]);
 601 
 602                 dp[0    ] = FROM_S32(d0);
 603                 dp[chan1] = FROM_S32(d1);
 604 
 605                 buffd[i    ] = 0.0;
 606                 buffd[i + 1] = 0.0;
 607 
 608                 sp += chan2;
 609                 dp += chan2;
 610               }
 611             }
 612 
 613           } else if (kw == 6) {
 614 
 615             if (l < (n - 1) || off < m) {
 616 #ifdef __SUNPRO_C
 617 #pragma pipeloop(0)
 618 #endif /* __SUNPRO_C */
 619               for (i = 0; i <= (wid - 2); i += 2) {
 620                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6;
 621 
 622                 p5 = buff[i + 5]; p6 = buff[i + 6];
 623 
 624                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5;
 625                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5;
 626               }
 627 
 628             } else {
 629 #ifdef __SUNPRO_C
 630 #pragma pipeloop(0)
 631 #endif /* __SUNPRO_C */
 632               for (i = 0; i <= (wid - 2); i += 2) {
 633                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6;
 634 
 635                 p5 = buff[i + 5]; p6 = buff[i + 6];
 636 
 637                 buffn[i    ] = (FTYPE)sp[0];
 638                 buffn[i + 1] = (FTYPE)sp[chan1];
 639 
 640                 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + buffd[i    ]);
 641                 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + buffd[i + 1]);
 642 
 643                 dp[0    ] = FROM_S32(d0);
 644                 dp[chan1] = FROM_S32(d1);
 645 
 646                 buffd[i    ] = 0.0;
 647                 buffd[i + 1] = 0.0;
 648 
 649                 sp += chan2;
 650                 dp += chan2;
 651               }
 652             }
 653 
 654           } else if (kw == 5) {
 655 
 656             if (l < (n - 1) || off < m) {
 657 #ifdef __SUNPRO_C
 658 #pragma pipeloop(0)
 659 #endif /* __SUNPRO_C */
 660               for (i = 0; i <= (wid - 2); i += 2) {
 661                 p0 = p2; p1 = p3; p2 = p4; p3 = p5;
 662 
 663                 p4 = buff[i + 4]; p5 = buff[i + 5];
 664 
 665                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4;
 666                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4;
 667               }
 668 
 669             } else {
 670 #ifdef __SUNPRO_C
 671 #pragma pipeloop(0)
 672 #endif /* __SUNPRO_C */
 673               for (i = 0; i <= (wid - 2); i += 2) {
 674                 p0 = p2; p1 = p3; p2 = p4; p3 = p5;
 675 
 676                 p4 = buff[i + 4]; p5 = buff[i + 5];
 677 
 678                 buffn[i    ] = (FTYPE)sp[0];
 679                 buffn[i + 1] = (FTYPE)sp[chan1];
 680 
 681                 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + buffd[i    ]);
 682                 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + buffd[i + 1]);
 683 
 684                 dp[0    ] = FROM_S32(d0);
 685                 dp[chan1] = FROM_S32(d1);
 686 
 687                 buffd[i    ] = 0.0;
 688                 buffd[i + 1] = 0.0;
 689 
 690                 sp += chan2;
 691                 dp += chan2;
 692               }
 693             }
 694 
 695           } else if (kw == 4) {
 696 
 697             if (l < (n - 1) || off < m) {
 698 #ifdef __SUNPRO_C
 699 #pragma pipeloop(0)
 700 #endif /* __SUNPRO_C */
 701               for (i = 0; i <= (wid - 2); i += 2) {
 702                 p0 = p2; p1 = p3; p2 = p4;
 703 
 704                 p3 = buff[i + 3]; p4 = buff[i + 4];
 705 
 706                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3;
 707                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3;
 708               }
 709 
 710             } else {
 711 #ifdef __SUNPRO_C
 712 #pragma pipeloop(0)
 713 #endif /* __SUNPRO_C */
 714               for (i = 0; i <= (wid - 2); i += 2) {
 715                 p0 = p2; p1 = p3; p2 = p4;
 716 
 717                 p3 = buff[i + 3]; p4 = buff[i + 4];
 718 
 719                 buffn[i    ] = (FTYPE)sp[0];
 720                 buffn[i + 1] = (FTYPE)sp[chan1];
 721 
 722                 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + buffd[i    ]);
 723                 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + buffd[i + 1]);
 724 
 725                 dp[0    ] = FROM_S32(d0);
 726                 dp[chan1] = FROM_S32(d1);
 727 
 728                 buffd[i    ] = 0.0;
 729                 buffd[i + 1] = 0.0;
 730 
 731                 sp += chan2;
 732                 dp += chan2;
 733               }
 734             }
 735 
 736           } else if (kw == 3) {
 737 
 738             if (l < (n - 1) || off < m) {
 739 #ifdef __SUNPRO_C
 740 #pragma pipeloop(0)
 741 #endif /* __SUNPRO_C */
 742               for (i = 0; i <= (wid - 2); i += 2) {
 743                 p0 = p2; p1 = p3;
 744 
 745                 p2 = buff[i + 2]; p3 = buff[i + 3];
 746 
 747                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2;
 748                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2;
 749               }
 750 
 751             } else {
 752 #ifdef __SUNPRO_C
 753 #pragma pipeloop(0)
 754 #endif /* __SUNPRO_C */
 755               for (i = 0; i <= (wid - 2); i += 2) {
 756                 p0 = p2; p1 = p3;
 757 
 758                 p2 = buff[i + 2]; p3 = buff[i + 3];
 759 
 760                 buffn[i    ] = (FTYPE)sp[0];
 761                 buffn[i + 1] = (FTYPE)sp[chan1];
 762 
 763                 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + buffd[i    ]);
 764                 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + buffd[i + 1]);
 765 
 766                 dp[0    ] = FROM_S32(d0);
 767                 dp[chan1] = FROM_S32(d1);
 768 
 769                 buffd[i    ] = 0.0;
 770                 buffd[i + 1] = 0.0;
 771 
 772                 sp += chan2;
 773                 dp += chan2;
 774               }
 775             }
 776 
 777           } else /*if (kw == 2)*/ {
 778 
 779             if (l < (n - 1) || off < m) {
 780 #ifdef __SUNPRO_C
 781 #pragma pipeloop(0)
 782 #endif /* __SUNPRO_C */
 783               for (i = 0; i <= (wid - 2); i += 2) {
 784                 p0 = p2;
 785 
 786                 p1 = buff[i + 1]; p2 = buff[i + 2];
 787 
 788                 buffd[i    ] += p0*k0 + p1*k1;
 789                 buffd[i + 1] += p1*k0 + p2*k1;
 790               }
 791 
 792             } else {
 793 #ifdef __SUNPRO_C
 794 #pragma pipeloop(0)
 795 #endif /* __SUNPRO_C */
 796               for (i = 0; i <= (wid - 2); i += 2) {
 797                 p0 = p2;
 798 
 799                 p1 = buff[i + 1]; p2 = buff[i + 2];
 800 
 801                 buffn[i    ] = (FTYPE)sp[0];
 802                 buffn[i + 1] = (FTYPE)sp[chan1];
 803 
 804                 d0 = D2I(p0*k0 + p1*k1 + buffd[i    ]);
 805                 d1 = D2I(p1*k0 + p2*k1 + buffd[i + 1]);
 806 
 807                 dp[0    ] = FROM_S32(d0);
 808                 dp[chan1] = FROM_S32(d1);
 809 
 810                 buffd[i    ] = 0.0;
 811                 buffd[i + 1] = 0.0;
 812 
 813                 sp += chan2;
 814                 dp += chan2;
 815               }
 816             }
 817           }
 818         }
 819       }
 820 
 821       /* last pixels */
 822       for (; i < wid; i++) {
 823         FTYPE    *pk = k, s = 0;
 824         mlib_s32 x, d0;
 825 
 826         for (l = 0; l < n; l++) {
 827           FTYPE    *buff = buffc[l] + i;
 828 
 829           for (x = 0; x < m; x++) s += buff[x] * (*pk++);
 830         }
 831 
 832         d0 = D2I(s);
 833         dp[0] = FROM_S32(d0);
 834 
 835         buffn[i] = (FTYPE)sp[0];
 836 
 837         sp += chan1;
 838         dp += chan1;
 839       }
 840 
 841       for (l = 0; l < (m - 1); l++) buffn[wid + l] = sp[l*chan1];
 842 
 843       /* next line */
 844       sl += sll;
 845       dl += dll;
 846 
 847       buff_ind++;
 848 
 849       if (buff_ind >= n + 1) buff_ind = 0;
 850     }
 851   }
 852 
 853   FREE_AND_RETURN_STATUS;
 854 }
 855 
 856 /***************************************************************/
 857 #ifndef __sparc /* for x86, using integer multiplies is faster */
 858 
     /*
      * STORE_RES(res, x) shifts the integer accumulator x right by shift2 IN
      * PLACE (x is modified) and then stores it, saturated to the pixel range,
      * into res via CLAMP_STORE. It relies on `shift2` being in scope and
      * expands to more than one statement, so it must not be used as the
      * unbraced body of an if/for.
      */
 859 #define STORE_RES(res, x)                                       \
 860   x >>= shift2;                                                 \
 861   CLAMP_STORE(res, x)
 862 
 863 mlib_status CONV_FUNC_I(MxN)(mlib_image       *dst,
 864                              const mlib_image *src,
 865                              const mlib_s32   *kernel,
 866                              mlib_s32         m,
 867                              mlib_s32         n,
 868                              mlib_s32         dm,
 869                              mlib_s32         dn,
 870                              mlib_s32         scale,
 871                              mlib_s32         cmask)
 872 {
 873   mlib_s32 buff[BUFF_SIZE], *buffd = buff;
 874   mlib_s32 l, off, kw;
 875   mlib_s32 d0, d1, shift1, shift2;
 876   mlib_s32 k0, k1, k2, k3, k4, k5, k6;
 877   mlib_s32 p0, p1, p2, p3, p4, p5, p6, p7;
 878   DTYPE    *adr_src, *sl, *sp = NULL;
 879   DTYPE    *adr_dst, *dl, *dp = NULL;
 880   mlib_s32 wid, hgt, sll, dll;
 881   mlib_s32 nchannel, chan1;
 882   mlib_s32 i, j, c;
 883   mlib_s32 chan2;
 884   mlib_s32 k_locl[MAX_N*MAX_N], *k = k_locl;
 885   GET_SRC_DST_PARAMETERS(DTYPE);
 886 
 887 #if IMG_TYPE != 1
 888   shift1 = 16;
 889 #else
 890   shift1 = 8;
 891 #endif /* IMG_TYPE != 1 */
 892   shift2 = scale - shift1;
 893 
 894   chan1 = nchannel;
 895   chan2 = chan1 + chan1;
 896 
 897   wid -= (m - 1);
 898   hgt -= (n - 1);
 899   adr_dst += dn*dll + dm*nchannel;
 900 
 901   if (wid > BUFF_SIZE) {
 902     buffd = mlib_malloc(sizeof(mlib_s32)*wid);
 903 
 904     if (buffd == NULL) return MLIB_FAILURE;
 905   }
 906 
 907   if (m*n > MAX_N*MAX_N) {
 908     k = mlib_malloc(sizeof(mlib_s32)*(m*n));
 909 
 910     if (k == NULL) {
 911       if (buffd != buff) mlib_free(buffd);
 912       return MLIB_FAILURE;
 913     }
 914   }
 915 
 916   for (i = 0; i < m*n; i++) {
 917     k[i] = kernel[i] >> shift1;
 918   }
 919 
 920   for (c = 0; c < nchannel; c++) {
 921     if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
 922 
 923     sl = adr_src + c;
 924     dl = adr_dst + c;
 925 
 926 #ifdef __SUNPRO_C
 927 #pragma pipeloop(0)
 928 #endif /* __SUNPRO_C */
 929     for (i = 0; i < wid; i++) buffd[i] = 0;
 930 
 931     for (j = 0; j < hgt; j++) {
 932       mlib_s32 *pk = k;
 933 
 934       for (l = 0; l < n; l++) {
 935         DTYPE *sp0 = sl + l*sll;
 936 
 937         for (off = 0; off < m;) {
 938           sp = sp0 + off*chan1;
 939           dp = dl;
 940 
 941           kw = m - off;
 942 
 943           if (kw > 2*MAX_KER) kw = MAX_KER; else
 944             if (kw > MAX_KER) kw = kw/2;
 945           off += kw;
 946 
 947           p2 = sp[0]; p3 = sp[chan1]; p4 = sp[chan2];
 948           p5 = sp[chan2 + chan1]; p6 = sp[chan2 + chan2]; p7 = sp[5*chan1];
 949 
 950           k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
 951           k4 = pk[4]; k5 = pk[5]; k6 = pk[6];
 952           pk += kw;
 953 
 954           sp += (kw - 1)*chan1;
 955 
 956           if (kw == 7) {
 957 
 958             if (l < (n - 1) || off < m) {
 959 #ifdef __SUNPRO_C
 960 #pragma pipeloop(0)
 961 #endif /* __SUNPRO_C */
 962               for (i = 0; i <= (wid - 2); i += 2) {
 963                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
 964                 p6 = sp[0];
 965                 p7 = sp[chan1];
 966 
 967                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6;
 968                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6;
 969 
 970                 sp += chan2;
 971               }
 972 
 973             } else {
 974 #ifdef __SUNPRO_C
 975 #pragma pipeloop(0)
 976 #endif /* __SUNPRO_C */
 977               for (i = 0; i <= (wid - 2); i += 2) {
 978                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
 979                 p6 = sp[0];
 980                 p7 = sp[chan1];
 981 
 982                 d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i    ]);
 983                 d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1]);
 984 
 985                 STORE_RES(dp[0    ], d0);
 986                 STORE_RES(dp[chan1], d1);
 987 
 988                 buffd[i    ] = 0;
 989                 buffd[i + 1] = 0;
 990 
 991                 sp += chan2;
 992                 dp += chan2;
 993               }
 994             }
 995 
 996           } else if (kw == 6) {
 997 
 998             if (l < (n - 1) || off < m) {
 999 #ifdef __SUNPRO_C
1000 #pragma pipeloop(0)
1001 #endif /* __SUNPRO_C */
1002               for (i = 0; i <= (wid - 2); i += 2) {
1003                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6;
1004                 p5 = sp[0];
1005                 p6 = sp[chan1];
1006 
1007                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5;
1008                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5;
1009 
1010                 sp += chan2;
1011               }
1012 
1013             } else {
1014 #ifdef __SUNPRO_C
1015 #pragma pipeloop(0)
1016 #endif /* __SUNPRO_C */
1017               for (i = 0; i <= (wid - 2); i += 2) {
1018                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6;
1019                 p5 = sp[0];
1020                 p6 = sp[chan1];
1021 
1022                 d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + buffd[i    ]);
1023                 d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + buffd[i + 1]);
1024 
1025                 STORE_RES(dp[0    ], d0);
1026                 STORE_RES(dp[chan1], d1);
1027 
1028                 buffd[i    ] = 0;
1029                 buffd[i + 1] = 0;
1030 
1031                 sp += chan2;
1032                 dp += chan2;
1033               }
1034             }
1035 
1036           } else if (kw == 5) {
1037 
1038             if (l < (n - 1) || off < m) {
1039 #ifdef __SUNPRO_C
1040 #pragma pipeloop(0)
1041 #endif /* __SUNPRO_C */
1042               for (i = 0; i <= (wid - 2); i += 2) {
1043                 p0 = p2; p1 = p3; p2 = p4; p3 = p5;
1044                 p4 = sp[0];
1045                 p5 = sp[chan1];
1046 
1047                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4;
1048                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4;
1049 
1050                 sp += chan2;
1051               }
1052 
1053             } else {
1054 #ifdef __SUNPRO_C
1055 #pragma pipeloop(0)
1056 #endif /* __SUNPRO_C */
1057               for (i = 0; i <= (wid - 2); i += 2) {
1058                 p0 = p2; p1 = p3; p2 = p4; p3 = p5;
1059                 p4 = sp[0];
1060                 p5 = sp[chan1];
1061 
1062                 d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + buffd[i    ]);
1063                 d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + buffd[i + 1]);
1064 
1065                 STORE_RES(dp[0    ], d0);
1066                 STORE_RES(dp[chan1], d1);
1067 
1068                 buffd[i    ] = 0;
1069                 buffd[i + 1] = 0;
1070 
1071                 sp += chan2;
1072                 dp += chan2;
1073               }
1074             }
1075 
1076           } else if (kw == 4) {
1077 
1078             if (l < (n - 1) || off < m) {
1079 #ifdef __SUNPRO_C
1080 #pragma pipeloop(0)
1081 #endif /* __SUNPRO_C */
1082               for (i = 0; i <= (wid - 2); i += 2) {
1083                 p0 = p2; p1 = p3; p2 = p4;
1084                 p3 = sp[0];
1085                 p4 = sp[chan1];
1086 
1087                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3;
1088                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3;
1089 
1090                 sp += chan2;
1091               }
1092 
1093             } else {
1094 #ifdef __SUNPRO_C
1095 #pragma pipeloop(0)
1096 #endif /* __SUNPRO_C */
1097               for (i = 0; i <= (wid - 2); i += 2) {
1098                 p0 = p2; p1 = p3; p2 = p4;
1099                 p3 = sp[0];
1100                 p4 = sp[chan1];
1101 
1102                 d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + buffd[i    ]);
1103                 d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + buffd[i + 1]);
1104 
1105                 STORE_RES(dp[0    ], d0);
1106                 STORE_RES(dp[chan1], d1);
1107 
1108                 buffd[i    ] = 0;
1109                 buffd[i + 1] = 0;
1110 
1111                 sp += chan2;
1112                 dp += chan2;
1113               }
1114             }
1115 
1116           } else if (kw == 3) {
1117 
1118             if (l < (n - 1) || off < m) {
1119 #ifdef __SUNPRO_C
1120 #pragma pipeloop(0)
1121 #endif /* __SUNPRO_C */
1122               for (i = 0; i <= (wid - 2); i += 2) {
1123                 p0 = p2; p1 = p3;
1124                 p2 = sp[0];
1125                 p3 = sp[chan1];
1126 
1127                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2;
1128                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2;
1129 
1130                 sp += chan2;
1131               }
1132 
1133             } else {
1134 #ifdef __SUNPRO_C
1135 #pragma pipeloop(0)
1136 #endif /* __SUNPRO_C */
1137               for (i = 0; i <= (wid - 2); i += 2) {
1138                 p0 = p2; p1 = p3;
1139                 p2 = sp[0];
1140                 p3 = sp[chan1];
1141 
1142                 d0 = (p0*k0 + p1*k1 + p2*k2 + buffd[i    ]);
1143                 d1 = (p1*k0 + p2*k1 + p3*k2 + buffd[i + 1]);
1144 
1145                 STORE_RES(dp[0    ], d0);
1146                 STORE_RES(dp[chan1], d1);
1147 
1148                 buffd[i    ] = 0;
1149                 buffd[i + 1] = 0;
1150 
1151                 sp += chan2;
1152                 dp += chan2;
1153               }
1154             }
1155 
1156           } else if (kw == 2) {
1157 
1158             if (l < (n - 1) || off < m) {
1159 #ifdef __SUNPRO_C
1160 #pragma pipeloop(0)
1161 #endif /* __SUNPRO_C */
1162               for (i = 0; i <= (wid - 2); i += 2) {
1163                 p0 = p2;
1164                 p1 = sp[0];
1165                 p2 = sp[chan1];
1166 
1167                 buffd[i    ] += p0*k0 + p1*k1;
1168                 buffd[i + 1] += p1*k0 + p2*k1;
1169 
1170                 sp += chan2;
1171               }
1172 
1173             } else {
1174 #ifdef __SUNPRO_C
1175 #pragma pipeloop(0)
1176 #endif /* __SUNPRO_C */
1177               for (i = 0; i <= (wid - 2); i += 2) {
1178                 p0 = p2;
1179                 p1 = sp[0];
1180                 p2 = sp[chan1];
1181 
1182                 d0 = (p0*k0 + p1*k1 + buffd[i    ]);
1183                 d1 = (p1*k0 + p2*k1 + buffd[i + 1]);
1184 
1185                 STORE_RES(dp[0    ], d0);
1186                 STORE_RES(dp[chan1], d1);
1187 
1188                 buffd[i    ] = 0;
1189                 buffd[i + 1] = 0;
1190 
1191                 sp += chan2;
1192                 dp += chan2;
1193               }
1194             }
1195 
1196           } else /*if (kw == 1)*/ {
1197 
1198             if (l < (n - 1) || off < m) {
1199 #ifdef __SUNPRO_C
1200 #pragma pipeloop(0)
1201 #endif /* __SUNPRO_C */
1202               for (i = 0; i <= (wid - 2); i += 2) {
1203                 p0 = sp[0];
1204                 p1 = sp[chan1];
1205 
1206                 buffd[i    ] += p0*k0;
1207                 buffd[i + 1] += p1*k0;
1208 
1209                 sp += chan2;
1210               }
1211 
1212             } else {
1213 #ifdef __SUNPRO_C
1214 #pragma pipeloop(0)
1215 #endif /* __SUNPRO_C */
1216               for (i = 0; i <= (wid - 2); i += 2) {
1217                 p0 = sp[0];
1218                 p1 = sp[chan1];
1219 
1220                 d0 = (p0*k0 + buffd[i    ]);
1221                 d1 = (p1*k0 + buffd[i + 1]);
1222 
1223                 STORE_RES(dp[0    ], d0);
1224                 STORE_RES(dp[chan1], d1);
1225 
1226                 buffd[i    ] = 0;
1227                 buffd[i + 1] = 0;
1228 
1229                 sp += chan2;
1230                 dp += chan2;
1231               }
1232             }
1233           }
1234         }
1235       }
1236 
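      /*
       * Last pixels: the unrolled loops above process two columns per
       * iteration, so an odd wid leaves one trailing column. It is computed
       * here directly as the full n x m kernel sum, without using buffd.
       */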
1238       for (; i < wid; i++) {
1239         mlib_s32 *pk = k, s = 0;
1240         mlib_s32 x;
1241 
1242         for (l = 0; l < n; l++) {
1243           sp = sl + l*sll + i*chan1;
1244 
1245           for (x = 0; x < m; x++) {
1246             s += sp[0] * pk[0];
1247             sp += chan1;
1248             pk ++;
1249           }
1250         }
1251 
1252         STORE_RES(dp[0], s);
1253 
1254         sp += chan1;
1255         dp += chan1;
1256       }
1257 
1258       sl += sll;
1259       dl += dll;
1260     }
1261   }
1262 
1263   if (buffd != buff) mlib_free(buffd);
1264   if (k != k_locl) mlib_free(k);
1265 
1266   return MLIB_SUCCESS;
1267 }
1268 
1269 /***************************************************************/
1270 #endif /* __sparc ( for x86, using integer multiplies is faster ) */
1271 
1272 /***************************************************************/