< prev index next >

src/java.desktop/share/native/libmlib_image/mlib_ImageConv_16nw.c

Print this page
rev 59383 : [mq]: final
   1 /*
   2  * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any


  146 /***************************************************************/
      /*
       * DEF_VARS(type): declares the working locals used by the convolution
       * routines in this file.  `type` is the element type of the source and
       * destination pointers.
       *
       *   adr_src, adr_dst    image data pointers (assigned later by
       *                       GET_SRC_DST_PARAMETERS)
       *   sl, dl, sp, dp      working pointers into source / destination;
       *                       only sp and dp are initialized (to NULL)
       *   pbuff               accumulator pointer, initialized to `buff`
       *   wid, hgt, sll, dll  width, height and the two line strides
       *   nchannel, chan1     channel count and the per-pixel element step
       *   i, j, c             loop counters
       *
       * The expansion reads `buff` and `FTYPE`, so both must already be
       * visible where the macro is used.  It expands to declarations and
       * ends without a semicolon; the caller supplies the terminating `;`.
       */
  147 #define DEF_VARS(type)                                          \
  148   type     *adr_src, *sl, *sp = NULL;                           \
  149   type     *adr_dst, *dl, *dp = NULL;                           \
  150   FTYPE    *pbuff = buff;                                       \
  151   mlib_s32 wid, hgt, sll, dll;                                  \
  152   mlib_s32 nchannel, chan1;                                     \
  153   mlib_s32 i, j, c
 154 
  155 /***************************************************************/
      /*
       * GET_SRC_DST_PARAMETERS(type): fills the locals declared by DEF_VARS
       * from the images named `src` and `dst`, which must be in scope.
       *
       * Height, width and channel count are read from `src` only; `dst` is
       * consulted just for its stride and data pointer.  Each stride is
       * divided by sizeof(type) so that sll / dll can be used directly in
       * pointer arithmetic on `type *` (presumably mlib_ImageGetStride
       * returns bytes -- confirm against the mlib image API).
       *
       * Expands to a sequence of assignment statements with no trailing
       * semicolon, so it must be used as a full statement inside a braced
       * block.
       */
  156 #define GET_SRC_DST_PARAMETERS(type)                            \
  157   hgt = mlib_ImageGetHeight(src);                               \
  158   wid = mlib_ImageGetWidth(src);                                \
  159   nchannel = mlib_ImageGetChannels(src);                        \
  160   sll = mlib_ImageGetStride(src) / sizeof(type);                \
  161   dll = mlib_ImageGetStride(dst) / sizeof(type);                \
  162   adr_src = (type *)mlib_ImageGetData(src);                     \
  163   adr_dst = (type *)mlib_ImageGetData(dst)
 164 
 165 /***************************************************************/
 166 #ifndef __sparc
 167 
 168 #if IMG_TYPE == 1
 169 
 170 /* Test for the presence of any "1" bit in bits
 171    8 to 31 of val. If present, then val is either
 172    negative or >255. If over/underflows of 8 bits
 173    are uncommon, then this technique can be a win,
 174    since only a single test, rather than two, is
 175    necessary to determine if clamping is needed.
 176    On the other hand, if over/underflows are common,
 177    it adds an extra test.
 178 */
 179 #define CLAMP_STORE(dst, val)                                   \
 180   if (val & 0xffffff00) {                                       \
 181     if (val < MLIB_U8_MIN)                                      \
 182       dst = MLIB_U8_MIN;                                        \
 183     else                                                        \
 184       dst = MLIB_U8_MAX;                                        \
 185   } else {                                                      \
 186     dst = (mlib_u8)val;                                         \
 187   }


 190 
 191 #define CLAMP_STORE(dst, val)                                   \
 192   if (val >= MLIB_S16_MAX)                                      \
 193     dst = MLIB_S16_MAX;                                         \
 194   else if (val <= MLIB_S16_MIN)                                 \
 195     dst = MLIB_S16_MIN;                                         \
 196   else                                                          \
 197     dst = (mlib_s16)val
 198 
 199 #elif IMG_TYPE == 3
 200 
 201 #define CLAMP_STORE(dst, val)                                   \
 202   if (val >= MLIB_U16_MAX)                                      \
 203     dst = MLIB_U16_MAX;                                         \
 204   else if (val <= MLIB_U16_MIN)                                 \
 205     dst = MLIB_U16_MIN;                                         \
 206   else                                                          \
 207     dst = (mlib_u16)val
 208 
 209 #endif /* IMG_TYPE == 1 */
 210 #endif /* __sparc */
 211 
  212 /***************************************************************/
      /*
       * MAX_KER: widest kernel-row segment the MxN loops process in one
       * pass.  A remaining width above 2*MAX_KER is cut to MAX_KER, and one
       * above MAX_KER is halved.  The visible code has hand-unrolled
       * branches for segment widths up to 7, so this value cannot be raised
       * without adding matching branches.
       *
       * MAX_N: kernels with more than MAX_N*MAX_N coefficients get a
       * heap-allocated coefficient array in CONV_FUNC_I(MxN); smaller ones
       * presumably use a fixed local array (its declaration is not visible
       * here -- confirm).
       */
  213 #define MAX_KER   7
  214 #define MAX_N    15
 215 
 216 static mlib_status mlib_ImageConv1xN(mlib_image       *dst,
 217                                      const mlib_image *src,
 218                                      const mlib_d64   *k,
 219                                      mlib_s32         n,
 220                                      mlib_s32         dn,
 221                                      mlib_s32         cmask)
 222 {
 223   FTYPE    buff[BUFF_SIZE];
 224   mlib_s32 off, kh;
 225   mlib_s32 d0, d1;
 226   const FTYPE    *pk;
 227   FTYPE    k0, k1, k2, k3;
 228   FTYPE    p0, p1, p2, p3, p4;
 229   DEF_VARS(DTYPE);
 230   DTYPE    *sl_c, *dl_c, *sl0;


 241   if (max_hsize > BUFF_SIZE) {
 242     pbuff = mlib_malloc(sizeof(FTYPE)*max_hsize);
 243   }
 244 
 245   chan1 = nchannel;
 246 
 247   sl_c = adr_src;
 248   dl_c = adr_dst;
 249 
 250   for (l = 0; l < hgt; l += hsize) {
 251     hsize = hgt - l;
 252 
 253     if (hsize > max_hsize) hsize = max_hsize;
 254 
 255     for (c = 0; c < nchannel; c++) {
 256       if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
 257 
 258       sl = sl_c + c;
 259       dl = dl_c + c;
 260 
 261 #ifdef __SUNPRO_C
 262 #pragma pipeloop(0)
 263 #endif /* __SUNPRO_C */
 264       for (j = 0; j < hsize; j++) pbuff[j] = 0.0;
 265 
 266       for (i = 0; i < wid; i++) {
 267         sl0 = sl;
 268 
 269         for (off = 0; off < (n - 4); off += 4) {
 270           pk = k + off;
 271           sp = sl0;
 272 
 273           k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
 274           p2 = sp[0]; p3 = sp[sll]; p4 = sp[2*sll];
 275           sp += 3*sll;
 276 
 277 #ifdef __SUNPRO_C
 278 #pragma pipeloop(0)
 279 #endif /* __SUNPRO_C */
 280           for (j = 0; j < hsize; j += 2) {
 281             p0 = p2; p1 = p3; p2 = p4;
 282             p3 = sp[0];
 283             p4 = sp[sll];
 284 
 285             pbuff[j    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3;
 286             pbuff[j + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3;
 287 
 288             sp += 2*sll;
 289           }
 290 
 291           sl0 += 4*sll;
 292         }
 293 
 294         pk = k + off;
 295         sp = sl0;
 296 
 297         k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
 298         p2 = sp[0]; p3 = sp[sll]; p4 = sp[2*sll];
 299 
 300         dp = dl;
 301         kh = n - off;
 302 
 303         if (kh == 4) {
 304           sp += 3*sll;
 305 
 306 #ifdef __SUNPRO_C
 307 #pragma pipeloop(0)
 308 #endif /* __SUNPRO_C */
 309           for (j = 0; j <= (hsize - 2); j += 2) {
 310             p0 = p2; p1 = p3; p2 = p4;
 311             p3 = sp[0];
 312             p4 = sp[sll];
 313 
 314             d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + pbuff[j]);
 315             d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + pbuff[j + 1]);
 316 
 317             dp[0  ] = FROM_S32(d0);
 318             dp[dll] = FROM_S32(d1);
 319 
 320             pbuff[j] = 0;
 321             pbuff[j + 1] = 0;
 322 
 323             sp += 2*sll;
 324             dp += 2*dll;
 325           }
 326 
 327           if (j < hsize) {
 328             p0 = p2; p1 = p3; p2 = p4;
 329             p3 = sp[0];
 330 
 331             d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + pbuff[j]);
 332 
 333             pbuff[j] = 0;
 334 
 335             dp[0] = FROM_S32(d0);
 336           }
 337 
 338         } else if (kh == 3) {
 339           sp += 2*sll;
 340 
 341 #ifdef __SUNPRO_C
 342 #pragma pipeloop(0)
 343 #endif /* __SUNPRO_C */
 344           for (j = 0; j <= (hsize - 2); j += 2) {
 345             p0 = p2; p1 = p3;
 346             p2 = sp[0];
 347             p3 = sp[sll];
 348 
 349             d0 = D2I(p0*k0 + p1*k1 + p2*k2 + pbuff[j]);
 350             d1 = D2I(p1*k0 + p2*k1 + p3*k2 + pbuff[j + 1]);
 351 
 352             dp[0  ] = FROM_S32(d0);
 353             dp[dll] = FROM_S32(d1);
 354 
 355             pbuff[j] = 0;
 356             pbuff[j + 1] = 0;
 357 
 358             sp += 2*sll;
 359             dp += 2*dll;
 360           }
 361 
 362           if (j < hsize) {
 363             p0 = p2; p1 = p3;
 364             p2 = sp[0];
 365 
 366             d0 = D2I(p0*k0 + p1*k1 + p2*k2 + pbuff[j]);
 367 
 368             pbuff[j] = 0;
 369 
 370             dp[0] = FROM_S32(d0);
 371           }
 372 
 373         } else if (kh == 2) {
 374           sp += sll;
 375 
 376 #ifdef __SUNPRO_C
 377 #pragma pipeloop(0)
 378 #endif /* __SUNPRO_C */
 379           for (j = 0; j <= (hsize - 2); j += 2) {
 380             p0 = p2;
 381             p1 = sp[0];
 382             p2 = sp[sll];
 383 
 384             d0 = D2I(p0*k0 + p1*k1 + pbuff[j]);
 385             d1 = D2I(p1*k0 + p2*k1 + pbuff[j + 1]);
 386 
 387             dp[0  ] = FROM_S32(d0);
 388             dp[dll] = FROM_S32(d1);
 389 
 390             pbuff[j] = 0;
 391             pbuff[j + 1] = 0;
 392 
 393             sp += 2*sll;
 394             dp += 2*dll;
 395           }
 396 
 397           if (j < hsize) {
 398             p0 = p2;
 399             p1 = sp[0];
 400 
 401             d0 = D2I(p0*k0 + p1*k1 + pbuff[j]);
 402 
 403             pbuff[j] = 0;
 404 
 405             dp[0] = FROM_S32(d0);
 406           }
 407 
 408         } else /* if (kh == 1) */ {
 409 #ifdef __SUNPRO_C
 410 #pragma pipeloop(0)
 411 #endif /* __SUNPRO_C */
 412           for (j = 0; j < hsize; j++) {
 413             p0 = sp[0];
 414 
 415             d0 = D2I(p0*k0 + pbuff[j]);
 416 
 417             dp[0] = FROM_S32(d0);
 418 
 419             pbuff[j] = 0;
 420 
 421             sp += sll;
 422             dp += dll;
 423           }
 424         }
 425 
 426         sl += chan1;
 427         dl += chan1;
 428       }
 429     }
 430 
 431     sl_c += max_hsize*sll;


 504   buffd = buffs[n] + wid;
 505   buffo = (mlib_s32*)(buffd + wid);
 506   buffi = buffo + (wid &~ 1);
 507 
 508   chan1 = nchannel;
 509   chan2 = chan1 + chan1;
 510 
 511   wid -= (m - 1);
 512   hgt -= (n - 1);
 513   adr_dst += dn*dll + dm*nchannel;
 514 
 515   for (c = 0; c < nchannel; c++) {
 516     if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
 517 
 518     sl = adr_src + c;
 519     dl = adr_dst + c;
 520 
 521     for (l = 0; l < n; l++) {
 522       FTYPE    *buff = buffs[l];
 523 
 524 #ifdef __SUNPRO_C
 525 #pragma pipeloop(0)
 526 #endif /* __SUNPRO_C */
 527       for (i = 0; i < wid + (m - 1); i++) {
 528         buff[i] = (FTYPE)sl[i*chan1];
 529       }
 530 
 531       sl += sll;
 532     }
 533 
 534     buff_ind = 0;
 535 
 536 #ifdef __SUNPRO_C
 537 #pragma pipeloop(0)
 538 #endif /* __SUNPRO_C */
 539     for (i = 0; i < wid; i++) buffd[i] = 0.0;
 540 
 541     for (j = 0; j < hgt; j++) {
 542       FTYPE    **buffc = buffs + buff_ind;
 543       FTYPE    *buffn = buffc[n];
 544       FTYPE    *pk = k;
 545 
 546       for (l = 0; l < n; l++) {
 547         FTYPE    *buff_l = buffc[l];
 548 
 549         for (off = 0; off < m;) {
 550           FTYPE    *buff = buff_l + off;
 551 
 552           kw = m - off;
 553 
 554           if (kw > 2*MAX_KER) kw = MAX_KER; else
 555             if (kw > MAX_KER) kw = kw/2;
 556           off += kw;
 557 
 558           sp = sl;
 559           dp = dl;
 560 
 561           p2 = buff[0]; p3 = buff[1]; p4 = buff[2];
 562           p5 = buff[3]; p6 = buff[4]; p7 = buff[5];
 563 
 564           k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
 565           k4 = pk[4]; k5 = pk[5]; k6 = pk[6];
 566           pk += kw;
 567 
 568           if (kw == 7) {
 569 
 570             if (l < (n - 1) || off < m) {
 571 #ifdef __SUNPRO_C
 572 #pragma pipeloop(0)
 573 #endif /* __SUNPRO_C */
 574               for (i = 0; i <= (wid - 2); i += 2) {
 575                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
 576 
 577                 p6 = buff[i + 6]; p7 = buff[i + 7];
 578 
 579                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6;
 580                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6;
 581               }
 582 
 583             } else {
 584 #ifdef __SUNPRO_C
 585 #pragma pipeloop(0)
 586 #endif /* __SUNPRO_C */
 587               for (i = 0; i <= (wid - 2); i += 2) {
 588                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
 589 
 590                 p6 = buff[i + 6]; p7 = buff[i + 7];
 591 
 592                 LOAD_BUFF(buffi);
 593 
 594                 dd.d64 = *(FTYPE   *)(buffi + i);
 595                 buffn[i    ] = (FTYPE)dd.i32s.i0;
 596                 buffn[i + 1] = (FTYPE)dd.i32s.i1;
 597 
 598                 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i    ]);
 599                 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1]);
 600 
 601                 dp[0    ] = FROM_S32(d0);
 602                 dp[chan1] = FROM_S32(d1);
 603 
 604                 buffd[i    ] = 0.0;
 605                 buffd[i + 1] = 0.0;
 606 
 607                 sp += chan2;
 608                 dp += chan2;
 609               }
 610             }
 611 
 612           } else if (kw == 6) {
 613 
 614             if (l < (n - 1) || off < m) {
 615 #ifdef __SUNPRO_C
 616 #pragma pipeloop(0)
 617 #endif /* __SUNPRO_C */
 618               for (i = 0; i <= (wid - 2); i += 2) {
 619                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6;
 620 
 621                 p5 = buff[i + 5]; p6 = buff[i + 6];
 622 
 623                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5;
 624                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5;
 625               }
 626 
 627             } else {
 628 #ifdef __SUNPRO_C
 629 #pragma pipeloop(0)
 630 #endif /* __SUNPRO_C */
 631               for (i = 0; i <= (wid - 2); i += 2) {
 632                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6;
 633 
 634                 p5 = buff[i + 5]; p6 = buff[i + 6];
 635 
 636                 buffn[i    ] = (FTYPE)sp[0];
 637                 buffn[i + 1] = (FTYPE)sp[chan1];
 638 
 639                 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + buffd[i    ]);
 640                 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + buffd[i + 1]);
 641 
 642                 dp[0    ] = FROM_S32(d0);
 643                 dp[chan1] = FROM_S32(d1);
 644 
 645                 buffd[i    ] = 0.0;
 646                 buffd[i + 1] = 0.0;
 647 
 648                 sp += chan2;
 649                 dp += chan2;
 650               }
 651             }
 652 
 653           } else if (kw == 5) {
 654 
 655             if (l < (n - 1) || off < m) {
 656 #ifdef __SUNPRO_C
 657 #pragma pipeloop(0)
 658 #endif /* __SUNPRO_C */
 659               for (i = 0; i <= (wid - 2); i += 2) {
 660                 p0 = p2; p1 = p3; p2 = p4; p3 = p5;
 661 
 662                 p4 = buff[i + 4]; p5 = buff[i + 5];
 663 
 664                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4;
 665                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4;
 666               }
 667 
 668             } else {
 669 #ifdef __SUNPRO_C
 670 #pragma pipeloop(0)
 671 #endif /* __SUNPRO_C */
 672               for (i = 0; i <= (wid - 2); i += 2) {
 673                 p0 = p2; p1 = p3; p2 = p4; p3 = p5;
 674 
 675                 p4 = buff[i + 4]; p5 = buff[i + 5];
 676 
 677                 buffn[i    ] = (FTYPE)sp[0];
 678                 buffn[i + 1] = (FTYPE)sp[chan1];
 679 
 680                 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + buffd[i    ]);
 681                 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + buffd[i + 1]);
 682 
 683                 dp[0    ] = FROM_S32(d0);
 684                 dp[chan1] = FROM_S32(d1);
 685 
 686                 buffd[i    ] = 0.0;
 687                 buffd[i + 1] = 0.0;
 688 
 689                 sp += chan2;
 690                 dp += chan2;
 691               }
 692             }
 693 
 694           } else if (kw == 4) {
 695 
 696             if (l < (n - 1) || off < m) {
 697 #ifdef __SUNPRO_C
 698 #pragma pipeloop(0)
 699 #endif /* __SUNPRO_C */
 700               for (i = 0; i <= (wid - 2); i += 2) {
 701                 p0 = p2; p1 = p3; p2 = p4;
 702 
 703                 p3 = buff[i + 3]; p4 = buff[i + 4];
 704 
 705                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3;
 706                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3;
 707               }
 708 
 709             } else {
 710 #ifdef __SUNPRO_C
 711 #pragma pipeloop(0)
 712 #endif /* __SUNPRO_C */
 713               for (i = 0; i <= (wid - 2); i += 2) {
 714                 p0 = p2; p1 = p3; p2 = p4;
 715 
 716                 p3 = buff[i + 3]; p4 = buff[i + 4];
 717 
 718                 buffn[i    ] = (FTYPE)sp[0];
 719                 buffn[i + 1] = (FTYPE)sp[chan1];
 720 
 721                 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + buffd[i    ]);
 722                 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + buffd[i + 1]);
 723 
 724                 dp[0    ] = FROM_S32(d0);
 725                 dp[chan1] = FROM_S32(d1);
 726 
 727                 buffd[i    ] = 0.0;
 728                 buffd[i + 1] = 0.0;
 729 
 730                 sp += chan2;
 731                 dp += chan2;
 732               }
 733             }
 734 
 735           } else if (kw == 3) {
 736 
 737             if (l < (n - 1) || off < m) {
 738 #ifdef __SUNPRO_C
 739 #pragma pipeloop(0)
 740 #endif /* __SUNPRO_C */
 741               for (i = 0; i <= (wid - 2); i += 2) {
 742                 p0 = p2; p1 = p3;
 743 
 744                 p2 = buff[i + 2]; p3 = buff[i + 3];
 745 
 746                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2;
 747                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2;
 748               }
 749 
 750             } else {
 751 #ifdef __SUNPRO_C
 752 #pragma pipeloop(0)
 753 #endif /* __SUNPRO_C */
 754               for (i = 0; i <= (wid - 2); i += 2) {
 755                 p0 = p2; p1 = p3;
 756 
 757                 p2 = buff[i + 2]; p3 = buff[i + 3];
 758 
 759                 buffn[i    ] = (FTYPE)sp[0];
 760                 buffn[i + 1] = (FTYPE)sp[chan1];
 761 
 762                 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + buffd[i    ]);
 763                 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + buffd[i + 1]);
 764 
 765                 dp[0    ] = FROM_S32(d0);
 766                 dp[chan1] = FROM_S32(d1);
 767 
 768                 buffd[i    ] = 0.0;
 769                 buffd[i + 1] = 0.0;
 770 
 771                 sp += chan2;
 772                 dp += chan2;
 773               }
 774             }
 775 
 776           } else /*if (kw == 2)*/ {
 777 
 778             if (l < (n - 1) || off < m) {
 779 #ifdef __SUNPRO_C
 780 #pragma pipeloop(0)
 781 #endif /* __SUNPRO_C */
 782               for (i = 0; i <= (wid - 2); i += 2) {
 783                 p0 = p2;
 784 
 785                 p1 = buff[i + 1]; p2 = buff[i + 2];
 786 
 787                 buffd[i    ] += p0*k0 + p1*k1;
 788                 buffd[i + 1] += p1*k0 + p2*k1;
 789               }
 790 
 791             } else {
 792 #ifdef __SUNPRO_C
 793 #pragma pipeloop(0)
 794 #endif /* __SUNPRO_C */
 795               for (i = 0; i <= (wid - 2); i += 2) {
 796                 p0 = p2;
 797 
 798                 p1 = buff[i + 1]; p2 = buff[i + 2];
 799 
 800                 buffn[i    ] = (FTYPE)sp[0];
 801                 buffn[i + 1] = (FTYPE)sp[chan1];
 802 
 803                 d0 = D2I(p0*k0 + p1*k1 + buffd[i    ]);
 804                 d1 = D2I(p1*k0 + p2*k1 + buffd[i + 1]);
 805 
 806                 dp[0    ] = FROM_S32(d0);
 807                 dp[chan1] = FROM_S32(d1);
 808 
 809                 buffd[i    ] = 0.0;
 810                 buffd[i + 1] = 0.0;
 811 
 812                 sp += chan2;
 813                 dp += chan2;
 814               }


 836         sp += chan1;
 837         dp += chan1;
 838       }
 839 
 840       for (l = 0; l < (m - 1); l++) buffn[wid + l] = sp[l*chan1];
 841 
 842       /* next line */
 843       sl += sll;
 844       dl += dll;
 845 
 846       buff_ind++;
 847 
 848       if (buff_ind >= n + 1) buff_ind = 0;
 849     }
 850   }
 851 
 852   FREE_AND_RETURN_STATUS;
 853 }
 854 
 855 /***************************************************************/
 856 #ifndef __sparc /* for x86, using integer multiplies is faster */
 857 
 858 #define STORE_RES(res, x)                                       \
 859   x >>= shift2;                                                 \
 860   CLAMP_STORE(res, x)
 861 
 862 mlib_status CONV_FUNC_I(MxN)(mlib_image       *dst,
 863                              const mlib_image *src,
 864                              const mlib_s32   *kernel,
 865                              mlib_s32         m,
 866                              mlib_s32         n,
 867                              mlib_s32         dm,
 868                              mlib_s32         dn,
 869                              mlib_s32         scale,
 870                              mlib_s32         cmask)
 871 {
 872   mlib_s32 buff[BUFF_SIZE], *buffd = buff;
 873   mlib_s32 l, off, kw;
 874   mlib_s32 d0, d1, shift1, shift2;
 875   mlib_s32 k0, k1, k2, k3, k4, k5, k6;
 876   mlib_s32 p0, p1, p2, p3, p4, p5, p6, p7;


 905 
 906   if (m*n > MAX_N*MAX_N) {
 907     k = mlib_malloc(sizeof(mlib_s32)*(m*n));
 908 
 909     if (k == NULL) {
 910       if (buffd != buff) mlib_free(buffd);
 911       return MLIB_FAILURE;
 912     }
 913   }
 914 
 915   for (i = 0; i < m*n; i++) {
 916     k[i] = kernel[i] >> shift1;
 917   }
 918 
 919   for (c = 0; c < nchannel; c++) {
 920     if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
 921 
 922     sl = adr_src + c;
 923     dl = adr_dst + c;
 924 
 925 #ifdef __SUNPRO_C
 926 #pragma pipeloop(0)
 927 #endif /* __SUNPRO_C */
 928     for (i = 0; i < wid; i++) buffd[i] = 0;
 929 
 930     for (j = 0; j < hgt; j++) {
 931       mlib_s32 *pk = k;
 932 
 933       for (l = 0; l < n; l++) {
 934         DTYPE *sp0 = sl + l*sll;
 935 
 936         for (off = 0; off < m;) {
 937           sp = sp0 + off*chan1;
 938           dp = dl;
 939 
 940           kw = m - off;
 941 
 942           if (kw > 2*MAX_KER) kw = MAX_KER; else
 943             if (kw > MAX_KER) kw = kw/2;
 944           off += kw;
 945 
 946           p2 = sp[0]; p3 = sp[chan1]; p4 = sp[chan2];
 947           p5 = sp[chan2 + chan1]; p6 = sp[chan2 + chan2]; p7 = sp[5*chan1];
 948 
 949           k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
 950           k4 = pk[4]; k5 = pk[5]; k6 = pk[6];
 951           pk += kw;
 952 
 953           sp += (kw - 1)*chan1;
 954 
 955           if (kw == 7) {
 956 
 957             if (l < (n - 1) || off < m) {
 958 #ifdef __SUNPRO_C
 959 #pragma pipeloop(0)
 960 #endif /* __SUNPRO_C */
 961               for (i = 0; i <= (wid - 2); i += 2) {
 962                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
 963                 p6 = sp[0];
 964                 p7 = sp[chan1];
 965 
 966                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6;
 967                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6;
 968 
 969                 sp += chan2;
 970               }
 971 
 972             } else {
 973 #ifdef __SUNPRO_C
 974 #pragma pipeloop(0)
 975 #endif /* __SUNPRO_C */
 976               for (i = 0; i <= (wid - 2); i += 2) {
 977                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
 978                 p6 = sp[0];
 979                 p7 = sp[chan1];
 980 
 981                 d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i    ]);
 982                 d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1]);
 983 
 984                 STORE_RES(dp[0    ], d0);
 985                 STORE_RES(dp[chan1], d1);
 986 
 987                 buffd[i    ] = 0;
 988                 buffd[i + 1] = 0;
 989 
 990                 sp += chan2;
 991                 dp += chan2;
 992               }
 993             }
 994 
 995           } else if (kw == 6) {
 996 
 997             if (l < (n - 1) || off < m) {
 998 #ifdef __SUNPRO_C
 999 #pragma pipeloop(0)
1000 #endif /* __SUNPRO_C */
1001               for (i = 0; i <= (wid - 2); i += 2) {
1002                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6;
1003                 p5 = sp[0];
1004                 p6 = sp[chan1];
1005 
1006                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5;
1007                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5;
1008 
1009                 sp += chan2;
1010               }
1011 
1012             } else {
1013 #ifdef __SUNPRO_C
1014 #pragma pipeloop(0)
1015 #endif /* __SUNPRO_C */
1016               for (i = 0; i <= (wid - 2); i += 2) {
1017                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6;
1018                 p5 = sp[0];
1019                 p6 = sp[chan1];
1020 
1021                 d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + buffd[i    ]);
1022                 d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + buffd[i + 1]);
1023 
1024                 STORE_RES(dp[0    ], d0);
1025                 STORE_RES(dp[chan1], d1);
1026 
1027                 buffd[i    ] = 0;
1028                 buffd[i + 1] = 0;
1029 
1030                 sp += chan2;
1031                 dp += chan2;
1032               }
1033             }
1034 
1035           } else if (kw == 5) {
1036 
1037             if (l < (n - 1) || off < m) {
1038 #ifdef __SUNPRO_C
1039 #pragma pipeloop(0)
1040 #endif /* __SUNPRO_C */
1041               for (i = 0; i <= (wid - 2); i += 2) {
1042                 p0 = p2; p1 = p3; p2 = p4; p3 = p5;
1043                 p4 = sp[0];
1044                 p5 = sp[chan1];
1045 
1046                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4;
1047                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4;
1048 
1049                 sp += chan2;
1050               }
1051 
1052             } else {
1053 #ifdef __SUNPRO_C
1054 #pragma pipeloop(0)
1055 #endif /* __SUNPRO_C */
1056               for (i = 0; i <= (wid - 2); i += 2) {
1057                 p0 = p2; p1 = p3; p2 = p4; p3 = p5;
1058                 p4 = sp[0];
1059                 p5 = sp[chan1];
1060 
1061                 d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + buffd[i    ]);
1062                 d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + buffd[i + 1]);
1063 
1064                 STORE_RES(dp[0    ], d0);
1065                 STORE_RES(dp[chan1], d1);
1066 
1067                 buffd[i    ] = 0;
1068                 buffd[i + 1] = 0;
1069 
1070                 sp += chan2;
1071                 dp += chan2;
1072               }
1073             }
1074 
1075           } else if (kw == 4) {
1076 
1077             if (l < (n - 1) || off < m) {
1078 #ifdef __SUNPRO_C
1079 #pragma pipeloop(0)
1080 #endif /* __SUNPRO_C */
1081               for (i = 0; i <= (wid - 2); i += 2) {
1082                 p0 = p2; p1 = p3; p2 = p4;
1083                 p3 = sp[0];
1084                 p4 = sp[chan1];
1085 
1086                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3;
1087                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3;
1088 
1089                 sp += chan2;
1090               }
1091 
1092             } else {
1093 #ifdef __SUNPRO_C
1094 #pragma pipeloop(0)
1095 #endif /* __SUNPRO_C */
1096               for (i = 0; i <= (wid - 2); i += 2) {
1097                 p0 = p2; p1 = p3; p2 = p4;
1098                 p3 = sp[0];
1099                 p4 = sp[chan1];
1100 
1101                 d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + buffd[i    ]);
1102                 d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + buffd[i + 1]);
1103 
1104                 STORE_RES(dp[0    ], d0);
1105                 STORE_RES(dp[chan1], d1);
1106 
1107                 buffd[i    ] = 0;
1108                 buffd[i + 1] = 0;
1109 
1110                 sp += chan2;
1111                 dp += chan2;
1112               }
1113             }
1114 
1115           } else if (kw == 3) {
1116 
1117             if (l < (n - 1) || off < m) {
1118 #ifdef __SUNPRO_C
1119 #pragma pipeloop(0)
1120 #endif /* __SUNPRO_C */
1121               for (i = 0; i <= (wid - 2); i += 2) {
1122                 p0 = p2; p1 = p3;
1123                 p2 = sp[0];
1124                 p3 = sp[chan1];
1125 
1126                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2;
1127                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2;
1128 
1129                 sp += chan2;
1130               }
1131 
1132             } else {
1133 #ifdef __SUNPRO_C
1134 #pragma pipeloop(0)
1135 #endif /* __SUNPRO_C */
1136               for (i = 0; i <= (wid - 2); i += 2) {
1137                 p0 = p2; p1 = p3;
1138                 p2 = sp[0];
1139                 p3 = sp[chan1];
1140 
1141                 d0 = (p0*k0 + p1*k1 + p2*k2 + buffd[i    ]);
1142                 d1 = (p1*k0 + p2*k1 + p3*k2 + buffd[i + 1]);
1143 
1144                 STORE_RES(dp[0    ], d0);
1145                 STORE_RES(dp[chan1], d1);
1146 
1147                 buffd[i    ] = 0;
1148                 buffd[i + 1] = 0;
1149 
1150                 sp += chan2;
1151                 dp += chan2;
1152               }
1153             }
1154 
1155           } else if (kw == 2) {
1156 
1157             if (l < (n - 1) || off < m) {
1158 #ifdef __SUNPRO_C
1159 #pragma pipeloop(0)
1160 #endif /* __SUNPRO_C */
1161               for (i = 0; i <= (wid - 2); i += 2) {
1162                 p0 = p2;
1163                 p1 = sp[0];
1164                 p2 = sp[chan1];
1165 
1166                 buffd[i    ] += p0*k0 + p1*k1;
1167                 buffd[i + 1] += p1*k0 + p2*k1;
1168 
1169                 sp += chan2;
1170               }
1171 
1172             } else {
1173 #ifdef __SUNPRO_C
1174 #pragma pipeloop(0)
1175 #endif /* __SUNPRO_C */
1176               for (i = 0; i <= (wid - 2); i += 2) {
1177                 p0 = p2;
1178                 p1 = sp[0];
1179                 p2 = sp[chan1];
1180 
1181                 d0 = (p0*k0 + p1*k1 + buffd[i    ]);
1182                 d1 = (p1*k0 + p2*k1 + buffd[i + 1]);
1183 
1184                 STORE_RES(dp[0    ], d0);
1185                 STORE_RES(dp[chan1], d1);
1186 
1187                 buffd[i    ] = 0;
1188                 buffd[i + 1] = 0;
1189 
1190                 sp += chan2;
1191                 dp += chan2;
1192               }
1193             }
1194 
1195           } else /*if (kw == 1)*/ {
1196 
1197             if (l < (n - 1) || off < m) {
1198 #ifdef __SUNPRO_C
1199 #pragma pipeloop(0)
1200 #endif /* __SUNPRO_C */
1201               for (i = 0; i <= (wid - 2); i += 2) {
1202                 p0 = sp[0];
1203                 p1 = sp[chan1];
1204 
1205                 buffd[i    ] += p0*k0;
1206                 buffd[i + 1] += p1*k0;
1207 
1208                 sp += chan2;
1209               }
1210 
1211             } else {
1212 #ifdef __SUNPRO_C
1213 #pragma pipeloop(0)
1214 #endif /* __SUNPRO_C */
1215               for (i = 0; i <= (wid - 2); i += 2) {
1216                 p0 = sp[0];
1217                 p1 = sp[chan1];
1218 
1219                 d0 = (p0*k0 + buffd[i    ]);
1220                 d1 = (p1*k0 + buffd[i + 1]);
1221 
1222                 STORE_RES(dp[0    ], d0);
1223                 STORE_RES(dp[chan1], d1);
1224 
1225                 buffd[i    ] = 0;
1226                 buffd[i + 1] = 0;
1227 
1228                 sp += chan2;
1229                 dp += chan2;
1230               }
1231             }
1232           }
1233         }
1234       }


1247             pk ++;
1248           }
1249         }
1250 
1251         STORE_RES(dp[0], s);
1252 
1253         sp += chan1;
1254         dp += chan1;
1255       }
1256 
1257       sl += sll;
1258       dl += dll;
1259     }
1260   }
1261 
1262   if (buffd != buff) mlib_free(buffd);
1263   if (k != k_locl) mlib_free(k);
1264 
1265   return MLIB_SUCCESS;
1266 }
1267 
1268 /***************************************************************/
1269 #endif /* __sparc ( for x86, using integer multiplies is faster ) */
1270 
1271 /***************************************************************/
   1 /*
   2  * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any


 146 /***************************************************************/
     /*
      * DEF_VARS(type): declares the local variables shared by the
      * convolution routines in this file.
      *
      *   adr_src, sl, sp - source pointers of the given element type
      *                     (sp starts as NULL)
      *   adr_dst, dl, dp - destination pointers of the given element type
      *                     (dp starts as NULL)
      *   pbuff           - FTYPE pointer initialised to `buff`, so an
      *                     object named `buff` must already be declared
      *                     where the macro is expanded
      *   wid, hgt, sll, dll, nchannel, chan1, i, j, c - mlib_s32 locals
      *
      * The final declaration has no terminating semicolon; the expansion
      * site supplies it, e.g. `DEF_VARS(DTYPE);`.
      */
 147 #define DEF_VARS(type)                                          \
 148   type     *adr_src, *sl, *sp = NULL;                           \
 149   type     *adr_dst, *dl, *dp = NULL;                           \
 150   FTYPE    *pbuff = buff;                                       \
 151   mlib_s32 wid, hgt, sll, dll;                                  \
 152   mlib_s32 nchannel, chan1;                                     \
 153   mlib_s32 i, j, c
 154 
 155 /***************************************************************/
     /*
      * GET_SRC_DST_PARAMETERS(type): fills in the locals declared by
      * DEF_VARS from the images `src` and `dst`, which must be in scope
      * at the expansion site.
      *
      *   hgt, wid, nchannel - height, width and channel count, all read
      *                        from src only (dst is not queried for them)
      *   sll, dll           - src/dst stride divided by sizeof(type)
      *                        (presumably converting a byte stride into
      *                        an element stride - confirm against
      *                        mlib_ImageGetStride)
      *   adr_src, adr_dst   - image data pointers cast to `type *`
      *
      * The last assignment has no terminating semicolon; the expansion
      * site supplies it.
      */
 156 #define GET_SRC_DST_PARAMETERS(type)                            \
 157   hgt = mlib_ImageGetHeight(src);                               \
 158   wid = mlib_ImageGetWidth(src);                                \
 159   nchannel = mlib_ImageGetChannels(src);                        \
 160   sll = mlib_ImageGetStride(src) / sizeof(type);                \
 161   dll = mlib_ImageGetStride(dst) / sizeof(type);                \
 162   adr_src = (type *)mlib_ImageGetData(src);                     \
 163   adr_dst = (type *)mlib_ImageGetData(dst)
 164 
 165 /***************************************************************/


 166 #if IMG_TYPE == 1
 167 
 168 /* Test for the presence of any "1" bit in bits
 169    8 to 31 of val. If present, then val is either
 170    negative or >255. If over/underflows of 8 bits
 171    are uncommon, then this technique can be a win,
 172    since only a single test, rather than two, is
 173    necessary to determine if clamping is needed.
 174    On the other hand, if over/underflows are common,
 175    it adds an extra test.
 176 */
     /*
      * Usage notes for this variant of CLAMP_STORE(dst, val):
      *   - out-of-range values below MLIB_U8_MIN store MLIB_U8_MIN, all
      *     other out-of-range values store MLIB_U8_MAX; in-range values
      *     are stored as (mlib_u8)val.
      *   - `val` is expanded several times and is not parenthesised, so
      *     pass a plain variable without side effects.
      *   - the expansion is a braced if/else, not a do { } while (0)
      *     block; a `;` written after the macro call becomes a separate
      *     empty statement.
      */
 177 #define CLAMP_STORE(dst, val)                                   \
 178   if (val & 0xffffff00) {                                       \
 179     if (val < MLIB_U8_MIN)                                      \
 180       dst = MLIB_U8_MIN;                                        \
 181     else                                                        \
 182       dst = MLIB_U8_MAX;                                        \
 183   } else {                                                      \
 184     dst = (mlib_u8)val;                                         \
 185   }


 188 
     /*
      * CLAMP_STORE(dst, val), signed 16-bit variant: saturating store.
      *   val >= MLIB_S16_MAX  ->  dst = MLIB_S16_MAX
      *   val <= MLIB_S16_MIN  ->  dst = MLIB_S16_MIN
      *   otherwise            ->  dst = (mlib_s16)val
      *
      * `val` is expanded up to three times and is not parenthesised, so
      * pass a plain variable without side effects. The expansion is an
      * unbraced if / else-if / else chain with no trailing semicolon:
      * the expansion site supplies the `;`, and the macro should not be
      * used as the unbraced body of an outer if that has its own else.
      */
 189 #define CLAMP_STORE(dst, val)                                   \
 190   if (val >= MLIB_S16_MAX)                                      \
 191     dst = MLIB_S16_MAX;                                         \
 192   else if (val <= MLIB_S16_MIN)                                 \
 193     dst = MLIB_S16_MIN;                                         \
 194   else                                                          \
 195     dst = (mlib_s16)val
 196 
 197 #elif IMG_TYPE == 3
 198 
     /*
      * CLAMP_STORE(dst, val), variant selected when IMG_TYPE == 3:
      * saturating store into an unsigned 16-bit destination.
      *   val >= MLIB_U16_MAX  ->  dst = MLIB_U16_MAX
      *   val <= MLIB_U16_MIN  ->  dst = MLIB_U16_MIN
      *   otherwise            ->  dst = (mlib_u16)val
      *
      * `val` is expanded up to three times and is not parenthesised, so
      * pass a plain variable without side effects. The expansion is an
      * unbraced if / else-if / else chain with no trailing semicolon:
      * the expansion site supplies the `;`, and the macro should not be
      * used as the unbraced body of an outer if that has its own else.
      */
 199 #define CLAMP_STORE(dst, val)                                   \
 200   if (val >= MLIB_U16_MAX)                                      \
 201     dst = MLIB_U16_MAX;                                         \
 202   else if (val <= MLIB_U16_MIN)                                 \
 203     dst = MLIB_U16_MIN;                                         \
 204   else                                                          \
 205     dst = (mlib_u16)val
 206 
 207 #endif /* IMG_TYPE == 1 */

 208 
 209 /***************************************************************/
     /*
      * MAX_KER: upper bound on the number of kernel columns handled in
      *          one pass of the MxN loops. A remaining width kw larger
      *          than 2*MAX_KER is cut to MAX_KER, and one larger than
      *          MAX_KER is halved, so the unrolled kw == 7 .. 1 branches
      *          cover every case.
      * MAX_N:   the integer MxN routine heap-allocates its kernel copy
      *          when m*n exceeds MAX_N*MAX_N (presumably a local array of
      *          that size is used otherwise - its declaration is not
      *          visible in this excerpt).
      */
 210 #define MAX_KER   7
 211 #define MAX_N    15
 212 
 213 static mlib_status mlib_ImageConv1xN(mlib_image       *dst,
 214                                      const mlib_image *src,
 215                                      const mlib_d64   *k,
 216                                      mlib_s32         n,
 217                                      mlib_s32         dn,
 218                                      mlib_s32         cmask)
 219 {
 220   FTYPE    buff[BUFF_SIZE];
 221   mlib_s32 off, kh;
 222   mlib_s32 d0, d1;
 223   const FTYPE    *pk;
 224   FTYPE    k0, k1, k2, k3;
 225   FTYPE    p0, p1, p2, p3, p4;
 226   DEF_VARS(DTYPE);
 227   DTYPE    *sl_c, *dl_c, *sl0;


 238   if (max_hsize > BUFF_SIZE) {
 239     pbuff = mlib_malloc(sizeof(FTYPE)*max_hsize);
 240   }
 241 
 242   chan1 = nchannel;
 243 
 244   sl_c = adr_src;
 245   dl_c = adr_dst;
 246 
 247   for (l = 0; l < hgt; l += hsize) {
 248     hsize = hgt - l;
 249 
 250     if (hsize > max_hsize) hsize = max_hsize;
 251 
 252     for (c = 0; c < nchannel; c++) {
 253       if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
 254 
 255       sl = sl_c + c;
 256       dl = dl_c + c;
 257 



 258       for (j = 0; j < hsize; j++) pbuff[j] = 0.0;
 259 
 260       for (i = 0; i < wid; i++) {
 261         sl0 = sl;
 262 
 263         for (off = 0; off < (n - 4); off += 4) {
 264           pk = k + off;
 265           sp = sl0;
 266 
 267           k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
 268           p2 = sp[0]; p3 = sp[sll]; p4 = sp[2*sll];
 269           sp += 3*sll;
 270 



 271           for (j = 0; j < hsize; j += 2) {
 272             p0 = p2; p1 = p3; p2 = p4;
 273             p3 = sp[0];
 274             p4 = sp[sll];
 275 
 276             pbuff[j    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3;
 277             pbuff[j + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3;
 278 
 279             sp += 2*sll;
 280           }
 281 
 282           sl0 += 4*sll;
 283         }
 284 
 285         pk = k + off;
 286         sp = sl0;
 287 
 288         k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
 289         p2 = sp[0]; p3 = sp[sll]; p4 = sp[2*sll];
 290 
 291         dp = dl;
 292         kh = n - off;
 293 
 294         if (kh == 4) {
 295           sp += 3*sll;
 296 



 297           for (j = 0; j <= (hsize - 2); j += 2) {
 298             p0 = p2; p1 = p3; p2 = p4;
 299             p3 = sp[0];
 300             p4 = sp[sll];
 301 
 302             d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + pbuff[j]);
 303             d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + pbuff[j + 1]);
 304 
 305             dp[0  ] = FROM_S32(d0);
 306             dp[dll] = FROM_S32(d1);
 307 
 308             pbuff[j] = 0;
 309             pbuff[j + 1] = 0;
 310 
 311             sp += 2*sll;
 312             dp += 2*dll;
 313           }
 314 
 315           if (j < hsize) {
 316             p0 = p2; p1 = p3; p2 = p4;
 317             p3 = sp[0];
 318 
 319             d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + pbuff[j]);
 320 
 321             pbuff[j] = 0;
 322 
 323             dp[0] = FROM_S32(d0);
 324           }
 325 
 326         } else if (kh == 3) {
 327           sp += 2*sll;
 328 



 329           for (j = 0; j <= (hsize - 2); j += 2) {
 330             p0 = p2; p1 = p3;
 331             p2 = sp[0];
 332             p3 = sp[sll];
 333 
 334             d0 = D2I(p0*k0 + p1*k1 + p2*k2 + pbuff[j]);
 335             d1 = D2I(p1*k0 + p2*k1 + p3*k2 + pbuff[j + 1]);
 336 
 337             dp[0  ] = FROM_S32(d0);
 338             dp[dll] = FROM_S32(d1);
 339 
 340             pbuff[j] = 0;
 341             pbuff[j + 1] = 0;
 342 
 343             sp += 2*sll;
 344             dp += 2*dll;
 345           }
 346 
 347           if (j < hsize) {
 348             p0 = p2; p1 = p3;
 349             p2 = sp[0];
 350 
 351             d0 = D2I(p0*k0 + p1*k1 + p2*k2 + pbuff[j]);
 352 
 353             pbuff[j] = 0;
 354 
 355             dp[0] = FROM_S32(d0);
 356           }
 357 
 358         } else if (kh == 2) {
 359           sp += sll;
 360 



 361           for (j = 0; j <= (hsize - 2); j += 2) {
 362             p0 = p2;
 363             p1 = sp[0];
 364             p2 = sp[sll];
 365 
 366             d0 = D2I(p0*k0 + p1*k1 + pbuff[j]);
 367             d1 = D2I(p1*k0 + p2*k1 + pbuff[j + 1]);
 368 
 369             dp[0  ] = FROM_S32(d0);
 370             dp[dll] = FROM_S32(d1);
 371 
 372             pbuff[j] = 0;
 373             pbuff[j + 1] = 0;
 374 
 375             sp += 2*sll;
 376             dp += 2*dll;
 377           }
 378 
 379           if (j < hsize) {
 380             p0 = p2;
 381             p1 = sp[0];
 382 
 383             d0 = D2I(p0*k0 + p1*k1 + pbuff[j]);
 384 
 385             pbuff[j] = 0;
 386 
 387             dp[0] = FROM_S32(d0);
 388           }
 389 
 390         } else /* if (kh == 1) */ {



 391           for (j = 0; j < hsize; j++) {
 392             p0 = sp[0];
 393 
 394             d0 = D2I(p0*k0 + pbuff[j]);
 395 
 396             dp[0] = FROM_S32(d0);
 397 
 398             pbuff[j] = 0;
 399 
 400             sp += sll;
 401             dp += dll;
 402           }
 403         }
 404 
 405         sl += chan1;
 406         dl += chan1;
 407       }
 408     }
 409 
 410     sl_c += max_hsize*sll;


 483   buffd = buffs[n] + wid;
 484   buffo = (mlib_s32*)(buffd + wid);
 485   buffi = buffo + (wid &~ 1);
 486 
 487   chan1 = nchannel;
 488   chan2 = chan1 + chan1;
 489 
 490   wid -= (m - 1);
 491   hgt -= (n - 1);
 492   adr_dst += dn*dll + dm*nchannel;
 493 
 494   for (c = 0; c < nchannel; c++) {
 495     if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
 496 
 497     sl = adr_src + c;
 498     dl = adr_dst + c;
 499 
 500     for (l = 0; l < n; l++) {
 501       FTYPE    *buff = buffs[l];
 502 



 503       for (i = 0; i < wid + (m - 1); i++) {
 504         buff[i] = (FTYPE)sl[i*chan1];
 505       }
 506 
 507       sl += sll;
 508     }
 509 
 510     buff_ind = 0;
 511 



 512     for (i = 0; i < wid; i++) buffd[i] = 0.0;
 513 
 514     for (j = 0; j < hgt; j++) {
 515       FTYPE    **buffc = buffs + buff_ind;
 516       FTYPE    *buffn = buffc[n];
 517       FTYPE    *pk = k;
 518 
 519       for (l = 0; l < n; l++) {
 520         FTYPE    *buff_l = buffc[l];
 521 
 522         for (off = 0; off < m;) {
 523           FTYPE    *buff = buff_l + off;
 524 
 525           kw = m - off;
 526 
 527           if (kw > 2*MAX_KER) kw = MAX_KER; else
 528             if (kw > MAX_KER) kw = kw/2;
 529           off += kw;
 530 
 531           sp = sl;
 532           dp = dl;
 533 
 534           p2 = buff[0]; p3 = buff[1]; p4 = buff[2];
 535           p5 = buff[3]; p6 = buff[4]; p7 = buff[5];
 536 
 537           k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
 538           k4 = pk[4]; k5 = pk[5]; k6 = pk[6];
 539           pk += kw;
 540 
 541           if (kw == 7) {
 542 
 543             if (l < (n - 1) || off < m) {



 544               for (i = 0; i <= (wid - 2); i += 2) {
 545                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
 546 
 547                 p6 = buff[i + 6]; p7 = buff[i + 7];
 548 
 549                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6;
 550                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6;
 551               }
 552 
 553             } else {



 554               for (i = 0; i <= (wid - 2); i += 2) {
 555                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
 556 
 557                 p6 = buff[i + 6]; p7 = buff[i + 7];
 558 
 559                 LOAD_BUFF(buffi);
 560 
 561                 dd.d64 = *(FTYPE   *)(buffi + i);
 562                 buffn[i    ] = (FTYPE)dd.i32s.i0;
 563                 buffn[i + 1] = (FTYPE)dd.i32s.i1;
 564 
 565                 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i    ]);
 566                 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1]);
 567 
 568                 dp[0    ] = FROM_S32(d0);
 569                 dp[chan1] = FROM_S32(d1);
 570 
 571                 buffd[i    ] = 0.0;
 572                 buffd[i + 1] = 0.0;
 573 
 574                 sp += chan2;
 575                 dp += chan2;
 576               }
 577             }
 578 
 579           } else if (kw == 6) {
 580 
 581             if (l < (n - 1) || off < m) {



 582               for (i = 0; i <= (wid - 2); i += 2) {
 583                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6;
 584 
 585                 p5 = buff[i + 5]; p6 = buff[i + 6];
 586 
 587                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5;
 588                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5;
 589               }
 590 
 591             } else {



 592               for (i = 0; i <= (wid - 2); i += 2) {
 593                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6;
 594 
 595                 p5 = buff[i + 5]; p6 = buff[i + 6];
 596 
 597                 buffn[i    ] = (FTYPE)sp[0];
 598                 buffn[i + 1] = (FTYPE)sp[chan1];
 599 
 600                 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + buffd[i    ]);
 601                 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + buffd[i + 1]);
 602 
 603                 dp[0    ] = FROM_S32(d0);
 604                 dp[chan1] = FROM_S32(d1);
 605 
 606                 buffd[i    ] = 0.0;
 607                 buffd[i + 1] = 0.0;
 608 
 609                 sp += chan2;
 610                 dp += chan2;
 611               }
 612             }
 613 
 614           } else if (kw == 5) {
 615 
 616             if (l < (n - 1) || off < m) {



 617               for (i = 0; i <= (wid - 2); i += 2) {
 618                 p0 = p2; p1 = p3; p2 = p4; p3 = p5;
 619 
 620                 p4 = buff[i + 4]; p5 = buff[i + 5];
 621 
 622                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4;
 623                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4;
 624               }
 625 
 626             } else {



 627               for (i = 0; i <= (wid - 2); i += 2) {
 628                 p0 = p2; p1 = p3; p2 = p4; p3 = p5;
 629 
 630                 p4 = buff[i + 4]; p5 = buff[i + 5];
 631 
 632                 buffn[i    ] = (FTYPE)sp[0];
 633                 buffn[i + 1] = (FTYPE)sp[chan1];
 634 
 635                 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + buffd[i    ]);
 636                 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + buffd[i + 1]);
 637 
 638                 dp[0    ] = FROM_S32(d0);
 639                 dp[chan1] = FROM_S32(d1);
 640 
 641                 buffd[i    ] = 0.0;
 642                 buffd[i + 1] = 0.0;
 643 
 644                 sp += chan2;
 645                 dp += chan2;
 646               }
 647             }
 648 
 649           } else if (kw == 4) {
 650 
 651             if (l < (n - 1) || off < m) {



 652               for (i = 0; i <= (wid - 2); i += 2) {
 653                 p0 = p2; p1 = p3; p2 = p4;
 654 
 655                 p3 = buff[i + 3]; p4 = buff[i + 4];
 656 
 657                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3;
 658                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3;
 659               }
 660 
 661             } else {



 662               for (i = 0; i <= (wid - 2); i += 2) {
 663                 p0 = p2; p1 = p3; p2 = p4;
 664 
 665                 p3 = buff[i + 3]; p4 = buff[i + 4];
 666 
 667                 buffn[i    ] = (FTYPE)sp[0];
 668                 buffn[i + 1] = (FTYPE)sp[chan1];
 669 
 670                 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + buffd[i    ]);
 671                 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + buffd[i + 1]);
 672 
 673                 dp[0    ] = FROM_S32(d0);
 674                 dp[chan1] = FROM_S32(d1);
 675 
 676                 buffd[i    ] = 0.0;
 677                 buffd[i + 1] = 0.0;
 678 
 679                 sp += chan2;
 680                 dp += chan2;
 681               }
 682             }
 683 
 684           } else if (kw == 3) {
 685 
 686             if (l < (n - 1) || off < m) {



 687               for (i = 0; i <= (wid - 2); i += 2) {
 688                 p0 = p2; p1 = p3;
 689 
 690                 p2 = buff[i + 2]; p3 = buff[i + 3];
 691 
 692                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2;
 693                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2;
 694               }
 695 
 696             } else {



 697               for (i = 0; i <= (wid - 2); i += 2) {
 698                 p0 = p2; p1 = p3;
 699 
 700                 p2 = buff[i + 2]; p3 = buff[i + 3];
 701 
 702                 buffn[i    ] = (FTYPE)sp[0];
 703                 buffn[i + 1] = (FTYPE)sp[chan1];
 704 
 705                 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + buffd[i    ]);
 706                 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + buffd[i + 1]);
 707 
 708                 dp[0    ] = FROM_S32(d0);
 709                 dp[chan1] = FROM_S32(d1);
 710 
 711                 buffd[i    ] = 0.0;
 712                 buffd[i + 1] = 0.0;
 713 
 714                 sp += chan2;
 715                 dp += chan2;
 716               }
 717             }
 718 
 719           } else /*if (kw == 2)*/ {
 720 
 721             if (l < (n - 1) || off < m) {



 722               for (i = 0; i <= (wid - 2); i += 2) {
 723                 p0 = p2;
 724 
 725                 p1 = buff[i + 1]; p2 = buff[i + 2];
 726 
 727                 buffd[i    ] += p0*k0 + p1*k1;
 728                 buffd[i + 1] += p1*k0 + p2*k1;
 729               }
 730 
 731             } else {



 732               for (i = 0; i <= (wid - 2); i += 2) {
 733                 p0 = p2;
 734 
 735                 p1 = buff[i + 1]; p2 = buff[i + 2];
 736 
 737                 buffn[i    ] = (FTYPE)sp[0];
 738                 buffn[i + 1] = (FTYPE)sp[chan1];
 739 
 740                 d0 = D2I(p0*k0 + p1*k1 + buffd[i    ]);
 741                 d1 = D2I(p1*k0 + p2*k1 + buffd[i + 1]);
 742 
 743                 dp[0    ] = FROM_S32(d0);
 744                 dp[chan1] = FROM_S32(d1);
 745 
 746                 buffd[i    ] = 0.0;
 747                 buffd[i + 1] = 0.0;
 748 
 749                 sp += chan2;
 750                 dp += chan2;
 751               }


 773         sp += chan1;
 774         dp += chan1;
 775       }
 776 
 777       for (l = 0; l < (m - 1); l++) buffn[wid + l] = sp[l*chan1];
 778 
 779       /* next line */
 780       sl += sll;
 781       dl += dll;
 782 
 783       buff_ind++;
 784 
 785       if (buff_ind >= n + 1) buff_ind = 0;
 786     }
 787   }
 788 
 789   FREE_AND_RETURN_STATUS;
 790 }
 791 
 792 /***************************************************************/
 793 /* for x86, using integer multiplies is faster */
 794 
     /*
      * STORE_RES(res, x): shifts the accumulator x right by `shift2`
      * IN PLACE (x is modified) and then stores it into res through
      * CLAMP_STORE, which saturates it to the destination range.
      *
      * A variable named `shift2` must be in scope, and x must be a
      * modifiable lvalue. The macro expands to two statements, so it
      * must not be used as the unbraced body of an if or a loop.
      */
 795 #define STORE_RES(res, x)                                       \
 796   x >>= shift2;                                                 \
 797   CLAMP_STORE(res, x)
 798 
 799 mlib_status CONV_FUNC_I(MxN)(mlib_image       *dst,
 800                              const mlib_image *src,
 801                              const mlib_s32   *kernel,
 802                              mlib_s32         m,
 803                              mlib_s32         n,
 804                              mlib_s32         dm,
 805                              mlib_s32         dn,
 806                              mlib_s32         scale,
 807                              mlib_s32         cmask)
 808 {
 809   mlib_s32 buff[BUFF_SIZE], *buffd = buff;
 810   mlib_s32 l, off, kw;
 811   mlib_s32 d0, d1, shift1, shift2;
 812   mlib_s32 k0, k1, k2, k3, k4, k5, k6;
 813   mlib_s32 p0, p1, p2, p3, p4, p5, p6, p7;


 842 
 843   if (m*n > MAX_N*MAX_N) {
 844     k = mlib_malloc(sizeof(mlib_s32)*(m*n));
 845 
 846     if (k == NULL) {
 847       if (buffd != buff) mlib_free(buffd);
 848       return MLIB_FAILURE;
 849     }
 850   }
 851 
 852   for (i = 0; i < m*n; i++) {
 853     k[i] = kernel[i] >> shift1;
 854   }
 855 
 856   for (c = 0; c < nchannel; c++) {
 857     if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
 858 
 859     sl = adr_src + c;
 860     dl = adr_dst + c;
 861 



 862     for (i = 0; i < wid; i++) buffd[i] = 0;
 863 
 864     for (j = 0; j < hgt; j++) {
 865       mlib_s32 *pk = k;
 866 
 867       for (l = 0; l < n; l++) {
 868         DTYPE *sp0 = sl + l*sll;
 869 
 870         for (off = 0; off < m;) {
 871           sp = sp0 + off*chan1;
 872           dp = dl;
 873 
 874           kw = m - off;
 875 
 876           if (kw > 2*MAX_KER) kw = MAX_KER; else
 877             if (kw > MAX_KER) kw = kw/2;
 878           off += kw;
 879 
 880           p2 = sp[0]; p3 = sp[chan1]; p4 = sp[chan2];
 881           p5 = sp[chan2 + chan1]; p6 = sp[chan2 + chan2]; p7 = sp[5*chan1];
 882 
 883           k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
 884           k4 = pk[4]; k5 = pk[5]; k6 = pk[6];
 885           pk += kw;
 886 
 887           sp += (kw - 1)*chan1;
 888 
 889           if (kw == 7) {
 890 
 891             if (l < (n - 1) || off < m) {



 892               for (i = 0; i <= (wid - 2); i += 2) {
 893                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
 894                 p6 = sp[0];
 895                 p7 = sp[chan1];
 896 
 897                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6;
 898                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6;
 899 
 900                 sp += chan2;
 901               }
 902 
 903             } else {



 904               for (i = 0; i <= (wid - 2); i += 2) {
 905                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
 906                 p6 = sp[0];
 907                 p7 = sp[chan1];
 908 
 909                 d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i    ]);
 910                 d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1]);
 911 
 912                 STORE_RES(dp[0    ], d0);
 913                 STORE_RES(dp[chan1], d1);
 914 
 915                 buffd[i    ] = 0;
 916                 buffd[i + 1] = 0;
 917 
 918                 sp += chan2;
 919                 dp += chan2;
 920               }
 921             }
 922 
 923           } else if (kw == 6) {
 924 
 925             if (l < (n - 1) || off < m) {



 926               for (i = 0; i <= (wid - 2); i += 2) {
 927                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6;
 928                 p5 = sp[0];
 929                 p6 = sp[chan1];
 930 
 931                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5;
 932                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5;
 933 
 934                 sp += chan2;
 935               }
 936 
 937             } else {



 938               for (i = 0; i <= (wid - 2); i += 2) {
 939                 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6;
 940                 p5 = sp[0];
 941                 p6 = sp[chan1];
 942 
 943                 d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + buffd[i    ]);
 944                 d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + buffd[i + 1]);
 945 
 946                 STORE_RES(dp[0    ], d0);
 947                 STORE_RES(dp[chan1], d1);
 948 
 949                 buffd[i    ] = 0;
 950                 buffd[i + 1] = 0;
 951 
 952                 sp += chan2;
 953                 dp += chan2;
 954               }
 955             }
 956 
 957           } else if (kw == 5) {
 958 
 959             if (l < (n - 1) || off < m) {



 960               for (i = 0; i <= (wid - 2); i += 2) {
 961                 p0 = p2; p1 = p3; p2 = p4; p3 = p5;
 962                 p4 = sp[0];
 963                 p5 = sp[chan1];
 964 
 965                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4;
 966                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4;
 967 
 968                 sp += chan2;
 969               }
 970 
 971             } else {



 972               for (i = 0; i <= (wid - 2); i += 2) {
 973                 p0 = p2; p1 = p3; p2 = p4; p3 = p5;
 974                 p4 = sp[0];
 975                 p5 = sp[chan1];
 976 
 977                 d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + buffd[i    ]);
 978                 d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + buffd[i + 1]);
 979 
 980                 STORE_RES(dp[0    ], d0);
 981                 STORE_RES(dp[chan1], d1);
 982 
 983                 buffd[i    ] = 0;
 984                 buffd[i + 1] = 0;
 985 
 986                 sp += chan2;
 987                 dp += chan2;
 988               }
 989             }
 990 
 991           } else if (kw == 4) {
 992 
 993             if (l < (n - 1) || off < m) {



 994               for (i = 0; i <= (wid - 2); i += 2) {
 995                 p0 = p2; p1 = p3; p2 = p4;
 996                 p3 = sp[0];
 997                 p4 = sp[chan1];
 998 
 999                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2 + p3*k3;
1000                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3;
1001 
1002                 sp += chan2;
1003               }
1004 
1005             } else {



1006               for (i = 0; i <= (wid - 2); i += 2) {
1007                 p0 = p2; p1 = p3; p2 = p4;
1008                 p3 = sp[0];
1009                 p4 = sp[chan1];
1010 
1011                 d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + buffd[i    ]);
1012                 d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + buffd[i + 1]);
1013 
1014                 STORE_RES(dp[0    ], d0);
1015                 STORE_RES(dp[chan1], d1);
1016 
1017                 buffd[i    ] = 0;
1018                 buffd[i + 1] = 0;
1019 
1020                 sp += chan2;
1021                 dp += chan2;
1022               }
1023             }
1024 
1025           } else if (kw == 3) {
1026 
1027             if (l < (n - 1) || off < m) {



1028               for (i = 0; i <= (wid - 2); i += 2) {
1029                 p0 = p2; p1 = p3;
1030                 p2 = sp[0];
1031                 p3 = sp[chan1];
1032 
1033                 buffd[i    ] += p0*k0 + p1*k1 + p2*k2;
1034                 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2;
1035 
1036                 sp += chan2;
1037               }
1038 
1039             } else {



1040               for (i = 0; i <= (wid - 2); i += 2) {
1041                 p0 = p2; p1 = p3;
1042                 p2 = sp[0];
1043                 p3 = sp[chan1];
1044 
1045                 d0 = (p0*k0 + p1*k1 + p2*k2 + buffd[i    ]);
1046                 d1 = (p1*k0 + p2*k1 + p3*k2 + buffd[i + 1]);
1047 
1048                 STORE_RES(dp[0    ], d0);
1049                 STORE_RES(dp[chan1], d1);
1050 
1051                 buffd[i    ] = 0;
1052                 buffd[i + 1] = 0;
1053 
1054                 sp += chan2;
1055                 dp += chan2;
1056               }
1057             }
1058 
1059           } else if (kw == 2) {
1060 
1061             if (l < (n - 1) || off < m) {



1062               for (i = 0; i <= (wid - 2); i += 2) {
1063                 p0 = p2;
1064                 p1 = sp[0];
1065                 p2 = sp[chan1];
1066 
1067                 buffd[i    ] += p0*k0 + p1*k1;
1068                 buffd[i + 1] += p1*k0 + p2*k1;
1069 
1070                 sp += chan2;
1071               }
1072 
1073             } else {



1074               for (i = 0; i <= (wid - 2); i += 2) {
1075                 p0 = p2;
1076                 p1 = sp[0];
1077                 p2 = sp[chan1];
1078 
1079                 d0 = (p0*k0 + p1*k1 + buffd[i    ]);
1080                 d1 = (p1*k0 + p2*k1 + buffd[i + 1]);
1081 
1082                 STORE_RES(dp[0    ], d0);
1083                 STORE_RES(dp[chan1], d1);
1084 
1085                 buffd[i    ] = 0;
1086                 buffd[i + 1] = 0;
1087 
1088                 sp += chan2;
1089                 dp += chan2;
1090               }
1091             }
1092 
1093           } else /*if (kw == 1)*/ {
1094 
1095             if (l < (n - 1) || off < m) {



1096               for (i = 0; i <= (wid - 2); i += 2) {
1097                 p0 = sp[0];
1098                 p1 = sp[chan1];
1099 
1100                 buffd[i    ] += p0*k0;
1101                 buffd[i + 1] += p1*k0;
1102 
1103                 sp += chan2;
1104               }
1105 
1106             } else {



1107               for (i = 0; i <= (wid - 2); i += 2) {
1108                 p0 = sp[0];
1109                 p1 = sp[chan1];
1110 
1111                 d0 = (p0*k0 + buffd[i    ]);
1112                 d1 = (p1*k0 + buffd[i + 1]);
1113 
1114                 STORE_RES(dp[0    ], d0);
1115                 STORE_RES(dp[chan1], d1);
1116 
1117                 buffd[i    ] = 0;
1118                 buffd[i + 1] = 0;
1119 
1120                 sp += chan2;
1121                 dp += chan2;
1122               }
1123             }
1124           }
1125         }
1126       }


1139             pk ++;
1140           }
1141         }
1142 
1143         STORE_RES(dp[0], s);
1144 
1145         sp += chan1;
1146         dp += chan1;
1147       }
1148 
1149       sl += sll;
1150       dl += dll;
1151     }
1152   }
1153 
1154   if (buffd != buff) mlib_free(buffd);
1155   if (k != k_locl) mlib_free(k);
1156 
1157   return MLIB_SUCCESS;
1158 }



1159 
1160 /***************************************************************/
< prev index next >