1 /* 2 * Copyright (c) 2000, 2003, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 27 28 /* 29 * FUNCTION 30 * Internal functions for mlib_ImageConv* on U8 type 31 * and MLIB_EDGE_DST_NO_WRITE mask 32 * 33 */ 34 35 /***************************************************************/ 36 37 #include <vis_proto.h> 38 #include <mlib_image.h> 39 #include <mlib_ImageCheck.h> 40 #include <mlib_ImageColormap.h> 41 42 /* 43 This defines switches between functions in 44 files: mlib_v_ImageConv_8nw.c, 45 mlib_v_ImageConvIndex3_8_16nw.c, 46 mlib_v_ImageConvIndex4_8_16nw.c, 47 mlib_v_ImageConvIndex3_8_16nw.c, 48 mlib_v_ImageConvIndex4_8_16nw.c 49 */ 50 51 #define CONV_INDEX 52 53 #define DTYPE mlib_s16 54 #define LTYPE mlib_u8 55 56 /***************************************************************/ 57 58 #ifdef CONV_INDEX 59 60 #define CONV_FUNC(KERN) \ 61 mlib_conv##KERN##_Index3_8_16nw(mlib_image *dst, \ 62 mlib_image *src, \ 63 mlib_s32 *kern, \ 64 mlib_s32 scale, \ 65 void *colormap) 66 67 #else 68 69 #define CONV_FUNC(KERN) \ 70 mlib_conv##KERN##_8nw_f(mlib_image *dst, \ 71 mlib_image *src, \ 72 mlib_s32 *kern, \ 73 mlib_s32 scale) 74 75 #endif 76 77 /***************************************************************/ 78 79 #ifdef CONV_INDEX 80 81 #define NCHAN 3 82 83 #else 84 85 #define NCHAN nchan 86 87 #endif 88 89 /***************************************************************/ 90 91 #define DEF_VARS \ 92 DTYPE *sl, *sp, *dl; \ 93 mlib_s32 hgt = mlib_ImageGetHeight(src); \ 94 mlib_s32 wid = mlib_ImageGetWidth(src); \ 95 mlib_s32 sll = mlib_ImageGetStride(src) / sizeof(DTYPE); \ 96 mlib_s32 dll = mlib_ImageGetStride(dst) / sizeof(DTYPE); \ 97 DTYPE *adr_src = (DTYPE *)mlib_ImageGetData(src); \ 98 DTYPE *adr_dst = (DTYPE *)mlib_ImageGetData(dst); \ 99 mlib_s32 ssize, xsize, dsize, esize, emask, buff_ind = 0; \ 100 mlib_d64 *pbuff, *dp; \ 101 mlib_f32 *karr = (mlib_f32 *)kern; \ 102 mlib_s32 gsr_scale = (31 - scale) << 3; \ 103 mlib_d64 drnd = vis_to_double_dup(mlib_round_8[31 - scale]); \ 104 mlib_s32 i, j, l 105 106 /***************************************************************/ 107 108 #ifdef CONV_INDEX 109 110 #define DEF_EXTRA_VARS \ 111 int offset = mlib_ImageGetLutOffset(colormap); \ 112 LTYPE **lut_table = (LTYPE**)mlib_ImageGetLutData(colormap); \ 113 LTYPE *ltbl0 = lut_table[0] - offset; \ 114 LTYPE *ltbl1 = lut_table[1] - offset; \ 115 LTYPE *ltbl2 = lut_table[2] - offset; \ 116 LTYPE *ltbl3 = (NCHAN > 3) ? lut_table[3] - offset : ltbl2 117 118 #else 119 120 #define DEF_EXTRA_VARS \ 121 mlib_s32 nchan = mlib_ImageGetChannels(dst) 122 123 #endif 124 125 /***************************************************************/ 126 127 #if NCHAN == 3 128 129 #define LOAD_SRC() { \ 130 mlib_s32 s0 = sp[0], s1 = sp[1], s2 = sp[2], s3 = sp[3]; \ 131 mlib_s32 s4 = sp[4], s5 = sp[5], s6 = sp[6], s7 = sp[7]; \ 132 mlib_d64 t0, t1, t2; \ 133 \ 134 t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s7), t2); \ 135 t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s7), t2); \ 136 t2 = vis_faligndata(vis_ld_u8_i(ltbl0, s7), t2); \ 137 t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s6), t2); \ 138 t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s6), t2); \ 139 t2 = vis_faligndata(vis_ld_u8_i(ltbl0, s6), t2); \ 140 t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s5), t2); \ 141 t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s5), t2); \ 142 t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s5), t1); \ 143 t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s4), t1); \ 144 t1 = vis_faligndata(vis_ld_u8_i(ltbl1, s4), t1); \ 145 t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s4), t1); \ 146 t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s3), t1); \ 147 t1 = vis_faligndata(vis_ld_u8_i(ltbl1, s3), t1); \ 148 t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s3), t1); \ 149 t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s2), t1); \ 150 t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s2), t0); \ 151 t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s2), t0); \ 152 t0 = vis_faligndata(vis_ld_u8_i(ltbl2, s1), t0); \ 153 t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s1), t0); \ 154 t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s1), t0); \ 155 t0 = vis_faligndata(vis_ld_u8_i(ltbl2, s0), t0); \ 156 t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s0), t0); \ 157 t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s0), t0); \ 158 \ 159 buffn[i] = t0; \ 160 buffn[i + 1] = t1; \ 161 buffn[i + 2] = t2; \ 162 \ 163 sp += 8; \ 164 } 165 166 #else 167 168 #define LOAD_SRC() { \ 169 mlib_s32 s0 = sp[0], s1 = sp[1], s2 = sp[2], s3 = sp[3]; \ 170 mlib_s32 s4 = sp[4], s5 = sp[5], s6 = sp[6], s7 = sp[7]; \ 171 mlib_d64 t0, t1, t2; \ 172 \ 173 t2 = vis_faligndata(vis_ld_u8_i(ltbl3, s5), t2); \ 174 t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s5), t2); \ 175 t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s5), t2); \ 176 t2 = vis_faligndata(vis_ld_u8_i(ltbl0, s5), t2); \ 177 t2 = vis_faligndata(vis_ld_u8_i(ltbl3, s4), t2); \ 178 t2 = vis_faligndata(vis_ld_u8_i(ltbl2, s4), t2); \ 179 t2 = vis_faligndata(vis_ld_u8_i(ltbl1, s4), t2); \ 180 t2 = vis_faligndata(vis_ld_u8_i(ltbl0, s4), t2); \ 181 t1 = vis_faligndata(vis_ld_u8_i(ltbl3, s3), t1); \ 182 t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s3), t1); \ 183 t1 = vis_faligndata(vis_ld_u8_i(ltbl1, s3), t1); \ 184 t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s3), t1); \ 185 t1 = vis_faligndata(vis_ld_u8_i(ltbl3, s2), t1); \ 186 t1 = vis_faligndata(vis_ld_u8_i(ltbl2, s2), t1); \ 187 t1 = vis_faligndata(vis_ld_u8_i(ltbl1, s2), t1); \ 188 t1 = vis_faligndata(vis_ld_u8_i(ltbl0, s2), t1); \ 189 t0 = vis_faligndata(vis_ld_u8_i(ltbl3, s1), t0); \ 190 t0 = vis_faligndata(vis_ld_u8_i(ltbl2, s1), t0); \ 191 t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s1), t0); \ 192 t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s1), t0); \ 193 t0 = vis_faligndata(vis_ld_u8_i(ltbl3, s0), t0); \ 194 t0 = vis_faligndata(vis_ld_u8_i(ltbl2, s0), t0); \ 195 t0 = vis_faligndata(vis_ld_u8_i(ltbl1, s0), t0); \ 196 t0 = vis_faligndata(vis_ld_u8_i(ltbl0, s0), t0); \ 197 \ 198 buffn[i] = t0; \ 199 buffn[i + 1] = t1; \ 200 buffn[i + 2] = t2; \ 201 \ 202 sp += 6; \ 203 } 204 205 #endif 206 207 /***************************************************************/ 208 209 static mlib_s32 mlib_round_8[16] = { 0x00400040, 0x00200020, 0x00100010, 0x00080008, 210 0x00040004, 0x00020002, 0x00010001, 0x00000000, 211 0x00000000, 0x00000000, 0x00000000, 0x00000000, 212 0x00000000, 0x00000000, 0x00000000, 0x00000000 }; 213 214 /***************************************************************/ 215 216 void mlib_ImageCopy_na(mlib_u8 *sa, mlib_u8 *da, int size); 217 218 /***************************************************************/ 219 220 #define KSIZE 2 221 222 mlib_status CONV_FUNC(2x2) 223 { 224 mlib_d64 *buffs[2*(KSIZE + 1)]; 225 mlib_d64 *buff0, *buff1, *buffn, *buffd, *buffe; 226 mlib_d64 s00, s01, s10, s11, s0, s1; 227 mlib_d64 d0, d1, d00, d01, d10, d11; 228 DEF_VARS; 229 DEF_EXTRA_VARS; 230 231 sl = adr_src; 232 dl = adr_dst; 233 234 ssize = NCHAN*wid; 235 dsize = (ssize + 7)/8; 236 esize = dsize + 4; 237 pbuff = mlib_malloc((KSIZE + 4)*esize*sizeof(mlib_d64)); 238 if (pbuff == NULL) return MLIB_FAILURE; 239 240 for (i = 0; i < (KSIZE + 1); i++) buffs[i] = pbuff + i*esize; 241 for (i = 0; i < (KSIZE + 1); i++) buffs[(KSIZE + 1) + i] = buffs[i]; 242 buffd = buffs[KSIZE] + esize; 243 buffe = buffd + 2*esize; 244 245 wid -= (KSIZE - 1); 246 hgt -= (KSIZE - 1); 247 xsize = ssize - NCHAN*(KSIZE - 1); 248 emask = (0xFF00 >> (xsize & 7)) & 0xFF; 249 250 vis_write_gsr(gsr_scale + 7); 251 252 for (l = 0; l < KSIZE; l++) { 253 mlib_d64 *buffn = buffs[l]; 254 sp = sl + l*sll; 255 256 #ifndef CONV_INDEX 257 if ((mlib_addr)sp & 7) mlib_ImageCopy_na((void*)sp, (void*)buffn, ssize); 258 259 #else 260 #pragma pipeloop(0) 261 for (i = 0; i < dsize; i += 3) { 262 LOAD_SRC(); 263 } 264 #endif /* CONV_INDEX */ 265 } 266 267 for (j = 0; j < hgt; j++) { 268 mlib_d64 **buffc = buffs + buff_ind; 269 mlib_f32 *pk = karr, k0, k1; 270 sp = sl + KSIZE*sll; 271 272 buff0 = buffc[0]; 273 buff1 = buffc[1]; 274 buffn = buffc[KSIZE]; 275 276 #ifndef CONV_INDEX 277 if ((((mlib_addr)(sl )) & 7) == 0) buff0 = (mlib_d64*)sl; 278 if ((((mlib_addr)(sl + sll)) & 7) == 0) buff1 = (mlib_d64*)(sl + sll); 279 if ((mlib_addr)sp & 7) mlib_ImageCopy_na((void*)sp, (void*)buffn, ssize); 280 #endif 281 282 k0 = pk[1]; 283 k1 = pk[3]; 284 vis_write_gsr(gsr_scale + NCHAN); 285 286 s01 = buff0[0]; 287 s11 = buff1[0]; 288 #pragma pipeloop(0) 289 for (i = 0; i < (xsize + 7)/8; i++) { 290 s00 = s01; 291 s10 = s11; 292 s01 = buff0[i + 1]; 293 s11 = buff1[i + 1]; 294 s0 = vis_faligndata(s00, s01); 295 s1 = vis_faligndata(s10, s11); 296 297 d00 = vis_fmul8x16au(vis_read_hi(s0), k0); 298 d01 = vis_fmul8x16au(vis_read_lo(s0), k0); 299 d10 = vis_fmul8x16au(vis_read_hi(s1), k1); 300 d11 = vis_fmul8x16au(vis_read_lo(s1), k1); 301 302 d0 = vis_fpadd16(d00, d10); 303 d1 = vis_fpadd16(d01, d11); 304 buffd[2*i] = d0; 305 buffd[2*i + 1] = d1; 306 } 307 308 k0 = pk[0]; 309 k1 = pk[2]; 310 #ifndef CONV_INDEX 311 dp = ((mlib_addr)dl & 7) ? buffe : (mlib_d64*)dl; 312 313 #pragma pipeloop(0) 314 for (i = 0; i < xsize/8; i++) { 315 s0 = buff0[i]; 316 s1 = buff1[i]; 317 318 d00 = vis_fmul8x16au(vis_read_hi(s0), k0); 319 d01 = vis_fmul8x16au(vis_read_lo(s0), k0); 320 d10 = vis_fmul8x16au(vis_read_hi(s1), k1); 321 d11 = vis_fmul8x16au(vis_read_lo(s1), k1); 322 323 d0 = buffd[2*i]; 324 d1 = buffd[2*i + 1]; 325 d00 = vis_fpadd16(d00, d10); 326 d0 = vis_fpadd16(d0, drnd); 327 d0 = vis_fpadd16(d0, d00); 328 d01 = vis_fpadd16(d01, d11); 329 d1 = vis_fpadd16(d1, drnd); 330 d1 = vis_fpadd16(d1, d01); 331 dp[i] = vis_fpack16_pair(d0, d1); 332 } 333 334 if (emask) { 335 s0 = buff0[i]; 336 s1 = buff1[i]; 337 338 d00 = vis_fmul8x16au(vis_read_hi(s0), k0); 339 d01 = vis_fmul8x16au(vis_read_lo(s0), k0); 340 d10 = vis_fmul8x16au(vis_read_hi(s1), k1); 341 d11 = vis_fmul8x16au(vis_read_lo(s1), k1); 342 343 d0 = buffd[2*i]; 344 d1 = buffd[2*i + 1]; 345 d00 = vis_fpadd16(d00, d10); 346 d0 = vis_fpadd16(d0, drnd); 347 d0 = vis_fpadd16(d0, d00); 348 d01 = vis_fpadd16(d01, d11); 349 d1 = vis_fpadd16(d1, drnd); 350 d1 = vis_fpadd16(d1, d01); 351 352 d0 = vis_fpack16_pair(d0, d1); 353 vis_pst_8(d0, dp + i, emask); 354 } 355 356 if ((mlib_u8*)dp != dl) mlib_ImageCopy_na((void*)buffe, dl, xsize); 357 358 #else 359 vis_write_gsr(gsr_scale + 7); 360 361 #pragma pipeloop(0) 362 for (i = 0; i < dsize; i += 3) { 363 mlib_d64 d00, d01, d02, d03, d04, d05; 364 mlib_d64 d10, d11, d12, d13, d14, d15; 365 mlib_d64 d0, d1, d2, d3, d4, d5; 366 mlib_d64 s00 = buff0[i]; 367 mlib_d64 s01 = buff0[i + 1]; 368 mlib_d64 s02 = buff0[i + 2]; 369 mlib_d64 s10 = buff1[i]; 370 mlib_d64 s11 = buff1[i + 1]; 371 mlib_d64 s12 = buff1[i + 2]; 372 373 d00 = vis_fmul8x16au(vis_read_hi(s00), k0); 374 d01 = vis_fmul8x16au(vis_read_lo(s00), k0); 375 d02 = vis_fmul8x16au(vis_read_hi(s01), k0); 376 d03 = vis_fmul8x16au(vis_read_lo(s01), k0); 377 d04 = vis_fmul8x16au(vis_read_hi(s02), k0); 378 d05 = vis_fmul8x16au(vis_read_lo(s02), k0); 379 d10 = vis_fmul8x16au(vis_read_hi(s10), k1); 380 d11 = vis_fmul8x16au(vis_read_lo(s10), k1); 381 d12 = vis_fmul8x16au(vis_read_hi(s11), k1); 382 d13 = vis_fmul8x16au(vis_read_lo(s11), k1); 383 d14 = vis_fmul8x16au(vis_read_hi(s12), k1); 384 d15 = vis_fmul8x16au(vis_read_lo(s12), k1); 385 386 d0 = buffd[2*i]; 387 d1 = buffd[2*i + 1]; 388 d2 = buffd[2*i + 2]; 389 d3 = buffd[2*i + 3]; 390 d4 = buffd[2*i + 4]; 391 d5 = buffd[2*i + 5]; 392 d00 = vis_fpadd16(d00, d10); 393 d0 = vis_fpadd16(d0, drnd); 394 d0 = vis_fpadd16(d0, d00); 395 d01 = vis_fpadd16(d01, d11); 396 d1 = vis_fpadd16(d1, drnd); 397 d1 = vis_fpadd16(d1, d01); 398 d02 = vis_fpadd16(d02, d12); 399 d2 = vis_fpadd16(d2, drnd); 400 d2 = vis_fpadd16(d2, d02); 401 d03 = vis_fpadd16(d03, d13); 402 d3 = vis_fpadd16(d3, drnd); 403 d3 = vis_fpadd16(d3, d03); 404 d04 = vis_fpadd16(d04, d14); 405 d4 = vis_fpadd16(d4, drnd); 406 d4 = vis_fpadd16(d4, d04); 407 d05 = vis_fpadd16(d05, d15); 408 d5 = vis_fpadd16(d5, drnd); 409 d5 = vis_fpadd16(d5, d05); 410 411 buffe[i ] = vis_fpack16_pair(d0, d1); 412 buffe[i + 1] = vis_fpack16_pair(d2, d3); 413 buffe[i + 2] = vis_fpack16_pair(d4, d5); 414 415 LOAD_SRC(); 416 } 417 418 mlib_ImageColorTrue2IndexLine_U8_S16_3((void*)buffe, dl, wid, colormap); 419 #endif /* CONV_INDEX */ 420 421 sl += sll; 422 dl += dll; 423 424 buff_ind++; 425 if (buff_ind >= (KSIZE + 1)) buff_ind = 0; 426 } 427 428 mlib_free(pbuff); 429 430 return MLIB_SUCCESS; 431 } 432 433 /***************************************************************/ 434 435 #undef KSIZE 436 #define KSIZE 3 437 438 mlib_status CONV_FUNC(3x3) 439 { 440 mlib_d64 *buffs[2*(KSIZE + 1)]; 441 mlib_d64 *buff0, *buff1, *buff2, *buffn, *buffd, *buffe; 442 mlib_d64 s00, s01, s10, s11, s20, s21, s0, s1, s2; 443 mlib_d64 dd, d0, d1, d00, d01, d10, d11, d20, d21; 444 mlib_s32 ik, ik_last, off, doff; 445 DEF_VARS; 446 DEF_EXTRA_VARS; 447 448 sl = adr_src; 449 #ifdef CONV_INDEX 450 dl = adr_dst + ((KSIZE - 1)/2)*(dll + 1); 451 #else 452 dl = adr_dst + ((KSIZE - 1)/2)*(dll + NCHAN); 453 #endif 454 455 ssize = NCHAN*wid; 456 dsize = (ssize + 7)/8; 457 esize = dsize + 4; 458 pbuff = mlib_malloc((KSIZE + 4)*esize*sizeof(mlib_d64)); 459 if (pbuff == NULL) return MLIB_FAILURE; 460 461 for (i = 0; i < (KSIZE + 1); i++) buffs[i] = pbuff + i*esize; 462 for (i = 0; i < (KSIZE + 1); i++) buffs[(KSIZE + 1) + i] = buffs[i]; 463 buffd = buffs[KSIZE] + esize; 464 buffe = buffd + 2*esize; 465 466 wid -= (KSIZE - 1); 467 hgt -= (KSIZE - 1); 468 xsize = ssize - NCHAN*(KSIZE - 1); 469 emask = (0xFF00 >> (xsize & 7)) & 0xFF; 470 471 vis_write_gsr(gsr_scale + 7); 472 473 for (l = 0; l < KSIZE; l++) { 474 mlib_d64 *buffn = buffs[l]; 475 sp = sl + l*sll; 476 477 #ifndef CONV_INDEX 478 if ((mlib_addr)sp & 7) mlib_ImageCopy_na((void*)sp, (void*)buffn, ssize); 479 #else 480 #pragma pipeloop(0) 481 for (i = 0; i < dsize; i += 3) { 482 LOAD_SRC(); 483 } 484 #endif /* CONV_INDEX */ 485 } 486 487 /* init buffer */ 488 #pragma pipeloop(0) 489 for (i = 0; i < (xsize + 7)/8; i++) { 490 buffd[2*i ] = drnd; 491 buffd[2*i + 1] = drnd; 492 } 493 494 for (j = 0; j < hgt; j++) { 495 mlib_d64 **buffc = buffs + buff_ind, *pbuff0, *pbuff1, *pbuff2; 496 mlib_f32 *pk = karr, k0, k1, k2; 497 sp = sl + KSIZE*sll; 498 499 pbuff0 = buffc[0]; 500 pbuff1 = buffc[1]; 501 pbuff2 = buffc[2]; 502 buffn = buffc[KSIZE]; 503 504 #ifndef CONV_INDEX 505 if ((((mlib_addr)(sl )) & 7) == 0) pbuff0 = (mlib_d64*)sl; 506 if ((((mlib_addr)(sl + sll)) & 7) == 0) pbuff1 = (mlib_d64*)(sl + sll); 507 if ((((mlib_addr)(sl + 2*sll)) & 7) == 0) pbuff2 = (mlib_d64*)(sl + 2*sll); 508 509 if ((mlib_addr)sp & 7) mlib_ImageCopy_na((void*)sp, (void*)buffn, ssize); 510 #endif 511 512 #ifdef CONV_INDEX 513 ik_last = 0; 514 #else 515 ik_last = (KSIZE - 1); 516 #endif 517 518 for (ik = 0; ik < KSIZE; ik++) { 519 k0 = pk[ik]; 520 k1 = pk[ik + KSIZE]; 521 k2 = pk[ik + 2*KSIZE]; 522 523 off = ik*NCHAN; 524 doff = off/8; 525 off &= 7; 526 buff0 = pbuff0 + doff; 527 buff1 = pbuff1 + doff; 528 buff2 = pbuff2 + doff; 529 vis_write_gsr(gsr_scale + off); 530 531 if (ik == ik_last) continue; 532 /*if (!ik_last) { 533 if ((off & 3) || (ik == (KSIZE - 1))) { 534 ik_last = ik; 535 continue; 536 } 537 }*/ 538 539 if (off == 0) { 540 #pragma pipeloop(0) 541 for (i = 0; i < (xsize + 7)/8; i++) { 542 s0 = buff0[i]; 543 s1 = buff1[i]; 544 s2 = buff2[i]; 545 546 d00 = vis_fmul8x16au(vis_read_hi(s0), k0); 547 d01 = vis_fmul8x16au(vis_read_lo(s0), k0); 548 d10 = vis_fmul8x16au(vis_read_hi(s1), k1); 549 d11 = vis_fmul8x16au(vis_read_lo(s1), k1); 550 d20 = vis_fmul8x16au(vis_read_hi(s2), k2); 551 d21 = vis_fmul8x16au(vis_read_lo(s2), k2); 552 553 d0 = buffd[2*i]; 554 d1 = buffd[2*i + 1]; 555 d0 = vis_fpadd16(d00, d0); 556 d0 = vis_fpadd16(d10, d0); 557 d0 = vis_fpadd16(d20, d0); 558 d1 = vis_fpadd16(d01, d1); 559 d1 = vis_fpadd16(d11, d1); 560 d1 = vis_fpadd16(d21, d1); 561 buffd[2*i] = d0; 562 buffd[2*i + 1] = d1; 563 } 564 565 } else if (off == 4) { 566 s01 = buff0[0]; 567 s11 = buff1[0]; 568 s21 = buff2[0]; 569 #pragma pipeloop(0) 570 for (i = 0; i < (xsize + 7)/8; i++) { 571 s00 = s01; 572 s10 = s11; 573 s20 = s21; 574 s01 = buff0[i + 1]; 575 s11 = buff1[i + 1]; 576 s21 = buff2[i + 1]; 577 578 d00 = vis_fmul8x16au(vis_read_lo(s00), k0); 579 d01 = vis_fmul8x16au(vis_read_hi(s01), k0); 580 d10 = vis_fmul8x16au(vis_read_lo(s10), k1); 581 d11 = vis_fmul8x16au(vis_read_hi(s11), k1); 582 d20 = vis_fmul8x16au(vis_read_lo(s20), k2); 583 d21 = vis_fmul8x16au(vis_read_hi(s21), k2); 584 585 d0 = buffd[2*i]; 586 d1 = buffd[2*i + 1]; 587 d0 = vis_fpadd16(d00, d0); 588 d0 = vis_fpadd16(d10, d0); 589 d0 = vis_fpadd16(d20, d0); 590 d1 = vis_fpadd16(d01, d1); 591 d1 = vis_fpadd16(d11, d1); 592 d1 = vis_fpadd16(d21, d1); 593 buffd[2*i] = d0; 594 buffd[2*i + 1] = d1; 595 } 596 597 } else { 598 s01 = buff0[0]; 599 s11 = buff1[0]; 600 s21 = buff2[0]; 601 #pragma pipeloop(0) 602 for (i = 0; i < (xsize + 7)/8; i++) { 603 s00 = s01; 604 s10 = s11; 605 s20 = s21; 606 s01 = buff0[i + 1]; 607 s11 = buff1[i + 1]; 608 s21 = buff2[i + 1]; 609 s0 = vis_faligndata(s00, s01); 610 s1 = vis_faligndata(s10, s11); 611 s2 = vis_faligndata(s20, s21); 612 613 d00 = vis_fmul8x16au(vis_read_hi(s0), k0); 614 d01 = vis_fmul8x16au(vis_read_lo(s0), k0); 615 d10 = vis_fmul8x16au(vis_read_hi(s1), k1); 616 d11 = vis_fmul8x16au(vis_read_lo(s1), k1); 617 d20 = vis_fmul8x16au(vis_read_hi(s2), k2); 618 d21 = vis_fmul8x16au(vis_read_lo(s2), k2); 619 620 d0 = buffd[2*i]; 621 d1 = buffd[2*i + 1]; 622 d0 = vis_fpadd16(d00, d0); 623 d0 = vis_fpadd16(d10, d0); 624 d0 = vis_fpadd16(d20, d0); 625 d1 = vis_fpadd16(d01, d1); 626 d1 = vis_fpadd16(d11, d1); 627 d1 = vis_fpadd16(d21, d1); 628 buffd[2*i] = d0; 629 buffd[2*i + 1] = d1; 630 } 631 } 632 } 633 634 k0 = pk[ik_last]; 635 k1 = pk[ik_last + KSIZE]; 636 k2 = pk[ik_last + 2*KSIZE]; 637 638 off = ik_last*NCHAN; 639 doff = off/8; 640 off &= 7; 641 buff0 = pbuff0 + doff; 642 buff1 = pbuff1 + doff; 643 buff2 = pbuff2 + doff; 644 vis_write_gsr(gsr_scale + off); 645 646 #ifndef CONV_INDEX 647 dp = ((mlib_addr)dl & 7) ? buffe : (mlib_d64*)dl; 648 649 s01 = buff0[0]; 650 s11 = buff1[0]; 651 s21 = buff2[0]; 652 #pragma pipeloop(0) 653 for (i = 0; i < xsize/8; i++) { 654 s00 = s01; 655 s10 = s11; 656 s20 = s21; 657 s01 = buff0[i + 1]; 658 s11 = buff1[i + 1]; 659 s21 = buff2[i + 1]; 660 s0 = vis_faligndata(s00, s01); 661 s1 = vis_faligndata(s10, s11); 662 s2 = vis_faligndata(s20, s21); 663 664 d00 = vis_fmul8x16au(vis_read_hi(s0), k0); 665 d01 = vis_fmul8x16au(vis_read_lo(s0), k0); 666 d10 = vis_fmul8x16au(vis_read_hi(s1), k1); 667 d11 = vis_fmul8x16au(vis_read_lo(s1), k1); 668 d20 = vis_fmul8x16au(vis_read_hi(s2), k2); 669 d21 = vis_fmul8x16au(vis_read_lo(s2), k2); 670 671 d0 = buffd[2*i]; 672 d1 = buffd[2*i + 1]; 673 d0 = vis_fpadd16(d0, d00); 674 d0 = vis_fpadd16(d0, d10); 675 d0 = vis_fpadd16(d0, d20); 676 d1 = vis_fpadd16(d1, d01); 677 d1 = vis_fpadd16(d1, d11); 678 d1 = vis_fpadd16(d1, d21); 679 680 dd = vis_fpack16_pair(d0, d1); 681 dp[i] = dd; 682 683 buffd[2*i ] = drnd; 684 buffd[2*i + 1] = drnd; 685 } 686 687 if (emask) { 688 s00 = s01; 689 s10 = s11; 690 s20 = s21; 691 s01 = buff0[i + 1]; 692 s11 = buff1[i + 1]; 693 s21 = buff2[i + 1]; 694 s0 = vis_faligndata(s00, s01); 695 s1 = vis_faligndata(s10, s11); 696 s2 = vis_faligndata(s20, s21); 697 698 d00 = vis_fmul8x16au(vis_read_hi(s0), k0); 699 d01 = vis_fmul8x16au(vis_read_lo(s0), k0); 700 d10 = vis_fmul8x16au(vis_read_hi(s1), k1); 701 d11 = vis_fmul8x16au(vis_read_lo(s1), k1); 702 d20 = vis_fmul8x16au(vis_read_hi(s2), k2); 703 d21 = vis_fmul8x16au(vis_read_lo(s2), k2); 704 705 d0 = buffd[2*i]; 706 d1 = buffd[2*i + 1]; 707 d0 = vis_fpadd16(d0, d00); 708 d0 = vis_fpadd16(d0, d10); 709 d0 = vis_fpadd16(d0, d20); 710 d1 = vis_fpadd16(d1, d01); 711 d1 = vis_fpadd16(d1, d11); 712 d1 = vis_fpadd16(d1, d21); 713 714 dd = vis_fpack16_pair(d0, d1); 715 vis_pst_8(dd, dp + i, emask); 716 717 buffd[2*i ] = drnd; 718 buffd[2*i + 1] = drnd; 719 } 720 721 if ((mlib_u8*)dp != dl) mlib_ImageCopy_na((void*)buffe, dl, xsize); 722 723 #else 724 vis_write_gsr(gsr_scale + 7); 725 726 #pragma pipeloop(0) 727 for (i = 0; i < dsize; i += 3) { 728 mlib_d64 d00, d01, d02, d03, d04, d05; 729 mlib_d64 d10, d11, d12, d13, d14, d15; 730 mlib_d64 d20, d21, d22, d23, d24, d25; 731 mlib_d64 d0, d1, d2, d3, d4, d5; 732 mlib_d64 s00 = buff0[i]; 733 mlib_d64 s01 = buff0[i + 1]; 734 mlib_d64 s02 = buff0[i + 2]; 735 mlib_d64 s10 = buff1[i]; 736 mlib_d64 s11 = buff1[i + 1]; 737 mlib_d64 s12 = buff1[i + 2]; 738 mlib_d64 s20 = buff2[i]; 739 mlib_d64 s21 = buff2[i + 1]; 740 mlib_d64 s22 = buff2[i + 2]; 741 742 d00 = vis_fmul8x16au(vis_read_hi(s00), k0); 743 d01 = vis_fmul8x16au(vis_read_lo(s00), k0); 744 d02 = vis_fmul8x16au(vis_read_hi(s01), k0); 745 d03 = vis_fmul8x16au(vis_read_lo(s01), k0); 746 d04 = vis_fmul8x16au(vis_read_hi(s02), k0); 747 d05 = vis_fmul8x16au(vis_read_lo(s02), k0); 748 d10 = vis_fmul8x16au(vis_read_hi(s10), k1); 749 d11 = vis_fmul8x16au(vis_read_lo(s10), k1); 750 d12 = vis_fmul8x16au(vis_read_hi(s11), k1); 751 d13 = vis_fmul8x16au(vis_read_lo(s11), k1); 752 d14 = vis_fmul8x16au(vis_read_hi(s12), k1); 753 d15 = vis_fmul8x16au(vis_read_lo(s12), k1); 754 d20 = vis_fmul8x16au(vis_read_hi(s20), k2); 755 d21 = vis_fmul8x16au(vis_read_lo(s20), k2); 756 d22 = vis_fmul8x16au(vis_read_hi(s21), k2); 757 d23 = vis_fmul8x16au(vis_read_lo(s21), k2); 758 d24 = vis_fmul8x16au(vis_read_hi(s22), k2); 759 d25 = vis_fmul8x16au(vis_read_lo(s22), k2); 760 761 d0 = buffd[2*i]; 762 d1 = buffd[2*i + 1]; 763 d2 = buffd[2*i + 2]; 764 d3 = buffd[2*i + 3]; 765 d4 = buffd[2*i + 4]; 766 d5 = buffd[2*i + 5]; 767 d0 = vis_fpadd16(d0, d00); 768 d0 = vis_fpadd16(d0, d10); 769 d0 = vis_fpadd16(d0, d20); 770 d1 = vis_fpadd16(d1, d01); 771 d1 = vis_fpadd16(d1, d11); 772 d1 = vis_fpadd16(d1, d21); 773 d2 = vis_fpadd16(d2, d02); 774 d2 = vis_fpadd16(d2, d12); 775 d2 = vis_fpadd16(d2, d22); 776 d3 = vis_fpadd16(d3, d03); 777 d3 = vis_fpadd16(d3, d13); 778 d3 = vis_fpadd16(d3, d23); 779 d4 = vis_fpadd16(d4, d04); 780 d4 = vis_fpadd16(d4, d14); 781 d4 = vis_fpadd16(d4, d24); 782 d5 = vis_fpadd16(d5, d05); 783 d5 = vis_fpadd16(d5, d15); 784 d5 = vis_fpadd16(d5, d25); 785 786 buffe[i ] = vis_fpack16_pair(d0, d1); 787 buffe[i + 1] = vis_fpack16_pair(d2, d3); 788 buffe[i + 2] = vis_fpack16_pair(d4, d5); 789 790 buffd[2*i ] = drnd; 791 buffd[2*i + 1] = drnd; 792 buffd[2*i + 2] = drnd; 793 buffd[2*i + 3] = drnd; 794 buffd[2*i + 4] = drnd; 795 buffd[2*i + 5] = drnd; 796 797 LOAD_SRC(); 798 } 799 800 mlib_ImageColorTrue2IndexLine_U8_S16_3((void*)buffe, dl, wid, colormap); 801 #endif /* CONV_INDEX */ 802 803 sl += sll; 804 dl += dll; 805 806 buff_ind++; 807 if (buff_ind >= (KSIZE + 1)) buff_ind = 0; 808 } 809 810 mlib_free(pbuff); 811 812 return MLIB_SUCCESS; 813 } 814 815 /***************************************************************/ 816 817 #undef KSIZE 818 #define MAX_N 11 819 820 #ifdef CONV_INDEX 821 822 mlib_status mlib_convMxN_Index3_8_16nw(mlib_image *dst, 823 mlib_image *src, 824 mlib_s32 m, 825 mlib_s32 n, 826 mlib_s32 dm, 827 mlib_s32 dn, 828 mlib_s32 *kern, 829 mlib_s32 scale, 830 void *colormap) 831 832 #else 833 834 mlib_status mlib_convMxN_8nw_f(mlib_image *dst, 835 mlib_image *src, 836 mlib_s32 m, 837 mlib_s32 n, 838 mlib_s32 dm, 839 mlib_s32 dn, 840 mlib_s32 *kern, 841 mlib_s32 scale) 842 843 #endif 844 { 845 mlib_d64 *buffs_local[3*(MAX_N + 1)], **buffs = buffs_local, **buff; 846 mlib_d64 *buff0, *buff1, *buff2, *buff3, *buffn, *buffd, *buffe; 847 mlib_d64 s00, s01, s10, s11, s20, s21, s30, s31, s0, s1, s2, s3; 848 mlib_d64 d00, d01, d10, d11, d20, d21, d30, d31; 849 mlib_d64 dd, d0, d1; 850 mlib_s32 ik, jk, ik_last, jk_size, coff, off, doff; 851 DEF_VARS; 852 DEF_EXTRA_VARS; 853 854 if (n > MAX_N) { 855 buffs = mlib_malloc(3*(n + 1)*sizeof(mlib_d64*)); 856 if (buffs == NULL) return MLIB_FAILURE; 857 } 858 859 buff = buffs + 2*(n + 1); 860 861 sl = adr_src; 862 #ifdef CONV_INDEX 863 dl = adr_dst + dn*dll + dm; 864 #else 865 dl = adr_dst + dn*dll + dm*NCHAN; 866 #endif 867 868 ssize = NCHAN*wid; 869 dsize = (ssize + 7)/8; 870 esize = dsize + 4; 871 pbuff = mlib_malloc((n + 4)*esize*sizeof(mlib_d64)); 872 if (pbuff == NULL) { 873 if (buffs != buffs_local) mlib_free(buffs); 874 return MLIB_FAILURE; 875 } 876 877 for (i = 0; i < (n + 1); i++) buffs[i] = pbuff + i*esize; 878 for (i = 0; i < (n + 1); i++) buffs[(n + 1) + i] = buffs[i]; 879 buffd = buffs[n] + esize; 880 buffe = buffd + 2*esize; 881 882 wid -= (m - 1); 883 hgt -= (n - 1); 884 xsize = ssize - NCHAN*(m - 1); 885 emask = (0xFF00 >> (xsize & 7)) & 0xFF; 886 887 vis_write_gsr(gsr_scale + 7); 888 889 for (l = 0; l < n; l++) { 890 mlib_d64 *buffn = buffs[l]; 891 sp = sl + l*sll; 892 893 #ifndef CONV_INDEX 894 if ((mlib_addr)sp & 7) mlib_ImageCopy_na((void*)sp, (void*)buffn, ssize); 895 #else 896 #pragma pipeloop(0) 897 for (i = 0; i < dsize; i += 3) { 898 LOAD_SRC(); 899 } 900 #endif /* CONV_INDEX */ 901 } 902 903 /* init buffer */ 904 #pragma pipeloop(0) 905 for (i = 0; i < (xsize + 7)/8; i++) { 906 buffd[2*i ] = drnd; 907 buffd[2*i + 1] = drnd; 908 } 909 910 for (j = 0; j < hgt; j++) { 911 mlib_d64 **buffc = buffs + buff_ind; 912 mlib_f32 *pk = karr, k0, k1, k2, k3; 913 sp = sl + n*sll; 914 915 for (l = 0; l < n; l++) { 916 buff[l] = buffc[l]; 917 } 918 buffn = buffc[n]; 919 920 #ifndef CONV_INDEX 921 for (l = 0; l < n; l++) { 922 if ((((mlib_addr)(sl + l*sll)) & 7) == 0) buff[l] = (mlib_d64*)(sl + l*sll); 923 } 924 if ((mlib_addr)sp & 7) mlib_ImageCopy_na((void*)sp, (void*)buffn, ssize); 925 #endif 926 927 #ifdef CONV_INDEX 928 ik_last = 0; 929 #else 930 ik_last = (m - 1); 931 #endif 932 933 for (jk = 0; jk < n; jk += jk_size) { 934 jk_size = n - jk; 935 #ifdef CONV_INDEX 936 if (jk_size >= 5) jk_size = 3; 937 if (jk_size == 4) jk_size = 2; 938 #else 939 if (jk_size >= 6) jk_size = 4; 940 if (jk_size == 5) jk_size = 3; 941 #endif 942 coff = 0; 943 944 if (jk_size == 2) { 945 946 for (ik = 0; ik < m; ik++, coff += NCHAN) { 947 if (!jk && ik == ik_last) continue; 948 949 k0 = pk[ik]; 950 k1 = pk[ik + m]; 951 952 doff = coff/8; 953 buff0 = buff[jk ] + doff; 954 buff1 = buff[jk + 1] + doff; 955 956 off = coff & 7; 957 vis_write_gsr(gsr_scale + off); 958 959 s01 = buff0[0]; 960 s11 = buff1[0]; 961 #pragma pipeloop(0) 962 for (i = 0; i < (xsize + 7)/8; i++) { 963 s00 = s01; 964 s10 = s11; 965 s01 = buff0[i + 1]; 966 s11 = buff1[i + 1]; 967 s0 = vis_faligndata(s00, s01); 968 s1 = vis_faligndata(s10, s11); 969 970 d00 = vis_fmul8x16au(vis_read_hi(s0), k0); 971 d01 = vis_fmul8x16au(vis_read_lo(s0), k0); 972 d10 = vis_fmul8x16au(vis_read_hi(s1), k1); 973 d11 = vis_fmul8x16au(vis_read_lo(s1), k1); 974 975 d0 = buffd[2*i]; 976 d1 = buffd[2*i + 1]; 977 d0 = vis_fpadd16(d00, d0); 978 d0 = vis_fpadd16(d10, d0); 979 d1 = vis_fpadd16(d01, d1); 980 d1 = vis_fpadd16(d11, d1); 981 buffd[2*i] = d0; 982 buffd[2*i + 1] = d1; 983 } 984 985 } 986 987 pk += 2*m; 988 989 } else if (jk_size == 3) { 990 991 for (ik = 0; ik < m; ik++, coff += NCHAN) { 992 if (!jk && ik == ik_last) continue; 993 994 k0 = pk[ik]; 995 k1 = pk[ik + m]; 996 k2 = pk[ik + 2*m]; 997 998 doff = coff/8; 999 buff0 = buff[jk ] + doff; 1000 buff1 = buff[jk + 1] + doff; 1001 buff2 = buff[jk + 2] + doff; 1002 1003 off = coff & 7; 1004 vis_write_gsr(gsr_scale + off); 1005 1006 if (off == 0) { 1007 #pragma pipeloop(0) 1008 for (i = 0; i < (xsize + 7)/8; i++) { 1009 d0 = buffd[2*i]; 1010 d1 = buffd[2*i + 1]; 1011 1012 s0 = buff0[i]; 1013 s1 = buff1[i]; 1014 s2 = buff2[i]; 1015 1016 d00 = vis_fmul8x16au(vis_read_hi(s0), k0); 1017 d01 = vis_fmul8x16au(vis_read_lo(s0), k0); 1018 d10 = vis_fmul8x16au(vis_read_hi(s1), k1); 1019 d11 = vis_fmul8x16au(vis_read_lo(s1), k1); 1020 d20 = vis_fmul8x16au(vis_read_hi(s2), k2); 1021 d21 = vis_fmul8x16au(vis_read_lo(s2), k2); 1022 1023 d00 = vis_fpadd16(d00, d10); 1024 d0 = vis_fpadd16(d20, d0); 1025 d0 = vis_fpadd16(d00, d0); 1026 d01 = vis_fpadd16(d01, d11); 1027 d1 = vis_fpadd16(d21, d1); 1028 d1 = vis_fpadd16(d01, d1); 1029 buffd[2*i] = d0; 1030 buffd[2*i + 1] = d1; 1031 } 1032 1033 } else if (off == 4) { 1034 s01 = buff0[0]; 1035 s11 = buff1[0]; 1036 s21 = buff2[0]; 1037 #pragma pipeloop(0) 1038 for (i = 0; i < (xsize + 7)/8; i++) { 1039 d0 = buffd[2*i]; 1040 d1 = buffd[2*i + 1]; 1041 1042 s00 = s01; 1043 s10 = s11; 1044 s20 = s21; 1045 s01 = buff0[i + 1]; 1046 s11 = buff1[i + 1]; 1047 s21 = buff2[i + 1]; 1048 1049 d00 = vis_fmul8x16au(vis_read_lo(s00), k0); 1050 d01 = vis_fmul8x16au(vis_read_hi(s01), k0); 1051 d10 = vis_fmul8x16au(vis_read_lo(s10), k1); 1052 d11 = vis_fmul8x16au(vis_read_hi(s11), k1); 1053 d20 = vis_fmul8x16au(vis_read_lo(s20), k2); 1054 d21 = vis_fmul8x16au(vis_read_hi(s21), k2); 1055 1056 d00 = vis_fpadd16(d00, d10); 1057 d0 = vis_fpadd16(d20, d0); 1058 d0 = vis_fpadd16(d00, d0); 1059 d01 = vis_fpadd16(d01, d11); 1060 d1 = vis_fpadd16(d21, d1); 1061 d1 = vis_fpadd16(d01, d1); 1062 buffd[2*i] = d0; 1063 buffd[2*i + 1] = d1; 1064 } 1065 1066 } else { 1067 s01 = buff0[0]; 1068 s11 = buff1[0]; 1069 s21 = buff2[0]; 1070 #pragma pipeloop(0) 1071 for (i = 0; i < (xsize + 7)/8; i++) { 1072 d0 = buffd[2*i]; 1073 d1 = buffd[2*i + 1]; 1074 1075 s00 = s01; 1076 s10 = s11; 1077 s20 = s21; 1078 s01 = buff0[i + 1]; 1079 s11 = buff1[i + 1]; 1080 s21 = buff2[i + 1]; 1081 s0 = vis_faligndata(s00, s01); 1082 s1 = vis_faligndata(s10, s11); 1083 s2 = vis_faligndata(s20, s21); 1084 1085 d00 = vis_fmul8x16au(vis_read_hi(s0), k0); 1086 d01 = vis_fmul8x16au(vis_read_lo(s0), k0); 1087 d10 = vis_fmul8x16au(vis_read_hi(s1), k1); 1088 d11 = vis_fmul8x16au(vis_read_lo(s1), k1); 1089 d20 = vis_fmul8x16au(vis_read_hi(s2), k2); 1090 d21 = vis_fmul8x16au(vis_read_lo(s2), k2); 1091 1092 d00 = vis_fpadd16(d00, d10); 1093 d0 = vis_fpadd16(d20, d0); 1094 d0 = vis_fpadd16(d00, d0); 1095 d01 = vis_fpadd16(d01, d11); 1096 d1 = vis_fpadd16(d21, d1); 1097 d1 = vis_fpadd16(d01, d1); 1098 buffd[2*i] = d0; 1099 buffd[2*i + 1] = d1; 1100 } 1101 } 1102 } 1103 1104 pk += 3*m; 1105 1106 } else { /* jk_size == 4 */ 1107 1108 for (ik = 0; ik < m; ik++, coff += NCHAN) { 1109 if (!jk && ik == ik_last) continue; 1110 1111 k0 = pk[ik]; 1112 k1 = pk[ik + m]; 1113 k2 = pk[ik + 2*m]; 1114 k3 = pk[ik + 3*m]; 1115 1116 doff = coff/8; 1117 buff0 = buff[jk ] + doff; 1118 buff1 = buff[jk + 1] + doff; 1119 buff2 = buff[jk + 2] + doff; 1120 buff3 = buff[jk + 3] + doff; 1121 1122 off = coff & 7; 1123 vis_write_gsr(gsr_scale + off); 1124 1125 if (off == 0) { 1126 1127 #pragma pipeloop(0) 1128 for (i = 0; i < (xsize + 7)/8; i++) { 1129 d0 = buffd[2*i]; 1130 d1 = buffd[2*i + 1]; 1131 1132 s0 = buff0[i]; 1133 s1 = buff1[i]; 1134 s2 = buff2[i]; 1135 s3 = buff3[i]; 1136 1137 d00 = vis_fmul8x16au(vis_read_hi(s0), k0); 1138 d01 = vis_fmul8x16au(vis_read_lo(s0), k0); 1139 d10 = vis_fmul8x16au(vis_read_hi(s1), k1); 1140 d11 = vis_fmul8x16au(vis_read_lo(s1), k1); 1141 d20 = vis_fmul8x16au(vis_read_hi(s2), k2); 1142 d21 = vis_fmul8x16au(vis_read_lo(s2), k2); 1143 d30 = vis_fmul8x16au(vis_read_hi(s3), k3); 1144 d31 = vis_fmul8x16au(vis_read_lo(s3), k3); 1145 1146 d00 = vis_fpadd16(d00, d10); 1147 d20 = vis_fpadd16(d20, d30); 1148 d0 = vis_fpadd16(d0, d00); 1149 d0 = vis_fpadd16(d0, d20); 1150 d01 = vis_fpadd16(d01, d11); 1151 d21 = vis_fpadd16(d21, d31); 1152 d1 = vis_fpadd16(d1, d01); 1153 d1 = vis_fpadd16(d1, d21); 1154 buffd[2*i] = d0; 1155 buffd[2*i + 1] = d1; 1156 } 1157 1158 } else if (off == 4) { 1159 1160 s01 = buff0[0]; 1161 s11 = buff1[0]; 1162 s21 = buff2[0]; 1163 s31 = buff3[0]; 1164 #pragma pipeloop(0) 1165 for (i = 0; i < (xsize + 7)/8; i++) { 1166 d0 = buffd[2*i]; 1167 d1 = buffd[2*i + 1]; 1168 1169 s00 = s01; 1170 s10 = s11; 1171 s20 = s21; 1172 s30 = s31; 1173 s01 = buff0[i + 1]; 1174 s11 = buff1[i + 1]; 1175 s21 = buff2[i + 1]; 1176 s31 = buff3[i + 1]; 1177 1178 d00 = vis_fmul8x16au(vis_read_lo(s00), k0); 1179 d01 = vis_fmul8x16au(vis_read_hi(s01), k0); 1180 d10 = vis_fmul8x16au(vis_read_lo(s10), k1); 1181 d11 = vis_fmul8x16au(vis_read_hi(s11), k1); 1182 d20 = vis_fmul8x16au(vis_read_lo(s20), k2); 1183 d21 = vis_fmul8x16au(vis_read_hi(s21), k2); 1184 d30 = vis_fmul8x16au(vis_read_lo(s30), k3); 1185 d31 = vis_fmul8x16au(vis_read_hi(s31), k3); 1186 1187 d00 = vis_fpadd16(d00, d10); 1188 d20 = vis_fpadd16(d20, d30); 1189 d0 = vis_fpadd16(d0, d00); 1190 d0 = vis_fpadd16(d0, d20); 1191 d01 = vis_fpadd16(d01, d11); 1192 d21 = vis_fpadd16(d21, d31); 1193 d1 = vis_fpadd16(d1, d01); 1194 d1 = vis_fpadd16(d1, d21); 1195 buffd[2*i] = d0; 1196 buffd[2*i + 1] = d1; 1197 } 1198 1199 } else { 1200 1201 s01 = buff0[0]; 1202 s11 = buff1[0]; 1203 s21 = buff2[0]; 1204 s31 = buff3[0]; 1205 #pragma pipeloop(0) 1206 for (i = 0; i < (xsize + 7)/8; i++) { 1207 d0 = buffd[2*i]; 1208 d1 = buffd[2*i + 1]; 1209 1210 s00 = s01; 1211 s10 = s11; 1212 s20 = s21; 1213 s30 = s31; 1214 s01 = buff0[i + 1]; 1215 s11 = buff1[i + 1]; 1216 s21 = buff2[i + 1]; 1217 s31 = buff3[i + 1]; 1218 s0 = vis_faligndata(s00, s01); 1219 s1 = vis_faligndata(s10, s11); 1220 s2 = vis_faligndata(s20, s21); 1221 s3 = vis_faligndata(s30, s31); 1222 1223 d00 = vis_fmul8x16au(vis_read_hi(s0), k0); 1224 d01 = vis_fmul8x16au(vis_read_lo(s0), k0); 1225 d10 = vis_fmul8x16au(vis_read_hi(s1), k1); 1226 d11 = vis_fmul8x16au(vis_read_lo(s1), k1); 1227 d20 = vis_fmul8x16au(vis_read_hi(s2), k2); 1228 d21 = vis_fmul8x16au(vis_read_lo(s2), k2); 1229 d30 = vis_fmul8x16au(vis_read_hi(s3), k3); 1230 d31 = vis_fmul8x16au(vis_read_lo(s3), k3); 1231 1232 d00 = vis_fpadd16(d00, d10); 1233 d20 = vis_fpadd16(d20, d30); 1234 d0 = vis_fpadd16(d0, d00); 1235 d0 = vis_fpadd16(d0, d20); 1236 d01 = vis_fpadd16(d01, d11); 1237 d21 = vis_fpadd16(d21, d31); 1238 d1 = vis_fpadd16(d1, d01); 1239 d1 = vis_fpadd16(d1, d21); 1240 buffd[2*i] = d0; 1241 buffd[2*i + 1] = d1; 1242 } 1243 } 1244 } 1245 1246 pk += 4*m; 1247 } 1248 } 1249 1250 /***************************************** 1251 ***************************************** 1252 ** Final iteration ** 1253 ***************************************** 1254 *****************************************/ 1255 1256 jk_size = n; 1257 #ifdef CONV_INDEX 1258 if (jk_size >= 5) jk_size = 3; 1259 if (jk_size == 4) jk_size = 2; 1260 #else 1261 if (jk_size >= 6) jk_size = 4; 1262 if (jk_size == 5) jk_size = 3; 1263 #endif 1264 1265 k0 = karr[ik_last]; 1266 k1 = karr[ik_last + m]; 1267 k2 = karr[ik_last + 2*m]; 1268 k3 = karr[ik_last + 3*m]; 1269 1270 off = ik_last*NCHAN; 1271 doff = off/8; 1272 off &= 7; 1273 buff0 = buff[0] + doff; 1274 buff1 = buff[1] + doff; 1275 buff2 = buff[2] + doff; 1276 buff3 = buff[3] + doff; 1277 vis_write_gsr(gsr_scale + off); 1278 1279 #ifndef CONV_INDEX 1280 if (jk_size == 2) { 1281 dp = ((mlib_addr)dl & 7) ? buffe : (mlib_d64*)dl; 1282 1283 s01 = buff0[0]; 1284 s11 = buff1[0]; 1285 #pragma pipeloop(0) 1286 for (i = 0; i < xsize/8; i++) { 1287 s00 = s01; 1288 s10 = s11; 1289 s01 = buff0[i + 1]; 1290 s11 = buff1[i + 1]; 1291 s0 = vis_faligndata(s00, s01); 1292 s1 = vis_faligndata(s10, s11); 1293 1294 d00 = vis_fmul8x16au(vis_read_hi(s0), k0); 1295 d01 = vis_fmul8x16au(vis_read_lo(s0), k0); 1296 d10 = vis_fmul8x16au(vis_read_hi(s1), k1); 1297 d11 = vis_fmul8x16au(vis_read_lo(s1), k1); 1298 1299 d0 = buffd[2*i]; 1300 d1 = buffd[2*i + 1]; 1301 d0 = vis_fpadd16(d0, d00); 1302 d0 = vis_fpadd16(d0, d10); 1303 d1 = vis_fpadd16(d1, d01); 1304 d1 = vis_fpadd16(d1, d11); 1305 1306 dd = vis_fpack16_pair(d0, d1); 1307 dp[i] = dd; 1308 1309 buffd[2*i ] = drnd; 1310 buffd[2*i + 1] = drnd; 1311 } 1312 1313 if (emask) { 1314 s00 = s01; 1315 s10 = s11; 1316 s01 = buff0[i + 1]; 1317 s11 = buff1[i + 1]; 1318 s0 = vis_faligndata(s00, s01); 1319 s1 = vis_faligndata(s10, s11); 1320 1321 d00 = vis_fmul8x16au(vis_read_hi(s0), k0); 1322 d01 = vis_fmul8x16au(vis_read_lo(s0), k0); 1323 d10 = vis_fmul8x16au(vis_read_hi(s1), k1); 1324 d11 = vis_fmul8x16au(vis_read_lo(s1), k1); 1325 1326 d0 = buffd[2*i]; 1327 d1 = buffd[2*i + 1]; 1328 d0 = vis_fpadd16(d0, d00); 1329 d0 = vis_fpadd16(d0, d10); 1330 d1 = vis_fpadd16(d1, d01); 1331 d1 = vis_fpadd16(d1, d11); 1332 1333 dd = vis_fpack16_pair(d0, d1); 1334 vis_pst_8(dd, dp + i, emask); 1335 1336 buffd[2*i ] = drnd; 1337 buffd[2*i + 1] = drnd; 1338 } 1339 1340 if ((mlib_u8*)dp != dl) mlib_ImageCopy_na((void*)buffe, dl, xsize); 1341 1342 } else if (jk_size == 3) { 1343 1344 dp = ((mlib_addr)dl & 7) ? buffe : (mlib_d64*)dl; 1345 1346 s01 = buff0[0]; 1347 s11 = buff1[0]; 1348 s21 = buff2[0]; 1349 #pragma pipeloop(0) 1350 for (i = 0; i < xsize/8; i++) { 1351 s00 = s01; 1352 s10 = s11; 1353 s20 = s21; 1354 s01 = buff0[i + 1]; 1355 s11 = buff1[i + 1]; 1356 s21 = buff2[i + 1]; 1357 s0 = vis_faligndata(s00, s01); 1358 s1 = vis_faligndata(s10, s11); 1359 s2 = vis_faligndata(s20, s21); 1360 1361 d00 = vis_fmul8x16au(vis_read_hi(s0), k0); 1362 d01 = vis_fmul8x16au(vis_read_lo(s0), k0); 1363 d10 = vis_fmul8x16au(vis_read_hi(s1), k1); 1364 d11 = vis_fmul8x16au(vis_read_lo(s1), k1); 1365 d20 = vis_fmul8x16au(vis_read_hi(s2), k2); 1366 d21 = vis_fmul8x16au(vis_read_lo(s2), k2); 1367 1368 d0 = buffd[2*i]; 1369 d1 = buffd[2*i + 1]; 1370 d0 = vis_fpadd16(d0, d00); 1371 d0 = vis_fpadd16(d0, d10); 1372 d0 = vis_fpadd16(d0, d20); 1373 d1 = vis_fpadd16(d1, d01); 1374 d1 = vis_fpadd16(d1, d11); 1375 d1 = vis_fpadd16(d1, d21); 1376 1377 dd = vis_fpack16_pair(d0, d1); 1378 dp[i] = dd; 1379 1380 buffd[2*i ] = drnd; 1381 buffd[2*i + 1] = drnd; 1382 } 1383 1384 if (emask) { 1385 s00 = s01; 1386 s10 = s11; 1387 s20 = s21; 1388 s01 = buff0[i + 1]; 1389 s11 = buff1[i + 1]; 1390 s21 = buff2[i + 1]; 1391 s0 = vis_faligndata(s00, s01); 1392 s1 = vis_faligndata(s10, s11); 1393 s2 = vis_faligndata(s20, s21); 1394 1395 d00 = vis_fmul8x16au(vis_read_hi(s0), k0); 1396 d01 = vis_fmul8x16au(vis_read_lo(s0), k0); 1397 d10 = vis_fmul8x16au(vis_read_hi(s1), k1); 1398 d11 = vis_fmul8x16au(vis_read_lo(s1), k1); 1399 d20 = vis_fmul8x16au(vis_read_hi(s2), k2); 1400 d21 = vis_fmul8x16au(vis_read_lo(s2), k2); 1401 1402 d0 = buffd[2*i]; 1403 d1 = buffd[2*i + 1]; 1404 d0 = vis_fpadd16(d0, d00); 1405 d0 = vis_fpadd16(d0, d10); 1406 d0 = vis_fpadd16(d0, d20); 1407 d1 = vis_fpadd16(d1, d01); 1408 d1 = vis_fpadd16(d1, d11); 1409 d1 = vis_fpadd16(d1, d21); 1410 1411 dd = vis_fpack16_pair(d0, d1); 1412 vis_pst_8(dd, dp + i, emask); 1413 1414 buffd[2*i ] = drnd; 1415 buffd[2*i + 1] = drnd; 1416 } 1417 1418 if ((mlib_u8*)dp != dl) mlib_ImageCopy_na((void*)buffe, dl, xsize); 1419 1420 } else /* if (jk_size == 4) */ { 1421 1422 dp = ((mlib_addr)dl & 7) ? buffe : (mlib_d64*)dl; 1423 1424 s01 = buff0[0]; 1425 s11 = buff1[0]; 1426 s21 = buff2[0]; 1427 s31 = buff3[0]; 1428 #pragma pipeloop(0) 1429 for (i = 0; i < xsize/8; i++) { 1430 s00 = s01; 1431 s10 = s11; 1432 s20 = s21; 1433 s30 = s31; 1434 s01 = buff0[i + 1]; 1435 s11 = buff1[i + 1]; 1436 s21 = buff2[i + 1]; 1437 s31 = buff3[i + 1]; 1438 s0 = vis_faligndata(s00, s01); 1439 s1 = vis_faligndata(s10, s11); 1440 s2 = vis_faligndata(s20, s21); 1441 s3 = vis_faligndata(s30, s31); 1442 1443 d00 = vis_fmul8x16au(vis_read_hi(s0), k0); 1444 d01 = vis_fmul8x16au(vis_read_lo(s0), k0); 1445 d10 = vis_fmul8x16au(vis_read_hi(s1), k1); 1446 d11 = vis_fmul8x16au(vis_read_lo(s1), k1); 1447 d20 = vis_fmul8x16au(vis_read_hi(s2), k2); 1448 d21 = vis_fmul8x16au(vis_read_lo(s2), k2); 1449 d30 = vis_fmul8x16au(vis_read_hi(s3), k3); 1450 d31 = vis_fmul8x16au(vis_read_lo(s3), k3); 1451 1452 d0 = buffd[2*i]; 1453 d1 = buffd[2*i + 1]; 1454 d0 = vis_fpadd16(d0, d00); 1455 d0 = vis_fpadd16(d0, d10); 1456 d0 = vis_fpadd16(d0, d20); 1457 d0 = vis_fpadd16(d0, d30); 1458 d1 = vis_fpadd16(d1, d01); 1459 d1 = vis_fpadd16(d1, d11); 1460 d1 = vis_fpadd16(d1, d21); 1461 d1 = vis_fpadd16(d1, d31); 1462 1463 dd = vis_fpack16_pair(d0, d1); 1464 dp[i] = dd; 1465 1466 buffd[2*i ] = drnd; 1467 buffd[2*i + 1] = drnd; 1468 } 1469 1470 if (emask) { 1471 s00 = s01; 1472 s10 = s11; 1473 s20 = s21; 1474 s30 = s31; 1475 s01 = buff0[i + 1]; 1476 s11 = buff1[i + 1]; 1477 s21 = buff2[i + 1]; 1478 s31 = buff3[i + 1]; 1479 s0 = vis_faligndata(s00, s01); 1480 s1 = vis_faligndata(s10, s11); 1481 s2 = vis_faligndata(s20, s21); 1482 s3 = vis_faligndata(s30, s31); 1483 1484 d00 = vis_fmul8x16au(vis_read_hi(s0), k0); 1485 d01 = vis_fmul8x16au(vis_read_lo(s0), k0); 1486 d10 = vis_fmul8x16au(vis_read_hi(s1), k1); 1487 d11 = vis_fmul8x16au(vis_read_lo(s1), k1); 1488 d20 = vis_fmul8x16au(vis_read_hi(s2), k2); 1489 d21 = vis_fmul8x16au(vis_read_lo(s2), k2); 1490 d30 = vis_fmul8x16au(vis_read_hi(s3), k3); 1491 d31 = vis_fmul8x16au(vis_read_lo(s3), k3); 1492 1493 d0 = buffd[2*i]; 1494 d1 = buffd[2*i + 1]; 1495 d0 = vis_fpadd16(d0, d00); 1496 d0 = vis_fpadd16(d0, d10); 1497 d0 = vis_fpadd16(d0, d20); 1498 d0 = vis_fpadd16(d0, d30); 1499 d1 = vis_fpadd16(d1, d01); 1500 d1 = vis_fpadd16(d1, d11); 1501 d1 = vis_fpadd16(d1, d21); 1502 d1 = vis_fpadd16(d1, d31); 1503 1504 dd = vis_fpack16_pair(d0, d1); 1505 vis_pst_8(dd, dp + i, emask); 1506 1507 buffd[2*i ] = drnd; 1508 buffd[2*i + 1] = drnd; 1509 } 1510 1511 if ((mlib_u8*)dp != dl) mlib_ImageCopy_na((void*)buffe, dl, xsize); 1512 } 1513 1514 #else /* CONV_INDEX */ 1515 1516 if (jk_size == 2) { 1517 vis_write_gsr(gsr_scale + 7); 1518 1519 #pragma pipeloop(0) 1520 for (i = 0; i < dsize; i += 3) { 1521 mlib_d64 d00, d01, d02, d03, d04, d05; 1522 mlib_d64 d10, d11, d12, d13, d14, d15; 1523 mlib_d64 d0, d1, d2, d3, d4, d5; 1524 mlib_d64 s00 = buff0[i]; 1525 mlib_d64 s01 = buff0[i + 1]; 1526 mlib_d64 s02 = buff0[i + 2]; 1527 mlib_d64 s10 = buff1[i]; 1528 mlib_d64 s11 = buff1[i + 1]; 1529 mlib_d64 s12 = buff1[i + 2]; 1530 1531 d00 = vis_fmul8x16au(vis_read_hi(s00), k0); 1532 d01 = vis_fmul8x16au(vis_read_lo(s00), k0); 1533 d02 = vis_fmul8x16au(vis_read_hi(s01), k0); 1534 d03 = vis_fmul8x16au(vis_read_lo(s01), k0); 1535 d04 = vis_fmul8x16au(vis_read_hi(s02), k0); 1536 d05 = vis_fmul8x16au(vis_read_lo(s02), k0); 1537 d10 = vis_fmul8x16au(vis_read_hi(s10), k1); 1538 d11 = vis_fmul8x16au(vis_read_lo(s10), k1); 1539 d12 = vis_fmul8x16au(vis_read_hi(s11), k1); 1540 d13 = vis_fmul8x16au(vis_read_lo(s11), k1); 1541 d14 = vis_fmul8x16au(vis_read_hi(s12), k1); 1542 d15 = vis_fmul8x16au(vis_read_lo(s12), k1); 1543 1544 d0 = buffd[2*i]; 1545 d1 = buffd[2*i + 1]; 1546 d2 = buffd[2*i + 2]; 1547 d3 = buffd[2*i + 3]; 1548 d4 = buffd[2*i + 4]; 1549 d5 = buffd[2*i + 5]; 1550 d0 = vis_fpadd16(d0, d00); 1551 d0 = vis_fpadd16(d0, d10); 1552 d1 = vis_fpadd16(d1, d01); 1553 d1 = vis_fpadd16(d1, d11); 1554 d2 = vis_fpadd16(d2, d02); 1555 d2 = vis_fpadd16(d2, d12); 1556 d3 = vis_fpadd16(d3, d03); 1557 d3 = vis_fpadd16(d3, d13); 1558 d4 = vis_fpadd16(d4, d04); 1559 d4 = vis_fpadd16(d4, d14); 1560 d5 = vis_fpadd16(d5, d05); 1561 d5 = vis_fpadd16(d5, d15); 1562 1563 buffe[i ] = vis_fpack16_pair(d0, d1); 1564 buffe[i + 1] = vis_fpack16_pair(d2, d3); 1565 buffe[i + 2] = vis_fpack16_pair(d4, d5); 1566 1567 buffd[2*i ] = drnd; 1568 buffd[2*i + 1] = drnd; 1569 buffd[2*i + 2] = drnd; 1570 buffd[2*i + 3] = drnd; 1571 buffd[2*i + 4] = drnd; 1572 buffd[2*i + 5] = drnd; 1573 1574 LOAD_SRC(); 1575 } 1576 1577 } else /* if (jk_size == 3) */ { 1578 vis_write_gsr(gsr_scale + 7); 1579 1580 #pragma pipeloop(0) 1581 for (i = 0; i < dsize; i += 3) { 1582 mlib_d64 d00, d01, d02, d03, d04, d05; 1583 mlib_d64 d10, d11, d12, d13, d14, d15; 1584 mlib_d64 d20, d21, d22, d23, d24, d25; 1585 mlib_d64 d0, d1, d2, d3, d4, d5; 1586 mlib_d64 s00 = buff0[i]; 1587 mlib_d64 s01 = buff0[i + 1]; 1588 mlib_d64 s02 = buff0[i + 2]; 1589 mlib_d64 s10 = buff1[i]; 1590 mlib_d64 s11 = buff1[i + 1]; 1591 mlib_d64 s12 = buff1[i + 2]; 1592 mlib_d64 s20 = buff2[i]; 1593 mlib_d64 s21 = buff2[i + 1]; 1594 mlib_d64 s22 = buff2[i + 2]; 1595 1596 d00 = vis_fmul8x16au(vis_read_hi(s00), k0); 1597 d01 = vis_fmul8x16au(vis_read_lo(s00), k0); 1598 d02 = vis_fmul8x16au(vis_read_hi(s01), k0); 1599 d03 = vis_fmul8x16au(vis_read_lo(s01), k0); 1600 d04 = vis_fmul8x16au(vis_read_hi(s02), k0); 1601 d05 = vis_fmul8x16au(vis_read_lo(s02), k0); 1602 d10 = vis_fmul8x16au(vis_read_hi(s10), k1); 1603 d11 = vis_fmul8x16au(vis_read_lo(s10), k1); 1604 d12 = vis_fmul8x16au(vis_read_hi(s11), k1); 1605 d13 = vis_fmul8x16au(vis_read_lo(s11), k1); 1606 d14 = vis_fmul8x16au(vis_read_hi(s12), k1); 1607 d15 = vis_fmul8x16au(vis_read_lo(s12), k1); 1608 d20 = vis_fmul8x16au(vis_read_hi(s20), k2); 1609 d21 = vis_fmul8x16au(vis_read_lo(s20), k2); 1610 d22 = vis_fmul8x16au(vis_read_hi(s21), k2); 1611 d23 = vis_fmul8x16au(vis_read_lo(s21), k2); 1612 d24 = vis_fmul8x16au(vis_read_hi(s22), k2); 1613 d25 = vis_fmul8x16au(vis_read_lo(s22), k2); 1614 1615 d0 = buffd[2*i]; 1616 d1 = buffd[2*i + 1]; 1617 d2 = buffd[2*i + 2]; 1618 d3 = buffd[2*i + 3]; 1619 d4 = buffd[2*i + 4]; 1620 d5 = buffd[2*i + 5]; 1621 d0 = vis_fpadd16(d0, d00); 1622 d0 = vis_fpadd16(d0, d10); 1623 d0 = vis_fpadd16(d0, d20); 1624 d1 = vis_fpadd16(d1, d01); 1625 d1 = vis_fpadd16(d1, d11); 1626 d1 = vis_fpadd16(d1, d21); 1627 d2 = vis_fpadd16(d2, d02); 1628 d2 = vis_fpadd16(d2, d12); 1629 d2 = vis_fpadd16(d2, d22); 1630 d3 = vis_fpadd16(d3, d03); 1631 d3 = vis_fpadd16(d3, d13); 1632 d3 = vis_fpadd16(d3, d23); 1633 d4 = vis_fpadd16(d4, d04); 1634 d4 = vis_fpadd16(d4, d14); 1635 d4 = vis_fpadd16(d4, d24); 1636 d5 = vis_fpadd16(d5, d05); 1637 d5 = vis_fpadd16(d5, d15); 1638 d5 = vis_fpadd16(d5, d25); 1639 1640 buffe[i ] = vis_fpack16_pair(d0, d1); 1641 buffe[i + 1] = vis_fpack16_pair(d2, d3); 1642 buffe[i + 2] = vis_fpack16_pair(d4, d5); 1643 1644 buffd[2*i ] = drnd; 1645 buffd[2*i + 1] = drnd; 1646 buffd[2*i + 2] = drnd; 1647 buffd[2*i + 3] = drnd; 1648 buffd[2*i + 4] = drnd; 1649 buffd[2*i + 5] = drnd; 1650 1651 LOAD_SRC(); 1652 } 1653 } 1654 #endif /* CONV_INDEX */ 1655 1656 #ifdef CONV_INDEX 1657 mlib_ImageColorTrue2IndexLine_U8_S16_3((void*)buffe, dl, wid, colormap); 1658 #endif /* CONV_INDEX */ 1659 1660 sl += sll; 1661 dl += dll; 1662 1663 buff_ind++; 1664 if (buff_ind >= (n + 1)) buff_ind = 0; 1665 } 1666 1667 mlib_free(pbuff); 1668 if (buffs != buffs_local) mlib_free(buffs); 1669 1670 return MLIB_SUCCESS; 1671 } 1672 1673 /***************************************************************/