1 /* 2 * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 27 #include "mlib_image.h" 28 #include "mlib_ImageLookUp.h" 29 #include "mlib_c_ImageLookUp.h" 30 31 /***************************************************************/ 32 #define MLIB_C_IMAGELOOKUP(DTYPE, STYPE, TABLE) \ 33 { \ 34 mlib_s32 i, j, k; \ 35 \ 36 if (xsize < 2) { \ 37 for(j = 0; j < ysize; j++, dst += dlb, src += slb){ \ 38 for(k = 0; k < csize; k++) { \ 39 DTYPE *da = dst + k; \ 40 const STYPE *sa = src + k; \ 41 DTYPE *tab = (DTYPE*) TABLE[k]; \ 42 \ 43 for(i = 0; i < xsize; i++, da += csize, sa += csize) \ 44 *da=tab[*sa]; \ 45 } \ 46 } \ 47 } else { \ 48 for(j = 0; j < ysize; j++, dst += dlb, src += slb) { \ 49 for(k = 0; k < csize; k++) { \ 50 DTYPE *da = dst + k; \ 51 const STYPE *sa = src + k; \ 52 DTYPE *tab = (DTYPE*) TABLE[k]; \ 53 mlib_s32 s0, t0, s1, t1; \ 54 \ 55 s0 = (mlib_s32)sa[0]; \ 56 s1 = (mlib_s32)sa[csize]; \ 57 sa += 2*csize; \ 58 \ 59 for(i = 0; i < xsize - 3; i+=2, da += 2*csize, sa += 2*csize) { \ 60 t0 = (mlib_s32)tab[s0]; \ 61 t1 = (mlib_s32)tab[s1]; \ 62 s0 = (mlib_s32)sa[0]; \ 63 s1 = (mlib_s32)sa[csize]; \ 64 da[0] = (DTYPE)t0; \ 65 da[csize] = (DTYPE)t1; \ 66 } \ 67 t0 = (mlib_s32)tab[s0]; \ 68 t1 = (mlib_s32)tab[s1]; \ 69 da[0] = (DTYPE)t0; \ 70 da[csize] = (DTYPE)t1; \ 71 if (xsize & 1) da[2*csize] = tab[sa[0]]; \ 72 } \ 73 } \ 74 } \ 75 } 76 77 /***************************************************************/ 78 #define MLIB_C_IMAGELOOKUPSI(DTYPE, STYPE, TABLE) \ 79 { \ 80 mlib_s32 i, j, k; \ 81 \ 82 if (xsize < 2) { \ 83 for(j = 0; j < ysize; j++, dst += dlb, src += slb){ \ 84 for(k = 0; k < csize; k++) { \ 85 DTYPE *da = dst + k; \ 86 const STYPE *sa = (void *)src; \ 87 DTYPE *tab = (DTYPE*) TABLE[k]; \ 88 \ 89 for(i = 0; i < xsize; i++, da += csize, sa ++) \ 90 *da=tab[*sa]; \ 91 } \ 92 } \ 93 } else { \ 94 for(j = 0; j < ysize; j++, dst += dlb, src += slb) { \ 95 for(k = 0; k < csize; k++) { \ 96 DTYPE *da = dst + k; \ 97 const STYPE *sa = (void *)src; \ 98 DTYPE *tab = (DTYPE*) TABLE[k]; \ 99 mlib_s32 s0, t0, s1, t1; \ 100 \ 101 s0 = (mlib_s32)sa[0]; \ 102 s1 = (mlib_s32)sa[1]; \ 103 sa += 2; \ 104 \ 105 for(i = 0; i < xsize - 3; i+=2, da += 2*csize, sa += 2) { \ 106 t0 = (mlib_s32)tab[s0]; \ 107 t1 = (mlib_s32)tab[s1]; \ 108 s0 = (mlib_s32)sa[0]; \ 109 s1 = (mlib_s32)sa[1]; \ 110 da[0] = (DTYPE)t0; \ 111 da[csize] = (DTYPE)t1; \ 112 } \ 113 t0 = (mlib_s32)tab[s0]; \ 114 t1 = (mlib_s32)tab[s1]; \ 115 da[0] = (DTYPE)t0; \ 116 da[csize] = (DTYPE)t1; \ 117 if (xsize & 1) da[2*csize] = tab[sa[0]]; \ 118 } \ 119 } \ 120 } \ 121 } 122 123 #ifdef _LITTLE_ENDIAN 124 125 /***************************************************************/ 126 #define READ_U8_U8_ALIGN(table0, table1, table2, table3) \ 127 t3 = table0[s0 & 0xFF]; \ 128 t2 = table1[s0>>8]; \ 129 t1 = table2[s1 & 0xFF]; \ 130 t0 = table3[s1>>8] 131 132 /***************************************************************/ 133 #define READ_U8_U8_NOTALIGN(table0, table1, table2, table3) \ 134 t3 = table0[s0 >> 8]; \ 135 t2 = table1[s1 & 0xFF]; \ 136 t1 = table2[s1 >> 8]; \ 137 t0 = table3[s2 & 0xFF] 138 139 /***************************************************************/ 140 #define READ_U8_S16_ALIGN(table0, table1, table2, table3) \ 141 t1 = *(mlib_u16*)((mlib_u8*)table0 + ((s0 << 1) & 0x1FE)); \ 142 t0 = *(mlib_u16*)((mlib_u8*)table1 + ((s0 >> 7) & 0x1FE)); \ 143 t3 = *(mlib_u16*)((mlib_u8*)table2 + ((s0 >> 15) & 0x1FE)); \ 144 t2 = *(mlib_u16*)((mlib_u8*)table3 + ((s0 >> 23) & 0x1FE)) 145 146 /***************************************************************/ 147 #define READ_U8_S16_NOTALIGN(table0, table1, table2, table3) \ 148 t1 = *(mlib_u16*)((mlib_u8*)table0 + ((s0 >> 7) & 0x1FE)); \ 149 t0 = *(mlib_u16*)((mlib_u8*)table1 + ((s0 >> 15) & 0x1FE)); \ 150 t3 = *(mlib_u16*)((mlib_u8*)table2 + ((s0 >> 23) & 0x1FE)); \ 151 t2 = *(mlib_u16*)((mlib_u8*)table3 + ((s1 << 1) & 0x1FE)) 152 153 /***************************************************************/ 154 #define ADD_READ_U8_S16_NOTALIGN(table0, table1, table2) \ 155 t1 = *(mlib_u16*)((mlib_u8*)table0 + ((s1 >> 7) & 0x1FE)); \ 156 t0 = *(mlib_u16*)((mlib_u8*)table1 + ((s1 >> 15) & 0x1FE)); \ 157 t2 = *(mlib_u16*)((mlib_u8*)table2 + ((s1 >> 23) & 0x1FE)) 158 159 /***************************************************************/ 160 #define READ_U8_S32(table0, table1, table2, table3) \ 161 t0 = *(mlib_u32*)((mlib_u8*)table0 + ((s0 << 2) & 0x3FC)); \ 162 t1 = *(mlib_u32*)((mlib_u8*)table1 + ((s0 >> 6) & 0x3FC)); \ 163 t2 = *(mlib_u32*)((mlib_u8*)table2 + ((s0 >> 14) & 0x3FC)); \ 164 t3 = *(mlib_u32*)((mlib_u8*)table3 + ((s0 >> 22) & 0x3FC)) 165 166 #else /* _LITTLE_ENDIAN */ 167 168 /***********/ 169 #define READ_U8_U8_ALIGN(table0, table1, table2, table3) \ 170 t0 = table0[s0>>8]; \ 171 t1 = table1[s0 & 0xFF]; \ 172 t2 = table2[s1>>8]; \ 173 t3 = table3[s1 & 0xFF] 174 175 /***************************************************************/ 176 #define READ_U8_U8_NOTALIGN(table0, table1, table2, table3) \ 177 t0 = table0[s0 & 0xFF]; \ 178 t1 = table1[s1 >> 8]; \ 179 t2 = table2[s1 & 0xFF]; \ 180 t3 = table3[s2 >> 8] 181 182 /***************************************************************/ 183 #define READ_U8_S16_ALIGN(table0, table1, table2, table3) \ 184 t0 = *(mlib_u16*)((mlib_u8*)table0 + ((s0 >> 23) & 0x1FE)); \ 185 t1 = *(mlib_u16*)((mlib_u8*)table1 + ((s0 >> 15) & 0x1FE)); \ 186 t2 = *(mlib_u16*)((mlib_u8*)table2 + ((s0 >> 7) & 0x1FE)); \ 187 t3 = *(mlib_u16*)((mlib_u8*)table3 + ((s0 << 1) & 0x1FE)) 188 189 /***************************************************************/ 190 #define READ_U8_S16_NOTALIGN(table0, table1, table2, table3) \ 191 t0 = *(mlib_u16*)((mlib_u8*)table0 + ((s0 >> 15) & 0x1FE)); \ 192 t1 = *(mlib_u16*)((mlib_u8*)table1 + ((s0 >> 7) & 0x1FE)); \ 193 t2 = *(mlib_u16*)((mlib_u8*)table2 + ((s0 << 1) & 0x1FE)); \ 194 t3 = *(mlib_u16*)((mlib_u8*)table3 + ((s1 >> 23) & 0x1FE)) 195 196 /***************************************************************/ 197 #define ADD_READ_U8_S16_NOTALIGN(table0, table1, table2) \ 198 t0 = *(mlib_u16*)((mlib_u8*)table0 + ((s1 >> 15) & 0x1FE)); \ 199 t1 = *(mlib_u16*)((mlib_u8*)table1 + ((s1 >> 7) & 0x1FE)); \ 200 t2 = *(mlib_u16*)((mlib_u8*)table2 + ((s1 << 1) & 0x1FE)) 201 202 /***************************************************************/ 203 #define READ_U8_S32(table0, table1, table2, table3) \ 204 t0 = *(mlib_u32*)((mlib_u8*)table0 + ((s0 >> 22) & 0x3FC)); \ 205 t1 = *(mlib_u32*)((mlib_u8*)table1 + ((s0 >> 14) & 0x3FC)); \ 206 t2 = *(mlib_u32*)((mlib_u8*)table2 + ((s0 >> 6) & 0x3FC)); \ 207 t3 = *(mlib_u32*)((mlib_u8*)table3 + ((s0 << 2) & 0x3FC)) 208 209 #endif /* _LITTLE_ENDIAN */ 210 211 /***************************************************************/ 212 void mlib_c_ImageLookUp_U8_U8(const mlib_u8 *src, 213 mlib_s32 slb, 214 mlib_u8 *dst, 215 mlib_s32 dlb, 216 mlib_s32 xsize, 217 mlib_s32 ysize, 218 mlib_s32 csize, 219 const mlib_u8 **table) 220 { 221 222 if (xsize * csize < 9) { 223 MLIB_C_IMAGELOOKUP(mlib_u8, mlib_u8, table); 224 } 225 else if (csize == 1) { 226 mlib_s32 i, j; 227 228 for (j = 0; j < ysize; j++, dst += dlb, src += slb) { 229 mlib_u32 *da; 230 mlib_u16 *sa; 231 mlib_u8 *tab = (mlib_u8 *) table[0]; 232 mlib_u32 s0, s1, s2, t0, t1, t2, t3, t; 233 mlib_s32 off; 234 mlib_s32 size = xsize; 235 mlib_u8 *dp = dst, *sp = (void *)src; 236 237 off = (mlib_s32) ((4 - ((mlib_addr) dst & 3)) & 3); 238 239 for (i = 0; i < off; i++, sp++) { 240 *dp++ = tab[sp[0]]; 241 size--; 242 } 243 244 da = (mlib_u32 *) dp; 245 246 if (((mlib_addr) sp & 1) == 0) { 247 sa = (mlib_u16 *) sp; 248 249 s0 = sa[0]; 250 s1 = sa[1]; 251 sa += 2; 252 253 #ifdef __SUNPRO_C 254 #pragma pipeloop(0) 255 #endif /* __SUNPRO_C */ 256 for (i = 0; i < size - 7; i += 4, da++, sa += 2) { 257 READ_U8_U8_ALIGN(tab, tab, tab, tab); 258 t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3; 259 s0 = sa[0]; 260 s1 = sa[1]; 261 da[0] = t; 262 } 263 264 READ_U8_U8_ALIGN(tab, tab, tab, tab); 265 t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3; 266 da[0] = t; 267 da++; 268 dp = (mlib_u8 *) da; 269 sp = (mlib_u8 *) sa; 270 i += 4; 271 for (; i < size; i++, dp++, sp++) 272 dp[0] = tab[sp[0]]; 273 274 } 275 else { 276 sa = (mlib_u16 *) (sp - 1); 277 278 s0 = sa[0]; 279 s1 = sa[1]; 280 s2 = sa[2]; 281 sa += 3; 282 283 #ifdef __SUNPRO_C 284 #pragma pipeloop(0) 285 #endif /* __SUNPRO_C */ 286 for (i = 0; i < size - 8; i += 4, da++, sa += 2) { 287 READ_U8_U8_NOTALIGN(tab, tab, tab, tab); 288 t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3; 289 s0 = s2; 290 s1 = sa[0]; 291 s2 = sa[1]; 292 da[0] = t; 293 } 294 295 READ_U8_U8_NOTALIGN(tab, tab, tab, tab); 296 t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3; 297 da[0] = t; 298 da++; 299 dp = (mlib_u8 *) da; 300 #ifdef _LITTLE_ENDIAN 301 *dp++ = tab[s2 >> 8]; 302 #else 303 *dp++ = tab[s2 & 0xFF]; 304 #endif /* _LITTLE_ENDIAN */ 305 sp = (mlib_u8 *) sa; 306 i += 5; 307 for (; i < size; i++, dp++, sp++) 308 dp[0] = tab[sp[0]]; 309 } 310 } 311 312 } 313 else if (csize == 2) { 314 mlib_s32 i, j; 315 316 for (j = 0; j < ysize; j++, dst += dlb, src += slb) { 317 mlib_u32 *da; 318 mlib_u16 *sa; 319 mlib_u8 *tab0 = (mlib_u8 *) table[0]; 320 mlib_u8 *tab1 = (mlib_u8 *) table[1]; 321 mlib_u8 *tab; 322 mlib_u32 s0, s1, s2, t0, t1, t2, t3, t; 323 mlib_s32 off; 324 mlib_s32 size = xsize * 2; 325 mlib_u8 *dp = dst, *sp = (void *)src; 326 327 off = (mlib_s32) ((4 - ((mlib_addr) dst & 3)) & 3); 328 329 for (i = 0; i < off - 1; i += 2, sp += 2) { 330 *dp++ = tab0[sp[0]]; 331 *dp++ = tab1[sp[1]]; 332 size -= 2; 333 } 334 335 if ((off & 1) != 0) { 336 *dp++ = tab0[sp[0]]; 337 size--; 338 sp++; 339 tab = tab0; 340 tab0 = tab1; 341 tab1 = tab; 342 } 343 344 da = (mlib_u32 *) dp; 345 346 if (((mlib_addr) sp & 1) == 0) { 347 sa = (mlib_u16 *) sp; 348 349 s0 = sa[0]; 350 s1 = sa[1]; 351 sa += 2; 352 353 #ifdef __SUNPRO_C 354 #pragma pipeloop(0) 355 #endif /* __SUNPRO_C */ 356 for (i = 0; i < size - 7; i += 4, da++, sa += 2) { 357 READ_U8_U8_ALIGN(tab0, tab1, tab0, tab1); 358 t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3; 359 s0 = sa[0]; 360 s1 = sa[1]; 361 da[0] = t; 362 } 363 364 READ_U8_U8_ALIGN(tab0, tab1, tab0, tab1); 365 t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3; 366 da[0] = t; 367 da++; 368 dp = (mlib_u8 *) da; 369 sp = (mlib_u8 *) sa; 370 i += 4; 371 372 for (; i < size - 1; i += 2, sp += 2) { 373 *dp++ = tab0[sp[0]]; 374 *dp++ = tab1[sp[1]]; 375 } 376 377 if (i < size) 378 *dp = tab0[(*sp)]; 379 380 } 381 else { 382 sa = (mlib_u16 *) (sp - 1); 383 384 s0 = sa[0]; 385 s1 = sa[1]; 386 s2 = sa[2]; 387 sa += 3; 388 389 #ifdef __SUNPRO_C 390 #pragma pipeloop(0) 391 #endif /* __SUNPRO_C */ 392 for (i = 0; i < size - 8; i += 4, da++, sa += 2) { 393 READ_U8_U8_NOTALIGN(tab0, tab1, tab0, tab1); 394 t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3; 395 s0 = s2; 396 s1 = sa[0]; 397 s2 = sa[1]; 398 da[0] = t; 399 } 400 401 READ_U8_U8_NOTALIGN(tab0, tab1, tab0, tab1); 402 t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3; 403 da[0] = t; 404 da++; 405 dp = (mlib_u8 *) da; 406 #ifdef _LITTLE_ENDIAN 407 *dp++ = tab0[s2 >> 8]; 408 #else 409 *dp++ = tab0[s2 & 0xFF]; 410 #endif /* _LITTLE_ENDIAN */ 411 sp = (mlib_u8 *) sa; 412 i += 5; 413 414 for (; i < size - 1; i += 2, sp += 2) { 415 *dp++ = tab1[sp[0]]; 416 *dp++ = tab0[sp[1]]; 417 } 418 419 if (i < size) 420 *dp = tab1[(*sp)]; 421 } 422 } 423 424 } 425 else if (csize == 3) { 426 mlib_s32 i, j; 427 428 for (j = 0; j < ysize; j++, dst += dlb, src += slb) { 429 mlib_u32 *da; 430 mlib_u16 *sa; 431 mlib_u8 *tab0 = (mlib_u8 *) table[0]; 432 mlib_u8 *tab1 = (mlib_u8 *) table[1]; 433 mlib_u8 *tab2 = (mlib_u8 *) table[2]; 434 mlib_u8 *tab; 435 mlib_u32 s0, s1, s2, t0, t1, t2, t3, t; 436 mlib_s32 off; 437 mlib_s32 size = xsize * 3; 438 mlib_u8 *dp = dst, *sp = (void *)src; 439 440 off = (mlib_s32) ((4 - ((mlib_addr) dst & 3)) & 3); 441 442 if (off == 1) { 443 *dp++ = tab0[sp[0]]; 444 tab = tab0; 445 tab0 = tab1; 446 tab1 = tab2; 447 tab2 = tab; 448 size--; 449 sp++; 450 } 451 else if (off == 2) { 452 *dp++ = tab0[sp[0]]; 453 *dp++ = tab1[sp[1]]; 454 tab = tab2; 455 tab2 = tab1; 456 tab1 = tab0; 457 tab0 = tab; 458 size -= 2; 459 sp += 2; 460 } 461 else if (off == 3) { 462 *dp++ = tab0[sp[0]]; 463 *dp++ = tab1[sp[1]]; 464 *dp++ = tab2[sp[2]]; 465 size -= 3; 466 sp += 3; 467 } 468 469 da = (mlib_u32 *) dp; 470 471 if (((mlib_addr) sp & 1) == 0) { 472 sa = (mlib_u16 *) sp; 473 474 s0 = sa[0]; 475 s1 = sa[1]; 476 sa += 2; 477 478 #ifdef __SUNPRO_C 479 #pragma pipeloop(0) 480 #endif /* __SUNPRO_C */ 481 for (i = 0; i < size - 7; i += 4, da++, sa += 2) { 482 READ_U8_U8_ALIGN(tab0, tab1, tab2, tab0); 483 t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3; 484 tab = tab0; 485 tab0 = tab1; 486 tab1 = tab2; 487 tab2 = tab; 488 s0 = sa[0]; 489 s1 = sa[1]; 490 da[0] = t; 491 } 492 493 READ_U8_U8_ALIGN(tab0, tab1, tab2, tab0); 494 t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3; 495 da[0] = t; 496 da++; 497 dp = (mlib_u8 *) da; 498 sp = (mlib_u8 *) sa; 499 i += 4; 500 501 if (i < size) { 502 *dp++ = tab1[(*sp)]; 503 i++; 504 sp++; 505 } 506 507 if (i < size) { 508 *dp++ = tab2[(*sp)]; 509 i++; 510 sp++; 511 } 512 513 if (i < size) { 514 *dp++ = tab0[(*sp)]; 515 } 516 517 } 518 else { 519 sa = (mlib_u16 *) (sp - 1); 520 521 s0 = sa[0]; 522 s1 = sa[1]; 523 s2 = sa[2]; 524 sa += 3; 525 526 #ifdef __SUNPRO_C 527 #pragma pipeloop(0) 528 #endif /* __SUNPRO_C */ 529 for (i = 0; i < size - 8; i += 4, da++, sa += 2) { 530 READ_U8_U8_NOTALIGN(tab0, tab1, tab2, tab0); 531 t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3; 532 tab = tab0; 533 tab0 = tab1; 534 tab1 = tab2; 535 tab2 = tab; 536 s0 = s2; 537 s1 = sa[0]; 538 s2 = sa[1]; 539 da[0] = t; 540 } 541 542 READ_U8_U8_NOTALIGN(tab0, tab1, tab2, tab0); 543 t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3; 544 da[0] = t; 545 da++; 546 dp = (mlib_u8 *) da; 547 #ifdef _LITTLE_ENDIAN 548 *dp++ = tab1[s2 >> 8]; 549 #else 550 *dp++ = tab1[s2 & 0xFF]; 551 #endif /* _LITTLE_ENDIAN */ 552 sp = (mlib_u8 *) sa; 553 i += 5; 554 555 if (i < size) { 556 *dp++ = tab2[(*sp)]; 557 i++; 558 sp++; 559 } 560 561 if (i < size) { 562 *dp++ = tab0[(*sp)]; 563 i++; 564 sp++; 565 } 566 567 if (i < size) { 568 *dp = tab1[(*sp)]; 569 } 570 } 571 } 572 573 } 574 else if (csize == 4) { 575 mlib_s32 i, j; 576 577 for (j = 0; j < ysize; j++, dst += dlb, src += slb) { 578 mlib_u32 *da; 579 mlib_u16 *sa; 580 mlib_u8 *tab0 = (mlib_u8 *) table[0]; 581 mlib_u8 *tab1 = (mlib_u8 *) table[1]; 582 mlib_u8 *tab2 = (mlib_u8 *) table[2]; 583 mlib_u8 *tab3 = (mlib_u8 *) table[3]; 584 mlib_u8 *tab; 585 mlib_u32 s0, s1, s2, t0, t1, t2, t3, t; 586 mlib_s32 off; 587 mlib_s32 size = xsize * 4; 588 mlib_u8 *dp = dst, *sp = (void *)src; 589 590 off = (mlib_s32) ((4 - ((mlib_addr) dst & 3)) & 3); 591 592 if (off == 1) { 593 *dp++ = tab0[sp[0]]; 594 tab = tab0; 595 tab0 = tab1; 596 tab1 = tab2; 597 tab2 = tab3; 598 tab3 = tab; 599 size--; 600 sp++; 601 } 602 else if (off == 2) { 603 *dp++ = tab0[sp[0]]; 604 *dp++ = tab1[sp[1]]; 605 tab = tab0; 606 tab0 = tab2; 607 tab2 = tab; 608 tab = tab1; 609 tab1 = tab3; 610 tab3 = tab; 611 size -= 2; 612 sp += 2; 613 } 614 else if (off == 3) { 615 *dp++ = tab0[sp[0]]; 616 *dp++ = tab1[sp[1]]; 617 *dp++ = tab2[sp[2]]; 618 tab = tab3; 619 tab3 = tab2; 620 tab2 = tab1; 621 tab1 = tab0; 622 tab0 = tab; 623 size -= 3; 624 sp += 3; 625 } 626 627 da = (mlib_u32 *) dp; 628 629 if (((mlib_addr) sp & 1) == 0) { 630 sa = (mlib_u16 *) sp; 631 632 s0 = sa[0]; 633 s1 = sa[1]; 634 sa += 2; 635 636 #ifdef __SUNPRO_C 637 #pragma pipeloop(0) 638 #endif /* __SUNPRO_C */ 639 for (i = 0; i < size - 7; i += 4, da++, sa += 2) { 640 READ_U8_U8_ALIGN(tab0, tab1, tab2, tab3); 641 t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3; 642 s0 = sa[0]; 643 s1 = sa[1]; 644 da[0] = t; 645 } 646 647 READ_U8_U8_ALIGN(tab0, tab1, tab2, tab3); 648 t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3; 649 da[0] = t; 650 da++; 651 dp = (mlib_u8 *) da; 652 sp = (mlib_u8 *) sa; 653 i += 4; 654 655 if (i < size) { 656 *dp++ = tab0[(*sp)]; 657 i++; 658 sp++; 659 } 660 661 if (i < size) { 662 *dp++ = tab1[(*sp)]; 663 i++; 664 sp++; 665 } 666 667 if (i < size) { 668 *dp = tab2[(*sp)]; 669 } 670 671 } 672 else { 673 sa = (mlib_u16 *) (sp - 1); 674 675 s0 = sa[0]; 676 s1 = sa[1]; 677 s2 = sa[2]; 678 sa += 3; 679 680 #ifdef __SUNPRO_C 681 #pragma pipeloop(0) 682 #endif /* __SUNPRO_C */ 683 for (i = 0; i < size - 8; i += 4, da++, sa += 2) { 684 READ_U8_U8_NOTALIGN(tab0, tab1, tab2, tab3); 685 t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3; 686 s0 = s2; 687 s1 = sa[0]; 688 s2 = sa[1]; 689 da[0] = t; 690 } 691 692 READ_U8_U8_NOTALIGN(tab0, tab1, tab2, tab3); 693 t = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3; 694 da[0] = t; 695 da++; 696 dp = (mlib_u8 *) da; 697 #ifdef _LITTLE_ENDIAN 698 *dp++ = tab0[s2 >> 8]; 699 #else 700 *dp++ = tab0[s2 & 0xFF]; 701 #endif /* _LITTLE_ENDIAN */ 702 sp = (mlib_u8 *) sa; 703 i += 5; 704 705 if (i < size) { 706 *dp++ = tab1[(*sp)]; 707 i++; 708 sp++; 709 } 710 711 if (i < size) { 712 *dp++ = tab2[(*sp)]; 713 i++; 714 sp++; 715 } 716 717 if (i < size) { 718 *dp = tab3[(*sp)]; 719 } 720 } 721 } 722 } 723 } 724 725 /***************************************************************/ 726 void mlib_c_ImageLookUp_S16_U8(const mlib_s16 *src, 727 mlib_s32 slb, 728 mlib_u8 *dst, 729 mlib_s32 dlb, 730 mlib_s32 xsize, 731 mlib_s32 ysize, 732 mlib_s32 csize, 733 const mlib_u8 **table) 734 { 735 const mlib_u8 *table_base[4]; 736 mlib_s32 c; 737 738 for (c = 0; c < csize; c++) { 739 table_base[c] = &table[c][32768]; 740 } 741 742 #ifdef __GNUC__ 743 #pragma GCC diagnostic push 744 #pragma GCC diagnostic ignored "-Warray-bounds" 745 #endif 746 MLIB_C_IMAGELOOKUP(mlib_u8, mlib_s16, table_base); 747 #ifdef __GNUC__ 748 #pragma GCC diagnostic pop 749 #endif 750 } 751 752 /***************************************************************/ 753 void mlib_c_ImageLookUp_U16_U8(const mlib_u16 *src, 754 mlib_s32 slb, 755 mlib_u8 *dst, 756 mlib_s32 dlb, 757 mlib_s32 xsize, 758 mlib_s32 ysize, 759 mlib_s32 csize, 760 const mlib_u8 **table) 761 { 762 const mlib_u8 *table_base[4]; 763 mlib_s32 c; 764 765 for (c = 0; c < csize; c++) { 766 table_base[c] = &table[c][0]; 767 } 768 769 #ifdef __GNUC__ 770 #pragma GCC diagnostic push 771 #pragma GCC diagnostic ignored "-Warray-bounds" 772 #endif 773 MLIB_C_IMAGELOOKUP(mlib_u8, mlib_u16, table_base); 774 #ifdef __GNUC__ 775 #pragma GCC diagnostic pop 776 #endif 777 } 778 779 /***************************************************************/ 780 void mlib_c_ImageLookUp_S32_U8(const mlib_s32 *src, 781 mlib_s32 slb, 782 mlib_u8 *dst, 783 mlib_s32 dlb, 784 mlib_s32 xsize, 785 mlib_s32 ysize, 786 mlib_s32 csize, 787 const mlib_u8 **table) 788 { 789 const mlib_u8 *table_base[4]; 790 mlib_s32 c; 791 792 for (c = 0; c < csize; c++) { 793 table_base[c] = &table[c][TABLE_SHIFT_S32]; 794 } 795 796 #ifdef __GNUC__ 797 #pragma GCC diagnostic push 798 #pragma GCC diagnostic ignored "-Warray-bounds" 799 #endif 800 MLIB_C_IMAGELOOKUP(mlib_u8, mlib_s32, table_base); 801 #ifdef __GNUC__ 802 #pragma GCC diagnostic pop 803 #endif 804 } 805 806 /***************************************************************/ 807 void mlib_c_ImageLookUp_U8_S16(const mlib_u8 *src, 808 mlib_s32 slb, 809 mlib_s16 *dst, 810 mlib_s32 dlb, 811 mlib_s32 xsize, 812 mlib_s32 ysize, 813 mlib_s32 csize, 814 const mlib_s16 **table) 815 { 816 817 if (xsize * csize < 12) { 818 MLIB_C_IMAGELOOKUP(mlib_s16, mlib_u8, table); 819 } 820 else if (csize == 1) { 821 mlib_s32 i, j; 822 823 for (j = 0; j < ysize; j++, dst += dlb, src += slb) { 824 mlib_u32 *sa; 825 mlib_u32 *da; 826 mlib_u16 *tab = (mlib_u16 *) table[0]; 827 mlib_u32 s0, s1, t0, t1, t2, t3; 828 mlib_u32 res1, res2; 829 mlib_s32 off; 830 mlib_s32 size = xsize; 831 mlib_u16 *dp = (mlib_u16 *) dst; 832 mlib_u8 *sp = (void *)src; 833 834 off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3); 835 836 for (i = 0; i < off; i++, sp++) { 837 *dp++ = tab[sp[0]]; 838 size--; 839 } 840 841 sa = (mlib_u32 *) sp; 842 843 if (((mlib_addr) dp & 3) == 0) { 844 da = (mlib_u32 *) dp; 845 846 s0 = sa[0]; 847 sa++; 848 849 #ifdef __SUNPRO_C 850 #pragma pipeloop(0) 851 #endif /* __SUNPRO_C */ 852 for (i = 0; i < size - 7; i += 4, da += 2, sa++) { 853 READ_U8_S16_ALIGN(tab, tab, tab, tab); 854 res1 = (t0 << 16) + t1; 855 res2 = (t2 << 16) + t3; 856 s0 = sa[0]; 857 da[0] = res1; 858 da[1] = res2; 859 } 860 861 READ_U8_S16_ALIGN(tab, tab, tab, tab); 862 res1 = (t0 << 16) + t1; 863 res2 = (t2 << 16) + t3; 864 da[0] = res1; 865 da[1] = res2; 866 da += 2; 867 dp = (mlib_u16 *) da; 868 sp = (mlib_u8 *) sa; 869 i += 4; 870 for (; i < size; i++, dp++, sp++) 871 dp[0] = tab[sp[0]]; 872 873 } 874 else { 875 876 *dp++ = tab[(*sp)]; 877 size--; 878 da = (mlib_u32 *) dp; 879 880 s0 = sa[0]; 881 s1 = sa[1]; 882 sa += 2; 883 884 #ifdef __SUNPRO_C 885 #pragma pipeloop(0) 886 #endif /* __SUNPRO_C */ 887 for (i = 0; i < size - 10; i += 4, da += 2, sa++) { 888 READ_U8_S16_NOTALIGN(tab, tab, tab, tab); 889 s0 = s1; 890 res1 = (t0 << 16) + t1; 891 res2 = (t2 << 16) + t3; 892 s1 = sa[0]; 893 da[0] = res1; 894 da[1] = res2; 895 } 896 897 READ_U8_S16_NOTALIGN(tab, tab, tab, tab); 898 res1 = (t0 << 16) + t1; 899 res2 = (t2 << 16) + t3; 900 da[0] = res1; 901 da[1] = res2; 902 ADD_READ_U8_S16_NOTALIGN(tab, tab, tab); 903 res1 = (t0 << 16) + t1; 904 da[2] = res1; 905 da += 3; 906 dp = (mlib_u16 *) da; 907 *dp++ = (mlib_u16) t2; 908 sp = (mlib_u8 *) sa; 909 i += 7; 910 for (; i < size; i++, dp++, sp++) 911 dp[0] = tab[sp[0]]; 912 } 913 } 914 915 } 916 else if (csize == 2) { 917 mlib_s32 i, j; 918 919 for (j = 0; j < ysize; j++, dst += dlb, src += slb) { 920 mlib_u32 *sa; 921 mlib_u32 *da; 922 mlib_u16 *tab0 = (mlib_u16 *) table[0]; 923 mlib_u16 *tab1 = (mlib_u16 *) table[1]; 924 mlib_u16 *tab; 925 mlib_u32 s0, s1, t0, t1, t2, t3; 926 mlib_u32 res1, res2; 927 mlib_s32 off; 928 mlib_s32 size = xsize * 2; 929 mlib_u16 *dp = (mlib_u16 *) dst; 930 mlib_u8 *sp = (void *)src; 931 932 off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3); 933 934 for (i = 0; i < off - 1; i += 2, sp += 2) { 935 *dp++ = tab0[sp[0]]; 936 *dp++ = tab1[sp[1]]; 937 size -= 2; 938 } 939 940 if ((off & 1) != 0) { 941 *dp++ = tab0[*sp]; 942 size--; 943 sp++; 944 tab = tab0; 945 tab0 = tab1; 946 tab1 = tab; 947 } 948 949 sa = (mlib_u32 *) sp; 950 951 if (((mlib_addr) dp & 3) == 0) { 952 da = (mlib_u32 *) dp; 953 954 s0 = sa[0]; 955 sa++; 956 957 #ifdef __SUNPRO_C 958 #pragma pipeloop(0) 959 #endif /* __SUNPRO_C */ 960 for (i = 0; i < size - 7; i += 4, da += 2, sa++) { 961 READ_U8_S16_ALIGN(tab0, tab1, tab0, tab1); 962 res1 = (t0 << 16) + t1; 963 res2 = (t2 << 16) + t3; 964 s0 = sa[0]; 965 da[0] = res1; 966 da[1] = res2; 967 } 968 969 READ_U8_S16_ALIGN(tab0, tab1, tab0, tab1); 970 res1 = (t0 << 16) + t1; 971 res2 = (t2 << 16) + t3; 972 da[0] = res1; 973 da[1] = res2; 974 da += 2; 975 dp = (mlib_u16 *) da; 976 sp = (mlib_u8 *) sa; 977 i += 4; 978 979 for (; i < size - 1; i += 2, sp += 2) { 980 *dp++ = tab0[sp[0]]; 981 *dp++ = tab1[sp[1]]; 982 } 983 984 if (i < size) 985 *dp = tab0[(*sp)]; 986 987 } 988 else { 989 990 *dp++ = tab0[(*sp)]; 991 size--; 992 da = (mlib_u32 *) dp; 993 994 s0 = sa[0]; 995 s1 = sa[1]; 996 sa += 2; 997 998 #ifdef __SUNPRO_C 999 #pragma pipeloop(0) 1000 #endif /* __SUNPRO_C */ 1001 for (i = 0; i < size - 10; i += 4, da += 2, sa++) { 1002 READ_U8_S16_NOTALIGN(tab1, tab0, tab1, tab0); 1003 s0 = s1; 1004 res1 = (t0 << 16) + t1; 1005 res2 = (t2 << 16) + t3; 1006 s1 = sa[0]; 1007 da[0] = res1; 1008 da[1] = res2; 1009 } 1010 1011 READ_U8_S16_NOTALIGN(tab1, tab0, tab1, tab0); 1012 res1 = (t0 << 16) + t1; 1013 res2 = (t2 << 16) + t3; 1014 da[0] = res1; 1015 da[1] = res2; 1016 ADD_READ_U8_S16_NOTALIGN(tab1, tab0, tab1); 1017 res1 = (t0 << 16) + t1; 1018 da[2] = res1; 1019 da += 3; 1020 dp = (mlib_u16 *) da; 1021 *dp++ = (mlib_u16) t2; 1022 sp = (mlib_u8 *) sa; 1023 i += 7; 1024 1025 for (; i < size - 1; i += 2, sp += 2) { 1026 *dp++ = tab0[sp[0]]; 1027 *dp++ = tab1[sp[1]]; 1028 } 1029 1030 if (i < size) 1031 *dp = tab0[(*sp)]; 1032 } 1033 } 1034 1035 } 1036 else if (csize == 3) { 1037 mlib_s32 i, j; 1038 1039 for (j = 0; j < ysize; j++, dst += dlb, src += slb) { 1040 mlib_u32 *sa; 1041 mlib_u32 *da; 1042 mlib_u16 *tab0 = (mlib_u16 *) table[0]; 1043 mlib_u16 *tab1 = (mlib_u16 *) table[1]; 1044 mlib_u16 *tab2 = (mlib_u16 *) table[2]; 1045 mlib_u16 *tab; 1046 mlib_u32 s0, s1, t0, t1, t2, t3; 1047 mlib_u32 res1, res2; 1048 mlib_s32 off; 1049 mlib_s32 size = xsize * 3; 1050 mlib_u16 *dp = (mlib_u16 *) dst; 1051 mlib_u8 *sp = (void *)src; 1052 1053 off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3); 1054 1055 if (off == 1) { 1056 *dp++ = tab0[(*sp)]; 1057 tab = tab0; 1058 tab0 = tab1; 1059 tab1 = tab2; 1060 tab2 = tab; 1061 size--; 1062 sp++; 1063 } 1064 else if (off == 2) { 1065 *dp++ = tab0[sp[0]]; 1066 *dp++ = tab1[sp[1]]; 1067 tab = tab2; 1068 tab2 = tab1; 1069 tab1 = tab0; 1070 tab0 = tab; 1071 size -= 2; 1072 sp += 2; 1073 } 1074 else if (off == 3) { 1075 *dp++ = tab0[sp[0]]; 1076 *dp++ = tab1[sp[1]]; 1077 *dp++ = tab2[sp[2]]; 1078 size -= 3; 1079 sp += 3; 1080 } 1081 1082 sa = (mlib_u32 *) sp; 1083 1084 if (((mlib_addr) dp & 3) == 0) { 1085 da = (mlib_u32 *) dp; 1086 1087 s0 = sa[0]; 1088 sa++; 1089 1090 #ifdef __SUNPRO_C 1091 #pragma pipeloop(0) 1092 #endif /* __SUNPRO_C */ 1093 for (i = 0; i < size - 7; i += 4, da += 2, sa++) { 1094 READ_U8_S16_ALIGN(tab0, tab1, tab2, tab0); 1095 res1 = (t0 << 16) + t1; 1096 res2 = (t2 << 16) + t3; 1097 tab = tab0; 1098 tab0 = tab1; 1099 tab1 = tab2; 1100 tab2 = tab; 1101 s0 = sa[0]; 1102 da[0] = res1; 1103 da[1] = res2; 1104 } 1105 1106 READ_U8_S16_ALIGN(tab0, tab1, tab2, tab0); 1107 res1 = (t0 << 16) + t1; 1108 res2 = (t2 << 16) + t3; 1109 da[0] = res1; 1110 da[1] = res2; 1111 da += 2; 1112 dp = (mlib_u16 *) da; 1113 sp = (mlib_u8 *) sa; 1114 i += 4; 1115 1116 if (i < size) { 1117 *dp++ = tab1[(*sp)]; 1118 i++; 1119 sp++; 1120 } 1121 1122 if (i < size) { 1123 *dp++ = tab2[(*sp)]; 1124 i++; 1125 sp++; 1126 } 1127 1128 if (i < size) { 1129 *dp = tab0[(*sp)]; 1130 } 1131 1132 } 1133 else { 1134 1135 *dp++ = tab0[(*sp)]; 1136 size--; 1137 da = (mlib_u32 *) dp; 1138 1139 s0 = sa[0]; 1140 s1 = sa[1]; 1141 sa += 2; 1142 1143 #ifdef __SUNPRO_C 1144 #pragma pipeloop(0) 1145 #endif /* __SUNPRO_C */ 1146 for (i = 0; i < size - 10; i += 4, da += 2, sa++) { 1147 READ_U8_S16_NOTALIGN(tab1, tab2, tab0, tab1); 1148 s0 = s1; 1149 res1 = (t0 << 16) + t1; 1150 res2 = (t2 << 16) + t3; 1151 tab = tab0; 1152 tab0 = tab1; 1153 tab1 = tab2; 1154 tab2 = tab; 1155 s1 = sa[0]; 1156 da[0] = res1; 1157 da[1] = res2; 1158 } 1159 1160 READ_U8_S16_NOTALIGN(tab1, tab2, tab0, tab1); 1161 res1 = (t0 << 16) + t1; 1162 res2 = (t2 << 16) + t3; 1163 da[0] = res1; 1164 da[1] = res2; 1165 ADD_READ_U8_S16_NOTALIGN(tab2, tab0, tab1); 1166 res1 = (t0 << 16) + t1; 1167 da[2] = res1; 1168 da += 3; 1169 dp = (mlib_u16 *) da; 1170 *dp++ = (mlib_u16) t2; 1171 sp = (mlib_u8 *) sa; 1172 i += 7; 1173 1174 if (i < size) { 1175 *dp++ = tab2[(*sp)]; 1176 i++; 1177 sp++; 1178 } 1179 1180 if (i < size) { 1181 *dp++ = tab0[(*sp)]; 1182 i++; 1183 sp++; 1184 } 1185 1186 if (i < size) { 1187 *dp = tab1[(*sp)]; 1188 } 1189 } 1190 } 1191 1192 } 1193 else if (csize == 4) { 1194 mlib_s32 i, j; 1195 1196 for (j = 0; j < ysize; j++, dst += dlb, src += slb) { 1197 mlib_u32 *sa; 1198 mlib_u32 *da; 1199 mlib_u16 *tab0 = (mlib_u16 *) table[0]; 1200 mlib_u16 *tab1 = (mlib_u16 *) table[1]; 1201 mlib_u16 *tab2 = (mlib_u16 *) table[2]; 1202 mlib_u16 *tab3 = (mlib_u16 *) table[3]; 1203 mlib_u16 *tab; 1204 mlib_u32 s0, s1, t0, t1, t2, t3; 1205 mlib_u32 res1, res2; 1206 mlib_s32 off; 1207 mlib_s32 size = xsize * 4; 1208 mlib_u16 *dp = (mlib_u16 *) dst; 1209 mlib_u8 *sp = (void *)src; 1210 1211 off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3); 1212 1213 if (off == 1) { 1214 *dp++ = tab0[(*sp)]; 1215 tab = tab0; 1216 tab0 = tab1; 1217 tab1 = tab2; 1218 tab2 = tab3; 1219 tab3 = tab; 1220 size--; 1221 sp++; 1222 } 1223 else if (off == 2) { 1224 *dp++ = tab0[sp[0]]; 1225 *dp++ = tab1[sp[1]]; 1226 tab = tab0; 1227 tab0 = tab2; 1228 tab2 = tab; 1229 tab = tab1; 1230 tab1 = tab3; 1231 tab3 = tab; 1232 size -= 2; 1233 sp += 2; 1234 } 1235 else if (off == 3) { 1236 *dp++ = tab0[sp[0]]; 1237 *dp++ = tab1[sp[1]]; 1238 *dp++ = tab2[sp[2]]; 1239 tab = tab3; 1240 tab3 = tab2; 1241 tab2 = tab1; 1242 tab1 = tab0; 1243 tab0 = tab; 1244 size -= 3; 1245 sp += 3; 1246 } 1247 1248 sa = (mlib_u32 *) sp; 1249 1250 if (((mlib_addr) dp & 3) == 0) { 1251 da = (mlib_u32 *) dp; 1252 1253 s0 = sa[0]; 1254 sa++; 1255 1256 #ifdef __SUNPRO_C 1257 #pragma pipeloop(0) 1258 #endif /* __SUNPRO_C */ 1259 for (i = 0; i < size - 7; i += 4, da += 2, sa++) { 1260 READ_U8_S16_ALIGN(tab0, tab1, tab2, tab3); 1261 res1 = (t0 << 16) + t1; 1262 res2 = (t2 << 16) + t3; 1263 s0 = sa[0]; 1264 da[0] = res1; 1265 da[1] = res2; 1266 } 1267 1268 READ_U8_S16_ALIGN(tab0, tab1, tab2, tab3); 1269 res1 = (t0 << 16) + t1; 1270 res2 = (t2 << 16) + t3; 1271 da[0] = res1; 1272 da[1] = res2; 1273 da += 2; 1274 dp = (mlib_u16 *) da; 1275 sp = (mlib_u8 *) sa; 1276 i += 4; 1277 1278 if (i < size) { 1279 *dp++ = tab0[(*sp)]; 1280 i++; 1281 sp++; 1282 } 1283 1284 if (i < size) { 1285 *dp++ = tab1[(*sp)]; 1286 i++; 1287 sp++; 1288 } 1289 1290 if (i < size) { 1291 *dp = tab2[(*sp)]; 1292 } 1293 1294 } 1295 else { 1296 1297 *dp++ = tab0[(*sp)]; 1298 size--; 1299 da = (mlib_u32 *) dp; 1300 1301 s0 = sa[0]; 1302 s1 = sa[1]; 1303 sa += 2; 1304 1305 #ifdef __SUNPRO_C 1306 #pragma pipeloop(0) 1307 #endif /* __SUNPRO_C */ 1308 for (i = 0; i < size - 10; i += 4, da += 2, sa++) { 1309 READ_U8_S16_NOTALIGN(tab1, tab2, tab3, tab0); 1310 s0 = s1; 1311 res1 = (t0 << 16) + t1; 1312 res2 = (t2 << 16) + t3; 1313 s1 = sa[0]; 1314 da[0] = res1; 1315 da[1] = res2; 1316 } 1317 1318 READ_U8_S16_NOTALIGN(tab1, tab2, tab3, tab0); 1319 res1 = (t0 << 16) + t1; 1320 res2 = (t2 << 16) + t3; 1321 da[0] = res1; 1322 da[1] = res2; 1323 ADD_READ_U8_S16_NOTALIGN(tab1, tab2, tab3); 1324 res1 = (t0 << 16) + t1; 1325 da[2] = res1; 1326 da += 3; 1327 dp = (mlib_u16 *) da; 1328 *dp++ = (mlib_u16) t2; 1329 sp = (mlib_u8 *) sa; 1330 i += 7; 1331 1332 if (i < size) { 1333 *dp++ = tab0[(*sp)]; 1334 i++; 1335 sp++; 1336 } 1337 1338 if (i < size) { 1339 *dp++ = tab1[(*sp)]; 1340 i++; 1341 sp++; 1342 } 1343 1344 if (i < size) { 1345 *dp = tab2[(*sp)]; 1346 } 1347 } 1348 } 1349 } 1350 } 1351 1352 /***************************************************************/ 1353 void mlib_c_ImageLookUp_S16_S16(const mlib_s16 *src, 1354 mlib_s32 slb, 1355 mlib_s16 *dst, 1356 mlib_s32 dlb, 1357 mlib_s32 xsize, 1358 mlib_s32 ysize, 1359 mlib_s32 csize, 1360 const mlib_s16 **table) 1361 { 1362 const mlib_s16 *table_base[4]; 1363 mlib_s32 c; 1364 1365 for (c = 0; c < csize; c++) { 1366 table_base[c] = &table[c][32768]; 1367 } 1368 1369 #ifdef __GNUC__ 1370 #pragma GCC diagnostic push 1371 #pragma GCC diagnostic ignored "-Warray-bounds" 1372 #endif 1373 MLIB_C_IMAGELOOKUP(mlib_s16, mlib_s16, table_base); 1374 #ifdef __GNUC__ 1375 #pragma GCC diagnostic pop 1376 #endif 1377 } 1378 1379 /***************************************************************/ 1380 void mlib_c_ImageLookUp_U16_S16(const mlib_u16 *src, 1381 mlib_s32 slb, 1382 mlib_s16 *dst, 1383 mlib_s32 dlb, 1384 mlib_s32 xsize, 1385 mlib_s32 ysize, 1386 mlib_s32 csize, 1387 const mlib_s16 **table) 1388 { 1389 const mlib_s16 *table_base[4]; 1390 mlib_s32 c; 1391 1392 for (c = 0; c < csize; c++) { 1393 table_base[c] = &table[c][0]; 1394 } 1395 1396 #ifdef __GNUC__ 1397 #pragma GCC diagnostic push 1398 #pragma GCC diagnostic ignored "-Warray-bounds" 1399 #endif 1400 MLIB_C_IMAGELOOKUP(mlib_s16, mlib_u16, table_base); 1401 #ifdef __GNUC__ 1402 #pragma GCC diagnostic pop 1403 #endif 1404 } 1405 1406 /***************************************************************/ 1407 void mlib_c_ImageLookUp_S32_S16(const mlib_s32 *src, 1408 mlib_s32 slb, 1409 mlib_s16 *dst, 1410 mlib_s32 dlb, 1411 mlib_s32 xsize, 1412 mlib_s32 ysize, 1413 mlib_s32 csize, 1414 const mlib_s16 **table) 1415 { 1416 const mlib_s16 *table_base[4]; 1417 mlib_s32 c; 1418 1419 for (c = 0; c < csize; c++) { 1420 table_base[c] = &table[c][TABLE_SHIFT_S32]; 1421 } 1422 1423 #ifdef __GNUC__ 1424 #pragma GCC diagnostic push 1425 #pragma GCC diagnostic ignored "-Warray-bounds" 1426 #endif 1427 MLIB_C_IMAGELOOKUP(mlib_s16, mlib_s32, table_base); 1428 #ifdef __GNUC__ 1429 #pragma GCC diagnostic pop 1430 #endif 1431 } 1432 1433 /***************************************************************/ 1434 void mlib_c_ImageLookUp_S16_U16(const mlib_s16 *src, 1435 mlib_s32 slb, 1436 mlib_u16 *dst, 1437 mlib_s32 dlb, 1438 mlib_s32 xsize, 1439 mlib_s32 ysize, 1440 mlib_s32 csize, 1441 const mlib_s16 **table) 1442 { 1443 const mlib_s16 *table_base[4]; 1444 mlib_s32 c; 1445 1446 for (c = 0; c < csize; c++) { 1447 table_base[c] = &table[c][32768]; 1448 } 1449 1450 #ifdef __GNUC__ 1451 #pragma GCC diagnostic push 1452 #pragma GCC diagnostic ignored "-Warray-bounds" 1453 #endif 1454 MLIB_C_IMAGELOOKUP(mlib_u16, mlib_s16, table_base); 1455 #ifdef __GNUC__ 1456 #pragma GCC diagnostic pop 1457 #endif 1458 } 1459 1460 /***************************************************************/ 1461 void mlib_c_ImageLookUp_U16_U16(const mlib_u16 *src, 1462 mlib_s32 slb, 1463 mlib_u16 *dst, 1464 mlib_s32 dlb, 1465 mlib_s32 xsize, 1466 mlib_s32 ysize, 1467 mlib_s32 csize, 1468 const mlib_s16 **table) 1469 { 1470 const mlib_s16 *table_base[4]; 1471 mlib_s32 c; 1472 1473 for (c = 0; c < csize; c++) { 1474 table_base[c] = &table[c][0]; 1475 } 1476 1477 #ifdef __GNUC__ 1478 #pragma GCC diagnostic push 1479 #pragma GCC diagnostic ignored "-Warray-bounds" 1480 #endif 1481 MLIB_C_IMAGELOOKUP(mlib_u16, mlib_u16, table_base); 1482 #ifdef __GNUC__ 1483 #pragma GCC diagnostic pop 1484 #endif 1485 } 1486 1487 /***************************************************************/ 1488 void mlib_c_ImageLookUp_S32_U16(const mlib_s32 *src, 1489 mlib_s32 slb, 1490 mlib_u16 *dst, 1491 mlib_s32 dlb, 1492 mlib_s32 xsize, 1493 mlib_s32 ysize, 1494 mlib_s32 csize, 1495 const mlib_s16 **table) 1496 { 1497 const mlib_s16 *table_base[4]; 1498 mlib_s32 c; 1499 1500 for (c = 0; c < csize; c++) { 1501 table_base[c] = &table[c][TABLE_SHIFT_S32]; 1502 } 1503 1504 #ifdef __GNUC__ 1505 #pragma GCC diagnostic push 1506 #pragma GCC diagnostic ignored "-Warray-bounds" 1507 #endif 1508 MLIB_C_IMAGELOOKUP(mlib_u16, mlib_s32, table_base); 1509 #ifdef __GNUC__ 1510 #pragma GCC diagnostic pop 1511 #endif 1512 } 1513 1514 /***************************************************************/ 1515 void mlib_c_ImageLookUp_U8_S32(const mlib_u8 *src, 1516 mlib_s32 slb, 1517 mlib_s32 *dst, 1518 mlib_s32 dlb, 1519 mlib_s32 xsize, 1520 mlib_s32 ysize, 1521 mlib_s32 csize, 1522 const mlib_s32 **table) 1523 { 1524 1525 if (xsize * csize < 7) { 1526 MLIB_C_IMAGELOOKUP(mlib_s32, mlib_u8, table); 1527 } 1528 else if (csize == 1) { 1529 mlib_s32 i, j; 1530 1531 for (j = 0; j < ysize; j++, dst += dlb, src += slb) { 1532 mlib_u32 *sa; 1533 mlib_u32 *tab = (mlib_u32 *) table[0]; 1534 mlib_u32 s0, t0, t1, t2, t3; 1535 mlib_s32 off; 1536 mlib_s32 size = xsize; 1537 mlib_u32 *dp = (mlib_u32 *) dst; 1538 mlib_u8 *sp = (void *)src; 1539 1540 off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3); 1541 1542 for (i = 0; i < off; i++, sp++) { 1543 *dp++ = tab[sp[0]]; 1544 size--; 1545 } 1546 1547 sa = (mlib_u32 *) sp; 1548 1549 s0 = sa[0]; 1550 sa++; 1551 1552 #ifdef __SUNPRO_C 1553 #pragma pipeloop(0) 1554 #endif /* __SUNPRO_C */ 1555 for (i = 0; i < size - 7; i += 4, dp += 4, sa++) { 1556 READ_U8_S32(tab, tab, tab, tab); 1557 s0 = sa[0]; 1558 dp[0] = t0; 1559 dp[1] = t1; 1560 dp[2] = t2; 1561 dp[3] = t3; 1562 } 1563 1564 READ_U8_S32(tab, tab, tab, tab); 1565 dp[0] = t0; 1566 dp[1] = t1; 1567 dp[2] = t2; 1568 dp[3] = t3; 1569 dp += 4; 1570 sp = (mlib_u8 *) sa; 1571 i += 4; 1572 for (; i < size; i++, dp++, sp++) 1573 dp[0] = tab[sp[0]]; 1574 } 1575 1576 } 1577 else if (csize == 2) { 1578 mlib_s32 i, j; 1579 1580 for (j = 0; j < ysize; j++, dst += dlb, src += slb) { 1581 mlib_u32 *sa; 1582 mlib_u32 *tab0 = (mlib_u32 *) table[0]; 1583 mlib_u32 *tab1 = (mlib_u32 *) table[1]; 1584 mlib_u32 *tab; 1585 mlib_u32 s0, t0, t1, t2, t3; 1586 mlib_s32 off; 1587 mlib_s32 size = xsize * 2; 1588 mlib_u32 *dp = (mlib_u32 *) dst; 1589 mlib_u8 *sp = (void *)src; 1590 1591 off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3); 1592 1593 for (i = 0; i < off - 1; i += 2, sp += 2) { 1594 *dp++ = tab0[sp[0]]; 1595 *dp++ = tab1[sp[1]]; 1596 size -= 2; 1597 } 1598 1599 if ((off & 1) != 0) { 1600 *dp++ = tab0[*sp]; 1601 size--; 1602 sp++; 1603 tab = tab0; 1604 tab0 = tab1; 1605 tab1 = tab; 1606 } 1607 1608 sa = (mlib_u32 *) sp; 1609 1610 s0 = sa[0]; 1611 sa++; 1612 1613 #ifdef __SUNPRO_C 1614 #pragma pipeloop(0) 1615 #endif /* __SUNPRO_C */ 1616 for (i = 0; i < size - 7; i += 4, dp += 4, sa++) { 1617 READ_U8_S32(tab0, tab1, tab0, tab1); 1618 s0 = sa[0]; 1619 dp[0] = t0; 1620 dp[1] = t1; 1621 dp[2] = t2; 1622 dp[3] = t3; 1623 } 1624 1625 READ_U8_S32(tab0, tab1, tab0, tab1); 1626 dp[0] = t0; 1627 dp[1] = t1; 1628 dp[2] = t2; 1629 dp[3] = t3; 1630 dp += 4; 1631 sp = (mlib_u8 *) sa; 1632 i += 4; 1633 1634 for (; i < size - 1; i += 2, sp += 2) { 1635 *dp++ = tab0[sp[0]]; 1636 *dp++ = tab1[sp[1]]; 1637 } 1638 1639 if (i < size) 1640 *dp = tab0[(*sp)]; 1641 } 1642 1643 } 1644 else if (csize == 3) { 1645 mlib_s32 i, j; 1646 1647 for (j = 0; j < ysize; j++, dst += dlb, src += slb) { 1648 mlib_u32 *sa; 1649 mlib_u32 *tab0 = (mlib_u32 *) table[0]; 1650 mlib_u32 *tab1 = (mlib_u32 *) table[1]; 1651 mlib_u32 *tab2 = (mlib_u32 *) table[2]; 1652 mlib_u32 *tab; 1653 mlib_u32 s0, t0, t1, t2, t3; 1654 mlib_s32 off; 1655 mlib_s32 size = xsize * 3; 1656 mlib_u32 *dp = (mlib_u32 *) dst; 1657 mlib_u8 *sp = (void *)src; 1658 1659 off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3); 1660 1661 if (off == 1) { 1662 *dp++ = tab0[(*sp)]; 1663 tab = tab0; 1664 tab0 = tab1; 1665 tab1 = tab2; 1666 tab2 = tab; 1667 size--; 1668 sp++; 1669 } 1670 else if (off == 2) { 1671 *dp++ = tab0[sp[0]]; 1672 *dp++ = tab1[sp[1]]; 1673 tab = tab2; 1674 tab2 = tab1; 1675 tab1 = tab0; 1676 tab0 = tab; 1677 size -= 2; 1678 sp += 2; 1679 } 1680 else if (off == 3) { 1681 *dp++ = tab0[sp[0]]; 1682 *dp++ = tab1[sp[1]]; 1683 *dp++ = tab2[sp[2]]; 1684 size -= 3; 1685 sp += 3; 1686 } 1687 1688 sa = (mlib_u32 *) sp; 1689 1690 s0 = sa[0]; 1691 sa++; 1692 1693 #ifdef __SUNPRO_C 1694 #pragma pipeloop(0) 1695 #endif /* __SUNPRO_C */ 1696 for (i = 0; i < size - 7; i += 4, dp += 4, sa++) { 1697 READ_U8_S32(tab0, tab1, tab2, tab0); 1698 tab = tab0; 1699 tab0 = tab1; 1700 tab1 = tab2; 1701 tab2 = tab; 1702 s0 = sa[0]; 1703 dp[0] = t0; 1704 dp[1] = t1; 1705 dp[2] = t2; 1706 dp[3] = t3; 1707 } 1708 1709 READ_U8_S32(tab0, tab1, tab2, tab0); 1710 dp[0] = t0; 1711 dp[1] = t1; 1712 dp[2] = t2; 1713 dp[3] = t3; 1714 dp += 4; 1715 sp = (mlib_u8 *) sa; 1716 i += 4; 1717 1718 if (i < size) { 1719 *dp++ = tab1[(*sp)]; 1720 i++; 1721 sp++; 1722 } 1723 1724 if (i < size) { 1725 *dp++ = tab2[(*sp)]; 1726 i++; 1727 sp++; 1728 } 1729 1730 if (i < size) { 1731 *dp = tab0[(*sp)]; 1732 } 1733 } 1734 1735 } 1736 else if (csize == 4) { 1737 mlib_s32 i, j; 1738 1739 for (j = 0; j < ysize; j++, dst += dlb, src += slb) { 1740 mlib_u32 *sa; 1741 mlib_u32 *tab0 = (mlib_u32 *) table[0]; 1742 mlib_u32 *tab1 = (mlib_u32 *) table[1]; 1743 mlib_u32 *tab2 = (mlib_u32 *) table[2]; 1744 mlib_u32 *tab3 = (mlib_u32 *) table[3]; 1745 mlib_u32 *tab; 1746 mlib_u32 s0, t0, t1, t2, t3; 1747 mlib_s32 off; 1748 mlib_s32 size = xsize * 4; 1749 mlib_u32 *dp = (mlib_u32 *) dst; 1750 mlib_u8 *sp = (void *)src; 1751 1752 off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3); 1753 1754 if (off == 1) { 1755 *dp++ = tab0[(*sp)]; 1756 tab = tab0; 1757 tab0 = tab1; 1758 tab1 = tab2; 1759 tab2 = tab3; 1760 tab3 = tab; 1761 size--; 1762 sp++; 1763 } 1764 else if (off == 2) { 1765 *dp++ = tab0[sp[0]]; 1766 *dp++ = tab1[sp[1]]; 1767 tab = tab0; 1768 tab0 = tab2; 1769 tab2 = tab; 1770 tab = tab1; 1771 tab1 = tab3; 1772 tab3 = tab; 1773 size -= 2; 1774 sp += 2; 1775 } 1776 else if (off == 3) { 1777 *dp++ = tab0[sp[0]]; 1778 *dp++ = tab1[sp[1]]; 1779 *dp++ = tab2[sp[2]]; 1780 tab = tab3; 1781 tab3 = tab2; 1782 tab2 = tab1; 1783 tab1 = tab0; 1784 tab0 = tab; 1785 size -= 3; 1786 sp += 3; 1787 } 1788 1789 sa = (mlib_u32 *) sp; 1790 1791 s0 = sa[0]; 1792 sa++; 1793 1794 #ifdef __SUNPRO_C 1795 #pragma pipeloop(0) 1796 #endif /* __SUNPRO_C */ 1797 for (i = 0; i < size - 7; i += 4, dp += 4, sa++) { 1798 READ_U8_S32(tab0, tab1, tab2, tab3); 1799 s0 = sa[0]; 1800 dp[0] = t0; 1801 dp[1] = t1; 1802 dp[2] = t2; 1803 dp[3] = t3; 1804 } 1805 1806 READ_U8_S32(tab0, tab1, tab2, tab3); 1807 dp[0] = t0; 1808 dp[1] = t1; 1809 dp[2] = t2; 1810 dp[3] = t3; 1811 dp += 4; 1812 sp = (mlib_u8 *) sa; 1813 i += 4; 1814 1815 if (i < size) { 1816 *dp++ = tab0[(*sp)]; 1817 i++; 1818 sp++; 1819 } 1820 1821 if (i < size) { 1822 *dp++ = tab1[(*sp)]; 1823 i++; 1824 sp++; 1825 } 1826 1827 if (i < size) { 1828 *dp = tab2[(*sp)]; 1829 } 1830 } 1831 } 1832 } 1833 1834 /***************************************************************/ 1835 void mlib_c_ImageLookUp_S16_S32(const mlib_s16 *src, 1836 mlib_s32 slb, 1837 mlib_s32 *dst, 1838 mlib_s32 dlb, 1839 mlib_s32 xsize, 1840 mlib_s32 ysize, 1841 mlib_s32 csize, 1842 const mlib_s32 **table) 1843 { 1844 const mlib_s32 *table_base[4]; 1845 mlib_s32 c; 1846 1847 for (c = 0; c < csize; c++) { 1848 table_base[c] = &table[c][32768]; 1849 } 1850 1851 #ifdef __GNUC__ 1852 #pragma GCC diagnostic push 1853 #pragma GCC diagnostic ignored "-Warray-bounds" 1854 #endif 1855 MLIB_C_IMAGELOOKUP(mlib_s32, mlib_s16, table_base); 1856 #ifdef __GNUC__ 1857 #pragma GCC diagnostic pop 1858 #endif 1859 } 1860 1861 /***************************************************************/ 1862 void mlib_c_ImageLookUp_U16_S32(const mlib_u16 *src, 1863 mlib_s32 slb, 1864 mlib_s32 *dst, 1865 mlib_s32 dlb, 1866 mlib_s32 xsize, 1867 mlib_s32 ysize, 1868 mlib_s32 csize, 1869 const mlib_s32 **table) 1870 { 1871 const mlib_s32 *table_base[4]; 1872 mlib_s32 c; 1873 1874 for (c = 0; c < csize; c++) { 1875 table_base[c] = &table[c][0]; 1876 } 1877 1878 #ifdef __GNUC__ 1879 #pragma GCC diagnostic push 1880 #pragma GCC diagnostic ignored "-Warray-bounds" 1881 #endif 1882 MLIB_C_IMAGELOOKUP(mlib_s32, mlib_u16, table_base); 1883 #ifdef __GNUC__ 1884 #pragma GCC diagnostic pop 1885 #endif 1886 } 1887 1888 /***************************************************************/ 1889 void mlib_c_ImageLookUp_S32_S32(const mlib_s32 *src, 1890 mlib_s32 slb, 1891 mlib_s32 *dst, 1892 mlib_s32 dlb, 1893 mlib_s32 xsize, 1894 mlib_s32 ysize, 1895 mlib_s32 csize, 1896 const mlib_s32 **table) 1897 { 1898 const mlib_s32 *table_base[4]; 1899 mlib_s32 c; 1900 1901 for (c = 0; c < csize; c++) { 1902 table_base[c] = &table[c][TABLE_SHIFT_S32]; 1903 } 1904 1905 #ifdef __GNUC__ 1906 #pragma GCC diagnostic push 1907 #pragma GCC diagnostic ignored "-Warray-bounds" 1908 #endif 1909 MLIB_C_IMAGELOOKUP(mlib_s32, mlib_s32, table_base); 1910 #ifdef __GNUC__ 1911 #pragma GCC diagnostic pop 1912 #endif 1913 } 1914 1915 /***************************************************************/ 1916 void mlib_c_ImageLookUpSI_U8_U8(const mlib_u8 *src, 1917 mlib_s32 slb, 1918 mlib_u8 *dst, 1919 mlib_s32 dlb, 1920 mlib_s32 xsize, 1921 mlib_s32 ysize, 1922 mlib_s32 csize, 1923 const mlib_u8 **table) 1924 { 1925 1926 if ((xsize < 8) || ((xsize * ysize) < 250)) { 1927 MLIB_C_IMAGELOOKUPSI(mlib_u8, mlib_u8, table); 1928 } 1929 else if (csize == 2) { 1930 1931 mlib_u16 tab[256]; 1932 const mlib_u8 *tab0 = table[0]; 1933 const mlib_u8 *tab1 = table[1]; 1934 mlib_s32 i, j, s0, s1, s2; 1935 1936 s0 = tab0[0]; 1937 s1 = tab1[0]; 1938 for (i = 1; i < 256; i++) { 1939 #ifdef _LITTLE_ENDIAN 1940 s2 = (s1 << 8) + s0; 1941 #else 1942 s2 = (s0 << 8) + s1; 1943 #endif /* _LITTLE_ENDIAN */ 1944 s0 = tab0[i]; 1945 s1 = tab1[i]; 1946 tab[i - 1] = (mlib_u16) s2; 1947 } 1948 1949 #ifdef _LITTLE_ENDIAN 1950 s2 = (s1 << 8) + s0; 1951 #else 1952 s2 = (s0 << 8) + s1; 1953 #endif /* _LITTLE_ENDIAN */ 1954 tab[255] = (mlib_u16) s2; 1955 1956 for (j = 0; j < ysize; j++, dst += dlb, src += slb) { 1957 mlib_s32 *da; 1958 mlib_u8 *dp = dst; 1959 mlib_u8 *sa = (void *)src; 1960 mlib_s32 s0, t0, s1, t1, t, t2, off; 1961 mlib_s32 size = xsize; 1962 1963 if (((mlib_addr) dp & 1) == 0) { 1964 1965 if (((mlib_addr) dp & 3) != 0) { 1966 *((mlib_u16 *) dp) = tab[sa[0]]; 1967 sa++; 1968 size--; 1969 dp += 2; 1970 } 1971 1972 da = (mlib_s32 *) dp; 1973 1974 s0 = sa[0]; 1975 s1 = sa[1]; 1976 sa += 2; 1977 1978 #ifdef __SUNPRO_C 1979 #pragma pipeloop(0) 1980 #endif /* __SUNPRO_C */ 1981 for (i = 0; i < size - 3; i += 2, da++, sa += 2) { 1982 t0 = tab[s0]; 1983 t1 = tab[s1]; 1984 #ifdef _LITTLE_ENDIAN 1985 t = (t1 << 16) + t0; 1986 #else 1987 t = (t0 << 16) + t1; 1988 #endif /* _LITTLE_ENDIAN */ 1989 s0 = sa[0]; 1990 s1 = sa[1]; 1991 da[0] = t; 1992 } 1993 1994 t0 = tab[s0]; 1995 t1 = tab[s1]; 1996 #ifdef _LITTLE_ENDIAN 1997 t = (t1 << 16) + t0; 1998 #else 1999 t = (t0 << 16) + t1; 2000 #endif /* _LITTLE_ENDIAN */ 2001 da[0] = t; 2002 da++; 2003 2004 if (size & 1) 2005 *((mlib_u16 *) da) = tab[sa[0]]; 2006 2007 } 2008 else { 2009 2010 off = (mlib_s32) (4 - ((mlib_addr) dp & 3)); 2011 2012 if (off > 1) { 2013 t0 = tab[sa[0]]; 2014 #ifdef _LITTLE_ENDIAN 2015 dp[1] = (t0 >> 8); 2016 dp[0] = t0; 2017 #else 2018 dp[0] = (t0 >> 8); 2019 dp[1] = t0; 2020 #endif /* _LITTLE_ENDIAN */ 2021 sa++; 2022 size--; 2023 dp += 2; 2024 } 2025 2026 t0 = tab[sa[0]]; 2027 sa++; 2028 #ifdef _LITTLE_ENDIAN 2029 *dp++ = t0; 2030 #else 2031 *dp++ = (t0 >> 8); 2032 #endif /* _LITTLE_ENDIAN */ 2033 2034 da = (mlib_s32 *) dp; 2035 2036 s0 = sa[0]; 2037 s1 = sa[1]; 2038 sa += 2; 2039 2040 #ifdef __SUNPRO_C 2041 #pragma pipeloop(0) 2042 #endif /* __SUNPRO_C */ 2043 for (i = 0; i < size - 4; i += 2, da++, sa += 2) { 2044 t1 = tab[s0]; 2045 t2 = tab[s1]; 2046 #ifdef _LITTLE_ENDIAN 2047 t = (t0 >> 8) + (t1 << 8) + (t2 << 24); 2048 #else 2049 t = (t0 << 24) + (t1 << 8) + (t2 >> 8); 2050 #endif /* _LITTLE_ENDIAN */ 2051 t0 = t2; 2052 s0 = sa[0]; 2053 s1 = sa[1]; 2054 da[0] = t; 2055 } 2056 2057 t1 = tab[s0]; 2058 t2 = tab[s1]; 2059 #ifdef _LITTLE_ENDIAN 2060 t = (t0 >> 8) + (t1 << 8) + (t2 << 24); 2061 #else 2062 t = (t0 << 24) + (t1 << 8) + (t2 >> 8); 2063 #endif /* _LITTLE_ENDIAN */ 2064 da[0] = t; 2065 da++; 2066 dp = (mlib_u8 *) da; 2067 #ifdef _LITTLE_ENDIAN 2068 dp[0] = (t2 >> 8); 2069 #else 2070 dp[0] = t2; 2071 #endif /* _LITTLE_ENDIAN */ 2072 2073 if ((size & 1) == 0) { 2074 t0 = tab[sa[0]]; 2075 #ifdef _LITTLE_ENDIAN 2076 dp[2] = (t0 >> 8); 2077 dp[1] = t0; 2078 #else 2079 dp[1] = (t0 >> 8); 2080 dp[2] = t0; 2081 #endif /* _LITTLE_ENDIAN */ 2082 } 2083 } 2084 } 2085 2086 } 2087 else if (csize == 3) { 2088 mlib_u32 tab[256]; 2089 const mlib_u8 *tab0 = table[0]; 2090 const mlib_u8 *tab1 = table[1]; 2091 const mlib_u8 *tab2 = table[2]; 2092 mlib_s32 i, j; 2093 mlib_u32 s0, s1, s2, s3; 2094 2095 s0 = tab0[0]; 2096 s1 = tab1[0]; 2097 s2 = tab2[0]; 2098 for (i = 1; i < 256; i++) { 2099 #ifdef _LITTLE_ENDIAN 2100 s3 = (s2 << 24) + (s1 << 16) + (s0 << 8); 2101 #else 2102 s3 = (s0 << 16) + (s1 << 8) + s2; 2103 #endif /* _LITTLE_ENDIAN */ 2104 s0 = tab0[i]; 2105 s1 = tab1[i]; 2106 s2 = tab2[i]; 2107 tab[i - 1] = s3; 2108 } 2109 2110 #ifdef _LITTLE_ENDIAN 2111 s3 = (s2 << 24) + (s1 << 16) + (s0 << 8); 2112 #else 2113 s3 = (s0 << 16) + (s1 << 8) + s2; 2114 #endif /* _LITTLE_ENDIAN */ 2115 tab[255] = s3; 2116 2117 for (j = 0; j < ysize; j++, dst += dlb, src += slb) { 2118 mlib_u32 *da; 2119 mlib_u8 *dp = dst; 2120 mlib_u8 *sa = (void *)src, *ptr; 2121 mlib_u32 s0, s1, t0, t1; 2122 mlib_u32 res1, res2; 2123 mlib_s32 size = xsize, off; 2124 2125 off = (mlib_s32) ((mlib_addr) dp & 3); 2126 2127 #ifdef __SUNPRO_C 2128 #pragma pipeloop(0) 2129 #endif /* __SUNPRO_C */ 2130 for (i = 0; i < off; i++) { 2131 ptr = (mlib_u8 *) (tab + sa[0]); 2132 dp[0] = ptr[1]; 2133 dp[1] = ptr[2]; 2134 dp[2] = ptr[3]; 2135 dp += 3; 2136 sa++; 2137 } 2138 2139 size -= off; 2140 da = (mlib_u32 *) dp; 2141 s0 = sa[0]; 2142 s1 = sa[1]; 2143 sa += 2; 2144 2145 #ifdef __SUNPRO_C 2146 #pragma pipeloop(0) 2147 #endif /* __SUNPRO_C */ 2148 for (i = 0; i < size - 7; i += 4, da += 3, sa += 4) { 2149 t0 = tab[s0]; 2150 t1 = tab[s1]; 2151 #ifdef _LITTLE_ENDIAN 2152 da[0] = (t0 >> 8) + (t1 << 16); 2153 res2 = (t1 >> 16); 2154 #else 2155 da[0] = (t0 << 8) + (t1 >> 16); 2156 res2 = (t1 << 16); 2157 #endif /* _LITTLE_ENDIAN */ 2158 s0 = sa[0]; 2159 s1 = sa[1]; 2160 t0 = tab[s0]; 2161 t1 = tab[s1]; 2162 #ifdef _LITTLE_ENDIAN 2163 res2 += (t0 << 8); 2164 res1 = (t0 >> 24) + t1; 2165 #else 2166 res2 += (t0 >> 8); 2167 res1 = (t0 << 24) + t1; 2168 #endif /* _LITTLE_ENDIAN */ 2169 s0 = sa[2]; 2170 s1 = sa[3]; 2171 da[1] = res2; 2172 da[2] = res1; 2173 } 2174 2175 t0 = tab[s0]; 2176 t1 = tab[s1]; 2177 #ifdef _LITTLE_ENDIAN 2178 da[0] = (t0 >> 8) + (t1 << 16); 2179 res2 = (t1 >> 16); 2180 #else 2181 da[0] = (t0 << 8) + (t1 >> 16); 2182 res2 = (t1 << 16); 2183 #endif /* _LITTLE_ENDIAN */ 2184 s0 = sa[0]; 2185 s1 = sa[1]; 2186 t0 = tab[s0]; 2187 t1 = tab[s1]; 2188 #ifdef _LITTLE_ENDIAN 2189 res2 += (t0 << 8); 2190 res1 = (t0 >> 24) + t1; 2191 #else 2192 res2 += (t0 >> 8); 2193 res1 = (t0 << 24) + t1; 2194 #endif /* _LITTLE_ENDIAN */ 2195 da[1] = res2; 2196 da[2] = res1; 2197 da += 3; 2198 sa += 2; 2199 dp = (mlib_u8 *) da; 2200 i += 4; 2201 2202 #ifdef __SUNPRO_C 2203 #pragma pipeloop(0) 2204 #endif /* __SUNPRO_C */ 2205 for (; i < size; i++) { 2206 ptr = (mlib_u8 *) (tab + sa[0]); 2207 dp[0] = ptr[1]; 2208 dp[1] = ptr[2]; 2209 dp[2] = ptr[3]; 2210 dp += 3; 2211 sa++; 2212 } 2213 } 2214 2215 } 2216 else if (csize == 4) { 2217 mlib_u32 tab[256]; 2218 const mlib_u8 *tab0 = table[0]; 2219 const mlib_u8 *tab1 = table[1]; 2220 const mlib_u8 *tab2 = table[2]; 2221 const mlib_u8 *tab3 = table[3]; 2222 mlib_s32 i, j; 2223 mlib_u32 s0, s1, s2, s3, s4; 2224 2225 s0 = tab0[0]; 2226 s1 = tab1[0]; 2227 s2 = tab2[0]; 2228 s3 = tab3[0]; 2229 for (i = 1; i < 256; i++) { 2230 #ifdef _LITTLE_ENDIAN 2231 s4 = (s3 << 24) + (s2 << 16) + (s1 << 8) + s0; 2232 #else 2233 s4 = (s0 << 24) + (s1 << 16) + (s2 << 8) + s3; 2234 #endif /* _LITTLE_ENDIAN */ 2235 s0 = tab0[i]; 2236 s1 = tab1[i]; 2237 s2 = tab2[i]; 2238 s3 = tab3[i]; 2239 tab[i - 1] = s4; 2240 } 2241 2242 #ifdef _LITTLE_ENDIAN 2243 s4 = (s3 << 24) + (s2 << 16) + (s1 << 8) + s0; 2244 #else 2245 s4 = (s0 << 24) + (s1 << 16) + (s2 << 8) + s3; 2246 #endif /* _LITTLE_ENDIAN */ 2247 tab[255] = s4; 2248 2249 for (j = 0; j < ysize; j++, dst += dlb, src += slb) { 2250 mlib_u32 *da; 2251 mlib_u8 *dp = dst; 2252 mlib_u8 *sa = (void *)src; 2253 mlib_u32 s0, t0, s1, t1, t2; 2254 mlib_s32 size = xsize, off; 2255 mlib_u32 shift, shift1, res1, res2; 2256 2257 if (((mlib_addr) dp & 3) == 0) { 2258 2259 da = (mlib_u32 *) dp; 2260 2261 s0 = sa[0]; 2262 s1 = sa[1]; 2263 sa += 2; 2264 2265 #ifdef __SUNPRO_C 2266 #pragma pipeloop(0) 2267 #endif /* __SUNPRO_C */ 2268 for (i = 0; i < size - 3; i += 2, da += 2, sa += 2) { 2269 t0 = tab[s0]; 2270 t1 = tab[s1]; 2271 s0 = sa[0]; 2272 s1 = sa[1]; 2273 da[0] = t0; 2274 da[1] = t1; 2275 } 2276 2277 t0 = tab[s0]; 2278 t1 = tab[s1]; 2279 da[0] = t0; 2280 da[1] = t1; 2281 2282 if (size & 1) 2283 da[2] = tab[sa[0]]; 2284 2285 } 2286 else { 2287 2288 off = (mlib_s32) (4 - ((mlib_addr) dp & 3)); 2289 shift = 8 * off; 2290 shift1 = 32 - shift; 2291 2292 for (i = 0; i < off; i++) { 2293 dp[i] = table[i][sa[0]]; 2294 } 2295 2296 dp += i; 2297 t0 = tab[sa[0]]; 2298 sa++; 2299 2300 da = (mlib_u32 *) dp; 2301 2302 s0 = sa[0]; 2303 s1 = sa[1]; 2304 sa += 2; 2305 2306 #ifdef __SUNPRO_C 2307 #pragma pipeloop(0) 2308 #endif /* __SUNPRO_C */ 2309 for (i = 0; i < size - 4; i += 2, da += 2, sa += 2) { 2310 t1 = tab[s0]; 2311 t2 = tab[s1]; 2312 #ifdef _LITTLE_ENDIAN 2313 res1 = (t0 >> shift) + (t1 << shift1); 2314 res2 = (t1 >> shift) + (t2 << shift1); 2315 #else 2316 res1 = (t0 << shift) + (t1 >> shift1); 2317 res2 = (t1 << shift) + (t2 >> shift1); 2318 #endif /* _LITTLE_ENDIAN */ 2319 t0 = t2; 2320 s0 = sa[0]; 2321 s1 = sa[1]; 2322 da[0] = res1; 2323 da[1] = res2; 2324 } 2325 2326 t1 = tab[s0]; 2327 t2 = tab[s1]; 2328 #ifdef _LITTLE_ENDIAN 2329 res1 = (t0 >> shift) + (t1 << shift1); 2330 res2 = (t1 >> shift) + (t2 << shift1); 2331 #else 2332 res1 = (t0 << shift) + (t1 >> shift1); 2333 res2 = (t1 << shift) + (t2 >> shift1); 2334 #endif /* _LITTLE_ENDIAN */ 2335 da[0] = res1; 2336 da[1] = res2; 2337 #ifdef _LITTLE_ENDIAN 2338 t0 = (da[2] >> shift1); 2339 da[2] = (t2 >> shift) + (t0 << shift1); 2340 #else 2341 t0 = (da[2] << shift1); 2342 da[2] = (t2 << shift) + (t0 >> shift1); 2343 #endif /* _LITTLE_ENDIAN */ 2344 da += 2; 2345 dp = (mlib_u8 *) da + (4 - off); 2346 2347 if ((size & 1) == 0) { 2348 t0 = tab[sa[0]]; 2349 #ifdef _LITTLE_ENDIAN 2350 dp[3] = (mlib_u8) (t0 >> 24); 2351 dp[2] = (mlib_u8) (t0 >> 16); 2352 dp[1] = (mlib_u8) (t0 >> 8); 2353 dp[0] = (mlib_u8) t0; 2354 #else 2355 dp[0] = (mlib_u8) (t0 >> 24); 2356 dp[1] = (mlib_u8) (t0 >> 16); 2357 dp[2] = (mlib_u8) (t0 >> 8); 2358 dp[3] = (mlib_u8) t0; 2359 #endif /* _LITTLE_ENDIAN */ 2360 } 2361 } 2362 } 2363 } 2364 } 2365 2366 /***************************************************************/ 2367 2368 #ifdef _MSC_VER 2369 #pragma optimize("", off) 2370 #endif /* _MSC_VER */ 2371 2372 void mlib_c_ImageLookUpSI_S16_U8(const mlib_s16 *src, 2373 mlib_s32 slb, 2374 mlib_u8 *dst, 2375 mlib_s32 dlb, 2376 mlib_s32 xsize, 2377 mlib_s32 ysize, 2378 mlib_s32 csize, 2379 const mlib_u8 **table) 2380 { 2381 const mlib_u8 *table_base[4]; 2382 mlib_s32 c; 2383 2384 for (c = 0; c < csize; c++) { 2385 table_base[c] = &table[c][32768]; 2386 } 2387 2388 if ((xsize < 8) || (csize == 2)) { 2389 MLIB_C_IMAGELOOKUPSI(mlib_u8, mlib_s16, table_base); 2390 } 2391 else if (csize == 3) { 2392 mlib_s32 i, j; 2393 2394 for (j = 0; j < ysize; j++, dst += dlb, src += slb) { 2395 mlib_u32 *da; 2396 mlib_u8 *dp = dst; 2397 mlib_s16 *sa = (void *)src; 2398 const mlib_u8 *tab0 = table_base[0]; 2399 const mlib_u8 *tab1 = table_base[1]; 2400 const mlib_u8 *tab2 = table_base[2]; 2401 mlib_s32 s0, s1; 2402 mlib_u32 t0, t1, t2, t3, t4, t5; 2403 mlib_u32 res1, res2; 2404 mlib_s32 size = xsize, off; 2405 2406 off = (mlib_s32) ((mlib_addr) dp & 3); 2407 2408 #ifdef __SUNPRO_C 2409 #pragma pipeloop(0) 2410 #endif /* __SUNPRO_C */ 2411 for (i = 0; i < off; i++) { 2412 s0 = *sa++; 2413 dp[0] = tab0[s0]; 2414 dp[1] = tab1[s0]; 2415 dp[2] = tab2[s0]; 2416 dp += 3; 2417 } 2418 2419 size -= off; 2420 da = (mlib_u32 *) dp; 2421 s0 = sa[0]; 2422 s1 = sa[1]; 2423 sa += 2; 2424 2425 #ifdef __SUNPRO_C 2426 #pragma pipeloop(0) 2427 #endif /* __SUNPRO_C */ 2428 for (i = 0; i < size - 7; i += 4, da += 3, sa += 4) { 2429 t0 = tab0[s0]; 2430 t1 = tab1[s0]; 2431 t2 = tab2[s0]; 2432 t3 = tab0[s1]; 2433 t4 = tab1[s1]; 2434 t5 = tab2[s1]; 2435 #ifdef _LITTLE_ENDIAN 2436 da[0] = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0; 2437 res2 = (t5 << 8) + t4; 2438 #else 2439 da[0] = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3; 2440 res2 = (t4 << 24) + (t5 << 16); 2441 #endif /* _LITTLE_ENDIAN */ 2442 s0 = sa[0]; 2443 s1 = sa[1]; 2444 t0 = tab0[s0]; 2445 t1 = tab1[s0]; 2446 t2 = tab2[s0]; 2447 t3 = tab0[s1]; 2448 t4 = tab1[s1]; 2449 t5 = tab2[s1]; 2450 #ifdef _LITTLE_ENDIAN 2451 res2 += ((t1 << 24) + (t0 << 16)); 2452 res1 = (t5 << 24) + (t4 << 16) + (t3 << 8) + t2; 2453 #else 2454 res2 += ((t0 << 8) + t1); 2455 res1 = (t2 << 24) + (t3 << 16) + (t4 << 8) + t5; 2456 #endif /* _LITTLE_ENDIAN */ 2457 s0 = sa[2]; 2458 s1 = sa[3]; 2459 da[1] = res2; 2460 da[2] = res1; 2461 } 2462 2463 t0 = tab0[s0]; 2464 t1 = tab1[s0]; 2465 t2 = tab2[s0]; 2466 t3 = tab0[s1]; 2467 t4 = tab1[s1]; 2468 t5 = tab2[s1]; 2469 #ifdef _LITTLE_ENDIAN 2470 da[0] = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0; 2471 res2 = (t5 << 8) + t4; 2472 #else 2473 da[0] = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3; 2474 res2 = (t4 << 24) + (t5 << 16); 2475 #endif /* _LITTLE_ENDIAN */ 2476 s0 = sa[0]; 2477 s1 = sa[1]; 2478 t0 = tab0[s0]; 2479 t1 = tab1[s0]; 2480 t2 = tab2[s0]; 2481 t3 = tab0[s1]; 2482 t4 = tab1[s1]; 2483 t5 = tab2[s1]; 2484 #ifdef _LITTLE_ENDIAN 2485 res2 += ((t1 << 24) + (t0 << 16)); 2486 res1 = (t5 << 24) + (t4 << 16) + (t3 << 8) + t2; 2487 #else 2488 res2 += ((t0 << 8) + t1); 2489 res1 = (t2 << 24) + (t3 << 16) + (t4 << 8) + t5; 2490 #endif /* _LITTLE_ENDIAN */ 2491 da[1] = res2; 2492 da[2] = res1; 2493 da += 3; 2494 sa += 2; 2495 dp = (mlib_u8 *) da; 2496 i += 4; 2497 2498 #ifdef __SUNPRO_C 2499 #pragma pipeloop(0) 2500 #endif /* __SUNPRO_C */ 2501 for (; i < size; i++) { 2502 s0 = *sa++; 2503 dp[0] = tab0[s0]; 2504 dp[1] = tab1[s0]; 2505 dp[2] = tab2[s0]; 2506 dp += 3; 2507 } 2508 } 2509 2510 } 2511 else if (csize == 4) { 2512 mlib_s32 i, j; 2513 2514 for (j = 0; j < ysize; j++, dst += dlb, src += slb) { 2515 mlib_u32 *da; 2516 mlib_u8 *dp = dst; 2517 mlib_s16 *sa = (void *)src; 2518 const mlib_u8 *tab0 = table_base[0]; 2519 const mlib_u8 *tab1 = table_base[1]; 2520 const mlib_u8 *tab2 = table_base[2]; 2521 const mlib_u8 *tab3 = table_base[3]; 2522 mlib_s32 s0; 2523 mlib_u32 t0, t1, t2, t3; 2524 mlib_s32 size = xsize, off; 2525 mlib_u32 shift, shift1, res1, res2, res; 2526 2527 if (((mlib_addr) dp & 3) == 0) { 2528 2529 da = (mlib_u32 *) dp; 2530 2531 s0 = sa[0]; 2532 sa++; 2533 2534 #ifdef __SUNPRO_C 2535 #pragma pipeloop(0) 2536 #endif /* __SUNPRO_C */ 2537 for (i = 0; i < size - 1; i++, da++, sa++) { 2538 t0 = tab0[s0]; 2539 t1 = tab1[s0]; 2540 t2 = tab2[s0]; 2541 t3 = tab3[s0]; 2542 #ifdef _LITTLE_ENDIAN 2543 res = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0; 2544 #else 2545 res = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3; 2546 #endif /* _LITTLE_ENDIAN */ 2547 s0 = sa[0]; 2548 da[0] = res; 2549 } 2550 2551 t0 = tab0[s0]; 2552 t1 = tab1[s0]; 2553 t2 = tab2[s0]; 2554 t3 = tab3[s0]; 2555 #ifdef _LITTLE_ENDIAN 2556 res = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0; 2557 #else 2558 res = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3; 2559 #endif /* _LITTLE_ENDIAN */ 2560 da[0] = res; 2561 2562 } 2563 else { 2564 2565 off = (mlib_s32) (4 - ((mlib_addr) dp & 3)); 2566 shift = 8 * off; 2567 shift1 = 32 - shift; 2568 2569 s0 = *sa++; 2570 2571 for (i = 0; i < off; i++) { 2572 dp[i] = table_base[i][s0]; 2573 } 2574 2575 dp += i; 2576 da = (mlib_u32 *) dp; 2577 2578 t0 = tab0[s0]; 2579 t1 = tab1[s0]; 2580 t2 = tab2[s0]; 2581 t3 = tab3[s0]; 2582 2583 #ifdef _LITTLE_ENDIAN 2584 res1 = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0; 2585 #else 2586 res1 = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3; 2587 #endif /* _LITTLE_ENDIAN */ 2588 2589 s0 = sa[0]; 2590 sa++; 2591 2592 #ifdef __SUNPRO_C 2593 #pragma pipeloop(0) 2594 #endif /* __SUNPRO_C */ 2595 for (i = 0; i < size - 2; i++, da++, sa++) { 2596 t0 = tab0[s0]; 2597 t1 = tab1[s0]; 2598 t2 = tab2[s0]; 2599 t3 = tab3[s0]; 2600 #ifdef _LITTLE_ENDIAN 2601 res2 = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0; 2602 res = (res1 >> shift) + (res2 << shift1); 2603 #else 2604 res2 = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3; 2605 res = (res1 << shift) + (res2 >> shift1); 2606 #endif /* _LITTLE_ENDIAN */ 2607 res1 = res2; 2608 s0 = sa[0]; 2609 da[0] = res; 2610 } 2611 2612 t0 = tab0[s0]; 2613 t1 = tab1[s0]; 2614 t2 = tab2[s0]; 2615 t3 = tab3[s0]; 2616 #ifdef _LITTLE_ENDIAN 2617 res2 = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0; 2618 res = (res1 >> shift) + (res2 << shift1); 2619 #else 2620 res2 = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3; 2621 res = (res1 << shift) + (res2 >> shift1); 2622 #endif /* _LITTLE_ENDIAN */ 2623 da[0] = res; 2624 #ifdef _LITTLE_ENDIAN 2625 res1 = (da[1] >> shift1); 2626 da[1] = (res2 >> shift) + (res1 << shift1); 2627 #else 2628 res1 = (da[1] << shift1); 2629 da[1] = (res2 << shift) + (res1 >> shift1); 2630 #endif /* _LITTLE_ENDIAN */ 2631 } 2632 } 2633 } 2634 } 2635 2636 #ifdef _MSC_VER 2637 #pragma optimize("", on) 2638 #endif /* _MSC_VER */ 2639 2640 /***************************************************************/ 2641 void mlib_c_ImageLookUpSI_U16_U8(const mlib_u16 *src, 2642 mlib_s32 slb, 2643 mlib_u8 *dst, 2644 mlib_s32 dlb, 2645 mlib_s32 xsize, 2646 mlib_s32 ysize, 2647 mlib_s32 csize, 2648 const mlib_u8 **table) 2649 { 2650 const mlib_u8 *table_base[4]; 2651 mlib_s32 c; 2652 2653 for (c = 0; c < csize; c++) { 2654 table_base[c] = &table[c][0]; 2655 } 2656 2657 if ((xsize < 8) || (csize == 2)) { 2658 MLIB_C_IMAGELOOKUPSI(mlib_u8, mlib_u16, table_base); 2659 } 2660 else if (csize == 3) { 2661 mlib_s32 i, j; 2662 2663 for (j = 0; j < ysize; j++, dst += dlb, src += slb) { 2664 mlib_u32 *da; 2665 mlib_u8 *dp = dst; 2666 mlib_u16 *sa = (void *)src; 2667 const mlib_u8 *tab0 = table_base[0]; 2668 const mlib_u8 *tab1 = table_base[1]; 2669 const mlib_u8 *tab2 = table_base[2]; 2670 mlib_s32 s0, s1; 2671 mlib_u32 t0, t1, t2, t3, t4, t5; 2672 mlib_u32 res1, res2; 2673 mlib_s32 size = xsize, off; 2674 2675 off = (mlib_s32) ((mlib_addr) dp & 3); 2676 2677 #ifdef __SUNPRO_C 2678 #pragma pipeloop(0) 2679 #endif /* __SUNPRO_C */ 2680 for (i = 0; i < off; i++) { 2681 s0 = *sa++; 2682 dp[0] = tab0[s0]; 2683 dp[1] = tab1[s0]; 2684 dp[2] = tab2[s0]; 2685 dp += 3; 2686 } 2687 2688 size -= off; 2689 da = (mlib_u32 *) dp; 2690 s0 = sa[0]; 2691 s1 = sa[1]; 2692 sa += 2; 2693 2694 #ifdef __SUNPRO_C 2695 #pragma pipeloop(0) 2696 #endif /* __SUNPRO_C */ 2697 for (i = 0; i < size - 7; i += 4, da += 3, sa += 4) { 2698 t0 = tab0[s0]; 2699 t1 = tab1[s0]; 2700 t2 = tab2[s0]; 2701 t3 = tab0[s1]; 2702 t4 = tab1[s1]; 2703 t5 = tab2[s1]; 2704 #ifdef _LITTLE_ENDIAN 2705 da[0] = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0; 2706 res2 = (t5 << 8) + t4; 2707 #else 2708 da[0] = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3; 2709 res2 = (t4 << 24) + (t5 << 16); 2710 #endif /* _LITTLE_ENDIAN */ 2711 s0 = sa[0]; 2712 s1 = sa[1]; 2713 t0 = tab0[s0]; 2714 t1 = tab1[s0]; 2715 t2 = tab2[s0]; 2716 t3 = tab0[s1]; 2717 t4 = tab1[s1]; 2718 t5 = tab2[s1]; 2719 #ifdef _LITTLE_ENDIAN 2720 res2 += ((t1 << 24) + (t0 << 16)); 2721 res1 = (t5 << 24) + (t4 << 16) + (t3 << 8) + t2; 2722 #else 2723 res2 += ((t0 << 8) + t1); 2724 res1 = (t2 << 24) + (t3 << 16) + (t4 << 8) + t5; 2725 #endif /* _LITTLE_ENDIAN */ 2726 s0 = sa[2]; 2727 s1 = sa[3]; 2728 da[1] = res2; 2729 da[2] = res1; 2730 } 2731 2732 t0 = tab0[s0]; 2733 t1 = tab1[s0]; 2734 t2 = tab2[s0]; 2735 t3 = tab0[s1]; 2736 t4 = tab1[s1]; 2737 t5 = tab2[s1]; 2738 #ifdef _LITTLE_ENDIAN 2739 da[0] = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0; 2740 res2 = (t5 << 8) + t4; 2741 #else 2742 da[0] = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3; 2743 res2 = (t4 << 24) + (t5 << 16); 2744 #endif /* _LITTLE_ENDIAN */ 2745 s0 = sa[0]; 2746 s1 = sa[1]; 2747 t0 = tab0[s0]; 2748 t1 = tab1[s0]; 2749 t2 = tab2[s0]; 2750 t3 = tab0[s1]; 2751 t4 = tab1[s1]; 2752 t5 = tab2[s1]; 2753 #ifdef _LITTLE_ENDIAN 2754 res2 += ((t1 << 24) + (t0 << 16)); 2755 res1 = (t5 << 24) + (t4 << 16) + (t3 << 8) + t2; 2756 #else 2757 res2 += ((t0 << 8) + t1); 2758 res1 = (t2 << 24) + (t3 << 16) + (t4 << 8) + t5; 2759 #endif /* _LITTLE_ENDIAN */ 2760 da[1] = res2; 2761 da[2] = res1; 2762 da += 3; 2763 sa += 2; 2764 dp = (mlib_u8 *) da; 2765 i += 4; 2766 2767 #ifdef __SUNPRO_C 2768 #pragma pipeloop(0) 2769 #endif /* __SUNPRO_C */ 2770 for (; i < size; i++) { 2771 s0 = *sa++; 2772 dp[0] = tab0[s0]; 2773 dp[1] = tab1[s0]; 2774 dp[2] = tab2[s0]; 2775 dp += 3; 2776 } 2777 } 2778 2779 } 2780 else if (csize == 4) { 2781 mlib_s32 i, j; 2782 2783 for (j = 0; j < ysize; j++, dst += dlb, src += slb) { 2784 mlib_u32 *da; 2785 mlib_u8 *dp = dst; 2786 mlib_u16 *sa = (void *)src; 2787 const mlib_u8 *tab0 = table_base[0]; 2788 const mlib_u8 *tab1 = table_base[1]; 2789 const mlib_u8 *tab2 = table_base[2]; 2790 const mlib_u8 *tab3 = table_base[3]; 2791 mlib_s32 s0; 2792 mlib_u32 t0, t1, t2, t3; 2793 mlib_s32 size = xsize, off; 2794 mlib_u32 shift, shift1, res1, res2, res; 2795 2796 if (((mlib_addr) dp & 3) == 0) { 2797 2798 da = (mlib_u32 *) dp; 2799 2800 s0 = sa[0]; 2801 sa++; 2802 2803 #ifdef __SUNPRO_C 2804 #pragma pipeloop(0) 2805 #endif /* __SUNPRO_C */ 2806 for (i = 0; i < size - 1; i++, da++, sa++) { 2807 t0 = tab0[s0]; 2808 t1 = tab1[s0]; 2809 t2 = tab2[s0]; 2810 t3 = tab3[s0]; 2811 #ifdef _LITTLE_ENDIAN 2812 res = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0; 2813 #else 2814 res = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3; 2815 #endif /* _LITTLE_ENDIAN */ 2816 s0 = sa[0]; 2817 da[0] = res; 2818 } 2819 2820 t0 = tab0[s0]; 2821 t1 = tab1[s0]; 2822 t2 = tab2[s0]; 2823 t3 = tab3[s0]; 2824 #ifdef _LITTLE_ENDIAN 2825 res = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0; 2826 #else 2827 res = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3; 2828 #endif /* _LITTLE_ENDIAN */ 2829 da[0] = res; 2830 2831 } 2832 else { 2833 2834 off = (mlib_s32) (4 - ((mlib_addr) dp & 3)); 2835 shift = 8 * off; 2836 shift1 = 32 - shift; 2837 2838 s0 = *sa++; 2839 2840 for (i = 0; i < off; i++) { 2841 dp[i] = table_base[i][s0]; 2842 } 2843 2844 dp += i; 2845 da = (mlib_u32 *) dp; 2846 2847 t0 = tab0[s0]; 2848 t1 = tab1[s0]; 2849 t2 = tab2[s0]; 2850 t3 = tab3[s0]; 2851 2852 #ifdef _LITTLE_ENDIAN 2853 res1 = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0; 2854 #else 2855 res1 = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3; 2856 #endif /* _LITTLE_ENDIAN */ 2857 2858 s0 = sa[0]; 2859 sa++; 2860 2861 #ifdef __SUNPRO_C 2862 #pragma pipeloop(0) 2863 #endif /* __SUNPRO_C */ 2864 for (i = 0; i < size - 2; i++, da++, sa++) { 2865 t0 = tab0[s0]; 2866 t1 = tab1[s0]; 2867 t2 = tab2[s0]; 2868 t3 = tab3[s0]; 2869 #ifdef _LITTLE_ENDIAN 2870 res2 = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0; 2871 res = (res1 >> shift) + (res2 << shift1); 2872 #else 2873 res2 = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3; 2874 res = (res1 << shift) + (res2 >> shift1); 2875 #endif /* _LITTLE_ENDIAN */ 2876 res1 = res2; 2877 s0 = sa[0]; 2878 da[0] = res; 2879 } 2880 2881 t0 = tab0[s0]; 2882 t1 = tab1[s0]; 2883 t2 = tab2[s0]; 2884 t3 = tab3[s0]; 2885 #ifdef _LITTLE_ENDIAN 2886 res2 = (t3 << 24) + (t2 << 16) + (t1 << 8) + t0; 2887 res = (res1 >> shift) + (res2 << shift1); 2888 #else 2889 res2 = (t0 << 24) + (t1 << 16) + (t2 << 8) + t3; 2890 res = (res1 << shift) + (res2 >> shift1); 2891 #endif /* _LITTLE_ENDIAN */ 2892 da[0] = res; 2893 #ifdef _LITTLE_ENDIAN 2894 res1 = (da[1] >> shift1); 2895 da[1] = (res2 >> shift) + (res1 << shift1); 2896 #else 2897 res1 = (da[1] << shift1); 2898 da[1] = (res2 << shift) + (res1 >> shift1); 2899 #endif /* _LITTLE_ENDIAN */ 2900 } 2901 } 2902 } 2903 } 2904 2905 /***************************************************************/ 2906 void mlib_c_ImageLookUpSI_S32_U8(const mlib_s32 *src, 2907 mlib_s32 slb, 2908 mlib_u8 *dst, 2909 mlib_s32 dlb, 2910 mlib_s32 xsize, 2911 mlib_s32 ysize, 2912 mlib_s32 csize, 2913 const mlib_u8 **table) 2914 { 2915 const mlib_u8 *table_base[4]; 2916 mlib_s32 c; 2917 2918 for (c = 0; c < csize; c++) { 2919 table_base[c] = &table[c][TABLE_SHIFT_S32]; 2920 } 2921 2922 #ifdef __GNUC__ 2923 #pragma GCC diagnostic push 2924 #pragma GCC diagnostic ignored "-Warray-bounds" 2925 #endif 2926 MLIB_C_IMAGELOOKUPSI(mlib_u8, mlib_s32, table_base); 2927 #ifdef __GNUC__ 2928 #pragma GCC diagnostic pop 2929 #endif 2930 } 2931 2932 /***************************************************************/ 2933 void mlib_c_ImageLookUpSI_U8_S16(const mlib_u8 *src, 2934 mlib_s32 slb, 2935 mlib_s16 *dst, 2936 mlib_s32 dlb, 2937 mlib_s32 xsize, 2938 mlib_s32 ysize, 2939 mlib_s32 csize, 2940 const mlib_s16 **table) 2941 { 2942 2943 if ((xsize < 4) || ((xsize * ysize) < 250)) { 2944 MLIB_C_IMAGELOOKUPSI(mlib_s16, mlib_u8, table); 2945 2946 } 2947 else if (csize == 2) { 2948 mlib_u32 tab[256]; 2949 mlib_u16 *tab0 = (mlib_u16 *) table[0]; 2950 mlib_u16 *tab1 = (mlib_u16 *) table[1]; 2951 mlib_s32 i, j; 2952 mlib_u32 s0, s1, s2; 2953 2954 s0 = tab0[0]; 2955 s1 = tab1[0]; 2956 for (i = 1; i < 256; i++) { 2957 #ifdef _LITTLE_ENDIAN 2958 s2 = (s1 << 16) + s0; 2959 #else 2960 s2 = (s0 << 16) + s1; 2961 #endif /* _LITTLE_ENDIAN */ 2962 s0 = tab0[i]; 2963 s1 = tab1[i]; 2964 tab[i - 1] = s2; 2965 } 2966 2967 #ifdef _LITTLE_ENDIAN 2968 s2 = (s1 << 16) + s0; 2969 #else 2970 s2 = (s0 << 16) + s1; 2971 #endif /* _LITTLE_ENDIAN */ 2972 tab[255] = s2; 2973 2974 for (j = 0; j < ysize; j++, dst += dlb, src += slb) { 2975 mlib_u32 *da; 2976 mlib_u16 *dp = (mlib_u16 *) dst; 2977 mlib_u8 *sa = (void *)src; 2978 mlib_u32 s0, t0, s1, t1, t2; 2979 mlib_u32 res1, res2; 2980 mlib_s32 size = xsize; 2981 2982 if (((mlib_addr) dp & 3) == 0) { 2983 2984 da = (mlib_u32 *) dp; 2985 s0 = sa[0]; 2986 s1 = sa[1]; 2987 sa += 2; 2988 2989 #ifdef __SUNPRO_C 2990 #pragma pipeloop(0) 2991 #endif /* __SUNPRO_C */ 2992 for (i = 0; i < size - 3; i += 2, da += 2, sa += 2) { 2993 t0 = tab[s0]; 2994 t1 = tab[s1]; 2995 s0 = sa[0]; 2996 s1 = sa[1]; 2997 da[0] = t0; 2998 da[1] = t1; 2999 } 3000 3001 t0 = tab[s0]; 3002 t1 = tab[s1]; 3003 da[0] = t0; 3004 da[1] = t1; 3005 3006 if (size & 1) 3007 da[2] = tab[sa[0]]; 3008 3009 } 3010 else { 3011 3012 t0 = tab[*sa++]; 3013 #ifdef _LITTLE_ENDIAN 3014 *dp++ = (mlib_u16) (t0); 3015 #else 3016 *dp++ = (mlib_u16) (t0 >> 16); 3017 #endif /* _LITTLE_ENDIAN */ 3018 da = (mlib_u32 *) dp; 3019 s0 = sa[0]; 3020 s1 = sa[1]; 3021 sa += 2; 3022 3023 #ifdef __SUNPRO_C 3024 #pragma pipeloop(0) 3025 #endif /* __SUNPRO_C */ 3026 for (i = 0; i < size - 4; i += 2, da += 2, sa += 2) { 3027 t1 = tab[s0]; 3028 t2 = tab[s1]; 3029 #ifdef _LITTLE_ENDIAN 3030 res1 = (t0 >> 16) + (t1 << 16); 3031 res2 = (t1 >> 16) + (t2 << 16); 3032 #else 3033 res1 = (t0 << 16) + (t1 >> 16); 3034 res2 = (t1 << 16) + (t2 >> 16); 3035 #endif /* _LITTLE_ENDIAN */ 3036 t0 = t2; 3037 s0 = sa[0]; 3038 s1 = sa[1]; 3039 da[0] = res1; 3040 da[1] = res2; 3041 } 3042 3043 t1 = tab[s0]; 3044 t2 = tab[s1]; 3045 #ifdef _LITTLE_ENDIAN 3046 res1 = (t0 >> 16) + (t1 << 16); 3047 res2 = (t1 >> 16) + (t2 << 16); 3048 #else 3049 res1 = (t0 << 16) + (t1 >> 16); 3050 res2 = (t1 << 16) + (t2 >> 16); 3051 #endif /* _LITTLE_ENDIAN */ 3052 da[0] = res1; 3053 da[1] = res2; 3054 da += 2; 3055 dp = (mlib_u16 *) da; 3056 #ifdef _LITTLE_ENDIAN 3057 dp[0] = (mlib_u16) (t2 >> 16); 3058 #else 3059 dp[0] = (mlib_u16) t2; 3060 #endif /* _LITTLE_ENDIAN */ 3061 3062 if ((size & 1) == 0) { 3063 t0 = tab[sa[0]]; 3064 #ifdef _LITTLE_ENDIAN 3065 dp[2] = (mlib_u16) (t0 >> 16); 3066 dp[1] = (mlib_u16) t0; 3067 #else 3068 dp[1] = (mlib_u16) (t0 >> 16); 3069 dp[2] = (mlib_u16) t0; 3070 #endif /* _LITTLE_ENDIAN */ 3071 } 3072 } 3073 } 3074 3075 } 3076 else if (csize == 3) { 3077 mlib_u32 tab[512]; 3078 mlib_u16 *tab0 = (mlib_u16 *) table[0]; 3079 mlib_u16 *tab1 = (mlib_u16 *) table[1]; 3080 mlib_u16 *tab2 = (mlib_u16 *) table[2]; 3081 mlib_s32 i, j; 3082 mlib_u32 s0, s1, s2, s3, s4; 3083 3084 s0 = tab0[0]; 3085 s1 = tab1[0]; 3086 s2 = tab2[0]; 3087 for (i = 1; i < 256; i++) { 3088 #ifdef _LITTLE_ENDIAN 3089 s3 = (s0 << 16); 3090 s4 = (s2 << 16) + s1; 3091 #else 3092 s3 = s0; 3093 s4 = (s1 << 16) + s2; 3094 #endif /* _LITTLE_ENDIAN */ 3095 s0 = tab0[i]; 3096 s1 = tab1[i]; 3097 s2 = tab2[i]; 3098 tab[2 * i - 2] = s3; 3099 tab[2 * i - 1] = s4; 3100 } 3101 3102 #ifdef _LITTLE_ENDIAN 3103 s4 = (s2 << 16) + s1; 3104 tab[510] = s0 << 16; 3105 #else 3106 s4 = (s1 << 16) + s2; 3107 tab[510] = s0; 3108 #endif /* _LITTLE_ENDIAN */ 3109 tab[511] = s4; 3110 3111 for (j = 0; j < ysize; j++, dst += dlb, src += slb) { 3112 mlib_u32 *da; 3113 mlib_u16 *dp = (mlib_u16 *) dst, *ptr; 3114 mlib_u8 *sa = (void *)src; 3115 mlib_u32 s0, s1, t0, t1, t2, t3; 3116 mlib_u32 res1, res2; 3117 mlib_s32 size = xsize, off; 3118 3119 off = (mlib_s32) ((mlib_addr) dp & 3); 3120 3121 if (off != 0) { 3122 ptr = (mlib_u16 *) (tab + 2 * sa[0]); 3123 dp[0] = ptr[1]; 3124 dp[1] = ptr[2]; 3125 dp[2] = ptr[3]; 3126 dp += 3; 3127 sa++; 3128 size--; 3129 } 3130 3131 da = (mlib_u32 *) dp; 3132 s0 = sa[0] << 3; 3133 s1 = sa[1] << 3; 3134 sa += 2; 3135 3136 #ifdef __SUNPRO_C 3137 #pragma pipeloop(0) 3138 #endif /* __SUNPRO_C */ 3139 for (i = 0; i < size - 3; i += 2, da += 3, sa += 2) { 3140 t0 = *(mlib_u32 *) ((mlib_u8 *) tab + s0); 3141 t1 = *(mlib_u32 *) ((mlib_u8 *) tab + s0 + 4); 3142 t2 = *(mlib_u32 *) ((mlib_u8 *) tab + s1); 3143 t3 = *(mlib_u32 *) ((mlib_u8 *) tab + s1 + 4); 3144 #ifdef _LITTLE_ENDIAN 3145 res1 = (t0 >> 16) + (t1 << 16); 3146 res2 = (t1 >> 16) + t2; 3147 #else 3148 res1 = (t0 << 16) + (t1 >> 16); 3149 res2 = (t1 << 16) + t2; 3150 #endif /* _LITTLE_ENDIAN */ 3151 s0 = sa[0] << 3; 3152 s1 = sa[1] << 3; 3153 da[0] = res1; 3154 da[1] = res2; 3155 da[2] = t3; 3156 } 3157 3158 t0 = *(mlib_u32 *) ((mlib_u8 *) tab + s0); 3159 t1 = *(mlib_u32 *) ((mlib_u8 *) tab + s0 + 4); 3160 t2 = *(mlib_u32 *) ((mlib_u8 *) tab + s1); 3161 t3 = *(mlib_u32 *) ((mlib_u8 *) tab + s1 + 4); 3162 #ifdef _LITTLE_ENDIAN 3163 res1 = (t0 >> 16) + (t1 << 16); 3164 res2 = (t1 >> 16) + t2; 3165 #else 3166 res1 = (t0 << 16) + (t1 >> 16); 3167 res2 = (t1 << 16) + t2; 3168 #endif /* _LITTLE_ENDIAN */ 3169 da[0] = res1; 3170 da[1] = res2; 3171 da[2] = t3; 3172 da += 3; 3173 dp = (mlib_u16 *) da; 3174 i += 2; 3175 3176 if (i < size) { 3177 ptr = (mlib_u16 *) (tab + 2 * sa[0]); 3178 dp[0] = ptr[1]; 3179 dp[1] = ptr[2]; 3180 dp[2] = ptr[3]; 3181 } 3182 } 3183 3184 } 3185 else if (csize == 4) { 3186 mlib_u32 tab[512]; 3187 mlib_u16 *tab0 = (mlib_u16 *) table[0]; 3188 mlib_u16 *tab1 = (mlib_u16 *) table[1]; 3189 mlib_u16 *tab2 = (mlib_u16 *) table[2]; 3190 mlib_u16 *tab3 = (mlib_u16 *) table[3]; 3191 mlib_s32 i, j; 3192 mlib_u32 s0, s1, s2, s3, s4, s5; 3193 3194 s0 = tab0[0]; 3195 s1 = tab1[0]; 3196 s2 = tab2[0]; 3197 s3 = tab3[0]; 3198 for (i = 1; i < 256; i++) { 3199 #ifdef _LITTLE_ENDIAN 3200 s4 = (s1 << 16) + s0; 3201 s5 = (s3 << 16) + s2; 3202 #else 3203 s4 = (s0 << 16) + s1; 3204 s5 = (s2 << 16) + s3; 3205 #endif /* _LITTLE_ENDIAN */ 3206 s0 = tab0[i]; 3207 s1 = tab1[i]; 3208 s2 = tab2[i]; 3209 s3 = tab3[i]; 3210 tab[2 * i - 2] = s4; 3211 tab[2 * i - 1] = s5; 3212 } 3213 3214 #ifdef _LITTLE_ENDIAN 3215 s4 = (s1 << 16) + s0; 3216 s5 = (s3 << 16) + s2; 3217 #else 3218 s4 = (s0 << 16) + s1; 3219 s5 = (s2 << 16) + s3; 3220 #endif /* _LITTLE_ENDIAN */ 3221 tab[510] = s4; 3222 tab[511] = s5; 3223 3224 for (j = 0; j < ysize; j++, dst += dlb, src += slb) { 3225 mlib_u32 *da; 3226 mlib_u16 *dp = (mlib_u16 *) dst; 3227 mlib_u8 *sa = (void *)src; 3228 mlib_u32 s0, t0, s1, t1, t2, t3, t4, t5; 3229 mlib_s32 size = xsize; 3230 mlib_u32 res1, res2, res3, res4; 3231 3232 if (((mlib_addr) dp & 3) == 0) { 3233 3234 da = (mlib_u32 *) dp; 3235 3236 s0 = sa[0] << 3; 3237 s1 = sa[1] << 3; 3238 sa += 2; 3239 3240 #ifdef __SUNPRO_C 3241 #pragma pipeloop(0) 3242 #endif /* __SUNPRO_C */ 3243 for (i = 0; i < size - 3; i += 2, da += 4, sa += 2) { 3244 t0 = *(mlib_u32 *) ((mlib_u8 *) tab + s0); 3245 t1 = *(mlib_u32 *) ((mlib_u8 *) tab + s0 + 4); 3246 t2 = *(mlib_u32 *) ((mlib_u8 *) tab + s1); 3247 t3 = *(mlib_u32 *) ((mlib_u8 *) tab + s1 + 4); 3248 s0 = sa[0] << 3; 3249 s1 = sa[1] << 3; 3250 da[0] = t0; 3251 da[1] = t1; 3252 da[2] = t2; 3253 da[3] = t3; 3254 } 3255 3256 t0 = *(mlib_u32 *) ((mlib_u8 *) tab + s0); 3257 t1 = *(mlib_u32 *) ((mlib_u8 *) tab + s0 + 4); 3258 t2 = *(mlib_u32 *) ((mlib_u8 *) tab + s1); 3259 t3 = *(mlib_u32 *) ((mlib_u8 *) tab + s1 + 4); 3260 da[0] = t0; 3261 da[1] = t1; 3262 da[2] = t2; 3263 da[3] = t3; 3264 3265 if (size & 1) { 3266 da[4] = tab[2 * sa[0]]; 3267 da[5] = tab[2 * sa[0] + 1]; 3268 } 3269 3270 } 3271 else { 3272 3273 t4 = tab[2 * sa[0]]; 3274 t5 = tab[2 * sa[0] + 1]; 3275 #ifdef _LITTLE_ENDIAN 3276 *dp++ = (mlib_u16) (t4); 3277 #else 3278 *dp++ = (mlib_u16) (t4 >> 16); 3279 #endif /* _LITTLE_ENDIAN */ 3280 sa++; 3281 da = (mlib_u32 *) dp; 3282 #ifdef _LITTLE_ENDIAN 3283 *da++ = (t4 >> 16) + (t5 << 16); 3284 #else 3285 *da++ = (t4 << 16) + (t5 >> 16); 3286 #endif /* _LITTLE_ENDIAN */ 3287 s0 = sa[0] << 3; 3288 s1 = sa[1] << 3; 3289 sa += 2; 3290 3291 #ifdef __SUNPRO_C 3292 #pragma pipeloop(0) 3293 #endif /* __SUNPRO_C */ 3294 for (i = 0; i < size - 4; i += 2, da += 4, sa += 2) { 3295 t0 = *(mlib_u32 *) ((mlib_u8 *) tab + s0); 3296 t1 = *(mlib_u32 *) ((mlib_u8 *) tab + s0 + 4); 3297 t2 = *(mlib_u32 *) ((mlib_u8 *) tab + s1); 3298 t3 = *(mlib_u32 *) ((mlib_u8 *) tab + s1 + 4); 3299 #ifdef _LITTLE_ENDIAN 3300 res1 = (t5 >> 16) + (t0 << 16); 3301 res2 = (t0 >> 16) + (t1 << 16); 3302 res3 = (t1 >> 16) + (t2 << 16); 3303 res4 = (t2 >> 16) + (t3 << 16); 3304 #else 3305 res1 = (t5 << 16) + (t0 >> 16); 3306 res2 = (t0 << 16) + (t1 >> 16); 3307 res3 = (t1 << 16) + (t2 >> 16); 3308 res4 = (t2 << 16) + (t3 >> 16); 3309 #endif /* _LITTLE_ENDIAN */ 3310 s0 = sa[0] << 3; 3311 s1 = sa[1] << 3; 3312 da[0] = res1; 3313 da[1] = res2; 3314 da[2] = res3; 3315 da[3] = res4; 3316 t5 = t3; 3317 } 3318 3319 t0 = *(mlib_u32 *) ((mlib_u8 *) tab + s0); 3320 t1 = *(mlib_u32 *) ((mlib_u8 *) tab + s0 + 4); 3321 t2 = *(mlib_u32 *) ((mlib_u8 *) tab + s1); 3322 t3 = *(mlib_u32 *) ((mlib_u8 *) tab + s1 + 4); 3323 #ifdef _LITTLE_ENDIAN 3324 res1 = (t5 >> 16) + (t0 << 16); 3325 res2 = (t0 >> 16) + (t1 << 16); 3326 res3 = (t1 >> 16) + (t2 << 16); 3327 res4 = (t2 >> 16) + (t3 << 16); 3328 #else 3329 res1 = (t5 << 16) + (t0 >> 16); 3330 res2 = (t0 << 16) + (t1 >> 16); 3331 res3 = (t1 << 16) + (t2 >> 16); 3332 res4 = (t2 << 16) + (t3 >> 16); 3333 #endif /* _LITTLE_ENDIAN */ 3334 da[0] = res1; 3335 da[1] = res2; 3336 da[2] = res3; 3337 da[3] = res4; 3338 da += 4; 3339 dp = (mlib_u16 *) da; 3340 #ifdef _LITTLE_ENDIAN 3341 dp[0] = (mlib_u16) (t3 >> 16); 3342 #else 3343 dp[0] = (mlib_u16) t3; 3344 #endif /* _LITTLE_ENDIAN */ 3345 3346 if ((size & 1) == 0) { 3347 t0 = tab[2 * sa[0]]; 3348 #ifdef _LITTLE_ENDIAN 3349 dp[2] = (mlib_u16) (t0 >> 16); 3350 dp[1] = (mlib_u16) t0; 3351 #else 3352 dp[1] = (mlib_u16) (t0 >> 16); 3353 dp[2] = (mlib_u16) t0; 3354 #endif /* _LITTLE_ENDIAN */ 3355 t0 = tab[2 * sa[0] + 1]; 3356 #ifdef _LITTLE_ENDIAN 3357 dp[4] = (mlib_u16) (t0 >> 16); 3358 dp[3] = (mlib_u16) t0; 3359 #else 3360 dp[3] = (mlib_u16) (t0 >> 16); 3361 dp[4] = (mlib_u16) t0; 3362 #endif /* _LITTLE_ENDIAN */ 3363 } 3364 } 3365 } 3366 } 3367 } 3368 3369 /***************************************************************/ 3370 void mlib_c_ImageLookUpSI_S16_S16(const mlib_s16 *src, 3371 mlib_s32 slb, 3372 mlib_s16 *dst, 3373 mlib_s32 dlb, 3374 mlib_s32 xsize, 3375 mlib_s32 ysize, 3376 mlib_s32 csize, 3377 const mlib_s16 **table) 3378 { 3379 const mlib_s16 *table_base[4]; 3380 mlib_s32 c; 3381 3382 for (c = 0; c < csize; c++) { 3383 table_base[c] = &table[c][32768]; 3384 } 3385 3386 #ifdef __GNUC__ 3387 #pragma GCC diagnostic push 3388 #pragma GCC diagnostic ignored "-Warray-bounds" 3389 #endif 3390 MLIB_C_IMAGELOOKUPSI(mlib_s16, mlib_s16, table_base); 3391 #ifdef __GNUC__ 3392 #pragma GCC diagnostic pop 3393 #endif 3394 } 3395 3396 /***************************************************************/ 3397 void mlib_c_ImageLookUpSI_U16_S16(const mlib_u16 *src, 3398 mlib_s32 slb, 3399 mlib_s16 *dst, 3400 mlib_s32 dlb, 3401 mlib_s32 xsize, 3402 mlib_s32 ysize, 3403 mlib_s32 csize, 3404 const mlib_s16 **table) 3405 { 3406 const mlib_s16 *table_base[4]; 3407 mlib_s32 c; 3408 3409 for (c = 0; c < csize; c++) { 3410 table_base[c] = &table[c][0]; 3411 } 3412 3413 MLIB_C_IMAGELOOKUPSI(mlib_s16, mlib_u16, table_base); 3414 } 3415 3416 /***************************************************************/ 3417 void mlib_c_ImageLookUpSI_S32_S16(const mlib_s32 *src, 3418 mlib_s32 slb, 3419 mlib_s16 *dst, 3420 mlib_s32 dlb, 3421 mlib_s32 xsize, 3422 mlib_s32 ysize, 3423 mlib_s32 csize, 3424 const mlib_s16 **table) 3425 { 3426 const mlib_s16 *table_base[4]; 3427 mlib_s32 c; 3428 3429 for (c = 0; c < csize; c++) { 3430 table_base[c] = &table[c][TABLE_SHIFT_S32]; 3431 } 3432 3433 #ifdef __GNUC__ 3434 #pragma GCC diagnostic push 3435 #pragma GCC diagnostic ignored "-Warray-bounds" 3436 #endif 3437 MLIB_C_IMAGELOOKUPSI(mlib_s16, mlib_s32, table_base); 3438 #ifdef __GNUC__ 3439 #pragma GCC diagnostic pop 3440 #endif 3441 } 3442 3443 /***************************************************************/ 3444 void mlib_c_ImageLookUpSI_S16_U16(const mlib_s16 *src, 3445 mlib_s32 slb, 3446 mlib_u16 *dst, 3447 mlib_s32 dlb, 3448 mlib_s32 xsize, 3449 mlib_s32 ysize, 3450 mlib_s32 csize, 3451 const mlib_u16 **table) 3452 { 3453 const mlib_u16 *table_base[4]; 3454 mlib_s32 c; 3455 3456 for (c = 0; c < csize; c++) { 3457 table_base[c] = &table[c][32768]; 3458 } 3459 3460 MLIB_C_IMAGELOOKUPSI(mlib_u16, mlib_s16, table_base); 3461 } 3462 3463 /***************************************************************/ 3464 void mlib_c_ImageLookUpSI_U16_U16(const mlib_u16 *src, 3465 mlib_s32 slb, 3466 mlib_u16 *dst, 3467 mlib_s32 dlb, 3468 mlib_s32 xsize, 3469 mlib_s32 ysize, 3470 mlib_s32 csize, 3471 const mlib_u16 **table) 3472 { 3473 const mlib_u16 *table_base[4]; 3474 mlib_s32 c; 3475 3476 for (c = 0; c < csize; c++) { 3477 table_base[c] = &table[c][0]; 3478 } 3479 3480 #ifdef __GNUC__ 3481 #pragma GCC diagnostic push 3482 #pragma GCC diagnostic ignored "-Warray-bounds" 3483 #endif 3484 MLIB_C_IMAGELOOKUPSI(mlib_u16, mlib_u16, table_base); 3485 #ifdef __GNUC__ 3486 #pragma GCC diagnostic pop 3487 #endif 3488 } 3489 3490 /***************************************************************/ 3491 void mlib_c_ImageLookUpSI_S32_U16(const mlib_s32 *src, 3492 mlib_s32 slb, 3493 mlib_u16 *dst, 3494 mlib_s32 dlb, 3495 mlib_s32 xsize, 3496 mlib_s32 ysize, 3497 mlib_s32 csize, 3498 const mlib_u16 **table) 3499 { 3500 const mlib_u16 *table_base[4]; 3501 mlib_s32 c; 3502 3503 for (c = 0; c < csize; c++) { 3504 table_base[c] = &table[c][TABLE_SHIFT_S32]; 3505 } 3506 3507 #ifdef __GNUC__ 3508 #pragma GCC diagnostic push 3509 #pragma GCC diagnostic ignored "-Warray-bounds" 3510 #endif 3511 MLIB_C_IMAGELOOKUPSI(mlib_u16, mlib_s32, table_base); 3512 #ifdef __GNUC__ 3513 #pragma GCC diagnostic pop 3514 #endif 3515 } 3516 3517 /***************************************************************/ 3518 void mlib_c_ImageLookUpSI_U8_S32(const mlib_u8 *src, 3519 mlib_s32 slb, 3520 mlib_s32 *dst, 3521 mlib_s32 dlb, 3522 mlib_s32 xsize, 3523 mlib_s32 ysize, 3524 mlib_s32 csize, 3525 const mlib_s32 **table) 3526 { 3527 3528 if (xsize < 7) { 3529 MLIB_C_IMAGELOOKUPSI(mlib_s32, mlib_u8, table); 3530 } 3531 else if (csize == 2) { 3532 mlib_s32 i, j; 3533 3534 for (j = 0; j < ysize; j++, dst += dlb, src += slb) { 3535 mlib_u32 *sa; 3536 mlib_u32 *tab0 = (mlib_u32 *) table[0]; 3537 mlib_u32 *tab1 = (mlib_u32 *) table[1]; 3538 mlib_u32 s0, t0, t1, t2, t3; 3539 mlib_s32 off; 3540 mlib_s32 size = xsize; 3541 mlib_u32 *dp = (mlib_u32 *) dst; 3542 mlib_u8 *sp = (void *)src; 3543 3544 off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3); 3545 3546 for (i = 0; i < off; i++, sp++) { 3547 *dp++ = tab0[sp[0]]; 3548 *dp++ = tab1[sp[0]]; 3549 size--; 3550 } 3551 3552 sa = (mlib_u32 *) sp; 3553 3554 s0 = sa[0]; 3555 sa++; 3556 3557 #ifdef __SUNPRO_C 3558 #pragma pipeloop(0) 3559 #endif /* __SUNPRO_C */ 3560 for (i = 0; i < size - 7; i += 4, dp += 8, sa++) { 3561 #ifdef _LITTLE_ENDIAN 3562 t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC)); 3563 t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC)); 3564 t2 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC)); 3565 t3 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC)); 3566 #else 3567 t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC)); 3568 t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC)); 3569 t2 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC)); 3570 t3 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC)); 3571 #endif /* _LITTLE_ENDIAN */ 3572 dp[0] = t0; 3573 dp[1] = t1; 3574 dp[2] = t2; 3575 dp[3] = t3; 3576 #ifdef _LITTLE_ENDIAN 3577 t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC)); 3578 t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC)); 3579 t2 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC)); 3580 t3 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC)); 3581 #else 3582 t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC)); 3583 t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC)); 3584 t2 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC)); 3585 t3 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC)); 3586 #endif /* _LITTLE_ENDIAN */ 3587 s0 = sa[0]; 3588 dp[4] = t0; 3589 dp[5] = t1; 3590 dp[6] = t2; 3591 dp[7] = t3; 3592 } 3593 3594 #ifdef _LITTLE_ENDIAN 3595 t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC)); 3596 t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC)); 3597 t2 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC)); 3598 t3 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC)); 3599 #else 3600 t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC)); 3601 t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC)); 3602 t2 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC)); 3603 t3 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC)); 3604 #endif /* _LITTLE_ENDIAN */ 3605 dp[0] = t0; 3606 dp[1] = t1; 3607 dp[2] = t2; 3608 dp[3] = t3; 3609 #ifdef _LITTLE_ENDIAN 3610 t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC)); 3611 t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC)); 3612 t2 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC)); 3613 t3 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC)); 3614 #else 3615 t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC)); 3616 t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC)); 3617 t2 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC)); 3618 t3 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC)); 3619 #endif /* _LITTLE_ENDIAN */ 3620 dp[4] = t0; 3621 dp[5] = t1; 3622 dp[6] = t2; 3623 dp[7] = t3; 3624 dp += 8; 3625 sp = (mlib_u8 *) sa; 3626 i += 4; 3627 3628 for (; i < size; i++, sp++) { 3629 *dp++ = tab0[sp[0]]; 3630 *dp++ = tab1[sp[0]]; 3631 } 3632 } 3633 3634 } 3635 else if (csize == 3) { 3636 mlib_s32 i, j; 3637 3638 for (j = 0; j < ysize; j++, dst += dlb, src += slb) { 3639 mlib_u32 *sa; 3640 mlib_u32 *tab0 = (mlib_u32 *) table[0]; 3641 mlib_u32 *tab1 = (mlib_u32 *) table[1]; 3642 mlib_u32 *tab2 = (mlib_u32 *) table[2]; 3643 mlib_u32 s0, t0, t1, t2, t3, t4, t5; 3644 mlib_s32 off; 3645 mlib_s32 size = xsize; 3646 mlib_u32 *dp = (mlib_u32 *) dst; 3647 mlib_u8 *sp = (void *)src; 3648 3649 off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3); 3650 3651 for (i = 0; i < off; i++, sp++) { 3652 *dp++ = tab0[sp[0]]; 3653 *dp++ = tab1[sp[0]]; 3654 *dp++ = tab2[sp[0]]; 3655 size--; 3656 } 3657 3658 sa = (mlib_u32 *) sp; 3659 3660 s0 = sa[0]; 3661 sa++; 3662 3663 #ifdef __SUNPRO_C 3664 #pragma pipeloop(0) 3665 #endif /* __SUNPRO_C */ 3666 for (i = 0; i < size - 7; i += 4, dp += 12, sa++) { 3667 #ifdef _LITTLE_ENDIAN 3668 t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC)); 3669 t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC)); 3670 t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 << 2) & 0x3FC)); 3671 t3 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC)); 3672 t4 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC)); 3673 t5 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 6) & 0x3FC)); 3674 #else 3675 t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC)); 3676 t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC)); 3677 t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 22) & 0x3FC)); 3678 t3 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC)); 3679 t4 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC)); 3680 t5 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 14) & 0x3FC)); 3681 #endif /* _LITTLE_ENDIAN */ 3682 dp[0] = t0; 3683 dp[1] = t1; 3684 dp[2] = t2; 3685 dp[3] = t3; 3686 dp[4] = t4; 3687 dp[5] = t5; 3688 #ifdef _LITTLE_ENDIAN 3689 t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC)); 3690 t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC)); 3691 t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 14) & 0x3FC)); 3692 t3 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC)); 3693 t4 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC)); 3694 t5 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 22) & 0x3FC)); 3695 #else 3696 t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC)); 3697 t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC)); 3698 t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 6) & 0x3FC)); 3699 t3 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC)); 3700 t4 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC)); 3701 t5 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 << 2) & 0x3FC)); 3702 #endif /* _LITTLE_ENDIAN */ 3703 s0 = sa[0]; 3704 dp[6] = t0; 3705 dp[7] = t1; 3706 dp[8] = t2; 3707 dp[9] = t3; 3708 dp[10] = t4; 3709 dp[11] = t5; 3710 } 3711 3712 #ifdef _LITTLE_ENDIAN 3713 t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC)); 3714 t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC)); 3715 t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 << 2) & 0x3FC)); 3716 t3 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC)); 3717 t4 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC)); 3718 t5 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 6) & 0x3FC)); 3719 #else 3720 t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC)); 3721 t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC)); 3722 t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 22) & 0x3FC)); 3723 t3 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC)); 3724 t4 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC)); 3725 t5 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 14) & 0x3FC)); 3726 #endif /* _LITTLE_ENDIAN */ 3727 dp[0] = t0; 3728 dp[1] = t1; 3729 dp[2] = t2; 3730 dp[3] = t3; 3731 dp[4] = t4; 3732 dp[5] = t5; 3733 #ifdef _LITTLE_ENDIAN 3734 t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC)); 3735 t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC)); 3736 t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 14) & 0x3FC)); 3737 t3 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC)); 3738 t4 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC)); 3739 t5 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 22) & 0x3FC)); 3740 #else 3741 t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC)); 3742 t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC)); 3743 t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 6) & 0x3FC)); 3744 t3 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC)); 3745 t4 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC)); 3746 t5 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 << 2) & 0x3FC)); 3747 #endif /* _LITTLE_ENDIAN */ 3748 dp[6] = t0; 3749 dp[7] = t1; 3750 dp[8] = t2; 3751 dp[9] = t3; 3752 dp[10] = t4; 3753 dp[11] = t5; 3754 dp += 12; 3755 sp = (mlib_u8 *) sa; 3756 i += 4; 3757 3758 for (; i < size; i++, sp++) { 3759 *dp++ = tab0[sp[0]]; 3760 *dp++ = tab1[sp[0]]; 3761 *dp++ = tab2[sp[0]]; 3762 } 3763 } 3764 3765 } 3766 else if (csize == 4) { 3767 mlib_s32 i, j; 3768 3769 for (j = 0; j < ysize; j++, dst += dlb, src += slb) { 3770 mlib_u32 *sa; 3771 mlib_u32 *tab0 = (mlib_u32 *) table[0]; 3772 mlib_u32 *tab1 = (mlib_u32 *) table[1]; 3773 mlib_u32 *tab2 = (mlib_u32 *) table[2]; 3774 mlib_u32 *tab3 = (mlib_u32 *) table[3]; 3775 mlib_u32 s0, t0, t1, t2, t3; 3776 mlib_s32 off; 3777 mlib_s32 size = xsize; 3778 mlib_u32 *dp = (mlib_u32 *) dst; 3779 mlib_u8 *sp = (void *)src; 3780 3781 off = (mlib_s32) ((4 - ((mlib_addr) src & 3)) & 3); 3782 3783 for (i = 0; i < off; i++, sp++) { 3784 *dp++ = tab0[sp[0]]; 3785 *dp++ = tab1[sp[0]]; 3786 *dp++ = tab2[sp[0]]; 3787 *dp++ = tab3[sp[0]]; 3788 size--; 3789 } 3790 3791 sa = (mlib_u32 *) sp; 3792 3793 s0 = sa[0]; 3794 sa++; 3795 3796 #ifdef __SUNPRO_C 3797 #pragma pipeloop(0) 3798 #endif /* __SUNPRO_C */ 3799 for (i = 0; i < size - 7; i += 4, dp += 16, sa++) { 3800 #ifdef _LITTLE_ENDIAN 3801 t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC)); 3802 t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC)); 3803 t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 << 2) & 0x3FC)); 3804 t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 << 2) & 0x3FC)); 3805 #else 3806 t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC)); 3807 t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC)); 3808 t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 22) & 0x3FC)); 3809 t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 22) & 0x3FC)); 3810 #endif /* _LITTLE_ENDIAN */ 3811 dp[0] = t0; 3812 dp[1] = t1; 3813 dp[2] = t2; 3814 dp[3] = t3; 3815 #ifdef _LITTLE_ENDIAN 3816 t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC)); 3817 t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC)); 3818 t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 6) & 0x3FC)); 3819 t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 6) & 0x3FC)); 3820 #else 3821 t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC)); 3822 t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC)); 3823 t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 14) & 0x3FC)); 3824 t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 14) & 0x3FC)); 3825 #endif /* _LITTLE_ENDIAN */ 3826 dp[4] = t0; 3827 dp[5] = t1; 3828 dp[6] = t2; 3829 dp[7] = t3; 3830 #ifdef _LITTLE_ENDIAN 3831 t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC)); 3832 t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC)); 3833 t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 14) & 0x3FC)); 3834 t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 14) & 0x3FC)); 3835 #else 3836 t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC)); 3837 t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC)); 3838 t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 6) & 0x3FC)); 3839 t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 6) & 0x3FC)); 3840 #endif /* _LITTLE_ENDIAN */ 3841 dp[8] = t0; 3842 dp[9] = t1; 3843 dp[10] = t2; 3844 dp[11] = t3; 3845 #ifdef _LITTLE_ENDIAN 3846 t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC)); 3847 t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC)); 3848 t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 22) & 0x3FC)); 3849 t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 22) & 0x3FC)); 3850 #else 3851 t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC)); 3852 t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC)); 3853 t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 << 2) & 0x3FC)); 3854 t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 << 2) & 0x3FC)); 3855 #endif /* _LITTLE_ENDIAN */ 3856 s0 = sa[0]; 3857 dp[12] = t0; 3858 dp[13] = t1; 3859 dp[14] = t2; 3860 dp[15] = t3; 3861 } 3862 3863 #ifdef _LITTLE_ENDIAN 3864 t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC)); 3865 t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC)); 3866 t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 << 2) & 0x3FC)); 3867 t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 << 2) & 0x3FC)); 3868 #else 3869 t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC)); 3870 t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC)); 3871 t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 22) & 0x3FC)); 3872 t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 22) & 0x3FC)); 3873 #endif /* _LITTLE_ENDIAN */ 3874 dp[0] = t0; 3875 dp[1] = t1; 3876 dp[2] = t2; 3877 dp[3] = t3; 3878 #ifdef _LITTLE_ENDIAN 3879 t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC)); 3880 t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC)); 3881 t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 6) & 0x3FC)); 3882 t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 6) & 0x3FC)); 3883 #else 3884 t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC)); 3885 t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC)); 3886 t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 14) & 0x3FC)); 3887 t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 14) & 0x3FC)); 3888 #endif /* _LITTLE_ENDIAN */ 3889 dp[4] = t0; 3890 dp[5] = t1; 3891 dp[6] = t2; 3892 dp[7] = t3; 3893 #ifdef _LITTLE_ENDIAN 3894 t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 14) & 0x3FC)); 3895 t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 14) & 0x3FC)); 3896 t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 14) & 0x3FC)); 3897 t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 14) & 0x3FC)); 3898 #else 3899 t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 6) & 0x3FC)); 3900 t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 6) & 0x3FC)); 3901 t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 6) & 0x3FC)); 3902 t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 6) & 0x3FC)); 3903 #endif /* _LITTLE_ENDIAN */ 3904 dp[8] = t0; 3905 dp[9] = t1; 3906 dp[10] = t2; 3907 dp[11] = t3; 3908 #ifdef _LITTLE_ENDIAN 3909 t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 >> 22) & 0x3FC)); 3910 t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 >> 22) & 0x3FC)); 3911 t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 >> 22) & 0x3FC)); 3912 t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 >> 22) & 0x3FC)); 3913 #else 3914 t0 = *(mlib_u32 *) ((mlib_u8 *) tab0 + ((s0 << 2) & 0x3FC)); 3915 t1 = *(mlib_u32 *) ((mlib_u8 *) tab1 + ((s0 << 2) & 0x3FC)); 3916 t2 = *(mlib_u32 *) ((mlib_u8 *) tab2 + ((s0 << 2) & 0x3FC)); 3917 t3 = *(mlib_u32 *) ((mlib_u8 *) tab3 + ((s0 << 2) & 0x3FC)); 3918 #endif /* _LITTLE_ENDIAN */ 3919 dp[12] = t0; 3920 dp[13] = t1; 3921 dp[14] = t2; 3922 dp[15] = t3; 3923 dp += 16; 3924 sp = (mlib_u8 *) sa; 3925 i += 4; 3926 3927 for (; i < size; i++, sp++) { 3928 *dp++ = tab0[sp[0]]; 3929 *dp++ = tab1[sp[0]]; 3930 *dp++ = tab2[sp[0]]; 3931 *dp++ = tab3[sp[0]]; 3932 } 3933 } 3934 } 3935 } 3936 3937 /***************************************************************/ 3938 void mlib_c_ImageLookUpSI_S16_S32(const mlib_s16 *src, 3939 mlib_s32 slb, 3940 mlib_s32 *dst, 3941 mlib_s32 dlb, 3942 mlib_s32 xsize, 3943 mlib_s32 ysize, 3944 mlib_s32 csize, 3945 const mlib_s32 **table) 3946 { 3947 const mlib_s32 *table_base[4]; 3948 mlib_s32 c; 3949 3950 for (c = 0; c < csize; c++) { 3951 table_base[c] = &table[c][32768]; 3952 } 3953 3954 #ifdef __GNUC__ 3955 #pragma GCC diagnostic push 3956 #pragma GCC diagnostic ignored "-Warray-bounds" 3957 #endif 3958 MLIB_C_IMAGELOOKUPSI(mlib_s32, mlib_s16, table_base); 3959 #ifdef __GNUC__ 3960 #pragma GCC diagnostic pop 3961 #endif 3962 } 3963 3964 /***************************************************************/ 3965 void mlib_c_ImageLookUpSI_U16_S32(const mlib_u16 *src, 3966 mlib_s32 slb, 3967 mlib_s32 *dst, 3968 mlib_s32 dlb, 3969 mlib_s32 xsize, 3970 mlib_s32 ysize, 3971 mlib_s32 csize, 3972 const mlib_s32 **table) 3973 { 3974 const mlib_s32 *table_base[4]; 3975 mlib_s32 c; 3976 3977 for (c = 0; c < csize; c++) { 3978 table_base[c] = &table[c][0]; 3979 } 3980 3981 #ifdef __GNUC__ 3982 #pragma GCC diagnostic push 3983 #pragma GCC diagnostic ignored "-Warray-bounds" 3984 #endif 3985 MLIB_C_IMAGELOOKUPSI(mlib_s32, mlib_u16, table_base); 3986 #ifdef __GNUC__ 3987 #pragma GCC diagnostic pop 3988 #endif 3989 } 3990 3991 /***************************************************************/ 3992 void mlib_c_ImageLookUpSI_S32_S32(const mlib_s32 *src, 3993 mlib_s32 slb, 3994 mlib_s32 *dst, 3995 mlib_s32 dlb, 3996 mlib_s32 xsize, 3997 mlib_s32 ysize, 3998 mlib_s32 csize, 3999 const mlib_s32 **table) 4000 { 4001 const mlib_s32 *table_base[4]; 4002 mlib_s32 c; 4003 4004 for (c = 0; c < csize; c++) { 4005 table_base[c] = &table[c][TABLE_SHIFT_S32]; 4006 } 4007 4008 MLIB_C_IMAGELOOKUPSI(mlib_s32, mlib_s32, table_base); 4009 } 4010 4011 /***************************************************************/