1 /* 2 * jfdctint.c 3 * 4 * Copyright (C) 1991-1996, Thomas G. Lane. 5 * Modification developed 2003-2009 by Guido Vollbeding. 6 * This file is part of the Independent JPEG Group's software. 7 * For conditions of distribution and use, see the accompanying README file. 8 * 9 * This file contains a slow-but-accurate integer implementation of the 10 * forward DCT (Discrete Cosine Transform). 11 * 12 * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT 13 * on each column. Direct algorithms are also available, but they are 14 * much more complex and seem not to be any faster when reduced to code. 15 * 16 * This implementation is based on an algorithm described in 17 * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT 18 * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics, 19 * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991. 20 * The primary algorithm described there uses 11 multiplies and 29 adds. 21 * We use their alternate method with 12 multiplies and 32 adds. 22 * The advantage of this method is that no data path contains more than one 23 * multiplication; this allows a very simple and accurate implementation in 24 * scaled fixed-point arithmetic, with a minimal number of shifts. 25 * 26 * We also provide FDCT routines with various input sample block sizes for 27 * direct resolution reduction or enlargement and for direct resolving the 28 * common 2x1 and 1x2 subsampling cases without additional resampling: NxN 29 * (N=1...16), 2NxN, and Nx2N (N=1...8) pixels for one 8x8 output DCT block. 30 * 31 * For N<8 we fill the remaining block coefficients with zero. 32 * For N>8 we apply a partial N-point FDCT on the input samples, computing 33 * just the lower 8 frequency coefficients and discarding the rest. 34 * 35 * We must scale the output coefficients of the N-point FDCT appropriately 36 * to the standard 8-point FDCT level by 8/N per 1-D pass. 
This scaling 37 * is folded into the constant multipliers (pass 2) and/or final/initial 38 * shifting. 39 * 40 * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases 41 * since there would be too many additional constants to pre-calculate. 42 */ 43 44 #define JPEG_INTERNALS 45 #include "jinclude.h" 46 #include "jpeglib.h" 47 #include "jdct.h" /* Private declarations for DCT subsystem */ 48 49 #ifdef DCT_ISLOW_SUPPORTED 50 51 52 /* 53 * This module is specialized to the case DCTSIZE = 8. 54 */ 55 56 #if DCTSIZE != 8 57 Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */ 58 #endif 59 60 61 /* 62 * The poop on this scaling stuff is as follows: 63 * 64 * Each 1-D DCT step produces outputs which are a factor of sqrt(N) 65 * larger than the true DCT outputs. The final outputs are therefore 66 * a factor of N larger than desired; since N=8 this can be cured by 67 * a simple right shift at the end of the algorithm. The advantage of 68 * this arrangement is that we save two multiplications per 1-D DCT, 69 * because the y0 and y4 outputs need not be divided by sqrt(N). 70 * In the IJG code, this factor of 8 is removed by the quantization step 71 * (in jcdctmgr.c), NOT in this module. 72 * 73 * We have to do addition and subtraction of the integer inputs, which 74 * is no problem, and multiplication by fractional constants, which is 75 * a problem to do in integer arithmetic. We multiply all the constants 76 * by CONST_SCALE and convert them to integer constants (thus retaining 77 * CONST_BITS bits of precision in the constants). After doing a 78 * multiplication we have to divide the product by CONST_SCALE, with proper 79 * rounding, to produce the correct output. This division can be done 80 * cheaply as a right shift of CONST_BITS bits. We postpone shifting 81 * as long as possible so that partial sums can be added together with 82 * full fractional precision. 
83 * 84 * The outputs of the first pass are scaled up by PASS1_BITS bits so that 85 * they are represented to better-than-integral precision. These outputs 86 * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word 87 * with the recommended scaling. (For 12-bit sample data, the intermediate 88 * array is INT32 anyway.) 89 * 90 * To avoid overflow of the 32-bit intermediate results in pass 2, we must 91 * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis 92 * shows that the values given below are the most effective. 93 */ 94 95 #if BITS_IN_JSAMPLE == 8 96 #define CONST_BITS 13 97 #define PASS1_BITS 2 98 #else 99 #define CONST_BITS 13 100 #define PASS1_BITS 1 /* lose a little precision to avoid overflow */ 101 #endif 102 103 /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus 104 * causing a lot of useless floating-point operations at run time. 105 * To get around this we use the following pre-calculated constants. 106 * If you change CONST_BITS you may want to add appropriate values. 107 * (With a reasonable C compiler, you can just rely on the FIX() macro...) 
108 */ 109 110 #if CONST_BITS == 13 111 #define FIX_0_298631336 ((INT32) 2446) /* FIX(0.298631336) */ 112 #define FIX_0_390180644 ((INT32) 3196) /* FIX(0.390180644) */ 113 #define FIX_0_541196100 ((INT32) 4433) /* FIX(0.541196100) */ 114 #define FIX_0_765366865 ((INT32) 6270) /* FIX(0.765366865) */ 115 #define FIX_0_899976223 ((INT32) 7373) /* FIX(0.899976223) */ 116 #define FIX_1_175875602 ((INT32) 9633) /* FIX(1.175875602) */ 117 #define FIX_1_501321110 ((INT32) 12299) /* FIX(1.501321110) */ 118 #define FIX_1_847759065 ((INT32) 15137) /* FIX(1.847759065) */ 119 #define FIX_1_961570560 ((INT32) 16069) /* FIX(1.961570560) */ 120 #define FIX_2_053119869 ((INT32) 16819) /* FIX(2.053119869) */ 121 #define FIX_2_562915447 ((INT32) 20995) /* FIX(2.562915447) */ 122 #define FIX_3_072711026 ((INT32) 25172) /* FIX(3.072711026) */ 123 #else 124 #define FIX_0_298631336 FIX(0.298631336) 125 #define FIX_0_390180644 FIX(0.390180644) 126 #define FIX_0_541196100 FIX(0.541196100) 127 #define FIX_0_765366865 FIX(0.765366865) 128 #define FIX_0_899976223 FIX(0.899976223) 129 #define FIX_1_175875602 FIX(1.175875602) 130 #define FIX_1_501321110 FIX(1.501321110) 131 #define FIX_1_847759065 FIX(1.847759065) 132 #define FIX_1_961570560 FIX(1.961570560) 133 #define FIX_2_053119869 FIX(2.053119869) 134 #define FIX_2_562915447 FIX(2.562915447) 135 #define FIX_3_072711026 FIX(3.072711026) 136 #endif 137 138 139 /* Multiply an INT32 variable by an INT32 constant to yield an INT32 result. 140 * For 8-bit samples with the recommended scaling, all the variable 141 * and constant values involved are no more than 16 bits wide, so a 142 * 16x16->32 bit multiply can be used instead of a full 32x32 multiply. 143 * For 12-bit samples, a full 32-bit multiplication will be needed. 
144 */ 145 146 #if BITS_IN_JSAMPLE == 8 147 #define MULTIPLY(var,const) MULTIPLY16C16(var,const) 148 #else 149 #define MULTIPLY(var,const) ((var) * (const)) 150 #endif 151 152 153 /* 154 * Perform the forward DCT on one block of samples. 155 */ 156 157 GLOBAL(void) 158 jpeg_fdct_islow (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 159 { 160 INT32 tmp0, tmp1, tmp2, tmp3; 161 INT32 tmp10, tmp11, tmp12, tmp13; 162 INT32 z1; 163 DCTELEM *dataptr; 164 JSAMPROW elemptr; 165 int ctr; 166 SHIFT_TEMPS 167 168 /* Pass 1: process rows. */ 169 /* Note results are scaled up by sqrt(8) compared to a true DCT; */ 170 /* furthermore, we scale the results by 2**PASS1_BITS. */ 171 172 dataptr = data; 173 for (ctr = 0; ctr < DCTSIZE; ctr++) { 174 elemptr = sample_data[ctr] + start_col; 175 176 /* Even part per LL&M figure 1 --- note that published figure is faulty; 177 * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". 178 */ 179 180 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]); 181 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]); 182 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]); 183 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]); 184 185 tmp10 = tmp0 + tmp3; 186 tmp12 = tmp0 - tmp3; 187 tmp11 = tmp1 + tmp2; 188 tmp13 = tmp1 - tmp2; 189 190 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]); 191 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]); 192 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]); 193 tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]); 194 195 /* Apply unsigned->signed conversion */ 196 dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << PASS1_BITS); 197 dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS); 198 199 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); 200 /* Add fudge factor here for final descale. 
*/ 201 z1 += ONE << (CONST_BITS-PASS1_BITS-1); 202 dataptr[2] = (DCTELEM) RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), 203 CONST_BITS-PASS1_BITS); 204 dataptr[6] = (DCTELEM) RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), 205 CONST_BITS-PASS1_BITS); 206 207 /* Odd part per figure 8 --- note paper omits factor of sqrt(2). 208 * cK represents sqrt(2) * cos(K*pi/16). 209 * i0..i3 in the paper are tmp0..tmp3 here. 210 */ 211 212 tmp10 = tmp0 + tmp3; 213 tmp11 = tmp1 + tmp2; 214 tmp12 = tmp0 + tmp2; 215 tmp13 = tmp1 + tmp3; 216 z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ 217 /* Add fudge factor here for final descale. */ 218 z1 += ONE << (CONST_BITS-PASS1_BITS-1); 219 220 tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ 221 tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ 222 tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ 223 tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ 224 tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */ 225 tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */ 226 tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */ 227 tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ 228 229 tmp12 += z1; 230 tmp13 += z1; 231 232 dataptr[1] = (DCTELEM) 233 RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS-PASS1_BITS); 234 dataptr[3] = (DCTELEM) 235 RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS-PASS1_BITS); 236 dataptr[5] = (DCTELEM) 237 RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS-PASS1_BITS); 238 dataptr[7] = (DCTELEM) 239 RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS-PASS1_BITS); 240 241 dataptr += DCTSIZE; /* advance pointer to next row */ 242 } 243 244 /* Pass 2: process columns. 245 * We remove the PASS1_BITS scaling, but leave the results scaled up 246 * by an overall factor of 8. 247 */ 248 249 dataptr = data; 250 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { 251 /* Even part per LL&M figure 1 --- note that published figure is faulty; 252 * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". 
253 */ 254 255 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; 256 tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6]; 257 tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5]; 258 tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4]; 259 260 /* Add fudge factor here for final descale. */ 261 tmp10 = tmp0 + tmp3 + (ONE << (PASS1_BITS-1)); 262 tmp12 = tmp0 - tmp3; 263 tmp11 = tmp1 + tmp2; 264 tmp13 = tmp1 - tmp2; 265 266 tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7]; 267 tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6]; 268 tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5]; 269 tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; 270 271 dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp10 + tmp11, PASS1_BITS); 272 dataptr[DCTSIZE*4] = (DCTELEM) RIGHT_SHIFT(tmp10 - tmp11, PASS1_BITS); 273 274 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); 275 /* Add fudge factor here for final descale. */ 276 z1 += ONE << (CONST_BITS+PASS1_BITS-1); 277 dataptr[DCTSIZE*2] = (DCTELEM) 278 RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), CONST_BITS+PASS1_BITS); 279 dataptr[DCTSIZE*6] = (DCTELEM) 280 RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), CONST_BITS+PASS1_BITS); 281 282 /* Odd part per figure 8 --- note paper omits factor of sqrt(2). 283 * cK represents sqrt(2) * cos(K*pi/16). 284 * i0..i3 in the paper are tmp0..tmp3 here. 285 */ 286 287 tmp10 = tmp0 + tmp3; 288 tmp11 = tmp1 + tmp2; 289 tmp12 = tmp0 + tmp2; 290 tmp13 = tmp1 + tmp3; 291 z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ 292 /* Add fudge factor here for final descale. 
*/ 293 z1 += ONE << (CONST_BITS+PASS1_BITS-1); 294 295 tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ 296 tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ 297 tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ 298 tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ 299 tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */ 300 tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */ 301 tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */ 302 tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ 303 304 tmp12 += z1; 305 tmp13 += z1; 306 307 dataptr[DCTSIZE*1] = (DCTELEM) 308 RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS+PASS1_BITS); 309 dataptr[DCTSIZE*3] = (DCTELEM) 310 RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS+PASS1_BITS); 311 dataptr[DCTSIZE*5] = (DCTELEM) 312 RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS+PASS1_BITS); 313 dataptr[DCTSIZE*7] = (DCTELEM) 314 RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS+PASS1_BITS); 315 316 dataptr++; /* advance pointer to next column */ 317 } 318 } 319 320 #ifdef DCT_SCALING_SUPPORTED 321 322 323 /* 324 * Perform the forward DCT on a 7x7 sample block. 325 */ 326 327 GLOBAL(void) 328 jpeg_fdct_7x7 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 329 { 330 INT32 tmp0, tmp1, tmp2, tmp3; 331 INT32 tmp10, tmp11, tmp12; 332 INT32 z1, z2, z3; 333 DCTELEM *dataptr; 334 JSAMPROW elemptr; 335 int ctr; 336 SHIFT_TEMPS 337 338 /* Pre-zero output coefficient block. */ 339 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); 340 341 /* Pass 1: process rows. */ 342 /* Note results are scaled up by sqrt(8) compared to a true DCT; */ 343 /* furthermore, we scale the results by 2**PASS1_BITS. */ 344 /* cK represents sqrt(2) * cos(K*pi/14). 
*/ 345 346 dataptr = data; 347 for (ctr = 0; ctr < 7; ctr++) { 348 elemptr = sample_data[ctr] + start_col; 349 350 /* Even part */ 351 352 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[6]); 353 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[5]); 354 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[4]); 355 tmp3 = GETJSAMPLE(elemptr[3]); 356 357 tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[6]); 358 tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[5]); 359 tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[4]); 360 361 z1 = tmp0 + tmp2; 362 /* Apply unsigned->signed conversion */ 363 dataptr[0] = (DCTELEM) 364 ((z1 + tmp1 + tmp3 - 7 * CENTERJSAMPLE) << PASS1_BITS); 365 tmp3 += tmp3; 366 z1 -= tmp3; 367 z1 -= tmp3; 368 z1 = MULTIPLY(z1, FIX(0.353553391)); /* (c2+c6-c4)/2 */ 369 z2 = MULTIPLY(tmp0 - tmp2, FIX(0.920609002)); /* (c2+c4-c6)/2 */ 370 z3 = MULTIPLY(tmp1 - tmp2, FIX(0.314692123)); /* c6 */ 371 dataptr[2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS-PASS1_BITS); 372 z1 -= z2; 373 z2 = MULTIPLY(tmp0 - tmp1, FIX(0.881747734)); /* c4 */ 374 dataptr[4] = (DCTELEM) 375 DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.707106781)), /* c2+c6-c4 */ 376 CONST_BITS-PASS1_BITS); 377 dataptr[6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS-PASS1_BITS); 378 379 /* Odd part */ 380 381 tmp1 = MULTIPLY(tmp10 + tmp11, FIX(0.935414347)); /* (c3+c1-c5)/2 */ 382 tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.170262339)); /* (c3+c5-c1)/2 */ 383 tmp0 = tmp1 - tmp2; 384 tmp1 += tmp2; 385 tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.378756276)); /* -c1 */ 386 tmp1 += tmp2; 387 tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.613604268)); /* c5 */ 388 tmp0 += tmp3; 389 tmp2 += tmp3 + MULTIPLY(tmp12, FIX(1.870828693)); /* c3+c1-c5 */ 390 391 dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-PASS1_BITS); 392 dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-PASS1_BITS); 393 dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-PASS1_BITS); 394 395 dataptr += DCTSIZE; /* advance pointer to next row */ 396 } 397 
398 /* Pass 2: process columns. 399 * We remove the PASS1_BITS scaling, but leave the results scaled up 400 * by an overall factor of 8. 401 * We must also scale the output by (8/7)**2 = 64/49, which we fold 402 * into the constant multipliers: 403 * cK now represents sqrt(2) * cos(K*pi/14) * 64/49. 404 */ 405 406 dataptr = data; 407 for (ctr = 0; ctr < 7; ctr++) { 408 /* Even part */ 409 410 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*6]; 411 tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*5]; 412 tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*4]; 413 tmp3 = dataptr[DCTSIZE*3]; 414 415 tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*6]; 416 tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*5]; 417 tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*4]; 418 419 z1 = tmp0 + tmp2; 420 dataptr[DCTSIZE*0] = (DCTELEM) 421 DESCALE(MULTIPLY(z1 + tmp1 + tmp3, FIX(1.306122449)), /* 64/49 */ 422 CONST_BITS+PASS1_BITS); 423 tmp3 += tmp3; 424 z1 -= tmp3; 425 z1 -= tmp3; 426 z1 = MULTIPLY(z1, FIX(0.461784020)); /* (c2+c6-c4)/2 */ 427 z2 = MULTIPLY(tmp0 - tmp2, FIX(1.202428084)); /* (c2+c4-c6)/2 */ 428 z3 = MULTIPLY(tmp1 - tmp2, FIX(0.411026446)); /* c6 */ 429 dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS+PASS1_BITS); 430 z1 -= z2; 431 z2 = MULTIPLY(tmp0 - tmp1, FIX(1.151670509)); /* c4 */ 432 dataptr[DCTSIZE*4] = (DCTELEM) 433 DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.923568041)), /* c2+c6-c4 */ 434 CONST_BITS+PASS1_BITS); 435 dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS+PASS1_BITS); 436 437 /* Odd part */ 438 439 tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.221765677)); /* (c3+c1-c5)/2 */ 440 tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.222383464)); /* (c3+c5-c1)/2 */ 441 tmp0 = tmp1 - tmp2; 442 tmp1 += tmp2; 443 tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.800824523)); /* -c1 */ 444 tmp1 += tmp2; 445 tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.801442310)); /* c5 */ 446 tmp0 += tmp3; 447 tmp2 += tmp3 + MULTIPLY(tmp12, FIX(2.443531355)); /* c3+c1-c5 */ 448 449 dataptr[DCTSIZE*1] = (DCTELEM) 
DESCALE(tmp0, CONST_BITS+PASS1_BITS); 450 dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+PASS1_BITS); 451 dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+PASS1_BITS); 452 453 dataptr++; /* advance pointer to next column */ 454 } 455 } 456 457 458 /* 459 * Perform the forward DCT on a 6x6 sample block. 460 */ 461 462 GLOBAL(void) 463 jpeg_fdct_6x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 464 { 465 INT32 tmp0, tmp1, tmp2; 466 INT32 tmp10, tmp11, tmp12; 467 DCTELEM *dataptr; 468 JSAMPROW elemptr; 469 int ctr; 470 SHIFT_TEMPS 471 472 /* Pre-zero output coefficient block. */ 473 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); 474 475 /* Pass 1: process rows. */ 476 /* Note results are scaled up by sqrt(8) compared to a true DCT; */ 477 /* furthermore, we scale the results by 2**PASS1_BITS. */ 478 /* cK represents sqrt(2) * cos(K*pi/12). */ 479 480 dataptr = data; 481 for (ctr = 0; ctr < 6; ctr++) { 482 elemptr = sample_data[ctr] + start_col; 483 484 /* Even part */ 485 486 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]); 487 tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]); 488 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]); 489 490 tmp10 = tmp0 + tmp2; 491 tmp12 = tmp0 - tmp2; 492 493 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]); 494 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]); 495 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]); 496 497 /* Apply unsigned->signed conversion */ 498 dataptr[0] = (DCTELEM) 499 ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << PASS1_BITS); 500 dataptr[2] = (DCTELEM) 501 DESCALE(MULTIPLY(tmp12, FIX(1.224744871)), /* c2 */ 502 CONST_BITS-PASS1_BITS); 503 dataptr[4] = (DCTELEM) 504 DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */ 505 CONST_BITS-PASS1_BITS); 506 507 /* Odd part */ 508 509 tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)), /* c5 */ 510 CONST_BITS-PASS1_BITS); 511 512 dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << PASS1_BITS)); 
513 dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << PASS1_BITS); 514 dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << PASS1_BITS)); 515 516 dataptr += DCTSIZE; /* advance pointer to next row */ 517 } 518 519 /* Pass 2: process columns. 520 * We remove the PASS1_BITS scaling, but leave the results scaled up 521 * by an overall factor of 8. 522 * We must also scale the output by (8/6)**2 = 16/9, which we fold 523 * into the constant multipliers: 524 * cK now represents sqrt(2) * cos(K*pi/12) * 16/9. 525 */ 526 527 dataptr = data; 528 for (ctr = 0; ctr < 6; ctr++) { 529 /* Even part */ 530 531 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5]; 532 tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4]; 533 tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3]; 534 535 tmp10 = tmp0 + tmp2; 536 tmp12 = tmp0 - tmp2; 537 538 tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5]; 539 tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4]; 540 tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3]; 541 542 dataptr[DCTSIZE*0] = (DCTELEM) 543 DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)), /* 16/9 */ 544 CONST_BITS+PASS1_BITS); 545 dataptr[DCTSIZE*2] = (DCTELEM) 546 DESCALE(MULTIPLY(tmp12, FIX(2.177324216)), /* c2 */ 547 CONST_BITS+PASS1_BITS); 548 dataptr[DCTSIZE*4] = (DCTELEM) 549 DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */ 550 CONST_BITS+PASS1_BITS); 551 552 /* Odd part */ 553 554 tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829)); /* c5 */ 555 556 dataptr[DCTSIZE*1] = (DCTELEM) 557 DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)), /* 16/9 */ 558 CONST_BITS+PASS1_BITS); 559 dataptr[DCTSIZE*3] = (DCTELEM) 560 DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)), /* 16/9 */ 561 CONST_BITS+PASS1_BITS); 562 dataptr[DCTSIZE*5] = (DCTELEM) 563 DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)), /* 16/9 */ 564 CONST_BITS+PASS1_BITS); 565 566 dataptr++; /* advance pointer to next column */ 567 } 568 } 569 570 571 /* 572 * Perform the forward DCT on a 5x5 sample block. 
573 */ 574 575 GLOBAL(void) 576 jpeg_fdct_5x5 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 577 { 578 INT32 tmp0, tmp1, tmp2; 579 INT32 tmp10, tmp11; 580 DCTELEM *dataptr; 581 JSAMPROW elemptr; 582 int ctr; 583 SHIFT_TEMPS 584 585 /* Pre-zero output coefficient block. */ 586 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); 587 588 /* Pass 1: process rows. */ 589 /* Note results are scaled up by sqrt(8) compared to a true DCT; */ 590 /* furthermore, we scale the results by 2**PASS1_BITS. */ 591 /* We scale the results further by 2 as part of output adaption */ 592 /* scaling for different DCT size. */ 593 /* cK represents sqrt(2) * cos(K*pi/10). */ 594 595 dataptr = data; 596 for (ctr = 0; ctr < 5; ctr++) { 597 elemptr = sample_data[ctr] + start_col; 598 599 /* Even part */ 600 601 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[4]); 602 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[3]); 603 tmp2 = GETJSAMPLE(elemptr[2]); 604 605 tmp10 = tmp0 + tmp1; 606 tmp11 = tmp0 - tmp1; 607 608 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[4]); 609 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[3]); 610 611 /* Apply unsigned->signed conversion */ 612 dataptr[0] = (DCTELEM) 613 ((tmp10 + tmp2 - 5 * CENTERJSAMPLE) << (PASS1_BITS+1)); 614 tmp11 = MULTIPLY(tmp11, FIX(0.790569415)); /* (c2+c4)/2 */ 615 tmp10 -= tmp2 << 2; 616 tmp10 = MULTIPLY(tmp10, FIX(0.353553391)); /* (c2-c4)/2 */ 617 dataptr[2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS-PASS1_BITS-1); 618 dataptr[4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS-PASS1_BITS-1); 619 620 /* Odd part */ 621 622 tmp10 = MULTIPLY(tmp0 + tmp1, FIX(0.831253876)); /* c3 */ 623 624 dataptr[1] = (DCTELEM) 625 DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.513743148)), /* c1-c3 */ 626 CONST_BITS-PASS1_BITS-1); 627 dataptr[3] = (DCTELEM) 628 DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.176250899)), /* c1+c3 */ 629 CONST_BITS-PASS1_BITS-1); 630 631 dataptr += DCTSIZE; /* advance pointer to next row */ 632 } 633 634 /* Pass 2: 
process columns. 635 * We remove the PASS1_BITS scaling, but leave the results scaled up 636 * by an overall factor of 8. 637 * We must also scale the output by (8/5)**2 = 64/25, which we partially 638 * fold into the constant multipliers (other part was done in pass 1): 639 * cK now represents sqrt(2) * cos(K*pi/10) * 32/25. 640 */ 641 642 dataptr = data; 643 for (ctr = 0; ctr < 5; ctr++) { 644 /* Even part */ 645 646 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*4]; 647 tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*3]; 648 tmp2 = dataptr[DCTSIZE*2]; 649 650 tmp10 = tmp0 + tmp1; 651 tmp11 = tmp0 - tmp1; 652 653 tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*4]; 654 tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*3]; 655 656 dataptr[DCTSIZE*0] = (DCTELEM) 657 DESCALE(MULTIPLY(tmp10 + tmp2, FIX(1.28)), /* 32/25 */ 658 CONST_BITS+PASS1_BITS); 659 tmp11 = MULTIPLY(tmp11, FIX(1.011928851)); /* (c2+c4)/2 */ 660 tmp10 -= tmp2 << 2; 661 tmp10 = MULTIPLY(tmp10, FIX(0.452548340)); /* (c2-c4)/2 */ 662 dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS+PASS1_BITS); 663 dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS+PASS1_BITS); 664 665 /* Odd part */ 666 667 tmp10 = MULTIPLY(tmp0 + tmp1, FIX(1.064004961)); /* c3 */ 668 669 dataptr[DCTSIZE*1] = (DCTELEM) 670 DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.657591230)), /* c1-c3 */ 671 CONST_BITS+PASS1_BITS); 672 dataptr[DCTSIZE*3] = (DCTELEM) 673 DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.785601151)), /* c1+c3 */ 674 CONST_BITS+PASS1_BITS); 675 676 dataptr++; /* advance pointer to next column */ 677 } 678 } 679 680 681 /* 682 * Perform the forward DCT on a 4x4 sample block. 683 */ 684 685 GLOBAL(void) 686 jpeg_fdct_4x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 687 { 688 INT32 tmp0, tmp1; 689 INT32 tmp10, tmp11; 690 DCTELEM *dataptr; 691 JSAMPROW elemptr; 692 int ctr; 693 SHIFT_TEMPS 694 695 /* Pre-zero output coefficient block. 
*/ 696 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); 697 698 /* Pass 1: process rows. */ 699 /* Note results are scaled up by sqrt(8) compared to a true DCT; */ 700 /* furthermore, we scale the results by 2**PASS1_BITS. */ 701 /* We must also scale the output by (8/4)**2 = 2**2, which we add here. */ 702 /* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT]. */ 703 704 dataptr = data; 705 for (ctr = 0; ctr < 4; ctr++) { 706 elemptr = sample_data[ctr] + start_col; 707 708 /* Even part */ 709 710 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]); 711 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]); 712 713 tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]); 714 tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]); 715 716 /* Apply unsigned->signed conversion */ 717 dataptr[0] = (DCTELEM) 718 ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+2)); 719 dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+2)); 720 721 /* Odd part */ 722 723 tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */ 724 /* Add fudge factor here for final descale. */ 725 tmp0 += ONE << (CONST_BITS-PASS1_BITS-3); 726 727 dataptr[1] = (DCTELEM) 728 RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */ 729 CONST_BITS-PASS1_BITS-2); 730 dataptr[3] = (DCTELEM) 731 RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */ 732 CONST_BITS-PASS1_BITS-2); 733 734 dataptr += DCTSIZE; /* advance pointer to next row */ 735 } 736 737 /* Pass 2: process columns. 738 * We remove the PASS1_BITS scaling, but leave the results scaled up 739 * by an overall factor of 8. 740 */ 741 742 dataptr = data; 743 for (ctr = 0; ctr < 4; ctr++) { 744 /* Even part */ 745 746 /* Add fudge factor here for final descale. 
*/ 747 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3] + (ONE << (PASS1_BITS-1)); 748 tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2]; 749 750 tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3]; 751 tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2]; 752 753 dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp0 + tmp1, PASS1_BITS); 754 dataptr[DCTSIZE*2] = (DCTELEM) RIGHT_SHIFT(tmp0 - tmp1, PASS1_BITS); 755 756 /* Odd part */ 757 758 tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */ 759 /* Add fudge factor here for final descale. */ 760 tmp0 += ONE << (CONST_BITS+PASS1_BITS-1); 761 762 dataptr[DCTSIZE*1] = (DCTELEM) 763 RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */ 764 CONST_BITS+PASS1_BITS); 765 dataptr[DCTSIZE*3] = (DCTELEM) 766 RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */ 767 CONST_BITS+PASS1_BITS); 768 769 dataptr++; /* advance pointer to next column */ 770 } 771 } 772 773 774 /* 775 * Perform the forward DCT on a 3x3 sample block. 776 */ 777 778 GLOBAL(void) 779 jpeg_fdct_3x3 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 780 { 781 INT32 tmp0, tmp1, tmp2; 782 DCTELEM *dataptr; 783 JSAMPROW elemptr; 784 int ctr; 785 SHIFT_TEMPS 786 787 /* Pre-zero output coefficient block. */ 788 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); 789 790 /* Pass 1: process rows. */ 791 /* Note results are scaled up by sqrt(8) compared to a true DCT; */ 792 /* furthermore, we scale the results by 2**PASS1_BITS. */ 793 /* We scale the results further by 2**2 as part of output adaption */ 794 /* scaling for different DCT size. */ 795 /* cK represents sqrt(2) * cos(K*pi/6). 
*/ 796 797 dataptr = data; 798 for (ctr = 0; ctr < 3; ctr++) { 799 elemptr = sample_data[ctr] + start_col; 800 801 /* Even part */ 802 803 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[2]); 804 tmp1 = GETJSAMPLE(elemptr[1]); 805 806 tmp2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[2]); 807 808 /* Apply unsigned->signed conversion */ 809 dataptr[0] = (DCTELEM) 810 ((tmp0 + tmp1 - 3 * CENTERJSAMPLE) << (PASS1_BITS+2)); 811 dataptr[2] = (DCTELEM) 812 DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(0.707106781)), /* c2 */ 813 CONST_BITS-PASS1_BITS-2); 814 815 /* Odd part */ 816 817 dataptr[1] = (DCTELEM) 818 DESCALE(MULTIPLY(tmp2, FIX(1.224744871)), /* c1 */ 819 CONST_BITS-PASS1_BITS-2); 820 821 dataptr += DCTSIZE; /* advance pointer to next row */ 822 } 823 824 /* Pass 2: process columns. 825 * We remove the PASS1_BITS scaling, but leave the results scaled up 826 * by an overall factor of 8. 827 * We must also scale the output by (8/3)**2 = 64/9, which we partially 828 * fold into the constant multipliers (other part was done in pass 1): 829 * cK now represents sqrt(2) * cos(K*pi/6) * 16/9. 830 */ 831 832 dataptr = data; 833 for (ctr = 0; ctr < 3; ctr++) { 834 /* Even part */ 835 836 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*2]; 837 tmp1 = dataptr[DCTSIZE*1]; 838 839 tmp2 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*2]; 840 841 dataptr[DCTSIZE*0] = (DCTELEM) 842 DESCALE(MULTIPLY(tmp0 + tmp1, FIX(1.777777778)), /* 16/9 */ 843 CONST_BITS+PASS1_BITS); 844 dataptr[DCTSIZE*2] = (DCTELEM) 845 DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(1.257078722)), /* c2 */ 846 CONST_BITS+PASS1_BITS); 847 848 /* Odd part */ 849 850 dataptr[DCTSIZE*1] = (DCTELEM) 851 DESCALE(MULTIPLY(tmp2, FIX(2.177324216)), /* c1 */ 852 CONST_BITS+PASS1_BITS); 853 854 dataptr++; /* advance pointer to next column */ 855 } 856 } 857 858 859 /* 860 * Perform the forward DCT on a 2x2 sample block. 
861 */ 862 863 GLOBAL(void) 864 jpeg_fdct_2x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 865 { 866 INT32 tmp0, tmp1, tmp2, tmp3; 867 JSAMPROW elemptr; 868 869 /* Pre-zero output coefficient block. */ 870 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); 871 872 /* Pass 1: process rows. */ 873 /* Note results are scaled up by sqrt(8) compared to a true DCT. */ 874 875 /* Row 0 */ 876 elemptr = sample_data[0] + start_col; 877 878 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[1]); 879 tmp1 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[1]); 880 881 /* Row 1 */ 882 elemptr = sample_data[1] + start_col; 883 884 tmp2 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[1]); 885 tmp3 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[1]); 886 887 /* Pass 2: process columns. 888 * We leave the results scaled up by an overall factor of 8. 889 * We must also scale the output by (8/2)**2 = 2**4. 890 */ 891 892 /* Column 0 */ 893 /* Apply unsigned->signed conversion */ 894 data[DCTSIZE*0] = (DCTELEM) ((tmp0 + tmp2 - 4 * CENTERJSAMPLE) << 4); 895 data[DCTSIZE*1] = (DCTELEM) ((tmp0 - tmp2) << 4); 896 897 /* Column 1 */ 898 data[DCTSIZE*0+1] = (DCTELEM) ((tmp1 + tmp3) << 4); 899 data[DCTSIZE*1+1] = (DCTELEM) ((tmp1 - tmp3) << 4); 900 } 901 902 903 /* 904 * Perform the forward DCT on a 1x1 sample block. 905 */ 906 907 GLOBAL(void) 908 jpeg_fdct_1x1 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 909 { 910 /* Pre-zero output coefficient block. */ 911 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); 912 913 /* We leave the result scaled up by an overall factor of 8. */ 914 /* We must also scale the output by (8/1)**2 = 2**6. */ 915 /* Apply unsigned->signed conversion */ 916 data[0] = (DCTELEM) 917 ((GETJSAMPLE(sample_data[0][start_col]) - CENTERJSAMPLE) << 6); 918 } 919 920 921 /* 922 * Perform the forward DCT on a 9x9 sample block. 
923 */ 924 925 GLOBAL(void) 926 jpeg_fdct_9x9 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 927 { 928 INT32 tmp0, tmp1, tmp2, tmp3, tmp4; 929 INT32 tmp10, tmp11, tmp12, tmp13; 930 INT32 z1, z2; 931 DCTELEM workspace[8]; 932 DCTELEM *dataptr; 933 DCTELEM *wsptr; 934 JSAMPROW elemptr; 935 int ctr; 936 SHIFT_TEMPS 937 938 /* Pass 1: process rows. */ 939 /* Note results are scaled up by sqrt(8) compared to a true DCT; */ 940 /* we scale the results further by 2 as part of output adaption */ 941 /* scaling for different DCT size. */ 942 /* cK represents sqrt(2) * cos(K*pi/18). */ 943 944 dataptr = data; 945 ctr = 0; 946 for (;;) { 947 elemptr = sample_data[ctr] + start_col; 948 949 /* Even part */ 950 951 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[8]); 952 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[7]); 953 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[6]); 954 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[5]); 955 tmp4 = GETJSAMPLE(elemptr[4]); 956 957 tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[8]); 958 tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[7]); 959 tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[6]); 960 tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[5]); 961 962 z1 = tmp0 + tmp2 + tmp3; 963 z2 = tmp1 + tmp4; 964 /* Apply unsigned->signed conversion */ 965 dataptr[0] = (DCTELEM) ((z1 + z2 - 9 * CENTERJSAMPLE) << 1); 966 dataptr[6] = (DCTELEM) 967 DESCALE(MULTIPLY(z1 - z2 - z2, FIX(0.707106781)), /* c6 */ 968 CONST_BITS-1); 969 z1 = MULTIPLY(tmp0 - tmp2, FIX(1.328926049)); /* c2 */ 970 z2 = MULTIPLY(tmp1 - tmp4 - tmp4, FIX(0.707106781)); /* c6 */ 971 dataptr[2] = (DCTELEM) 972 DESCALE(MULTIPLY(tmp2 - tmp3, FIX(1.083350441)) /* c4 */ 973 + z1 + z2, CONST_BITS-1); 974 dataptr[4] = (DCTELEM) 975 DESCALE(MULTIPLY(tmp3 - tmp0, FIX(0.245575608)) /* c8 */ 976 + z1 - z2, CONST_BITS-1); 977 978 /* Odd part */ 979 980 dataptr[3] = (DCTELEM) 981 DESCALE(MULTIPLY(tmp10 - tmp12 - tmp13, FIX(1.224744871)), /* 
c3 */ 982 CONST_BITS-1); 983 984 tmp11 = MULTIPLY(tmp11, FIX(1.224744871)); /* c3 */ 985 tmp0 = MULTIPLY(tmp10 + tmp12, FIX(0.909038955)); /* c5 */ 986 tmp1 = MULTIPLY(tmp10 + tmp13, FIX(0.483689525)); /* c7 */ 987 988 dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp0 + tmp1, CONST_BITS-1); 989 990 tmp2 = MULTIPLY(tmp12 - tmp13, FIX(1.392728481)); /* c1 */ 991 992 dataptr[5] = (DCTELEM) DESCALE(tmp0 - tmp11 - tmp2, CONST_BITS-1); 993 dataptr[7] = (DCTELEM) DESCALE(tmp1 - tmp11 + tmp2, CONST_BITS-1); 994 995 ctr++; 996 997 if (ctr != DCTSIZE) { 998 if (ctr == 9) 999 break; /* Done. */ 1000 dataptr += DCTSIZE; /* advance pointer to next row */ 1001 } else 1002 dataptr = workspace; /* switch pointer to extended workspace */ 1003 } 1004 1005 /* Pass 2: process columns. 1006 * We leave the results scaled up by an overall factor of 8. 1007 * We must also scale the output by (8/9)**2 = 64/81, which we partially 1008 * fold into the constant multipliers and final/initial shifting: 1009 * cK now represents sqrt(2) * cos(K*pi/18) * 128/81. 
1010 */ 1011 1012 dataptr = data; 1013 wsptr = workspace; 1014 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { 1015 /* Even part */ 1016 1017 tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*0]; 1018 tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*7]; 1019 tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*6]; 1020 tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*5]; 1021 tmp4 = dataptr[DCTSIZE*4]; 1022 1023 tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*0]; 1024 tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*7]; 1025 tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*6]; 1026 tmp13 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*5]; 1027 1028 z1 = tmp0 + tmp2 + tmp3; 1029 z2 = tmp1 + tmp4; 1030 dataptr[DCTSIZE*0] = (DCTELEM) 1031 DESCALE(MULTIPLY(z1 + z2, FIX(1.580246914)), /* 128/81 */ 1032 CONST_BITS+2); 1033 dataptr[DCTSIZE*6] = (DCTELEM) 1034 DESCALE(MULTIPLY(z1 - z2 - z2, FIX(1.117403309)), /* c6 */ 1035 CONST_BITS+2); 1036 z1 = MULTIPLY(tmp0 - tmp2, FIX(2.100031287)); /* c2 */ 1037 z2 = MULTIPLY(tmp1 - tmp4 - tmp4, FIX(1.117403309)); /* c6 */ 1038 dataptr[DCTSIZE*2] = (DCTELEM) 1039 DESCALE(MULTIPLY(tmp2 - tmp3, FIX(1.711961190)) /* c4 */ 1040 + z1 + z2, CONST_BITS+2); 1041 dataptr[DCTSIZE*4] = (DCTELEM) 1042 DESCALE(MULTIPLY(tmp3 - tmp0, FIX(0.388070096)) /* c8 */ 1043 + z1 - z2, CONST_BITS+2); 1044 1045 /* Odd part */ 1046 1047 dataptr[DCTSIZE*3] = (DCTELEM) 1048 DESCALE(MULTIPLY(tmp10 - tmp12 - tmp13, FIX(1.935399303)), /* c3 */ 1049 CONST_BITS+2); 1050 1051 tmp11 = MULTIPLY(tmp11, FIX(1.935399303)); /* c3 */ 1052 tmp0 = MULTIPLY(tmp10 + tmp12, FIX(1.436506004)); /* c5 */ 1053 tmp1 = MULTIPLY(tmp10 + tmp13, FIX(0.764348879)); /* c7 */ 1054 1055 dataptr[DCTSIZE*1] = (DCTELEM) 1056 DESCALE(tmp11 + tmp0 + tmp1, CONST_BITS+2); 1057 1058 tmp2 = MULTIPLY(tmp12 - tmp13, FIX(2.200854883)); /* c1 */ 1059 1060 dataptr[DCTSIZE*5] = (DCTELEM) 1061 DESCALE(tmp0 - tmp11 - tmp2, CONST_BITS+2); 1062 dataptr[DCTSIZE*7] = (DCTELEM) 1063 DESCALE(tmp1 - tmp11 + tmp2, CONST_BITS+2); 1064 1065 dataptr++; /* advance pointer to 
next column */ 1066 wsptr++; /* advance pointer to next column */ 1067 } 1068 } 1069 1070 1071 /* 1072 * Perform the forward DCT on a 10x10 sample block. 1073 */ 1074 1075 GLOBAL(void) 1076 jpeg_fdct_10x10 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 1077 { 1078 INT32 tmp0, tmp1, tmp2, tmp3, tmp4; 1079 INT32 tmp10, tmp11, tmp12, tmp13, tmp14; 1080 DCTELEM workspace[8*2]; 1081 DCTELEM *dataptr; 1082 DCTELEM *wsptr; 1083 JSAMPROW elemptr; 1084 int ctr; 1085 SHIFT_TEMPS 1086 1087 /* Pass 1: process rows. */ 1088 /* Note results are scaled up by sqrt(8) compared to a true DCT; */ 1089 /* we scale the results further by 2 as part of output adaption */ 1090 /* scaling for different DCT size. */ 1091 /* cK represents sqrt(2) * cos(K*pi/20). */ 1092 1093 dataptr = data; 1094 ctr = 0; 1095 for (;;) { 1096 elemptr = sample_data[ctr] + start_col; 1097 1098 /* Even part */ 1099 1100 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[9]); 1101 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[8]); 1102 tmp12 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[7]); 1103 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[6]); 1104 tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[5]); 1105 1106 tmp10 = tmp0 + tmp4; 1107 tmp13 = tmp0 - tmp4; 1108 tmp11 = tmp1 + tmp3; 1109 tmp14 = tmp1 - tmp3; 1110 1111 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[9]); 1112 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[8]); 1113 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[7]); 1114 tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[6]); 1115 tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[5]); 1116 1117 /* Apply unsigned->signed conversion */ 1118 dataptr[0] = (DCTELEM) 1119 ((tmp10 + tmp11 + tmp12 - 10 * CENTERJSAMPLE) << 1); 1120 tmp12 += tmp12; 1121 dataptr[4] = (DCTELEM) 1122 DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.144122806)) - /* c4 */ 1123 MULTIPLY(tmp11 - tmp12, FIX(0.437016024)), /* c8 */ 1124 CONST_BITS-1); 1125 tmp10 = MULTIPLY(tmp13 + tmp14, 
FIX(0.831253876)); /* c6 */ 1126 dataptr[2] = (DCTELEM) 1127 DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.513743148)), /* c2-c6 */ 1128 CONST_BITS-1); 1129 dataptr[6] = (DCTELEM) 1130 DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.176250899)), /* c2+c6 */ 1131 CONST_BITS-1); 1132 1133 /* Odd part */ 1134 1135 tmp10 = tmp0 + tmp4; 1136 tmp11 = tmp1 - tmp3; 1137 dataptr[5] = (DCTELEM) ((tmp10 - tmp11 - tmp2) << 1); 1138 tmp2 <<= CONST_BITS; 1139 dataptr[1] = (DCTELEM) 1140 DESCALE(MULTIPLY(tmp0, FIX(1.396802247)) + /* c1 */ 1141 MULTIPLY(tmp1, FIX(1.260073511)) + tmp2 + /* c3 */ 1142 MULTIPLY(tmp3, FIX(0.642039522)) + /* c7 */ 1143 MULTIPLY(tmp4, FIX(0.221231742)), /* c9 */ 1144 CONST_BITS-1); 1145 tmp12 = MULTIPLY(tmp0 - tmp4, FIX(0.951056516)) - /* (c3+c7)/2 */ 1146 MULTIPLY(tmp1 + tmp3, FIX(0.587785252)); /* (c1-c9)/2 */ 1147 tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.309016994)) + /* (c3-c7)/2 */ 1148 (tmp11 << (CONST_BITS - 1)) - tmp2; 1149 dataptr[3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS-1); 1150 dataptr[7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS-1); 1151 1152 ctr++; 1153 1154 if (ctr != DCTSIZE) { 1155 if (ctr == 10) 1156 break; /* Done. */ 1157 dataptr += DCTSIZE; /* advance pointer to next row */ 1158 } else 1159 dataptr = workspace; /* switch pointer to extended workspace */ 1160 } 1161 1162 /* Pass 2: process columns. 1163 * We leave the results scaled up by an overall factor of 8. 1164 * We must also scale the output by (8/10)**2 = 16/25, which we partially 1165 * fold into the constant multipliers and final/initial shifting: 1166 * cK now represents sqrt(2) * cos(K*pi/20) * 32/25. 
1167 */ 1168 1169 dataptr = data; 1170 wsptr = workspace; 1171 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { 1172 /* Even part */ 1173 1174 tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*1]; 1175 tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*0]; 1176 tmp12 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*7]; 1177 tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*6]; 1178 tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5]; 1179 1180 tmp10 = tmp0 + tmp4; 1181 tmp13 = tmp0 - tmp4; 1182 tmp11 = tmp1 + tmp3; 1183 tmp14 = tmp1 - tmp3; 1184 1185 tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*1]; 1186 tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*0]; 1187 tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*7]; 1188 tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*6]; 1189 tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5]; 1190 1191 dataptr[DCTSIZE*0] = (DCTELEM) 1192 DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(1.28)), /* 32/25 */ 1193 CONST_BITS+2); 1194 tmp12 += tmp12; 1195 dataptr[DCTSIZE*4] = (DCTELEM) 1196 DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.464477191)) - /* c4 */ 1197 MULTIPLY(tmp11 - tmp12, FIX(0.559380511)), /* c8 */ 1198 CONST_BITS+2); 1199 tmp10 = MULTIPLY(tmp13 + tmp14, FIX(1.064004961)); /* c6 */ 1200 dataptr[DCTSIZE*2] = (DCTELEM) 1201 DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.657591230)), /* c2-c6 */ 1202 CONST_BITS+2); 1203 dataptr[DCTSIZE*6] = (DCTELEM) 1204 DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.785601151)), /* c2+c6 */ 1205 CONST_BITS+2); 1206 1207 /* Odd part */ 1208 1209 tmp10 = tmp0 + tmp4; 1210 tmp11 = tmp1 - tmp3; 1211 dataptr[DCTSIZE*5] = (DCTELEM) 1212 DESCALE(MULTIPLY(tmp10 - tmp11 - tmp2, FIX(1.28)), /* 32/25 */ 1213 CONST_BITS+2); 1214 tmp2 = MULTIPLY(tmp2, FIX(1.28)); /* 32/25 */ 1215 dataptr[DCTSIZE*1] = (DCTELEM) 1216 DESCALE(MULTIPLY(tmp0, FIX(1.787906876)) + /* c1 */ 1217 MULTIPLY(tmp1, FIX(1.612894094)) + tmp2 + /* c3 */ 1218 MULTIPLY(tmp3, FIX(0.821810588)) + /* c7 */ 1219 MULTIPLY(tmp4, FIX(0.283176630)), /* c9 */ 1220 CONST_BITS+2); 1221 tmp12 = MULTIPLY(tmp0 - tmp4, FIX(1.217352341)) - /* 
(c3+c7)/2 */ 1222 MULTIPLY(tmp1 + tmp3, FIX(0.752365123)); /* (c1-c9)/2 */ 1223 tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.395541753)) + /* (c3-c7)/2 */ 1224 MULTIPLY(tmp11, FIX(0.64)) - tmp2; /* 16/25 */ 1225 dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS+2); 1226 dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS+2); 1227 1228 dataptr++; /* advance pointer to next column */ 1229 wsptr++; /* advance pointer to next column */ 1230 } 1231 } 1232 1233 1234 /* 1235 * Perform the forward DCT on an 11x11 sample block. 1236 */ 1237 1238 GLOBAL(void) 1239 jpeg_fdct_11x11 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 1240 { 1241 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5; 1242 INT32 tmp10, tmp11, tmp12, tmp13, tmp14; 1243 INT32 z1, z2, z3; 1244 DCTELEM workspace[8*3]; 1245 DCTELEM *dataptr; 1246 DCTELEM *wsptr; 1247 JSAMPROW elemptr; 1248 int ctr; 1249 SHIFT_TEMPS 1250 1251 /* Pass 1: process rows. */ 1252 /* Note results are scaled up by sqrt(8) compared to a true DCT; */ 1253 /* we scale the results further by 2 as part of output adaption */ 1254 /* scaling for different DCT size. */ 1255 /* cK represents sqrt(2) * cos(K*pi/22). 
*/ 1256 1257 dataptr = data; 1258 ctr = 0; 1259 for (;;) { 1260 elemptr = sample_data[ctr] + start_col; 1261 1262 /* Even part */ 1263 1264 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[10]); 1265 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[9]); 1266 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[8]); 1267 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[7]); 1268 tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[6]); 1269 tmp5 = GETJSAMPLE(elemptr[5]); 1270 1271 tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[10]); 1272 tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[9]); 1273 tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[8]); 1274 tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[7]); 1275 tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[6]); 1276 1277 /* Apply unsigned->signed conversion */ 1278 dataptr[0] = (DCTELEM) 1279 ((tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 - 11 * CENTERJSAMPLE) << 1); 1280 tmp5 += tmp5; 1281 tmp0 -= tmp5; 1282 tmp1 -= tmp5; 1283 tmp2 -= tmp5; 1284 tmp3 -= tmp5; 1285 tmp4 -= tmp5; 1286 z1 = MULTIPLY(tmp0 + tmp3, FIX(1.356927976)) + /* c2 */ 1287 MULTIPLY(tmp2 + tmp4, FIX(0.201263574)); /* c10 */ 1288 z2 = MULTIPLY(tmp1 - tmp3, FIX(0.926112931)); /* c6 */ 1289 z3 = MULTIPLY(tmp0 - tmp1, FIX(1.189712156)); /* c4 */ 1290 dataptr[2] = (DCTELEM) 1291 DESCALE(z1 + z2 - MULTIPLY(tmp3, FIX(1.018300590)) /* c2+c8-c6 */ 1292 - MULTIPLY(tmp4, FIX(1.390975730)), /* c4+c10 */ 1293 CONST_BITS-1); 1294 dataptr[4] = (DCTELEM) 1295 DESCALE(z2 + z3 + MULTIPLY(tmp1, FIX(0.062335650)) /* c4-c6-c10 */ 1296 - MULTIPLY(tmp2, FIX(1.356927976)) /* c2 */ 1297 + MULTIPLY(tmp4, FIX(0.587485545)), /* c8 */ 1298 CONST_BITS-1); 1299 dataptr[6] = (DCTELEM) 1300 DESCALE(z1 + z3 - MULTIPLY(tmp0, FIX(1.620527200)) /* c2+c4-c6 */ 1301 - MULTIPLY(tmp2, FIX(0.788749120)), /* c8+c10 */ 1302 CONST_BITS-1); 1303 1304 /* Odd part */ 1305 1306 tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.286413905)); /* c3 */ 1307 tmp2 = MULTIPLY(tmp10 + tmp12, 
FIX(1.068791298)); /* c5 */ 1308 tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.764581576)); /* c7 */ 1309 tmp0 = tmp1 + tmp2 + tmp3 - MULTIPLY(tmp10, FIX(1.719967871)) /* c7+c5+c3-c1 */ 1310 + MULTIPLY(tmp14, FIX(0.398430003)); /* c9 */ 1311 tmp4 = MULTIPLY(tmp11 + tmp12, - FIX(0.764581576)); /* -c7 */ 1312 tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.399818907)); /* -c1 */ 1313 tmp1 += tmp4 + tmp5 + MULTIPLY(tmp11, FIX(1.276416582)) /* c9+c7+c1-c3 */ 1314 - MULTIPLY(tmp14, FIX(1.068791298)); /* c5 */ 1315 tmp10 = MULTIPLY(tmp12 + tmp13, FIX(0.398430003)); /* c9 */ 1316 tmp2 += tmp4 + tmp10 - MULTIPLY(tmp12, FIX(1.989053629)) /* c9+c5+c3-c7 */ 1317 + MULTIPLY(tmp14, FIX(1.399818907)); /* c1 */ 1318 tmp3 += tmp5 + tmp10 + MULTIPLY(tmp13, FIX(1.305598626)) /* c1+c5-c9-c7 */ 1319 - MULTIPLY(tmp14, FIX(1.286413905)); /* c3 */ 1320 1321 dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-1); 1322 dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-1); 1323 dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-1); 1324 dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS-1); 1325 1326 ctr++; 1327 1328 if (ctr != DCTSIZE) { 1329 if (ctr == 11) 1330 break; /* Done. */ 1331 dataptr += DCTSIZE; /* advance pointer to next row */ 1332 } else 1333 dataptr = workspace; /* switch pointer to extended workspace */ 1334 } 1335 1336 /* Pass 2: process columns. 1337 * We leave the results scaled up by an overall factor of 8. 1338 * We must also scale the output by (8/11)**2 = 64/121, which we partially 1339 * fold into the constant multipliers and final/initial shifting: 1340 * cK now represents sqrt(2) * cos(K*pi/22) * 128/121. 
1341 */ 1342 1343 dataptr = data; 1344 wsptr = workspace; 1345 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { 1346 /* Even part */ 1347 1348 tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*2]; 1349 tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*1]; 1350 tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*0]; 1351 tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*7]; 1352 tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*6]; 1353 tmp5 = dataptr[DCTSIZE*5]; 1354 1355 tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*2]; 1356 tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*1]; 1357 tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*0]; 1358 tmp13 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*7]; 1359 tmp14 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*6]; 1360 1361 dataptr[DCTSIZE*0] = (DCTELEM) 1362 DESCALE(MULTIPLY(tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5, 1363 FIX(1.057851240)), /* 128/121 */ 1364 CONST_BITS+2); 1365 tmp5 += tmp5; 1366 tmp0 -= tmp5; 1367 tmp1 -= tmp5; 1368 tmp2 -= tmp5; 1369 tmp3 -= tmp5; 1370 tmp4 -= tmp5; 1371 z1 = MULTIPLY(tmp0 + tmp3, FIX(1.435427942)) + /* c2 */ 1372 MULTIPLY(tmp2 + tmp4, FIX(0.212906922)); /* c10 */ 1373 z2 = MULTIPLY(tmp1 - tmp3, FIX(0.979689713)); /* c6 */ 1374 z3 = MULTIPLY(tmp0 - tmp1, FIX(1.258538479)); /* c4 */ 1375 dataptr[DCTSIZE*2] = (DCTELEM) 1376 DESCALE(z1 + z2 - MULTIPLY(tmp3, FIX(1.077210542)) /* c2+c8-c6 */ 1377 - MULTIPLY(tmp4, FIX(1.471445400)), /* c4+c10 */ 1378 CONST_BITS+2); 1379 dataptr[DCTSIZE*4] = (DCTELEM) 1380 DESCALE(z2 + z3 + MULTIPLY(tmp1, FIX(0.065941844)) /* c4-c6-c10 */ 1381 - MULTIPLY(tmp2, FIX(1.435427942)) /* c2 */ 1382 + MULTIPLY(tmp4, FIX(0.621472312)), /* c8 */ 1383 CONST_BITS+2); 1384 dataptr[DCTSIZE*6] = (DCTELEM) 1385 DESCALE(z1 + z3 - MULTIPLY(tmp0, FIX(1.714276708)) /* c2+c4-c6 */ 1386 - MULTIPLY(tmp2, FIX(0.834379234)), /* c8+c10 */ 1387 CONST_BITS+2); 1388 1389 /* Odd part */ 1390 1391 tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.360834544)); /* c3 */ 1392 tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.130622199)); /* c5 */ 1393 tmp3 = MULTIPLY(tmp10 + tmp13, 
FIX(0.808813568)); /* c7 */ 1394 tmp0 = tmp1 + tmp2 + tmp3 - MULTIPLY(tmp10, FIX(1.819470145)) /* c7+c5+c3-c1 */ 1395 + MULTIPLY(tmp14, FIX(0.421479672)); /* c9 */ 1396 tmp4 = MULTIPLY(tmp11 + tmp12, - FIX(0.808813568)); /* -c7 */ 1397 tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.480800167)); /* -c1 */ 1398 tmp1 += tmp4 + tmp5 + MULTIPLY(tmp11, FIX(1.350258864)) /* c9+c7+c1-c3 */ 1399 - MULTIPLY(tmp14, FIX(1.130622199)); /* c5 */ 1400 tmp10 = MULTIPLY(tmp12 + tmp13, FIX(0.421479672)); /* c9 */ 1401 tmp2 += tmp4 + tmp10 - MULTIPLY(tmp12, FIX(2.104122847)) /* c9+c5+c3-c7 */ 1402 + MULTIPLY(tmp14, FIX(1.480800167)); /* c1 */ 1403 tmp3 += tmp5 + tmp10 + MULTIPLY(tmp13, FIX(1.381129125)) /* c1+c5-c9-c7 */ 1404 - MULTIPLY(tmp14, FIX(1.360834544)); /* c3 */ 1405 1406 dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+2); 1407 dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+2); 1408 dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+2); 1409 dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+2); 1410 1411 dataptr++; /* advance pointer to next column */ 1412 wsptr++; /* advance pointer to next column */ 1413 } 1414 } 1415 1416 1417 /* 1418 * Perform the forward DCT on a 12x12 sample block. 1419 */ 1420 1421 GLOBAL(void) 1422 jpeg_fdct_12x12 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 1423 { 1424 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5; 1425 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; 1426 DCTELEM workspace[8*4]; 1427 DCTELEM *dataptr; 1428 DCTELEM *wsptr; 1429 JSAMPROW elemptr; 1430 int ctr; 1431 SHIFT_TEMPS 1432 1433 /* Pass 1: process rows. */ 1434 /* Note results are scaled up by sqrt(8) compared to a true DCT. */ 1435 /* cK represents sqrt(2) * cos(K*pi/24). 
*/ 1436 1437 dataptr = data; 1438 ctr = 0; 1439 for (;;) { 1440 elemptr = sample_data[ctr] + start_col; 1441 1442 /* Even part */ 1443 1444 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[11]); 1445 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[10]); 1446 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[9]); 1447 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[8]); 1448 tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[7]); 1449 tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[6]); 1450 1451 tmp10 = tmp0 + tmp5; 1452 tmp13 = tmp0 - tmp5; 1453 tmp11 = tmp1 + tmp4; 1454 tmp14 = tmp1 - tmp4; 1455 tmp12 = tmp2 + tmp3; 1456 tmp15 = tmp2 - tmp3; 1457 1458 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[11]); 1459 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[10]); 1460 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[9]); 1461 tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[8]); 1462 tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[7]); 1463 tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[6]); 1464 1465 /* Apply unsigned->signed conversion */ 1466 dataptr[0] = (DCTELEM) (tmp10 + tmp11 + tmp12 - 12 * CENTERJSAMPLE); 1467 dataptr[6] = (DCTELEM) (tmp13 - tmp14 - tmp15); 1468 dataptr[4] = (DCTELEM) 1469 DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.224744871)), /* c4 */ 1470 CONST_BITS); 1471 dataptr[2] = (DCTELEM) 1472 DESCALE(tmp14 - tmp15 + MULTIPLY(tmp13 + tmp15, FIX(1.366025404)), /* c2 */ 1473 CONST_BITS); 1474 1475 /* Odd part */ 1476 1477 tmp10 = MULTIPLY(tmp1 + tmp4, FIX_0_541196100); /* c9 */ 1478 tmp14 = tmp10 + MULTIPLY(tmp1, FIX_0_765366865); /* c3-c9 */ 1479 tmp15 = tmp10 - MULTIPLY(tmp4, FIX_1_847759065); /* c3+c9 */ 1480 tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.121971054)); /* c5 */ 1481 tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.860918669)); /* c7 */ 1482 tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.580774953)) /* c5+c7-c1 */ 1483 + MULTIPLY(tmp5, FIX(0.184591911)); /* c11 */ 1484 tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.184591911)); /* 
-c11 */ 1485 tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.339493912)) /* c1+c5-c11 */ 1486 + MULTIPLY(tmp5, FIX(0.860918669)); /* c7 */ 1487 tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.725788011)) /* c1+c11-c7 */ 1488 - MULTIPLY(tmp5, FIX(1.121971054)); /* c5 */ 1489 tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.306562965)) /* c3 */ 1490 - MULTIPLY(tmp2 + tmp5, FIX_0_541196100); /* c9 */ 1491 1492 dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS); 1493 dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS); 1494 dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS); 1495 dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS); 1496 1497 ctr++; 1498 1499 if (ctr != DCTSIZE) { 1500 if (ctr == 12) 1501 break; /* Done. */ 1502 dataptr += DCTSIZE; /* advance pointer to next row */ 1503 } else 1504 dataptr = workspace; /* switch pointer to extended workspace */ 1505 } 1506 1507 /* Pass 2: process columns. 1508 * We leave the results scaled up by an overall factor of 8. 1509 * We must also scale the output by (8/12)**2 = 4/9, which we partially 1510 * fold into the constant multipliers and final shifting: 1511 * cK now represents sqrt(2) * cos(K*pi/24) * 8/9. 
1512 */ 1513 1514 dataptr = data; 1515 wsptr = workspace; 1516 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { 1517 /* Even part */ 1518 1519 tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*3]; 1520 tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*2]; 1521 tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*1]; 1522 tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*0]; 1523 tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*7]; 1524 tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*6]; 1525 1526 tmp10 = tmp0 + tmp5; 1527 tmp13 = tmp0 - tmp5; 1528 tmp11 = tmp1 + tmp4; 1529 tmp14 = tmp1 - tmp4; 1530 tmp12 = tmp2 + tmp3; 1531 tmp15 = tmp2 - tmp3; 1532 1533 tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*3]; 1534 tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*2]; 1535 tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*1]; 1536 tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*0]; 1537 tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*7]; 1538 tmp5 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*6]; 1539 1540 dataptr[DCTSIZE*0] = (DCTELEM) 1541 DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(0.888888889)), /* 8/9 */ 1542 CONST_BITS+1); 1543 dataptr[DCTSIZE*6] = (DCTELEM) 1544 DESCALE(MULTIPLY(tmp13 - tmp14 - tmp15, FIX(0.888888889)), /* 8/9 */ 1545 CONST_BITS+1); 1546 dataptr[DCTSIZE*4] = (DCTELEM) 1547 DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.088662108)), /* c4 */ 1548 CONST_BITS+1); 1549 dataptr[DCTSIZE*2] = (DCTELEM) 1550 DESCALE(MULTIPLY(tmp14 - tmp15, FIX(0.888888889)) + /* 8/9 */ 1551 MULTIPLY(tmp13 + tmp15, FIX(1.214244803)), /* c2 */ 1552 CONST_BITS+1); 1553 1554 /* Odd part */ 1555 1556 tmp10 = MULTIPLY(tmp1 + tmp4, FIX(0.481063200)); /* c9 */ 1557 tmp14 = tmp10 + MULTIPLY(tmp1, FIX(0.680326102)); /* c3-c9 */ 1558 tmp15 = tmp10 - MULTIPLY(tmp4, FIX(1.642452502)); /* c3+c9 */ 1559 tmp12 = MULTIPLY(tmp0 + tmp2, FIX(0.997307603)); /* c5 */ 1560 tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.765261039)); /* c7 */ 1561 tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.516244403)) /* c5+c7-c1 */ 1562 + MULTIPLY(tmp5, FIX(0.164081699)); /* c11 */ 1563 tmp11 = MULTIPLY(tmp2 + 
tmp3, - FIX(0.164081699)); /* -c11 */ 1564 tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.079550144)) /* c1+c5-c11 */ 1565 + MULTIPLY(tmp5, FIX(0.765261039)); /* c7 */ 1566 tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.645144899)) /* c1+c11-c7 */ 1567 - MULTIPLY(tmp5, FIX(0.997307603)); /* c5 */ 1568 tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.161389302)) /* c3 */ 1569 - MULTIPLY(tmp2 + tmp5, FIX(0.481063200)); /* c9 */ 1570 1571 dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+1); 1572 dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+1); 1573 dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+1); 1574 dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+1); 1575 1576 dataptr++; /* advance pointer to next column */ 1577 wsptr++; /* advance pointer to next column */ 1578 } 1579 } 1580 1581 1582 /* 1583 * Perform the forward DCT on a 13x13 sample block. 1584 */ 1585 1586 GLOBAL(void) 1587 jpeg_fdct_13x13 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 1588 { 1589 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6; 1590 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; 1591 INT32 z1, z2; 1592 DCTELEM workspace[8*5]; 1593 DCTELEM *dataptr; 1594 DCTELEM *wsptr; 1595 JSAMPROW elemptr; 1596 int ctr; 1597 SHIFT_TEMPS 1598 1599 /* Pass 1: process rows. */ 1600 /* Note results are scaled up by sqrt(8) compared to a true DCT. */ 1601 /* cK represents sqrt(2) * cos(K*pi/26). 
*/ 1602 1603 dataptr = data; 1604 ctr = 0; 1605 for (;;) { 1606 elemptr = sample_data[ctr] + start_col; 1607 1608 /* Even part */ 1609 1610 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[12]); 1611 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[11]); 1612 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[10]); 1613 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[9]); 1614 tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[8]); 1615 tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[7]); 1616 tmp6 = GETJSAMPLE(elemptr[6]); 1617 1618 tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[12]); 1619 tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[11]); 1620 tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[10]); 1621 tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[9]); 1622 tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[8]); 1623 tmp15 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[7]); 1624 1625 /* Apply unsigned->signed conversion */ 1626 dataptr[0] = (DCTELEM) 1627 (tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 + tmp6 - 13 * CENTERJSAMPLE); 1628 tmp6 += tmp6; 1629 tmp0 -= tmp6; 1630 tmp1 -= tmp6; 1631 tmp2 -= tmp6; 1632 tmp3 -= tmp6; 1633 tmp4 -= tmp6; 1634 tmp5 -= tmp6; 1635 dataptr[2] = (DCTELEM) 1636 DESCALE(MULTIPLY(tmp0, FIX(1.373119086)) + /* c2 */ 1637 MULTIPLY(tmp1, FIX(1.058554052)) + /* c6 */ 1638 MULTIPLY(tmp2, FIX(0.501487041)) - /* c10 */ 1639 MULTIPLY(tmp3, FIX(0.170464608)) - /* c12 */ 1640 MULTIPLY(tmp4, FIX(0.803364869)) - /* c8 */ 1641 MULTIPLY(tmp5, FIX(1.252223920)), /* c4 */ 1642 CONST_BITS); 1643 z1 = MULTIPLY(tmp0 - tmp2, FIX(1.155388986)) - /* (c4+c6)/2 */ 1644 MULTIPLY(tmp3 - tmp4, FIX(0.435816023)) - /* (c2-c10)/2 */ 1645 MULTIPLY(tmp1 - tmp5, FIX(0.316450131)); /* (c8-c12)/2 */ 1646 z2 = MULTIPLY(tmp0 + tmp2, FIX(0.096834934)) - /* (c4-c6)/2 */ 1647 MULTIPLY(tmp3 + tmp4, FIX(0.937303064)) + /* (c2+c10)/2 */ 1648 MULTIPLY(tmp1 + tmp5, FIX(0.486914739)); /* (c8+c12)/2 */ 1649 1650 dataptr[4] = (DCTELEM) DESCALE(z1 + z2, 
CONST_BITS); 1651 dataptr[6] = (DCTELEM) DESCALE(z1 - z2, CONST_BITS); 1652 1653 /* Odd part */ 1654 1655 tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.322312651)); /* c3 */ 1656 tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.163874945)); /* c5 */ 1657 tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.937797057)) + /* c7 */ 1658 MULTIPLY(tmp14 + tmp15, FIX(0.338443458)); /* c11 */ 1659 tmp0 = tmp1 + tmp2 + tmp3 - 1660 MULTIPLY(tmp10, FIX(2.020082300)) + /* c3+c5+c7-c1 */ 1661 MULTIPLY(tmp14, FIX(0.318774355)); /* c9-c11 */ 1662 tmp4 = MULTIPLY(tmp14 - tmp15, FIX(0.937797057)) - /* c7 */ 1663 MULTIPLY(tmp11 + tmp12, FIX(0.338443458)); /* c11 */ 1664 tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.163874945)); /* -c5 */ 1665 tmp1 += tmp4 + tmp5 + 1666 MULTIPLY(tmp11, FIX(0.837223564)) - /* c5+c9+c11-c3 */ 1667 MULTIPLY(tmp14, FIX(2.341699410)); /* c1+c7 */ 1668 tmp6 = MULTIPLY(tmp12 + tmp13, - FIX(0.657217813)); /* -c9 */ 1669 tmp2 += tmp4 + tmp6 - 1670 MULTIPLY(tmp12, FIX(1.572116027)) + /* c1+c5-c9-c11 */ 1671 MULTIPLY(tmp15, FIX(2.260109708)); /* c3+c7 */ 1672 tmp3 += tmp5 + tmp6 + 1673 MULTIPLY(tmp13, FIX(2.205608352)) - /* c3+c5+c9-c7 */ 1674 MULTIPLY(tmp15, FIX(1.742345811)); /* c1+c11 */ 1675 1676 dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS); 1677 dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS); 1678 dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS); 1679 dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS); 1680 1681 ctr++; 1682 1683 if (ctr != DCTSIZE) { 1684 if (ctr == 13) 1685 break; /* Done. */ 1686 dataptr += DCTSIZE; /* advance pointer to next row */ 1687 } else 1688 dataptr = workspace; /* switch pointer to extended workspace */ 1689 } 1690 1691 /* Pass 2: process columns. 1692 * We leave the results scaled up by an overall factor of 8. 1693 * We must also scale the output by (8/13)**2 = 64/169, which we partially 1694 * fold into the constant multipliers and final shifting: 1695 * cK now represents sqrt(2) * cos(K*pi/26) * 128/169. 
1696 */ 1697 1698 dataptr = data; 1699 wsptr = workspace; 1700 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { 1701 /* Even part */ 1702 1703 tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*4]; 1704 tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*3]; 1705 tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*2]; 1706 tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*1]; 1707 tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*0]; 1708 tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*7]; 1709 tmp6 = dataptr[DCTSIZE*6]; 1710 1711 tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*4]; 1712 tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*3]; 1713 tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*2]; 1714 tmp13 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*1]; 1715 tmp14 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*0]; 1716 tmp15 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*7]; 1717 1718 dataptr[DCTSIZE*0] = (DCTELEM) 1719 DESCALE(MULTIPLY(tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 + tmp6, 1720 FIX(0.757396450)), /* 128/169 */ 1721 CONST_BITS+1); 1722 tmp6 += tmp6; 1723 tmp0 -= tmp6; 1724 tmp1 -= tmp6; 1725 tmp2 -= tmp6; 1726 tmp3 -= tmp6; 1727 tmp4 -= tmp6; 1728 tmp5 -= tmp6; 1729 dataptr[DCTSIZE*2] = (DCTELEM) 1730 DESCALE(MULTIPLY(tmp0, FIX(1.039995521)) + /* c2 */ 1731 MULTIPLY(tmp1, FIX(0.801745081)) + /* c6 */ 1732 MULTIPLY(tmp2, FIX(0.379824504)) - /* c10 */ 1733 MULTIPLY(tmp3, FIX(0.129109289)) - /* c12 */ 1734 MULTIPLY(tmp4, FIX(0.608465700)) - /* c8 */ 1735 MULTIPLY(tmp5, FIX(0.948429952)), /* c4 */ 1736 CONST_BITS+1); 1737 z1 = MULTIPLY(tmp0 - tmp2, FIX(0.875087516)) - /* (c4+c6)/2 */ 1738 MULTIPLY(tmp3 - tmp4, FIX(0.330085509)) - /* (c2-c10)/2 */ 1739 MULTIPLY(tmp1 - tmp5, FIX(0.239678205)); /* (c8-c12)/2 */ 1740 z2 = MULTIPLY(tmp0 + tmp2, FIX(0.073342435)) - /* (c4-c6)/2 */ 1741 MULTIPLY(tmp3 + tmp4, FIX(0.709910013)) + /* (c2+c10)/2 */ 1742 MULTIPLY(tmp1 + tmp5, FIX(0.368787494)); /* (c8+c12)/2 */ 1743 1744 dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS+1); 1745 dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 - z2, CONST_BITS+1); 1746 1747 /* Odd 
part */ 1748 1749 tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.001514908)); /* c3 */ 1750 tmp2 = MULTIPLY(tmp10 + tmp12, FIX(0.881514751)); /* c5 */ 1751 tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.710284161)) + /* c7 */ 1752 MULTIPLY(tmp14 + tmp15, FIX(0.256335874)); /* c11 */ 1753 tmp0 = tmp1 + tmp2 + tmp3 - 1754 MULTIPLY(tmp10, FIX(1.530003162)) + /* c3+c5+c7-c1 */ 1755 MULTIPLY(tmp14, FIX(0.241438564)); /* c9-c11 */ 1756 tmp4 = MULTIPLY(tmp14 - tmp15, FIX(0.710284161)) - /* c7 */ 1757 MULTIPLY(tmp11 + tmp12, FIX(0.256335874)); /* c11 */ 1758 tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(0.881514751)); /* -c5 */ 1759 tmp1 += tmp4 + tmp5 + 1760 MULTIPLY(tmp11, FIX(0.634110155)) - /* c5+c9+c11-c3 */ 1761 MULTIPLY(tmp14, FIX(1.773594819)); /* c1+c7 */ 1762 tmp6 = MULTIPLY(tmp12 + tmp13, - FIX(0.497774438)); /* -c9 */ 1763 tmp2 += tmp4 + tmp6 - 1764 MULTIPLY(tmp12, FIX(1.190715098)) + /* c1+c5-c9-c11 */ 1765 MULTIPLY(tmp15, FIX(1.711799069)); /* c3+c7 */ 1766 tmp3 += tmp5 + tmp6 + 1767 MULTIPLY(tmp13, FIX(1.670519935)) - /* c3+c5+c9-c7 */ 1768 MULTIPLY(tmp15, FIX(1.319646532)); /* c1+c11 */ 1769 1770 dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+1); 1771 dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+1); 1772 dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+1); 1773 dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+1); 1774 1775 dataptr++; /* advance pointer to next column */ 1776 wsptr++; /* advance pointer to next column */ 1777 } 1778 }


/*
 * Perform the forward DCT on a 14x14 sample block.
 *
 * data         receives the 8x8 block of output coefficients.
 * sample_data  supplies the 14 input rows, sample_data[0..13].
 * start_col    is the column offset of the block within each input row.
 *
 * Pass 1 applies a 14-point FDCT to every input row and keeps the 8
 * lowest-frequency outputs: rows 0..7 are stored in data[], rows 8..13
 * in a local workspace.  Pass 2 applies the same 14-point kernel to each
 * of the 8 columns and stores the 8 lowest-frequency outputs in data[].
 */

GLOBAL(void)
jpeg_fdct_14x14 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
{
  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
  DCTELEM workspace[8*6];
  DCTELEM *dataptr;
  DCTELEM *wsptr;
  JSAMPROW elemptr;
  int ctr;
  SHIFT_TEMPS

  /* Pass 1: process rows. */
  /* Note results are scaled up by sqrt(8) compared to a true DCT. */
  /* cK represents sqrt(2) * cos(K*pi/28). */

  dataptr = data;
  ctr = 0;
  for (;;) {
    elemptr = sample_data[ctr] + start_col;

    /* Even part */

    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[13]);
    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[12]);
    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[11]);
    tmp13 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[10]);
    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[9]);
    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[8]);
    tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[7]);

    tmp10 = tmp0 + tmp6;
    tmp14 = tmp0 - tmp6;
    tmp11 = tmp1 + tmp5;
    tmp15 = tmp1 - tmp5;
    tmp12 = tmp2 + tmp4;
    tmp16 = tmp2 - tmp4;

    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[13]);
    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[12]);
    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[11]);
    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[10]);
    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[9]);
    tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[8]);
    tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[7]);

    /* Apply unsigned->signed conversion */
    dataptr[0] = (DCTELEM)
      (tmp10 + tmp11 + tmp12 + tmp13 - 14 * CENTERJSAMPLE);
    tmp13 += tmp13;
    dataptr[4] = (DCTELEM)
      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.274162392)) + /* c4 */
              MULTIPLY(tmp11 - tmp13, FIX(0.314692123)) - /* c12 */
              MULTIPLY(tmp12 - tmp13, FIX(0.881747734)),  /* c8 */
              CONST_BITS);

    tmp10 = MULTIPLY(tmp14 + tmp15, FIX(1.105676686));    /* c6 */

    dataptr[2] = (DCTELEM)
      DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.273079590))   /* c2-c6 */
              + MULTIPLY(tmp16, FIX(0.613604268)),        /* c10 */
              CONST_BITS);
    dataptr[6] = (DCTELEM)
      DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.719280954))   /* c6+c10 */
              - MULTIPLY(tmp16, FIX(1.378756276)),        /* c2 */
              CONST_BITS);

    /* Odd part */

    tmp10 = tmp1 + tmp2;
    tmp11 = tmp5 - tmp4;
    dataptr[7] = (DCTELEM) (tmp0 - tmp10 + tmp3 - tmp11 - tmp6);
    /* tmp3 carries the factor c7 == 1; bring it to the CONST_BITS scale.
     * A multiplication is used because tmp3 may be negative and
     * left-shifting a negative value is undefined behavior in C.
     */
    tmp3 *= ((INT32) 1 << CONST_BITS);
    tmp10 = MULTIPLY(tmp10, - FIX(0.158341681));          /* -c13 */
    tmp11 = MULTIPLY(tmp11, FIX(1.405321284));            /* c1 */
    tmp10 += tmp11 - tmp3;
    tmp11 = MULTIPLY(tmp0 + tmp2, FIX(1.197448846)) +     /* c5 */
            MULTIPLY(tmp4 + tmp6, FIX(0.752406978));      /* c9 */
    dataptr[5] = (DCTELEM)
      DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(2.373959773)) /* c3+c5-c13 */
              + MULTIPLY(tmp4, FIX(1.119999435)),         /* c1+c11-c9 */
              CONST_BITS);
    tmp12 = MULTIPLY(tmp0 + tmp1, FIX(1.334852607)) +     /* c3 */
            MULTIPLY(tmp5 - tmp6, FIX(0.467085129));      /* c11 */
    dataptr[3] = (DCTELEM)
      DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.424103948)) /* c3-c9-c13 */
              - MULTIPLY(tmp5, FIX(3.069855259)),         /* c1+c5+c11 */
              CONST_BITS);
    /* tmp11 + tmp12 weights tmp0 by c3+c5 and tmp6 by c9-c11; the two
     * corrections below reduce these to c1 and c13 respectively.  Both
     * corrections are fixed-point products so every term of the sum is at
     * the CONST_BITS scale (same structure as in pass 2).
     */
    dataptr[1] = (DCTELEM)
      DESCALE(tmp11 + tmp12 + tmp3
              - MULTIPLY(tmp0, FIX(1.126980169))          /* c3+c5-c1 */
              - MULTIPLY(tmp6, FIX(0.126980169)),         /* c9-c11-c13 */
              CONST_BITS);

    ctr++;

    if (ctr != DCTSIZE) {
      if (ctr == 14)
        break;                  /* Done. */
      dataptr += DCTSIZE;       /* advance pointer to next row */
    } else
      dataptr = workspace;      /* switch pointer to extended workspace */
  }

  /* Pass 2: process columns.
   * We leave the results scaled up by an overall factor of 8.
   * We must also scale the output by (8/14)**2 = 16/49, which we partially
   * fold into the constant multipliers and final shifting:
   * cK now represents sqrt(2) * cos(K*pi/28) * 32/49.
   */

  dataptr = data;
  wsptr = workspace;
  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
    /* Even part */

    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*5];
    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*4];
    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*3];
    tmp13 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*2];
    tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*1];
    tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*0];
    tmp6 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];

    tmp10 = tmp0 + tmp6;
    tmp14 = tmp0 - tmp6;
    tmp11 = tmp1 + tmp5;
    tmp15 = tmp1 - tmp5;
    tmp12 = tmp2 + tmp4;
    tmp16 = tmp2 - tmp4;

    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*5];
    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*4];
    tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*3];
    tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*2];
    tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*1];
    tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*0];
    tmp6 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];

    dataptr[DCTSIZE*0] = (DCTELEM)
      DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12 + tmp13,
                       FIX(0.653061224)),                 /* 32/49 */
              CONST_BITS+1);
    tmp13 += tmp13;
    dataptr[DCTSIZE*4] = (DCTELEM)
      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(0.832106052)) + /* c4 */
              MULTIPLY(tmp11 - tmp13, FIX(0.205513223)) - /* c12 */
              MULTIPLY(tmp12 - tmp13, FIX(0.575835255)),  /* c8 */
              CONST_BITS+1);

    tmp10 = MULTIPLY(tmp14 + tmp15, FIX(0.722074570));    /* c6 */

    dataptr[DCTSIZE*2] = (DCTELEM)
      DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.178337691))   /* c2-c6 */
              + MULTIPLY(tmp16, FIX(0.400721155)),        /* c10 */
              CONST_BITS+1);
    dataptr[DCTSIZE*6] = (DCTELEM)
      DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.122795725))   /* c6+c10 */
              - MULTIPLY(tmp16, FIX(0.900412262)),        /* c2 */
              CONST_BITS+1);

    /* Odd part */

    tmp10 = tmp1 + tmp2;
    tmp11 = tmp5 - tmp4;
    dataptr[DCTSIZE*7] = (DCTELEM)
      DESCALE(MULTIPLY(tmp0 - tmp10 + tmp3 - tmp11 - tmp6,
                       FIX(0.653061224)),                 /* 32/49 */
              CONST_BITS+1);
    tmp3  = MULTIPLY(tmp3 , FIX(0.653061224));            /* 32/49 */
    tmp10 = MULTIPLY(tmp10, - FIX(0.103406812));          /* -c13 */
    tmp11 = MULTIPLY(tmp11, FIX(0.917760839));            /* c1 */
    tmp10 += tmp11 - tmp3;
    tmp11 = MULTIPLY(tmp0 + tmp2, FIX(0.782007410)) +     /* c5 */
            MULTIPLY(tmp4 + tmp6, FIX(0.491367823));      /* c9 */
    dataptr[DCTSIZE*5] = (DCTELEM)
      DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(1.550341076)) /* c3+c5-c13 */
              + MULTIPLY(tmp4, FIX(0.731428202)),         /* c1+c11-c9 */
              CONST_BITS+1);
    tmp12 = MULTIPLY(tmp0 + tmp1, FIX(0.871740478)) +     /* c3 */
            MULTIPLY(tmp5 - tmp6, FIX(0.305035186));      /* c11 */
    dataptr[DCTSIZE*3] = (DCTELEM)
      DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.276965844)) /* c3-c9-c13 */
              - MULTIPLY(tmp5, FIX(2.004803435)),         /* c1+c5+c11 */
              CONST_BITS+1);
    dataptr[DCTSIZE*1] = (DCTELEM)
      DESCALE(tmp11 + tmp12 + tmp3
              - MULTIPLY(tmp0, FIX(0.735987049))          /* c3+c5-c1 */
              - MULTIPLY(tmp6, FIX(0.082925825)),         /* c9-c11-c13 */
              CONST_BITS+1);

    dataptr++;                  /* advance pointer to next column */
    wsptr++;                    /* advance pointer to next column */
  }
}


1981 /* 1982 * Perform the forward DCT on a 15x15 sample block. 1983 */ 1984 1985 GLOBAL(void) 1986 jpeg_fdct_15x15 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 1987 { 1988 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; 1989 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; 1990 INT32 z1, z2, z3; 1991 DCTELEM workspace[8*7]; 1992 DCTELEM *dataptr; 1993 DCTELEM *wsptr; 1994 JSAMPROW elemptr; 1995 int ctr; 1996 SHIFT_TEMPS 1997 1998 /* Pass 1: process rows. */ 1999 /* Note results are scaled up by sqrt(8) compared to a true DCT. */ 2000 /* cK represents sqrt(2) * cos(K*pi/30).
*/ 2001 2002 dataptr = data; 2003 ctr = 0; 2004 for (;;) { 2005 elemptr = sample_data[ctr] + start_col; 2006 2007 /* Even part */ 2008 2009 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[14]); 2010 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[13]); 2011 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[12]); 2012 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[11]); 2013 tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[10]); 2014 tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[9]); 2015 tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[8]); 2016 tmp7 = GETJSAMPLE(elemptr[7]); 2017 2018 tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[14]); 2019 tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[13]); 2020 tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[12]); 2021 tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[11]); 2022 tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[10]); 2023 tmp15 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[9]); 2024 tmp16 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[8]); 2025 2026 z1 = tmp0 + tmp4 + tmp5; 2027 z2 = tmp1 + tmp3 + tmp6; 2028 z3 = tmp2 + tmp7; 2029 /* Apply unsigned->signed conversion */ 2030 dataptr[0] = (DCTELEM) (z1 + z2 + z3 - 15 * CENTERJSAMPLE); 2031 z3 += z3; 2032 dataptr[6] = (DCTELEM) 2033 DESCALE(MULTIPLY(z1 - z3, FIX(1.144122806)) - /* c6 */ 2034 MULTIPLY(z2 - z3, FIX(0.437016024)), /* c12 */ 2035 CONST_BITS); 2036 tmp2 += ((tmp1 + tmp4) >> 1) - tmp7 - tmp7; 2037 z1 = MULTIPLY(tmp3 - tmp2, FIX(1.531135173)) - /* c2+c14 */ 2038 MULTIPLY(tmp6 - tmp2, FIX(2.238241955)); /* c4+c8 */ 2039 z2 = MULTIPLY(tmp5 - tmp2, FIX(0.798468008)) - /* c8-c14 */ 2040 MULTIPLY(tmp0 - tmp2, FIX(0.091361227)); /* c2-c4 */ 2041 z3 = MULTIPLY(tmp0 - tmp3, FIX(1.383309603)) + /* c2 */ 2042 MULTIPLY(tmp6 - tmp5, FIX(0.946293579)) + /* c8 */ 2043 MULTIPLY(tmp1 - tmp4, FIX(0.790569415)); /* (c6+c12)/2 */ 2044 2045 dataptr[2] = (DCTELEM) DESCALE(z1 + z3, CONST_BITS); 2046 dataptr[4] = (DCTELEM) DESCALE(z2 + 
z3, CONST_BITS); 2047 2048 /* Odd part */ 2049 2050 tmp2 = MULTIPLY(tmp10 - tmp12 - tmp13 + tmp15 + tmp16, 2051 FIX(1.224744871)); /* c5 */ 2052 tmp1 = MULTIPLY(tmp10 - tmp14 - tmp15, FIX(1.344997024)) + /* c3 */ 2053 MULTIPLY(tmp11 - tmp13 - tmp16, FIX(0.831253876)); /* c9 */ 2054 tmp12 = MULTIPLY(tmp12, FIX(1.224744871)); /* c5 */ 2055 tmp4 = MULTIPLY(tmp10 - tmp16, FIX(1.406466353)) + /* c1 */ 2056 MULTIPLY(tmp11 + tmp14, FIX(1.344997024)) + /* c3 */ 2057 MULTIPLY(tmp13 + tmp15, FIX(0.575212477)); /* c11 */ 2058 tmp0 = MULTIPLY(tmp13, FIX(0.475753014)) - /* c7-c11 */ 2059 MULTIPLY(tmp14, FIX(0.513743148)) + /* c3-c9 */ 2060 MULTIPLY(tmp16, FIX(1.700497885)) + tmp4 + tmp12; /* c1+c13 */ 2061 tmp3 = MULTIPLY(tmp10, - FIX(0.355500862)) - /* -(c1-c7) */ 2062 MULTIPLY(tmp11, FIX(2.176250899)) - /* c3+c9 */ 2063 MULTIPLY(tmp15, FIX(0.869244010)) + tmp4 - tmp12; /* c11+c13 */ 2064 2065 dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS); 2066 dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS); 2067 dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS); 2068 dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS); 2069 2070 ctr++; 2071 2072 if (ctr != DCTSIZE) { 2073 if (ctr == 15) 2074 break; /* Done. */ 2075 dataptr += DCTSIZE; /* advance pointer to next row */ 2076 } else 2077 dataptr = workspace; /* switch pointer to extended workspace */ 2078 } 2079 2080 /* Pass 2: process columns. 2081 * We leave the results scaled up by an overall factor of 8. 2082 * We must also scale the output by (8/15)**2 = 64/225, which we partially 2083 * fold into the constant multipliers and final shifting: 2084 * cK now represents sqrt(2) * cos(K*pi/30) * 256/225. 
2085 */ 2086 2087 dataptr = data; 2088 wsptr = workspace; 2089 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { 2090 /* Even part */ 2091 2092 tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*6]; 2093 tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*5]; 2094 tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*4]; 2095 tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*3]; 2096 tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*2]; 2097 tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*1]; 2098 tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*0]; 2099 tmp7 = dataptr[DCTSIZE*7]; 2100 2101 tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*6]; 2102 tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*5]; 2103 tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*4]; 2104 tmp13 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*3]; 2105 tmp14 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*2]; 2106 tmp15 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*1]; 2107 tmp16 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*0]; 2108 2109 z1 = tmp0 + tmp4 + tmp5; 2110 z2 = tmp1 + tmp3 + tmp6; 2111 z3 = tmp2 + tmp7; 2112 dataptr[DCTSIZE*0] = (DCTELEM) 2113 DESCALE(MULTIPLY(z1 + z2 + z3, FIX(1.137777778)), /* 256/225 */ 2114 CONST_BITS+2); 2115 z3 += z3; 2116 dataptr[DCTSIZE*6] = (DCTELEM) 2117 DESCALE(MULTIPLY(z1 - z3, FIX(1.301757503)) - /* c6 */ 2118 MULTIPLY(z2 - z3, FIX(0.497227121)), /* c12 */ 2119 CONST_BITS+2); 2120 tmp2 += ((tmp1 + tmp4) >> 1) - tmp7 - tmp7; 2121 z1 = MULTIPLY(tmp3 - tmp2, FIX(1.742091575)) - /* c2+c14 */ 2122 MULTIPLY(tmp6 - tmp2, FIX(2.546621957)); /* c4+c8 */ 2123 z2 = MULTIPLY(tmp5 - tmp2, FIX(0.908479156)) - /* c8-c14 */ 2124 MULTIPLY(tmp0 - tmp2, FIX(0.103948774)); /* c2-c4 */ 2125 z3 = MULTIPLY(tmp0 - tmp3, FIX(1.573898926)) + /* c2 */ 2126 MULTIPLY(tmp6 - tmp5, FIX(1.076671805)) + /* c8 */ 2127 MULTIPLY(tmp1 - tmp4, FIX(0.899492312)); /* (c6+c12)/2 */ 2128 2129 dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + z3, CONST_BITS+2); 2130 dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(z2 + z3, CONST_BITS+2); 2131 2132 /* Odd part */ 2133 2134 tmp2 = MULTIPLY(tmp10 - tmp12 - tmp13 + tmp15 + tmp16, 2135 
FIX(1.393487498)); /* c5 */ 2136 tmp1 = MULTIPLY(tmp10 - tmp14 - tmp15, FIX(1.530307725)) + /* c3 */ 2137 MULTIPLY(tmp11 - tmp13 - tmp16, FIX(0.945782187)); /* c9 */ 2138 tmp12 = MULTIPLY(tmp12, FIX(1.393487498)); /* c5 */ 2139 tmp4 = MULTIPLY(tmp10 - tmp16, FIX(1.600246161)) + /* c1 */ 2140 MULTIPLY(tmp11 + tmp14, FIX(1.530307725)) + /* c3 */ 2141 MULTIPLY(tmp13 + tmp15, FIX(0.654463974)); /* c11 */ 2142 tmp0 = MULTIPLY(tmp13, FIX(0.541301207)) - /* c7-c11 */ 2143 MULTIPLY(tmp14, FIX(0.584525538)) + /* c3-c9 */ 2144 MULTIPLY(tmp16, FIX(1.934788705)) + tmp4 + tmp12; /* c1+c13 */ 2145 tmp3 = MULTIPLY(tmp10, - FIX(0.404480980)) - /* -(c1-c7) */ 2146 MULTIPLY(tmp11, FIX(2.476089912)) - /* c3+c9 */ 2147 MULTIPLY(tmp15, FIX(0.989006518)) + tmp4 - tmp12; /* c11+c13 */ 2148 2149 dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+2); 2150 dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+2); 2151 dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+2); 2152 dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+2); 2153 2154 dataptr++; /* advance pointer to next column */ 2155 wsptr++; /* advance pointer to next column */ 2156 } 2157 }


/*
 * Perform the forward DCT on a 16x16 sample block.
 *
 * data         receives the 8x8 block of output coefficients.
 * sample_data  supplies the 16 input rows, sample_data[0..15].
 * start_col    is the column offset of the block within each input row.
 *
 * Pass 1 applies a 16-point FDCT to every input row and keeps the 8
 * lowest-frequency outputs: rows 0..7 are stored in data[], rows 8..15
 * in a local workspace.  Pass 2 applies the same 16-point kernel to each
 * of the 8 columns and stores the 8 lowest-frequency outputs in data[].
 */

GLOBAL(void)
jpeg_fdct_16x16 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
{
  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17;
  DCTELEM workspace[DCTSIZE2];
  DCTELEM *dataptr;
  DCTELEM *wsptr;
  JSAMPROW elemptr;
  int ctr;
  SHIFT_TEMPS

  /* Pass 1: process rows. */
  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
  /* furthermore, we scale the results by 2**PASS1_BITS. */
  /* cK represents sqrt(2) * cos(K*pi/32). */

  dataptr = data;
  ctr = 0;
  for (;;) {
    elemptr = sample_data[ctr] + start_col;

    /* Even part */

    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[15]);
    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[14]);
    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[13]);
    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[12]);
    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[11]);
    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[10]);
    tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[9]);
    tmp7 = GETJSAMPLE(elemptr[7]) + GETJSAMPLE(elemptr[8]);

    tmp10 = tmp0 + tmp7;
    tmp14 = tmp0 - tmp7;
    tmp11 = tmp1 + tmp6;
    tmp15 = tmp1 - tmp6;
    tmp12 = tmp2 + tmp5;
    tmp16 = tmp2 - tmp5;
    tmp13 = tmp3 + tmp4;
    tmp17 = tmp3 - tmp4;

    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[15]);
    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[14]);
    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[13]);
    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[12]);
    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[11]);
    tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[10]);
    tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[9]);
    tmp7 = GETJSAMPLE(elemptr[7]) - GETJSAMPLE(elemptr[8]);

    /* Apply unsigned->signed conversion.
     * The centered sum may be negative, so it is scaled by multiplication:
     * left-shifting a negative value is undefined behavior in C.
     */
    dataptr[0] = (DCTELEM)
      ((tmp10 + tmp11 + tmp12 + tmp13 - 16 * CENTERJSAMPLE) *
       ((INT32) 1 << PASS1_BITS));
    dataptr[4] = (DCTELEM)
      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
              MULTIPLY(tmp11 - tmp12, FIX_0_541196100),   /* c12[16] = c6[8] */
              CONST_BITS-PASS1_BITS);

    tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) +   /* c14[16] = c7[8] */
            MULTIPLY(tmp14 - tmp16, FIX(1.387039845));    /* c2[16] = c1[8] */

    dataptr[2] = (DCTELEM)
      DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982))   /* c6+c14 */
              + MULTIPLY(tmp16, FIX(2.172734804)),        /* c2+c10 */
              CONST_BITS-PASS1_BITS);
    dataptr[6] = (DCTELEM)
      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243))   /* c2-c6 */
              - MULTIPLY(tmp17, FIX(1.061594338)),        /* c10+c14 */
              CONST_BITS-PASS1_BITS);

    /* Odd part */

    tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) +         /* c3 */
            MULTIPLY(tmp6 - tmp7, FIX(0.410524528));          /* c13 */
    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) +         /* c5 */
            MULTIPLY(tmp5 + tmp7, FIX(0.666655658));          /* c11 */
    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) +         /* c7 */
            MULTIPLY(tmp4 - tmp7, FIX(0.897167586));          /* c9 */
    tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) +         /* c15 */
            MULTIPLY(tmp6 - tmp5, FIX(1.407403738));          /* c1 */
    tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) +       /* -c11 */
            MULTIPLY(tmp4 + tmp6, - FIX(1.247225013));        /* -c5 */
    tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) +       /* -c3 */
            MULTIPLY(tmp5 - tmp4, FIX(0.410524528));          /* c13 */
    tmp10 = tmp11 + tmp12 + tmp13 -
            MULTIPLY(tmp0, FIX(2.286341144)) +                /* c7+c5+c3-c1 */
            MULTIPLY(tmp7, FIX(0.779653625));                 /* c15+c13-c11+c9 */
    tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
             - MULTIPLY(tmp6, FIX(1.663905119));              /* c7+c13+c1-c5 */
    tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
             + MULTIPLY(tmp5, FIX(1.227391138));              /* c9-c11+c1-c13 */
    tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
             + MULTIPLY(tmp4, FIX(2.167985692));              /* c1+c13+c5-c9 */

    dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
    dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
    dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
    dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);

    ctr++;

    if (ctr != DCTSIZE) {
      if (ctr == DCTSIZE * 2)
        break;                  /* Done. */
      dataptr += DCTSIZE;       /* advance pointer to next row */
    } else
      dataptr = workspace;      /* switch pointer to extended workspace */
  }

  /* Pass 2: process columns.
   * We remove the PASS1_BITS scaling, but leave the results scaled up
   * by an overall factor of 8.
   * We must also scale the output by (8/16)**2 = 1/2**2.
   */

  dataptr = data;
  wsptr = workspace;
  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
    /* Even part */

    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*7];
    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*6];
    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*5];
    tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*4];
    tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*3];
    tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*2];
    tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*1];
    tmp7 = dataptr[DCTSIZE*7] + wsptr[DCTSIZE*0];

    tmp10 = tmp0 + tmp7;
    tmp14 = tmp0 - tmp7;
    tmp11 = tmp1 + tmp6;
    tmp15 = tmp1 - tmp6;
    tmp12 = tmp2 + tmp5;
    tmp16 = tmp2 - tmp5;
    tmp13 = tmp3 + tmp4;
    tmp17 = tmp3 - tmp4;

    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*7];
    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*6];
    tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*5];
    tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*4];
    tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*3];
    tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*2];
    tmp6 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*1];
    tmp7 = dataptr[DCTSIZE*7] - wsptr[DCTSIZE*0];

    dataptr[DCTSIZE*0] = (DCTELEM)
      DESCALE(tmp10 + tmp11 + tmp12 + tmp13, PASS1_BITS+2);
    dataptr[DCTSIZE*4] = (DCTELEM)
      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
              MULTIPLY(tmp11 - tmp12, FIX_0_541196100),   /* c12[16] = c6[8] */
              CONST_BITS+PASS1_BITS+2);

    tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) +   /* c14[16] = c7[8] */
            MULTIPLY(tmp14 - tmp16, FIX(1.387039845));    /* c2[16] = c1[8] */

    dataptr[DCTSIZE*2] = (DCTELEM)
      DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982))   /* c6+c14 */
              + MULTIPLY(tmp16, FIX(2.172734804)),        /* c2+c10 */
              CONST_BITS+PASS1_BITS+2);
    dataptr[DCTSIZE*6] = (DCTELEM)
      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243))   /* c2-c6 */
              - MULTIPLY(tmp17, FIX(1.061594338)),        /* c10+c14 */
              CONST_BITS+PASS1_BITS+2);

    /* Odd part */

    tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) +         /* c3 */
            MULTIPLY(tmp6 - tmp7, FIX(0.410524528));          /* c13 */
    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) +         /* c5 */
            MULTIPLY(tmp5 + tmp7, FIX(0.666655658));          /* c11 */
    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) +         /* c7 */
            MULTIPLY(tmp4 - tmp7, FIX(0.897167586));          /* c9 */
    tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) +         /* c15 */
            MULTIPLY(tmp6 - tmp5, FIX(1.407403738));          /* c1 */
    tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) +       /* -c11 */
            MULTIPLY(tmp4 + tmp6, - FIX(1.247225013));        /* -c5 */
    tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) +       /* -c3 */
            MULTIPLY(tmp5 - tmp4, FIX(0.410524528));          /* c13 */
    tmp10 = tmp11 + tmp12 + tmp13 -
            MULTIPLY(tmp0, FIX(2.286341144)) +                /* c7+c5+c3-c1 */
            MULTIPLY(tmp7, FIX(0.779653625));                 /* c15+c13-c11+c9 */
    tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
             - MULTIPLY(tmp6, FIX(1.663905119));              /* c7+c13+c1-c5 */
    tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
             + MULTIPLY(tmp5, FIX(1.227391138));              /* c9-c11+c1-c13 */
    tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
             + MULTIPLY(tmp4, FIX(2.167985692));              /* c1+c13+c5-c9 */

    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS1_BITS+2);
    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS1_BITS+2);
    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS1_BITS+2);
    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS1_BITS+2);

    dataptr++;                  /* advance pointer to next column */
    wsptr++;                    /* advance pointer to next column */
  }
}


/*
 * Perform the forward DCT on a 16x8 sample block.
 *
 * 16-point FDCT in pass 1 (rows), 8-point in pass 2 (columns).
 *
 * data         receives the 8x8 block of output coefficients.
 * sample_data  supplies the 8 input rows, sample_data[0..7], of which
 *              16 samples each are read.
 * start_col    is the column offset of the block within each input row.
 *
 * Only 8 rows are produced by pass 1, so both passes work directly in
 * data[] and no extra workspace is needed.
 */

GLOBAL(void)
jpeg_fdct_16x8 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
{
  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17;
  INT32 z1;
  DCTELEM *dataptr;
  JSAMPROW elemptr;
  int ctr;
  SHIFT_TEMPS

  /* Pass 1: process rows. */
  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
  /* furthermore, we scale the results by 2**PASS1_BITS. */
  /* 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32). */

  dataptr = data;
  for (ctr = 0; ctr < DCTSIZE; ctr++) {
    elemptr = sample_data[ctr] + start_col;

    /* Even part */

    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[15]);
    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[14]);
    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[13]);
    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[12]);
    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[11]);
    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[10]);
    tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[9]);
    tmp7 = GETJSAMPLE(elemptr[7]) + GETJSAMPLE(elemptr[8]);

    tmp10 = tmp0 + tmp7;
    tmp14 = tmp0 - tmp7;
    tmp11 = tmp1 + tmp6;
    tmp15 = tmp1 - tmp6;
    tmp12 = tmp2 + tmp5;
    tmp16 = tmp2 - tmp5;
    tmp13 = tmp3 + tmp4;
    tmp17 = tmp3 - tmp4;

    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[15]);
    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[14]);
    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[13]);
    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[12]);
    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[11]);
    tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[10]);
    tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[9]);
    tmp7 = GETJSAMPLE(elemptr[7]) - GETJSAMPLE(elemptr[8]);

    /* Apply unsigned->signed conversion.
     * The centered sum may be negative, so it is scaled by multiplication:
     * left-shifting a negative value is undefined behavior in C.
     */
    dataptr[0] = (DCTELEM)
      ((tmp10 + tmp11 + tmp12 + tmp13 - 16 * CENTERJSAMPLE) *
       ((INT32) 1 << PASS1_BITS));
    dataptr[4] = (DCTELEM)
      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
              MULTIPLY(tmp11 - tmp12, FIX_0_541196100),   /* c12[16] = c6[8] */
              CONST_BITS-PASS1_BITS);

    tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) +   /* c14[16] = c7[8] */
            MULTIPLY(tmp14 - tmp16, FIX(1.387039845));    /* c2[16] = c1[8] */

    dataptr[2] = (DCTELEM)
      DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982))   /* c6+c14 */
              + MULTIPLY(tmp16, FIX(2.172734804)),        /* c2+c10 */
              CONST_BITS-PASS1_BITS);
    dataptr[6] = (DCTELEM)
      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243))   /* c2-c6 */
              - MULTIPLY(tmp17, FIX(1.061594338)),        /* c10+c14 */
              CONST_BITS-PASS1_BITS);

    /* Odd part */

    tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) +         /* c3 */
            MULTIPLY(tmp6 - tmp7, FIX(0.410524528));          /* c13 */
    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) +         /* c5 */
            MULTIPLY(tmp5 + tmp7, FIX(0.666655658));          /* c11 */
    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) +         /* c7 */
            MULTIPLY(tmp4 - tmp7, FIX(0.897167586));          /* c9 */
    tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) +         /* c15 */
            MULTIPLY(tmp6 - tmp5, FIX(1.407403738));          /* c1 */
    tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) +       /* -c11 */
            MULTIPLY(tmp4 + tmp6, - FIX(1.247225013));        /* -c5 */
    tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) +       /* -c3 */
            MULTIPLY(tmp5 - tmp4, FIX(0.410524528));          /* c13 */
    tmp10 = tmp11 + tmp12 + tmp13 -
            MULTIPLY(tmp0, FIX(2.286341144)) +                /* c7+c5+c3-c1 */
            MULTIPLY(tmp7, FIX(0.779653625));                 /* c15+c13-c11+c9 */
    tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
             - MULTIPLY(tmp6, FIX(1.663905119));              /* c7+c13+c1-c5 */
    tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
             + MULTIPLY(tmp5, FIX(1.227391138));              /* c9-c11+c1-c13 */
    tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
             + MULTIPLY(tmp4, FIX(2.167985692));              /* c1+c13+c5-c9 */

    dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
    dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
    dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
    dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);

    dataptr += DCTSIZE;         /* advance pointer to next row */
  }

  /* Pass 2: process columns.
   * We remove the PASS1_BITS scaling, but leave the results scaled up
   * by an overall factor of 8.
   * We must also scale the output by 8/16 = 1/2.
   */

  dataptr = data;
  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
    /* Even part per LL&M figure 1 --- note that published figure is faulty;
     * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
     */

    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];

    tmp10 = tmp0 + tmp3;
    tmp12 = tmp0 - tmp3;
    tmp11 = tmp1 + tmp2;
    tmp13 = tmp1 - tmp2;

    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
    tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];

    dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS+1);
    dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS+1);

    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865),
                                           CONST_BITS+PASS1_BITS+1);
    dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065),
                                           CONST_BITS+PASS1_BITS+1);

    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
     * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
     * i0..i3 in the paper are tmp0..tmp3 here.
     */

    tmp10 = tmp0 + tmp3;
    tmp11 = tmp1 + tmp2;
    tmp12 = tmp0 + tmp2;
    tmp13 = tmp1 + tmp3;
    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602);  /* c3 */

    tmp0 = MULTIPLY(tmp0, FIX_1_501321110);         /* c1+c3-c5-c7 */
    tmp1 = MULTIPLY(tmp1, FIX_3_072711026);         /* c1+c3+c5-c7 */
    tmp2 = MULTIPLY(tmp2, FIX_2_053119869);         /* c1+c3-c5+c7 */
    tmp3 = MULTIPLY(tmp3, FIX_0_298631336);         /* -c1+c3+c5-c7 */
    tmp10 = MULTIPLY(tmp10, - FIX_0_899976223);     /* c7-c3 */
    tmp11 = MULTIPLY(tmp11, - FIX_2_562915447);     /* -c1-c3 */
    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);     /* c5-c3 */
    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);     /* -c3-c5 */

    tmp12 += z1;
    tmp13 += z1;

    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0 + tmp10 + tmp12,
                                           CONST_BITS+PASS1_BITS+1);
    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1 + tmp11 + tmp13,
                                           CONST_BITS+PASS1_BITS+1);
    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2 + tmp11 + tmp12,
                                           CONST_BITS+PASS1_BITS+1);
    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3 + tmp10 + tmp13,
                                           CONST_BITS+PASS1_BITS+1);

    dataptr++;                  /* advance pointer to next column */
  }
}


2547 /* 2548 * Perform the forward DCT on a 14x7 sample block. 2549 * 2550 * 14-point FDCT in pass 1 (rows), 7-point in pass 2 (columns). 2551 */ 2552 2553 GLOBAL(void) 2554 jpeg_fdct_14x7 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 2555 { 2556 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6; 2557 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; 2558 INT32 z1, z2, z3; 2559 DCTELEM *dataptr; 2560 JSAMPROW elemptr; 2561 int ctr; 2562 SHIFT_TEMPS 2563 2564 /* Zero bottom row of output coefficient block. */ 2565 MEMZERO(&data[DCTSIZE*7], SIZEOF(DCTELEM) * DCTSIZE); 2566 2567 /* Pass 1: process rows. */ 2568 /* Note results are scaled up by sqrt(8) compared to a true DCT; */ 2569 /* furthermore, we scale the results by 2**PASS1_BITS.
*/ 2570 /* 14-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/28). */ 2571 2572 dataptr = data; 2573 for (ctr = 0; ctr < 7; ctr++) { 2574 elemptr = sample_data[ctr] + start_col; 2575 2576 /* Even part */ 2577 2578 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[13]); 2579 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[12]); 2580 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[11]); 2581 tmp13 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[10]); 2582 tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[9]); 2583 tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[8]); 2584 tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[7]); 2585 2586 tmp10 = tmp0 + tmp6; 2587 tmp14 = tmp0 - tmp6; 2588 tmp11 = tmp1 + tmp5; 2589 tmp15 = tmp1 - tmp5; 2590 tmp12 = tmp2 + tmp4; 2591 tmp16 = tmp2 - tmp4; 2592 2593 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[13]); 2594 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[12]); 2595 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[11]); 2596 tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[10]); 2597 tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[9]); 2598 tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[8]); 2599 tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[7]); 2600 2601 /* Apply unsigned->signed conversion */ 2602 dataptr[0] = (DCTELEM) 2603 ((tmp10 + tmp11 + tmp12 + tmp13 - 14 * CENTERJSAMPLE) << PASS1_BITS); 2604 tmp13 += tmp13; 2605 dataptr[4] = (DCTELEM) 2606 DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.274162392)) + /* c4 */ 2607 MULTIPLY(tmp11 - tmp13, FIX(0.314692123)) - /* c12 */ 2608 MULTIPLY(tmp12 - tmp13, FIX(0.881747734)), /* c8 */ 2609 CONST_BITS-PASS1_BITS); 2610 2611 tmp10 = MULTIPLY(tmp14 + tmp15, FIX(1.105676686)); /* c6 */ 2612 2613 dataptr[2] = (DCTELEM) 2614 DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.273079590)) /* c2-c6 */ 2615 + MULTIPLY(tmp16, FIX(0.613604268)), /* c10 */ 2616 CONST_BITS-PASS1_BITS); 2617 dataptr[6] = (DCTELEM) 2618 DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.719280954)) /* c6+c10 
*/ 2619 - MULTIPLY(tmp16, FIX(1.378756276)), /* c2 */ 2620 CONST_BITS-PASS1_BITS); 2621 2622 /* Odd part */ 2623 2624 tmp10 = tmp1 + tmp2; 2625 tmp11 = tmp5 - tmp4; 2626 dataptr[7] = (DCTELEM) ((tmp0 - tmp10 + tmp3 - tmp11 - tmp6) << PASS1_BITS); 2627 tmp3 <<= CONST_BITS; 2628 tmp10 = MULTIPLY(tmp10, - FIX(0.158341681)); /* -c13 */ 2629 tmp11 = MULTIPLY(tmp11, FIX(1.405321284)); /* c1 */ 2630 tmp10 += tmp11 - tmp3; 2631 tmp11 = MULTIPLY(tmp0 + tmp2, FIX(1.197448846)) + /* c5 */ 2632 MULTIPLY(tmp4 + tmp6, FIX(0.752406978)); /* c9 */ 2633 dataptr[5] = (DCTELEM) 2634 DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(2.373959773)) /* c3+c5-c13 */ 2635 + MULTIPLY(tmp4, FIX(1.119999435)), /* c1+c11-c9 */ 2636 CONST_BITS-PASS1_BITS); 2637 tmp12 = MULTIPLY(tmp0 + tmp1, FIX(1.334852607)) + /* c3 */ 2638 MULTIPLY(tmp5 - tmp6, FIX(0.467085129)); /* c11 */ 2639 dataptr[3] = (DCTELEM) 2640 DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.424103948)) /* c3-c9-c13 */ 2641 - MULTIPLY(tmp5, FIX(3.069855259)), /* c1+c5+c11 */ 2642 CONST_BITS-PASS1_BITS); 2643 dataptr[1] = (DCTELEM) 2644 DESCALE(tmp11 + tmp12 + tmp3 + tmp6 - 2645 MULTIPLY(tmp0 + tmp6, FIX(1.126980169)), /* c3+c5-c1 */ 2646 CONST_BITS-PASS1_BITS); 2647 2648 dataptr += DCTSIZE; /* advance pointer to next row */ 2649 } 2650 2651 /* Pass 2: process columns. 2652 * We remove the PASS1_BITS scaling, but leave the results scaled up 2653 * by an overall factor of 8. 2654 * We must also scale the output by (8/14)*(8/7) = 32/49, which we 2655 * partially fold into the constant multipliers and final shifting: 2656 * 7-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/14) * 64/49. 
2657 */ 2658 2659 dataptr = data; 2660 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { 2661 /* Even part */ 2662 2663 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*6]; 2664 tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*5]; 2665 tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*4]; 2666 tmp3 = dataptr[DCTSIZE*3]; 2667 2668 tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*6]; 2669 tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*5]; 2670 tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*4]; 2671 2672 z1 = tmp0 + tmp2; 2673 dataptr[DCTSIZE*0] = (DCTELEM) 2674 DESCALE(MULTIPLY(z1 + tmp1 + tmp3, FIX(1.306122449)), /* 64/49 */ 2675 CONST_BITS+PASS1_BITS+1); 2676 tmp3 += tmp3; 2677 z1 -= tmp3; 2678 z1 -= tmp3; 2679 z1 = MULTIPLY(z1, FIX(0.461784020)); /* (c2+c6-c4)/2 */ 2680 z2 = MULTIPLY(tmp0 - tmp2, FIX(1.202428084)); /* (c2+c4-c6)/2 */ 2681 z3 = MULTIPLY(tmp1 - tmp2, FIX(0.411026446)); /* c6 */ 2682 dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS+PASS1_BITS+1); 2683 z1 -= z2; 2684 z2 = MULTIPLY(tmp0 - tmp1, FIX(1.151670509)); /* c4 */ 2685 dataptr[DCTSIZE*4] = (DCTELEM) 2686 DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.923568041)), /* c2+c6-c4 */ 2687 CONST_BITS+PASS1_BITS+1); 2688 dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS+PASS1_BITS+1); 2689 2690 /* Odd part */ 2691 2692 tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.221765677)); /* (c3+c1-c5)/2 */ 2693 tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.222383464)); /* (c3+c5-c1)/2 */ 2694 tmp0 = tmp1 - tmp2; 2695 tmp1 += tmp2; 2696 tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.800824523)); /* -c1 */ 2697 tmp1 += tmp2; 2698 tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.801442310)); /* c5 */ 2699 tmp0 += tmp3; 2700 tmp2 += tmp3 + MULTIPLY(tmp12, FIX(2.443531355)); /* c3+c1-c5 */ 2701 2702 dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+PASS1_BITS+1); 2703 dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+PASS1_BITS+1); 2704 dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+PASS1_BITS+1); 2705 2706 dataptr++; /* advance pointer to next 
column */ 2707 } 2708 } 2709 2710 2711 /* 2712 * Perform the forward DCT on a 12x6 sample block. 2713 * 2714 * 12-point FDCT in pass 1 (rows), 6-point in pass 2 (columns). 2715 */ 2716 2717 GLOBAL(void) 2718 jpeg_fdct_12x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 2719 { 2720 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5; 2721 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; 2722 DCTELEM *dataptr; 2723 JSAMPROW elemptr; 2724 int ctr; 2725 SHIFT_TEMPS 2726 2727 /* Zero 2 bottom rows of output coefficient block. */ 2728 MEMZERO(&data[DCTSIZE*6], SIZEOF(DCTELEM) * DCTSIZE * 2); 2729 2730 /* Pass 1: process rows. */ 2731 /* Note results are scaled up by sqrt(8) compared to a true DCT; */ 2732 /* furthermore, we scale the results by 2**PASS1_BITS. */ 2733 /* 12-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/24). */ 2734 2735 dataptr = data; 2736 for (ctr = 0; ctr < 6; ctr++) { 2737 elemptr = sample_data[ctr] + start_col; 2738 2739 /* Even part */ 2740 2741 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[11]); 2742 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[10]); 2743 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[9]); 2744 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[8]); 2745 tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[7]); 2746 tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[6]); 2747 2748 tmp10 = tmp0 + tmp5; 2749 tmp13 = tmp0 - tmp5; 2750 tmp11 = tmp1 + tmp4; 2751 tmp14 = tmp1 - tmp4; 2752 tmp12 = tmp2 + tmp3; 2753 tmp15 = tmp2 - tmp3; 2754 2755 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[11]); 2756 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[10]); 2757 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[9]); 2758 tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[8]); 2759 tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[7]); 2760 tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[6]); 2761 2762 /* Apply unsigned->signed conversion */ 2763 dataptr[0] = (DCTELEM) 2764 ((tmp10 + tmp11 + tmp12 - 12 * 
CENTERJSAMPLE) << PASS1_BITS); 2765 dataptr[6] = (DCTELEM) ((tmp13 - tmp14 - tmp15) << PASS1_BITS); 2766 dataptr[4] = (DCTELEM) 2767 DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.224744871)), /* c4 */ 2768 CONST_BITS-PASS1_BITS); 2769 dataptr[2] = (DCTELEM) 2770 DESCALE(tmp14 - tmp15 + MULTIPLY(tmp13 + tmp15, FIX(1.366025404)), /* c2 */ 2771 CONST_BITS-PASS1_BITS); 2772 2773 /* Odd part */ 2774 2775 tmp10 = MULTIPLY(tmp1 + tmp4, FIX_0_541196100); /* c9 */ 2776 tmp14 = tmp10 + MULTIPLY(tmp1, FIX_0_765366865); /* c3-c9 */ 2777 tmp15 = tmp10 - MULTIPLY(tmp4, FIX_1_847759065); /* c3+c9 */ 2778 tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.121971054)); /* c5 */ 2779 tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.860918669)); /* c7 */ 2780 tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.580774953)) /* c5+c7-c1 */ 2781 + MULTIPLY(tmp5, FIX(0.184591911)); /* c11 */ 2782 tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.184591911)); /* -c11 */ 2783 tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.339493912)) /* c1+c5-c11 */ 2784 + MULTIPLY(tmp5, FIX(0.860918669)); /* c7 */ 2785 tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.725788011)) /* c1+c11-c7 */ 2786 - MULTIPLY(tmp5, FIX(1.121971054)); /* c5 */ 2787 tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.306562965)) /* c3 */ 2788 - MULTIPLY(tmp2 + tmp5, FIX_0_541196100); /* c9 */ 2789 2790 dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS); 2791 dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS); 2792 dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS); 2793 dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS); 2794 2795 dataptr += DCTSIZE; /* advance pointer to next row */ 2796 } 2797 2798 /* Pass 2: process columns. 2799 * We remove the PASS1_BITS scaling, but leave the results scaled up 2800 * by an overall factor of 8. 2801 * We must also scale the output by (8/12)*(8/6) = 8/9, which we 2802 * partially fold into the constant multipliers and final shifting: 2803 * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12) * 16/9. 
2804 */ 2805 2806 dataptr = data; 2807 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { 2808 /* Even part */ 2809 2810 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5]; 2811 tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4]; 2812 tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3]; 2813 2814 tmp10 = tmp0 + tmp2; 2815 tmp12 = tmp0 - tmp2; 2816 2817 tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5]; 2818 tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4]; 2819 tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3]; 2820 2821 dataptr[DCTSIZE*0] = (DCTELEM) 2822 DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)), /* 16/9 */ 2823 CONST_BITS+PASS1_BITS+1); 2824 dataptr[DCTSIZE*2] = (DCTELEM) 2825 DESCALE(MULTIPLY(tmp12, FIX(2.177324216)), /* c2 */ 2826 CONST_BITS+PASS1_BITS+1); 2827 dataptr[DCTSIZE*4] = (DCTELEM) 2828 DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */ 2829 CONST_BITS+PASS1_BITS+1); 2830 2831 /* Odd part */ 2832 2833 tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829)); /* c5 */ 2834 2835 dataptr[DCTSIZE*1] = (DCTELEM) 2836 DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)), /* 16/9 */ 2837 CONST_BITS+PASS1_BITS+1); 2838 dataptr[DCTSIZE*3] = (DCTELEM) 2839 DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)), /* 16/9 */ 2840 CONST_BITS+PASS1_BITS+1); 2841 dataptr[DCTSIZE*5] = (DCTELEM) 2842 DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)), /* 16/9 */ 2843 CONST_BITS+PASS1_BITS+1); 2844 2845 dataptr++; /* advance pointer to next column */ 2846 } 2847 } 2848 2849 2850 /* 2851 * Perform the forward DCT on a 10x5 sample block. 2852 * 2853 * 10-point FDCT in pass 1 (rows), 5-point in pass 2 (columns). 2854 */ 2855 2856 GLOBAL(void) 2857 jpeg_fdct_10x5 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 2858 { 2859 INT32 tmp0, tmp1, tmp2, tmp3, tmp4; 2860 INT32 tmp10, tmp11, tmp12, tmp13, tmp14; 2861 DCTELEM *dataptr; 2862 JSAMPROW elemptr; 2863 int ctr; 2864 SHIFT_TEMPS 2865 2866 /* Zero 3 bottom rows of output coefficient block. 
*/ 2867 MEMZERO(&data[DCTSIZE*5], SIZEOF(DCTELEM) * DCTSIZE * 3); 2868 2869 /* Pass 1: process rows. */ 2870 /* Note results are scaled up by sqrt(8) compared to a true DCT; */ 2871 /* furthermore, we scale the results by 2**PASS1_BITS. */ 2872 /* 10-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/20). */ 2873 2874 dataptr = data; 2875 for (ctr = 0; ctr < 5; ctr++) { 2876 elemptr = sample_data[ctr] + start_col; 2877 2878 /* Even part */ 2879 2880 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[9]); 2881 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[8]); 2882 tmp12 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[7]); 2883 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[6]); 2884 tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[5]); 2885 2886 tmp10 = tmp0 + tmp4; 2887 tmp13 = tmp0 - tmp4; 2888 tmp11 = tmp1 + tmp3; 2889 tmp14 = tmp1 - tmp3; 2890 2891 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[9]); 2892 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[8]); 2893 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[7]); 2894 tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[6]); 2895 tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[5]); 2896 2897 /* Apply unsigned->signed conversion */ 2898 dataptr[0] = (DCTELEM) 2899 ((tmp10 + tmp11 + tmp12 - 10 * CENTERJSAMPLE) << PASS1_BITS); 2900 tmp12 += tmp12; 2901 dataptr[4] = (DCTELEM) 2902 DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.144122806)) - /* c4 */ 2903 MULTIPLY(tmp11 - tmp12, FIX(0.437016024)), /* c8 */ 2904 CONST_BITS-PASS1_BITS); 2905 tmp10 = MULTIPLY(tmp13 + tmp14, FIX(0.831253876)); /* c6 */ 2906 dataptr[2] = (DCTELEM) 2907 DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.513743148)), /* c2-c6 */ 2908 CONST_BITS-PASS1_BITS); 2909 dataptr[6] = (DCTELEM) 2910 DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.176250899)), /* c2+c6 */ 2911 CONST_BITS-PASS1_BITS); 2912 2913 /* Odd part */ 2914 2915 tmp10 = tmp0 + tmp4; 2916 tmp11 = tmp1 - tmp3; 2917 dataptr[5] = (DCTELEM) ((tmp10 - tmp11 - tmp2) << PASS1_BITS); 2918 tmp2 <<= 
CONST_BITS; 2919 dataptr[1] = (DCTELEM) 2920 DESCALE(MULTIPLY(tmp0, FIX(1.396802247)) + /* c1 */ 2921 MULTIPLY(tmp1, FIX(1.260073511)) + tmp2 + /* c3 */ 2922 MULTIPLY(tmp3, FIX(0.642039522)) + /* c7 */ 2923 MULTIPLY(tmp4, FIX(0.221231742)), /* c9 */ 2924 CONST_BITS-PASS1_BITS); 2925 tmp12 = MULTIPLY(tmp0 - tmp4, FIX(0.951056516)) - /* (c3+c7)/2 */ 2926 MULTIPLY(tmp1 + tmp3, FIX(0.587785252)); /* (c1-c9)/2 */ 2927 tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.309016994)) + /* (c3-c7)/2 */ 2928 (tmp11 << (CONST_BITS - 1)) - tmp2; 2929 dataptr[3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS-PASS1_BITS); 2930 dataptr[7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS-PASS1_BITS); 2931 2932 dataptr += DCTSIZE; /* advance pointer to next row */ 2933 } 2934 2935 /* Pass 2: process columns. 2936 * We remove the PASS1_BITS scaling, but leave the results scaled up 2937 * by an overall factor of 8. 2938 * We must also scale the output by (8/10)*(8/5) = 32/25, which we 2939 * fold into the constant multipliers: 2940 * 5-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/10) * 32/25. 
2941 */ 2942 2943 dataptr = data; 2944 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { 2945 /* Even part */ 2946 2947 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*4]; 2948 tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*3]; 2949 tmp2 = dataptr[DCTSIZE*2]; 2950 2951 tmp10 = tmp0 + tmp1; 2952 tmp11 = tmp0 - tmp1; 2953 2954 tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*4]; 2955 tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*3]; 2956 2957 dataptr[DCTSIZE*0] = (DCTELEM) 2958 DESCALE(MULTIPLY(tmp10 + tmp2, FIX(1.28)), /* 32/25 */ 2959 CONST_BITS+PASS1_BITS); 2960 tmp11 = MULTIPLY(tmp11, FIX(1.011928851)); /* (c2+c4)/2 */ 2961 tmp10 -= tmp2 << 2; 2962 tmp10 = MULTIPLY(tmp10, FIX(0.452548340)); /* (c2-c4)/2 */ 2963 dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS+PASS1_BITS); 2964 dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS+PASS1_BITS); 2965 2966 /* Odd part */ 2967 2968 tmp10 = MULTIPLY(tmp0 + tmp1, FIX(1.064004961)); /* c3 */ 2969 2970 dataptr[DCTSIZE*1] = (DCTELEM) 2971 DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.657591230)), /* c1-c3 */ 2972 CONST_BITS+PASS1_BITS); 2973 dataptr[DCTSIZE*3] = (DCTELEM) 2974 DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.785601151)), /* c1+c3 */ 2975 CONST_BITS+PASS1_BITS); 2976 2977 dataptr++; /* advance pointer to next column */ 2978 } 2979 } 2980 2981 2982 /* 2983 * Perform the forward DCT on an 8x4 sample block. 2984 * 2985 * 8-point FDCT in pass 1 (rows), 4-point in pass 2 (columns). 2986 */ 2987 2988 GLOBAL(void) 2989 jpeg_fdct_8x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 2990 { 2991 INT32 tmp0, tmp1, tmp2, tmp3; 2992 INT32 tmp10, tmp11, tmp12, tmp13; 2993 INT32 z1; 2994 DCTELEM *dataptr; 2995 JSAMPROW elemptr; 2996 int ctr; 2997 SHIFT_TEMPS 2998 2999 /* Zero 4 bottom rows of output coefficient block. */ 3000 MEMZERO(&data[DCTSIZE*4], SIZEOF(DCTELEM) * DCTSIZE * 4); 3001 3002 /* Pass 1: process rows. 
*/ 3003 /* Note results are scaled up by sqrt(8) compared to a true DCT; */ 3004 /* furthermore, we scale the results by 2**PASS1_BITS. */ 3005 /* We must also scale the output by 8/4 = 2, which we add here. */ 3006 3007 dataptr = data; 3008 for (ctr = 0; ctr < 4; ctr++) { 3009 elemptr = sample_data[ctr] + start_col; 3010 3011 /* Even part per LL&M figure 1 --- note that published figure is faulty; 3012 * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". 3013 */ 3014 3015 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]); 3016 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]); 3017 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]); 3018 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]); 3019 3020 tmp10 = tmp0 + tmp3; 3021 tmp12 = tmp0 - tmp3; 3022 tmp11 = tmp1 + tmp2; 3023 tmp13 = tmp1 - tmp2; 3024 3025 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]); 3026 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]); 3027 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]); 3028 tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]); 3029 3030 /* Apply unsigned->signed conversion */ 3031 dataptr[0] = (DCTELEM) 3032 ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << (PASS1_BITS+1)); 3033 dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << (PASS1_BITS+1)); 3034 3035 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); 3036 /* Add fudge factor here for final descale. */ 3037 z1 += ONE << (CONST_BITS-PASS1_BITS-2); 3038 dataptr[2] = (DCTELEM) RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), 3039 CONST_BITS-PASS1_BITS-1); 3040 dataptr[6] = (DCTELEM) RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), 3041 CONST_BITS-PASS1_BITS-1); 3042 3043 /* Odd part per figure 8 --- note paper omits factor of sqrt(2). 3044 * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). 3045 * i0..i3 in the paper are tmp0..tmp3 here. 
3046 */ 3047 3048 tmp10 = tmp0 + tmp3; 3049 tmp11 = tmp1 + tmp2; 3050 tmp12 = tmp0 + tmp2; 3051 tmp13 = tmp1 + tmp3; 3052 z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ 3053 /* Add fudge factor here for final descale. */ 3054 z1 += ONE << (CONST_BITS-PASS1_BITS-2); 3055 3056 tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ 3057 tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ 3058 tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ 3059 tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ 3060 tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */ 3061 tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */ 3062 tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */ 3063 tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ 3064 3065 tmp12 += z1; 3066 tmp13 += z1; 3067 3068 dataptr[1] = (DCTELEM) 3069 RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS-PASS1_BITS-1); 3070 dataptr[3] = (DCTELEM) 3071 RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS-PASS1_BITS-1); 3072 dataptr[5] = (DCTELEM) 3073 RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS-PASS1_BITS-1); 3074 dataptr[7] = (DCTELEM) 3075 RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS-PASS1_BITS-1); 3076 3077 dataptr += DCTSIZE; /* advance pointer to next row */ 3078 } 3079 3080 /* Pass 2: process columns. 3081 * We remove the PASS1_BITS scaling, but leave the results scaled up 3082 * by an overall factor of 8. 3083 * 4-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). 3084 */ 3085 3086 dataptr = data; 3087 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { 3088 /* Even part */ 3089 3090 /* Add fudge factor here for final descale. 
*/ 3091 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3] + (ONE << (PASS1_BITS-1)); 3092 tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2]; 3093 3094 tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3]; 3095 tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2]; 3096 3097 dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp0 + tmp1, PASS1_BITS); 3098 dataptr[DCTSIZE*2] = (DCTELEM) RIGHT_SHIFT(tmp0 - tmp1, PASS1_BITS); 3099 3100 /* Odd part */ 3101 3102 tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */ 3103 /* Add fudge factor here for final descale. */ 3104 tmp0 += ONE << (CONST_BITS+PASS1_BITS-1); 3105 3106 dataptr[DCTSIZE*1] = (DCTELEM) 3107 RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */ 3108 CONST_BITS+PASS1_BITS); 3109 dataptr[DCTSIZE*3] = (DCTELEM) 3110 RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */ 3111 CONST_BITS+PASS1_BITS); 3112 3113 dataptr++; /* advance pointer to next column */ 3114 } 3115 } 3116 3117 3118 /* 3119 * Perform the forward DCT on a 6x3 sample block. 3120 * 3121 * 6-point FDCT in pass 1 (rows), 3-point in pass 2 (columns). 3122 */ 3123 3124 GLOBAL(void) 3125 jpeg_fdct_6x3 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 3126 { 3127 INT32 tmp0, tmp1, tmp2; 3128 INT32 tmp10, tmp11, tmp12; 3129 DCTELEM *dataptr; 3130 JSAMPROW elemptr; 3131 int ctr; 3132 SHIFT_TEMPS 3133 3134 /* Pre-zero output coefficient block. */ 3135 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); 3136 3137 /* Pass 1: process rows. */ 3138 /* Note results are scaled up by sqrt(8) compared to a true DCT; */ 3139 /* furthermore, we scale the results by 2**PASS1_BITS. */ 3140 /* We scale the results further by 2 as part of output adaption */ 3141 /* scaling for different DCT size. */ 3142 /* 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12). 
*/ 3143 3144 dataptr = data; 3145 for (ctr = 0; ctr < 3; ctr++) { 3146 elemptr = sample_data[ctr] + start_col; 3147 3148 /* Even part */ 3149 3150 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]); 3151 tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]); 3152 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]); 3153 3154 tmp10 = tmp0 + tmp2; 3155 tmp12 = tmp0 - tmp2; 3156 3157 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]); 3158 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]); 3159 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]); 3160 3161 /* Apply unsigned->signed conversion */ 3162 dataptr[0] = (DCTELEM) 3163 ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << (PASS1_BITS+1)); 3164 dataptr[2] = (DCTELEM) 3165 DESCALE(MULTIPLY(tmp12, FIX(1.224744871)), /* c2 */ 3166 CONST_BITS-PASS1_BITS-1); 3167 dataptr[4] = (DCTELEM) 3168 DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */ 3169 CONST_BITS-PASS1_BITS-1); 3170 3171 /* Odd part */ 3172 3173 tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)), /* c5 */ 3174 CONST_BITS-PASS1_BITS-1); 3175 3176 dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << (PASS1_BITS+1))); 3177 dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << (PASS1_BITS+1)); 3178 dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << (PASS1_BITS+1))); 3179 3180 dataptr += DCTSIZE; /* advance pointer to next row */ 3181 } 3182 3183 /* Pass 2: process columns. 3184 * We remove the PASS1_BITS scaling, but leave the results scaled up 3185 * by an overall factor of 8. 3186 * We must also scale the output by (8/6)*(8/3) = 32/9, which we partially 3187 * fold into the constant multipliers (other part was done in pass 1): 3188 * 3-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/6) * 16/9. 
3189 */ 3190 3191 dataptr = data; 3192 for (ctr = 0; ctr < 6; ctr++) { 3193 /* Even part */ 3194 3195 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*2]; 3196 tmp1 = dataptr[DCTSIZE*1]; 3197 3198 tmp2 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*2]; 3199 3200 dataptr[DCTSIZE*0] = (DCTELEM) 3201 DESCALE(MULTIPLY(tmp0 + tmp1, FIX(1.777777778)), /* 16/9 */ 3202 CONST_BITS+PASS1_BITS); 3203 dataptr[DCTSIZE*2] = (DCTELEM) 3204 DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(1.257078722)), /* c2 */ 3205 CONST_BITS+PASS1_BITS); 3206 3207 /* Odd part */ 3208 3209 dataptr[DCTSIZE*1] = (DCTELEM) 3210 DESCALE(MULTIPLY(tmp2, FIX(2.177324216)), /* c1 */ 3211 CONST_BITS+PASS1_BITS); 3212 3213 dataptr++; /* advance pointer to next column */ 3214 } 3215 } 3216 3217 3218 /* 3219 * Perform the forward DCT on a 4x2 sample block. 3220 * 3221 * 4-point FDCT in pass 1 (rows), 2-point in pass 2 (columns). 3222 */ 3223 3224 GLOBAL(void) 3225 jpeg_fdct_4x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 3226 { 3227 INT32 tmp0, tmp1; 3228 INT32 tmp10, tmp11; 3229 DCTELEM *dataptr; 3230 JSAMPROW elemptr; 3231 int ctr; 3232 SHIFT_TEMPS 3233 3234 /* Pre-zero output coefficient block. */ 3235 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); 3236 3237 /* Pass 1: process rows. */ 3238 /* Note results are scaled up by sqrt(8) compared to a true DCT; */ 3239 /* furthermore, we scale the results by 2**PASS1_BITS. */ 3240 /* We must also scale the output by (8/4)*(8/2) = 2**3, which we add here. */ 3241 /* 4-point FDCT kernel, */ 3242 /* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT]. 
*/ 3243 3244 dataptr = data; 3245 for (ctr = 0; ctr < 2; ctr++) { 3246 elemptr = sample_data[ctr] + start_col; 3247 3248 /* Even part */ 3249 3250 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]); 3251 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]); 3252 3253 tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]); 3254 tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]); 3255 3256 /* Apply unsigned->signed conversion */ 3257 dataptr[0] = (DCTELEM) 3258 ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+3)); 3259 dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+3)); 3260 3261 /* Odd part */ 3262 3263 tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */ 3264 /* Add fudge factor here for final descale. */ 3265 tmp0 += ONE << (CONST_BITS-PASS1_BITS-4); 3266 3267 dataptr[1] = (DCTELEM) 3268 RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */ 3269 CONST_BITS-PASS1_BITS-3); 3270 dataptr[3] = (DCTELEM) 3271 RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */ 3272 CONST_BITS-PASS1_BITS-3); 3273 3274 dataptr += DCTSIZE; /* advance pointer to next row */ 3275 } 3276 3277 /* Pass 2: process columns. 3278 * We remove the PASS1_BITS scaling, but leave the results scaled up 3279 * by an overall factor of 8. 3280 */ 3281 3282 dataptr = data; 3283 for (ctr = 0; ctr < 4; ctr++) { 3284 /* Even part */ 3285 3286 /* Add fudge factor here for final descale. */ 3287 tmp0 = dataptr[DCTSIZE*0] + (ONE << (PASS1_BITS-1)); 3288 tmp1 = dataptr[DCTSIZE*1]; 3289 3290 dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp0 + tmp1, PASS1_BITS); 3291 3292 /* Odd part */ 3293 3294 dataptr[DCTSIZE*1] = (DCTELEM) RIGHT_SHIFT(tmp0 - tmp1, PASS1_BITS); 3295 3296 dataptr++; /* advance pointer to next column */ 3297 } 3298 } 3299 3300 3301 /* 3302 * Perform the forward DCT on a 2x1 sample block. 3303 * 3304 * 2-point FDCT in pass 1 (rows), 1-point in pass 2 (columns). 
3305 */ 3306 3307 GLOBAL(void) 3308 jpeg_fdct_2x1 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 3309 { 3310 INT32 tmp0, tmp1; 3311 JSAMPROW elemptr; 3312 3313 /* Pre-zero output coefficient block. */ 3314 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); 3315 3316 elemptr = sample_data[0] + start_col; 3317 3318 tmp0 = GETJSAMPLE(elemptr[0]); 3319 tmp1 = GETJSAMPLE(elemptr[1]); 3320 3321 /* We leave the results scaled up by an overall factor of 8. 3322 * We must also scale the output by (8/2)*(8/1) = 2**5. 3323 */ 3324 3325 /* Even part */ 3326 /* Apply unsigned->signed conversion */ 3327 data[0] = (DCTELEM) ((tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 5); 3328 3329 /* Odd part */ 3330 data[1] = (DCTELEM) ((tmp0 - tmp1) << 5); 3331 } 3332 3333 3334 /* 3335 * Perform the forward DCT on an 8x16 sample block. 3336 * 3337 * 8-point FDCT in pass 1 (rows), 16-point in pass 2 (columns). 3338 */ 3339 3340 GLOBAL(void) 3341 jpeg_fdct_8x16 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 3342 { 3343 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; 3344 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17; 3345 INT32 z1; 3346 DCTELEM workspace[DCTSIZE2]; 3347 DCTELEM *dataptr; 3348 DCTELEM *wsptr; 3349 JSAMPROW elemptr; 3350 int ctr; 3351 SHIFT_TEMPS 3352 3353 /* Pass 1: process rows. */ 3354 /* Note results are scaled up by sqrt(8) compared to a true DCT; */ 3355 /* furthermore, we scale the results by 2**PASS1_BITS. */ 3356 3357 dataptr = data; 3358 ctr = 0; 3359 for (;;) { 3360 elemptr = sample_data[ctr] + start_col; 3361 3362 /* Even part per LL&M figure 1 --- note that published figure is faulty; 3363 * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". 
/*
 * Perform the forward DCT on an 8x16 sample block.
 *
 * 8-point FDCT in pass 1 (rows), 16-point in pass 2 (columns).
 *
 * Pass 1 produces 16 rows of intermediate results: the first 8 are stored
 * in data, the remaining 8 in a local workspace.  Pass 2 combines both
 * halves column by column and writes the 8 retained coefficients of each
 * column back into data.
 */

GLOBAL(void)
jpeg_fdct_8x16 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
{
  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17;
  INT32 z1;
  DCTELEM workspace[DCTSIZE2];	/* pass 1 results for input rows 8..15 */
  DCTELEM *dataptr;
  DCTELEM *wsptr;
  JSAMPROW elemptr;
  int ctr;
  SHIFT_TEMPS

  /* Pass 1: process rows. */
  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
  /* furthermore, we scale the results by 2**PASS1_BITS. */

  dataptr = data;
  ctr = 0;
  /* The loop is left via the break at the bottom, after DCTSIZE*2 rows. */
  for (;;) {
    elemptr = sample_data[ctr] + start_col;

    /* Even part per LL&M figure 1 --- note that published figure is faulty;
     * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
     */

    /* Sums of the samples mirrored about the row centre. */
    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);

    tmp10 = tmp0 + tmp3;
    tmp12 = tmp0 - tmp3;
    tmp11 = tmp1 + tmp2;
    tmp13 = tmp1 - tmp2;

    /* Differences of the mirrored samples, consumed by the odd part. */
    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);

    /* Apply unsigned->signed conversion */
    dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << PASS1_BITS);
    dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);

    /* z1 is the product shared by outputs 2 and 6. */
    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
    dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865),
				   CONST_BITS-PASS1_BITS);
    dataptr[6] = (DCTELEM) DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065),
				   CONST_BITS-PASS1_BITS);

    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
     * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
     * i0..i3 in the paper are tmp0..tmp3 here.
     */

    tmp10 = tmp0 + tmp3;
    tmp11 = tmp1 + tmp2;
    tmp12 = tmp0 + tmp2;
    tmp13 = tmp1 + tmp3;
    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /*  c3 */

    tmp0  = MULTIPLY(tmp0,    FIX_1_501321110);    /*  c1+c3-c5-c7 */
    tmp1  = MULTIPLY(tmp1,    FIX_3_072711026);    /*  c1+c3+c5-c7 */
    tmp2  = MULTIPLY(tmp2,    FIX_2_053119869);    /*  c1+c3-c5+c7 */
    tmp3  = MULTIPLY(tmp3,    FIX_0_298631336);    /* -c1+c3+c5-c7 */
    tmp10 = MULTIPLY(tmp10, - FIX_0_899976223);    /*  c7-c3 */
    tmp11 = MULTIPLY(tmp11, - FIX_2_562915447);    /* -c1-c3 */
    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);    /*  c5-c3 */
    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);    /* -c3-c5 */

    tmp12 += z1;
    tmp13 += z1;

    dataptr[1] = (DCTELEM) DESCALE(tmp0 + tmp10 + tmp12, CONST_BITS-PASS1_BITS);
    dataptr[3] = (DCTELEM) DESCALE(tmp1 + tmp11 + tmp13, CONST_BITS-PASS1_BITS);
    dataptr[5] = (DCTELEM) DESCALE(tmp2 + tmp11 + tmp12, CONST_BITS-PASS1_BITS);
    dataptr[7] = (DCTELEM) DESCALE(tmp3 + tmp10 + tmp13, CONST_BITS-PASS1_BITS);

    ctr++;

    /* Rows 0..7 go to data and rows 8..15 to workspace: the output pointer
     * is redirected exactly once, when ctr reaches DCTSIZE.
     */
    if (ctr != DCTSIZE) {
      if (ctr == DCTSIZE * 2)
	break;			/* Done. */
      dataptr += DCTSIZE;	/* advance pointer to next row */
    } else
      dataptr = workspace;	/* switch pointer to extended workspace */
  }

  /* Pass 2: process columns.
   * We remove the PASS1_BITS scaling, but leave the results scaled up
   * by an overall factor of 8.
   * We must also scale the output by 8/16 = 1/2.
   * 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
   */

  dataptr = data;
  wsptr = workspace;
  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
    /* Even part */

    /* Row k of data is paired with row 7-k of workspace, i.e. with
     * intermediate row 15-k of the 16-row column.
     */
    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*7];
    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*6];
    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*5];
    tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*4];
    tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*3];
    tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*2];
    tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*1];
    tmp7 = dataptr[DCTSIZE*7] + wsptr[DCTSIZE*0];

    tmp10 = tmp0 + tmp7;
    tmp14 = tmp0 - tmp7;
    tmp11 = tmp1 + tmp6;
    tmp15 = tmp1 - tmp6;
    tmp12 = tmp2 + tmp5;
    tmp16 = tmp2 - tmp5;
    tmp13 = tmp3 + tmp4;
    tmp17 = tmp3 - tmp4;

    /* Differences of the same row pairs, consumed by the odd part. */
    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*7];
    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*6];
    tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*5];
    tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*4];
    tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*3];
    tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*2];
    tmp6 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*1];
    tmp7 = dataptr[DCTSIZE*7] - wsptr[DCTSIZE*0];

    /* The "+1" in every descale count of this pass is the 1/2 adaption. */
    dataptr[DCTSIZE*0] = (DCTELEM)
      DESCALE(tmp10 + tmp11 + tmp12 + tmp13, PASS1_BITS+1);
    dataptr[DCTSIZE*4] = (DCTELEM)
      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
	      MULTIPLY(tmp11 - tmp12, FIX_0_541196100),   /* c12[16] = c6[8] */
	      CONST_BITS+PASS1_BITS+1);

    /* tmp10 is the part shared by outputs 2 and 6. */
    tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) +   /* c14[16] = c7[8] */
	    MULTIPLY(tmp14 - tmp16, FIX(1.387039845));    /* c2[16] = c1[8] */

    dataptr[DCTSIZE*2] = (DCTELEM)
      DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982))   /* c6+c14 */
	      + MULTIPLY(tmp16, FIX(2.172734804)),        /* c2+c10 */
	      CONST_BITS+PASS1_BITS+1);
    dataptr[DCTSIZE*6] = (DCTELEM)
      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243))   /* c2-c6 */
	      - MULTIPLY(tmp17, FIX(1.061594338)),        /* c10+c14 */
	      CONST_BITS+PASS1_BITS+1);

    /* Odd part */

    /* tmp11..tmp16 are pairwise products; each of the four odd outputs
     * below combines three of them with two single-input terms.
     */
    tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) +         /* c3 */
	    MULTIPLY(tmp6 - tmp7, FIX(0.410524528));          /* c13 */
    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) +         /* c5 */
	    MULTIPLY(tmp5 + tmp7, FIX(0.666655658));          /* c11 */
    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) +         /* c7 */
	    MULTIPLY(tmp4 - tmp7, FIX(0.897167586));          /* c9 */
    tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) +         /* c15 */
	    MULTIPLY(tmp6 - tmp5, FIX(1.407403738));          /* c1 */
    tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) +       /* -c11 */
	    MULTIPLY(tmp4 + tmp6, - FIX(1.247225013));        /* -c5 */
    tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) +       /* -c3 */
	    MULTIPLY(tmp5 - tmp4, FIX(0.410524528));          /* c13 */
    /* tmp10 must be formed first: it needs tmp11..tmp13 before the
     * updates in the next three statements.
     */
    tmp10 = tmp11 + tmp12 + tmp13 -
	    MULTIPLY(tmp0, FIX(2.286341144)) +                /* c7+c5+c3-c1 */
	    MULTIPLY(tmp7, FIX(0.779653625));                 /* c15+c13-c11+c9 */
    tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
	     - MULTIPLY(tmp6, FIX(1.663905119));              /* c7+c13+c1-c5 */
    tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
	     + MULTIPLY(tmp5, FIX(1.227391138));              /* c9-c11+c1-c13 */
    tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
	     + MULTIPLY(tmp4, FIX(2.167985692));              /* c1+c13+c5-c9 */

    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS1_BITS+1);
    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS1_BITS+1);
    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS1_BITS+1);
    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS1_BITS+1);

    dataptr++;			/* advance pointer to next column */
    wsptr++;			/* advance pointer to next column */
  }
}
3526 */ 3527 3528 GLOBAL(void) 3529 jpeg_fdct_7x14 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 3530 { 3531 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6; 3532 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; 3533 INT32 z1, z2, z3; 3534 DCTELEM workspace[8*6]; 3535 DCTELEM *dataptr; 3536 DCTELEM *wsptr; 3537 JSAMPROW elemptr; 3538 int ctr; 3539 SHIFT_TEMPS 3540 3541 /* Pre-zero output coefficient block. */ 3542 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); 3543 3544 /* Pass 1: process rows. */ 3545 /* Note results are scaled up by sqrt(8) compared to a true DCT; */ 3546 /* furthermore, we scale the results by 2**PASS1_BITS. */ 3547 /* 7-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/14). */ 3548 3549 dataptr = data; 3550 ctr = 0; 3551 for (;;) { 3552 elemptr = sample_data[ctr] + start_col; 3553 3554 /* Even part */ 3555 3556 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[6]); 3557 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[5]); 3558 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[4]); 3559 tmp3 = GETJSAMPLE(elemptr[3]); 3560 3561 tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[6]); 3562 tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[5]); 3563 tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[4]); 3564 3565 z1 = tmp0 + tmp2; 3566 /* Apply unsigned->signed conversion */ 3567 dataptr[0] = (DCTELEM) 3568 ((z1 + tmp1 + tmp3 - 7 * CENTERJSAMPLE) << PASS1_BITS); 3569 tmp3 += tmp3; 3570 z1 -= tmp3; 3571 z1 -= tmp3; 3572 z1 = MULTIPLY(z1, FIX(0.353553391)); /* (c2+c6-c4)/2 */ 3573 z2 = MULTIPLY(tmp0 - tmp2, FIX(0.920609002)); /* (c2+c4-c6)/2 */ 3574 z3 = MULTIPLY(tmp1 - tmp2, FIX(0.314692123)); /* c6 */ 3575 dataptr[2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS-PASS1_BITS); 3576 z1 -= z2; 3577 z2 = MULTIPLY(tmp0 - tmp1, FIX(0.881747734)); /* c4 */ 3578 dataptr[4] = (DCTELEM) 3579 DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.707106781)), /* c2+c6-c4 */ 3580 CONST_BITS-PASS1_BITS); 3581 dataptr[6] = (DCTELEM) DESCALE(z1 
+ z2, CONST_BITS-PASS1_BITS); 3582 3583 /* Odd part */ 3584 3585 tmp1 = MULTIPLY(tmp10 + tmp11, FIX(0.935414347)); /* (c3+c1-c5)/2 */ 3586 tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.170262339)); /* (c3+c5-c1)/2 */ 3587 tmp0 = tmp1 - tmp2; 3588 tmp1 += tmp2; 3589 tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.378756276)); /* -c1 */ 3590 tmp1 += tmp2; 3591 tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.613604268)); /* c5 */ 3592 tmp0 += tmp3; 3593 tmp2 += tmp3 + MULTIPLY(tmp12, FIX(1.870828693)); /* c3+c1-c5 */ 3594 3595 dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-PASS1_BITS); 3596 dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-PASS1_BITS); 3597 dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-PASS1_BITS); 3598 3599 ctr++; 3600 3601 if (ctr != DCTSIZE) { 3602 if (ctr == 14) 3603 break; /* Done. */ 3604 dataptr += DCTSIZE; /* advance pointer to next row */ 3605 } else 3606 dataptr = workspace; /* switch pointer to extended workspace */ 3607 } 3608 3609 /* Pass 2: process columns. 3610 * We remove the PASS1_BITS scaling, but leave the results scaled up 3611 * by an overall factor of 8. 3612 * We must also scale the output by (8/7)*(8/14) = 32/49, which we 3613 * fold into the constant multipliers: 3614 * 14-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/28) * 32/49. 
3615 */ 3616 3617 dataptr = data; 3618 wsptr = workspace; 3619 for (ctr = 0; ctr < 7; ctr++) { 3620 /* Even part */ 3621 3622 tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*5]; 3623 tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*4]; 3624 tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*3]; 3625 tmp13 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*2]; 3626 tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*1]; 3627 tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*0]; 3628 tmp6 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7]; 3629 3630 tmp10 = tmp0 + tmp6; 3631 tmp14 = tmp0 - tmp6; 3632 tmp11 = tmp1 + tmp5; 3633 tmp15 = tmp1 - tmp5; 3634 tmp12 = tmp2 + tmp4; 3635 tmp16 = tmp2 - tmp4; 3636 3637 tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*5]; 3638 tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*4]; 3639 tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*3]; 3640 tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*2]; 3641 tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*1]; 3642 tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*0]; 3643 tmp6 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7]; 3644 3645 dataptr[DCTSIZE*0] = (DCTELEM) 3646 DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12 + tmp13, 3647 FIX(0.653061224)), /* 32/49 */ 3648 CONST_BITS+PASS1_BITS); 3649 tmp13 += tmp13; 3650 dataptr[DCTSIZE*4] = (DCTELEM) 3651 DESCALE(MULTIPLY(tmp10 - tmp13, FIX(0.832106052)) + /* c4 */ 3652 MULTIPLY(tmp11 - tmp13, FIX(0.205513223)) - /* c12 */ 3653 MULTIPLY(tmp12 - tmp13, FIX(0.575835255)), /* c8 */ 3654 CONST_BITS+PASS1_BITS); 3655 3656 tmp10 = MULTIPLY(tmp14 + tmp15, FIX(0.722074570)); /* c6 */ 3657 3658 dataptr[DCTSIZE*2] = (DCTELEM) 3659 DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.178337691)) /* c2-c6 */ 3660 + MULTIPLY(tmp16, FIX(0.400721155)), /* c10 */ 3661 CONST_BITS+PASS1_BITS); 3662 dataptr[DCTSIZE*6] = (DCTELEM) 3663 DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.122795725)) /* c6+c10 */ 3664 - MULTIPLY(tmp16, FIX(0.900412262)), /* c2 */ 3665 CONST_BITS+PASS1_BITS); 3666 3667 /* Odd part */ 3668 3669 tmp10 = tmp1 + tmp2; 3670 tmp11 = tmp5 - tmp4; 3671 dataptr[DCTSIZE*7] = (DCTELEM) 3672 
DESCALE(MULTIPLY(tmp0 - tmp10 + tmp3 - tmp11 - tmp6, 3673 FIX(0.653061224)), /* 32/49 */ 3674 CONST_BITS+PASS1_BITS); 3675 tmp3 = MULTIPLY(tmp3 , FIX(0.653061224)); /* 32/49 */ 3676 tmp10 = MULTIPLY(tmp10, - FIX(0.103406812)); /* -c13 */ 3677 tmp11 = MULTIPLY(tmp11, FIX(0.917760839)); /* c1 */ 3678 tmp10 += tmp11 - tmp3; 3679 tmp11 = MULTIPLY(tmp0 + tmp2, FIX(0.782007410)) + /* c5 */ 3680 MULTIPLY(tmp4 + tmp6, FIX(0.491367823)); /* c9 */ 3681 dataptr[DCTSIZE*5] = (DCTELEM) 3682 DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(1.550341076)) /* c3+c5-c13 */ 3683 + MULTIPLY(tmp4, FIX(0.731428202)), /* c1+c11-c9 */ 3684 CONST_BITS+PASS1_BITS); 3685 tmp12 = MULTIPLY(tmp0 + tmp1, FIX(0.871740478)) + /* c3 */ 3686 MULTIPLY(tmp5 - tmp6, FIX(0.305035186)); /* c11 */ 3687 dataptr[DCTSIZE*3] = (DCTELEM) 3688 DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.276965844)) /* c3-c9-c13 */ 3689 - MULTIPLY(tmp5, FIX(2.004803435)), /* c1+c5+c11 */ 3690 CONST_BITS+PASS1_BITS); 3691 dataptr[DCTSIZE*1] = (DCTELEM) 3692 DESCALE(tmp11 + tmp12 + tmp3 3693 - MULTIPLY(tmp0, FIX(0.735987049)) /* c3+c5-c1 */ 3694 - MULTIPLY(tmp6, FIX(0.082925825)), /* c9-c11-c13 */ 3695 CONST_BITS+PASS1_BITS); 3696 3697 dataptr++; /* advance pointer to next column */ 3698 wsptr++; /* advance pointer to next column */ 3699 } 3700 } 3701 3702 3703 /* 3704 * Perform the forward DCT on a 6x12 sample block. 3705 * 3706 * 6-point FDCT in pass 1 (rows), 12-point in pass 2 (columns). 3707 */ 3708 3709 GLOBAL(void) 3710 jpeg_fdct_6x12 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 3711 { 3712 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5; 3713 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; 3714 DCTELEM workspace[8*4]; 3715 DCTELEM *dataptr; 3716 DCTELEM *wsptr; 3717 JSAMPROW elemptr; 3718 int ctr; 3719 SHIFT_TEMPS 3720 3721 /* Pre-zero output coefficient block. */ 3722 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); 3723 3724 /* Pass 1: process rows. 
*/ 3725 /* Note results are scaled up by sqrt(8) compared to a true DCT; */ 3726 /* furthermore, we scale the results by 2**PASS1_BITS. */ 3727 /* 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12). */ 3728 3729 dataptr = data; 3730 ctr = 0; 3731 for (;;) { 3732 elemptr = sample_data[ctr] + start_col; 3733 3734 /* Even part */ 3735 3736 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]); 3737 tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]); 3738 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]); 3739 3740 tmp10 = tmp0 + tmp2; 3741 tmp12 = tmp0 - tmp2; 3742 3743 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]); 3744 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]); 3745 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]); 3746 3747 /* Apply unsigned->signed conversion */ 3748 dataptr[0] = (DCTELEM) 3749 ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << PASS1_BITS); 3750 dataptr[2] = (DCTELEM) 3751 DESCALE(MULTIPLY(tmp12, FIX(1.224744871)), /* c2 */ 3752 CONST_BITS-PASS1_BITS); 3753 dataptr[4] = (DCTELEM) 3754 DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */ 3755 CONST_BITS-PASS1_BITS); 3756 3757 /* Odd part */ 3758 3759 tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)), /* c5 */ 3760 CONST_BITS-PASS1_BITS); 3761 3762 dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << PASS1_BITS)); 3763 dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << PASS1_BITS); 3764 dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << PASS1_BITS)); 3765 3766 ctr++; 3767 3768 if (ctr != DCTSIZE) { 3769 if (ctr == 12) 3770 break; /* Done. */ 3771 dataptr += DCTSIZE; /* advance pointer to next row */ 3772 } else 3773 dataptr = workspace; /* switch pointer to extended workspace */ 3774 } 3775 3776 /* Pass 2: process columns. 3777 * We remove the PASS1_BITS scaling, but leave the results scaled up 3778 * by an overall factor of 8. 
3779 * We must also scale the output by (8/6)*(8/12) = 8/9, which we 3780 * fold into the constant multipliers: 3781 * 12-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/24) * 8/9. 3782 */ 3783 3784 dataptr = data; 3785 wsptr = workspace; 3786 for (ctr = 0; ctr < 6; ctr++) { 3787 /* Even part */ 3788 3789 tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*3]; 3790 tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*2]; 3791 tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*1]; 3792 tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*0]; 3793 tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*7]; 3794 tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*6]; 3795 3796 tmp10 = tmp0 + tmp5; 3797 tmp13 = tmp0 - tmp5; 3798 tmp11 = tmp1 + tmp4; 3799 tmp14 = tmp1 - tmp4; 3800 tmp12 = tmp2 + tmp3; 3801 tmp15 = tmp2 - tmp3; 3802 3803 tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*3]; 3804 tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*2]; 3805 tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*1]; 3806 tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*0]; 3807 tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*7]; 3808 tmp5 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*6]; 3809 3810 dataptr[DCTSIZE*0] = (DCTELEM) 3811 DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(0.888888889)), /* 8/9 */ 3812 CONST_BITS+PASS1_BITS); 3813 dataptr[DCTSIZE*6] = (DCTELEM) 3814 DESCALE(MULTIPLY(tmp13 - tmp14 - tmp15, FIX(0.888888889)), /* 8/9 */ 3815 CONST_BITS+PASS1_BITS); 3816 dataptr[DCTSIZE*4] = (DCTELEM) 3817 DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.088662108)), /* c4 */ 3818 CONST_BITS+PASS1_BITS); 3819 dataptr[DCTSIZE*2] = (DCTELEM) 3820 DESCALE(MULTIPLY(tmp14 - tmp15, FIX(0.888888889)) + /* 8/9 */ 3821 MULTIPLY(tmp13 + tmp15, FIX(1.214244803)), /* c2 */ 3822 CONST_BITS+PASS1_BITS); 3823 3824 /* Odd part */ 3825 3826 tmp10 = MULTIPLY(tmp1 + tmp4, FIX(0.481063200)); /* c9 */ 3827 tmp14 = tmp10 + MULTIPLY(tmp1, FIX(0.680326102)); /* c3-c9 */ 3828 tmp15 = tmp10 - MULTIPLY(tmp4, FIX(1.642452502)); /* c3+c9 */ 3829 tmp12 = MULTIPLY(tmp0 + tmp2, FIX(0.997307603)); /* c5 */ 3830 tmp13 = 
MULTIPLY(tmp0 + tmp3, FIX(0.765261039)); /* c7 */ 3831 tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.516244403)) /* c5+c7-c1 */ 3832 + MULTIPLY(tmp5, FIX(0.164081699)); /* c11 */ 3833 tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.164081699)); /* -c11 */ 3834 tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.079550144)) /* c1+c5-c11 */ 3835 + MULTIPLY(tmp5, FIX(0.765261039)); /* c7 */ 3836 tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.645144899)) /* c1+c11-c7 */ 3837 - MULTIPLY(tmp5, FIX(0.997307603)); /* c5 */ 3838 tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.161389302)) /* c3 */ 3839 - MULTIPLY(tmp2 + tmp5, FIX(0.481063200)); /* c9 */ 3840 3841 dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS1_BITS); 3842 dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS1_BITS); 3843 dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS1_BITS); 3844 dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS1_BITS); 3845 3846 dataptr++; /* advance pointer to next column */ 3847 wsptr++; /* advance pointer to next column */ 3848 } 3849 } 3850 3851 3852 /* 3853 * Perform the forward DCT on a 5x10 sample block. 3854 * 3855 * 5-point FDCT in pass 1 (rows), 10-point in pass 2 (columns). 3856 */ 3857 3858 GLOBAL(void) 3859 jpeg_fdct_5x10 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 3860 { 3861 INT32 tmp0, tmp1, tmp2, tmp3, tmp4; 3862 INT32 tmp10, tmp11, tmp12, tmp13, tmp14; 3863 DCTELEM workspace[8*2]; 3864 DCTELEM *dataptr; 3865 DCTELEM *wsptr; 3866 JSAMPROW elemptr; 3867 int ctr; 3868 SHIFT_TEMPS 3869 3870 /* Pre-zero output coefficient block. */ 3871 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); 3872 3873 /* Pass 1: process rows. */ 3874 /* Note results are scaled up by sqrt(8) compared to a true DCT; */ 3875 /* furthermore, we scale the results by 2**PASS1_BITS. */ 3876 /* 5-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/10). 
*/ 3877 3878 dataptr = data; 3879 ctr = 0; 3880 for (;;) { 3881 elemptr = sample_data[ctr] + start_col; 3882 3883 /* Even part */ 3884 3885 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[4]); 3886 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[3]); 3887 tmp2 = GETJSAMPLE(elemptr[2]); 3888 3889 tmp10 = tmp0 + tmp1; 3890 tmp11 = tmp0 - tmp1; 3891 3892 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[4]); 3893 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[3]); 3894 3895 /* Apply unsigned->signed conversion */ 3896 dataptr[0] = (DCTELEM) 3897 ((tmp10 + tmp2 - 5 * CENTERJSAMPLE) << PASS1_BITS); 3898 tmp11 = MULTIPLY(tmp11, FIX(0.790569415)); /* (c2+c4)/2 */ 3899 tmp10 -= tmp2 << 2; 3900 tmp10 = MULTIPLY(tmp10, FIX(0.353553391)); /* (c2-c4)/2 */ 3901 dataptr[2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS-PASS1_BITS); 3902 dataptr[4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS-PASS1_BITS); 3903 3904 /* Odd part */ 3905 3906 tmp10 = MULTIPLY(tmp0 + tmp1, FIX(0.831253876)); /* c3 */ 3907 3908 dataptr[1] = (DCTELEM) 3909 DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.513743148)), /* c1-c3 */ 3910 CONST_BITS-PASS1_BITS); 3911 dataptr[3] = (DCTELEM) 3912 DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.176250899)), /* c1+c3 */ 3913 CONST_BITS-PASS1_BITS); 3914 3915 ctr++; 3916 3917 if (ctr != DCTSIZE) { 3918 if (ctr == 10) 3919 break; /* Done. */ 3920 dataptr += DCTSIZE; /* advance pointer to next row */ 3921 } else 3922 dataptr = workspace; /* switch pointer to extended workspace */ 3923 } 3924 3925 /* Pass 2: process columns. 3926 * We remove the PASS1_BITS scaling, but leave the results scaled up 3927 * by an overall factor of 8. 3928 * We must also scale the output by (8/5)*(8/10) = 32/25, which we 3929 * fold into the constant multipliers: 3930 * 10-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/20) * 32/25. 
3931 */ 3932 3933 dataptr = data; 3934 wsptr = workspace; 3935 for (ctr = 0; ctr < 5; ctr++) { 3936 /* Even part */ 3937 3938 tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*1]; 3939 tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*0]; 3940 tmp12 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*7]; 3941 tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*6]; 3942 tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5]; 3943 3944 tmp10 = tmp0 + tmp4; 3945 tmp13 = tmp0 - tmp4; 3946 tmp11 = tmp1 + tmp3; 3947 tmp14 = tmp1 - tmp3; 3948 3949 tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*1]; 3950 tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*0]; 3951 tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*7]; 3952 tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*6]; 3953 tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5]; 3954 3955 dataptr[DCTSIZE*0] = (DCTELEM) 3956 DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(1.28)), /* 32/25 */ 3957 CONST_BITS+PASS1_BITS); 3958 tmp12 += tmp12; 3959 dataptr[DCTSIZE*4] = (DCTELEM) 3960 DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.464477191)) - /* c4 */ 3961 MULTIPLY(tmp11 - tmp12, FIX(0.559380511)), /* c8 */ 3962 CONST_BITS+PASS1_BITS); 3963 tmp10 = MULTIPLY(tmp13 + tmp14, FIX(1.064004961)); /* c6 */ 3964 dataptr[DCTSIZE*2] = (DCTELEM) 3965 DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.657591230)), /* c2-c6 */ 3966 CONST_BITS+PASS1_BITS); 3967 dataptr[DCTSIZE*6] = (DCTELEM) 3968 DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.785601151)), /* c2+c6 */ 3969 CONST_BITS+PASS1_BITS); 3970 3971 /* Odd part */ 3972 3973 tmp10 = tmp0 + tmp4; 3974 tmp11 = tmp1 - tmp3; 3975 dataptr[DCTSIZE*5] = (DCTELEM) 3976 DESCALE(MULTIPLY(tmp10 - tmp11 - tmp2, FIX(1.28)), /* 32/25 */ 3977 CONST_BITS+PASS1_BITS); 3978 tmp2 = MULTIPLY(tmp2, FIX(1.28)); /* 32/25 */ 3979 dataptr[DCTSIZE*1] = (DCTELEM) 3980 DESCALE(MULTIPLY(tmp0, FIX(1.787906876)) + /* c1 */ 3981 MULTIPLY(tmp1, FIX(1.612894094)) + tmp2 + /* c3 */ 3982 MULTIPLY(tmp3, FIX(0.821810588)) + /* c7 */ 3983 MULTIPLY(tmp4, FIX(0.283176630)), /* c9 */ 3984 CONST_BITS+PASS1_BITS); 3985 tmp12 = 
MULTIPLY(tmp0 - tmp4, FIX(1.217352341)) - /* (c3+c7)/2 */ 3986 MULTIPLY(tmp1 + tmp3, FIX(0.752365123)); /* (c1-c9)/2 */ 3987 tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.395541753)) + /* (c3-c7)/2 */ 3988 MULTIPLY(tmp11, FIX(0.64)) - tmp2; /* 16/25 */ 3989 dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS+PASS1_BITS); 3990 dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS+PASS1_BITS); 3991 3992 dataptr++; /* advance pointer to next column */ 3993 wsptr++; /* advance pointer to next column */ 3994 } 3995 } 3996 3997 3998 /* 3999 * Perform the forward DCT on a 4x8 sample block. 4000 * 4001 * 4-point FDCT in pass 1 (rows), 8-point in pass 2 (columns). 4002 */ 4003 4004 GLOBAL(void) 4005 jpeg_fdct_4x8 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 4006 { 4007 INT32 tmp0, tmp1, tmp2, tmp3; 4008 INT32 tmp10, tmp11, tmp12, tmp13; 4009 INT32 z1; 4010 DCTELEM *dataptr; 4011 JSAMPROW elemptr; 4012 int ctr; 4013 SHIFT_TEMPS 4014 4015 /* Pre-zero output coefficient block. */ 4016 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); 4017 4018 /* Pass 1: process rows. */ 4019 /* Note results are scaled up by sqrt(8) compared to a true DCT; */ 4020 /* furthermore, we scale the results by 2**PASS1_BITS. */ 4021 /* We must also scale the output by 8/4 = 2, which we add here. */ 4022 /* 4-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). 
*/ 4023 4024 dataptr = data; 4025 for (ctr = 0; ctr < DCTSIZE; ctr++) { 4026 elemptr = sample_data[ctr] + start_col; 4027 4028 /* Even part */ 4029 4030 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]); 4031 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]); 4032 4033 tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]); 4034 tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]); 4035 4036 /* Apply unsigned->signed conversion */ 4037 dataptr[0] = (DCTELEM) 4038 ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+1)); 4039 dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+1)); 4040 4041 /* Odd part */ 4042 4043 tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */ 4044 /* Add fudge factor here for final descale. */ 4045 tmp0 += ONE << (CONST_BITS-PASS1_BITS-2); 4046 4047 dataptr[1] = (DCTELEM) 4048 RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */ 4049 CONST_BITS-PASS1_BITS-1); 4050 dataptr[3] = (DCTELEM) 4051 RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */ 4052 CONST_BITS-PASS1_BITS-1); 4053 4054 dataptr += DCTSIZE; /* advance pointer to next row */ 4055 } 4056 4057 /* Pass 2: process columns. 4058 * We remove the PASS1_BITS scaling, but leave the results scaled up 4059 * by an overall factor of 8. 4060 */ 4061 4062 dataptr = data; 4063 for (ctr = 0; ctr < 4; ctr++) { 4064 /* Even part per LL&M figure 1 --- note that published figure is faulty; 4065 * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". 4066 */ 4067 4068 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; 4069 tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6]; 4070 tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5]; 4071 tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4]; 4072 4073 /* Add fudge factor here for final descale. 
*/ 4074 tmp10 = tmp0 + tmp3 + (ONE << (PASS1_BITS-1)); 4075 tmp12 = tmp0 - tmp3; 4076 tmp11 = tmp1 + tmp2; 4077 tmp13 = tmp1 - tmp2; 4078 4079 tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7]; 4080 tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6]; 4081 tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5]; 4082 tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; 4083 4084 dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp10 + tmp11, PASS1_BITS); 4085 dataptr[DCTSIZE*4] = (DCTELEM) RIGHT_SHIFT(tmp10 - tmp11, PASS1_BITS); 4086 4087 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); 4088 /* Add fudge factor here for final descale. */ 4089 z1 += ONE << (CONST_BITS+PASS1_BITS-1); 4090 dataptr[DCTSIZE*2] = (DCTELEM) 4091 RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), CONST_BITS+PASS1_BITS); 4092 dataptr[DCTSIZE*6] = (DCTELEM) 4093 RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), CONST_BITS+PASS1_BITS); 4094 4095 /* Odd part per figure 8 --- note paper omits factor of sqrt(2). 4096 * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). 4097 * i0..i3 in the paper are tmp0..tmp3 here. 4098 */ 4099 4100 tmp10 = tmp0 + tmp3; 4101 tmp11 = tmp1 + tmp2; 4102 tmp12 = tmp0 + tmp2; 4103 tmp13 = tmp1 + tmp3; 4104 z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ 4105 /* Add fudge factor here for final descale. 
*/ 4106 z1 += ONE << (CONST_BITS+PASS1_BITS-1); 4107 4108 tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ 4109 tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ 4110 tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ 4111 tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ 4112 tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */ 4113 tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */ 4114 tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */ 4115 tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ 4116 4117 tmp12 += z1; 4118 tmp13 += z1; 4119 4120 dataptr[DCTSIZE*1] = (DCTELEM) 4121 RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS+PASS1_BITS); 4122 dataptr[DCTSIZE*3] = (DCTELEM) 4123 RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS+PASS1_BITS); 4124 dataptr[DCTSIZE*5] = (DCTELEM) 4125 RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS+PASS1_BITS); 4126 dataptr[DCTSIZE*7] = (DCTELEM) 4127 RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS+PASS1_BITS); 4128 4129 dataptr++; /* advance pointer to next column */ 4130 } 4131 } 4132 4133 4134 /* 4135 * Perform the forward DCT on a 3x6 sample block. 4136 * 4137 * 3-point FDCT in pass 1 (rows), 6-point in pass 2 (columns). 4138 */ 4139 4140 GLOBAL(void) 4141 jpeg_fdct_3x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 4142 { 4143 INT32 tmp0, tmp1, tmp2; 4144 INT32 tmp10, tmp11, tmp12; 4145 DCTELEM *dataptr; 4146 JSAMPROW elemptr; 4147 int ctr; 4148 SHIFT_TEMPS 4149 4150 /* Pre-zero output coefficient block. */ 4151 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); 4152 4153 /* Pass 1: process rows. */ 4154 /* Note results are scaled up by sqrt(8) compared to a true DCT; */ 4155 /* furthermore, we scale the results by 2**PASS1_BITS. */ 4156 /* We scale the results further by 2 as part of output adaption */ 4157 /* scaling for different DCT size. */ 4158 /* 3-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/6). 
*/ 4159 4160 dataptr = data; 4161 for (ctr = 0; ctr < 6; ctr++) { 4162 elemptr = sample_data[ctr] + start_col; 4163 4164 /* Even part */ 4165 4166 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[2]); 4167 tmp1 = GETJSAMPLE(elemptr[1]); 4168 4169 tmp2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[2]); 4170 4171 /* Apply unsigned->signed conversion */ 4172 dataptr[0] = (DCTELEM) 4173 ((tmp0 + tmp1 - 3 * CENTERJSAMPLE) << (PASS1_BITS+1)); 4174 dataptr[2] = (DCTELEM) 4175 DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(0.707106781)), /* c2 */ 4176 CONST_BITS-PASS1_BITS-1); 4177 4178 /* Odd part */ 4179 4180 dataptr[1] = (DCTELEM) 4181 DESCALE(MULTIPLY(tmp2, FIX(1.224744871)), /* c1 */ 4182 CONST_BITS-PASS1_BITS-1); 4183 4184 dataptr += DCTSIZE; /* advance pointer to next row */ 4185 } 4186 4187 /* Pass 2: process columns. 4188 * We remove the PASS1_BITS scaling, but leave the results scaled up 4189 * by an overall factor of 8. 4190 * We must also scale the output by (8/6)*(8/3) = 32/9, which we partially 4191 * fold into the constant multipliers (other part was done in pass 1): 4192 * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12) * 16/9. 
4193 */ 4194 4195 dataptr = data; 4196 for (ctr = 0; ctr < 3; ctr++) { 4197 /* Even part */ 4198 4199 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5]; 4200 tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4]; 4201 tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3]; 4202 4203 tmp10 = tmp0 + tmp2; 4204 tmp12 = tmp0 - tmp2; 4205 4206 tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5]; 4207 tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4]; 4208 tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3]; 4209 4210 dataptr[DCTSIZE*0] = (DCTELEM) 4211 DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)), /* 16/9 */ 4212 CONST_BITS+PASS1_BITS); 4213 dataptr[DCTSIZE*2] = (DCTELEM) 4214 DESCALE(MULTIPLY(tmp12, FIX(2.177324216)), /* c2 */ 4215 CONST_BITS+PASS1_BITS); 4216 dataptr[DCTSIZE*4] = (DCTELEM) 4217 DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */ 4218 CONST_BITS+PASS1_BITS); 4219 4220 /* Odd part */ 4221 4222 tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829)); /* c5 */ 4223 4224 dataptr[DCTSIZE*1] = (DCTELEM) 4225 DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)), /* 16/9 */ 4226 CONST_BITS+PASS1_BITS); 4227 dataptr[DCTSIZE*3] = (DCTELEM) 4228 DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)), /* 16/9 */ 4229 CONST_BITS+PASS1_BITS); 4230 dataptr[DCTSIZE*5] = (DCTELEM) 4231 DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)), /* 16/9 */ 4232 CONST_BITS+PASS1_BITS); 4233 4234 dataptr++; /* advance pointer to next column */ 4235 } 4236 } 4237 4238 4239 /* 4240 * Perform the forward DCT on a 2x4 sample block. 4241 * 4242 * 2-point FDCT in pass 1 (rows), 4-point in pass 2 (columns). 4243 */ 4244 4245 GLOBAL(void) 4246 jpeg_fdct_2x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 4247 { 4248 INT32 tmp0, tmp1; 4249 INT32 tmp10, tmp11; 4250 DCTELEM *dataptr; 4251 JSAMPROW elemptr; 4252 int ctr; 4253 SHIFT_TEMPS 4254 4255 /* Pre-zero output coefficient block. */ 4256 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); 4257 4258 /* Pass 1: process rows. 
*/ 4259 /* Note results are scaled up by sqrt(8) compared to a true DCT. */ 4260 /* We must also scale the output by (8/2)*(8/4) = 2**3, which we add here. */ 4261 4262 dataptr = data; 4263 for (ctr = 0; ctr < 4; ctr++) { 4264 elemptr = sample_data[ctr] + start_col; 4265 4266 /* Even part */ 4267 4268 tmp0 = GETJSAMPLE(elemptr[0]); 4269 tmp1 = GETJSAMPLE(elemptr[1]); 4270 4271 /* Apply unsigned->signed conversion */ 4272 dataptr[0] = (DCTELEM) ((tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 3); 4273 4274 /* Odd part */ 4275 4276 dataptr[1] = (DCTELEM) ((tmp0 - tmp1) << 3); 4277 4278 dataptr += DCTSIZE; /* advance pointer to next row */ 4279 } 4280 4281 /* Pass 2: process columns. 4282 * We leave the results scaled up by an overall factor of 8. 4283 * 4-point FDCT kernel, 4284 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT]. 4285 */ 4286 4287 dataptr = data; 4288 for (ctr = 0; ctr < 2; ctr++) { 4289 /* Even part */ 4290 4291 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3]; 4292 tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2]; 4293 4294 tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3]; 4295 tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2]; 4296 4297 dataptr[DCTSIZE*0] = (DCTELEM) (tmp0 + tmp1); 4298 dataptr[DCTSIZE*2] = (DCTELEM) (tmp0 - tmp1); 4299 4300 /* Odd part */ 4301 4302 tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */ 4303 /* Add fudge factor here for final descale. */ 4304 tmp0 += ONE << (CONST_BITS-1); 4305 4306 dataptr[DCTSIZE*1] = (DCTELEM) 4307 RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */ 4308 CONST_BITS); 4309 dataptr[DCTSIZE*3] = (DCTELEM) 4310 RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */ 4311 CONST_BITS); 4312 4313 dataptr++; /* advance pointer to next column */ 4314 } 4315 } 4316 4317 4318 /* 4319 * Perform the forward DCT on a 1x2 sample block. 4320 * 4321 * 1-point FDCT in pass 1 (rows), 2-point in pass 2 (columns). 
4322 */ 4323 4324 GLOBAL(void) 4325 jpeg_fdct_1x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 4326 { 4327 INT32 tmp0, tmp1; 4328 4329 /* Pre-zero output coefficient block. */ 4330 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); 4331 4332 tmp0 = GETJSAMPLE(sample_data[0][start_col]); 4333 tmp1 = GETJSAMPLE(sample_data[1][start_col]); 4334 4335 /* We leave the results scaled up by an overall factor of 8. 4336 * We must also scale the output by (8/1)*(8/2) = 2**5. 4337 */ 4338 4339 /* Even part */ 4340 /* Apply unsigned->signed conversion */ 4341 data[DCTSIZE*0] = (DCTELEM) ((tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 5); 4342 4343 /* Odd part */ 4344 data[DCTSIZE*1] = (DCTELEM) ((tmp0 - tmp1) << 5); 4345 } 4346 4347 #endif /* DCT_SCALING_SUPPORTED */ 4348 #endif /* DCT_ISLOW_SUPPORTED */