< prev index next >

modules/javafx.graphics/src/main/native-iio/libjpeg7/jidctint.c

Print this page

        

*** 1,10 **** /* * jidctint.c * * Copyright (C) 1991-1998, Thomas G. Lane. ! * Modification developed 2002-2009 by Guido Vollbeding. * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * * This file contains a slow-but-accurate integer implementation of the * inverse DCT (Discrete Cosine Transform). In the IJG code, this routine --- 1,10 ---- /* * jidctint.c * * Copyright (C) 1991-1998, Thomas G. Lane. ! * Modification developed 2002-2016 by Guido Vollbeding. * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * * This file contains a slow-but-accurate integer implementation of the * inverse DCT (Discrete Cosine Transform). In the IJG code, this routine
*** 163,172 **** --- 163,175 ---- #define DEQUANTIZE(coef,quantval) (((ISLOW_MULT_TYPE) (coef)) * (quantval)) /* * Perform dequantization and inverse DCT on one block of coefficients. + * + * Optimized algorithm with 12 multiplications in the 1-D kernel. + * cK represents sqrt(2) * cos(K*pi/16). */ GLOBAL(void) jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr, JCOEFPTR coef_block,
*** 182,194 **** JSAMPLE *range_limit = IDCT_range_limit(cinfo); int ctr; int workspace[DCTSIZE2]; /* buffers data between passes */ SHIFT_TEMPS ! /* Pass 1: process columns from input, store into work array. */ ! /* Note results are scaled up by sqrt(8) compared to a true IDCT; */ ! /* furthermore, we scale the results by 2**PASS1_BITS. */ inptr = coef_block; quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; wsptr = workspace; for (ctr = DCTSIZE; ctr > 0; ctr--) { --- 185,198 ---- JSAMPLE *range_limit = IDCT_range_limit(cinfo); int ctr; int workspace[DCTSIZE2]; /* buffers data between passes */ SHIFT_TEMPS ! /* Pass 1: process columns from input, store into work array. ! * Note results are scaled up by sqrt(8) compared to a true IDCT; ! * furthermore, we scale the results by 2**PASS1_BITS. ! */ inptr = coef_block; quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; wsptr = workspace; for (ctr = DCTSIZE; ctr > 0; ctr--) {
*** 221,239 **** quantptr++; wsptr++; continue; } ! /* Even part: reverse the even part of the forward DCT. */ ! /* The rotator is sqrt(2)*c(-6). */ ! ! z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); ! z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); ! ! z1 = MULTIPLY(z2 + z3, FIX_0_541196100); ! tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); ! tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); z2 <<= CONST_BITS; z3 <<= CONST_BITS; --- 225,237 ---- quantptr++; wsptr++; continue; } ! /* Even part: reverse the even part of the forward DCT. ! * The rotator is c(-6). ! */ z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); z2 <<= CONST_BITS; z3 <<= CONST_BITS;
*** 241,250 **** --- 239,255 ---- z2 += ONE << (CONST_BITS-PASS1_BITS-1); tmp0 = z2 + z3; tmp1 = z2 - z3; + z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + + z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ + tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ + tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ + tmp10 = tmp0 + tmp2; tmp13 = tmp0 - tmp2; tmp11 = tmp1 + tmp3; tmp12 = tmp1 - tmp3;
*** 258,282 **** tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); z2 = tmp0 + tmp2; z3 = tmp1 + tmp3; ! z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */ ! z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ ! z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ z2 += z1; z3 += z1; ! z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ ! tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ ! tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ tmp0 += z1 + z2; tmp3 += z1 + z3; ! z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ ! tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ ! tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ tmp1 += z1 + z3; tmp2 += z1 + z2; /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ --- 263,287 ---- tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); z2 = tmp0 + tmp2; z3 = tmp1 + tmp3; ! z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */ ! z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */ ! z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */ z2 += z1; z3 += z1; ! z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */ ! tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */ ! tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */ tmp0 += z1 + z2; tmp3 += z1 + z3; ! z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */ ! tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */ ! tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */ tmp1 += z1 + z3; tmp2 += z1 + z2; /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
*** 292,308 **** inptr++; /* advance pointers to next column */ quantptr++; wsptr++; } ! /* Pass 2: process rows from work array, store into output array. */ ! /* Note that we must descale the results by a factor of 8 == 2**3, */ ! /* and also undo the PASS1_BITS scaling. */ wsptr = workspace; for (ctr = 0; ctr < DCTSIZE; ctr++) { outptr = output_buf[ctr] + output_col; /* Rows of zeroes can be exploited in the same way as we did with columns. * However, the column calculation has created many nonzero AC terms, so * the simplification applies less often (typically 5% to 10% of the time). * On machines with very fast multiplication, it's possible that the * test takes more time than it's worth. In that case this section --- 297,320 ---- inptr++; /* advance pointers to next column */ quantptr++; wsptr++; } ! /* Pass 2: process rows from work array, store into output array. ! * Note that we must descale the results by a factor of 8 == 2**3, ! * and also undo the PASS1_BITS scaling. ! */ wsptr = workspace; for (ctr = 0; ctr < DCTSIZE; ctr++) { outptr = output_buf[ctr] + output_col; + + /* Add range center and fudge factor for final descale and range-limit. */ + z2 = (INT32) wsptr[0] + + ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + + (ONE << (PASS1_BITS+2))); + /* Rows of zeroes can be exploited in the same way as we did with columns. * However, the column calculation has created many nonzero AC terms, so * the simplification applies less often (typically 5% to 10% of the time). * On machines with very fast multiplication, it's possible that the * test takes more time than it's worth. In that case this section
*** 311,321 **** #ifndef NO_ZERO_ROW_TEST if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 && wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) { /* AC terms all zero */ ! JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3) & RANGE_MASK]; outptr[0] = dcval; outptr[1] = dcval; outptr[2] = dcval; --- 323,333 ---- #ifndef NO_ZERO_ROW_TEST if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 && wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) { /* AC terms all zero */ ! JSAMPLE dcval = range_limit[(int) RIGHT_SHIFT(z2, PASS1_BITS+3) & RANGE_MASK]; outptr[0] = dcval; outptr[1] = dcval; outptr[2] = dcval;
*** 328,354 **** wsptr += DCTSIZE; /* advance pointer to next row */ continue; } #endif ! /* Even part: reverse the even part of the forward DCT. */ ! /* The rotator is sqrt(2)*c(-6). */ ! ! z2 = (INT32) wsptr[2]; ! z3 = (INT32) wsptr[6]; ! ! z1 = MULTIPLY(z2 + z3, FIX_0_541196100); ! tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); ! tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); - /* Add fudge factor here for final descale. */ - z2 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); z3 = (INT32) wsptr[4]; tmp0 = (z2 + z3) << CONST_BITS; tmp1 = (z2 - z3) << CONST_BITS; tmp10 = tmp0 + tmp2; tmp13 = tmp0 - tmp2; tmp11 = tmp1 + tmp3; tmp12 = tmp1 - tmp3; --- 340,365 ---- wsptr += DCTSIZE; /* advance pointer to next row */ continue; } #endif ! /* Even part: reverse the even part of the forward DCT. ! * The rotator is c(-6). ! */ z3 = (INT32) wsptr[4]; tmp0 = (z2 + z3) << CONST_BITS; tmp1 = (z2 - z3) << CONST_BITS; + z2 = (INT32) wsptr[2]; + z3 = (INT32) wsptr[6]; + + z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ + tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ + tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ + tmp10 = tmp0 + tmp2; tmp13 = tmp0 - tmp2; tmp11 = tmp1 + tmp3; tmp12 = tmp1 - tmp3;
*** 362,386 **** tmp3 = (INT32) wsptr[1]; z2 = tmp0 + tmp2; z3 = tmp1 + tmp3; ! z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */ ! z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ ! z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ z2 += z1; z3 += z1; ! z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ ! tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ ! tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ tmp0 += z1 + z2; tmp3 += z1 + z3; ! z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ ! tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ ! tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ tmp1 += z1 + z3; tmp2 += z1 + z2; /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ --- 373,397 ---- tmp3 = (INT32) wsptr[1]; z2 = tmp0 + tmp2; z3 = tmp1 + tmp3; ! z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */ ! z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */ ! z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */ z2 += z1; z3 += z1; ! z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */ ! tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */ ! tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */ tmp0 += z1 + z2; tmp3 += z1 + z3; ! z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */ ! tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */ ! tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */ tmp1 += z1 + z3; tmp2 += z1 + z2; /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
*** 416,426 **** #ifdef IDCT_SCALING_SUPPORTED /* * Perform dequantization and inverse DCT on one block of coefficients, ! * producing a 7x7 output block. * * Optimized algorithm with 12 multiplications in the 1-D kernel. * cK represents sqrt(2) * cos(K*pi/14). */ --- 427,437 ---- #ifdef IDCT_SCALING_SUPPORTED /* * Perform dequantization and inverse DCT on one block of coefficients, ! * producing a reduced-size 7x7 output block. * * Optimized algorithm with 12 multiplications in the 1-D kernel. * cK represents sqrt(2) * cos(K*pi/14). */
*** 500,511 **** for (ctr = 0; ctr < 7; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add fudge factor here for final descale. */ ! tmp13 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); tmp13 <<= CONST_BITS; z1 = (INT32) wsptr[2]; z2 = (INT32) wsptr[4]; z3 = (INT32) wsptr[6]; --- 511,524 ---- for (ctr = 0; ctr < 7; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add range center and fudge factor for final descale and range-limit. */ ! tmp13 = (INT32) wsptr[0] + ! ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + ! (ONE << (PASS1_BITS+2))); tmp13 <<= CONST_BITS; z1 = (INT32) wsptr[2]; z2 = (INT32) wsptr[4]; z3 = (INT32) wsptr[6];
*** 636,647 **** for (ctr = 0; ctr < 6; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add fudge factor here for final descale. */ ! tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); tmp0 <<= CONST_BITS; tmp2 = (INT32) wsptr[4]; tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */ tmp1 = tmp0 + tmp10; tmp11 = tmp0 - tmp10 - tmp10; --- 649,662 ---- for (ctr = 0; ctr < 6; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add range center and fudge factor for final descale and range-limit. */ ! tmp0 = (INT32) wsptr[0] + ! ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + ! (ONE << (PASS1_BITS+2))); tmp0 <<= CONST_BITS; tmp2 = (INT32) wsptr[4]; tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */ tmp1 = tmp0 + tmp10; tmp11 = tmp0 - tmp10 - tmp10;
*** 755,766 **** for (ctr = 0; ctr < 5; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add fudge factor here for final descale. */ ! tmp12 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); tmp12 <<= CONST_BITS; tmp0 = (INT32) wsptr[2]; tmp1 = (INT32) wsptr[4]; z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */ z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */ --- 770,783 ---- for (ctr = 0; ctr < 5; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add range center and fudge factor for final descale and range-limit. */ ! tmp12 = (INT32) wsptr[0] + ! ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + ! (ONE << (PASS1_BITS+2))); tmp12 <<= CONST_BITS; tmp0 = (INT32) wsptr[2]; tmp1 = (INT32) wsptr[4]; z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */ z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
*** 867,878 **** for (ctr = 0; ctr < 4; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add fudge factor here for final descale. */ ! tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); tmp2 = (INT32) wsptr[2]; tmp10 = (tmp0 + tmp2) << CONST_BITS; tmp12 = (tmp0 - tmp2) << CONST_BITS; --- 884,897 ---- for (ctr = 0; ctr < 4; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add range center and fudge factor for final descale and range-limit. */ ! tmp0 = (INT32) wsptr[0] + ! ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + ! (ONE << (PASS1_BITS+2))); tmp2 = (INT32) wsptr[2]; tmp10 = (tmp0 + tmp2) << CONST_BITS; tmp12 = (tmp0 - tmp2) << CONST_BITS;
*** 964,975 **** for (ctr = 0; ctr < 3; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add fudge factor here for final descale. */ ! tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); tmp0 <<= CONST_BITS; tmp2 = (INT32) wsptr[2]; tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */ tmp10 = tmp0 + tmp12; tmp2 = tmp0 - tmp12 - tmp12; --- 983,996 ---- for (ctr = 0; ctr < 3; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add range center and fudge factor for final descale and range-limit. */ ! tmp0 = (INT32) wsptr[0] + ! ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + ! (ONE << (PASS1_BITS+2))); tmp0 <<= CONST_BITS; tmp2 = (INT32) wsptr[2]; tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */ tmp10 = tmp0 + tmp12; tmp2 = tmp0 - tmp12 - tmp12;
*** 1006,1030 **** GLOBAL(void) jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col) { ! INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5; ISLOW_MULT_TYPE * quantptr; JSAMPROW outptr; JSAMPLE *range_limit = IDCT_range_limit(cinfo); ! SHIFT_TEMPS /* Pass 1: process columns from input. */ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; /* Column 0 */ tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]); tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]); ! /* Add fudge factor here for final descale. */ ! tmp4 += ONE << 2; tmp0 = tmp4 + tmp5; tmp2 = tmp4 - tmp5; /* Column 1 */ --- 1027,1051 ---- GLOBAL(void) jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col) { ! DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5; ISLOW_MULT_TYPE * quantptr; JSAMPROW outptr; JSAMPLE *range_limit = IDCT_range_limit(cinfo); ! ISHIFT_TEMPS /* Pass 1: process columns from input. */ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; /* Column 0 */ tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]); tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]); ! /* Add range center and fudge factor for final descale and range-limit. */ ! tmp4 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2); tmp0 = tmp4 + tmp5; tmp2 = tmp4 - tmp5; /* Column 1 */
*** 1037,1054 **** /* Pass 2: process 2 rows, store into output array. */ /* Row 0 */ outptr = output_buf[0] + output_col; ! outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK]; ! outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK]; /* Row 1 */ outptr = output_buf[1] + output_col; ! outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp2 + tmp3, 3) & RANGE_MASK]; ! outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2 - tmp3, 3) & RANGE_MASK]; } /* * Perform dequantization and inverse DCT on one block of coefficients, --- 1058,1075 ---- /* Pass 2: process 2 rows, store into output array. */ /* Row 0 */ outptr = output_buf[0] + output_col; ! outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK]; ! outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK]; /* Row 1 */ outptr = output_buf[1] + output_col; ! outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp2 + tmp3, 3) & RANGE_MASK]; ! outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp2 - tmp3, 3) & RANGE_MASK]; } /* * Perform dequantization and inverse DCT on one block of coefficients,
*** 1061,1081 **** GLOBAL(void) jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr, JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col) { ! int dcval; ISLOW_MULT_TYPE * quantptr; JSAMPLE *range_limit = IDCT_range_limit(cinfo); ! SHIFT_TEMPS /* 1x1 is trivial: just take the DC coefficient divided by 8. */ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; dcval = DEQUANTIZE(coef_block[0], quantptr[0]); ! dcval = (int) DESCALE((INT32) dcval, 3); ! output_buf[0][output_col] = range_limit[dcval & RANGE_MASK]; } /* * Perform dequantization and inverse DCT on one block of coefficients, --- 1082,1106 ---- GLOBAL(void) jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr, JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col) { ! DCTELEM dcval; ISLOW_MULT_TYPE * quantptr; JSAMPLE *range_limit = IDCT_range_limit(cinfo); ! ISHIFT_TEMPS /* 1x1 is trivial: just take the DC coefficient divided by 8. */ + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + dcval = DEQUANTIZE(coef_block[0], quantptr[0]); ! /* Add range center and fudge factor for descale and range-limit. */ ! dcval += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2); ! output_buf[0][output_col] = ! range_limit[(int) IRIGHT_SHIFT(dcval, 3) & RANGE_MASK]; } /* * Perform dequantization and inverse DCT on one block of coefficients,
*** 1170,1181 **** for (ctr = 0; ctr < 9; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add fudge factor here for final descale. */ ! tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); tmp0 <<= CONST_BITS; z1 = (INT32) wsptr[2]; z2 = (INT32) wsptr[4]; z3 = (INT32) wsptr[6]; --- 1195,1208 ---- for (ctr = 0; ctr < 9; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add range center and fudge factor for final descale and range-limit. */ ! tmp0 = (INT32) wsptr[0] + ! ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + ! (ONE << (PASS1_BITS+2))); tmp0 <<= CONST_BITS; z1 = (INT32) wsptr[2]; z2 = (INT32) wsptr[4]; z3 = (INT32) wsptr[6];
*** 1353,1364 **** for (ctr = 0; ctr < 10; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add fudge factor here for final descale. */ ! z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); z3 <<= CONST_BITS; z4 = (INT32) wsptr[4]; z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */ z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */ tmp10 = z3 + z1; --- 1380,1393 ---- for (ctr = 0; ctr < 10; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add range center and fudge factor for final descale and range-limit. */ ! z3 = (INT32) wsptr[0] + ! ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + ! (ONE << (PASS1_BITS+2))); z3 <<= CONST_BITS; z4 = (INT32) wsptr[4]; z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */ z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */ tmp10 = z3 + z1;
*** 1546,1557 **** for (ctr = 0; ctr < 11; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add fudge factor here for final descale. */ ! tmp10 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); tmp10 <<= CONST_BITS; z1 = (INT32) wsptr[2]; z2 = (INT32) wsptr[4]; z3 = (INT32) wsptr[6]; --- 1575,1588 ---- for (ctr = 0; ctr < 11; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add range center and fudge factor for final descale and range-limit. */ ! tmp10 = (INT32) wsptr[0] + ! ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + ! (ONE << (PASS1_BITS+2))); tmp10 <<= CONST_BITS; z1 = (INT32) wsptr[2]; z2 = (INT32) wsptr[4]; z3 = (INT32) wsptr[6];
*** 1750,1761 **** for (ctr = 0; ctr < 12; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add fudge factor here for final descale. */ ! z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); z3 <<= CONST_BITS; z4 = (INT32) wsptr[4]; z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */ --- 1781,1794 ---- for (ctr = 0; ctr < 12; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add range center and fudge factor for final descale and range-limit. */ ! z3 = (INT32) wsptr[0] + ! ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + ! (ONE << (PASS1_BITS+2))); z3 <<= CONST_BITS; z4 = (INT32) wsptr[4]; z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
*** 1971,1982 **** for (ctr = 0; ctr < 13; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add fudge factor here for final descale. */ ! z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); z1 <<= CONST_BITS; z2 = (INT32) wsptr[2]; z3 = (INT32) wsptr[4]; z4 = (INT32) wsptr[6]; --- 2004,2017 ---- for (ctr = 0; ctr < 13; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add range center and fudge factor for final descale and range-limit. */ ! z1 = (INT32) wsptr[0] + ! ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + ! (ONE << (PASS1_BITS+2))); z1 <<= CONST_BITS; z2 = (INT32) wsptr[2]; z3 = (INT32) wsptr[4]; z4 = (INT32) wsptr[6];
*** 2198,2209 **** for (ctr = 0; ctr < 14; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add fudge factor here for final descale. */ ! z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); z1 <<= CONST_BITS; z4 = (INT32) wsptr[4]; z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */ z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */ z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */ --- 2233,2246 ---- for (ctr = 0; ctr < 14; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add range center and fudge factor for final descale and range-limit. */ ! z1 = (INT32) wsptr[0] + ! ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + ! (ONE << (PASS1_BITS+2))); z1 <<= CONST_BITS; z4 = (INT32) wsptr[4]; z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */ z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */ z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
*** 2430,2441 **** for (ctr = 0; ctr < 15; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add fudge factor here for final descale. */ ! z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); z1 <<= CONST_BITS; z2 = (INT32) wsptr[2]; z3 = (INT32) wsptr[4]; z4 = (INT32) wsptr[6]; --- 2467,2480 ---- for (ctr = 0; ctr < 15; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add range center and fudge factor for final descale and range-limit. */ ! z1 = (INT32) wsptr[0] + ! ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + ! (ONE << (PASS1_BITS+2))); z1 <<= CONST_BITS; z2 = (INT32) wsptr[2]; z3 = (INT32) wsptr[4]; z4 = (INT32) wsptr[6];
*** 2583,2593 **** /* Even part */ tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); tmp0 <<= CONST_BITS; /* Add fudge factor here for final descale. */ ! tmp0 += 1 << (CONST_BITS-PASS1_BITS-1); z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */ tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */ --- 2622,2632 ---- /* Even part */ tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); tmp0 <<= CONST_BITS; /* Add fudge factor here for final descale. */ ! tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */ tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
*** 2681,2692 **** for (ctr = 0; ctr < 16; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add fudge factor here for final descale. */ ! tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); tmp0 <<= CONST_BITS; z1 = (INT32) wsptr[4]; tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */ tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */ --- 2720,2733 ---- for (ctr = 0; ctr < 16; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add range center and fudge factor for final descale and range-limit. */ ! tmp0 = (INT32) wsptr[0] + ! ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + ! (ONE << (PASS1_BITS+2))); tmp0 <<= CONST_BITS; z1 = (INT32) wsptr[4]; tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */ tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
*** 2833,2845 **** JSAMPLE *range_limit = IDCT_range_limit(cinfo); int ctr; int workspace[8*8]; /* buffers data between passes */ SHIFT_TEMPS ! /* Pass 1: process columns from input, store into work array. */ ! /* Note results are scaled up by sqrt(8) compared to a true IDCT; */ ! /* furthermore, we scale the results by 2**PASS1_BITS. */ inptr = coef_block; quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; wsptr = workspace; for (ctr = DCTSIZE; ctr > 0; ctr--) { --- 2874,2888 ---- JSAMPLE *range_limit = IDCT_range_limit(cinfo); int ctr; int workspace[8*8]; /* buffers data between passes */ SHIFT_TEMPS ! /* Pass 1: process columns from input, store into work array. ! * Note results are scaled up by sqrt(8) compared to a true IDCT; ! * furthermore, we scale the results by 2**PASS1_BITS. ! * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16). ! */ inptr = coef_block; quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; wsptr = workspace; for (ctr = DCTSIZE; ctr > 0; ctr--) {
*** 2872,2890 **** quantptr++; wsptr++; continue; } ! /* Even part: reverse the even part of the forward DCT. */ ! /* The rotator is sqrt(2)*c(-6). */ ! ! z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); ! z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); ! ! z1 = MULTIPLY(z2 + z3, FIX_0_541196100); ! tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); ! tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); z2 <<= CONST_BITS; z3 <<= CONST_BITS; --- 2915,2927 ---- quantptr++; wsptr++; continue; } ! /* Even part: reverse the even part of the forward DCT. ! * The rotator is c(-6). ! */ z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); z2 <<= CONST_BITS; z3 <<= CONST_BITS;
*** 2892,2901 **** --- 2929,2945 ---- z2 += ONE << (CONST_BITS-PASS1_BITS-1); tmp0 = z2 + z3; tmp1 = z2 - z3; + z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + + z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ + tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ + tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ + tmp10 = tmp0 + tmp2; tmp13 = tmp0 - tmp2; tmp11 = tmp1 + tmp3; tmp12 = tmp1 - tmp3;
*** 2909,2933 **** tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); z2 = tmp0 + tmp2; z3 = tmp1 + tmp3; ! z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */ ! z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ ! z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ z2 += z1; z3 += z1; ! z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ ! tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ ! tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ tmp0 += z1 + z2; tmp3 += z1 + z3; ! z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ ! tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ ! tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ tmp1 += z1 + z3; tmp2 += z1 + z2; /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ --- 2953,2977 ---- tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); z2 = tmp0 + tmp2; z3 = tmp1 + tmp3; ! z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */ ! z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */ ! z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */ z2 += z1; z3 += z1; ! z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */ ! tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */ ! tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */ tmp0 += z1 + z2; tmp3 += z1 + z3; ! z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */ ! tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */ ! tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */ tmp1 += z1 + z3; tmp2 += z1 + z2; /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
*** 2946,2963 **** } /* Pass 2: process 8 rows from work array, store into output array. * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32). */ wsptr = workspace; for (ctr = 0; ctr < 8; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add fudge factor here for final descale. */ ! tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); tmp0 <<= CONST_BITS; z1 = (INT32) wsptr[4]; tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */ tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */ --- 2990,3010 ---- } /* Pass 2: process 8 rows from work array, store into output array. * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32). */ + wsptr = workspace; for (ctr = 0; ctr < 8; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add range center and fudge factor for final descale and range-limit. */ ! tmp0 = (INT32) wsptr[0] + ! ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + ! (ONE << (PASS1_BITS+2))); tmp0 <<= CONST_BITS; z1 = (INT32) wsptr[4]; tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */ tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
*** 3107,3116 **** --- 3154,3164 ---- SHIFT_TEMPS /* Pass 1: process columns from input, store into work array. * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14). */ + inptr = coef_block; quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; wsptr = workspace; for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { /* Even part */
*** 3162,3179 **** } /* Pass 2: process 7 rows from work array, store into output array. * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28). */ wsptr = workspace; for (ctr = 0; ctr < 7; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add fudge factor here for final descale. */ ! z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); z1 <<= CONST_BITS; z4 = (INT32) wsptr[4]; z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */ z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */ z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */ --- 3210,3230 ---- } /* Pass 2: process 7 rows from work array, store into output array. * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28). */ + wsptr = workspace; for (ctr = 0; ctr < 7; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add range center and fudge factor for final descale and range-limit. */ ! z1 = (INT32) wsptr[0] + ! ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + ! (ONE << (PASS1_BITS+2))); z1 <<= CONST_BITS; z4 = (INT32) wsptr[4]; z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */ z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */ z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
*** 3302,3311 **** --- 3353,3363 ---- SHIFT_TEMPS /* Pass 1: process columns from input, store into work array. * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12). */ + inptr = coef_block; quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; wsptr = workspace; for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { /* Even part */
*** 3344,3361 **** } /* Pass 2: process 6 rows from work array, store into output array. * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24). */ wsptr = workspace; for (ctr = 0; ctr < 6; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add fudge factor here for final descale. */ ! z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); z3 <<= CONST_BITS; z4 = (INT32) wsptr[4]; z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */ --- 3396,3416 ---- } /* Pass 2: process 6 rows from work array, store into output array. * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24). */ + wsptr = workspace; for (ctr = 0; ctr < 6; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add range center and fudge factor for final descale and range-limit. */ ! z3 = (INT32) wsptr[0] + ! ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + ! (ONE << (PASS1_BITS+2))); z3 <<= CONST_BITS; z4 = (INT32) wsptr[4]; z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
*** 3478,3487 **** --- 3533,3543 ---- SHIFT_TEMPS /* Pass 1: process columns from input, store into work array. * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10). */ + inptr = coef_block; quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; wsptr = workspace; for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { /* Even part */
*** 3518,3535 **** } /* Pass 2: process 5 rows from work array, store into output array. * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20). */ wsptr = workspace; for (ctr = 0; ctr < 5; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add fudge factor here for final descale. */ ! z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); z3 <<= CONST_BITS; z4 = (INT32) wsptr[4]; z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */ z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */ tmp10 = z3 + z1; --- 3574,3594 ---- } /* Pass 2: process 5 rows from work array, store into output array. * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20). */ + wsptr = workspace; for (ctr = 0; ctr < 5; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add range center and fudge factor for final descale and range-limit. */ ! z3 = (INT32) wsptr[0] + ! ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + ! (ONE << (PASS1_BITS+2))); z3 <<= CONST_BITS; z4 = (INT32) wsptr[4]; z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */ z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */ tmp10 = z3 + z1;
*** 3637,3648 **** int ctr; int workspace[8*4]; /* buffers data between passes */ SHIFT_TEMPS /* Pass 1: process columns from input, store into work array. ! * 4-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16). */ inptr = coef_block; quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; wsptr = workspace; for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { /* Even part */ --- 3696,3709 ---- int ctr; int workspace[8*4]; /* buffers data between passes */ SHIFT_TEMPS /* Pass 1: process columns from input, store into work array. ! * 4-point IDCT kernel, ! * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT]. */ + inptr = coef_block; quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; wsptr = workspace; for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { /* Even part */
*** 3673,3707 **** wsptr[8*3] = (int) (tmp10 - tmp0); wsptr[8*1] = (int) (tmp12 + tmp2); wsptr[8*2] = (int) (tmp12 - tmp2); } ! /* Pass 2: process rows from work array, store into output array. */ ! /* Note that we must descale the results by a factor of 8 == 2**3, */ ! /* and also undo the PASS1_BITS scaling. */ wsptr = workspace; for (ctr = 0; ctr < 4; ctr++) { outptr = output_buf[ctr] + output_col; ! /* Even part: reverse the even part of the forward DCT. */ ! /* The rotator is sqrt(2)*c(-6). */ ! ! z2 = (INT32) wsptr[2]; ! z3 = (INT32) wsptr[6]; ! ! z1 = MULTIPLY(z2 + z3, FIX_0_541196100); ! tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); ! tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); ! /* Add fudge factor here for final descale. */ ! z2 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); z3 = (INT32) wsptr[4]; tmp0 = (z2 + z3) << CONST_BITS; tmp1 = (z2 - z3) << CONST_BITS; tmp10 = tmp0 + tmp2; tmp13 = tmp0 - tmp2; tmp11 = tmp1 + tmp3; tmp12 = tmp1 - tmp3; --- 3734,3773 ---- wsptr[8*3] = (int) (tmp10 - tmp0); wsptr[8*1] = (int) (tmp12 + tmp2); wsptr[8*2] = (int) (tmp12 - tmp2); } ! /* Pass 2: process rows from work array, store into output array. ! * Note that we must descale the results by a factor of 8 == 2**3, ! * and also undo the PASS1_BITS scaling. ! * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16). ! */ wsptr = workspace; for (ctr = 0; ctr < 4; ctr++) { outptr = output_buf[ctr] + output_col; ! /* Even part: reverse the even part of the forward DCT. ! * The rotator is c(-6). ! */ ! /* Add range center and fudge factor for final descale and range-limit. */ ! z2 = (INT32) wsptr[0] + ! ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + ! (ONE << (PASS1_BITS+2))); z3 = (INT32) wsptr[4]; tmp0 = (z2 + z3) << CONST_BITS; tmp1 = (z2 - z3) << CONST_BITS; + z2 = (INT32) wsptr[2]; + z3 = (INT32) wsptr[6]; + + z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ + tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ + tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ + tmp10 = tmp0 + tmp2; tmp13 = tmp0 - tmp2; tmp11 = tmp1 + tmp3; tmp12 = tmp1 - tmp3;
*** 3715,3739 **** tmp3 = (INT32) wsptr[1]; z2 = tmp0 + tmp2; z3 = tmp1 + tmp3; ! z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */ ! z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ ! z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ z2 += z1; z3 += z1; ! z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ ! tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ ! tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ tmp0 += z1 + z2; tmp3 += z1 + z3; ! z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ ! tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ ! tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ tmp1 += z1 + z3; tmp2 += z1 + z2; /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ --- 3781,3805 ---- tmp3 = (INT32) wsptr[1]; z2 = tmp0 + tmp2; z3 = tmp1 + tmp3; ! z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */ ! z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */ ! z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */ z2 += z1; z3 += z1; ! z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */ ! tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */ ! tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */ tmp0 += z1 + z2; tmp3 += z1 + z3; ! z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */ ! tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */ ! tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */ tmp1 += z1 + z3; tmp2 += z1 + z2; /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
*** 3791,3800 **** --- 3857,3867 ---- SHIFT_TEMPS /* Pass 1: process columns from input, store into work array. * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6). */ + inptr = coef_block; quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; wsptr = workspace; for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) { /* Even part */
*** 3821,3838 **** } /* Pass 2: process 3 rows from work array, store into output array. * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12). */ wsptr = workspace; for (ctr = 0; ctr < 3; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add fudge factor here for final descale. */ ! tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); tmp0 <<= CONST_BITS; tmp2 = (INT32) wsptr[4]; tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */ tmp1 = tmp0 + tmp10; tmp11 = tmp0 - tmp10 - tmp10; --- 3888,3908 ---- } /* Pass 2: process 3 rows from work array, store into output array. * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12). */ + wsptr = workspace; for (ctr = 0; ctr < 3; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add range center and fudge factor for final descale and range-limit. */ ! tmp0 = (INT32) wsptr[0] + ! ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + ! (ONE << (PASS1_BITS+2))); tmp0 <<= CONST_BITS; tmp2 = (INT32) wsptr[4]; tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */ tmp1 = tmp0 + tmp10; tmp11 = tmp0 - tmp10 - tmp10;
*** 3922,3939 **** /* Pass 2: process 2 rows from work array, store into output array. * 4-point IDCT kernel, * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT]. */ wsptr = workspace; for (ctr = 0; ctr < 2; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add fudge factor here for final descale. */ ! tmp0 = wsptr[0] + (ONE << 2); tmp2 = wsptr[2]; tmp10 = (tmp0 + tmp2) << CONST_BITS; tmp12 = (tmp0 - tmp2) << CONST_BITS; --- 3992,4010 ---- /* Pass 2: process 2 rows from work array, store into output array. * 4-point IDCT kernel, * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT]. */ + wsptr = workspace; for (ctr = 0; ctr < 2; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add range center and fudge factor for final descale and range-limit. */ ! tmp0 = wsptr[0] + ((((INT32) RANGE_CENTER) << 3) + (ONE << 2)); tmp2 = wsptr[2]; tmp10 = (tmp0 + tmp2) << CONST_BITS; tmp12 = (tmp0 - tmp2) << CONST_BITS;
*** 3977,4013 **** GLOBAL(void) jpeg_idct_2x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr, JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col) { ! INT32 tmp0, tmp10; ISLOW_MULT_TYPE * quantptr; JSAMPROW outptr; JSAMPLE *range_limit = IDCT_range_limit(cinfo); ! SHIFT_TEMPS /* Pass 1: empty. */ /* Pass 2: process 1 row from input, store into output array. */ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; outptr = output_buf[0] + output_col; /* Even part */ ! tmp10 = DEQUANTIZE(coef_block[0], quantptr[0]); ! /* Add fudge factor here for final descale. */ ! tmp10 += ONE << 2; /* Odd part */ ! tmp0 = DEQUANTIZE(coef_block[1], quantptr[1]); /* Final output stage */ ! outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 3) & RANGE_MASK]; ! outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 3) & RANGE_MASK]; } /* * Perform dequantization and inverse DCT on one block of coefficients, --- 4048,4084 ---- GLOBAL(void) jpeg_idct_2x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr, JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col) { ! DCTELEM tmp0, tmp1; ISLOW_MULT_TYPE * quantptr; JSAMPROW outptr; JSAMPLE *range_limit = IDCT_range_limit(cinfo); ! ISHIFT_TEMPS /* Pass 1: empty. */ /* Pass 2: process 1 row from input, store into output array. */ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; outptr = output_buf[0] + output_col; /* Even part */ ! tmp0 = DEQUANTIZE(coef_block[0], quantptr[0]); ! /* Add range center and fudge factor for final descale and range-limit. */ ! tmp0 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2); /* Odd part */ ! tmp1 = DEQUANTIZE(coef_block[1], quantptr[1]); /* Final output stage */ ! outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK]; ! outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK]; } /* * Perform dequantization and inverse DCT on one block of coefficients,
*** 4034,4043 **** --- 4105,4115 ---- SHIFT_TEMPS /* Pass 1: process columns from input, store into work array. * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32). */ + inptr = coef_block; quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; wsptr = workspace; for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { /* Even part */
*** 4133,4167 **** wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS); wsptr[8*7] = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS); wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS); } ! /* Pass 2: process rows from work array, store into output array. */ ! /* Note that we must descale the results by a factor of 8 == 2**3, */ ! /* and also undo the PASS1_BITS scaling. */ wsptr = workspace; for (ctr = 0; ctr < 16; ctr++) { outptr = output_buf[ctr] + output_col; ! /* Even part: reverse the even part of the forward DCT. */ ! /* The rotator is sqrt(2)*c(-6). */ ! ! z2 = (INT32) wsptr[2]; ! z3 = (INT32) wsptr[6]; ! ! z1 = MULTIPLY(z2 + z3, FIX_0_541196100); ! tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); ! tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); ! /* Add fudge factor here for final descale. */ ! z2 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); z3 = (INT32) wsptr[4]; tmp0 = (z2 + z3) << CONST_BITS; tmp1 = (z2 - z3) << CONST_BITS; tmp10 = tmp0 + tmp2; tmp13 = tmp0 - tmp2; tmp11 = tmp1 + tmp3; tmp12 = tmp1 - tmp3; --- 4205,4244 ---- wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS); wsptr[8*7] = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS); wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS); } ! /* Pass 2: process rows from work array, store into output array. ! * Note that we must descale the results by a factor of 8 == 2**3, ! * and also undo the PASS1_BITS scaling. ! * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16). ! */ wsptr = workspace; for (ctr = 0; ctr < 16; ctr++) { outptr = output_buf[ctr] + output_col; ! /* Even part: reverse the even part of the forward DCT. ! * The rotator is c(-6). ! */ ! /* Add range center and fudge factor for final descale and range-limit. */ ! z2 = (INT32) wsptr[0] + ! ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + ! (ONE << (PASS1_BITS+2))); z3 = (INT32) wsptr[4]; tmp0 = (z2 + z3) << CONST_BITS; tmp1 = (z2 - z3) << CONST_BITS; + z2 = (INT32) wsptr[2]; + z3 = (INT32) wsptr[6]; + + z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ + tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ + tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ + tmp10 = tmp0 + tmp2; tmp13 = tmp0 - tmp2; tmp11 = tmp1 + tmp3; tmp12 = tmp1 - tmp3;
*** 4175,4199 **** tmp3 = (INT32) wsptr[1]; z2 = tmp0 + tmp2; z3 = tmp1 + tmp3; ! z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */ ! z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ ! z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ z2 += z1; z3 += z1; ! z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ ! tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ ! tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ tmp0 += z1 + z2; tmp3 += z1 + z3; ! z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ ! tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ ! tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ tmp1 += z1 + z3; tmp2 += z1 + z2; /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ --- 4252,4276 ---- tmp3 = (INT32) wsptr[1]; z2 = tmp0 + tmp2; z3 = tmp1 + tmp3; ! z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */ ! z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */ ! z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */ z2 += z1; z3 += z1; ! z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */ ! tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */ ! tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */ tmp0 += z1 + z2; tmp3 += z1 + z3; ! z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */ ! tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */ ! tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */ tmp1 += z1 + z3; tmp2 += z1 + z2; /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
*** 4252,4261 **** --- 4329,4339 ---- SHIFT_TEMPS /* Pass 1: process columns from input, store into work array. * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28). */ + inptr = coef_block; quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; wsptr = workspace; for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) { /* Even part */
*** 4339,4356 **** } /* Pass 2: process 14 rows from work array, store into output array. * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14). */ wsptr = workspace; for (ctr = 0; ctr < 14; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add fudge factor here for final descale. */ ! tmp23 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); tmp23 <<= CONST_BITS; z1 = (INT32) wsptr[2]; z2 = (INT32) wsptr[4]; z3 = (INT32) wsptr[6]; --- 4417,4437 ---- } /* Pass 2: process 14 rows from work array, store into output array. * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14). */ + wsptr = workspace; for (ctr = 0; ctr < 14; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add range center and fudge factor for final descale and range-limit. */ ! tmp23 = (INT32) wsptr[0] + ! ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + ! (ONE << (PASS1_BITS+2))); tmp23 <<= CONST_BITS; z1 = (INT32) wsptr[2]; z2 = (INT32) wsptr[4]; z3 = (INT32) wsptr[6];
*** 4435,4444 **** --- 4516,4526 ---- SHIFT_TEMPS /* Pass 1: process columns from input, store into work array. * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24). */ + inptr = coef_block; quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; wsptr = workspace; for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) { /* Even part */
*** 4518,4535 **** } /* Pass 2: process 12 rows from work array, store into output array. * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12). */ wsptr = workspace; for (ctr = 0; ctr < 12; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add fudge factor here for final descale. */ ! tmp10 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); tmp10 <<= CONST_BITS; tmp12 = (INT32) wsptr[4]; tmp20 = MULTIPLY(tmp12, FIX(0.707106781)); /* c4 */ tmp11 = tmp10 + tmp20; tmp21 = tmp10 - tmp20 - tmp20; --- 4600,4620 ---- } /* Pass 2: process 12 rows from work array, store into output array. * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12). */ + wsptr = workspace; for (ctr = 0; ctr < 12; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add range center and fudge factor for final descale and range-limit. */ ! tmp10 = (INT32) wsptr[0] + ! ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + ! (ONE << (PASS1_BITS+2))); tmp10 <<= CONST_BITS; tmp12 = (INT32) wsptr[4]; tmp20 = MULTIPLY(tmp12, FIX(0.707106781)); /* c4 */ tmp11 = tmp10 + tmp20; tmp21 = tmp10 - tmp20 - tmp20;
*** 4599,4608 **** --- 4684,4694 ---- SHIFT_TEMPS /* Pass 1: process columns from input, store into work array. * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20). */ + inptr = coef_block; quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; wsptr = workspace; for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) { /* Even part */
*** 4674,4691 **** } /* Pass 2: process 10 rows from work array, store into output array. * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10). */ wsptr = workspace; for (ctr = 0; ctr < 10; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add fudge factor here for final descale. */ ! tmp12 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); tmp12 <<= CONST_BITS; tmp13 = (INT32) wsptr[2]; tmp14 = (INT32) wsptr[4]; z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */ z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */ --- 4760,4780 ---- } /* Pass 2: process 10 rows from work array, store into output array. * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10). */ + wsptr = workspace; for (ctr = 0; ctr < 10; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add range center and fudge factor for final descale and range-limit. */ ! tmp12 = (INT32) wsptr[0] + ! ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + ! (ONE << (PASS1_BITS+2))); tmp12 <<= CONST_BITS; tmp13 = (INT32) wsptr[2]; tmp14 = (INT32) wsptr[4]; z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */ z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */
*** 4748,4760 **** JSAMPLE *range_limit = IDCT_range_limit(cinfo); int ctr; int workspace[4*8]; /* buffers data between passes */ SHIFT_TEMPS ! /* Pass 1: process columns from input, store into work array. */ ! /* Note results are scaled up by sqrt(8) compared to a true IDCT; */ ! /* furthermore, we scale the results by 2**PASS1_BITS. */ inptr = coef_block; quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; wsptr = workspace; for (ctr = 4; ctr > 0; ctr--) { --- 4837,4851 ---- JSAMPLE *range_limit = IDCT_range_limit(cinfo); int ctr; int workspace[4*8]; /* buffers data between passes */ SHIFT_TEMPS ! /* Pass 1: process columns from input, store into work array. ! * Note results are scaled up by sqrt(8) compared to a true IDCT; ! * furthermore, we scale the results by 2**PASS1_BITS. ! * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16). ! */ inptr = coef_block; quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; wsptr = workspace; for (ctr = 4; ctr > 0; ctr--) {
*** 4787,4805 **** quantptr++; wsptr++; continue; } ! /* Even part: reverse the even part of the forward DCT. */ ! /* The rotator is sqrt(2)*c(-6). */ ! ! z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); ! z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); ! ! z1 = MULTIPLY(z2 + z3, FIX_0_541196100); ! tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); ! tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); z2 <<= CONST_BITS; z3 <<= CONST_BITS; --- 4878,4890 ---- quantptr++; wsptr++; continue; } ! /* Even part: reverse the even part of the forward DCT. ! * The rotator is c(-6). ! */ z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); z2 <<= CONST_BITS; z3 <<= CONST_BITS;
*** 4807,4816 **** --- 4892,4908 ---- z2 += ONE << (CONST_BITS-PASS1_BITS-1); tmp0 = z2 + z3; tmp1 = z2 - z3; + z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + + z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ + tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ + tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ + tmp10 = tmp0 + tmp2; tmp13 = tmp0 - tmp2; tmp11 = tmp1 + tmp3; tmp12 = tmp1 - tmp3;
*** 4824,4848 **** tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); z2 = tmp0 + tmp2; z3 = tmp1 + tmp3; ! z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */ ! z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ ! z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ z2 += z1; z3 += z1; ! z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ ! tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ ! tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ tmp0 += z1 + z2; tmp3 += z1 + z3; ! z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ ! tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ ! tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ tmp1 += z1 + z3; tmp2 += z1 + z2; /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ --- 4916,4940 ---- tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); z2 = tmp0 + tmp2; z3 = tmp1 + tmp3; ! z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */ ! z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */ ! z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */ z2 += z1; z3 += z1; ! z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */ ! tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */ ! tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */ tmp0 += z1 + z2; tmp3 += z1 + z3; ! z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */ ! tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */ ! tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */ tmp1 += z1 + z3; tmp2 += z1 + z2; /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
*** 4859,4878 **** quantptr++; wsptr++; } /* Pass 2: process 8 rows from work array, store into output array. ! * 4-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16). */ wsptr = workspace; for (ctr = 0; ctr < 8; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add fudge factor here for final descale. */ ! tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); tmp2 = (INT32) wsptr[2]; tmp10 = (tmp0 + tmp2) << CONST_BITS; tmp12 = (tmp0 - tmp2) << CONST_BITS; --- 4951,4974 ---- quantptr++; wsptr++; } /* Pass 2: process 8 rows from work array, store into output array. ! * 4-point IDCT kernel, ! * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT]. */ + wsptr = workspace; for (ctr = 0; ctr < 8; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add range center and fudge factor for final descale and range-limit. */ ! tmp0 = (INT32) wsptr[0] + ! ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + ! (ONE << (PASS1_BITS+2))); tmp2 = (INT32) wsptr[2]; tmp10 = (tmp0 + tmp2) << CONST_BITS; tmp12 = (tmp0 - tmp2) << CONST_BITS;
*** 4930,4939 **** --- 5026,5036 ---- SHIFT_TEMPS /* Pass 1: process columns from input, store into work array. * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12). */ + inptr = coef_block; quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; wsptr = workspace; for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) { /* Even part */
*** 4972,4989 **** } /* Pass 2: process 6 rows from work array, store into output array. * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6). */ wsptr = workspace; for (ctr = 0; ctr < 6; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add fudge factor here for final descale. */ ! tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); tmp0 <<= CONST_BITS; tmp2 = (INT32) wsptr[2]; tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */ tmp10 = tmp0 + tmp12; tmp2 = tmp0 - tmp12 - tmp12; --- 5069,5089 ---- } /* Pass 2: process 6 rows from work array, store into output array. * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6). */ + wsptr = workspace; for (ctr = 0; ctr < 6; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add range center and fudge factor for final descale and range-limit. */ ! tmp0 = (INT32) wsptr[0] + ! ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + ! (ONE << (PASS1_BITS+2))); tmp0 <<= CONST_BITS; tmp2 = (INT32) wsptr[2]; tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */ tmp10 = tmp0 + tmp12; tmp2 = tmp0 - tmp12 - tmp12;
*** 5035,5044 **** --- 5135,5145 ---- /* Pass 1: process columns from input, store into work array. * 4-point IDCT kernel, * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT]. */ + inptr = coef_block; quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; wsptr = workspace; for (ctr = 0; ctr < 2; ctr++, inptr++, quantptr++, wsptr++) { /* Even part */
*** 5073,5084 **** for (ctr = 0; ctr < 4; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add fudge factor here for final descale. */ ! tmp10 = wsptr[0] + (ONE << (CONST_BITS+2)); /* Odd part */ tmp0 = wsptr[1]; --- 5174,5187 ---- for (ctr = 0; ctr < 4; ctr++) { outptr = output_buf[ctr] + output_col; /* Even part */ ! /* Add range center and fudge factor for final descale and range-limit. */ ! tmp10 = wsptr[0] + ! ((((INT32) RANGE_CENTER) << (CONST_BITS+3)) + ! (ONE << (CONST_BITS+2))); /* Odd part */ tmp0 = wsptr[1];
*** 5104,5137 **** GLOBAL(void) jpeg_idct_1x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col) { ! INT32 tmp0, tmp10; ISLOW_MULT_TYPE * quantptr; JSAMPLE *range_limit = IDCT_range_limit(cinfo); ! SHIFT_TEMPS /* Process 1 column from input, store into output array. */ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; /* Even part */ ! tmp10 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]); ! /* Add fudge factor here for final descale. */ ! tmp10 += ONE << 2; /* Odd part */ ! tmp0 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]); /* Final output stage */ ! output_buf[0][output_col] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 3) ! & RANGE_MASK]; ! output_buf[1][output_col] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 3) ! & RANGE_MASK]; } #endif /* IDCT_SCALING_SUPPORTED */ #endif /* DCT_ISLOW_SUPPORTED */ --- 5207,5240 ---- GLOBAL(void) jpeg_idct_1x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col) { ! DCTELEM tmp0, tmp1; ISLOW_MULT_TYPE * quantptr; JSAMPLE *range_limit = IDCT_range_limit(cinfo); ! ISHIFT_TEMPS /* Process 1 column from input, store into output array. */ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; /* Even part */ ! tmp0 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]); ! /* Add range center and fudge factor for final descale and range-limit. */ ! tmp0 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2); /* Odd part */ ! tmp1 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]); /* Final output stage */ ! output_buf[0][output_col] = ! range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK]; ! output_buf[1][output_col] = ! range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK]; } #endif /* IDCT_SCALING_SUPPORTED */ #endif /* DCT_ISLOW_SUPPORTED */
< prev index next >