< prev index next >

modules/javafx.graphics/src/main/native-iio/libjpeg7/jfdctint.c

Print this page

        

*** 1,10 **** /* * jfdctint.c * * Copyright (C) 1991-1996, Thomas G. Lane. ! * Modification developed 2003-2009 by Guido Vollbeding. * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * * This file contains a slow-but-accurate integer implementation of the * forward DCT (Discrete Cosine Transform). --- 1,10 ---- /* * jfdctint.c * * Copyright (C) 1991-1996, Thomas G. Lane. ! * Modification developed 2003-2015 by Guido Vollbeding. * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * * This file contains a slow-but-accurate integer implementation of the * forward DCT (Discrete Cosine Transform).
*** 163,182 **** DCTELEM *dataptr; JSAMPROW elemptr; int ctr; SHIFT_TEMPS ! /* Pass 1: process rows. */ ! /* Note results are scaled up by sqrt(8) compared to a true DCT; */ ! /* furthermore, we scale the results by 2**PASS1_BITS. */ dataptr = data; for (ctr = 0; ctr < DCTSIZE; ctr++) { elemptr = sample_data[ctr] + start_col; /* Even part per LL&M figure 1 --- note that published figure is faulty; ! * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". */ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]); tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]); tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]); --- 163,184 ---- DCTELEM *dataptr; JSAMPROW elemptr; int ctr; SHIFT_TEMPS ! /* Pass 1: process rows. ! * Note results are scaled up by sqrt(8) compared to a true DCT; ! * furthermore, we scale the results by 2**PASS1_BITS. ! * cK represents sqrt(2) * cos(K*pi/16). ! */ dataptr = data; for (ctr = 0; ctr < DCTSIZE; ctr++) { elemptr = sample_data[ctr] + start_col; /* Even part per LL&M figure 1 --- note that published figure is faulty; ! * rotator "c1" should be "c6". */ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]); tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]); tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
*** 190,257 **** tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]); tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]); tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]); tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]); ! /* Apply unsigned->signed conversion */ dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << PASS1_BITS); dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS); ! z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); /* Add fudge factor here for final descale. */ z1 += ONE << (CONST_BITS-PASS1_BITS-1); ! dataptr[2] = (DCTELEM) RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), CONST_BITS-PASS1_BITS); ! dataptr[6] = (DCTELEM) RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), CONST_BITS-PASS1_BITS); /* Odd part per figure 8 --- note paper omits factor of sqrt(2). - * cK represents sqrt(2) * cos(K*pi/16). * i0..i3 in the paper are tmp0..tmp3 here. */ - tmp10 = tmp0 + tmp3; - tmp11 = tmp1 + tmp2; tmp12 = tmp0 + tmp2; tmp13 = tmp1 + tmp3; z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ /* Add fudge factor here for final descale. */ z1 += ONE << (CONST_BITS-PASS1_BITS-1); ! tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ ! tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ ! tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ ! tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ ! tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */ ! tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */ ! tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */ tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ - tmp12 += z1; tmp13 += z1; ! dataptr[1] = (DCTELEM) ! RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS-PASS1_BITS); ! dataptr[3] = (DCTELEM) ! RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS-PASS1_BITS); ! dataptr[5] = (DCTELEM) ! RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS-PASS1_BITS); ! dataptr[7] = (DCTELEM) ! RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS-PASS1_BITS); dataptr += DCTSIZE; /* advance pointer to next row */ } /* Pass 2: process columns. * We remove the PASS1_BITS scaling, but leave the results scaled up * by an overall factor of 8. */ dataptr = data; for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { /* Even part per LL&M figure 1 --- note that published figure is faulty; ! * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". */ tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6]; tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5]; --- 192,262 ---- tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]); tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]); tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]); tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]); ! /* Apply unsigned->signed conversion. */ dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << PASS1_BITS); dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS); ! z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); /* c6 */ /* Add fudge factor here for final descale. */ z1 += ONE << (CONST_BITS-PASS1_BITS-1); ! ! dataptr[2] = (DCTELEM) ! RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */ CONST_BITS-PASS1_BITS); ! dataptr[6] = (DCTELEM) ! RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */ CONST_BITS-PASS1_BITS); /* Odd part per figure 8 --- note paper omits factor of sqrt(2). * i0..i3 in the paper are tmp0..tmp3 here. */ tmp12 = tmp0 + tmp2; tmp13 = tmp1 + tmp3; + z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ /* Add fudge factor here for final descale. */ z1 += ONE << (CONST_BITS-PASS1_BITS-1); ! tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* -c3+c5 */ tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ tmp12 += z1; tmp13 += z1; ! z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */ ! tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ ! tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ ! tmp0 += z1 + tmp12; ! tmp3 += z1 + tmp13; ! ! z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */ ! tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ ! tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ ! tmp1 += z1 + tmp13; ! tmp2 += z1 + tmp12; ! ! dataptr[1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS-PASS1_BITS); ! dataptr[3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS-PASS1_BITS); ! dataptr[5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS); ! dataptr[7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS-PASS1_BITS); dataptr += DCTSIZE; /* advance pointer to next row */ } /* Pass 2: process columns. * We remove the PASS1_BITS scaling, but leave the results scaled up * by an overall factor of 8. + * cK represents sqrt(2) * cos(K*pi/16). */ dataptr = data; for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { /* Even part per LL&M figure 1 --- note that published figure is faulty; ! * rotator "c1" should be "c6". */ tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6]; tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
*** 269,319 **** tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp10 + tmp11, PASS1_BITS); dataptr[DCTSIZE*4] = (DCTELEM) RIGHT_SHIFT(tmp10 - tmp11, PASS1_BITS); ! z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); /* Add fudge factor here for final descale. */ z1 += ONE << (CONST_BITS+PASS1_BITS-1); dataptr[DCTSIZE*2] = (DCTELEM) ! RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), CONST_BITS+PASS1_BITS); dataptr[DCTSIZE*6] = (DCTELEM) ! RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), CONST_BITS+PASS1_BITS); /* Odd part per figure 8 --- note paper omits factor of sqrt(2). - * cK represents sqrt(2) * cos(K*pi/16). * i0..i3 in the paper are tmp0..tmp3 here. */ - tmp10 = tmp0 + tmp3; - tmp11 = tmp1 + tmp2; tmp12 = tmp0 + tmp2; tmp13 = tmp1 + tmp3; z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ /* Add fudge factor here for final descale. */ z1 += ONE << (CONST_BITS+PASS1_BITS-1); ! tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ ! tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ ! tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ ! tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ ! tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */ ! tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */ ! tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */ tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ - tmp12 += z1; tmp13 += z1; ! dataptr[DCTSIZE*1] = (DCTELEM) ! RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS+PASS1_BITS); ! dataptr[DCTSIZE*3] = (DCTELEM) ! RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS+PASS1_BITS); ! dataptr[DCTSIZE*5] = (DCTELEM) ! RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS+PASS1_BITS); ! dataptr[DCTSIZE*7] = (DCTELEM) ! RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS+PASS1_BITS); dataptr++; /* advance pointer to next column */ } } --- 274,326 ---- tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp10 + tmp11, PASS1_BITS); dataptr[DCTSIZE*4] = (DCTELEM) RIGHT_SHIFT(tmp10 - tmp11, PASS1_BITS); ! z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); /* c6 */ /* Add fudge factor here for final descale. */ z1 += ONE << (CONST_BITS+PASS1_BITS-1); + dataptr[DCTSIZE*2] = (DCTELEM) ! RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */ ! CONST_BITS+PASS1_BITS); dataptr[DCTSIZE*6] = (DCTELEM) ! RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */ ! CONST_BITS+PASS1_BITS); /* Odd part per figure 8 --- note paper omits factor of sqrt(2). * i0..i3 in the paper are tmp0..tmp3 here. */ tmp12 = tmp0 + tmp2; tmp13 = tmp1 + tmp3; + z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ /* Add fudge factor here for final descale. */ z1 += ONE << (CONST_BITS+PASS1_BITS-1); ! tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* -c3+c5 */ tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ tmp12 += z1; tmp13 += z1; ! z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */ ! tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ ! tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ ! tmp0 += z1 + tmp12; ! tmp3 += z1 + tmp13; ! ! z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */ ! tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ ! tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ ! tmp1 += z1 + tmp13; ! tmp2 += z1 + tmp12; ! ! dataptr[DCTSIZE*1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS+PASS1_BITS); ! dataptr[DCTSIZE*3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS+PASS1_BITS); ! dataptr[DCTSIZE*5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS+PASS1_BITS); ! dataptr[DCTSIZE*7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS+PASS1_BITS); dataptr++; /* advance pointer to next column */ } }
*** 336,349 **** SHIFT_TEMPS /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); ! /* Pass 1: process rows. */ ! /* Note results are scaled up by sqrt(8) compared to a true DCT; */ ! /* furthermore, we scale the results by 2**PASS1_BITS. */ ! /* cK represents sqrt(2) * cos(K*pi/14). */ dataptr = data; for (ctr = 0; ctr < 7; ctr++) { elemptr = sample_data[ctr] + start_col; --- 343,357 ---- SHIFT_TEMPS /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); ! /* Pass 1: process rows. ! * Note results are scaled up by sqrt(8) compared to a true DCT; ! * furthermore, we scale the results by 2**PASS1_BITS. ! * cK represents sqrt(2) * cos(K*pi/14). ! */ dataptr = data; for (ctr = 0; ctr < 7; ctr++) { elemptr = sample_data[ctr] + start_col;
*** 357,367 **** tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[6]); tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[5]); tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[4]); z1 = tmp0 + tmp2; ! /* Apply unsigned->signed conversion */ dataptr[0] = (DCTELEM) ((z1 + tmp1 + tmp3 - 7 * CENTERJSAMPLE) << PASS1_BITS); tmp3 += tmp3; z1 -= tmp3; z1 -= tmp3; --- 365,375 ---- tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[6]); tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[5]); tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[4]); z1 = tmp0 + tmp2; ! /* Apply unsigned->signed conversion. */ dataptr[0] = (DCTELEM) ((z1 + tmp1 + tmp3 - 7 * CENTERJSAMPLE) << PASS1_BITS); tmp3 += tmp3; z1 -= tmp3; z1 -= tmp3;
*** 470,483 **** SHIFT_TEMPS /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); ! /* Pass 1: process rows. */ ! /* Note results are scaled up by sqrt(8) compared to a true DCT; */ ! /* furthermore, we scale the results by 2**PASS1_BITS. */ ! /* cK represents sqrt(2) * cos(K*pi/12). */ dataptr = data; for (ctr = 0; ctr < 6; ctr++) { elemptr = sample_data[ctr] + start_col; --- 478,492 ---- SHIFT_TEMPS /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); ! /* Pass 1: process rows. ! * Note results are scaled up by sqrt(8) compared to a true DCT; ! * furthermore, we scale the results by 2**PASS1_BITS. ! * cK represents sqrt(2) * cos(K*pi/12). ! */ dataptr = data; for (ctr = 0; ctr < 6; ctr++) { elemptr = sample_data[ctr] + start_col;
*** 492,502 **** tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]); tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]); tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]); ! /* Apply unsigned->signed conversion */ dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << PASS1_BITS); dataptr[2] = (DCTELEM) DESCALE(MULTIPLY(tmp12, FIX(1.224744871)), /* c2 */ CONST_BITS-PASS1_BITS); --- 501,511 ---- tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]); tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]); tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]); ! /* Apply unsigned->signed conversion. */ dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << PASS1_BITS); dataptr[2] = (DCTELEM) DESCALE(MULTIPLY(tmp12, FIX(1.224744871)), /* c2 */ CONST_BITS-PASS1_BITS);
*** 583,598 **** SHIFT_TEMPS /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); ! /* Pass 1: process rows. */ ! /* Note results are scaled up by sqrt(8) compared to a true DCT; */ ! /* furthermore, we scale the results by 2**PASS1_BITS. */ ! /* We scale the results further by 2 as part of output adaption */ ! /* scaling for different DCT size. */ ! /* cK represents sqrt(2) * cos(K*pi/10). */ dataptr = data; for (ctr = 0; ctr < 5; ctr++) { elemptr = sample_data[ctr] + start_col; --- 592,608 ---- SHIFT_TEMPS /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); ! /* Pass 1: process rows. ! * Note results are scaled up by sqrt(8) compared to a true DCT; ! * furthermore, we scale the results by 2**PASS1_BITS. ! * We scale the results further by 2 as part of output adaption ! * scaling for different DCT size. ! * cK represents sqrt(2) * cos(K*pi/10). ! */ dataptr = data; for (ctr = 0; ctr < 5; ctr++) { elemptr = sample_data[ctr] + start_col;
*** 606,616 **** tmp11 = tmp0 - tmp1; tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[4]); tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[3]); ! /* Apply unsigned->signed conversion */ dataptr[0] = (DCTELEM) ((tmp10 + tmp2 - 5 * CENTERJSAMPLE) << (PASS1_BITS+1)); tmp11 = MULTIPLY(tmp11, FIX(0.790569415)); /* (c2+c4)/2 */ tmp10 -= tmp2 << 2; tmp10 = MULTIPLY(tmp10, FIX(0.353553391)); /* (c2-c4)/2 */ --- 616,626 ---- tmp11 = tmp0 - tmp1; tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[4]); tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[3]); ! /* Apply unsigned->signed conversion. */ dataptr[0] = (DCTELEM) ((tmp10 + tmp2 - 5 * CENTERJSAMPLE) << (PASS1_BITS+1)); tmp11 = MULTIPLY(tmp11, FIX(0.790569415)); /* (c2+c4)/2 */ tmp10 -= tmp2 << 2; tmp10 = MULTIPLY(tmp10, FIX(0.353553391)); /* (c2-c4)/2 */
*** 693,707 **** SHIFT_TEMPS /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); ! /* Pass 1: process rows. */ ! /* Note results are scaled up by sqrt(8) compared to a true DCT; */ ! /* furthermore, we scale the results by 2**PASS1_BITS. */ ! /* We must also scale the output by (8/4)**2 = 2**2, which we add here. */ ! /* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT]. */ dataptr = data; for (ctr = 0; ctr < 4; ctr++) { elemptr = sample_data[ctr] + start_col; --- 703,718 ---- SHIFT_TEMPS /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); ! /* Pass 1: process rows. ! * Note results are scaled up by sqrt(8) compared to a true DCT; ! * furthermore, we scale the results by 2**PASS1_BITS. ! * We must also scale the output by (8/4)**2 = 2**2, which we add here. ! * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT]. ! */ dataptr = data; for (ctr = 0; ctr < 4; ctr++) { elemptr = sample_data[ctr] + start_col;
*** 711,721 **** tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]); tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]); tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]); ! /* Apply unsigned->signed conversion */ dataptr[0] = (DCTELEM) ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+2)); dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+2)); /* Odd part */ --- 722,732 ---- tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]); tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]); tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]); ! /* Apply unsigned->signed conversion. */ dataptr[0] = (DCTELEM) ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+2)); dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+2)); /* Odd part */
*** 735,744 **** --- 746,756 ---- } /* Pass 2: process columns. * We remove the PASS1_BITS scaling, but leave the results scaled up * by an overall factor of 8. + * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT]. */ dataptr = data; for (ctr = 0; ctr < 4; ctr++) { /* Even part */
*** 785,800 **** SHIFT_TEMPS /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); ! /* Pass 1: process rows. */ ! /* Note results are scaled up by sqrt(8) compared to a true DCT; */ ! /* furthermore, we scale the results by 2**PASS1_BITS. */ ! /* We scale the results further by 2**2 as part of output adaption */ ! /* scaling for different DCT size. */ ! /* cK represents sqrt(2) * cos(K*pi/6). */ dataptr = data; for (ctr = 0; ctr < 3; ctr++) { elemptr = sample_data[ctr] + start_col; --- 797,813 ---- SHIFT_TEMPS /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); ! /* Pass 1: process rows. ! * Note results are scaled up by sqrt(8) compared to a true DCT; ! * furthermore, we scale the results by 2**PASS1_BITS. ! * We scale the results further by 2**2 as part of output adaption ! * scaling for different DCT size. ! * cK represents sqrt(2) * cos(K*pi/6). ! */ dataptr = data; for (ctr = 0; ctr < 3; ctr++) { elemptr = sample_data[ctr] + start_col;
*** 803,813 **** tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[2]); tmp1 = GETJSAMPLE(elemptr[1]); tmp2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[2]); ! /* Apply unsigned->signed conversion */ dataptr[0] = (DCTELEM) ((tmp0 + tmp1 - 3 * CENTERJSAMPLE) << (PASS1_BITS+2)); dataptr[2] = (DCTELEM) DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(0.707106781)), /* c2 */ CONST_BITS-PASS1_BITS-2); --- 816,826 ---- tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[2]); tmp1 = GETJSAMPLE(elemptr[1]); tmp2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[2]); ! /* Apply unsigned->signed conversion. */ dataptr[0] = (DCTELEM) ((tmp0 + tmp1 - 3 * CENTERJSAMPLE) << (PASS1_BITS+2)); dataptr[2] = (DCTELEM) DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(0.707106781)), /* c2 */ CONST_BITS-PASS1_BITS-2);
*** 861,878 **** */ GLOBAL(void) jpeg_fdct_2x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) { ! INT32 tmp0, tmp1, tmp2, tmp3; JSAMPROW elemptr; /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); ! /* Pass 1: process rows. */ ! /* Note results are scaled up by sqrt(8) compared to a true DCT. */ /* Row 0 */ elemptr = sample_data[0] + start_col; tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[1]); --- 874,892 ---- */ GLOBAL(void) jpeg_fdct_2x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) { ! DCTELEM tmp0, tmp1, tmp2, tmp3; JSAMPROW elemptr; /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); ! /* Pass 1: process rows. ! * Note results are scaled up by sqrt(8) compared to a true DCT. ! */ /* Row 0 */ elemptr = sample_data[0] + start_col; tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[1]);
*** 888,922 **** * We leave the results scaled up by an overall factor of 8. * We must also scale the output by (8/2)**2 = 2**4. */ /* Column 0 */ ! /* Apply unsigned->signed conversion */ ! data[DCTSIZE*0] = (DCTELEM) ((tmp0 + tmp2 - 4 * CENTERJSAMPLE) << 4); ! data[DCTSIZE*1] = (DCTELEM) ((tmp0 - tmp2) << 4); /* Column 1 */ ! data[DCTSIZE*0+1] = (DCTELEM) ((tmp1 + tmp3) << 4); ! data[DCTSIZE*1+1] = (DCTELEM) ((tmp1 - tmp3) << 4); } /* * Perform the forward DCT on a 1x1 sample block. */ GLOBAL(void) jpeg_fdct_1x1 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) { /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); /* We leave the result scaled up by an overall factor of 8. */ /* We must also scale the output by (8/1)**2 = 2**6. */ ! /* Apply unsigned->signed conversion */ ! data[0] = (DCTELEM) ! ((GETJSAMPLE(sample_data[0][start_col]) - CENTERJSAMPLE) << 6); } /* * Perform the forward DCT on a 9x9 sample block. --- 902,939 ---- * We leave the results scaled up by an overall factor of 8. * We must also scale the output by (8/2)**2 = 2**4. */ /* Column 0 */ ! /* Apply unsigned->signed conversion. */ ! data[DCTSIZE*0] = (tmp0 + tmp2 - 4 * CENTERJSAMPLE) << 4; ! data[DCTSIZE*1] = (tmp0 - tmp2) << 4; /* Column 1 */ ! data[DCTSIZE*0+1] = (tmp1 + tmp3) << 4; ! data[DCTSIZE*1+1] = (tmp1 - tmp3) << 4; } /* * Perform the forward DCT on a 1x1 sample block. */ GLOBAL(void) jpeg_fdct_1x1 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) { + DCTELEM dcval; + /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); + dcval = GETJSAMPLE(sample_data[0][start_col]); + /* We leave the result scaled up by an overall factor of 8. */ /* We must also scale the output by (8/1)**2 = 2**6. */ ! /* Apply unsigned->signed conversion. */ ! data[0] = (dcval - CENTERJSAMPLE) << 6; } /* * Perform the forward DCT on a 9x9 sample block.
*** 933,947 **** DCTELEM *wsptr; JSAMPROW elemptr; int ctr; SHIFT_TEMPS ! /* Pass 1: process rows. */ ! /* Note results are scaled up by sqrt(8) compared to a true DCT; */ ! /* we scale the results further by 2 as part of output adaption */ ! /* scaling for different DCT size. */ ! /* cK represents sqrt(2) * cos(K*pi/18). */ dataptr = data; ctr = 0; for (;;) { elemptr = sample_data[ctr] + start_col; --- 950,965 ---- DCTELEM *wsptr; JSAMPROW elemptr; int ctr; SHIFT_TEMPS ! /* Pass 1: process rows. ! * Note results are scaled up by sqrt(8) compared to a true DCT; ! * we scale the results further by 2 as part of output adaption ! * scaling for different DCT size. ! * cK represents sqrt(2) * cos(K*pi/18). ! */ dataptr = data; ctr = 0; for (;;) { elemptr = sample_data[ctr] + start_col;
*** 959,969 **** tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[6]); tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[5]); z1 = tmp0 + tmp2 + tmp3; z2 = tmp1 + tmp4; ! /* Apply unsigned->signed conversion */ dataptr[0] = (DCTELEM) ((z1 + z2 - 9 * CENTERJSAMPLE) << 1); dataptr[6] = (DCTELEM) DESCALE(MULTIPLY(z1 - z2 - z2, FIX(0.707106781)), /* c6 */ CONST_BITS-1); z1 = MULTIPLY(tmp0 - tmp2, FIX(1.328926049)); /* c2 */ --- 977,987 ---- tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[6]); tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[5]); z1 = tmp0 + tmp2 + tmp3; z2 = tmp1 + tmp4; ! /* Apply unsigned->signed conversion. */ dataptr[0] = (DCTELEM) ((z1 + z2 - 9 * CENTERJSAMPLE) << 1); dataptr[6] = (DCTELEM) DESCALE(MULTIPLY(z1 - z2 - z2, FIX(0.707106781)), /* c6 */ CONST_BITS-1); z1 = MULTIPLY(tmp0 - tmp2, FIX(1.328926049)); /* c2 */
*** 1082,1096 **** DCTELEM *wsptr; JSAMPROW elemptr; int ctr; SHIFT_TEMPS ! /* Pass 1: process rows. */ ! /* Note results are scaled up by sqrt(8) compared to a true DCT; */ ! /* we scale the results further by 2 as part of output adaption */ ! /* scaling for different DCT size. */ ! /* cK represents sqrt(2) * cos(K*pi/20). */ dataptr = data; ctr = 0; for (;;) { elemptr = sample_data[ctr] + start_col; --- 1100,1115 ---- DCTELEM *wsptr; JSAMPROW elemptr; int ctr; SHIFT_TEMPS ! /* Pass 1: process rows. ! * Note results are scaled up by sqrt(8) compared to a true DCT; ! * we scale the results further by 2 as part of output adaption ! * scaling for different DCT size. ! * cK represents sqrt(2) * cos(K*pi/20). ! */ dataptr = data; ctr = 0; for (;;) { elemptr = sample_data[ctr] + start_col;
*** 1112,1122 **** tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[8]); tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[7]); tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[6]); tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[5]); ! /* Apply unsigned->signed conversion */ dataptr[0] = (DCTELEM) ((tmp10 + tmp11 + tmp12 - 10 * CENTERJSAMPLE) << 1); tmp12 += tmp12; dataptr[4] = (DCTELEM) DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.144122806)) - /* c4 */ --- 1131,1141 ---- tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[8]); tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[7]); tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[6]); tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[5]); ! /* Apply unsigned->signed conversion. */ dataptr[0] = (DCTELEM) ((tmp10 + tmp11 + tmp12 - 10 * CENTERJSAMPLE) << 1); tmp12 += tmp12; dataptr[4] = (DCTELEM) DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.144122806)) - /* c4 */
*** 1246,1260 **** DCTELEM *wsptr; JSAMPROW elemptr; int ctr; SHIFT_TEMPS ! /* Pass 1: process rows. */ ! /* Note results are scaled up by sqrt(8) compared to a true DCT; */ ! /* we scale the results further by 2 as part of output adaption */ ! /* scaling for different DCT size. */ ! /* cK represents sqrt(2) * cos(K*pi/22). */ dataptr = data; ctr = 0; for (;;) { elemptr = sample_data[ctr] + start_col; --- 1265,1280 ---- DCTELEM *wsptr; JSAMPROW elemptr; int ctr; SHIFT_TEMPS ! /* Pass 1: process rows. ! * Note results are scaled up by sqrt(8) compared to a true DCT; ! * we scale the results further by 2 as part of output adaption ! * scaling for different DCT size. ! * cK represents sqrt(2) * cos(K*pi/22). ! */ dataptr = data; ctr = 0; for (;;) { elemptr = sample_data[ctr] + start_col;
*** 1272,1282 **** tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[9]); tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[8]); tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[7]); tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[6]); ! /* Apply unsigned->signed conversion */ dataptr[0] = (DCTELEM) ((tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 - 11 * CENTERJSAMPLE) << 1); tmp5 += tmp5; tmp0 -= tmp5; tmp1 -= tmp5; --- 1292,1302 ---- tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[9]); tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[8]); tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[7]); tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[6]); ! /* Apply unsigned->signed conversion. */ dataptr[0] = (DCTELEM) ((tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 - 11 * CENTERJSAMPLE) << 1); tmp5 += tmp5; tmp0 -= tmp5; tmp1 -= tmp5;
*** 1428,1440 **** DCTELEM *wsptr; JSAMPROW elemptr; int ctr; SHIFT_TEMPS ! /* Pass 1: process rows. */ ! /* Note results are scaled up by sqrt(8) compared to a true DCT. */ ! /* cK represents sqrt(2) * cos(K*pi/24). */ dataptr = data; ctr = 0; for (;;) { elemptr = sample_data[ctr] + start_col; --- 1448,1461 ---- DCTELEM *wsptr; JSAMPROW elemptr; int ctr; SHIFT_TEMPS ! /* Pass 1: process rows. ! * Note results are scaled up by sqrt(8) compared to a true DCT. ! * cK represents sqrt(2) * cos(K*pi/24). ! */ dataptr = data; ctr = 0; for (;;) { elemptr = sample_data[ctr] + start_col;
*** 1460,1470 **** tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[9]); tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[8]); tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[7]); tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[6]); ! /* Apply unsigned->signed conversion */ dataptr[0] = (DCTELEM) (tmp10 + tmp11 + tmp12 - 12 * CENTERJSAMPLE); dataptr[6] = (DCTELEM) (tmp13 - tmp14 - tmp15); dataptr[4] = (DCTELEM) DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.224744871)), /* c4 */ CONST_BITS); --- 1481,1491 ---- tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[9]); tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[8]); tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[7]); tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[6]); ! /* Apply unsigned->signed conversion. */ dataptr[0] = (DCTELEM) (tmp10 + tmp11 + tmp12 - 12 * CENTERJSAMPLE); dataptr[6] = (DCTELEM) (tmp13 - tmp14 - tmp15); dataptr[4] = (DCTELEM) DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.224744871)), /* c4 */ CONST_BITS);
*** 1594,1606 **** DCTELEM *wsptr; JSAMPROW elemptr; int ctr; SHIFT_TEMPS ! /* Pass 1: process rows. */ ! /* Note results are scaled up by sqrt(8) compared to a true DCT. */ ! /* cK represents sqrt(2) * cos(K*pi/26). */ dataptr = data; ctr = 0; for (;;) { elemptr = sample_data[ctr] + start_col; --- 1615,1628 ---- DCTELEM *wsptr; JSAMPROW elemptr; int ctr; SHIFT_TEMPS ! /* Pass 1: process rows. ! * Note results are scaled up by sqrt(8) compared to a true DCT. ! * cK represents sqrt(2) * cos(K*pi/26). ! */ dataptr = data; ctr = 0; for (;;) { elemptr = sample_data[ctr] + start_col;
*** 1620,1630 **** tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[10]); tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[9]); tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[8]); tmp15 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[7]); ! /* Apply unsigned->signed conversion */ dataptr[0] = (DCTELEM) (tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 + tmp6 - 13 * CENTERJSAMPLE); tmp6 += tmp6; tmp0 -= tmp6; tmp1 -= tmp6; --- 1642,1652 ---- tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[10]); tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[9]); tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[8]); tmp15 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[7]); ! /* Apply unsigned->signed conversion. */ dataptr[0] = (DCTELEM) (tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 + tmp6 - 13 * CENTERJSAMPLE); tmp6 += tmp6; tmp0 -= tmp6; tmp1 -= tmp6;
*** 1792,1804 **** DCTELEM *wsptr; JSAMPROW elemptr; int ctr; SHIFT_TEMPS ! /* Pass 1: process rows. */ ! /* Note results are scaled up by sqrt(8) compared to a true DCT. */ ! /* cK represents sqrt(2) * cos(K*pi/28). */ dataptr = data; ctr = 0; for (;;) { elemptr = sample_data[ctr] + start_col; --- 1814,1827 ---- DCTELEM *wsptr; JSAMPROW elemptr; int ctr; SHIFT_TEMPS ! /* Pass 1: process rows. ! * Note results are scaled up by sqrt(8) compared to a true DCT. ! * cK represents sqrt(2) * cos(K*pi/28). ! */ dataptr = data; ctr = 0; for (;;) { elemptr = sample_data[ctr] + start_col;
*** 1826,1836 **** tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[10]); tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[9]); tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[8]); tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[7]); ! /* Apply unsigned->signed conversion */ dataptr[0] = (DCTELEM) (tmp10 + tmp11 + tmp12 + tmp13 - 14 * CENTERJSAMPLE); tmp13 += tmp13; dataptr[4] = (DCTELEM) DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.274162392)) + /* c4 */ --- 1849,1859 ---- tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[10]); tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[9]); tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[8]); tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[7]); ! /* Apply unsigned->signed conversion. */ dataptr[0] = (DCTELEM) (tmp10 + tmp11 + tmp12 + tmp13 - 14 * CENTERJSAMPLE); tmp13 += tmp13; dataptr[4] = (DCTELEM) DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.274162392)) + /* c4 */
*** 1993,2005 **** DCTELEM *wsptr; JSAMPROW elemptr; int ctr; SHIFT_TEMPS ! /* Pass 1: process rows. */ ! /* Note results are scaled up by sqrt(8) compared to a true DCT. */ ! /* cK represents sqrt(2) * cos(K*pi/30). */ dataptr = data; ctr = 0; for (;;) { elemptr = sample_data[ctr] + start_col; --- 2016,2029 ---- DCTELEM *wsptr; JSAMPROW elemptr; int ctr; SHIFT_TEMPS ! /* Pass 1: process rows. ! * Note results are scaled up by sqrt(8) compared to a true DCT. ! * cK represents sqrt(2) * cos(K*pi/30). ! */ dataptr = data; ctr = 0; for (;;) { elemptr = sample_data[ctr] + start_col;
*** 2024,2034 **** tmp16 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[8]); z1 = tmp0 + tmp4 + tmp5; z2 = tmp1 + tmp3 + tmp6; z3 = tmp2 + tmp7; ! /* Apply unsigned->signed conversion */ dataptr[0] = (DCTELEM) (z1 + z2 + z3 - 15 * CENTERJSAMPLE); z3 += z3; dataptr[6] = (DCTELEM) DESCALE(MULTIPLY(z1 - z3, FIX(1.144122806)) - /* c6 */ MULTIPLY(z2 - z3, FIX(0.437016024)), /* c12 */ --- 2048,2058 ---- tmp16 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[8]); z1 = tmp0 + tmp4 + tmp5; z2 = tmp1 + tmp3 + tmp6; z3 = tmp2 + tmp7; ! /* Apply unsigned->signed conversion. */ dataptr[0] = (DCTELEM) (z1 + z2 + z3 - 15 * CENTERJSAMPLE); z3 += z3; dataptr[6] = (DCTELEM) DESCALE(MULTIPLY(z1 - z3, FIX(1.144122806)) - /* c6 */ MULTIPLY(z2 - z3, FIX(0.437016024)), /* c12 */
*** 2171,2184 **** DCTELEM *wsptr; JSAMPROW elemptr; int ctr; SHIFT_TEMPS ! /* Pass 1: process rows. */ ! /* Note results are scaled up by sqrt(8) compared to a true DCT; */ ! /* furthermore, we scale the results by 2**PASS1_BITS. */ ! /* cK represents sqrt(2) * cos(K*pi/32). */ dataptr = data; ctr = 0; for (;;) { elemptr = sample_data[ctr] + start_col; --- 2195,2209 ---- DCTELEM *wsptr; JSAMPROW elemptr; int ctr; SHIFT_TEMPS ! /* Pass 1: process rows. ! * Note results are scaled up by sqrt(8) compared to a true DCT; ! * furthermore, we scale the results by 2**PASS1_BITS. ! * cK represents sqrt(2) * cos(K*pi/32). ! */ dataptr = data; ctr = 0; for (;;) { elemptr = sample_data[ctr] + start_col;
*** 2210,2220 **** tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[11]); tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[10]); tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[9]); tmp7 = GETJSAMPLE(elemptr[7]) - GETJSAMPLE(elemptr[8]); ! /* Apply unsigned->signed conversion */ dataptr[0] = (DCTELEM) ((tmp10 + tmp11 + tmp12 + tmp13 - 16 * CENTERJSAMPLE) << PASS1_BITS); dataptr[4] = (DCTELEM) DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */ MULTIPLY(tmp11 - tmp12, FIX_0_541196100), /* c12[16] = c6[8] */ --- 2235,2245 ---- tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[11]); tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[10]); tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[9]); tmp7 = GETJSAMPLE(elemptr[7]) - GETJSAMPLE(elemptr[8]); ! /* Apply unsigned->signed conversion. */ dataptr[0] = (DCTELEM) ((tmp10 + tmp11 + tmp12 + tmp13 - 16 * CENTERJSAMPLE) << PASS1_BITS); dataptr[4] = (DCTELEM) DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */ MULTIPLY(tmp11 - tmp12, FIX_0_541196100), /* c12[16] = c6[8] */
*** 2273,2282 **** --- 2298,2308 ---- /* Pass 2: process columns. * We remove the PASS1_BITS scaling, but leave the results scaled up * by an overall factor of 8. * We must also scale the output by (8/16)**2 = 1/2**2. + * cK represents sqrt(2) * cos(K*pi/32). */ dataptr = data; wsptr = workspace; for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
*** 2378,2391 **** DCTELEM *dataptr; JSAMPROW elemptr; int ctr; SHIFT_TEMPS ! /* Pass 1: process rows. */ ! /* Note results are scaled up by sqrt(8) compared to a true DCT; */ ! /* furthermore, we scale the results by 2**PASS1_BITS. */ ! /* 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32). */ dataptr = data; ctr = 0; for (ctr = 0; ctr < DCTSIZE; ctr++) { elemptr = sample_data[ctr] + start_col; --- 2404,2418 ---- DCTELEM *dataptr; JSAMPROW elemptr; int ctr; SHIFT_TEMPS ! /* Pass 1: process rows. ! * Note results are scaled up by sqrt(8) compared to a true DCT; ! * furthermore, we scale the results by 2**PASS1_BITS. ! * 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32). ! */ dataptr = data; ctr = 0; for (ctr = 0; ctr < DCTSIZE; ctr++) { elemptr = sample_data[ctr] + start_col;
*** 2417,2427 **** tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[11]); tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[10]); tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[9]); tmp7 = GETJSAMPLE(elemptr[7]) - GETJSAMPLE(elemptr[8]); ! /* Apply unsigned->signed conversion */ dataptr[0] = (DCTELEM) ((tmp10 + tmp11 + tmp12 + tmp13 - 16 * CENTERJSAMPLE) << PASS1_BITS); dataptr[4] = (DCTELEM) DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */ MULTIPLY(tmp11 - tmp12, FIX_0_541196100), /* c12[16] = c6[8] */ --- 2444,2454 ---- tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[11]); tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[10]); tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[9]); tmp7 = GETJSAMPLE(elemptr[7]) - GETJSAMPLE(elemptr[8]); ! /* Apply unsigned->signed conversion. */ dataptr[0] = (DCTELEM) ((tmp10 + tmp11 + tmp12 + tmp13 - 16 * CENTERJSAMPLE) << PASS1_BITS); dataptr[4] = (DCTELEM) DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */ MULTIPLY(tmp11 - tmp12, FIX_0_541196100), /* c12[16] = c6[8] */
*** 2473,2488 **** /* Pass 2: process columns. * We remove the PASS1_BITS scaling, but leave the results scaled up * by an overall factor of 8. * We must also scale the output by 8/16 = 1/2. */ dataptr = data; for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { /* Even part per LL&M figure 1 --- note that published figure is faulty; ! * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". */ tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6]; tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5]; --- 2500,2516 ---- /* Pass 2: process columns. * We remove the PASS1_BITS scaling, but leave the results scaled up * by an overall factor of 8. * We must also scale the output by 8/16 = 1/2. + * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). */ dataptr = data; for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { /* Even part per LL&M figure 1 --- note that published figure is faulty; ! * rotator "c1" should be "c6". */ tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6]; tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
*** 2499,2545 **** tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS+1); dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS+1); ! z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); ! dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865), CONST_BITS+PASS1_BITS+1); ! dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065), CONST_BITS+PASS1_BITS+1); /* Odd part per figure 8 --- note paper omits factor of sqrt(2). - * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). * i0..i3 in the paper are tmp0..tmp3 here. */ - tmp10 = tmp0 + tmp3; - tmp11 = tmp1 + tmp2; tmp12 = tmp0 + tmp2; tmp13 = tmp1 + tmp3; z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ - tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ - tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ ! tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */ ! tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */ ! tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */ ! tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ ! tmp12 += z1; ! tmp13 += z1; ! dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0 + tmp10 + tmp12, ! CONST_BITS+PASS1_BITS+1); ! dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1 + tmp11 + tmp13, ! CONST_BITS+PASS1_BITS+1); ! dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2 + tmp11 + tmp12, ! CONST_BITS+PASS1_BITS+1); ! dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3 + tmp10 + tmp13, ! CONST_BITS+PASS1_BITS+1); dataptr++; /* advance pointer to next column */ } } --- 2527,2573 ---- tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS+1); dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS+1); ! z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); /* c6 */ ! dataptr[DCTSIZE*2] = (DCTELEM) ! DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */ CONST_BITS+PASS1_BITS+1); ! dataptr[DCTSIZE*6] = (DCTELEM) ! DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */ CONST_BITS+PASS1_BITS+1); /* Odd part per figure 8 --- note paper omits factor of sqrt(2). * i0..i3 in the paper are tmp0..tmp3 here. */ tmp12 = tmp0 + tmp2; tmp13 = tmp1 + tmp3; + z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ + tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* -c3+c5 */ + tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ + tmp12 += z1; + tmp13 += z1; + z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */ tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ ! tmp0 += z1 + tmp12; ! tmp3 += z1 + tmp13; ! z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */ ! tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ ! tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ ! tmp1 += z1 + tmp13; ! tmp2 += z1 + tmp12; ! dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+PASS1_BITS+1); ! dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+PASS1_BITS+1); ! dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+PASS1_BITS+1); ! dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+PASS1_BITS+1); dataptr++; /* advance pointer to next column */ } }
*** 2562,2575 **** SHIFT_TEMPS /* Zero bottom row of output coefficient block. */ MEMZERO(&data[DCTSIZE*7], SIZEOF(DCTELEM) * DCTSIZE); ! /* Pass 1: process rows. */ ! /* Note results are scaled up by sqrt(8) compared to a true DCT; */ ! /* furthermore, we scale the results by 2**PASS1_BITS. */ ! /* 14-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/28). */ dataptr = data; for (ctr = 0; ctr < 7; ctr++) { elemptr = sample_data[ctr] + start_col; --- 2590,2604 ---- SHIFT_TEMPS /* Zero bottom row of output coefficient block. */ MEMZERO(&data[DCTSIZE*7], SIZEOF(DCTELEM) * DCTSIZE); ! /* Pass 1: process rows. ! * Note results are scaled up by sqrt(8) compared to a true DCT; ! * furthermore, we scale the results by 2**PASS1_BITS. ! * 14-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/28). ! */ dataptr = data; for (ctr = 0; ctr < 7; ctr++) { elemptr = sample_data[ctr] + start_col;
*** 2596,2606 **** tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[10]); tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[9]); tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[8]); tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[7]); ! /* Apply unsigned->signed conversion */ dataptr[0] = (DCTELEM) ((tmp10 + tmp11 + tmp12 + tmp13 - 14 * CENTERJSAMPLE) << PASS1_BITS); tmp13 += tmp13; dataptr[4] = (DCTELEM) DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.274162392)) + /* c4 */ --- 2625,2635 ---- tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[10]); tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[9]); tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[8]); tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[7]); ! /* Apply unsigned->signed conversion. */ dataptr[0] = (DCTELEM) ((tmp10 + tmp11 + tmp12 + tmp13 - 14 * CENTERJSAMPLE) << PASS1_BITS); tmp13 += tmp13; dataptr[4] = (DCTELEM) DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.274162392)) + /* c4 */
*** 2725,2738 **** SHIFT_TEMPS /* Zero 2 bottom rows of output coefficient block. */ MEMZERO(&data[DCTSIZE*6], SIZEOF(DCTELEM) * DCTSIZE * 2); ! /* Pass 1: process rows. */ ! /* Note results are scaled up by sqrt(8) compared to a true DCT; */ ! /* furthermore, we scale the results by 2**PASS1_BITS. */ ! /* 12-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/24). */ dataptr = data; for (ctr = 0; ctr < 6; ctr++) { elemptr = sample_data[ctr] + start_col; --- 2754,2768 ---- SHIFT_TEMPS /* Zero 2 bottom rows of output coefficient block. */ MEMZERO(&data[DCTSIZE*6], SIZEOF(DCTELEM) * DCTSIZE * 2); ! /* Pass 1: process rows. ! * Note results are scaled up by sqrt(8) compared to a true DCT; ! * furthermore, we scale the results by 2**PASS1_BITS. ! * 12-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/24). ! */ dataptr = data; for (ctr = 0; ctr < 6; ctr++) { elemptr = sample_data[ctr] + start_col;
*** 2757,2767 **** tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[9]); tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[8]); tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[7]); tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[6]); ! /* Apply unsigned->signed conversion */ dataptr[0] = (DCTELEM) ((tmp10 + tmp11 + tmp12 - 12 * CENTERJSAMPLE) << PASS1_BITS); dataptr[6] = (DCTELEM) ((tmp13 - tmp14 - tmp15) << PASS1_BITS); dataptr[4] = (DCTELEM) DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.224744871)), /* c4 */ --- 2787,2797 ---- tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[9]); tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[8]); tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[7]); tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[6]); ! /* Apply unsigned->signed conversion. */ dataptr[0] = (DCTELEM) ((tmp10 + tmp11 + tmp12 - 12 * CENTERJSAMPLE) << PASS1_BITS); dataptr[6] = (DCTELEM) ((tmp13 - tmp14 - tmp15) << PASS1_BITS); dataptr[4] = (DCTELEM) DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.224744871)), /* c4 */
*** 2864,2877 **** SHIFT_TEMPS /* Zero 3 bottom rows of output coefficient block. */ MEMZERO(&data[DCTSIZE*5], SIZEOF(DCTELEM) * DCTSIZE * 3); ! /* Pass 1: process rows. */ ! /* Note results are scaled up by sqrt(8) compared to a true DCT; */ ! /* furthermore, we scale the results by 2**PASS1_BITS. */ ! /* 10-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/20). */ dataptr = data; for (ctr = 0; ctr < 5; ctr++) { elemptr = sample_data[ctr] + start_col; --- 2894,2908 ---- SHIFT_TEMPS /* Zero 3 bottom rows of output coefficient block. */ MEMZERO(&data[DCTSIZE*5], SIZEOF(DCTELEM) * DCTSIZE * 3); ! /* Pass 1: process rows. ! * Note results are scaled up by sqrt(8) compared to a true DCT; ! * furthermore, we scale the results by 2**PASS1_BITS. ! * 10-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/20). ! */ dataptr = data; for (ctr = 0; ctr < 5; ctr++) { elemptr = sample_data[ctr] + start_col;
*** 2892,2902 **** tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[8]); tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[7]); tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[6]); tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[5]); ! /* Apply unsigned->signed conversion */ dataptr[0] = (DCTELEM) ((tmp10 + tmp11 + tmp12 - 10 * CENTERJSAMPLE) << PASS1_BITS); tmp12 += tmp12; dataptr[4] = (DCTELEM) DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.144122806)) - /* c4 */ --- 2923,2933 ---- tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[8]); tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[7]); tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[6]); tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[5]); ! /* Apply unsigned->signed conversion. */ dataptr[0] = (DCTELEM) ((tmp10 + tmp11 + tmp12 - 10 * CENTERJSAMPLE) << PASS1_BITS); tmp12 += tmp12; dataptr[4] = (DCTELEM) DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.144122806)) - /* c4 */
*** 2997,3017 **** SHIFT_TEMPS /* Zero 4 bottom rows of output coefficient block. */ MEMZERO(&data[DCTSIZE*4], SIZEOF(DCTELEM) * DCTSIZE * 4); ! /* Pass 1: process rows. */ ! /* Note results are scaled up by sqrt(8) compared to a true DCT; */ ! /* furthermore, we scale the results by 2**PASS1_BITS. */ ! /* We must also scale the output by 8/4 = 2, which we add here. */ dataptr = data; for (ctr = 0; ctr < 4; ctr++) { elemptr = sample_data[ctr] + start_col; /* Even part per LL&M figure 1 --- note that published figure is faulty; ! * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". */ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]); tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]); tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]); --- 3028,3050 ---- SHIFT_TEMPS /* Zero 4 bottom rows of output coefficient block. */ MEMZERO(&data[DCTSIZE*4], SIZEOF(DCTELEM) * DCTSIZE * 4); ! /* Pass 1: process rows. ! * Note results are scaled up by sqrt(8) compared to a true DCT; ! * furthermore, we scale the results by 2**PASS1_BITS. ! * We must also scale the output by 8/4 = 2, which we add here. ! * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). ! */ dataptr = data; for (ctr = 0; ctr < 4; ctr++) { elemptr = sample_data[ctr] + start_col; /* Even part per LL&M figure 1 --- note that published figure is faulty; ! * rotator "c1" should be "c6". */ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]); tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]); tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
*** 3025,3088 **** tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]); tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]); tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]); tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]); ! /* Apply unsigned->signed conversion */ dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << (PASS1_BITS+1)); dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << (PASS1_BITS+1)); ! z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); /* Add fudge factor here for final descale. */ z1 += ONE << (CONST_BITS-PASS1_BITS-2); ! dataptr[2] = (DCTELEM) RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), CONST_BITS-PASS1_BITS-1); ! dataptr[6] = (DCTELEM) RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), CONST_BITS-PASS1_BITS-1); /* Odd part per figure 8 --- note paper omits factor of sqrt(2). - * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). * i0..i3 in the paper are tmp0..tmp3 here. */ - tmp10 = tmp0 + tmp3; - tmp11 = tmp1 + tmp2; tmp12 = tmp0 + tmp2; tmp13 = tmp1 + tmp3; z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ /* Add fudge factor here for final descale. */ z1 += ONE << (CONST_BITS-PASS1_BITS-2); ! tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ ! tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ ! tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ ! tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ ! tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */ ! tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */ ! tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */ tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ - tmp12 += z1; tmp13 += z1; ! dataptr[1] = (DCTELEM) ! RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS-PASS1_BITS-1); ! dataptr[3] = (DCTELEM) ! RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS-PASS1_BITS-1); ! dataptr[5] = (DCTELEM) ! RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS-PASS1_BITS-1); ! dataptr[7] = (DCTELEM) ! RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS-PASS1_BITS-1); dataptr += DCTSIZE; /* advance pointer to next row */ } /* Pass 2: process columns. * We remove the PASS1_BITS scaling, but leave the results scaled up * by an overall factor of 8. ! * 4-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). */ dataptr = data; for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { /* Even part */ --- 3058,3124 ---- tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]); tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]); tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]); tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]); ! /* Apply unsigned->signed conversion. */ dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << (PASS1_BITS+1)); dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << (PASS1_BITS+1)); ! z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); /* c6 */ /* Add fudge factor here for final descale. */ z1 += ONE << (CONST_BITS-PASS1_BITS-2); ! ! dataptr[2] = (DCTELEM) ! RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */ CONST_BITS-PASS1_BITS-1); ! dataptr[6] = (DCTELEM) ! RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */ CONST_BITS-PASS1_BITS-1); /* Odd part per figure 8 --- note paper omits factor of sqrt(2). * i0..i3 in the paper are tmp0..tmp3 here. */ tmp12 = tmp0 + tmp2; tmp13 = tmp1 + tmp3; + z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ /* Add fudge factor here for final descale. */ z1 += ONE << (CONST_BITS-PASS1_BITS-2); ! tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* -c3+c5 */ tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ tmp12 += z1; tmp13 += z1; ! z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */ ! tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ ! tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ ! tmp0 += z1 + tmp12; ! tmp3 += z1 + tmp13; ! ! z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */ ! tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ ! tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ ! tmp1 += z1 + tmp13; ! tmp2 += z1 + tmp12; ! ! dataptr[1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS-PASS1_BITS-1); ! dataptr[3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS-PASS1_BITS-1); ! dataptr[5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS-1); ! dataptr[7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS-PASS1_BITS-1); dataptr += DCTSIZE; /* advance pointer to next row */ } /* Pass 2: process columns. * We remove the PASS1_BITS scaling, but leave the results scaled up * by an overall factor of 8. ! * 4-point FDCT kernel, ! * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT]. */ dataptr = data; for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { /* Even part */
*** 3132,3147 **** SHIFT_TEMPS /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); ! /* Pass 1: process rows. */ ! /* Note results are scaled up by sqrt(8) compared to a true DCT; */ ! /* furthermore, we scale the results by 2**PASS1_BITS. */ ! /* We scale the results further by 2 as part of output adaption */ ! /* scaling for different DCT size. */ ! /* 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12). */ dataptr = data; for (ctr = 0; ctr < 3; ctr++) { elemptr = sample_data[ctr] + start_col; --- 3168,3184 ---- SHIFT_TEMPS /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); ! /* Pass 1: process rows. ! * Note results are scaled up by sqrt(8) compared to a true DCT; ! * furthermore, we scale the results by 2**PASS1_BITS. ! * We scale the results further by 2 as part of output adaption ! * scaling for different DCT size. ! * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12). ! */ dataptr = data; for (ctr = 0; ctr < 3; ctr++) { elemptr = sample_data[ctr] + start_col;
*** 3156,3166 **** tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]); tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]); tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]); ! /* Apply unsigned->signed conversion */ dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << (PASS1_BITS+1)); dataptr[2] = (DCTELEM) DESCALE(MULTIPLY(tmp12, FIX(1.224744871)), /* c2 */ CONST_BITS-PASS1_BITS-1); --- 3193,3203 ---- tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]); tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]); tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]); ! /* Apply unsigned->signed conversion. */ dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << (PASS1_BITS+1)); dataptr[2] = (DCTELEM) DESCALE(MULTIPLY(tmp12, FIX(1.224744871)), /* c2 */ CONST_BITS-PASS1_BITS-1);
*** 3232,3247 **** SHIFT_TEMPS /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); ! /* Pass 1: process rows. */ ! /* Note results are scaled up by sqrt(8) compared to a true DCT; */ ! /* furthermore, we scale the results by 2**PASS1_BITS. */ ! /* We must also scale the output by (8/4)*(8/2) = 2**3, which we add here. */ ! /* 4-point FDCT kernel, */ ! /* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT]. */ dataptr = data; for (ctr = 0; ctr < 2; ctr++) { elemptr = sample_data[ctr] + start_col; --- 3269,3285 ---- SHIFT_TEMPS /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); ! /* Pass 1: process rows. ! * Note results are scaled up by sqrt(8) compared to a true DCT; ! * furthermore, we scale the results by 2**PASS1_BITS. ! * We must also scale the output by (8/4)*(8/2) = 2**3, which we add here. ! * 4-point FDCT kernel, ! * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT]. ! */ dataptr = data; for (ctr = 0; ctr < 2; ctr++) { elemptr = sample_data[ctr] + start_col;
*** 3251,3261 **** tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]); tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]); tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]); ! /* Apply unsigned->signed conversion */ dataptr[0] = (DCTELEM) ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+3)); dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+3)); /* Odd part */ --- 3289,3299 ---- tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]); tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]); tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]); ! /* Apply unsigned->signed conversion. */ dataptr[0] = (DCTELEM) ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+3)); dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+3)); /* Odd part */
*** 3305,3315 **** */ GLOBAL(void) jpeg_fdct_2x1 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) { ! INT32 tmp0, tmp1; JSAMPROW elemptr; /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); --- 3343,3353 ---- */ GLOBAL(void) jpeg_fdct_2x1 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) { ! DCTELEM tmp0, tmp1; JSAMPROW elemptr; /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
*** 3321,3335 **** /* We leave the results scaled up by an overall factor of 8. * We must also scale the output by (8/2)*(8/1) = 2**5. */ /* Even part */ ! /* Apply unsigned->signed conversion */ ! data[0] = (DCTELEM) ((tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 5); /* Odd part */ ! data[1] = (DCTELEM) ((tmp0 - tmp1) << 5); } /* * Perform the forward DCT on an 8x16 sample block. --- 3359,3375 ---- /* We leave the results scaled up by an overall factor of 8. * We must also scale the output by (8/2)*(8/1) = 2**5. */ /* Even part */ ! ! /* Apply unsigned->signed conversion. */ ! data[0] = (tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 5; /* Odd part */ ! ! data[1] = (tmp0 - tmp1) << 5; } /* * Perform the forward DCT on an 8x16 sample block.
*** 3348,3368 **** DCTELEM *wsptr; JSAMPROW elemptr; int ctr; SHIFT_TEMPS ! /* Pass 1: process rows. */ ! /* Note results are scaled up by sqrt(8) compared to a true DCT; */ ! /* furthermore, we scale the results by 2**PASS1_BITS. */ dataptr = data; ctr = 0; for (;;) { elemptr = sample_data[ctr] + start_col; /* Even part per LL&M figure 1 --- note that published figure is faulty; ! * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". */ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]); tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]); tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]); --- 3388,3410 ---- DCTELEM *wsptr; JSAMPROW elemptr; int ctr; SHIFT_TEMPS ! /* Pass 1: process rows. ! * Note results are scaled up by sqrt(8) compared to a true DCT; ! * furthermore, we scale the results by 2**PASS1_BITS. ! * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). ! */ dataptr = data; ctr = 0; for (;;) { elemptr = sample_data[ctr] + start_col; /* Even part per LL&M figure 1 --- note that published figure is faulty; ! * rotator "c1" should be "c6". */ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]); tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]); tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
*** 3376,3422 **** tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]); tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]); tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]); tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]); ! /* Apply unsigned->signed conversion */ dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << PASS1_BITS); dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS); ! z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); ! dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865), CONST_BITS-PASS1_BITS); ! dataptr[6] = (DCTELEM) DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065), CONST_BITS-PASS1_BITS); /* Odd part per figure 8 --- note paper omits factor of sqrt(2). - * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). * i0..i3 in the paper are tmp0..tmp3 here. */ - tmp10 = tmp0 + tmp3; - tmp11 = tmp1 + tmp2; tmp12 = tmp0 + tmp2; tmp13 = tmp1 + tmp3; z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ - tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ - tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ ! tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */ ! tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */ ! tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */ ! tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ ! tmp12 += z1; ! tmp13 += z1; ! dataptr[1] = (DCTELEM) DESCALE(tmp0 + tmp10 + tmp12, CONST_BITS-PASS1_BITS); ! dataptr[3] = (DCTELEM) DESCALE(tmp1 + tmp11 + tmp13, CONST_BITS-PASS1_BITS); ! dataptr[5] = (DCTELEM) DESCALE(tmp2 + tmp11 + tmp12, CONST_BITS-PASS1_BITS); ! dataptr[7] = (DCTELEM) DESCALE(tmp3 + tmp10 + tmp13, CONST_BITS-PASS1_BITS); ctr++; if (ctr != DCTSIZE) { if (ctr == DCTSIZE * 2) --- 3418,3468 ---- tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]); tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]); tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]); tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]); ! /* Apply unsigned->signed conversion. */ dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << PASS1_BITS); dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS); ! z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); /* c6 */ ! dataptr[2] = (DCTELEM) ! DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */ CONST_BITS-PASS1_BITS); ! dataptr[6] = (DCTELEM) ! DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */ CONST_BITS-PASS1_BITS); /* Odd part per figure 8 --- note paper omits factor of sqrt(2). * i0..i3 in the paper are tmp0..tmp3 here. */ tmp12 = tmp0 + tmp2; tmp13 = tmp1 + tmp3; + z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ + tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* -c3+c5 */ + tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ + tmp12 += z1; + tmp13 += z1; + z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */ tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ ! tmp0 += z1 + tmp12; ! tmp3 += z1 + tmp13; ! z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */ ! tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ ! tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ ! tmp1 += z1 + tmp13; ! tmp2 += z1 + tmp12; ! dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-PASS1_BITS); ! dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-PASS1_BITS); ! dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-PASS1_BITS); ! dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS-PASS1_BITS); ctr++; if (ctr != DCTSIZE) { if (ctr == DCTSIZE * 2)
*** 3539,3552 **** SHIFT_TEMPS /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); ! /* Pass 1: process rows. */ ! /* Note results are scaled up by sqrt(8) compared to a true DCT; */ ! /* furthermore, we scale the results by 2**PASS1_BITS. */ ! /* 7-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/14). */ dataptr = data; ctr = 0; for (;;) { elemptr = sample_data[ctr] + start_col; --- 3585,3599 ---- SHIFT_TEMPS /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); ! /* Pass 1: process rows. ! * Note results are scaled up by sqrt(8) compared to a true DCT; ! * furthermore, we scale the results by 2**PASS1_BITS. ! * 7-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/14). ! */ dataptr = data; ctr = 0; for (;;) { elemptr = sample_data[ctr] + start_col;
*** 3561,3571 **** tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[6]); tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[5]); tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[4]); z1 = tmp0 + tmp2; ! /* Apply unsigned->signed conversion */ dataptr[0] = (DCTELEM) ((z1 + tmp1 + tmp3 - 7 * CENTERJSAMPLE) << PASS1_BITS); tmp3 += tmp3; z1 -= tmp3; z1 -= tmp3; --- 3608,3618 ---- tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[6]); tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[5]); tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[4]); z1 = tmp0 + tmp2; ! /* Apply unsigned->signed conversion. */ dataptr[0] = (DCTELEM) ((z1 + tmp1 + tmp3 - 7 * CENTERJSAMPLE) << PASS1_BITS); tmp3 += tmp3; z1 -= tmp3; z1 -= tmp3;
*** 3719,3732 **** SHIFT_TEMPS /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); ! /* Pass 1: process rows. */ ! /* Note results are scaled up by sqrt(8) compared to a true DCT; */ ! /* furthermore, we scale the results by 2**PASS1_BITS. */ ! /* 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12). */ dataptr = data; ctr = 0; for (;;) { elemptr = sample_data[ctr] + start_col; --- 3766,3780 ---- SHIFT_TEMPS /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); ! /* Pass 1: process rows. ! * Note results are scaled up by sqrt(8) compared to a true DCT; ! * furthermore, we scale the results by 2**PASS1_BITS. ! * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12). ! */ dataptr = data; ctr = 0; for (;;) { elemptr = sample_data[ctr] + start_col;
*** 3742,3752 **** tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]); tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]); tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]); ! /* Apply unsigned->signed conversion */ dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << PASS1_BITS); dataptr[2] = (DCTELEM) DESCALE(MULTIPLY(tmp12, FIX(1.224744871)), /* c2 */ CONST_BITS-PASS1_BITS); --- 3790,3800 ---- tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]); tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]); tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]); ! /* Apply unsigned->signed conversion. */ dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << PASS1_BITS); dataptr[2] = (DCTELEM) DESCALE(MULTIPLY(tmp12, FIX(1.224744871)), /* c2 */ CONST_BITS-PASS1_BITS);
*** 3868,3881 **** SHIFT_TEMPS /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); ! /* Pass 1: process rows. */ ! /* Note results are scaled up by sqrt(8) compared to a true DCT; */ ! /* furthermore, we scale the results by 2**PASS1_BITS. */ ! /* 5-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/10). */ dataptr = data; ctr = 0; for (;;) { elemptr = sample_data[ctr] + start_col; --- 3916,3930 ---- SHIFT_TEMPS /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); ! /* Pass 1: process rows. ! * Note results are scaled up by sqrt(8) compared to a true DCT; ! * furthermore, we scale the results by 2**PASS1_BITS. ! * 5-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/10). ! */ dataptr = data; ctr = 0; for (;;) { elemptr = sample_data[ctr] + start_col;
*** 3890,3900 **** tmp11 = tmp0 - tmp1; tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[4]); tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[3]); ! /* Apply unsigned->signed conversion */ dataptr[0] = (DCTELEM) ((tmp10 + tmp2 - 5 * CENTERJSAMPLE) << PASS1_BITS); tmp11 = MULTIPLY(tmp11, FIX(0.790569415)); /* (c2+c4)/2 */ tmp10 -= tmp2 << 2; tmp10 = MULTIPLY(tmp10, FIX(0.353553391)); /* (c2-c4)/2 */ --- 3939,3949 ---- tmp11 = tmp0 - tmp1; tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[4]); tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[3]); ! /* Apply unsigned->signed conversion. */ dataptr[0] = (DCTELEM) ((tmp10 + tmp2 - 5 * CENTERJSAMPLE) << PASS1_BITS); tmp11 = MULTIPLY(tmp11, FIX(0.790569415)); /* (c2+c4)/2 */ tmp10 -= tmp2 << 2; tmp10 = MULTIPLY(tmp10, FIX(0.353553391)); /* (c2-c4)/2 */
*** 4013,4027 **** SHIFT_TEMPS /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); ! /* Pass 1: process rows. */ ! /* Note results are scaled up by sqrt(8) compared to a true DCT; */ ! /* furthermore, we scale the results by 2**PASS1_BITS. */ ! /* We must also scale the output by 8/4 = 2, which we add here. */ ! /* 4-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). */ dataptr = data; for (ctr = 0; ctr < DCTSIZE; ctr++) { elemptr = sample_data[ctr] + start_col; --- 4062,4078 ---- SHIFT_TEMPS /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); ! /* Pass 1: process rows. ! * Note results are scaled up by sqrt(8) compared to a true DCT; ! * furthermore, we scale the results by 2**PASS1_BITS. ! * We must also scale the output by 8/4 = 2, which we add here. ! * 4-point FDCT kernel, ! * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT]. ! */ dataptr = data; for (ctr = 0; ctr < DCTSIZE; ctr++) { elemptr = sample_data[ctr] + start_col;
*** 4031,4041 **** tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]); tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]); tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]); ! /* Apply unsigned->signed conversion */ dataptr[0] = (DCTELEM) ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+1)); dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+1)); /* Odd part */ --- 4082,4092 ---- tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]); tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]); tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]); ! /* Apply unsigned->signed conversion. */ dataptr[0] = (DCTELEM) ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+1)); dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+1)); /* Odd part */
*** 4055,4070 **** } /* Pass 2: process columns. * We remove the PASS1_BITS scaling, but leave the results scaled up * by an overall factor of 8. */ dataptr = data; for (ctr = 0; ctr < 4; ctr++) { /* Even part per LL&M figure 1 --- note that published figure is faulty; ! * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". */ tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6]; tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5]; --- 4106,4122 ---- } /* Pass 2: process columns. * We remove the PASS1_BITS scaling, but leave the results scaled up * by an overall factor of 8. + * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). */ dataptr = data; for (ctr = 0; ctr < 4; ctr++) { /* Even part per LL&M figure 1 --- note that published figure is faulty; ! * rotator "c1" should be "c6". */ tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6]; tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
*** 4082,4132 **** tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp10 + tmp11, PASS1_BITS); dataptr[DCTSIZE*4] = (DCTELEM) RIGHT_SHIFT(tmp10 - tmp11, PASS1_BITS); ! z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); /* Add fudge factor here for final descale. */ z1 += ONE << (CONST_BITS+PASS1_BITS-1); dataptr[DCTSIZE*2] = (DCTELEM) ! RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), CONST_BITS+PASS1_BITS); dataptr[DCTSIZE*6] = (DCTELEM) ! RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), CONST_BITS+PASS1_BITS); /* Odd part per figure 8 --- note paper omits factor of sqrt(2). - * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). * i0..i3 in the paper are tmp0..tmp3 here. */ - tmp10 = tmp0 + tmp3; - tmp11 = tmp1 + tmp2; tmp12 = tmp0 + tmp2; tmp13 = tmp1 + tmp3; z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ /* Add fudge factor here for final descale. */ z1 += ONE << (CONST_BITS+PASS1_BITS-1); ! tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ ! tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ ! tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ ! tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ ! tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */ ! tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */ ! tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */ tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ - tmp12 += z1; tmp13 += z1; ! dataptr[DCTSIZE*1] = (DCTELEM) ! RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS+PASS1_BITS); ! dataptr[DCTSIZE*3] = (DCTELEM) ! RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS+PASS1_BITS); ! dataptr[DCTSIZE*5] = (DCTELEM) ! RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS+PASS1_BITS); ! dataptr[DCTSIZE*7] = (DCTELEM) ! RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS+PASS1_BITS); dataptr++; /* advance pointer to next column */ } } --- 4134,4186 ---- tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp10 + tmp11, PASS1_BITS); dataptr[DCTSIZE*4] = (DCTELEM) RIGHT_SHIFT(tmp10 - tmp11, PASS1_BITS); ! z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); /* c6 */ /* Add fudge factor here for final descale. */ z1 += ONE << (CONST_BITS+PASS1_BITS-1); + dataptr[DCTSIZE*2] = (DCTELEM) ! RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */ ! CONST_BITS+PASS1_BITS); dataptr[DCTSIZE*6] = (DCTELEM) ! RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */ ! CONST_BITS+PASS1_BITS); /* Odd part per figure 8 --- note paper omits factor of sqrt(2). * i0..i3 in the paper are tmp0..tmp3 here. */ tmp12 = tmp0 + tmp2; tmp13 = tmp1 + tmp3; + z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ /* Add fudge factor here for final descale. */ z1 += ONE << (CONST_BITS+PASS1_BITS-1); ! tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* -c3+c5 */ tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ tmp12 += z1; tmp13 += z1; ! z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */ ! tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ ! tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ ! tmp0 += z1 + tmp12; ! tmp3 += z1 + tmp13; ! ! z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */ ! tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ ! tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ ! tmp1 += z1 + tmp13; ! tmp2 += z1 + tmp12; ! ! dataptr[DCTSIZE*1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS+PASS1_BITS); ! dataptr[DCTSIZE*3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS+PASS1_BITS); ! dataptr[DCTSIZE*5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS+PASS1_BITS); ! dataptr[DCTSIZE*7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS+PASS1_BITS); dataptr++; /* advance pointer to next column */ } }
*** 4148,4163 **** SHIFT_TEMPS /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); ! /* Pass 1: process rows. */ ! /* Note results are scaled up by sqrt(8) compared to a true DCT; */ ! /* furthermore, we scale the results by 2**PASS1_BITS. */ ! /* We scale the results further by 2 as part of output adaption */ ! /* scaling for different DCT size. */ ! /* 3-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/6). */ dataptr = data; for (ctr = 0; ctr < 6; ctr++) { elemptr = sample_data[ctr] + start_col; --- 4202,4218 ---- SHIFT_TEMPS /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); ! /* Pass 1: process rows. ! * Note results are scaled up by sqrt(8) compared to a true DCT; ! * furthermore, we scale the results by 2**PASS1_BITS. ! * We scale the results further by 2 as part of output adaption ! * scaling for different DCT size. ! * 3-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/6). ! */ dataptr = data; for (ctr = 0; ctr < 6; ctr++) { elemptr = sample_data[ctr] + start_col;
*** 4166,4176 **** tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[2]); tmp1 = GETJSAMPLE(elemptr[1]); tmp2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[2]); ! /* Apply unsigned->signed conversion */ dataptr[0] = (DCTELEM) ((tmp0 + tmp1 - 3 * CENTERJSAMPLE) << (PASS1_BITS+1)); dataptr[2] = (DCTELEM) DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(0.707106781)), /* c2 */ CONST_BITS-PASS1_BITS-1); --- 4221,4231 ---- tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[2]); tmp1 = GETJSAMPLE(elemptr[1]); tmp2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[2]); ! /* Apply unsigned->signed conversion. */ dataptr[0] = (DCTELEM) ((tmp0 + tmp1 - 3 * CENTERJSAMPLE) << (PASS1_BITS+1)); dataptr[2] = (DCTELEM) DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(0.707106781)), /* c2 */ CONST_BITS-PASS1_BITS-1);
*** 4253,4276 **** SHIFT_TEMPS /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); ! /* Pass 1: process rows. */ ! /* Note results are scaled up by sqrt(8) compared to a true DCT. */ ! /* We must also scale the output by (8/2)*(8/4) = 2**3, which we add here. */ dataptr = data; for (ctr = 0; ctr < 4; ctr++) { elemptr = sample_data[ctr] + start_col; /* Even part */ tmp0 = GETJSAMPLE(elemptr[0]); tmp1 = GETJSAMPLE(elemptr[1]); ! /* Apply unsigned->signed conversion */ dataptr[0] = (DCTELEM) ((tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 3); /* Odd part */ dataptr[1] = (DCTELEM) ((tmp0 - tmp1) << 3); --- 4308,4332 ---- SHIFT_TEMPS /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); ! /* Pass 1: process rows. ! * Note results are scaled up by sqrt(8) compared to a true DCT. ! * We must also scale the output by (8/2)*(8/4) = 2**3, which we add here. ! */ dataptr = data; for (ctr = 0; ctr < 4; ctr++) { elemptr = sample_data[ctr] + start_col; /* Even part */ tmp0 = GETJSAMPLE(elemptr[0]); tmp1 = GETJSAMPLE(elemptr[1]); ! /* Apply unsigned->signed conversion. */ dataptr[0] = (DCTELEM) ((tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 3); /* Odd part */ dataptr[1] = (DCTELEM) ((tmp0 - tmp1) << 3);
*** 4322,4348 **** */ GLOBAL(void) jpeg_fdct_1x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) { ! INT32 tmp0, tmp1; /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); ! tmp0 = GETJSAMPLE(sample_data[0][start_col]); ! tmp1 = GETJSAMPLE(sample_data[1][start_col]); ! /* We leave the results scaled up by an overall factor of 8. * We must also scale the output by (8/1)*(8/2) = 2**5. */ /* Even part */ ! /* Apply unsigned->signed conversion */ ! data[DCTSIZE*0] = (DCTELEM) ((tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 5); /* Odd part */ ! data[DCTSIZE*1] = (DCTELEM) ((tmp0 - tmp1) << 5); } #endif /* DCT_SCALING_SUPPORTED */ #endif /* DCT_ISLOW_SUPPORTED */ --- 4378,4409 ---- */ GLOBAL(void) jpeg_fdct_1x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) { ! DCTELEM tmp0, tmp1; /* Pre-zero output coefficient block. */ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); ! /* Pass 1: empty. */ ! /* Pass 2: process columns. ! * We leave the results scaled up by an overall factor of 8. * We must also scale the output by (8/1)*(8/2) = 2**5. */ /* Even part */ ! ! tmp0 = GETJSAMPLE(sample_data[0][start_col]); ! tmp1 = GETJSAMPLE(sample_data[1][start_col]); ! ! /* Apply unsigned->signed conversion. */ ! data[DCTSIZE*0] = (tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 5; /* Odd part */ ! ! data[DCTSIZE*1] = (tmp0 - tmp1) << 5; } #endif /* DCT_SCALING_SUPPORTED */ #endif /* DCT_ISLOW_SUPPORTED */
< prev index next >