1 /*
2 * jidctint.c
3 *
4 * Copyright (C) 1991-1998, Thomas G. Lane.
5 * Modification developed 2002-2009 by Guido Vollbeding.
6 * This file is part of the Independent JPEG Group's software.
7 * For conditions of distribution and use, see the accompanying README file.
8 *
9 * This file contains a slow-but-accurate integer implementation of the
10 * inverse DCT (Discrete Cosine Transform). In the IJG code, this routine
11 * must also perform dequantization of the input coefficients.
12 *
13 * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
14 * on each row (or vice versa, but it's more convenient to emit a row at
15 * a time). Direct algorithms are also available, but they are much more
16 * complex and seem not to be any faster when reduced to code.
17 *
18 * This implementation is based on an algorithm described in
19 * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
20 * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
21 * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
22 * The primary algorithm described there uses 11 multiplies and 29 adds.
23 * We use their alternate method with 12 multiplies and 32 adds.
24 * The advantage of this method is that no data path contains more than one
25 * multiplication; this allows a very simple and accurate implementation in
148 * For 12-bit samples, a full 32-bit multiplication will be needed.
149 */
150
/* MULTIPLY(val, cnst): multiply a variable by a constant.
 * Builds with 8-bit samples delegate to MULTIPLY16C16; every other
 * sample depth gets an ordinary full-width multiplication.
 */
#if BITS_IN_JSAMPLE != 8
#define MULTIPLY(val,cnst)  ((val) * (cnst))
#else
#define MULTIPLY(val,cnst)  MULTIPLY16C16(val,cnst)
#endif
156
157
/* DEQUANTIZE(c, q): scale coefficient c by its multiplier-table entry q.
 * The coefficient is converted to ISLOW_MULT_TYPE before multiplying.
 * Both operands and the product need at most 16 bits in this module, so
 * an int multiply and a short multiply give the same answer.
 */

#define DEQUANTIZE(c,q)  (((ISLOW_MULT_TYPE) (c)) * (q))
164
165
166 /*
167 * Perform dequantization and inverse DCT on one block of coefficients.
168 */
169
170 GLOBAL(void)
171 jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
172 JCOEFPTR coef_block,
173 JSAMPARRAY output_buf, JDIMENSION output_col)
174 {
175 INT32 tmp0, tmp1, tmp2, tmp3;
176 INT32 tmp10, tmp11, tmp12, tmp13;
177 INT32 z1, z2, z3;
178 JCOEFPTR inptr;
179 ISLOW_MULT_TYPE * quantptr;
180 int * wsptr;
181 JSAMPROW outptr;
182 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
183 int ctr;
184 int workspace[DCTSIZE2]; /* buffers data between passes */
185 SHIFT_TEMPS
186
187 /* Pass 1: process columns from input, store into work array. */
188 /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
189 /* furthermore, we scale the results by 2**PASS1_BITS. */
190
191 inptr = coef_block;
192 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
193 wsptr = workspace;
194 for (ctr = DCTSIZE; ctr > 0; ctr--) {
195 /* Due to quantization, we will usually find that many of the input
196 * coefficients are zero, especially the AC terms. We can exploit this
197 * by short-circuiting the IDCT calculation for any column in which all
198 * the AC terms are zero. In that case each output is equal to the
199 * DC coefficient (with scale factor as needed).
200 * With typical images and quantization tables, half or more of the
201 * column DCT calculations can be simplified this way.
202 */
203
204 if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
205 inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
206 inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
207 inptr[DCTSIZE*7] == 0) {
208 /* AC terms all zero */
209 int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
210
211 wsptr[DCTSIZE*0] = dcval;
212 wsptr[DCTSIZE*1] = dcval;
213 wsptr[DCTSIZE*2] = dcval;
214 wsptr[DCTSIZE*3] = dcval;
215 wsptr[DCTSIZE*4] = dcval;
216 wsptr[DCTSIZE*5] = dcval;
217 wsptr[DCTSIZE*6] = dcval;
218 wsptr[DCTSIZE*7] = dcval;
219
220 inptr++; /* advance pointers to next column */
221 quantptr++;
222 wsptr++;
223 continue;
224 }
225
226 /* Even part: reverse the even part of the forward DCT. */
227 /* The rotator is sqrt(2)*c(-6). */
228
229 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
230 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
231
232 z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
233 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);
234 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);
235
236 z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
237 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
238 z2 <<= CONST_BITS;
239 z3 <<= CONST_BITS;
240 /* Add fudge factor here for final descale. */
241 z2 += ONE << (CONST_BITS-PASS1_BITS-1);
242
243 tmp0 = z2 + z3;
244 tmp1 = z2 - z3;
245
246 tmp10 = tmp0 + tmp2;
247 tmp13 = tmp0 - tmp2;
248 tmp11 = tmp1 + tmp3;
249 tmp12 = tmp1 - tmp3;
250
251 /* Odd part per figure 8; the matrix is unitary and hence its
252 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
253 */
254
255 tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
256 tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
257 tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
258 tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
259
260 z2 = tmp0 + tmp2;
261 z3 = tmp1 + tmp3;
262
263 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */
264 z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
265 z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
266 z2 += z1;
267 z3 += z1;
268
269 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
270 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
271 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
272 tmp0 += z1 + z2;
273 tmp3 += z1 + z3;
274
275 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
276 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
277 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
278 tmp1 += z1 + z3;
279 tmp2 += z1 + z2;
280
281 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
282
283 wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
284 wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
285 wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
286 wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
287 wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
288 wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
289 wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
290 wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
291
292 inptr++; /* advance pointers to next column */
293 quantptr++;
294 wsptr++;
295 }
296
297 /* Pass 2: process rows from work array, store into output array. */
298 /* Note that we must descale the results by a factor of 8 == 2**3, */
299 /* and also undo the PASS1_BITS scaling. */
300
301 wsptr = workspace;
302 for (ctr = 0; ctr < DCTSIZE; ctr++) {
303 outptr = output_buf[ctr] + output_col;
304 /* Rows of zeroes can be exploited in the same way as we did with columns.
305 * However, the column calculation has created many nonzero AC terms, so
306 * the simplification applies less often (typically 5% to 10% of the time).
307 * On machines with very fast multiplication, it's possible that the
308 * test takes more time than it's worth. In that case this section
309 * may be commented out.
310 */
311
312 #ifndef NO_ZERO_ROW_TEST
313 if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
314 wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
315 /* AC terms all zero */
316 JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3)
317 & RANGE_MASK];
318
319 outptr[0] = dcval;
320 outptr[1] = dcval;
321 outptr[2] = dcval;
322 outptr[3] = dcval;
323 outptr[4] = dcval;
324 outptr[5] = dcval;
325 outptr[6] = dcval;
326 outptr[7] = dcval;
327
328 wsptr += DCTSIZE; /* advance pointer to next row */
329 continue;
330 }
331 #endif
332
333 /* Even part: reverse the even part of the forward DCT. */
334 /* The rotator is sqrt(2)*c(-6). */
335
336 z2 = (INT32) wsptr[2];
337 z3 = (INT32) wsptr[6];
338
339 z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
340 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);
341 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);
342
343 /* Add fudge factor here for final descale. */
344 z2 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
345 z3 = (INT32) wsptr[4];
346
347 tmp0 = (z2 + z3) << CONST_BITS;
348 tmp1 = (z2 - z3) << CONST_BITS;
349
350 tmp10 = tmp0 + tmp2;
351 tmp13 = tmp0 - tmp2;
352 tmp11 = tmp1 + tmp3;
353 tmp12 = tmp1 - tmp3;
354
355 /* Odd part per figure 8; the matrix is unitary and hence its
356 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
357 */
358
359 tmp0 = (INT32) wsptr[7];
360 tmp1 = (INT32) wsptr[5];
361 tmp2 = (INT32) wsptr[3];
362 tmp3 = (INT32) wsptr[1];
363
364 z2 = tmp0 + tmp2;
365 z3 = tmp1 + tmp3;
366
367 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */
368 z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
369 z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
370 z2 += z1;
371 z3 += z1;
372
373 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
374 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
375 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
376 tmp0 += z1 + z2;
377 tmp3 += z1 + z3;
378
379 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
380 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
381 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
382 tmp1 += z1 + z3;
383 tmp2 += z1 + z2;
384
385 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
386
387 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
388 CONST_BITS+PASS1_BITS+3)
389 & RANGE_MASK];
390 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
391 CONST_BITS+PASS1_BITS+3)
392 & RANGE_MASK];
393 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
394 CONST_BITS+PASS1_BITS+3)
395 & RANGE_MASK];
396 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
397 CONST_BITS+PASS1_BITS+3)
398 & RANGE_MASK];
399 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
400 CONST_BITS+PASS1_BITS+3)
401 & RANGE_MASK];
402 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
403 CONST_BITS+PASS1_BITS+3)
404 & RANGE_MASK];
405 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
406 CONST_BITS+PASS1_BITS+3)
407 & RANGE_MASK];
408 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
409 CONST_BITS+PASS1_BITS+3)
410 & RANGE_MASK];
411
412 wsptr += DCTSIZE; /* advance pointer to next row */
413 }
414 }
415
416 #ifdef IDCT_SCALING_SUPPORTED
417
418
419 /*
420 * Perform dequantization and inverse DCT on one block of coefficients,
421 * producing a 7x7 output block.
422 *
423 * Optimized algorithm with 12 multiplications in the 1-D kernel.
424 * cK represents sqrt(2) * cos(K*pi/14).
425 */
426
427 GLOBAL(void)
428 jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
429 JCOEFPTR coef_block,
430 JSAMPARRAY output_buf, JDIMENSION output_col)
431 {
432 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13;
433 INT32 z1, z2, z3;
434 JCOEFPTR inptr;
435 ISLOW_MULT_TYPE * quantptr;
436 int * wsptr;
437 JSAMPROW outptr;
438 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
439 int ctr;
440 int workspace[7*7]; /* buffers data between passes */
441 SHIFT_TEMPS
485
486 /* Final output stage */
487
488 wsptr[7*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
489 wsptr[7*6] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
490 wsptr[7*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
491 wsptr[7*5] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
492 wsptr[7*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
493 wsptr[7*4] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
494 wsptr[7*3] = (int) RIGHT_SHIFT(tmp13, CONST_BITS-PASS1_BITS);
495 }
496
497 /* Pass 2: process 7 rows from work array, store into output array. */
498
499 wsptr = workspace;
500 for (ctr = 0; ctr < 7; ctr++) {
501 outptr = output_buf[ctr] + output_col;
502
503 /* Even part */
504
505 /* Add fudge factor here for final descale. */
506 tmp13 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
507 tmp13 <<= CONST_BITS;
508
509 z1 = (INT32) wsptr[2];
510 z2 = (INT32) wsptr[4];
511 z3 = (INT32) wsptr[6];
512
513 tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
514 tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
515 tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
516 tmp0 = z1 + z3;
517 z2 -= tmp0;
518 tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
519 tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
520 tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
521 tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
522
523 /* Odd part */
524
525 z1 = (INT32) wsptr[1];
526 z2 = (INT32) wsptr[3];
621 tmp1 = (z1 - z2 - z3) << PASS1_BITS;
622
623 /* Final output stage */
624
625 wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
626 wsptr[6*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
627 wsptr[6*1] = (int) (tmp11 + tmp1);
628 wsptr[6*4] = (int) (tmp11 - tmp1);
629 wsptr[6*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
630 wsptr[6*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
631 }
632
633 /* Pass 2: process 6 rows from work array, store into output array. */
634
635 wsptr = workspace;
636 for (ctr = 0; ctr < 6; ctr++) {
637 outptr = output_buf[ctr] + output_col;
638
639 /* Even part */
640
641 /* Add fudge factor here for final descale. */
642 tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
643 tmp0 <<= CONST_BITS;
644 tmp2 = (INT32) wsptr[4];
645 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
646 tmp1 = tmp0 + tmp10;
647 tmp11 = tmp0 - tmp10 - tmp10;
648 tmp10 = (INT32) wsptr[2];
649 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
650 tmp10 = tmp1 + tmp0;
651 tmp12 = tmp1 - tmp0;
652
653 /* Odd part */
654
655 z1 = (INT32) wsptr[1];
656 z2 = (INT32) wsptr[3];
657 z3 = (INT32) wsptr[5];
658 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
659 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
660 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
661 tmp1 = (z1 - z2 - z3) << CONST_BITS;
662
740 tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
741 tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
742
743 /* Final output stage */
744
745 wsptr[5*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
746 wsptr[5*4] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
747 wsptr[5*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
748 wsptr[5*3] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
749 wsptr[5*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
750 }
751
752 /* Pass 2: process 5 rows from work array, store into output array. */
753
754 wsptr = workspace;
755 for (ctr = 0; ctr < 5; ctr++) {
756 outptr = output_buf[ctr] + output_col;
757
758 /* Even part */
759
760 /* Add fudge factor here for final descale. */
761 tmp12 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
762 tmp12 <<= CONST_BITS;
763 tmp0 = (INT32) wsptr[2];
764 tmp1 = (INT32) wsptr[4];
765 z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
766 z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
767 z3 = tmp12 + z2;
768 tmp10 = z3 + z1;
769 tmp11 = z3 - z1;
770 tmp12 -= z2 << 2;
771
772 /* Odd part */
773
774 z2 = (INT32) wsptr[1];
775 z3 = (INT32) wsptr[3];
776
777 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
778 tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
779 tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
780
781 /* Final output stage */
852 CONST_BITS-PASS1_BITS);
853 tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */
854 CONST_BITS-PASS1_BITS);
855
856 /* Final output stage */
857
858 wsptr[4*0] = (int) (tmp10 + tmp0);
859 wsptr[4*3] = (int) (tmp10 - tmp0);
860 wsptr[4*1] = (int) (tmp12 + tmp2);
861 wsptr[4*2] = (int) (tmp12 - tmp2);
862 }
863
864 /* Pass 2: process 4 rows from work array, store into output array. */
865
866 wsptr = workspace;
867 for (ctr = 0; ctr < 4; ctr++) {
868 outptr = output_buf[ctr] + output_col;
869
870 /* Even part */
871
872 /* Add fudge factor here for final descale. */
873 tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
874 tmp2 = (INT32) wsptr[2];
875
876 tmp10 = (tmp0 + tmp2) << CONST_BITS;
877 tmp12 = (tmp0 - tmp2) << CONST_BITS;
878
879 /* Odd part */
880 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
881
882 z2 = (INT32) wsptr[1];
883 z3 = (INT32) wsptr[3];
884
885 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
886 tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
887 tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
888
889 /* Final output stage */
890
891 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
892 CONST_BITS+PASS1_BITS+3)
893 & RANGE_MASK];
949 /* Odd part */
950
951 tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
952 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
953
954 /* Final output stage */
955
956 wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
957 wsptr[3*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
958 wsptr[3*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
959 }
960
961 /* Pass 2: process 3 rows from work array, store into output array. */
962
963 wsptr = workspace;
964 for (ctr = 0; ctr < 3; ctr++) {
965 outptr = output_buf[ctr] + output_col;
966
967 /* Even part */
968
969 /* Add fudge factor here for final descale. */
970 tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
971 tmp0 <<= CONST_BITS;
972 tmp2 = (INT32) wsptr[2];
973 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
974 tmp10 = tmp0 + tmp12;
975 tmp2 = tmp0 - tmp12 - tmp12;
976
977 /* Odd part */
978
979 tmp12 = (INT32) wsptr[1];
980 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
981
982 /* Final output stage */
983
984 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
985 CONST_BITS+PASS1_BITS+3)
986 & RANGE_MASK];
987 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
988 CONST_BITS+PASS1_BITS+3)
989 & RANGE_MASK];
990 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
991 CONST_BITS+PASS1_BITS+3)
992 & RANGE_MASK];
993
994 wsptr += 3; /* advance pointer to next row */
995 }
996 }
997
998
999 /*
1000 * Perform dequantization and inverse DCT on one block of coefficients,
1001 * producing a reduced-size 2x2 output block.
1002 *
1003 * Multiplication-less algorithm.
1004 */
1005
1006 GLOBAL(void)
1007 jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1008 JCOEFPTR coef_block,
1009 JSAMPARRAY output_buf, JDIMENSION output_col)
1010 {
1011 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
1012 ISLOW_MULT_TYPE * quantptr;
1013 JSAMPROW outptr;
1014 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1015 SHIFT_TEMPS
1016
1017 /* Pass 1: process columns from input. */
1018
1019 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1020
1021 /* Column 0 */
1022 tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
1023 tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
1024 /* Add fudge factor here for final descale. */
1025 tmp4 += ONE << 2;
1026
1027 tmp0 = tmp4 + tmp5;
1028 tmp2 = tmp4 - tmp5;
1029
1030 /* Column 1 */
1031 tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0+1], quantptr[DCTSIZE*0+1]);
1032 tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1+1], quantptr[DCTSIZE*1+1]);
1033
1034 tmp1 = tmp4 + tmp5;
1035 tmp3 = tmp4 - tmp5;
1036
1037 /* Pass 2: process 2 rows, store into output array. */
1038
1039 /* Row 0 */
1040 outptr = output_buf[0] + output_col;
1041
1042 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
1043 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
1044
1045 /* Row 1 */
1046 outptr = output_buf[1] + output_col;
1047
1048 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp2 + tmp3, 3) & RANGE_MASK];
1049 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2 - tmp3, 3) & RANGE_MASK];
1050 }
1051
1052
1053 /*
1054 * Perform dequantization and inverse DCT on one block of coefficients,
1055 * producing a reduced-size 1x1 output block.
1056 *
1057 * We hardly need an inverse DCT routine for this: just take the
1058 * average pixel value, which is one-eighth of the DC coefficient.
1059 */
1060
1061 GLOBAL(void)
1062 jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1063 JCOEFPTR coef_block,
1064 JSAMPARRAY output_buf, JDIMENSION output_col)
1065 {
1066 int dcval;
1067 ISLOW_MULT_TYPE * quantptr;
1068 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1069 SHIFT_TEMPS
1070
1071 /* 1x1 is trivial: just take the DC coefficient divided by 8. */
1072 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1073 dcval = DEQUANTIZE(coef_block[0], quantptr[0]);
1074 dcval = (int) DESCALE((INT32) dcval, 3);
1075
1076 output_buf[0][output_col] = range_limit[dcval & RANGE_MASK];
1077 }
1078
1079
1080 /*
1081 * Perform dequantization and inverse DCT on one block of coefficients,
1082 * producing a 9x9 output block.
1083 *
1084 * Optimized algorithm with 10 multiplications in the 1-D kernel.
1085 * cK represents sqrt(2) * cos(K*pi/18).
1086 */
1087
1088 GLOBAL(void)
1089 jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1090 JCOEFPTR coef_block,
1091 JSAMPARRAY output_buf, JDIMENSION output_col)
1092 {
1093 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14;
1094 INT32 z1, z2, z3, z4;
1095 JCOEFPTR inptr;
1096 ISLOW_MULT_TYPE * quantptr;
1155
1156 wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
1157 wsptr[8*8] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
1158 wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
1159 wsptr[8*7] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
1160 wsptr[8*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
1161 wsptr[8*6] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
1162 wsptr[8*3] = (int) RIGHT_SHIFT(tmp13 + tmp3, CONST_BITS-PASS1_BITS);
1163 wsptr[8*5] = (int) RIGHT_SHIFT(tmp13 - tmp3, CONST_BITS-PASS1_BITS);
1164 wsptr[8*4] = (int) RIGHT_SHIFT(tmp14, CONST_BITS-PASS1_BITS);
1165 }
1166
1167 /* Pass 2: process 9 rows from work array, store into output array. */
1168
1169 wsptr = workspace;
1170 for (ctr = 0; ctr < 9; ctr++) {
1171 outptr = output_buf[ctr] + output_col;
1172
1173 /* Even part */
1174
1175 /* Add fudge factor here for final descale. */
1176 tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
1177 tmp0 <<= CONST_BITS;
1178
1179 z1 = (INT32) wsptr[2];
1180 z2 = (INT32) wsptr[4];
1181 z3 = (INT32) wsptr[6];
1182
1183 tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */
1184 tmp1 = tmp0 + tmp3;
1185 tmp2 = tmp0 - tmp3 - tmp3;
1186
1187 tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
1188 tmp11 = tmp2 + tmp0;
1189 tmp14 = tmp2 - tmp0 - tmp0;
1190
1191 tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
1192 tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */
1193 tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */
1194
1195 tmp10 = tmp1 + tmp0 - tmp3;
1196 tmp12 = tmp1 - tmp0 + tmp2;
1338 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1339 wsptr[8*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1340 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1341 wsptr[8*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1342 wsptr[8*2] = (int) (tmp22 + tmp12);
1343 wsptr[8*7] = (int) (tmp22 - tmp12);
1344 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1345 wsptr[8*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1346 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1347 wsptr[8*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1348 }
1349
1350 /* Pass 2: process 10 rows from work array, store into output array. */
1351
1352 wsptr = workspace;
1353 for (ctr = 0; ctr < 10; ctr++) {
1354 outptr = output_buf[ctr] + output_col;
1355
1356 /* Even part */
1357
1358 /* Add fudge factor here for final descale. */
1359 z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
1360 z3 <<= CONST_BITS;
1361 z4 = (INT32) wsptr[4];
1362 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
1363 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
1364 tmp10 = z3 + z1;
1365 tmp11 = z3 - z2;
1366
1367 tmp22 = z3 - ((z1 - z2) << 1); /* c0 = (c4-c8)*2 */
1368
1369 z2 = (INT32) wsptr[2];
1370 z3 = (INT32) wsptr[6];
1371
1372 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
1373 tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
1374 tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
1375
1376 tmp20 = tmp10 + tmp12;
1377 tmp24 = tmp10 - tmp12;
1378 tmp21 = tmp11 + tmp13;
1379 tmp23 = tmp11 - tmp13;
1531 wsptr[8*10] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1532 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1533 wsptr[8*9] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1534 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
1535 wsptr[8*8] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
1536 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1537 wsptr[8*7] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1538 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1539 wsptr[8*6] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1540 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25, CONST_BITS-PASS1_BITS);
1541 }
1542
1543 /* Pass 2: process 11 rows from work array, store into output array. */
1544
1545 wsptr = workspace;
1546 for (ctr = 0; ctr < 11; ctr++) {
1547 outptr = output_buf[ctr] + output_col;
1548
1549 /* Even part */
1550
1551 /* Add fudge factor here for final descale. */
1552 tmp10 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
1553 tmp10 <<= CONST_BITS;
1554
1555 z1 = (INT32) wsptr[2];
1556 z2 = (INT32) wsptr[4];
1557 z3 = (INT32) wsptr[6];
1558
1559 tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */
1560 tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */
1561 z4 = z1 + z3;
1562 tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */
1563 z4 -= z2;
1564 tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */
1565 tmp21 = tmp20 + tmp23 + tmp25 -
1566 MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */
1567 tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
1568 tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
1569 tmp24 += tmp25;
1570 tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */
1571 tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */
1572 MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */
1735 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1736 wsptr[8*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1737 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
1738 wsptr[8*9] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
1739 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1740 wsptr[8*8] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1741 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1742 wsptr[8*7] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1743 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
1744 wsptr[8*6] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
1745 }
1746
1747 /* Pass 2: process 12 rows from work array, store into output array. */
1748
1749 wsptr = workspace;
1750 for (ctr = 0; ctr < 12; ctr++) {
1751 outptr = output_buf[ctr] + output_col;
1752
1753 /* Even part */
1754
1755 /* Add fudge factor here for final descale. */
1756 z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
1757 z3 <<= CONST_BITS;
1758
1759 z4 = (INT32) wsptr[4];
1760 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
1761
1762 tmp10 = z3 + z4;
1763 tmp11 = z3 - z4;
1764
1765 z1 = (INT32) wsptr[2];
1766 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
1767 z1 <<= CONST_BITS;
1768 z2 = (INT32) wsptr[6];
1769 z2 <<= CONST_BITS;
1770
1771 tmp12 = z1 - z2;
1772
1773 tmp21 = z3 + tmp12;
1774 tmp24 = z3 - tmp12;
1775
1776 tmp12 = z4 + z2;
1956 wsptr[8*11] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1957 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
1958 wsptr[8*10] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
1959 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1960 wsptr[8*9] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1961 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1962 wsptr[8*8] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1963 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
1964 wsptr[8*7] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
1965 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26, CONST_BITS-PASS1_BITS);
1966 }
1967
1968 /* Pass 2: process 13 rows from work array, store into output array. */
1969
1970 wsptr = workspace;
1971 for (ctr = 0; ctr < 13; ctr++) {
1972 outptr = output_buf[ctr] + output_col;
1973
1974 /* Even part */
1975
1976 /* Add fudge factor here for final descale. */
1977 z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
1978 z1 <<= CONST_BITS;
1979
1980 z2 = (INT32) wsptr[2];
1981 z3 = (INT32) wsptr[4];
1982 z4 = (INT32) wsptr[6];
1983
1984 tmp10 = z3 + z4;
1985 tmp11 = z3 - z4;
1986
1987 tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */
1988 tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */
1989
1990 tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */
1991 tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */
1992
1993 tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */
1994 tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */
1995
1996 tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */
1997 tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
2183 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
2184 wsptr[8*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
2185 wsptr[8*3] = (int) (tmp23 + tmp13);
2186 wsptr[8*10] = (int) (tmp23 - tmp13);
2187 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
2188 wsptr[8*9] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
2189 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
2190 wsptr[8*8] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
2191 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
2192 wsptr[8*7] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
2193 }
2194
2195 /* Pass 2: process 14 rows from work array, store into output array. */
2196
2197 wsptr = workspace;
2198 for (ctr = 0; ctr < 14; ctr++) {
2199 outptr = output_buf[ctr] + output_col;
2200
2201 /* Even part */
2202
2203 /* Add fudge factor here for final descale. */
2204 z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
2205 z1 <<= CONST_BITS;
2206 z4 = (INT32) wsptr[4];
2207 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
2208 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
2209 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
2210
2211 tmp10 = z1 + z2;
2212 tmp11 = z1 + z3;
2213 tmp12 = z1 - z4;
2214
2215 tmp23 = z1 - ((z2 + z3 - z4) << 1); /* c0 = (c4+c12-c8)*2 */
2216
2217 z1 = (INT32) wsptr[2];
2218 z2 = (INT32) wsptr[6];
2219
2220 z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
2221
2222 tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
2223 tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
2224 tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
2415 wsptr[8*12] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
2416 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
2417 wsptr[8*11] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
2418 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
2419 wsptr[8*10] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
2420 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
2421 wsptr[8*9] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
2422 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
2423 wsptr[8*8] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
2424 wsptr[8*7] = (int) RIGHT_SHIFT(tmp27, CONST_BITS-PASS1_BITS);
2425 }
2426
2427 /* Pass 2: process 15 rows from work array, store into output array. */
2428
2429 wsptr = workspace;
2430 for (ctr = 0; ctr < 15; ctr++) {
2431 outptr = output_buf[ctr] + output_col;
2432
2433 /* Even part */
2434
2435 /* Add fudge factor here for final descale. */
2436 z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
2437 z1 <<= CONST_BITS;
2438
2439 z2 = (INT32) wsptr[2];
2440 z3 = (INT32) wsptr[4];
2441 z4 = (INT32) wsptr[6];
2442
2443 tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
2444 tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
2445
2446 tmp12 = z1 - tmp10;
2447 tmp13 = z1 + tmp11;
2448 z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */
2449
2450 z4 = z2 - z3;
2451 z3 += z2;
2452 tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
2453 tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
2454 z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */
2455
2456 tmp20 = tmp13 + tmp10 + tmp11;
2568 JCOEFPTR inptr;
2569 ISLOW_MULT_TYPE * quantptr;
2570 int * wsptr;
2571 JSAMPROW outptr;
2572 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2573 int ctr;
2574 int workspace[8*16]; /* buffers data between passes */
2575 SHIFT_TEMPS
2576
2577 /* Pass 1: process columns from input, store into work array. */
2578
2579 inptr = coef_block;
2580 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2581 wsptr = workspace;
2582 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
2583 /* Even part */
2584
2585 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2586 tmp0 <<= CONST_BITS;
2587 /* Add fudge factor here for final descale. */
2588 tmp0 += 1 << (CONST_BITS-PASS1_BITS-1);
2589
2590 z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2591 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
2592 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
2593
2594 tmp10 = tmp0 + tmp1;
2595 tmp11 = tmp0 - tmp1;
2596 tmp12 = tmp0 + tmp2;
2597 tmp13 = tmp0 - tmp2;
2598
2599 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2600 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2601 z3 = z1 - z2;
2602 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
2603 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
2604
2605 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
2606 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
2607 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
2608 tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
2666 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS-PASS1_BITS);
2667 wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS-PASS1_BITS);
2668 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
2669 wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
2670 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
2671 wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
2672 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
2673 wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
2674 wsptr[8*7] = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
2675 wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
2676 }
2677
2678 /* Pass 2: process 16 rows from work array, store into output array. */
2679
2680 wsptr = workspace;
2681 for (ctr = 0; ctr < 16; ctr++) {
2682 outptr = output_buf[ctr] + output_col;
2683
2684 /* Even part */
2685
2686 /* Add fudge factor here for final descale. */
2687 tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
2688 tmp0 <<= CONST_BITS;
2689
2690 z1 = (INT32) wsptr[4];
2691 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
2692 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
2693
2694 tmp10 = tmp0 + tmp1;
2695 tmp11 = tmp0 - tmp1;
2696 tmp12 = tmp0 + tmp2;
2697 tmp13 = tmp0 - tmp2;
2698
2699 z1 = (INT32) wsptr[2];
2700 z2 = (INT32) wsptr[6];
2701 z3 = z1 - z2;
2702 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
2703 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
2704
2705 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
2706 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
2707 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
2818 * 8-point IDCT in pass 1 (columns), 16-point in pass 2 (rows).
2819 */
2820
2821 GLOBAL(void)
2822 jpeg_idct_16x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2823 JCOEFPTR coef_block,
2824 JSAMPARRAY output_buf, JDIMENSION output_col)
2825 {
2826 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
2827 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
2828 INT32 z1, z2, z3, z4;
2829 JCOEFPTR inptr;
2830 ISLOW_MULT_TYPE * quantptr;
2831 int * wsptr;
2832 JSAMPROW outptr;
2833 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2834 int ctr;
2835 int workspace[8*8]; /* buffers data between passes */
2836 SHIFT_TEMPS
2837
2838 /* Pass 1: process columns from input, store into work array. */
2839 /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
2840 /* furthermore, we scale the results by 2**PASS1_BITS. */
2841
2842 inptr = coef_block;
2843 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2844 wsptr = workspace;
2845 for (ctr = DCTSIZE; ctr > 0; ctr--) {
2846 /* Due to quantization, we will usually find that many of the input
2847 * coefficients are zero, especially the AC terms. We can exploit this
2848 * by short-circuiting the IDCT calculation for any column in which all
2849 * the AC terms are zero. In that case each output is equal to the
2850 * DC coefficient (with scale factor as needed).
2851 * With typical images and quantization tables, half or more of the
2852 * column DCT calculations can be simplified this way.
2853 */
2854
2855 if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
2856 inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
2857 inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
2858 inptr[DCTSIZE*7] == 0) {
2859 /* AC terms all zero */
2860 int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
2861
2862 wsptr[DCTSIZE*0] = dcval;
2863 wsptr[DCTSIZE*1] = dcval;
2864 wsptr[DCTSIZE*2] = dcval;
2865 wsptr[DCTSIZE*3] = dcval;
2866 wsptr[DCTSIZE*4] = dcval;
2867 wsptr[DCTSIZE*5] = dcval;
2868 wsptr[DCTSIZE*6] = dcval;
2869 wsptr[DCTSIZE*7] = dcval;
2870
2871 inptr++; /* advance pointers to next column */
2872 quantptr++;
2873 wsptr++;
2874 continue;
2875 }
2876
2877 /* Even part: reverse the even part of the forward DCT. */
2878 /* The rotator is sqrt(2)*c(-6). */
2879
2880 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2881 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2882
2883 z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
2884 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);
2885 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);
2886
2887 z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2888 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2889 z2 <<= CONST_BITS;
2890 z3 <<= CONST_BITS;
2891 /* Add fudge factor here for final descale. */
2892 z2 += ONE << (CONST_BITS-PASS1_BITS-1);
2893
2894 tmp0 = z2 + z3;
2895 tmp1 = z2 - z3;
2896
2897 tmp10 = tmp0 + tmp2;
2898 tmp13 = tmp0 - tmp2;
2899 tmp11 = tmp1 + tmp3;
2900 tmp12 = tmp1 - tmp3;
2901
2902 /* Odd part per figure 8; the matrix is unitary and hence its
2903 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
2904 */
2905
2906 tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2907 tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2908 tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2909 tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2910
2911 z2 = tmp0 + tmp2;
2912 z3 = tmp1 + tmp3;
2913
2914 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */
2915 z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
2916 z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
2917 z2 += z1;
2918 z3 += z1;
2919
2920 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
2921 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
2922 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
2923 tmp0 += z1 + z2;
2924 tmp3 += z1 + z3;
2925
2926 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
2927 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
2928 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
2929 tmp1 += z1 + z3;
2930 tmp2 += z1 + z2;
2931
2932 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
2933
2934 wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
2935 wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
2936 wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
2937 wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
2938 wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
2939 wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
2940 wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
2941 wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
2942
2943 inptr++; /* advance pointers to next column */
2944 quantptr++;
2945 wsptr++;
2946 }
2947
2948 /* Pass 2: process 8 rows from work array, store into output array.
2949 * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
2950 */
2951 wsptr = workspace;
2952 for (ctr = 0; ctr < 8; ctr++) {
2953 outptr = output_buf[ctr] + output_col;
2954
2955 /* Even part */
2956
2957 /* Add fudge factor here for final descale. */
2958 tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
2959 tmp0 <<= CONST_BITS;
2960
2961 z1 = (INT32) wsptr[4];
2962 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
2963 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
2964
2965 tmp10 = tmp0 + tmp1;
2966 tmp11 = tmp0 - tmp1;
2967 tmp12 = tmp0 + tmp2;
2968 tmp13 = tmp0 - tmp2;
2969
2970 z1 = (INT32) wsptr[2];
2971 z2 = (INT32) wsptr[6];
2972 z3 = z1 - z2;
2973 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
2974 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
2975
2976 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
2977 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
2978 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
3092 GLOBAL(void)
3093 jpeg_idct_14x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3094 JCOEFPTR coef_block,
3095 JSAMPARRAY output_buf, JDIMENSION output_col)
3096 {
3097 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
3098 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
3099 INT32 z1, z2, z3, z4;
3100 JCOEFPTR inptr;
3101 ISLOW_MULT_TYPE * quantptr;
3102 int * wsptr;
3103 JSAMPROW outptr;
3104 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3105 int ctr;
3106 int workspace[8*7]; /* buffers data between passes */
3107 SHIFT_TEMPS
3108
3109 /* Pass 1: process columns from input, store into work array.
3110 * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
3111 */
3112 inptr = coef_block;
3113 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3114 wsptr = workspace;
3115 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3116 /* Even part */
3117
3118 tmp23 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3119 tmp23 <<= CONST_BITS;
3120 /* Add fudge factor here for final descale. */
3121 tmp23 += ONE << (CONST_BITS-PASS1_BITS-1);
3122
3123 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3124 z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
3125 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
3126
3127 tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
3128 tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
3129 tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
3130 tmp10 = z1 + z3;
3131 z2 -= tmp10;
3147 tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
3148 tmp11 += tmp12;
3149 z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
3150 tmp10 += z2;
3151 tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
3152
3153 /* Final output stage */
3154
3155 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
3156 wsptr[8*6] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
3157 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
3158 wsptr[8*5] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
3159 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
3160 wsptr[8*4] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
3161 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23, CONST_BITS-PASS1_BITS);
3162 }
3163
3164 /* Pass 2: process 7 rows from work array, store into output array.
3165 * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
3166 */
3167 wsptr = workspace;
3168 for (ctr = 0; ctr < 7; ctr++) {
3169 outptr = output_buf[ctr] + output_col;
3170
3171 /* Even part */
3172
3173 /* Add fudge factor here for final descale. */
3174 z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
3175 z1 <<= CONST_BITS;
3176 z4 = (INT32) wsptr[4];
3177 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
3178 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
3179 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
3180
3181 tmp10 = z1 + z2;
3182 tmp11 = z1 + z3;
3183 tmp12 = z1 - z4;
3184
3185 tmp23 = z1 - ((z2 + z3 - z4) << 1); /* c0 = (c4+c12-c8)*2 */
3186
3187 z1 = (INT32) wsptr[2];
3188 z2 = (INT32) wsptr[6];
3189
3190 z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
3191
3192 tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
3193 tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
3194 tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
3287 GLOBAL(void)
3288 jpeg_idct_12x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3289 JCOEFPTR coef_block,
3290 JSAMPARRAY output_buf, JDIMENSION output_col)
3291 {
3292 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
3293 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
3294 INT32 z1, z2, z3, z4;
3295 JCOEFPTR inptr;
3296 ISLOW_MULT_TYPE * quantptr;
3297 int * wsptr;
3298 JSAMPROW outptr;
3299 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3300 int ctr;
3301 int workspace[8*6]; /* buffers data between passes */
3302 SHIFT_TEMPS
3303
3304 /* Pass 1: process columns from input, store into work array.
3305 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
3306 */
3307 inptr = coef_block;
3308 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3309 wsptr = workspace;
3310 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3311 /* Even part */
3312
3313 tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3314 tmp10 <<= CONST_BITS;
3315 /* Add fudge factor here for final descale. */
3316 tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
3317 tmp12 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
3318 tmp20 = MULTIPLY(tmp12, FIX(0.707106781)); /* c4 */
3319 tmp11 = tmp10 + tmp20;
3320 tmp21 = RIGHT_SHIFT(tmp10 - tmp20 - tmp20, CONST_BITS-PASS1_BITS);
3321 tmp20 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3322 tmp10 = MULTIPLY(tmp20, FIX(1.224744871)); /* c2 */
3323 tmp20 = tmp11 + tmp10;
3324 tmp22 = tmp11 - tmp10;
3325
3326 /* Odd part */
3329 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3330 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
3331 tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
3332 tmp10 = tmp11 + ((z1 + z2) << CONST_BITS);
3333 tmp12 = tmp11 + ((z3 - z2) << CONST_BITS);
3334 tmp11 = (z1 - z2 - z3) << PASS1_BITS;
3335
3336 /* Final output stage */
3337
3338 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
3339 wsptr[8*5] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
3340 wsptr[8*1] = (int) (tmp21 + tmp11);
3341 wsptr[8*4] = (int) (tmp21 - tmp11);
3342 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
3343 wsptr[8*3] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
3344 }
3345
3346 /* Pass 2: process 6 rows from work array, store into output array.
3347 * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
3348 */
3349 wsptr = workspace;
3350 for (ctr = 0; ctr < 6; ctr++) {
3351 outptr = output_buf[ctr] + output_col;
3352
3353 /* Even part */
3354
3355 /* Add fudge factor here for final descale. */
3356 z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
3357 z3 <<= CONST_BITS;
3358
3359 z4 = (INT32) wsptr[4];
3360 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
3361
3362 tmp10 = z3 + z4;
3363 tmp11 = z3 - z4;
3364
3365 z1 = (INT32) wsptr[2];
3366 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
3367 z1 <<= CONST_BITS;
3368 z2 = (INT32) wsptr[6];
3369 z2 <<= CONST_BITS;
3370
3371 tmp12 = z1 - z2;
3372
3373 tmp21 = z3 + tmp12;
3374 tmp24 = z3 - tmp12;
3375
3376 tmp12 = z4 + z2;
3463 GLOBAL(void)
3464 jpeg_idct_10x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3465 JCOEFPTR coef_block,
3466 JSAMPARRAY output_buf, JDIMENSION output_col)
3467 {
3468 INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
3469 INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
3470 INT32 z1, z2, z3, z4;
3471 JCOEFPTR inptr;
3472 ISLOW_MULT_TYPE * quantptr;
3473 int * wsptr;
3474 JSAMPROW outptr;
3475 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3476 int ctr;
3477 int workspace[8*5]; /* buffers data between passes */
3478 SHIFT_TEMPS
3479
3480 /* Pass 1: process columns from input, store into work array.
3481 * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
3482 */
3483 inptr = coef_block;
3484 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3485 wsptr = workspace;
3486 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3487 /* Even part */
3488
3489 tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3490 tmp12 <<= CONST_BITS;
3491 /* Add fudge factor here for final descale. */
3492 tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
3493 tmp13 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3494 tmp14 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
3495 z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */
3496 z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */
3497 z3 = tmp12 + z2;
3498 tmp10 = z3 + z1;
3499 tmp11 = z3 - z1;
3500 tmp12 -= z2 << 2;
3501
3502 /* Odd part */
3503
3504 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3505 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3506
3507 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
3508 tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
3509 tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
3510
3511 /* Final output stage */
3512
3513 wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp13, CONST_BITS-PASS1_BITS);
3514 wsptr[8*4] = (int) RIGHT_SHIFT(tmp10 - tmp13, CONST_BITS-PASS1_BITS);
3515 wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp14, CONST_BITS-PASS1_BITS);
3516 wsptr[8*3] = (int) RIGHT_SHIFT(tmp11 - tmp14, CONST_BITS-PASS1_BITS);
3517 wsptr[8*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
3518 }
3519
3520 /* Pass 2: process 5 rows from work array, store into output array.
3521 * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
3522 */
3523 wsptr = workspace;
3524 for (ctr = 0; ctr < 5; ctr++) {
3525 outptr = output_buf[ctr] + output_col;
3526
3527 /* Even part */
3528
3529 /* Add fudge factor here for final descale. */
3530 z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
3531 z3 <<= CONST_BITS;
3532 z4 = (INT32) wsptr[4];
3533 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
3534 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
3535 tmp10 = z3 + z1;
3536 tmp11 = z3 - z2;
3537
3538 tmp22 = z3 - ((z1 - z2) << 1); /* c0 = (c4-c8)*2 */
3539
3540 z2 = (INT32) wsptr[2];
3541 z3 = (INT32) wsptr[6];
3542
3543 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
3544 tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
3545 tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
3546
3547 tmp20 = tmp10 + tmp12;
3548 tmp24 = tmp10 - tmp12;
3549 tmp21 = tmp11 + tmp13;
3550 tmp23 = tmp11 - tmp13;
3622 */
3623
3624 GLOBAL(void)
3625 jpeg_idct_8x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3626 JCOEFPTR coef_block,
3627 JSAMPARRAY output_buf, JDIMENSION output_col)
3628 {
3629 INT32 tmp0, tmp1, tmp2, tmp3;
3630 INT32 tmp10, tmp11, tmp12, tmp13;
3631 INT32 z1, z2, z3;
3632 JCOEFPTR inptr;
3633 ISLOW_MULT_TYPE * quantptr;
3634 int * wsptr;
3635 JSAMPROW outptr;
3636 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3637 int ctr;
3638 int workspace[8*4]; /* buffers data between passes */
3639 SHIFT_TEMPS
3640
3641 /* Pass 1: process columns from input, store into work array.
3642 * 4-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
3643 */
3644 inptr = coef_block;
3645 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3646 wsptr = workspace;
3647 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3648 /* Even part */
3649
3650 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3651 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3652
3653 tmp10 = (tmp0 + tmp2) << PASS1_BITS;
3654 tmp12 = (tmp0 - tmp2) << PASS1_BITS;
3655
3656 /* Odd part */
3657 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
3658
3659 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3660 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3661
3662 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
3663 /* Add fudge factor here for final descale. */
3664 z1 += ONE << (CONST_BITS-PASS1_BITS-1);
3665 tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */
3666 CONST_BITS-PASS1_BITS);
3667 tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */
3668 CONST_BITS-PASS1_BITS);
3669
3670 /* Final output stage */
3671
3672 wsptr[8*0] = (int) (tmp10 + tmp0);
3673 wsptr[8*3] = (int) (tmp10 - tmp0);
3674 wsptr[8*1] = (int) (tmp12 + tmp2);
3675 wsptr[8*2] = (int) (tmp12 - tmp2);
3676 }
3677
3678 /* Pass 2: process rows from work array, store into output array. */
3679 /* Note that we must descale the results by a factor of 8 == 2**3, */
3680 /* and also undo the PASS1_BITS scaling. */
3681
3682 wsptr = workspace;
3683 for (ctr = 0; ctr < 4; ctr++) {
3684 outptr = output_buf[ctr] + output_col;
3685
3686 /* Even part: reverse the even part of the forward DCT. */
3687 /* The rotator is sqrt(2)*c(-6). */
3688
3689 z2 = (INT32) wsptr[2];
3690 z3 = (INT32) wsptr[6];
3691
3692 z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
3693 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);
3694 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);
3695
3696 /* Add fudge factor here for final descale. */
3697 z2 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
3698 z3 = (INT32) wsptr[4];
3699
3700 tmp0 = (z2 + z3) << CONST_BITS;
3701 tmp1 = (z2 - z3) << CONST_BITS;
3702
3703 tmp10 = tmp0 + tmp2;
3704 tmp13 = tmp0 - tmp2;
3705 tmp11 = tmp1 + tmp3;
3706 tmp12 = tmp1 - tmp3;
3707
3708 /* Odd part per figure 8; the matrix is unitary and hence its
3709 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
3710 */
3711
3712 tmp0 = (INT32) wsptr[7];
3713 tmp1 = (INT32) wsptr[5];
3714 tmp2 = (INT32) wsptr[3];
3715 tmp3 = (INT32) wsptr[1];
3716
3717 z2 = tmp0 + tmp2;
3718 z3 = tmp1 + tmp3;
3719
3720 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */
3721 z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
3722 z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
3723 z2 += z1;
3724 z3 += z1;
3725
3726 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
3727 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
3728 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
3729 tmp0 += z1 + z2;
3730 tmp3 += z1 + z3;
3731
3732 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
3733 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
3734 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
3735 tmp1 += z1 + z3;
3736 tmp2 += z1 + z2;
3737
3738 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
3739
3740 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
3741 CONST_BITS+PASS1_BITS+3)
3742 & RANGE_MASK];
3743 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
3744 CONST_BITS+PASS1_BITS+3)
3745 & RANGE_MASK];
3746 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
3747 CONST_BITS+PASS1_BITS+3)
3748 & RANGE_MASK];
3749 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
3750 CONST_BITS+PASS1_BITS+3)
3751 & RANGE_MASK];
3752 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
3753 CONST_BITS+PASS1_BITS+3)
3754 & RANGE_MASK];
3776
3777 GLOBAL(void)
3778 jpeg_idct_6x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3779 JCOEFPTR coef_block,
3780 JSAMPARRAY output_buf, JDIMENSION output_col)
3781 {
3782 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
3783 INT32 z1, z2, z3;
3784 JCOEFPTR inptr;
3785 ISLOW_MULT_TYPE * quantptr;
3786 int * wsptr;
3787 JSAMPROW outptr;
3788 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3789 int ctr;
3790 int workspace[6*3]; /* buffers data between passes */
3791 SHIFT_TEMPS
3792
3793 /* Pass 1: process columns from input, store into work array.
3794 * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
3795 */
3796 inptr = coef_block;
3797 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3798 wsptr = workspace;
3799 for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
3800 /* Even part */
3801
3802 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3803 tmp0 <<= CONST_BITS;
3804 /* Add fudge factor here for final descale. */
3805 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
3806 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3807 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
3808 tmp10 = tmp0 + tmp12;
3809 tmp2 = tmp0 - tmp12 - tmp12;
3810
3811 /* Odd part */
3812
3813 tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3814 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
3815
3816 /* Final output stage */
3817
3818 wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
3819 wsptr[6*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
3820 wsptr[6*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
3821 }
3822
3823 /* Pass 2: process 3 rows from work array, store into output array.
3824 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
3825 */
3826 wsptr = workspace;
3827 for (ctr = 0; ctr < 3; ctr++) {
3828 outptr = output_buf[ctr] + output_col;
3829
3830 /* Even part */
3831
3832 /* Add fudge factor here for final descale. */
3833 tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
3834 tmp0 <<= CONST_BITS;
3835 tmp2 = (INT32) wsptr[4];
3836 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
3837 tmp1 = tmp0 + tmp10;
3838 tmp11 = tmp0 - tmp10 - tmp10;
3839 tmp10 = (INT32) wsptr[2];
3840 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
3841 tmp10 = tmp1 + tmp0;
3842 tmp12 = tmp1 - tmp0;
3843
3844 /* Odd part */
3845
3846 z1 = (INT32) wsptr[1];
3847 z2 = (INT32) wsptr[3];
3848 z3 = (INT32) wsptr[5];
3849 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
3850 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
3851 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
3852 tmp1 = (z1 - z2 - z3) << CONST_BITS;
3853
3907 wsptr = workspace;
3908 for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) {
3909 /* Even part */
3910
3911 tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3912
3913 /* Odd part */
3914
3915 tmp0 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3916
3917 /* Final output stage */
3918
3919 wsptr[4*0] = tmp10 + tmp0;
3920 wsptr[4*1] = tmp10 - tmp0;
3921 }
3922
3923 /* Pass 2: process 2 rows from work array, store into output array.
3924 * 4-point IDCT kernel,
3925 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
3926 */
3927 wsptr = workspace;
3928 for (ctr = 0; ctr < 2; ctr++) {
3929 outptr = output_buf[ctr] + output_col;
3930
3931 /* Even part */
3932
3933 /* Add fudge factor here for final descale. */
3934 tmp0 = wsptr[0] + (ONE << 2);
3935 tmp2 = wsptr[2];
3936
3937 tmp10 = (tmp0 + tmp2) << CONST_BITS;
3938 tmp12 = (tmp0 - tmp2) << CONST_BITS;
3939
3940 /* Odd part */
3941 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
3942
3943 z2 = wsptr[1];
3944 z3 = wsptr[3];
3945
3946 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
3947 tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
3948 tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
3949
3950 /* Final output stage */
3951
3952 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
3953 CONST_BITS+3)
3954 & RANGE_MASK];
3962 CONST_BITS+3)
3963 & RANGE_MASK];
3964
3965 wsptr += 4; /* advance pointer to next row */
3966 }
3967 }
3968
3969
3970 /*
3971 * Perform dequantization and inverse DCT on one block of coefficients,
3972 * producing a 2x1 output block.
3973 *
3974 * 1-point IDCT in pass 1 (columns), 2-point in pass 2 (rows).
 *
 * Only coef_block[0] and coef_block[1] are read.  Each is multiplied by the
 * entry at the same index of compptr->dct_table, which is accessed through
 * an ISLOW_MULT_TYPE pointer.  The two results are stored in row 0 of
 * output_buf, at columns output_col and output_col+1.  No workspace array
 * is used and the routine has no return value.
 *
 * NOTE(review): IDCT_range_limit and RANGE_MASK are defined outside this
 * view; presumably the masked table lookup maps each descaled value into
 * the legal sample range -- confirm against their definitions.
3975 */
3976
3977 GLOBAL(void)
3978 jpeg_idct_2x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3979 JCOEFPTR coef_block,
3980 JSAMPARRAY output_buf, JDIMENSION output_col)
3981 {
3982 INT32 tmp0, tmp10; /* tmp10: even term (coefficient 0), tmp0: odd term (coefficient 1) */
3983 ISLOW_MULT_TYPE * quantptr; /* multiplier table taken from compptr->dct_table */
3984 JSAMPROW outptr; /* points at the first of the two output samples */
3985 JSAMPLE *range_limit = IDCT_range_limit(cinfo); /* lookup table indexed in the output stage */
3986 SHIFT_TEMPS /* NOTE(review): defined elsewhere; presumably declares scratch storage for RIGHT_SHIFT -- confirm */
3987
3988 /* Pass 1: empty.  There is no column code; pass 2 reads coef_block directly. */
3989
3990 /* Pass 2: process 1 row from input, store into output array. */
3991
3992 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3993 outptr = output_buf[0] + output_col;
3994
3995 /* Even part */
3996
3997 tmp10 = DEQUANTIZE(coef_block[0], quantptr[0]);
3998 /* Add fudge factor here for final descale.
 * ONE << 2 is half of the 2**3 divisor applied by the shifts below, so the
 * descale rounds rather than truncates (assuming ONE is the INT32
 * constant 1 and RIGHT_SHIFT is an arithmetic shift -- TODO confirm).
 * Adding it once to the even term covers both outputs, because tmp10
 * appears in the sum and in the difference.
 */
3999 tmp10 += ONE << 2;
4000
4001 /* Odd part */
4002
4003 tmp0 = DEQUANTIZE(coef_block[1], quantptr[1]);
4004
4005 /* Final output stage: the sum of the two terms goes to outptr[0] and
 * their difference to outptr[1]; each is shifted right by 3 and masked
 * with RANGE_MASK before indexing range_limit.
 */
4006
4007 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 3) & RANGE_MASK];
4008 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 3) & RANGE_MASK];
4009 }
4010
4011
4012 /*
4013 * Perform dequantization and inverse DCT on one block of coefficients,
4014 * producing a 8x16 output block.
4015 *
4016 * 16-point IDCT in pass 1 (columns), 8-point in pass 2 (rows).
4017 */
4018
4019 GLOBAL(void)
4020 jpeg_idct_8x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4021 JCOEFPTR coef_block,
4022 JSAMPARRAY output_buf, JDIMENSION output_col)
4023 {
4024 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
4025 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
4026 INT32 z1, z2, z3, z4;
4027 JCOEFPTR inptr;
4028 ISLOW_MULT_TYPE * quantptr;
4029 int * wsptr;
4030 JSAMPROW outptr;
4031 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4032 int ctr;
4033 int workspace[8*16]; /* buffers data between passes */
4034 SHIFT_TEMPS
4035
4036 /* Pass 1: process columns from input, store into work array.
4037 * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
4038 */
4039 inptr = coef_block;
4040 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4041 wsptr = workspace;
4042 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
4043 /* Even part */
4044
4045 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4046 tmp0 <<= CONST_BITS;
4047 /* Add fudge factor here for final descale. */
4048 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
4049
4050 z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4051 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
4052 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
4053
4054 tmp10 = tmp0 + tmp1;
4055 tmp11 = tmp0 - tmp1;
4056 tmp12 = tmp0 + tmp2;
4057 tmp13 = tmp0 - tmp2;
4058
4118 /* Final output stage */
4119
4120 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp0, CONST_BITS-PASS1_BITS);
4121 wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0, CONST_BITS-PASS1_BITS);
4122 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp1, CONST_BITS-PASS1_BITS);
4123 wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1, CONST_BITS-PASS1_BITS);
4124 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp2, CONST_BITS-PASS1_BITS);
4125 wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2, CONST_BITS-PASS1_BITS);
4126 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS-PASS1_BITS);
4127 wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS-PASS1_BITS);
4128 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
4129 wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
4130 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
4131 wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
4132 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
4133 wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
4134 wsptr[8*7] = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
4135 wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
4136 }
4137
4138 /* Pass 2: process rows from work array, store into output array. */
4139 /* Note that we must descale the results by a factor of 8 == 2**3, */
4140 /* and also undo the PASS1_BITS scaling. */
4141
4142 wsptr = workspace;
4143 for (ctr = 0; ctr < 16; ctr++) {
4144 outptr = output_buf[ctr] + output_col;
4145
4146 /* Even part: reverse the even part of the forward DCT. */
4147 /* The rotator is sqrt(2)*c(-6). */
4148
4149 z2 = (INT32) wsptr[2];
4150 z3 = (INT32) wsptr[6];
4151
4152 z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
4153 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);
4154 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);
4155
4156 /* Add fudge factor here for final descale. */
4157 z2 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
4158 z3 = (INT32) wsptr[4];
4159
4160 tmp0 = (z2 + z3) << CONST_BITS;
4161 tmp1 = (z2 - z3) << CONST_BITS;
4162
4163 tmp10 = tmp0 + tmp2;
4164 tmp13 = tmp0 - tmp2;
4165 tmp11 = tmp1 + tmp3;
4166 tmp12 = tmp1 - tmp3;
4167
4168 /* Odd part per figure 8; the matrix is unitary and hence its
4169 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
4170 */
4171
4172 tmp0 = (INT32) wsptr[7];
4173 tmp1 = (INT32) wsptr[5];
4174 tmp2 = (INT32) wsptr[3];
4175 tmp3 = (INT32) wsptr[1];
4176
4177 z2 = tmp0 + tmp2;
4178 z3 = tmp1 + tmp3;
4179
4180 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */
4181 z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
4182 z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
4183 z2 += z1;
4184 z3 += z1;
4185
4186 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
4187 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
4188 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
4189 tmp0 += z1 + z2;
4190 tmp3 += z1 + z3;
4191
4192 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
4193 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
4194 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
4195 tmp1 += z1 + z3;
4196 tmp2 += z1 + z2;
4197
4198 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
4199
4200 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
4201 CONST_BITS+PASS1_BITS+3)
4202 & RANGE_MASK];
4203 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
4204 CONST_BITS+PASS1_BITS+3)
4205 & RANGE_MASK];
4206 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
4207 CONST_BITS+PASS1_BITS+3)
4208 & RANGE_MASK];
4209 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
4210 CONST_BITS+PASS1_BITS+3)
4211 & RANGE_MASK];
4212 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
4213 CONST_BITS+PASS1_BITS+3)
4214 & RANGE_MASK];
4237 GLOBAL(void)
4238 jpeg_idct_7x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4239 JCOEFPTR coef_block,
4240 JSAMPARRAY output_buf, JDIMENSION output_col)
4241 {
4242 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
4243 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
4244 INT32 z1, z2, z3, z4;
4245 JCOEFPTR inptr;
4246 ISLOW_MULT_TYPE * quantptr;
4247 int * wsptr;
4248 JSAMPROW outptr;
4249 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4250 int ctr;
4251 int workspace[7*14]; /* buffers data between passes */
4252 SHIFT_TEMPS
4253
4254 /* Pass 1: process columns from input, store into work array.
4255 * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
4256 */
4257 inptr = coef_block;
4258 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4259 wsptr = workspace;
4260 for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
4261 /* Even part */
4262
4263 z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4264 z1 <<= CONST_BITS;
4265 /* Add fudge factor here for final descale. */
4266 z1 += ONE << (CONST_BITS-PASS1_BITS-1);
4267 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4268 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
4269 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
4270 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
4271
4272 tmp10 = z1 + z2;
4273 tmp11 = z1 + z3;
4274 tmp12 = z1 - z4;
4275
4276 tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
4324
4325 wsptr[7*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
4326 wsptr[7*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
4327 wsptr[7*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
4328 wsptr[7*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
4329 wsptr[7*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
4330 wsptr[7*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
4331 wsptr[7*3] = (int) (tmp23 + tmp13);
4332 wsptr[7*10] = (int) (tmp23 - tmp13);
4333 wsptr[7*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
4334 wsptr[7*9] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
4335 wsptr[7*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
4336 wsptr[7*8] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
4337 wsptr[7*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
4338 wsptr[7*7] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
4339 }
4340
4341 /* Pass 2: process 14 rows from work array, store into output array.
4342 * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
4343 */
4344 wsptr = workspace;
4345 for (ctr = 0; ctr < 14; ctr++) {
4346 outptr = output_buf[ctr] + output_col;
4347
4348 /* Even part */
4349
4350 /* Add fudge factor here for final descale. */
4351 tmp23 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
4352 tmp23 <<= CONST_BITS;
4353
4354 z1 = (INT32) wsptr[2];
4355 z2 = (INT32) wsptr[4];
4356 z3 = (INT32) wsptr[6];
4357
4358 tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
4359 tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
4360 tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
4361 tmp10 = z1 + z3;
4362 z2 -= tmp10;
4363 tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */
4364 tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
4365 tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
4366 tmp23 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
4367
4368 /* Odd part */
4369
4370 z1 = (INT32) wsptr[1];
4371 z2 = (INT32) wsptr[3];
4420 GLOBAL(void)
4421 jpeg_idct_6x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4422 JCOEFPTR coef_block,
4423 JSAMPARRAY output_buf, JDIMENSION output_col)
4424 {
4425 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
4426 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
4427 INT32 z1, z2, z3, z4;
4428 JCOEFPTR inptr;
4429 ISLOW_MULT_TYPE * quantptr;
4430 int * wsptr;
4431 JSAMPROW outptr;
4432 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4433 int ctr;
4434 int workspace[6*12]; /* buffers data between passes */
4435 SHIFT_TEMPS
4436
4437 /* Pass 1: process columns from input, store into work array.
4438 * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
4439 */
4440 inptr = coef_block;
4441 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4442 wsptr = workspace;
4443 for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
4444 /* Even part */
4445
4446 z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4447 z3 <<= CONST_BITS;
4448 /* Add fudge factor here for final descale. */
4449 z3 += ONE << (CONST_BITS-PASS1_BITS-1);
4450
4451 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4452 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
4453
4454 tmp10 = z3 + z4;
4455 tmp11 = z3 - z4;
4456
4457 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4458 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
4459 z1 <<= CONST_BITS;
4503
4504 /* Final output stage */
4505
4506 wsptr[6*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
4507 wsptr[6*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
4508 wsptr[6*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
4509 wsptr[6*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
4510 wsptr[6*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
4511 wsptr[6*9] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
4512 wsptr[6*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
4513 wsptr[6*8] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
4514 wsptr[6*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
4515 wsptr[6*7] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
4516 wsptr[6*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
4517 wsptr[6*6] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
4518 }
4519
4520 /* Pass 2: process 12 rows from work array, store into output array.
4521 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
4522 */
4523 wsptr = workspace;
4524 for (ctr = 0; ctr < 12; ctr++) {
4525 outptr = output_buf[ctr] + output_col;
4526
4527 /* Even part */
4528
4529 /* Add fudge factor here for final descale. */
4530 tmp10 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
4531 tmp10 <<= CONST_BITS;
4532 tmp12 = (INT32) wsptr[4];
4533 tmp20 = MULTIPLY(tmp12, FIX(0.707106781)); /* c4 */
4534 tmp11 = tmp10 + tmp20;
4535 tmp21 = tmp10 - tmp20 - tmp20;
4536 tmp20 = (INT32) wsptr[2];
4537 tmp10 = MULTIPLY(tmp20, FIX(1.224744871)); /* c2 */
4538 tmp20 = tmp11 + tmp10;
4539 tmp22 = tmp11 - tmp10;
4540
4541 /* Odd part */
4542
4543 z1 = (INT32) wsptr[1];
4544 z2 = (INT32) wsptr[3];
4545 z3 = (INT32) wsptr[5];
4546 tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
4547 tmp10 = tmp11 + ((z1 + z2) << CONST_BITS);
4548 tmp12 = tmp11 + ((z3 - z2) << CONST_BITS);
4549 tmp11 = (z1 - z2 - z3) << CONST_BITS;
4550
4584 GLOBAL(void)
4585 jpeg_idct_5x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4586 JCOEFPTR coef_block,
4587 JSAMPARRAY output_buf, JDIMENSION output_col)
4588 {
4589 INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
4590 INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
4591 INT32 z1, z2, z3, z4, z5;
4592 JCOEFPTR inptr;
4593 ISLOW_MULT_TYPE * quantptr;
4594 int * wsptr;
4595 JSAMPROW outptr;
4596 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4597 int ctr;
4598 int workspace[5*10]; /* buffers data between passes */
4599 SHIFT_TEMPS
4600
4601 /* Pass 1: process columns from input, store into work array.
4602 * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
4603 */
4604 inptr = coef_block;
4605 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4606 wsptr = workspace;
4607 for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
4608 /* Even part */
4609
4610 z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4611 z3 <<= CONST_BITS;
4612 /* Add fudge factor here for final descale. */
4613 z3 += ONE << (CONST_BITS-PASS1_BITS-1);
4614 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4615 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
4616 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
4617 tmp10 = z3 + z1;
4618 tmp11 = z3 - z2;
4619
4620 tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1), /* c0 = (c4-c8)*2 */
4621 CONST_BITS-PASS1_BITS);
4622
4623 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4659 tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
4660 tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
4661
4662 /* Final output stage */
4663
4664 wsptr[5*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
4665 wsptr[5*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
4666 wsptr[5*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
4667 wsptr[5*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
4668 wsptr[5*2] = (int) (tmp22 + tmp12);
4669 wsptr[5*7] = (int) (tmp22 - tmp12);
4670 wsptr[5*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
4671 wsptr[5*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
4672 wsptr[5*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
4673 wsptr[5*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
4674 }
4675
4676 /* Pass 2: process 10 rows from work array, store into output array.
4677 * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
4678 */
4679 wsptr = workspace;
4680 for (ctr = 0; ctr < 10; ctr++) {
4681 outptr = output_buf[ctr] + output_col;
4682
4683 /* Even part */
4684
4685 /* Add fudge factor here for final descale. */
4686 tmp12 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
4687 tmp12 <<= CONST_BITS;
4688 tmp13 = (INT32) wsptr[2];
4689 tmp14 = (INT32) wsptr[4];
4690 z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */
4691 z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */
4692 z3 = tmp12 + z2;
4693 tmp10 = z3 + z1;
4694 tmp11 = z3 - z1;
4695 tmp12 -= z2 << 2;
4696
4697 /* Odd part */
4698
4699 z2 = (INT32) wsptr[1];
4700 z3 = (INT32) wsptr[3];
4701
4702 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
4703 tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
4704 tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
4705
4706 /* Final output stage */
4733 * 8-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
4734 */
4735
4736 GLOBAL(void)
4737 jpeg_idct_4x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4738 JCOEFPTR coef_block,
4739 JSAMPARRAY output_buf, JDIMENSION output_col)
4740 {
4741 INT32 tmp0, tmp1, tmp2, tmp3;
4742 INT32 tmp10, tmp11, tmp12, tmp13;
4743 INT32 z1, z2, z3;
4744 JCOEFPTR inptr;
4745 ISLOW_MULT_TYPE * quantptr;
4746 int * wsptr;
4747 JSAMPROW outptr;
4748 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4749 int ctr;
4750 int workspace[4*8]; /* buffers data between passes */
4751 SHIFT_TEMPS
4752
4753 /* Pass 1: process columns from input, store into work array. */
4754 /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
4755 /* furthermore, we scale the results by 2**PASS1_BITS. */
4756
4757 inptr = coef_block;
4758 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4759 wsptr = workspace;
4760 for (ctr = 4; ctr > 0; ctr--) {
4761 /* Due to quantization, we will usually find that many of the input
4762 * coefficients are zero, especially the AC terms. We can exploit this
4763 * by short-circuiting the IDCT calculation for any column in which all
4764 * the AC terms are zero. In that case each output is equal to the
4765 * DC coefficient (with scale factor as needed).
4766 * With typical images and quantization tables, half or more of the
4767 * column DCT calculations can be simplified this way.
4768 */
4769
4770 if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
4771 inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
4772 inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
4773 inptr[DCTSIZE*7] == 0) {
4774 /* AC terms all zero */
4775 int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
4776
4777 wsptr[4*0] = dcval;
4778 wsptr[4*1] = dcval;
4779 wsptr[4*2] = dcval;
4780 wsptr[4*3] = dcval;
4781 wsptr[4*4] = dcval;
4782 wsptr[4*5] = dcval;
4783 wsptr[4*6] = dcval;
4784 wsptr[4*7] = dcval;
4785
4786 inptr++; /* advance pointers to next column */
4787 quantptr++;
4788 wsptr++;
4789 continue;
4790 }
4791
4792 /* Even part: reverse the even part of the forward DCT. */
4793 /* The rotator is sqrt(2)*c(-6). */
4794
4795 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4796 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4797
4798 z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
4799 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);
4800 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);
4801
4802 z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4803 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4804 z2 <<= CONST_BITS;
4805 z3 <<= CONST_BITS;
4806 /* Add fudge factor here for final descale. */
4807 z2 += ONE << (CONST_BITS-PASS1_BITS-1);
4808
4809 tmp0 = z2 + z3;
4810 tmp1 = z2 - z3;
4811
4812 tmp10 = tmp0 + tmp2;
4813 tmp13 = tmp0 - tmp2;
4814 tmp11 = tmp1 + tmp3;
4815 tmp12 = tmp1 - tmp3;
4816
4817 /* Odd part per figure 8; the matrix is unitary and hence its
4818 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
4819 */
4820
4821 tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4822 tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4823 tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4824 tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4825
4826 z2 = tmp0 + tmp2;
4827 z3 = tmp1 + tmp3;
4828
4829 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */
4830 z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
4831 z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
4832 z2 += z1;
4833 z3 += z1;
4834
4835 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
4836 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
4837 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
4838 tmp0 += z1 + z2;
4839 tmp3 += z1 + z3;
4840
4841 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
4842 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
4843 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
4844 tmp1 += z1 + z3;
4845 tmp2 += z1 + z2;
4846
4847 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
4848
4849 wsptr[4*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
4850 wsptr[4*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
4851 wsptr[4*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
4852 wsptr[4*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
4853 wsptr[4*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
4854 wsptr[4*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
4855 wsptr[4*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
4856 wsptr[4*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
4857
4858 inptr++; /* advance pointers to next column */
4859 quantptr++;
4860 wsptr++;
4861 }
4862
4863 /* Pass 2: process 8 rows from work array, store into output array.
4864 * 4-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
4865 */
4866 wsptr = workspace;
4867 for (ctr = 0; ctr < 8; ctr++) {
4868 outptr = output_buf[ctr] + output_col;
4869
4870 /* Even part */
4871
4872 /* Add fudge factor here for final descale. */
4873 tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
4874 tmp2 = (INT32) wsptr[2];
4875
4876 tmp10 = (tmp0 + tmp2) << CONST_BITS;
4877 tmp12 = (tmp0 - tmp2) << CONST_BITS;
4878
4879 /* Odd part */
4880 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
4881
4882 z2 = (INT32) wsptr[1];
4883 z3 = (INT32) wsptr[3];
4884
4885 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
4886 tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
4887 tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
4888
4889 /* Final output stage */
4890
4891 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
4892 CONST_BITS+PASS1_BITS+3)
4893 & RANGE_MASK];
4915
4916 GLOBAL(void)
4917 jpeg_idct_3x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4918 JCOEFPTR coef_block,
4919 JSAMPARRAY output_buf, JDIMENSION output_col)
4920 {
4921 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
4922 INT32 z1, z2, z3;
4923 JCOEFPTR inptr;
4924 ISLOW_MULT_TYPE * quantptr;
4925 int * wsptr;
4926 JSAMPROW outptr;
4927 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4928 int ctr;
4929 int workspace[3*6]; /* buffers data between passes */
4930 SHIFT_TEMPS
4931
4932 /* Pass 1: process columns from input, store into work array.
4933 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
4934 */
4935 inptr = coef_block;
4936 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4937 wsptr = workspace;
4938 for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
4939 /* Even part */
4940
4941 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4942 tmp0 <<= CONST_BITS;
4943 /* Add fudge factor here for final descale. */
4944 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
4945 tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4946 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
4947 tmp1 = tmp0 + tmp10;
4948 tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
4949 tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4950 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
4951 tmp10 = tmp1 + tmp0;
4952 tmp12 = tmp1 - tmp0;
4953
4954 /* Odd part */
4957 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4958 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4959 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
4960 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
4961 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
4962 tmp1 = (z1 - z2 - z3) << PASS1_BITS;
4963
4964 /* Final output stage */
4965
4966 wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
4967 wsptr[3*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
4968 wsptr[3*1] = (int) (tmp11 + tmp1);
4969 wsptr[3*4] = (int) (tmp11 - tmp1);
4970 wsptr[3*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
4971 wsptr[3*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
4972 }
4973
4974 /* Pass 2: process 6 rows from work array, store into output array.
4975 * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
4976 */
4977 wsptr = workspace;
4978 for (ctr = 0; ctr < 6; ctr++) {
4979 outptr = output_buf[ctr] + output_col;
4980
4981 /* Even part */
4982
4983 /* Add fudge factor here for final descale. */
4984 tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
4985 tmp0 <<= CONST_BITS;
4986 tmp2 = (INT32) wsptr[2];
4987 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
4988 tmp10 = tmp0 + tmp12;
4989 tmp2 = tmp0 - tmp12 - tmp12;
4990
4991 /* Odd part */
4992
4993 tmp12 = (INT32) wsptr[1];
4994 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
4995
4996 /* Final output stage */
4997
4998 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
4999 CONST_BITS+PASS1_BITS+3)
5000 & RANGE_MASK];
5001 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
5002 CONST_BITS+PASS1_BITS+3)
5003 & RANGE_MASK];
5004 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
5020 GLOBAL(void)
5021 jpeg_idct_2x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
5022 JCOEFPTR coef_block,
5023 JSAMPARRAY output_buf, JDIMENSION output_col)
5024 {
5025 INT32 tmp0, tmp2, tmp10, tmp12;
5026 INT32 z1, z2, z3;
5027 JCOEFPTR inptr;
5028 ISLOW_MULT_TYPE * quantptr;
5029 INT32 * wsptr;
5030 JSAMPROW outptr;
5031 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
5032 int ctr;
5033 INT32 workspace[2*4]; /* buffers data between passes */
5034 SHIFT_TEMPS
5035
5036 /* Pass 1: process columns from input, store into work array.
5037 * 4-point IDCT kernel,
5038 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
5039 */
5040 inptr = coef_block;
5041 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
5042 wsptr = workspace;
5043 for (ctr = 0; ctr < 2; ctr++, inptr++, quantptr++, wsptr++) {
5044 /* Even part */
5045
5046 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
5047 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
5048
5049 tmp10 = (tmp0 + tmp2) << CONST_BITS;
5050 tmp12 = (tmp0 - tmp2) << CONST_BITS;
5051
5052 /* Odd part */
5053 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
5054
5055 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
5056 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
5057
5058 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
5059 tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
5060 tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
5061
5062 /* Final output stage */
5063
5064 wsptr[2*0] = tmp10 + tmp0;
5065 wsptr[2*3] = tmp10 - tmp0;
5066 wsptr[2*1] = tmp12 + tmp2;
5067 wsptr[2*2] = tmp12 - tmp2;
5068 }
5069
5070 /* Pass 2: process 4 rows from work array, store into output array. */
5071
5072 wsptr = workspace;
5073 for (ctr = 0; ctr < 4; ctr++) {
5074 outptr = output_buf[ctr] + output_col;
5075
5076 /* Even part */
5077
5078 /* Add fudge factor here for final descale. */
5079 tmp10 = wsptr[0] + (ONE << (CONST_BITS+2));
5080
5081 /* Odd part */
5082
5083 tmp0 = wsptr[1];
5084
5085 /* Final output stage */
5086
5087 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS+3)
5088 & RANGE_MASK];
5089 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS+3)
5090 & RANGE_MASK];
5091
5092 wsptr += 2; /* advance pointer to next row */
5093 }
5094 }
5095
5096
5097 /*
5098 * Perform dequantization and inverse DCT on one block of coefficients,
5099 * producing a 1x2 output block.
5100 *
5101 * 2-point IDCT in pass 1 (columns), 1-point in pass 2 (rows).
5102 */
5103
5104 GLOBAL(void)
5105 jpeg_idct_1x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
5106 JCOEFPTR coef_block,
5107 JSAMPARRAY output_buf, JDIMENSION output_col)
5108 {
5109 INT32 tmp0, tmp10;
5110 ISLOW_MULT_TYPE * quantptr;
5111 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
5112 SHIFT_TEMPS
5113
5114 /* Process 1 column from input, store into output array. */
5115
5116 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
5117
5118 /* Even part */
5119
5120 tmp10 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
5121 /* Add fudge factor here for final descale. */
5122 tmp10 += ONE << 2;
5123
5124 /* Odd part */
5125
5126 tmp0 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
5127
5128 /* Final output stage */
5129
5130 output_buf[0][output_col] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 3)
5131 & RANGE_MASK];
5132 output_buf[1][output_col] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 3)
5133 & RANGE_MASK];
5134 }
5135
5136 #endif /* IDCT_SCALING_SUPPORTED */
5137 #endif /* DCT_ISLOW_SUPPORTED */
|
1 /*
2 * jidctint.c
3 *
4 * Copyright (C) 1991-1998, Thomas G. Lane.
5 * Modification developed 2002-2016 by Guido Vollbeding.
6 * This file is part of the Independent JPEG Group's software.
7 * For conditions of distribution and use, see the accompanying README file.
8 *
9 * This file contains a slow-but-accurate integer implementation of the
10 * inverse DCT (Discrete Cosine Transform). In the IJG code, this routine
11 * must also perform dequantization of the input coefficients.
12 *
13 * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
14 * on each row (or vice versa, but it's more convenient to emit a row at
15 * a time). Direct algorithms are also available, but they are much more
16 * complex and seem not to be any faster when reduced to code.
17 *
18 * This implementation is based on an algorithm described in
19 * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
20 * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
21 * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
22 * The primary algorithm described there uses 11 multiplies and 29 adds.
23 * We use their alternate method with 12 multiplies and 32 adds.
24 * The advantage of this method is that no data path contains more than one
25 * multiplication; this allows a very simple and accurate implementation in
148 * For 12-bit samples, a full 32-bit multiplication will be needed.
149 */
150
151 #if BITS_IN_JSAMPLE == 8
152 #define MULTIPLY(var,const) MULTIPLY16C16(var,const)
153 #else
154 #define MULTIPLY(var,const) ((var) * (const))
155 #endif
156
157
158 /* Dequantize a coefficient by multiplying it by the multiplier-table
159 * entry; produce an int result. In this module, both inputs and result
160 * are 16 bits or less, so either int or short multiply will work.
161 */
162
163 #define DEQUANTIZE(coef,quantval) (((ISLOW_MULT_TYPE) (coef)) * (quantval))
164
165
166 /*
167 * Perform dequantization and inverse DCT on one block of coefficients.
168 *
169 * Optimized algorithm with 12 multiplications in the 1-D kernel.
170 * cK represents sqrt(2) * cos(K*pi/16).
171 */
172
173 GLOBAL(void)
174 jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
175 JCOEFPTR coef_block,
176 JSAMPARRAY output_buf, JDIMENSION output_col)
177 {
178 INT32 tmp0, tmp1, tmp2, tmp3;
179 INT32 tmp10, tmp11, tmp12, tmp13;
180 INT32 z1, z2, z3;
181 JCOEFPTR inptr;
182 ISLOW_MULT_TYPE * quantptr;
183 int * wsptr;
184 JSAMPROW outptr;
185 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
186 int ctr;
187 int workspace[DCTSIZE2]; /* buffers data between passes */
188 SHIFT_TEMPS
189
190 /* Pass 1: process columns from input, store into work array.
191 * Note results are scaled up by sqrt(8) compared to a true IDCT;
192 * furthermore, we scale the results by 2**PASS1_BITS.
193 */
194
195 inptr = coef_block;
196 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
197 wsptr = workspace;
198 for (ctr = DCTSIZE; ctr > 0; ctr--) {
199 /* Due to quantization, we will usually find that many of the input
200 * coefficients are zero, especially the AC terms. We can exploit this
201 * by short-circuiting the IDCT calculation for any column in which all
202 * the AC terms are zero. In that case each output is equal to the
203 * DC coefficient (with scale factor as needed).
204 * With typical images and quantization tables, half or more of the
205 * column DCT calculations can be simplified this way.
206 */
207
208 if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
209 inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
210 inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
211 inptr[DCTSIZE*7] == 0) {
212 /* AC terms all zero */
213 int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
214
215 wsptr[DCTSIZE*0] = dcval;
216 wsptr[DCTSIZE*1] = dcval;
217 wsptr[DCTSIZE*2] = dcval;
218 wsptr[DCTSIZE*3] = dcval;
219 wsptr[DCTSIZE*4] = dcval;
220 wsptr[DCTSIZE*5] = dcval;
221 wsptr[DCTSIZE*6] = dcval;
222 wsptr[DCTSIZE*7] = dcval;
223
224 inptr++; /* advance pointers to next column */
225 quantptr++;
226 wsptr++;
227 continue;
228 }
229
230 /* Even part: reverse the even part of the forward DCT.
231 * The rotator is c(-6).
232 */
233
234 z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
235 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
236 z2 <<= CONST_BITS;
237 z3 <<= CONST_BITS;
238 /* Add fudge factor here for final descale. */
239 z2 += ONE << (CONST_BITS-PASS1_BITS-1);
240
241 tmp0 = z2 + z3;
242 tmp1 = z2 - z3;
243
244 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
245 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
246
247 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
248 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
249 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
250
251 tmp10 = tmp0 + tmp2;
252 tmp13 = tmp0 - tmp2;
253 tmp11 = tmp1 + tmp3;
254 tmp12 = tmp1 - tmp3;
255
256 /* Odd part per figure 8; the matrix is unitary and hence its
257 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
258 */
259
260 tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
261 tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
262 tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
263 tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
264
265 z2 = tmp0 + tmp2;
266 z3 = tmp1 + tmp3;
267
268 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
269 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
270 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
271 z2 += z1;
272 z3 += z1;
273
274 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
275 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
276 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
277 tmp0 += z1 + z2;
278 tmp3 += z1 + z3;
279
280 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
281 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
282 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
283 tmp1 += z1 + z3;
284 tmp2 += z1 + z2;
285
286 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
287
288 wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
289 wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
290 wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
291 wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
292 wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
293 wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
294 wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
295 wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
296
297 inptr++; /* advance pointers to next column */
298 quantptr++;
299 wsptr++;
300 }
301
302 /* Pass 2: process rows from work array, store into output array.
303 * Note that we must descale the results by a factor of 8 == 2**3,
304 * and also undo the PASS1_BITS scaling.
305 */
306
307 wsptr = workspace;
308 for (ctr = 0; ctr < DCTSIZE; ctr++) {
309 outptr = output_buf[ctr] + output_col;
310
311 /* Add range center and fudge factor for final descale and range-limit. */
312 z2 = (INT32) wsptr[0] +
313 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
314 (ONE << (PASS1_BITS+2)));
315
316 /* Rows of zeroes can be exploited in the same way as we did with columns.
317 * However, the column calculation has created many nonzero AC terms, so
318 * the simplification applies less often (typically 5% to 10% of the time).
319 * On machines with very fast multiplication, it's possible that the
320 * test takes more time than it's worth. In that case this section
321 * may be commented out.
322 */
323
324 #ifndef NO_ZERO_ROW_TEST
325 if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
326 wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
327 /* AC terms all zero */
328 JSAMPLE dcval = range_limit[(int) RIGHT_SHIFT(z2, PASS1_BITS+3)
329 & RANGE_MASK];
330
331 outptr[0] = dcval;
332 outptr[1] = dcval;
333 outptr[2] = dcval;
334 outptr[3] = dcval;
335 outptr[4] = dcval;
336 outptr[5] = dcval;
337 outptr[6] = dcval;
338 outptr[7] = dcval;
339
340 wsptr += DCTSIZE; /* advance pointer to next row */
341 continue;
342 }
343 #endif
344
345 /* Even part: reverse the even part of the forward DCT.
346 * The rotator is c(-6).
347 */
348
349 z3 = (INT32) wsptr[4];
350
351 tmp0 = (z2 + z3) << CONST_BITS;
352 tmp1 = (z2 - z3) << CONST_BITS;
353
354 z2 = (INT32) wsptr[2];
355 z3 = (INT32) wsptr[6];
356
357 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
358 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
359 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
360
361 tmp10 = tmp0 + tmp2;
362 tmp13 = tmp0 - tmp2;
363 tmp11 = tmp1 + tmp3;
364 tmp12 = tmp1 - tmp3;
365
366 /* Odd part per figure 8; the matrix is unitary and hence its
367 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
368 */
369
370 tmp0 = (INT32) wsptr[7];
371 tmp1 = (INT32) wsptr[5];
372 tmp2 = (INT32) wsptr[3];
373 tmp3 = (INT32) wsptr[1];
374
375 z2 = tmp0 + tmp2;
376 z3 = tmp1 + tmp3;
377
378 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
379 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
380 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
381 z2 += z1;
382 z3 += z1;
383
384 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
385 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
386 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
387 tmp0 += z1 + z2;
388 tmp3 += z1 + z3;
389
390 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
391 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
392 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
393 tmp1 += z1 + z3;
394 tmp2 += z1 + z2;
395
396 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
397
398 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
399 CONST_BITS+PASS1_BITS+3)
400 & RANGE_MASK];
401 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
402 CONST_BITS+PASS1_BITS+3)
403 & RANGE_MASK];
404 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
405 CONST_BITS+PASS1_BITS+3)
406 & RANGE_MASK];
407 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
408 CONST_BITS+PASS1_BITS+3)
409 & RANGE_MASK];
410 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
411 CONST_BITS+PASS1_BITS+3)
412 & RANGE_MASK];
413 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
414 CONST_BITS+PASS1_BITS+3)
415 & RANGE_MASK];
416 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
417 CONST_BITS+PASS1_BITS+3)
418 & RANGE_MASK];
419 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
420 CONST_BITS+PASS1_BITS+3)
421 & RANGE_MASK];
422
423 wsptr += DCTSIZE; /* advance pointer to next row */
424 }
425 }
426
427 #ifdef IDCT_SCALING_SUPPORTED
428
429
430 /*
431 * Perform dequantization and inverse DCT on one block of coefficients,
432 * producing a reduced-size 7x7 output block.
433 *
434 * Optimized algorithm with 12 multiplications in the 1-D kernel.
435 * cK represents sqrt(2) * cos(K*pi/14).
436 */
437
438 GLOBAL(void)
439 jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
440 JCOEFPTR coef_block,
441 JSAMPARRAY output_buf, JDIMENSION output_col)
442 {
443 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13;
444 INT32 z1, z2, z3;
445 JCOEFPTR inptr;
446 ISLOW_MULT_TYPE * quantptr;
447 int * wsptr;
448 JSAMPROW outptr;
449 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
450 int ctr;
451 int workspace[7*7]; /* buffers data between passes */
452 SHIFT_TEMPS
496
497 /* Final output stage */
498
499 wsptr[7*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
500 wsptr[7*6] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
501 wsptr[7*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
502 wsptr[7*5] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
503 wsptr[7*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
504 wsptr[7*4] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
505 wsptr[7*3] = (int) RIGHT_SHIFT(tmp13, CONST_BITS-PASS1_BITS);
506 }
507
508 /* Pass 2: process 7 rows from work array, store into output array. */
509
510 wsptr = workspace;
511 for (ctr = 0; ctr < 7; ctr++) {
512 outptr = output_buf[ctr] + output_col;
513
514 /* Even part */
515
516 /* Add range center and fudge factor for final descale and range-limit. */
517 tmp13 = (INT32) wsptr[0] +
518 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
519 (ONE << (PASS1_BITS+2)));
520 tmp13 <<= CONST_BITS;
521
522 z1 = (INT32) wsptr[2];
523 z2 = (INT32) wsptr[4];
524 z3 = (INT32) wsptr[6];
525
526 tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
527 tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
528 tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
529 tmp0 = z1 + z3;
530 z2 -= tmp0;
531 tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
532 tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
533 tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
534 tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
535
536 /* Odd part */
537
538 z1 = (INT32) wsptr[1];
539 z2 = (INT32) wsptr[3];
634 tmp1 = (z1 - z2 - z3) << PASS1_BITS;
635
636 /* Final output stage */
637
638 wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
639 wsptr[6*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
640 wsptr[6*1] = (int) (tmp11 + tmp1);
641 wsptr[6*4] = (int) (tmp11 - tmp1);
642 wsptr[6*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
643 wsptr[6*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
644 }
645
646 /* Pass 2: process 6 rows from work array, store into output array. */
647
648 wsptr = workspace;
649 for (ctr = 0; ctr < 6; ctr++) {
650 outptr = output_buf[ctr] + output_col;
651
652 /* Even part */
653
654 /* Add range center and fudge factor for final descale and range-limit. */
655 tmp0 = (INT32) wsptr[0] +
656 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
657 (ONE << (PASS1_BITS+2)));
658 tmp0 <<= CONST_BITS;
659 tmp2 = (INT32) wsptr[4];
660 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
661 tmp1 = tmp0 + tmp10;
662 tmp11 = tmp0 - tmp10 - tmp10;
663 tmp10 = (INT32) wsptr[2];
664 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
665 tmp10 = tmp1 + tmp0;
666 tmp12 = tmp1 - tmp0;
667
668 /* Odd part */
669
670 z1 = (INT32) wsptr[1];
671 z2 = (INT32) wsptr[3];
672 z3 = (INT32) wsptr[5];
673 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
674 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
675 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
676 tmp1 = (z1 - z2 - z3) << CONST_BITS;
677
755 tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
756 tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
757
758 /* Final output stage */
759
760 wsptr[5*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
761 wsptr[5*4] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
762 wsptr[5*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
763 wsptr[5*3] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
764 wsptr[5*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
765 }
766
767 /* Pass 2: process 5 rows from work array, store into output array. */
768
769 wsptr = workspace;
770 for (ctr = 0; ctr < 5; ctr++) {
771 outptr = output_buf[ctr] + output_col;
772
773 /* Even part */
774
775 /* Add range center and fudge factor for final descale and range-limit. */
776 tmp12 = (INT32) wsptr[0] +
777 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
778 (ONE << (PASS1_BITS+2)));
779 tmp12 <<= CONST_BITS;
780 tmp0 = (INT32) wsptr[2];
781 tmp1 = (INT32) wsptr[4];
782 z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
783 z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
784 z3 = tmp12 + z2;
785 tmp10 = z3 + z1;
786 tmp11 = z3 - z1;
787 tmp12 -= z2 << 2;
788
789 /* Odd part */
790
791 z2 = (INT32) wsptr[1];
792 z3 = (INT32) wsptr[3];
793
794 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
795 tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
796 tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
797
798 /* Final output stage */
869 CONST_BITS-PASS1_BITS);
870 tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */
871 CONST_BITS-PASS1_BITS);
872
873 /* Final output stage */
874
875 wsptr[4*0] = (int) (tmp10 + tmp0);
876 wsptr[4*3] = (int) (tmp10 - tmp0);
877 wsptr[4*1] = (int) (tmp12 + tmp2);
878 wsptr[4*2] = (int) (tmp12 - tmp2);
879 }
880
881 /* Pass 2: process 4 rows from work array, store into output array. */
882
883 wsptr = workspace;
884 for (ctr = 0; ctr < 4; ctr++) {
885 outptr = output_buf[ctr] + output_col;
886
887 /* Even part */
888
889 /* Add range center and fudge factor for final descale and range-limit. */
890 tmp0 = (INT32) wsptr[0] +
891 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
892 (ONE << (PASS1_BITS+2)));
893 tmp2 = (INT32) wsptr[2];
894
895 tmp10 = (tmp0 + tmp2) << CONST_BITS;
896 tmp12 = (tmp0 - tmp2) << CONST_BITS;
897
898 /* Odd part */
899 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
900
901 z2 = (INT32) wsptr[1];
902 z3 = (INT32) wsptr[3];
903
904 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
905 tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
906 tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
907
908 /* Final output stage */
909
910 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
911 CONST_BITS+PASS1_BITS+3)
912 & RANGE_MASK];
968 /* Odd part */
969
970 tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
971 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
972
973 /* Final output stage */
974
975 wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
976 wsptr[3*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
977 wsptr[3*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
978 }
979
980 /* Pass 2: process 3 rows from work array, store into output array. */
981
982 wsptr = workspace;
983 for (ctr = 0; ctr < 3; ctr++) {
984 outptr = output_buf[ctr] + output_col;
985
986 /* Even part */
987
988 /* Add range center and fudge factor for final descale and range-limit. */
989 tmp0 = (INT32) wsptr[0] +
990 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
991 (ONE << (PASS1_BITS+2)));
992 tmp0 <<= CONST_BITS;
993 tmp2 = (INT32) wsptr[2];
994 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
995 tmp10 = tmp0 + tmp12;
996 tmp2 = tmp0 - tmp12 - tmp12;
997
998 /* Odd part */
999
1000 tmp12 = (INT32) wsptr[1];
1001 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
1002
1003 /* Final output stage */
1004
1005 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
1006 CONST_BITS+PASS1_BITS+3)
1007 & RANGE_MASK];
1008 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
1009 CONST_BITS+PASS1_BITS+3)
1010 & RANGE_MASK];
1011 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
1012 CONST_BITS+PASS1_BITS+3)
1013 & RANGE_MASK];
1014
1015 wsptr += 3; /* advance pointer to next row */
1016 }
1017 }
1018
1019
1020 /*
1021 * Perform dequantization and inverse DCT on one block of coefficients,
1022 * producing a reduced-size 2x2 output block.
1023 *
1024 * Multiplication-less algorithm.
1025 */
1026
1027 GLOBAL(void)
1028 jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1029 JCOEFPTR coef_block,
1030 JSAMPARRAY output_buf, JDIMENSION output_col)
1031 {
1032 DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
1033 ISLOW_MULT_TYPE * quantptr;
1034 JSAMPROW outptr;
1035 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1036 ISHIFT_TEMPS
1037
1038 /* Pass 1: process columns from input. */
1039
1040 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1041
1042 /* Column 0 */
1043 tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
1044 tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
1045 /* Add range center and fudge factor for final descale and range-limit. */
1046 tmp4 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
1047
1048 tmp0 = tmp4 + tmp5;
1049 tmp2 = tmp4 - tmp5;
1050
1051 /* Column 1 */
1052 tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0+1], quantptr[DCTSIZE*0+1]);
1053 tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1+1], quantptr[DCTSIZE*1+1]);
1054
1055 tmp1 = tmp4 + tmp5;
1056 tmp3 = tmp4 - tmp5;
1057
1058 /* Pass 2: process 2 rows, store into output array. */
1059
1060 /* Row 0 */
1061 outptr = output_buf[0] + output_col;
1062
1063 outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
1064 outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
1065
1066 /* Row 1 */
1067 outptr = output_buf[1] + output_col;
1068
1069 outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp2 + tmp3, 3) & RANGE_MASK];
1070 outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp2 - tmp3, 3) & RANGE_MASK];
1071 }
1072
1073
1074 /*
1075 * Perform dequantization and inverse DCT on one block of coefficients,
1076 * producing a reduced-size 1x1 output block.
1077 *
1078 * We hardly need an inverse DCT routine for this: just take the
1079 * average pixel value, which is one-eighth of the DC coefficient.
1080 */
1081
1082 GLOBAL(void)
1083 jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1084 JCOEFPTR coef_block,
1085 JSAMPARRAY output_buf, JDIMENSION output_col)
1086 {
1087 DCTELEM dcval;
1088 ISLOW_MULT_TYPE * quantptr;
1089 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1090 ISHIFT_TEMPS
1091
1092 /* 1x1 is trivial: just take the DC coefficient divided by 8. */
1093
1094 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1095
1096 dcval = DEQUANTIZE(coef_block[0], quantptr[0]);
1097 /* Add range center and fudge factor for descale and range-limit. */
1098 dcval += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
1099
1100 output_buf[0][output_col] =
1101 range_limit[(int) IRIGHT_SHIFT(dcval, 3) & RANGE_MASK];
1102 }
1103
1104
1105 /*
1106 * Perform dequantization and inverse DCT on one block of coefficients,
1107 * producing a 9x9 output block.
1108 *
1109 * Optimized algorithm with 10 multiplications in the 1-D kernel.
1110 * cK represents sqrt(2) * cos(K*pi/18).
1111 */
1112
1113 GLOBAL(void)
1114 jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1115 JCOEFPTR coef_block,
1116 JSAMPARRAY output_buf, JDIMENSION output_col)
1117 {
1118 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14;
1119 INT32 z1, z2, z3, z4;
1120 JCOEFPTR inptr;
1121 ISLOW_MULT_TYPE * quantptr;
1180
1181 wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
1182 wsptr[8*8] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
1183 wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
1184 wsptr[8*7] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
1185 wsptr[8*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
1186 wsptr[8*6] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
1187 wsptr[8*3] = (int) RIGHT_SHIFT(tmp13 + tmp3, CONST_BITS-PASS1_BITS);
1188 wsptr[8*5] = (int) RIGHT_SHIFT(tmp13 - tmp3, CONST_BITS-PASS1_BITS);
1189 wsptr[8*4] = (int) RIGHT_SHIFT(tmp14, CONST_BITS-PASS1_BITS);
1190 }
1191
1192 /* Pass 2: process 9 rows from work array, store into output array. */
1193
1194 wsptr = workspace;
1195 for (ctr = 0; ctr < 9; ctr++) {
1196 outptr = output_buf[ctr] + output_col;
1197
1198 /* Even part */
1199
1200 /* Add range center and fudge factor for final descale and range-limit. */
1201 tmp0 = (INT32) wsptr[0] +
1202 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
1203 (ONE << (PASS1_BITS+2)));
1204 tmp0 <<= CONST_BITS;
1205
1206 z1 = (INT32) wsptr[2];
1207 z2 = (INT32) wsptr[4];
1208 z3 = (INT32) wsptr[6];
1209
1210 tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */
1211 tmp1 = tmp0 + tmp3;
1212 tmp2 = tmp0 - tmp3 - tmp3;
1213
1214 tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
1215 tmp11 = tmp2 + tmp0;
1216 tmp14 = tmp2 - tmp0 - tmp0;
1217
1218 tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
1219 tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */
1220 tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */
1221
1222 tmp10 = tmp1 + tmp0 - tmp3;
1223 tmp12 = tmp1 - tmp0 + tmp2;
1365 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1366 wsptr[8*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1367 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1368 wsptr[8*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1369 wsptr[8*2] = (int) (tmp22 + tmp12);
1370 wsptr[8*7] = (int) (tmp22 - tmp12);
1371 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1372 wsptr[8*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1373 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1374 wsptr[8*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1375 }
1376
1377 /* Pass 2: process 10 rows from work array, store into output array. */
1378
1379 wsptr = workspace;
1380 for (ctr = 0; ctr < 10; ctr++) {
1381 outptr = output_buf[ctr] + output_col;
1382
1383 /* Even part */
1384
1385 /* Add range center and fudge factor for final descale and range-limit. */
1386 z3 = (INT32) wsptr[0] +
1387 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
1388 (ONE << (PASS1_BITS+2)));
1389 z3 <<= CONST_BITS;
1390 z4 = (INT32) wsptr[4];
1391 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
1392 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
1393 tmp10 = z3 + z1;
1394 tmp11 = z3 - z2;
1395
1396 tmp22 = z3 - ((z1 - z2) << 1); /* c0 = (c4-c8)*2 */
1397
1398 z2 = (INT32) wsptr[2];
1399 z3 = (INT32) wsptr[6];
1400
1401 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
1402 tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
1403 tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
1404
1405 tmp20 = tmp10 + tmp12;
1406 tmp24 = tmp10 - tmp12;
1407 tmp21 = tmp11 + tmp13;
1408 tmp23 = tmp11 - tmp13;
1560 wsptr[8*10] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1561 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1562 wsptr[8*9] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1563 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
1564 wsptr[8*8] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
1565 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1566 wsptr[8*7] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1567 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1568 wsptr[8*6] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1569 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25, CONST_BITS-PASS1_BITS);
1570 }
1571
1572 /* Pass 2: process 11 rows from work array, store into output array. */
1573
1574 wsptr = workspace;
1575 for (ctr = 0; ctr < 11; ctr++) {
1576 outptr = output_buf[ctr] + output_col;
1577
1578 /* Even part */
1579
1580 /* Add range center and fudge factor for final descale and range-limit. */
1581 tmp10 = (INT32) wsptr[0] +
1582 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
1583 (ONE << (PASS1_BITS+2)));
1584 tmp10 <<= CONST_BITS;
1585
1586 z1 = (INT32) wsptr[2];
1587 z2 = (INT32) wsptr[4];
1588 z3 = (INT32) wsptr[6];
1589
1590 tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */
1591 tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */
1592 z4 = z1 + z3;
1593 tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */
1594 z4 -= z2;
1595 tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */
1596 tmp21 = tmp20 + tmp23 + tmp25 -
1597 MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */
1598 tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
1599 tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
1600 tmp24 += tmp25;
1601 tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */
1602 tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */
1603 MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */
1766 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1767 wsptr[8*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1768 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
1769 wsptr[8*9] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
1770 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1771 wsptr[8*8] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1772 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1773 wsptr[8*7] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1774 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
1775 wsptr[8*6] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
1776 }
1777
1778 /* Pass 2: process 12 rows from work array, store into output array. */
1779
1780 wsptr = workspace;
1781 for (ctr = 0; ctr < 12; ctr++) {
1782 outptr = output_buf[ctr] + output_col;
1783
1784 /* Even part */
1785
1786 /* Add range center and fudge factor for final descale and range-limit. */
1787 z3 = (INT32) wsptr[0] +
1788 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
1789 (ONE << (PASS1_BITS+2)));
1790 z3 <<= CONST_BITS;
1791
1792 z4 = (INT32) wsptr[4];
1793 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
1794
1795 tmp10 = z3 + z4;
1796 tmp11 = z3 - z4;
1797
1798 z1 = (INT32) wsptr[2];
1799 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
1800 z1 <<= CONST_BITS;
1801 z2 = (INT32) wsptr[6];
1802 z2 <<= CONST_BITS;
1803
1804 tmp12 = z1 - z2;
1805
1806 tmp21 = z3 + tmp12;
1807 tmp24 = z3 - tmp12;
1808
1809 tmp12 = z4 + z2;
1989 wsptr[8*11] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1990 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
1991 wsptr[8*10] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
1992 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1993 wsptr[8*9] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1994 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1995 wsptr[8*8] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1996 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
1997 wsptr[8*7] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
1998 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26, CONST_BITS-PASS1_BITS);
1999 }
2000
2001 /* Pass 2: process 13 rows from work array, store into output array. */
2002
2003 wsptr = workspace;
2004 for (ctr = 0; ctr < 13; ctr++) {
2005 outptr = output_buf[ctr] + output_col;
2006
2007 /* Even part */
2008
2009 /* Add range center and fudge factor for final descale and range-limit. */
2010 z1 = (INT32) wsptr[0] +
2011 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
2012 (ONE << (PASS1_BITS+2)));
2013 z1 <<= CONST_BITS;
2014
2015 z2 = (INT32) wsptr[2];
2016 z3 = (INT32) wsptr[4];
2017 z4 = (INT32) wsptr[6];
2018
2019 tmp10 = z3 + z4;
2020 tmp11 = z3 - z4;
2021
2022 tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */
2023 tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */
2024
2025 tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */
2026 tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */
2027
2028 tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */
2029 tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */
2030
2031 tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */
2032 tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
2218 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
2219 wsptr[8*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
2220 wsptr[8*3] = (int) (tmp23 + tmp13);
2221 wsptr[8*10] = (int) (tmp23 - tmp13);
2222 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
2223 wsptr[8*9] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
2224 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
2225 wsptr[8*8] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
2226 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
2227 wsptr[8*7] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
2228 }
2229
2230 /* Pass 2: process 14 rows from work array, store into output array. */
2231
2232 wsptr = workspace;
2233 for (ctr = 0; ctr < 14; ctr++) {
2234 outptr = output_buf[ctr] + output_col;
2235
2236 /* Even part */
2237
2238 /* Add range center and fudge factor for final descale and range-limit. */
2239 z1 = (INT32) wsptr[0] +
2240 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
2241 (ONE << (PASS1_BITS+2)));
2242 z1 <<= CONST_BITS;
2243 z4 = (INT32) wsptr[4];
2244 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
2245 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
2246 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
2247
2248 tmp10 = z1 + z2;
2249 tmp11 = z1 + z3;
2250 tmp12 = z1 - z4;
2251
2252 tmp23 = z1 - ((z2 + z3 - z4) << 1); /* c0 = (c4+c12-c8)*2 */
2253
2254 z1 = (INT32) wsptr[2];
2255 z2 = (INT32) wsptr[6];
2256
2257 z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
2258
2259 tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
2260 tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
2261 tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
2452 wsptr[8*12] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
2453 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
2454 wsptr[8*11] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
2455 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
2456 wsptr[8*10] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
2457 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
2458 wsptr[8*9] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
2459 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
2460 wsptr[8*8] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
2461 wsptr[8*7] = (int) RIGHT_SHIFT(tmp27, CONST_BITS-PASS1_BITS);
2462 }
2463
2464 /* Pass 2: process 15 rows from work array, store into output array. */
2465
2466 wsptr = workspace;
2467 for (ctr = 0; ctr < 15; ctr++) {
2468 outptr = output_buf[ctr] + output_col;
2469
2470 /* Even part */
2471
2472 /* Add range center and fudge factor for final descale and range-limit. */
2473 z1 = (INT32) wsptr[0] +
2474 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
2475 (ONE << (PASS1_BITS+2)));
2476 z1 <<= CONST_BITS;
2477
2478 z2 = (INT32) wsptr[2];
2479 z3 = (INT32) wsptr[4];
2480 z4 = (INT32) wsptr[6];
2481
2482 tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
2483 tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
2484
2485 tmp12 = z1 - tmp10;
2486 tmp13 = z1 + tmp11;
2487 z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */
2488
2489 z4 = z2 - z3;
2490 z3 += z2;
2491 tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
2492 tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
2493 z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */
2494
2495 tmp20 = tmp13 + tmp10 + tmp11;
2607 JCOEFPTR inptr;
2608 ISLOW_MULT_TYPE * quantptr;
2609 int * wsptr;
2610 JSAMPROW outptr;
2611 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2612 int ctr;
2613 int workspace[8*16]; /* buffers data between passes */
2614 SHIFT_TEMPS
2615
2616 /* Pass 1: process columns from input, store into work array. */
2617
2618 inptr = coef_block;
2619 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2620 wsptr = workspace;
2621 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
2622 /* Even part */
2623
2624 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2625 tmp0 <<= CONST_BITS;
2626 /* Add fudge factor here for final descale. */
2627 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
2628
2629 z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2630 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
2631 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
2632
2633 tmp10 = tmp0 + tmp1;
2634 tmp11 = tmp0 - tmp1;
2635 tmp12 = tmp0 + tmp2;
2636 tmp13 = tmp0 - tmp2;
2637
2638 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2639 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2640 z3 = z1 - z2;
2641 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
2642 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
2643
2644 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
2645 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
2646 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
2647 tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
2705 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS-PASS1_BITS);
2706 wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS-PASS1_BITS);
2707 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
2708 wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
2709 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
2710 wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
2711 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
2712 wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
2713 wsptr[8*7] = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
2714 wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
2715 }
2716
2717 /* Pass 2: process 16 rows from work array, store into output array. */
2718
2719 wsptr = workspace;
2720 for (ctr = 0; ctr < 16; ctr++) {
2721 outptr = output_buf[ctr] + output_col;
2722
2723 /* Even part */
2724
2725 /* Add range center and fudge factor for final descale and range-limit. */
2726 tmp0 = (INT32) wsptr[0] +
2727 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
2728 (ONE << (PASS1_BITS+2)));
2729 tmp0 <<= CONST_BITS;
2730
2731 z1 = (INT32) wsptr[4];
2732 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
2733 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
2734
2735 tmp10 = tmp0 + tmp1;
2736 tmp11 = tmp0 - tmp1;
2737 tmp12 = tmp0 + tmp2;
2738 tmp13 = tmp0 - tmp2;
2739
2740 z1 = (INT32) wsptr[2];
2741 z2 = (INT32) wsptr[6];
2742 z3 = z1 - z2;
2743 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
2744 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
2745
2746 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
2747 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
2748 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
2859 * 8-point IDCT in pass 1 (columns), 16-point in pass 2 (rows).
2860 */
2861
2862 GLOBAL(void)
2863 jpeg_idct_16x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2864 JCOEFPTR coef_block,
2865 JSAMPARRAY output_buf, JDIMENSION output_col)
2866 {
2867 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
2868 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
2869 INT32 z1, z2, z3, z4;
2870 JCOEFPTR inptr;
2871 ISLOW_MULT_TYPE * quantptr;
2872 int * wsptr;
2873 JSAMPROW outptr;
2874 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2875 int ctr;
2876 int workspace[8*8]; /* buffers data between passes */
2877 SHIFT_TEMPS
2878
2879 /* Pass 1: process columns from input, store into work array.
2880 * Note results are scaled up by sqrt(8) compared to a true IDCT;
2881 * furthermore, we scale the results by 2**PASS1_BITS.
2882 * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
2883 */
2884
2885 inptr = coef_block;
2886 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2887 wsptr = workspace;
2888 for (ctr = DCTSIZE; ctr > 0; ctr--) {
2889 /* Due to quantization, we will usually find that many of the input
2890 * coefficients are zero, especially the AC terms. We can exploit this
2891 * by short-circuiting the IDCT calculation for any column in which all
2892 * the AC terms are zero. In that case each output is equal to the
2893 * DC coefficient (with scale factor as needed).
2894 * With typical images and quantization tables, half or more of the
2895 * column DCT calculations can be simplified this way.
2896 */
2897
2898 if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
2899 inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
2900 inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
2901 inptr[DCTSIZE*7] == 0) {
2902 /* AC terms all zero */
2903 int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
2904
2905 wsptr[DCTSIZE*0] = dcval;
2906 wsptr[DCTSIZE*1] = dcval;
2907 wsptr[DCTSIZE*2] = dcval;
2908 wsptr[DCTSIZE*3] = dcval;
2909 wsptr[DCTSIZE*4] = dcval;
2910 wsptr[DCTSIZE*5] = dcval;
2911 wsptr[DCTSIZE*6] = dcval;
2912 wsptr[DCTSIZE*7] = dcval;
2913
2914 inptr++; /* advance pointers to next column */
2915 quantptr++;
2916 wsptr++;
2917 continue;
2918 }
2919
2920 /* Even part: reverse the even part of the forward DCT.
2921 * The rotator is c(-6).
2922 */
2923
2924 z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2925 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2926 z2 <<= CONST_BITS;
2927 z3 <<= CONST_BITS;
2928 /* Add fudge factor here for final descale. */
2929 z2 += ONE << (CONST_BITS-PASS1_BITS-1);
2930
2931 tmp0 = z2 + z3;
2932 tmp1 = z2 - z3;
2933
2934 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2935 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2936
2937 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
2938 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
2939 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
2940
2941 tmp10 = tmp0 + tmp2;
2942 tmp13 = tmp0 - tmp2;
2943 tmp11 = tmp1 + tmp3;
2944 tmp12 = tmp1 - tmp3;
2945
2946 /* Odd part per figure 8; the matrix is unitary and hence its
2947 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
2948 */
2949
2950 tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2951 tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2952 tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2953 tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2954
2955 z2 = tmp0 + tmp2;
2956 z3 = tmp1 + tmp3;
2957
2958 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
2959 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
2960 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
2961 z2 += z1;
2962 z3 += z1;
2963
2964 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
2965 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
2966 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
2967 tmp0 += z1 + z2;
2968 tmp3 += z1 + z3;
2969
2970 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
2971 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
2972 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
2973 tmp1 += z1 + z3;
2974 tmp2 += z1 + z2;
2975
2976 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
2977
2978 wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
2979 wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
2980 wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
2981 wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
2982 wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
2983 wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
2984 wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
2985 wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
2986
2987 inptr++; /* advance pointers to next column */
2988 quantptr++;
2989 wsptr++;
2990 }
2991
2992 /* Pass 2: process 8 rows from work array, store into output array.
2993 * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
2994 */
2995
2996 wsptr = workspace;
2997 for (ctr = 0; ctr < 8; ctr++) {
2998 outptr = output_buf[ctr] + output_col;
2999
3000 /* Even part */
3001
3002 /* Add range center and fudge factor for final descale and range-limit. */
3003 tmp0 = (INT32) wsptr[0] +
3004 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3005 (ONE << (PASS1_BITS+2)));
3006 tmp0 <<= CONST_BITS;
3007
3008 z1 = (INT32) wsptr[4];
3009 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
3010 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
3011
3012 tmp10 = tmp0 + tmp1;
3013 tmp11 = tmp0 - tmp1;
3014 tmp12 = tmp0 + tmp2;
3015 tmp13 = tmp0 - tmp2;
3016
3017 z1 = (INT32) wsptr[2];
3018 z2 = (INT32) wsptr[6];
3019 z3 = z1 - z2;
3020 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
3021 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
3022
3023 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
3024 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
3025 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
3139 GLOBAL(void)
3140 jpeg_idct_14x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3141 JCOEFPTR coef_block,
3142 JSAMPARRAY output_buf, JDIMENSION output_col)
3143 {
3144 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
3145 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
3146 INT32 z1, z2, z3, z4;
3147 JCOEFPTR inptr;
3148 ISLOW_MULT_TYPE * quantptr;
3149 int * wsptr;
3150 JSAMPROW outptr;
3151 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3152 int ctr;
3153 int workspace[8*7]; /* buffers data between passes */
3154 SHIFT_TEMPS
3155
3156 /* Pass 1: process columns from input, store into work array.
3157 * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
3158 */
3159
3160 inptr = coef_block;
3161 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3162 wsptr = workspace;
3163 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3164 /* Even part */
3165
3166 tmp23 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3167 tmp23 <<= CONST_BITS;
3168 /* Add fudge factor here for final descale. */
3169 tmp23 += ONE << (CONST_BITS-PASS1_BITS-1);
3170
3171 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3172 z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
3173 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
3174
3175 tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
3176 tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
3177 tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
3178 tmp10 = z1 + z3;
3179 z2 -= tmp10;
3195 tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
3196 tmp11 += tmp12;
3197 z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
3198 tmp10 += z2;
3199 tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
3200
3201 /* Final output stage */
3202
3203 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
3204 wsptr[8*6] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
3205 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
3206 wsptr[8*5] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
3207 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
3208 wsptr[8*4] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
3209 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23, CONST_BITS-PASS1_BITS);
3210 }
3211
3212 /* Pass 2: process 7 rows from work array, store into output array.
3213 * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
3214 */
3215
3216 wsptr = workspace;
3217 for (ctr = 0; ctr < 7; ctr++) {
3218 outptr = output_buf[ctr] + output_col;
3219
3220 /* Even part */
3221
3222 /* Add range center and fudge factor for final descale and range-limit. */
3223 z1 = (INT32) wsptr[0] +
3224 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3225 (ONE << (PASS1_BITS+2)));
3226 z1 <<= CONST_BITS;
3227 z4 = (INT32) wsptr[4];
3228 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
3229 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
3230 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
3231
3232 tmp10 = z1 + z2;
3233 tmp11 = z1 + z3;
3234 tmp12 = z1 - z4;
3235
3236 tmp23 = z1 - ((z2 + z3 - z4) << 1); /* c0 = (c4+c12-c8)*2 */
3237
3238 z1 = (INT32) wsptr[2];
3239 z2 = (INT32) wsptr[6];
3240
3241 z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
3242
3243 tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
3244 tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
3245 tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
3338 GLOBAL(void)
3339 jpeg_idct_12x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3340 JCOEFPTR coef_block,
3341 JSAMPARRAY output_buf, JDIMENSION output_col)
3342 {
3343 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
3344 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
3345 INT32 z1, z2, z3, z4;
3346 JCOEFPTR inptr;
3347 ISLOW_MULT_TYPE * quantptr;
3348 int * wsptr;
3349 JSAMPROW outptr;
3350 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3351 int ctr;
3352 int workspace[8*6]; /* buffers data between passes */
3353 SHIFT_TEMPS
3354
3355 /* Pass 1: process columns from input, store into work array.
3356 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
3357 */
3358
3359 inptr = coef_block;
3360 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3361 wsptr = workspace;
3362 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3363 /* Even part */
3364
3365 tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3366 tmp10 <<= CONST_BITS;
3367 /* Add fudge factor here for final descale. */
3368 tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
3369 tmp12 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
3370 tmp20 = MULTIPLY(tmp12, FIX(0.707106781)); /* c4 */
3371 tmp11 = tmp10 + tmp20;
3372 tmp21 = RIGHT_SHIFT(tmp10 - tmp20 - tmp20, CONST_BITS-PASS1_BITS);
3373 tmp20 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3374 tmp10 = MULTIPLY(tmp20, FIX(1.224744871)); /* c2 */
3375 tmp20 = tmp11 + tmp10;
3376 tmp22 = tmp11 - tmp10;
3377
3378 /* Odd part */
3381 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3382 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
3383 tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
3384 tmp10 = tmp11 + ((z1 + z2) << CONST_BITS);
3385 tmp12 = tmp11 + ((z3 - z2) << CONST_BITS);
3386 tmp11 = (z1 - z2 - z3) << PASS1_BITS;
3387
3388 /* Final output stage */
3389
3390 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
3391 wsptr[8*5] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
3392 wsptr[8*1] = (int) (tmp21 + tmp11);
3393 wsptr[8*4] = (int) (tmp21 - tmp11);
3394 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
3395 wsptr[8*3] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
3396 }
3397
3398 /* Pass 2: process 6 rows from work array, store into output array.
3399 * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
3400 */
3401
3402 wsptr = workspace;
3403 for (ctr = 0; ctr < 6; ctr++) {
3404 outptr = output_buf[ctr] + output_col;
3405
3406 /* Even part */
3407
3408 /* Add range center and fudge factor for final descale and range-limit. */
3409 z3 = (INT32) wsptr[0] +
3410 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3411 (ONE << (PASS1_BITS+2)));
3412 z3 <<= CONST_BITS;
3413
3414 z4 = (INT32) wsptr[4];
3415 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
3416
3417 tmp10 = z3 + z4;
3418 tmp11 = z3 - z4;
3419
3420 z1 = (INT32) wsptr[2];
3421 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
3422 z1 <<= CONST_BITS;
3423 z2 = (INT32) wsptr[6];
3424 z2 <<= CONST_BITS;
3425
3426 tmp12 = z1 - z2;
3427
3428 tmp21 = z3 + tmp12;
3429 tmp24 = z3 - tmp12;
3430
3431 tmp12 = z4 + z2;
3518 GLOBAL(void)
3519 jpeg_idct_10x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3520 JCOEFPTR coef_block,
3521 JSAMPARRAY output_buf, JDIMENSION output_col)
3522 {
3523 INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
3524 INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
3525 INT32 z1, z2, z3, z4;
3526 JCOEFPTR inptr;
3527 ISLOW_MULT_TYPE * quantptr;
3528 int * wsptr;
3529 JSAMPROW outptr;
3530 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3531 int ctr;
3532 int workspace[8*5]; /* buffers data between passes */
3533 SHIFT_TEMPS
3534
3535 /* Pass 1: process columns from input, store into work array.
3536 * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
3537 */
3538
3539 inptr = coef_block;
3540 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3541 wsptr = workspace;
3542 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3543 /* Even part */
3544
3545 tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3546 tmp12 <<= CONST_BITS;
3547 /* Add fudge factor here for final descale. */
3548 tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
3549 tmp13 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3550 tmp14 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
3551 z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */
3552 z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */
3553 z3 = tmp12 + z2;
3554 tmp10 = z3 + z1;
3555 tmp11 = z3 - z1;
3556 tmp12 -= z2 << 2;
3557
3558 /* Odd part */
3559
3560 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3561 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3562
3563 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
3564 tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
3565 tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
3566
3567 /* Final output stage */
3568
3569 wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp13, CONST_BITS-PASS1_BITS);
3570 wsptr[8*4] = (int) RIGHT_SHIFT(tmp10 - tmp13, CONST_BITS-PASS1_BITS);
3571 wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp14, CONST_BITS-PASS1_BITS);
3572 wsptr[8*3] = (int) RIGHT_SHIFT(tmp11 - tmp14, CONST_BITS-PASS1_BITS);
3573 wsptr[8*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
3574 }
3575
3576 /* Pass 2: process 5 rows from work array, store into output array.
3577 * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
3578 */
3579
3580 wsptr = workspace;
3581 for (ctr = 0; ctr < 5; ctr++) {
3582 outptr = output_buf[ctr] + output_col;
3583
3584 /* Even part */
3585
3586 /* Add range center and fudge factor for final descale and range-limit. */
3587 z3 = (INT32) wsptr[0] +
3588 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3589 (ONE << (PASS1_BITS+2)));
3590 z3 <<= CONST_BITS;
3591 z4 = (INT32) wsptr[4];
3592 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
3593 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
3594 tmp10 = z3 + z1;
3595 tmp11 = z3 - z2;
3596
3597 tmp22 = z3 - ((z1 - z2) << 1); /* c0 = (c4-c8)*2 */
3598
3599 z2 = (INT32) wsptr[2];
3600 z3 = (INT32) wsptr[6];
3601
3602 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
3603 tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
3604 tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
3605
3606 tmp20 = tmp10 + tmp12;
3607 tmp24 = tmp10 - tmp12;
3608 tmp21 = tmp11 + tmp13;
3609 tmp23 = tmp11 - tmp13;
3681 */
3682
3683 GLOBAL(void)
3684 jpeg_idct_8x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3685 JCOEFPTR coef_block,
3686 JSAMPARRAY output_buf, JDIMENSION output_col)
3687 {
3688 INT32 tmp0, tmp1, tmp2, tmp3;
3689 INT32 tmp10, tmp11, tmp12, tmp13;
3690 INT32 z1, z2, z3;
3691 JCOEFPTR inptr;
3692 ISLOW_MULT_TYPE * quantptr;
3693 int * wsptr;
3694 JSAMPROW outptr;
3695 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3696 int ctr;
3697 int workspace[8*4]; /* buffers data between passes */
3698 SHIFT_TEMPS
3699
3700 /* Pass 1: process columns from input, store into work array.
3701 * 4-point IDCT kernel,
3702 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
3703 */
3704
3705 inptr = coef_block;
3706 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3707 wsptr = workspace;
3708 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3709 /* Even part */
3710
3711 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3712 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3713
3714 tmp10 = (tmp0 + tmp2) << PASS1_BITS;
3715 tmp12 = (tmp0 - tmp2) << PASS1_BITS;
3716
3717 /* Odd part */
3718 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
3719
3720 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3721 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3722
3723 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
3724 /* Add fudge factor here for final descale. */
3725 z1 += ONE << (CONST_BITS-PASS1_BITS-1);
3726 tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */
3727 CONST_BITS-PASS1_BITS);
3728 tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */
3729 CONST_BITS-PASS1_BITS);
3730
3731 /* Final output stage */
3732
3733 wsptr[8*0] = (int) (tmp10 + tmp0);
3734 wsptr[8*3] = (int) (tmp10 - tmp0);
3735 wsptr[8*1] = (int) (tmp12 + tmp2);
3736 wsptr[8*2] = (int) (tmp12 - tmp2);
3737 }
3738
3739 /* Pass 2: process rows from work array, store into output array.
3740 * Note that we must descale the results by a factor of 8 == 2**3,
3741 * and also undo the PASS1_BITS scaling.
3742 * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
3743 */
3744
3745 wsptr = workspace;
3746 for (ctr = 0; ctr < 4; ctr++) {
3747 outptr = output_buf[ctr] + output_col;
3748
3749 /* Even part: reverse the even part of the forward DCT.
3750 * The rotator is c(-6).
3751 */
3752
3753 /* Add range center and fudge factor for final descale and range-limit. */
3754 z2 = (INT32) wsptr[0] +
3755 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3756 (ONE << (PASS1_BITS+2)));
3757 z3 = (INT32) wsptr[4];
3758
3759 tmp0 = (z2 + z3) << CONST_BITS;
3760 tmp1 = (z2 - z3) << CONST_BITS;
3761
3762 z2 = (INT32) wsptr[2];
3763 z3 = (INT32) wsptr[6];
3764
3765 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
3766 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
3767 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
3768
3769 tmp10 = tmp0 + tmp2;
3770 tmp13 = tmp0 - tmp2;
3771 tmp11 = tmp1 + tmp3;
3772 tmp12 = tmp1 - tmp3;
3773
3774 /* Odd part per figure 8; the matrix is unitary and hence its
3775 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
3776 */
3777
3778 tmp0 = (INT32) wsptr[7];
3779 tmp1 = (INT32) wsptr[5];
3780 tmp2 = (INT32) wsptr[3];
3781 tmp3 = (INT32) wsptr[1];
3782
3783 z2 = tmp0 + tmp2;
3784 z3 = tmp1 + tmp3;
3785
3786 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
3787 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
3788 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
3789 z2 += z1;
3790 z3 += z1;
3791
3792 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
3793 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
3794 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
3795 tmp0 += z1 + z2;
3796 tmp3 += z1 + z3;
3797
3798 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
3799 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
3800 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
3801 tmp1 += z1 + z3;
3802 tmp2 += z1 + z2;
3803
3804 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
3805
3806 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
3807 CONST_BITS+PASS1_BITS+3)
3808 & RANGE_MASK];
3809 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
3810 CONST_BITS+PASS1_BITS+3)
3811 & RANGE_MASK];
3812 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
3813 CONST_BITS+PASS1_BITS+3)
3814 & RANGE_MASK];
3815 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
3816 CONST_BITS+PASS1_BITS+3)
3817 & RANGE_MASK];
3818 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
3819 CONST_BITS+PASS1_BITS+3)
3820 & RANGE_MASK];
3842
3843 GLOBAL(void)
3844 jpeg_idct_6x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3845 JCOEFPTR coef_block,
3846 JSAMPARRAY output_buf, JDIMENSION output_col)
3847 {
3848 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
3849 INT32 z1, z2, z3;
3850 JCOEFPTR inptr;
3851 ISLOW_MULT_TYPE * quantptr;
3852 int * wsptr;
3853 JSAMPROW outptr;
3854 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3855 int ctr;
3856 int workspace[6*3]; /* buffers data between passes */
3857 SHIFT_TEMPS
3858
3859 /* Pass 1: process columns from input, store into work array.
3860 * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
3861 */
3862
3863 inptr = coef_block;
3864 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3865 wsptr = workspace;
3866 for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
3867 /* Even part */
3868
3869 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3870 tmp0 <<= CONST_BITS;
3871 /* Add fudge factor here for final descale. */
3872 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
3873 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3874 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
3875 tmp10 = tmp0 + tmp12;
3876 tmp2 = tmp0 - tmp12 - tmp12;
3877
3878 /* Odd part */
3879
3880 tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3881 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
3882
3883 /* Final output stage */
3884
3885 wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
3886 wsptr[6*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
3887 wsptr[6*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
3888 }
3889
3890 /* Pass 2: process 3 rows from work array, store into output array.
3891 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
3892 */
3893
3894 wsptr = workspace;
3895 for (ctr = 0; ctr < 3; ctr++) {
3896 outptr = output_buf[ctr] + output_col;
3897
3898 /* Even part */
3899
3900 /* Add range center and fudge factor for final descale and range-limit. */
3901 tmp0 = (INT32) wsptr[0] +
3902 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3903 (ONE << (PASS1_BITS+2)));
3904 tmp0 <<= CONST_BITS;
3905 tmp2 = (INT32) wsptr[4];
3906 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
3907 tmp1 = tmp0 + tmp10;
3908 tmp11 = tmp0 - tmp10 - tmp10;
3909 tmp10 = (INT32) wsptr[2];
3910 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
3911 tmp10 = tmp1 + tmp0;
3912 tmp12 = tmp1 - tmp0;
3913
3914 /* Odd part */
3915
3916 z1 = (INT32) wsptr[1];
3917 z2 = (INT32) wsptr[3];
3918 z3 = (INT32) wsptr[5];
3919 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
3920 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
3921 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
3922 tmp1 = (z1 - z2 - z3) << CONST_BITS;
3923
3977 wsptr = workspace;
3978 for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) {
3979 /* Even part */
3980
3981 tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3982
3983 /* Odd part */
3984
3985 tmp0 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3986
3987 /* Final output stage */
3988
3989 wsptr[4*0] = tmp10 + tmp0;
3990 wsptr[4*1] = tmp10 - tmp0;
3991 }
3992
3993 /* Pass 2: process 2 rows from work array, store into output array.
3994 * 4-point IDCT kernel,
3995 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
3996 */
3997
3998 wsptr = workspace;
3999 for (ctr = 0; ctr < 2; ctr++) {
4000 outptr = output_buf[ctr] + output_col;
4001
4002 /* Even part */
4003
4004 /* Add range center and fudge factor for final descale and range-limit. */
4005 tmp0 = wsptr[0] + ((((INT32) RANGE_CENTER) << 3) + (ONE << 2));
4006 tmp2 = wsptr[2];
4007
4008 tmp10 = (tmp0 + tmp2) << CONST_BITS;
4009 tmp12 = (tmp0 - tmp2) << CONST_BITS;
4010
4011 /* Odd part */
4012 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
4013
4014 z2 = wsptr[1];
4015 z3 = wsptr[3];
4016
4017 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
4018 tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
4019 tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
4020
4021 /* Final output stage */
4022
4023 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
4024 CONST_BITS+3)
4025 & RANGE_MASK];
4033 CONST_BITS+3)
4034 & RANGE_MASK];
4035
4036 wsptr += 4; /* advance pointer to next row */
4037 }
4038 }
4039
4040
4041 /*
4042 * Perform dequantization and inverse DCT on one block of coefficients,
4043 * producing a 2x1 output block.
4044 *
4045 * 1-point IDCT in pass 1 (columns), 2-point in pass 2 (rows).
4046 */
4047
4048 GLOBAL(void)
4049 jpeg_idct_2x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4050 JCOEFPTR coef_block,
4051 JSAMPARRAY output_buf, JDIMENSION output_col)
4052 {
4053 DCTELEM tmp0, tmp1;
4054 ISLOW_MULT_TYPE * quantptr;
4055 JSAMPROW outptr;
4056 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4057 ISHIFT_TEMPS
4058
4059 /* Pass 1: empty. */
4060
4061 /* Pass 2: process 1 row from input, store into output array. */
4062
4063 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4064 outptr = output_buf[0] + output_col;
4065
4066 /* Even part */
4067
4068 tmp0 = DEQUANTIZE(coef_block[0], quantptr[0]);
4069 /* Add range center and fudge factor for final descale and range-limit. */
4070 tmp0 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
4071
4072 /* Odd part */
4073
4074 tmp1 = DEQUANTIZE(coef_block[1], quantptr[1]);
4075
4076 /* Final output stage */
4077
4078 outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
4079 outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
4080 }
4081
4082
4083 /*
4084 * Perform dequantization and inverse DCT on one block of coefficients,
4085 * producing a 8x16 output block.
4086 *
4087 * 16-point IDCT in pass 1 (columns), 8-point in pass 2 (rows).
4088 */
4089
4090 GLOBAL(void)
4091 jpeg_idct_8x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4092 JCOEFPTR coef_block,
4093 JSAMPARRAY output_buf, JDIMENSION output_col)
4094 {
4095 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
4096 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
4097 INT32 z1, z2, z3, z4;
4098 JCOEFPTR inptr;
4099 ISLOW_MULT_TYPE * quantptr;
4100 int * wsptr;
4101 JSAMPROW outptr;
4102 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4103 int ctr;
4104 int workspace[8*16]; /* buffers data between passes */
4105 SHIFT_TEMPS
4106
4107 /* Pass 1: process columns from input, store into work array.
4108 * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
4109 */
4110
4111 inptr = coef_block;
4112 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4113 wsptr = workspace;
4114 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
4115 /* Even part */
4116
4117 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4118 tmp0 <<= CONST_BITS;
4119 /* Add fudge factor here for final descale. */
4120 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
4121
4122 z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4123 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
4124 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
4125
4126 tmp10 = tmp0 + tmp1;
4127 tmp11 = tmp0 - tmp1;
4128 tmp12 = tmp0 + tmp2;
4129 tmp13 = tmp0 - tmp2;
4130
4190 /* Final output stage */
4191
4192 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp0, CONST_BITS-PASS1_BITS);
4193 wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0, CONST_BITS-PASS1_BITS);
4194 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp1, CONST_BITS-PASS1_BITS);
4195 wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1, CONST_BITS-PASS1_BITS);
4196 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp2, CONST_BITS-PASS1_BITS);
4197 wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2, CONST_BITS-PASS1_BITS);
4198 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS-PASS1_BITS);
4199 wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS-PASS1_BITS);
4200 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
4201 wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
4202 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
4203 wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
4204 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
4205 wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
4206 wsptr[8*7] = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
4207 wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
4208 }
4209
4210 /* Pass 2: process rows from work array, store into output array.
4211 * Note that we must descale the results by a factor of 8 == 2**3,
4212 * and also undo the PASS1_BITS scaling.
4213 * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
4214 */
4215
4216 wsptr = workspace;
4217 for (ctr = 0; ctr < 16; ctr++) {
4218 outptr = output_buf[ctr] + output_col;
4219
4220 /* Even part: reverse the even part of the forward DCT.
4221 * The rotator is c(-6).
4222 */
4223
4224 /* Add range center and fudge factor for final descale and range-limit. */
4225 z2 = (INT32) wsptr[0] +
4226 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
4227 (ONE << (PASS1_BITS+2)));
4228 z3 = (INT32) wsptr[4];
4229
4230 tmp0 = (z2 + z3) << CONST_BITS;
4231 tmp1 = (z2 - z3) << CONST_BITS;
4232
4233 z2 = (INT32) wsptr[2];
4234 z3 = (INT32) wsptr[6];
4235
4236 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
4237 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
4238 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
4239
4240 tmp10 = tmp0 + tmp2;
4241 tmp13 = tmp0 - tmp2;
4242 tmp11 = tmp1 + tmp3;
4243 tmp12 = tmp1 - tmp3;
4244
4245 /* Odd part per figure 8; the matrix is unitary and hence its
4246 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
4247 */
4248
4249 tmp0 = (INT32) wsptr[7];
4250 tmp1 = (INT32) wsptr[5];
4251 tmp2 = (INT32) wsptr[3];
4252 tmp3 = (INT32) wsptr[1];
4253
4254 z2 = tmp0 + tmp2;
4255 z3 = tmp1 + tmp3;
4256
4257 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
4258 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
4259 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
4260 z2 += z1;
4261 z3 += z1;
4262
4263 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
4264 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
4265 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
4266 tmp0 += z1 + z2;
4267 tmp3 += z1 + z3;
4268
4269 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
4270 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
4271 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
4272 tmp1 += z1 + z3;
4273 tmp2 += z1 + z2;
4274
4275 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
4276
4277 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
4278 CONST_BITS+PASS1_BITS+3)
4279 & RANGE_MASK];
4280 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
4281 CONST_BITS+PASS1_BITS+3)
4282 & RANGE_MASK];
4283 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
4284 CONST_BITS+PASS1_BITS+3)
4285 & RANGE_MASK];
4286 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
4287 CONST_BITS+PASS1_BITS+3)
4288 & RANGE_MASK];
4289 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
4290 CONST_BITS+PASS1_BITS+3)
4291 & RANGE_MASK];
4314 GLOBAL(void)
4315 jpeg_idct_7x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4316 JCOEFPTR coef_block,
4317 JSAMPARRAY output_buf, JDIMENSION output_col)
4318 {
4319 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
4320 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
4321 INT32 z1, z2, z3, z4;
4322 JCOEFPTR inptr;
4323 ISLOW_MULT_TYPE * quantptr;
4324 int * wsptr;
4325 JSAMPROW outptr;
4326 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4327 int ctr;
4328 int workspace[7*14]; /* buffers data between passes */
4329 SHIFT_TEMPS
4330
4331 /* Pass 1: process columns from input, store into work array.
4332 * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
4333 */
4334
4335 inptr = coef_block;
4336 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4337 wsptr = workspace;
4338 for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
4339 /* Even part */
4340
4341 z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4342 z1 <<= CONST_BITS;
4343 /* Add fudge factor here for final descale. */
4344 z1 += ONE << (CONST_BITS-PASS1_BITS-1);
4345 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4346 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
4347 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
4348 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
4349
4350 tmp10 = z1 + z2;
4351 tmp11 = z1 + z3;
4352 tmp12 = z1 - z4;
4353
4354 tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
4402
4403 wsptr[7*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
4404 wsptr[7*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
4405 wsptr[7*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
4406 wsptr[7*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
4407 wsptr[7*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
4408 wsptr[7*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
4409 wsptr[7*3] = (int) (tmp23 + tmp13);
4410 wsptr[7*10] = (int) (tmp23 - tmp13);
4411 wsptr[7*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
4412 wsptr[7*9] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
4413 wsptr[7*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
4414 wsptr[7*8] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
4415 wsptr[7*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
4416 wsptr[7*7] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
4417 }
4418
4419 /* Pass 2: process 14 rows from work array, store into output array.
4420 * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
4421 */
4422
4423 wsptr = workspace;
4424 for (ctr = 0; ctr < 14; ctr++) {
4425 outptr = output_buf[ctr] + output_col;
4426
4427 /* Even part */
4428
4429 /* Add range center and fudge factor for final descale and range-limit. */
4430 tmp23 = (INT32) wsptr[0] +
4431 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
4432 (ONE << (PASS1_BITS+2)));
4433 tmp23 <<= CONST_BITS;
4434
4435 z1 = (INT32) wsptr[2];
4436 z2 = (INT32) wsptr[4];
4437 z3 = (INT32) wsptr[6];
4438
4439 tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
4440 tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
4441 tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
4442 tmp10 = z1 + z3;
4443 z2 -= tmp10;
4444 tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */
4445 tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
4446 tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
4447 tmp23 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
4448
4449 /* Odd part */
4450
4451 z1 = (INT32) wsptr[1];
4452 z2 = (INT32) wsptr[3];
4501 GLOBAL(void)
4502 jpeg_idct_6x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4503 JCOEFPTR coef_block,
4504 JSAMPARRAY output_buf, JDIMENSION output_col)
4505 {
4506 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
4507 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
4508 INT32 z1, z2, z3, z4;
4509 JCOEFPTR inptr;
4510 ISLOW_MULT_TYPE * quantptr;
4511 int * wsptr;
4512 JSAMPROW outptr;
4513 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4514 int ctr;
4515 int workspace[6*12]; /* buffers data between passes */
4516 SHIFT_TEMPS
4517
4518 /* Pass 1: process columns from input, store into work array.
4519 * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
4520 */
4521
4522 inptr = coef_block;
4523 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4524 wsptr = workspace;
4525 for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
4526 /* Even part */
4527
4528 z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4529 z3 <<= CONST_BITS;
4530 /* Add fudge factor here for final descale. */
4531 z3 += ONE << (CONST_BITS-PASS1_BITS-1);
4532
4533 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4534 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
4535
4536 tmp10 = z3 + z4;
4537 tmp11 = z3 - z4;
4538
4539 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4540 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
4541 z1 <<= CONST_BITS;
4585
4586 /* Final output stage */
4587
4588 wsptr[6*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
4589 wsptr[6*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
4590 wsptr[6*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
4591 wsptr[6*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
4592 wsptr[6*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
4593 wsptr[6*9] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
4594 wsptr[6*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
4595 wsptr[6*8] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
4596 wsptr[6*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
4597 wsptr[6*7] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
4598 wsptr[6*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
4599 wsptr[6*6] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
4600 }
4601
4602 /* Pass 2: process 12 rows from work array, store into output array.
4603 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
4604 */
4605
4606 wsptr = workspace;
4607 for (ctr = 0; ctr < 12; ctr++) {
4608 outptr = output_buf[ctr] + output_col;
4609
4610 /* Even part */
4611
4612 /* Add range center and fudge factor for final descale and range-limit. */
4613 tmp10 = (INT32) wsptr[0] +
4614 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
4615 (ONE << (PASS1_BITS+2)));
4616 tmp10 <<= CONST_BITS;
4617 tmp12 = (INT32) wsptr[4];
4618 tmp20 = MULTIPLY(tmp12, FIX(0.707106781)); /* c4 */
4619 tmp11 = tmp10 + tmp20;
4620 tmp21 = tmp10 - tmp20 - tmp20;
4621 tmp20 = (INT32) wsptr[2];
4622 tmp10 = MULTIPLY(tmp20, FIX(1.224744871)); /* c2 */
4623 tmp20 = tmp11 + tmp10;
4624 tmp22 = tmp11 - tmp10;
4625
4626 /* Odd part */
4627
4628 z1 = (INT32) wsptr[1];
4629 z2 = (INT32) wsptr[3];
4630 z3 = (INT32) wsptr[5];
4631 tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
4632 tmp10 = tmp11 + ((z1 + z2) << CONST_BITS);
4633 tmp12 = tmp11 + ((z3 - z2) << CONST_BITS);
4634 tmp11 = (z1 - z2 - z3) << CONST_BITS;
4635
4669 GLOBAL(void)
4670 jpeg_idct_5x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4671 JCOEFPTR coef_block,
4672 JSAMPARRAY output_buf, JDIMENSION output_col)
4673 {
4674 INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
4675 INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
4676 INT32 z1, z2, z3, z4, z5;
4677 JCOEFPTR inptr;
4678 ISLOW_MULT_TYPE * quantptr;
4679 int * wsptr;
4680 JSAMPROW outptr;
4681 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4682 int ctr;
4683 int workspace[5*10]; /* buffers data between passes */
4684 SHIFT_TEMPS
4685
4686 /* Pass 1: process columns from input, store into work array.
4687 * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
4688 */
4689
4690 inptr = coef_block;
4691 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4692 wsptr = workspace;
4693 for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
4694 /* Even part */
4695
4696 z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4697 z3 <<= CONST_BITS;
4698 /* Add fudge factor here for final descale. */
4699 z3 += ONE << (CONST_BITS-PASS1_BITS-1);
4700 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4701 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
4702 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
4703 tmp10 = z3 + z1;
4704 tmp11 = z3 - z2;
4705
4706 tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1), /* c0 = (c4-c8)*2 */
4707 CONST_BITS-PASS1_BITS);
4708
4709 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4745 tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
4746 tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
4747
4748 /* Final output stage */
4749
4750 wsptr[5*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
4751 wsptr[5*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
4752 wsptr[5*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
4753 wsptr[5*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
4754 wsptr[5*2] = (int) (tmp22 + tmp12);
4755 wsptr[5*7] = (int) (tmp22 - tmp12);
4756 wsptr[5*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
4757 wsptr[5*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
4758 wsptr[5*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
4759 wsptr[5*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
4760 }
4761
4762 /* Pass 2: process 10 rows from work array, store into output array.
4763 * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
4764 */
4765
4766 wsptr = workspace;
4767 for (ctr = 0; ctr < 10; ctr++) {
4768 outptr = output_buf[ctr] + output_col;
4769
4770 /* Even part */
4771
4772 /* Add range center and fudge factor for final descale and range-limit. */
4773 tmp12 = (INT32) wsptr[0] +
4774 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
4775 (ONE << (PASS1_BITS+2)));
4776 tmp12 <<= CONST_BITS;
4777 tmp13 = (INT32) wsptr[2];
4778 tmp14 = (INT32) wsptr[4];
4779 z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */
4780 z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */
4781 z3 = tmp12 + z2;
4782 tmp10 = z3 + z1;
4783 tmp11 = z3 - z1;
4784 tmp12 -= z2 << 2;
4785
4786 /* Odd part */
4787
4788 z2 = (INT32) wsptr[1];
4789 z3 = (INT32) wsptr[3];
4790
4791 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
4792 tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
4793 tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
4794
4795 /* Final output stage */
4822 * 8-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
4823 */
4824
4825 GLOBAL(void)
4826 jpeg_idct_4x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4827 JCOEFPTR coef_block,
4828 JSAMPARRAY output_buf, JDIMENSION output_col)
4829 {
4830 INT32 tmp0, tmp1, tmp2, tmp3;
4831 INT32 tmp10, tmp11, tmp12, tmp13;
4832 INT32 z1, z2, z3;
4833 JCOEFPTR inptr;
4834 ISLOW_MULT_TYPE * quantptr;
4835 int * wsptr;
4836 JSAMPROW outptr;
4837 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4838 int ctr;
4839 int workspace[4*8]; /* buffers data between passes */
4840 SHIFT_TEMPS
4841
4842 /* Pass 1: process columns from input, store into work array.
4843 * Note results are scaled up by sqrt(8) compared to a true IDCT;
4844 * furthermore, we scale the results by 2**PASS1_BITS.
4845 * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
4846 */
4847
4848 inptr = coef_block;
4849 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4850 wsptr = workspace;
4851 for (ctr = 4; ctr > 0; ctr--) {
4852 /* Due to quantization, we will usually find that many of the input
4853 * coefficients are zero, especially the AC terms. We can exploit this
4854 * by short-circuiting the IDCT calculation for any column in which all
4855 * the AC terms are zero. In that case each output is equal to the
4856 * DC coefficient (with scale factor as needed).
4857 * With typical images and quantization tables, half or more of the
4858 * column DCT calculations can be simplified this way.
4859 */
4860
4861 if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
4862 inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
4863 inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
4864 inptr[DCTSIZE*7] == 0) {
4865 /* AC terms all zero */
4866 int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
4867
4868 wsptr[4*0] = dcval;
4869 wsptr[4*1] = dcval;
4870 wsptr[4*2] = dcval;
4871 wsptr[4*3] = dcval;
4872 wsptr[4*4] = dcval;
4873 wsptr[4*5] = dcval;
4874 wsptr[4*6] = dcval;
4875 wsptr[4*7] = dcval;
4876
4877 inptr++; /* advance pointers to next column */
4878 quantptr++;
4879 wsptr++;
4880 continue;
4881 }
4882
4883 /* Even part: reverse the even part of the forward DCT.
4884 * The rotator is c(-6).
4885 */
4886
4887 z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4888 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4889 z2 <<= CONST_BITS;
4890 z3 <<= CONST_BITS;
4891 /* Add fudge factor here for final descale. */
4892 z2 += ONE << (CONST_BITS-PASS1_BITS-1);
4893
4894 tmp0 = z2 + z3;
4895 tmp1 = z2 - z3;
4896
4897 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4898 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4899
4900 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
4901 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
4902 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
4903
4904 tmp10 = tmp0 + tmp2;
4905 tmp13 = tmp0 - tmp2;
4906 tmp11 = tmp1 + tmp3;
4907 tmp12 = tmp1 - tmp3;
4908
4909 /* Odd part per figure 8; the matrix is unitary and hence its
4910 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
4911 */
4912
4913 tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4914 tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4915 tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4916 tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4917
4918 z2 = tmp0 + tmp2;
4919 z3 = tmp1 + tmp3;
4920
4921 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
4922 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
4923 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
4924 z2 += z1;
4925 z3 += z1;
4926
4927 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
4928 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
4929 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
4930 tmp0 += z1 + z2;
4931 tmp3 += z1 + z3;
4932
4933 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
4934 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
4935 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
4936 tmp1 += z1 + z3;
4937 tmp2 += z1 + z2;
4938
4939 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
4940
4941 wsptr[4*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
4942 wsptr[4*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
4943 wsptr[4*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
4944 wsptr[4*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
4945 wsptr[4*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
4946 wsptr[4*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
4947 wsptr[4*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
4948 wsptr[4*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
4949
4950 inptr++; /* advance pointers to next column */
4951 quantptr++;
4952 wsptr++;
4953 }
4954
4955 /* Pass 2: process 8 rows from work array, store into output array.
4956 * 4-point IDCT kernel,
4957 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
4958 */
4959
4960 wsptr = workspace;
4961 for (ctr = 0; ctr < 8; ctr++) {
4962 outptr = output_buf[ctr] + output_col;
4963
4964 /* Even part */
4965
4966 /* Add range center and fudge factor for final descale and range-limit. */
4967 tmp0 = (INT32) wsptr[0] +
4968 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
4969 (ONE << (PASS1_BITS+2)));
4970 tmp2 = (INT32) wsptr[2];
4971
4972 tmp10 = (tmp0 + tmp2) << CONST_BITS;
4973 tmp12 = (tmp0 - tmp2) << CONST_BITS;
4974
4975 /* Odd part */
4976 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
4977
4978 z2 = (INT32) wsptr[1];
4979 z3 = (INT32) wsptr[3];
4980
4981 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
4982 tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
4983 tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
4984
4985 /* Final output stage */
4986
4987 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
4988 CONST_BITS+PASS1_BITS+3)
4989 & RANGE_MASK];
5011
5012 GLOBAL(void)
5013 jpeg_idct_3x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
5014 JCOEFPTR coef_block,
5015 JSAMPARRAY output_buf, JDIMENSION output_col)
5016 {
5017 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
5018 INT32 z1, z2, z3;
5019 JCOEFPTR inptr;
5020 ISLOW_MULT_TYPE * quantptr;
5021 int * wsptr;
5022 JSAMPROW outptr;
5023 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
5024 int ctr;
5025 int workspace[3*6]; /* buffers data between passes */
5026 SHIFT_TEMPS
5027
5028 /* Pass 1: process columns from input, store into work array.
5029 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
5030 */
5031
5032 inptr = coef_block;
5033 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
5034 wsptr = workspace;
5035 for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
5036 /* Even part */
5037
5038 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
5039 tmp0 <<= CONST_BITS;
5040 /* Add fudge factor here for final descale. */
5041 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
5042 tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
5043 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
5044 tmp1 = tmp0 + tmp10;
5045 tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
5046 tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
5047 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
5048 tmp10 = tmp1 + tmp0;
5049 tmp12 = tmp1 - tmp0;
5050
5051 /* Odd part */
5054 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
5055 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
5056 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
5057 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
5058 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
5059 tmp1 = (z1 - z2 - z3) << PASS1_BITS;
5060
5061 /* Final output stage */
5062
5063 wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
5064 wsptr[3*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
5065 wsptr[3*1] = (int) (tmp11 + tmp1);
5066 wsptr[3*4] = (int) (tmp11 - tmp1);
5067 wsptr[3*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
5068 wsptr[3*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
5069 }
5070
5071 /* Pass 2: process 6 rows from work array, store into output array.
5072 * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
5073 */
5074
5075 wsptr = workspace;
5076 for (ctr = 0; ctr < 6; ctr++) {
5077 outptr = output_buf[ctr] + output_col;
5078
5079 /* Even part */
5080
5081 /* Add range center and fudge factor for final descale and range-limit. */
5082 tmp0 = (INT32) wsptr[0] +
5083 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
5084 (ONE << (PASS1_BITS+2)));
5085 tmp0 <<= CONST_BITS;
5086 tmp2 = (INT32) wsptr[2];
5087 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
5088 tmp10 = tmp0 + tmp12;
5089 tmp2 = tmp0 - tmp12 - tmp12;
5090
5091 /* Odd part */
5092
5093 tmp12 = (INT32) wsptr[1];
5094 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
5095
5096 /* Final output stage */
5097
5098 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
5099 CONST_BITS+PASS1_BITS+3)
5100 & RANGE_MASK];
5101 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
5102 CONST_BITS+PASS1_BITS+3)
5103 & RANGE_MASK];
5104 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
5120 GLOBAL(void)
5121 jpeg_idct_2x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
5122 JCOEFPTR coef_block,
5123 JSAMPARRAY output_buf, JDIMENSION output_col)
5124 {
5125 INT32 tmp0, tmp2, tmp10, tmp12;
5126 INT32 z1, z2, z3;
5127 JCOEFPTR inptr;
5128 ISLOW_MULT_TYPE * quantptr;
5129 INT32 * wsptr;
5130 JSAMPROW outptr;
5131 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
5132 int ctr;
5133 INT32 workspace[2*4]; /* buffers data between passes */
5134 SHIFT_TEMPS
5135
5136 /* Pass 1: process columns from input, store into work array.
5137 * 4-point IDCT kernel,
5138 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
5139 */
5140
5141 inptr = coef_block;
5142 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
5143 wsptr = workspace;
5144 for (ctr = 0; ctr < 2; ctr++, inptr++, quantptr++, wsptr++) {
5145 /* Even part */
5146
5147 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
5148 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
5149
5150 tmp10 = (tmp0 + tmp2) << CONST_BITS;
5151 tmp12 = (tmp0 - tmp2) << CONST_BITS;
5152
5153 /* Odd part */
5154 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
5155
5156 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
5157 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
5158
5159 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
5160 tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
5161 tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
5162
5163 /* Final output stage */
5164
5165 wsptr[2*0] = tmp10 + tmp0;
5166 wsptr[2*3] = tmp10 - tmp0;
5167 wsptr[2*1] = tmp12 + tmp2;
5168 wsptr[2*2] = tmp12 - tmp2;
5169 }
5170
5171 /* Pass 2: process 4 rows from work array, store into output array. */
5172
5173 wsptr = workspace;
5174 for (ctr = 0; ctr < 4; ctr++) {
5175 outptr = output_buf[ctr] + output_col;
5176
5177 /* Even part */
5178
5179 /* Add range center and fudge factor for final descale and range-limit. */
5180 tmp10 = wsptr[0] +
5181 ((((INT32) RANGE_CENTER) << (CONST_BITS+3)) +
5182 (ONE << (CONST_BITS+2)));
5183
5184 /* Odd part */
5185
5186 tmp0 = wsptr[1];
5187
5188 /* Final output stage */
5189
5190 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS+3)
5191 & RANGE_MASK];
5192 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS+3)
5193 & RANGE_MASK];
5194
5195 wsptr += 2; /* advance pointer to next row */
5196 }
5197 }
5198
5199
5200 /*
5201 * Perform dequantization and inverse DCT on one block of coefficients,
5202 * producing a 1x2 output block.
5203 *
5204 * 2-point IDCT in pass 1 (columns), 1-point in pass 2 (rows).
5205 */
5206
5207 GLOBAL(void)
5208 jpeg_idct_1x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
5209 JCOEFPTR coef_block,
5210 JSAMPARRAY output_buf, JDIMENSION output_col)
5211 {
5212 DCTELEM tmp0, tmp1;
5213 ISLOW_MULT_TYPE * quantptr;
5214 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
5215 ISHIFT_TEMPS
5216
5217 /* Process 1 column from input, store into output array. */
5218
5219 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
5220
5221 /* Even part */
5222
5223 tmp0 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
5224 /* Add range center and fudge factor for final descale and range-limit. */
5225 tmp0 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
5226
5227 /* Odd part */
5228
5229 tmp1 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
5230
5231 /* Final output stage */
5232
5233 output_buf[0][output_col] =
5234 range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
5235 output_buf[1][output_col] =
5236 range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
5237 }
5238
5239 #endif /* IDCT_SCALING_SUPPORTED */
5240 #endif /* DCT_ISLOW_SUPPORTED */
|