1 /*
2 * jfdctint.c
3 *
4 * Copyright (C) 1991-1996, Thomas G. Lane.
5 * Modification developed 2003-2009 by Guido Vollbeding.
6 * This file is part of the Independent JPEG Group's software.
7 * For conditions of distribution and use, see the accompanying README file.
8 *
9 * This file contains a slow-but-accurate integer implementation of the
10 * forward DCT (Discrete Cosine Transform).
11 *
12 * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
13 * on each column. Direct algorithms are also available, but they are
14 * much more complex and seem not to be any faster when reduced to code.
15 *
16 * This implementation is based on an algorithm described in
17 * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
18 * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
19 * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
20 * The primary algorithm described there uses 11 multiplies and 29 adds.
21 * We use their alternate method with 12 multiplies and 32 adds.
22 * The advantage of this method is that no data path contains more than one
23 * multiplication; this allows a very simple and accurate implementation in
24 * scaled fixed-point arithmetic, with a minimal number of shifts.
25 *
148 #else
149 #define MULTIPLY(var,const) ((var) * (const))
150 #endif
151
152
153 /*
154 * Perform the forward DCT on one block of samples.
155 */
156
157 GLOBAL(void)
158 jpeg_fdct_islow (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
159 {
160 INT32 tmp0, tmp1, tmp2, tmp3;
161 INT32 tmp10, tmp11, tmp12, tmp13;
162 INT32 z1;
163 DCTELEM *dataptr;
164 JSAMPROW elemptr;
165 int ctr;
166 SHIFT_TEMPS
167
168 /* Pass 1: process rows. */
169 /* Note results are scaled up by sqrt(8) compared to a true DCT; */
170 /* furthermore, we scale the results by 2**PASS1_BITS. */
171
172 dataptr = data;
173 for (ctr = 0; ctr < DCTSIZE; ctr++) {
174 elemptr = sample_data[ctr] + start_col;
175
176 /* Even part per LL&M figure 1 --- note that published figure is faulty;
177 * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
178 */
179
180 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
181 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
182 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
183 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
184
185 tmp10 = tmp0 + tmp3;
186 tmp12 = tmp0 - tmp3;
187 tmp11 = tmp1 + tmp2;
188 tmp13 = tmp1 - tmp2;
189
190 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
191 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
192 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
193 tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
194
195 /* Apply unsigned->signed conversion */
196 dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << PASS1_BITS);
197 dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
198
199 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
200 /* Add fudge factor here for final descale. */
201 z1 += ONE << (CONST_BITS-PASS1_BITS-1);
202 dataptr[2] = (DCTELEM) RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865),
203 CONST_BITS-PASS1_BITS);
204 dataptr[6] = (DCTELEM) RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065),
205 CONST_BITS-PASS1_BITS);
206
207 /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
208 * cK represents sqrt(2) * cos(K*pi/16).
209 * i0..i3 in the paper are tmp0..tmp3 here.
210 */
211
212 tmp10 = tmp0 + tmp3;
213 tmp11 = tmp1 + tmp2;
214 tmp12 = tmp0 + tmp2;
215 tmp13 = tmp1 + tmp3;
216 z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
217 /* Add fudge factor here for final descale. */
218 z1 += ONE << (CONST_BITS-PASS1_BITS-1);
219
220 tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
221 tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
222 tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
223 tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
224 tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */
225 tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */
226 tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */
227 tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
228
229 tmp12 += z1;
230 tmp13 += z1;
231
232 dataptr[1] = (DCTELEM)
233 RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS-PASS1_BITS);
234 dataptr[3] = (DCTELEM)
235 RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS-PASS1_BITS);
236 dataptr[5] = (DCTELEM)
237 RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS-PASS1_BITS);
238 dataptr[7] = (DCTELEM)
239 RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS-PASS1_BITS);
240
241 dataptr += DCTSIZE; /* advance pointer to next row */
242 }
243
244 /* Pass 2: process columns.
245 * We remove the PASS1_BITS scaling, but leave the results scaled up
246 * by an overall factor of 8.
247 */
248
249 dataptr = data;
250 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
251 /* Even part per LL&M figure 1 --- note that published figure is faulty;
252 * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
253 */
254
255 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
256 tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
257 tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
258 tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
259
260 /* Add fudge factor here for final descale. */
261 tmp10 = tmp0 + tmp3 + (ONE << (PASS1_BITS-1));
262 tmp12 = tmp0 - tmp3;
263 tmp11 = tmp1 + tmp2;
264 tmp13 = tmp1 - tmp2;
265
266 tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
267 tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
268 tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
269 tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
270
271 dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp10 + tmp11, PASS1_BITS);
272 dataptr[DCTSIZE*4] = (DCTELEM) RIGHT_SHIFT(tmp10 - tmp11, PASS1_BITS);
273
274 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
275 /* Add fudge factor here for final descale. */
276 z1 += ONE << (CONST_BITS+PASS1_BITS-1);
277 dataptr[DCTSIZE*2] = (DCTELEM)
278 RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), CONST_BITS+PASS1_BITS);
279 dataptr[DCTSIZE*6] = (DCTELEM)
280 RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), CONST_BITS+PASS1_BITS);
281
282 /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
283 * cK represents sqrt(2) * cos(K*pi/16).
284 * i0..i3 in the paper are tmp0..tmp3 here.
285 */
286
287 tmp10 = tmp0 + tmp3;
288 tmp11 = tmp1 + tmp2;
289 tmp12 = tmp0 + tmp2;
290 tmp13 = tmp1 + tmp3;
291 z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
292 /* Add fudge factor here for final descale. */
293 z1 += ONE << (CONST_BITS+PASS1_BITS-1);
294
295 tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
296 tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
297 tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
298 tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
299 tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */
300 tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */
301 tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */
302 tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
303
304 tmp12 += z1;
305 tmp13 += z1;
306
307 dataptr[DCTSIZE*1] = (DCTELEM)
308 RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS+PASS1_BITS);
309 dataptr[DCTSIZE*3] = (DCTELEM)
310 RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS+PASS1_BITS);
311 dataptr[DCTSIZE*5] = (DCTELEM)
312 RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS+PASS1_BITS);
313 dataptr[DCTSIZE*7] = (DCTELEM)
314 RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS+PASS1_BITS);
315
316 dataptr++; /* advance pointer to next column */
317 }
318 }
319
320 #ifdef DCT_SCALING_SUPPORTED
321
322
323 /*
324 * Perform the forward DCT on a 7x7 sample block.
325 */
326
327 GLOBAL(void)
328 jpeg_fdct_7x7 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
329 {
330 INT32 tmp0, tmp1, tmp2, tmp3;
331 INT32 tmp10, tmp11, tmp12;
332 INT32 z1, z2, z3;
333 DCTELEM *dataptr;
334 JSAMPROW elemptr;
335 int ctr;
336 SHIFT_TEMPS
337
338 /* Pre-zero output coefficient block. */
339 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
340
341 /* Pass 1: process rows. */
342 /* Note results are scaled up by sqrt(8) compared to a true DCT; */
343 /* furthermore, we scale the results by 2**PASS1_BITS. */
344 /* cK represents sqrt(2) * cos(K*pi/14). */
345
346 dataptr = data;
347 for (ctr = 0; ctr < 7; ctr++) {
348 elemptr = sample_data[ctr] + start_col;
349
350 /* Even part */
351
352 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[6]);
353 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[5]);
354 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[4]);
355 tmp3 = GETJSAMPLE(elemptr[3]);
356
357 tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[6]);
358 tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[5]);
359 tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[4]);
360
361 z1 = tmp0 + tmp2;
362 /* Apply unsigned->signed conversion */
363 dataptr[0] = (DCTELEM)
364 ((z1 + tmp1 + tmp3 - 7 * CENTERJSAMPLE) << PASS1_BITS);
365 tmp3 += tmp3;
366 z1 -= tmp3;
367 z1 -= tmp3;
368 z1 = MULTIPLY(z1, FIX(0.353553391)); /* (c2+c6-c4)/2 */
369 z2 = MULTIPLY(tmp0 - tmp2, FIX(0.920609002)); /* (c2+c4-c6)/2 */
370 z3 = MULTIPLY(tmp1 - tmp2, FIX(0.314692123)); /* c6 */
371 dataptr[2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS-PASS1_BITS);
372 z1 -= z2;
373 z2 = MULTIPLY(tmp0 - tmp1, FIX(0.881747734)); /* c4 */
374 dataptr[4] = (DCTELEM)
375 DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.707106781)), /* c2+c6-c4 */
376 CONST_BITS-PASS1_BITS);
377 dataptr[6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS-PASS1_BITS);
378
379 /* Odd part */
380
381 tmp1 = MULTIPLY(tmp10 + tmp11, FIX(0.935414347)); /* (c3+c1-c5)/2 */
382 tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.170262339)); /* (c3+c5-c1)/2 */
455 }
456
457
458 /*
459 * Perform the forward DCT on a 6x6 sample block.
460 */
461
462 GLOBAL(void)
463 jpeg_fdct_6x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
464 {
465 INT32 tmp0, tmp1, tmp2;
466 INT32 tmp10, tmp11, tmp12;
467 DCTELEM *dataptr;
468 JSAMPROW elemptr;
469 int ctr;
470 SHIFT_TEMPS
471
472 /* Pre-zero output coefficient block. */
473 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
474
475 /* Pass 1: process rows. */
476 /* Note results are scaled up by sqrt(8) compared to a true DCT; */
477 /* furthermore, we scale the results by 2**PASS1_BITS. */
478 /* cK represents sqrt(2) * cos(K*pi/12). */
479
480 dataptr = data;
481 for (ctr = 0; ctr < 6; ctr++) {
482 elemptr = sample_data[ctr] + start_col;
483
484 /* Even part */
485
486 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]);
487 tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]);
488 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]);
489
490 tmp10 = tmp0 + tmp2;
491 tmp12 = tmp0 - tmp2;
492
493 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]);
494 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]);
495 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]);
496
497 /* Apply unsigned->signed conversion */
498 dataptr[0] = (DCTELEM)
499 ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << PASS1_BITS);
500 dataptr[2] = (DCTELEM)
501 DESCALE(MULTIPLY(tmp12, FIX(1.224744871)), /* c2 */
502 CONST_BITS-PASS1_BITS);
503 dataptr[4] = (DCTELEM)
504 DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */
505 CONST_BITS-PASS1_BITS);
506
507 /* Odd part */
508
509 tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)), /* c5 */
510 CONST_BITS-PASS1_BITS);
511
512 dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << PASS1_BITS));
513 dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << PASS1_BITS);
514 dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << PASS1_BITS));
515
516 dataptr += DCTSIZE; /* advance pointer to next row */
517 }
568 }
569
570
571 /*
572 * Perform the forward DCT on a 5x5 sample block.
573 */
574
575 GLOBAL(void)
576 jpeg_fdct_5x5 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
577 {
578 INT32 tmp0, tmp1, tmp2;
579 INT32 tmp10, tmp11;
580 DCTELEM *dataptr;
581 JSAMPROW elemptr;
582 int ctr;
583 SHIFT_TEMPS
584
585 /* Pre-zero output coefficient block. */
586 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
587
588 /* Pass 1: process rows. */
589 /* Note results are scaled up by sqrt(8) compared to a true DCT; */
590 /* furthermore, we scale the results by 2**PASS1_BITS. */
591 /* We scale the results further by 2 as part of output adaption */
592 /* scaling for different DCT size. */
593 /* cK represents sqrt(2) * cos(K*pi/10). */
594
595 dataptr = data;
596 for (ctr = 0; ctr < 5; ctr++) {
597 elemptr = sample_data[ctr] + start_col;
598
599 /* Even part */
600
601 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[4]);
602 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[3]);
603 tmp2 = GETJSAMPLE(elemptr[2]);
604
605 tmp10 = tmp0 + tmp1;
606 tmp11 = tmp0 - tmp1;
607
608 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[4]);
609 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[3]);
610
611 /* Apply unsigned->signed conversion */
612 dataptr[0] = (DCTELEM)
613 ((tmp10 + tmp2 - 5 * CENTERJSAMPLE) << (PASS1_BITS+1));
614 tmp11 = MULTIPLY(tmp11, FIX(0.790569415)); /* (c2+c4)/2 */
615 tmp10 -= tmp2 << 2;
616 tmp10 = MULTIPLY(tmp10, FIX(0.353553391)); /* (c2-c4)/2 */
617 dataptr[2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS-PASS1_BITS-1);
618 dataptr[4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS-PASS1_BITS-1);
619
620 /* Odd part */
621
622 tmp10 = MULTIPLY(tmp0 + tmp1, FIX(0.831253876)); /* c3 */
623
624 dataptr[1] = (DCTELEM)
625 DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.513743148)), /* c1-c3 */
626 CONST_BITS-PASS1_BITS-1);
627 dataptr[3] = (DCTELEM)
628 DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.176250899)), /* c1+c3 */
629 CONST_BITS-PASS1_BITS-1);
630
631 dataptr += DCTSIZE; /* advance pointer to next row */
678 }
679
680
681 /*
682 * Perform the forward DCT on a 4x4 sample block.
683 */
684
685 GLOBAL(void)
686 jpeg_fdct_4x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
687 {
688 INT32 tmp0, tmp1;
689 INT32 tmp10, tmp11;
690 DCTELEM *dataptr;
691 JSAMPROW elemptr;
692 int ctr;
693 SHIFT_TEMPS
694
695 /* Pre-zero output coefficient block. */
696 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
697
698 /* Pass 1: process rows. */
699 /* Note results are scaled up by sqrt(8) compared to a true DCT; */
700 /* furthermore, we scale the results by 2**PASS1_BITS. */
701 /* We must also scale the output by (8/4)**2 = 2**2, which we add here. */
702 /* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT]. */
703
704 dataptr = data;
705 for (ctr = 0; ctr < 4; ctr++) {
706 elemptr = sample_data[ctr] + start_col;
707
708 /* Even part */
709
710 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
711 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
712
713 tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
714 tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
715
716 /* Apply unsigned->signed conversion */
717 dataptr[0] = (DCTELEM)
718 ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+2));
719 dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+2));
720
721 /* Odd part */
722
723 tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */
724 /* Add fudge factor here for final descale. */
725 tmp0 += ONE << (CONST_BITS-PASS1_BITS-3);
726
727 dataptr[1] = (DCTELEM)
728 RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
729 CONST_BITS-PASS1_BITS-2);
730 dataptr[3] = (DCTELEM)
731 RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
732 CONST_BITS-PASS1_BITS-2);
733
734 dataptr += DCTSIZE; /* advance pointer to next row */
735 }
736
737 /* Pass 2: process columns.
738 * We remove the PASS1_BITS scaling, but leave the results scaled up
739 * by an overall factor of 8.
740 */
741
742 dataptr = data;
743 for (ctr = 0; ctr < 4; ctr++) {
744 /* Even part */
745
746 /* Add fudge factor here for final descale. */
747 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3] + (ONE << (PASS1_BITS-1));
748 tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2];
749
750 tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3];
751 tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2];
752
753 dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp0 + tmp1, PASS1_BITS);
754 dataptr[DCTSIZE*2] = (DCTELEM) RIGHT_SHIFT(tmp0 - tmp1, PASS1_BITS);
755
756 /* Odd part */
757
758 tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */
759 /* Add fudge factor here for final descale. */
770 }
771 }
772
773
774 /*
775 * Perform the forward DCT on a 3x3 sample block.
776 */
777
778 GLOBAL(void)
779 jpeg_fdct_3x3 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
780 {
781 INT32 tmp0, tmp1, tmp2;
782 DCTELEM *dataptr;
783 JSAMPROW elemptr;
784 int ctr;
785 SHIFT_TEMPS
786
787 /* Pre-zero output coefficient block. */
788 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
789
790 /* Pass 1: process rows. */
791 /* Note results are scaled up by sqrt(8) compared to a true DCT; */
792 /* furthermore, we scale the results by 2**PASS1_BITS. */
793 /* We scale the results further by 2**2 as part of output adaption */
794 /* scaling for different DCT size. */
795 /* cK represents sqrt(2) * cos(K*pi/6). */
796
797 dataptr = data;
798 for (ctr = 0; ctr < 3; ctr++) {
799 elemptr = sample_data[ctr] + start_col;
800
801 /* Even part */
802
803 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[2]);
804 tmp1 = GETJSAMPLE(elemptr[1]);
805
806 tmp2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[2]);
807
808 /* Apply unsigned->signed conversion */
809 dataptr[0] = (DCTELEM)
810 ((tmp0 + tmp1 - 3 * CENTERJSAMPLE) << (PASS1_BITS+2));
811 dataptr[2] = (DCTELEM)
812 DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(0.707106781)), /* c2 */
813 CONST_BITS-PASS1_BITS-2);
814
815 /* Odd part */
816
817 dataptr[1] = (DCTELEM)
818 DESCALE(MULTIPLY(tmp2, FIX(1.224744871)), /* c1 */
819 CONST_BITS-PASS1_BITS-2);
820
821 dataptr += DCTSIZE; /* advance pointer to next row */
822 }
823
824 /* Pass 2: process columns.
825 * We remove the PASS1_BITS scaling, but leave the results scaled up
826 * by an overall factor of 8.
827 * We must also scale the output by (8/3)**2 = 64/9, which we partially
828 * fold into the constant multipliers (other part was done in pass 1):
846 CONST_BITS+PASS1_BITS);
847
848 /* Odd part */
849
850 dataptr[DCTSIZE*1] = (DCTELEM)
851 DESCALE(MULTIPLY(tmp2, FIX(2.177324216)), /* c1 */
852 CONST_BITS+PASS1_BITS);
853
854 dataptr++; /* advance pointer to next column */
855 }
856 }
857
858
859 /*
860 * Perform the forward DCT on a 2x2 sample block.
861 */
862
863 GLOBAL(void)
864 jpeg_fdct_2x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
865 {
866 INT32 tmp0, tmp1, tmp2, tmp3;
867 JSAMPROW elemptr;
868
869 /* Pre-zero output coefficient block. */
870 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
871
872 /* Pass 1: process rows. */
873 /* Note results are scaled up by sqrt(8) compared to a true DCT. */
874
875 /* Row 0 */
876 elemptr = sample_data[0] + start_col;
877
878 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[1]);
879 tmp1 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[1]);
880
881 /* Row 1 */
882 elemptr = sample_data[1] + start_col;
883
884 tmp2 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[1]);
885 tmp3 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[1]);
886
887 /* Pass 2: process columns.
888 * We leave the results scaled up by an overall factor of 8.
889 * We must also scale the output by (8/2)**2 = 2**4.
890 */
891
892 /* Column 0 */
893 /* Apply unsigned->signed conversion */
894 data[DCTSIZE*0] = (DCTELEM) ((tmp0 + tmp2 - 4 * CENTERJSAMPLE) << 4);
895 data[DCTSIZE*1] = (DCTELEM) ((tmp0 - tmp2) << 4);
896
897 /* Column 1 */
898 data[DCTSIZE*0+1] = (DCTELEM) ((tmp1 + tmp3) << 4);
899 data[DCTSIZE*1+1] = (DCTELEM) ((tmp1 - tmp3) << 4);
900 }
901
902
903 /*
904 * Perform the forward DCT on a 1x1 sample block.
905 */
906
907 GLOBAL(void)
908 jpeg_fdct_1x1 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
909 {
910 /* Pre-zero output coefficient block. */
911 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
912
913 /* We leave the result scaled up by an overall factor of 8. */
914 /* We must also scale the output by (8/1)**2 = 2**6. */
915 /* Apply unsigned->signed conversion */
916 data[0] = (DCTELEM)
917 ((GETJSAMPLE(sample_data[0][start_col]) - CENTERJSAMPLE) << 6);
918 }
919
920
921 /*
922 * Perform the forward DCT on a 9x9 sample block.
923 */
924
925 GLOBAL(void)
926 jpeg_fdct_9x9 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
927 {
928 INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
929 INT32 tmp10, tmp11, tmp12, tmp13;
930 INT32 z1, z2;
931 DCTELEM workspace[8];
932 DCTELEM *dataptr;
933 DCTELEM *wsptr;
934 JSAMPROW elemptr;
935 int ctr;
936 SHIFT_TEMPS
937
938 /* Pass 1: process rows. */
939 /* Note results are scaled up by sqrt(8) compared to a true DCT; */
940 /* we scale the results further by 2 as part of output adaption */
941 /* scaling for different DCT size. */
942 /* cK represents sqrt(2) * cos(K*pi/18). */
943
944 dataptr = data;
945 ctr = 0;
946 for (;;) {
947 elemptr = sample_data[ctr] + start_col;
948
949 /* Even part */
950
951 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[8]);
952 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[7]);
953 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[6]);
954 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[5]);
955 tmp4 = GETJSAMPLE(elemptr[4]);
956
957 tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[8]);
958 tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[7]);
959 tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[6]);
960 tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[5]);
961
962 z1 = tmp0 + tmp2 + tmp3;
963 z2 = tmp1 + tmp4;
964 /* Apply unsigned->signed conversion */
965 dataptr[0] = (DCTELEM) ((z1 + z2 - 9 * CENTERJSAMPLE) << 1);
966 dataptr[6] = (DCTELEM)
967 DESCALE(MULTIPLY(z1 - z2 - z2, FIX(0.707106781)), /* c6 */
968 CONST_BITS-1);
969 z1 = MULTIPLY(tmp0 - tmp2, FIX(1.328926049)); /* c2 */
970 z2 = MULTIPLY(tmp1 - tmp4 - tmp4, FIX(0.707106781)); /* c6 */
971 dataptr[2] = (DCTELEM)
972 DESCALE(MULTIPLY(tmp2 - tmp3, FIX(1.083350441)) /* c4 */
973 + z1 + z2, CONST_BITS-1);
974 dataptr[4] = (DCTELEM)
975 DESCALE(MULTIPLY(tmp3 - tmp0, FIX(0.245575608)) /* c8 */
976 + z1 - z2, CONST_BITS-1);
977
978 /* Odd part */
979
980 dataptr[3] = (DCTELEM)
981 DESCALE(MULTIPLY(tmp10 - tmp12 - tmp13, FIX(1.224744871)), /* c3 */
982 CONST_BITS-1);
983
984 tmp11 = MULTIPLY(tmp11, FIX(1.224744871)); /* c3 */
1067 }
1068 }
1069
1070
1071 /*
1072 * Perform the forward DCT on a 10x10 sample block.
1073 */
1074
1075 GLOBAL(void)
1076 jpeg_fdct_10x10 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
1077 {
1078 INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
1079 INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
1080 DCTELEM workspace[8*2];
1081 DCTELEM *dataptr;
1082 DCTELEM *wsptr;
1083 JSAMPROW elemptr;
1084 int ctr;
1085 SHIFT_TEMPS
1086
1087 /* Pass 1: process rows. */
1088 /* Note results are scaled up by sqrt(8) compared to a true DCT; */
1089 /* we scale the results further by 2 as part of output adaption */
1090 /* scaling for different DCT size. */
1091 /* cK represents sqrt(2) * cos(K*pi/20). */
1092
1093 dataptr = data;
1094 ctr = 0;
1095 for (;;) {
1096 elemptr = sample_data[ctr] + start_col;
1097
1098 /* Even part */
1099
1100 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[9]);
1101 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[8]);
1102 tmp12 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[7]);
1103 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[6]);
1104 tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[5]);
1105
1106 tmp10 = tmp0 + tmp4;
1107 tmp13 = tmp0 - tmp4;
1108 tmp11 = tmp1 + tmp3;
1109 tmp14 = tmp1 - tmp3;
1110
1111 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[9]);
1112 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[8]);
1113 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[7]);
1114 tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[6]);
1115 tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[5]);
1116
1117 /* Apply unsigned->signed conversion */
1118 dataptr[0] = (DCTELEM)
1119 ((tmp10 + tmp11 + tmp12 - 10 * CENTERJSAMPLE) << 1);
1120 tmp12 += tmp12;
1121 dataptr[4] = (DCTELEM)
1122 DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.144122806)) - /* c4 */
1123 MULTIPLY(tmp11 - tmp12, FIX(0.437016024)), /* c8 */
1124 CONST_BITS-1);
1125 tmp10 = MULTIPLY(tmp13 + tmp14, FIX(0.831253876)); /* c6 */
1126 dataptr[2] = (DCTELEM)
1127 DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.513743148)), /* c2-c6 */
1128 CONST_BITS-1);
1129 dataptr[6] = (DCTELEM)
1130 DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.176250899)), /* c2+c6 */
1131 CONST_BITS-1);
1132
1133 /* Odd part */
1134
1135 tmp10 = tmp0 + tmp4;
1136 tmp11 = tmp1 - tmp3;
1137 dataptr[5] = (DCTELEM) ((tmp10 - tmp11 - tmp2) << 1);
1231 }
1232
1233
1234 /*
1235 * Perform the forward DCT on an 11x11 sample block.
1236 */
1237
1238 GLOBAL(void)
1239 jpeg_fdct_11x11 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
1240 {
1241 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
1242 INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
1243 INT32 z1, z2, z3;
1244 DCTELEM workspace[8*3];
1245 DCTELEM *dataptr;
1246 DCTELEM *wsptr;
1247 JSAMPROW elemptr;
1248 int ctr;
1249 SHIFT_TEMPS
1250
1251 /* Pass 1: process rows. */
1252 /* Note results are scaled up by sqrt(8) compared to a true DCT; */
1253 /* we scale the results further by 2 as part of output adaption */
1254 /* scaling for different DCT size. */
1255 /* cK represents sqrt(2) * cos(K*pi/22). */
1256
1257 dataptr = data;
1258 ctr = 0;
1259 for (;;) {
1260 elemptr = sample_data[ctr] + start_col;
1261
1262 /* Even part */
1263
1264 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[10]);
1265 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[9]);
1266 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[8]);
1267 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[7]);
1268 tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[6]);
1269 tmp5 = GETJSAMPLE(elemptr[5]);
1270
1271 tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[10]);
1272 tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[9]);
1273 tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[8]);
1274 tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[7]);
1275 tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[6]);
1276
1277 /* Apply unsigned->signed conversion */
1278 dataptr[0] = (DCTELEM)
1279 ((tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 - 11 * CENTERJSAMPLE) << 1);
1280 tmp5 += tmp5;
1281 tmp0 -= tmp5;
1282 tmp1 -= tmp5;
1283 tmp2 -= tmp5;
1284 tmp3 -= tmp5;
1285 tmp4 -= tmp5;
1286 z1 = MULTIPLY(tmp0 + tmp3, FIX(1.356927976)) + /* c2 */
1287 MULTIPLY(tmp2 + tmp4, FIX(0.201263574)); /* c10 */
1288 z2 = MULTIPLY(tmp1 - tmp3, FIX(0.926112931)); /* c6 */
1289 z3 = MULTIPLY(tmp0 - tmp1, FIX(1.189712156)); /* c4 */
1290 dataptr[2] = (DCTELEM)
1291 DESCALE(z1 + z2 - MULTIPLY(tmp3, FIX(1.018300590)) /* c2+c8-c6 */
1292 - MULTIPLY(tmp4, FIX(1.390975730)), /* c4+c10 */
1293 CONST_BITS-1);
1294 dataptr[4] = (DCTELEM)
1295 DESCALE(z2 + z3 + MULTIPLY(tmp1, FIX(0.062335650)) /* c4-c6-c10 */
1296 - MULTIPLY(tmp2, FIX(1.356927976)) /* c2 */
1297 + MULTIPLY(tmp4, FIX(0.587485545)), /* c8 */
1413 }
1414 }
1415
1416
1417 /*
1418 * Perform the forward DCT on a 12x12 sample block.
1419 */
1420
1421 GLOBAL(void)
1422 jpeg_fdct_12x12 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
1423 {
1424 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
1425 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
1426 DCTELEM workspace[8*4];
1427 DCTELEM *dataptr;
1428 DCTELEM *wsptr;
1429 JSAMPROW elemptr;
1430 int ctr;
1431 SHIFT_TEMPS
1432
1433 /* Pass 1: process rows. */
1434 /* Note results are scaled up by sqrt(8) compared to a true DCT. */
1435 /* cK represents sqrt(2) * cos(K*pi/24). */
1436
1437 dataptr = data;
1438 ctr = 0;
1439 for (;;) {
1440 elemptr = sample_data[ctr] + start_col;
1441
1442 /* Even part */
1443
1444 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[11]);
1445 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[10]);
1446 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[9]);
1447 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[8]);
1448 tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[7]);
1449 tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[6]);
1450
1451 tmp10 = tmp0 + tmp5;
1452 tmp13 = tmp0 - tmp5;
1453 tmp11 = tmp1 + tmp4;
1454 tmp14 = tmp1 - tmp4;
1455 tmp12 = tmp2 + tmp3;
1456 tmp15 = tmp2 - tmp3;
1457
1458 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[11]);
1459 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[10]);
1460 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[9]);
1461 tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[8]);
1462 tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[7]);
1463 tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[6]);
1464
1465 /* Apply unsigned->signed conversion */
1466 dataptr[0] = (DCTELEM) (tmp10 + tmp11 + tmp12 - 12 * CENTERJSAMPLE);
1467 dataptr[6] = (DCTELEM) (tmp13 - tmp14 - tmp15);
1468 dataptr[4] = (DCTELEM)
1469 DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.224744871)), /* c4 */
1470 CONST_BITS);
1471 dataptr[2] = (DCTELEM)
1472 DESCALE(tmp14 - tmp15 + MULTIPLY(tmp13 + tmp15, FIX(1.366025404)), /* c2 */
1473 CONST_BITS);
1474
1475 /* Odd part */
1476
1477 tmp10 = MULTIPLY(tmp1 + tmp4, FIX_0_541196100); /* c9 */
1478 tmp14 = tmp10 + MULTIPLY(tmp1, FIX_0_765366865); /* c3-c9 */
1479 tmp15 = tmp10 - MULTIPLY(tmp4, FIX_1_847759065); /* c3+c9 */
1480 tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.121971054)); /* c5 */
1481 tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.860918669)); /* c7 */
1482 tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.580774953)) /* c5+c7-c1 */
1483 + MULTIPLY(tmp5, FIX(0.184591911)); /* c11 */
1484 tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.184591911)); /* -c11 */
1485 tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.339493912)) /* c1+c5-c11 */
1579 }
1580
1581
1582 /*
1583 * Perform the forward DCT on a 13x13 sample block.
1584 */
1585
1586 GLOBAL(void)
1587 jpeg_fdct_13x13 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
1588 {
1589 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
1590 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
1591 INT32 z1, z2;
1592 DCTELEM workspace[8*5];
1593 DCTELEM *dataptr;
1594 DCTELEM *wsptr;
1595 JSAMPROW elemptr;
1596 int ctr;
1597 SHIFT_TEMPS
1598
1599 /* Pass 1: process rows. */
1600 /* Note results are scaled up by sqrt(8) compared to a true DCT. */
1601 /* cK represents sqrt(2) * cos(K*pi/26). */
1602
1603 dataptr = data;
1604 ctr = 0;
1605 for (;;) {
1606 elemptr = sample_data[ctr] + start_col;
1607
1608 /* Even part */
1609
1610 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[12]);
1611 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[11]);
1612 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[10]);
1613 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[9]);
1614 tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[8]);
1615 tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[7]);
1616 tmp6 = GETJSAMPLE(elemptr[6]);
1617
1618 tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[12]);
1619 tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[11]);
1620 tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[10]);
1621 tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[9]);
1622 tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[8]);
1623 tmp15 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[7]);
1624
1625 /* Apply unsigned->signed conversion */
1626 dataptr[0] = (DCTELEM)
1627 (tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 + tmp6 - 13 * CENTERJSAMPLE);
1628 tmp6 += tmp6;
1629 tmp0 -= tmp6;
1630 tmp1 -= tmp6;
1631 tmp2 -= tmp6;
1632 tmp3 -= tmp6;
1633 tmp4 -= tmp6;
1634 tmp5 -= tmp6;
1635 dataptr[2] = (DCTELEM)
1636 DESCALE(MULTIPLY(tmp0, FIX(1.373119086)) + /* c2 */
1637 MULTIPLY(tmp1, FIX(1.058554052)) + /* c6 */
1638 MULTIPLY(tmp2, FIX(0.501487041)) - /* c10 */
1639 MULTIPLY(tmp3, FIX(0.170464608)) - /* c12 */
1640 MULTIPLY(tmp4, FIX(0.803364869)) - /* c8 */
1641 MULTIPLY(tmp5, FIX(1.252223920)), /* c4 */
1642 CONST_BITS);
1643 z1 = MULTIPLY(tmp0 - tmp2, FIX(1.155388986)) - /* (c4+c6)/2 */
1644 MULTIPLY(tmp3 - tmp4, FIX(0.435816023)) - /* (c2-c10)/2 */
1645 MULTIPLY(tmp1 - tmp5, FIX(0.316450131)); /* (c8-c12)/2 */
1777 }
1778 }
1779
1780
1781 /*
1782 * Perform the forward DCT on a 14x14 sample block.
     *
     * data:        coefficient buffer; pass 1 stores its row results here,
     *              starting at data[0].
     * sample_data: array of sample rows; row ctr is taken from sample_data[ctr].
     * start_col:   offset added to every row pointer, so each row is read from
     *              columns start_col .. start_col+13.
1783 */
1784
1785 GLOBAL(void)
1786 jpeg_fdct_14x14 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
1787 {
1788 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
1789 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
     /* NOTE(review): workspace and wsptr are not referenced by any statement
      * visible in this listing; presumably they hold the pass-1 rows that do
      * not fit into the data block, as in jpeg_fdct_16x16 -- confirm against
      * the full source.
      */
1790 DCTELEM workspace[8*6];
1791 DCTELEM *dataptr;
1792 DCTELEM *wsptr;
1793 JSAMPROW elemptr;
1794 int ctr;
1795 SHIFT_TEMPS
1796
1797 /* Pass 1: process rows. */
1798 /* Note results are scaled up by sqrt(8) compared to a true DCT. */
1799 /* cK represents sqrt(2) * cos(K*pi/28). */
     /* In the statements visible below no PASS1_BITS up-scaling is applied: */
     /* dataptr[0] is stored unshifted and products are descaled by CONST_BITS. */
1800
1801 dataptr = data;
1802 ctr = 0;
1803 for (;;) {
     /* First sample of the current row inside the requested column range. */
1804 elemptr = sample_data[ctr] + start_col;
1805
1806 /* Even part */
1807
     /* Sums of mirrored samples k and 13-k.  The fourth sum is the middle one
      * of the seven and has no partner in the folding step below, so it is
      * stored directly in tmp13.
      */
1808 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[13]);
1809 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[12]);
1810 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[11]);
1811 tmp13 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[10]);
1812 tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[9]);
1813 tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[8]);
1814 tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[7]);
1815
     /* Fold the pair sums once more: tmp10..tmp12 are sums of two pair sums,
      * tmp14..tmp16 the matching differences.
      */
1816 tmp10 = tmp0 + tmp6;
1817 tmp14 = tmp0 - tmp6;
1818 tmp11 = tmp1 + tmp5;
1819 tmp15 = tmp1 - tmp5;
1820 tmp12 = tmp2 + tmp4;
1821 tmp16 = tmp2 - tmp4;
1822
     /* tmp0..tmp6 are reused for the differences of the same mirrored pairs.
      * None of the statements visible here reads them; presumably they feed
      * the odd part.
      */
1823 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[13]);
1824 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[12]);
1825 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[11]);
1826 tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[10]);
1827 tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[9]);
1828 tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[8]);
1829 tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[7]);
1830
1831 /* Apply unsigned->signed conversion */
     /* tmp10 + tmp11 + tmp12 + tmp13 is the sum of all 14 samples, hence the
      * 14 * CENTERJSAMPLE offset.  The other even outputs below are built from
      * differences with equal sample counts on both sides, so they need no
      * offset.
      */
1832 dataptr[0] = (DCTELEM)
1833 (tmp10 + tmp11 + tmp12 + tmp13 - 14 * CENTERJSAMPLE);
     /* Double the middle pair sum so it covers four samples, like each of
      * tmp10..tmp12 it is subtracted from.
      */
1834 tmp13 += tmp13;
1835 dataptr[4] = (DCTELEM)
1836 DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.274162392)) + /* c4 */
1837 MULTIPLY(tmp11 - tmp13, FIX(0.314692123)) - /* c12 */
1838 MULTIPLY(tmp12 - tmp13, FIX(0.881747734)), /* c8 */
1839 CONST_BITS);
1840
     /* Product shared by dataptr[2] and dataptr[6]; tmp10 is reused for it. */
1841 tmp10 = MULTIPLY(tmp14 + tmp15, FIX(1.105676686)); /* c6 */
1842
1843 dataptr[2] = (DCTELEM)
1844 DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.273079590)) /* c2-c6 */
1845 + MULTIPLY(tmp16, FIX(0.613604268)), /* c10 */
1846 CONST_BITS);
1847 dataptr[6] = (DCTELEM)
1848 DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.719280954)) /* c6+c10 */
1849 - MULTIPLY(tmp16, FIX(1.378756276)), /* c2 */
1850 CONST_BITS);
1851
     /* NOTE(review): the listing jumps from line 1850 to line 1978 here.  The
      * rest of the body, including the exit from the for (;;) loop above, is
      * not visible and is therefore left unannotated.
      */
1978 }
1979
1980
1981 /*
1982 * Perform the forward DCT on a 15x15 sample block.
     *
     * data:        coefficient buffer; pass 1 stores its row results here,
     *              starting at data[0].
     * sample_data: array of sample rows; row ctr is taken from sample_data[ctr].
     * start_col:   offset added to every row pointer, so each row is read from
     *              columns start_col .. start_col+14.
1983 */
1984
1985 GLOBAL(void)
1986 jpeg_fdct_15x15 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
1987 {
1988 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
1989 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
1990 INT32 z1, z2, z3;
     /* NOTE(review): workspace and wsptr are not referenced by any statement
      * visible in this listing; presumably they hold the pass-1 rows that do
      * not fit into the data block, as in jpeg_fdct_16x16 -- confirm against
      * the full source.
      */
1991 DCTELEM workspace[8*7];
1992 DCTELEM *dataptr;
1993 DCTELEM *wsptr;
1994 JSAMPROW elemptr;
1995 int ctr;
1996 SHIFT_TEMPS
1997
1998 /* Pass 1: process rows. */
1999 /* Note results are scaled up by sqrt(8) compared to a true DCT. */
2000 /* cK represents sqrt(2) * cos(K*pi/30). */
     /* In the statements visible below no PASS1_BITS up-scaling is applied: */
     /* dataptr[0] is stored unshifted and products are descaled by CONST_BITS. */
2001
2002 dataptr = data;
2003 ctr = 0;
2004 for (;;) {
     /* First sample of the current row inside the requested column range. */
2005 elemptr = sample_data[ctr] + start_col;
2006
2007 /* Even part */
2008
     /* Sums of mirrored samples k and 14-k; the centre sample (index 7) has no
      * mirror partner and is kept on its own in tmp7.
      */
2009 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[14]);
2010 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[13]);
2011 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[12]);
2012 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[11]);
2013 tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[10]);
2014 tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[9]);
2015 tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[8]);
2016 tmp7 = GETJSAMPLE(elemptr[7]);
2017
     /* Differences of the same mirrored pairs.  None of the statements visible
      * here reads tmp10..tmp16; presumably they feed the odd part.
      */
2018 tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[14]);
2019 tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[13]);
2020 tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[12]);
2021 tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[11]);
2022 tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[10]);
2023 tmp15 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[9]);
2024 tmp16 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[8]);
2025
     /* Three partial sums: z1 and z2 cover six samples each, z3 covers three
      * (one pair plus the centre sample).
      */
2026 z1 = tmp0 + tmp4 + tmp5;
2027 z2 = tmp1 + tmp3 + tmp6;
2028 z3 = tmp2 + tmp7;
2029 /* Apply unsigned->signed conversion */
     /* z1 + z2 + z3 is the sum of all 15 samples, hence 15 * CENTERJSAMPLE. */
2030 dataptr[0] = (DCTELEM) (z1 + z2 + z3 - 15 * CENTERJSAMPLE);
     /* Double z3 so it covers six samples, like z1 and z2 it is subtracted from. */
2031 z3 += z3;
2032 dataptr[6] = (DCTELEM)
2033 DESCALE(MULTIPLY(z1 - z3, FIX(1.144122806)) - /* c6 */
2034 MULTIPLY(z2 - z3, FIX(0.437016024)), /* c12 */
2035 CONST_BITS);
     /* tmp2 is rebuilt as a combined term that z1 and z2 below subtract from
      * tmp3, tmp6, tmp5 and tmp0; z1..z3 are reused as accumulators.
      */
2036 tmp2 += ((tmp1 + tmp4) >> 1) - tmp7 - tmp7;
2037 z1 = MULTIPLY(tmp3 - tmp2, FIX(1.531135173)) - /* c2+c14 */
2038 MULTIPLY(tmp6 - tmp2, FIX(2.238241955)); /* c4+c8 */
2039 z2 = MULTIPLY(tmp5 - tmp2, FIX(0.798468008)) - /* c8-c14 */
2040 MULTIPLY(tmp0 - tmp2, FIX(0.091361227)); /* c2-c4 */
2041 z3 = MULTIPLY(tmp0 - tmp3, FIX(1.383309603)) + /* c2 */
2042 MULTIPLY(tmp6 - tmp5, FIX(0.946293579)) + /* c8 */
2043 MULTIPLY(tmp1 - tmp4, FIX(0.790569415)); /* (c6+c12)/2 */
2044
     /* z3 is the part common to both outputs. */
2045 dataptr[2] = (DCTELEM) DESCALE(z1 + z3, CONST_BITS);
2046 dataptr[4] = (DCTELEM) DESCALE(z2 + z3, CONST_BITS);
2047
2048 /* Odd part */
2049
     /* NOTE(review): the listing jumps from line 2049 to line 2156 here.  The
      * rest of the body, including the exit from the for (;;) loop above, is
      * not visible and is therefore left unannotated.
      */
2156 }
2157 }
2158
2159
2160 /*
2161 * Perform the forward DCT on a 16x16 sample block.
     *
     * data:        coefficient buffer; receives the pass-1 results of the first
     *              DCTSIZE sample rows and is read back column-wise in pass 2.
     * sample_data: array of sample rows; rows 0 .. 2*DCTSIZE-1 are read.
     * start_col:   offset added to every row pointer, so each row is read from
     *              columns start_col .. start_col+15.
2162 */
2163
2164 GLOBAL(void)
2165 jpeg_fdct_16x16 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
2166 {
2167 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
2168 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17;
     /* Holds the pass-1 results of the second DCTSIZE sample rows (see the
      * pointer switch at the end of the pass-1 loop).
      */
2169 DCTELEM workspace[DCTSIZE2];
2170 DCTELEM *dataptr;
2171 DCTELEM *wsptr;
2172 JSAMPROW elemptr;
2173 int ctr;
2174 SHIFT_TEMPS
2175
2176 /* Pass 1: process rows. */
2177 /* Note results are scaled up by sqrt(8) compared to a true DCT; */
2178 /* furthermore, we scale the results by 2**PASS1_BITS. */
2179 /* cK represents sqrt(2) * cos(K*pi/32). */
2180
2181 dataptr = data;
2182 ctr = 0;
2183 for (;;) {
     /* First sample of the current row inside the requested column range. */
2184 elemptr = sample_data[ctr] + start_col;
2185
2186 /* Even part */
2187
     /* Sums of mirrored samples k and 15-k. */
2188 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[15]);
2189 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[14]);
2190 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[13]);
2191 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[12]);
2192 tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[11]);
2193 tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[10]);
2194 tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[9]);
2195 tmp7 = GETJSAMPLE(elemptr[7]) + GETJSAMPLE(elemptr[8]);
2196
     /* Fold the eight pair sums once more: tmp10..tmp13 are sums,
      * tmp14..tmp17 the matching differences.
      */
2197 tmp10 = tmp0 + tmp7;
2198 tmp14 = tmp0 - tmp7;
2199 tmp11 = tmp1 + tmp6;
2200 tmp15 = tmp1 - tmp6;
2201 tmp12 = tmp2 + tmp5;
2202 tmp16 = tmp2 - tmp5;
2203 tmp13 = tmp3 + tmp4;
2204 tmp17 = tmp3 - tmp4;
2205
     /* tmp0..tmp7 are reused for the differences of the same mirrored pairs.
      * None of the statements visible here reads them; presumably they feed
      * the odd part.
      */
2206 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[15]);
2207 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[14]);
2208 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[13]);
2209 tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[12]);
2210 tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[11]);
2211 tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[10]);
2212 tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[9]);
2213 tmp7 = GETJSAMPLE(elemptr[7]) - GETJSAMPLE(elemptr[8]);
2214
2215 /* Apply unsigned->signed conversion */
     /* tmp10 + tmp11 + tmp12 + tmp13 is the sum of all 16 samples, hence the
      * 16 * CENTERJSAMPLE offset.  The left shift applies the 2**PASS1_BITS
      * scaling; the products below get the same scaling by descaling with
      * CONST_BITS-PASS1_BITS instead of CONST_BITS.
      */
2216 dataptr[0] = (DCTELEM)
2217 ((tmp10 + tmp11 + tmp12 + tmp13 - 16 * CENTERJSAMPLE) << PASS1_BITS);
2218 dataptr[4] = (DCTELEM)
2219 DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
2220 MULTIPLY(tmp11 - tmp12, FIX_0_541196100), /* c12[16] = c6[8] */
2221 CONST_BITS-PASS1_BITS);
2222
     /* Part shared by dataptr[2] and dataptr[6]; tmp10 is reused for it. */
2223 tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) + /* c14[16] = c7[8] */
2224 MULTIPLY(tmp14 - tmp16, FIX(1.387039845)); /* c2[16] = c1[8] */
2225
2226 dataptr[2] = (DCTELEM)
2227 DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982)) /* c6+c14 */
2228 + MULTIPLY(tmp16, FIX(2.172734804)), /* c2+c10 */
2229 CONST_BITS-PASS1_BITS);
2230 dataptr[6] = (DCTELEM)
2231 DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243)) /* c2-c6 */
2232 - MULTIPLY(tmp17, FIX(1.061594338)), /* c10+c14 */
2233 CONST_BITS-PASS1_BITS);
2234
2235 /* Odd part */
     /* NOTE(review): the listing jumps from line 2235 to line 2258 here; the
      * statements that compute the tmp10..tmp13 values stored below are not
      * visible.
      */
2258
2259 dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
2260 dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
2261 dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
2262 dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
2263
2264 ctr++;
2265
     /* Row bookkeeping: while fewer than DCTSIZE rows are done the output
      * pointer steps through data; after exactly DCTSIZE rows it is moved to
      * workspace, where the next DCTSIZE rows are stored; the loop ends once
      * 2*DCTSIZE rows have been processed.
      */
2266 if (ctr != DCTSIZE) {
2267 if (ctr == DCTSIZE * 2)
2268 break; /* Done. */
2269 dataptr += DCTSIZE; /* advance pointer to next row */
2270 } else
2271 dataptr = workspace; /* switch pointer to extended workspace */
2272 }
2273
2274 /* Pass 2: process columns.
2275 * We remove the PASS1_BITS scaling, but leave the results scaled up
2276 * by an overall factor of 8.
2277 * We must also scale the output by (8/16)**2 = 1/2**2.
2278 */
2279
2280 dataptr = data;
2281 wsptr = workspace;
2282 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
2283 /* Even part */
2284
     /* Column sums of mirrored rows: row k of data is paired with row 7-k of
      * workspace.  Since workspace row j holds the pass-1 result of sample row
      * DCTSIZE+j, this pairs sample row k with sample row 15-k (assuming
      * DCTSIZE is 8, as the literal 7 implies).
      */
2285 tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*7];
2286 tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*6];
2287 tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*5];
2288 tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*4];
2289 tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*3];
2290 tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*2];
2291 tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*1];
2292 tmp7 = dataptr[DCTSIZE*7] + wsptr[DCTSIZE*0];
2293
2294 tmp10 = tmp0 + tmp7;
2295 tmp14 = tmp0 - tmp7;
2296 tmp11 = tmp1 + tmp6;
2297 tmp15 = tmp1 - tmp6;
     /* NOTE(review): the listing jumps from line 2297 to line 2363 here; the
      * rest of pass 2 is not visible and is therefore left unannotated.
      */
2363 }
2364
2365
2366 /*
2367 * Perform the forward DCT on a 16x8 sample block.
2368 *
2369 * 16-point FDCT in pass 1 (rows), 8-point in pass 2 (columns).
     *
     * data:        coefficient buffer; pass 1 writes DCTSIZE rows of 8 values
     *              into it, pass 2 transforms its columns in place.
     * sample_data: array of sample rows; rows 0 .. DCTSIZE-1 are read.
     * start_col:   offset added to every row pointer, so each row is read from
     *              columns start_col .. start_col+15.
2370 */
2371
2372 GLOBAL(void)
2373 jpeg_fdct_16x8 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
2374 {
2375 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
2376 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17;
2377 INT32 z1;
2378 DCTELEM *dataptr;
2379 JSAMPROW elemptr;
2380 int ctr;
2381 SHIFT_TEMPS
2382
2383 /* Pass 1: process rows. */
2384 /* Note results are scaled up by sqrt(8) compared to a true DCT; */
2385 /* furthermore, we scale the results by 2**PASS1_BITS. */
2386 /* 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32). */
2387
2388 dataptr = data;
     /* NOTE(review): this assignment is redundant; the for statement on the
      * next line initializes ctr to 0 again.
      */
2389 ctr = 0;
2390 for (ctr = 0; ctr < DCTSIZE; ctr++) {
     /* First sample of the current row inside the requested column range. */
2391 elemptr = sample_data[ctr] + start_col;
2392
2393 /* Even part */
2394
     /* Sums of mirrored samples k and 15-k. */
2395 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[15]);
2396 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[14]);
2397 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[13]);
2398 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[12]);
2399 tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[11]);
2400 tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[10]);
2401 tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[9]);
2402 tmp7 = GETJSAMPLE(elemptr[7]) + GETJSAMPLE(elemptr[8]);
2403
     /* Fold the eight pair sums once more: tmp10..tmp13 are sums,
      * tmp14..tmp17 the matching differences.
      */
2404 tmp10 = tmp0 + tmp7;
2405 tmp14 = tmp0 - tmp7;
2406 tmp11 = tmp1 + tmp6;
2407 tmp15 = tmp1 - tmp6;
2408 tmp12 = tmp2 + tmp5;
2409 tmp16 = tmp2 - tmp5;
2410 tmp13 = tmp3 + tmp4;
2411 tmp17 = tmp3 - tmp4;
2412
     /* tmp0..tmp7 are reused for the differences of the same mirrored pairs;
      * the visible tail of the odd part below multiplies them by the odd
      * coefficients.
      */
2413 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[15]);
2414 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[14]);
2415 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[13]);
2416 tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[12]);
2417 tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[11]);
2418 tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[10]);
2419 tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[9]);
2420 tmp7 = GETJSAMPLE(elemptr[7]) - GETJSAMPLE(elemptr[8]);
2421
2422 /* Apply unsigned->signed conversion */
     /* tmp10 + tmp11 + tmp12 + tmp13 is the sum of all 16 samples, hence the
      * 16 * CENTERJSAMPLE offset.  The left shift applies the 2**PASS1_BITS
      * scaling; the products below get the same scaling by descaling with
      * CONST_BITS-PASS1_BITS instead of CONST_BITS.
      */
2423 dataptr[0] = (DCTELEM)
2424 ((tmp10 + tmp11 + tmp12 + tmp13 - 16 * CENTERJSAMPLE) << PASS1_BITS);
2425 dataptr[4] = (DCTELEM)
2426 DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
2427 MULTIPLY(tmp11 - tmp12, FIX_0_541196100), /* c12[16] = c6[8] */
2428 CONST_BITS-PASS1_BITS);
2429
     /* Part shared by dataptr[2] and dataptr[6]; tmp10 is reused for it. */
2430 tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) + /* c14[16] = c7[8] */
2431 MULTIPLY(tmp14 - tmp16, FIX(1.387039845)); /* c2[16] = c1[8] */
2432
2433 dataptr[2] = (DCTELEM)
2434 DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982)) /* c6+c14 */
2435 + MULTIPLY(tmp16, FIX(2.172734804)), /* c2+c10 */
2436 CONST_BITS-PASS1_BITS);
2437 dataptr[6] = (DCTELEM)
2438 DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243)) /* c2-c6 */
2439 - MULTIPLY(tmp17, FIX(1.061594338)), /* c10+c14 */
2440 CONST_BITS-PASS1_BITS);
2441
2442 /* Odd part */
     /* NOTE(review): the listing jumps from line 2442 to line 2458 here; the
      * next visible line is the tail of a statement whose beginning, like the
      * statements that first set tmp10..tmp16 for the odd part, is not shown.
      */
2458 MULTIPLY(tmp7, FIX(0.779653625)); /* c15+c13-c11+c9 */
2459 tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
2460 - MULTIPLY(tmp6, FIX(1.663905119)); /* c7+c13+c1-c5 */
2461 tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
2462 + MULTIPLY(tmp5, FIX(1.227391138)); /* c9-c11+c1-c13 */
2463 tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
2464 + MULTIPLY(tmp4, FIX(2.167985692)); /* c1+c13+c5-c9 */
2465
     /* Only the first 8 of the 16 row coefficients are kept; the odd ones come
      * from the accumulators tmp10..tmp13.
      */
2466 dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
2467 dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
2468 dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
2469 dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
2470
2471 dataptr += DCTSIZE; /* advance pointer to next row */
2472 }
2473
2474 /* Pass 2: process columns.
2475 * We remove the PASS1_BITS scaling, but leave the results scaled up
2476 * by an overall factor of 8.
2477 * We must also scale the output by 8/16 = 1/2.
     * That factor of 1/2 is the extra "+1" in every descale shift of this pass.
2478 */
2479
2480 dataptr = data;
2481 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
2482 /* Even part per LL&M figure 1 --- note that published figure is faulty;
2483 * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
2484 */
2485
     /* Sums of mirrored rows k and 7-k of the current column. */
2486 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
2487 tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
2488 tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
2489 tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
2490
2491 tmp10 = tmp0 + tmp3;
2492 tmp12 = tmp0 - tmp3;
2493 tmp11 = tmp1 + tmp2;
2494 tmp13 = tmp1 - tmp2;
2495
     /* tmp0..tmp3 are reused for the differences; they must be computed before
      * the stores below overwrite the column.
      */
2496 tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
2497 tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
2498 tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
2499 tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
2500
     /* No multiplication on this path, so only PASS1_BITS+1 bits are dropped. */
2501 dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS+1);
2502 dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS+1);
2503
     /* z1 is the product shared by rows 2 and 6. */
2504 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
2505 dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865),
2506 CONST_BITS+PASS1_BITS+1);
2507 dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065),
2508 CONST_BITS+PASS1_BITS+1);
2509
2510 /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
2511 * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
2512 * i0..i3 in the paper are tmp0..tmp3 here.
2513 */
2514
2515 tmp10 = tmp0 + tmp3;
2516 tmp11 = tmp1 + tmp2;
2517 tmp12 = tmp0 + tmp2;
2518 tmp13 = tmp1 + tmp3;
2519 z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
2520
2521 tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
2522 tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
2523 tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
2524 tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
2525 tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */
2526 tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */
2527 tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */
2528 tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
2529
     /* Fold the shared c3 product into the two terms that every odd output uses. */
2530 tmp12 += z1;
2531 tmp13 += z1;
2532
2533 dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0 + tmp10 + tmp12,
2534 CONST_BITS+PASS1_BITS+1);
2535 dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1 + tmp11 + tmp13,
2536 CONST_BITS+PASS1_BITS+1);
2537 dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2 + tmp11 + tmp12,
2538 CONST_BITS+PASS1_BITS+1);
2539 dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3 + tmp10 + tmp13,
2540 CONST_BITS+PASS1_BITS+1);
2541
2542 dataptr++; /* advance pointer to next column */
2543 }
2544 }
2545
2546
2547 /*
2548 * Perform the forward DCT on a 14x7 sample block.
2549 *
2550 * 14-point FDCT in pass 1 (rows), 7-point in pass 2 (columns).
2551 */
2552
2553 GLOBAL(void)
2554 jpeg_fdct_14x7 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
2555 {
2556 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
2557 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
2558 INT32 z1, z2, z3;
2559 DCTELEM *dataptr;
2560 JSAMPROW elemptr;
2561 int ctr;
2562 SHIFT_TEMPS
2563
2564 /* Zero bottom row of output coefficient block. */
2565 MEMZERO(&data[DCTSIZE*7], SIZEOF(DCTELEM) * DCTSIZE);
2566
2567 /* Pass 1: process rows. */
2568 /* Note results are scaled up by sqrt(8) compared to a true DCT; */
2569 /* furthermore, we scale the results by 2**PASS1_BITS. */
2570 /* 14-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/28). */
2571
2572 dataptr = data;
2573 for (ctr = 0; ctr < 7; ctr++) {
2574 elemptr = sample_data[ctr] + start_col;
2575
2576 /* Even part */
2577
2578 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[13]);
2579 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[12]);
2580 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[11]);
2581 tmp13 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[10]);
2582 tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[9]);
2583 tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[8]);
2584 tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[7]);
2585
2586 tmp10 = tmp0 + tmp6;
2587 tmp14 = tmp0 - tmp6;
2588 tmp11 = tmp1 + tmp5;
2589 tmp15 = tmp1 - tmp5;
2590 tmp12 = tmp2 + tmp4;
2591 tmp16 = tmp2 - tmp4;
2592
2593 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[13]);
2594 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[12]);
2595 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[11]);
2596 tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[10]);
2597 tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[9]);
2598 tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[8]);
2599 tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[7]);
2600
2601 /* Apply unsigned->signed conversion */
2602 dataptr[0] = (DCTELEM)
2603 ((tmp10 + tmp11 + tmp12 + tmp13 - 14 * CENTERJSAMPLE) << PASS1_BITS);
2604 tmp13 += tmp13;
2605 dataptr[4] = (DCTELEM)
2606 DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.274162392)) + /* c4 */
2607 MULTIPLY(tmp11 - tmp13, FIX(0.314692123)) - /* c12 */
2608 MULTIPLY(tmp12 - tmp13, FIX(0.881747734)), /* c8 */
2609 CONST_BITS-PASS1_BITS);
2610
2611 tmp10 = MULTIPLY(tmp14 + tmp15, FIX(1.105676686)); /* c6 */
2612
2613 dataptr[2] = (DCTELEM)
2614 DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.273079590)) /* c2-c6 */
2615 + MULTIPLY(tmp16, FIX(0.613604268)), /* c10 */
2616 CONST_BITS-PASS1_BITS);
2617 dataptr[6] = (DCTELEM)
2618 DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.719280954)) /* c6+c10 */
2619 - MULTIPLY(tmp16, FIX(1.378756276)), /* c2 */
2620 CONST_BITS-PASS1_BITS);
2621
2710
2711 /*
2712 * Perform the forward DCT on a 12x6 sample block.
2713 *
2714 * 12-point FDCT in pass 1 (rows), 6-point in pass 2 (columns).
2715 */
2716
2717 GLOBAL(void)
2718 jpeg_fdct_12x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
2719 {
2720 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
2721 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
2722 DCTELEM *dataptr;
2723 JSAMPROW elemptr;
2724 int ctr;
2725 SHIFT_TEMPS
2726
2727 /* Zero 2 bottom rows of output coefficient block. */
2728 MEMZERO(&data[DCTSIZE*6], SIZEOF(DCTELEM) * DCTSIZE * 2);
2729
2730 /* Pass 1: process rows. */
2731 /* Note results are scaled up by sqrt(8) compared to a true DCT; */
2732 /* furthermore, we scale the results by 2**PASS1_BITS. */
2733 /* 12-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/24). */
2734
2735 dataptr = data;
2736 for (ctr = 0; ctr < 6; ctr++) {
2737 elemptr = sample_data[ctr] + start_col;
2738
2739 /* Even part */
2740
2741 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[11]);
2742 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[10]);
2743 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[9]);
2744 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[8]);
2745 tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[7]);
2746 tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[6]);
2747
2748 tmp10 = tmp0 + tmp5;
2749 tmp13 = tmp0 - tmp5;
2750 tmp11 = tmp1 + tmp4;
2751 tmp14 = tmp1 - tmp4;
2752 tmp12 = tmp2 + tmp3;
2753 tmp15 = tmp2 - tmp3;
2754
2755 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[11]);
2756 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[10]);
2757 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[9]);
2758 tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[8]);
2759 tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[7]);
2760 tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[6]);
2761
2762 /* Apply unsigned->signed conversion */
2763 dataptr[0] = (DCTELEM)
2764 ((tmp10 + tmp11 + tmp12 - 12 * CENTERJSAMPLE) << PASS1_BITS);
2765 dataptr[6] = (DCTELEM) ((tmp13 - tmp14 - tmp15) << PASS1_BITS);
2766 dataptr[4] = (DCTELEM)
2767 DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.224744871)), /* c4 */
2768 CONST_BITS-PASS1_BITS);
2769 dataptr[2] = (DCTELEM)
2770 DESCALE(tmp14 - tmp15 + MULTIPLY(tmp13 + tmp15, FIX(1.366025404)), /* c2 */
2771 CONST_BITS-PASS1_BITS);
2772
2773 /* Odd part */
2774
2775 tmp10 = MULTIPLY(tmp1 + tmp4, FIX_0_541196100); /* c9 */
2776 tmp14 = tmp10 + MULTIPLY(tmp1, FIX_0_765366865); /* c3-c9 */
2777 tmp15 = tmp10 - MULTIPLY(tmp4, FIX_1_847759065); /* c3+c9 */
2778 tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.121971054)); /* c5 */
2779 tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.860918669)); /* c7 */
2780 tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.580774953)) /* c5+c7-c1 */
2781 + MULTIPLY(tmp5, FIX(0.184591911)); /* c11 */
2782 tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.184591911)); /* -c11 */
2849
2850 /*
2851 * Perform the forward DCT on a 10x5 sample block.
2852 *
2853 * 10-point FDCT in pass 1 (rows), 5-point in pass 2 (columns).
2854 */
2855
2856 GLOBAL(void)
2857 jpeg_fdct_10x5 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
2858 {
2859 INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
2860 INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
2861 DCTELEM *dataptr;
2862 JSAMPROW elemptr;
2863 int ctr;
2864 SHIFT_TEMPS
2865
2866 /* Zero 3 bottom rows of output coefficient block. */
2867 MEMZERO(&data[DCTSIZE*5], SIZEOF(DCTELEM) * DCTSIZE * 3);
2868
2869 /* Pass 1: process rows. */
2870 /* Note results are scaled up by sqrt(8) compared to a true DCT; */
2871 /* furthermore, we scale the results by 2**PASS1_BITS. */
2872 /* 10-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/20). */
2873
2874 dataptr = data;
2875 for (ctr = 0; ctr < 5; ctr++) {
2876 elemptr = sample_data[ctr] + start_col;
2877
2878 /* Even part */
2879
2880 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[9]);
2881 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[8]);
2882 tmp12 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[7]);
2883 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[6]);
2884 tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[5]);
2885
2886 tmp10 = tmp0 + tmp4;
2887 tmp13 = tmp0 - tmp4;
2888 tmp11 = tmp1 + tmp3;
2889 tmp14 = tmp1 - tmp3;
2890
2891 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[9]);
2892 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[8]);
2893 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[7]);
2894 tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[6]);
2895 tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[5]);
2896
2897 /* Apply unsigned->signed conversion */
2898 dataptr[0] = (DCTELEM)
2899 ((tmp10 + tmp11 + tmp12 - 10 * CENTERJSAMPLE) << PASS1_BITS);
2900 tmp12 += tmp12;
2901 dataptr[4] = (DCTELEM)
2902 DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.144122806)) - /* c4 */
2903 MULTIPLY(tmp11 - tmp12, FIX(0.437016024)), /* c8 */
2904 CONST_BITS-PASS1_BITS);
2905 tmp10 = MULTIPLY(tmp13 + tmp14, FIX(0.831253876)); /* c6 */
2906 dataptr[2] = (DCTELEM)
2907 DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.513743148)), /* c2-c6 */
2908 CONST_BITS-PASS1_BITS);
2909 dataptr[6] = (DCTELEM)
2910 DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.176250899)), /* c2+c6 */
2911 CONST_BITS-PASS1_BITS);
2912
2913 /* Odd part */
2914
2915 tmp10 = tmp0 + tmp4;
2916 tmp11 = tmp1 - tmp3;
2917 dataptr[5] = (DCTELEM) ((tmp10 - tmp11 - tmp2) << PASS1_BITS);
2982 /*
2983 * Perform the forward DCT on an 8x4 sample block.
2984 *
2985 * 8-point FDCT in pass 1 (rows), 4-point in pass 2 (columns).
2986 */
2987
2988 GLOBAL(void)
2989 jpeg_fdct_8x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
2990 {
2991 INT32 tmp0, tmp1, tmp2, tmp3;
2992 INT32 tmp10, tmp11, tmp12, tmp13;
2993 INT32 z1;
2994 DCTELEM *dataptr;
2995 JSAMPROW elemptr;
2996 int ctr;
2997 SHIFT_TEMPS
2998
2999 /* Zero 4 bottom rows of output coefficient block. */
3000 MEMZERO(&data[DCTSIZE*4], SIZEOF(DCTELEM) * DCTSIZE * 4);
3001
3002 /* Pass 1: process rows. */
3003 /* Note results are scaled up by sqrt(8) compared to a true DCT; */
3004 /* furthermore, we scale the results by 2**PASS1_BITS. */
3005 /* We must also scale the output by 8/4 = 2, which we add here. */
3006
3007 dataptr = data;
3008 for (ctr = 0; ctr < 4; ctr++) {
3009 elemptr = sample_data[ctr] + start_col;
3010
3011 /* Even part per LL&M figure 1 --- note that published figure is faulty;
3012 * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
3013 */
3014
3015 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
3016 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
3017 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
3018 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
3019
3020 tmp10 = tmp0 + tmp3;
3021 tmp12 = tmp0 - tmp3;
3022 tmp11 = tmp1 + tmp2;
3023 tmp13 = tmp1 - tmp2;
3024
3025 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
3026 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
3027 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
3028 tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
3029
3030 /* Apply unsigned->signed conversion */
3031 dataptr[0] = (DCTELEM)
3032 ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << (PASS1_BITS+1));
3033 dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << (PASS1_BITS+1));
3034
3035 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
3036 /* Add fudge factor here for final descale. */
3037 z1 += ONE << (CONST_BITS-PASS1_BITS-2);
3038 dataptr[2] = (DCTELEM) RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865),
3039 CONST_BITS-PASS1_BITS-1);
3040 dataptr[6] = (DCTELEM) RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065),
3041 CONST_BITS-PASS1_BITS-1);
3042
3043 /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
3044 * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
3045 * i0..i3 in the paper are tmp0..tmp3 here.
3046 */
3047
3048 tmp10 = tmp0 + tmp3;
3049 tmp11 = tmp1 + tmp2;
3050 tmp12 = tmp0 + tmp2;
3051 tmp13 = tmp1 + tmp3;
3052 z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
3053 /* Add fudge factor here for final descale. */
3054 z1 += ONE << (CONST_BITS-PASS1_BITS-2);
3055
3056 tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
3057 tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
3058 tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
3059 tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
3060 tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */
3061 tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */
3062 tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */
3063 tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
3064
3065 tmp12 += z1;
3066 tmp13 += z1;
3067
3068 dataptr[1] = (DCTELEM)
3069 RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS-PASS1_BITS-1);
3070 dataptr[3] = (DCTELEM)
3071 RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS-PASS1_BITS-1);
3072 dataptr[5] = (DCTELEM)
3073 RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS-PASS1_BITS-1);
3074 dataptr[7] = (DCTELEM)
3075 RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS-PASS1_BITS-1);
3076
3077 dataptr += DCTSIZE; /* advance pointer to next row */
3078 }
3079
3080 /* Pass 2: process columns.
3081 * We remove the PASS1_BITS scaling, but leave the results scaled up
3082 * by an overall factor of 8.
3083 * 4-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
3084 */
3085
3086 dataptr = data;
3087 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
3088 /* Even part */
3089
3090 /* Add fudge factor here for final descale. */
3091 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3] + (ONE << (PASS1_BITS-1));
3092 tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2];
3093
3094 tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3];
3095 tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2];
3096
3097 dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp0 + tmp1, PASS1_BITS);
3098 dataptr[DCTSIZE*2] = (DCTELEM) RIGHT_SHIFT(tmp0 - tmp1, PASS1_BITS);
3099
3100 /* Odd part */
3101
3102 tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */
3103 /* Add fudge factor here for final descale. */
3117
3118 /*
3119 * Perform the forward DCT on a 6x3 sample block.
3120 *
3121 * 6-point FDCT in pass 1 (rows), 3-point in pass 2 (columns).
3122 */
3123
3124 GLOBAL(void)
3125 jpeg_fdct_6x3 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3126 {
3127 INT32 tmp0, tmp1, tmp2;
3128 INT32 tmp10, tmp11, tmp12;
3129 DCTELEM *dataptr;
3130 JSAMPROW elemptr;
3131 int ctr;
3132 SHIFT_TEMPS
3133
3134 /* Pre-zero output coefficient block. */
3135 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
3136
3137 /* Pass 1: process rows. */
3138 /* Note results are scaled up by sqrt(8) compared to a true DCT; */
3139 /* furthermore, we scale the results by 2**PASS1_BITS. */
3140 /* We scale the results further by 2 as part of output adaption */
3141 /* scaling for different DCT size. */
3142 /* 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12). */
3143
3144 dataptr = data;
3145 for (ctr = 0; ctr < 3; ctr++) {
3146 elemptr = sample_data[ctr] + start_col;
3147
3148 /* Even part */
3149
3150 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]);
3151 tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]);
3152 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]);
3153
3154 tmp10 = tmp0 + tmp2;
3155 tmp12 = tmp0 - tmp2;
3156
3157 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]);
3158 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]);
3159 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]);
3160
3161 /* Apply unsigned->signed conversion */
3162 dataptr[0] = (DCTELEM)
3163 ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << (PASS1_BITS+1));
3164 dataptr[2] = (DCTELEM)
3165 DESCALE(MULTIPLY(tmp12, FIX(1.224744871)), /* c2 */
3166 CONST_BITS-PASS1_BITS-1);
3167 dataptr[4] = (DCTELEM)
3168 DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */
3169 CONST_BITS-PASS1_BITS-1);
3170
3171 /* Odd part */
3172
3173 tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)), /* c5 */
3174 CONST_BITS-PASS1_BITS-1);
3175
3176 dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << (PASS1_BITS+1)));
3177 dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << (PASS1_BITS+1));
3178 dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << (PASS1_BITS+1)));
3179
3180 dataptr += DCTSIZE; /* advance pointer to next row */
3181 }
3217
3218 /*
3219 * Perform the forward DCT on a 4x2 sample block.
3220 *
3221 * 4-point FDCT in pass 1 (rows), 2-point in pass 2 (columns).
     *
     * data:        coefficient buffer of DCTSIZE2 elements; zeroed first, then
     *              pass 1 writes columns 0..3 of its rows 0 and 1.
     * sample_data: array of sample rows; rows 0 and 1 are read.
     * start_col:   offset added to every row pointer, so each row is read from
     *              columns start_col .. start_col+3.
3222 */
3223
3224 GLOBAL(void)
3225 jpeg_fdct_4x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3226 {
3227 INT32 tmp0, tmp1;
3228 INT32 tmp10, tmp11;
3229 DCTELEM *dataptr;
3230 JSAMPROW elemptr;
3231 int ctr;
3232 SHIFT_TEMPS
3233
3234 /* Pre-zero output coefficient block. */
     /* The whole block is cleared, so every coefficient that the passes below */
     /* do not store stays zero. */
3235 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
3236
3237 /* Pass 1: process rows. */
3238 /* Note results are scaled up by sqrt(8) compared to a true DCT; */
3239 /* furthermore, we scale the results by 2**PASS1_BITS. */
3240 /* We must also scale the output by (8/4)*(8/2) = 2**3, which we add here. */
3241 /* 4-point FDCT kernel, */
3242 /* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT]. */
3243
3244 dataptr = data;
3245 for (ctr = 0; ctr < 2; ctr++) {
     /* First sample of the current row inside the requested column range. */
3246 elemptr = sample_data[ctr] + start_col;
3247
3248 /* Even part */
3249
     /* Sums (tmp0, tmp1) and differences (tmp10, tmp11) of mirrored samples
      * k and 3-k.
      */
3250 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
3251 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
3252
3253 tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
3254 tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
3255
3256 /* Apply unsigned->signed conversion */
     /* tmp0 + tmp1 is the sum of all 4 samples, hence 4 * CENTERJSAMPLE.  The
      * shift by PASS1_BITS+3 combines the 2**PASS1_BITS scaling with the 2**3
      * size adaptation described above.
      */
3257 dataptr[0] = (DCTELEM)
3258 ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+3));
3259 dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+3));
3260
3261 /* Odd part */
3262
     /* tmp0 is reused for the product shared by dataptr[1] and dataptr[3]. */
3263 tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */
3264 /* Add fudge factor here for final descale. */
     /* The constant is 2**(s-1) for the shift count s = CONST_BITS-PASS1_BITS-3
      * used below, i.e. half a unit of the shifted result, so the shift rounds
      * instead of truncating (assuming RIGHT_SHIFT is an arithmetic right
      * shift -- it is defined outside this view).
      */
3265 tmp0 += ONE << (CONST_BITS-PASS1_BITS-4);
3266
3267 dataptr[1] = (DCTELEM)
3268 RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
3269 CONST_BITS-PASS1_BITS-3);
3270 dataptr[3] = (DCTELEM)
3271 RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
3272 CONST_BITS-PASS1_BITS-3);
3273
3274 dataptr += DCTSIZE; /* advance pointer to next row */
3275 }
3276
     /* NOTE(review): the listing jumps from line 3276 to line 3290 here; the
      * start of pass 2 (its loop header and the statements that set tmp0 and
      * tmp1 for each column) is not visible.
      */
3290 dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp0 + tmp1, PASS1_BITS);
3291
3292 /* Odd part */
3293
3294 dataptr[DCTSIZE*1] = (DCTELEM) RIGHT_SHIFT(tmp0 - tmp1, PASS1_BITS);
3295
3296 dataptr++; /* advance pointer to next column */
3297 }
3298 }
3299
3300
3301 /*
3302 * Perform the forward DCT on a 2x1 sample block.
3303 *
3304 * 2-point FDCT in pass 1 (rows), 1-point in pass 2 (columns).
3305 */
3306
3307 GLOBAL(void)
3308 jpeg_fdct_2x1 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3309 {
3310 INT32 tmp0, tmp1;
3311 JSAMPROW elemptr;
3312
3313 /* Pre-zero output coefficient block. */
3314 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
3315
3316 elemptr = sample_data[0] + start_col;
3317
3318 tmp0 = GETJSAMPLE(elemptr[0]);
3319 tmp1 = GETJSAMPLE(elemptr[1]);
3320
3321 /* We leave the results scaled up by an overall factor of 8.
3322 * We must also scale the output by (8/2)*(8/1) = 2**5.
3323 */
3324
3325 /* Even part */
3326 /* Apply unsigned->signed conversion */
3327 data[0] = (DCTELEM) ((tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 5);
3328
3329 /* Odd part */
3330 data[1] = (DCTELEM) ((tmp0 - tmp1) << 5);
3331 }
3332
3333
3334 /*
3335 * Perform the forward DCT on an 8x16 sample block.
3336 *
3337 * 8-point FDCT in pass 1 (rows), 16-point in pass 2 (columns).
3338 */
3339
3340 GLOBAL(void)
3341 jpeg_fdct_8x16 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3342 {
3343 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
3344 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17;
3345 INT32 z1;
3346 DCTELEM workspace[DCTSIZE2];
3347 DCTELEM *dataptr;
3348 DCTELEM *wsptr;
3349 JSAMPROW elemptr;
3350 int ctr;
3351 SHIFT_TEMPS
3352
3353 /* Pass 1: process rows. */
3354 /* Note results are scaled up by sqrt(8) compared to a true DCT; */
3355 /* furthermore, we scale the results by 2**PASS1_BITS. */
3356
3357 dataptr = data;
3358 ctr = 0;
3359 for (;;) {
3360 elemptr = sample_data[ctr] + start_col;
3361
3362 /* Even part per LL&M figure 1 --- note that published figure is faulty;
3363 * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
3364 */
3365
3366 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
3367 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
3368 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
3369 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
3370
3371 tmp10 = tmp0 + tmp3;
3372 tmp12 = tmp0 - tmp3;
3373 tmp11 = tmp1 + tmp2;
3374 tmp13 = tmp1 - tmp2;
3375
3376 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
3377 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
3378 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
3379 tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
3380
3381 /* Apply unsigned->signed conversion */
3382 dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << PASS1_BITS);
3383 dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
3384
3385 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
3386 dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865),
3387 CONST_BITS-PASS1_BITS);
3388 dataptr[6] = (DCTELEM) DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065),
3389 CONST_BITS-PASS1_BITS);
3390
3391 /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
3392 * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
3393 * i0..i3 in the paper are tmp0..tmp3 here.
3394 */
3395
3396 tmp10 = tmp0 + tmp3;
3397 tmp11 = tmp1 + tmp2;
3398 tmp12 = tmp0 + tmp2;
3399 tmp13 = tmp1 + tmp3;
3400 z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
3401
3402 tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
3403 tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
3404 tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
3405 tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
3406 tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */
3407 tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */
3408 tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */
3409 tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
3410
3411 tmp12 += z1;
3412 tmp13 += z1;
3413
3414 dataptr[1] = (DCTELEM) DESCALE(tmp0 + tmp10 + tmp12, CONST_BITS-PASS1_BITS);
3415 dataptr[3] = (DCTELEM) DESCALE(tmp1 + tmp11 + tmp13, CONST_BITS-PASS1_BITS);
3416 dataptr[5] = (DCTELEM) DESCALE(tmp2 + tmp11 + tmp12, CONST_BITS-PASS1_BITS);
3417 dataptr[7] = (DCTELEM) DESCALE(tmp3 + tmp10 + tmp13, CONST_BITS-PASS1_BITS);
3418
3419 ctr++;
3420
3421 if (ctr != DCTSIZE) {
3422 if (ctr == DCTSIZE * 2)
3423 break; /* Done. */
3424 dataptr += DCTSIZE; /* advance pointer to next row */
3425 } else
3426 dataptr = workspace; /* switch pointer to extended workspace */
3427 }
3428
3429 /* Pass 2: process columns.
3430 * We remove the PASS1_BITS scaling, but leave the results scaled up
3431 * by an overall factor of 8.
3432 * We must also scale the output by 8/16 = 1/2.
3433 * 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
3434 */
3435
3436 dataptr = data;
3437 wsptr = workspace;
3524 *
3525 * 7-point FDCT in pass 1 (rows), 14-point in pass 2 (columns).
3526 */
3527
3528 GLOBAL(void)
3529 jpeg_fdct_7x14 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3530 {
3531 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
3532 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
3533 INT32 z1, z2, z3;
3534 DCTELEM workspace[8*6];
3535 DCTELEM *dataptr;
3536 DCTELEM *wsptr;
3537 JSAMPROW elemptr;
3538 int ctr;
3539 SHIFT_TEMPS
3540
3541 /* Pre-zero output coefficient block. */
3542 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
3543
3544 /* Pass 1: process rows. */
3545 /* Note results are scaled up by sqrt(8) compared to a true DCT; */
3546 /* furthermore, we scale the results by 2**PASS1_BITS. */
3547 /* 7-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/14). */
3548
3549 dataptr = data;
3550 ctr = 0;
3551 for (;;) {
3552 elemptr = sample_data[ctr] + start_col;
3553
3554 /* Even part */
3555
3556 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[6]);
3557 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[5]);
3558 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[4]);
3559 tmp3 = GETJSAMPLE(elemptr[3]);
3560
3561 tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[6]);
3562 tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[5]);
3563 tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[4]);
3564
3565 z1 = tmp0 + tmp2;
3566 /* Apply unsigned->signed conversion */
3567 dataptr[0] = (DCTELEM)
3568 ((z1 + tmp1 + tmp3 - 7 * CENTERJSAMPLE) << PASS1_BITS);
3569 tmp3 += tmp3;
3570 z1 -= tmp3;
3571 z1 -= tmp3;
3572 z1 = MULTIPLY(z1, FIX(0.353553391)); /* (c2+c6-c4)/2 */
3573 z2 = MULTIPLY(tmp0 - tmp2, FIX(0.920609002)); /* (c2+c4-c6)/2 */
3574 z3 = MULTIPLY(tmp1 - tmp2, FIX(0.314692123)); /* c6 */
3575 dataptr[2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS-PASS1_BITS);
3576 z1 -= z2;
3577 z2 = MULTIPLY(tmp0 - tmp1, FIX(0.881747734)); /* c4 */
3578 dataptr[4] = (DCTELEM)
3579 DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.707106781)), /* c2+c6-c4 */
3580 CONST_BITS-PASS1_BITS);
3581 dataptr[6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS-PASS1_BITS);
3582
3583 /* Odd part */
3584
3585 tmp1 = MULTIPLY(tmp10 + tmp11, FIX(0.935414347)); /* (c3+c1-c5)/2 */
3586 tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.170262339)); /* (c3+c5-c1)/2 */
3704 * Perform the forward DCT on a 6x12 sample block.
3705 *
3706 * 6-point FDCT in pass 1 (rows), 12-point in pass 2 (columns).
3707 */
3708
3709 GLOBAL(void)
3710 jpeg_fdct_6x12 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3711 {
3712 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
3713 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
3714 DCTELEM workspace[8*4];
3715 DCTELEM *dataptr;
3716 DCTELEM *wsptr;
3717 JSAMPROW elemptr;
3718 int ctr;
3719 SHIFT_TEMPS
3720
3721 /* Pre-zero output coefficient block. */
3722 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
3723
3724 /* Pass 1: process rows. */
3725 /* Note results are scaled up by sqrt(8) compared to a true DCT; */
3726 /* furthermore, we scale the results by 2**PASS1_BITS. */
3727 /* 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12). */
3728
3729 dataptr = data;
3730 ctr = 0;
3731 for (;;) {
3732 elemptr = sample_data[ctr] + start_col;
3733
3734 /* Even part */
3735
3736 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]);
3737 tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]);
3738 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]);
3739
3740 tmp10 = tmp0 + tmp2;
3741 tmp12 = tmp0 - tmp2;
3742
3743 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]);
3744 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]);
3745 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]);
3746
3747 /* Apply unsigned->signed conversion */
3748 dataptr[0] = (DCTELEM)
3749 ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << PASS1_BITS);
3750 dataptr[2] = (DCTELEM)
3751 DESCALE(MULTIPLY(tmp12, FIX(1.224744871)), /* c2 */
3752 CONST_BITS-PASS1_BITS);
3753 dataptr[4] = (DCTELEM)
3754 DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */
3755 CONST_BITS-PASS1_BITS);
3756
3757 /* Odd part */
3758
3759 tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)), /* c5 */
3760 CONST_BITS-PASS1_BITS);
3761
3762 dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << PASS1_BITS));
3763 dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << PASS1_BITS);
3764 dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << PASS1_BITS));
3765
3766 ctr++;
3767
3853 * Perform the forward DCT on a 5x10 sample block.
3854 *
3855 * 5-point FDCT in pass 1 (rows), 10-point in pass 2 (columns).
3856 */
3857
3858 GLOBAL(void)
3859 jpeg_fdct_5x10 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3860 {
3861 INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
3862 INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
3863 DCTELEM workspace[8*2];
3864 DCTELEM *dataptr;
3865 DCTELEM *wsptr;
3866 JSAMPROW elemptr;
3867 int ctr;
3868 SHIFT_TEMPS
3869
3870 /* Pre-zero output coefficient block. */
3871 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
3872
3873 /* Pass 1: process rows. */
3874 /* Note results are scaled up by sqrt(8) compared to a true DCT; */
3875 /* furthermore, we scale the results by 2**PASS1_BITS. */
3876 /* 5-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/10). */
3877
3878 dataptr = data;
3879 ctr = 0;
3880 for (;;) {
3881 elemptr = sample_data[ctr] + start_col;
3882
3883 /* Even part */
3884
3885 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[4]);
3886 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[3]);
3887 tmp2 = GETJSAMPLE(elemptr[2]);
3888
3889 tmp10 = tmp0 + tmp1;
3890 tmp11 = tmp0 - tmp1;
3891
3892 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[4]);
3893 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[3]);
3894
3895 /* Apply unsigned->signed conversion */
3896 dataptr[0] = (DCTELEM)
3897 ((tmp10 + tmp2 - 5 * CENTERJSAMPLE) << PASS1_BITS);
3898 tmp11 = MULTIPLY(tmp11, FIX(0.790569415)); /* (c2+c4)/2 */
3899 tmp10 -= tmp2 << 2;
3900 tmp10 = MULTIPLY(tmp10, FIX(0.353553391)); /* (c2-c4)/2 */
3901 dataptr[2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS-PASS1_BITS);
3902 dataptr[4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS-PASS1_BITS);
3903
3904 /* Odd part */
3905
3906 tmp10 = MULTIPLY(tmp0 + tmp1, FIX(0.831253876)); /* c3 */
3907
3908 dataptr[1] = (DCTELEM)
3909 DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.513743148)), /* c1-c3 */
3910 CONST_BITS-PASS1_BITS);
3911 dataptr[3] = (DCTELEM)
3912 DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.176250899)), /* c1+c3 */
3913 CONST_BITS-PASS1_BITS);
3914
3915 ctr++;
3998 /*
3999 * Perform the forward DCT on a 4x8 sample block.
4000 *
4001 * 4-point FDCT in pass 1 (rows), 8-point in pass 2 (columns).
4002 */
4003
4004 GLOBAL(void)
4005 jpeg_fdct_4x8 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
4006 {
4007 INT32 tmp0, tmp1, tmp2, tmp3;
4008 INT32 tmp10, tmp11, tmp12, tmp13;
4009 INT32 z1;
4010 DCTELEM *dataptr;
4011 JSAMPROW elemptr;
4012 int ctr;
4013 SHIFT_TEMPS
4014
4015 /* Pre-zero output coefficient block. */
4016 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
4017
4018 /* Pass 1: process rows. */
4019 /* Note results are scaled up by sqrt(8) compared to a true DCT; */
4020 /* furthermore, we scale the results by 2**PASS1_BITS. */
4021 /* We must also scale the output by 8/4 = 2, which we add here. */
4022 /* 4-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). */
4023
4024 dataptr = data;
4025 for (ctr = 0; ctr < DCTSIZE; ctr++) {
4026 elemptr = sample_data[ctr] + start_col;
4027
4028 /* Even part */
4029
4030 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
4031 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
4032
4033 tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
4034 tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
4035
4036 /* Apply unsigned->signed conversion */
4037 dataptr[0] = (DCTELEM)
4038 ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+1));
4039 dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+1));
4040
4041 /* Odd part */
4042
4043 tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */
4044 /* Add fudge factor here for final descale. */
4045 tmp0 += ONE << (CONST_BITS-PASS1_BITS-2);
4046
4047 dataptr[1] = (DCTELEM)
4048 RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
4049 CONST_BITS-PASS1_BITS-1);
4050 dataptr[3] = (DCTELEM)
4051 RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
4052 CONST_BITS-PASS1_BITS-1);
4053
4054 dataptr += DCTSIZE; /* advance pointer to next row */
4055 }
4056
4057 /* Pass 2: process columns.
4058 * We remove the PASS1_BITS scaling, but leave the results scaled up
4059 * by an overall factor of 8.
4060 */
4061
4062 dataptr = data;
4063 for (ctr = 0; ctr < 4; ctr++) {
4064 /* Even part per LL&M figure 1 --- note that published figure is faulty;
4065 * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
4066 */
4067
4068 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
4069 tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
4070 tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
4071 tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
4072
4073 /* Add fudge factor here for final descale. */
4074 tmp10 = tmp0 + tmp3 + (ONE << (PASS1_BITS-1));
4075 tmp12 = tmp0 - tmp3;
4076 tmp11 = tmp1 + tmp2;
4077 tmp13 = tmp1 - tmp2;
4078
4079 tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
4080 tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
4081 tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
4082 tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
4083
4084 dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp10 + tmp11, PASS1_BITS);
4085 dataptr[DCTSIZE*4] = (DCTELEM) RIGHT_SHIFT(tmp10 - tmp11, PASS1_BITS);
4086
4087 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
4088 /* Add fudge factor here for final descale. */
4089 z1 += ONE << (CONST_BITS+PASS1_BITS-1);
4090 dataptr[DCTSIZE*2] = (DCTELEM)
4091 RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), CONST_BITS+PASS1_BITS);
4092 dataptr[DCTSIZE*6] = (DCTELEM)
4093 RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), CONST_BITS+PASS1_BITS);
4094
4095 /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
4096 * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
4097 * i0..i3 in the paper are tmp0..tmp3 here.
4098 */
4099
4100 tmp10 = tmp0 + tmp3;
4101 tmp11 = tmp1 + tmp2;
4102 tmp12 = tmp0 + tmp2;
4103 tmp13 = tmp1 + tmp3;
4104 z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
4105 /* Add fudge factor here for final descale. */
4106 z1 += ONE << (CONST_BITS+PASS1_BITS-1);
4107
4108 tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
4109 tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
4110 tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
4111 tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
4112 tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */
4113 tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */
4114 tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */
4115 tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
4116
4117 tmp12 += z1;
4118 tmp13 += z1;
4119
4120 dataptr[DCTSIZE*1] = (DCTELEM)
4121 RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS+PASS1_BITS);
4122 dataptr[DCTSIZE*3] = (DCTELEM)
4123 RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS+PASS1_BITS);
4124 dataptr[DCTSIZE*5] = (DCTELEM)
4125 RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS+PASS1_BITS);
4126 dataptr[DCTSIZE*7] = (DCTELEM)
4127 RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS+PASS1_BITS);
4128
4129 dataptr++; /* advance pointer to next column */
4130 }
4131 }
4132
4133
4134 /*
4135 * Perform the forward DCT on a 3x6 sample block.
4136 *
4137 * 3-point FDCT in pass 1 (rows), 6-point in pass 2 (columns).
4138 */
4139
4140 GLOBAL(void)
4141 jpeg_fdct_3x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
4142 {
4143 INT32 tmp0, tmp1, tmp2;
4144 INT32 tmp10, tmp11, tmp12;
4145 DCTELEM *dataptr;
4146 JSAMPROW elemptr;
4147 int ctr;
4148 SHIFT_TEMPS
4149
4150 /* Pre-zero output coefficient block. */
4151 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
4152
4153 /* Pass 1: process rows. */
4154 /* Note results are scaled up by sqrt(8) compared to a true DCT; */
4155 /* furthermore, we scale the results by 2**PASS1_BITS. */
4156 /* We scale the results further by 2 as part of output adaption */
4157 /* scaling for different DCT size. */
4158 /* 3-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/6). */
4159
4160 dataptr = data;
4161 for (ctr = 0; ctr < 6; ctr++) {
4162 elemptr = sample_data[ctr] + start_col;
4163
4164 /* Even part */
4165
4166 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[2]);
4167 tmp1 = GETJSAMPLE(elemptr[1]);
4168
4169 tmp2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[2]);
4170
4171 /* Apply unsigned->signed conversion */
4172 dataptr[0] = (DCTELEM)
4173 ((tmp0 + tmp1 - 3 * CENTERJSAMPLE) << (PASS1_BITS+1));
4174 dataptr[2] = (DCTELEM)
4175 DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(0.707106781)), /* c2 */
4176 CONST_BITS-PASS1_BITS-1);
4177
4178 /* Odd part */
4179
4180 dataptr[1] = (DCTELEM)
4181 DESCALE(MULTIPLY(tmp2, FIX(1.224744871)), /* c1 */
4182 CONST_BITS-PASS1_BITS-1);
4183
4184 dataptr += DCTSIZE; /* advance pointer to next row */
4185 }
4186
4187 /* Pass 2: process columns.
4188 * We remove the PASS1_BITS scaling, but leave the results scaled up
4189 * by an overall factor of 8.
4190 * We must also scale the output by (8/6)*(8/3) = 32/9, which we partially
4191 * fold into the constant multipliers (other part was done in pass 1):
4238
4239 /*
4240 * Perform the forward DCT on a 2x4 sample block.
4241 *
4242 * 2-point FDCT in pass 1 (rows), 4-point in pass 2 (columns).
4243 */
4244
4245 GLOBAL(void)
4246 jpeg_fdct_2x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
4247 {
4248 INT32 tmp0, tmp1;
4249 INT32 tmp10, tmp11;
4250 DCTELEM *dataptr;
4251 JSAMPROW elemptr;
4252 int ctr;
4253 SHIFT_TEMPS
4254
4255 /* Pre-zero output coefficient block. */
4256 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
4257
4258 /* Pass 1: process rows. */
4259 /* Note results are scaled up by sqrt(8) compared to a true DCT. */
4260 /* We must also scale the output by (8/2)*(8/4) = 2**3, which we add here. */
4261
4262 dataptr = data;
4263 for (ctr = 0; ctr < 4; ctr++) {
4264 elemptr = sample_data[ctr] + start_col;
4265
4266 /* Even part */
4267
4268 tmp0 = GETJSAMPLE(elemptr[0]);
4269 tmp1 = GETJSAMPLE(elemptr[1]);
4270
4271 /* Apply unsigned->signed conversion */
4272 dataptr[0] = (DCTELEM) ((tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 3);
4273
4274 /* Odd part */
4275
4276 dataptr[1] = (DCTELEM) ((tmp0 - tmp1) << 3);
4277
4278 dataptr += DCTSIZE; /* advance pointer to next row */
4279 }
4280
4281 /* Pass 2: process columns.
4282 * We leave the results scaled up by an overall factor of 8.
4283 * 4-point FDCT kernel,
4284 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
4285 */
4286
4287 dataptr = data;
4288 for (ctr = 0; ctr < 2; ctr++) {
4289 /* Even part */
4290
4291 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3];
4307 RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
4308 CONST_BITS);
4309 dataptr[DCTSIZE*3] = (DCTELEM)
4310 RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
4311 CONST_BITS);
4312
4313 dataptr++; /* advance pointer to next column */
4314 }
4315 }
4316
4317
4318 /*
4319 * Perform the forward DCT on a 1x2 sample block.
4320 *
4321 * 1-point FDCT in pass 1 (rows), 2-point in pass 2 (columns).
4322 */
4323
4324 GLOBAL(void)
4325 jpeg_fdct_1x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
4326 {
4327 INT32 tmp0, tmp1;
4328
4329 /* Pre-zero output coefficient block. */
4330 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
4331
4332 tmp0 = GETJSAMPLE(sample_data[0][start_col]);
4333 tmp1 = GETJSAMPLE(sample_data[1][start_col]);
4334
4335 /* We leave the results scaled up by an overall factor of 8.
4336 * We must also scale the output by (8/1)*(8/2) = 2**5.
4337 */
4338
4339 /* Even part */
4340 /* Apply unsigned->signed conversion */
4341 data[DCTSIZE*0] = (DCTELEM) ((tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 5);
4342
4343 /* Odd part */
4344 data[DCTSIZE*1] = (DCTELEM) ((tmp0 - tmp1) << 5);
4345 }
4346
4347 #endif /* DCT_SCALING_SUPPORTED */
4348 #endif /* DCT_ISLOW_SUPPORTED */
|
1 /*
2 * jfdctint.c
3 *
4 * Copyright (C) 1991-1996, Thomas G. Lane.
5 * Modification developed 2003-2015 by Guido Vollbeding.
6 * This file is part of the Independent JPEG Group's software.
7 * For conditions of distribution and use, see the accompanying README file.
8 *
9 * This file contains a slow-but-accurate integer implementation of the
10 * forward DCT (Discrete Cosine Transform).
11 *
12 * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
13 * on each column. Direct algorithms are also available, but they are
14 * much more complex and seem not to be any faster when reduced to code.
15 *
16 * This implementation is based on an algorithm described in
17 * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
18 * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
19 * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
20 * The primary algorithm described there uses 11 multiplies and 29 adds.
21 * We use their alternate method with 12 multiplies and 32 adds.
22 * The advantage of this method is that no data path contains more than one
23 * multiplication; this allows a very simple and accurate implementation in
24 * scaled fixed-point arithmetic, with a minimal number of shifts.
25 *
148 #else
149 #define MULTIPLY(var,const) ((var) * (const))
150 #endif
151
152
153 /*
154 * Perform the forward DCT on one block of samples.
155 */
156
157 GLOBAL(void)
158 jpeg_fdct_islow (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
159 {
160 INT32 tmp0, tmp1, tmp2, tmp3;
161 INT32 tmp10, tmp11, tmp12, tmp13;
162 INT32 z1;
163 DCTELEM *dataptr;
164 JSAMPROW elemptr;
165 int ctr;
166 SHIFT_TEMPS
167
168 /* Pass 1: process rows.
169 * Note results are scaled up by sqrt(8) compared to a true DCT;
170 * furthermore, we scale the results by 2**PASS1_BITS.
171 * cK represents sqrt(2) * cos(K*pi/16).
172 */
173
174 dataptr = data;
175 for (ctr = 0; ctr < DCTSIZE; ctr++) {
176 elemptr = sample_data[ctr] + start_col;
177
178 /* Even part per LL&M figure 1 --- note that published figure is faulty;
179 * rotator "c1" should be "c6".
180 */
181
182 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
183 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
184 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
185 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
186
187 tmp10 = tmp0 + tmp3;
188 tmp12 = tmp0 - tmp3;
189 tmp11 = tmp1 + tmp2;
190 tmp13 = tmp1 - tmp2;
191
192 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
193 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
194 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
195 tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
196
197 /* Apply unsigned->signed conversion. */
198 dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << PASS1_BITS);
199 dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
200
201 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); /* c6 */
202 /* Add fudge factor here for final descale. */
203 z1 += ONE << (CONST_BITS-PASS1_BITS-1);
204
205 dataptr[2] = (DCTELEM)
206 RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
207 CONST_BITS-PASS1_BITS);
208 dataptr[6] = (DCTELEM)
209 RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
210 CONST_BITS-PASS1_BITS);
211
212 /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
213 * i0..i3 in the paper are tmp0..tmp3 here.
214 */
215
216 tmp12 = tmp0 + tmp2;
217 tmp13 = tmp1 + tmp3;
218
219 z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
220 /* Add fudge factor here for final descale. */
221 z1 += ONE << (CONST_BITS-PASS1_BITS-1);
222
223 tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* -c3+c5 */
224 tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
225 tmp12 += z1;
226 tmp13 += z1;
227
228 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
229 tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
230 tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
231 tmp0 += z1 + tmp12;
232 tmp3 += z1 + tmp13;
233
234 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
235 tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
236 tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
237 tmp1 += z1 + tmp13;
238 tmp2 += z1 + tmp12;
239
240 dataptr[1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS-PASS1_BITS);
241 dataptr[3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS-PASS1_BITS);
242 dataptr[5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
243 dataptr[7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS-PASS1_BITS);
244
245 dataptr += DCTSIZE; /* advance pointer to next row */
246 }
247
248 /* Pass 2: process columns.
249 * We remove the PASS1_BITS scaling, but leave the results scaled up
250 * by an overall factor of 8.
251 * cK represents sqrt(2) * cos(K*pi/16).
252 */
253
254 dataptr = data;
255 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
256 /* Even part per LL&M figure 1 --- note that published figure is faulty;
257 * rotator "c1" should be "c6".
258 */
259
260 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
261 tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
262 tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
263 tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
264
265 /* Add fudge factor here for final descale. */
266 tmp10 = tmp0 + tmp3 + (ONE << (PASS1_BITS-1));
267 tmp12 = tmp0 - tmp3;
268 tmp11 = tmp1 + tmp2;
269 tmp13 = tmp1 - tmp2;
270
271 tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
272 tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
273 tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
274 tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
275
276 dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp10 + tmp11, PASS1_BITS);
277 dataptr[DCTSIZE*4] = (DCTELEM) RIGHT_SHIFT(tmp10 - tmp11, PASS1_BITS);
278
279 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); /* c6 */
280 /* Add fudge factor here for final descale. */
281 z1 += ONE << (CONST_BITS+PASS1_BITS-1);
282
283 dataptr[DCTSIZE*2] = (DCTELEM)
284 RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
285 CONST_BITS+PASS1_BITS);
286 dataptr[DCTSIZE*6] = (DCTELEM)
287 RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
288 CONST_BITS+PASS1_BITS);
289
290 /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
291 * i0..i3 in the paper are tmp0..tmp3 here.
292 */
293
294 tmp12 = tmp0 + tmp2;
295 tmp13 = tmp1 + tmp3;
296
297 z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
298 /* Add fudge factor here for final descale. */
299 z1 += ONE << (CONST_BITS+PASS1_BITS-1);
300
301 tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* -c3+c5 */
302 tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
303 tmp12 += z1;
304 tmp13 += z1;
305
306 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
307 tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
308 tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
309 tmp0 += z1 + tmp12;
310 tmp3 += z1 + tmp13;
311
312 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
313 tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
314 tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
315 tmp1 += z1 + tmp13;
316 tmp2 += z1 + tmp12;
317
318 dataptr[DCTSIZE*1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS+PASS1_BITS);
319 dataptr[DCTSIZE*3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS+PASS1_BITS);
320 dataptr[DCTSIZE*5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS+PASS1_BITS);
321 dataptr[DCTSIZE*7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS+PASS1_BITS);
322
323 dataptr++; /* advance pointer to next column */
324 }
325 }
326
327 #ifdef DCT_SCALING_SUPPORTED
328
329
330 /*
331 * Perform the forward DCT on a 7x7 sample block.
332 */
333
334 GLOBAL(void)
335 jpeg_fdct_7x7 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
336 {
337 INT32 tmp0, tmp1, tmp2, tmp3;
338 INT32 tmp10, tmp11, tmp12;
339 INT32 z1, z2, z3;
340 DCTELEM *dataptr;
341 JSAMPROW elemptr;
342 int ctr;
343 SHIFT_TEMPS
344
345 /* Pre-zero output coefficient block. */
346 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
347
348 /* Pass 1: process rows.
349 * Note results are scaled up by sqrt(8) compared to a true DCT;
350 * furthermore, we scale the results by 2**PASS1_BITS.
351 * cK represents sqrt(2) * cos(K*pi/14).
352 */
353
354 dataptr = data;
355 for (ctr = 0; ctr < 7; ctr++) {
356 elemptr = sample_data[ctr] + start_col;
357
358 /* Even part */
359
360 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[6]);
361 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[5]);
362 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[4]);
363 tmp3 = GETJSAMPLE(elemptr[3]);
364
365 tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[6]);
366 tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[5]);
367 tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[4]);
368
369 z1 = tmp0 + tmp2;
370 /* Apply unsigned->signed conversion. */
371 dataptr[0] = (DCTELEM)
372 ((z1 + tmp1 + tmp3 - 7 * CENTERJSAMPLE) << PASS1_BITS);
373 tmp3 += tmp3;
374 z1 -= tmp3;
375 z1 -= tmp3;
376 z1 = MULTIPLY(z1, FIX(0.353553391)); /* (c2+c6-c4)/2 */
377 z2 = MULTIPLY(tmp0 - tmp2, FIX(0.920609002)); /* (c2+c4-c6)/2 */
378 z3 = MULTIPLY(tmp1 - tmp2, FIX(0.314692123)); /* c6 */
379 dataptr[2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS-PASS1_BITS);
380 z1 -= z2;
381 z2 = MULTIPLY(tmp0 - tmp1, FIX(0.881747734)); /* c4 */
382 dataptr[4] = (DCTELEM)
383 DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.707106781)), /* c2+c6-c4 */
384 CONST_BITS-PASS1_BITS);
385 dataptr[6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS-PASS1_BITS);
386
387 /* Odd part */
388
389 tmp1 = MULTIPLY(tmp10 + tmp11, FIX(0.935414347)); /* (c3+c1-c5)/2 */
390 tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.170262339)); /* (c3+c5-c1)/2 */
463 }
464
465
466 /*
467 * Perform the forward DCT on a 6x6 sample block.
468 */
469
470 GLOBAL(void)
471 jpeg_fdct_6x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
472 {
473 INT32 tmp0, tmp1, tmp2;
474 INT32 tmp10, tmp11, tmp12;
475 DCTELEM *dataptr;
476 JSAMPROW elemptr;
477 int ctr;
478 SHIFT_TEMPS
479
480 /* Pre-zero output coefficient block. */
481 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
482
483 /* Pass 1: process rows.
484 * Note results are scaled up by sqrt(8) compared to a true DCT;
485 * furthermore, we scale the results by 2**PASS1_BITS.
486 * cK represents sqrt(2) * cos(K*pi/12).
487 */
488
489 dataptr = data;
490 for (ctr = 0; ctr < 6; ctr++) {
491 elemptr = sample_data[ctr] + start_col;
492
493 /* Even part */
494
495 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]);
496 tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]);
497 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]);
498
499 tmp10 = tmp0 + tmp2;
500 tmp12 = tmp0 - tmp2;
501
502 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]);
503 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]);
504 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]);
505
506 /* Apply unsigned->signed conversion. */
507 dataptr[0] = (DCTELEM)
508 ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << PASS1_BITS);
509 dataptr[2] = (DCTELEM)
510 DESCALE(MULTIPLY(tmp12, FIX(1.224744871)), /* c2 */
511 CONST_BITS-PASS1_BITS);
512 dataptr[4] = (DCTELEM)
513 DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */
514 CONST_BITS-PASS1_BITS);
515
516 /* Odd part */
517
518 tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)), /* c5 */
519 CONST_BITS-PASS1_BITS);
520
521 dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << PASS1_BITS));
522 dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << PASS1_BITS);
523 dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << PASS1_BITS));
524
525 dataptr += DCTSIZE; /* advance pointer to next row */
526 }
577 }
578
579
580 /*
581 * Perform the forward DCT on a 5x5 sample block.
582 */
583
584 GLOBAL(void)
585 jpeg_fdct_5x5 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
586 {
587 INT32 tmp0, tmp1, tmp2;
588 INT32 tmp10, tmp11;
589 DCTELEM *dataptr;
590 JSAMPROW elemptr;
591 int ctr;
592 SHIFT_TEMPS
593
594 /* Pre-zero output coefficient block. */
595 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
596
597 /* Pass 1: process rows.
598 * Note results are scaled up by sqrt(8) compared to a true DCT;
599 * furthermore, we scale the results by 2**PASS1_BITS.
600 * We scale the results further by 2 as part of output adaption
601 * scaling for different DCT size.
602 * cK represents sqrt(2) * cos(K*pi/10).
603 */
604
605 dataptr = data;
606 for (ctr = 0; ctr < 5; ctr++) {
607 elemptr = sample_data[ctr] + start_col;
608
609 /* Even part */
610
611 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[4]);
612 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[3]);
613 tmp2 = GETJSAMPLE(elemptr[2]);
614
615 tmp10 = tmp0 + tmp1;
616 tmp11 = tmp0 - tmp1;
617
618 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[4]);
619 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[3]);
620
621 /* Apply unsigned->signed conversion. */
622 dataptr[0] = (DCTELEM)
623 ((tmp10 + tmp2 - 5 * CENTERJSAMPLE) << (PASS1_BITS+1));
624 tmp11 = MULTIPLY(tmp11, FIX(0.790569415)); /* (c2+c4)/2 */
625 tmp10 -= tmp2 << 2;
626 tmp10 = MULTIPLY(tmp10, FIX(0.353553391)); /* (c2-c4)/2 */
627 dataptr[2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS-PASS1_BITS-1);
628 dataptr[4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS-PASS1_BITS-1);
629
630 /* Odd part */
631
632 tmp10 = MULTIPLY(tmp0 + tmp1, FIX(0.831253876)); /* c3 */
633
634 dataptr[1] = (DCTELEM)
635 DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.513743148)), /* c1-c3 */
636 CONST_BITS-PASS1_BITS-1);
637 dataptr[3] = (DCTELEM)
638 DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.176250899)), /* c1+c3 */
639 CONST_BITS-PASS1_BITS-1);
640
641 dataptr += DCTSIZE; /* advance pointer to next row */
688 }
689
690
691 /*
692 * Perform the forward DCT on a 4x4 sample block.
693 */
694
695 GLOBAL(void)
696 jpeg_fdct_4x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
697 {
698 INT32 tmp0, tmp1;
699 INT32 tmp10, tmp11;
700 DCTELEM *dataptr;
701 JSAMPROW elemptr;
702 int ctr;
703 SHIFT_TEMPS
704
705 /* Pre-zero output coefficient block. */
706 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
707
708 /* Pass 1: process rows.
709 * Note results are scaled up by sqrt(8) compared to a true DCT;
710 * furthermore, we scale the results by 2**PASS1_BITS.
711 * We must also scale the output by (8/4)**2 = 2**2, which we add here.
712 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
713 */
714
715 dataptr = data;
716 for (ctr = 0; ctr < 4; ctr++) {
717 elemptr = sample_data[ctr] + start_col;
718
719 /* Even part */
720
721 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
722 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
723
724 tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
725 tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
726
727 /* Apply unsigned->signed conversion. */
728 dataptr[0] = (DCTELEM)
729 ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+2));
730 dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+2));
731
732 /* Odd part */
733
734 tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */
735 /* Add fudge factor here for final descale. */
736 tmp0 += ONE << (CONST_BITS-PASS1_BITS-3);
737
738 dataptr[1] = (DCTELEM)
739 RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
740 CONST_BITS-PASS1_BITS-2);
741 dataptr[3] = (DCTELEM)
742 RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
743 CONST_BITS-PASS1_BITS-2);
744
745 dataptr += DCTSIZE; /* advance pointer to next row */
746 }
747
748 /* Pass 2: process columns.
749 * We remove the PASS1_BITS scaling, but leave the results scaled up
750 * by an overall factor of 8.
751 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
752 */
753
754 dataptr = data;
755 for (ctr = 0; ctr < 4; ctr++) {
756 /* Even part */
757
758 /* Add fudge factor here for final descale. */
759 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3] + (ONE << (PASS1_BITS-1));
760 tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2];
761
762 tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3];
763 tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2];
764
765 dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp0 + tmp1, PASS1_BITS);
766 dataptr[DCTSIZE*2] = (DCTELEM) RIGHT_SHIFT(tmp0 - tmp1, PASS1_BITS);
767
768 /* Odd part */
769
770 tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */
771 /* Add fudge factor here for final descale. */
782 }
783 }
784
785
786 /*
787 * Perform the forward DCT on a 3x3 sample block.
788 */
789
790 GLOBAL(void)
791 jpeg_fdct_3x3 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
792 {
793 INT32 tmp0, tmp1, tmp2;
794 DCTELEM *dataptr;
795 JSAMPROW elemptr;
796 int ctr;
797 SHIFT_TEMPS
798
799 /* Pre-zero output coefficient block. */
800 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
801
802 /* Pass 1: process rows.
803 * Note results are scaled up by sqrt(8) compared to a true DCT;
804 * furthermore, we scale the results by 2**PASS1_BITS.
805 * We scale the results further by 2**2 as part of output adaption
806 * scaling for different DCT size.
807 * cK represents sqrt(2) * cos(K*pi/6).
808 */
809
810 dataptr = data;
811 for (ctr = 0; ctr < 3; ctr++) {
812 elemptr = sample_data[ctr] + start_col;
813
814 /* Even part */
815
816 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[2]);
817 tmp1 = GETJSAMPLE(elemptr[1]);
818
819 tmp2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[2]);
820
821 /* Apply unsigned->signed conversion. */
822 dataptr[0] = (DCTELEM)
823 ((tmp0 + tmp1 - 3 * CENTERJSAMPLE) << (PASS1_BITS+2));
824 dataptr[2] = (DCTELEM)
825 DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(0.707106781)), /* c2 */
826 CONST_BITS-PASS1_BITS-2);
827
828 /* Odd part */
829
830 dataptr[1] = (DCTELEM)
831 DESCALE(MULTIPLY(tmp2, FIX(1.224744871)), /* c1 */
832 CONST_BITS-PASS1_BITS-2);
833
834 dataptr += DCTSIZE; /* advance pointer to next row */
835 }
836
837 /* Pass 2: process columns.
838 * We remove the PASS1_BITS scaling, but leave the results scaled up
839 * by an overall factor of 8.
840 * We must also scale the output by (8/3)**2 = 64/9, which we partially
841 * fold into the constant multipliers (other part was done in pass 1):
859 CONST_BITS+PASS1_BITS);
860
861 /* Odd part */
862
863 dataptr[DCTSIZE*1] = (DCTELEM)
864 DESCALE(MULTIPLY(tmp2, FIX(2.177324216)), /* c1 */
865 CONST_BITS+PASS1_BITS);
866
867 dataptr++; /* advance pointer to next column */
868 }
869 }
870
871
872 /*
873 * Perform the forward DCT on a 2x2 sample block.
874 */
875
876 GLOBAL(void)
877 jpeg_fdct_2x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
878 {
879 DCTELEM tmp0, tmp1, tmp2, tmp3;
880 JSAMPROW elemptr;
881
882 /* Pre-zero output coefficient block. */
883 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
884
885 /* Pass 1: process rows.
886 * Note results are scaled up by sqrt(8) compared to a true DCT.
887 */
888
889 /* Row 0 */
890 elemptr = sample_data[0] + start_col;
891
892 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[1]);
893 tmp1 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[1]);
894
895 /* Row 1 */
896 elemptr = sample_data[1] + start_col;
897
898 tmp2 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[1]);
899 tmp3 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[1]);
900
901 /* Pass 2: process columns.
902 * We leave the results scaled up by an overall factor of 8.
903 * We must also scale the output by (8/2)**2 = 2**4.
904 */
905
906 /* Column 0 */
907 /* Apply unsigned->signed conversion. */
908 data[DCTSIZE*0] = (tmp0 + tmp2 - 4 * CENTERJSAMPLE) << 4;
909 data[DCTSIZE*1] = (tmp0 - tmp2) << 4;
910
911 /* Column 1 */
912 data[DCTSIZE*0+1] = (tmp1 + tmp3) << 4;
913 data[DCTSIZE*1+1] = (tmp1 - tmp3) << 4;
914 }
915
916
917 /*
918 * Perform the forward DCT on a 1x1 sample block.
919 */
920
921 GLOBAL(void)
922 jpeg_fdct_1x1 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
923 {
924 DCTELEM dcval;
925
926 /* Pre-zero output coefficient block. */
927 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
928
929 dcval = GETJSAMPLE(sample_data[0][start_col]);
930
931 /* We leave the result scaled up by an overall factor of 8. */
932 /* We must also scale the output by (8/1)**2 = 2**6. */
933 /* Apply unsigned->signed conversion. */
934 data[0] = (dcval - CENTERJSAMPLE) << 6;
935 }
936
937
938 /*
939 * Perform the forward DCT on a 9x9 sample block.
940 */
941
942 GLOBAL(void)
943 jpeg_fdct_9x9 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
944 {
945 INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
946 INT32 tmp10, tmp11, tmp12, tmp13;
947 INT32 z1, z2;
948 DCTELEM workspace[8];
949 DCTELEM *dataptr;
950 DCTELEM *wsptr;
951 JSAMPROW elemptr;
952 int ctr;
953 SHIFT_TEMPS
954
955 /* Pass 1: process rows.
956 * Note results are scaled up by sqrt(8) compared to a true DCT;
957 * we scale the results further by 2 as part of output adaption
958 * scaling for different DCT size.
959 * cK represents sqrt(2) * cos(K*pi/18).
960 */
961
962 dataptr = data;
963 ctr = 0;
964 for (;;) {
965 elemptr = sample_data[ctr] + start_col;
966
967 /* Even part */
968
969 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[8]);
970 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[7]);
971 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[6]);
972 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[5]);
973 tmp4 = GETJSAMPLE(elemptr[4]);
974
975 tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[8]);
976 tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[7]);
977 tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[6]);
978 tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[5]);
979
980 z1 = tmp0 + tmp2 + tmp3;
981 z2 = tmp1 + tmp4;
982 /* Apply unsigned->signed conversion. */
983 dataptr[0] = (DCTELEM) ((z1 + z2 - 9 * CENTERJSAMPLE) << 1);
984 dataptr[6] = (DCTELEM)
985 DESCALE(MULTIPLY(z1 - z2 - z2, FIX(0.707106781)), /* c6 */
986 CONST_BITS-1);
987 z1 = MULTIPLY(tmp0 - tmp2, FIX(1.328926049)); /* c2 */
988 z2 = MULTIPLY(tmp1 - tmp4 - tmp4, FIX(0.707106781)); /* c6 */
989 dataptr[2] = (DCTELEM)
990 DESCALE(MULTIPLY(tmp2 - tmp3, FIX(1.083350441)) /* c4 */
991 + z1 + z2, CONST_BITS-1);
992 dataptr[4] = (DCTELEM)
993 DESCALE(MULTIPLY(tmp3 - tmp0, FIX(0.245575608)) /* c8 */
994 + z1 - z2, CONST_BITS-1);
995
996 /* Odd part */
997
998 dataptr[3] = (DCTELEM)
999 DESCALE(MULTIPLY(tmp10 - tmp12 - tmp13, FIX(1.224744871)), /* c3 */
1000 CONST_BITS-1);
1001
1002 tmp11 = MULTIPLY(tmp11, FIX(1.224744871)); /* c3 */
1085 }
1086 }
1087
1088
1089 /*
1090 * Perform the forward DCT on a 10x10 sample block.
1091 */
1092
1093 GLOBAL(void)
1094 jpeg_fdct_10x10 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
1095 {
1096 INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
1097 INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
1098 DCTELEM workspace[8*2];
1099 DCTELEM *dataptr;
1100 DCTELEM *wsptr;
1101 JSAMPROW elemptr;
1102 int ctr;
1103 SHIFT_TEMPS
1104
1105 /* Pass 1: process rows.
1106 * Note results are scaled up by sqrt(8) compared to a true DCT;
1107 * we scale the results further by 2 as part of output adaption
1108 * scaling for different DCT size.
1109 * cK represents sqrt(2) * cos(K*pi/20).
1110 */
1111
1112 dataptr = data;
1113 ctr = 0;
1114 for (;;) {
1115 elemptr = sample_data[ctr] + start_col;
1116
1117 /* Even part */
1118
1119 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[9]);
1120 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[8]);
1121 tmp12 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[7]);
1122 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[6]);
1123 tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[5]);
1124
1125 tmp10 = tmp0 + tmp4;
1126 tmp13 = tmp0 - tmp4;
1127 tmp11 = tmp1 + tmp3;
1128 tmp14 = tmp1 - tmp3;
1129
1130 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[9]);
1131 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[8]);
1132 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[7]);
1133 tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[6]);
1134 tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[5]);
1135
1136 /* Apply unsigned->signed conversion. */
1137 dataptr[0] = (DCTELEM)
1138 ((tmp10 + tmp11 + tmp12 - 10 * CENTERJSAMPLE) << 1);
1139 tmp12 += tmp12;
1140 dataptr[4] = (DCTELEM)
1141 DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.144122806)) - /* c4 */
1142 MULTIPLY(tmp11 - tmp12, FIX(0.437016024)), /* c8 */
1143 CONST_BITS-1);
1144 tmp10 = MULTIPLY(tmp13 + tmp14, FIX(0.831253876)); /* c6 */
1145 dataptr[2] = (DCTELEM)
1146 DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.513743148)), /* c2-c6 */
1147 CONST_BITS-1);
1148 dataptr[6] = (DCTELEM)
1149 DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.176250899)), /* c2+c6 */
1150 CONST_BITS-1);
1151
1152 /* Odd part */
1153
1154 tmp10 = tmp0 + tmp4;
1155 tmp11 = tmp1 - tmp3;
1156 dataptr[5] = (DCTELEM) ((tmp10 - tmp11 - tmp2) << 1);
1250 }
1251
1252
1253 /*
1254 * Perform the forward DCT on an 11x11 sample block.
1255 */
1256
1257 GLOBAL(void)
1258 jpeg_fdct_11x11 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
1259 {
1260 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
1261 INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
1262 INT32 z1, z2, z3;
1263 DCTELEM workspace[8*3];
1264 DCTELEM *dataptr;
1265 DCTELEM *wsptr;
1266 JSAMPROW elemptr;
1267 int ctr;
1268 SHIFT_TEMPS
1269
1270 /* Pass 1: process rows.
1271 * Note results are scaled up by sqrt(8) compared to a true DCT;
1272 * we scale the results further by 2 as part of output adaption
1273 * scaling for different DCT size.
1274 * cK represents sqrt(2) * cos(K*pi/22).
1275 */
1276
1277 dataptr = data;
1278 ctr = 0;
1279 for (;;) {
1280 elemptr = sample_data[ctr] + start_col;
1281
1282 /* Even part */
1283
1284 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[10]);
1285 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[9]);
1286 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[8]);
1287 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[7]);
1288 tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[6]);
1289 tmp5 = GETJSAMPLE(elemptr[5]);
1290
1291 tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[10]);
1292 tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[9]);
1293 tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[8]);
1294 tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[7]);
1295 tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[6]);
1296
1297 /* Apply unsigned->signed conversion. */
1298 dataptr[0] = (DCTELEM)
1299 ((tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 - 11 * CENTERJSAMPLE) << 1);
1300 tmp5 += tmp5;
1301 tmp0 -= tmp5;
1302 tmp1 -= tmp5;
1303 tmp2 -= tmp5;
1304 tmp3 -= tmp5;
1305 tmp4 -= tmp5;
1306 z1 = MULTIPLY(tmp0 + tmp3, FIX(1.356927976)) + /* c2 */
1307 MULTIPLY(tmp2 + tmp4, FIX(0.201263574)); /* c10 */
1308 z2 = MULTIPLY(tmp1 - tmp3, FIX(0.926112931)); /* c6 */
1309 z3 = MULTIPLY(tmp0 - tmp1, FIX(1.189712156)); /* c4 */
1310 dataptr[2] = (DCTELEM)
1311 DESCALE(z1 + z2 - MULTIPLY(tmp3, FIX(1.018300590)) /* c2+c8-c6 */
1312 - MULTIPLY(tmp4, FIX(1.390975730)), /* c4+c10 */
1313 CONST_BITS-1);
1314 dataptr[4] = (DCTELEM)
1315 DESCALE(z2 + z3 + MULTIPLY(tmp1, FIX(0.062335650)) /* c4-c6-c10 */
1316 - MULTIPLY(tmp2, FIX(1.356927976)) /* c2 */
1317 + MULTIPLY(tmp4, FIX(0.587485545)), /* c8 */
1433 }
1434 }
1435
1436
1437 /*
1438 * Perform the forward DCT on a 12x12 sample block.
1439 */
1440
1441 GLOBAL(void)
1442 jpeg_fdct_12x12 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
1443 {
1444 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
1445 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
1446 DCTELEM workspace[8*4];
1447 DCTELEM *dataptr;
1448 DCTELEM *wsptr;
1449 JSAMPROW elemptr;
1450 int ctr;
1451 SHIFT_TEMPS
1452
1453 /* Pass 1: process rows.
1454 * Note results are scaled up by sqrt(8) compared to a true DCT.
1455 * cK represents sqrt(2) * cos(K*pi/24).
1456 */
1457
1458 dataptr = data;
1459 ctr = 0;
1460 for (;;) {
1461 elemptr = sample_data[ctr] + start_col;
1462
1463 /* Even part */
1464
1465 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[11]);
1466 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[10]);
1467 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[9]);
1468 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[8]);
1469 tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[7]);
1470 tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[6]);
1471
1472 tmp10 = tmp0 + tmp5;
1473 tmp13 = tmp0 - tmp5;
1474 tmp11 = tmp1 + tmp4;
1475 tmp14 = tmp1 - tmp4;
1476 tmp12 = tmp2 + tmp3;
1477 tmp15 = tmp2 - tmp3;
1478
1479 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[11]);
1480 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[10]);
1481 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[9]);
1482 tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[8]);
1483 tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[7]);
1484 tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[6]);
1485
1486 /* Apply unsigned->signed conversion. */
1487 dataptr[0] = (DCTELEM) (tmp10 + tmp11 + tmp12 - 12 * CENTERJSAMPLE);
1488 dataptr[6] = (DCTELEM) (tmp13 - tmp14 - tmp15);
1489 dataptr[4] = (DCTELEM)
1490 DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.224744871)), /* c4 */
1491 CONST_BITS);
1492 dataptr[2] = (DCTELEM)
1493 DESCALE(tmp14 - tmp15 + MULTIPLY(tmp13 + tmp15, FIX(1.366025404)), /* c2 */
1494 CONST_BITS);
1495
1496 /* Odd part */
1497
1498 tmp10 = MULTIPLY(tmp1 + tmp4, FIX_0_541196100); /* c9 */
1499 tmp14 = tmp10 + MULTIPLY(tmp1, FIX_0_765366865); /* c3-c9 */
1500 tmp15 = tmp10 - MULTIPLY(tmp4, FIX_1_847759065); /* c3+c9 */
1501 tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.121971054)); /* c5 */
1502 tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.860918669)); /* c7 */
1503 tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.580774953)) /* c5+c7-c1 */
1504 + MULTIPLY(tmp5, FIX(0.184591911)); /* c11 */
1505 tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.184591911)); /* -c11 */
1506 tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.339493912)) /* c1+c5-c11 */
1600 }
1601
1602
1603 /*
1604 * Perform the forward DCT on a 13x13 sample block.
1605 */
1606
1607 GLOBAL(void)
1608 jpeg_fdct_13x13 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
1609 {
1610 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
1611 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
1612 INT32 z1, z2;
1613 DCTELEM workspace[8*5];
1614 DCTELEM *dataptr;
1615 DCTELEM *wsptr;
1616 JSAMPROW elemptr;
1617 int ctr;
1618 SHIFT_TEMPS
1619
1620 /* Pass 1: process rows.
1621 * Note results are scaled up by sqrt(8) compared to a true DCT.
1622 * cK represents sqrt(2) * cos(K*pi/26).
1623 */
1624
1625 dataptr = data;
1626 ctr = 0;
1627 for (;;) {
1628 elemptr = sample_data[ctr] + start_col;
1629
1630 /* Even part */
1631
1632 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[12]);
1633 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[11]);
1634 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[10]);
1635 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[9]);
1636 tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[8]);
1637 tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[7]);
1638 tmp6 = GETJSAMPLE(elemptr[6]);
1639
1640 tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[12]);
1641 tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[11]);
1642 tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[10]);
1643 tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[9]);
1644 tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[8]);
1645 tmp15 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[7]);
1646
1647 /* Apply unsigned->signed conversion. */
1648 dataptr[0] = (DCTELEM)
1649 (tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 + tmp6 - 13 * CENTERJSAMPLE);
1650 tmp6 += tmp6;
1651 tmp0 -= tmp6;
1652 tmp1 -= tmp6;
1653 tmp2 -= tmp6;
1654 tmp3 -= tmp6;
1655 tmp4 -= tmp6;
1656 tmp5 -= tmp6;
1657 dataptr[2] = (DCTELEM)
1658 DESCALE(MULTIPLY(tmp0, FIX(1.373119086)) + /* c2 */
1659 MULTIPLY(tmp1, FIX(1.058554052)) + /* c6 */
1660 MULTIPLY(tmp2, FIX(0.501487041)) - /* c10 */
1661 MULTIPLY(tmp3, FIX(0.170464608)) - /* c12 */
1662 MULTIPLY(tmp4, FIX(0.803364869)) - /* c8 */
1663 MULTIPLY(tmp5, FIX(1.252223920)), /* c4 */
1664 CONST_BITS);
1665 z1 = MULTIPLY(tmp0 - tmp2, FIX(1.155388986)) - /* (c4+c6)/2 */
1666 MULTIPLY(tmp3 - tmp4, FIX(0.435816023)) - /* (c2-c10)/2 */
1667 MULTIPLY(tmp1 - tmp5, FIX(0.316450131)); /* (c8-c12)/2 */
1799 }
1800 }
1801
1802
1803 /*
1804 * Perform the forward DCT on a 14x14 sample block.
1805 */
1806
1807 GLOBAL(void)
1808 jpeg_fdct_14x14 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
1809 {
1810 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
1811 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
1812 DCTELEM workspace[8*6];
1813 DCTELEM *dataptr;
1814 DCTELEM *wsptr;
1815 JSAMPROW elemptr;
1816 int ctr;
1817 SHIFT_TEMPS
1818
1819 /* Pass 1: process rows.
1820 * Note results are scaled up by sqrt(8) compared to a true DCT.
1821 * cK represents sqrt(2) * cos(K*pi/28).
1822 */
1823
1824 dataptr = data;
1825 ctr = 0;
1826 for (;;) {
1827 elemptr = sample_data[ctr] + start_col;
1828
1829 /* Even part */
1830
1831 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[13]);
1832 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[12]);
1833 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[11]);
1834 tmp13 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[10]);
1835 tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[9]);
1836 tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[8]);
1837 tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[7]);
1838
1839 tmp10 = tmp0 + tmp6;
1840 tmp14 = tmp0 - tmp6;
1841 tmp11 = tmp1 + tmp5;
1842 tmp15 = tmp1 - tmp5;
1843 tmp12 = tmp2 + tmp4;
1844 tmp16 = tmp2 - tmp4;
1845
1846 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[13]);
1847 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[12]);
1848 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[11]);
1849 tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[10]);
1850 tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[9]);
1851 tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[8]);
1852 tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[7]);
1853
1854 /* Apply unsigned->signed conversion. */
1855 dataptr[0] = (DCTELEM)
1856 (tmp10 + tmp11 + tmp12 + tmp13 - 14 * CENTERJSAMPLE);
1857 tmp13 += tmp13;
1858 dataptr[4] = (DCTELEM)
1859 DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.274162392)) + /* c4 */
1860 MULTIPLY(tmp11 - tmp13, FIX(0.314692123)) - /* c12 */
1861 MULTIPLY(tmp12 - tmp13, FIX(0.881747734)), /* c8 */
1862 CONST_BITS);
1863
1864 tmp10 = MULTIPLY(tmp14 + tmp15, FIX(1.105676686)); /* c6 */
1865
1866 dataptr[2] = (DCTELEM)
1867 DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.273079590)) /* c2-c6 */
1868 + MULTIPLY(tmp16, FIX(0.613604268)), /* c10 */
1869 CONST_BITS);
1870 dataptr[6] = (DCTELEM)
1871 DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.719280954)) /* c6+c10 */
1872 - MULTIPLY(tmp16, FIX(1.378756276)), /* c2 */
1873 CONST_BITS);
1874
2001 }
2002
2003
2004 /*
2005 * Perform the forward DCT on a 15x15 sample block.
2006 */
2007
2008 GLOBAL(void)
2009 jpeg_fdct_15x15 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
2010 {
2011 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
2012 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
2013 INT32 z1, z2, z3;
2014 DCTELEM workspace[8*7];
2015 DCTELEM *dataptr;
2016 DCTELEM *wsptr;
2017 JSAMPROW elemptr;
2018 int ctr;
2019 SHIFT_TEMPS
2020
2021 /* Pass 1: process rows.
2022 * Note results are scaled up by sqrt(8) compared to a true DCT.
2023 * cK represents sqrt(2) * cos(K*pi/30).
2024 */
2025
2026 dataptr = data;
2027 ctr = 0;
2028 for (;;) {
2029 elemptr = sample_data[ctr] + start_col;
2030
2031 /* Even part */
2032
2033 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[14]);
2034 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[13]);
2035 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[12]);
2036 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[11]);
2037 tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[10]);
2038 tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[9]);
2039 tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[8]);
2040 tmp7 = GETJSAMPLE(elemptr[7]);
2041
2042 tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[14]);
2043 tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[13]);
2044 tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[12]);
2045 tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[11]);
2046 tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[10]);
2047 tmp15 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[9]);
2048 tmp16 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[8]);
2049
2050 z1 = tmp0 + tmp4 + tmp5;
2051 z2 = tmp1 + tmp3 + tmp6;
2052 z3 = tmp2 + tmp7;
2053 /* Apply unsigned->signed conversion. */
2054 dataptr[0] = (DCTELEM) (z1 + z2 + z3 - 15 * CENTERJSAMPLE);
2055 z3 += z3;
2056 dataptr[6] = (DCTELEM)
2057 DESCALE(MULTIPLY(z1 - z3, FIX(1.144122806)) - /* c6 */
2058 MULTIPLY(z2 - z3, FIX(0.437016024)), /* c12 */
2059 CONST_BITS);
2060 tmp2 += ((tmp1 + tmp4) >> 1) - tmp7 - tmp7;
2061 z1 = MULTIPLY(tmp3 - tmp2, FIX(1.531135173)) - /* c2+c14 */
2062 MULTIPLY(tmp6 - tmp2, FIX(2.238241955)); /* c4+c8 */
2063 z2 = MULTIPLY(tmp5 - tmp2, FIX(0.798468008)) - /* c8-c14 */
2064 MULTIPLY(tmp0 - tmp2, FIX(0.091361227)); /* c2-c4 */
2065 z3 = MULTIPLY(tmp0 - tmp3, FIX(1.383309603)) + /* c2 */
2066 MULTIPLY(tmp6 - tmp5, FIX(0.946293579)) + /* c8 */
2067 MULTIPLY(tmp1 - tmp4, FIX(0.790569415)); /* (c6+c12)/2 */
2068
2069 dataptr[2] = (DCTELEM) DESCALE(z1 + z3, CONST_BITS);
2070 dataptr[4] = (DCTELEM) DESCALE(z2 + z3, CONST_BITS);
2071
2072 /* Odd part */
2073
2180 }
2181 }
2182
2183
2184 /*
2185 * Perform the forward DCT on a 16x16 sample block.
2186 */
2187
2188 GLOBAL(void)
2189 jpeg_fdct_16x16 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
2190 {
2191 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
2192 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17;
2193 DCTELEM workspace[DCTSIZE2];
2194 DCTELEM *dataptr;
2195 DCTELEM *wsptr;
2196 JSAMPROW elemptr;
2197 int ctr;
2198 SHIFT_TEMPS
2199
2200 /* Pass 1: process rows.
2201 * Note results are scaled up by sqrt(8) compared to a true DCT;
2202 * furthermore, we scale the results by 2**PASS1_BITS.
2203 * cK represents sqrt(2) * cos(K*pi/32).
2204 */
2205
2206 dataptr = data;
2207 ctr = 0;
2208 for (;;) {
2209 elemptr = sample_data[ctr] + start_col;
2210
2211 /* Even part */
2212
2213 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[15]);
2214 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[14]);
2215 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[13]);
2216 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[12]);
2217 tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[11]);
2218 tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[10]);
2219 tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[9]);
2220 tmp7 = GETJSAMPLE(elemptr[7]) + GETJSAMPLE(elemptr[8]);
2221
2222 tmp10 = tmp0 + tmp7;
2223 tmp14 = tmp0 - tmp7;
2224 tmp11 = tmp1 + tmp6;
2225 tmp15 = tmp1 - tmp6;
2226 tmp12 = tmp2 + tmp5;
2227 tmp16 = tmp2 - tmp5;
2228 tmp13 = tmp3 + tmp4;
2229 tmp17 = tmp3 - tmp4;
2230
2231 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[15]);
2232 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[14]);
2233 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[13]);
2234 tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[12]);
2235 tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[11]);
2236 tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[10]);
2237 tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[9]);
2238 tmp7 = GETJSAMPLE(elemptr[7]) - GETJSAMPLE(elemptr[8]);
2239
2240 /* Apply unsigned->signed conversion. */
2241 dataptr[0] = (DCTELEM)
2242 ((tmp10 + tmp11 + tmp12 + tmp13 - 16 * CENTERJSAMPLE) << PASS1_BITS);
2243 dataptr[4] = (DCTELEM)
2244 DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
2245 MULTIPLY(tmp11 - tmp12, FIX_0_541196100), /* c12[16] = c6[8] */
2246 CONST_BITS-PASS1_BITS);
2247
2248 tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) + /* c14[16] = c7[8] */
2249 MULTIPLY(tmp14 - tmp16, FIX(1.387039845)); /* c2[16] = c1[8] */
2250
2251 dataptr[2] = (DCTELEM)
2252 DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982)) /* c6+c14 */
2253 + MULTIPLY(tmp16, FIX(2.172734804)), /* c2+c10 */
2254 CONST_BITS-PASS1_BITS);
2255 dataptr[6] = (DCTELEM)
2256 DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243)) /* c2-c6 */
2257 - MULTIPLY(tmp17, FIX(1.061594338)), /* c10+c14 */
2258 CONST_BITS-PASS1_BITS);
2259
2260 /* Odd part */
2283
2284 dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
2285 dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
2286 dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
2287 dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
2288
2289 ctr++;
2290
2291 if (ctr != DCTSIZE) {
2292 if (ctr == DCTSIZE * 2)
2293 break; /* Done. */
2294 dataptr += DCTSIZE; /* advance pointer to next row */
2295 } else
2296 dataptr = workspace; /* switch pointer to extended workspace */
2297 }
2298
2299 /* Pass 2: process columns.
2300 * We remove the PASS1_BITS scaling, but leave the results scaled up
2301 * by an overall factor of 8.
2302 * We must also scale the output by (8/16)**2 = 1/2**2.
2303 * cK represents sqrt(2) * cos(K*pi/32).
2304 */
2305
2306 dataptr = data;
2307 wsptr = workspace;
2308 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
2309 /* Even part */
2310
2311 tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*7];
2312 tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*6];
2313 tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*5];
2314 tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*4];
2315 tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*3];
2316 tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*2];
2317 tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*1];
2318 tmp7 = dataptr[DCTSIZE*7] + wsptr[DCTSIZE*0];
2319
2320 tmp10 = tmp0 + tmp7;
2321 tmp14 = tmp0 - tmp7;
2322 tmp11 = tmp1 + tmp6;
2323 tmp15 = tmp1 - tmp6;
2389 }
2390
2391
2392 /*
2393 * Perform the forward DCT on a 16x8 sample block.
2394 *
2395 * 16-point FDCT in pass 1 (rows), 8-point in pass 2 (columns).
 *
 * data receives the coefficients: pass 1 writes entries 0..7 of each of
 * DCTSIZE consecutive rows (row stride DCTSIZE); pass 2 then transforms
 * the columns of that same buffer in place.
 * sample_data supplies the input rows; rows 0..DCTSIZE-1 are read, and
 * from each row the 16 samples at start_col .. start_col+15.
2396 */
2397
2398 GLOBAL(void)
2399 jpeg_fdct_16x8 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
2400 {
2401 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
2402 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17;
2403 INT32 z1;
2404 DCTELEM *dataptr;
2405 JSAMPROW elemptr;
2406 int ctr;
2407 SHIFT_TEMPS
2408
2409 /* Pass 1: process rows.
2410 * Note results are scaled up by sqrt(8) compared to a true DCT;
2411 * furthermore, we scale the results by 2**PASS1_BITS.
2412 * 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
2413 */
2414
2415 dataptr = data;
/* NOTE(review): the assignment below is redundant; the for statement
 * that follows initializes ctr again.
 */
2416 ctr = 0;
2417 for (ctr = 0; ctr < DCTSIZE; ctr++) {
2418 elemptr = sample_data[ctr] + start_col;
2419
2420 /* Even part */
2421
/* Fold the row about its centre: tmpK = sample K + sample (15-K). */
2422 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[15]);
2423 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[14]);
2424 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[13]);
2425 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[12]);
2426 tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[11]);
2427 tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[10]);
2428 tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[9]);
2429 tmp7 = GETJSAMPLE(elemptr[7]) + GETJSAMPLE(elemptr[8]);
2430
/* Second stage: sums and differences of the folded values. */
2431 tmp10 = tmp0 + tmp7;
2432 tmp14 = tmp0 - tmp7;
2433 tmp11 = tmp1 + tmp6;
2434 tmp15 = tmp1 - tmp6;
2435 tmp12 = tmp2 + tmp5;
2436 tmp16 = tmp2 - tmp5;
2437 tmp13 = tmp3 + tmp4;
2438 tmp17 = tmp3 - tmp4;
2439
/* tmp0..tmp7 are reused for the differences sample K - sample (15-K);
 * these are the values consumed by the odd part further down.
 */
2440 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[15]);
2441 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[14]);
2442 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[13]);
2443 tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[12]);
2444 tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[11]);
2445 tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[10]);
2446 tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[9]);
2447 tmp7 = GETJSAMPLE(elemptr[7]) - GETJSAMPLE(elemptr[8]);
2448
2449 /* Apply unsigned->signed conversion. */
/* tmp10+tmp11+tmp12+tmp13 is the sum of all 16 samples, hence the
 * 16 * CENTERJSAMPLE offset.
 * NOTE(review): the shifted value can be negative; left-shifting a
 * negative signed value is undefined behavior in ISO C.
 */
2450 dataptr[0] = (DCTELEM)
2451 ((tmp10 + tmp11 + tmp12 + tmp13 - 16 * CENTERJSAMPLE) << PASS1_BITS);
2452 dataptr[4] = (DCTELEM)
2453 DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
2454 MULTIPLY(tmp11 - tmp12, FIX_0_541196100), /* c12[16] = c6[8] */
2455 CONST_BITS-PASS1_BITS);
2456
/* Term shared by outputs 2 and 6. */
2457 tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) + /* c14[16] = c7[8] */
2458 MULTIPLY(tmp14 - tmp16, FIX(1.387039845)); /* c2[16] = c1[8] */
2459
2460 dataptr[2] = (DCTELEM)
2461 DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982)) /* c6+c14 */
2462 + MULTIPLY(tmp16, FIX(2.172734804)), /* c2+c10 */
2463 CONST_BITS-PASS1_BITS);
2464 dataptr[6] = (DCTELEM)
2465 DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243)) /* c2-c6 */
2466 - MULTIPLY(tmp17, FIX(1.061594338)), /* c10+c14 */
2467 CONST_BITS-PASS1_BITS);
2468
2469 /* Odd part */
/* NOTE(review): this listing jumps from line 2469 to line 2485, so the
 * opening statements of the odd part (including the initial values of
 * tmp10..tmp16 used below) are not visible here.  The next line is the
 * tail of a statement whose beginning is missing.
 */
2485 MULTIPLY(tmp7, FIX(0.779653625)); /* c15+c13-c11+c9 */
2486 tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
2487 - MULTIPLY(tmp6, FIX(1.663905119)); /* c7+c13+c1-c5 */
2488 tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
2489 + MULTIPLY(tmp5, FIX(1.227391138)); /* c9-c11+c1-c13 */
2490 tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
2491 + MULTIPLY(tmp4, FIX(2.167985692)); /* c1+c13+c5-c9 */
2492
/* Only the first four odd outputs (1, 3, 5, 7) are stored; each row of
 * the output block holds 8 coefficients.
 */
2493 dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
2494 dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
2495 dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
2496 dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
2497
2498 dataptr += DCTSIZE; /* advance pointer to next row */
2499 }
2500
2501 /* Pass 2: process columns.
2502 * We remove the PASS1_BITS scaling, but leave the results scaled up
2503 * by an overall factor of 8.
2504 * We must also scale the output by 8/16 = 1/2.
2505 * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
2506 */
2507
/* Every DESCALE count in this pass carries an extra "+1" compared with
 * plain PASS1_BITS / CONST_BITS+PASS1_BITS; presumably that is how the
 * 1/2 factor mentioned above is applied (DESCALE itself is defined
 * outside this excerpt -- confirm).
 */
2508 dataptr = data;
2509 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
2510 /* Even part per LL&M figure 1 --- note that published figure is faulty;
2511 * rotator "c1" should be "c6".
2512 */
2513
2514 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
2515 tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
2516 tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
2517 tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
2518
2519 tmp10 = tmp0 + tmp3;
2520 tmp12 = tmp0 - tmp3;
2521 tmp11 = tmp1 + tmp2;
2522 tmp13 = tmp1 - tmp2;
2523
/* The differences must be taken before any output of this column is
 * stored, because the stores below overwrite the inputs in place.
 */
2524 tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
2525 tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
2526 tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
2527 tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
2528
2529 dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS+1);
2530 dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS+1);
2531
2532 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); /* c6 */
2533 dataptr[DCTSIZE*2] = (DCTELEM)
2534 DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
2535 CONST_BITS+PASS1_BITS+1);
2536 dataptr[DCTSIZE*6] = (DCTELEM)
2537 DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
2538 CONST_BITS+PASS1_BITS+1);
2539
2540 /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
2541 * i0..i3 in the paper are tmp0..tmp3 here.
2542 */
2543
2544 tmp12 = tmp0 + tmp2;
2545 tmp13 = tmp1 + tmp3;
2546
2547 z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
2548 tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* -c3+c5 */
2549 tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
2550 tmp12 += z1;
2551 tmp13 += z1;
2552
2553 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
2554 tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
2555 tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
2556 tmp0 += z1 + tmp12;
2557 tmp3 += z1 + tmp13;
2558
2559 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
2560 tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
2561 tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
2562 tmp1 += z1 + tmp13;
2563 tmp2 += z1 + tmp12;
2564
2565 dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+PASS1_BITS+1);
2566 dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+PASS1_BITS+1);
2567 dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+PASS1_BITS+1);
2568 dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+PASS1_BITS+1);
2569
2570 dataptr++; /* advance pointer to next column */
2571 }
2572 }
2573
2574
2575 /*
2576 * Perform the forward DCT on a 14x7 sample block.
2577 *
2578 * 14-point FDCT in pass 1 (rows), 7-point in pass 2 (columns).
2579 */
2580
2581 GLOBAL(void)
2582 jpeg_fdct_14x7 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
2583 {
2584 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
2585 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
2586 INT32 z1, z2, z3;
2587 DCTELEM *dataptr;
2588 JSAMPROW elemptr;
2589 int ctr;
2590 SHIFT_TEMPS
2591
2592 /* Zero bottom row of output coefficient block. */
2593 MEMZERO(&data[DCTSIZE*7], SIZEOF(DCTELEM) * DCTSIZE);
2594
2595 /* Pass 1: process rows.
2596 * Note results are scaled up by sqrt(8) compared to a true DCT;
2597 * furthermore, we scale the results by 2**PASS1_BITS.
2598 * 14-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
2599 */
2600
2601 dataptr = data;
2602 for (ctr = 0; ctr < 7; ctr++) {
2603 elemptr = sample_data[ctr] + start_col;
2604
2605 /* Even part */
2606
2607 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[13]);
2608 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[12]);
2609 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[11]);
2610 tmp13 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[10]);
2611 tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[9]);
2612 tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[8]);
2613 tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[7]);
2614
2615 tmp10 = tmp0 + tmp6;
2616 tmp14 = tmp0 - tmp6;
2617 tmp11 = tmp1 + tmp5;
2618 tmp15 = tmp1 - tmp5;
2619 tmp12 = tmp2 + tmp4;
2620 tmp16 = tmp2 - tmp4;
2621
2622 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[13]);
2623 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[12]);
2624 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[11]);
2625 tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[10]);
2626 tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[9]);
2627 tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[8]);
2628 tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[7]);
2629
2630 /* Apply unsigned->signed conversion. */
2631 dataptr[0] = (DCTELEM)
2632 ((tmp10 + tmp11 + tmp12 + tmp13 - 14 * CENTERJSAMPLE) << PASS1_BITS);
2633 tmp13 += tmp13;
2634 dataptr[4] = (DCTELEM)
2635 DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.274162392)) + /* c4 */
2636 MULTIPLY(tmp11 - tmp13, FIX(0.314692123)) - /* c12 */
2637 MULTIPLY(tmp12 - tmp13, FIX(0.881747734)), /* c8 */
2638 CONST_BITS-PASS1_BITS);
2639
2640 tmp10 = MULTIPLY(tmp14 + tmp15, FIX(1.105676686)); /* c6 */
2641
2642 dataptr[2] = (DCTELEM)
2643 DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.273079590)) /* c2-c6 */
2644 + MULTIPLY(tmp16, FIX(0.613604268)), /* c10 */
2645 CONST_BITS-PASS1_BITS);
2646 dataptr[6] = (DCTELEM)
2647 DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.719280954)) /* c6+c10 */
2648 - MULTIPLY(tmp16, FIX(1.378756276)), /* c2 */
2649 CONST_BITS-PASS1_BITS);
2650
2739
2740 /*
2741 * Perform the forward DCT on a 12x6 sample block.
2742 *
2743 * 12-point FDCT in pass 1 (rows), 6-point in pass 2 (columns).
2744 */
2745
2746 GLOBAL(void)
2747 jpeg_fdct_12x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
2748 {
2749 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
2750 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
2751 DCTELEM *dataptr;
2752 JSAMPROW elemptr;
2753 int ctr;
2754 SHIFT_TEMPS
2755
2756 /* Zero 2 bottom rows of output coefficient block. */
2757 MEMZERO(&data[DCTSIZE*6], SIZEOF(DCTELEM) * DCTSIZE * 2);
2758
2759 /* Pass 1: process rows.
2760 * Note results are scaled up by sqrt(8) compared to a true DCT;
2761 * furthermore, we scale the results by 2**PASS1_BITS.
2762 * 12-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
2763 */
2764
2765 dataptr = data;
2766 for (ctr = 0; ctr < 6; ctr++) {
2767 elemptr = sample_data[ctr] + start_col;
2768
2769 /* Even part */
2770
2771 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[11]);
2772 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[10]);
2773 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[9]);
2774 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[8]);
2775 tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[7]);
2776 tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[6]);
2777
2778 tmp10 = tmp0 + tmp5;
2779 tmp13 = tmp0 - tmp5;
2780 tmp11 = tmp1 + tmp4;
2781 tmp14 = tmp1 - tmp4;
2782 tmp12 = tmp2 + tmp3;
2783 tmp15 = tmp2 - tmp3;
2784
2785 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[11]);
2786 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[10]);
2787 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[9]);
2788 tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[8]);
2789 tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[7]);
2790 tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[6]);
2791
2792 /* Apply unsigned->signed conversion. */
2793 dataptr[0] = (DCTELEM)
2794 ((tmp10 + tmp11 + tmp12 - 12 * CENTERJSAMPLE) << PASS1_BITS);
2795 dataptr[6] = (DCTELEM) ((tmp13 - tmp14 - tmp15) << PASS1_BITS);
2796 dataptr[4] = (DCTELEM)
2797 DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.224744871)), /* c4 */
2798 CONST_BITS-PASS1_BITS);
2799 dataptr[2] = (DCTELEM)
2800 DESCALE(tmp14 - tmp15 + MULTIPLY(tmp13 + tmp15, FIX(1.366025404)), /* c2 */
2801 CONST_BITS-PASS1_BITS);
2802
2803 /* Odd part */
2804
2805 tmp10 = MULTIPLY(tmp1 + tmp4, FIX_0_541196100); /* c9 */
2806 tmp14 = tmp10 + MULTIPLY(tmp1, FIX_0_765366865); /* c3-c9 */
2807 tmp15 = tmp10 - MULTIPLY(tmp4, FIX_1_847759065); /* c3+c9 */
2808 tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.121971054)); /* c5 */
2809 tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.860918669)); /* c7 */
2810 tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.580774953)) /* c5+c7-c1 */
2811 + MULTIPLY(tmp5, FIX(0.184591911)); /* c11 */
2812 tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.184591911)); /* -c11 */
2879
2880 /*
2881 * Perform the forward DCT on a 10x5 sample block.
2882 *
2883 * 10-point FDCT in pass 1 (rows), 5-point in pass 2 (columns).
2884 */
2885
2886 GLOBAL(void)
2887 jpeg_fdct_10x5 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
2888 {
2889 INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
2890 INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
2891 DCTELEM *dataptr;
2892 JSAMPROW elemptr;
2893 int ctr;
2894 SHIFT_TEMPS
2895
2896 /* Zero 3 bottom rows of output coefficient block. */
2897 MEMZERO(&data[DCTSIZE*5], SIZEOF(DCTELEM) * DCTSIZE * 3);
2898
2899 /* Pass 1: process rows.
2900 * Note results are scaled up by sqrt(8) compared to a true DCT;
2901 * furthermore, we scale the results by 2**PASS1_BITS.
2902 * 10-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
2903 */
2904
2905 dataptr = data;
2906 for (ctr = 0; ctr < 5; ctr++) {
2907 elemptr = sample_data[ctr] + start_col;
2908
2909 /* Even part */
2910
2911 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[9]);
2912 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[8]);
2913 tmp12 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[7]);
2914 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[6]);
2915 tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[5]);
2916
2917 tmp10 = tmp0 + tmp4;
2918 tmp13 = tmp0 - tmp4;
2919 tmp11 = tmp1 + tmp3;
2920 tmp14 = tmp1 - tmp3;
2921
2922 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[9]);
2923 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[8]);
2924 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[7]);
2925 tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[6]);
2926 tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[5]);
2927
2928 /* Apply unsigned->signed conversion. */
2929 dataptr[0] = (DCTELEM)
2930 ((tmp10 + tmp11 + tmp12 - 10 * CENTERJSAMPLE) << PASS1_BITS);
2931 tmp12 += tmp12;
2932 dataptr[4] = (DCTELEM)
2933 DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.144122806)) - /* c4 */
2934 MULTIPLY(tmp11 - tmp12, FIX(0.437016024)), /* c8 */
2935 CONST_BITS-PASS1_BITS);
2936 tmp10 = MULTIPLY(tmp13 + tmp14, FIX(0.831253876)); /* c6 */
2937 dataptr[2] = (DCTELEM)
2938 DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.513743148)), /* c2-c6 */
2939 CONST_BITS-PASS1_BITS);
2940 dataptr[6] = (DCTELEM)
2941 DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.176250899)), /* c2+c6 */
2942 CONST_BITS-PASS1_BITS);
2943
2944 /* Odd part */
2945
2946 tmp10 = tmp0 + tmp4;
2947 tmp11 = tmp1 - tmp3;
2948 dataptr[5] = (DCTELEM) ((tmp10 - tmp11 - tmp2) << PASS1_BITS);
3013 /*
3014 * Perform the forward DCT on an 8x4 sample block.
3015 *
3016 * 8-point FDCT in pass 1 (rows), 4-point in pass 2 (columns).
3017 */
3018
3019 GLOBAL(void)
3020 jpeg_fdct_8x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3021 {
3022 INT32 tmp0, tmp1, tmp2, tmp3;
3023 INT32 tmp10, tmp11, tmp12, tmp13;
3024 INT32 z1;
3025 DCTELEM *dataptr;
3026 JSAMPROW elemptr;
3027 int ctr;
3028 SHIFT_TEMPS
3029
3030 /* Zero 4 bottom rows of output coefficient block. */
3031 MEMZERO(&data[DCTSIZE*4], SIZEOF(DCTELEM) * DCTSIZE * 4);
3032
3033 /* Pass 1: process rows.
3034 * Note results are scaled up by sqrt(8) compared to a true DCT;
3035 * furthermore, we scale the results by 2**PASS1_BITS.
3036 * We must also scale the output by 8/4 = 2, which we add here.
3037 * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
3038 */
3039
3040 dataptr = data;
3041 for (ctr = 0; ctr < 4; ctr++) {
3042 elemptr = sample_data[ctr] + start_col;
3043
3044 /* Even part per LL&M figure 1 --- note that published figure is faulty;
3045 * rotator "c1" should be "c6".
3046 */
3047
3048 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
3049 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
3050 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
3051 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
3052
3053 tmp10 = tmp0 + tmp3;
3054 tmp12 = tmp0 - tmp3;
3055 tmp11 = tmp1 + tmp2;
3056 tmp13 = tmp1 - tmp2;
3057
3058 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
3059 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
3060 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
3061 tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
3062
3063 /* Apply unsigned->signed conversion. */
3064 dataptr[0] = (DCTELEM)
3065 ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << (PASS1_BITS+1));
3066 dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << (PASS1_BITS+1));
3067
3068 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); /* c6 */
3069 /* Add fudge factor here for final descale. */
3070 z1 += ONE << (CONST_BITS-PASS1_BITS-2);
3071
3072 dataptr[2] = (DCTELEM)
3073 RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
3074 CONST_BITS-PASS1_BITS-1);
3075 dataptr[6] = (DCTELEM)
3076 RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
3077 CONST_BITS-PASS1_BITS-1);
3078
3079 /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
3080 * i0..i3 in the paper are tmp0..tmp3 here.
3081 */
3082
3083 tmp12 = tmp0 + tmp2;
3084 tmp13 = tmp1 + tmp3;
3085
3086 z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
3087 /* Add fudge factor here for final descale. */
3088 z1 += ONE << (CONST_BITS-PASS1_BITS-2);
3089
3090 tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* -c3+c5 */
3091 tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
3092 tmp12 += z1;
3093 tmp13 += z1;
3094
3095 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
3096 tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
3097 tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
3098 tmp0 += z1 + tmp12;
3099 tmp3 += z1 + tmp13;
3100
3101 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
3102 tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
3103 tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
3104 tmp1 += z1 + tmp13;
3105 tmp2 += z1 + tmp12;
3106
3107 dataptr[1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS-PASS1_BITS-1);
3108 dataptr[3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS-PASS1_BITS-1);
3109 dataptr[5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS-1);
3110 dataptr[7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS-PASS1_BITS-1);
3111
3112 dataptr += DCTSIZE; /* advance pointer to next row */
3113 }
3114
3115 /* Pass 2: process columns.
3116 * We remove the PASS1_BITS scaling, but leave the results scaled up
3117 * by an overall factor of 8.
3118 * 4-point FDCT kernel,
3119 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
3120 */
3121
3122 dataptr = data;
3123 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
3124 /* Even part */
3125
3126 /* Add fudge factor here for final descale. */
3127 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3] + (ONE << (PASS1_BITS-1));
3128 tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2];
3129
3130 tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3];
3131 tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2];
3132
3133 dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp0 + tmp1, PASS1_BITS);
3134 dataptr[DCTSIZE*2] = (DCTELEM) RIGHT_SHIFT(tmp0 - tmp1, PASS1_BITS);
3135
3136 /* Odd part */
3137
3138 tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */
3139 /* Add fudge factor here for final descale. */
3153
3154 /*
3155 * Perform the forward DCT on a 6x3 sample block.
3156 *
3157 * 6-point FDCT in pass 1 (rows), 3-point in pass 2 (columns).
3158 */
3159
3160 GLOBAL(void)
3161 jpeg_fdct_6x3 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3162 {
3163 INT32 tmp0, tmp1, tmp2;
3164 INT32 tmp10, tmp11, tmp12;
3165 DCTELEM *dataptr;
3166 JSAMPROW elemptr;
3167 int ctr;
3168 SHIFT_TEMPS
3169
3170 /* Pre-zero output coefficient block. */
3171 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
3172
3173 /* Pass 1: process rows.
3174 * Note results are scaled up by sqrt(8) compared to a true DCT;
3175 * furthermore, we scale the results by 2**PASS1_BITS.
3176 * We scale the results further by 2 as part of output adaption
3177 * scaling for different DCT size.
3178 * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
3179 */
3180
3181 dataptr = data;
3182 for (ctr = 0; ctr < 3; ctr++) {
3183 elemptr = sample_data[ctr] + start_col;
3184
3185 /* Even part */
3186
3187 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]);
3188 tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]);
3189 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]);
3190
3191 tmp10 = tmp0 + tmp2;
3192 tmp12 = tmp0 - tmp2;
3193
3194 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]);
3195 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]);
3196 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]);
3197
3198 /* Apply unsigned->signed conversion. */
3199 dataptr[0] = (DCTELEM)
3200 ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << (PASS1_BITS+1));
3201 dataptr[2] = (DCTELEM)
3202 DESCALE(MULTIPLY(tmp12, FIX(1.224744871)), /* c2 */
3203 CONST_BITS-PASS1_BITS-1);
3204 dataptr[4] = (DCTELEM)
3205 DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */
3206 CONST_BITS-PASS1_BITS-1);
3207
3208 /* Odd part */
3209
3210 tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)), /* c5 */
3211 CONST_BITS-PASS1_BITS-1);
3212
3213 dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << (PASS1_BITS+1)));
3214 dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << (PASS1_BITS+1));
3215 dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << (PASS1_BITS+1)));
3216
3217 dataptr += DCTSIZE; /* advance pointer to next row */
3218 }
3254
3255 /*
3256 * Perform the forward DCT on a 4x2 sample block.
3257 *
3258 * 4-point FDCT in pass 1 (rows), 2-point in pass 2 (columns).
 *
 * data is cleared completely (DCTSIZE2 entries) before any coefficient
 * is stored; pass 1 then writes entries 0..3 of rows 0 and 1.
 * sample_data supplies the input rows; rows 0 and 1 are read, and from
 * each row the 4 samples at start_col .. start_col+3.
3259 */
3260
3261 GLOBAL(void)
3262 jpeg_fdct_4x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3263 {
3264 INT32 tmp0, tmp1;
3265 INT32 tmp10, tmp11;
3266 DCTELEM *dataptr;
3267 JSAMPROW elemptr;
3268 int ctr;
3269 SHIFT_TEMPS
3270
3271 /* Pre-zero output coefficient block. */
3272 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
3273
3274 /* Pass 1: process rows.
3275 * Note results are scaled up by sqrt(8) compared to a true DCT;
3276 * furthermore, we scale the results by 2**PASS1_BITS.
3277 * We must also scale the output by (8/4)*(8/2) = 2**3, which we add here.
3278 * 4-point FDCT kernel,
3279 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
3280 */
3281
3282 dataptr = data;
3283 for (ctr = 0; ctr < 2; ctr++) {
3284 elemptr = sample_data[ctr] + start_col;
3285
3286 /* Even part */
3287
/* Sums of the mirrored sample pairs (0,3) and (1,2). */
3288 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
3289 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
3290
/* Differences of the same pairs; used by the odd part. */
3291 tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
3292 tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
3293
3294 /* Apply unsigned->signed conversion. */
/* The "+3" in the shift count is the 2**3 size-adaptation factor from
 * the pass comment above.
 * NOTE(review): both shifted values can be negative; left-shifting a
 * negative signed value is undefined behavior in ISO C.
 */
3295 dataptr[0] = (DCTELEM)
3296 ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+3));
3297 dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+3));
3298
3299 /* Odd part */
3300
3301 tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */
3302 /* Add fudge factor here for final descale. */
/* The added bit sits one position below the lowest bit kept by the
 * RIGHT_SHIFT by CONST_BITS-PASS1_BITS-3 used for both outputs below,
 * so those shifts round rather than truncate (assuming ONE is the
 * constant 1; it is defined outside this excerpt).
 */
3303 tmp0 += ONE << (CONST_BITS-PASS1_BITS-4);
3304
3305 dataptr[1] = (DCTELEM)
3306 RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
3307 CONST_BITS-PASS1_BITS-3);
3308 dataptr[3] = (DCTELEM)
3309 RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
3310 CONST_BITS-PASS1_BITS-3);
3311
3312 dataptr += DCTSIZE; /* advance pointer to next row */
3313 }
3314
/* NOTE(review): this listing jumps from line 3314 to line 3328, so the
 * start of pass 2 (its header comment, the loop statement and the
 * computation of tmp0/tmp1 for each column) is not visible here.  The
 * lines below are the end of that column loop.
 */
3328 dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp0 + tmp1, PASS1_BITS);
3329
3330 /* Odd part */
3331
3332 dataptr[DCTSIZE*1] = (DCTELEM) RIGHT_SHIFT(tmp0 - tmp1, PASS1_BITS);
3333
3334 dataptr++; /* advance pointer to next column */
3335 }
3336 }
3337
3338
3339 /*
3340 * Perform the forward DCT on a 2x1 sample block.
3341 *
3342 * 2-point FDCT in pass 1 (rows), 1-point in pass 2 (columns).
3343 */
3344
3345 GLOBAL(void)
3346 jpeg_fdct_2x1 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3347 {
3348 DCTELEM tmp0, tmp1;
3349 JSAMPROW elemptr;
3350
3351 /* Pre-zero output coefficient block. */
3352 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
3353
3354 elemptr = sample_data[0] + start_col;
3355
3356 tmp0 = GETJSAMPLE(elemptr[0]);
3357 tmp1 = GETJSAMPLE(elemptr[1]);
3358
3359 /* We leave the results scaled up by an overall factor of 8.
3360 * We must also scale the output by (8/2)*(8/1) = 2**5.
3361 */
3362
3363 /* Even part */
3364
3365 /* Apply unsigned->signed conversion. */
3366 data[0] = (tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 5;
3367
3368 /* Odd part */
3369
3370 data[1] = (tmp0 - tmp1) << 5;
3371 }
3372
3373
3374 /*
3375 * Perform the forward DCT on an 8x16 sample block.
3376 *
3377 * 8-point FDCT in pass 1 (rows), 16-point in pass 2 (columns).
3378 */
3379
3380 GLOBAL(void)
3381 jpeg_fdct_8x16 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3382 {
3383 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
3384 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17;
3385 INT32 z1;
3386 DCTELEM workspace[DCTSIZE2];
3387 DCTELEM *dataptr;
3388 DCTELEM *wsptr;
3389 JSAMPROW elemptr;
3390 int ctr;
3391 SHIFT_TEMPS
3392
3393 /* Pass 1: process rows.
3394 * Note results are scaled up by sqrt(8) compared to a true DCT;
3395 * furthermore, we scale the results by 2**PASS1_BITS.
3396 * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
3397 */
3398
3399 dataptr = data;
3400 ctr = 0;
3401 for (;;) {
3402 elemptr = sample_data[ctr] + start_col;
3403
3404 /* Even part per LL&M figure 1 --- note that published figure is faulty;
3405 * rotator "c1" should be "c6".
3406 */
3407
3408 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
3409 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
3410 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
3411 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
3412
3413 tmp10 = tmp0 + tmp3;
3414 tmp12 = tmp0 - tmp3;
3415 tmp11 = tmp1 + tmp2;
3416 tmp13 = tmp1 - tmp2;
3417
3418 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
3419 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
3420 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
3421 tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
3422
3423 /* Apply unsigned->signed conversion. */
3424 dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << PASS1_BITS);
3425 dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
3426
3427 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); /* c6 */
3428 dataptr[2] = (DCTELEM)
3429 DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
3430 CONST_BITS-PASS1_BITS);
3431 dataptr[6] = (DCTELEM)
3432 DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
3433 CONST_BITS-PASS1_BITS);
3434
3435 /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
3436 * i0..i3 in the paper are tmp0..tmp3 here.
3437 */
3438
3439 tmp12 = tmp0 + tmp2;
3440 tmp13 = tmp1 + tmp3;
3441
3442 z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
3443 tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* -c3+c5 */
3444 tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
3445 tmp12 += z1;
3446 tmp13 += z1;
3447
3448 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
3449 tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
3450 tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
3451 tmp0 += z1 + tmp12;
3452 tmp3 += z1 + tmp13;
3453
3454 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
3455 tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
3456 tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
3457 tmp1 += z1 + tmp13;
3458 tmp2 += z1 + tmp12;
3459
3460 dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-PASS1_BITS);
3461 dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-PASS1_BITS);
3462 dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-PASS1_BITS);
3463 dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS-PASS1_BITS);
3464
3465 ctr++;
3466
3467 if (ctr != DCTSIZE) {
3468 if (ctr == DCTSIZE * 2)
3469 break; /* Done. */
3470 dataptr += DCTSIZE; /* advance pointer to next row */
3471 } else
3472 dataptr = workspace; /* switch pointer to extended workspace */
3473 }
3474
3475 /* Pass 2: process columns.
3476 * We remove the PASS1_BITS scaling, but leave the results scaled up
3477 * by an overall factor of 8.
3478 * We must also scale the output by 8/16 = 1/2.
3479 * 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
3480 */
3481
3482 dataptr = data;
3483 wsptr = workspace;
3570 *
3571 * 7-point FDCT in pass 1 (rows), 14-point in pass 2 (columns).
3572 */
3573
3574 GLOBAL(void)
3575 jpeg_fdct_7x14 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3576 {
3577 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
3578 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
3579 INT32 z1, z2, z3;
3580 DCTELEM workspace[8*6];
3581 DCTELEM *dataptr;
3582 DCTELEM *wsptr;
3583 JSAMPROW elemptr;
3584 int ctr;
3585 SHIFT_TEMPS
3586
3587 /* Pre-zero output coefficient block. */
3588 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
3589
3590 /* Pass 1: process rows.
3591 * Note results are scaled up by sqrt(8) compared to a true DCT;
3592 * furthermore, we scale the results by 2**PASS1_BITS.
3593 * 7-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
3594 */
3595
3596 dataptr = data;
3597 ctr = 0;
3598 for (;;) {
3599 elemptr = sample_data[ctr] + start_col;
3600
3601 /* Even part */
3602
3603 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[6]);
3604 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[5]);
3605 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[4]);
3606 tmp3 = GETJSAMPLE(elemptr[3]);
3607
3608 tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[6]);
3609 tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[5]);
3610 tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[4]);
3611
3612 z1 = tmp0 + tmp2;
3613 /* Apply unsigned->signed conversion. */
3614 dataptr[0] = (DCTELEM)
3615 ((z1 + tmp1 + tmp3 - 7 * CENTERJSAMPLE) << PASS1_BITS);
3616 tmp3 += tmp3;
3617 z1 -= tmp3;
3618 z1 -= tmp3;
3619 z1 = MULTIPLY(z1, FIX(0.353553391)); /* (c2+c6-c4)/2 */
3620 z2 = MULTIPLY(tmp0 - tmp2, FIX(0.920609002)); /* (c2+c4-c6)/2 */
3621 z3 = MULTIPLY(tmp1 - tmp2, FIX(0.314692123)); /* c6 */
3622 dataptr[2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS-PASS1_BITS);
3623 z1 -= z2;
3624 z2 = MULTIPLY(tmp0 - tmp1, FIX(0.881747734)); /* c4 */
3625 dataptr[4] = (DCTELEM)
3626 DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.707106781)), /* c2+c6-c4 */
3627 CONST_BITS-PASS1_BITS);
3628 dataptr[6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS-PASS1_BITS);
3629
3630 /* Odd part */
3631
3632 tmp1 = MULTIPLY(tmp10 + tmp11, FIX(0.935414347)); /* (c3+c1-c5)/2 */
3633 tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.170262339)); /* (c3+c5-c1)/2 */
3751 * Perform the forward DCT on a 6x12 sample block.
3752 *
3753 * 6-point FDCT in pass 1 (rows), 12-point in pass 2 (columns).
3754 */
3755
3756 GLOBAL(void)
3757 jpeg_fdct_6x12 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3758 {
3759 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
3760 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
3761 DCTELEM workspace[8*4];
3762 DCTELEM *dataptr;
3763 DCTELEM *wsptr;
3764 JSAMPROW elemptr;
3765 int ctr;
3766 SHIFT_TEMPS
3767
3768 /* Pre-zero output coefficient block. */
3769 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
3770
3771 /* Pass 1: process rows.
3772 * Note results are scaled up by sqrt(8) compared to a true DCT;
3773 * furthermore, we scale the results by 2**PASS1_BITS.
3774 * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
3775 */
3776
3777 dataptr = data;
3778 ctr = 0;
3779 for (;;) {
3780 elemptr = sample_data[ctr] + start_col;
3781
3782 /* Even part */
3783
3784 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]);
3785 tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]);
3786 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]);
3787
3788 tmp10 = tmp0 + tmp2;
3789 tmp12 = tmp0 - tmp2;
3790
3791 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]);
3792 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]);
3793 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]);
3794
3795 /* Apply unsigned->signed conversion. */
3796 dataptr[0] = (DCTELEM)
3797 ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << PASS1_BITS);
3798 dataptr[2] = (DCTELEM)
3799 DESCALE(MULTIPLY(tmp12, FIX(1.224744871)), /* c2 */
3800 CONST_BITS-PASS1_BITS);
3801 dataptr[4] = (DCTELEM)
3802 DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */
3803 CONST_BITS-PASS1_BITS);
3804
3805 /* Odd part */
3806
3807 tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)), /* c5 */
3808 CONST_BITS-PASS1_BITS);
3809
3810 dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << PASS1_BITS));
3811 dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << PASS1_BITS);
3812 dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << PASS1_BITS));
3813
3814 ctr++;
3815
3901 * Perform the forward DCT on a 5x10 sample block.
3902 *
3903 * 5-point FDCT in pass 1 (rows), 10-point in pass 2 (columns).
3904 */
3905
3906 GLOBAL(void)
3907 jpeg_fdct_5x10 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3908 {
3909 INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
3910 INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
3911 DCTELEM workspace[8*2];
3912 DCTELEM *dataptr;
3913 DCTELEM *wsptr;
3914 JSAMPROW elemptr;
3915 int ctr;
3916 SHIFT_TEMPS
3917
3918 /* Pre-zero output coefficient block. */
3919 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
3920
3921 /* Pass 1: process rows.
3922 * Note results are scaled up by sqrt(8) compared to a true DCT;
3923 * furthermore, we scale the results by 2**PASS1_BITS.
3924 * 5-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
3925 */
3926
3927 dataptr = data;
3928 ctr = 0;
3929 for (;;) {
3930 elemptr = sample_data[ctr] + start_col;
3931
3932 /* Even part */
3933
3934 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[4]);
3935 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[3]);
3936 tmp2 = GETJSAMPLE(elemptr[2]);
3937
3938 tmp10 = tmp0 + tmp1;
3939 tmp11 = tmp0 - tmp1;
3940
3941 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[4]);
3942 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[3]);
3943
3944 /* Apply unsigned->signed conversion. */
3945 dataptr[0] = (DCTELEM)
3946 ((tmp10 + tmp2 - 5 * CENTERJSAMPLE) << PASS1_BITS);
3947 tmp11 = MULTIPLY(tmp11, FIX(0.790569415)); /* (c2+c4)/2 */
3948 tmp10 -= tmp2 << 2;
3949 tmp10 = MULTIPLY(tmp10, FIX(0.353553391)); /* (c2-c4)/2 */
3950 dataptr[2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS-PASS1_BITS);
3951 dataptr[4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS-PASS1_BITS);
3952
3953 /* Odd part */
3954
3955 tmp10 = MULTIPLY(tmp0 + tmp1, FIX(0.831253876)); /* c3 */
3956
3957 dataptr[1] = (DCTELEM)
3958 DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.513743148)), /* c1-c3 */
3959 CONST_BITS-PASS1_BITS);
3960 dataptr[3] = (DCTELEM)
3961 DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.176250899)), /* c1+c3 */
3962 CONST_BITS-PASS1_BITS);
3963
3964 ctr++;
4047 /*
4048 * Perform the forward DCT on a 4x8 sample block.
4049 *
4050 * 4-point FDCT in pass 1 (rows), 8-point in pass 2 (columns).
4051 */
4052
4053 GLOBAL(void)
4054 jpeg_fdct_4x8 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
4055 {
4056 INT32 tmp0, tmp1, tmp2, tmp3;
4057 INT32 tmp10, tmp11, tmp12, tmp13;
4058 INT32 z1;
4059 DCTELEM *dataptr;
4060 JSAMPROW elemptr;
4061 int ctr;
4062 SHIFT_TEMPS
4063
4064 /* Pre-zero output coefficient block. */
4065 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
4066
4067 /* Pass 1: process rows.
4068 * Note results are scaled up by sqrt(8) compared to a true DCT;
4069 * furthermore, we scale the results by 2**PASS1_BITS.
4070 * We must also scale the output by 8/4 = 2, which we add here.
4071 * 4-point FDCT kernel,
4072 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
4073 */
4074
4075 dataptr = data;
4076 for (ctr = 0; ctr < DCTSIZE; ctr++) {
4077 elemptr = sample_data[ctr] + start_col;
4078
4079 /* Even part */
4080
4081 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
4082 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
4083
4084 tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
4085 tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
4086
4087 /* Apply unsigned->signed conversion. */
4088 dataptr[0] = (DCTELEM)
4089 ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+1));
4090 dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+1));
4091
4092 /* Odd part */
4093
4094 tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */
4095 /* Add fudge factor here for final descale. */
4096 tmp0 += ONE << (CONST_BITS-PASS1_BITS-2);
4097
4098 dataptr[1] = (DCTELEM)
4099 RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
4100 CONST_BITS-PASS1_BITS-1);
4101 dataptr[3] = (DCTELEM)
4102 RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
4103 CONST_BITS-PASS1_BITS-1);
4104
4105 dataptr += DCTSIZE; /* advance pointer to next row */
4106 }
4107
4108 /* Pass 2: process columns.
4109 * We remove the PASS1_BITS scaling, but leave the results scaled up
4110 * by an overall factor of 8.
4111 * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
4112 */
4113
4114 dataptr = data;
4115 for (ctr = 0; ctr < 4; ctr++) {
4116 /* Even part per LL&M figure 1 --- note that published figure is faulty;
4117 * rotator "c1" should be "c6".
4118 */
4119
4120 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
4121 tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
4122 tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
4123 tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
4124
4125 /* Add fudge factor here for final descale. */
4126 tmp10 = tmp0 + tmp3 + (ONE << (PASS1_BITS-1));
4127 tmp12 = tmp0 - tmp3;
4128 tmp11 = tmp1 + tmp2;
4129 tmp13 = tmp1 - tmp2;
4130
4131 tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
4132 tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
4133 tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
4134 tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
4135
4136 dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp10 + tmp11, PASS1_BITS);
4137 dataptr[DCTSIZE*4] = (DCTELEM) RIGHT_SHIFT(tmp10 - tmp11, PASS1_BITS);
4138
4139 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); /* c6 */
4140 /* Add fudge factor here for final descale. */
4141 z1 += ONE << (CONST_BITS+PASS1_BITS-1);
4142
4143 dataptr[DCTSIZE*2] = (DCTELEM)
4144 RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
4145 CONST_BITS+PASS1_BITS);
4146 dataptr[DCTSIZE*6] = (DCTELEM)
4147 RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
4148 CONST_BITS+PASS1_BITS);
4149
4150 /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
4151 * i0..i3 in the paper are tmp0..tmp3 here.
4152 */
4153
4154 tmp12 = tmp0 + tmp2;
4155 tmp13 = tmp1 + tmp3;
4156
4157 z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
4158 /* Add fudge factor here for final descale. */
4159 z1 += ONE << (CONST_BITS+PASS1_BITS-1);
4160
4161 tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* -c3+c5 */
4162 tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
4163 tmp12 += z1;
4164 tmp13 += z1;
4165
4166 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
4167 tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
4168 tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
4169 tmp0 += z1 + tmp12;
4170 tmp3 += z1 + tmp13;
4171
4172 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
4173 tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
4174 tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
4175 tmp1 += z1 + tmp13;
4176 tmp2 += z1 + tmp12;
4177
4178 dataptr[DCTSIZE*1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS+PASS1_BITS);
4179 dataptr[DCTSIZE*3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS+PASS1_BITS);
4180 dataptr[DCTSIZE*5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS+PASS1_BITS);
4181 dataptr[DCTSIZE*7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS+PASS1_BITS);
4182
4183 dataptr++; /* advance pointer to next column */
4184 }
4185 }
4186
4187
4188 /*
4189 * Perform the forward DCT on a 3x6 sample block.
4190 *
4191 * 3-point FDCT in pass 1 (rows), 6-point in pass 2 (columns).
4192 */
4193
4194 GLOBAL(void)
4195 jpeg_fdct_3x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
4196 {
4197 INT32 tmp0, tmp1, tmp2;
4198 INT32 tmp10, tmp11, tmp12;
4199 DCTELEM *dataptr;
4200 JSAMPROW elemptr;
4201 int ctr;
4202 SHIFT_TEMPS
4203
4204 /* Pre-zero output coefficient block. */
4205 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
4206
4207 /* Pass 1: process rows.
4208 * Note results are scaled up by sqrt(8) compared to a true DCT;
4209 * furthermore, we scale the results by 2**PASS1_BITS.
4210 * We scale the results further by 2 as part of output adaption
4211 * scaling for different DCT size.
4212 * 3-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
4213 */
4214
4215 dataptr = data;
4216 for (ctr = 0; ctr < 6; ctr++) {
4217 elemptr = sample_data[ctr] + start_col;
4218
4219 /* Even part */
4220
4221 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[2]);
4222 tmp1 = GETJSAMPLE(elemptr[1]);
4223
4224 tmp2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[2]);
4225
4226 /* Apply unsigned->signed conversion. */
4227 dataptr[0] = (DCTELEM)
4228 ((tmp0 + tmp1 - 3 * CENTERJSAMPLE) << (PASS1_BITS+1));
4229 dataptr[2] = (DCTELEM)
4230 DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(0.707106781)), /* c2 */
4231 CONST_BITS-PASS1_BITS-1);
4232
4233 /* Odd part */
4234
4235 dataptr[1] = (DCTELEM)
4236 DESCALE(MULTIPLY(tmp2, FIX(1.224744871)), /* c1 */
4237 CONST_BITS-PASS1_BITS-1);
4238
4239 dataptr += DCTSIZE; /* advance pointer to next row */
4240 }
4241
4242 /* Pass 2: process columns.
4243 * We remove the PASS1_BITS scaling, but leave the results scaled up
4244 * by an overall factor of 8.
4245 * We must also scale the output by (8/6)*(8/3) = 32/9, which we partially
4246 * fold into the constant multipliers (other part was done in pass 1):
4293
4294 /*
4295 * Perform the forward DCT on a 2x4 sample block.
4296 *
4297 * 2-point FDCT in pass 1 (rows), 4-point in pass 2 (columns).
4298 */
4299
4300 GLOBAL(void)
4301 jpeg_fdct_2x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
4302 {
4303 INT32 tmp0, tmp1;
4304 INT32 tmp10, tmp11;
4305 DCTELEM *dataptr;
4306 JSAMPROW elemptr;
4307 int ctr;
4308 SHIFT_TEMPS
4309
4310 /* Pre-zero output coefficient block. */
4311 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
4312
4313 /* Pass 1: process rows.
4314 * Note results are scaled up by sqrt(8) compared to a true DCT.
4315 * We must also scale the output by (8/2)*(8/4) = 2**3, which we add here.
4316 */
4317
4318 dataptr = data;
4319 for (ctr = 0; ctr < 4; ctr++) {
4320 elemptr = sample_data[ctr] + start_col;
4321
4322 /* Even part */
4323
4324 tmp0 = GETJSAMPLE(elemptr[0]);
4325 tmp1 = GETJSAMPLE(elemptr[1]);
4326
4327 /* Apply unsigned->signed conversion. */
4328 dataptr[0] = (DCTELEM) ((tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 3);
4329
4330 /* Odd part */
4331
4332 dataptr[1] = (DCTELEM) ((tmp0 - tmp1) << 3);
4333
4334 dataptr += DCTSIZE; /* advance pointer to next row */
4335 }
4336
4337 /* Pass 2: process columns.
4338 * We leave the results scaled up by an overall factor of 8.
4339 * 4-point FDCT kernel,
4340 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
4341 */
4342
4343 dataptr = data;
4344 for (ctr = 0; ctr < 2; ctr++) {
4345 /* Even part */
4346
4347 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3];
4363 RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
4364 CONST_BITS);
4365 dataptr[DCTSIZE*3] = (DCTELEM)
4366 RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
4367 CONST_BITS);
4368
4369 dataptr++; /* advance pointer to next column */
4370 }
4371 }
4372
4373
4374 /*
4375 * Perform the forward DCT on a 1x2 sample block.
4376 *
4377 * 1-point FDCT in pass 1 (rows), 2-point in pass 2 (columns).
4378 */
4379
4380 GLOBAL(void)
4381 jpeg_fdct_1x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
4382 {
4383 DCTELEM tmp0, tmp1;
4384
4385 /* Pre-zero output coefficient block. */
4386 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
4387
4388 /* Pass 1: empty. */
4389
4390 /* Pass 2: process columns.
4391 * We leave the results scaled up by an overall factor of 8.
4392 * We must also scale the output by (8/1)*(8/2) = 2**5.
4393 */
4394
4395 /* Even part */
4396
4397 tmp0 = GETJSAMPLE(sample_data[0][start_col]);
4398 tmp1 = GETJSAMPLE(sample_data[1][start_col]);
4399
4400 /* Apply unsigned->signed conversion. */
4401 data[DCTSIZE*0] = (tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 5;
4402
4403 /* Odd part */
4404
4405 data[DCTSIZE*1] = (tmp0 - tmp1) << 5;
4406 }
4407
4408 #endif /* DCT_SCALING_SUPPORTED */
4409 #endif /* DCT_ISLOW_SUPPORTED */
|