1 /*
   2  * jcdctmgr.c
   3  *
   4  * Copyright (C) 1994-1996, Thomas G. Lane.
   5  * Modified 2003-2013 by Guido Vollbeding.
   6  * This file is part of the Independent JPEG Group's software.
   7  * For conditions of distribution and use, see the accompanying README file.
   8  *
   9  * This file contains the forward-DCT management logic.
  10  * This code selects a particular DCT implementation to be used,
  11  * and it performs related housekeeping chores including coefficient
  12  * quantization.
  13  */
  14 
  15 #define JPEG_INTERNALS
  16 #include "jinclude.h"
  17 #include "jpeglib.h"
  18 #include "jdct.h"               /* Private declarations for DCT subsystem */
  19 
  20 
  21 /* Private subobject for this module */
  22 
  23 typedef struct {
  24   struct jpeg_forward_dct pub;  /* public fields */
  25 
  26   /* Pointer to the DCT routine actually in use */
  27   forward_DCT_method_ptr do_dct[MAX_COMPONENTS];
  28 
  29 #ifdef DCT_FLOAT_SUPPORTED
  30   /* Same as above for the floating-point case. */
  31   float_DCT_method_ptr do_float_dct[MAX_COMPONENTS];
  32 #endif
  33 } my_fdct_controller;
  34 
  35 typedef my_fdct_controller * my_fdct_ptr;
  36 
  37 
  38 /* The allocated post-DCT divisor tables -- big enough for any
  39  * supported variant and not identical to the quant table entries,
  40  * because of scaling (especially for an unnormalized DCT) --
  41  * are pointed to by dct_table in the per-component comp_info
  42  * structures.  Each table is given in normal array order.
  43  */
  44 
  45 typedef union {
  46   DCTELEM int_array[DCTSIZE2];
  47 #ifdef DCT_FLOAT_SUPPORTED
  48   FAST_FLOAT float_array[DCTSIZE2];
  49 #endif
  50 } divisor_table;
  51 
  52 
  53 /* The current scaled-DCT routines require ISLOW-style divisor tables,
  54  * so be sure to compile that code if either ISLOW or SCALING is requested.
  55  */
  56 #ifdef DCT_ISLOW_SUPPORTED
  57 #define PROVIDE_ISLOW_TABLES
  58 #else
  59 #ifdef DCT_SCALING_SUPPORTED
  60 #define PROVIDE_ISLOW_TABLES
  61 #endif
  62 #endif
  63 
  64 
  65 /*
  66  * Perform forward DCT on one or more blocks of a component.
  67  *
  68  * The input samples are taken from the sample_data[] array starting at
  69  * position start_row/start_col, and moving to the right for any additional
  70  * blocks. The quantized coefficients are returned in coef_blocks[].
  71  */
  72 
  73 METHODDEF(void)
  74 forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
  75              JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
  76              JDIMENSION start_row, JDIMENSION start_col,
  77              JDIMENSION num_blocks)
  78 /* This version is used for integer DCT implementations. */
  79 {
  80   /* This routine is heavily used, so it's worth coding it tightly. */
  81   my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
  82   forward_DCT_method_ptr do_dct = fdct->do_dct[compptr->component_index];
  83   DCTELEM * divisors = (DCTELEM *) compptr->dct_table;
  84   DCTELEM workspace[DCTSIZE2];  /* work area for FDCT subroutine */
  85   JDIMENSION bi;
  86 
  87   sample_data += start_row;     /* fold in the vertical offset once */
  88 
  89   for (bi = 0; bi < num_blocks; bi++, start_col += compptr->DCT_h_scaled_size) {
  90     /* Perform the DCT */
  91     (*do_dct) (workspace, sample_data, start_col);
  92 
  93     /* Quantize/descale the coefficients, and store into coef_blocks[] */
  94     { register DCTELEM temp, qval;
  95       register int i;
  96       register JCOEFPTR output_ptr = coef_blocks[bi];
  97 
  98       for (i = 0; i < DCTSIZE2; i++) {
  99         qval = divisors[i];
 100         temp = workspace[i];
 101         /* Divide the coefficient value by qval, ensuring proper rounding.
 102          * Since C does not specify the direction of rounding for negative
 103          * quotients, we have to force the dividend positive for portability.
 104          *
 105          * In most files, at least half of the output values will be zero
 106          * (at default quantization settings, more like three-quarters...)
 107          * so we should ensure that this case is fast.  On many machines,
 108          * a comparison is enough cheaper than a divide to make a special test
 109          * a win.  Since both inputs will be nonnegative, we need only test
 110          * for a < b to discover whether a/b is 0.
 111          * If your machine's division is fast enough, define FAST_DIVIDE.
 112          */
 113 #ifdef FAST_DIVIDE
 114 #define DIVIDE_BY(a,b)  a /= b
 115 #else
 116 #define DIVIDE_BY(a,b)  if (a >= b) a /= b; else a = 0
 117 #endif
 118         if (temp < 0) {
 119           temp = -temp;
 120           temp += qval>>1;        /* for rounding */
 121           DIVIDE_BY(temp, qval);
 122           temp = -temp;
 123         } else {
 124           temp += qval>>1;        /* for rounding */
 125           DIVIDE_BY(temp, qval);
 126         }
 127         output_ptr[i] = (JCOEF) temp;
 128       }
 129     }
 130   }
 131 }
 132 
 133 
 134 #ifdef DCT_FLOAT_SUPPORTED
 135 
 136 METHODDEF(void)
 137 forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
 138                    JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
 139                    JDIMENSION start_row, JDIMENSION start_col,
 140                    JDIMENSION num_blocks)
 141 /* This version is used for floating-point DCT implementations. */
 142 {
 143   /* This routine is heavily used, so it's worth coding it tightly. */
 144   my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
 145   float_DCT_method_ptr do_dct = fdct->do_float_dct[compptr->component_index];
 146   FAST_FLOAT * divisors = (FAST_FLOAT *) compptr->dct_table;
 147   FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */
 148   JDIMENSION bi;
 149 
 150   sample_data += start_row;     /* fold in the vertical offset once */
 151 
 152   for (bi = 0; bi < num_blocks; bi++, start_col += compptr->DCT_h_scaled_size) {
 153     /* Perform the DCT */
 154     (*do_dct) (workspace, sample_data, start_col);
 155 
 156     /* Quantize/descale the coefficients, and store into coef_blocks[] */
 157     { register FAST_FLOAT temp;
 158       register int i;
 159       register JCOEFPTR output_ptr = coef_blocks[bi];
 160 
 161       for (i = 0; i < DCTSIZE2; i++) {
 162         /* Apply the quantization and scaling factor */
 163         temp = workspace[i] * divisors[i];
 164         /* Round to nearest integer.
 165          * Since C does not specify the direction of rounding for negative
 166          * quotients, we have to force the dividend positive for portability.
 167          * The maximum coefficient size is +-16K (for 12-bit data), so this
 168          * code should work for either 16-bit or 32-bit ints.
 169          */
 170         output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
 171       }
 172     }
 173   }
 174 }
 175 
 176 #endif /* DCT_FLOAT_SUPPORTED */
 177 
 178 
 179 /*
 180  * Initialize for a processing pass.
 181  * Verify that all referenced Q-tables are present, and set up
 182  * the divisor table for each one.
 183  * In the current implementation, DCT of all components is done during
 184  * the first pass, even if only some components will be output in the
 185  * first scan.  Hence all components should be examined here.
 186  */
 187 
 188 METHODDEF(void)
 189 start_pass_fdctmgr (j_compress_ptr cinfo)
 190 {
 191   my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
 192   int ci, qtblno, i;
 193   jpeg_component_info *compptr;
 194   int method = 0;
 195   JQUANT_TBL * qtbl;
 196   DCTELEM * dtbl;
 197 
 198   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
 199        ci++, compptr++) {
 200     /* Select the proper DCT routine for this component's scaling */
 201     switch ((compptr->DCT_h_scaled_size << 8) + compptr->DCT_v_scaled_size) {
 202 #ifdef DCT_SCALING_SUPPORTED
 203     case ((1 << 8) + 1):
 204       fdct->do_dct[ci] = jpeg_fdct_1x1;
 205       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 206       break;
 207     case ((2 << 8) + 2):
 208       fdct->do_dct[ci] = jpeg_fdct_2x2;
 209       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 210       break;
 211     case ((3 << 8) + 3):
 212       fdct->do_dct[ci] = jpeg_fdct_3x3;
 213       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 214       break;
 215     case ((4 << 8) + 4):
 216       fdct->do_dct[ci] = jpeg_fdct_4x4;
 217       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 218       break;
 219     case ((5 << 8) + 5):
 220       fdct->do_dct[ci] = jpeg_fdct_5x5;
 221       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 222       break;
 223     case ((6 << 8) + 6):
 224       fdct->do_dct[ci] = jpeg_fdct_6x6;
 225       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 226       break;
 227     case ((7 << 8) + 7):
 228       fdct->do_dct[ci] = jpeg_fdct_7x7;
 229       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 230       break;
 231     case ((9 << 8) + 9):
 232       fdct->do_dct[ci] = jpeg_fdct_9x9;
 233       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 234       break;
 235     case ((10 << 8) + 10):
 236       fdct->do_dct[ci] = jpeg_fdct_10x10;
 237       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 238       break;
 239     case ((11 << 8) + 11):
 240       fdct->do_dct[ci] = jpeg_fdct_11x11;
 241       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 242       break;
 243     case ((12 << 8) + 12):
 244       fdct->do_dct[ci] = jpeg_fdct_12x12;
 245       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 246       break;
 247     case ((13 << 8) + 13):
 248       fdct->do_dct[ci] = jpeg_fdct_13x13;
 249       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 250       break;
 251     case ((14 << 8) + 14):
 252       fdct->do_dct[ci] = jpeg_fdct_14x14;
 253       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 254       break;
 255     case ((15 << 8) + 15):
 256       fdct->do_dct[ci] = jpeg_fdct_15x15;
 257       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 258       break;
 259     case ((16 << 8) + 16):
 260       fdct->do_dct[ci] = jpeg_fdct_16x16;
 261       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 262       break;
 263     case ((16 << 8) + 8):
 264       fdct->do_dct[ci] = jpeg_fdct_16x8;
 265       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 266       break;
 267     case ((14 << 8) + 7):
 268       fdct->do_dct[ci] = jpeg_fdct_14x7;
 269       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 270       break;
 271     case ((12 << 8) + 6):
 272       fdct->do_dct[ci] = jpeg_fdct_12x6;
 273       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 274       break;
 275     case ((10 << 8) + 5):
 276       fdct->do_dct[ci] = jpeg_fdct_10x5;
 277       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 278       break;
 279     case ((8 << 8) + 4):
 280       fdct->do_dct[ci] = jpeg_fdct_8x4;
 281       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 282       break;
 283     case ((6 << 8) + 3):
 284       fdct->do_dct[ci] = jpeg_fdct_6x3;
 285       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 286       break;
 287     case ((4 << 8) + 2):
 288       fdct->do_dct[ci] = jpeg_fdct_4x2;
 289       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 290       break;
 291     case ((2 << 8) + 1):
 292       fdct->do_dct[ci] = jpeg_fdct_2x1;
 293       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 294       break;
 295     case ((8 << 8) + 16):
 296       fdct->do_dct[ci] = jpeg_fdct_8x16;
 297       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 298       break;
 299     case ((7 << 8) + 14):
 300       fdct->do_dct[ci] = jpeg_fdct_7x14;
 301       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 302       break;
 303     case ((6 << 8) + 12):
 304       fdct->do_dct[ci] = jpeg_fdct_6x12;
 305       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 306       break;
 307     case ((5 << 8) + 10):
 308       fdct->do_dct[ci] = jpeg_fdct_5x10;
 309       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 310       break;
 311     case ((4 << 8) + 8):
 312       fdct->do_dct[ci] = jpeg_fdct_4x8;
 313       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 314       break;
 315     case ((3 << 8) + 6):
 316       fdct->do_dct[ci] = jpeg_fdct_3x6;
 317       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 318       break;
 319     case ((2 << 8) + 4):
 320       fdct->do_dct[ci] = jpeg_fdct_2x4;
 321       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 322       break;
 323     case ((1 << 8) + 2):
 324       fdct->do_dct[ci] = jpeg_fdct_1x2;
 325       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 326       break;
 327 #endif
 328     case ((DCTSIZE << 8) + DCTSIZE):
 329       switch (cinfo->dct_method) {
 330 #ifdef DCT_ISLOW_SUPPORTED
 331       case JDCT_ISLOW:
 332         fdct->do_dct[ci] = jpeg_fdct_islow;
 333         method = JDCT_ISLOW;
 334         break;
 335 #endif
 336 #ifdef DCT_IFAST_SUPPORTED
 337       case JDCT_IFAST:
 338         fdct->do_dct[ci] = jpeg_fdct_ifast;
 339         method = JDCT_IFAST;
 340         break;
 341 #endif
 342 #ifdef DCT_FLOAT_SUPPORTED
 343       case JDCT_FLOAT:
 344         fdct->do_float_dct[ci] = jpeg_fdct_float;
 345         method = JDCT_FLOAT;
 346         break;
 347 #endif
 348       default:
 349         ERREXIT(cinfo, JERR_NOT_COMPILED);
 350         break;
 351       }
 352       break;
 353     default:
 354       ERREXIT2(cinfo, JERR_BAD_DCTSIZE,
 355                compptr->DCT_h_scaled_size, compptr->DCT_v_scaled_size);
 356       break;
 357     }
 358     qtblno = compptr->quant_tbl_no;
 359     /* Make sure specified quantization table is present */
 360     if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
 361         cinfo->quant_tbl_ptrs[qtblno] == NULL)
 362       ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
 363     qtbl = cinfo->quant_tbl_ptrs[qtblno];
 364     /* Create divisor table from quant table */
 365     switch (method) {
 366 #ifdef PROVIDE_ISLOW_TABLES
 367     case JDCT_ISLOW:
 368       /* For LL&M IDCT method, divisors are equal to raw quantization
 369        * coefficients multiplied by 8 (to counteract scaling).
 370        */
 371       dtbl = (DCTELEM *) compptr->dct_table;
 372       for (i = 0; i < DCTSIZE2; i++) {
 373         dtbl[i] =
 374           ((DCTELEM) qtbl->quantval[i]) << (compptr->component_needed ? 4 : 3);
 375       }
 376       fdct->pub.forward_DCT[ci] = forward_DCT;
 377       break;
 378 #endif
 379 #ifdef DCT_IFAST_SUPPORTED
 380     case JDCT_IFAST:
 381       {
 382         /* For AA&N IDCT method, divisors are equal to quantization
 383          * coefficients scaled by scalefactor[row]*scalefactor[col], where
 384          *   scalefactor[0] = 1
 385          *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
 386          * We apply a further scale factor of 8.
 387          */
 388 #define CONST_BITS 14
 389         static const INT16 aanscales[DCTSIZE2] = {
 390           /* precomputed values scaled up by 14 bits */
 391           16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
 392           22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
 393           21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
 394           19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
 395           16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
 396           12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
 397            8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
 398            4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
 399         };
 400         SHIFT_TEMPS
 401 
 402         dtbl = (DCTELEM *) compptr->dct_table;
 403         for (i = 0; i < DCTSIZE2; i++) {
 404           dtbl[i] = (DCTELEM)
 405             DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
 406                                   (INT32) aanscales[i]),
 407                     compptr->component_needed ? CONST_BITS-4 : CONST_BITS-3);
 408         }
 409       }
 410       fdct->pub.forward_DCT[ci] = forward_DCT;
 411       break;
 412 #endif
 413 #ifdef DCT_FLOAT_SUPPORTED
 414     case JDCT_FLOAT:
 415       {
 416         /* For float AA&N IDCT method, divisors are equal to quantization
 417          * coefficients scaled by scalefactor[row]*scalefactor[col], where
 418          *   scalefactor[0] = 1
 419          *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
 420          * We apply a further scale factor of 8.
 421          * What's actually stored is 1/divisor so that the inner loop can
 422          * use a multiplication rather than a division.
 423          */
 424         FAST_FLOAT * fdtbl = (FAST_FLOAT *) compptr->dct_table;
 425         int row, col;
 426         static const double aanscalefactor[DCTSIZE] = {
 427           1.0, 1.387039845, 1.306562965, 1.175875602,
 428           1.0, 0.785694958, 0.541196100, 0.275899379
 429         };
 430 
 431         i = 0;
 432         for (row = 0; row < DCTSIZE; row++) {
 433           for (col = 0; col < DCTSIZE; col++) {
 434             fdtbl[i] = (FAST_FLOAT)
 435               (1.0 / ((double) qtbl->quantval[i] *
 436                       aanscalefactor[row] * aanscalefactor[col] *
 437                       (compptr->component_needed ? 16.0 : 8.0)));
 438             i++;
 439           }
 440         }
 441       }
 442       fdct->pub.forward_DCT[ci] = forward_DCT_float;
 443       break;
 444 #endif
 445     default:
 446       ERREXIT(cinfo, JERR_NOT_COMPILED);
 447       break;
 448     }
 449   }
 450 }
 451 
 452 
 453 /*
 454  * Initialize FDCT manager.
 455  */
 456 
 457 GLOBAL(void)
 458 jinit_forward_dct (j_compress_ptr cinfo)
 459 {
 460   my_fdct_ptr fdct;
 461   int ci;
 462   jpeg_component_info *compptr;
 463 
 464   fdct = (my_fdct_ptr)
 465     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
 466                                 SIZEOF(my_fdct_controller));
 467   cinfo->fdct = &fdct->pub;
 468   fdct->pub.start_pass = start_pass_fdctmgr;
 469 
 470   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
 471        ci++, compptr++) {
 472     /* Allocate a divisor table for each component */
 473     compptr->dct_table =
 474       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
 475                                   SIZEOF(divisor_table));
 476   }
 477 }