1 /*
   2  * jcdctmgr.c
   3  *
   4  * Copyright (C) 1994-1996, Thomas G. Lane.
   5  * This file is part of the Independent JPEG Group's software.
   6  * For conditions of distribution and use, see the accompanying README file.
   7  *
   8  * This file contains the forward-DCT management logic.
   9  * This code selects a particular DCT implementation to be used,
  10  * and it performs related housekeeping chores including coefficient
  11  * quantization.
  12  */
  13 
  14 #define JPEG_INTERNALS
  15 #include "jinclude.h"
  16 #include "jpeglib.h"
  17 #include "jdct.h"               /* Private declarations for DCT subsystem */
  18 
  19 
  20 /* Private subobject for this module */
  21 
  22 typedef struct {
  23   struct jpeg_forward_dct pub;  /* public fields */
  24 
  25   /* Pointer to the DCT routine actually in use */
  26   forward_DCT_method_ptr do_dct[MAX_COMPONENTS];
  27 
  28   /* The actual post-DCT divisors --- not identical to the quant table
  29    * entries, because of scaling (especially for an unnormalized DCT).
  30    * Each table is given in normal array order.
  31    */
  32   DCTELEM * divisors[NUM_QUANT_TBLS];
  33 
  34 #ifdef DCT_FLOAT_SUPPORTED
  35   /* Same as above for the floating-point case. */
  36   float_DCT_method_ptr do_float_dct[MAX_COMPONENTS];
  37   FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
  38 #endif
  39 } my_fdct_controller;
  40 
  41 typedef my_fdct_controller * my_fdct_ptr;
  42 
  43 
  44 /* The current scaled-DCT routines require ISLOW-style divisor tables,
  45  * so be sure to compile that code if either ISLOW or SCALING is requested.
  46  */
  47 #ifdef DCT_ISLOW_SUPPORTED
  48 #define PROVIDE_ISLOW_TABLES
  49 #else
  50 #ifdef DCT_SCALING_SUPPORTED
  51 #define PROVIDE_ISLOW_TABLES
  52 #endif
  53 #endif
  54 
  55 
  56 /*
  57  * Perform forward DCT on one or more blocks of a component.
  58  *
  59  * The input samples are taken from the sample_data[] array starting at
  60  * position start_row/start_col, and moving to the right for any additional
  61  * blocks. The quantized coefficients are returned in coef_blocks[].
  62  */
  63 
  64 METHODDEF(void)
  65 forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
  66              JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
  67              JDIMENSION start_row, JDIMENSION start_col,
  68              JDIMENSION num_blocks)
  69 /* This version is used for integer DCT implementations. */
  70 {
  71   /* This routine is heavily used, so it's worth coding it tightly. */
  72   my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
  73   forward_DCT_method_ptr do_dct = fdct->do_dct[compptr->component_index];
  74   DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
  75   DCTELEM workspace[DCTSIZE2];  /* work area for FDCT subroutine */
  76   JDIMENSION bi;
  77 
  78   sample_data += start_row;     /* fold in the vertical offset once */
  79 
  80   for (bi = 0; bi < num_blocks; bi++, start_col += compptr->DCT_h_scaled_size) {
  81     /* Perform the DCT */
  82     (*do_dct) (workspace, sample_data, start_col);
  83 
  84     /* Quantize/descale the coefficients, and store into coef_blocks[] */
  85     { register DCTELEM temp, qval;
  86       register int i;
  87       register JCOEFPTR output_ptr = coef_blocks[bi];
  88 
  89       for (i = 0; i < DCTSIZE2; i++) {
  90         qval = divisors[i];
  91         temp = workspace[i];
  92         /* Divide the coefficient value by qval, ensuring proper rounding.
  93          * Since C does not specify the direction of rounding for negative
  94          * quotients, we have to force the dividend positive for portability.
  95          *
  96          * In most files, at least half of the output values will be zero
  97          * (at default quantization settings, more like three-quarters...)
  98          * so we should ensure that this case is fast.  On many machines,
  99          * a comparison is enough cheaper than a divide to make a special test
 100          * a win.  Since both inputs will be nonnegative, we need only test
 101          * for a < b to discover whether a/b is 0.
 102          * If your machine's division is fast enough, define FAST_DIVIDE.
 103          */
 104 #ifdef FAST_DIVIDE
 105 #define DIVIDE_BY(a,b)  a /= b
 106 #else
 107 #define DIVIDE_BY(a,b)  if (a >= b) a /= b; else a = 0
 108 #endif
 109         if (temp < 0) {
 110           temp = -temp;
 111           temp += qval>>1;      /* for rounding */
 112           DIVIDE_BY(temp, qval);
 113           temp = -temp;
 114         } else {
 115           temp += qval>>1;      /* for rounding */
 116           DIVIDE_BY(temp, qval);
 117         }
 118         output_ptr[i] = (JCOEF) temp;
 119       }
 120     }
 121   }
 122 }
 123 
 124 
 125 #ifdef DCT_FLOAT_SUPPORTED
 126 
 127 METHODDEF(void)
 128 forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
 129                    JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
 130                    JDIMENSION start_row, JDIMENSION start_col,
 131                    JDIMENSION num_blocks)
 132 /* This version is used for floating-point DCT implementations. */
 133 {
 134   /* This routine is heavily used, so it's worth coding it tightly. */
 135   my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
 136   float_DCT_method_ptr do_dct = fdct->do_float_dct[compptr->component_index];
 137   FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
 138   FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */
 139   JDIMENSION bi;
 140 
 141   sample_data += start_row;     /* fold in the vertical offset once */
 142 
 143   for (bi = 0; bi < num_blocks; bi++, start_col += compptr->DCT_h_scaled_size) {
 144     /* Perform the DCT */
 145     (*do_dct) (workspace, sample_data, start_col);
 146 
 147     /* Quantize/descale the coefficients, and store into coef_blocks[] */
 148     { register FAST_FLOAT temp;
 149       register int i;
 150       register JCOEFPTR output_ptr = coef_blocks[bi];
 151 
 152       for (i = 0; i < DCTSIZE2; i++) {
 153         /* Apply the quantization and scaling factor */
 154         temp = workspace[i] * divisors[i];
 155         /* Round to nearest integer.
 156          * Since C does not specify the direction of rounding for negative
 157          * quotients, we have to force the dividend positive for portability.
 158          * The maximum coefficient size is +-16K (for 12-bit data), so this
 159          * code should work for either 16-bit or 32-bit ints.
 160          */
 161         output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
 162       }
 163     }
 164   }
 165 }
 166 
 167 #endif /* DCT_FLOAT_SUPPORTED */
 168 
 169 
 170 /*
 171  * Initialize for a processing pass.
 172  * Verify that all referenced Q-tables are present, and set up
 173  * the divisor table for each one.
 174  * In the current implementation, DCT of all components is done during
 175  * the first pass, even if only some components will be output in the
 176  * first scan.  Hence all components should be examined here.
 177  */
 178 
 179 METHODDEF(void)
 180 start_pass_fdctmgr (j_compress_ptr cinfo)
 181 {
 182   my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
 183   int ci, qtblno, i;
 184   jpeg_component_info *compptr;
 185   int method = 0;
 186   JQUANT_TBL * qtbl;
 187   DCTELEM * dtbl;
 188 
 189   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
 190        ci++, compptr++) {
 191     /* Select the proper DCT routine for this component's scaling */
 192     switch ((compptr->DCT_h_scaled_size << 8) + compptr->DCT_v_scaled_size) {
 193 #ifdef DCT_SCALING_SUPPORTED
 194     case ((1 << 8) + 1):
 195       fdct->do_dct[ci] = jpeg_fdct_1x1;
 196       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 197       break;
 198     case ((2 << 8) + 2):
 199       fdct->do_dct[ci] = jpeg_fdct_2x2;
 200       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 201       break;
 202     case ((3 << 8) + 3):
 203       fdct->do_dct[ci] = jpeg_fdct_3x3;
 204       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 205       break;
 206     case ((4 << 8) + 4):
 207       fdct->do_dct[ci] = jpeg_fdct_4x4;
 208       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 209       break;
 210     case ((5 << 8) + 5):
 211       fdct->do_dct[ci] = jpeg_fdct_5x5;
 212       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 213       break;
 214     case ((6 << 8) + 6):
 215       fdct->do_dct[ci] = jpeg_fdct_6x6;
 216       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 217       break;
 218     case ((7 << 8) + 7):
 219       fdct->do_dct[ci] = jpeg_fdct_7x7;
 220       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 221       break;
 222     case ((9 << 8) + 9):
 223       fdct->do_dct[ci] = jpeg_fdct_9x9;
 224       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 225       break;
 226     case ((10 << 8) + 10):
 227       fdct->do_dct[ci] = jpeg_fdct_10x10;
 228       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 229       break;
 230     case ((11 << 8) + 11):
 231       fdct->do_dct[ci] = jpeg_fdct_11x11;
 232       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 233       break;
 234     case ((12 << 8) + 12):
 235       fdct->do_dct[ci] = jpeg_fdct_12x12;
 236       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 237       break;
 238     case ((13 << 8) + 13):
 239       fdct->do_dct[ci] = jpeg_fdct_13x13;
 240       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 241       break;
 242     case ((14 << 8) + 14):
 243       fdct->do_dct[ci] = jpeg_fdct_14x14;
 244       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 245       break;
 246     case ((15 << 8) + 15):
 247       fdct->do_dct[ci] = jpeg_fdct_15x15;
 248       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 249       break;
 250     case ((16 << 8) + 16):
 251       fdct->do_dct[ci] = jpeg_fdct_16x16;
 252       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 253       break;
 254     case ((16 << 8) + 8):
 255       fdct->do_dct[ci] = jpeg_fdct_16x8;
 256       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 257       break;
 258     case ((14 << 8) + 7):
 259       fdct->do_dct[ci] = jpeg_fdct_14x7;
 260       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 261       break;
 262     case ((12 << 8) + 6):
 263       fdct->do_dct[ci] = jpeg_fdct_12x6;
 264       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 265       break;
 266     case ((10 << 8) + 5):
 267       fdct->do_dct[ci] = jpeg_fdct_10x5;
 268       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 269       break;
 270     case ((8 << 8) + 4):
 271       fdct->do_dct[ci] = jpeg_fdct_8x4;
 272       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 273       break;
 274     case ((6 << 8) + 3):
 275       fdct->do_dct[ci] = jpeg_fdct_6x3;
 276       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 277       break;
 278     case ((4 << 8) + 2):
 279       fdct->do_dct[ci] = jpeg_fdct_4x2;
 280       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 281       break;
 282     case ((2 << 8) + 1):
 283       fdct->do_dct[ci] = jpeg_fdct_2x1;
 284       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 285       break;
 286     case ((8 << 8) + 16):
 287       fdct->do_dct[ci] = jpeg_fdct_8x16;
 288       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 289       break;
 290     case ((7 << 8) + 14):
 291       fdct->do_dct[ci] = jpeg_fdct_7x14;
 292       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 293       break;
 294     case ((6 << 8) + 12):
 295       fdct->do_dct[ci] = jpeg_fdct_6x12;
 296       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 297       break;
 298     case ((5 << 8) + 10):
 299       fdct->do_dct[ci] = jpeg_fdct_5x10;
 300       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 301       break;
 302     case ((4 << 8) + 8):
 303       fdct->do_dct[ci] = jpeg_fdct_4x8;
 304       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 305       break;
 306     case ((3 << 8) + 6):
 307       fdct->do_dct[ci] = jpeg_fdct_3x6;
 308       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 309       break;
 310     case ((2 << 8) + 4):
 311       fdct->do_dct[ci] = jpeg_fdct_2x4;
 312       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 313       break;
 314     case ((1 << 8) + 2):
 315       fdct->do_dct[ci] = jpeg_fdct_1x2;
 316       method = JDCT_ISLOW;      /* jfdctint uses islow-style table */
 317       break;
 318 #endif
 319     case ((DCTSIZE << 8) + DCTSIZE):
 320       switch (cinfo->dct_method) {
 321 #ifdef DCT_ISLOW_SUPPORTED
 322       case JDCT_ISLOW:
 323         fdct->do_dct[ci] = jpeg_fdct_islow;
 324         method = JDCT_ISLOW;
 325         break;
 326 #endif
 327 #ifdef DCT_IFAST_SUPPORTED
 328       case JDCT_IFAST:
 329         fdct->do_dct[ci] = jpeg_fdct_ifast;
 330         method = JDCT_IFAST;
 331         break;
 332 #endif
 333 #ifdef DCT_FLOAT_SUPPORTED
 334       case JDCT_FLOAT:
 335         fdct->do_float_dct[ci] = jpeg_fdct_float;
 336         method = JDCT_FLOAT;
 337         break;
 338 #endif
 339       default:
 340         ERREXIT(cinfo, JERR_NOT_COMPILED);
 341         break;
 342       }
 343       break;
 344     default:
 345       ERREXIT2(cinfo, JERR_BAD_DCTSIZE,
 346                compptr->DCT_h_scaled_size, compptr->DCT_v_scaled_size);
 347       break;
 348     }
 349     qtblno = compptr->quant_tbl_no;
 350     /* Make sure specified quantization table is present */
 351     if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
 352         cinfo->quant_tbl_ptrs[qtblno] == NULL)
 353       ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
 354     qtbl = cinfo->quant_tbl_ptrs[qtblno];
 355     /* Compute divisors for this quant table */
 356     /* We may do this more than once for same table, but it's not a big deal */
 357     switch (method) {
 358 #ifdef PROVIDE_ISLOW_TABLES
 359     case JDCT_ISLOW:
 360       /* For LL&M IDCT method, divisors are equal to raw quantization
 361        * coefficients multiplied by 8 (to counteract scaling).
 362        */
 363       if (fdct->divisors[qtblno] == NULL) {
 364         fdct->divisors[qtblno] = (DCTELEM *)
 365           (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
 366                                       DCTSIZE2 * SIZEOF(DCTELEM));
 367       }
 368       dtbl = fdct->divisors[qtblno];
 369       for (i = 0; i < DCTSIZE2; i++) {
 370         dtbl[i] = ((DCTELEM) qtbl->quantval[i]) << 3;
 371       }
 372       fdct->pub.forward_DCT[ci] = forward_DCT;
 373       break;
 374 #endif
 375 #ifdef DCT_IFAST_SUPPORTED
 376     case JDCT_IFAST:
 377       {
 378         /* For AA&N IDCT method, divisors are equal to quantization
 379          * coefficients scaled by scalefactor[row]*scalefactor[col], where
 380          *   scalefactor[0] = 1
 381          *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
 382          * We apply a further scale factor of 8.
 383          */
 384 #define CONST_BITS 14
 385         static const INT16 aanscales[DCTSIZE2] = {
 386           /* precomputed values scaled up by 14 bits */
 387           16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
 388           22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
 389           21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
 390           19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
 391           16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
 392           12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
 393            8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
 394            4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
 395         };
 396         SHIFT_TEMPS
 397 
 398         if (fdct->divisors[qtblno] == NULL) {
 399           fdct->divisors[qtblno] = (DCTELEM *)
 400             (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
 401                                         DCTSIZE2 * SIZEOF(DCTELEM));
 402         }
 403         dtbl = fdct->divisors[qtblno];
 404         for (i = 0; i < DCTSIZE2; i++) {
 405           dtbl[i] = (DCTELEM)
 406             DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
 407                                   (INT32) aanscales[i]),
 408                     CONST_BITS-3);
 409         }
 410       }
 411       fdct->pub.forward_DCT[ci] = forward_DCT;
 412       break;
 413 #endif
 414 #ifdef DCT_FLOAT_SUPPORTED
 415     case JDCT_FLOAT:
 416       {
 417         /* For float AA&N IDCT method, divisors are equal to quantization
 418          * coefficients scaled by scalefactor[row]*scalefactor[col], where
 419          *   scalefactor[0] = 1
 420          *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
 421          * We apply a further scale factor of 8.
 422          * What's actually stored is 1/divisor so that the inner loop can
 423          * use a multiplication rather than a division.
 424          */
 425         FAST_FLOAT * fdtbl;
 426         int row, col;
 427         static const double aanscalefactor[DCTSIZE] = {
 428           1.0, 1.387039845, 1.306562965, 1.175875602,
 429           1.0, 0.785694958, 0.541196100, 0.275899379
 430         };
 431 
 432         if (fdct->float_divisors[qtblno] == NULL) {
 433           fdct->float_divisors[qtblno] = (FAST_FLOAT *)
 434             (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
 435                                         DCTSIZE2 * SIZEOF(FAST_FLOAT));
 436         }
 437         fdtbl = fdct->float_divisors[qtblno];
 438         i = 0;
 439         for (row = 0; row < DCTSIZE; row++) {
 440           for (col = 0; col < DCTSIZE; col++) {
 441             fdtbl[i] = (FAST_FLOAT)
 442               (1.0 / (((double) qtbl->quantval[i] *
 443                        aanscalefactor[row] * aanscalefactor[col] * 8.0)));
 444             i++;
 445           }
 446         }
 447       }
 448       fdct->pub.forward_DCT[ci] = forward_DCT_float;
 449       break;
 450 #endif
 451     default:
 452       ERREXIT(cinfo, JERR_NOT_COMPILED);
 453       break;
 454     }
 455   }
 456 }
 457 
 458 
 459 /*
 460  * Initialize FDCT manager.
 461  */
 462 
 463 GLOBAL(void)
 464 jinit_forward_dct (j_compress_ptr cinfo)
 465 {
 466   my_fdct_ptr fdct;
 467   int i;
 468 
 469   fdct = (my_fdct_ptr)
 470     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
 471                                 SIZEOF(my_fdct_controller));
 472   cinfo->fdct = (struct jpeg_forward_dct *) fdct;
 473   fdct->pub.start_pass = start_pass_fdctmgr;
 474 
 475   /* Mark divisor tables unallocated */
 476   for (i = 0; i < NUM_QUANT_TBLS; i++) {
 477     fdct->divisors[i] = NULL;
 478 #ifdef DCT_FLOAT_SUPPORTED
 479     fdct->float_divisors[i] = NULL;
 480 #endif
 481   }
 482 }