1 /*
   2  * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 #import "AVFAudioProcessor.h"
  27 #import "AVFMediaPlayer.h"
  28 
  29 #import <AVFoundation/AVFoundation.h>
  30 
  31 #import "AVFKernelProcessor.h"
  32 #import <CoreFoundation/CoreFoundation.h>
  33 
  34 #import <pthread.h>
  35 #import <dlfcn.h>
  36 #import <objc/message.h>
  37 
  38 /*
  39  * MTAudioProcessingTap is a feature new to 10.9 but also exists in
  40  * MediaToolbox.framework in 10.8. Unfortunately the SDK we build with does not
  41  * have the header file needed to compile our audio tap, so we will have to
  42  * supply the missing pieces here. We will use dlsym to find the
  43  * MTAudioProcessingTap calls we need, this will prevent crashing on systems that
  44  * don't implement it.
  45  */
extern "C" {
// 4-byte packing must match the framework's layout of this struct, since it
// is passed directly to MTAudioProcessingTapCreate (resolved via dlsym)
#pragma pack(push, 4)

    // This is MTAudioProcessingTapCallbacks in MediaToolbox.framework
    // (redeclared here because the build SDK lacks the header; see note above)
struct __MTAudioTapCallbacks {
    int version;      // 0 == kMTAudioProcessingTapCallbacksVersion_0 (see createMixerWithTrack:)
    void *clientInfo; // opaque pointer handed back to the init callback
    // invoked when the tap is created; tapStorageOut receives per-tap state
    void (*init)(CFTypeRef tapRef, void *clientInfo, void **tapStorageOut);
    // invoked when the tap is destroyed; release whatever init allocated
    void (*finalize)(CFTypeRef tapRef);
    // invoked before processing starts, with the audio format and frame limit
    void (*prepare)(CFTypeRef tapRef,
                    CMItemCount maxFrames,
                    const AudioStreamBasicDescription *processingFormat);
    // invoked when processing stops; undo whatever prepare did
    void (*unprepare)(CFTypeRef tapRef);
    // invoked for each batch of audio frames to be processed
    void (*process)(CFTypeRef tapRef,
                    CMItemCount numberFramesIn, uint32_t flagsIn,
                    AudioBufferList *bufferListInOut,
                    CMItemCount *numberFramesOut, uint32_t *flagsOut);
};

#pragma pack(pop)
};
  67 
// Signature of MTAudioProcessingTapCreate, resolved at runtime via dlsym in
// FindAudioTap() so we don't link against a symbol that may be absent
typedef OSStatus (*AudioTapCreateProc)(CFAllocatorRef allocator,
                                       const __MTAudioTapCallbacks *callbacks,
                                       uint32_t flags,
                                       CFTypeRef *tapOut);
AudioTapCreateProc gAudioTapCreate = NULL;

// Signature of MTAudioProcessingTapGetStorage; returns the per-tap storage
// pointer published by the init callback (our AVFTapContext*)
typedef void *(*AudioTapGetStorageProc)(CFTypeRef tap);
AudioTapGetStorageProc gAudioTapGetStorage = NULL;

// Signature of MTAudioProcessingTapGetSourceAudio; fetches the unprocessed
// source audio that feeds the tap
typedef OSStatus (*AudioTapGetSourceAudioProc)(CFTypeRef tap,
                                               CMItemCount numberFrames,
                                               AudioBufferList *bufferListInOut,
                                               uint32_t *flagsOut,
                                               CMTimeRange *timeRangeOut,
                                               CMItemCount *numberFramesOut);
AudioTapGetSourceAudioProc gAudioTapGetSourceAudio = NULL;

// Guards the one-time dlsym lookup of the three proc pointers above
pthread_mutex_t gAVFTapProcsLock = PTHREAD_MUTEX_INITIALIZER;
  86 
// Tap lifecycle callbacks, registered through __MTAudioTapCallbacks in
// createMixerWithTrack: (definitions follow the @implementation below)
static void InitAudioTap(CFTypeRef tapRef, void *clientInfo, void **tapStorageOut);
static void FinalizeAudioTap(CFTypeRef tapRef);
static void PrepareAudioTap(CFTypeRef tapRef,
                            CMItemCount maxFrames,
                            const AudioStreamBasicDescription *processingFormat);
static void UnprepareAudioTap(CFTypeRef tapRef);
static void ProcessAudioTap(CFTypeRef tapRef, CMItemCount numberFrames,
                            uint32_t /*MTAudioProcessingTapFlags*/ flags,
                            AudioBufferList *bufferListInOut,
                            CMItemCount *numberFramesOut,
                            uint32_t /*MTAudioProcessingTapFlags*/ *flagsOut);

// AURenderCallback installed on the first unit of the processing chain;
// pulls the tap's source audio into the AudioUnit graph
static OSStatus AVFTapRenderCallback(void *inRefCon,
                                     AudioUnitRenderActionFlags *ioActionFlags,
                                     const AudioTimeStamp *inTimeStamp,
                                     UInt32 inBusNumber,
                                     UInt32 inNumberFrames,
                                     AudioBufferList *ioData);
 105 
// Per-tap state: allocated in InitAudioTap, freed in FinalizeAudioTap, and
// retrieved in the other callbacks via gAudioTapGetStorage(tapRef)
typedef struct AVFTapContext {
    BOOL enabled;                 // set to YES once PrepareAudioTap validates the format
    AVFAudioProcessor *processor; // we want this object retained, so don't use __bridge to set this!

    // Effect units created in PrepareAudioTap, disposed in UnprepareAudioTap
    AudioUnit delayUnit;
    AudioUnit spectrumUnit;
    AudioUnit volumeUnit;
    AudioUnit eqUnit;

    AudioUnit renderUnit; // the last unit in our chain
    CMItemCount totalFrames; // running sample time for AudioUnitRender timestamps
} AVFTapContext;
 118 
 119 static bool FindAudioTap() {
 120     static bool checkPerformed = false;
 121 
 122     pthread_mutex_lock(&gAVFTapProcsLock);
 123     if (!checkPerformed) {
 124         if (!gAudioTapCreate) {
 125             gAudioTapCreate = (AudioTapCreateProc)
 126                 dlsym(RTLD_DEFAULT, "MTAudioProcessingTapCreate");
 127         }
 128         if (!gAudioTapGetStorage) {
 129             gAudioTapGetStorage = (AudioTapGetStorageProc)
 130                 dlsym(RTLD_DEFAULT, "MTAudioProcessingTapGetStorage");
 131         }
 132         if (!gAudioTapGetSourceAudio) {
 133             gAudioTapGetSourceAudio = (AudioTapGetSourceAudioProc)
 134                 dlsym(RTLD_DEFAULT, "MTAudioProcessingTapGetSourceAudio");
 135         }
 136         checkPerformed = true;
 137     }
 138     pthread_mutex_unlock(&gAVFTapProcsLock);
 139 
 140     return (gAudioTapCreate != NULL)
 141         && (gAudioTapGetStorage != NULL)
 142         && (gAudioTapGetSourceAudio != NULL);
 143 }
 144 
 145 @implementation AVFAudioProcessor
 146 
 147 - (id) initWithPlayer:(AVFMediaPlayer*)player assetTrack:(AVAssetTrack *)assetTrack {
 148     if ((self = [super init]) != nil) {
 149         _player = player;
 150 
 151         // Create a mixer for this asset track
 152         [self createMixerWithTrack:assetTrack];
 153         if (_mixer) {
 154             _player.playerItem.audioMix = _mixer;
 155         }
 156 
 157         _soundLevelUnit = new AVFSoundLevelUnit();
 158         _audioSpectrum = NULL;
 159         _audioEqualizer = NULL;
 160     }
 161     return self;
 162 }
 163 
 164 - (void) dealloc {
 165     if (_soundLevelUnit) {
 166         delete _soundLevelUnit;
 167         _soundLevelUnit = NULL;
 168     }
 169 
 170     // We don't own these objects
 171     _audioSpectrum = NULL;
 172     _audioEqualizer = NULL;
 173 }
 174 
 175 - (void) createMixerWithTrack:(AVAssetTrack*)audioTrack {
 176     if (!FindAudioTap()) {
 177         NSLog(@"Audio tap is not available, cannot post-process audio");
 178         return;
 179     }
 180     if (!_mixer) {
 181         AVMutableAudioMix *mixer = [AVMutableAudioMix audioMix];
 182         if (mixer) {
 183             AVMutableAudioMixInputParameters *audioMixInputParameters =
 184                 [AVMutableAudioMixInputParameters audioMixInputParametersWithTrack:audioTrack];
 185             if (audioMixInputParameters &&
 186                 [audioMixInputParameters respondsToSelector:@selector(setAudioTapProcessor:)]) {
 187                 __MTAudioTapCallbacks callbacks;
 188 
 189                 callbacks.version = 0; // kMTAudioProcessingTapCallbacksVersion_0
 190                 callbacks.clientInfo = (__bridge void *)self,
 191                 callbacks.init = InitAudioTap;
 192                 callbacks.finalize = FinalizeAudioTap;
 193                 callbacks.prepare = PrepareAudioTap;
 194                 callbacks.unprepare = UnprepareAudioTap;
 195                 callbacks.process = ProcessAudioTap;
 196 
 197                 CFTypeRef audioProcessingTap;
 198                 if (noErr == gAudioTapCreate(kCFAllocatorDefault, &callbacks,
 199                                              1, // kMTAudioProcessingTapCreationFlag_PreEffects
 200                                              &audioProcessingTap))
 201                 {
 202                     objc_msgSend(audioMixInputParameters,
 203                                  @selector(setAudioTapProcessor:),
 204                                  audioProcessingTap);
 205 
 206                     CFRelease(audioProcessingTap); // owned by the mixer now
 207 
 208                     mixer.inputParameters = @[audioMixInputParameters];
 209 
 210                     _mixer = mixer;
 211                 }
 212             }
 213         }
 214     }
 215 }
 216 
 217 - (void) setVolume:(float)volume {
 218     _volume = volume;
 219     if (_soundLevelUnit) {
 220         _soundLevelUnit->setVolume(volume);
 221     }
 222 }
 223 
 224 - (void) setBalance:(float)balance {
 225     _balance = balance;
 226     if (_soundLevelUnit) {
 227         _soundLevelUnit->setBalance(balance);
 228     }
 229 }
 230 
 231 @end
 232 
/*
 * Tap init callback: allocates the per-tap context and publishes it both as
 * the tap's storage and on the processor. clientInfo is the AVFAudioProcessor
 * passed as callbacks.clientInfo in createMixerWithTrack:.
 */
void InitAudioTap(CFTypeRef tapRef, void *clientInfo, void **tapStorageOut)
{
    AVFAudioProcessor *processor = (__bridge AVFAudioProcessor*)clientInfo;

    // calloc so every field — including the ObjC pointer — starts zeroed,
    // which makes the first ARC store into context->processor safe
    AVFTapContext *context = (AVFTapContext*)calloc(1, sizeof(AVFTapContext));
    if (context) {
        context->enabled = NO;
            // processor should be retained, else we can crash when closing the media player
        context->processor = processor;
        *tapStorageOut = context;

        processor.tapStorage = context;
    }
    // NOTE(review): if calloc fails, *tapStorageOut is left untouched —
    // presumably the framework pre-initializes it; confirm before relying on it
}
 247 
/*
 * Tap finalize callback: releases the per-tap context allocated in
 * InitAudioTap and detaches it from the processor.
 */
void FinalizeAudioTap(CFTypeRef tapRef)
{
    // NULL check is for safety, this should never be called if we don't have all
    // the audio tap functions
    if (!gAudioTapGetStorage) {
        // should not happen
        return;
    }
    AVFTapContext *context = (AVFTapContext*)gAudioTapGetStorage(tapRef);

    if (context) {
        context->processor.tapStorage = NULL;
        // clearing the strong reference releases the retained processor
        // BEFORE the raw storage is freed — keep this ordering
        context->processor = NULL;

        free(context);
    }
}
 265 
 266 static OSStatus SetupAudioUnit(AudioUnit unit,
 267                                const AudioStreamBasicDescription *processingFormat,
 268                                UInt32 maxFrames) {
 269     OSStatus status = noErr;
 270     if (noErr == status) {
 271         status = AudioUnitSetProperty(unit,
 272                                       kAudioUnitProperty_StreamFormat,
 273                                       kAudioUnitScope_Input, 0,
 274                                       processingFormat, sizeof(AudioStreamBasicDescription));
 275     }
 276     if (noErr == status) {
 277         status = AudioUnitSetProperty(unit,
 278                                       kAudioUnitProperty_StreamFormat,
 279                                       kAudioUnitScope_Output, 0,
 280                                       processingFormat, sizeof(AudioStreamBasicDescription));
 281     }
 282     if (noErr == status) {
 283         status = AudioUnitSetProperty(unit,
 284                                       kAudioUnitProperty_MaximumFramesPerSlice,
 285                                       kAudioUnitScope_Global, 0,
 286                                       &maxFrames, sizeof(UInt32));
 287     }
 288     if (noErr == status) {
 289         status = AudioUnitInitialize(unit);
 290     }
 291     return status;
 292 }
 293 
 294 static OSStatus ConnectAudioUnits(AudioUnit source, AudioUnit sink) {
 295     AudioUnitConnection connection;
 296     connection.sourceAudioUnit = source;
 297     connection.sourceOutputNumber = 0;
 298     connection.destInputNumber = 0;
 299     return AudioUnitSetProperty(sink, kAudioUnitProperty_MakeConnection,
 300                                 kAudioUnitScope_Input, 0,
 301                                 &connection, sizeof(connection));
 302 }
 303 
 304 AudioUnit FindAudioUnit(OSType type, OSType subType, OSType manu) {
 305     AudioUnit audioUnit = NULL;
 306 
 307     AudioComponentDescription audioComponentDescription;
 308     audioComponentDescription.componentType = type;
 309     audioComponentDescription.componentSubType = subType;
 310     audioComponentDescription.componentManufacturer = manu;
 311     audioComponentDescription.componentFlags = 0;
 312     audioComponentDescription.componentFlagsMask = 0;
 313 
 314     AudioComponent audioComponent = AudioComponentFindNext(NULL, &audioComponentDescription);
 315     if (audioComponent) {
 316         AudioComponentInstanceNew(audioComponent, &audioUnit);
 317     }
 318     return audioUnit;
 319 }
 320 
 321 void PrepareAudioTap(CFTypeRef tapRef,
 322                                      CMItemCount maxFrames,
 323                                      const AudioStreamBasicDescription *processingFormat)
 324 {
 325     if (!gAudioTapGetStorage) {
 326         // should not happen
 327         return;
 328     }
 329     AVFTapContext *context = (AVFTapContext*)gAudioTapGetStorage(tapRef);
 330 
 331     // Validate the audio format before we enable the processor
 332 
 333     // Failures here should rarely, if ever, happen so leave the NSLogs in for
 334     // easier diagnosis in the field
 335     if (processingFormat->mFormatID != kAudioFormatLinearPCM) {
 336         NSLog(@"AVFAudioProcessor needs linear PCM");
 337         return;
 338     }
 339 
 340     // Use the convenient kAudioFormatFlagsNativeFloatPacked to check if we can
 341     // process the incoming audio
 342     if ((processingFormat->mFormatFlags & kAudioFormatFlagsNativeFloatPacked)
 343         != kAudioFormatFlagsNativeFloatPacked) {
 344         NSLog(@"AVFAudioProcessor needs native endian packed float samples!!");
 345         return;
 346     }
 347 
 348     // Get an instance of our sound level unit
 349     context->delayUnit = FindAudioUnit(kAudioUnitType_Effect,
 350                                        kAudioUnitSubType_SampleDelay,
 351                                        kAudioUnitManufacturer_Apple);
 352     if (context->delayUnit) {
 353         OSStatus status = SetupAudioUnit(context->delayUnit, processingFormat, (UInt32)maxFrames);
 354         if (noErr != status) {
 355             NSLog(@"Error setting up delay unit: %d", status);
 356             AudioComponentInstanceDispose(context->delayUnit);
 357             context->delayUnit = NULL;
 358         }
 359     }
 360 
 361     context->eqUnit = NULL;
 362     if (context->processor.audioEqualizer) {
 363         context->eqUnit = NewKernelProcessorUnit(context->processor.audioEqualizer);
 364         if (context->eqUnit) {
 365             OSStatus status = SetupAudioUnit(context->eqUnit,
 366                                              processingFormat,
 367                                              (UInt32)maxFrames);
 368             if (noErr != status) {
 369                 NSLog(@"Error creating audio equalizer unit: %d", status);
 370                 // Don't delete the instance, that will happen when we dispose the unit
 371                 AudioComponentInstanceDispose(context->eqUnit);
 372                 context->eqUnit = NULL;
 373             }
 374         }
 375     }
 376 
 377     context->spectrumUnit = NULL;
 378     if (context->processor.audioSpectrum) {
 379         context->spectrumUnit = NewKernelProcessorUnit(context->processor.audioSpectrum);
 380         if (context->spectrumUnit) {
 381             OSStatus status = SetupAudioUnit(context->spectrumUnit,
 382                                              processingFormat,
 383                                              (UInt32)maxFrames);
 384             if (noErr != status) {
 385                 NSLog(@"Error creating audio spectrum unit: %d", status);
 386                 // Don't delete the instance, that will happen when we dispose the unit
 387                 AudioComponentInstanceDispose(context->spectrumUnit);
 388                 context->spectrumUnit = NULL;
 389             }
 390         }
 391     }
 392 
 393     context->volumeUnit = NULL;
 394     if (context->processor.soundLevelUnit) {
 395         context->volumeUnit = NewKernelProcessorUnit(context->processor.soundLevelUnit);
 396         if (context->volumeUnit) {
 397             OSStatus status = SetupAudioUnit(context->volumeUnit,
 398                                              processingFormat,
 399                                              (UInt32)maxFrames);
 400             if (noErr != status) {
 401                 NSLog(@"Error setting up Sound Level Unit: %d", status);
 402                 AudioComponentInstanceDispose(context->volumeUnit);
 403                 context->volumeUnit = NULL;
 404             }
 405         }
 406     }
 407 
 408     /*
 409      * Use AudioUnitConnections to build a processing graph
 410      * The last unit in the chain will be the unit we call to render, it will
 411      * pull through the graph until we get to the first, which will fetch samples
 412      * via the render proc we install.
 413      *
 414      * The graph will look like this:
 415      *    (render proc) -> delayUnit -> eqUnit -> spectrumUnit -> volUnit
 416      *
 417      * This will allow the EQ settings to affect the spectrum output, but not
 418      * the volume or balance.
 419      */
 420     AudioUnit firstUnit = NULL;
 421     context->renderUnit = NULL;
 422 
 423     // Set initial settings
 424     if (context->delayUnit) {
 425         if (context->renderUnit) {
 426             // Connect renderUnit output to this input
 427             ConnectAudioUnits(context->renderUnit, context->delayUnit);
 428         }
 429         context->renderUnit = context->delayUnit;
 430         if (!firstUnit) {
 431             firstUnit = context->delayUnit;
 432         }
 433     }
 434     if (context->eqUnit) {
 435         if (context->renderUnit) {
 436             ConnectAudioUnits(context->renderUnit, context->eqUnit);
 437         }
 438         context->renderUnit = context->eqUnit;
 439         if (!firstUnit) {
 440             firstUnit = context->eqUnit;
 441         }
 442     }
 443     if (context->spectrumUnit) {
 444         if (context->renderUnit) {
 445             ConnectAudioUnits(context->renderUnit, context->spectrumUnit);
 446         }
 447         context->renderUnit = context->spectrumUnit;
 448         if (!firstUnit) {
 449             firstUnit = context->spectrumUnit;
 450         }
 451     }
 452     if (context->volumeUnit) {
 453         if (context->renderUnit) {
 454             ConnectAudioUnits(context->renderUnit, context->volumeUnit);
 455         }
 456         context->renderUnit = context->volumeUnit;
 457         if (!firstUnit) {
 458             firstUnit = context->volumeUnit;
 459         }
 460     }
 461 
 462     // Set up a render callback on our first unit
 463     if (firstUnit) {
 464         AURenderCallbackStruct renderCB;
 465         renderCB.inputProc = (AURenderCallback)AVFTapRenderCallback;
 466         renderCB.inputProcRefCon = (void*)tapRef;
 467         AudioUnitSetProperty(firstUnit,
 468                              kAudioUnitProperty_SetRenderCallback,
 469                              kAudioUnitScope_Input, 0,
 470                              &renderCB, sizeof(renderCB));
 471     }
 472     context->totalFrames = 0;
 473     context->enabled = YES;
 474 }
 475 
 476 void UnprepareAudioTap(CFTypeRef tapRef)
 477 {
 478     if (!gAudioTapGetStorage) {
 479         // should not happen
 480         return;
 481     }
 482     AVFTapContext *context = (AVFTapContext*)gAudioTapGetStorage(tapRef);
 483     context->enabled = NO;
 484     context->renderUnit = NULL;
 485 
 486     if (context->delayUnit) {
 487         AudioUnitUninitialize(context->delayUnit);
 488         AudioComponentInstanceDispose(context->delayUnit);
 489         context->delayUnit = NULL;
 490     }
 491     if (context->spectrumUnit) {
 492         AudioUnitUninitialize(context->spectrumUnit);
 493         AudioComponentInstanceDispose(context->spectrumUnit);
 494         context->spectrumUnit = NULL;
 495     }
 496     if (context->volumeUnit) {
 497         AudioUnitUninitialize(context->volumeUnit);
 498         AudioComponentInstanceDispose(context->volumeUnit);
 499         context->volumeUnit = NULL;
 500     }
 501     if (context->eqUnit) {
 502         AudioUnitUninitialize(context->eqUnit);
 503         AudioComponentInstanceDispose(context->eqUnit);
 504         context->eqUnit = NULL;
 505     }
 506 }
 507 
 508 void ProcessAudioTap(CFTypeRef tapRef,
 509                      CMItemCount numberFrames,
 510                      uint32_t flags,
 511                      AudioBufferList *bufferListInOut,
 512                      CMItemCount *numberFramesOut,
 513                      uint32_t *flagsOut)
 514 {
 515     if (!gAudioTapGetStorage) {
 516         // should not happen
 517         return;
 518     }
 519     AVFTapContext *context = (AVFTapContext*)gAudioTapGetStorage(tapRef);
 520     OSStatus status = noErr;
 521 
 522     if (context->renderUnit) {
 523         AudioTimeStamp audioTimeStamp;
 524         audioTimeStamp.mSampleTime = context->totalFrames;
 525         audioTimeStamp.mFlags = kAudioTimeStampSampleTimeValid;
 526 
 527         status = AudioUnitRender(context->renderUnit,
 528                                  0,
 529                                  &audioTimeStamp,
 530                                  0,
 531                                  (UInt32)numberFrames,
 532                                  bufferListInOut);
 533         if (noErr != status) {
 534             return;
 535         }
 536         context->totalFrames += numberFrames;
 537         *numberFramesOut = numberFrames;
 538     } else {
 539         if (gAudioTapGetSourceAudio) {
 540             gAudioTapGetSourceAudio(tapRef, numberFrames, bufferListInOut,
 541                                     flagsOut, NULL, numberFramesOut);
 542         }
 543     }
 544 }
 545 
 546 static OSStatus AVFTapRenderCallback(void *inRefCon,
 547                                      AudioUnitRenderActionFlags *ioActionFlags,
 548                                      const AudioTimeStamp *inTimeStamp,
 549                                      UInt32 inBusNumber,
 550                                      UInt32 inNumberFrames,
 551                                      AudioBufferList *ioData)
 552 {
 553     if (!gAudioTapGetSourceAudio) {
 554         // should not happen
 555         return noErr;
 556     }
 557     CFTypeRef tapRef = static_cast<CFTypeRef>(inRefCon);
 558     return gAudioTapGetSourceAudio(tapRef, inNumberFrames, ioData, NULL, NULL, NULL);
 559 }