/*
 * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

#import "AVFAudioProcessor.h"
#import "AVFMediaPlayer.h"

#import <AVFoundation/AVFoundation.h>
#import <MediaToolbox/MediaToolbox.h>

#import "AVFKernelProcessor.h"
#import <CoreFoundation/CoreFoundation.h>

#import <pthread.h>
#import <objc/message.h>

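/*
 * MTAudioProcessingTap callbacks. MediaToolbox invokes these over the life of
 * the tap: InitAudioTap when the tap is created, PrepareAudioTap before
 * playback begins, ProcessAudioTap once per buffer while playing,
 * UnprepareAudioTap when playback stops and FinalizeAudioTap when the tap
 * is released.
 */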
static void InitAudioTap(MTAudioProcessingTapRef tapRef, void *clientInfo, void **tapStorageOut);
static void FinalizeAudioTap(MTAudioProcessingTapRef tapRef);
static void PrepareAudioTap(MTAudioProcessingTapRef tapRef,
                            CMItemCount maxFrames,
                            const AudioStreamBasicDescription *processingFormat);
static void UnprepareAudioTap(MTAudioProcessingTapRef tapRef);
static void ProcessAudioTap(MTAudioProcessingTapRef tapRef, CMItemCount numberFrames,
                            MTAudioProcessingTapFlags flags,
                            AudioBufferList *bufferListInOut,
                            CMItemCount *numberFramesOut,
                            MTAudioProcessingTapFlags *flagsOut);

static OSStatus AVFTapRenderCallback(void *inRefCon,
                                     AudioUnitRenderActionFlags *ioActionFlags,
                                     const AudioTimeStamp *inTimeStamp,
                                     UInt32 inBusNumber,
                                     UInt32 inNumberFrames,
                                     AudioBufferList *ioData);

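/*
 * Per-tap state shared between the tap callbacks. One context is allocated
 * in InitAudioTap and freed in FinalizeAudioTap.
 */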
typedef struct AVFTapContext {
    BOOL enabled;
    AVFAudioProcessor *processor; // we want this object retained, so don't use __bridge to set this!

    AudioUnit delayUnit;
    AudioUnit spectrumUnit;
    AudioUnit volumeUnit;
    AudioUnit eqUnit;

    AudioUnit renderUnit; // the last unit in our chain
    CMItemCount totalFrames;
} AVFTapContext;

@implementation AVFAudioProcessor

- (id) initWithPlayer:(AVFMediaPlayer*)player assetTrack:(AVAssetTrack *)assetTrack {
    if ((self = [super init]) != nil) {
        _player = player;

        // Create a mixer for this asset track
        [self createMixerWithTrack:assetTrack];
        if (_mixer) {
            _player.playerItem.audioMix = _mixer;
        }

        _soundLevelUnit = new AVFSoundLevelUnit();
        _audioSpectrum = NULL;
        _audioEqualizer = NULL;
    }
    return self;
}

- (void) dealloc {
    if (_soundLevelUnit) {
        delete _soundLevelUnit;
        _soundLevelUnit = NULL;
    }

    // We don't own these objects
    _audioSpectrum = NULL;
    _audioEqualizer = NULL;
}

- (void) createMixerWithTrack:(AVAssetTrack*)audioTrack {
    if (!_mixer) {
        AVMutableAudioMix *mixer = [AVMutableAudioMix audioMix];
        if (mixer) {
            AVMutableAudioMixInputParameters *audioMixInputParameters =
                [AVMutableAudioMixInputParameters audioMixInputParametersWithTrack:audioTrack];
            if (audioMixInputParameters &&
                [audioMixInputParameters respondsToSelector:@selector(setAudioTapProcessor:)]) {
                MTAudioProcessingTapCallbacks callbacks;

                callbacks.version = kMTAudioProcessingTapCallbacksVersion_0;
                callbacks.clientInfo = (__bridge void *)self;
                callbacks.init = InitAudioTap;
                callbacks.finalize = FinalizeAudioTap;
                callbacks.prepare = PrepareAudioTap;
                callbacks.unprepare = UnprepareAudioTap;
                callbacks.process = ProcessAudioTap;

                MTAudioProcessingTapRef audioProcessingTap;
                if (noErr == MTAudioProcessingTapCreate(kCFAllocatorDefault, &callbacks,
                                             kMTAudioProcessingTapCreationFlag_PreEffects,
                                             &audioProcessingTap))
                {
                    // objc_msgSend is variadic, so it must be cast to a
                    // function pointer of the correct type before being
                    // called directly
                    ((void (*)(id, SEL, MTAudioProcessingTapRef))objc_msgSend)(
                                 audioMixInputParameters,
                                 @selector(setAudioTapProcessor:),
                                 audioProcessingTap);

                    CFRelease(audioProcessingTap); // owned by the mixer now

                    mixer.inputParameters = @[audioMixInputParameters];

                    _mixer = mixer;
                }
            }
        }
    }
}

- (void) setVolume:(float)volume {
    _volume = volume;
    if (_soundLevelUnit) {
        _soundLevelUnit->setVolume(volume);
    }
}

- (void) setBalance:(float)balance {
    _balance = balance;
    if (_soundLevelUnit) {
        _soundLevelUnit->setBalance(balance);
    }
}

@end
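
/*
 * Typical usage (a minimal sketch; the player and track variables and the
 * surrounding AVFMediaPlayer wiring are assumed to exist):
 *
 *   AVFAudioProcessor *processor =
 *       [[AVFAudioProcessor alloc] initWithPlayer:player assetTrack:track];
 *   processor.volume = 0.5f;   // forwarded to the sound level unit
 *   processor.balance = -1.0f; // full left
 */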

void InitAudioTap(MTAudioProcessingTapRef tapRef, void *clientInfo, void **tapStorageOut)
{
    AVFAudioProcessor *processor = (__bridge AVFAudioProcessor*)clientInfo;

    AVFTapContext *context = (AVFTapContext*)calloc(1, sizeof(AVFTapContext));
    if (context) {
        context->enabled = NO;
        // processor should be retained, else we can crash when closing the media player
        context->processor = processor;
        *tapStorageOut = context;

        processor.tapStorage = context;
    }
}

void FinalizeAudioTap(MTAudioProcessingTapRef tapRef)
{
    AVFTapContext *context = (AVFTapContext*)MTAudioProcessingTapGetStorage(tapRef);

    if (context) {
        context->processor.tapStorage = NULL;
        context->processor = NULL;

        free(context);
    }
}

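/*
 * Configure a unit for in-place processing: the same linear PCM format on
 * the input and output scopes, a maximum slice size, then initialize it.
 */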
static OSStatus SetupAudioUnit(AudioUnit unit,
                               const AudioStreamBasicDescription *processingFormat,
                               UInt32 maxFrames) {
    OSStatus status = AudioUnitSetProperty(unit,
                                           kAudioUnitProperty_StreamFormat,
                                           kAudioUnitScope_Input, 0,
                                           processingFormat, sizeof(AudioStreamBasicDescription));
    if (noErr == status) {
        status = AudioUnitSetProperty(unit,
                                      kAudioUnitProperty_StreamFormat,
                                      kAudioUnitScope_Output, 0,
                                      processingFormat, sizeof(AudioStreamBasicDescription));
    }
    if (noErr == status) {
        status = AudioUnitSetProperty(unit,
                                      kAudioUnitProperty_MaximumFramesPerSlice,
                                      kAudioUnitScope_Global, 0,
                                      &maxFrames, sizeof(UInt32));
    }
    if (noErr == status) {
        status = AudioUnitInitialize(unit);
    }
    return status;
}

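/*
 * Connect source's output bus 0 to sink's input bus 0, so that rendering
 * the sink pulls data from the source.
 */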
static OSStatus ConnectAudioUnits(AudioUnit source, AudioUnit sink) {
    AudioUnitConnection connection;
    connection.sourceAudioUnit = source;
    connection.sourceOutputNumber = 0;
    connection.destInputNumber = 0;
    return AudioUnitSetProperty(sink, kAudioUnitProperty_MakeConnection,
                                kAudioUnitScope_Input, 0,
                                &connection, sizeof(connection));
}

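/*
 * Look up an audio component by type, subtype and manufacturer and return a
 * new instance of it, or NULL if no matching component is registered.
 */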
AudioUnit FindAudioUnit(OSType type, OSType subType, OSType manu) {
    AudioUnit audioUnit = NULL;

    AudioComponentDescription audioComponentDescription;
    audioComponentDescription.componentType = type;
    audioComponentDescription.componentSubType = subType;
    audioComponentDescription.componentManufacturer = manu;
    audioComponentDescription.componentFlags = 0;
    audioComponentDescription.componentFlagsMask = 0;

    AudioComponent audioComponent = AudioComponentFindNext(NULL, &audioComponentDescription);
    if (audioComponent) {
        AudioComponentInstanceNew(audioComponent, &audioUnit);
    }
    return audioUnit;
}

void PrepareAudioTap(MTAudioProcessingTapRef tapRef,
                     CMItemCount maxFrames,
                     const AudioStreamBasicDescription *processingFormat)
{
    AVFTapContext *context = (AVFTapContext*)MTAudioProcessingTapGetStorage(tapRef);

    // Validate the audio format before we enable the processor

    // Failures here should rarely, if ever, happen so leave the NSLogs in for
    // easier diagnosis in the field
    if (processingFormat->mFormatID != kAudioFormatLinearPCM) {
        NSLog(@"AVFAudioProcessor needs linear PCM");
        return;
    }

    // Use kAudioFormatFlagsNativeFloatPacked to verify that the samples are
    // packed, native-endian floats
    if ((processingFormat->mFormatFlags & kAudioFormatFlagsNativeFloatPacked)
        != kAudioFormatFlagsNativeFloatPacked) {
        NSLog(@"AVFAudioProcessor needs native endian packed float samples");
        return;
    }

    // Get an instance of the system sample delay unit
    context->delayUnit = FindAudioUnit(kAudioUnitType_Effect,
                                       kAudioUnitSubType_SampleDelay,
                                       kAudioUnitManufacturer_Apple);
    if (context->delayUnit) {
        OSStatus status = SetupAudioUnit(context->delayUnit, processingFormat, (UInt32)maxFrames);
        if (noErr != status) {
            NSLog(@"Error setting up delay unit: %d", status);
            AudioComponentInstanceDispose(context->delayUnit);
            context->delayUnit = NULL;
        }
    }

    context->eqUnit = NULL;
    if (context->processor.audioEqualizer) {
        context->eqUnit = NewKernelProcessorUnit(context->processor.audioEqualizer);
        if (context->eqUnit) {
            OSStatus status = SetupAudioUnit(context->eqUnit,
                                             processingFormat,
                                             (UInt32)maxFrames);
            if (noErr != status) {
                NSLog(@"Error setting up audio equalizer unit: %d", status);
                // Don't delete the instance, that will happen when we dispose the unit
                AudioComponentInstanceDispose(context->eqUnit);
                context->eqUnit = NULL;
            }
        }
    }

    context->spectrumUnit = NULL;
    if (context->processor.audioSpectrum) {
        context->spectrumUnit = NewKernelProcessorUnit(context->processor.audioSpectrum);
        if (context->spectrumUnit) {
            OSStatus status = SetupAudioUnit(context->spectrumUnit,
                                             processingFormat,
                                             (UInt32)maxFrames);
            if (noErr != status) {
                NSLog(@"Error setting up audio spectrum unit: %d", status);
                // Don't delete the instance, that will happen when we dispose the unit
                AudioComponentInstanceDispose(context->spectrumUnit);
                context->spectrumUnit = NULL;
            }
        }
    }

    context->volumeUnit = NULL;
    if (context->processor.soundLevelUnit) {
        context->volumeUnit = NewKernelProcessorUnit(context->processor.soundLevelUnit);
        if (context->volumeUnit) {
            OSStatus status = SetupAudioUnit(context->volumeUnit,
                                             processingFormat,
                                             (UInt32)maxFrames);
            if (noErr != status) {
                NSLog(@"Error setting up sound level unit: %d", status);
                AudioComponentInstanceDispose(context->volumeUnit);
                context->volumeUnit = NULL;
            }
        }
    }

    /*
     * Use AudioUnitConnections to build a processing graph.
     * The last unit in the chain is the one we call to render; it pulls data
     * through the graph until we reach the first unit, which fetches samples
     * via the render proc we install.
     *
     * The graph will look like this:
     *    (render proc) -> delayUnit -> eqUnit -> spectrumUnit -> volumeUnit
     *
     * This allows the EQ settings to affect the spectrum output, but not
     * the volume or balance.
     */
    AudioUnit firstUnit = NULL;
    context->renderUnit = NULL;

    // Build the chain, appending each unit that was successfully created
    if (context->delayUnit) {
        if (context->renderUnit) {
            // Connect renderUnit output to this input
            ConnectAudioUnits(context->renderUnit, context->delayUnit);
        }
        context->renderUnit = context->delayUnit;
        if (!firstUnit) {
            firstUnit = context->delayUnit;
        }
    }
    if (context->eqUnit) {
        if (context->renderUnit) {
            ConnectAudioUnits(context->renderUnit, context->eqUnit);
        }
        context->renderUnit = context->eqUnit;
        if (!firstUnit) {
            firstUnit = context->eqUnit;
        }
    }
    if (context->spectrumUnit) {
        if (context->renderUnit) {
            ConnectAudioUnits(context->renderUnit, context->spectrumUnit);
        }
        context->renderUnit = context->spectrumUnit;
        if (!firstUnit) {
            firstUnit = context->spectrumUnit;
        }
    }
    if (context->volumeUnit) {
        if (context->renderUnit) {
            ConnectAudioUnits(context->renderUnit, context->volumeUnit);
        }
        context->renderUnit = context->volumeUnit;
        if (!firstUnit) {
            firstUnit = context->volumeUnit;
        }
    }

    // Set up a render callback on our first unit
    if (firstUnit) {
        AURenderCallbackStruct renderCB;
        renderCB.inputProc = (AURenderCallback)AVFTapRenderCallback;
        renderCB.inputProcRefCon = (void*)tapRef;
        AudioUnitSetProperty(firstUnit,
                             kAudioUnitProperty_SetRenderCallback,
                             kAudioUnitScope_Input, 0,
                             &renderCB, sizeof(renderCB));
    }
    context->totalFrames = 0;
    context->enabled = YES;
}

void UnprepareAudioTap(MTAudioProcessingTapRef tapRef)
{
    AVFTapContext *context = (AVFTapContext*)MTAudioProcessingTapGetStorage(tapRef);
    context->enabled = NO;
    context->renderUnit = NULL;

    if (context->delayUnit) {
        AudioUnitUninitialize(context->delayUnit);
        AudioComponentInstanceDispose(context->delayUnit);
        context->delayUnit = NULL;
    }
    if (context->spectrumUnit) {
        AudioUnitUninitialize(context->spectrumUnit);
        AudioComponentInstanceDispose(context->spectrumUnit);
        context->spectrumUnit = NULL;
    }
    if (context->volumeUnit) {
        AudioUnitUninitialize(context->volumeUnit);
        AudioComponentInstanceDispose(context->volumeUnit);
        context->volumeUnit = NULL;
    }
    if (context->eqUnit) {
        AudioUnitUninitialize(context->eqUnit);
        AudioComponentInstanceDispose(context->eqUnit);
        context->eqUnit = NULL;
    }
}

void ProcessAudioTap(MTAudioProcessingTapRef tapRef,
                     CMItemCount numberFrames,
                     MTAudioProcessingTapFlags flags,
                     AudioBufferList *bufferListInOut,
                     CMItemCount *numberFramesOut,
                     MTAudioProcessingTapFlags *flagsOut)
{
    AVFTapContext *context = (AVFTapContext*)MTAudioProcessingTapGetStorage(tapRef);
    OSStatus status = noErr;

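    // If we built a unit chain, pull the audio through it: rendering the last
    // unit cascades back to the render callback on the first unit, which
    // fetches the source samples from the tap. Otherwise just pass the source
    // audio through untouched.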
    if (context->renderUnit) {
        AudioTimeStamp audioTimeStamp = {0}; // zero the fields we don't set
        audioTimeStamp.mSampleTime = context->totalFrames;
        audioTimeStamp.mFlags = kAudioTimeStampSampleTimeValid;

        status = AudioUnitRender(context->renderUnit,
                                 0,
                                 &audioTimeStamp,
                                 0,
                                 (UInt32)numberFrames,
                                 bufferListInOut);
        if (noErr != status) {
            return;
        }
        context->totalFrames += numberFrames;
        *numberFramesOut = numberFrames;
    } else {
        MTAudioProcessingTapGetSourceAudio(tapRef, numberFrames, bufferListInOut,
                                           flagsOut, NULL, numberFramesOut);
    }
}

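/*
 * Render callback installed on the first unit in the chain; it feeds the
 * chain by pulling source audio from the tap.
 */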
static OSStatus AVFTapRenderCallback(void *inRefCon,
                                     AudioUnitRenderActionFlags *ioActionFlags,
                                     const AudioTimeStamp *inTimeStamp,
                                     UInt32 inBusNumber,
                                     UInt32 inNumberFrames,
                                     AudioBufferList *ioData)
{
    MTAudioProcessingTapRef tapRef = static_cast<MTAudioProcessingTapRef>(inRefCon);
    return MTAudioProcessingTapGetSourceAudio(tapRef, inNumberFrames, ioData, NULL, NULL, NULL);
}