// Copyright 2022 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#import <AVFoundation/AVFoundation.h>
#import "tensorflow_lite_support/ios/sources/TFLCommonUtils.h"
#import "tensorflow_lite_support/ios/task/audio/core/audio_record/sources/TFLAudioRecord.h"
#import "tensorflow_lite_support/ios/task/audio/core/sources/TFLRingBuffer.h"
#define SUPPORTED_CHANNEL_COUNT 2
/** Error domain for TFL Audio Record Errors. */
static NSString *const TFLAudioRecordErrorDomain = @"org.tensorflow.lite.audio.record";
@implementation TFLAudioRecord {
AVAudioEngine *_audioEngine;
/* Specifying a custom buffer size when tapping `AVAudioEngine` does not take effect. Hence we
 * store the returned samples in a ring buffer to achieve the desired buffer size. If the
 * specified buffer size is smaller than the buffer size delivered by `AVAudioEngine`, only the
 * most recent `bufferSize` samples are retained by the ring buffer. */
TFLRingBuffer *_ringBuffer;
dispatch_queue_t _conversionQueue;
NSError *_globalError;
}
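/**
 * Initializes a new `TFLAudioRecord` with the given audio format and buffer size. The buffer size
 * must be a multiple of the format's channel count, and at most 2 audio channels are supported.
 *
 * A minimal usage sketch, assuming `TFLAudioFormat` exposes an
 * `-initWithChannelCount:sampleRate:` initializer (that class is not defined in this file):
 *
 *   TFLAudioFormat *format = [[TFLAudioFormat alloc] initWithChannelCount:1 sampleRate:16000];
 *   NSError *initError = nil;
 *   TFLAudioRecord *audioRecord = [[TFLAudioRecord alloc] initWithAudioFormat:format
 *                                                                  bufferSize:15600
 *                                                                       error:&initError];
 */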
- (nullable instancetype)initWithAudioFormat:(TFLAudioFormat *)audioFormat
bufferSize:(NSUInteger)bufferSize
error:(NSError **)error {
self = [self init];
if (self) {
if (audioFormat.channelCount > SUPPORTED_CHANNEL_COUNT) {
[TFLCommonUtils
createCustomError:error
withDomain:TFLAudioRecordErrorDomain
code:TFLAudioRecordErrorCodeInvalidArgumentError
description:
[NSString
stringWithFormat:
@"The channel count provided does not match the supported "
@"channel count. Only up to %d audio channels are currently supported.",
SUPPORTED_CHANNEL_COUNT]];
return nil;
}
if (bufferSize % audioFormat.channelCount != 0) {
[TFLCommonUtils
createCustomError:error
withDomain:TFLAudioRecordErrorDomain
code:TFLAudioRecordErrorCodeInvalidArgumentError
description:@"The buffer size provided is not a multiple of channel count."];
return nil;
}
NSError *waitError = nil;
[TFLCommonUtils
createCustomError:&waitError
withDomain:TFLAudioRecordErrorDomain
code:TFLAudioRecordErrorCodeWaitingForNewMicInputError
description:@"TFLAudioRecord hasn't started receiving samples from the audio "
@"input source. Please wait for the input."];
_globalError = waitError;
_audioFormat = audioFormat;
_audioEngine = [[AVAudioEngine alloc] init];
_bufferSize = bufferSize;
_ringBuffer = [[TFLRingBuffer alloc] initWithBufferSize:_bufferSize];
_conversionQueue =
dispatch_queue_create("org.tensorflow.lite.AudioConversionQueue", NULL); // Serial Queue
}
return self;
}
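/**
 * Converts `pcmBuffer` from the input node's native format to this object's recording format
 * (interleaved Float32 at `audioFormat.sampleRate`) using the given `audioConverter`. Returns the
 * converted buffer, or nil and populates `error` if the conversion fails.
 */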
- (AVAudioPCMBuffer *)bufferFromInputBuffer:(AVAudioPCMBuffer *)pcmBuffer
usingAudioConverter:(AVAudioConverter *)audioConverter
error:(NSError **)error {
// Capacity of converted PCM buffer is calculated in order to maintain the same
// latency as the input pcmBuffer.
AVAudioFrameCount capacity = ceil(pcmBuffer.frameLength * audioConverter.outputFormat.sampleRate /
audioConverter.inputFormat.sampleRate);
AVAudioPCMBuffer *outPCMBuffer = [[AVAudioPCMBuffer alloc]
initWithPCMFormat:audioConverter.outputFormat
frameCapacity:capacity * (AVAudioFrameCount)audioConverter.outputFormat.channelCount];
AVAudioConverterInputBlock inputBlock = ^AVAudioBuffer *_Nullable(
AVAudioPacketCount inNumberOfPackets, AVAudioConverterInputStatus *_Nonnull outStatus) {
*outStatus = AVAudioConverterInputStatus_HaveData;
return pcmBuffer;
};
NSError *conversionError = nil;
AVAudioConverterOutputStatus converterStatus = [audioConverter convertToBuffer:outPCMBuffer
error:&conversionError
withInputFromBlock:inputBlock];
switch (converterStatus) {
case AVAudioConverterOutputStatus_HaveData: {
return outPCMBuffer;
}
case AVAudioConverterOutputStatus_Error: {
NSString *errorDescription = conversionError.localizedDescription
? conversionError.localizedDescription
: @"Some error occured while processing incoming audio "
@"frames.";
[TFLCommonUtils createCustomError:error
withDomain:TFLAudioRecordErrorDomain
code:TFLAudioRecordErrorCodeProcessingError
description:errorDescription];
break;
}
case AVAudioConverterOutputStatus_EndOfStream: {
[TFLCommonUtils createCustomError:error
withDomain:TFLAudioRecordErrorDomain
code:TFLAudioRecordErrorCodeProcessingError
description:@"Reached end of input audio stream."];
break;
}
case AVAudioConverterOutputStatus_InputRanDry: {
[TFLCommonUtils createCustomError:error
withDomain:TFLAudioRecordErrorDomain
code:TFLAudioRecordErrorCodeProcessingError
description:@"Not enough input is available to satisfy the request."];
break;
}
}
return nil;
}
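/**
 * Loads the samples of an already-converted, interleaved Float32 `pcmBuffer` into the internal
 * ring buffer. Returns NO and populates `error` if the buffer is empty or not in Float32 format.
 */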
- (BOOL)loadAudioPCMBuffer:(AVAudioPCMBuffer *)pcmBuffer error:(NSError **)error {
if (pcmBuffer.frameLength == 0) {
[TFLCommonUtils createCustomError:error
withDomain:TFLAudioRecordErrorDomain
code:TFLAudioRecordErrorCodeInvalidArgumentError
description:@"You may have to try with a different "
@"channel count or sample rate"];
} else if (pcmBuffer.format.commonFormat != AVAudioPCMFormatFloat32) {
[TFLCommonUtils createCustomError:error
withDomain:TFLAudioRecordErrorDomain
code:TFLAudioRecordErrorCodeProcessingError
description:@"An error occured while processing mic input."];
} else {
// `pcmBuffer` is already converted to an interleaved format since this method is called after
// -[self bufferFromInputBuffer:usingAudioConverter:error:].
// If an `AVAudioPCMBuffer` is interleaved, both floatChannelData[0] and floatChannelData[1]
// point to the same 1d array with both channels in an interleaved format according to:
// https://developer.apple.com/documentation/avfaudio/avaudiopcmbuffer/1386212-floatchanneldata
// Hence we can safely access floatChannelData[0] to get the 1D data in interleaved fashion.
if ([self->_ringBuffer loadFloatData:pcmBuffer.floatChannelData[0]
dataSize:pcmBuffer.frameLength
offset:0
size:pcmBuffer.frameLength
error:error]) {
return YES;
}
}
return NO;
}
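/**
 * Converts the tapped `buffer` on the serial conversion queue and loads the result into the ring
 * buffer. Any conversion or load error is stored in `_globalError` and surfaced by the next call
 * to `-readAtOffset:withSize:error:`.
 */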
- (void)convertAndLoadBuffer:(AVAudioPCMBuffer *)buffer
usingAudioConverter:(AVAudioConverter *)audioConverter {
__weak TFLAudioRecord *weakSelf = self;
dispatch_sync(self->_conversionQueue, ^{
__strong TFLAudioRecord *strongSelf = weakSelf;
if (!strongSelf) return;
NSError *conversionError = nil;
AVAudioPCMBuffer *convertedPCMBuffer = [strongSelf bufferFromInputBuffer:buffer
usingAudioConverter:audioConverter
error:&conversionError];
if (!(convertedPCMBuffer && [strongSelf loadAudioPCMBuffer:convertedPCMBuffer
error:&conversionError])) {
strongSelf->_globalError = conversionError;
} else {
strongSelf->_globalError = nil;
}
});
}
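/**
 * Installs a tap on the audio engine's input node and starts the engine. Tapped buffers are
 * converted to the recording format and appended to the ring buffer on the conversion queue.
 * Returns NO and populates `error` if the audio engine fails to start.
 */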
- (BOOL)startTappingMicrophoneWithError:(NSError **)error {
AVAudioNode *inputNode = [_audioEngine inputNode];
AVAudioFormat *format = [inputNode outputFormatForBus:0];
AVAudioFormat *recordingFormat =
[[AVAudioFormat alloc] initWithCommonFormat:AVAudioPCMFormatFloat32
sampleRate:self.audioFormat.sampleRate
channels:(AVAudioChannelCount)self.audioFormat.channelCount
interleaved:YES];
AVAudioConverter *audioConverter = [[AVAudioConverter alloc] initFromFormat:format
toFormat:recordingFormat];
// Setting the buffer size has no effect on the input node. This class uses a ring buffer
// internally to honor the requested buffer size.
__weak TFLAudioRecord *weakSelf = self;
[inputNode installTapOnBus:0
bufferSize:(AVAudioFrameCount)self.bufferSize
format:format
block:^(AVAudioPCMBuffer *buffer, AVAudioTime *when) {
[weakSelf convertAndLoadBuffer:buffer usingAudioConverter:audioConverter];
}];
[_audioEngine prepare];
return [_audioEngine startAndReturnError:error];
}
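/**
 * Starts recording if record permission has already been granted by the user.
 *
 * A sketch of the expected call sequence; `audioRecord` stands for a previously initialized
 * `TFLAudioRecord`, and dispatching back to the main queue is one possible choice, not a
 * requirement of this API:
 *
 *   [[AVAudioSession sharedInstance] requestRecordPermission:^(BOOL granted) {
 *     dispatch_async(dispatch_get_main_queue(), ^{
 *       if (!granted) return;
 *       NSError *startError = nil;
 *       [audioRecord startRecordingWithError:&startError];
 *     });
 *   }];
 */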
- (BOOL)startRecordingWithError:(NSError **)error {
switch ([AVAudioSession sharedInstance].recordPermission) {
case AVAudioSessionRecordPermissionDenied: {
[TFLCommonUtils createCustomError:error
withDomain:TFLAudioRecordErrorDomain
code:TFLAudioRecordErrorCodeRecordPermissionDeniedError
description:@"Record permissions were denied by the user. "];
return NO;
}
case AVAudioSessionRecordPermissionGranted: {
  return [self startTappingMicrophoneWithError:error];
}
case AVAudioSessionRecordPermissionUndetermined: {
[TFLCommonUtils
createCustomError:error
withDomain:TFLAudioRecordErrorDomain
code:TFLAudioRecordErrorCodeRecordPermissionUndeterminedError
description:@"Record permissions are undertermined. Yo must use AVAudioSession's "
@"requestRecordPermission() to request audio record permission from "
@"the user. Please read Apple's documentation for further details"
@"If record permissions are granted, you can call this "
@"method in the completion handler of requestRecordPermission()."];
return NO;
}
}
}
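/** Removes the tap on the input node, stops the audio engine, and clears the ring buffer. */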
- (void)stop {
[[_audioEngine inputNode] removeTapOnBus:0];
[_audioEngine stop];
__weak TFLRingBuffer *weakRingBuffer = _ringBuffer;
dispatch_sync(self->_conversionQueue, ^{
[weakRingBuffer clear];
});
}
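/**
 * Returns a copy of `size` elements of the ring buffer starting at `offset`, or nil and populates
 * `error` if no samples have been received yet or the requested range is out of bounds.
 *
 * A minimal polling sketch; `audioRecord` stands for a `TFLAudioRecord` that has already started
 * recording:
 *
 *   NSError *readError = nil;
 *   TFLFloatBuffer *samples = [audioRecord readAtOffset:0
 *                                              withSize:audioRecord.bufferSize
 *                                                 error:&readError];
 *   if (samples) {
 *     // Feed the float samples to an audio classifier here.
 *   }
 */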
- (nullable TFLFloatBuffer *)readAtOffset:(NSUInteger)offset
withSize:(NSUInteger)size
error:(NSError **)error {
__block TFLFloatBuffer *bufferToReturn = nil;
__block NSError *readError = nil;
__weak TFLAudioRecord *weakSelf = self;
dispatch_sync(_conversionQueue, ^{
__strong TFLAudioRecord *strongSelf = weakSelf;
if (!strongSelf) return;
if (strongSelf->_globalError) {
readError = [strongSelf->_globalError copy];
} else if (offset + size > [strongSelf->_ringBuffer size]) {
[TFLCommonUtils
createCustomError:&readError
withDomain:TFLAudioRecordErrorDomain
code:TFLAudioRecordErrorCodeInvalidArgumentError
description:@"Index out of bounds: offset + size should be <= to the size of "
@"TFLAudioRecord's internal buffer."];
} else {
bufferToReturn = [strongSelf->_ringBuffer floatBufferWithOffset:offset size:size];
}
});
if (!bufferToReturn && error) {
*error = readError;
}
return bufferToReturn;
}
@end