# Graph-level side packets.
# Inputs: the input SequenceExample (read under the STRING tag by the
# StringToSequenceExampleCalculator node) and the mean / projection matrices
# consumed by the Inception3 and VGGish MatrixSubtract / MatrixMultiply nodes.
input_side_packet: "input_sequence_example"
input_side_packet: "inception3_pca_mean_matrix"
input_side_packet: "inception3_pca_projection_matrix"
input_side_packet: "vggish_pca_mean_matrix"
input_side_packet: "vggish_pca_projection_matrix"
# NOTE(review): only `sequence_example_to_serialize` is declared as a graph
# output here; the serialization node also emits the side packet
# `output_sequence_example`, which is not declared. Confirm this is intended.
output_side_packet: "sequence_example_to_serialize"
# Take the graph's `input_sequence_example` side packet (STRING tag) and emit
# `parsed_sequence_example` under the SEQUENCE_EXAMPLE tag.
node {
  calculator: "StringToSequenceExampleCalculator"
  input_side_packet: "STRING:input_sequence_example"
  output_side_packet: "SEQUENCE_EXAMPLE:parsed_sequence_example"
}
# Read the parsed SequenceExample and publish three side packets: the media
# file path (DATA_PATH), the resampler options and the audio decoder options.
node {
  calculator: "UnpackMediaSequenceCalculator"
  input_side_packet: "SEQUENCE_EXAMPLE:parsed_sequence_example"
  output_side_packet: "DATA_PATH:input_file"
  output_side_packet: "RESAMPLER_OPTIONS:packet_resampler_options"
  output_side_packet: "AUDIO_DECODER_OPTIONS:audio_decoder_options"
  node_options {
    [type.googleapis.com/mediapipe.UnpackMediaSequenceCalculatorOptions] {
      # Base settings for the emitted resampler options.
      # NOTE(review): frame_rate is presumably in frames per second - confirm.
      base_packet_resampler_options {
        frame_rate: 1.0
        base_timestamp: 0
      }
      # Base settings for the emitted audio decoder options: audio stream 0.
      base_audio_decoder_options {
        audio_stream {
          stream_index: 0
        }
      }
    }
  }
}
# Decode the whole video found at `input_file` into the `decoded_frames`
# stream.
node {
  calculator: "OpenCvVideoDecoderCalculator"
  input_side_packet: "INPUT_FILE_PATH:input_file"
  output_stream: "VIDEO:decoded_frames"
}
# Keep only the subset of decoded frames we want, as configured by the
# `packet_resampler_options` side packet.
node {
  calculator: "PacketResamplerCalculator"
  input_side_packet: "OPTIONS:packet_resampler_options"
  input_stream: "decoded_frames"
  output_stream: "sampled_decoded_frames"
}
# Turn each sampled frame into `tensor_frame`, which the Inception inference
# node consumes under its IMG_UINT8 tag.
node {
  calculator: "ImageFrameToTensorCalculator"
  input_stream: "sampled_decoded_frames"
  output_stream: "tensor_frame"
}
# Create the `session` side packet from the frozen graph at graph_proto_path.
# tag_to_tensor_names maps the stream tags used by the inference node to
# tensor names inside that graph.
node {
  calculator: "TensorFlowSessionFromFrozenGraphCalculator"
  output_side_packet: "SESSION:session"
  node_options {
    [type.googleapis.com/mediapipe.TensorFlowSessionFromFrozenGraphCalculatorOptions] {
      graph_proto_path: "/tmp/mediapipe/classify_image_graph_def.pb"
      # Tag used for the input frame tensor.
      tag_to_tensor_names {
        key: "IMG_UINT8"
        value: "DecodeJpeg:0"
      }
      # Tag used for the output activation tensor.
      tag_to_tensor_names {
        key: "INCEPTION_POOL3"
        value: "pool_3/_reshape:0"
      }
    }
  }
}
# Run the `session` graph on each frame tensor: IMG_UINT8 is the input tag and
# INCEPTION_POOL3 the output tag (both mapped to tensor names on the session
# node).
node {
  calculator: "TensorFlowInferenceCalculator"
  input_stream: "IMG_UINT8:tensor_frame"
  input_side_packet: "SESSION:session"
  output_stream: "INCEPTION_POOL3:inception3_hidden_activation_single_element_batch"
  node_options {
    [type.googleapis.com/mediapipe.TensorFlowInferenceCalculatorOptions] {
      signature_name: ""
      # One frame per inference call, and no batch dimension is added to the
      # input tensors.
      batch_size: 1
      add_batch_dim_to_tensors: false
    }
  }
}
# Drop the batch dimension (dim 0) from the single-element-batch activation.
node {
  calculator: "TensorSqueezeDimensionsCalculator"
  input_stream: "inception3_hidden_activation_single_element_batch"
  output_stream: "inception3_hidden_activation"
  node_options {
    [type.googleapis.com/mediapipe.TensorSqueezeDimensionsCalculatorOptions] {
      dim: 0
    }
  }
}
# Convert the squeezed activation tensor into a matrix for the PCA steps that
# follow.
node {
  calculator: "TensorToMatrixCalculator"
  input_stream: "TENSOR:inception3_hidden_activation"
  output_stream: "MATRIX:inception3_hidden_activation_matrix"
}
# PCA step 1 (visual): activation matrix (MINUEND) minus the
# `inception3_pca_mean_matrix` side packet (SUBTRAHEND).
node {
  calculator: "MatrixSubtractCalculator"
  input_stream: "MINUEND:inception3_hidden_activation_matrix"
  input_side_packet: "SUBTRAHEND:inception3_pca_mean_matrix"
  output_stream: "mean_subtracted_inception3_matrix"
}
# PCA step 2 (visual): multiply the mean-subtracted matrix with the
# `inception3_pca_projection_matrix` side packet.
node {
  calculator: "MatrixMultiplyCalculator"
  input_stream: "mean_subtracted_inception3_matrix"
  input_side_packet: "inception3_pca_projection_matrix"
  output_stream: "pca_inception3_matrix"
}
# Convert the projected matrix into `pca_inception3_vf`, the stream that the
# pack node stores as FLOAT_FEATURE_RGB.
node {
  calculator: "MatrixToVectorCalculator"
  input_stream: "pca_inception3_matrix"
  output_stream: "pca_inception3_vf"
}
######################## END OF VISUAL ###########################
######################## BEGIN OF AUDIO ##########################
# Decode audio from the same `input_file`, configured by the unpacked
# `audio_decoder_options`. Emits the audio data and its header as two separate
# streams.
node {
  calculator: "AudioDecoderCalculator"
  input_side_packet: "INPUT_FILE_PATH:input_file"
  input_side_packet: "OPTIONS:audio_decoder_options"
  output_stream: "AUDIO:audio"
  output_stream: "AUDIO_HEADER:audio_header"
}
# Combine the `audio` data stream with the header from `audio_header` into a
# single stream, `media_audio`.
node {
  calculator: "AddHeaderCalculator"
  input_stream: "DATA:audio"
  input_stream: "HEADER:audio_header"
  output_stream: "media_audio"
}
# Unconditionally downmix the audio to a mono waveform.
node {
  calculator: "AverageTimeSeriesAcrossChannelsCalculator"
  input_stream: "media_audio"
  output_stream: "mono_waveform"
}
# Resample the mono waveform to the target sample rate of 16000.0.
node {
  calculator: "RationalFactorResampleCalculator"
  input_stream: "mono_waveform"
  output_stream: "resampled_waveform"
  node_options {
    [type.googleapis.com/mediapipe.RationalFactorResampleCalculatorOptions] {
      target_sample_rate: 16000.0
    }
  }
}
# Compute a squared-magnitude spectrogram of the resampled waveform.
node {
  calculator: "SpectrogramCalculator"
  input_stream: "resampled_waveform"
  output_stream: "spectrogram_squared_magnitude"
  node_options {
    [type.googleapis.com/mediapipe.SpectrogramCalculatorOptions] {
      # 0.025 s frames overlapping by 0.015 s.
      # NOTE(review): presumably a 0.010 s step between frames - confirm.
      frame_duration_seconds: 0.025
      frame_overlap_seconds: 0.015
      output_type: SQUARED_MAGNITUDE
    }
  }
}
# Map the spectrogram onto 64 mel channels between 125.0 and 7500.0 Hz.
node {
  calculator: "MelSpectrumCalculator"
  # The input must be SQUARED_MAGNITUDE, whereas the output of
  # MelSpectrumCalculator is in linear magnitude units.
  input_stream: "spectrogram_squared_magnitude"
  output_stream: "mel_spectrum_magnitude"
  node_options {
    [type.googleapis.com/mediapipe.MelSpectrumCalculatorOptions] {
      # These values follow the 'wideband' or '16kHz' speech convention.
      channel_count: 64
      min_frequency_hertz: 125.0
      max_frequency_hertz: 7500.0
    }
  }
}
# Take the log of the mel spectrum, with stabilizer 0.01.
# NOTE(review): the stabilizer is presumably an offset added before the log so
# that zero-valued bins stay finite - confirm against the calculator options.
node {
  calculator: "StabilizedLogCalculator"
  input_stream: "mel_spectrum_magnitude"
  output_stream: "log_mel_spectrum_magnitude"
  node_options {
    [type.googleapis.com/mediapipe.StabilizedLogCalculatorOptions] {
      stabilizer: 0.01
    }
  }
}
# Group the log-mel time series into frames of 0.96 s.
node {
  calculator: "TimeSeriesFramerCalculator"
  input_stream: "log_mel_spectrum_magnitude"
  output_stream: "log_mel_spectrum_magnitude_with_context"
  node_options {
    [type.googleapis.com/mediapipe.TimeSeriesFramerCalculatorOptions] {
      frame_duration_seconds: 0.96
      # The overlap is negative.
      # NOTE(review): presumably this leaves a 0.04 s gap between frames, i.e.
      # one 0.96 s frame per 1.0 s of audio - confirm.
      frame_overlap_seconds: -0.04
    }
  }
}
# Convert each framed log-mel matrix into a tensor, with the transpose option
# enabled, for the VGGish inference node.
node {
  calculator: "MatrixToTensorCalculator"
  input_stream: "log_mel_spectrum_magnitude_with_context"
  output_stream: "log_mel_spectrum_magnitude_tensor"
  node_options {
    [type.googleapis.com/mediapipe.MatrixToTensorCalculatorOptions] {
      transpose: true
    }
  }
}
# Create the `vggish_session` side packet from the frozen graph at
# graph_proto_path. tag_to_tensor_names maps the stream tags used by the
# VGGish inference node to tensor names inside that graph.
node {
  calculator: "TensorFlowSessionFromFrozenGraphCalculator"
  output_side_packet: "SESSION:vggish_session"
  node_options {
    [type.googleapis.com/mediapipe.TensorFlowSessionFromFrozenGraphCalculatorOptions] {
      graph_proto_path: "/tmp/mediapipe/vggish_new.pb"
      # Tag used for the input feature tensor.
      tag_to_tensor_names {
        key: "INPUT"
        value: "vggish/input_features:0"
      }
      # Tag used for the output embedding tensor.
      tag_to_tensor_names {
        key: "VGGISH"
        value: "vggish/fc2/BiasAdd:0"
      }
    }
  }
}
# Run the `vggish_session` graph on the log-mel tensors: INPUT is the input
# tag and VGGISH the output tag (both mapped to tensor names on the session
# node).
node {
  calculator: "TensorFlowInferenceCalculator"
  input_stream: "INPUT:log_mel_spectrum_magnitude_tensor"
  input_side_packet: "SESSION:vggish_session"
  output_stream: "VGGISH:vggish_tensor"
  node_options {
    [type.googleapis.com/mediapipe.TensorFlowInferenceCalculatorOptions] {
      signature_name: ""
      batch_size: 128
    }
  }
}
# Convert the VGGish output tensor into a matrix. The framed log-mel stream is
# supplied under REFERENCE, and the time-series header fields below are
# overridden.
# NOTE(review): presumably the REFERENCE stream's header is the base that the
# overrides are applied to - confirm against the calculator documentation.
node {
  calculator: "TensorToMatrixCalculator"
  input_stream: "REFERENCE:log_mel_spectrum_magnitude_with_context"
  input_stream: "TENSOR:vggish_tensor"
  output_stream: "MATRIX:vggish_matrix"
  node_options {
    [type.googleapis.com/mediapipe.TensorToMatrixCalculatorOptions] {
      time_series_header_overrides {
        num_channels: 128
        num_samples: 1
      }
    }
  }
}
# PCA step 1 (audio): VGGish matrix (MINUEND) minus the
# `vggish_pca_mean_matrix` side packet (SUBTRAHEND).
node {
  calculator: "MatrixSubtractCalculator"
  input_stream: "MINUEND:vggish_matrix"
  input_side_packet: "SUBTRAHEND:vggish_pca_mean_matrix"
  output_stream: "mean_subtracted_vggish_matrix"
}
# PCA step 2 (audio): multiply the mean-subtracted matrix with the
# `vggish_pca_projection_matrix` side packet.
node {
  calculator: "MatrixMultiplyCalculator"
  input_stream: "mean_subtracted_vggish_matrix"
  input_side_packet: "vggish_pca_projection_matrix"
  output_stream: "pca_vggish_matrix"
}
# Convert the projected matrix into `pca_vggish_vf`, the stream that the pack
# node stores as FLOAT_FEATURE_AUDIO.
node {
  calculator: "MatrixToVectorCalculator"
  input_stream: "pca_vggish_matrix"
  output_stream: "pca_vggish_vf"
}
# Write both feature streams into the parsed SequenceExample and emit the
# result as the `sequence_example_to_serialize` side packet.
node {
  calculator: "PackMediaSequenceCalculator"
  input_stream: "FLOAT_FEATURE_RGB:pca_inception3_vf"
  input_stream: "FLOAT_FEATURE_AUDIO:pca_vggish_vf"
  input_side_packet: "SEQUENCE_EXAMPLE:parsed_sequence_example"
  output_side_packet: "SEQUENCE_EXAMPLE:sequence_example_to_serialize"
}
# Serialize the packed SequenceExample into a string side packet,
# `output_sequence_example`, so it can be stored. This uses the same
# calculator as the parsing node, wired in the opposite direction
# (SEQUENCE_EXAMPLE in, STRING out).
node {
  calculator: "StringToSequenceExampleCalculator"
  input_side_packet: "SEQUENCE_EXAMPLE:sequence_example_to_serialize"
  output_side_packet: "STRING:output_sequence_example"
}