python – Audio waveform generation by streaming audio file

So I am trying to generate an audio waveform for a given audio file (it can be mp3, wav, etc.). Currently I am using scipy.io.wavfile.read (imported as read_wav) to get the samples, then resampling them with scipy.signal.resample and normalizing the result to get the audio waveform data.
For large audio files (>300 MB), scipy's read_wav loads the entire audio file into memory, so I am looking for an implementation that can stream the audio file instead of reading it into memory all at once. Any other, better implementation ideas are welcome. Looking forward to your responses.

This is my current implementation:

import pandas as pd
import numpy as np
import scipy.signal as sps
from scipy.io.wavfile import read as read_wav
import fsspec

    
def gen_audio_wave_form(audio_file_path, num_samples=2000):
    """Build normalized waveform data for a WAV file.

    The file is opened through ``fsspec``, decoded with
    ``scipy.io.wavfile.read``, resampled down to ``num_samples`` points,
    normalized with ``normalize_nd_array`` and packed into a DataFrame by
    ``create_target_data``.

    Args:
        audio_file_path: Path or URL understood by ``fsspec.open``.
        num_samples: Number of points to keep after resampling. Defaults
            to 2000, the value that was previously hard-coded.

    Returns:
        The DataFrame produced by ``create_target_data``.

    Raises:
        ValueError: If ``num_samples`` is smaller than 1.
    """
    if num_samples < 1:
        raise ValueError("num_samples must be a positive integer")

    with fsspec.open(audio_file_path) as file_obj:
        # NOTE: read_wav decodes the whole file into memory at once, so
        # peak memory grows with the size of the audio file.
        sample_rate, audio_buffer = read_wav(file_obj)

    # number of frames divided by frames-per-second
    duration = audio_buffer.shape[0] / sample_rate

    # resample to limit number of samples
    data = sps.resample(audio_buffer, num_samples)

    # normalize values so that it is in the range 0-1
    normalized_audio_buffer = normalize_nd_array(data)

    return create_target_data(
        audio_data=normalized_audio_buffer, duration=duration
    )

def normalize_nd_array(array):
    """Return the magnitudes of *array* scaled into the range 0-1.

    Every element is replaced by its absolute value divided by the largest
    absolute value found anywhere in the array, so the shape is preserved.

    Args:
        array: Array-like of numeric samples (any number of dimensions).

    Returns:
        A numpy array with the same shape as the input. An empty input
        yields an empty array, and an all-zero input yields all zeros
        instead of NaN.
    """
    values = np.asarray(array)
    if np.issubdtype(values.dtype, np.integer):
        # abs() of the most negative fixed-width integer (e.g. int16
        # -32768) overflows and stays negative; do the math in float.
        values = values.astype(np.float64)

    magnitudes = np.abs(values)
    if magnitudes.size == 0:
        return magnitudes.astype(float)

    peak = np.max(magnitudes)
    if peak == 0:
        # Silent input: dividing by the zero peak would produce NaN.
        return np.zeros_like(magnitudes, dtype=float)

    return magnitudes / peak

def create_target_data(audio_data, duration: float) -> pd.DataFrame:
    """Pack waveform samples and their duration into a one-row DataFrame.

    Args:
        audio_data: Waveform samples. Input with more than one dimension is
            reduced to the first element of each row (presumably the first
            channel; confirm against the caller). One-dimensional input
            must provide ``tolist()``.
        duration: Length of the audio in seconds.

    Returns:
        A DataFrame with a single row: ``waveform_data`` holds the list of
        samples and ``duration_in_sec`` holds ``duration``.
    """
    samples = (
        audio_data.tolist()
        if np.ndim(audio_data) <= 1
        else [row[0] for row in audio_data]
    )

    result = pd.DataFrame()
    # Wrapping the list keeps the whole waveform in one cell.
    result["waveform_data"] = [samples]
    result["duration_in_sec"] = duration
    return result

Read more here: Source link