# 数据读取
## 音频载入
import librosa
from scipy.io import wavfile

# --- wavfile ---
wav_file = 'demo.wav'
wf_sr, wf_audio = wavfile.read(wav_file)
# R1. wf_audio holds the raw, un-normalized audio samples; they are usually
#     int16-encoded, i.e. in the range [-32768, 32767].

# --- librosa ---
# R1. With sr=None the audio is read at the file's native sampling rate;
#     when a rate is given, the audio is resampled to that rate on load.
# R2. lr_audio holds the normalized audio samples.
lr_audio, lr_sr = librosa.load(wav_file, sr=None)
## 重采样
import librosa

audio_file = 'demo.wav'

# Target sampling rate in Hz. The snippet previously used `tsr` without
# defining it, which raised NameError. 16 kHz is a common rate for speech;
# change it to whatever the downstream model expects.
tsr = 16000

# sr=None keeps the file's native sampling rate, which is returned as `ssr`.
audio, ssr = librosa.load(audio_file, sr=None)

# Convert the signal from its native rate (ssr) to the target rate (tsr).
resampling_audio = librosa.resample(audio, orig_sr=ssr, target_sr=tsr)
## 梅尔谱
from scipy.io import wavfile
import librosa
import python_speech_features

audio_file = 'demo.wav'  # default_sr = 16000

# Framing parameters for the librosa call below, expressed in milliseconds so
# they line up with the winlen / winstep (seconds) passed to mfcc().
mel_window_length = 25  # ms, same as winlen=0.025
mel_window_step = 10    # ms, same as winstep=0.01
mel_n_channels = 40     # number of mel bands -- assumed value, TODO confirm

# --- python_speech_features ---
wf_sr, wf_audio = wavfile.read(audio_file)
# psf_mels.shape :: L x 13
# num_mels_per_second = np.ceil((1 - winlen) / winstep) + 1
# L = len(wf_audio) / wf_sr * num_mels_per_second
psf_mels = python_speech_features.mfcc(
    signal=wf_audio,
    samplerate=wf_sr,
    winlen=0.025,
    winstep=0.01,
    numcep=13,
)

# --- librosa ---
# sr=None keeps the file's native rate; the window and hop sizes are derived
# from that same rate (lr_sr) so they stay at 25 ms / 10 ms for any file.
lr_audio, lr_sr = librosa.load(audio_file, sr=None)
lrf_mels = librosa.feature.melspectrogram(
    y=lr_audio,
    sr=lr_sr,
    n_fft=int(lr_sr * mel_window_length / 1000),
    hop_length=int(lr_sr * mel_window_step / 1000),
    n_mels=mel_n_channels,
)

# remark
# R1. psf_mels and lrf_mels are broadly similar, but no exact correspondence
#     between the two has been found yet.
# NOTE(review): mfcc() returns cepstral coefficients (frames x numcep) while
#     melspectrogram() returns mel-band power (n_mels x frames), so an
#     element-wise match is not expected -- confirm against the library docs.
1