我们将使用Mozilla的Common Voice数据集(https://www.kaggle.com/mozillaorg/common-voice),它收录了用户在Common Voice网站上朗读的语音,目的是用于自动语音识别的训练和测试。不过,查看数据集后可以发现,其中许多样本实际上在性别(gender)列中带有标注。因此,我们可以提取这些带标注的样本来进行性别识别。
import glob
import os
import pandas as pd
import numpy as np
import shutil
import librosa
from tqdm import tqdm


def extract_feature(file_name, **kwargs) -> np.ndarray:
    """Extract a 1-D feature vector from the audio file `file_name`.

    Each requested feature is computed with librosa, averaged over its
    frame axis, and the results are concatenated in this fixed order:
    mfcc, chroma, mel, contrast, tonnetz.

    Supported keyword flags (all default to off):
        - mfcc:     MFCC with 40 coefficients
        - chroma:   chromagram computed from the STFT magnitude
        - mel:      MEL spectrogram
        - contrast: spectral contrast computed from the STFT magnitude
        - tonnetz:  tonal centroid features of the harmonic component

    Returns:
        The concatenated features, or an empty array when no flag is set.

    e.g:
    `features = extract_feature(path, mel=True, mfcc=True)`
    """
    mfcc = kwargs.get("mfcc")
    chroma = kwargs.get("chroma")
    mel = kwargs.get("mel")
    contrast = kwargs.get("contrast")
    tonnetz = kwargs.get("tonnetz")

    X, sample_rate = librosa.load(file_name)

    # The STFT magnitude is only needed by chroma and contrast, so skip
    # the computation when neither was requested.
    stft = None
    if chroma or contrast:
        stft = np.abs(librosa.stft(X))

    # Seed with an empty float64 array so the concatenated result keeps the
    # same dtype as repeatedly stacking onto `np.array([])` would give.
    parts = [np.array([])]
    if mfcc:
        parts.append(
            np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
        )
    if chroma:
        parts.append(
            np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
        )
    if mel:
        # `y` must be passed by keyword: recent librosa releases reject a
        # positional audio argument here.
        parts.append(
            np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
        )
    if contrast:
        parts.append(
            np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
        )
    if tonnetz:
        harmonic = librosa.effects.harmonic(X)
        parts.append(
            np.mean(librosa.feature.tonnetz(y=harmonic, sr=sample_rate).T, axis=0)
        )
    return np.hstack(parts)


def _preprocess_csv(csv_file, dirname):
    """Filter one metadata CSV and save MEL features of its labelled audio.

    Writes a copy of `csv_file` into `dirname` that keeps only the
    `filename` and `gender` columns and only the rows whose gender is
    'male' or 'female'. Then, for every file matched by
    `<name>/<name>/*` (where `<name>` is the CSV name without extension)
    that is listed in the filtered CSV, saves
    `extract_feature(..., mel=True)` as a `.npy` file under `dirname`.
    """
    print("[+] Preprocessing", csv_file)
    df = pd.read_csv(csv_file)
    # only take filename and gender columns
    new_df = df[["filename", "gender"]]
    print("Previously:", len(new_df), "rows")
    # take only male & female genders (i.e. dropping NaNs & 'other' gender)
    new_df = new_df[new_df["gender"].isin(["female", "male"])]
    print("Now:", len(new_df), "rows")
    # save new preprocessed CSV
    new_csv_file = os.path.join(dirname, csv_file)
    new_df.to_csv(new_csv_file, index=False)

    # The audio folder is named after the CSV; splitext copes with names
    # that contain extra dots, unlike unpacking `split(".")`.
    folder_name = os.path.splitext(csv_file)[0]
    audio_files = glob.glob(f"{folder_name}/{folder_name}/*")
    all_audio_filenames = set(new_df["filename"])
    for audio_file in tqdm(audio_files, f"Extracting features of {folder_name}"):
        parent_dir, base_name = os.path.split(audio_file)
        # Rebuild the "<folder>/<file>" form so it can be looked up in the
        # set of filenames taken from the CSV.
        audio_filename = f"{os.path.basename(parent_dir)}/{base_name}"
        if audio_filename not in all_audio_filenames:
            continue
        src_path = f"{folder_name}/{audio_filename}"
        target_path = f"{dirname}/{audio_filename}"
        # create the target folder if it doesn't exist
        os.makedirs(os.path.dirname(target_path), exist_ok=True)
        features = extract_feature(src_path, mel=True)
        # Drop only the audio extension; np.save appends ".npy" itself.
        target_filename = os.path.splitext(target_path)[0]
        np.save(target_filename, features)


dirname = "data"
os.makedirs(dirname, exist_ok=True)

csv_files = glob.glob("*.csv")

for csv_file in csv_files:
    _preprocess_csv(csv_file, dirname)
pip3 install numpy pandas tqdm sklearn tensorflow pyaudio librosa
(注意:PyPI 上的 `sklearn` 包名已被弃用,如果安装失败,请将其改为 `scikit-learn`。)
接下来,打开一个新的 notebook 或 bfwstudio,并导入我们需要的模块:
import pandas as pd
import numpy as np
import os
import tqdm
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard, EarlyStopping
from sklearn.model_selection import train_test_split
为了获取每个样本的性别,我们有一个CSV元数据文件(可在此处查看),它把每个音频样本的文件路径与其对应的性别关联起来:
df = pd.read_csv("balanced-all.csv")
df.head()
看起来是这样的:
filename gender
0 data/cv-other-train/sample-069205.npy female
1 data/cv-valid-train/sample-063134.npy female
2 data/cv-other-train/sample-080873.npy female
3 data/cv-other-train/sample-105595.npy female
4 data/cv-valid-train/sample-144613.npy female
filename gender
66933 data/cv-valid-train/sample-171098.npy male
66934 data/cv-other-train/sample-022864.npy male
66935 data/cv-valid-train/sample-080933.npy male
66936 data/cv-other-train/sample-012026.npy male
66937 data/cv-other-train/sample-013841.npy male
# get total samples
n_samples = len(df)
# get total male samples
n_male_samples = len(df[df['gender'] == 'male'])
# get total female samples
n_female_samples = len(df[df['gender'] == 'female'])
print("Total samples:", n_samples)
print("Total male samples:", n_male_samples)
print("Total female samples:", n_female_samples)
输出:
Total samples: 66938
Total male samples: 33469
Total female samples: 33469
def load_data(vector_length=128): """A function to load gender recognition dataset from `data` folder After the second run, this will load from results/features.npy and results/labels.npy files as it is much faster!""" # make sure results folder exists if not os.path.isdir("results"): os.mkdir("resul...