Fazhong Liu committed
Commit 9a70c5d · Parent(s): 53d9d2d
init
- .gitattributes +35 -35
- .gitignore +1 -0
- app.py +73 -0
- src/classifier.py +97 -0
- src/direction_detection.py +248 -0
- src/generate_array_feature.py +235 -0
- src/main_functions.py +153 -0
- src/main_gui.py +218 -0
- translate/cmd_judge.py +227 -0
- translate/test.py +414 -0
- translate/train_man.py +160 -0
- translate/train_name.py +162 -0
- translate/wav2com.py +0 -0
- translate/wav2npy.py +118 -0
.gitattributes
CHANGED
@@ -1,35 +1,35 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text
+# *.7z filter=lfs diff=lfs merge=lfs -text
+# *.arrow filter=lfs diff=lfs merge=lfs -text
+# *.bin filter=lfs diff=lfs merge=lfs -text
+# *.bz2 filter=lfs diff=lfs merge=lfs -text
+# *.ckpt filter=lfs diff=lfs merge=lfs -text
+# *.ftz filter=lfs diff=lfs merge=lfs -text
+# *.gz filter=lfs diff=lfs merge=lfs -text
+# *.h5 filter=lfs diff=lfs merge=lfs -text
+# *.joblib filter=lfs diff=lfs merge=lfs -text
+# *.lfs.* filter=lfs diff=lfs merge=lfs -text
+# *.mlmodel filter=lfs diff=lfs merge=lfs -text
+# *.model filter=lfs diff=lfs merge=lfs -text
+# *.msgpack filter=lfs diff=lfs merge=lfs -text
+# *.npy filter=lfs diff=lfs merge=lfs -text
+# *.npz filter=lfs diff=lfs merge=lfs -text
+# *.onnx filter=lfs diff=lfs merge=lfs -text
+# *.ot filter=lfs diff=lfs merge=lfs -text
+# *.parquet filter=lfs diff=lfs merge=lfs -text
+# *.pb filter=lfs diff=lfs merge=lfs -text
+# *.pickle filter=lfs diff=lfs merge=lfs -text
+# *.pkl filter=lfs diff=lfs merge=lfs -text
+# *.pt filter=lfs diff=lfs merge=lfs -text
+# *.pth filter=lfs diff=lfs merge=lfs -text
+# *.rar filter=lfs diff=lfs merge=lfs -text
+# *.safetensors filter=lfs diff=lfs merge=lfs -text
+# saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+# *.tar.* filter=lfs diff=lfs merge=lfs -text
+# *.tar filter=lfs diff=lfs merge=lfs -text
+# *.tflite filter=lfs diff=lfs merge=lfs -text
+# *.tgz filter=lfs diff=lfs merge=lfs -text
+# *.wasm filter=lfs diff=lfs merge=lfs -text
+# *.xz filter=lfs diff=lfs merge=lfs -text
+# *.zip filter=lfs diff=lfs merge=lfs -text
+# *.zst filter=lfs diff=lfs merge=lfs -text
+# *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore
ADDED
@@ -0,0 +1 @@
+*.wav
app.py
ADDED
@@ -0,0 +1,73 @@
+import whisper
+from pydub import AudioSegment
+import gradio as gr
+
+def convert_6ch_wav_to_stereo(input_file_path, output_file_path):
+    sound = AudioSegment.from_file(input_file_path, format="wav")
+    if sound.channels != 6:
+        raise ValueError("The input file does not have 6 channels.")
+    front_left = sound.split_to_mono()[0]
+    front_right = sound.split_to_mono()[1]
+    center = sound.split_to_mono()[2]
+    back_left = sound.split_to_mono()[4]
+    back_right = sound.split_to_mono()[5]
+    center = center - 6          # attenuate center and surrounds by 6 dB before mixing
+    back_left = back_left - 6
+    back_right = back_right - 6
+    stereo_left = front_left.overlay(center).overlay(back_left)
+    stereo_right = front_right.overlay(center).overlay(back_right)
+    stereo_sound = AudioSegment.from_mono_audiosegments(stereo_left, stereo_right)
+    stereo_sound.export(output_file_path, format="wav")
+
+
+def judge_command(file_path):
+    whisper_model = whisper.load_model("large", device="cpu")
+    out_path = './out.wav'
+    convert_6ch_wav_to_stereo(file_path, out_path)
+    result = whisper_model.transcribe(out_path, language="en")
+    text_result = result['text']
+    print(text_result)
+    return text_result
+
+
+def handle_audio_transcription(file_path):
+    try:
+        text_result = judge_command(file_path)
+        message = "Transcription successful!"
+    except Exception as e:
+        message = str(e)
+        text_result = ""
+    # fixed: combine status and text into one string, since the click
+    # handler below is wired to a single output textbox
+    return f"{message}\n{text_result}"
+
+with gr.Blocks() as audio_transcription_page:
+
+    gr.Markdown(
+        '''
+        This space transcribes the spoken words from an audio file to text.
+        ## How to use this Space?
+        - Upload a '.wav' file.
+        - The transcription of the audio will be shown after you click the transcribe button.
+        '''
+    )
+
+    with gr.Row():
+        with gr.Column():
+            audio_file = gr.File(
+                file_types=[".wav"],
+                label="Upload a '.wav' file",
+            )
+            info = gr.Textbox(
+                value="",
+                label="Log",
+                placeholder="Transcription results will appear here...",
+            )
+            transcribe_button = gr.Button("Transcribe")
+
+    transcribe_button.click(
+        handle_audio_transcription,
+        [audio_file],
+        [info]
+    )
+
+if __name__ == "__main__":
+    audio_transcription_page.launch(debug=True)
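A minimal local smoke test for the two helpers above (a sketch: it assumes app.py can be imported without launching the UI, that ffmpeg is installed for pydub, and that a 6-channel recording exists; the file names are placeholders):

    from app import convert_6ch_wav_to_stereo, judge_command

    convert_6ch_wav_to_stereo("input_6ch.wav", "downmixed.wav")  # raises ValueError unless the input has 6 channels
    print(judge_command("input_6ch.wav"))  # downmixes to ./out.wav, then transcribes with Whisper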
src/classifier.py
ADDED
@@ -0,0 +1,97 @@
+"""
+Annotated version (comments translated from Chinese).
+"""
+#%% Import the required packages
+import numpy as np
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Dense, Dropout
+from tensorflow.keras.losses import binary_crossentropy
+from tensorflow.keras.optimizers import Adam
+from sklearn.metrics import roc_curve
+from scipy.interpolate import interp1d
+from scipy.optimize import brentq
+import matplotlib.pyplot as plt
+from scipy.io.wavfile import read
+from sklearn.preprocessing import normalize
+from generate_array_feature import mald_feature, get_filelist
+import time
+
+
+#%% Define the classifier model
+# This block defines the model:
+# its batch_size, feature length, and so on.
+batch_size = 10
+feature_len = 110
+loss_function = binary_crossentropy
+no_epochs = 150
+optimizer = Adam()
+verbosity = 1
+model = Sequential()
+model.add(Dense(64, input_dim=feature_len, activation='relu'))
+model.add(Dropout(0.2))
+model.add(Dense(32, activation='relu'))
+model.add(Dropout(0.2))
+model.add(Dense(16, activation='relu'))
+model.add(Dense(1, activation='sigmoid'))
+model.compile(loss=loss_function, optimizer=optimizer, metrics=['accuracy'])
+# The basic classifier parameters are now set; next, load a pre-trained model from an hdf5 file.
+model.load_weights(r"/home/fazhong/Github/czx/model.hdf5")
+# The model is loaded from train2.hdf5,
+# which was trained on data2.npy,
+# so there is no overlap with the data1.npy data.
+
+
+#%% Load the audio
+
+
+data_npy = np.load('./data.npy', allow_pickle=True)
+labels_npy = np.load('./labels.npy', allow_pickle=True)
+
+data = data_npy.tolist()
+labels_org = labels_npy.tolist()
+labels = []
+for x in labels_org:
+    labels.append(x[0])
+
+
+voice = []
+# voice holds the waveforms extracted from a set of wav audio files
+X = []  # X is the feature ~ data[0]
+y = []  # y is the normal (1) or attack (0) ~ data[1]
+
+# for file_path in name_all:
+#     file_name = file_path.split("\\")[-1]
+#     # define the normal or attack in variable cur_y
+#     if 'normal' in file_name:
+#         cur_y = 1  # normal case
+#     elif 'attack' in file_name:
+#         cur_y = 0
+#     # split the file name
+#     # read the data
+#     rate, data = read(file_path)
+#     voice += [list(data)]
+
+#     X += [list(mald_feature(rate, data))]
+#     print(list(mald_feature(rate, data)))
+#     # the feature extraction from wav files is done by generate_array_feature.py
+#     # X is the feature vector; its dimension is 110
+#     y += [cur_y]
+#     # y is the label: 1 means a normal sample, 0 an attack sample
+
+
+X = data
+Y = labels
+# normalization
+norm_X = normalize(X, axis=0, norm='max')
+
+X = np.asarray(norm_X)
+y = np.asarray(Y)  # fixed: use the loaded labels, not the empty list above
+
+#%% Run the prediction
+scores = model.evaluate(X, y)  # overall evaluation
+y_pred = np.round(model.predict(X))  # per-sample predictions
+print(y_pred)
+acc = 0
+for i in range(len(y)):
+    if y_pred[i] == y[i]:  # fixed: compare element-wise, not against the whole array
+        acc += 1
+print(acc / len(y))
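For reference, the commented-out block above implies how the data.npy / labels.npy pair could be produced from a folder of wav files; a hedged sketch (the ./voice folder and the 'normal'/'attack' file-naming convention are assumptions drawn from that block):

    import numpy as np
    from scipy.io.wavfile import read
    from generate_array_feature import mald_feature, get_filelist

    X, y = [], []
    for path in get_filelist("./voice"):          # hypothetical data folder
        rate, wav = read(path)
        X.append(list(mald_feature(rate, wav)))   # 110-dimensional feature vector
        y.append([1 if "normal" in path else 0])  # 1 = normal, 0 = attack
    np.save("data.npy", np.asarray(X, dtype=object))
    np.save("labels.npy", np.asarray(y, dtype=object))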
src/direction_detection.py
ADDED
@@ -0,0 +1,248 @@
+"""
+This python script is used for direction detection.
+We design the direction detection for 3 wav file types, which have
+4, 6 and 8 channels.
+"""
+import os
+import numpy as np
+from scipy.io import wavfile
+from scipy.signal import butter, lfilter, freqz
+from scipy import signal
+import matplotlib
+import matplotlib.pyplot as plt
+
+offsetVector = []
+
+"""
+The functions butter_highpass, butter_highpass_filter and calculateResidues are shared.
+The offset logic is used by getAngle_for_eight.
+"""
+def butter_highpass(cutoff, fs, order=5):
+    nyq = 0.5 * fs
+    normal_cutoff = cutoff / nyq
+    b, a = signal.butter(order, normal_cutoff, btype='high', analog=False)
+    return b, a
+
+
+def butter_highpass_filter(data, cutoff, fs, order=5):
+    b, a = butter_highpass(cutoff, fs, order=order)
+    y = signal.filtfilt(b, a, data)
+    return y
+
+
+def calculateResidues(Chan1, Chan2, fs):
+    S1 = butter_highpass_filter(Chan1, 100, fs, 7)
+    S2 = butter_highpass_filter(Chan2, 100, fs, 7)
+
+    index1 = -1
+    index2 = -1
+    index = -1
+
+    for i in range(len(S1)):
+        if S1[i] > 0.03:
+            index1 = i
+            break
+
+    for i in range(len(S2)):
+        if S2[i] > 0.03:
+            index2 = i
+            break
+
+    if (index1 < index2):
+        index = index1
+    else:
+        index = index2
+
+    residues = np.mean(np.square(S1[index:index + 401] - S2[index:index + 401]))
+    # offsetVector.append( index1 )
+
+    return residues
+
+
+def do_iac(signal, pairs, fs):
+    # signal = data / 32767
+    residuesVector = []
+
+    for offset in [5, -5]:
+
+        # Compute the overall cancellation error for this angle
+        iterator = 0
+        residues = 0
+        for mic1, mic2 in pairs:
+
+            Chan1 = signal[:, mic1]
+            Chan2 = signal[:, mic2]
+
+            S1 = Chan1  # butter_highpass_filter(Chan1 , 100 , fs , 7)
+            S2 = Chan2  # butter_highpass_filter(Chan2 , 100 , fs , 7)
+
+            index = -1
+            for i in range(len(S1)):
+                if (S1[i] > 0.003 and i > 40):
+                    index = i
+                    break
+
+            if (iterator == 0 or iterator == 4):
+                a = S1[index - 15:index + 15]
+                b = S2[index - 15:index + 15]
+                residues += np.square(np.subtract(a, b))
+            elif (iterator == 1 or iterator == 3):
+                a = S1[index - 15 + offset // 2:index + 15 + offset // 2]
+                b = S2[index - 15:index + 15]
+                residues += np.square(np.subtract(a, b))
+            elif (iterator == 2):
+                a = S1[index - 15 + offset:index + 15 + offset]
+                b = S2[index - 15:index + 15]
+                residues += np.square(np.subtract(a, b))
+            elif (iterator == 5 or iterator == 7):
+                a = S1[index - 15 - offset // 2:index + 15 - offset // 2]
+                b = S2[index - 15:index + 15]
+                residues += np.square(np.subtract(a, b))
+            elif (iterator == 6):
+                a = S1[index - 15 - offset:index + 15 - offset]
+                b = S2[index - 15:index + 15]
+                residues += np.square(np.subtract(a, b))
+
+            iterator += 1
+
+        residuesVector.append(np.mean(residues))
+
+    return residuesVector[0] < residuesVector[1]
+
+
+def calculateResidues_eight(Chan1, Chan2, fs):
+    S1 = Chan1  # butter_highpass_filter(Chan1 , 100 , fs , 7 )
+    S2 = Chan2  # butter_highpass_filter(Chan2 , 100 , fs , 7 )
+
+    index1 = -1
+    index2 = -1
+    index = -1
+
+    for i in range(len(S1)):
+        if S1[i] > 0.01:
+            index1 = i
+            break
+
+    for i in range(len(S2)):
+        if S2[i] > 0.01:
+            index2 = i
+            break
+
+    if (index1 < index2):
+        index = index1
+    else:
+        index = index2
+
+    residues = np.mean(np.square(S1[index:index + 401] - S2[index:index + 401]))
+
+    return residues
+
+
+def getAngle_for_eight(data, fs):
+    signal = data / 32767
+    for i in range(8):
+        column = butter_highpass_filter(signal[:, i], 100, fs, 7)
+        signal[:, i] = column
+
+    pairs = [(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7), (7, 0)]
+    smallestResidues = 100
+    closestPair = (0, 0)
+    offsetIndex = -1
+
+    for iter in range(8):
+
+        chan1 = signal[:, pairs[iter][0]]
+        chan2 = signal[:, pairs[iter][1]]
+
+        residues = calculateResidues_eight(chan1, chan2, fs)
+
+        if (residues < smallestResidues):
+            smallestResidues = residues
+            closestPair = (pairs[iter])
+            offsetIndex = iter
+
+    if do_iac(signal, pairs, fs) == True:
+        d1 = abs(offsetIndex - 4)
+        d2 = abs((offsetIndex + 4) % 8 - 4)
+        if (d1 < d2):
+            pass
+        else:
+            closestPair = pairs[(offsetIndex + 4) % 8]
+
+    mics = (closestPair[0] + 1, closestPair[1] + 1)
+
+    return mics
+
+
+def getAngle_for_six(data, fs):
+    signal = data / 32767
+    pairs = [(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 0)]
+    smallestResidues = 100
+    closestPair = (0, 0)
+    offsetIndex = -1
+
+    for iter in range(6):
+
+        chan1 = signal[:, pairs[iter][0]]
+        chan2 = signal[:, pairs[iter][1]]
+
+        residues = calculateResidues(chan1, chan2, fs)
+
+        if (residues < smallestResidues):
+            smallestResidues = residues
+            closestPair = (pairs[iter])
+            offsetIndex = iter
+
+    """ if (offsetVector[offsetIndex] > offsetVector[(offsetIndex+3)%6] ):
+        closestPair = pairs[(offsetIndex+3)%6] """
+
+    mics = (closestPair[0] + 1, closestPair[1] + 1)
+    # print(offsetVector)
+
+    return mics
+
+
+def getAngle_for_four(data, fs):
+    signal = data / 32767
+    pairs = [(0, 1), (1, 2), (2, 3), (3, 0)]
+    smallestResidues = 100
+    closestPair = (0, 0)
+    offsetIndex = -1
+
+    for iter in range(4):
+
+        chan1 = signal[:, pairs[iter][0]]
+        chan2 = signal[:, pairs[iter][1]]
+
+        residues = calculateResidues(chan1, chan2, fs)
+
+        if (residues < smallestResidues):
+            smallestResidues = residues
+            closestPair = (pairs[iter])
+            offsetIndex = iter
+
+    """ if (offsetVector[offsetIndex] > offsetVector[(offsetIndex+3)%6] ):
+        closestPair = pairs[(offsetIndex+3)%6] """
+
+    mics = (closestPair[0] + 1, closestPair[1] + 1)
+    # print(offsetVector)
+
+    return mics
+
+
+def getDirection_Pair(closestPair, num_chan):
+    """
+    :param closestPair: the closest (1-based) pair, such as (1, 2)
+    :param num_chan: number of channels, such as 8
+    :return: with the parameters above, [4, 0, 1, 5]
+    """
+    pairs = [0, 0, 0, 0]
+    pairs[1] = closestPair[0] - 1
+    pairs[2] = closestPair[1] - 1
+    pairs[0] = (pairs[1] - int(num_chan/2)) % num_chan
+    pairs[3] = (pairs[2] + int(num_chan/2)) % num_chan
+
+    return pairs
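A short usage sketch for the detection pipeline above (assuming an 8-channel 16-bit wav; the path is a placeholder):

    import numpy as np
    from scipy.io import wavfile
    from direction_detection import getAngle_for_eight, getDirection_Pair

    rate, data = wavfile.read("array_8ch.wav")           # shape (samples, 8), int16
    mics = getAngle_for_eight(data.astype(float), rate)  # 1-based pair closest to the source
    print(getDirection_Pair(mics, 8))                    # e.g. mics=(1, 2) -> [4, 0, 1, 5]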
src/generate_array_feature.py
ADDED
@@ -0,0 +1,235 @@
+'''
+This is the main ArrayID feature building script
+
+revised: April 04, 2021
+
+'''
+
+import glob
+import os
+import numpy as np
+import matplotlib.pyplot as plt
+from scipy.io.wavfile import read
+from scipy.fftpack import fft, ifft, fftfreq
+from scipy import signal
+import random
+from librosa.core import lpc
+import librosa.feature
+import csv
+from sklearn.preprocessing import normalize
+from direction_detection import *
+
+
+##############################################
+# HELPER FUNCTIONS
+
+# converts hz to indices -> allows splicing of freq data
+def hz_to_indices(freqs, lowcut, highcut):
+    i = 0
+    while freqs[i] < lowcut:
+        i += 1
+    low = i
+    while freqs[i] < highcut:
+        i += 1
+    return low, i
+
+
+# compresses our feature vectors
+# After extracting our features, they could be different lengths depending on
+# the input signal, so we normalize each feature vector to be the same no matter
+# the speaker
+def get_row_compressor(old_dimension, new_dimension):
+    dim_compressor = np.zeros((new_dimension, old_dimension))
+    bin_size = float(old_dimension) / new_dimension
+    next_bin_break = bin_size
+    which_row = 0
+    which_column = 0
+    while which_row < dim_compressor.shape[0] and which_column < dim_compressor.shape[1]:
+        if round(next_bin_break - which_column, 10) >= 1:
+            dim_compressor[which_row, which_column] = 1
+            which_column += 1
+        elif next_bin_break == which_column:
+            which_row += 1
+            next_bin_break += bin_size
+        else:
+            partial_credit = next_bin_break - which_column
+            dim_compressor[which_row, which_column] = partial_credit
+            which_row += 1
+            dim_compressor[which_row, which_column] = 1 - partial_credit
+            which_column += 1
+            next_bin_break += bin_size
+    dim_compressor /= bin_size
+    return dim_compressor
+
+# helper functions for the function above
+def get_column_compressor(old_dimension, new_dimension):
+    return get_row_compressor(old_dimension, new_dimension).transpose()
+
+def compress_and_average(array, new_shape):
+    return np.mat(get_row_compressor(array.shape[0], new_shape[0])) * \
+           np.mat(array) * \
+           np.mat(get_column_compressor(array.shape[1], new_shape[1]))
+##############################################
+
+
+##############################################
+# MAIN FEATURE EXTRACTION FUNCTIONS
+
+
+def get_filelist(dir):
+    Filelist = []
+    for home, dirs, files in os.walk(dir):
+        for filename in files:
+            Filelist.append(os.path.join(home, filename))
+    return Filelist
+
+
+def lpcc(data, n=15):
+    """
+    f_LPC = lpcc(data, n): get the LPCC from the voice data
+    The order n is 15
+    """
+    size_lpc = n  # define the order of LPCC
+    a = lpc(data, order=size_lpc)  # use the built-in function
+    a = -a
+    f_LPC = np.zeros(len(a))
+    f_LPC[0] = np.log(size_lpc)
+    for i in range(1, len(a)):
+        k = np.arange(1, i)  # k from 1 to i-1
+        f_LPC[i] = a[i] + np.sum((1 - k/i) * a[k] * f_LPC[i - k])
+    return f_LPC[1:]
+
+
+# returns long term fft
+def get_ltfd(spec, m=20, start_index=1, end_index=86):
+    # only get the useful part
+    spec = spec[:, start_index: end_index, :(spec.shape[2] - spec.shape[2] % m)]
+
+    # merge the spec along the time axis
+    channels = np.sum(spec, axis=2)
+
+    all_ffts = np.sum(channels, axis=0)
+    all_ffts /= np.max(all_ffts)
+
+    channels_ffts = np.asarray([channels[i, :] / np.max(channels[i, :]) for i in range(channels.shape[0])])
+
+    return all_ffts, channels_ffts
+
+
+# returns long term fft
+def get_ltfp(spec, m=20, start_index_fp=1, end_index_fp=86):
+    # only get the useful part
+    spec = spec[:, start_index_fp:end_index_fp, :(spec.shape[2] - spec.shape[2] % m)]
+
+    # split the data
+    splices = np.asarray(np.split(spec, m, axis=2))
+
+    # merge the data onto a grid
+    mesh = np.zeros((splices.shape[0], splices.shape[1], splices.shape[2]))
+    for i in range(mesh.shape[0]):
+        for j in range(mesh.shape[1]):
+            for k in range(mesh.shape[2]):
+                mesh[i, j, k] = np.sum(splices[i, j, k, :])
+
+    # calculate the standard deviation
+    std_feature = np.zeros((mesh.shape[0], mesh.shape[2]))
+    for i in range(std_feature.shape[0]):
+        for j in range(std_feature.shape[1]):
+            std_feature[i, j] = np.std(mesh[i, :, j]) / np.mean(mesh[i, :, j])
+
+    # define the ltfp
+    LTFP = np.mean(std_feature, axis=0)
+    LTFP = LTFP / np.max(LTFP)
+    return LTFP
+
+
+def feature_distribution(channel_fft):
+    num_feature = 5
+    f_dis = np.zeros(2 * num_feature)
+    co = np.zeros((num_feature, len(channel_fft)))
+    for num in range(len(channel_fft)):
+        a = channel_fft[num]
+        for i in range(1, len(a)):
+            a[i] = a[i-1] + a[i]
+        a = a / np.max(a)
+        dis_index = [0.1, 0.3, 0.5, 0.7, 0.9]
+        for i in range(len(dis_index)):
+            co[i, num] = find_value(a, dis_index[i])
+        co[:, num] /= len(a)
+    for i in range(num_feature):
+        f_dis[i] = np.mean(co[i, :])
+        f_dis[i + num_feature] = np.std(co[i, :])
+    return co, f_dis
+
+
+def find_value(a, dis_index):
+    c = 0
+    for i in range(len(a) - 1):
+        if a[i] <= dis_index <= a[i + 1]:
+            c = i
+            break
+    return c
+
+
+def mald_feature(rate, data):
+    n_fft = 4096
+    # detect the direction
+    if data.shape[1] == 4:
+        closestPair = getAngle_for_four(data, fs=rate)
+    elif data.shape[1] == 6:
+        closestPair = getAngle_for_six(data, fs=rate)
+    elif data.shape[1] == 8:
+        closestPair = getAngle_for_eight(data, fs=rate)
+    pairs = getDirection_Pair(closestPair, data.shape[1])
+
+    # low and high thresholds for field print features -> we want the 1 Hz - 5 kHz range
+    lowcut_fp = 1
+    highcut_fp = 5000
+    if highcut_fp > rate / 2:  # in case the sampling rate is very small
+        highcut_fp = rate / 2 - 100
+    highcut_fd = 1000
+
+    # input rate -> make sure to change this based on device.
+    # All of the devices are 44100 except for the AMLOGIC, which is 16kHz.
+    # If this rate is not changed accordingly, the _ltfp and _ltfft features
+    # will be off
+
+    # just some helper splicing globals
+    freq = fftfreq(n_fft, 1. / rate)  # data = logmmse(data, rate)
+    start_index, end_index = hz_to_indices(freq, lowcut_fp, highcut_fd)
+    start_index_fp, end_index_fp = hz_to_indices(freq, lowcut_fp, highcut_fp)
+
+
+    # empty feature vectors
+    _lpcc = []
+    # extract lfp and lpcc from each channel independently, then sum
+    for i in pairs:
+        a = np.asfortranarray(data[:, i]).astype(dtype=float)
+        _lpcc += list(lpcc(a))
+
+    # calculate the spectrogram
+    spec = [signal.stft(data[:, i], fs=rate, window='hann', nperseg=1024, noverlap=768, nfft=n_fft)[2] for i in range(data.shape[1])]
+    spec = np.asarray(spec)  # convert list to numpy
+    # obtain the absolute value
+    spec = np.abs(spec)
+
+    # get the ltfd feature and compress it to a 20-bin vector
+    _ltfd, channel_fft = get_ltfd(spec=spec, start_index=start_index, end_index=end_index)
+
+    _ltfd = list(compress_and_average(_ltfd.reshape(len(_ltfd), 1), (20, 1)).flat)
+
+    co, _fdis = feature_distribution(channel_fft)
+
+    # get the ltfp feature and compress it to a 20-bin vector
+    _ltfp = get_ltfp(spec=spec, start_index_fp=start_index_fp, end_index_fp=end_index_fp)
+    _ltfp = list(compress_and_average(_ltfp.reshape(len(_ltfp), 1), (20, 1)).flat)
+
+    # feature is the final feature vector; in training, each data point is formed
+    # as a tuple (X, y), where X is the feature vector and y is the label
+    feature = np.concatenate((_lpcc, _ltfd, _fdis, _ltfp))
+    return feature
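Under the defaults above, the 110-dimensional feature decomposes as 4 channels x 15 LPCC coefficients = 60, plus 20 LTFD bins, plus 10 distribution statistics from feature_distribution, plus 20 LTFP bins. A quick shape check (the input path is a placeholder; the wav must have 4, 6, or 8 channels):

    from scipy.io.wavfile import read
    from generate_array_feature import mald_feature

    rate, data = read("sample_6ch.wav")
    print(mald_feature(rate, data).shape)  # expected: (110,)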
src/main_functions.py
ADDED
@@ -0,0 +1,153 @@
+"""
+Annotated version (comments translated from Chinese).
+"""
+#%% Import the required packages
+import numpy as np
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Dense, Dropout
+from tensorflow.keras.losses import binary_crossentropy
+from tensorflow.keras.optimizers import Adam
+from sklearn.metrics import roc_curve
+from scipy.interpolate import interp1d
+from scipy.optimize import brentq
+import matplotlib.pyplot as plt
+from scipy.io.wavfile import read
+from sklearn.preprocessing import normalize
+from generate_array_feature import mald_feature, get_filelist
+import time
+
+
+#%% Define the classifier model
+# This block defines the model:
+# its batch_size, feature length, and so on.
+batch_size = 10
+feature_len = 110
+loss_function = binary_crossentropy
+no_epochs = 150
+optimizer = Adam()
+verbosity = 1
+model = Sequential()
+model.add(Dense(64, input_dim=feature_len, activation='relu'))
+model.add(Dropout(0.2))
+model.add(Dense(32, activation='relu'))
+model.add(Dropout(0.2))
+model.add(Dense(16, activation='relu'))
+model.add(Dense(1, activation='sigmoid'))
+model.compile(loss=loss_function, optimizer=optimizer, metrics=['accuracy'])
+# The basic classifier parameters are now set; next, load a pre-trained model from an hdf5 file.
+model.load_weights(r"/home/fazhong/Github/czx/model.hdf5")
+# The model is loaded from train2.hdf5,
+# which was trained on data2.npy,
+# so there is no overlap with the data1.npy data.
+
+
+#%% Load the audio
+path_wave = r"/home/fazhong/Github/czx/voice"
+print("Loading data ...")
+name_all = get_filelist(path_wave)
+voice = []
+# voice holds the waveforms extracted from a set of wav audio files
+X = []  # X is the feature ~ data[0]
+y = []  # y is the normal (1) or attack (0) ~ data[1]
+
+for file_path in name_all:
+    file_name = file_path.split("\\")[-1]
+    # define the normal or attack in variable cur_y
+    if 'normal' in file_name:
+        cur_y = 1  # normal case
+    elif 'attack' in file_name:
+        cur_y = 0
+    # split the file name
+    # read the data
+    rate, data = read(file_path)
+    voice += [list(data)]
+
+    X += [list(mald_feature(rate, data))]
+    print(list(mald_feature(rate, data)))
+    # the feature extraction from wav files is done by generate_array_feature.py
+    # X is the feature vector; its dimension is 110
+    y += [cur_y]
+    # y is the label: 1 means a normal sample, 0 an attack sample
+
+# normalization
+norm_X = normalize(X, axis=0, norm='max')
+# X_y = [(norm_X[i], y[i]) for i in range(len(norm_X))]
+# # print(len(X_y))
+# # for i in X_y: print(i[1])
+# X_y = np.asarray(X_y)
+
+X = np.asarray(norm_X)
+y = np.asarray(y)
+
+# X = np.asarray([x[0] for x in X_y])
+# y = np.asarray([x[1] for x in X_y])
+
+#%% Plot the features
+index1 = [5]  # pick sample 5
+x1 = X[index1]
+y1 = y[index1]  # 1, meaning normal
+plt.plot(x1.T, label='normal')
+index2 = [1]  # pick sample 1
+x2 = X[index2]
+y2 = y[index2]  # 0, meaning attack
+plt.plot(x2.T, label='attack')
+plt.legend()
+plt.show()
+# The difference between normal and attack is clearly visible; this is the basis of our classification.
+
+#%% Run the prediction
+scores = model.evaluate(X, y)  # overall evaluation
+y_pred = np.round(model.predict(X))  # per-sample predictions
+index1 = 8  # sample 8 is a normal sample
+index3 = [1, 3, 5, 7, 9]  # pick some samples; once the wav files arrive, the input will be wav directly
+for i in index3:
+    print('Starting detection:')
+    plt.plot(voice[i], label='Voice Signal')
+    plt.show()
+    time.sleep(2)
+    if y[i] == 1:  # normal case
+        print('the ' + str(i) + ' sample is normal')
+        title = 'the ' + str(i) + ' sample is normal'
+        plt.subplot(1, 2, 1)
+        plt.plot(X[index1])
+        plt.subplot(1, 2, 2)
+        plt.plot(X[i], label='New')
+        plt.title(title)
+        plt.show()
+        time.sleep(1)
+        if y_pred[i] == y[i]:
+            print("Successfully Detect")  # correct prediction
+            print("Run the car")
+            title = "Successfully Detect, " + "Run the car"
+            plt.title(title)
+            plt.show()
+        else:
+            print("Detection is false.")  # wrong prediction
+            print("Don't run the car")
+            title = "Detection is false, " + "Don't run the car"
+            plt.title(title)
+            plt.show()
+    else:  # attack case: the decision is reversed
+        print('the ' + str(i) + ' sample is attack')
+        title = 'the ' + str(i) + ' sample is attack'
+        plt.subplot(1, 2, 1)
+        plt.plot(X[index1], label='Normal')
+        plt.subplot(1, 2, 2)
+        plt.plot(X[i], label='New')
+        plt.title(title)
+        plt.show()
+        time.sleep(1)
+        if y_pred[i] == y[i]:
+            print("Successfully Detect")  # correct prediction
+            print("Don't run the car")
+            title = "Successfully Detect, " + "Don't run the car"
+            plt.title(title)
+            plt.show()
+        else:
+            print("Detection is false.")  # wrong prediction
+            print("Run the car")
+            title = "Detection is false, " + "Run the car"
+            plt.title(title)
+            plt.show()
+
+    print("-------------------------")
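The script above only loads model.hdf5; a sketch of how such a checkpoint could be produced with the same architecture, reusing the X, y, and hyperparameters already defined in the script (the training step itself is an assumption, since no training script is part of this commit):

    model.fit(X, y, batch_size=batch_size, epochs=no_epochs, verbose=verbosity)
    model.save_weights("model.hdf5")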
src/main_gui.py
ADDED
@@ -0,0 +1,218 @@
+#code
+#coding=UTF-8
+# ! -*- coding: utf-8 -*-
+from __future__ import print_function
+
+#%% Import the required packages
+import numpy as np
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Dense, Dropout
+from tensorflow.keras.losses import binary_crossentropy
+from tensorflow.keras.optimizers import Adam
+from sklearn.metrics import roc_curve
+from scipy.interpolate import interp1d
+from scipy.optimize import brentq
+import matplotlib.pyplot as plt
+from scipy.io.wavfile import read
+from sklearn.preprocessing import normalize
+from generate_array_feature import mald_feature, get_filelist
+
+import os
+import threading
+import tkinter as tk
+from tkinter import filedialog
+from PIL import Image, ImageTk
+from tkinter.messagebox import *
+from tkinter import scrolledtext
+top1 = None
+top2 = None
+top4 = None
+top5 = None
+top6 = None
+top7 = None
+img_open = None
+img = None
+v1 = None
+v2 = None
+ll = 0
+s2 = ''
+s1 = ''
+top3 = None
+t2 = None
+s = ''
+f1 = "fg.txt"
+f2 = "fg.txt"
+v = None
+top = None
+v = {}
+d1 = {}
+d2 = {}
+message = ""
+ermsg = ""
+picn = 0
+arg = []
+class MyThread(threading.Thread):
+    def __init__(self, func, *args):  # run in a thread so the GUI does not freeze
+        super().__init__()
+
+        self.func = func
+        self.args = args
+
+        self.setDaemon(True)
+        self.start()
+
+    def run(self):
+        self.func(*self.args)
+
+def chf(tt1):  # choose an audio file
+    global f1
+    f1 = filedialog.askopenfilename()
+    showinfo("Open File", "Open a new File.")
+    tt1.delete(0.0, tk.END)
+    tt1.insert(0.0, f1)
+
+
+def info():
+    pp = 'Voice interface security'
+    showinfo('Information', pp)
+
+def build_model():
+    # %% Define the classifier model
+    # This block defines the model:
+    # its batch_size, feature length, and so on.
+    batch_size = 10
+    feature_len = 110
+    loss_function = binary_crossentropy
+    no_epochs = 150
+    optimizer = Adam()
+    verbosity = 1
+    model = Sequential()
+    model.add(Dense(64, input_dim=feature_len, activation='relu'))
+    model.add(Dropout(0.2))
+    model.add(Dense(32, activation='relu'))
+    model.add(Dropout(0.2))
+    model.add(Dense(16, activation='relu'))
+    model.add(Dense(1, activation='sigmoid'))
+    model.compile(loss=loss_function, optimizer=optimizer, metrics=['accuracy'])
+    # The basic classifier parameters are now set; next, load a pre-trained model from an hdf5 file.
+    model.load_weights("model.hdf5")
+    # The model is loaded from train2.hdf5,
+    # which was trained on data2.npy,
+    # so there is no overlap with the data1.npy data.
+    return model
+
+def show_data(f1):
+    file_path = f1
+    print(f1)
+    rate, data = read(file_path)
+    plt.plot(data, label='Voice Signal')
+    plt.show()
+
+
+def show_feature(f1):
+    file_path = f1
+    file_name = file_path.split("\\")[-1]
+    # define the normal or attack in variable cur_y
+    if 'normal' in file_name:
+        cur_y = 1  # normal case
+    elif 'attack' in file_name:
+        cur_y = 0
+    # split the file name
+    # read the data
+    rate, data = read(file_path)
+    X = mald_feature(rate, data)
+    # the feature extraction from the wav file is done by generate_array_feature.py
+    # X is the feature vector; its dimension is 110
+    y = cur_y
+    # y is the label: 1 means a normal sample, 0 an attack sample
+    if y == 1:  # normal case
+        title = 'the sample is normal'
+    else:
+        title = 'the sample is attack'
+    plt.plot(X)
+    plt.title(title)
+    plt.show()
+
+
+def detect(f1, model):
+    file_path = f1
+    file_name = file_path.split("\\")[-1]
+    # define the normal or attack in variable cur_y
+    if 'normal' in file_name:
+        cur_y = 1  # normal case
+    elif 'attack' in file_name:
+        cur_y = 0
+    # split the file name
+    # read the data
+    rate, data = read(file_path)
+    X = []
+    X += [list(mald_feature(rate, data))]
+    X += [list(mald_feature(rate, data))]
+    # append twice, because the model expects a 2-D input
+    X = np.asarray(X)
+
+    # the feature extraction from the wav file is done by generate_array_feature.py
+    # X is the feature vector; its dimension is 110
+    y = cur_y
+    # y is the label: 1 means a normal sample, 0 an attack sample
+    y_pred = np.round(model.predict(X))
+    # run the prediction
+    y_pred = y_pred[0]
+
+    if y == 1:  # normal case
+        if y_pred == y:
+            print("Correct prediction")
+            print("The car runs")
+            title = "Command normal, prediction correct, the car runs"
+            print('--------------')
+            print(title)
+        else:
+            print("Wrong prediction")
+            print("The car stays still")
+            title = "Command normal, prediction wrong, the car stays still"
+            print('--------------')
+            print(title)
+    else:  # attack case: the decision is reversed
+        if y_pred == y:
+            print("Correct prediction")
+            print("The car stays still")
+            title = "Command abnormal, prediction correct, the car stays still"
+            print('--------------')
+            print(title)
+        else:
+            print("Wrong prediction")
+            print("The car runs")
+            title = "Command abnormal, prediction wrong, the car runs"
+            print('--------------')
+            print(title)
+
+
+ans = ""
+
+
+root = tk.Tk(className='Voice interface authentication system')
+#root.iconbitmap('bf.ico')
+root.attributes("-alpha", 0.9)
+tk.Label(root, height=10, width=5).grid(row=0, column=0)
+fra = tk.Frame(root, width=55, height=100)
+fra.grid(row=0, column=1)
+tk.Label(root, height=10, width=5).grid(row=0, column=2)
+tk.Label(fra, text='', height=1, width=10).grid(row=0, column=0)
+
+tt1 = tk.Text(fra, height=2, width=30)
+tt1.grid(row=1, column=0)
+tk.Button(fra, text='First select the audio data', command=lambda: chf(tt1)).grid(row=1, column=1)
+model = build_model()
+
+
+train = tk.Button(fra, text='Show the audio content', font=('KaiTi,bold'), borderwidth=3, command=lambda: MyThread(show_data, f1))  # done
+train.grid(row=3, column=0)
+
+train = tk.Button(fra, text='Show the audio features', font=('KaiTi,bold'), borderwidth=3, command=lambda: MyThread(show_feature, f1))  # done
+train.grid(row=5, column=0)
+
+train = tk.Button(fra, text='Show the detection result', font=('KaiTi,bold'), borderwidth=3, command=lambda: MyThread(detect, f1, model))  # done
+train.grid(row=7, column=0)
+
+
+tk.mainloop()
translate/cmd_judge.py
ADDED
@@ -0,0 +1,227 @@
+import numpy as np
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Dense, Dropout
+from tensorflow.keras.losses import binary_crossentropy
+from tensorflow.keras.optimizers import Adam
+from tensorflow.keras.models import load_model
+from sklearn.metrics import roc_curve
+from scipy.interpolate import interp1d
+from scipy.optimize import brentq
+import matplotlib.pyplot as plt
+from scipy.io.wavfile import read
+from sklearn.preprocessing import normalize
+from generate_array_feature import mald_feature, get_filelist
+import time
+from pydub import AudioSegment
+import whisper
+import os
+import spacy
+
+# To deal with one wav file.
+
+def is_command_reasonable(command, time, location):
+
+    commands = [
+        "OK Google.",
+        "Turn on Bluetooth.",
+        "Record a video.",
+        "Take a photo.",
+        "Open music player.",
+        "Set an alarm for 6:30 am.",
+        "Remind me to buy coffee at 7 am.",
+        "What is my schedule for tomorrow?",
+        "Square root of 2105?",
+        "Open browser.",
+        "Decrease volume.",
+        "Turn on flashlight.",
+        "Set the volume to full.",
+        "Mute the volume.",
+        "What's the definition of transmit?",
+        "Call Pizza Hut.",
+        "Call the nearest computer shop.",
+        "Show me my messages.",
+        "Translate please give me directions to Chinese.",
+        "How do you say good night in Japanese?"
+    ]
+
+
+    # Time : Work-0 / Rest-1 / Sleep-2
+    # Location : Work-0 / Home-1
+
+    commands_daily = [
+        "Call Pizza Hut.",
+        "Remind me to buy coffee at 7 am.",
+        "Open music player.",
+        "Record a video.",
+        "Take a photo.",
+    ]
+    commands_work = [
+        "Open browser.",
+        "What is my schedule for tomorrow?",
+        "Square root of 2105?",
+        "Call the nearest computer shop.",
+        "Show me my messages.",
+        "Translate please give me directions to Chinese.",
+        "How do you say good night in Japanese?",
+        "What's the definition of transmit?",
+    ]
+    commands_basic = [
+        "OK Google.",
+        "Decrease volume.",
+        "Turn on Bluetooth.",
+        "Turn on flashlight.",
+        "Set the volume to full.",
+        "Mute the volume.",
+        "Set an alarm for 6:30 am."]
+
+
+    if time == 0 and location == 0:
+        if command in commands_daily:
+            return False
+        else:
+            return True
+    elif time == 2:
+        if command in commands_basic:
+            return True
+        else:
+            return False
+    else:
+        if command in commands_work:
+            return False
+        else:
+            return True
+
+def convert_6ch_wav_to_stereo(input_file_path, output_file_path):
+    sound = AudioSegment.from_file(input_file_path, format="wav")
+    if sound.channels != 6:
+        raise ValueError("The input file does not have 6 channels.")
+    front_left = sound.split_to_mono()[0]
+    front_right = sound.split_to_mono()[1]
+    center = sound.split_to_mono()[2]
+    back_left = sound.split_to_mono()[4]
+    back_right = sound.split_to_mono()[5]
+    center = center - 6
+    back_left = back_left - 6
+    back_right = back_right - 6
+    stereo_left = front_left.overlay(center).overlay(back_left)
+    stereo_right = front_right.overlay(center).overlay(back_right)
+    stereo_sound = AudioSegment.from_mono_audiosegments(stereo_left, stereo_right)
+    stereo_sound.export(output_file_path, format="wav")
+
+def judge_human(rate, data):
+    model = load_model('/home/fazhong/Github/czx/data-task0_1/train1.keras')
+    feature = list(mald_feature(rate, data))
+    features = np.array([feature])
+    y_pred = model.predict(features)
+    return y_pred[0]
+
+def judge_name(rate, data):
+    model = load_model('/home/fazhong/Github/czx/data-task0/train1.keras')
+    feature = list(mald_feature(rate, data))
+    features = np.array([feature])
+    y_pred = model.predict(features)
+    y_pred_classes = np.argmax(y_pred, axis=1)
+    return y_pred_classes[0]
+
+def judge_command(file_path):
+    whisper_model = whisper.load_model("large")
+    out_path = '/home/fazhong/Github/czx/temp/temp.wav'
+    convert_6ch_wav_to_stereo(file_path, out_path)
+    # print(out_path)
+    result = whisper_model.transcribe(out_path, language="en")
+    text_result = result['text']
+    print(text_result)
+    return text_result
+
+def judge_classifier(command):
+    nlp = spacy.load('en_core_web_md')
+    commands = [
+        "OK Google.",
+        "Turn on Bluetooth.",
+        "Record a video.",
+        "Take a photo.",
+        "Open music player.",
+        "Set an alarm for 6:30 am.",
+        "Remind me to buy coffee at 7 am.",
+        "What is my schedule for tomorrow?",
+        "Square root of 2105?",
+        "Open browser.",
+        "Decrease volume.",
+        "Turn on flashlight.",
+        "Set the volume to full.",
+        "Mute the volume.",
+        "What's the definition of transmit?",
+        "Call Pizza Hut.",
+        "Call the nearest computer shop.",
+        "Show me my messages.",
+        "Translate please give me directions to Chinese.",
+        "How do you say good night in Japanese?"
+    ]
+    def classify_key(command):
+        if 'ok google' in command:
+            return 1
+        elif 'bluetooth' in command and 'on' in command:
+            return 2
+        elif 'record' in command and 'video' in command:
+            return 3
+        elif 'take' in command and 'photo' in command:
+            return 4
+        elif 'music player' in command and 'open' in command:
+            return 5
+        elif 'set' in command and 'alarm' in command:
+            return 6
+        elif 'remind' in command and 'coffee' in command:
+            return 7
+        elif 'schedule' in command or 'tomorrow' in command:
+            return 8
+        elif 'square root' in command:
+            return 9
+        elif 'open browser' in command:
+            return 10
+        elif 'decrease volume' in command:
+            return 11
+        elif 'flashlight' in command and 'on' in command:
+            return 12
+        elif 'volume' in command and 'full' in command:
+            return 13
+        elif 'mute' in command and 'volume' in command:
+            return 14
+        elif 'definition of' in command:
+            return 15
+        elif 'call' in command and 'pizza hut' in command.lower():
+            return 16
+        elif 'call' in command and 'computer shop' in command.lower():
+            return 17
+        elif 'messages' in command and 'show' in command:
+            return 18
+        elif 'translate' in command:
+            return 19
+        elif 'good night' in command and 'in japanese' in command:
+            return 20
+        else:
+            return None  # or some default value if command is not recognized
+
+    file_content = command
+    result_pre = classify_key(file_content.replace('.', '').replace(',', '').lower().strip())
+    if result_pre is not None:
+        # fixed: map the 1-based keyword index back to the command string,
+        # so the return type matches the similarity fallback below
+        return commands[result_pre - 1]
+    input_doc = nlp(file_content.replace('.', '').replace(',', '').lower().strip())
+    similarities = [(command, input_doc.similarity(nlp(command))) for command in commands]
+    best_match = max(similarities, key=lambda item: item[1])
+    return best_match[0]
+
+def judge(file_path, time, location):
+
+    rate, data = read(file_path)
+    # Maybe change to paths?
+    temp = judge_human(rate, data)
+    temp2 = judge_name(rate, data)
+    command = judge_command(file_path)
+    text = judge_classifier(command)
+    if is_command_reasonable(text, time, location):
+        return True
+    else:
+        return False
+
+if __name__ == "__main__":
+    judge('/home/fazhong/Github/czx2/example/data/fengattack60/feng_attack_echo_60_01_3.150-4.000.wav', 0, 0)
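Context codes used by judge, from the comments above: time 0 = work, 1 = rest, 2 = sleep; location 0 = work, 1 = home. A hedged invocation sketch (the recording path is a placeholder; the wav must have 6 channels for the Whisper step):

    from cmd_judge import judge

    ok = judge("recording_6ch.wav", 1, 1)  # True when the transcribed command fits the context
    print("accept" if ok else "reject")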
translate/test.py
ADDED
@@ -0,0 +1,414 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
from tensorflow.keras.models import Sequential
|
3 |
+
from tensorflow.keras.layers import Dense, Dropout
|
4 |
+
from tensorflow.keras.losses import binary_crossentropy
|
5 |
+
from tensorflow.keras.optimizers import Adam
|
6 |
+
from tensorflow.keras.models import load_model
|
7 |
+
from tensorflow.keras.callbacks import ModelCheckpoint
|
8 |
+
from tensorflow.keras.utils import to_categorical
|
9 |
+
import tensorflow as tf
|
10 |
+
from sklearn.metrics import roc_curve
|
11 |
+
from scipy.interpolate import interp1d
|
12 |
+
from scipy.optimize import brentq
|
13 |
+
import os
|
14 |
+
import random
|
15 |
+
import spacy
|
16 |
+
import matplotlib.pyplot as plt
|
17 |
+
# == Part 1 - Read data ==
|
18 |
+
|
data = np.load("/home/fazhong/Github/czx/data.npy", allow_pickle=True)
labels = np.load("/home/fazhong/Github/czx/labels.npy", allow_pickle=True)
texts = np.load("/home/fazhong/Github/czx/texts.npy", allow_pickle=True)

commands = [
    "OK Google.",
    "Turn on Bluetooth.",
    "Record a video.",
    "Take a photo.",
    "Open music player.",
    "Set an alarm for 6:30 am.",
    "Remind me to buy coffee at 7 am.",
    "What is my schedule for tomorrow?",
    "Square root of 2105?",
    "Open browser.",
    "Decrease volume.",
    "Turn on flashlight.",
    "Set the volume to full.",
    "Mute the volume.",
    "What's the definition of transmit?",
    "Call Pizza Hut.",
    "Call the nearest computer shop.",
    "Show me my messages.",
    "Translate please give me directions to Chinese.",
    "How do you say good night in Japanese?"
]
commands_basic = [
    0,   # "OK Google."
    1,   # "Turn on Bluetooth."
    5,   # "Set an alarm for 6:30 am."
    10,  # "Decrease volume."
    11,  # "Turn on flashlight."
    12,  # "Set the volume to full."
    13,  # "Mute the volume."
]
commands_daily = [
    2,   # "Record a video."
    3,   # "Take a photo."
    4,   # "Open music player."
    6,   # "Remind me to buy coffee at 7 am."
    15,  # "Call Pizza Hut."
]
commands_work = [
    7,   # "What is my schedule for tomorrow?"
    8,   # "Square root of 2105?"
    9,   # "Open browser."
    14,  # "What's the definition of transmit?"
    16,  # "Call the nearest computer shop."
    17,  # "Show me my messages."
    18,  # "Translate please give me directions to Chinese."
    19,  # "How do you say good night in Japanese?"
]

def rule_judge(type, time, location):
    # Decide whether a command category is allowed in the current context.
    if type in commands_basic:
        # Basic commands are blocked only during sleep time.
        return time != 0
    elif type in commands_daily:
        # Daily commands are allowed only during daily time.
        return time == 2
    elif type in commands_work:
        # Work commands require work time at the factory.
        return time == 1 and location == 1
    return False  # unknown category: reject by default

# 0 - sleep time / 1 - work time / 2 - daily time
times_label = [0, 1, 2]
# 0 - home / 1 - factory
location_label = [0, 1]

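# A quick sanity check of the rule table (hypothetical inputs, for illustration):
# rule_judge(0, time=1, location=0)  -> True   (basic command outside sleep time)
# rule_judge(2, time=0, location=0)  -> False  (daily command during sleep time)
# rule_judge(7, time=1, location=1)  -> True   (work command at the factory)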
data_all = []
data = data.tolist()
labels = labels.tolist()
texts = texts.tolist()

acc_num = 0
all_num = len(data)
atk_list = []   # indices predicted as attack by the human/attack model
atk_err = []    # indices the human/attack model gets wrong
name_err = []   # indices the speaker-identification model gets wrong
type_err = []   # indices the command classifier gets wrong

gt_label = []
pre_label = []

name_err_num = [0, 0, 0, 0]
name_acc_num = [0, 0, 0, 0]
command_err_num = [0] * 20
command_acc_num = [0] * 20

for i in range(len(data)):
    tmp = []
    tmp.append(np.array(data[i][0]))
    tmp.extend([labels[i][0]])
    tmp.extend([labels[i][1]])
    tmp.extend([labels[i][2]])
    data_all.append(tmp)
data = data_all

# Context labels are simulated at random for this test run.
time_labels = []
location_labels = []
for i in range(len(data)):
    time_labels.append(random.randint(0, 2))
    location_labels.append(random.randint(0, 1))

rule_err = []

for i in range(len(data)):
    if not rule_judge(data[i][2], time_labels[i], location_labels[i]):
        rule_err.append(i)

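# After the repack above, each sample reads:
#   data[i] = [feature_vector, attack_label, name_index, category_number]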
# == Part 2 - Judge of Human ==
model = load_model('/home/fazhong/Github/czx/data-task0_1/train1.keras')
X = np.asarray([x[0] for x in data])
y = np.asarray([x[1] for x in data])
type = np.asarray([x[3] for x in data])

y_pred = model.predict(X)
y_pred = y_pred.reshape((len(y_pred), 1))
y = y.reshape((len(y), 1))
for i in range(len(y)):
    if y_pred[i] > 0.5:
        y_pred[i] = 1
    else:
        y_pred[i] = 0
        atk_list.append(i)  # predicted as attack
    if y_pred[i] != y[i]:
        atk_err.append(i)
ACCU = np.sum(y_pred == y) / len(y)
print(len(y))
print("ACCU is " + str(100 * ACCU))

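# An equivalent vectorized form of the accuracy computation above (the error
# bookkeeping still needs the loop); a sketch, not used below:
# y_hat = (model.predict(X) > 0.5).astype(int).reshape(-1, 1)
# print("ACCU is " + str(100 * np.mean(y_hat == y)))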
# == Part 3 - Judge of Name ==

model = load_model('/home/fazhong/Github/czx/data-task0/train1.keras')
y_name = np.asarray([x[2] for x in data])
y_pred = model.predict(X)
y_pred_classes = np.argmax(y_pred, axis=1)
ACCU = np.sum(y_pred_classes == y_name) / len(y_name)
for i in range(len(y_name)):
    if y_pred_classes[i] != y_name[i]:
        name_err.append(i)
print("ACCU is " + str(100 * ACCU))


# Part 4 - Transcribe and Judge of Reason

# NB: attack transcripts do not need to go through the classifier.
nlp = spacy.load('en_core_web_md')


def classify_key(command):
    # Keyword shortcut classifier over the transcript; returns a 1-based
    # command category, or None to fall back to spaCy similarity matching.
    # The looser keywords (e.g. 'move', 'milk', 'peace', 'hard') absorb
    # common mis-transcriptions of "Mute the volume." and "Call Pizza Hut.".
    if 'ok google' in command:
        return 1
    elif 'okay' in command:
        return 1
    elif 'bluetooth' in command:
        return 2
    elif 'record' in command and 'video' in command:
        return 3
    elif 'take' in command and 'photo' in command:
        return 4
    elif 'music' in command:
        return 5
    elif 'alarm' in command:
        return 6
    elif 'remind' in command and 'coffee' in command:
        return 7
    elif 'am' in command:
        return 7
    elif 'schedule' in command or 'tomorrow' in command:
        return 8
    elif 'square root' in command:
        return 9
    elif 'open browser' in command:
        return 10
    elif 'decrease volume' in command:
        return 11
    elif 'flashlight' in command and 'on' in command:
        return 12
    elif 'hello freshlight' in command.lower():
        return 12
    elif 'turn on' in command:
        return 12
    elif 'volume' in command and 'full' in command:
        return 13
    elif 'mute' in command:
        return 14
    elif 'move' in command:
        return 14
    elif 'more' in command:
        return 14
    elif 'motor' in command:
        return 14
    elif 'mood' in command:
        return 14
    elif 'most' in command:
        return 14
    # NOTE: the 'what'/'with' rules below shadow "What's the definition of
    # transmit?", so definition queries only reach category 15 when the
    # transcript avoids those words.
    elif 'what' in command:
        return 14
    elif 'with' in command:
        return 14
    elif 'milk' in command:
        return 14
    elif 'use' in command:
        return 14
    elif 'definition of' in command:
        return 15
    elif 'call' in command and 'pizza hut' in command.lower():
        return 16
    elif 'copies are' in command.lower() or 'call a piece of heart' in command.lower() or 'copies of' in command.lower():
        return 16
    elif 'peace' in command.lower():
        return 16
    elif 'heart' in command.lower():
        return 16
    elif 'pisa' in command.lower():
        return 16
    elif 'piece' in command.lower():
        return 16
    elif 'hard' in command.lower():
        return 16
    elif 'call' in command and 'computer shop' in command.lower():
        return 17
    elif 'message' in command:
        return 18
    elif 'translate' in command:
        return 19
    elif 'good night' in command and 'in japanese' in command:
        return 20
    else:
        return None  # command not recognized by any keyword rule
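# Expected behaviour on a few hypothetical transcripts, for illustration:
# classify_key("turn on bluetooth")        -> 2  ('bluetooth' wins before 'turn on')
# classify_key("set an alarm for 6:30 am") -> 6  ('alarm' wins before 'am')
# classify_key("hello world")              -> None (falls through to spaCy matching)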

correct_count = 0
total_count = 0
category_number = 0
total_normal = 0

normal_texts = []
normal_labels = []

All_Normal_names = []

# Test of rule module
test_flag = True
atk_org_list = []
for i in range(len(texts)):
    if test_flag:
        # Keep every sample and remember which ones are ground-truth attacks.
        normal_texts.append(texts[i])
        All_Normal_names.append(y_name[i])
        normal_labels.append(type[i])
        if y[i] == 0:
            atk_org_list.append(i)
    else:
        # Otherwise keep only the ground-truth normal samples.
        if y[i] == 1:
            normal_texts.append(texts[i])
            All_Normal_names.append(y_name[i])
            normal_labels.append(type[i])

print(len(atk_org_list))
# for text in texts:
#     if texts.index(text) in atk_list:
#         print(texts.index(text))
#         continue
#     else:
#         normal_texts.append(text)

weird_name = []
weird_command = []


# for i in range(len(data)):
#     if not rule_judge(data[i][2], time_labels[i], location_labels[i]):
#         rule_err.append(i)

for i in range(len(normal_texts)):
    text = normal_texts[i]
    category_number = normal_labels[i]
    # print(text)
    # print(category_number)

    result_pre = classify_key(text.replace('.', '').replace(',', '').lower().strip())

    # IF rule - judge (context check, disabled in this run):
    # if not rule_judge(category_number - 1, time_labels[i], location_labels[i]):
    #     command_err_num[category_number - 1] += 1
    #     name_err_num[All_Normal_names[i]] += 1
    #     continue

    # Ground-truth attacks always count as errors for their category.
    if i in atk_org_list:
        command_err_num[category_number - 1] += 1
        name_err_num[All_Normal_names[i]] += 1
        continue
    if result_pre is not None:
        if result_pre == category_number:
            correct_count += 1
            command_acc_num[category_number - 1] += 1
            name_acc_num[All_Normal_names[i]] += 1
            continue
    # Fallback: pick the command whose spaCy embedding is most similar.
    input_doc = nlp(text.replace('.', '').replace(',', '').lower().strip())
    similarities = [(command, input_doc.similarity(nlp(command))) for command in commands]
    best_match = max(similarities, key=lambda item: item[1])
    best_match_index = commands.index(best_match[0]) + 1
    if best_match_index == category_number:
        correct_count += 1
        command_acc_num[category_number - 1] += 1
        name_acc_num[All_Normal_names[i]] += 1
    else:
        # print(text.replace('.', '').replace(',', '').lower().strip())
        # if category_number == 16:
        #     print(input_doc, commands[category_number - 1], commands[best_match_index - 1])
        command_err_num[category_number - 1] += 1
        name_err_num[All_Normal_names[i]] += 1

        # if 'thank' in str(input_doc):
        #     pass
        #     weird_name.append(y_name[texts.index(text)])
        #     weird_command.append(type[texts.index(text)])
        type_err.append(texts.index(text))

# Compute the accuracy over all retained transcripts.
accuracy = correct_count / len(normal_texts)
print(f"Accuracy: {accuracy:.2f}")


# Part 5 - Results
atk_set = set(atk_err)
name_set = set(name_err)
type_set = set(type_err)
# rule_set = set(rule_err)
err_list = list(atk_set | name_set | type_set)


print(len(err_list))
# print(weird_name)

print(name_err_num)
print(name_acc_num)
print(command_err_num)
print(command_acc_num)

# print(weird_command)
# print(atk_list)
# print(len(atk_list))
# print(all_num)
# print(atk_err)
# print(name_err)
# print(type_err)
# print(type_set)
# print(err_list)

# # Bar positions
# x = np.arange(len(name_err_num))

# # Draw the bar chart
# plt.bar(x - 0.2, name_acc_num, width=0.4, label='Correct', color='green')
# plt.bar(x + 0.2, name_err_num, width=0.4, label='Error', color='red')

# # Add title and labels
# plt.xlabel('Names')
# plt.ylabel('Counts')
# plt.title('Accuracy and Errors by Name')
# plt.xticks(x, ['User1', 'User2', 'User3', 'User4'])  # four users
# plt.legend()
# # plt.savefig('/home/fazhong/Github/czx/user.png')
# # Close the figure
# plt.close()


# # Bar positions
# x = np.arange(len(command_err_num))

# # Draw the bar chart
# plt.bar(x - 0.2, command_acc_num, width=0.4, label='Correct', color='blue')
# plt.bar(x + 0.2, command_err_num, width=0.4, label='Error', color='orange')

# # Add title and labels
# plt.xlabel('Commands')
# plt.ylabel('Counts')
# plt.title('Accuracy and Errors by Command')
# plt.xticks(x, [i for i in range(20)])  # one tick per command category
# plt.legend()

# # Save the figure
# # plt.savefig('/home/fazhong/Github/czx/com.png')
translate/train_man.py
ADDED
@@ -0,0 +1,160 @@
"""
This script runs a cross-validation.
We start from a two-fold validation.
"""
#%% Import necessary packages and the EER function
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint
import tensorflow as tf
from sklearn.metrics import roc_curve
from scipy.interpolate import interp1d
from scipy.optimize import brentq
import os
import random

def eer(x_test, y_test, model):
    # Equal error rate: the point on the ROC curve where the false-positive
    # rate equals the false-negative rate.
    preds = model.predict(x_test)
    fpr, tpr, thresholds = roc_curve(y_test, preds)
    return brentq(lambda x: 1. - x - interp1d(fpr, tpr)(x), 0., 1.)

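# eer() is not called below (the EER is recomputed inline at the end), but a
# hypothetical call on a held-out fold would be:
# print("EER:", eer(X2, y2, model))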

#%%
data = np.load("/home/fazhong/Github/czx/data.npy", allow_pickle=True)
labels = np.load("/home/fazhong/Github/czx/labels.npy", allow_pickle=True)

data_all = []
data = data.tolist()
# print(data[0])
labels = labels.tolist()
# Repack each sample as [feature, attack_label, name_index, category_number].
for i in range(len(data)):
    tmp = []
    tmp.append(np.array(data[i][0]))
    tmp.extend([labels[i][0]])
    tmp.extend([labels[i][1]])
    tmp.extend([labels[i][2]])
    data_all.append(tmp)
random.shuffle(data_all)
data = data_all
# np.random.shuffle(data)

batch_size = 10
feature_len = 110
loss_function = binary_crossentropy
no_epochs = 150
optimizer = Adam()
verbosity = 1
model = Sequential()
model.add(Dense(64, input_dim=feature_len, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(16, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss=loss_function, optimizer=optimizer, metrics=['accuracy'])

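# The input is a 110-dim feature vector and the output a single normal/attack
# probability; a hypothetical wiring check:
# assert model.output_shape == (None, 1)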
#%% Training: fit one model per fold and save the best checkpoint
data_train = data[:int(0.5 * len(data))]
print(len(data_train))
X1 = np.asarray([x[0] for x in data_train])
print(X1.shape)
y1 = np.asarray([x[1] for x in data_train])
print(y1.shape)
data_test = data[int(0.5 * len(data)):]
X2 = np.asarray([x[0] for x in data_test])
y2 = np.asarray([x[1] for x in data_test])
checkpointer = ModelCheckpoint(filepath="./data-task0/train1.keras",
                               verbose=verbosity, save_best_only=True)
print('-' * 30)
print('Training on the first half of the data set')
history = model.fit(X1, y1,
                    # validation_data=(x[test], y[test]),
                    validation_split=0.1,
                    batch_size=batch_size,
                    epochs=no_epochs,
                    verbose=verbosity,
                    callbacks=[checkpointer, tf.keras.callbacks.EarlyStopping(monitor='loss', patience=7)]
                    )

## Train the second fold on X2
checkpointer = ModelCheckpoint(filepath="./data-task0/train2.keras",
                               verbose=verbosity, save_best_only=True)
print('-' * 30)
print('Training on the second half of the data set')
history = model.fit(X2, y2,
                    # validation_data=(x[test], y[test]),
                    validation_split=0.1,
                    batch_size=batch_size,
                    epochs=no_epochs,
                    verbose=verbosity,
                    callbacks=[checkpointer, tf.keras.callbacks.EarlyStopping(monitor='loss', patience=7)]
                    )

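# Two-fold scheme: train1.keras is fit on the first half and scored on the
# second, train2.keras the reverse, so every sample receives an out-of-fold
# prediction before the pooled accuracy and EER are computed below.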

#%% Calculate the final result
# data_train = np.load("./main_task/data-task0/data1.npy", allow_pickle=True)
X1 = np.asarray([x[0] for x in data_train])
y1 = np.asarray([x[1] for x in data_train])
# data_test = np.load("./main_task/data-task0/data2.npy", allow_pickle=True)
X2 = np.asarray([x[0] for x in data_test])
y2 = np.asarray([x[1] for x in data_test])

# Score each half with the model trained on the other half.
model.load_weights("./data-task0/train1.keras")
scores = model.evaluate(X2, y2)
y_pred2 = model.predict(X2)
print(y_pred2.shape)

model.load_weights("./data-task0/train2.keras")
scores = model.evaluate(X1, y1)
y_pred1 = model.predict(X1)

y_pred = np.concatenate((y_pred1, y_pred2))
y_pred = y_pred.reshape((len(y_pred), 1))
y_label = np.concatenate((y1, y2))
y_label = y_label.reshape((len(y_label), 1))
for i in range(len(y_label)):
    if y_pred[i] > 0.5:
        y_pred[i] = 1
    else:
        y_pred[i] = 0
ACCU = np.sum(y_pred == y_label) / len(y_label)
print("ACCU is " + str(100 * ACCU))
fpr, tpr, thresholds = roc_curve(y_label, y_pred)
EER = brentq(lambda x: 1. - x - interp1d(fpr, tpr)(x), 0., 1.)
print(EER)


# #%% Calculate per-user results
# num_all = np.zeros((20, 1))
# num_success = np.zeros((20, 1))
# for user_num in range(1, 21, 1):
#     # Test the data on train1.keras
#     model.load_weights("./data-task0/train1.keras")
#     print("user number is " + str(user_num))
#     X_test = np.asarray([x[0] for x in data_test if (x[5] == user_num and x[1] == 0)])
#     y_test = np.asarray([x[1] for x in data_test if (x[5] == user_num and x[1] == 0)])
#     scores = model.evaluate(X_test, y_test)
#     num_all[user_num - 1] += len(y_test)
#     num_success[user_num - 1] += np.round(len(y_test) * scores[1])
# for user_num in range(1, 21, 1):
#     # Test the data on train2.keras
#     model.load_weights("./data-task0/train2.keras")
#     print("user number is " + str(user_num))
#     X_test = np.asarray([x[0] for x in data_train if (x[5] == user_num and x[1] == 0)])
#     y_test = np.asarray([x[1] for x in data_train if (x[5] == user_num and x[1] == 0)])
#     scores = model.evaluate(X_test, y_test)
#     num_all[user_num - 1] += len(y_test)
#     num_success[user_num - 1] += np.round(len(y_test) * scores[1])

# #%% Show the results
# for user_num in range(1, 21, 1):
#     print("user number is " + str(user_num))
#     print("[=========] total number is " + str(int(num_all[user_num - 1])) + ", and wrong detect " + str(int(num_all[user_num - 1] - num_success[user_num - 1]))
#           + " samples, rate is " + str(np.round(num_success[user_num - 1] / num_all[user_num - 1], 4)))
# print("total number is " + str(int(np.sum(num_all))) + ", and detect " + str(int(np.sum(num_all) - np.sum(num_success)))
#       + " samples, rate is " + str((np.sum(num_success) / np.sum(num_all))))
translate/train_name.py
ADDED
@@ -0,0 +1,162 @@
"""
This script runs a cross-validation for the name (speaker) labels.
We start from a two-fold validation.
"""
#%% Import necessary packages and the EER function
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.utils import to_categorical
import tensorflow as tf
from sklearn.metrics import roc_curve
from scipy.interpolate import interp1d
from scipy.optimize import brentq
import os
import random

def eer(x_test, y_test, model):
    # Equal error rate, as in train_man.py.
    preds = model.predict(x_test)
    fpr, tpr, thresholds = roc_curve(y_test, preds)
    return brentq(lambda x: 1. - x - interp1d(fpr, tpr)(x), 0., 1.)


#%%
data = np.load("/home/fazhong/Github/czx/data.npy", allow_pickle=True)
labels = np.load("/home/fazhong/Github/czx/labels.npy", allow_pickle=True)


data_all = []
data = data.tolist()
# print(data[0])
labels = labels.tolist()
# Repack each sample as [feature, one-hot name label, attack_label, category_number].
for i in range(len(data)):
    tmp = []
    tmp.append(np.array(data[i][0]))
    tmp.extend(to_categorical([labels[i][1]], num_classes=4).tolist())
    tmp.extend([labels[i][0]])
    tmp.extend([labels[i][2]])
    data_all.append(tmp)
random.shuffle(data_all)
data = data_all
# print(data)
# np.random.shuffle(data)

batch_size = 10
feature_len = 110
loss_function = categorical_crossentropy
no_epochs = 150
optimizer = Adam()
verbosity = 1
model = Sequential()
model.add(Dense(64, input_dim=feature_len, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(16, activation='relu'))
model.add(Dense(4, activation='softmax'))
model.compile(loss=loss_function, optimizer=optimizer, metrics=['accuracy'])

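# The 4-way softmax head matches the one-hot name labels built above; a
# hypothetical wiring check:
# assert model.output_shape == (None, 4)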
#%% Training: fit one model per fold and save the best checkpoint
data_train = data[:int(0.5 * len(data))]
print(len(data_train))
X1 = np.asarray([x[0] for x in data_train])
print(X1.shape)
temp = [x[1] for x in data_train]
print(len(temp))
print(len(temp[1]))
y1 = np.asarray([x[1] for x in data_train])
print(y1.shape)
data_test = data[int(0.5 * len(data)):]
X2 = np.asarray([x[0] for x in data_test])
y2 = np.asarray([x[1] for x in data_test])
checkpointer = ModelCheckpoint(filepath="./data-task0/train1.keras",
                               verbose=verbosity, save_best_only=True)
print('-' * 30)
print('Training on the first half of the data set')
history = model.fit(X1, y1,
                    # validation_data=(x[test], y[test]),
                    validation_split=0.1,
                    batch_size=batch_size,
                    epochs=no_epochs,
                    verbose=verbosity,
                    callbacks=[checkpointer, tf.keras.callbacks.EarlyStopping(monitor='loss', patience=7)]
                    )

## Train the second fold on X2
checkpointer = ModelCheckpoint(filepath="./data-task0/train2.keras",
                               verbose=verbosity, save_best_only=True)
print('-' * 30)
print('Training on the second half of the data set')
history = model.fit(X2, y2,
                    # validation_data=(x[test], y[test]),
                    validation_split=0.1,
                    batch_size=batch_size,
                    epochs=no_epochs,
                    verbose=verbosity,
                    callbacks=[checkpointer, tf.keras.callbacks.EarlyStopping(monitor='loss', patience=7)]
                    )


#%% Calculate the final result
# data_train = np.load("./main_task/data-task0/data1.npy", allow_pickle=True)
X1 = np.asarray([x[0] for x in data_train])
y1 = np.asarray([x[1] for x in data_train])
# data_test = np.load("./main_task/data-task0/data2.npy", allow_pickle=True)
X2 = np.asarray([x[0] for x in data_test])
y2 = np.asarray([x[1] for x in data_test])


model.load_weights("./data-task0/train1.keras")
scores = model.evaluate(X2, y2)
y_pred2 = model.predict(X2)
print(y_pred2.shape)

model.load_weights("./data-task0/train2.keras")
scores = model.evaluate(X1, y1)
y_pred1 = model.predict(X1)

y_pred = np.concatenate((y_pred1, y_pred2))
y_pred_classes = np.argmax(y_pred, axis=1)
y_label_classes = np.argmax(np.concatenate((y1, y2)), axis=1)
print(y_pred_classes)
ACCU = np.sum(y_pred_classes == y_label_classes) / len(y_label_classes)
print("ACCU is " + str(100 * ACCU))
# roc_curve only supports binary targets, so the 4-class ROC/EER is computed
# micro-averaged over the flattened one-hot labels and class probabilities.
y_true_onehot = np.concatenate((y1, y2))
fpr, tpr, thresholds = roc_curve(y_true_onehot.ravel(), y_pred.ravel())
EER = brentq(lambda x: 1. - x - interp1d(fpr, tpr)(x), 0., 1.)
print(EER)


# #%% Calculate per-user results
# num_all = np.zeros((20, 1))
# num_success = np.zeros((20, 1))
# for user_num in range(1, 21, 1):
#     # Test the data on train1.keras
#     model.load_weights("./data-task0/train1.keras")
#     print("user number is " + str(user_num))
#     X_test = np.asarray([x[0] for x in data_test if (x[5] == user_num and x[1] == 0)])
#     y_test = np.asarray([x[1] for x in data_test if (x[5] == user_num and x[1] == 0)])
#     scores = model.evaluate(X_test, y_test)
#     num_all[user_num - 1] += len(y_test)
#     num_success[user_num - 1] += np.round(len(y_test) * scores[1])
# for user_num in range(1, 21, 1):
#     # Test the data on train2.keras
#     model.load_weights("./data-task0/train2.keras")
#     print("user number is " + str(user_num))
#     X_test = np.asarray([x[0] for x in data_train if (x[5] == user_num and x[1] == 0)])
#     y_test = np.asarray([x[1] for x in data_train if (x[5] == user_num and x[1] == 0)])
#     scores = model.evaluate(X_test, y_test)
#     num_all[user_num - 1] += len(y_test)
#     num_success[user_num - 1] += np.round(len(y_test) * scores[1])

# #%% Show the results
# for user_num in range(1, 21, 1):
#     print("user number is " + str(user_num))
#     print("[=========] total number is " + str(int(num_all[user_num - 1])) + ", and wrong detect " + str(int(num_all[user_num - 1] - num_success[user_num - 1]))
#           + " samples, rate is " + str(np.round(num_success[user_num - 1] / num_all[user_num - 1], 4)))
# print("total number is " + str(int(np.sum(num_all))) + ", and detect " + str(int(np.sum(num_all) - np.sum(num_success)))
#       + " samples, rate is " + str((np.sum(num_success) / np.sum(num_all))))
translate/wav2com.py
ADDED
File without changes
translate/wav2npy.py
ADDED
@@ -0,0 +1,118 @@
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import roc_curve
from scipy.interpolate import interp1d
from scipy.optimize import brentq
import matplotlib.pyplot as plt
from scipy.io.wavfile import read
from sklearn.preprocessing import normalize
from generate_array_feature import mald_feature, get_filelist
import time
import os
from pydub import AudioSegment
import whisper

folder_path = '/home/fazhong/Github/czx2/example/data'
names = ['feng', 'jc', 'meng', 'zhan']
types = ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10',
         '11', '12', '13', '14', '15', '16', '17', '18', '19', '20']  # duplicate '09' removed
voice = []

def convert_6ch_wav_to_stereo(input_file_path, output_file_path):
    # Down-mix a 6-channel (5.1) recording to stereo: attenuate the centre and
    # rear channels by 6 dB, then overlay them onto the front left/right pair.
    sound = AudioSegment.from_file(input_file_path, format="wav")
    if sound.channels != 6:
        raise ValueError("The input file does not have 6 channels.")
    front_left = sound.split_to_mono()[0]
    front_right = sound.split_to_mono()[1]
    center = sound.split_to_mono()[2]
    back_left = sound.split_to_mono()[4]
    back_right = sound.split_to_mono()[5]
    center = center - 6
    back_left = back_left - 6
    back_right = back_right - 6
    stereo_left = front_left.overlay(center).overlay(back_left)
    stereo_right = front_right.overlay(center).overlay(back_right)
    stereo_sound = AudioSegment.from_mono_audiosegments(stereo_left, stereo_right)
    stereo_sound.export(output_file_path, format="wav")

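# Hypothetical one-off conversion, for illustration:
# convert_6ch_wav_to_stereo('meeting_6ch.wav', 'meeting_stereo.wav')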
def read_all_files(directory):
    data = []
    labels = []
    texts = []
    whisper_model = whisper.load_model("large")
    out_path = '/home/fazhong/Github/czx/temp/temp.wav'
    i = 0
    for root, dirs, files in os.walk(directory):

        for file in files:
            # if i > 10: return data, labels, texts
            content = []
            content_label = []
            file_path = os.path.join(root, file)
            # Down-mix to stereo so Whisper can transcribe the clip.
            convert_6ch_wav_to_stereo(file_path, out_path)
            result = whisper_model.transcribe(out_path, language="en")
            text_result = result['text']
            texts.append(text_result)
            print(file)
            # File names encode the ground truth: 'normal'/'attack', the
            # speaker name, and the command category number.
            if 'normal' in file:
                label = 1  # normal case
            elif 'attack' in file:
                label = 0
            for name in names:
                if name in file:
                    name_index = names.index(name)
            if label == 0:
                category_number = int(file.split('_')[4])
            elif label == 1:
                category_number = int(file.split('_')[3])

            rate, wavdata = read(file_path)
            content.append(list(mald_feature(rate, wavdata)))
            content_label.append(label)
            content_label.append(name_index)
            content_label.append(category_number)
            data.append(content)
            labels.append(content_label)
            i += 1
    return data, labels, texts

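# read_all_files yields one feature row, one [attack, name, category] label
# triple, and one Whisper transcript per .wav file, in directory-walk order.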
# Call the function and persist the arrays.
data, labels, texts = read_all_files(folder_path)
data_array = np.array(data)
labels_array = np.array(labels)
texts_array = np.array(texts)
filename = 'data.npy'
filename2 = 'labels.npy'
filename3 = 'texts.npy'
np.save(filename, data_array)
np.save(filename2, labels_array)
np.save(filename3, texts_array)
print('fin')
# #%% Load the audio
# path_wave = r"/home/fazhong/Github/czx/voice"
# print("Loading data ...")
# name_all = get_filelist(path_wave)
# voice = []
# # voice holds the raw waveforms extracted from a set of wav files
# X = []  # X is the feature ~ data[0]
# y = []  # y is the normal (1) or attack (0) ~ data[1]

# for file_path in name_all:
#     file_name = file_path.split("\\")[-1]
#     # define the normal or attack in variable cur_y
#     if 'normal' in file_name:
#         cur_y = 1  # normal case
#     elif 'attack' in file_name:
#         cur_y = 0
#     # split the file name
#     # read the data
#     rate, data = read(file_path)
#     voice += [list(data)]

#     X += [list(mald_feature(rate, data))]
#     y += [cur_y]

# norm_X = normalize(X, axis=0, norm='max')
# X = np.asarray(norm_X)
# y = np.asarray(y)