YsnHdn committed on
Commit
4deb54c
·
1 Parent(s): 85c8f52

Adding the voice feature

Browse files
Dockerfile CHANGED
@@ -1,6 +1,3 @@
1
- # read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
2
- # you will also find guides on how best to write your Dockerfile
3
-
4
  FROM python:3.9
5
 
6
  WORKDIR /code
@@ -8,7 +5,13 @@ WORKDIR /code
8
  # Copy requirements.txt separately to leverage Docker layer caching
9
  COPY ./requirements.txt /code/requirements.txt
10
 
11
- # Install dependencies
 
 
 
 
 
 
12
  RUN pip install --no-cache-dir --upgrade -r requirements.txt
13
 
14
  # Set the HF_HOME environment variable
@@ -19,6 +22,7 @@ RUN mkdir -p $HF_HOME && chmod -R 777 $HF_HOME
19
 
20
  # Copy the model files into the image
21
  COPY ./DistillMDPI1 /code/DistillMDPI1
 
22
 
23
  # Copy the rest of the application files
24
  COPY . .
@@ -27,4 +31,4 @@ COPY . .
27
  RUN mkdir -p /code/static/uploads && chmod -R 777 /code/static/uploads
28
 
29
  # Command to run your application
30
- CMD ["gunicorn", "-b", "0.0.0.0:7860", "app:app"]
 
 
 
 
1
  FROM python:3.9
2
 
3
  WORKDIR /code
 
5
  # Copy requirements.txt separately to leverage Docker layer caching
6
  COPY ./requirements.txt /code/requirements.txt
7
 
8
+ # Install system dependencies for audio processing
9
+ RUN apt-get update && apt-get install -y \
10
+ ffmpeg \
11
+ libsndfile1 \
12
+ && rm -rf /var/lib/apt/lists/*
13
+
14
+ # Install Python dependencies
15
  RUN pip install --no-cache-dir --upgrade -r requirements.txt
16
 
17
  # Set the HF_HOME environment variable
 
22
 
23
  # Copy the model files into the image
24
  COPY ./DistillMDPI1 /code/DistillMDPI1
25
+ COPY ./Neptune /code/Neptune
26
 
27
  # Copy the rest of the application files
28
  COPY . .
 
31
  RUN mkdir -p /code/static/uploads && chmod -R 777 /code/static/uploads
32
 
33
  # Command to run your application
34
+ CMD ["gunicorn", "-b", "0.0.0.0:7860", "app:app"]
Model.py ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import torch
3
+ import torch.nn as nn
4
+ import numpy as np
5
+ from transformers import AutoTokenizer
6
+ import pickle
7
+
8
# Tokenizer loaded from the DistilBERT checkpoint directory; reused by the Neptune model below.
tokenizer = AutoTokenizer.from_pretrained("DistillMDPI1/DistillMDPI1/saved_tokenizer")

# Register the <MULT> marker token and cache the ids of the special tokens
# that the input-building helpers rely on.
tokenizer.add_special_tokens({'additional_special_tokens': ['<MULT>']})
mult_token_id = tokenizer.convert_tokens_to_ids('<MULT>')
cls_token_id, sep_token_id, pad_token_id = (
    tokenizer.cls_token_id,
    tokenizer.sep_token_id,
    tokenizer.pad_token_id,
)

## Voice part: hyper-parameters of the custom BERT encoder
maxlen = 255            # maximum sequence length (size of the position-embedding table)
batch_size = 32
max_pred = 5            # maximum number of masked positions per example
n_layers = 6            # number of encoder layers
n_heads = 12            # number of attention heads per layer
d_model = 768           # embedding / hidden size
d_ff = 768 * 4          # feed-forward inner dimension (4 * d_model)
d_k = d_v = 64          # per-head dimension of K (= Q) and V
n_segments = 2          # number of segment (token-type) ids
# One extra row on top of the base vocabulary; presumably for <MULT> -- the
# value must match the checkpoint's embedding table, so confirm before changing.
vocab_size = tokenizer.vocab_size + 1
31
+
32
def get_attn_pad_mask(seq_q, seq_k, pad_id=1):
    """Build a boolean attention mask that hides ``pad_id`` positions of ``seq_k``.

    Args:
        seq_q: 2-D tensor of query token ids, ``(batch, len_q)``.
        seq_k: 2-D tensor of key token ids, ``(batch, len_k)``.
        pad_id: Token id to mask out. Defaults to ``1``, the value that was
            previously hard-coded here.

    Returns:
        Bool tensor of shape ``(batch, len_q, len_k)``; ``True`` marks key
        positions whose id equals ``pad_id``.

    NOTE(review): the original comment said "eq(zero) is PAD token" while the
    code compared against 1, and the tokenizer config shipped in this commit
    lists ``[PAD]`` as id 0. The default is kept at 1 so the behaviour matches
    whatever the checkpoint was trained with -- confirm which id is intended
    before passing ``pad_id=0``.
    """
    _, len_q = seq_q.size()
    n_batch, len_k = seq_k.size()
    # (batch, 1, len_k): True wherever the key token is the masked id.
    pad_attn_mask = seq_k.eq(pad_id).unsqueeze(1)
    # Broadcast the same key mask to every query position.
    return pad_attn_mask.expand(n_batch, len_q, len_k)
38
+
39
class Embedding(nn.Module):
    """Token + position + segment embeddings, summed and layer-normalised."""

    def __init__(self):
        super().__init__()
        self.tok_embed = nn.Embedding(vocab_size, d_model)  # token embedding
        self.pos_embed = nn.Embedding(maxlen, d_model)  # position embedding
        self.seg_embed = nn.Embedding(n_segments, d_model)  # segment (token type) embedding
        self.norm = nn.LayerNorm(d_model)

    def forward(self, x, seg):
        """Embed token ids ``x`` with segment ids ``seg`` (both indexed like ``x``)."""
        # Position ids 0..seq_len-1, broadcast to the shape of ``x``.
        positions = torch.arange(x.size(1), dtype=torch.long, device=x.device)
        positions = positions.unsqueeze(0).expand_as(x)
        summed = self.tok_embed(x) + self.pos_embed(positions) + self.seg_embed(seg)
        return self.norm(summed)
55
+
56
class ScaledDotProductAttention(nn.Module):
    """Parameter-free scaled dot-product attention."""

    def __init__(self):
        super().__init__()

    def forward(self, Q, K, V, attn_mask):
        """Return ``(masked_scores, context, attention_weights)``.

        Positions where ``attn_mask`` is True receive a score of -1e9 before
        the softmax over the last dimension.
        """
        raw_scores = torch.matmul(Q, K.transpose(-1, -2)) / np.sqrt(d_k)
        scores = raw_scores.masked_fill(attn_mask, -1e9)
        attn = torch.softmax(scores, dim=-1)
        context = torch.matmul(attn, V)
        return scores, context, attn
66
+
67
class MultiHeadAttention(nn.Module):
    """Multi-head attention with an output projection, residual add and LayerNorm."""

    def __init__(self):
        super().__init__()
        self.W_Q = nn.Linear(d_model, d_k * n_heads)
        self.W_K = nn.Linear(d_model, d_k * n_heads)
        self.W_V = nn.Linear(d_model, d_v * n_heads)
        self.fc = nn.Linear(n_heads * d_v, d_model)
        self.norm = nn.LayerNorm(d_model)

    def forward(self, Q, K, V, attn_mask):
        """Attend from ``Q`` over ``K``/``V``; return ``(normalised_output, attn)``.

        ``attn_mask`` is given one head axis here and repeated ``n_heads`` times.
        """
        residual = Q
        n_batch = Q.size(0)
        # Keys and values follow the queries' device.
        K = K.to(Q.device)
        V = V.to(Q.device)

        def split_heads(projected, head_dim):
            # (B, S, H * W) -> (B, S, H, W) -> (B, H, S, W)
            return projected.view(n_batch, -1, n_heads, head_dim).transpose(1, 2)

        q_s = split_heads(self.W_Q(Q), d_k)
        k_s = split_heads(self.W_K(K), d_k)
        v_s = split_heads(self.W_V(V), d_v)

        # Same mask for every head: (B, len_q, len_k) -> (B, H, len_q, len_k)
        per_head_mask = attn_mask.unsqueeze(1).repeat(1, n_heads, 1, 1)

        _, context, attn = ScaledDotProductAttention()(q_s, k_s, v_s, per_head_mask)
        # Merge the heads back: (B, H, len_q, d_v) -> (B, len_q, H * d_v)
        merged = context.transpose(1, 2).contiguous().view(n_batch, -1, n_heads * d_v)
        projected = self.fc(merged)
        return self.norm(projected + residual), attn
92
+
93
class PoswiseFeedForwardNet(nn.Module):
    """Position-wise feed-forward block: Linear -> GELU -> Linear."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(d_model, d_ff)
        self.fc2 = nn.Linear(d_ff, d_model)
        self.gelu = nn.GELU()

    def forward(self, x):
        """Map the last dimension d_model -> d_ff -> d_model."""
        expanded = self.fc1(x)
        activated = self.gelu(expanded)
        return self.fc2(activated)
102
+
103
class EncoderLayer(nn.Module):
    """One encoder layer: self-attention followed by the feed-forward block."""

    def __init__(self):
        super().__init__()
        self.enc_self_attn = MultiHeadAttention()
        self.pos_ffn = PoswiseFeedForwardNet()

    def forward(self, enc_inputs, enc_self_attn_mask):
        """Return ``(layer_output, attention_weights)`` for ``enc_inputs``."""
        mask = enc_self_attn_mask.to(enc_inputs.device)
        # Self-attention: the same tensor serves as Q, K and V.
        attended, attn = self.enc_self_attn(enc_inputs, enc_inputs, enc_inputs, mask)
        return self.pos_ffn(attended), attn
113
+
114
class BERT(nn.Module):
    """Encoder stack with four heads.

    Heads: a masked-LM decoder tied to the token embedding, a 2-way classifier
    on the first token, and a shared 17-way classifier applied once to the
    token at position 1 and once to the second ``<MULT>`` token of each row.
    """

    def __init__(self):
        super(BERT, self).__init__()
        self.embedding = Embedding()
        self.layers = nn.ModuleList([EncoderLayer() for _ in range(n_layers)])
        self.fc = nn.Linear(d_model, d_model)
        self.activ1 = nn.Tanh()
        self.linear = nn.Linear(d_model, d_model)
        self.activ2 = nn.GELU()
        self.norm = nn.LayerNorm(d_model)
        self.classifier = nn.Linear(d_model, 2)
        # The decoder shares its weight matrix with the token embedding.
        embed_weight = self.embedding.tok_embed.weight
        n_vocab, n_dim = embed_weight.size()
        self.decoder = nn.Linear(n_dim, n_vocab, bias=False)
        self.decoder.weight = embed_weight
        self.decoder_bias = nn.Parameter(torch.zeros(n_vocab))
        self.mclassifier = nn.Linear(d_model, 17)

    def forward(self, input_ids, segment_ids, masked_pos):
        """Run the encoder and all heads.

        Args:
            input_ids: ``(batch, seq_len)`` token ids; every row must contain
                exactly two ``<MULT>`` tokens.
            segment_ids: ``(batch, seq_len)`` segment ids.
            masked_pos: ``(batch, n_pred)`` positions gathered for the LM head.

        Returns:
            ``(logits_lm, logits_clsf, logits_mclsf1, logits_mclsf2)``.

        Raises:
            ValueError: if some row does not contain exactly two ``<MULT>``
                tokens. This was previously an ``assert`` on the batch-wide
                total, which is stripped under ``-O`` and lets a 1 + 3 split
                across two rows through, silently mis-pairing positions.
        """
        output = self.embedding(input_ids, segment_ids)
        enc_self_attn_mask = get_attn_pad_mask(input_ids, input_ids).to(output.device)
        for layer in self.layers:
            output, _ = layer(output, enc_self_attn_mask)

        # Pair-level classification is decided by the first token (CLS).
        h_pooled = self.activ1(self.fc(output[:, 0]))  # [batch_size, d_model]
        logits_clsf = self.classifier(h_pooled)  # [batch_size, 2]

        # Gather the encoder states at the masked positions for the LM head.
        masked_pos = masked_pos[:, :, None].expand(-1, -1, output.size(-1))
        h_masked = torch.gather(output, 1, masked_pos)
        h_masked = self.norm(self.activ2(self.linear(h_masked)))
        logits_lm = self.decoder(h_masked) + self.decoder_bias  # [batch_size, n_pred, n_vocab]

        # First segment label: token at position 1, pooled like CLS.
        h_mult_sent1 = self.activ1(self.fc(output[:, 1]))
        logits_mclsf1 = self.mclassifier(h_mult_sent1)

        # Second segment label: hidden state of the second <MULT> token of each row.
        is_mult = input_ids == mult_token_id
        if not bool((is_mult.sum(dim=1) == 2).all()):
            raise ValueError("each input sequence must contain exactly two <MULT> tokens")
        # nonzero() lists (row, col) pairs in row-major order, so every second
        # entry is the second <MULT> of its row.
        second_mult_cols = is_mult.nonzero(as_tuple=False)[1::2][:, 1]
        # NOTE(review): unlike the first segment, this state is not passed
        # through fc + tanh before the classifier; kept as-is because the
        # checkpoint was presumably trained this way -- confirm.
        h_mult_sent2 = output[torch.arange(output.size(0)), second_mult_cols]
        logits_mclsf2 = self.mclassifier(h_mult_sent2)

        return logits_lm, logits_clsf, logits_mclsf1, logits_mclsf2
Neptune/Neptune/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "<MULT>": 30522
3
+ }
Neptune/Neptune/model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7f0cace6edaecd1f2e30cd8cbe3ad1d6d43829c3029279308d4ebb2ec8542c0
3
+ size 269574874
Neptune/Neptune/special_tokens_map.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "<MULT>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ }
10
+ ],
11
+ "cls_token": "[CLS]",
12
+ "mask_token": "[MASK]",
13
+ "pad_token": "[PAD]",
14
+ "sep_token": "[SEP]",
15
+ "unk_token": "[UNK]"
16
+ }
Neptune/Neptune/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
Neptune/Neptune/tokenizer_config.json ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "30522": {
44
+ "content": "<MULT>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ }
51
+ },
52
+ "additional_special_tokens": [
53
+ "<MULT>"
54
+ ],
55
+ "clean_up_tokenization_spaces": true,
56
+ "cls_token": "[CLS]",
57
+ "do_lower_case": true,
58
+ "mask_token": "[MASK]",
59
+ "model_max_length": 512,
60
+ "pad_token": "[PAD]",
61
+ "sep_token": "[SEP]",
62
+ "strip_accents": null,
63
+ "tokenize_chinese_chars": true,
64
+ "tokenizer_class": "DistilBertTokenizer",
65
+ "unk_token": "[UNK]"
66
+ }
Neptune/Neptune/unique_labels.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["Physics", "Societies", "admsci", "agriculture", "ai", "applsci", "asi", "biology", "economies", "energies", "environments", "make", "mathematics", "robotics", "sports", "technologies", "vehicles"]
Neptune/Neptune/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
__pycache__/Model.cpython-310.pyc ADDED
Binary file (6.1 kB). View file
 
__pycache__/app.cpython-310.pyc CHANGED
Binary files a/__pycache__/app.cpython-310.pyc and b/__pycache__/app.cpython-310.pyc differ
 
__pycache__/helper_functions.cpython-310.pyc CHANGED
Binary files a/__pycache__/helper_functions.cpython-310.pyc and b/__pycache__/helper_functions.cpython-310.pyc differ
 
app.py CHANGED
@@ -1,13 +1,32 @@
1
- from flask import Flask, render_template,request, redirect,url_for, jsonify
2
- from helper_functions import predict_class
3
  import fitz # PyMuPDF
4
  import os, shutil
5
  import torch
6
- import pickle
7
-
 
 
8
  app = Flask(__name__)
9
  app.config['UPLOAD_FOLDER'] = 'static/uploads'
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  @app.route("/")
12
  def home():
13
  predict_class = ""
@@ -24,6 +43,7 @@ def pdf():
24
 
25
  @app.route('/pdf/upload' , methods = ['POST'])
26
  def treatment():
 
27
  if request.method == 'POST' :
28
  # Récupérer le fichier PDF de la requête
29
  file = request.files['file']
@@ -53,7 +73,7 @@ def treatment():
53
  # Fermer le fichier PDF
54
  pdf_document.close()
55
  # Prepare data for the chart
56
- predicted_class , class_probabilities = predict_class([extracted_text])
57
  chart_data = {
58
  'datasets': [{
59
  'data': list(class_probabilities.values()),
@@ -77,17 +97,20 @@ def treatment():
77
  return render_template('pdf.html',extracted_text = extracted_text, class_probabilities=class_probabilities, predicted_class=predicted_class, chart_data = chart_data)
78
  return render_template('pdf.html')
79
 
 
 
80
  @app.route('/sentence' , methods = ['GET' , 'POST'])
81
  def sentence():
 
82
  if request.method == 'POST':
83
  # Get the form data
84
  text = [request.form['text']]
85
- predicted_class , class_probabilities = predict_class(text)
86
  # Prepare data for the chart
87
  chart_data = {
88
  'datasets': [{
89
  'data': list(class_probabilities.values()),
90
- 'backgroundColor': [color[2] for color in class_probabilities.keys()],
91
  'borderColor': [color[2] for color in class_probabilities.keys()]
92
  }],
93
  'labels': [label[0] for label in class_probabilities.keys()]
@@ -108,6 +131,121 @@ def sentence():
108
  # Render the initial form page
109
  return render_template('sentence.html')
110
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
112
  if __name__ == '__main__':
113
  app.run(debug=True)
 
1
+ from flask import Flask, render_template,request, redirect,url_for, jsonify , session
2
+ from helper_functions import predict_class , prepare_text , inference , predict , align_predictions_with_sentences , load_models
3
  import fitz # PyMuPDF
4
  import os, shutil
5
  import torch
6
+ import tempfile
7
+ from pydub import AudioSegment
8
+ import logging
9
+
10
  app = Flask(__name__)
11
  app.config['UPLOAD_FOLDER'] = 'static/uploads'
12
 
13
# Global variables for models (filled in by init_app)
global_model = None
global_neptune = None
# NOTE(review): global_tokenizer is never assigned by init_app; load_models()
# returns only three objects. Routes that declare it always see None.
global_tokenizer = None
global_pipe = None


def init_app():
    """Load the models via load_models() and store them in the module globals."""
    global global_model, global_neptune, global_pipe
    print("Loading models...")
    global_model, global_neptune, global_pipe = load_models()
    print("Models loaded successfully!")


# `Flask.before_first_request` was deprecated in Flask 2.2 and removed in 2.3,
# so using it makes the module fail at import time on current Flask releases.
# `before_request` exists in every Flask version; the guard keeps the load
# to the first request that finds the models missing.
@app.before_request
def before_first_request():
    """Lazily load the models before the first request is handled."""
    if global_model is None:
        init_app()
29
+
30
  @app.route("/")
31
  def home():
32
  predict_class = ""
 
43
 
44
  @app.route('/pdf/upload' , methods = ['POST'])
45
  def treatment():
46
+ global global_model, global_tokenizer
47
  if request.method == 'POST' :
48
  # Récupérer le fichier PDF de la requête
49
  file = request.files['file']
 
73
  # Fermer le fichier PDF
74
  pdf_document.close()
75
  # Prepare data for the chart
76
+ predicted_class , class_probabilities = predict_class([extracted_text] , global_model)
77
  chart_data = {
78
  'datasets': [{
79
  'data': list(class_probabilities.values()),
 
97
  return render_template('pdf.html',extracted_text = extracted_text, class_probabilities=class_probabilities, predicted_class=predicted_class, chart_data = chart_data)
98
  return render_template('pdf.html')
99
 
100
+ ## Sentence
101
+
102
  @app.route('/sentence' , methods = ['GET' , 'POST'])
103
  def sentence():
104
+ global global_model, global_tokenizer
105
  if request.method == 'POST':
106
  # Get the form data
107
  text = [request.form['text']]
108
+ predicted_class , class_probabilities = predict_class(text , global_model)
109
  # Prepare data for the chart
110
  chart_data = {
111
  'datasets': [{
112
  'data': list(class_probabilities.values()),
113
+ 'backgroundColor': [color[2 ] for color in class_probabilities.keys()],
114
  'borderColor': [color[2] for color in class_probabilities.keys()]
115
  }],
116
  'labels': [label[0] for label in class_probabilities.keys()]
 
131
  # Render the initial form page
132
  return render_template('sentence.html')
133
 
134
+ ## Voice
135
@app.route("/voice_backup")
def slu_backup():
    """Run the sentence-level and document-level classifiers on a fixed sample PDF.

    Extracts the text of every page of a hard-coded PDF, classifies each chunk
    with the Neptune model and the whole text with the DistilBERT model, then
    renders ``voice_backup.html`` with the results.
    """
    input_file = "static/uploads/2022.jep-architectures-neuronales.pdf"
    pdf_document = fitz.open(input_file)
    try:
        # Collect the text of every page, prefixed with its 1-based page number.
        page_texts = [
            f"\nPage {page_num + 1}:\n{pdf_document.load_page(page_num).get_text()}"
            for page_num in range(len(pdf_document))
        ]
    finally:
        # Close the document even when text extraction fails.
        pdf_document.close()
    extracted_text = "".join(page_texts)

    inference_batch, sentences = inference(extracted_text)
    # predict() requires the model as its second argument; calling it with the
    # batch alone raised TypeError on every request.
    predictions = predict(inference_batch, global_neptune)
    sentences_prediction = align_predictions_with_sentences(sentences, predictions)
    predicted_class, class_probabilities = predict_class([extracted_text], global_model)

    # Prepare data for the chart
    chart_data = {
        'datasets': [{
            'data': list(class_probabilities.values()),
            'backgroundColor': [color[2] for color in class_probabilities.keys()],
            'borderColor': [color[2] for color in class_probabilities.keys()]
        }],
        'labels': [label[0] for label in class_probabilities.keys()]
    }
    print(class_probabilities)
    print(chart_data)
    print(sentences_prediction)
    return render_template(
        'voice_backup.html',
        extracted_text=extracted_text,
        class_probabilities=class_probabilities,
        predicted_class=predicted_class,
        chart_data=chart_data,
        sentences_prediction=sentences_prediction,
    )
173
+
174
+ logging.basicConfig(level=logging.DEBUG)
175
+
176
@app.route("/voice", methods=['GET', 'POST'])
def slu():
    """Voice endpoint.

    GET renders an empty ``voice.html``. POST expects an ``audio`` file,
    transcribes it with the speech pipeline, classifies the transcript and
    renders ``voice.html`` with the results. A missing file yields a JSON 400
    and any processing error a JSON 500.
    """
    if request.method != 'POST':
        # For GET request
        logging.debug("Received GET request")
        return render_template('voice.html',
                               class_probabilities={},
                               predicted_class=[""],
                               chart_data={},
                               sentences_prediction={})

    logging.debug("Received POST request")
    audio_file = request.files.get('audio')
    if not audio_file:
        logging.error("No audio file received")
        return jsonify({'error': 'No audio file received'}), 400

    logging.debug(f"Received audio file: {audio_file.filename}")

    # Persist the upload so the speech pipeline can read it from a path.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_audio:
        audio_file.save(temp_audio)
        temp_audio_path = temp_audio.name
    logging.debug(f"Saved audio to temporary file: {temp_audio_path}")

    try:
        # Transcribe the audio with the speech-recognition pipeline.
        extracted_text = global_pipe(temp_audio_path)["text"]
        logging.debug(f"Transcribed text: {extracted_text}")

        # Chunk-level predictions (Neptune) and document-level prediction (DistilBERT).
        inference_batch, sentences = inference(extracted_text)
        predictions = predict(inference_batch, global_neptune)
        sentences_prediction = align_predictions_with_sentences(sentences, predictions)
        predicted_class, class_probabilities = predict_class([extracted_text], global_model)

        label_keys = list(class_probabilities.keys())
        chart_data = {
            'datasets': [{
                'data': list(class_probabilities.values()),
                'backgroundColor': [key[2] for key in label_keys],
                'borderColor': [key[2] for key in label_keys]
            }],
            'labels': [key[0] for key in label_keys]
        }

        response_data = {
            'extracted_text': extracted_text,
            'class_probabilities': class_probabilities,
            'predicted_class': predicted_class,
            'chart_data': chart_data,
            'sentences_prediction': sentences_prediction
        }
        logging.debug(f"Prepared response data: {response_data}")

        return render_template('voice.html',
                               class_probabilities=class_probabilities,
                               predicted_class=predicted_class,
                               chart_data=chart_data,
                               sentences_prediction=sentences_prediction)
    except Exception as e:
        logging.error(f"Error processing audio: {str(e)}")
        return jsonify({'error': str(e)}), 500
    finally:
        # Remove temporary file
        os.unlink(temp_audio_path)
249
 
250
  if __name__ == '__main__':
251
  app.run(debug=True)
helper_functions.py CHANGED
@@ -4,17 +4,38 @@ from transformers import AutoTokenizer , DistilBertForSequenceClassification
4
  from transformers import BatchEncoding, PreTrainedTokenizerBase
5
  from typing import Optional
6
  from torch import Tensor
 
 
 
 
 
7
 
 
8
  # Load the model
9
- model = DistilBertForSequenceClassification.from_pretrained("DistillMDPI1/DistillMDPI1/saved_model")
10
-
11
- # Load the tokenizer
12
- tokenizer = AutoTokenizer.from_pretrained("DistillMDPI1/DistillMDPI1/saved_tokenizer")
13
-
14
- # Charger le label encoder
15
- with open("DistillMDPI1/DistillMDPI1/label_encoder.pkl", "rb") as f:
16
- label_encoder = pickle.load(f)
17
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  class_labels = {
20
  16: ('vehicles','info' , '#4f9ef8'),
@@ -35,7 +56,7 @@ class_labels = {
35
  2: ('administration','pink', '#d63384'),
36
  7: ('biology' ,'cambridge' , '#88aa99')}
37
 
38
- def predict_class(text):
39
  # Tokenisation du texte
40
  inputs = transform_list_of_texts(text, tokenizer, 510, 510, 1, 2550)
41
  # Extraire le tenseur de la liste
@@ -91,7 +112,7 @@ def transform_single_text(
91
  tokens = tokenize_whole_text(text, tokenizer)
92
  input_id_chunks, mask_chunks = split_tokens_into_smaller_chunks(tokens, chunk_size, stride, minimal_chunk_length)
93
  add_special_tokens_at_beginning_and_end(input_id_chunks, mask_chunks)
94
- add_padding_tokens(input_id_chunks, mask_chunks)
95
  input_ids, attention_mask = stack_tokens_from_all_chunks(input_id_chunks, mask_chunks)
96
  return input_ids, attention_mask
97
 
@@ -137,16 +158,19 @@ def add_special_tokens_at_beginning_and_end(input_id_chunks: list[Tensor], mask_
137
  mask_chunks[i] = torch.cat([Tensor([1]), mask_chunks[i], Tensor([1])])
138
 
139
 
140
- def add_padding_tokens(input_id_chunks: list[Tensor], mask_chunks: list[Tensor]) -> None:
141
- """Adds padding tokens (token id = 0) at the end to make sure that all chunks have exactly 512 tokens."""
 
 
142
  for i in range(len(input_id_chunks)):
143
  # get required padding length
144
- pad_len = 512 - input_id_chunks[i].shape[0]
145
  # check if tensor length satisfies required chunk size
146
  if pad_len > 0:
147
  # if padding length is more than 0, we must add padding
148
- input_id_chunks[i] = torch.cat([input_id_chunks[i], Tensor([0] * pad_len)])
149
- mask_chunks[i] = torch.cat([mask_chunks[i], Tensor([0] * pad_len)])
 
150
 
151
 
152
  def stack_tokens_from_all_chunks(input_id_chunks: list[Tensor], mask_chunks: list[Tensor]) -> tuple[Tensor, Tensor]:
@@ -165,3 +189,144 @@ def split_overlapping(tensor: Tensor, chunk_size: int, stride: int, minimal_chun
165
  result = [x for x in result if len(x) >= minimal_chunk_length]
166
  return result
167
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  from transformers import BatchEncoding, PreTrainedTokenizerBase
5
  from typing import Optional
6
  from torch import Tensor
7
+ import numpy as np
8
+ from random import shuffle
9
+ from Model import BERT
10
+ from Model import tokenizer , mult_token_id , cls_token_id , pad_token_id , max_pred , maxlen , sep_token_id
11
+ from transformers import pipeline
12
 
13
+ device = "cpu"
14
  # Load the model
15
def load_models():
    """Load every model used by the app and return ``(model, neptune, pipe)``.

    * ``model``: the DistilBERT sequence classifier from ``saved_model``.
    * ``neptune``: the custom ``BERT`` network with weights from ``model.pt``.
    * ``pipe``: a ``transformers`` automatic-speech-recognition pipeline.

    Side effect: the pickled label encoder is published as the module-level
    ``label_encoder``. It used to be bound to a local and thrown away; the
    previous revision of this module exposed it as a global.
    """
    global label_encoder

    print("Loading DistilBERT model...")
    model = DistilBertForSequenceClassification.from_pretrained("DistillMDPI1/DistillMDPI1/saved_model")

    print("Loading BERT model...")
    neptune = BERT()
    model_save_path = "Neptune/Neptune/model.pt"
    # Use the module-level `device` rather than shadowing it with a local, so
    # loading and predict() agree on where tensors live.
    neptune.load_state_dict(torch.load(model_save_path, map_location=torch.device(device)))
    neptune.to(device)

    print("Loading speech recognition pipeline...")
    pipe = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-tiny.en",
        chunk_length_s=30,
        device=device,
    )
    print(pipe)

    # Load the label encoder.
    # NOTE: pickle.load executes arbitrary code; only ever point this at the
    # trusted file shipped with the application.
    with open("DistillMDPI1/DistillMDPI1/label_encoder.pkl", "rb") as f:
        label_encoder = pickle.load(f)

    return model, neptune, pipe
39
 
40
  class_labels = {
41
  16: ('vehicles','info' , '#4f9ef8'),
 
56
  2: ('administration','pink', '#d63384'),
57
  7: ('biology' ,'cambridge' , '#88aa99')}
58
 
59
+ def predict_class(text , model):
60
  # Tokenisation du texte
61
  inputs = transform_list_of_texts(text, tokenizer, 510, 510, 1, 2550)
62
  # Extraire le tenseur de la liste
 
112
  tokens = tokenize_whole_text(text, tokenizer)
113
  input_id_chunks, mask_chunks = split_tokens_into_smaller_chunks(tokens, chunk_size, stride, minimal_chunk_length)
114
  add_special_tokens_at_beginning_and_end(input_id_chunks, mask_chunks)
115
+ input_ids, attention_mask = stack_tokens_from_all_chunks(input_id_chunks, mask_chunks)
116
  input_ids, attention_mask = stack_tokens_from_all_chunks(input_id_chunks, mask_chunks)
117
  return input_ids, attention_mask
118
 
 
158
  mask_chunks[i] = torch.cat([Tensor([1]), mask_chunks[i], Tensor([1])])
159
 
160
 
161
+
162
def add_padding_tokens(
    input_id_chunks: list[Tensor],
    mask_chunks: list[Tensor],
    chunk_size: int,
    pad_token_id: int = 0,
) -> None:
    """Right-pad every chunk in place to ``chunk_size + 2`` tokens.

    The extra two positions are added to ``chunk_size`` as in the original
    length computation (presumably for the CLS and SEP tokens added earlier).

    Args:
        input_id_chunks: 1-D token-id tensors; modified in place.
        mask_chunks: matching 1-D attention-mask tensors; modified in place.
        chunk_size: number of content tokens per chunk.
        pad_token_id: id used for padding (previously a hard-coded local ``0``).

    Padding is created with ``new_full`` / ``new_zeros`` so it inherits each
    chunk's dtype and device instead of always being a CPU int64 tensor.
    """
    target_len = chunk_size + 2
    for i, (ids, mask) in enumerate(zip(input_id_chunks, mask_chunks)):
        pad_len = target_len - ids.shape[0]
        if pad_len > 0:
            input_id_chunks[i] = torch.cat([ids, ids.new_full((pad_len,), pad_token_id)])
            mask_chunks[i] = torch.cat([mask, mask.new_zeros(pad_len)])
173
+
174
 
175
 
176
  def stack_tokens_from_all_chunks(input_id_chunks: list[Tensor], mask_chunks: list[Tensor]) -> tuple[Tensor, Tensor]:
 
189
  result = [x for x in result if len(x) >= minimal_chunk_length]
190
  return result
191
 
192
+ ## Voice part
193
+
194
def transform_for_inference_text(text: str,
                                 tokenizer: PreTrainedTokenizerBase,
                                 chunk_size: int,
                                 stride: int,
                                 minimal_chunk_length: int,
                                 maximal_text_length: Optional[int],) -> BatchEncoding:
    """Tokenize ``text`` and split it into padded, stacked chunks for the Neptune model.

    When ``maximal_text_length`` is truthy the text is tokenized with
    truncation, otherwise it is tokenized whole. Returns a dict with the
    stacked ``"input_ids"`` and ``"attention_mask"``.
    """
    tokens = (
        tokenize_text_with_truncation(text, tokenizer, maximal_text_length)
        if maximal_text_length
        else tokenize_whole_text(text, tokenizer)
    )
    id_chunks, attention_chunks = split_tokens_into_smaller_chunks(
        tokens, chunk_size, stride, minimal_chunk_length
    )
    add_special_tokens_at_beginning_and_end_inference(id_chunks, attention_chunks)
    add_padding_tokens_inference(id_chunks, attention_chunks, chunk_size)
    stacked_ids, stacked_mask = stack_tokens_from_all_chunks(id_chunks, attention_chunks)
    return {"input_ids": stacked_ids, "attention_mask": stacked_mask}
209
+
210
def add_special_tokens_at_beginning_and_end_inference(input_id_chunks: list[Tensor], mask_chunks: list[Tensor]) -> None:
    """Pass-through step of the inference pipeline: adds no tokens.

    The previous docstring claimed MULT/CLS/SEP tokens were added here, but
    the body only re-created each tensor unchanged; ``prepare_text`` is what
    inserts the special tokens. Each chunk is replaced by a copy of itself so
    the list entries are still fresh tensors, as before.
    """
    for i, (ids, mask) in enumerate(zip(input_id_chunks, mask_chunks)):
        input_id_chunks[i] = ids.clone()
        mask_chunks[i] = mask.clone()
221
+
222
def add_padding_tokens_inference(
    input_id_chunks: list[Tensor],
    mask_chunks: list[Tensor],
    chunk_size: int,
    pad_token_id: int = 0,
) -> None:
    """Right-pad every chunk in place to exactly ``chunk_size`` tokens.

    Args:
        input_id_chunks: 1-D token-id tensors; modified in place.
        mask_chunks: matching 1-D attention-mask tensors; modified in place.
        chunk_size: target length of every chunk.
        pad_token_id: id used for padding (previously a hard-coded local ``0``).

    Padding is created with ``new_full`` / ``new_zeros`` so it inherits each
    chunk's dtype and device instead of always being a CPU int64 tensor.
    """
    for i, (ids, mask) in enumerate(zip(input_id_chunks, mask_chunks)):
        pad_len = chunk_size - ids.shape[0]
        if pad_len > 0:
            input_id_chunks[i] = torch.cat([ids, ids.new_full((pad_len,), pad_token_id)])
            mask_chunks[i] = torch.cat([mask, mask.new_zeros(pad_len)])
233
+
234
def _strip_padding(chunk: Tensor) -> list:
    """Return the token ids of ``chunk`` as a flat list without padding ids."""
    return [token for token in chunk.view(-1).tolist() if token != pad_token_id]


def _build_pair_example(input_ids_a: list, input_ids_b: list) -> list:
    """Build one ``[input_ids, segment_ids, masked_pos]`` example for a chunk pair.

    Layout: ``[CLS] <MULT> a... [SEP] <MULT> b... [SEP]``, padded to ``maxlen``.
    Up to ``max_pred`` randomly chosen non-special positions are replaced by
    the mask token, as the original code did.
    NOTE(review): because of this random masking, predictions for the same
    text are not deterministic -- confirm this is wanted at inference time.
    """
    input_ids = (
        [cls_token_id, mult_token_id] + input_ids_a + [sep_token_id]
        + [mult_token_id] + input_ids_b + [sep_token_id]
    )
    segment_ids = [0] * (1 + 1 + len(input_ids_a) + 1) + [1] * (1 + len(input_ids_b) + 1)

    # MASK LM: choose roughly 15% of the tokens, at least 1 and at most max_pred.
    n_pred = min(max_pred, max(1, int(round(len(input_ids) * 0.15))))
    special_ids = (cls_token_id, sep_token_id, mult_token_id)
    cand_masked_pos = [idx for idx, token in enumerate(input_ids) if token not in special_ids]
    shuffle(cand_masked_pos)
    masked_pos = cand_masked_pos[:n_pred]
    for pos in masked_pos:
        input_ids[pos] = tokenizer.mask_token_id

    # Zero padding up to maxlen (no-op when the sequence is already that long).
    n_pad = maxlen - len(input_ids)
    input_ids.extend([pad_token_id] * n_pad)
    segment_ids.extend([0] * n_pad)

    # Always pad masked_pos to max_pred entries; the original padded by
    # (max_pred - n_pred) and came up short when there were fewer candidate
    # positions than n_pred.
    masked_pos.extend([0] * (max_pred - len(masked_pos)))
    return [input_ids, segment_ids, masked_pos]


def prepare_text(tokens_splitted: BatchEncoding):
    """Pair consecutive chunks and turn every pair into a model input.

    Args:
        tokens_splitted: mapping whose ``'input_ids'`` entry holds one row of
            token ids per chunk.

    Returns:
        ``(batch, sentences)``: ``batch`` has one
        ``[input_ids, segment_ids, masked_pos]`` entry per pair of chunks and
        ``sentences`` holds the decoded text of every chunk, in order. With an
        odd number of chunks the last one is paired with an empty second segment.
    """
    batch = []
    sentences = []
    input_ids_list = tokens_splitted['input_ids']
    n_chunks = len(input_ids_list)

    for i in range(0, n_chunks, 2):
        input_ids_a = _strip_padding(input_ids_list[i])
        sentences.append(tokenizer.decode(input_ids_a))

        if i + 1 < n_chunks:
            input_ids_b = _strip_padding(input_ids_list[i + 1])
            sentences.append(tokenizer.decode(input_ids_b))
        else:
            # Odd chunk count: the last chunk has no partner.
            input_ids_b = []

        batch.append(_build_pair_example(input_ids_a, input_ids_b))
    return batch, sentences
305
+
306
def inference(text: str):
    """Chunk ``text`` into 125-token pieces and build the Neptune model inputs.

    Returns the ``(batch, sentences)`` pair produced by ``prepare_text``.
    """
    return prepare_text(transform_for_inference_text(text, tokenizer, 125, 125, 1, 2550))
310
+
311
def predict(inference_batch,neptune , device = device):
    """Run ``neptune`` on every example and collect the per-segment class ids.

    Each example is ``[input_ids, segment_ids, masked_pos]``. For every
    example the arg-max class of the first and then of the second segment is
    appended, so the result is ordered like the chunks of the text.
    """
    def as_row(values):
        # Plain list -> long tensor with a leading batch dimension of 1.
        return torch.tensor(values, device=device, dtype=torch.long).unsqueeze(0)

    collected = []
    neptune.eval()
    with torch.no_grad():
        for example in inference_batch:
            outputs = neptune(as_row(example[0]), as_row(example[1]), as_row(example[2]))
            # outputs = (logits_lm, logits_clsf, logits_mclsf1, logits_mclsf2)
            for logits in (outputs[2], outputs[3]):
                collected.extend(torch.argmax(logits, dim=1).cpu().detach().numpy())

    return collected
327
+
328
def align_predictions_with_sentences(sentences, preds):
    """Map each sentence to the ``class_labels`` entry of its predicted id.

    Ids missing from ``class_labels`` map to ``"Unknown"``. Sentences and
    predictions are paired positionally; a repeated sentence keeps the label
    of its last occurrence.
    """
    return {
        sentence: class_labels.get(pred, "Unknown")
        for sentence, pred in zip(sentences, preds)
    }
requirements.txt CHANGED
@@ -3,4 +3,6 @@ gunicorn
3
  PyPDF2
4
  PyMuPDF
5
  torch
6
- transformers
 
 
 
3
  PyPDF2
4
  PyMuPDF
5
  torch
6
+ transformers
7
+ pydub
8
+ openai-whisper
static/css/style2.css CHANGED
@@ -3518,6 +3518,11 @@ textarea.form-control-lg {
3518
  background-color: #d6293e;
3519
  border-color: #d6293e;
3520
  }
 
 
 
 
 
3521
  .btn-danger:hover {
3522
  color: #fff;
3523
  background-color: #b62335;
@@ -16576,6 +16581,7 @@ html[data-theme=dark] .light-mode-item {
16576
  z-index: 0;
16577
  border: 2px dashed #eef0f2;
16578
  }
 
16579
  .dropzone .dz-preview.dz-image-preview {
16580
  background: transparent;
16581
  }
@@ -17334,5 +17340,48 @@ html[data-theme=dark] .light-mode-item {
17334
  .mw-80 {
17335
  max-width: 80%;
17336
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17337
  /*User CSS*/
17338
  /*# sourceMappingURL=style.css.map */
 
3518
  background-color: #d6293e;
3519
  border-color: #d6293e;
3520
  }
3521
+ .btn-green {
3522
+ color: #fff;
3523
+ background-color: #72AB5A;
3524
+ border-color: #72AB5A;
3525
+ }
3526
  .btn-danger:hover {
3527
  color: #fff;
3528
  background-color: #b62335;
 
16581
  z-index: 0;
16582
  border: 2px dashed #eef0f2;
16583
  }
16584
+
16585
  .dropzone .dz-preview.dz-image-preview {
16586
  background: transparent;
16587
  }
 
17340
  .mw-80 {
17341
  max-width: 80%;
17342
  }
17343
+ .card-body {
17344
+ display: flex;
17345
+ flex-direction: column;
17346
+ }
17347
+
17348
+ .card-body > div:first-child {
17349
+ flex-grow: 1;
17350
+ }
17351
+
17352
+ .card-body .mt-auto {
17353
+ margin-top: auto;
17354
+ }
17355
+
17356
+ .file-upload-wrapper {
17357
+ position: relative;
17358
+ width: 100%;
17359
+ height: 100px;
17360
+ border: 2px dashed #72AB5A;
17361
+ border-radius: 5px;
17362
+ display: flex;
17363
+ justify-content: center;
17364
+ align-items: center;
17365
+ overflow: hidden;
17366
+ cursor: pointer;
17367
+ background-color: #303131;
17368
+ }
17369
+ .file-upload-wrapper input[type=file] {
17370
+ position: absolute;
17371
+ top: 0;
17372
+ left: 0;
17373
+ width: 100%;
17374
+ height: 100%;
17375
+ opacity: 0;
17376
+ cursor: pointer;
17377
+ }
17378
+ .file-upload-text {
17379
+ color: #fff;
17380
+ text-align: center;
17381
+ }
17382
+ .file-name {
17383
+ margin-top: 10px;
17384
+ color: #72AB5A;
17385
+ }
17386
  /*User CSS*/
17387
  /*# sourceMappingURL=style.css.map */
static/js/dashboard_voice.js ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// Read the chart values, background colors, border colors and labels from
// the query string of the current page URL.
var urlParams = new URLSearchParams(window.location.search);
// URLSearchParams.get() already returns the percent-decoded value, so it is
// parsed as JSON directly; decoding it a second time would mangle (or throw
// on) any value that contains a literal '%'.
var data = JSON.parse(urlParams.get('data'));
var backgroundColor = JSON.parse(urlParams.get('backgroundColor'));
var borderColor = JSON.parse(urlParams.get('borderColor'));
var labels = JSON.parse(urlParams.get('labels'));

/**
 * Create the doughnut chart on the #bestSellers canvas, or refresh the chart
 * stored in window.myChart when one already exists.
 *
 * @param {Array} data             Values of the single dataset.
 * @param {Array} backgroundColor  Fill colors, one per value.
 * @param {Array} borderColor      Border colors, one per value.
 * @param {Array} labels           Labels, one per value.
 */
function initializeOrUpdateChart(data, backgroundColor, borderColor, labels) {
    if (window.myChart) {
        // A chart already exists: swap its data in place and redraw.
        var dataset = window.myChart.data.datasets[0];
        dataset.data = data;
        dataset.backgroundColor = backgroundColor;
        dataset.borderColor = borderColor;
        window.myChart.data.labels = labels;
        window.myChart.update();
        return;
    }

    // First call: build the chart and remember it for later updates.
    var ctx = document.getElementById('bestSellers').getContext('2d');
    window.myChart = new Chart(ctx, {
        type: 'doughnut',
        data: {
            datasets: [{
                data: data,
                backgroundColor: backgroundColor,
                borderColor: borderColor
            }],
            labels: labels
        },
        // NOTE(review): `cutoutPercentage` and a root-level `legend` are
        // Chart.js 2.x option names -- confirm which Chart.js version this
        // script is loaded with.
        options: {
            responsive: true,
            cutoutPercentage: 80,
            legend: {
                display: false
            },
            animation: {
                animateScale: true,
                animateRotate: true
            },
            plugins: {
                datalabels: {
                    display: false,
                    align: 'center',
                    anchor: 'center'
                }
            }
        }
    });
}

// Initialize or update the chart when the script is loaded
initializeOrUpdateChart(data, backgroundColor, borderColor, labels);
static/js/voice.js ADDED
@@ -0,0 +1,233 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// Elements of the current page that are refreshed with server-rendered results.
const reset = document.getElementById("reset");
const currentClassProbabilitiesList = document.getElementById("class-probabilities");
const currentPredictedClass = document.getElementById('predicted-class');
const staticDiv = document.getElementById("static");
const dynamicDiv = document.getElementById("dynamic");
// Raw `data-chart` attribute of the most recently loaded results.
var chartData;

// Recorder state shared by startRecording() and stopRecording().
let mediaRecorder;
let audioChunks = [];

// Once the DOM is ready, load the initial results and wire up the controls.
document.addEventListener('DOMContentLoaded', () => {
    loadResults();
    attachEventListeners();
});
15
+
16
/**
 * Bind the click handlers of the record, stop and upload buttons.
 */
function attachEventListeners() {
    // Button id -> click handler, bound in this order.
    const clickHandlers = {
        startRecord: startRecording,
        stopRecord: stopRecording,
        uploadAudio: handleAudioUpload
    };
    Object.entries(clickHandlers).forEach(([buttonId, handler]) => {
        document.getElementById(buttonId).addEventListener('click', handler);
    });
}
21
+
22
/**
 * (Re)draw the doughnut chart on the #bestSellers canvas.
 *
 * Any chart already attached to the canvas is destroyed first so the canvas
 * can be reused.
 *
 * @param {Array} data             Values; each is passed through
 *                                 parseFloat(...).toFixed(2) before plotting.
 * @param {Array} backgroundColor  Fill colors, one per value.
 * @param {Array} borderColor      Border colors, one per value.
 * @param {Array} [labels]         Optional labels, one per value. Callers in
 *                                 this file already pass them as the fourth
 *                                 argument.
 */
function initializeChart(data, backgroundColor, borderColor, labels) {
    const canvas = document.getElementById('bestSellers');

    // Destroy existing chart if it exists
    const existingChart = Chart.getChart(canvas);
    if (existingChart) {
        existingChart.destroy();
    }

    // Clear the canvas
    const context = canvas.getContext('2d');
    context.clearRect(0, 0, canvas.width, canvas.height);

    const chartContent = {
        datasets: [{
            data: data.map(element => parseFloat(element).toFixed(2)),
            backgroundColor: backgroundColor,
            borderColor: borderColor,
            borderWidth: 1
        }]
    };
    if (labels) {
        chartContent.labels = labels;
    }

    new Chart(canvas, {
        type: 'doughnut',
        data: chartContent,
        options: {
            responsive: true,
            cutout: '80%',
            layout: {
                padding: 0
            },
            elements: {
                arc: {
                    borderWidth: 0
                }
            },
            // All plugin settings live in ONE `plugins` object. Listing the
            // key twice in an object literal keeps only the last occurrence,
            // which silently dropped the legend/tooltip settings.
            plugins: {
                legend: {
                    display: true
                },
                tooltip: {
                    enabled: false
                },
                datalabels: {
                    display: false,
                    align: 'center',
                    anchor: 'center'
                }
            }
        }
    });
}
79
+
80
/**
 * Fetch the server-rendered /voice page and copy its results (class
 * probabilities, predicted class and chart) into the current page.
 *
 * Parts that are missing from the response are skipped instead of raising,
 * so a page without results yet does not log an error on every load.
 */
function loadResults() {
    fetch('/voice')
        .then(response => response.text())
        .then(html => {
            const responseDOM = new DOMParser().parseFromString(html, "text/html");

            const classProbabilitiesList = responseDOM.getElementById("class-probabilities");
            if (classProbabilitiesList) {
                currentClassProbabilitiesList.innerHTML = classProbabilitiesList.innerHTML;
            }
            const predictedClass = responseDOM.getElementById("predicted-class");
            if (predictedClass) {
                currentPredictedClass.innerHTML = predictedClass.innerHTML;
            }

            const canvasElement = responseDOM.querySelector('.bestSellers');
            chartData = canvasElement ? canvasElement.getAttribute('data-chart') : null;
            if (!chartData) {
                return;
            }

            // The attribute may hold JSON `null` or an object without
            // datasets when nothing has been classified yet.
            const parsedChartData = JSON.parse(chartData);
            const dataset = parsedChartData && parsedChartData.datasets && parsedChartData.datasets[0];
            if (!dataset) {
                return;
            }

            // Only the first five entries are displayed.
            initializeChart(
                dataset.data.slice(0, 5),
                dataset.backgroundColor.slice(0, 5),
                dataset.borderColor.slice(0, 5),
                (parsedChartData.labels || []).slice(0, 5)
            );
        })
        .catch(error => console.error('Error:', error));
}
106
+
107
/**
 * Ask for microphone access and start recording into `audioChunks`.
 *
 * On success the Start button is disabled and the Stop button enabled. If
 * access is refused or no microphone is available the error is logged and
 * the buttons are left unchanged.
 */
function startRecording() {
    navigator.mediaDevices.getUserMedia({ audio: true })
        .then(stream => {
            audioChunks = [];
            mediaRecorder = new MediaRecorder(stream);
            // Attach the listener before starting so no chunk can be missed.
            mediaRecorder.addEventListener("dataavailable", event => {
                audioChunks.push(event.data);
            });
            mediaRecorder.start();

            document.getElementById('startRecord').disabled = true;
            document.getElementById('stopRecord').disabled = false;
        })
        .catch(error => {
            // getUserMedia rejects when permission is denied or no input
            // device exists; without this handler the rejection is unhandled.
            console.error('Error:', error);
        });
}
122
+
123
/**
 * Stop the active recording, release the microphone and upload the audio.
 *
 * Does nothing when no recording is in progress.
 */
function stopRecording() {
    // MediaRecorder.stop() throws on an inactive recorder.
    if (!mediaRecorder || mediaRecorder.state === 'inactive') {
        return;
    }

    const recorder = mediaRecorder;
    // Register the handler BEFORE calling stop(), and only once, so it can
    // neither be missed nor accumulate across repeated calls.
    recorder.addEventListener("stop", () => {
        // NOTE(review): the blob is labelled 'audio/wav', but MediaRecorder
        // encodes in the browser's own container (see recorder.mimeType) --
        // confirm the server does not rely on this label.
        const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
        // Release the microphone so the browser stops showing it as in use.
        recorder.stream.getTracks().forEach(track => track.stop());
        sendAudioToServer(audioBlob);
    }, { once: true });
    recorder.stop();

    document.getElementById('startRecord').disabled = false;
    document.getElementById('stopRecord').disabled = true;
}
133
+
134
/**
 * Upload the first file chosen in the #audioFileInput field, or log an error
 * when no file has been selected.
 */
function handleAudioUpload() {
    const selectedFiles = document.getElementById('audioFileInput').files;
    if (selectedFiles.length === 0) {
        console.error('No file selected');
        return;
    }
    sendAudioToServer(selectedFiles[0]);
}
143
/**
 * POST an audio Blob/File to /voice and refresh the results shown on the page
 * from the HTML the server sends back.
 *
 * This is the single definition of the function: the earlier empty duplicate
 * and the stray copy of the request that ran at script load (and failed on
 * the undefined `formData`) have been removed.
 *
 * @param {Blob} audioData  Recorded or uploaded audio, sent as form field
 *                          `audio`.
 */
function sendAudioToServer(audioData) {
    const formData = new FormData();
    formData.append('audio', audioData);

    const loadingIndicator = document.getElementById('loadingIndicator');
    loadingIndicator.style.display = 'block';

    // Clear existing chart
    const canvas = document.getElementById('bestSellers');
    const existingChart = Chart.getChart(canvas);
    if (existingChart) {
        existingChart.destroy();
    }
    const context = canvas.getContext('2d');
    context.clearRect(0, 0, canvas.width, canvas.height);

    fetch('/voice', {
        method: 'POST',
        body: formData
    })
        .then(response => response.text())
        .then(html => {
            const newDocument = new DOMParser().parseFromString(html, 'text/html');

            // Update only the necessary parts of the page
            ['class-probabilities', 'predicted-class', 'transcribedText', 'classifiedText'].forEach(id => {
                document.getElementById(id).innerHTML = newDocument.getElementById(id).innerHTML;
            });
            dynamicDiv.classList.remove('d-none');
            staticDiv.classList.add('d-none');

            // Update chart
            const newCanvasElement = newDocument.querySelector('.bestSellers');
            if (newCanvasElement) {
                const newChartData = newCanvasElement.getAttribute('data-chart');
                if (newChartData) {
                    const parsedChartData = JSON.parse(newChartData);
                    // Only the first five entries are displayed.
                    initializeChart(
                        parsedChartData.datasets[0].data.slice(0, 5),
                        parsedChartData.datasets[0].backgroundColor.slice(0, 5),
                        parsedChartData.datasets[0].borderColor.slice(0, 5),
                        parsedChartData.labels.slice(0, 5)
                    );
                }
            }
        })
        .catch(error => {
            console.error('Error:', error);
        })
        .finally(() => {
            // Hide the overlay whether the request succeeded or failed.
            loadingIndicator.style.display = 'none';
        });
}
static/js/voice_backup.js ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// Elements of the current page that are refreshed with server-rendered results.
const reset = document.getElementById("reset");
const currentClassProbabilitiesList = document.getElementById("class-probabilities");
const currentPredictedClass = document.getElementById('predicted-class');
const staticDiv = document.getElementById("static");
const dynamicDiv = document.getElementById("dynamic");
// Raw `data-chart` attribute of the most recently loaded results.
var chartData;
7
+
8
+
9
+
10
/**
 * Draw a doughnut chart on every canvas carrying the `bestSellers` class.
 * Defined first so the functions below can call it.
 *
 * @param {Array} data             Values; each is passed through
 *                                 parseFloat(...).toFixed(2) before plotting.
 * @param {Array} backgroundColor  Fill colors, one per value.
 * @param {Array} borderColor      Border colors, one per value.
 * @param {Array} labels           Labels, one per value.
 */
function initializeChart(data, backgroundColor, borderColor, labels) {
    data = data.map(element => parseFloat(element).toFixed(2));

    // One new Chart.js instance (with its own config object) per canvas.
    for (const canvas of document.querySelectorAll('.bestSellers')) {
        new Chart(canvas, {
            type: 'doughnut',
            data: {
                datasets: [{
                    data: data,
                    backgroundColor: backgroundColor,
                    borderColor: borderColor,
                }],
                labels: labels
            },
            options: {
                responsive: true,       // resize with the container
                cutoutPercentage: 80,   // size of the hole in the doughnut
                legend: {
                    display: false,     // hide the built-in legend
                },
                animation: {
                    animateScale: true,
                    animateRotate: true
                },
                plugins: {
                    datalabels: {
                        display: false,
                        align: 'center',
                        anchor: 'center'
                    }
                }
            }
        });
    }
}
49
// Load the initial results as soon as the DOM is ready.
document.addEventListener('DOMContentLoaded', () => {
    loadResults();
});
52
/**
 * Fetch the server-rendered /voice page, copy its class probabilities and
 * predicted class into the current page, then draw the chart from the
 * `data-chart` attribute of the first `.bestSellers` canvas in the response.
 * Any failure along the way is logged to the console.
 */
function loadResults() {
    const applyResponse = html => {
        const responseDOM = new DOMParser().parseFromString(html, "text/html");

        currentClassProbabilitiesList.innerHTML =
            responseDOM.getElementById("class-probabilities").innerHTML;
        currentPredictedClass.innerHTML =
            responseDOM.getElementById("predicted-class").innerHTML;

        // First element carrying the 'bestSellers' class.
        const canvasElement = responseDOM.querySelector('.bestSellers');
        console.log(canvasElement);
        chartData = canvasElement.getAttribute('data-chart');
        console.log(chartData);

        // Parse once; only the first five entries are displayed.
        const parsed = JSON.parse(chartData);
        const firstDataset = parsed.datasets[0];

        // Create the new charts.
        loadDashboardScript(
            firstDataset.data.slice(0, 5),
            firstDataset.backgroundColor.slice(0, 5),
            firstDataset.borderColor.slice(0, 5),
            parsed.labels.slice(0, 5)
        );
    };

    fetch('/voice')
        .then(response => response.text())
        .then(applyResponse)
        .catch(error => console.error('Error:', error));
}
76
+
77
+
78
+
79
/**
 * Append the dashboard_pdf.js script to the page and draw the chart once the
 * script has finished loading.
 */
function loadDashboardScript(data, backgroundColor, borderColor, labels) {
    const dashboardScript = Object.assign(document.createElement('script'), {
        type: 'text/javascript',
        src: '../static/js/dashboard_pdf.js',
        // Wait for the script to load before initializing the chart.
        onload: () => {
            initializeChart(data, backgroundColor, borderColor, labels);
        }
    });
    document.body.appendChild(dashboardScript);
}
89
+
90
/**
 * Destroy the Chart.js instance, if any, attached to each canvas carrying
 * the `bestSellers` class.
 */
function destroyPreviousCharts() {
    for (const canvas of document.querySelectorAll('.bestSellers')) {
        const attachedChart = Chart.getChart(canvas);
        if (attachedChart) {
            attachedChart.destroy();
        }
    }
}
101
+
102
/**
 * Wrap `response` in a new <ul class="graph-legend-rectangle"> element and
 * return that element (it is not attached to the document).
 */
function createResponseElement(response) {
    const legendList = document.createElement('ul');
    legendList.className = 'graph-legend-rectangle';
    // NOTE(review): `response` is inserted as raw HTML -- confirm it only
    // ever comes from a trusted source.
    legendList.innerHTML = response;
    return legendList;
}
108
+
109
/**
 * Append dashboard_pdf.js to the page again, passing the chart data, colors
 * and labels as JSON-encoded query-string parameters of the script URL.
 */
function reloadDashboardScript(data, backgroundColor, borderColor, labels) {
    // Parameter order is part of the generated URL; keep it as listed.
    const query = [
        ['data', data],
        ['backgroundColor', backgroundColor],
        ['borderColor', borderColor],
        ['labels', labels]
    ]
        .map(([key, value]) => `${key}=${encodeURIComponent(JSON.stringify(value))}`)
        .join('&');

    const dashboardScript = document.createElement('script');
    dashboardScript.type = 'text/javascript';
    dashboardScript.src = `../static/js/dashboard_pdf.js?${query}`;
    document.body.appendChild(dashboardScript);
}
templates/pdf.html CHANGED
@@ -42,6 +42,9 @@
42
  <li class="nav-item nav-pills nav-pills-ocr">
43
  <a class="nav-item nav-link " href="sentence">Text Classifier</a>
44
  </li>
 
 
 
45
  </ul>
46
  </div>
47
  <!-- Main navbar END -->
 
42
  <li class="nav-item nav-pills nav-pills-ocr">
43
  <a class="nav-item nav-link " href="sentence">Text Classifier</a>
44
  </li>
45
+ <li class="nav-item nav-pills nav-pills-ocr">
46
+ <a class="nav-item nav-link " href="voice">SLU</a>
47
+ </li>
48
  </ul>
49
  </div>
50
  <!-- Main navbar END -->
templates/sentence.html CHANGED
@@ -44,6 +44,9 @@
44
  <li class="nav-item nav-pills nav-pills-ocr">
45
  <a class="nav-item nav-link active" href="sentence">Text Classifier</a>
46
  </li>
 
 
 
47
  </ul>
48
  </div>
49
  <!-- Main navbar END -->
 
44
  <li class="nav-item nav-pills nav-pills-ocr">
45
  <a class="nav-item nav-link active" href="sentence">Text Classifier</a>
46
  </li>
47
+ <li class="nav-item nav-pills nav-pills-ocr">
48
+ <a class="nav-item nav-link " href="voice">SLU</a>
49
+ </li>
50
  </ul>
51
  </div>
52
  <!-- Main navbar END -->
templates/voice.html ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en-US">
3
+
4
+ <head>
5
+ <title>Speech Transcription</title>
6
+ <link rel="icon" href="https://cdn-icons-png.flaticon.com/512/5262/5262072.png">
7
+ <link rel="stylesheet" href="../static/css/style2.css">
8
+ <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.11.3/font/bootstrap-icons.min.css">
9
+ <link rel="stylesheet" href="../static/css/vendor.bundle.base.css">
10
+ </head>
11
+
12
+ <body style="background-color: #1F2020;">
13
+ <nav class="navbar navbar-expand-lg bg-ocr mb-5">
14
+ <div class="container h-100">
15
+ <!-- Logo START -->
16
+ <a class="navbar-brand" href="pdf">
17
+ <img class="navbar-brand-item" src="../static/icons/avignon_universite_blanc_RVB.png" alt="logo">
18
+ </a>
19
+ <!-- Logo END -->
20
+
21
+ <!-- Responsive navbar toggler -->
22
+ <button class="navbar-toggler ms-auto icon-md btn btn-light p-0 collapsed" type="button"
23
+ data-bs-toggle="collapse" data-bs-target="#navbarCollapse" aria-controls="navbarCollapse"
24
+ aria-expanded="false" aria-label="Toggle navigation">
25
+ <span class="navbar-toggler-animation">
26
+ <span></span>
27
+ <span></span>
28
+ <span></span>
29
+ </span>
30
+ </button>
31
+
32
+ <!-- Main navbar START -->
33
+ <div class="navbar-collapse collapse" id="navbarCollapse">
34
+ <ul class="navbar-nav navbar-nav-scroll ms-auto p-xl-0">
35
+ <li class="nav-item nav-pills nav-pills-ocr">
36
+ <a class="nav-item nav-link mb-5" href="pdf">PDF Classifier</a>
37
+ </li>
38
+ <li class="nav-item nav-pills nav-pills-ocr">
39
+ <a class="nav-item nav-link " href="sentence">Text Classifier</a>
40
+ </li>
41
+ <li class="nav-item nav-pills nav-pills-ocr">
42
+ <a class="nav-item nav-link active" href="voice">SLU</a>
43
+ </li>
44
+ </ul>
45
+ </div>
46
+ <!-- Main navbar END -->
47
+ </div>
48
+ </nav>
49
+
50
+ <main class="pt-3 pt-md-5 pb-md-5 pt-lg-0 mt-3">
51
+ <div class="container mt-lg-7">
52
+ <div class="row">
53
+ <div class="col-lg-6 mb-4">
54
+ <div class="card bg-ocr h-100">
55
+ <div class="card-body">
56
+ <h5 class="card-title text-white">Record Speech</h5>
57
+ <p class="card-text text-white-50">Click the button to start recording your speech.</p>
58
+ <div class="mt-auto text-center">
59
+ <button id="startRecord" class="btn btn-ocr">Start Recording</button>
60
+ <button id="stopRecord" class="btn btn-green" disabled>Stop Recording</button>
61
+ </div>
62
+ </div>
63
+ </div>
64
+ </div>
65
+ <div class="col-lg-6 mb-4">
66
+ <div class="card bg-ocr">
67
+ <div class="card-body">
68
+ <h5 class="card-title text-white">Upload Audio File</h5>
69
+ <p class="card-text text-white-50">Click or drag and drop to upload an audio file.</p>
70
+ <div class="file-upload-wrapper" id="fileUploadWrapper">
71
+ <input type="file" id="audioFileInput" accept="audio/*">
72
+ <div class="file-upload-text">
73
+ <i class="bi bi-cloud-upload" style="font-size: 2rem;"></i>
74
+ <p>Click or drag audio file here</p>
75
+ <p class="file-name"></p>
76
+ </div>
77
+ </div>
78
+ <div class="text-center">
79
+ <button id="uploadAudio" class="btn btn-ocr mt-3 text-center">Upload and
80
+ Transcribe</button>
81
+ </div>
82
+ </div>
83
+ </div>
84
+ </div>
85
+ </div>
86
+ <h1 class="text-center text-white-50"> Results</h1>
87
+ <div class="card" id = 'static' style="background-color: #303131; border: 2px dashed rgb(82, 82, 82);">
88
+ <div class="card-body text-center" id="static">
89
+ <div class="col-12 d-flex justify-content-center">
90
+ <div class="col-sm-12 d-inline align-items-center" style="height: 175px; width: 229px;">
91
+ <div class="flex-shrink-0 avatar avatar-lg me-2 mb-3 mt-4">
92
+ <img class="avatar-img rounded-circle"
93
+ src="../static/icons/logo_header_128x128.png" alt="">
94
+ </div>
95
+ <h5 class="card-title text-white-50">Get more insights about your speech 📣📢.</h5>
96
+ </div>
97
+
98
+ </div>
99
+ </div>
100
+ </div>
101
+ <div class="row d-none" id = 'dynamic'>
102
+ <div class="col-4">
103
+ <div class="nav flex-column position-sticky top-10 shadow-lg" style="background-color: #222424;">
104
+ <div class="card mb-3" style="background-color: #303131; border: 2px dashed rgb(82, 82, 82);">
105
+ <div class="card-body">
106
+ <h4 class="card-title text-white">Text's Category</h4>
107
+ <div class="row d-flex">
108
+ <div class="col-sm-7 col-6">
109
+ <ul class="graphl-legend-rectangle" id="class-probabilities">
110
+ {% if class_probabilities %}
111
+ {% for class_label, probability in class_probabilities.items() %}
112
+ {% if loop.index <= 5 %}
113
+ <li class="text-white-50">
114
+ <span class="bg-{{ class_label[1] }}"></span>
115
+ <div class="d-flex justify-content-center">
116
+ {{ class_label[0] }}:
117
+ <span class="text-white w-100">&nbsp;{{ "%.2f" % (probability) }}%</span>
118
+ </div>
119
+ </li>
120
+ {% endif %}
121
+ {% endfor %}
122
+ {% endif %}
123
+ </ul>
124
+ </div>
125
+ <div class="col-sm-5 grid-margin col-6">
126
+ <canvas class="bestSellers" data-chart='{{ chart_data | tojson}}'
127
+ id="bestSellers"></canvas>
128
+ </div>
129
+ </div>
130
+ <div class="mb-lg-0 text-white-50">
131
+ la classe la plus dominante est <span class="fw-bolder text-white"
132
+ id="predicted-class">{{
133
+ predicted_class[0] if predicted_class else ""
134
+ }}</span>
135
+ </div>
136
+ </div>
137
+ </div>
138
+ <div class="card my-auto mt-3"
139
+ style="background-color: #303131; border: 2px dashed rgb(82, 82, 82);">
140
+ <div class="card-body">
141
+ <h4 class="card-title text-white">Legend</h4>
142
+ <div class="row d-flex">
143
+ <div class="col-sm-6 col-6">
144
+ <ul class="graphl-legend-rectangle">
145
+ <li class="text-white-50"><span class="bg-info "></span>vehicles
146
+ </li>
147
+ <li class="text-white-50"><span class="bg-success"></span>environments
148
+ </li>
149
+ <li class="text-white-50"><span class="bg-danger"></span>energies
150
+ </li>
151
+ <li class="text-white-50"><span class="bg-primary"></span>Physics
152
+ </li>
153
+ <li class="text-white-50"><span class="bg-moss"></span>robotics
154
+ </li>
155
+ <li class="text-white-50"><span class="bg-agri"></span>agriculture
156
+ </li>
157
+ <li class="text-white-50"><span class="bg-yellow"></span>ML
158
+ </li>
159
+ <li class="text-white-50"><span class="bg-warning"></span>economies
160
+ </li>
161
+ <li class="text-white-50"><span class="bg-vanila"></span>technologies
162
+ </li>
163
+ </ul>
164
+ </div>
165
+ <div class="col-sm-6 col-6">
166
+ <ul class="graphl-legend-rectangle">
167
+
168
+ <li class="text-white-50"><span class="bg-coffe"></span>mathematics
169
+ </li>
170
+ <li class="text-white-50"><span class="bg-orange "></span>sports
171
+ </li>
172
+ <li class="text-white-50"><span class="bg-cyan"></span>AI
173
+ </li>
174
+ <li class="text-white-50"><span class="bg-rosy"></span>Innovation
175
+ </li>
176
+ <li class="text-white-50"><span class="bg-picton"></span>Science
177
+ </li>
178
+ <li class="text-white-50"><span class="bg-purple"></span>Societies
179
+ </li>
180
+ <li class="text-white-50"><span class="bg-pink"></span>administration
181
+ </li>
182
+ <li class="text-white-50"><span class="bg-cambridge"></span>biology
183
+ </li>
184
+ </ul>
185
+ </div>
186
+ </div>
187
+ </div>
188
+ </div>
189
+ </div>
190
+ </div>
191
+
192
+ <div class="col-8">
193
+ <div class="card" style="background-color: #303131; border: 2px dashed rgb(82, 82, 82);">
194
+ <div class="card-body">
195
+ <h5 class="card-title text-white mb-3">Transcribed and Classified Text</h5>
196
+ <div id="transcribedText" class="text-white-50 mb-4"></div>
197
+ <div id="static" class="text-center">
198
+ <div class="col-12 d-flex justify-content-center">
199
+ <div class="col-sm-12 d-inline align-items-center" id="classifiedText">
200
+ {% if sentences_prediction %}
201
+ {% for sentence, color in sentences_prediction.items() %}
202
+ <span class="text-bold text-start bg-{{color[1]}}">
203
+ {{sentence}}
204
+ </span>
205
+ {% endfor %}
206
+ {% endif %}
207
+ </div>
208
+ </div>
209
+ </div>
210
+ </div>
211
+ </div>
212
+ </div>
213
+ </div>
214
+ </div>
215
+ </main>
216
+
217
+ <div id="loadingIndicator" style="display: none; position: fixed; top: 0; left: 0; width: 100%; height: 100%; background-color: rgba(0,0,0,0.5); z-index: 9999;">
218
+ <div style="position: absolute; top: 50%; left: 50%; transform: translate(-50%, -50%); color: white;">
219
+ Processing...
220
+ </div>
221
+ </div>
222
+
223
+ <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
224
+ <script src="../static/js/voice.js" type="text/javascript"></script>
225
+ <script src="../static/js/vendor.bundle.base.js"></script>
226
+ </body>
227
+
228
+ </html>
templates/voice_backup.html ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en-US">
3
+
4
+ <head>
5
+ <title>Speech Transcription</title>
6
+ <link rel="icon" href="https://cdn-icons-png.flaticon.com/512/5262/5262072.png">
7
+ <link rel="stylesheet" href="../static/css/style2.css">
8
+ <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.11.3/font/bootstrap-icons.min.css">
9
+ <link rel="stylesheet" href="../static/css/vendor.bundle.base.css">
10
+ <style>
11
+
12
+ </style>
13
+ </head>
14
+
15
+ <body style="background-color: #1F2020;">
16
+ <nav class="navbar navbar-expand-lg bg-ocr mb-5">
17
+ <div class="container h-100">
18
+ <!-- Logo START -->
19
+ <a class="navbar-brand" href="pdf">
20
+ <img class="navbar-brand-item" src="../static/icons/avignon_universite_blanc_RVB.png" alt="logo">
21
+ </a>
22
+ <!-- Logo END -->
23
+
24
+ <!-- Responsive navbar toggler -->
25
+ <button class="navbar-toggler ms-auto icon-md btn btn-light p-0 collapsed" type="button"
26
+ data-bs-toggle="collapse" data-bs-target="#navbarCollapse" aria-controls="navbarCollapse"
27
+ aria-expanded="false" aria-label="Toggle navigation">
28
+ <span class="navbar-toggler-animation">
29
+ <span></span>
30
+ <span></span>
31
+ <span></span>
32
+ </span>
33
+ </button>
34
+
35
+ <!-- Main navbar START -->
36
+ <div class="navbar-collapse collapse" id="navbarCollapse">
37
+ <ul class="navbar-nav navbar-nav-scroll ms-auto p-xl-0">
38
+ <!-- Nav item 1: PDF Classifier -->
39
+ <li class="nav-item nav-pills nav-pills-ocr">
40
+ <a class="nav-item nav-link mb-5" href="pdf">PDF Classifier</a>
41
+ </li>
42
+ <!-- Nav item 2: Text Classifier -->
43
+ <li class="nav-item nav-pills nav-pills-ocr">
44
+ <a class="nav-item nav-link " href="sentence">Text Classifier</a>
45
+ </li>
46
+ <li class="nav-item nav-pills nav-pills-ocr">
47
+ <a class="nav-item nav-link active" href="voice">SLU</a>
48
+ </li>
49
+ </ul>
50
+ </div>
51
+ <!-- Main navbar END -->
52
+ </div>
53
+ </nav>
54
+
55
+ <main class="pt-3 pt-md-5 pb-md-5 pt-lg-0 mt-3">
56
+ <div class="container mt-lg-7">
57
+ <div class="row">
58
+ <div class="col-lg-6 mb-4">
59
+ <div class="card bg-ocr h-100">
60
+ <div class="card-body">
61
+ <h5 class="card-title text-white">Record Speech</h5>
62
+ <p class="card-text text-white-50">Click the button to start recording your speech.</p>
63
+ <div class="mt-auto text-center">
64
+ <button id="startRecord" class="btn btn-ocr">Start Recording</button>
65
+ <button id="stopRecord" class="btn btn-green" disabled>Stop Recording</button>
66
+ </div>
67
+ </div>
68
+ </div>
69
+ </div>
70
+ <div class="col-lg-6 mb-4">
71
+ <div class="card bg-ocr">
72
+ <div class="card-body">
73
+ <h5 class="card-title text-white">Upload Audio File</h5>
74
+ <p class="card-text text-white-50">Click or drag and drop to upload an audio file.</p>
75
+ <div class="file-upload-wrapper" id="fileUploadWrapper">
76
+ <input type="file" id="audioFileInput" accept="audio/*">
77
+ <div class="file-upload-text">
78
+ <i class="bi bi-cloud-upload" style="font-size: 2rem;"></i>
79
+ <p>Click or drag audio file here</p>
80
+ <p class="file-name"></p>
81
+ </div>
82
+ </div>
83
+ <div class="text-center">
84
+ <button id="uploadAudio" class="btn btn-ocr mt-3 text-center">Upload and
85
+ Transcribe</button>
86
+ </div>
87
+ </div>
88
+ </div>
89
+ </div>
90
+ </div>
91
+ <h1 class="text-center text-white-50"> Results</h1>
92
+ <div class="row">
93
+ <div class="col-4">
94
+ <div class="nav flex-column position-sticky top-10 shadow-lg" style="background-color: #222424;">
95
+ <div class="card mb-3" style="background-color: #303131; border: 2px dashed rgb(82, 82, 82);">
96
+ <div class="card-body">
97
+ <h4 class="card-title text-white">Text's Category</h4>
98
+ <div class="row d-flex">
99
+ <div class="col-sm-7 col-6">
100
+ <ul class="graphl-legend-rectangle" id="class-probabilities">
101
+ {% for class_label, probability in class_probabilities.items() %}
102
+ {% if loop.index <= 5 %} <li class="text-white-50">
103
+ <span class="bg-{{ class_label[1] }}"></span>
104
+ <div class="d-flex justify-content-center">
105
+ {{ class_label[0] }}:
106
+ <span class="text-white w-100">&nbsp;{{ "%.2f" % probability
107
+ }}%</span>
108
+ </div>
109
+ </li>
110
+ {% endif %}
111
+ {% endfor %}
112
+ </ul>
113
+ </div>
114
+ <div class="col-sm-5 grid-margin col-6">
115
+ <canvas class="bestSellers" data-chart='{{ chart_data | tojson }}'
116
+ id="bestSellers#"></canvas>
117
+ </div>
118
+ </div>
119
+ <div class="mb-lg-0 text-white-50">
120
+ la classe la plus dominante est <span class="fw-bolder text-white"
121
+ id="predicted-class">{{
122
+ predicted_class[0]
123
+ }}</span>
124
+ </div>
125
+ </div>
126
+ </div>
127
+ <div class="card my-auto mt-3"
128
+ style="background-color: #303131; border: 2px dashed rgb(82, 82, 82);">
129
+ <div class="card-body">
130
+ <h4 class="card-title text-white">Legend</h4>
131
+ <div class="row d-flex">
132
+ <div class="col-sm-6 col-6">
133
+ <ul class="graphl-legend-rectangle">
134
+ <li class="text-white-50"><span class="bg-info "></span>vehicles
135
+ </li>
136
+ <li class="text-white-50"><span class="bg-success"></span>environments
137
+ </li>
138
+ <li class="text-white-50"><span class="bg-danger"></span>energies
139
+ </li>
140
+ <li class="text-white-50"><span class="bg-primary"></span>Physics
141
+ </li>
142
+ <li class="text-white-50"><span class="bg-moss"></span>robotics
143
+ </li>
144
+ <li class="text-white-50"><span class="bg-agri"></span>agriculture
145
+ </li>
146
+ <li class="text-white-50"><span class="bg-yellow"></span>ML
147
+ </li>
148
+ <li class="text-white-50"><span class="bg-warning"></span>economies
149
+ </li>
150
+ <li class="text-white-50"><span class="bg-vanila"></span>technologies
151
+ </li>
152
+ </ul>
153
+ </div>
154
+ <div class="col-sm-6 col-6">
155
+ <ul class="graphl-legend-rectangle">
156
+
157
+ <li class="text-white-50"><span class="bg-coffe"></span>mathematics
158
+ </li>
159
+ <li class="text-white-50"><span class="bg-orange "></span>sports
160
+ </li>
161
+ <li class="text-white-50"><span class="bg-cyan"></span>AI
162
+ </li>
163
+ <li class="text-white-50"><span class="bg-rosy"></span>Innovation
164
+ </li>
165
+ <li class="text-white-50"><span class="bg-picton"></span>Science
166
+ </li>
167
+ <li class="text-white-50"><span class="bg-purple"></span>Societies
168
+ </li>
169
+ <li class="text-white-50"><span class="bg-pink"></span>administration
170
+ </li>
171
+ <li class="text-white-50"><span class="bg-cambridge"></span>biology
172
+ </li>
173
+ </ul>
174
+ </div>
175
+ </div>
176
+ </div>
177
+ </div>
178
+ </div>
179
+ </div>
180
+
181
+ <div class="col-8">
182
+ <div class="card" style="background-color: #303131; border: 2px dashed rgb(82, 82, 82);">
183
+ <div class="card-body text-center" id="static">
184
+ <div class="col-12 d-flex justify-content-center">
185
+ <div class="col-sm-12 d-inline align-items-center">
186
+ {% for sentence , color in sentences_prediction.items() %}
187
+ <span class="text-bold text-start bg-{{color[1]}}">
188
+ {{sentence}}
189
+ </span>
190
+ {% endfor %}
191
+ </div>
192
+
193
+ </div>
194
+ </div>
195
+ </div>
196
+ </div>
197
+ </div>
198
+
199
+ </div>
200
+ </main>
201
+ <!-- Container END -->
202
+ <script src="../static/js/dashboard_voice.js"></script>
203
+ <script src="../static/js/Chart.min.js"></script>
204
+ <script src="../static/js/voice.js" type="text/javascript"></script>
205
+ <script src="../static/js/vendor.bundle.base.js"></script>
206
+
207
+ </body>
208
+
209
+ </html>