YsnHdn committed on
Commit
f0aa55b
·
1 Parent(s): 53573e8

update : adding a new model based on mdpi pdfs

Browse files
{bert/bertModel → DistillMDPI1/DistillMDPI1}/label_encoder.pkl RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f2140f9641061e34cfa413940d8b885b3016267e372ed6b7878908a47ab4759
3
- size 227
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86df1bec06e1ef392325057bc35869319c691da5f023d62caf4a09e8a5fc3e6d
3
+ size 283
{bert/bertModel → DistillMDPI1/DistillMDPI1}/saved_model/config.json RENAMED
@@ -1,14 +1,13 @@
1
  {
2
- "_name_or_path": "bert-base-uncased",
 
3
  "architectures": [
4
- "BertForSequenceClassification"
5
  ],
6
- "attention_probs_dropout_prob": 0.1,
7
- "classifier_dropout": null,
8
- "gradient_checkpointing": false,
9
- "hidden_act": "gelu",
10
- "hidden_dropout_prob": 0.1,
11
- "hidden_size": 768,
12
  "id2label": {
13
  "0": "LABEL_0",
14
  "1": "LABEL_1",
@@ -19,13 +18,26 @@
19
  "6": "LABEL_6",
20
  "7": "LABEL_7",
21
  "8": "LABEL_8",
22
- "9": "LABEL_9"
 
 
 
 
 
 
 
23
  },
24
  "initializer_range": 0.02,
25
- "intermediate_size": 3072,
26
  "label2id": {
27
  "LABEL_0": 0,
28
  "LABEL_1": 1,
 
 
 
 
 
 
 
29
  "LABEL_2": 2,
30
  "LABEL_3": 3,
31
  "LABEL_4": 4,
@@ -35,17 +47,17 @@
35
  "LABEL_8": 8,
36
  "LABEL_9": 9
37
  },
38
- "layer_norm_eps": 1e-12,
39
  "max_position_embeddings": 512,
40
- "model_type": "bert",
41
- "num_attention_heads": 12,
42
- "num_hidden_layers": 12,
43
  "pad_token_id": 0,
44
- "position_embedding_type": "absolute",
45
  "problem_type": "single_label_classification",
 
 
 
 
46
  "torch_dtype": "float32",
47
- "transformers_version": "4.40.0",
48
- "type_vocab_size": 2,
49
- "use_cache": true,
50
  "vocab_size": 30522
51
  }
 
1
  {
2
+ "_name_or_path": "distilbert-base-uncased",
3
+ "activation": "gelu",
4
  "architectures": [
5
+ "DistilBertForSequenceClassification"
6
  ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
 
 
11
  "id2label": {
12
  "0": "LABEL_0",
13
  "1": "LABEL_1",
 
18
  "6": "LABEL_6",
19
  "7": "LABEL_7",
20
  "8": "LABEL_8",
21
+ "9": "LABEL_9",
22
+ "10": "LABEL_10",
23
+ "11": "LABEL_11",
24
+ "12": "LABEL_12",
25
+ "13": "LABEL_13",
26
+ "14": "LABEL_14",
27
+ "15": "LABEL_15",
28
+ "16": "LABEL_16"
29
  },
30
  "initializer_range": 0.02,
 
31
  "label2id": {
32
  "LABEL_0": 0,
33
  "LABEL_1": 1,
34
+ "LABEL_10": 10,
35
+ "LABEL_11": 11,
36
+ "LABEL_12": 12,
37
+ "LABEL_13": 13,
38
+ "LABEL_14": 14,
39
+ "LABEL_15": 15,
40
+ "LABEL_16": 16,
41
  "LABEL_2": 2,
42
  "LABEL_3": 3,
43
  "LABEL_4": 4,
 
47
  "LABEL_8": 8,
48
  "LABEL_9": 9
49
  },
 
50
  "max_position_embeddings": 512,
51
+ "model_type": "distilbert",
52
+ "n_heads": 12,
53
+ "n_layers": 6,
54
  "pad_token_id": 0,
 
55
  "problem_type": "single_label_classification",
56
+ "qa_dropout": 0.1,
57
+ "seq_classif_dropout": 0.2,
58
+ "sinusoidal_pos_embds": false,
59
+ "tie_weights_": true,
60
  "torch_dtype": "float32",
61
+ "transformers_version": "4.41.1",
 
 
62
  "vocab_size": 30522
63
  }
{bert/bertModel → DistillMDPI1/DistillMDPI1}/saved_model/model.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:77e959fadc3a09d85fa46103e4ada68e827b0a2cc64bdd660c600e2999433e27
3
- size 437983256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f10745117e7ccb36611897cc97844a4a9682fdebbf72f6b89291f3c469a3587
3
+ size 267878708
{bert/bertModel → DistillMDPI1/DistillMDPI1}/saved_tokenizer/special_tokens_map.json RENAMED
File without changes
DistillMDPI1/DistillMDPI1/saved_tokenizer/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
{bert/bertModel → DistillMDPI1/DistillMDPI1}/saved_tokenizer/tokenizer_config.json RENAMED
@@ -43,15 +43,13 @@
43
  },
44
  "clean_up_tokenization_spaces": true,
45
  "cls_token": "[CLS]",
46
- "do_basic_tokenize": true,
47
  "do_lower_case": true,
48
  "mask_token": "[MASK]",
49
  "model_max_length": 512,
50
- "never_split": null,
51
  "pad_token": "[PAD]",
52
  "sep_token": "[SEP]",
53
  "strip_accents": null,
54
  "tokenize_chinese_chars": true,
55
- "tokenizer_class": "BertTokenizer",
56
  "unk_token": "[UNK]"
57
  }
 
43
  },
44
  "clean_up_tokenization_spaces": true,
45
  "cls_token": "[CLS]",
 
46
  "do_lower_case": true,
47
  "mask_token": "[MASK]",
48
  "model_max_length": 512,
 
49
  "pad_token": "[PAD]",
50
  "sep_token": "[SEP]",
51
  "strip_accents": null,
52
  "tokenize_chinese_chars": true,
53
+ "tokenizer_class": "DistilBertTokenizer",
54
  "unk_token": "[UNK]"
55
  }
{bert/bertModel → DistillMDPI1/DistillMDPI1}/saved_tokenizer/vocab.txt RENAMED
File without changes
Dockerfile CHANGED
@@ -18,7 +18,7 @@ ENV HF_HOME /code/.cache/huggingface
18
  RUN mkdir -p $HF_HOME && chmod -R 777 $HF_HOME
19
 
20
  # Copy the model files into the image
21
- COPY ./distilBert /code/distilBert
22
 
23
  # Copy the rest of the application files
24
  COPY . .
 
18
  RUN mkdir -p $HF_HOME && chmod -R 777 $HF_HOME
19
 
20
  # Copy the model files into the image
21
+ COPY ./DistillMDPI1 /code/DistillMDPI1
22
 
23
  # Copy the rest of the application files
24
  COPY . .
helper_functions.py CHANGED
@@ -6,27 +6,34 @@ from typing import Optional
6
  from torch import Tensor
7
 
8
  # Load the model
9
- model = DistilBertForSequenceClassification.from_pretrained("distilBert/DistilBert/saved_model")
10
 
11
  # Load the tokenizer
12
- tokenizer = AutoTokenizer.from_pretrained("distilBert/DistilBert/saved_tokenizer")
13
 
14
  # Charger le label encoder
15
- with open("distilBert/DistilBert/label_encoder.pkl", "rb") as f:
16
  label_encoder = pickle.load(f)
17
 
 
18
  class_labels = {
19
- 7: ('Databases', 'info' ,'#4f9ef8'),
20
- 1: ('Computation_and_Language', 'danger', '#d6293e'),
21
- 9: ('Hardware_Architecture', 'warning' , '#f7c32e'),
22
- 8: ('General_Literature', 'success' , '#0cbc87'),
23
- 6: ('Cryptography_and_Security', 'primary', '#0f6fec'),
24
- 5: ('Computers_and_Society', 'yellow', '#ffc107'),
25
- 3: ('Computational_Engineering', 'purple' , '#6f42c1'),
26
- 0: ('Artificial_Intelligence', 'cyan', '#0dcaf0'),
27
- 2: ('Computational_Complexity', 'pink', '#d63384'),
28
- 4: ('Computational_Geometry', 'orange', '#fd7e14')
29
- }
 
 
 
 
 
 
30
 
31
  def predict_class(text):
32
  # Tokenisation du texte
 
6
  from torch import Tensor
7
 
8
  # Load the model
9
+ model = DistilBertForSequenceClassification.from_pretrained("DistillMDPI1/DistillMDPI1/saved_model")
10
 
11
  # Load the tokenizer
12
+ tokenizer = AutoTokenizer.from_pretrained("DistillMDPI1/DistillMDPI1/saved_tokenizer")
13
 
14
  # Charger le label encoder
15
+ with open("DistillMDPI1/DistillMDPI1/label_encoder.pkl", "rb") as f:
16
  label_encoder = pickle.load(f)
17
 
18
+
19
  class_labels = {
20
+ 16: ('vehicles','info' , '#4f9ef8'),
21
+ 10: ('environments','success' , '#0cbc87'),
22
+ 9: ('energies', 'danger', '#d6293e'),
23
+ 0: ('Physics', 'primary', '#0f6fec'),
24
+ 13: ('robotics', 'moss','#B1E5F2'),
25
+ 3: ('agriculture','teal' , '#20c997'),
26
+ 11: ('Machine Learning and Knowledge Extraction', 'yellow', '#ffc107'),
27
+ 8: ('economies', 'warning' , '#f7c32e'),
28
+ 15: ('technologies','vanila' ,'#FDF0D5' ),
29
+ 12: ('mathematics','coffe' ,'#7f5539' ),
30
+ 14: ('sports', 'orange', '#fd7e14'),
31
+ 4: ('Artificial intelligence','cyan', '#0dcaf0'),
32
+ 6: ('Innovation','rosy' ,'#BF98A0'),
33
+ 5: ('Science','picton' ,'#5fa8d3' ),
34
+ 1: ('Societies','purple' , '#6f42c1'),
35
+ 2: ('administration','pink', '#d63384'),
36
+ 7: ('biology' ,'cambridge' , '#88aa99')}
37
 
38
  def predict_class(text):
39
  # Tokenisation du texte
static/css/style2.css CHANGED
@@ -15,6 +15,12 @@
15
  --bs-yellow: #ffc107;
16
  --bs-green: #198754;
17
  --bs-teal: #20c997;
 
 
 
 
 
 
18
  --bs-cyan: #0dcaf0;
19
  --bs-white: #fff;
20
  --bs-gray: #6c757d;
@@ -8861,6 +8867,31 @@ textarea.form-control-lg {
8861
  background-color: var(--bs-pink) !important;
8862
  }
8863
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8864
  .bg-cyan {
8865
  --bs-bg-opacity: 1;
8866
  background-color: var(--bs-cyan) !important;
@@ -8921,6 +8952,13 @@ textarea.form-control-lg {
8921
  background-color: transparent !important;
8922
  }
8923
 
 
 
 
 
 
 
 
8924
  .bg-opacity-10 {
8925
  --bs-bg-opacity: 0.1;
8926
  }
 
15
  --bs-yellow: #ffc107;
16
  --bs-green: #198754;
17
  --bs-teal: #20c997;
18
+ --bs-moss : #B1E5F2;
19
+ --bs-vanila : #FDF0D5;
20
+ --bs-coffe : #7f5539;
21
+ --bs-rosy : #BF98A0;
22
+ --bs-picton : #5fa8d3;
23
+ --bs-cambridge : #88aa99;
24
  --bs-cyan: #0dcaf0;
25
  --bs-white: #fff;
26
  --bs-gray: #6c757d;
 
8867
  background-color: var(--bs-pink) !important;
8868
  }
8869
 
8870
+ .bg-moss {
8871
+ --bs-bg-opacity: 1;
8872
+ background-color: var(--bs-moss) !important;
8873
+ }
8874
+
8875
+ .bg-vanilla {
8876
+ --bs-bg-opacity: 1;
8877
+ background-color: var(--bg-vanilla) !important;
8878
+ }
8879
+
8880
+ .bg-coffe {
8881
+ --bs-bg-opacity: 1;
8882
+ background-color: var(--bs-coffe) !important;
8883
+ }
8884
+
8885
+ .bg-rosy {
8886
+ --bs-bg-opacity: 1;
8887
+ background-color: var(--bs-rosy) !important;
8888
+ }
8889
+
8890
+ .bg-picton {
8891
+ --bs-bg-opacity: 1;
8892
+ background-color: var(--bs-cambridge) !important;
8893
+ }
8894
+
8895
  .bg-cyan {
8896
  --bs-bg-opacity: 1;
8897
  background-color: var(--bs-cyan) !important;
 
8952
  background-color: transparent !important;
8953
  }
8954
 
8955
+ .bg-teal {
8956
+ --bs-bg-opacity: 1;
8957
+ background-color: var(--bs-teal) ;
8958
+ }
8959
+
8960
+
8961
+
8962
  .bg-opacity-10 {
8963
  --bs-bg-opacity: 0.1;
8964
  }