YsnHdn committed on
Commit
f0aa55b
β€’
1 Parent(s): 53573e8

update : adding a new model based on mdpi pdfs

Browse files
{bert/bertModel β†’ DistillMDPI1/DistillMDPI1}/label_encoder.pkl RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f2140f9641061e34cfa413940d8b885b3016267e372ed6b7878908a47ab4759
3
- size 227
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86df1bec06e1ef392325057bc35869319c691da5f023d62caf4a09e8a5fc3e6d
3
+ size 283
{bert/bertModel β†’ DistillMDPI1/DistillMDPI1}/saved_model/config.json RENAMED
@@ -1,14 +1,13 @@
1
  {
2
- "_name_or_path": "bert-base-uncased",
 
3
  "architectures": [
4
- "BertForSequenceClassification"
5
  ],
6
- "attention_probs_dropout_prob": 0.1,
7
- "classifier_dropout": null,
8
- "gradient_checkpointing": false,
9
- "hidden_act": "gelu",
10
- "hidden_dropout_prob": 0.1,
11
- "hidden_size": 768,
12
  "id2label": {
13
  "0": "LABEL_0",
14
  "1": "LABEL_1",
@@ -19,13 +18,26 @@
19
  "6": "LABEL_6",
20
  "7": "LABEL_7",
21
  "8": "LABEL_8",
22
- "9": "LABEL_9"
 
 
 
 
 
 
 
23
  },
24
  "initializer_range": 0.02,
25
- "intermediate_size": 3072,
26
  "label2id": {
27
  "LABEL_0": 0,
28
  "LABEL_1": 1,
 
 
 
 
 
 
 
29
  "LABEL_2": 2,
30
  "LABEL_3": 3,
31
  "LABEL_4": 4,
@@ -35,17 +47,17 @@
35
  "LABEL_8": 8,
36
  "LABEL_9": 9
37
  },
38
- "layer_norm_eps": 1e-12,
39
  "max_position_embeddings": 512,
40
- "model_type": "bert",
41
- "num_attention_heads": 12,
42
- "num_hidden_layers": 12,
43
  "pad_token_id": 0,
44
- "position_embedding_type": "absolute",
45
  "problem_type": "single_label_classification",
 
 
 
 
46
  "torch_dtype": "float32",
47
- "transformers_version": "4.40.0",
48
- "type_vocab_size": 2,
49
- "use_cache": true,
50
  "vocab_size": 30522
51
  }
 
1
  {
2
+ "_name_or_path": "distilbert-base-uncased",
3
+ "activation": "gelu",
4
  "architectures": [
5
+ "DistilBertForSequenceClassification"
6
  ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
 
 
11
  "id2label": {
12
  "0": "LABEL_0",
13
  "1": "LABEL_1",
 
18
  "6": "LABEL_6",
19
  "7": "LABEL_7",
20
  "8": "LABEL_8",
21
+ "9": "LABEL_9",
22
+ "10": "LABEL_10",
23
+ "11": "LABEL_11",
24
+ "12": "LABEL_12",
25
+ "13": "LABEL_13",
26
+ "14": "LABEL_14",
27
+ "15": "LABEL_15",
28
+ "16": "LABEL_16"
29
  },
30
  "initializer_range": 0.02,
 
31
  "label2id": {
32
  "LABEL_0": 0,
33
  "LABEL_1": 1,
34
+ "LABEL_10": 10,
35
+ "LABEL_11": 11,
36
+ "LABEL_12": 12,
37
+ "LABEL_13": 13,
38
+ "LABEL_14": 14,
39
+ "LABEL_15": 15,
40
+ "LABEL_16": 16,
41
  "LABEL_2": 2,
42
  "LABEL_3": 3,
43
  "LABEL_4": 4,
 
47
  "LABEL_8": 8,
48
  "LABEL_9": 9
49
  },
 
50
  "max_position_embeddings": 512,
51
+ "model_type": "distilbert",
52
+ "n_heads": 12,
53
+ "n_layers": 6,
54
  "pad_token_id": 0,
 
55
  "problem_type": "single_label_classification",
56
+ "qa_dropout": 0.1,
57
+ "seq_classif_dropout": 0.2,
58
+ "sinusoidal_pos_embds": false,
59
+ "tie_weights_": true,
60
  "torch_dtype": "float32",
61
+ "transformers_version": "4.41.1",
 
 
62
  "vocab_size": 30522
63
  }
{bert/bertModel β†’ DistillMDPI1/DistillMDPI1}/saved_model/model.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:77e959fadc3a09d85fa46103e4ada68e827b0a2cc64bdd660c600e2999433e27
3
- size 437983256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f10745117e7ccb36611897cc97844a4a9682fdebbf72f6b89291f3c469a3587
3
+ size 267878708
{bert/bertModel β†’ DistillMDPI1/DistillMDPI1}/saved_tokenizer/special_tokens_map.json RENAMED
File without changes
DistillMDPI1/DistillMDPI1/saved_tokenizer/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
{bert/bertModel β†’ DistillMDPI1/DistillMDPI1}/saved_tokenizer/tokenizer_config.json RENAMED
@@ -43,15 +43,13 @@
43
  },
44
  "clean_up_tokenization_spaces": true,
45
  "cls_token": "[CLS]",
46
- "do_basic_tokenize": true,
47
  "do_lower_case": true,
48
  "mask_token": "[MASK]",
49
  "model_max_length": 512,
50
- "never_split": null,
51
  "pad_token": "[PAD]",
52
  "sep_token": "[SEP]",
53
  "strip_accents": null,
54
  "tokenize_chinese_chars": true,
55
- "tokenizer_class": "BertTokenizer",
56
  "unk_token": "[UNK]"
57
  }
 
43
  },
44
  "clean_up_tokenization_spaces": true,
45
  "cls_token": "[CLS]",
 
46
  "do_lower_case": true,
47
  "mask_token": "[MASK]",
48
  "model_max_length": 512,
 
49
  "pad_token": "[PAD]",
50
  "sep_token": "[SEP]",
51
  "strip_accents": null,
52
  "tokenize_chinese_chars": true,
53
+ "tokenizer_class": "DistilBertTokenizer",
54
  "unk_token": "[UNK]"
55
  }
{bert/bertModel β†’ DistillMDPI1/DistillMDPI1}/saved_tokenizer/vocab.txt RENAMED
File without changes
Dockerfile CHANGED
@@ -18,7 +18,7 @@ ENV HF_HOME /code/.cache/huggingface
18
  RUN mkdir -p $HF_HOME && chmod -R 777 $HF_HOME
19
 
20
  # Copy the model files into the image
21
- COPY ./distilBert /code/distilBert
22
 
23
  # Copy the rest of the application files
24
  COPY . .
 
18
  RUN mkdir -p $HF_HOME && chmod -R 777 $HF_HOME
19
 
20
  # Copy the model files into the image
21
+ COPY ./DistillMDPI1 /code/DistillMDPI1
22
 
23
  # Copy the rest of the application files
24
  COPY . .
helper_functions.py CHANGED
@@ -6,27 +6,34 @@ from typing import Optional
6
  from torch import Tensor
7
 
8
  # Load the model
9
- model = DistilBertForSequenceClassification.from_pretrained("distilBert/DistilBert/saved_model")
10
 
11
  # Load the tokenizer
12
- tokenizer = AutoTokenizer.from_pretrained("distilBert/DistilBert/saved_tokenizer")
13
 
14
  # Charger le label encoder
15
- with open("distilBert/DistilBert/label_encoder.pkl", "rb") as f:
16
  label_encoder = pickle.load(f)
17
 
 
18
  class_labels = {
19
- 7: ('Databases', 'info' ,'#4f9ef8'),
20
- 1: ('Computation_and_Language', 'danger', '#d6293e'),
21
- 9: ('Hardware_Architecture', 'warning' , '#f7c32e'),
22
- 8: ('General_Literature', 'success' , '#0cbc87'),
23
- 6: ('Cryptography_and_Security', 'primary', '#0f6fec'),
24
- 5: ('Computers_and_Society', 'yellow', '#ffc107'),
25
- 3: ('Computational_Engineering', 'purple' , '#6f42c1'),
26
- 0: ('Artificial_Intelligence', 'cyan', '#0dcaf0'),
27
- 2: ('Computational_Complexity', 'pink', '#d63384'),
28
- 4: ('Computational_Geometry', 'orange', '#fd7e14')
29
- }
 
 
 
 
 
 
30
 
31
  def predict_class(text):
32
  # Tokenisation du texte
 
6
  from torch import Tensor
7
 
8
  # Load the model
9
+ model = DistilBertForSequenceClassification.from_pretrained("DistillMDPI1/DistillMDPI1/saved_model")
10
 
11
  # Load the tokenizer
12
+ tokenizer = AutoTokenizer.from_pretrained("DistillMDPI1/DistillMDPI1/saved_tokenizer")
13
 
14
  # Charger le label encoder
15
+ with open("DistillMDPI1/DistillMDPI1/label_encoder.pkl", "rb") as f:
16
  label_encoder = pickle.load(f)
17
 
18
+
19
  class_labels = {
20
+ 16: ('vehicles','info' , '#4f9ef8'),
21
+ 10: ('environments','success' , '#0cbc87'),
22
+ 9: ('energies', 'danger', '#d6293e'),
23
+ 0: ('Physics', 'primary', '#0f6fec'),
24
+ 13: ('robotics', 'moss','#B1E5F2'),
25
+ 3: ('agriculture','teal' , '#20c997'),
26
+ 11: ('Machine Learning and Knowledge Extraction', 'yellow', '#ffc107'),
27
+ 8: ('economies', 'warning' , '#f7c32e'),
28
+ 15: ('technologies','vanila' ,'#FDF0D5' ),
29
+ 12: ('mathematics','coffe' ,'#7f5539' ),
30
+ 14: ('sports', 'orange', '#fd7e14'),
31
+ 4: ('Artificial intelligence','cyan', '#0dcaf0'),
32
+ 6: ('Innovation','rosy' ,'#BF98A0'),
33
+ 5: ('Science','picton' ,'#5fa8d3' ),
34
+ 1: ('Societies','purple' , '#6f42c1'),
35
+ 2: ('administration','pink', '#d63384'),
36
+ 7: ('biology' ,'cambridge' , '#88aa99')}
37
 
38
  def predict_class(text):
39
  # Tokenisation du texte
static/css/style2.css CHANGED
@@ -15,6 +15,12 @@
15
  --bs-yellow: #ffc107;
16
  --bs-green: #198754;
17
  --bs-teal: #20c997;
 
 
 
 
 
 
18
  --bs-cyan: #0dcaf0;
19
  --bs-white: #fff;
20
  --bs-gray: #6c757d;
@@ -8861,6 +8867,31 @@ textarea.form-control-lg {
8861
  background-color: var(--bs-pink) !important;
8862
  }
8863
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8864
  .bg-cyan {
8865
  --bs-bg-opacity: 1;
8866
  background-color: var(--bs-cyan) !important;
@@ -8921,6 +8952,13 @@ textarea.form-control-lg {
8921
  background-color: transparent !important;
8922
  }
8923
 
 
 
 
 
 
 
 
8924
  .bg-opacity-10 {
8925
  --bs-bg-opacity: 0.1;
8926
  }
 
15
  --bs-yellow: #ffc107;
16
  --bs-green: #198754;
17
  --bs-teal: #20c997;
18
+ --bs-moss : #B1E5F2;
19
+ --bs-vanila : #FDF0D5;
20
+ --bs-coffe : #7f5539;
21
+ --bs-rosy : #BF98A0;
22
+ --bs-picton : #5fa8d3;
23
+ --bs-cambridge : #88aa99;
24
  --bs-cyan: #0dcaf0;
25
  --bs-white: #fff;
26
  --bs-gray: #6c757d;
 
8867
  background-color: var(--bs-pink) !important;
8868
  }
8869
 
8870
+ .bg-moss {
8871
+ --bs-bg-opacity: 1;
8872
+ background-color: var(--bs-moss) !important;
8873
+ }
8874
+
8875
+ .bg-vanilla {
8876
+ --bs-bg-opacity: 1;
8877
+ background-color: var(--bs-vanila) !important;
8878
+ }
8879
+
8880
+ .bg-coffe {
8881
+ --bs-bg-opacity: 1;
8882
+ background-color: var(--bs-coffe) !important;
8883
+ }
8884
+
8885
+ .bg-rosy {
8886
+ --bs-bg-opacity: 1;
8887
+ background-color: var(--bs-rosy) !important;
8888
+ }
8889
+
8890
+ .bg-picton {
8891
+ --bs-bg-opacity: 1;
8892
+ background-color: var(--bs-picton) !important;
8893
+ }
8894
+
8895
  .bg-cyan {
8896
  --bs-bg-opacity: 1;
8897
  background-color: var(--bs-cyan) !important;
 
8952
  background-color: transparent !important;
8953
  }
8954
 
8955
+ .bg-teal {
8956
+ --bs-bg-opacity: 1;
8957
+ background-color: var(--bs-teal) ;
8958
+ }
8959
+
8960
+
8961
+
8962
  .bg-opacity-10 {
8963
  --bs-bg-opacity: 0.1;
8964
  }