saattrupdan committed on
Commit
820437f
1 Parent(s): b54bf84

Finished finetuning 🎉

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ language_model/unigrams.txt filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,102 @@
+ ---
+ library_name: transformers
+ language:
+ - da
+ license: openrail
+ base_model: chcaa/xls-r-300m-danish
+ tags:
+ - generated_from_trainer
+ datasets:
+ - generator
+ metrics:
+ - wer
+ model-index:
+ - name: roest-315m
+   results:
+   - task:
+       name: Automatic Speech Recognition
+       type: automatic-speech-recognition
+     dataset:
+       name: generator
+       type: generator
+       config: default
+       split: None
+       args: default
+     metrics:
+     - name: Wer
+       type: wer
+       value: 0.34354151223128243
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # roest-315m
+
+ This model is a fine-tuned version of [chcaa/xls-r-300m-danish](https://huggingface.co/chcaa/xls-r-300m-danish) on the generator dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 316.1826
+ - Wer: 0.3435
+ - Cer: 0.1319
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
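Since the card leaves the usage section blank, here is a minimal inference sketch based only on the files added in this commit (Wav2Vec2ForCTC weights plus a Wav2Vec2ProcessorWithLM with a KenLM 5-gram). The repo id and audio path are placeholders, not taken from this commit, and the input audio is assumed to already be 16 kHz mono:

```python
# Minimal inference sketch; model_id and sample.wav are illustrative placeholders.
import torch
import soundfile as sf
from transformers import Wav2Vec2ProcessorWithLM, Wav2Vec2ForCTC

model_id = "<namespace>/roest-315m"  # replace with the actual Hugging Face repo id

processor = Wav2Vec2ProcessorWithLM.from_pretrained(model_id)
model = Wav2Vec2ForCTC.from_pretrained(model_id)

# The feature extractor expects 16 kHz mono audio (see preprocessor_config.json).
speech, sample_rate = sf.read("sample.wav")
inputs = processor(speech, sampling_rate=16_000, return_tensors="pt")

with torch.no_grad():
    logits = model(**inputs).logits

# Beam-search decoding with the bundled 5-gram language model.
transcription = processor.batch_decode(logits.numpy()).text[0]
print(transcription)
```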
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 0.0001
+ - train_batch_size: 8
+ - eval_batch_size: 8
+ - seed: 4242
+ - gradient_accumulation_steps: 32
+ - total_train_batch_size: 256
+ - optimizer: Adam with betas=(0.9,0.98) and epsilon=1e-08
+ - lr_scheduler_type: cosine
+ - lr_scheduler_warmup_steps: 1000
+ - training_steps: 10000
+ - mixed_precision_training: Native AMP
+
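For illustration only, the hyperparameters listed above could be expressed as `transformers.TrainingArguments` roughly as follows. This is a sketch, not the authors' training script, and `output_dir` is an assumption:

```python
# Sketch: mapping the listed hyperparameters onto transformers.TrainingArguments.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="roest-315m",         # assumed
    learning_rate=1e-4,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=32,  # 8 x 32 = 256 total train batch size
    seed=4242,
    adam_beta1=0.9,
    adam_beta2=0.98,
    adam_epsilon=1e-8,
    lr_scheduler_type="cosine",
    warmup_steps=1000,
    max_steps=10000,
    fp16=True,                       # "Native AMP" mixed precision
)
```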
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss | Wer | Cer |
+ |:-------------:|:-------:|:-----:|:---------------:|:------:|:------:|
+ | 1598.6352 | 0.5451 | 500 | 1597.7738 | 1.0 | 1.0 |
+ | 698.4129 | 1.0903 | 1000 | 574.9319 | 0.6189 | 0.2334 |
+ | 519.6434 | 1.6354 | 1500 | 463.2092 | 0.4886 | 0.1877 |
+ | 457.419 | 2.1806 | 2000 | 419.9849 | 0.4432 | 0.1685 |
+ | 424.6695 | 2.7257 | 2500 | 392.2585 | 0.4069 | 0.1562 |
+ | 402.5123 | 3.2709 | 3000 | 374.3170 | 0.3963 | 0.1519 |
+ | 388.9287 | 3.8160 | 3500 | 356.5869 | 0.3833 | 0.1479 |
+ | 380.4898 | 4.3612 | 4000 | 348.6322 | 0.3718 | 0.1425 |
+ | 371.7486 | 4.9063 | 4500 | 343.7823 | 0.3663 | 0.1401 |
+ | 367.6832 | 5.4514 | 5000 | 331.9190 | 0.3611 | 0.1389 |
+ | 357.0084 | 5.9966 | 5500 | 330.0962 | 0.3570 | 0.1367 |
+ | 355.2035 | 6.5417 | 6000 | 326.4210 | 0.3526 | 0.1352 |
+ | 352.9193 | 7.0869 | 6500 | 322.4225 | 0.3507 | 0.1342 |
+ | 348.084 | 7.6320 | 7000 | 321.1701 | 0.3487 | 0.1338 |
+ | 347.2086 | 8.1772 | 7500 | 319.7442 | 0.3471 | 0.1330 |
+ | 347.4033 | 8.7223 | 8000 | 317.6507 | 0.3453 | 0.1326 |
+ | 347.9957 | 9.2675 | 8500 | 316.4468 | 0.3440 | 0.1320 |
+ | 342.1603 | 9.8126 | 9000 | 316.0001 | 0.3435 | 0.1319 |
+ | 343.359 | 10.3578 | 9500 | 316.1837 | 0.3438 | 0.1320 |
+ | 344.4635 | 10.9029 | 10000 | 316.1826 | 0.3435 | 0.1319 |
+
+
+ ### Framework versions
+
+ - Transformers 4.44.2
+ - Pytorch 2.4.1+cu121
+ - Datasets 2.21.0
+ - Tokenizers 0.19.1
added_tokens.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "</s>": 43,
+   "<pad>": 45,
+   "<s>": 42,
+   "<unk>": 44
+ }
alphabet.json ADDED
@@ -0,0 +1 @@
+ {"labels": ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", " ", "\u00e5", "\u00e6", "\u00e9", "\u00f8", "\u00fc", "<s>", "</s>", "\u2047", ""], "is_bpe": false}
config.json ADDED
@@ -0,0 +1,109 @@
+ {
+   "_name_or_path": "chcaa/xls-r-300m-danish",
+   "activation_dropout": 0.1,
+   "adapter_attn_dim": null,
+   "adapter_kernel_size": 3,
+   "adapter_stride": 2,
+   "add_adapter": false,
+   "apply_spec_augment": true,
+   "architectures": [
+     "Wav2Vec2ForCTC"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 42,
+   "classifier_proj_size": 256,
+   "codevector_dim": 768,
+   "contrastive_logits_temperature": 0.1,
+   "conv_bias": true,
+   "conv_dim": [
+     512,
+     512,
+     512,
+     512,
+     512,
+     512,
+     512
+   ],
+   "conv_kernel": [
+     10,
+     3,
+     3,
+     3,
+     3,
+     2,
+     2
+   ],
+   "conv_stride": [
+     5,
+     2,
+     2,
+     2,
+     2,
+     2,
+     2
+   ],
+   "ctc_loss_reduction": "sum",
+   "ctc_zero_infinity": true,
+   "diversity_loss_weight": 0.1,
+   "do_stable_layer_norm": true,
+   "eos_token_id": 43,
+   "feat_extract_activation": "gelu",
+   "feat_extract_dropout": 0.0,
+   "feat_extract_norm": "layer",
+   "feat_proj_dropout": 0.0,
+   "feat_quantizer_dropout": 0.0,
+   "final_dropout": 0.0,
+   "gradient_checkpointing": false,
+   "hidden_act": "gelu",
+   "hidden_dropout": 0.0,
+   "hidden_size": 1024,
+   "initializer_range": 0.02,
+   "intermediate_size": 4096,
+   "layer_norm_eps": 1e-05,
+   "layerdrop": 0.1,
+   "mask_feature_length": 64,
+   "mask_feature_min_masks": 0,
+   "mask_feature_prob": 0.5,
+   "mask_time_length": 10,
+   "mask_time_min_masks": 2,
+   "mask_time_prob": 0.5,
+   "model_type": "wav2vec2",
+   "num_adapter_layers": 3,
+   "num_attention_heads": 16,
+   "num_codevector_groups": 2,
+   "num_codevectors_per_group": 320,
+   "num_conv_pos_embedding_groups": 16,
+   "num_conv_pos_embeddings": 128,
+   "num_feat_extract_layers": 7,
+   "num_hidden_layers": 24,
+   "num_negatives": 100,
+   "output_hidden_size": 1024,
+   "pad_token_id": 45,
+   "proj_codevector_dim": 768,
+   "tdnn_dilation": [
+     1,
+     2,
+     3,
+     1,
+     1
+   ],
+   "tdnn_dim": [
+     512,
+     512,
+     512,
+     512,
+     1500
+   ],
+   "tdnn_kernel": [
+     5,
+     3,
+     3,
+     1,
+     1
+   ],
+   "torch_dtype": "float32",
+   "transformers_version": "4.44.2",
+   "use_weighted_layer_sum": false,
+   "vocab_size": 46,
+   "xvector_output_dim": 512
+ }
language_model/5gram.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1223290be61f10bd2aaebbb454479e6cd66b7a2b816b0eb5f1571e6ae6cf899f
+ size 4332614415
language_model/attrs.json ADDED
@@ -0,0 +1 @@
+ {"alpha": 0.5, "beta": 1.5, "unk_score_offset": -10.0, "score_boundary": true}
language_model/unigrams.txt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:34e6c338910c566bb12d367f494736dbf4f8395d4b9bd18364b7d61ba29d7675
+ size 18220276
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c1bce4abd7d25c43669d570c295f3021b3f279e5daa49258f7fc3c80ad0f7617
+ size 1261996080
preprocessor_config.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "do_normalize": true,
+   "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+   "feature_size": 1,
+   "padding_side": "right",
+   "padding_value": 0.0,
+   "processor_class": "Wav2Vec2ProcessorWithLM",
+   "return_attention_mask": true,
+   "sampling_rate": 16000
+ }
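The feature extractor defined here expects 16 kHz mono input and returns normalised values with an attention mask. A small sketch of preparing arbitrary audio to match this config; the audio filename is illustrative, and librosa is just one of several resampling options:

```python
# Sketch: preparing audio to match preprocessor_config.json (16 kHz mono,
# normalisation and attention mask handled by the feature extractor).
import librosa
from transformers import Wav2Vec2FeatureExtractor

feature_extractor = Wav2Vec2FeatureExtractor(
    feature_size=1,
    sampling_rate=16000,
    padding_value=0.0,
    do_normalize=True,
    return_attention_mask=True,
)

# librosa resamples to the requested rate and downmixes to mono.
speech, _ = librosa.load("some_audio.flac", sr=16000, mono=True)
inputs = feature_extractor(speech, sampling_rate=16000, return_tensors="pt")
# inputs.input_values and inputs.attention_mask can be passed straight to the model
```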
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<pad>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,48 @@
+ {
+   "added_tokens_decoder": {
+     "42": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "43": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "44": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "45": {
+       "content": "<pad>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<s>",
+   "clean_up_tokenization_spaces": true,
+   "do_lower_case": false,
+   "eos_token": "</s>",
+   "model_max_length": 512,
+   "pad_token": "<pad>",
+   "processor_class": "Wav2Vec2ProcessorWithLM",
+   "replace_word_delimiter_char": " ",
+   "target_lang": null,
+   "tokenizer_class": "Wav2Vec2CTCTokenizer",
+   "unk_token": "<unk>",
+   "word_delimiter_token": "|"
+ }
vocab.json ADDED
@@ -0,0 +1,44 @@
+ {
+   "0": 0,
+   "1": 1,
+   "2": 2,
+   "3": 3,
+   "4": 4,
+   "5": 5,
+   "6": 6,
+   "7": 7,
+   "8": 8,
+   "9": 9,
+   "a": 10,
+   "b": 11,
+   "c": 12,
+   "d": 13,
+   "e": 14,
+   "f": 15,
+   "g": 16,
+   "h": 17,
+   "i": 18,
+   "j": 19,
+   "k": 20,
+   "l": 21,
+   "m": 22,
+   "n": 23,
+   "o": 24,
+   "p": 25,
+   "q": 26,
+   "r": 27,
+   "s": 28,
+   "t": 29,
+   "u": 30,
+   "v": 31,
+   "w": 32,
+   "x": 33,
+   "y": 34,
+   "z": 35,
+   "|": 36,
+   "å": 37,
+   "æ": 38,
+   "é": 39,
+   "ø": 40,
+   "ü": 41
+ }
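Without the language model, the same vocabulary supports plain greedy CTC decoding: repeated ids are collapsed, the `<pad>` blank (id 45) is dropped, and `|` (id 36) becomes a space. A sketch, assuming a local copy of `vocab.json`:

```python
# Sketch: greedy (no-LM) decoding with the CTC tokenizer defined by vocab.json.
import torch
from transformers import Wav2Vec2CTCTokenizer

tokenizer = Wav2Vec2CTCTokenizer(
    "vocab.json",
    unk_token="<unk>",
    pad_token="<pad>",
    word_delimiter_token="|",
)

# logits: (batch, time, vocab) output of Wav2Vec2ForCTC
# predicted_ids = torch.argmax(logits, dim=-1)
# text = tokenizer.batch_decode(predicted_ids)[0]  # collapses repeats, drops blanks
```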