Mostafa8Mehrabi committed on
Commit 5ec20bb · verified · 1 Parent(s): b38bc72

Upload 57.6M parameter custom language model

.ipynb_checkpoints/upload_to_hf-checkpoint.py ADDED
@@ -0,0 +1,34 @@
+ #!/usr/bin/env python3
+ """Automated Hugging Face Upload Script"""
+
+ from huggingface_hub import HfApi, upload_folder
+
+ def upload_model():
+     model_dir = "./hf_model"
+     repo_id = "your-username/custom-57m-language-model"
+
+     print("🤗 Starting Hugging Face upload...")
+     print(f"📁 Model directory: {model_dir}")
+     print(f"🔗 Repository: {repo_id}")
+
+     try:
+         api = HfApi()
+         api.create_repo(repo_id=repo_id, exist_ok=True)
+         print("✅ Repository created/verified")
+
+         upload_folder(
+             folder_path=model_dir,
+             repo_id=repo_id,
+             repo_type="model",
+             commit_message="Upload 57.6M parameter custom language model"
+         )
+
+         print("🎉 Upload completed!")
+         print(f"🔗 Your model: https://huggingface.co/{repo_id}")
+
+     except Exception as e:
+         print(f"❌ Upload failed: {e}")
+         print("💡 Make sure you're logged in: huggingface-cli login")
+
+ if __name__ == "__main__":
+     upload_model()
README.md ADDED
@@ -0,0 +1,83 @@
+ ---
+ language: en
+ license: mit
+ tags:
+ - causal-lm
+ - custom-architecture
+ - transformer
+ pipeline_tag: text-generation
+ ---
+
+ # Custom 57M Language Model
+
+ A custom 57.55M-parameter causal language model with a modern transformer architecture.
+
+ ## Model Details
+
+ - **Parameters**: 57,553,632 (57.55M)
+ - **Architecture**: 12-layer Transformer
+ - **Hidden Size**: 432
+ - **Attention Heads**: 8
+ - **Head Dimension**: 54
+ - **Intermediate Size**: 1,728
+ - **Vocabulary Size**: 50,257 (GPT-2 tokenizer)
+ - **Max Sequence Length**: 1,024
+
+ ## Architecture Features
+
+ - **RoPE Positional Embeddings**: Rotary Position Embedding (θ=10000.0)
+ - **SwiGLU Activation**: Swish-Gated Linear Unit in the feed-forward networks (see the sketch after this list)
+ - **RMSNorm**: Root Mean Square Layer Normalization (ε=1e-06)
+ - **Tied Embeddings**: Input and output embeddings share weights
+ - **Dropout**: 0.1 dropout rate
+
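The modeling code itself is not part of this commit, so the following is only a rough PyTorch sketch of the RMSNorm and SwiGLU blocks named in the list above, using the dimensions from this card (dim=432, intermediate size=1,728); the class and argument names are illustrative, not taken from the real implementation.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class RMSNorm(nn.Module):
    """Root Mean Square LayerNorm; eps matches layer_norm_eps=1e-06 in config.json."""
    def __init__(self, dim: int = 432, eps: float = 1e-6):
        super().__init__()
        self.eps = eps
        self.weight = nn.Parameter(torch.ones(dim))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Normalize by the root-mean-square over the last dimension, then scale.
        rms = torch.rsqrt(x.pow(2).mean(dim=-1, keepdim=True) + self.eps)
        return self.weight * (x * rms)

class SwiGLU(nn.Module):
    """SwiGLU feed-forward block: down(silu(gate(x)) * up(x))."""
    def __init__(self, dim: int = 432, hidden: int = 1728):
        super().__init__()
        self.gate = nn.Linear(dim, hidden, bias=False)
        self.up = nn.Linear(dim, hidden, bias=False)
        self.down = nn.Linear(hidden, dim, bias=False)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.down(F.silu(self.gate(x)) * self.up(x))

# Smoke test: shapes are preserved through norm + MLP.
x = torch.randn(2, 16, 432)
print(SwiGLU()(RMSNorm()(x)).shape)  # torch.Size([2, 16, 432])
```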
+ ## Training Configuration
+
+ - **Dummy Phase**: 2 epochs, 1,000 samples, LR=0.0005
+ - **C4 Phase**: 3 epochs, 1,000 samples, LR=0.0003
+ - **Optimizer**: AdamW (weight_decay=0.1)
+ - **Scheduler**: Cosine Annealing
+ - **Gradient Clipping**: 1.0 (see the loop sketch after this list)
+
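For reference, here is a minimal training-loop sketch wiring together AdamW (weight_decay=0.1), cosine annealing, and gradient clipping at 1.0 as listed above. The actual training script is not included in this commit, so the model, data, and loss below are stand-ins.

```python
import torch
import torch.nn as nn

# Stand-ins for the real model and batches (the real training code is not in this commit).
model = nn.Linear(432, 432)
batches = [torch.randn(8, 432) for _ in range(10)]
num_steps = 3 * len(batches)  # e.g. 3 epochs, as in the C4 phase

optimizer = torch.optim.AdamW(model.parameters(), lr=3e-4, weight_decay=0.1)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_steps)

for step in range(num_steps):
    x = batches[step % len(batches)]
    loss = (model(x) - x).pow(2).mean()  # placeholder loss
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # clip grads at 1.0
    optimizer.step()
    scheduler.step()
    optimizer.zero_grad()
```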
+ ## Generation Parameters
+
+ - **Temperature**: 0.8
+ - **Top-K**: 50
+ - **Top-P**: 0.9
+ - **Repetition Penalty**: 1.1
+ - **Max New Tokens**: 100
+
+ ## Usage
+
+ ```python
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+
+ tokenizer = AutoTokenizer.from_pretrained("your-username/custom-57m-language-model")
+ model = AutoModelForCausalLM.from_pretrained("your-username/custom-57m-language-model")
+
+ input_text = "The future of artificial intelligence"
+ inputs = tokenizer.encode(input_text, return_tensors='pt')
+ outputs = model.generate(
+     inputs,
+     max_new_tokens=100,
+     do_sample=True,  # sampling must be enabled for temperature/top-k/top-p to take effect
+     temperature=0.8,
+     top_k=50,
+     top_p=0.9,
+     repetition_penalty=1.1
+ )
+ generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+ print(generated_text)
+ ```
+
+ ## Training Dataset
+
+ - **Primary**: C4 (Colossal Clean Crawled Corpus), as in the streaming sketch after this list
+ - **Warm-up**: Synthetic dummy data for initial training
+
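One possible way to pull the 1,000 C4 samples mentioned in the training configuration is to stream the dataset with the `datasets` library; the `allenai/c4` repository and `en` config are assumptions, since the commit does not record which C4 variant was used.

```python
from itertools import islice
from datasets import load_dataset

# Stream C4 instead of downloading it in full; dataset name and config are assumptions.
c4 = load_dataset("allenai/c4", "en", split="train", streaming=True)

# Take the 1,000 samples mentioned in the training configuration above.
samples = [example["text"] for example in islice(c4, 1000)]
print(len(samples), samples[0][:80])
```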
+ ## License
+
+ MIT License
+
+ ## Model Card
+
+ This model was trained as an educational demonstration of a transformer architecture implemented with modern techniques such as RoPE embeddings and SwiGLU activations.
config.json ADDED
@@ -0,0 +1,21 @@
+ {
+   "architectures": [
+     "HuggingFaceCompatibleModel"
+   ],
+   "bos_token_id": 50256,
+   "dim": 432,
+   "dropout": 0.1,
+   "eos_token_id": 50256,
+   "head_dim": 54,
+   "intermediate_size": 1728,
+   "layer_norm_eps": 1e-06,
+   "max_seq_len": 1024,
+   "model_type": "custom_language_model",
+   "n_heads": 8,
+   "n_layers": 12,
+   "pad_token_id": 50256,
+   "rope_theta": 10000.0,
+   "torch_dtype": "float32",
+   "transformers_version": "4.52.4",
+   "vocab_size": 50257
+ }
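As a sanity check, the 57,553,632 parameter count from the README can be reproduced from these config values, assuming bias-free projections, a three-matrix SwiGLU feed-forward block, two RMSNorm weight vectors per layer plus a final norm, and tied input/output embeddings (the layout the README describes):

```python
# All numbers below come from config.json; the per-layer layout is an assumption
# (bias-free Q/K/V/O, gate/up/down SwiGLU, two RMSNorms per layer, final norm, tied embeddings).
vocab_size, dim, n_layers, intermediate = 50257, 432, 12, 1728

embeddings = vocab_size * dim                              # tied with the LM head, counted once
attention  = 4 * dim * dim                                 # Q, K, V and output projections
mlp        = 2 * dim * intermediate + intermediate * dim   # gate, up and down projections
norms      = 2 * dim                                       # two RMSNorm weight vectors per layer

total = embeddings + n_layers * (attention + mlp + norms) + dim  # + final RMSNorm
print(f"{total:,}")  # 57,553,632
```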
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f70f07fe036168da923ce2d0901aa16765ca347fc5d60af753e4af8eef865f93
+ size 230250779
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ torch>=2.0.0
+ transformers>=4.30.0
+ tokenizers>=0.13.0
+ numpy>=1.24.0
special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "bos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "<|endoftext|>",
+   "unk_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "add_bos_token": false,
+   "add_prefix_space": false,
+   "added_tokens_decoder": {
+     "50256": {
+       "content": "<|endoftext|>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<|endoftext|>",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "<|endoftext|>",
+   "errors": "replace",
+   "extra_special_tokens": {},
+   "model_max_length": 1024,
+   "pad_token": "<|endoftext|>",
+   "tokenizer_class": "GPT2Tokenizer",
+   "unk_token": "<|endoftext|>"
+ }
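Taken together with special_tokens_map.json, this configures a standard GPT-2 tokenizer whose bos/eos/pad/unk tokens all resolve to `<|endoftext|>` (id 50256). A quick check, assuming the files are loaded from the local `./hf_model` export directory used by the upload script:

```python
from transformers import AutoTokenizer

# Load from the local export directory referenced by upload_to_hf.py.
tok = AutoTokenizer.from_pretrained("./hf_model")

print(tok.bos_token, tok.eos_token, tok.pad_token, tok.unk_token)  # all <|endoftext|>
print(tok.convert_tokens_to_ids("<|endoftext|>"))                  # 50256
print(tok.model_max_length)                                        # 1024
```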
upload_to_hf.py ADDED
@@ -0,0 +1,34 @@
+ #!/usr/bin/env python3
+ """Automated Hugging Face Upload Script"""
+
+ from huggingface_hub import HfApi, upload_folder
+
+ def upload_model():
+     model_dir = "./hf_model"
+     repo_id = "Mostafa8Mehrabi/custom-57m-language-model"
+
+     print("🤗 Starting Hugging Face upload...")
+     print(f"📁 Model directory: {model_dir}")
+     print(f"🔗 Repository: {repo_id}")
+
+     try:
+         api = HfApi()
+         api.create_repo(repo_id=repo_id, exist_ok=True)
+         print("✅ Repository created/verified")
+
+         upload_folder(
+             folder_path=model_dir,
+             repo_id=repo_id,
+             repo_type="model",
+             commit_message="Upload 57.6M parameter custom language model"
+         )
+
+         print("🎉 Upload completed!")
+         print(f"🔗 Your model: https://huggingface.co/{repo_id}")
+
+     except Exception as e:
+         print(f"❌ Upload failed: {e}")
+         print("💡 Make sure you're logged in: huggingface-cli login")
+
+ if __name__ == "__main__":
+     upload_model()
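If `huggingface-cli login` has not been run, one alternative is to authenticate in-process before invoking the upload. The token value below is a placeholder, and importing `upload_model` assumes the script is on the Python path.

```python
from huggingface_hub import login

from upload_to_hf import upload_model

login(token="hf_xxx")  # placeholder token; never hard-code a real one
upload_model()
```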
vocab.json ADDED
The diff for this file is too large to render. See raw diff