update model
Browse files- README.md +8 -5
- config.json +1 -1
- generation_config.json +1 -3
- model-00001-of-00003.safetensors +1 -1
- model-00002-of-00003.safetensors +1 -1
- model-00003-of-00003.safetensors +1 -1
- tokenizer_config.json +2 -2
README.md
CHANGED
|
@@ -10,20 +10,23 @@ tags:
|
|
| 10 |
---
|
| 11 |
|
| 12 |
# Storm-7B
|
| 13 |
-
- **Developed by**: [Jie Liu](https://jieliu.site/) \\(^{*1,2}\\), [Zhanhui Zhou](https://scholar.google.com/citations?user=SbACfYQAAAAJ&hl=zh-CN) \\(^{*2}\\), [Chao Yang](https://scholar.google.com/citations?user=5KRbHPMAAAAJ&hl=zh-CN) \\(^{2}\\), [Han-Sen Zhong](https://scholar.google.com.hk/citations?user=X_ZfX8sAAAAJ&hl=zh-CN) \\(^{2}\\), [Wanli Ouyang](https://wlouyang.github.io/) \\(^{1,2}\\).
|
| 14 |
- \\(^{1}\\)MMLab, The Chinese University of Hong Kong   \\(^{2}\\)Shanghai AI Laboratory
|
| 15 |
|
| 16 |
## Introduction
|
| 17 |
|
| 18 |
-
We released Storm-7B, the first open-source language model comparable to the GPT-4 series on the [AlpacaEval 2.0](https://tatsu-lab.github.io/alpaca_eval/) leaderboard
|
| 19 |
|
| 20 |
-
|
| 21 |
|
| 22 |
-
A snapshot of the AlpacaEval 2.0 leaderboard (2024/
|
| 23 |
|
| 24 |
| | **LC Win Rate** | **Win Rate** |
|
| 25 |
| :----------------------: | :-------------: | :----------: |
|
| 26 |
| GPT-4 Turbo (04/09) | 55.0% | 46.1% |
|
|
|
|
|
|
|
|
|
|
| 27 |
| GPT-4 Preview (11/06) | 50.0% | 50.0% |
|
| 28 |
| **Storm-7B** | 48.9% | 52.5% |
|
| 29 |
| Nanbeige Plus Chat v0.1 | 44.5% | 56.7% |
|
|
@@ -105,4 +108,4 @@ Storm-7B is a quick demonstration that a language model, fine-tuned with AI feed
|
|
| 105 |
month = {April},
|
| 106 |
year = {2024}
|
| 107 |
}
|
| 108 |
-
```
|
|
|
|
| 10 |
---
|
| 11 |
|
| 12 |
# Storm-7B
|
| 13 |
+
- **Developed by**: [Jie Liu](https://jieliu.site/) \\(^{*1,2}\\), [Zhanhui Zhou](https://scholar.google.com/citations?user=SbACfYQAAAAJ&hl=zh-CN) \\(^{*2}\\), [Jiaheng Liu](https://liujiaheng.github.io/) \\(^{2}\\), [Xingyuan Bu](https://scholar.google.com.hk/citations?user=cqYaRhUAAAAJ&hl=zh-CN) \\(^{2}\\), [Chao Yang](https://scholar.google.com/citations?user=5KRbHPMAAAAJ&hl=zh-CN) \\(^{2}\\), [Han-Sen Zhong](https://scholar.google.com.hk/citations?user=X_ZfX8sAAAAJ&hl=zh-CN) \\(^{\dag 2}\\), [Wanli Ouyang](https://wlouyang.github.io/) \\(^{1,2}\\).
|
| 14 |
- \\(^{1}\\)MMLab, The Chinese University of Hong Kong   \\(^{2}\\)Shanghai AI Laboratory
|
| 15 |
|
| 16 |
## Introduction
|
| 17 |
|
| 18 |
+
We released Storm-7B, the first open-source language model comparable to the GPT-4 series on the [AlpacaEval 2.0](https://tatsu-lab.github.io/alpaca_eval/) leaderboard.
|
| 19 |
|
| 20 |
+
Recent studies show that DPO benefits from iterative training with online preferences labeled by a trained reward model. In this work, we identify a pitfall of vanilla iterative DPO: improved response quality can lead to increased verbosity. To address this, we introduce iterative length-regularized DPO (iLR-DPO), which penalizes response length. Our empirical results show that iLR-DPO can enhance a 7B model to perform on par with GPT-4 without increasing verbosity.
|
| 21 |
|
| 22 |
+
A snapshot of the AlpacaEval 2.0 leaderboard (Single Model, 2024/6/18) is listed below:
|
| 23 |
|
| 24 |
| | **LC Win Rate** | **Win Rate** |
|
| 25 |
| :----------------------: | :-------------: | :----------: |
|
| 26 |
| GPT-4 Turbo (04/09) | 55.0% | 46.1% |
|
| 27 |
+
| GPT-4 Turbo (04/09) | 55.0% | 46.1% |
|
| 28 |
+
| GPT-4 Turbo (04/09) | 55.0% | 46.1% |
|
| 29 |
+
| GPT-4 Turbo (04/09) | 55.0% | 46.1% |
|
| 30 |
| GPT-4 Preview (11/06) | 50.0% | 50.0% |
|
| 31 |
| **Storm-7B** | 48.9% | 52.5% |
|
| 32 |
| Nanbeige Plus Chat v0.1 | 44.5% | 56.7% |
|
|
|
|
| 108 |
month = {April},
|
| 109 |
year = {2024}
|
| 110 |
}
|
| 111 |
+
```
|
config.json
CHANGED
|
@@ -20,7 +20,7 @@
|
|
| 20 |
"sliding_window": 4096,
|
| 21 |
"tie_word_embeddings": false,
|
| 22 |
"torch_dtype": "bfloat16",
|
| 23 |
-
"transformers_version": "4.
|
| 24 |
"use_cache": true,
|
| 25 |
"vocab_size": 32002
|
| 26 |
}
|
|
|
|
| 20 |
"sliding_window": 4096,
|
| 21 |
"tie_word_embeddings": false,
|
| 22 |
"torch_dtype": "bfloat16",
|
| 23 |
+
"transformers_version": "4.38.2",
|
| 24 |
"use_cache": true,
|
| 25 |
"vocab_size": 32002
|
| 26 |
}
|
generation_config.json
CHANGED
|
@@ -2,7 +2,5 @@
|
|
| 2 |
"_from_model_config": true,
|
| 3 |
"bos_token_id": 1,
|
| 4 |
"eos_token_id": 32000,
|
| 5 |
-
"
|
| 6 |
-
"pad_token_id": 0,
|
| 7 |
-
"transformers_version": "4.39.0.dev0"
|
| 8 |
}
|
|
|
|
| 2 |
"_from_model_config": true,
|
| 3 |
"bos_token_id": 1,
|
| 4 |
"eos_token_id": 32000,
|
| 5 |
+
"transformers_version": "4.38.2"
|
|
|
|
|
|
|
| 6 |
}
|
model-00001-of-00003.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4943178720
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:df041f6d2171d08e1bda07a6845f802ced8f57a676ea5dfcd1f5eb179e7133b8
|
| 3 |
size 4943178720
|
model-00002-of-00003.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4999819336
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:378bd392458e446bd89d75d09584985ca1370861761eb16d117ddc941a558348
|
| 3 |
size 4999819336
|
model-00003-of-00003.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4540532728
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1c5c3b7cd892a46a5a39dc1d569833aecfade13e3b1eaba4e37e9a53ec0ffb27
|
| 3 |
size 4540532728
|
tokenizer_config.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"add_bos_token":
|
| 3 |
"add_eos_token": false,
|
| 4 |
"added_tokens_decoder": {
|
| 5 |
"0": {
|
|
@@ -52,7 +52,7 @@
|
|
| 52 |
"clean_up_tokenization_spaces": false,
|
| 53 |
"eos_token": "<|end_of_turn|>",
|
| 54 |
"legacy": true,
|
| 55 |
-
"model_max_length":
|
| 56 |
"pad_token": "<|end_of_turn|>",
|
| 57 |
"sp_model_kwargs": {},
|
| 58 |
"spaces_between_special_tokens": false,
|
|
|
|
| 1 |
{
|
| 2 |
+
"add_bos_token": false,
|
| 3 |
"add_eos_token": false,
|
| 4 |
"added_tokens_decoder": {
|
| 5 |
"0": {
|
|
|
|
| 52 |
"clean_up_tokenization_spaces": false,
|
| 53 |
"eos_token": "<|end_of_turn|>",
|
| 54 |
"legacy": true,
|
| 55 |
+
"model_max_length": 2048,
|
| 56 |
"pad_token": "<|end_of_turn|>",
|
| 57 |
"sp_model_kwargs": {},
|
| 58 |
"spaces_between_special_tokens": false,
|