Update README.md
README.md
CHANGED
@@ -29,14 +29,18 @@ MiniCPM3-4B has a 32k context window. Equipped with LLMxMapReduce, MiniCPM3-4B c

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

path = "openbmb/MiniCPM3-4B-GPTQ-Int4"
device = "cuda"

# Load the tokenizer and the GPTQ Int4 checkpoint onto the GPU.
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(path, torch_dtype=torch.bfloat16, device_map=device, trust_remote_code=True)

messages = [
    {"role": "user", "content": "推荐5个北京的景点。"},  # "Recommend five attractions in Beijing."
]
model_inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to(device)

model_outputs = model.generate(
    model_inputs,
    max_new_tokens=1024,
    top_p=0.7,  # collapsed (unshown) context line in the diff; value assumed to match the vLLM sampling settings below
    temperature=0.7,
    repetition_penalty=1.02
)

# Drop the prompt tokens so only the newly generated text is decoded.
output_token_ids = [
    model_outputs[i][len(model_inputs[i]):] for i in range(len(model_inputs))
]

responses = tokenizer.batch_decode(output_token_ids, skip_special_tokens=True)[0]
print(responses)
```
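
To watch the reply stream out token by token instead of waiting for `generate` to return, Transformers' `TextStreamer` can be attached to the same call. A minimal sketch, not part of the original README, reusing the `model`, `tokenizer`, and `model_inputs` defined above:

```python
from transformers import TextStreamer

# Prints decoded text to stdout as tokens are generated;
# skip_prompt=True avoids echoing the chat template back.
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

model.generate(
    model_inputs,
    max_new_tokens=1024,
    top_p=0.7,
    temperature=0.7,
    repetition_penalty=1.02,
    streamer=streamer
)
```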

@@ -55,18 +61,23 @@

```python
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams

model_name = "openbmb/MiniCPM3-4B-GPTQ-Int4"
prompt = [{"role": "user", "content": "推荐5个北京的景点。"}]  # "Recommend five attractions in Beijing."

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
input_text = tokenizer.apply_chat_template(prompt, tokenize=False, add_generation_prompt=True)

# tensor_parallel_size=1 runs on a single GPU; quantization='gptq' matches the Int4 checkpoint.
llm = LLM(
    model=model_name,
    trust_remote_code=True,
    tensor_parallel_size=1,
    quantization='gptq'
)
sampling_params = SamplingParams(top_p=0.7, temperature=0.7, max_tokens=1024, repetition_penalty=1.02)

outputs = llm.generate(prompts=input_text, sampling_params=sampling_params)

print(outputs[0].outputs[0].text)
```
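
vLLM is built for batched serving, so several prompts can be submitted in one `generate` call, each coming back as its own `RequestOutput`. A small sketch, not from the original README, reusing `llm`, `tokenizer`, and `sampling_params` from above; the second question is made up for illustration:

```python
# Hypothetical batch: the README's question plus an extra one for illustration.
questions = ["推荐5个北京的景点。", "What is the capital of France?"]

# Apply the chat template to each question separately.
prompts = [
    tokenizer.apply_chat_template(
        [{"role": "user", "content": q}],
        tokenize=False,
        add_generation_prompt=True,
    )
    for q in questions
]

# One call schedules the whole batch; results come back in prompt order.
for output in llm.generate(prompts=prompts, sampling_params=sampling_params):
    print(output.outputs[0].text)
```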

@@ -226,7 +237,7 @@

    <td>63.2</td>
  </tr>
  <tr>
    <td>LiveCodeBench v3</td>
    <td>22.2</td>
    <td>20.2</td>
    <td>19.2</td>

@@ -239,7 +250,7 @@

    <td colspan="15" align="left"><strong>Function Call</strong></td>
  </tr>
  <tr>
    <td>BFCL v2</td>
    <td>71.6</td>
    <td>70.1</td>
    <td>19.2</td>

@@ -263,6 +274,7 @@

  </tr>
</table>

## Statement
* As a language model, MiniCPM3-4B generates content by learning from a vast amount of text.
* However, it cannot comprehend or express personal opinions or make value judgments.

@@ -283,4 +295,4 @@

  journal={arXiv preprint arXiv:2404.06395},
  year={2024}
}
```