xianyu564 committed
Commit · a85979e
Parent(s): 027ebe0
Add make_gguf.py to merge the LoRA model and convert it to GGUF format; also update .gitignore to exclude the generated model directory
Files changed:
- .gitignore +2 -0
- release_gguf/make_gguf.py +47 -0
.gitignore CHANGED
@@ -2,3 +2,5 @@
 .venv/
 __pycache__
 .DS_Store
+
+release_gguf/base_model
release_gguf/make_gguf.py ADDED
@@ -0,0 +1,47 @@
# make_gguf.py
import argparse, subprocess, sys
from pathlib import Path
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

def merge_lora(base_id: str, lora_dir: str, out_dir: Path):
    out_dir.mkdir(parents=True, exist_ok=True)
    print(f"[1/3] Loading base: {base_id}")
    # Downloads to the HF cache if the model is not present locally.
    # torch_dtype="auto" keeps the checkpoint's native precision instead of upcasting to fp32.
    base = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype="auto")
    print(f"[2/3] Loading LoRA from: {lora_dir} -> merging...")
    # Fold the LoRA deltas into the base weights and drop the adapter wrappers.
    model = PeftModel.from_pretrained(base, lora_dir).merge_and_unload()
    model.save_pretrained(out_dir.as_posix())
    # The tokenizer must match the base model; save it next to the merged weights.
    AutoTokenizer.from_pretrained(base_id).save_pretrained(out_dir.as_posix())
    print(f"[OK] merged model saved to: {out_dir}")

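# Minimal sanity check of the merged output (a sketch; assumes the default
# --outdir, so the merged weights land in release_gguf/merged_fp16). The merged
# model should load with plain transformers, no peft required:
#   from transformers import AutoModelForCausalLM
#   AutoModelForCausalLM.from_pretrained("release_gguf/merged_fp16")
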
def to_gguf(merged_dir: Path, llama_cpp_dir: Path, out_stem: str, quant: str):
    conv = llama_cpp_dir / "convert_hf_to_gguf.py"
    if not conv.exists():
        raise FileNotFoundError(f"convert_hf_to_gguf.py not found in {llama_cpp_dir}")
    gguf_raw = merged_dir.parent / f"{out_stem}.gguf"
    gguf_q = merged_dir.parent / f"{out_stem}.{quant}.gguf"

    print(f"[3/3] Converting to GGUF: {gguf_raw.name}")
    subprocess.check_call([sys.executable, str(conv), str(merged_dir), "--outfile", str(gguf_raw)])
    print(f"[Q] Quantizing -> {quant}")
    # Newer llama.cpp builds name the binary "llama-quantize" (usually under
    # build/bin); older builds used plain "quantize". Try both names in both places.
    names = ["llama-quantize", "quantize"]
    if sys.platform.startswith("win"):
        names = [n + ".exe" for n in names]
    qexe = next(
        (p for n in names for p in (llama_cpp_dir / n, llama_cpp_dir / "build" / "bin" / n) if p.exists()),
        None,
    )
    if qexe is None:
        raise FileNotFoundError(f"quantize binary not found under {llama_cpp_dir}")
    subprocess.check_call([str(qexe), str(gguf_raw), str(gguf_q), quant])
    print(f"[DONE] {gguf_q}")

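# For reference, the two subprocess calls above amount to (paths illustrative,
# using the default --outdir and --name):
#   python llama.cpp/convert_hf_to_gguf.py release_gguf/merged_fp16 --outfile release_gguf/my-sft.gguf
#   llama.cpp/build/bin/llama-quantize release_gguf/my-sft.gguf release_gguf/my-sft.Q4_K_M.gguf Q4_K_M
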
if __name__ == "__main__":
    ap = argparse.ArgumentParser()
    ap.add_argument("--base", required=True, help="Base model ID, e.g. Qwen/Qwen2.5-0.5B-Instruct (must match the model used for training)")
    ap.add_argument("--lora", required=True, help="LoRA directory (containing adapter_config.json etc.)")
    ap.add_argument("--llama_cpp", required=True, help="Local path to llama.cpp (containing convert_hf_to_gguf.py)")
    ap.add_argument("--outdir", default="release_gguf", help="Output directory (default: release_gguf)")
    ap.add_argument("--quant", default="Q4_K_M", help="Quantization level: Q4_K_M, Q5_K_M, Q8_0, etc.")
    ap.add_argument("--name", default="my-sft", help="Exported model name (file name prefix)")
    args = ap.parse_args()

    out_dir = Path(args.outdir) / "merged_fp16"
    merge_lora(args.base, args.lora, out_dir)
    to_gguf(out_dir, Path(args.llama_cpp), args.name, args.quant)

# One-time setup:
#   git clone https://github.com/ggerganov/llama.cpp
#   huggingface-cli download Qwen/Qwen2.5-0.5B-Instruct --local-dir ./base_model
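# Example invocation (the --lora path is a placeholder for your adapter directory):
#   python make_gguf.py --base Qwen/Qwen2.5-0.5B-Instruct --lora ./lora_out \
#       --llama_cpp ./llama.cpp --quant Q4_K_M --name my-sft
# Expected outputs: release_gguf/my-sft.gguf (unquantized) and
# release_gguf/my-sft.Q4_K_M.gguf. The quantized file can then be tried with
# llama.cpp's CLI, e.g.:
#   ./llama.cpp/build/bin/llama-cli -m release_gguf/my-sft.Q4_K_M.gguf -p "Hello"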