xianyu564 committed
Commit a85979e
1 Parent(s): 027ebe0

Add make_gguf.py, a script that merges the LoRA model into its base model and converts the result to GGUF format; also update .gitignore to exclude the generated model directory
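For context, a minimal invocation sketch of the new script (the --lora path ./lora_out is a placeholder for your actual adapter directory; the other values mirror the script's defaults and its trailing comments):

    python release_gguf/make_gguf.py --base Qwen/Qwen2.5-0.5B-Instruct --lora ./lora_out --llama_cpp ./llama.cpp --quant Q4_K_M --name my-sft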

Files changed (2)
  1. .gitignore +2 -0
  2. release_gguf/make_gguf.py +47 -0
.gitignore CHANGED
@@ -2,3 +2,5 @@
  .venv/
  __pycache__
  .DS_Store
+
+ release_gguf/base_model
release_gguf/make_gguf.py ADDED
@@ -0,0 +1,47 @@
+ # make_gguf.py
+ import argparse, subprocess, sys
+ from pathlib import Path
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ from peft import PeftModel
+
+ def merge_lora(base_id: str, lora_dir: str, out_dir: Path):
+     out_dir.mkdir(parents=True, exist_ok=True)
+     print(f"[1/3] Loading base: {base_id}")
+     base = AutoModelForCausalLM.from_pretrained(base_id)  # downloaded to the local cache automatically if not present
+     print(f"[2/3] Loading LoRA from: {lora_dir} -> merging...")
+     model = PeftModel.from_pretrained(base, lora_dir).merge_and_unload()
+     model.save_pretrained(out_dir.as_posix())
+     AutoTokenizer.from_pretrained(base_id).save_pretrained(out_dir.as_posix())
+     print(f"[OK] merged model saved to: {out_dir}")
+
+ def to_gguf(merged_dir: Path, llama_cpp_dir: Path, out_stem: str, quant: str):
+     conv = llama_cpp_dir / "convert_hf_to_gguf.py"
+     if not conv.exists():
+         raise FileNotFoundError(f"convert_hf_to_gguf.py not found in {llama_cpp_dir}")
+     gguf_raw = merged_dir.parent / f"{out_stem}.gguf"
+     gguf_q = merged_dir.parent / f"{out_stem}.{quant}.gguf"
+
+     print(f"[3/3] Converting to GGUF: {gguf_raw.name}")
+     subprocess.check_call([sys.executable, str(conv), str(merged_dir), "--outfile", str(gguf_raw)])
+     print(f"[Q] Quantizing -> {quant}")
+     qexe = llama_cpp_dir / ("quantize.exe" if sys.platform.startswith("win") else "quantize")
+     subprocess.check_call([str(qexe), str(gguf_raw), str(gguf_q), quant])
+     print(f"[DONE] {gguf_q}")
+
+ if __name__ == "__main__":
+     ap = argparse.ArgumentParser()
+     ap.add_argument("--base", required=True, help="Base model ID, e.g. Qwen/Qwen2.5-0.5B-Instruct (must match the model used for training)")
+     ap.add_argument("--lora", required=True, help="LoRA directory (containing adapter_config.json, etc.)")
+     ap.add_argument("--llama_cpp", required=True, help="Local path to llama.cpp (containing convert_hf_to_gguf.py)")
+     ap.add_argument("--outdir", default="release_gguf", help="Output directory (default: release_gguf)")
+     ap.add_argument("--quant", default="Q4_K_M", help="Quantization level: Q4_K_M/Q5_K_M/Q8_0, etc.")
+     ap.add_argument("--name", default="my-sft", help="Exported model name (file name prefix)")
+     args = ap.parse_args()
+
+     out_dir = Path(args.outdir) / "merged_fp16"
+     merge_lora(args.base, args.lora, out_dir)
+     to_gguf(out_dir, Path(args.llama_cpp), args.name, args.quant)
+
+
+ # git clone https://github.com/ggerganov/llama.cpp
+ # huggingface-cli download Qwen/Qwen2.5-0.5B-Instruct --local-dir ./base_model
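
With the script's defaults (--outdir release_gguf, --name my-sft, --quant Q4_K_M), the pipeline leaves roughly this layout — a sketch derived from the code above, not output captured from a run:

    release_gguf/merged_fp16/          merged FP16 checkpoint + tokenizer
    release_gguf/my-sft.gguf           unquantized GGUF from convert_hf_to_gguf.py
    release_gguf/my-sft.Q4_K_M.gguf    quantized GGUF

Note that recent llama.cpp builds ship the quantization tool as llama-quantize (typically under build/bin), so the quantize/quantize.exe lookup may need adjusting depending on the checkout.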