{
"model": {
"bos_token_id": 1,
"context_length": 131072,
"decoder": {
"session_options": {
"log_id": "onnxruntime-genai",
"provider_options": []
},
"head_size": 96,
"hidden_size": 3072,
"inputs": {
"input_ids": "input_ids",
"attention_mask": "attention_mask",
"past_key_names": "past_key_values.%d.key",
"past_value_names": "past_key_values.%d.value",
"past_sequence_length": "past_seq_len",
"total_sequence_length": "total_seq_len"
},
"outputs": {
"logits": "logits",
"present_key_names": "present.%d.key",
"present_value_names": "present.%d.value"
},
"num_attention_heads": 32,
"num_hidden_layers": 32,
"num_key_value_heads": 32,
"sliding_window": {
"window_size": 64,
"pad_value": 0,
"alignment": "left",
"slide_key_value_cache": false
},
"pipeline": [
{
"embeddings": {
"filename": "embeddings.onnx",
"inputs": [
"input_ids"
],
"outputs": [
"/model/embed_tokens/Gather/output_0_QuantizeLinear_Output"
]
},
"context_ctx": {
"filename": "context_ctx.onnx",
"inputs": [
"/model/embed_tokens/Gather/output_0_QuantizeLinear_Output",
"past_key_values.0.key",
"past_key_values.0.value",
"past_seq_len",
"total_seq_len",
"past_key_values.1.key",
"past_key_values.1.value",
"past_key_values.2.key",
"past_key_values.2.value",
"past_key_values.3.key",
"past_key_values.3.value",
"past_key_values.4.key",
"past_key_values.4.value",
"past_key_values.5.key",
"past_key_values.5.value",
"past_key_values.6.key",
"past_key_values.6.value",
"past_key_values.7.key",
"past_key_values.7.value",
"past_key_values.8.key",
"past_key_values.8.value",
"past_key_values.9.key",
"past_key_values.9.value",
"past_key_values.10.key",
"past_key_values.10.value",
"past_key_values.11.key",
"past_key_values.11.value",
"past_key_values.12.key",
"past_key_values.12.value",
"past_key_values.13.key",
"past_key_values.13.value",
"past_key_values.14.key",
"past_key_values.14.value",
"past_key_values.15.key",
"past_key_values.15.value",
"past_key_values.16.key",
"past_key_values.16.value",
"past_key_values.17.key",
"past_key_values.17.value",
"past_key_values.18.key",
"past_key_values.18.value",
"past_key_values.19.key",
"past_key_values.19.value",
"past_key_values.20.key",
"past_key_values.20.value",
"past_key_values.21.key",
"past_key_values.21.value",
"past_key_values.22.key",
"past_key_values.22.value",
"past_key_values.23.key",
"past_key_values.23.value",
"past_key_values.24.key",
"past_key_values.24.value",
"past_key_values.25.key",
"past_key_values.25.value",
"past_key_values.26.key",
"past_key_values.26.value",
"past_key_values.27.key",
"past_key_values.27.value",
"past_key_values.28.key",
"past_key_values.28.value",
"past_key_values.29.key",
"past_key_values.29.value",
"past_key_values.30.key",
"past_key_values.30.value",
"past_key_values.31.key",
"past_key_values.31.value"
],
"outputs": [
"present.0.key",
"present.0.value",
"present.1.key",
"present.1.value",
"present.2.key",
"present.2.value",
"present.3.key",
"present.3.value",
"present.4.key",
"present.4.value",
"present.5.key",
"present.5.value",
"present.6.key",
"present.6.value",
"present.7.key",
"present.7.value",
"present.8.key",
"present.8.value",
"present.9.key",
"present.9.value",
"present.10.key",
"present.10.value",
"present.11.key",
"present.11.value",
"present.12.key",
"present.12.value",
"present.13.key",
"present.13.value",
"present.14.key",
"present.14.value",
"present.15.key",
"present.15.value",
"present.16.key",
"present.16.value",
"present.17.key",
"present.17.value",
"present.18.key",
"present.18.value",
"present.19.key",
"present.19.value",
"present.20.key",
"present.20.value",
"present.21.key",
"present.21.value",
"present.22.key",
"present.22.value",
"present.23.key",
"present.23.value",
"present.24.key",
"present.24.value",
"present.25.key",
"present.25.value",
"present.26.key",
"present.26.value",
"present.27.key",
"present.27.value",
"present.28.key",
"present.28.value",
"present.29.key",
"present.29.value",
"present.30.key",
"present.30.value",
"present.31.key",
"present.31.value",
"/model/layers.32/final_norm_layernorm/SkipLayerNorm_Mul_output_0_QuantizeLinear_Output"
],
"session_options": {
"provider_options": [
{
"qnn": {
"htp_performance_mode": "burst",
"htp_graph_finalization_optimization_mode": "3",
"soc_model": "60"
}
}
]
},
"run_on_token_gen": false
},
"iterator_ctx": {
"filename": "iterator_ctx.onnx",
"inputs": [
"/model/embed_tokens/Gather/output_0_QuantizeLinear_Output",
"past_key_values.0.key",
"past_key_values.0.value",
"past_seq_len",
"total_seq_len",
"past_key_values.1.key",
"past_key_values.1.value",
"past_key_values.2.key",
"past_key_values.2.value",
"past_key_values.3.key",
"past_key_values.3.value",
"past_key_values.4.key",
"past_key_values.4.value",
"past_key_values.5.key",
"past_key_values.5.value",
"past_key_values.6.key",
"past_key_values.6.value",
"past_key_values.7.key",
"past_key_values.7.value",
"past_key_values.8.key",
"past_key_values.8.value",
"past_key_values.9.key",
"past_key_values.9.value",
"past_key_values.10.key",
"past_key_values.10.value",
"past_key_values.11.key",
"past_key_values.11.value",
"past_key_values.12.key",
"past_key_values.12.value",
"past_key_values.13.key",
"past_key_values.13.value",
"past_key_values.14.key",
"past_key_values.14.value",
"past_key_values.15.key",
"past_key_values.15.value",
"past_key_values.16.key",
"past_key_values.16.value",
"past_key_values.17.key",
"past_key_values.17.value",
"past_key_values.18.key",
"past_key_values.18.value",
"past_key_values.19.key",
"past_key_values.19.value",
"past_key_values.20.key",
"past_key_values.20.value",
"past_key_values.21.key",
"past_key_values.21.value",
"past_key_values.22.key",
"past_key_values.22.value",
"past_key_values.23.key",
"past_key_values.23.value",
"past_key_values.24.key",
"past_key_values.24.value",
"past_key_values.25.key",
"past_key_values.25.value",
"past_key_values.26.key",
"past_key_values.26.value",
"past_key_values.27.key",
"past_key_values.27.value",
"past_key_values.28.key",
"past_key_values.28.value",
"past_key_values.29.key",
"past_key_values.29.value",
"past_key_values.30.key",
"past_key_values.30.value",
"past_key_values.31.key",
"past_key_values.31.value"
],
"outputs": [
"present.0.key",
"present.0.value",
"present.1.key",
"present.1.value",
"present.2.key",
"present.2.value",
"present.3.key",
"present.3.value",
"present.4.key",
"present.4.value",
"present.5.key",
"present.5.value",
"present.6.key",
"present.6.value",
"present.7.key",
"present.7.value",
"present.8.key",
"present.8.value",
"present.9.key",
"present.9.value",
"present.10.key",
"present.10.value",
"present.11.key",
"present.11.value",
"present.12.key",
"present.12.value",
"present.13.key",
"present.13.value",
"present.14.key",
"present.14.value",
"present.15.key",
"present.15.value",
"present.16.key",
"present.16.value",
"present.17.key",
"present.17.value",
"present.18.key",
"present.18.value",
"present.19.key",
"present.19.value",
"present.20.key",
"present.20.value",
"present.21.key",
"present.21.value",
"present.22.key",
"present.22.value",
"present.23.key",
"present.23.value",
"present.24.key",
"present.24.value",
"present.25.key",
"present.25.value",
"present.26.key",
"present.26.value",
"present.27.key",
"present.27.value",
"present.28.key",
"present.28.value",
"present.29.key",
"present.29.value",
"present.30.key",
"present.30.value",
"present.31.key",
"present.31.value",
"/model/layers.32/final_norm_layernorm/SkipLayerNorm_Mul_output_0_QuantizeLinear_Output"
],
"session_options": {
"provider_options": [
{
"qnn": {
"htp_performance_mode": "burst",
"htp_graph_finalization_optimization_mode": "3",
"soc_model": "60"
}
}
]
},
"run_on_prompt": false
},
"lm_head": {
"filename": "lm_head.onnx",
"inputs": [
"/model/layers.32/final_norm_layernorm/SkipLayerNorm_Mul_output_0_QuantizeLinear_Output"
],
"outputs": [
"logits"
]
}
}
]
},
"eos_token_id": [
32007,
32001,
32000
],
"pad_token_id": 32000,
"type": "decoder-pipeline",
"vocab_size": 32064
},
"search": {
"diversity_penalty": 0.0,
"do_sample": false,
"early_stopping": true,
"length_penalty": 1.0,
"max_length": 131072,
"min_length": 0,
"no_repeat_ngram_size": 0,
"num_beams": 1,
"num_return_sequences": 1,
"past_present_share_buffer": true,
"repetition_penalty": 1.0,
"temperature": 1.0,
"top_k": 1,
"top_p": 1.0
}
}