import gradio as gr
from huggingface_hub import InferenceClient

# Connect to your model on Hugging Face Hub
client = InferenceClient("JK-TK/bible2")


# Define the response function
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    # Construct a flat prompt: system + history + current message
    prompt = system_message.strip() + "\n\n"
    for user, assistant in history:
        prompt += f"User: {user}\nAssistant: {assistant}\n"
    prompt += f"User: {message}\nAssistant:"

    # Generate the response from the model using text-generation, streaming tokens
    response = ""
    for token in client.text_generation(
        prompt,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True,
    ):
        response += token
        yield response


# Create the chat interface
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a biblical AI assistant.", label="System message"),
        gr.Slider(minimum=1, maximum=1024, value=300, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.8, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
    ],
)

if __name__ == "__main__":
    demo.launch()
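
Note: for this script to run as a Space, the two imported libraries also need to be declared as dependencies. A minimal requirements.txt sketch, assuming this file is the Space's entry point (the package names follow directly from the imports above; version pins are omitted):

requirements.txt
    gradio
    huggingface_hub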