# app.py for Gradio with PEFT
"""Gradio app that summarizes Indonesian text with a T5 model plus a PEFT adapter.

The base model and adapter are loaded lazily on the first summarization
request and then cached in module-level globals for subsequent requests.
"""
import os

import gradio as gr
import torch
from huggingface_hub import login
from peft import PeftModel, PeftConfig
from transformers import T5ForConditionalGeneration, T5Tokenizer

BASE_MODEL_NAME = "cahya/t5-base-indonesian-summarization-cased"
ADAPTER_NAME = "reydeuss/trustify-t5-adapter"

# Populated by load_model_once() on first use (not at import time).
model = None
tokenizer = None


def load_model_once():
    """Load the tokenizer and adapter-wrapped model, caching them globally.

    Returns:
        A ``(model, tokenizer)`` tuple. Later calls return the cached objects
        without reloading.
    """
    global model, tokenizer
    if model is None:
        # Only authenticate when a token is configured: login(token=None)
        # falls back to an interactive prompt, which cannot work in a server.
        hf_token = os.environ.get("HF_TOKEN")
        if hf_token:
            login(token=hf_token)

        tokenizer = T5Tokenizer.from_pretrained(BASE_MODEL_NAME)

        # NOTE(review): 8-bit loading goes through bitsandbytes, which is
        # primarily a GPU backend -- confirm for the deployed versions. It is
        # therefore only requested when CUDA is present; otherwise the model
        # is loaded unquantized on the default (CPU) device.
        load_kwargs = {}
        if torch.cuda.is_available():
            load_kwargs = {"load_in_8bit": True, "device_map": "auto"}

        base_model = T5ForConditionalGeneration.from_pretrained(
            BASE_MODEL_NAME,
            **load_kwargs,
        )
        model = PeftModel.from_pretrained(base_model, ADAPTER_NAME)

    return model, tokenizer


def summarize_text(text):
    """Generate a summary for ``text``.

    Args:
        text: Raw input from the Gradio textbox.

    Returns:
        The decoded summary string, or a prompt message when ``text`` is
        empty, whitespace-only, or ``None``.
    """
    if not text or not text.strip():
        return "Please enter text to summarize."

    loaded_model, loaded_tokenizer = load_model_once()

    # Add T5 prefix
    input_text = f"summarize: {text}"
    inputs = loaded_tokenizer(
        input_text,
        return_tensors="pt",
        max_length=1024,
        truncation=True,
    )
    # The model may have been placed on an accelerator; the input tensors
    # must live on the same device as the model for generate() to work.
    inputs = inputs.to(loaded_model.device)

    with torch.no_grad():
        outputs = loaded_model.generate(
            **inputs,
            max_length=512,
            num_beams=4,
            length_penalty=2.0,
            early_stopping=True,
            no_repeat_ngram_size=2,
        )

    return loaded_tokenizer.decode(outputs[0], skip_special_tokens=True)


# Create Gradio interface
interface = gr.Interface(
    fn=summarize_text,
    inputs=gr.Textbox(
        lines=10,
        placeholder="Enter Indonesian text here...",
        label="Input Text",
    ),
    outputs=gr.Textbox(lines=5, label="Generated Summary"),
    title="Indonesian Text Summarization",
    description="Enter Indonesian text to generate a summary using T5 model with PEFT adapters",
)

# Launched at module level (as in the original script) so running or
# importing app.py starts the server.
interface.launch()