from datasets import load_dataset
from transformers import pipeline
import gradio as gr

# Load Jigsaw dataset (streaming, since the full dataset is large)
dataset = load_dataset("Koushim/processed-jigsaw-toxic-comments", split="train", streaming=True)

# Collect three sample comments each for low, medium, and high toxicity
low, medium, high = [], [], []
for example in dataset:
    score = example['toxicity']
    text = example['text']
    if score < 0.3 and len(low) < 3:
        low.append((text, score))
    elif 0.3 <= score < 0.7 and len(medium) < 3:
        medium.append((text, score))
    elif score >= 0.7 and len(high) < 3:
        high.append((text, score))
    if len(low) == 3 and len(medium) == 3 and len(high) == 3:
        break

examples_html = f"""
### 🔷 Examples of Toxicity Levels

**🟢 Low Toxicity**
- {low[0][0]} (score: {low[0][1]:.2f})
- {low[1][0]} (score: {low[1][1]:.2f})
- {low[2][0]} (score: {low[2][1]:.2f})

**🟡 Medium Toxicity**
- {medium[0][0]} (score: {medium[0][1]:.2f})
- {medium[1][0]} (score: {medium[1][1]:.2f})
- {medium[2][0]} (score: {medium[2][1]:.2f})

**🔴 High Toxicity**
- {high[0][0]} (score: {high[0][1]:.2f})
- {high[1][0]} (score: {high[1][1]:.2f})
- {high[2][0]} (score: {high[2][1]:.2f})
"""

# Load a toxicity/offensive-language detection pipeline
classifier = pipeline(
    "text-classification",
    model="cardiffnlp/twitter-roberta-base-offensive",
    top_k=None
)


def predict_toxicity(text):
    preds = classifier(text)
    # Depending on the transformers version, a single-string input may come back
    # wrapped in an extra list; unwrap it so we always iterate over label dicts.
    if preds and isinstance(preds[0], list):
        preds = preds[0]
    # Separate entries with blank lines so each score renders on its own line in Markdown
    return "\n\n".join(f"**{pred['label']}**: {pred['score']:.2f}" for pred in preds)


# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# 🧹 Hate Speech & Toxicity Monitor")
    gr.Markdown(
        "This tool shows examples of toxic comments and lets you check your own "
        "text for toxicity using a Hugging Face model."
    )
    gr.Markdown(examples_html)

    inp = gr.Textbox(label="🔷 Enter your comment")
    out = gr.Markdown(label="Toxicity Scores")
    btn = gr.Button("Check Toxicity")

    btn.click(fn=predict_toxicity, inputs=inp, outputs=out)

demo.launch()