Update app.py
app.py CHANGED
@@ -2,10 +2,10 @@ from datasets import load_dataset
 from transformers import pipeline
 import gradio as gr

-# Load
+# Load dataset
 dataset = load_dataset("Koushim/processed-jigsaw-toxic-comments", split="train", streaming=True)

-#
+# Sample examples
 low, medium, high = [], [], []
 for example in dataset:
     score = example['toxicity']
@@ -20,46 +20,43 @@
         break

 examples_html = f"""
-###
+### 🧪 Examples of Toxicity Levels

-
+#### 🔷 Low Toxicity
 - {low[0][0]} (score: {low[0][1]:.2f})
 - {low[1][0]} (score: {low[1][1]:.2f})
 - {low[2][0]} (score: {low[2][1]:.2f})

-
+#### 🟠 Medium Toxicity
 - {medium[0][0]} (score: {medium[0][1]:.2f})
 - {medium[1][0]} (score: {medium[1][1]:.2f})
 - {medium[2][0]} (score: {medium[2][1]:.2f})

-
+#### 🔴 High Toxicity
 - {high[0][0]} (score: {high[0][1]:.2f})
 - {high[1][0]} (score: {high[1][1]:.2f})
 - {high[2][0]} (score: {high[2][1]:.2f})
 """

-# Load
-classifier = pipeline(
-    "text-classification",
-    model="cardiffnlp/twitter-roberta-base-offensive",
-    top_k=None
-)
+# Load toxicity detection pipeline
+classifier = pipeline("text-classification", model="cardiffnlp/twitter-roberta-base-offensive", top_k=None)

 def predict_toxicity(text):
-    preds = classifier(text)
-
+    preds = classifier(text)[0]
+    results = []
     for pred in preds:
-
-
+        label = pred['label']
+        score = pred['score']
+        results.append(f"**{label}**: {score:.2f}")
+    return "\n".join(results)

-# Gradio UI
 with gr.Blocks() as demo:
     gr.Markdown("# 🧹 Hate Speech & Toxicity Monitor")
     gr.Markdown("This tool shows examples of toxic comments and lets you check your own text for toxicity using a Hugging Face model.")
     gr.Markdown(examples_html)

     inp = gr.Textbox(label="🔷 Enter your comment")
-    out = gr.Markdown(label="Toxicity Scores")
+    out = gr.Markdown(label="🔷 Toxicity Scores")
     btn = gr.Button("Check Toxicity")
     btn.click(fn=predict_toxicity, inputs=inp, outputs=out)
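One caveat worth noting: the output shape of the `text-classification` pipeline for a single string can differ across `transformers` versions when `top_k=None` is used, returning either a flat list of `{'label', 'score'}` dicts or a list nested per input. Below is a minimal, self-contained sketch of a `predict_toxicity` that tolerates both shapes; the model name and `top_k=None` come from the file above, while the shape normalization and the example text are assumptions for illustration, not part of the committed code.

```python
from transformers import pipeline

# Same classifier setup as in app.py above.
classifier = pipeline(
    "text-classification",
    model="cardiffnlp/twitter-roberta-base-offensive",
    top_k=None,
)

def predict_toxicity(text):
    preds = classifier(text)
    # Assumption: the result may be [{'label': ..., 'score': ...}, ...] or a
    # list nested per input ([[...]]); unwrap the nesting if present.
    if preds and isinstance(preds[0], list):
        preds = preds[0]
    return "\n".join(f"**{p['label']}**: {p['score']:.2f}" for p in preds)

if __name__ == "__main__":
    print(predict_toxicity("Have a nice day"))
```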