Spaces:
Sleeping
Sleeping
Vargock
committed on
Commit
·
386ee45
1
Parent(s):
fc3f655
Added a Flask web version so that the project is no longer only a Telegram bot
Browse files- .env.example +0 -3
- .gitignore +2 -2
- app.py +87 -0
- bot.py +0 -231
- db.py → modules/db.py +21 -41
- modules/helpers.py +33 -0
- modules/hf_helpers.py +30 -0
- static/style.css +74 -0
- templates/index.html +63 -0
.env.example
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
# .env.example (пушится)
|
| 2 |
-
TELEGRAM_TOKEN=your_telegram_token_here
|
| 3 |
-
HF_API_TOKEN=your_hf_token_here
|
|
|
|
|
|
|
|
|
|
|
|
.gitignore
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
# local models, env files, venv
|
| 2 |
.env
|
| 3 |
-
.env.local
|
| 4 |
.venv/
|
| 5 |
-
__pycache__/
|
|
|
|
|
|
| 1 |
# local models, env files, venv
|
| 2 |
.env
|
|
|
|
| 3 |
.venv/
|
| 4 |
+
__pycache__/
|
| 5 |
+
db/
|
app.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from flask import Flask, render_template, request
|
| 2 |
+
from modules.db import init_db, save_message, get_recent
|
| 3 |
+
from modules.hf_helpers import hf_infer
|
| 4 |
+
from modules.helpers import detect_lang, MODELS, LABELS, update_history, bar
|
| 5 |
+
|
| 6 |
+
# WSGI application object; the route below is registered against it.
app = Flask(__name__)

# Make sure the history table and its indexes exist before serving requests.
init_db()

# CSS colour for each result bar, keyed by the display name built in
# index(): sentiment labels (English and Russian) plus the two fixed
# "Emotion" / "Toxicity" categories.
COLOR_MAP = {
    **dict.fromkeys(("POSITIVE", "ПОЗИТИВНЫЙ"), "green"),
    **dict.fromkeys(("NEGATIVE", "НЕГАТИВНЫЙ"), "red"),
    **dict.fromkeys(("NEUTRAL", "НЕЙТРАЛЬНЫЙ"), "orange"),
    "Emotion": "blue",
    "Toxicity": "purple",
}
|
| 20 |
+
|
| 21 |
+
def _top_prediction(predictions):
    """Return the highest-scoring ``{"label", "score"}`` entry, or None if there is none."""
    if not predictions:
        return None
    return max(predictions, key=lambda p: p["score"])


def _as_result(name, score, color):
    """Package one analysis outcome in the dict shape index.html renders."""
    return {
        "name": name,
        "score": int(score * 100),
        "bar": bar(score),
        "color": color,
    }


@app.route("/", methods=["GET", "POST"])
def index():
    """Render the analysis page and, on POST, analyse the submitted text.

    On a POST with non-blank ``text`` the handler runs the sentiment model
    matching the detected language, stores a successful result with
    ``save_message`` and, for English text only, also runs the emotion and
    toxicity models. The page always shows the ten most recent stored
    messages for the (single, placeholder) user.

    Returns:
        The rendered ``index.html`` with ``result`` (``None`` when nothing
        was analysed) and ``history``.
    """
    user_id = 1  # placeholder for single user
    result = None

    text = ""
    if request.method == "POST":
        text = request.form.get("text", "").strip()

    if text:
        lang = detect_lang(text)
        labels = LABELS[lang]

        # --- Sentiment ---
        sentiment_model = MODELS["ru_sentiment"] if lang == "ru" else MODELS["en_sentiment"]
        sentiment_result = None
        # Only shown by the template next to a sentiment result, so 0 is a
        # safe value when the sentiment call produced nothing.
        pos_count = 0
        top_sentiment = _top_prediction(hf_infer(sentiment_model, text))
        if top_sentiment is not None:
            raw_label = top_sentiment["label"]
            # The Russian label table is keyed in lower case; the English
            # one is keyed by the raw label.
            lookup_key = raw_label.lower() if lang == "ru" else raw_label
            sentiment = labels.get(lookup_key, raw_label).upper()
            score = top_sentiment["score"]
            sentiment_result = _as_result(
                sentiment, score, COLOR_MAP.get(sentiment, "black")
            )
            save_message(user_id, text, sentiment, score)
            # Record only real results: an empty entry for a failed call
            # would occupy a slot in the "last 5" window and skew the count.
            pos_count = update_history(user_id, sentiment)

        # --- Emotion and toxicity (EN only) ---
        emotion_result = None
        tox_result = None
        if lang == "en":
            top_emo = _top_prediction(hf_infer(MODELS["emotion"], text))
            if top_emo is not None:
                emotion_result = _as_result(
                    top_emo["label"].capitalize(),
                    top_emo["score"],
                    COLOR_MAP["Emotion"],
                )

            top_tox = _top_prediction(hf_infer(MODELS["toxicity"], text))
            if top_tox is not None:
                tox_result = _as_result(
                    top_tox["label"], top_tox["score"], COLOR_MAP["Toxicity"]
                )

        result = {
            "sentiment": sentiment_result,
            "emotion": emotion_result,
            "toxicity": tox_result,
            "pos_count": pos_count,
        }

    # Read back with the same id the messages were saved under.
    history = get_recent(user_id, 10)
    return render_template("index.html", result=result, history=history)
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
if __name__ == "__main__":
    # Flask's debug mode enables the Werkzeug interactive debugger, which
    # lets anyone who can reach the page execute code on the server. Keep it
    # off unless explicitly requested via FLASK_DEBUG=1 (or true/yes).
    import os

    debug_enabled = os.getenv("FLASK_DEBUG", "").strip().lower() in ("1", "true", "yes")
    app.run(debug=debug_enabled)
|
bot.py
DELETED
|
@@ -1,231 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import time
|
| 3 |
-
from collections import deque
|
| 4 |
-
import logging
|
| 5 |
-
import requests
|
| 6 |
-
from dotenv import load_dotenv
|
| 7 |
-
from telegram.ext import ApplicationBuilder, MessageHandler, CommandHandler, filters
|
| 8 |
-
|
| 9 |
-
from db import init_db, save_message, get_recent
|
| 10 |
-
|
| 11 |
-
load_dotenv()
|
| 12 |
-
|
| 13 |
-
logging.basicConfig(
|
| 14 |
-
level=logging.INFO,
|
| 15 |
-
format="%(asctime)s [%(levelname)s] %(message)s"
|
| 16 |
-
)
|
| 17 |
-
log = logging.getLogger("sentiment-bot")
|
| 18 |
-
|
| 19 |
-
TOKEN = os.getenv("TELEGRAM_TOKEN")
|
| 20 |
-
HF_API_TOKEN = os.getenv("HF_API_TOKEN")
|
| 21 |
-
|
| 22 |
-
# Models
|
| 23 |
-
|
| 24 |
-
MODELS = {
|
| 25 |
-
"en_sentiment": "cardiffnlp/twitter-roberta-base-sentiment-latest",
|
| 26 |
-
"ru_sentiment": "blanchefort/rubert-base-cased-sentiment",
|
| 27 |
-
"emotion": "j-hartmann/emotion-english-distilroberta-base",
|
| 28 |
-
"toxicity": "unitary/toxic-bert"
|
| 29 |
-
}
|
| 30 |
-
|
| 31 |
-
LABELS = {
|
| 32 |
-
"en": {"LABEL_0": "NEGATIVE", "LABEL_1": "NEUTRAL", "LABEL_2": "POSITIVE"},
|
| 33 |
-
"ru": {"negative": "НЕГАТИВНЫЙ", "neutral": "НЕЙТРАЛЬНЫЙ", "positive": "ПОЗИТИВНЫЙ"}
|
| 34 |
-
}
|
| 35 |
-
|
| 36 |
-
USER_HISTORY = {}
|
| 37 |
-
|
| 38 |
-
# Helper Functions
|
| 39 |
-
|
| 40 |
-
def detect_lang(text: str) -> str:
|
| 41 |
-
cyr = sum('а' <= c <= 'я' or 'А' <= c <= 'Я' for c in text)
|
| 42 |
-
lat = sum('a' <= c <= 'z' or 'A' <= c <= 'Z' for c in text)
|
| 43 |
-
return "ru" if cyr > lat else "en"
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
def hf_infer(model: str, text: str):
|
| 47 |
-
try:
|
| 48 |
-
resp = requests.post(
|
| 49 |
-
f"https://api-inference.huggingface.co/models/{model}",
|
| 50 |
-
headers={"Authorization": f"Bearer {HF_API_TOKEN}"},
|
| 51 |
-
json={"inputs": text},
|
| 52 |
-
timeout=15
|
| 53 |
-
)
|
| 54 |
-
resp.raise_for_status()
|
| 55 |
-
out = resp.json()
|
| 56 |
-
if isinstance(out, list) and out:
|
| 57 |
-
return out
|
| 58 |
-
return out
|
| 59 |
-
except Exception as e:
|
| 60 |
-
log.warning(f"HF inference failed ({model}): {e}")
|
| 61 |
-
return None
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
def update_history(user_id: int, sentiment: str, max_len=10) -> int:
|
| 65 |
-
history = USER_HISTORY.setdefault(user_id, deque(maxlen=max_len))
|
| 66 |
-
history.append((sentiment, time.time()))
|
| 67 |
-
recent = list(history)[-5:]
|
| 68 |
-
return sum(s in ("POSITIVE", "ПОЗИТИВНЫЙ") for s, _ in recent)
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
def bar(score: float) -> str:
|
| 72 |
-
filled = int(score * 10)
|
| 73 |
-
return "█" * filled + "░" * (10 - filled)
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
# Core Logic
|
| 77 |
-
|
| 78 |
-
async def analyze(update, context):
|
| 79 |
-
text = update.message.text.strip()
|
| 80 |
-
if not text:
|
| 81 |
-
return
|
| 82 |
-
|
| 83 |
-
user_id = update.effective_user.id
|
| 84 |
-
lang = detect_lang(text)
|
| 85 |
-
|
| 86 |
-
# Choose sentiment model
|
| 87 |
-
sentiment_model = MODELS["ru_sentiment"] if lang == "ru" else MODELS["en_sentiment"]
|
| 88 |
-
labels = LABELS[lang]
|
| 89 |
-
|
| 90 |
-
# HF API call
|
| 91 |
-
sentiment_data = hf_infer(sentiment_model, text)
|
| 92 |
-
if not sentiment_data:
|
| 93 |
-
await update.message.reply_text("⚠️ Couldn't analyze right now, try again.")
|
| 94 |
-
return
|
| 95 |
-
|
| 96 |
-
# Normalize output inline
|
| 97 |
-
if isinstance(sentiment_data, dict):
|
| 98 |
-
sentiment_data = [sentiment_data]
|
| 99 |
-
elif isinstance(sentiment_data, list) and len(sentiment_data) == 1 and isinstance(sentiment_data[0], list):
|
| 100 |
-
sentiment_data = sentiment_data[0]
|
| 101 |
-
|
| 102 |
-
# Filter invalid entries
|
| 103 |
-
sentiment_data = [item for item in sentiment_data if isinstance(item, dict) and "label" in item and "score" in item]
|
| 104 |
-
|
| 105 |
-
if not sentiment_data:
|
| 106 |
-
await update.message.reply_text("⚠️ Couldn't analyze right now, try again.")
|
| 107 |
-
return
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
top_raw_label = max(sentiment_data, key=lambda x: x["score"])
|
| 111 |
-
sentiment = labels.get(top_raw_label["label"].lower() if lang == "ru" else top_raw_label["label"], top_raw_label["label"]).upper()
|
| 112 |
-
score = top_raw_label["score"]
|
| 113 |
-
|
| 114 |
-
save_message(user_id, text, sentiment, score)
|
| 115 |
-
pos_count = update_history(user_id, sentiment)
|
| 116 |
-
|
| 117 |
-
lines = []
|
| 118 |
-
|
| 119 |
-
if lang == "en":
|
| 120 |
-
lines.append({
|
| 121 |
-
"POSITIVE": "😊 Looks positive!",
|
| 122 |
-
"NEGATIVE": "😞 Sounds negative.",
|
| 123 |
-
"NEUTRAL": "😐 Neutral tone."
|
| 124 |
-
}[sentiment])
|
| 125 |
-
lines.append(f"Sentiment: {sentiment} ({int(score*100)}%) [{bar(score)}]")
|
| 126 |
-
lines.append(f"Positive in last 5 messages: {pos_count}/5")
|
| 127 |
-
|
| 128 |
-
# Emotion
|
| 129 |
-
emo_data = hf_infer(MODELS["emotion"], text)
|
| 130 |
-
if isinstance(emo_data, dict):
|
| 131 |
-
emo_data = [emo_data]
|
| 132 |
-
elif isinstance(emo_data, list) and len(emo_data) == 1 and isinstance(emo_data[0], list):
|
| 133 |
-
emo_data = emo_data[0]
|
| 134 |
-
emo_data = [x for x in emo_data if isinstance(x, dict) and "label" in x and "score" in x]
|
| 135 |
-
if emo_data:
|
| 136 |
-
best = max(emo_data, key=lambda x: x["score"])
|
| 137 |
-
lines.append(f"Emotion: {best['label'].capitalize()}")
|
| 138 |
-
|
| 139 |
-
# Toxicity
|
| 140 |
-
tox_data = hf_infer(MODELS["toxicity"], text)
|
| 141 |
-
if isinstance(tox_data, dict):
|
| 142 |
-
tox_data = [tox_data]
|
| 143 |
-
elif isinstance(tox_data, list) and len(tox_data) == 1 and isinstance(tox_data[0], list):
|
| 144 |
-
tox_data = tox_data[0]
|
| 145 |
-
tox_data = [x for x in tox_data if isinstance(x, dict) and "label" in x and "score" in x]
|
| 146 |
-
if tox_data:
|
| 147 |
-
t = max(tox_data, key=lambda x: x["score"])
|
| 148 |
-
lines.append(f"Toxicity: {int(t['score']*100)}% ({t['label']})")
|
| 149 |
-
|
| 150 |
-
else:
|
| 151 |
-
lines.append({
|
| 152 |
-
"ПОЗИТИВНЫЙ": "😄 Отличный настрой!",
|
| 153 |
-
"НЕГАТИВНЫЙ": "😞 Похоже на негатив.",
|
| 154 |
-
"НЕЙТРАЛЬНЫЙ": "😐 Спокойный тон."
|
| 155 |
-
}[sentiment])
|
| 156 |
-
lines.append(f"Тональность: {sentiment} ({int(score*100)}%) [{bar(score)}]")
|
| 157 |
-
lines.append(f"Позитивных за последние 5: {pos_count}/5")
|
| 158 |
-
|
| 159 |
-
await update.message.reply_text("\n".join(lines), parse_mode="HTML")
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
# Telegram Bot, User Commands
|
| 163 |
-
|
| 164 |
-
async def start(update, context):
|
| 165 |
-
await update.message.reply_text(
|
| 166 |
-
"👋 Hey! I'm a bot using Hugging Face models.\n"
|
| 167 |
-
"I analyze your messages for sentiment, emotion, and toxicity.\n"
|
| 168 |
-
"Russian is supported but with limited features.\n\nUse /info to learn more."
|
| 169 |
-
)
|
| 170 |
-
|
| 171 |
-
async def info(update, context):
|
| 172 |
-
await update.message.reply_text(
|
| 173 |
-
"ℹ️ <b>Features:</b>\n"
|
| 174 |
-
"- Sentiment analysis (EN & RU)\n"
|
| 175 |
-
"- Emotion detection (EN only)\n"
|
| 176 |
-
"- Toxicity detection (EN only)\n"
|
| 177 |
-
"- Tracks last messages per user\n"
|
| 178 |
-
"- Confidence bars & emoji insights\n\n"
|
| 179 |
-
"Use /credits for model sources or /mystats for your stats.",
|
| 180 |
-
parse_mode="HTML"
|
| 181 |
-
)
|
| 182 |
-
|
| 183 |
-
async def credits(update, context):
|
| 184 |
-
await update.message.reply_text(
|
| 185 |
-
"ℹ️ <b>Model Credits:</b>\n"
|
| 186 |
-
"- <a href='https://huggingface.co/cardiffnlp/twitter-roberta-base-sentiment-latest'>English Sentiment</a>\n"
|
| 187 |
-
"- <a href='https://huggingface.co/blanchefort/rubert-base-cased-sentiment'>Russian Sentiment</a>\n"
|
| 188 |
-
"- <a href='https://huggingface.co/j-hartmann/emotion-english-distilroberta-base'>Emotion</a>\n"
|
| 189 |
-
"- <a href='https://huggingface.co/unitary/toxic-bert'>Toxicity</a>",
|
| 190 |
-
parse_mode="HTML"
|
| 191 |
-
)
|
| 192 |
-
|
| 193 |
-
async def mystats(update, _):
|
| 194 |
-
user_id = update.effective_user.id
|
| 195 |
-
history = get_recent(user_id, 50)
|
| 196 |
-
if not history:
|
| 197 |
-
await update.message.reply_text("No messages analyzed yet.")
|
| 198 |
-
return
|
| 199 |
-
|
| 200 |
-
sentiments = [s for _, s, _, _ in history]
|
| 201 |
-
total = len(sentiments)
|
| 202 |
-
pos = sum(s in ("POSITIVE", "ПОЗИТИВНЫЙ") for s in sentiments)
|
| 203 |
-
neu = sum(s in ("NEUTRAL", "НЕЙТРАЛЬНЫЙ") for s in sentiments)
|
| 204 |
-
neg = sum(s in ("NEGATIVE", "НЕГАТИВНЫЙ") for s in sentiments)
|
| 205 |
-
|
| 206 |
-
await update.message.reply_text(
|
| 207 |
-
f"Your stats:\n"
|
| 208 |
-
f"Positive: {pos} ({pos*100//total}%)\n"
|
| 209 |
-
f"Neutral: {neu} ({neu*100//total}%)\n"
|
| 210 |
-
f"Negative: {neg} ({neg*100//total}%)"
|
| 211 |
-
)
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
# Run Bot
|
| 215 |
-
|
| 216 |
-
def main():
|
| 217 |
-
init_db()
|
| 218 |
-
app = ApplicationBuilder().token(TOKEN).build()
|
| 219 |
-
|
| 220 |
-
app.add_handler(CommandHandler("start", start))
|
| 221 |
-
app.add_handler(CommandHandler("info", info))
|
| 222 |
-
app.add_handler(CommandHandler("credits", credits))
|
| 223 |
-
app.add_handler(CommandHandler("mystats", mystats))
|
| 224 |
-
app.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, analyze))
|
| 225 |
-
|
| 226 |
-
log.info("Bot running...")
|
| 227 |
-
app.run_polling()
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
if __name__ == "__main__":
|
| 231 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
db.py → modules/db.py
RENAMED
|
@@ -1,20 +1,20 @@
|
|
| 1 |
-
|
| 2 |
import sqlite3
|
| 3 |
import logging
|
| 4 |
-
from typing import
|
| 5 |
|
| 6 |
logger = logging.getLogger(__name__)
|
| 7 |
-
DB_PATH = "data.db"
|
| 8 |
|
| 9 |
-
|
| 10 |
-
|
|
|
|
| 11 |
|
|
|
|
| 12 |
|
| 13 |
def get_db_connection() -> Optional[sqlite3.Connection]:
|
| 14 |
global _conn
|
| 15 |
if _conn:
|
| 16 |
return _conn
|
| 17 |
-
|
| 18 |
try:
|
| 19 |
conn = sqlite3.connect(DB_PATH, check_same_thread=False, timeout=20)
|
| 20 |
conn.row_factory = sqlite3.Row
|
|
@@ -24,47 +24,35 @@ def get_db_connection() -> Optional[sqlite3.Connection]:
|
|
| 24 |
logger.error("Database connection error: %s", e)
|
| 25 |
return None
|
| 26 |
|
| 27 |
-
|
| 28 |
def init_db() -> None:
|
| 29 |
conn = get_db_connection()
|
| 30 |
-
|
| 31 |
if not conn:
|
| 32 |
raise RuntimeError("Could not obtain database connection")
|
| 33 |
-
|
| 34 |
try:
|
| 35 |
cur = conn.cursor()
|
| 36 |
-
# Enable WAL for better concurrency
|
| 37 |
cur.execute("PRAGMA journal_mode=WAL")
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
|
| 48 |
-
)
|
| 49 |
-
"""
|
| 50 |
-
)
|
| 51 |
-
# Helpful indexes
|
| 52 |
cur.execute("CREATE INDEX IF NOT EXISTS idx_user_id ON history(user_id)")
|
| 53 |
cur.execute("CREATE INDEX IF NOT EXISTS idx_timestamp ON history(timestamp)")
|
| 54 |
cur.execute("CREATE INDEX IF NOT EXISTS idx_user_timestamp ON history(user_id, timestamp)")
|
| 55 |
conn.commit()
|
| 56 |
logger.info("Database initialized")
|
| 57 |
-
|
| 58 |
except sqlite3.Error as e:
|
| 59 |
logger.exception("Database initialization error: %s", e)
|
| 60 |
raise
|
| 61 |
|
| 62 |
-
|
| 63 |
def save_message(user_id: int, text: str, sentiment: str, confidence: float) -> bool:
|
| 64 |
conn = get_db_connection()
|
| 65 |
if not conn:
|
| 66 |
return False
|
| 67 |
-
|
| 68 |
try:
|
| 69 |
cur = conn.cursor()
|
| 70 |
cur.execute(
|
|
@@ -72,35 +60,27 @@ def save_message(user_id: int, text: str, sentiment: str, confidence: float) ->
|
|
| 72 |
(user_id, text, sentiment, confidence),
|
| 73 |
)
|
| 74 |
conn.commit()
|
| 75 |
-
logger.debug("Saved message for user %s", user_id)
|
| 76 |
return True
|
| 77 |
except sqlite3.Error as e:
|
| 78 |
logger.exception("Error saving message: %s", e)
|
| 79 |
-
# rollback not strictly necessary after exception, but safe
|
| 80 |
try:
|
| 81 |
conn.rollback()
|
| 82 |
except Exception:
|
| 83 |
pass
|
| 84 |
return False
|
| 85 |
|
| 86 |
-
|
| 87 |
def get_recent(user_id: int, limit: int = 10) -> List[Tuple]:
|
| 88 |
conn = get_db_connection()
|
| 89 |
if not conn:
|
| 90 |
return []
|
| 91 |
-
|
| 92 |
try:
|
| 93 |
cur = conn.cursor()
|
| 94 |
-
cur.execute(
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
LIMIT ?
|
| 101 |
-
""",
|
| 102 |
-
(user_id, limit),
|
| 103 |
-
)
|
| 104 |
rows = cur.fetchall()
|
| 105 |
return [tuple(r) for r in rows]
|
| 106 |
except sqlite3.Error as e:
|
|
|
|
| 1 |
+
import os
|
| 2 |
import sqlite3
|
| 3 |
import logging
|
| 4 |
+
from typing import List, Tuple, Optional
|
| 5 |
|
| 6 |
logger = logging.getLogger(__name__)
|
|
|
|
| 7 |
|
| 8 |
+
# Folder (relative to the working directory) that holds the SQLite file,
# and the full path of the database inside it.
DB_FOLDER = "db"
DB_PATH = os.path.join(DB_FOLDER, "data.db")

# Create the folder at import time so sqlite3.connect(DB_PATH) can succeed.
os.makedirs(DB_FOLDER, exist_ok=True)

# Module-wide connection handle; get_db_connection() returns it when set.
_conn: Optional[sqlite3.Connection] = None
|
| 13 |
|
| 14 |
def get_db_connection() -> Optional[sqlite3.Connection]:
|
| 15 |
global _conn
|
| 16 |
if _conn:
|
| 17 |
return _conn
|
|
|
|
| 18 |
try:
|
| 19 |
conn = sqlite3.connect(DB_PATH, check_same_thread=False, timeout=20)
|
| 20 |
conn.row_factory = sqlite3.Row
|
|
|
|
| 24 |
logger.error("Database connection error: %s", e)
|
| 25 |
return None
|
| 26 |
|
|
|
|
| 27 |
def init_db() -> None:
    """Create the ``history`` table and its indexes if they do not exist.

    Also switches the database to WAL journal mode.

    Raises:
        RuntimeError: if no database connection could be obtained.
        sqlite3.Error: re-raised (after logging) if any statement fails.
    """
    connection = get_db_connection()
    if not connection:
        raise RuntimeError("Could not obtain database connection")

    # Executed in order; every statement is safe to repeat on an existing DB.
    setup_statements = (
        "PRAGMA journal_mode=WAL",
        """
            CREATE TABLE IF NOT EXISTS history (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                user_id INTEGER NOT NULL,
                text TEXT NOT NULL,
                sentiment TEXT NOT NULL,
                confidence REAL NOT NULL,
                timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
            )""",
        "CREATE INDEX IF NOT EXISTS idx_user_id ON history(user_id)",
        "CREATE INDEX IF NOT EXISTS idx_timestamp ON history(timestamp)",
        "CREATE INDEX IF NOT EXISTS idx_user_timestamp ON history(user_id, timestamp)",
    )

    try:
        cursor = connection.cursor()
        for statement in setup_statements:
            cursor.execute(statement)
        connection.commit()
        logger.info("Database initialized")
    except sqlite3.Error as exc:
        logger.exception("Database initialization error: %s", exc)
        raise
|
| 51 |
|
|
|
|
| 52 |
def save_message(user_id: int, text: str, sentiment: str, confidence: float) -> bool:
|
| 53 |
conn = get_db_connection()
|
| 54 |
if not conn:
|
| 55 |
return False
|
|
|
|
| 56 |
try:
|
| 57 |
cur = conn.cursor()
|
| 58 |
cur.execute(
|
|
|
|
| 60 |
(user_id, text, sentiment, confidence),
|
| 61 |
)
|
| 62 |
conn.commit()
|
|
|
|
| 63 |
return True
|
| 64 |
except sqlite3.Error as e:
|
| 65 |
logger.exception("Error saving message: %s", e)
|
|
|
|
| 66 |
try:
|
| 67 |
conn.rollback()
|
| 68 |
except Exception:
|
| 69 |
pass
|
| 70 |
return False
|
| 71 |
|
|
|
|
| 72 |
def get_recent(user_id: int, limit: int = 10) -> List[Tuple]:
|
| 73 |
conn = get_db_connection()
|
| 74 |
if not conn:
|
| 75 |
return []
|
|
|
|
| 76 |
try:
|
| 77 |
cur = conn.cursor()
|
| 78 |
+
cur.execute("""
|
| 79 |
+
SELECT text, sentiment, confidence, timestamp
|
| 80 |
+
FROM history
|
| 81 |
+
WHERE user_id = ?
|
| 82 |
+
ORDER BY timestamp DESC
|
| 83 |
+
LIMIT ?""", (user_id, limit))
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
rows = cur.fetchall()
|
| 85 |
return [tuple(r) for r in rows]
|
| 86 |
except sqlite3.Error as e:
|
modules/helpers.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import time
|
| 2 |
+
from collections import deque
|
| 3 |
+
|
| 4 |
+
# Models and labels
|
| 5 |
+
# Hugging Face model ids, keyed by the task each one is used for.
MODELS = dict(
    en_sentiment="cardiffnlp/twitter-roberta-base-sentiment-latest",
    ru_sentiment="blanchefort/rubert-base-cased-sentiment",
    emotion="j-hartmann/emotion-english-distilroberta-base",
    toxicity="unitary/toxic-bert",
)

# Raw model label -> display label, per detected language code.
LABELS = {
    "en": dict(LABEL_0="NEGATIVE", LABEL_1="NEUTRAL", LABEL_2="POSITIVE"),
    "ru": dict(
        negative="НЕГАТИВНЫЙ",
        neutral="НЕЙТРАЛЬНЫЙ",
        positive="ПОЗИТИВНЫЙ",
    ),
}

# In-memory recent-sentiment log per user id; filled by update_history().
USER_HISTORY = {}
|
| 18 |
+
|
| 19 |
+
# Helpers
|
| 20 |
+
def detect_lang(text: str) -> str:
    """Guess whether *text* is Russian or English by counting letters.

    Args:
        text: the message to classify.

    Returns:
        ``"ru"`` when Cyrillic letters strictly outnumber basic Latin
        letters, otherwise ``"en"`` (including ties and text with no
        letters at all).
    """
    # 'ё' / 'Ё' sit outside the contiguous а..я / А..Я code-point ranges,
    # so they have to be counted explicitly.
    cyr = sum('а' <= c <= 'я' or 'А' <= c <= 'Я' or c in 'ёЁ' for c in text)
    lat = sum('a' <= c <= 'z' or 'A' <= c <= 'Z' for c in text)
    return "ru" if cyr > lat else "en"
|
| 24 |
+
|
| 25 |
+
def update_history(user_id: int, sentiment: str, max_len=10) -> int:
    """Log *sentiment* for *user_id* and count recent positive entries.

    Args:
        user_id: key into the module-level ``USER_HISTORY`` mapping.
        sentiment: display label to record, stored with the current time.
        max_len: capacity of the user's log; only applied when the log is
            first created for that user.

    Returns:
        How many of the user's five most recent entries are
        ``"POSITIVE"`` or ``"ПОЗИТИВНЫЙ"``.
    """
    if user_id not in USER_HISTORY:
        USER_HISTORY[user_id] = deque(maxlen=max_len)
    entries = USER_HISTORY[user_id]
    entries.append((sentiment, time.time()))

    positives = 0
    for label, _logged_at in list(entries)[-5:]:
        if label == "POSITIVE" or label == "ПОЗИТИВНЫЙ":
            positives += 1
    return positives
|
| 30 |
+
|
| 31 |
+
def bar(score: float) -> str:
    """Render *score* as a 10-cell text progress bar.

    Args:
        score: confidence, expected in the range 0..1.

    Returns:
        A string of exactly 10 characters: ``"█"`` for each filled tenth
        followed by ``"░"`` for the remainder.
    """
    # Clamp so an out-of-range score still yields a 10-character bar
    # instead of an over-long or negative-fill string.
    filled = max(0, min(10, int(score * 10)))
    return "█" * filled + "░" * (10 - filled)
|
modules/hf_helpers.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
import logging
|
| 3 |
+
import os
|
| 4 |
+
|
| 5 |
+
HF_API_TOKEN = os.getenv("HF_API_TOKEN")
|
| 6 |
+
log = logging.getLogger(__name__)
|
| 7 |
+
|
| 8 |
+
def hf_infer(model: str, text: str):
    """Run *text* through a hosted Hugging Face model and return its predictions.

    Args:
        model: model id appended to the inference API URL.
        text: input sent as the ``inputs`` field of the JSON body.

    Returns:
        A flat list of ``{"label": ..., "score": ...}`` dicts. The list is
        empty when the request fails, the response is not valid JSON, or
        the payload contains no such entries. This function does not raise
        for those cases; it logs a warning instead.
    """
    # Send credentials only when a token is configured; an unset variable
    # would otherwise go out as the literal header "Bearer None".
    headers = {"Authorization": f"Bearer {HF_API_TOKEN}"} if HF_API_TOKEN else {}

    try:
        resp = requests.post(
            f"https://api-inference.huggingface.co/models/{model}",
            headers=headers,
            json={"inputs": text},
            timeout=15,
        )
        resp.raise_for_status()
        out = resp.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        log.warning("HF inference failed (%s): %s", model, e)
        return []

    # Normalize: dict → list, flatten nested list
    if isinstance(out, dict):
        out = [out]
    elif isinstance(out, list) and len(out) == 1 and isinstance(out[0], list):
        out = out[0]

    if not isinstance(out, list):
        log.warning("HF inference failed (%s): unexpected payload type %s", model, type(out).__name__)
        return []

    # Filter invalid entries
    return [x for x in out if isinstance(x, dict) and "label" in x and "score" in x]
|
static/style.css
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/* Page frame: centred column on a light background. */
body {
    margin: 2rem auto;
    max-width: 800px;
    font-family: sans-serif;
    color: #333;
    background-color: #f4f4f9;
}

h1, h2 {
    text-align: center;
}

/* Input form: textarea stacked above the submit button. */
form {
    display: flex;
    flex-direction: column;
    align-items: center;
    margin-bottom: 2rem;
}

textarea {
    width: 100%;
    max-width: 700px;
    height: 100px;
    padding: 0.5rem;
    border: 1px solid #ccc;
    border-radius: 6px;
    resize: vertical;
}

button {
    margin-top: 1rem;
    padding: 0.5rem 1.5rem;
    border: none;
    border-radius: 6px;
    color: white;
    background-color: #4CAF50;
    cursor: pointer;
    transition: 0.2s;
}

button:hover {
    background-color: #45a049;
}

/* Text progress bar shown under each result. */
.bar {
    margin: 0.5rem 0;
    font-size: 1.2rem;
}

/* Recent-messages table. */
table {
    width: 100%;
    margin-top: 1rem;
    border-collapse: collapse;
    border-radius: 6px;
    background-color: white;
    box-shadow: 0px 0px 8px rgba(0, 0, 0, 0.1);
}

th, td {
    padding: 8px;
    border: 1px solid #ddd;
    text-align: left;
}

th {
    background-color: #f2f2f2;
}

tr:hover {
    background-color: #f9f9f9;
}
|
templates/index.html
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
<html lang="en">

<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>AI Text Analysis</title>
    <link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
</head>

<body>
    <h1>AI Text Analysis</h1>
    <form method="post">
        <textarea name="text" placeholder="Type your message...">{{ request.form.text or '' }}</textarea>
        <button type="submit">Analyze</button>
    </form>

    {# Analysis of the text that was just submitted, if any. #}
    {% if result %}
    <hr>
    <h2>Result</h2>

    {% if result.sentiment %}
    <p>Sentiment: <strong>{{ result.sentiment.name }}</strong> ({{ result.sentiment.score }}%)</p>
    <p class="bar" style="color: {{ result.sentiment.color }}">{{ result.sentiment.bar }}</p>
    <p>Positive in last 5 messages: {{ result.pos_count }}/5</p>
    {% endif %}

    {% if result.emotion %}
    <p>Emotion: <strong>{{ result.emotion.name }}</strong> ({{ result.emotion.score }}%)</p>
    <p class="bar" style="color: {{ result.emotion.color }}">{{ result.emotion.bar }}</p>
    {% endif %}

    {% if result.toxicity %}
    <p>Toxicity: <strong>{{ result.toxicity.name }}</strong> ({{ result.toxicity.score }}%)</p>
    <p class="bar" style="color: {{ result.toxicity.color }}">{{ result.toxicity.bar }}</p>
    {% endif %}
    {% endif %}


    {# Stored messages, most recent first. #}
    {% if history %}
    <h2>Recent Messages</h2>
    <table>
        <tr>
            <th>Text</th>
            <th>Sentiment</th>
            <th>Confidence</th>
            <th>Timestamp</th>
        </tr>
        {% for text, sentiment, confidence, timestamp in history %}
        <tr>
            <td>{{ text }}</td>
            <td>{{ sentiment }}</td>
            <td>{{ '%.1f' % (confidence*100) }}%</td>
            <td>{{ timestamp }}</td>
        </tr>
        {% endfor %}
    </table>
    {% endif %}
</body>

</html>
|