Spaces:

Vargock
/

Text-Sentiment-Analyzzer

Sleeping

App Files Files Community

Vargock committed on Oct 27

Commit

65cfd5e

0 Parent(s):

Committing an old Telegram project based on Hugging Face AI models to recognize text tone

Browse files

Files changed (5) hide show

.env.example +3 -0
.gitignore +5 -0
README +3 -0
bot.py +232 -0
db.py +108 -0

.env.example ADDED Viewed

	@@ -0,0 +1,3 @@

+# .env.example (this template is committed to the repository)
+TELEGRAM_TOKEN=your_telegram_token_here
+HF_API_TOKEN=your_hf_token_here

.gitignore ADDED Viewed

	@@ -0,0 +1,5 @@

+# local models, env files, venv
+.env
+.env.local
+.venv/
+__pycache__/

README ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ This is a simple Telegram bot that uses the Hugging Face API to detect the sentiment of text messages. I don’t provide hosting for it, so to use it, you’ll need to create your own Telegram bot token and Hugging Face API token. Then put both into a .env file in the root folder, following the format of the .env.example file.
2	+
3	+ Simply copy .env.example to .env and fill in your keys.

bot.py ADDED Viewed

	@@ -0,0 +1,232 @@

+import os
+import time
+from collections import deque
+import logging
+import requests
+from dotenv import load_dotenv
+from telegram.ext import ApplicationBuilder, MessageHandler, CommandHandler, filters
+from db import init_db, save_message, get_recent
+# Load settings from a local .env file (python-dotenv) before they are read below.
+load_dotenv()
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(levelname)s] %(message)s"
+)
+log = logging.getLogger("sentiment-bot")
+# Credentials are read from the environment; os.getenv returns None when a variable is unset.
+TOKEN = os.getenv("TELEGRAM_TOKEN")
+HF_API_TOKEN = os.getenv("HF_API_TOKEN")
+# Models
+# Hugging Face model ids, keyed by the task they are used for in analyze().
+MODELS = {
+    "en_sentiment": "cardiffnlp/twitter-roberta-base-sentiment-latest",
+    "ru_sentiment": "blanchefort/rubert-base-cased-sentiment",
+    "emotion": "j-hartmann/emotion-english-distilroberta-base",
+    "toxicity": "unitary/toxic-bert"
+}
+# Per-language mapping from raw model labels to the label shown to the user.
+# analyze() falls back to the raw label when it is not present in the mapping.
+LABELS = {
+    "en": {"LABEL_0": "NEGATIVE", "LABEL_1": "NEUTRAL", "LABEL_2": "POSITIVE"},
+    "ru": {"negative": "НЕГАТИВНЫЙ", "neutral": "НЕЙТРАЛЬНЫЙ", "positive": "ПОЗИТИВНЫЙ"}
+}
+# In-memory per-user history: user_id -> deque of (sentiment, timestamp), filled by update_history().
+USER_HISTORY = {}
+# Helper Functions
+def detect_lang(text: str) -> str:
+    cyr = sum('а' <= c <= 'я' or 'А' <= c <= 'Я' for c in text)
+    lat = sum('a' <= c <= 'z' or 'A' <= c <= 'Z' for c in text)
+    return "ru" if cyr > lat else "en"
+def hf_infer(model: str, text: str):
+    """Call Hugging Face inference API safely."""
+    try:
+        resp = requests.post(
+            f"https://api-inference.huggingface.co/models/{model}",
+            headers={"Authorization": f"Bearer {HF_API_TOKEN}"},
+            json={"inputs": text},
+            timeout=15
+        )
+        resp.raise_for_status()
+        out = resp.json()
+        if isinstance(out, list) and out:
+            return out
+        return out
+    except Exception as e:
+        log.warning(f"HF inference failed ({model}): {e}")
+        return None
+def update_history(user_id: int, sentiment: str, max_len=10) -> int:
+    history = USER_HISTORY.setdefault(user_id, deque(maxlen=max_len))
+    history.append((sentiment, time.time()))
+    recent = list(history)[-5:]
+    return sum(s in ("POSITIVE", "ПОЗИТИВНЫЙ") for s, _ in recent)
+def bar(score: float) -> str:
+    filled = int(score * 10)
+    return "█" * filled + "░" * (10 - filled)
+# Core Logic
+async def analyze(update, context):
+    text = update.message.text.strip()
+    if not text:
+        return
+    user_id = update.effective_user.id
+    lang = detect_lang(text)
+    # Choose sentiment model
+    sentiment_model = MODELS["ru_sentiment"] if lang == "ru" else MODELS["en_sentiment"]
+    labels = LABELS[lang]
+    # HF API call
+    sentiment_data = hf_infer(sentiment_model, text)
+    if not sentiment_data:
+        await update.message.reply_text("⚠️ Couldn't analyze right now, try again.")
+        return
+    # Normalize output inline
+    if isinstance(sentiment_data, dict):
+        sentiment_data = [sentiment_data]
+    elif isinstance(sentiment_data, list) and len(sentiment_data) == 1 and isinstance(sentiment_data[0], list):
+        sentiment_data = sentiment_data[0]
+    # Filter invalid entries
+    sentiment_data = [item for item in sentiment_data if isinstance(item, dict) and "label" in item and "score" in item]
+    if not sentiment_data:
+        await update.message.reply_text("⚠️ Couldn't analyze right now, try again.")
+        return
+    top_raw_label = max(sentiment_data, key=lambda x: x["score"])
+    sentiment = labels.get(top_raw_label["label"].lower() if lang == "ru" else top_raw_label["label"], top_raw_label["label"]).upper()
+    score = top_raw_label["score"]
+    save_message(user_id, text, sentiment, score)
+    pos_count = update_history(user_id, sentiment)
+    lines = []
+    if lang == "en":
+        lines.append({
+            "POSITIVE": "😊 Looks positive!",
+            "NEGATIVE": "😞 Sounds negative.",
+            "NEUTRAL": "😐 Neutral tone."
+        }[sentiment])
+        lines.append(f"Sentiment: {sentiment} ({int(score*100)}%) [{bar(score)}]")
+        lines.append(f"Positive in last 5 messages: {pos_count}/5")
+        # Emotion
+        emo_data = hf_infer(MODELS["emotion"], text)
+        if isinstance(emo_data, dict):
+            emo_data = [emo_data]
+        elif isinstance(emo_data, list) and len(emo_data) == 1 and isinstance(emo_data[0], list):
+            emo_data = emo_data[0]
+        emo_data = [x for x in emo_data if isinstance(x, dict) and "label" in x and "score" in x]
+        if emo_data:
+            best = max(emo_data, key=lambda x: x["score"])
+            lines.append(f"Emotion: {best['label'].capitalize()}")
+        # Toxicity
+        tox_data = hf_infer(MODELS["toxicity"], text)
+        if isinstance(tox_data, dict):
+            tox_data = [tox_data]
+        elif isinstance(tox_data, list) and len(tox_data) == 1 and isinstance(tox_data[0], list):
+            tox_data = tox_data[0]
+        tox_data = [x for x in tox_data if isinstance(x, dict) and "label" in x and "score" in x]
+        if tox_data:
+            t = max(tox_data, key=lambda x: x["score"])
+            lines.append(f"Toxicity: {int(t['score']*100)}% ({t['label']})")
+    else:
+        lines.append({
+            "ПОЗИТИВНЫЙ": "😄 Отличный настрой!",
+            "НЕГАТИВНЫЙ": "😞 Похоже на негатив.",
+            "НЕЙТРАЛЬНЫЙ": "😐 Спокойный тон."
+        }[sentiment])
+        lines.append(f"Тональность: {sentiment} ({int(score*100)}%) [{bar(score)}]")
+        lines.append(f"Позитивных за последние 5: {pos_count}/5")
+    await update.message.reply_text("\n".join(lines), parse_mode="HTML")
+# Telegram Bot, User Commands
+async def start(update, context):
+    await update.message.reply_text(
+        "👋 Hey! I'm a bot using Hugging Face models.\n"
+        "I analyze your messages for sentiment, emotion, and toxicity.\n"
+        "Russian is supported but with limited features.\n\nUse /info to learn more."
+    )
+async def info(update, context):
+    await update.message.reply_text(
+        "ℹ️ <b>Features:</b>\n"
+        "- Sentiment analysis (EN & RU)\n"
+        "- Emotion detection (EN only)\n"
+        "- Toxicity detection (EN only)\n"
+        "- Tracks last messages per user\n"
+        "- Confidence bars & emoji insights\n\n"
+        "Use /credits for model sources or /mystats for your stats.",
+        parse_mode="HTML"
+    )
+async def credits(update, context):
+    await update.message.reply_text(
+        "ℹ️ <b>Model Credits:</b>\n"
+        "- <a href='https://huggingface.co/cardiffnlp/twitter-roberta-base-sentiment-latest'>English Sentiment</a>\n"
+        "- <a href='https://huggingface.co/blanchefort/rubert-base-cased-sentiment'>Russian Sentiment</a>\n"
+        "- <a href='https://huggingface.co/j-hartmann/emotion-english-distilroberta-base'>Emotion</a>\n"
+        "- <a href='https://huggingface.co/unitary/toxic-bert'>Toxicity</a>",
+        parse_mode="HTML"
+    )
+async def mystats(update, _):
+    user_id = update.effective_user.id
+    history = get_recent(user_id, 50)
+    if not history:
+        await update.message.reply_text("No messages analyzed yet.")
+        return
+    sentiments = [s for _, s, _, _ in history]
+    total = len(sentiments)
+    pos = sum(s in ("POSITIVE", "ПОЗИТИВНЫЙ") for s in sentiments)
+    neu = sum(s in ("NEUTRAL", "НЕЙТРАЛЬНЫЙ") for s in sentiments)
+    neg = sum(s in ("NEGATIVE", "НЕГАТИВНЫЙ") for s in sentiments)
+    await update.message.reply_text(
+        f"Your stats:\n"
+        f"Positive: {pos} ({pos*100//total}%)\n"
+        f"Neutral: {neu} ({neu*100//total}%)\n"
+        f"Negative: {neg} ({neg*100//total}%)"
+    )
+# Run Bot
+def main():
+    init_db()
+    app = ApplicationBuilder().token(TOKEN).build()
+    app.add_handler(CommandHandler("start", start))
+    app.add_handler(CommandHandler("info", info))
+    app.add_handler(CommandHandler("credits", credits))
+    app.add_handler(CommandHandler("mystats", mystats))
+    app.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, analyze))
+    log.info("Bot running...")
+    app.run_polling()
+if __name__ == "__main__":
+    main()

db.py ADDED Viewed

	@@ -0,0 +1,108 @@

+# db.py
+import sqlite3
+import logging
+from typing import Optional, List, Tuple, Dict
+logger = logging.getLogger(__name__)
+# SQLite database file; the path is relative, so it resolves against the current working directory.
+DB_PATH = "data.db"
+# Module-level cached connection
+# Set on first use by get_db_connection() and returned by every later call.
+_conn: Optional[sqlite3.Connection] = None
+def get_db_connection() -> Optional[sqlite3.Connection]:
+    global _conn
+    if _conn:
+        return _conn
+    try:
+        conn = sqlite3.connect(DB_PATH, check_same_thread=False, timeout=20)
+        conn.row_factory = sqlite3.Row
+        _conn = conn
+        return conn
+    except sqlite3.Error as e:
+        logger.error("Database connection error: %s", e)
+        return None
+def init_db() -> None:
+    conn = get_db_connection()
+    if not conn:
+        raise RuntimeError("Could not obtain database connection")
+    try:
+        cur = conn.cursor()
+        # Enable WAL for better concurrency
+        cur.execute("PRAGMA journal_mode=WAL")
+        # Create table
+        cur.execute(
+            """
+            CREATE TABLE IF NOT EXISTS history (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                user_id INTEGER NOT NULL,
+                text TEXT NOT NULL,
+                sentiment TEXT NOT NULL,
+                confidence REAL NOT NULL,
+                timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
+            )
+            """
+        )
+        # Helpful indexes
+        cur.execute("CREATE INDEX IF NOT EXISTS idx_user_id ON history(user_id)")
+        cur.execute("CREATE INDEX IF NOT EXISTS idx_timestamp ON history(timestamp)")
+        cur.execute("CREATE INDEX IF NOT EXISTS idx_user_timestamp ON history(user_id, timestamp)")
+        conn.commit()
+        logger.info("Database initialized")
+    except sqlite3.Error as e:
+        logger.exception("Database initialization error: %s", e)
+        raise
+def save_message(user_id: int, text: str, sentiment: str, confidence: float) -> bool:
+    conn = get_db_connection()
+    if not conn:
+        return False
+    try:
+        cur = conn.cursor()
+        cur.execute(
+            "INSERT INTO history (user_id, text, sentiment, confidence) VALUES (?, ?, ?, ?)",
+            (user_id, text, sentiment, confidence),
+        )
+        conn.commit()
+        logger.debug("Saved message for user %s", user_id)
+        return True
+    except sqlite3.Error as e:
+        logger.exception("Error saving message: %s", e)
+        # rollback not strictly necessary after exception, but safe
+        try:
+            conn.rollback()
+        except Exception:
+            pass
+        return False
+def get_recent(user_id: int, limit: int = 10) -> List[Tuple]:
+    conn = get_db_connection()
+    if not conn:
+        return []
+    try:
+        cur = conn.cursor()
+        cur.execute(
+            """
+            SELECT text, sentiment, confidence, timestamp
+            FROM history
+            WHERE user_id = ?
+            ORDER BY timestamp DESC
+            LIMIT ?
+            """,
+            (user_id, limit),
+        )
+        rows = cur.fetchall()
+        return [tuple(r) for r in rows]
+    except sqlite3.Error as e:
+        logger.exception("Error fetching recent messages: %s", e)
+        return []