Spaces:
Sleeping
Sleeping
Merge branch 'web-version' of https://github.com/Vargock/AI-Texts-Analysis into web-version
Browse files- README.md +1 -1
- app.py +20 -8
- modules/db.py +11 -6
- modules/hf_helpers.py +6 -1
README.md
CHANGED
|
@@ -10,4 +10,4 @@ pinned: false
|
|
| 10 |
# Text Sentiment Analyzer
|
| 11 |
This was originally designed as a simple Telegram bot that uses the Hugging Face API to detect the sentiment of text messages, but for easier testing and hosting I decided to switch it to a Flask-based web version.
|
| 12 |
|
| 13 |
-
[](https://huggingface.co/spaces/Vargock/Text-Sentiment-Analyzzer)
|
|
|
|
| 10 |
# Text Sentiment Analyzer
|
| 11 |
This was originally designed as a simple Telegram bot that uses the Hugging Face API to detect the sentiment of text messages, but for easier testing and hosting I decided to switch it to a Flask-based web version.
|
| 12 |
|
| 13 |
+
[](https://huggingface.co/spaces/Vargock/Text-Sentiment-Analyzzer)
|
app.py
CHANGED
|
@@ -1,9 +1,12 @@
|
|
| 1 |
-
|
|
|
|
| 2 |
from modules.db import init_db, save_message, get_recent
|
| 3 |
from modules.hf_helpers import hf_infer
|
| 4 |
from modules.helpers import detect_lang, MODELS, LABELS, update_history, bar
|
| 5 |
|
| 6 |
app = Flask(__name__)
|
|
|
|
|
|
|
| 7 |
init_db()
|
| 8 |
|
| 9 |
COLOR_MAP = {
|
|
@@ -17,23 +20,32 @@ COLOR_MAP = {
|
|
| 17 |
"Toxicity": "purple"
|
| 18 |
}
|
| 19 |
|
|
|
|
| 20 |
@app.route("/", methods=["GET", "POST"])
|
| 21 |
def index():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
result = None
|
|
|
|
| 23 |
if request.method == "POST":
|
| 24 |
text = request.form.get("text", "").strip()
|
| 25 |
if text:
|
| 26 |
-
user_id = 1
|
| 27 |
lang = detect_lang(text)
|
| 28 |
labels = LABELS[lang]
|
| 29 |
|
| 30 |
-
# Sentiment
|
| 31 |
sentiment_model = MODELS["ru_sentiment"] if lang == "ru" else MODELS["en_sentiment"]
|
| 32 |
sentiment_data = hf_infer(sentiment_model, text)
|
| 33 |
sentiment_result = None
|
| 34 |
if sentiment_data:
|
| 35 |
top_raw_label = max(sentiment_data, key=lambda x: x["score"])
|
| 36 |
-
sentiment = labels.get(
|
|
|
|
|
|
|
|
|
|
| 37 |
score = top_raw_label["score"]
|
| 38 |
sentiment_result = {
|
| 39 |
"name": sentiment,
|
|
@@ -53,12 +65,12 @@ def index():
|
|
| 53 |
top_emo = max(emo_data, key=lambda x: x["score"])
|
| 54 |
emotion_result = {
|
| 55 |
"name": top_emo["label"].capitalize(),
|
| 56 |
-
"score": int(top_emo["score"]*100),
|
| 57 |
"bar": bar(top_emo["score"]),
|
| 58 |
"color": COLOR_MAP["Emotion"]
|
| 59 |
}
|
| 60 |
|
| 61 |
-
#
|
| 62 |
tox_result = None
|
| 63 |
if lang == "en":
|
| 64 |
tox_data = hf_infer(MODELS["toxicity"], text)
|
|
@@ -66,7 +78,7 @@ def index():
|
|
| 66 |
top_tox = max(tox_data, key=lambda x: x["score"])
|
| 67 |
tox_result = {
|
| 68 |
"name": top_tox["label"],
|
| 69 |
-
"score": int(top_tox["score"]*100),
|
| 70 |
"bar": bar(top_tox["score"]),
|
| 71 |
"color": COLOR_MAP["Toxicity"]
|
| 72 |
}
|
|
@@ -78,7 +90,7 @@ def index():
|
|
| 78 |
"pos_count": pos_count
|
| 79 |
}
|
| 80 |
|
| 81 |
-
history = get_recent(
|
| 82 |
return render_template("index.html", result=result, history=history)
|
| 83 |
|
| 84 |
|
|
|
|
| 1 |
+
import uuid
|
| 2 |
+
from flask import Flask, render_template, request, session
|
| 3 |
from modules.db import init_db, save_message, get_recent
|
| 4 |
from modules.hf_helpers import hf_infer
|
| 5 |
from modules.helpers import detect_lang, MODELS, LABELS, update_history, bar
|
| 6 |
|
| 7 |
app = Flask(__name__)
|
| 8 |
+
app.secret_key = "seed-string"
|
| 9 |
+
|
| 10 |
init_db()
|
| 11 |
|
| 12 |
COLOR_MAP = {
|
|
|
|
| 20 |
"Toxicity": "purple"
|
| 21 |
}
|
| 22 |
|
| 23 |
+
|
| 24 |
@app.route("/", methods=["GET", "POST"])
|
| 25 |
def index():
|
| 26 |
+
|
| 27 |
+
if "user_id" not in session:
|
| 28 |
+
session["user_id"] = str(uuid.uuid4())
|
| 29 |
+
user_id = session["user_id"]
|
| 30 |
+
|
| 31 |
result = None
|
| 32 |
+
|
| 33 |
if request.method == "POST":
|
| 34 |
text = request.form.get("text", "").strip()
|
| 35 |
if text:
|
|
|
|
| 36 |
lang = detect_lang(text)
|
| 37 |
labels = LABELS[lang]
|
| 38 |
|
| 39 |
+
# Sentiment
|
| 40 |
sentiment_model = MODELS["ru_sentiment"] if lang == "ru" else MODELS["en_sentiment"]
|
| 41 |
sentiment_data = hf_infer(sentiment_model, text)
|
| 42 |
sentiment_result = None
|
| 43 |
if sentiment_data:
|
| 44 |
top_raw_label = max(sentiment_data, key=lambda x: x["score"])
|
| 45 |
+
sentiment = labels.get(
|
| 46 |
+
top_raw_label["label"].lower() if lang == "ru" else top_raw_label["label"],
|
| 47 |
+
top_raw_label["label"]
|
| 48 |
+
).upper()
|
| 49 |
score = top_raw_label["score"]
|
| 50 |
sentiment_result = {
|
| 51 |
"name": sentiment,
|
|
|
|
| 65 |
top_emo = max(emo_data, key=lambda x: x["score"])
|
| 66 |
emotion_result = {
|
| 67 |
"name": top_emo["label"].capitalize(),
|
| 68 |
+
"score": int(top_emo["score"] * 100),
|
| 69 |
"bar": bar(top_emo["score"]),
|
| 70 |
"color": COLOR_MAP["Emotion"]
|
| 71 |
}
|
| 72 |
|
| 73 |
+
# Toxicity (EN only)
|
| 74 |
tox_result = None
|
| 75 |
if lang == "en":
|
| 76 |
tox_data = hf_infer(MODELS["toxicity"], text)
|
|
|
|
| 78 |
top_tox = max(tox_data, key=lambda x: x["score"])
|
| 79 |
tox_result = {
|
| 80 |
"name": top_tox["label"],
|
| 81 |
+
"score": int(top_tox["score"] * 100),
|
| 82 |
"bar": bar(top_tox["score"]),
|
| 83 |
"color": COLOR_MAP["Toxicity"]
|
| 84 |
}
|
|
|
|
| 90 |
"pos_count": pos_count
|
| 91 |
}
|
| 92 |
|
| 93 |
+
history = get_recent(user_id, 10)
|
| 94 |
return render_template("index.html", result=result, history=history)
|
| 95 |
|
| 96 |
|
modules/db.py
CHANGED
|
@@ -11,6 +11,7 @@ DB_PATH = os.path.join(DB_FOLDER, "data.db")
|
|
| 11 |
|
| 12 |
_conn: Optional[sqlite3.Connection] = None
|
| 13 |
|
|
|
|
| 14 |
def get_db_connection() -> Optional[sqlite3.Connection]:
|
| 15 |
global _conn
|
| 16 |
if _conn:
|
|
@@ -24,6 +25,7 @@ def get_db_connection() -> Optional[sqlite3.Connection]:
|
|
| 24 |
logger.error("Database connection error: %s", e)
|
| 25 |
return None
|
| 26 |
|
|
|
|
| 27 |
def init_db() -> None:
|
| 28 |
conn = get_db_connection()
|
| 29 |
if not conn:
|
|
@@ -34,22 +36,23 @@ def init_db() -> None:
|
|
| 34 |
cur.execute("""
|
| 35 |
CREATE TABLE IF NOT EXISTS history (
|
| 36 |
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
| 37 |
-
user_id
|
| 38 |
text TEXT NOT NULL,
|
| 39 |
sentiment TEXT NOT NULL,
|
| 40 |
confidence REAL NOT NULL,
|
| 41 |
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
|
| 42 |
-
)
|
|
|
|
| 43 |
cur.execute("CREATE INDEX IF NOT EXISTS idx_user_id ON history(user_id)")
|
| 44 |
cur.execute("CREATE INDEX IF NOT EXISTS idx_timestamp ON history(timestamp)")
|
| 45 |
-
cur.execute("CREATE INDEX IF NOT EXISTS idx_user_timestamp ON history(user_id, timestamp)")
|
| 46 |
conn.commit()
|
| 47 |
logger.info("Database initialized")
|
| 48 |
except sqlite3.Error as e:
|
| 49 |
logger.exception("Database initialization error: %s", e)
|
| 50 |
raise
|
| 51 |
|
| 52 |
-
|
|
|
|
| 53 |
conn = get_db_connection()
|
| 54 |
if not conn:
|
| 55 |
return False
|
|
@@ -69,7 +72,8 @@ def save_message(user_id: int, text: str, sentiment: str, confidence: float) ->
|
|
| 69 |
pass
|
| 70 |
return False
|
| 71 |
|
| 72 |
-
|
|
|
|
| 73 |
conn = get_db_connection()
|
| 74 |
if not conn:
|
| 75 |
return []
|
|
@@ -80,7 +84,8 @@ def get_recent(user_id: int, limit: int = 10) -> List[Tuple]:
|
|
| 80 |
FROM history
|
| 81 |
WHERE user_id = ?
|
| 82 |
ORDER BY timestamp DESC
|
| 83 |
-
LIMIT ?
|
|
|
|
| 84 |
rows = cur.fetchall()
|
| 85 |
return [tuple(r) for r in rows]
|
| 86 |
except sqlite3.Error as e:
|
|
|
|
| 11 |
|
| 12 |
_conn: Optional[sqlite3.Connection] = None
|
| 13 |
|
| 14 |
+
|
| 15 |
def get_db_connection() -> Optional[sqlite3.Connection]:
|
| 16 |
global _conn
|
| 17 |
if _conn:
|
|
|
|
| 25 |
logger.error("Database connection error: %s", e)
|
| 26 |
return None
|
| 27 |
|
| 28 |
+
|
| 29 |
def init_db() -> None:
|
| 30 |
conn = get_db_connection()
|
| 31 |
if not conn:
|
|
|
|
| 36 |
cur.execute("""
|
| 37 |
CREATE TABLE IF NOT EXISTS history (
|
| 38 |
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
| 39 |
+
user_id TEXT NOT NULL,
|
| 40 |
text TEXT NOT NULL,
|
| 41 |
sentiment TEXT NOT NULL,
|
| 42 |
confidence REAL NOT NULL,
|
| 43 |
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
|
| 44 |
+
)
|
| 45 |
+
""")
|
| 46 |
cur.execute("CREATE INDEX IF NOT EXISTS idx_user_id ON history(user_id)")
|
| 47 |
cur.execute("CREATE INDEX IF NOT EXISTS idx_timestamp ON history(timestamp)")
|
|
|
|
| 48 |
conn.commit()
|
| 49 |
logger.info("Database initialized")
|
| 50 |
except sqlite3.Error as e:
|
| 51 |
logger.exception("Database initialization error: %s", e)
|
| 52 |
raise
|
| 53 |
|
| 54 |
+
|
| 55 |
+
def save_message(user_id: str, text: str, sentiment: str, confidence: float) -> bool:
|
| 56 |
conn = get_db_connection()
|
| 57 |
if not conn:
|
| 58 |
return False
|
|
|
|
| 72 |
pass
|
| 73 |
return False
|
| 74 |
|
| 75 |
+
|
| 76 |
+
def get_recent(user_id: str, limit: int = 10) -> List[Tuple]:
|
| 77 |
conn = get_db_connection()
|
| 78 |
if not conn:
|
| 79 |
return []
|
|
|
|
| 84 |
FROM history
|
| 85 |
WHERE user_id = ?
|
| 86 |
ORDER BY timestamp DESC
|
| 87 |
+
LIMIT ?
|
| 88 |
+
""", (user_id, limit))
|
| 89 |
rows = cur.fetchall()
|
| 90 |
return [tuple(r) for r in rows]
|
| 91 |
except sqlite3.Error as e:
|
modules/hf_helpers.py
CHANGED
|
@@ -1,6 +1,9 @@
|
|
| 1 |
import requests
|
| 2 |
import logging
|
| 3 |
import os
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
HF_API_TOKEN = os.getenv("HF_API_TOKEN")
|
| 6 |
log = logging.getLogger(__name__)
|
|
@@ -16,7 +19,7 @@ def hf_infer(model: str, text: str):
|
|
| 16 |
resp.raise_for_status()
|
| 17 |
out = resp.json()
|
| 18 |
|
| 19 |
-
# Normalize
|
| 20 |
if isinstance(out, dict):
|
| 21 |
out = [out]
|
| 22 |
elif isinstance(out, list) and len(out) == 1 and isinstance(out[0], list):
|
|
@@ -24,7 +27,9 @@ def hf_infer(model: str, text: str):
|
|
| 24 |
|
| 25 |
# Filter invalid entries
|
| 26 |
out = [x for x in out if isinstance(x, dict) and "label" in x and "score" in x]
|
|
|
|
| 27 |
return out
|
|
|
|
| 28 |
except Exception as e:
|
| 29 |
log.warning(f"HF inference failed ({model}): {e}")
|
| 30 |
return []
|
|
|
|
| 1 |
import requests
|
| 2 |
import logging
|
| 3 |
import os
|
| 4 |
+
from dotenv import load_dotenv
|
| 5 |
+
|
| 6 |
+
load_dotenv()
|
| 7 |
|
| 8 |
HF_API_TOKEN = os.getenv("HF_API_TOKEN")
|
| 9 |
log = logging.getLogger(__name__)
|
|
|
|
| 19 |
resp.raise_for_status()
|
| 20 |
out = resp.json()
|
| 21 |
|
| 22 |
+
# Normalize API output from dict → list[]
|
| 23 |
if isinstance(out, dict):
|
| 24 |
out = [out]
|
| 25 |
elif isinstance(out, list) and len(out) == 1 and isinstance(out[0], list):
|
|
|
|
| 27 |
|
| 28 |
# Filter invalid entries
|
| 29 |
out = [x for x in out if isinstance(x, dict) and "label" in x and "score" in x]
|
| 30 |
+
|
| 31 |
return out
|
| 32 |
+
|
| 33 |
except Exception as e:
|
| 34 |
log.warning(f"HF inference failed ({model}): {e}")
|
| 35 |
return []
|