Major reconstruction mode
- Dockerfile +61 -32
- add_sample_data.py +409 -409
- config.py +6 -1
- main.py +1399 -3
- requirements.txt +24 -23
- services/deepfake_checker.py +83 -0
- services/educational_content_generator.py +533 -0
- services/image_verifier.py +1377 -0
- services/input_processor.py +308 -0
- services/mongodb_service.py +684 -0
- services/razorpay_service.py +322 -0
- services/text_fact_checker.py +905 -0
- services/video_verifier.py +1310 -0
- services/websocket_service.py +239 -0
- services/youtube_api.py +211 -0
- services/youtube_caption.py +141 -0
- utils/file_utils.py +145 -0
Dockerfile
CHANGED

@@ -1,32 +1,61 @@
(the 32 removed lines are blank in this extraction and cannot be reproduced)
+# Use Python 3.13 full version (not slim) for Hugging Face deployment
+FROM python:3.13
+
+# Set environment variables
+ENV PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PIP_NO_CACHE_DIR=1 \
+    PIP_DISABLE_PIP_VERSION_CHECK=1
+
+# Install system dependencies required by the application
+# Including OpenCV, PyTorch, ffmpeg, and other multimedia libraries
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    ffmpeg \
+    libsm6 \
+    libxext6 \
+    libxrender1 \
+    libgomp1 \
+    libgl1-mesa-glx \
+    libglib2.0-0 \
+    git \
+    wget \
+    curl \
+    ca-certificates \
+    && rm -rf /var/lib/apt/lists/*
+
+# Create non-root user for Hugging Face Spaces
+RUN useradd -m -u 1000 user
+
+# Set up working directory
+WORKDIR /app
+
+# Copy requirements first for better Docker layer caching
+COPY --chown=user:user requirements.txt .
+
+# Install Python dependencies as root to avoid permission issues
+RUN pip install --upgrade pip setuptools wheel && \
+    pip install -r requirements.txt
+
+# Switch to non-root user
+USER user
+
+# Set PATH for user-installed packages
+ENV PATH="/home/user/.local/bin:$PATH"
+
+# Copy application code
+COPY --chown=user:user . .
+
+# Expose port for Hugging Face Spaces (default: 7860)
+EXPOSE 7860
+
+# Set default environment variables for Hugging Face deployment
+ENV SERVICE_HOST=0.0.0.0 \
+    SERVICE_PORT=7860
+
+# Health check endpoint
+HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
+    CMD curl -f http://localhost:7860/health || exit 1
+
+# Run the application
+CMD uvicorn main:app --host ${SERVICE_HOST} --port ${SERVICE_PORT} --workers 1
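The HEALTHCHECK above curls the /health route that main.py exposes on port 7860. A minimal smoke-test sketch of the same check from Python, assuming the container is already running and published on localhost:7860 (the file name and port mapping are illustrative, not part of this change):

# health_probe.py - hypothetical helper, not part of this repository
import json
import urllib.request

def probe(url: str = "http://localhost:7860/health", timeout: float = 10.0) -> bool:
    """Return True if the service reports itself healthy."""
    try:
        with urllib.request.urlopen(url, timeout=timeout) as resp:
            payload = json.loads(resp.read().decode("utf-8"))
    except Exception as exc:  # connection refused, timeout, malformed JSON, ...
        print(f"health check failed: {exc}")
        return False
    # main.py's /health returns {"status": "healthy", "service": "visual-verification"}
    return payload.get("status") == "healthy"

if __name__ == "__main__":
    print("healthy" if probe() else "unhealthy")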
add_sample_data.py
CHANGED

@@ -1,409 +1,409 @@
(the file was removed and re-added in full; the removed and re-added text are identical in this extraction, so the content is listed once)
#!/usr/bin/env python3
"""
Script to add sample rumour data to MongoDB for testing real-time updates
"""

import os
import sys
import asyncio
from datetime import datetime, timedelta
from pymongo import MongoClient
from pymongo.errors import DuplicateKeyError
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

def get_mongo_client():
    """Get MongoDB client connection"""
    connection_string = os.getenv('MONGO_CONNECTION_STRING')
    if not connection_string:
        raise ValueError("MONGO_CONNECTION_STRING environment variable not set")

    client = MongoClient(connection_string)
    # Test connection
    client.admin.command('ping')
    return client

def add_sample_rumours():
    """Add sample rumour data to MongoDB"""

    client = get_mongo_client()
    db = client['aegis']
    collection = db['debunk_posts']

    # Sample rumour data with unique post_ids
    sample_rumours = [
        {
            "post_id": "sample_rumour_001",
            "claim": "Scientists have discovered a new planet that could support human life",
            "summary": "Recent astronomical observations suggest the possibility of a habitable exoplanet",
            "platform": "Twitter",
            "Post_link": "https://twitter.com/example/status/123456789",
            "verification": {
                "verdict": "true",
                "message": "This claim is accurate based on NASA's recent findings",
                "reasoning": "The discovery was confirmed by multiple telescopes and peer-reviewed research",
                "verification_date": datetime.now() - timedelta(hours=2),
                "sources": {
                    "count": 3,
                    "links": [
                        "https://www.nasa.gov/feature/nasa-discovers-new-exoplanet",
                        "https://www.nature.com/articles/space-discovery-2024",
                        "https://www.scientificamerican.com/article/new-habitable-planet"
                    ],
                    "titles": [
                        "NASA Discovers New Exoplanet",
                        "Nature: Space Discovery 2024",
                        "Scientific American: New Habitable Planet Found"
                    ]
                }
            },
            "stored_at": datetime.now() - timedelta(hours=2)
        },
        {
            "post_id": "sample_rumour_002",
            "claim": "Breaking: Major tech company announces they're shutting down all services",
            "summary": "A viral post claims a major technology company is discontinuing all its services",
            "platform": "Facebook",
            "Post_link": "https://facebook.com/example/posts/987654321",
            "verification": {
                "verdict": "false",
                "message": "This is completely false and has been debunked by the company",
                "reasoning": "The company's official channels have confirmed this is a hoax. No such announcement was made.",
                "verification_date": datetime.now() - timedelta(hours=1, minutes=30),
                "sources": {
                    "count": 2,
                    "links": [
                        "https://company.com/official-statement",
                        "https://techcrunch.com/company-denies-shutdown-rumors"
                    ],
                    "titles": [
                        "Official Company Statement",
                        "TechCrunch: Company Denies Shutdown Rumors"
                    ]
                }
            },
            "stored_at": datetime.now() - timedelta(hours=1, minutes=30)
        },
        {
            "post_id": "sample_rumour_003",
            "claim": "New study shows that coffee increases life expectancy by 5 years",
            "summary": "A recent research paper claims significant health benefits from coffee consumption",
            "platform": "Instagram",
            "Post_link": "https://instagram.com/p/coffee-study-2024",
            "verification": {
                "verdict": "mostly true",
                "message": "While coffee does have health benefits, the 5-year claim is exaggerated",
                "reasoning": "Studies show moderate coffee consumption has health benefits, but the specific 5-year claim is not supported by the research cited.",
                "verification_date": datetime.now() - timedelta(minutes=45),
                "sources": {
                    "count": 4,
                    "links": [
                        "https://www.nejm.org/journal/coffee-health-study",
                        "https://www.mayoclinic.org/coffee-health-benefits",
                        "https://www.hsph.harvard.edu/coffee-research",
                        "https://www.healthline.com/coffee-life-expectancy-study"
                    ],
                    "titles": [
                        "NEJM: Coffee Health Study",
                        "Mayo Clinic: Coffee Health Benefits",
                        "Harvard: Coffee Research",
                        "Healthline: Coffee Life Expectancy Study"
                    ]
                }
            },
            "stored_at": datetime.now() - timedelta(minutes=45)
        },
        {
            "post_id": "sample_rumour_004",
            "claim": "Local restaurant caught serving expired food to customers",
            "summary": "Social media posts allege a popular local restaurant is serving expired ingredients",
            "platform": "Reddit",
            "Post_link": "https://reddit.com/r/localnews/expired-food-restaurant",
            "verification": {
                "verdict": "disputed",
                "message": "The claims are under investigation by health authorities",
                "reasoning": "Health department inspection is ongoing. Some allegations have been confirmed, others are disputed by the restaurant management.",
                "verification_date": datetime.now() - timedelta(minutes=20),
                "sources": {
                    "count": 3,
                    "links": [
                        "https://healthdept.gov/inspection-reports",
                        "https://localnews.com/restaurant-investigation",
                        "https://restaurant.com/official-response"
                    ],
                    "titles": [
                        "Health Department Inspection Reports",
                        "Local News: Restaurant Investigation",
                        "Restaurant Official Response"
                    ]
                }
            },
            "stored_at": datetime.now() - timedelta(minutes=20)
        },
        {
            "post_id": "sample_rumour_005",
            "claim": "Mysterious lights spotted in the sky over the city last night",
            "summary": "Multiple reports of unusual lights in the night sky",
            "platform": "TikTok",
            "Post_link": "https://tiktok.com/@user/video/mysterious-lights-city",
            "verification": {
                "verdict": "unverified",
                "message": "Unable to verify the source or authenticity of these reports",
                "reasoning": "No official explanation has been provided. Could be various phenomena including aircraft, drones, or natural occurrences.",
                "verification_date": datetime.now() - timedelta(minutes=10),
                "sources": {
                    "count": 2,
                    "links": [
                        "https://weather.gov/sky-conditions-report",
                        "https://faa.gov/flight-tracker-archive"
                    ],
                    "titles": [
                        "Weather Service: Sky Conditions Report",
                        "FAA: Flight Tracker Archive"
                    ]
                }
            },
            "stored_at": datetime.now() - timedelta(minutes=10)
        },
        {
            "post_id": "sample_rumour_006",
            "claim": "Viral deepfake shows the president announcing an unexpected policy change",
            "summary": "A widely shared video appears to show a surprise announcement from the president",
            "platform": "YouTube",
            "Post_link": "https://youtube.com/watch?v=deepfake-announcement",
            "verification": {
                "verdict": "false",
                "message": "The clip is a deepfake; official channels have no record of this announcement",
                "reasoning": "Audio-visual artifacts and mismatch with verified schedule indicate synthetic media",
                "verification_date": datetime.now() - timedelta(minutes=5),
                "sources": {
                    "count": 2,
                    "links": [
                        "https://whitehouse.gov/schedule",
                        "https://journal.example.com/deepfake-analysis"
                    ],
                    "titles": [
                        "Official Schedule",
                        "Deepfake Analysis"
                    ]
                }
            },
            "stored_at": datetime.now() - timedelta(minutes=5)
        },
        {
            "post_id": "sample_rumour_007",
            "claim": "Wildfire evacuation map shows entire county under immediate threat",
            "summary": "A map circulating online claims an entire county is being evacuated",
            "platform": "Telegram",
            "Post_link": "https://t.me/channel/wildfire-map",
            "verification": {
                "verdict": "disputed",
                "message": "Only specific zones are under watch; no county-wide evacuation order",
                "reasoning": "Emergency management alerts list partial warnings, not blanket evacuations",
                "verification_date": datetime.now() - timedelta(minutes=8),
                "sources": {
                    "count": 2,
                    "links": [
                        "https://alerts.example.gov/region-updates",
                        "https://county.gov/emergency"
                    ],
                    "titles": [
                        "Regional Alerts",
                        "County Emergency Updates"
                    ]
                }
            },
            "stored_at": datetime.now() - timedelta(minutes=8)
        },
        {
            "post_id": "sample_rumour_008",
            "claim": "Celebrity X claimed in 2015 that vaccines are a government tracking program",
            "summary": "A screenshot attributes an anti-vaccine quote to a well-known actor",
            "platform": "Threads",
            "Post_link": "https://www.threads.net/@user/post/abc123",
            "verification": {
                "verdict": "false",
                "message": "No credible source supports this quote; likely fabricated image",
                "reasoning": "Archive search and press records show no such statement from the celebrity",
                "verification_date": datetime.now() - timedelta(minutes=12),
                "sources": {
                    "count": 3,
                    "links": [
                        "https://archive.org/celebrity-press",
                        "https://newsdb.example.com/search",
                        "https://snopes.com/fact-check/celebrity-misattributed-quote"
                    ],
                    "titles": [
                        "Press Archive",
                        "News Database",
                        "Fact Check"
                    ]
                }
            },
            "stored_at": datetime.now() - timedelta(minutes=12)
        },
        {
            "post_id": "sample_rumour_009",
            "claim": "Nationwide vaccine recall announced due to severe side effects",
            "summary": "Posts claim an emergency recall affecting all batches",
            "platform": "WhatsApp",
            "Post_link": "https://example.com/forwarded-message",
            "verification": {
                "verdict": "false",
                "message": "No regulatory recall issued; official notices contradict the claim",
                "reasoning": "Regulatory databases list no recall matching the description",
                "verification_date": datetime.now() - timedelta(minutes=25),
                "sources": {
                    "count": 2,
                    "links": [
                        "https://fda.gov/recalls",
                        "https://who.int/medical-product-alerts"
                    ],
                    "titles": [
                        "FDA Recalls",
                        "WHO Alerts"
                    ]
                }
            },
            "stored_at": datetime.now() - timedelta(minutes=25)
        },
        {
            "post_id": "sample_rumour_010",
            "claim": "Earthquake predicted to hit the capital city at 7 PM tonight",
            "summary": "A viral message predicts an exact time for a major quake",
            "platform": "TikTok",
            "Post_link": "https://tiktok.com/@user/video/quake-prediction",
            "verification": {
                "verdict": "false",
                "message": "Earthquakes cannot be predicted with exact timing using current science",
                "reasoning": "Seismology consensus rejects precise short-term predictions",
                "verification_date": datetime.now() - timedelta(minutes=18),
                "sources": {
                    "count": 2,
                    "links": [
                        "https://usgs.gov/faqs/can-you-predict-earthquakes",
                        "https://seismo.org/position-on-prediction"
                    ],
                    "titles": [
                        "USGS FAQs",
                        "Seismology Position"
                    ]
                }
            },
            "stored_at": datetime.now() - timedelta(minutes=18)
        },
        {
            "post_id": "sample_rumour_011",
            "claim": "Poll shows 98% support for Candidate Y after overnight update",
            "summary": "Graphic claims near-unanimous polling shift in one night",
            "platform": "X",
            "Post_link": "https://x.com/example/status/shifted-poll",
            "verification": {
                "verdict": "uncertain",
                "message": "No reputable pollster has published this figure; methodology unclear",
                "reasoning": "Source lacks sampling details; awaiting official releases",
                "verification_date": datetime.now() - timedelta(minutes=30),
                "sources": {
                    "count": 2,
                    "links": [
                        "https://fivethirtyeight.com/polls/",
                        "https://aapor.org/methods-standards"
                    ],
                    "titles": [
                        "Polling Aggregator",
                        "Survey Standards"
                    ]
                }
            },
            "stored_at": datetime.now() - timedelta(minutes=30)
        }
    ]

    print("🔄 Adding sample rumour data to MongoDB...")

    added_count = 0
    skipped_count = 0

    for rumour in sample_rumours:
        try:
            # Try to insert the document
            result = collection.insert_one(rumour)
            print(f"✅ Added rumour: {rumour['post_id']} - {rumour['claim'][:50]}...")
            added_count += 1

        except DuplicateKeyError:
            print(f"⚠️ Skipped rumour (already exists): {rumour['post_id']}")
            skipped_count += 1

        except Exception as e:
            print(f"❌ Error adding rumour {rumour['post_id']}: {e}")

    print(f"\n📊 Summary:")
    print(f"   ✅ Added: {added_count} rumours")
    print(f"   ⚠️ Skipped: {skipped_count} rumours")
    print(f"   📝 Total in database: {collection.count_documents({})} rumours")

    # Close connection
    client.close()
    print("\n🔌 MongoDB connection closed")

def test_realtime_update():
    """Add a new rumour to test real-time updates"""

    client = get_mongo_client()
    db = client['aegis']
    collection = db['debunk_posts']

    # Create a new rumour with current timestamp
    new_rumour = {
        "post_id": f"test_realtime_{int(datetime.now().timestamp())}",
        "claim": "Test real-time update: This is a new rumour added for testing WebSocket functionality",
        "summary": "This rumour was added to test the real-time WebSocket update system",
        "platform": "Test Platform",
        "Post_link": "https://example.com/test-realtime-update",
        "verification": {
            "verdict": "true",
            "message": "This is a test rumour for real-time updates",
            "reasoning": "Added programmatically to verify WebSocket functionality",
            "verification_date": datetime.now(),
            "sources": {
                "count": 1,
                "links": ["https://example.com/test-source"],
                "titles": ["Test Source"]
            }
        },
        "stored_at": datetime.now()
    }

    print("🔄 Adding test rumour for real-time update...")

    try:
        result = collection.insert_one(new_rumour)
        print(f"✅ Test rumour added successfully!")
        print(f"   📝 Post ID: {new_rumour['post_id']}")
        print(f"   📅 Added at: {new_rumour['stored_at']}")
        print(f"   🔍 MongoDB ID: {result.inserted_id}")
        print("\n💡 Check your frontend - you should see this new rumour appear automatically!")

    except Exception as e:
        print(f"❌ Error adding test rumour: {e}")

    # Close connection
    client.close()
    print("\n🔌 MongoDB connection closed")

if __name__ == "__main__":
    print("🚀 MongoDB Sample Data Script")
    print("=" * 50)

    if len(sys.argv) > 1 and sys.argv[1] == "test":
        test_realtime_update()
    else:
        add_sample_rumours()

    print("\n✨ Script completed!")
    print("\n💡 Usage:")
    print("   python add_sample_data.py        # Add sample rumours")
    print("   python add_sample_data.py test   # Add test rumour for real-time updates")
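The skip logic in add_sample_rumours relies on DuplicateKeyError, which MongoDB only raises when the debunk_posts collection has a unique index on post_id; the script itself never creates one. A minimal sketch of ensuring that index, assuming the same aegis database, debunk_posts collection, and MONGO_CONNECTION_STRING variable (the helper file itself is hypothetical and not part of this change):

# ensure_unique_post_id.py - hypothetical one-off helper, not part of this change
import os

from dotenv import load_dotenv
from pymongo import ASCENDING, MongoClient

load_dotenv()

client = MongoClient(os.getenv("MONGO_CONNECTION_STRING"))
collection = client["aegis"]["debunk_posts"]

# Without a unique index, re-running add_sample_data.py silently inserts
# duplicate sample documents instead of raising DuplicateKeyError.
index_name = collection.create_index([("post_id", ASCENDING)], unique=True)
print(f"Unique index ensured: {index_name}")

client.close()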
config.py
CHANGED

@@ -11,7 +11,7 @@ class Config:
     SERP_API_KEY: Optional[str] = os.getenv("SERP_API_KEY")
     SERPAPI_BASE_URL: str = "https://serpapi.com/search"
     GEMINI_API_KEY: Optional[str] = os.getenv("GEMINI_API_KEY")
-    GEMINI_MODEL: str = os.getenv("GEMINI_MODEL", "gemini-2.
+    GEMINI_MODEL: str = os.getenv("GEMINI_MODEL", "gemini-2.5-flash")
     GEMINI_TEMPERATURE: float = float(os.getenv("GEMINI_TEMPERATURE", "0.1"))
     GEMINI_TOP_P: float = float(os.getenv("GEMINI_TOP_P", "0.8"))
     GEMINI_MAX_TOKENS: int = int(os.getenv("GEMINI_MAX_TOKENS", "1000000"))

@@ -85,6 +85,11 @@ class Config:
     UPSTASH_REDIS_TOKEN: Optional[str] = os.getenv("UPSTASH_REDIS_TOKEN")
     REDIS_TTL: int = int(os.getenv("REDIS_TTL", "86400"))  # 24 hours in seconds

+    # Razorpay Configuration
+    RAZORPAY_ID: Optional[str] = os.getenv("RAZORPAY_ID")
+    RAZORPAY_KEY: Optional[str] = os.getenv("RAZORPAY_KEY")
+    RAZORPAY_WEBHOOK_SECRET: Optional[str] = os.getenv("RAZORPAY_WEBHOOK_SECRET")
+
     @classmethod
     def validate(cls) -> bool:
         """Validate configuration values"""
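services/razorpay_service.py (+322 lines) is not included in this view, so the exact wrapper around these settings is unknown. As a rough sketch only, here is how a service could build its client from the new RAZORPAY_ID / RAZORPAY_KEY values using the razorpay SDK that main.py imports; the class and method bodies below are assumptions, not the actual file:

# Hypothetical sketch - the real services/razorpay_service.py is not shown in this diff.
from typing import Optional

import razorpay

from config import config


class RazorpayServiceSketch:
    """Thin wrapper around razorpay.Client built from config values."""

    def __init__(self) -> None:
        self.client: Optional[razorpay.Client] = None
        if not (config.RAZORPAY_ID and config.RAZORPAY_KEY):
            # main.py checks `razorpay_service.client` before use, so missing
            # credentials leave the client unset instead of raising here.
            return
        self.client = razorpay.Client(auth=(config.RAZORPAY_ID, config.RAZORPAY_KEY))

    def list_plans(self, count: int = 10) -> dict:
        """Fetch existing subscription plans (used by initialize_subscription_plans in main.py)."""
        return self.client.plan.all({"count": count})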
main.py
CHANGED

@@ -1,4 +1,4 @@
-from fastapi import FastAPI, File, UploadFile, HTTPException, Form, WebSocket, WebSocketDisconnect
+from fastapi import FastAPI, File, UploadFile, HTTPException, Form, WebSocket, WebSocketDisconnect, Request
 from typing import Optional, List, Dict, Any
 from fastapi.responses import FileResponse
 from fastapi.middleware.cors import CORSMiddleware

@@ -21,9 +21,13 @@ from services.text_fact_checker import TextFactChecker
 from services.educational_content_generator import EducationalContentGenerator
 from services.mongodb_service import MongoDBService
 from services.websocket_service import connection_manager, initialize_mongodb_change_stream, cleanup_mongodb_change_stream
+from services.razorpay_service import RazorpayService
+import razorpay.errors
 from utils.file_utils import save_upload_file, cleanup_temp_files
 from config import config
 from services.deepfake_checker import detect_audio_deepfake
+from services.youtube_caption import get_youtube_transcript_ytdlp
+import google.generativeai as genai

 app = FastAPI(
     title="Visual Verification Service",

@@ -36,9 +40,18 @@ logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)

 # Add CORS middleware
+# Note: When allow_credentials=True, you cannot use allow_origins=["*"]
+# Must specify exact origins
+# Chrome extensions make requests from background scripts which bypass CORS,
+# but we include common origins for web frontend access
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=[
+    allow_origins=[
+        "http://localhost:5173",
+        "http://127.0.0.1:5173",
+        "http://localhost:3000",
+        "http://127.0.0.1:3000",
+    ],
     allow_credentials=True,
     allow_methods=["*"],
     allow_headers=["*"],

@@ -65,15 +78,145 @@ try:
 except Exception as e:
     print(f"Warning: MongoDB service initialization failed: {e}")

+# Initialize Razorpay service
+razorpay_service = None
+try:
+    razorpay_service = RazorpayService()
+except Exception as e:
+    print(f"Warning: Razorpay service initialization failed: {e}")
+
 # Initialize MongoDB change service (will be set in startup event)
 mongodb_change_service = None

+async def initialize_subscription_plans():
+    """Initialize subscription plans in Razorpay if they don't exist"""
+    if not razorpay_service or not razorpay_service.client:
+        logger.warning("⚠️ Razorpay service not available. Skipping plan initialization.")
+        return
+
+    # First, test Razorpay connection by trying to fetch account details or make a simple API call
+    try:
+        # Try to verify credentials work by attempting a simple operation
+        # We'll skip listing plans if it fails and just try to create
+        logger.info("🔍 Testing Razorpay API connection...")
+    except Exception as e:
+        logger.error(f"❌ Razorpay API connection test failed: {e}")
+        logger.warning("⚠️ Skipping plan initialization due to API connection issues")
+        return
+
+    try:
+        # Try to list existing plans, but don't fail if it errors
+        existing_plan_names = set()
+        try:
+            existing_plans = razorpay_service.list_plans(count=100)
+            if existing_plans and existing_plans.get("items"):
+                existing_plan_names = {
+                    p.get("item", {}).get("name")
+                    for p in existing_plans.get("items", [])
+                    if p.get("item", {}).get("name")
+                }
+                logger.info(f"📋 Found {len(existing_plan_names)} existing plans")
+        except Exception as list_error:
+            error_msg = str(list_error).lower()
+            if "not found" in error_msg or "404" in error_msg:
+                logger.info("ℹ️ No existing plans found (this is normal for new accounts)")
+            else:
+                logger.warning(f"⚠️ Could not list existing plans: {list_error}")
+            # Continue anyway - we'll try to create plans and handle duplicates
+
+        plans_to_create = [
+            {
+                "name": "Plan 1",
+                "amount": 100,  # 1 INR in paise
+                "currency": "INR",
+                "interval": 1,
+                "period": "monthly",
+                "description": "Plan 1 - Monthly Subscription (1 INR)"
+            },
+            {
+                "name": "Plan 2",
+                "amount": 200,  # 2 INR in paise
+                "currency": "INR",
+                "interval": 1,
+                "period": "monthly",
+                "description": "Plan 2 - Monthly Subscription (2 INR)"
+            },
+            {
+                "name": "Plan 3",
+                "amount": 300,  # 3 INR in paise
+                "currency": "INR",
+                "interval": 1,
+                "period": "monthly",
+                "description": "Plan 3 - Monthly Subscription (3 INR)"
+            }
+        ]
+
+        created_count = 0
+        skipped_count = 0
+        error_count = 0
+
+        for plan_data in plans_to_create:
+            plan_name = plan_data["name"]
+
+            # Check if plan already exists
+            if plan_name in existing_plan_names:
+                logger.info(f"⏭️ Plan {plan_name} already exists, skipping")
+                skipped_count += 1
+                continue
+
+            try:
+                logger.info(f"🔄 Creating plan: {plan_name}...")
+                plan = razorpay_service.create_plan(**plan_data)
+                logger.info(f"✅ Created subscription plan: {plan_name} (ID: {plan.get('id')})")
+                created_count += 1
+            except razorpay.errors.BadRequestError as e:
+                error_msg = str(e).lower()
+                # Check if error is due to plan already existing (duplicate)
+                if "already exists" in error_msg or "duplicate" in error_msg:
+                    logger.info(f"⏭️ Plan {plan_name} already exists (detected during creation), skipping")
+                    skipped_count += 1
+                else:
+                    logger.error(f"❌ BadRequestError creating plan {plan_name}: {e}")
+                    error_count += 1
+            except Exception as e:
+                error_msg = str(e).lower()
+                # Check if error is due to plan already existing (duplicate)
+                if "already exists" in error_msg or "duplicate" in error_msg:
+                    logger.info(f"⏭️ Plan {plan_name} already exists (detected during creation), skipping")
+                    skipped_count += 1
+                elif "not found" in error_msg or "404" in error_msg:
+                    logger.error(f"❌ API endpoint not found for plan {plan_name}. Check Razorpay credentials and API access.")
+                    logger.error(f"   Error details: {e}")
+                    error_count += 1
+                else:
+                    logger.error(f"❌ Failed to create plan {plan_name}: {e}")
+                    error_count += 1
+
+        if created_count > 0:
+            logger.info(f"✅ Successfully created {created_count} subscription plans")
+        if skipped_count > 0:
+            logger.info(f"⏭️ Skipped {skipped_count} plans (already exist)")
+        if error_count > 0:
+            logger.warning(f"⚠️ {error_count} plans failed to create. Check Razorpay credentials and API permissions.")
+        if created_count == 0 and skipped_count == 0 and error_count > 0:
+            logger.error("❌ All plan creation attempts failed. Please verify:")
(the diff view is truncated here; the remaining added lines of this hunk are not included in this extraction)
 @app.on_event("startup")
 async def startup_event():
     """Initialize services on startup"""
     global mongodb_change_service
     try:
         mongodb_change_service = await initialize_mongodb_change_stream()
         logger.info("✅ All services initialized successfully")
     except Exception as e:
         logger.error(f"❌ Failed to initialize services: {e}")

@@ -224,6 +367,407 @@ async def verify_text(
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))

(the 401 lines added in this hunk are not included in this extraction)
 @app.post("/chatbot/verify")
 async def chatbot_verify(
     text_input: Optional[str] = Form(None),

@@ -313,11 +857,11 @@ async def chatbot_verify(
         try:
             gemini_prompt = f"""
 You are an assistant for audio authenticity analysis.
-File name: {os.path.basename(file_path)}
 {('User question: ' + claim_context) if claim_context else ''}
 The audio has been analyzed and the result is: {'deepfake' if deepfake else 'NOT deepfake'}.
 Compose a clear, friendly, 1-2 line summary verdict for the user, tailored to the above context/result (do not answer with JSON or code, just a natural response).
 Avoid repeating 'deepfake detection' technical language; be concise and direct.
(the one line added in this hunk is not included in this extraction)
 """
             gemini_response = input_processor_for_audio.model.generate_content(gemini_prompt)
             ai_message = None

@@ -373,6 +917,75 @@ Avoid repeating 'deepfake detection' technical language; be concise and direct.
         print(f"🔍 DEBUG: Processing {len(urls_list)} URLs")
         for i, url in enumerate(urls_list):
             print(f"🔍 DEBUG: Processing URL {i}: {url}")
(the 69 lines added in this hunk are not included in this extraction)
             if verification_type == "image":
                 print(f"🔍 DEBUG: Calling image_verifier.verify for URL")
                 result = await image_verifier.verify(

@@ -563,6 +1176,58 @@ async def get_recent_debunk_posts(limit: int = 5):
         print(f"🔍 DEBUG: Exception type: {type(e).__name__}")
         raise HTTPException(status_code=500, detail=str(e))

(the 52 lines added in this hunk are not included in this extraction)
 @app.get("/health")
 async def health_check():
     return {"status": "healthy", "service": "visual-verification"}

@@ -708,5 +1373,736 @@ async def get_cache_status():
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))

(the 731 lines added in this hunk are not included in this extraction)
 if __name__ == "__main__":
     uvicorn.run(app, host="0.0.0.0", port=config.SERVICE_PORT)
|
| 203 |
+
logger.error(" 1. RAZORPAY_ID and RAZORPAY_KEY are correct")
|
| 204 |
+
logger.error(" 2. API keys have subscription/plan creation permissions")
|
| 205 |
+
logger.error(" 3. Razorpay account has subscriptions feature enabled")
|
| 206 |
+
|
| 207 |
+
except Exception as e:
|
| 208 |
+
logger.error(f"❌ Failed to initialize subscription plans: {e}")
|
| 209 |
+
import traceback
|
| 210 |
+
logger.error(traceback.format_exc())
|
| 211 |
+
|
| 212 |
@app.on_event("startup")
|
| 213 |
async def startup_event():
|
| 214 |
"""Initialize services on startup"""
|
| 215 |
global mongodb_change_service
|
| 216 |
try:
|
| 217 |
mongodb_change_service = await initialize_mongodb_change_stream()
|
| 218 |
+
# Initialize subscription plans
|
| 219 |
+
await initialize_subscription_plans()
|
| 220 |
logger.info("✅ All services initialized successfully")
|
| 221 |
except Exception as e:
|
| 222 |
logger.error(f"❌ Failed to initialize services: {e}")
|
|
|
|
| 367 |
except Exception as e:
|
| 368 |
raise HTTPException(status_code=500, detail=str(e))
|
| 369 |
|
| 370 |
+
async def _extract_media_from_url(url: str) -> Optional[Dict[str, Any]]:
|
| 371 |
+
"""
|
| 372 |
+
Use yt-dlp to extract media from a URL and determine if it's an image or video.
|
| 373 |
+
|
| 374 |
+
Returns:
|
| 375 |
+
Dict with "type" ("image" or "video") and "path" (local file path), or None if fails
|
| 376 |
+
"""
|
| 377 |
+
try:
|
| 378 |
+
from shutil import which
|
| 379 |
+
import subprocess
|
| 380 |
+
import tempfile
|
| 381 |
+
|
| 382 |
+
# Resolve yt-dlp binary
|
| 383 |
+
ytdlp_bin = config.YTDLP_BIN or "yt-dlp"
|
| 384 |
+
found = which(ytdlp_bin) or which("yt-dlp")
|
| 385 |
+
if not found:
|
| 386 |
+
print("[extract_media] yt-dlp not found")
|
| 387 |
+
return None
|
| 388 |
+
|
| 389 |
+
# Create temp directory
|
| 390 |
+
temp_dir = tempfile.mkdtemp(prefix="media_extract_")
|
| 391 |
+
|
| 392 |
+
# First, get info about the media
|
| 393 |
+
info_cmd = [found, url, "--dump-json", "--no-playlist"]
|
| 394 |
+
result = subprocess.run(
|
| 395 |
+
info_cmd,
|
| 396 |
+
capture_output=True,
|
| 397 |
+
text=True,
|
| 398 |
+
timeout=30
|
| 399 |
+
)
|
| 400 |
+
|
| 401 |
+
if result.returncode != 0:
|
| 402 |
+
print(f"[extract_media] yt-dlp info failed: {result.stderr}")
|
| 403 |
+
return None
|
| 404 |
+
|
| 405 |
+
info = json.loads(result.stdout)
|
| 406 |
+
|
| 407 |
+
# Determine media type
|
| 408 |
+
ext = info.get("ext", "").lower()
|
| 409 |
+
is_video = ext in ["mp4", "webm", "mkv", "avi", "mov", "flv", "m4v"]
|
| 410 |
+
is_image = ext in ["jpg", "jpeg", "png", "gif", "webp", "bmp"]
|
| 411 |
+
|
| 412 |
+
if not is_video and not is_image:
|
| 413 |
+
# Check formats to determine type
|
| 414 |
+
formats = info.get("formats", [])
|
| 415 |
+
has_video_codec = any(f.get("vcodec") != "none" for f in formats)
|
| 416 |
+
has_audio_codec = any(f.get("acodec") != "none" for f in formats)
|
| 417 |
+
|
| 418 |
+
if has_video_codec:
|
| 419 |
+
is_video = True
|
| 420 |
+
elif not has_audio_codec and not has_video_codec:
|
| 421 |
+
# Likely an image
|
| 422 |
+
is_image = True
|
| 423 |
+
|
| 424 |
+
media_type = "video" if is_video else "image"
|
| 425 |
+
|
| 426 |
+
# Download the media
|
| 427 |
+
output_template = os.path.join(temp_dir, "media.%(ext)s")
|
| 428 |
+
download_cmd = [
|
| 429 |
+
found,
|
| 430 |
+
url,
|
| 431 |
+
"-o", output_template,
|
| 432 |
+
"--no-playlist",
|
| 433 |
+
]
|
| 434 |
+
|
| 435 |
+
# For images, prefer best quality; for videos, get best format
|
| 436 |
+
if is_image:
|
| 437 |
+
download_cmd.extend(["--format", "best"])
|
| 438 |
+
else:
|
| 439 |
+
download_cmd.extend(["--format", "best[ext=mp4]/best"])
|
| 440 |
+
|
| 441 |
+
result = subprocess.run(
|
| 442 |
+
download_cmd,
|
| 443 |
+
capture_output=True,
|
| 444 |
+
text=True,
|
| 445 |
+
timeout=60
|
| 446 |
+
)
|
| 447 |
+
|
| 448 |
+
if result.returncode != 0:
|
| 449 |
+
print(f"[extract_media] yt-dlp download failed: {result.stderr}")
|
| 450 |
+
return None
|
| 451 |
+
|
| 452 |
+
# Find the downloaded file
|
| 453 |
+
downloaded_files = [f for f in os.listdir(temp_dir) if os.path.isfile(os.path.join(temp_dir, f))]
|
| 454 |
+
if not downloaded_files:
|
| 455 |
+
print("[extract_media] No file downloaded")
|
| 456 |
+
return None
|
| 457 |
+
|
| 458 |
+
media_path = os.path.join(temp_dir, downloaded_files[0])
|
| 459 |
+
|
| 460 |
+
return {
|
| 461 |
+
"type": media_type,
|
| 462 |
+
"path": media_path,
|
| 463 |
+
"temp_dir": temp_dir # Keep for cleanup
|
| 464 |
+
}
|
| 465 |
+
|
| 466 |
+
except Exception as e:
|
| 467 |
+
print(f"[extract_media] Error: {e}")
|
| 468 |
+
import traceback
|
| 469 |
+
print(traceback.format_exc())
|
| 470 |
+
return None
|
| 471 |
+
|
| 472 |
+
|
| 473 |
+
def _is_youtube_url(url: str) -> bool:
|
| 474 |
+
"""Check if URL is a YouTube URL"""
|
| 475 |
+
url_lower = url.lower()
|
| 476 |
+
youtube_domains = ['youtube.com', 'youtu.be', 'www.youtube.com', 'www.youtu.be', 'm.youtube.com']
|
| 477 |
+
return any(domain in url_lower for domain in youtube_domains)
|
| 478 |
+
|
| 479 |
+
|
| 480 |
+
async def _generate_claims_summary(claim_results: List[Dict[str, Any]], gemini_model) -> str:
|
| 481 |
+
"""Generate a comprehensive summary of all claim verification results using Gemini"""
|
| 482 |
+
try:
|
| 483 |
+
# Prepare claims data for Gemini
|
| 484 |
+
claims_data = []
|
| 485 |
+
for i, result in enumerate(claim_results, 1):
|
| 486 |
+
claims_data.append({
|
| 487 |
+
"number": i,
|
| 488 |
+
"claim": result.get("claim_text", ""),
|
| 489 |
+
"verdict": result.get("verdict", "uncertain"),
|
| 490 |
+
"explanation": result.get("message", "No explanation available")
|
| 491 |
+
})
|
| 492 |
+
|
| 493 |
+
prompt = f"""You are a fact-checking summary writer. Based on the following verified claims from a YouTube video, create a comprehensive, user-friendly summary.
|
| 494 |
+
|
| 495 |
+
CLAIM VERIFICATION RESULTS:
|
| 496 |
+
{json.dumps(claims_data, indent=2)}
|
| 497 |
+
|
| 498 |
+
Your task is to create a clear, concise summary that:
|
| 499 |
+
1. Lists each claim with its verdict (TRUE/FALSE/MIXED/UNCERTAIN)
|
| 500 |
+
2. Explains WHY each claim is true or false in simple terms
|
| 501 |
+
3. Highlights the most important findings
|
| 502 |
+
4. Provides an overall assessment of the video's factual accuracy
|
| 503 |
+
|
| 504 |
+
Format your response as a well-structured summary that is easy to read. Use clear sections and bullet points where appropriate.
|
| 505 |
+
|
| 506 |
+
IMPORTANT:
|
| 507 |
+
- Be concise but thorough
|
| 508 |
+
- Explain the reasoning for each verdict
|
| 509 |
+
- Focus on the most significant false or misleading claims
|
| 510 |
+
- Keep the tone professional and informative
|
| 511 |
+
- Do NOT use markdown formatting, just plain text with clear structure
|
| 512 |
+
|
| 513 |
+
Return ONLY the summary text, no JSON or code blocks."""
|
| 514 |
+
|
| 515 |
+
response = gemini_model.generate_content(prompt)
|
| 516 |
+
response_text = response.text.strip()
|
| 517 |
+
|
| 518 |
+
# Clean up response if needed
|
| 519 |
+
if response_text.startswith('```'):
|
| 520 |
+
response_text = re.sub(r'^```[a-z]*\n?', '', response_text, flags=re.IGNORECASE)
|
| 521 |
+
response_text = re.sub(r'```$', '', response_text, flags=re.IGNORECASE).strip()
|
| 522 |
+
|
| 523 |
+
print(f"✅ Generated comprehensive summary")
|
| 524 |
+
return response_text
|
| 525 |
+
|
| 526 |
+
except Exception as e:
|
| 527 |
+
print(f"❌ Error generating summary with Gemini: {e}")
|
| 528 |
+
import traceback
|
| 529 |
+
print(traceback.format_exc())
|
| 530 |
+
# Fallback to simple concatenation
|
| 531 |
+
summary_parts = []
|
| 532 |
+
summary_parts.append(f"Analyzed {len(claim_results)} controversial claim(s) from the video transcript:\n")
|
| 533 |
+
|
| 534 |
+
for i, result in enumerate(claim_results, 1):
|
| 535 |
+
claim_text = result.get("claim_text", "")
|
| 536 |
+
verdict = result.get("verdict", "uncertain")
|
| 537 |
+
message = result.get("message", "No explanation available")
|
| 538 |
+
|
| 539 |
+
claim_display = claim_text[:150] + "..." if len(claim_text) > 150 else claim_text
|
| 540 |
+
|
| 541 |
+
verdict_label = {
|
| 542 |
+
"true": "✅ TRUE",
|
| 543 |
+
"false": "❌ FALSE",
|
| 544 |
+
"mixed": "⚠️ MIXED",
|
| 545 |
+
"uncertain": "❓ UNCERTAIN",
|
| 546 |
+
"error": "⚠️ ERROR"
|
| 547 |
+
}.get(verdict, "❓ UNCERTAIN")
|
| 548 |
+
|
| 549 |
+
summary_parts.append(f"\n{i}. {verdict_label}: {claim_display}")
|
| 550 |
+
summary_parts.append(f" Explanation: {message}")
|
| 551 |
+
|
| 552 |
+
return "\n".join(summary_parts)
|
| 553 |
+
|
| 554 |
+
|
| 555 |
+
async def _extract_claims_from_captions(captions: str, gemini_model) -> List[str]:
|
| 556 |
+
"""Extract top 5 controversial claims from video captions using Gemini"""
|
| 557 |
+
try:
|
| 558 |
+
prompt = f"""You are a fact-checking assistant. Analyze the following video transcript and extract the TOP 5 MOST CONTROVERSIAL and verifiable claims that were mentioned in the video.
|
| 559 |
+
|
| 560 |
+
VIDEO TRANSCRIPT:
|
| 561 |
+
{captions}
|
| 562 |
+
|
| 563 |
+
Your task is to identify the 5 MOST controversial, factual claims that can be verified. Prioritize:
|
| 564 |
+
- Claims about events, statistics, or facts that are controversial or disputed
|
| 565 |
+
- Claims about people, organizations, or institutions that are potentially misleading
|
| 566 |
+
- Claims that are specific enough to be fact-checked and are likely to be false or disputed
|
| 567 |
+
- Claims that have significant impact or are widely discussed
|
| 568 |
+
|
| 569 |
+
Ignore:
|
| 570 |
+
- General opinions or subjective statements
|
| 571 |
+
- Questions or hypothetical scenarios
|
| 572 |
+
- Vague statements without specific claims
|
| 573 |
+
- Small talk or filler content
|
| 574 |
+
|
| 575 |
+
IMPORTANT: Return EXACTLY 5 claims (or fewer if the video doesn't contain 5 verifiable controversial claims). Rank them by controversy/importance.
|
| 576 |
+
|
| 577 |
+
Return ONLY a JSON object in this exact format:
|
| 578 |
+
{{
|
| 579 |
+
"claims": [
|
| 580 |
+
"Claim 1 text here (most controversial)",
|
| 581 |
+
"Claim 2 text here",
|
| 582 |
+
"Claim 3 text here",
|
| 583 |
+
"Claim 4 text here",
|
| 584 |
+
"Claim 5 text here"
|
| 585 |
+
]
|
| 586 |
+
}}
|
| 587 |
+
|
| 588 |
+
Return ONLY the JSON object, no other text or explanation."""
|
| 589 |
+
|
| 590 |
+
response = gemini_model.generate_content(prompt)
|
| 591 |
+
response_text = response.text.strip()
|
| 592 |
+
|
| 593 |
+
# Clean up response if needed
|
| 594 |
+
if response_text.startswith('```json'):
|
| 595 |
+
response_text = response_text.replace('```json', '').replace('```', '').strip()
|
| 596 |
+
elif response_text.startswith('```'):
|
| 597 |
+
response_text = response_text.replace('```', '').strip()
|
| 598 |
+
|
| 599 |
+
# Parse JSON response
|
| 600 |
+
parsed = json.loads(response_text)
|
| 601 |
+
claims = parsed.get("claims", [])
|
| 602 |
+
|
| 603 |
+
# Filter out empty claims and limit to 5
|
| 604 |
+
claims = [c.strip() for c in claims if c and c.strip()][:5]
|
| 605 |
+
|
| 606 |
+
print(f"✅ Extracted {len(claims)} claims from video captions")
|
| 607 |
+
return claims
|
| 608 |
+
|
| 609 |
+
except Exception as e:
|
| 610 |
+
print(f"❌ Error extracting claims from captions: {e}")
|
| 611 |
+
import traceback
|
| 612 |
+
print(traceback.format_exc())
|
| 613 |
+
return []
|
| 614 |
+
|
| 615 |
+
|
| 616 |
+
async def _verify_youtube_video(url: str, claim_context: str, claim_date: str) -> Dict[str, Any]:
|
| 617 |
+
"""Verify a YouTube video by extracting captions, extracting claims, and verifying each claim"""
|
| 618 |
+
import tempfile
|
| 619 |
+
import asyncio
|
| 620 |
+
|
| 621 |
+
try:
|
| 622 |
+
print(f"🎥 Starting YouTube video verification for: {url}")
|
| 623 |
+
|
| 624 |
+
# Step 1: Extract captions
|
| 625 |
+
print(f"📝 Extracting captions from YouTube video...")
|
| 626 |
+
# Create a temporary file for the transcript output
|
| 627 |
+
with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as temp_file:
|
| 628 |
+
temp_output_file = temp_file.name
|
| 629 |
+
|
| 630 |
+
# Run the synchronous function in an executor to avoid blocking
|
| 631 |
+
loop = asyncio.get_event_loop()
|
| 632 |
+
captions = await loop.run_in_executor(
|
| 633 |
+
None,
|
| 634 |
+
get_youtube_transcript_ytdlp,
|
| 635 |
+
url,
|
| 636 |
+
temp_output_file
|
| 637 |
+
)
|
| 638 |
+
|
| 639 |
+
# Clean up the temporary output file if it was created
|
| 640 |
+
try:
|
| 641 |
+
if os.path.exists(temp_output_file):
|
| 642 |
+
os.unlink(temp_output_file)
|
| 643 |
+
except Exception as cleanup_error:
|
| 644 |
+
print(f"⚠️ Warning: Could not clean up temp file {temp_output_file}: {cleanup_error}")
|
| 645 |
+
|
| 646 |
+
if not captions:
|
| 647 |
+
return {
|
| 648 |
+
"verified": False,
|
| 649 |
+
"verdict": "error",
|
| 650 |
+
"message": "Could not extract captions from the YouTube video. The video may not have captions available.",
|
| 651 |
+
"details": {
|
| 652 |
+
"video_url": url,
|
| 653 |
+
"error": "Caption extraction failed"
|
| 654 |
+
},
|
| 655 |
+
"source": "youtube_url"
|
| 656 |
+
}
|
| 657 |
+
|
| 658 |
+
print(f"✅ Extracted {len(captions)} characters of captions")
|
| 659 |
+
|
| 660 |
+
# Step 2: Extract claims using Gemini
|
| 661 |
+
print(f"🔍 Extracting controversial claims from captions...")
|
| 662 |
+
genai.configure(api_key=config.GEMINI_API_KEY)
|
| 663 |
+
gemini_model = genai.GenerativeModel(config.GEMINI_MODEL)
|
| 664 |
+
|
| 665 |
+
claims = await _extract_claims_from_captions(captions, gemini_model)
|
| 666 |
+
|
| 667 |
+
if not claims:
|
| 668 |
+
return {
|
| 669 |
+
"verified": False,
|
| 670 |
+
"verdict": "uncertain",
|
| 671 |
+
"message": "No verifiable claims were found in the video transcript. The video may contain only opinions, questions, or non-factual content.",
|
| 672 |
+
"details": {
|
| 673 |
+
"video_url": url,
|
| 674 |
+
"captions_length": len(captions),
|
| 675 |
+
"claims_extracted": 0
|
| 676 |
+
},
|
| 677 |
+
"source": "youtube_url"
|
| 678 |
+
}
|
| 679 |
+
|
| 680 |
+
print(f"✅ Extracted {len(claims)} claims, starting verification...")
|
| 681 |
+
|
| 682 |
+
# Step 3: Verify each claim
|
| 683 |
+
claim_results = []
|
| 684 |
+
for i, claim in enumerate(claims, 1):
|
| 685 |
+
print(f"🔍 Verifying claim {i}/{len(claims)}: {claim[:100]}...")
|
| 686 |
+
try:
|
| 687 |
+
verification_result = await text_fact_checker.verify(
|
| 688 |
+
text_input=claim,
|
| 689 |
+
claim_context=f"Claim from YouTube video: {url}",
|
| 690 |
+
claim_date=claim_date
|
| 691 |
+
)
|
| 692 |
+
verification_result["claim_text"] = claim
|
| 693 |
+
verification_result["claim_index"] = i
|
| 694 |
+
claim_results.append(verification_result)
|
| 695 |
+
except Exception as e:
|
| 696 |
+
print(f"❌ Error verifying claim {i}: {e}")
|
| 697 |
+
claim_results.append({
|
| 698 |
+
"claim_text": claim,
|
| 699 |
+
"claim_index": i,
|
| 700 |
+
"verified": False,
|
| 701 |
+
"verdict": "error",
|
| 702 |
+
"message": f"Error during verification: {str(e)}"
|
| 703 |
+
})
|
| 704 |
+
|
| 705 |
+
# Step 4: Combine results
|
| 706 |
+
print(f"📊 Combining {len(claim_results)} claim verification results...")
|
| 707 |
+
|
| 708 |
+
# Aggregate verdicts
|
| 709 |
+
verdicts = [r.get("verdict", "uncertain") for r in claim_results]
|
| 710 |
+
true_count = verdicts.count("true")
|
| 711 |
+
false_count = verdicts.count("false")
|
| 712 |
+
uncertain_count = verdicts.count("uncertain")
|
| 713 |
+
mixed_count = verdicts.count("mixed")
|
| 714 |
+
error_count = verdicts.count("error")
|
| 715 |
+
|
| 716 |
+
# Determine overall verdict
|
| 717 |
+
if false_count > 0:
|
| 718 |
+
overall_verdict = "false"
|
| 719 |
+
verified = False
|
| 720 |
+
elif true_count > 0 and false_count == 0:
|
| 721 |
+
overall_verdict = "true"
|
| 722 |
+
verified = True
|
| 723 |
+
elif mixed_count > 0:
|
| 724 |
+
overall_verdict = "mixed"
|
| 725 |
+
verified = False
|
| 726 |
+
elif uncertain_count > 0:
|
| 727 |
+
overall_verdict = "uncertain"
|
| 728 |
+
verified = False
|
| 729 |
+
else:
|
| 730 |
+
overall_verdict = "error"
|
| 731 |
+
verified = False
|
| 732 |
+
|
| 733 |
+
# Step 5: Generate comprehensive summary using Gemini
|
| 734 |
+
print(f"📝 Generating comprehensive summary with Gemini...")
|
| 735 |
+
combined_message = await _generate_claims_summary(claim_results, gemini_model)
|
| 736 |
+
|
| 737 |
+
return {
|
| 738 |
+
"verified": verified,
|
| 739 |
+
"verdict": overall_verdict,
|
| 740 |
+
"message": combined_message,
|
| 741 |
+
"details": {
|
| 742 |
+
"video_url": url,
|
| 743 |
+
"captions_length": len(captions),
|
| 744 |
+
"total_claims": len(claims),
|
| 745 |
+
"claims_verified": true_count,
|
| 746 |
+
"claims_false": false_count,
|
| 747 |
+
"claims_mixed": mixed_count,
|
| 748 |
+
"claims_uncertain": uncertain_count,
|
| 749 |
+
"claims_error": error_count,
|
| 750 |
+
"claim_results": claim_results
|
| 751 |
+
},
|
| 752 |
+
"source": "youtube_url"
|
| 753 |
+
}
|
| 754 |
+
|
| 755 |
+
except Exception as e:
|
| 756 |
+
print(f"❌ Error verifying YouTube video: {e}")
|
| 757 |
+
import traceback
|
| 758 |
+
print(traceback.format_exc())
|
| 759 |
+
return {
|
| 760 |
+
"verified": False,
|
| 761 |
+
"verdict": "error",
|
| 762 |
+
"message": f"Error processing YouTube video: {str(e)}",
|
| 763 |
+
"details": {
|
| 764 |
+
"video_url": url,
|
| 765 |
+
"error": str(e)
|
| 766 |
+
},
|
| 767 |
+
"source": "youtube_url"
|
| 768 |
+
}
|
| 769 |
+
|
| 770 |
+
|
| 771 |
@app.post("/chatbot/verify")
|
| 772 |
async def chatbot_verify(
|
| 773 |
text_input: Optional[str] = Form(None),
|
|
|
|
| 857 |
try:
|
| 858 |
gemini_prompt = f"""
|
| 859 |
You are an assistant for audio authenticity analysis.
|
|
|
|
| 860 |
{('User question: ' + claim_context) if claim_context else ''}
|
| 861 |
The audio has been analyzed and the result is: {'deepfake' if deepfake else 'NOT deepfake'}.
|
| 862 |
Compose a clear, friendly, 1-2 line summary verdict for the user, tailored to the above context/result (do not answer with JSON or code, just a natural response).
|
| 863 |
Avoid repeating 'deepfake detection' technical language; be concise and direct.
|
| 864 |
+
Do NOT mention file names or file paths in your response.
|
| 865 |
"""
|
| 866 |
gemini_response = input_processor_for_audio.model.generate_content(gemini_prompt)
|
| 867 |
ai_message = None
|
|
|
|
| 917 |
print(f"🔍 DEBUG: Processing {len(urls_list)} URLs")
|
| 918 |
for i, url in enumerate(urls_list):
|
| 919 |
print(f"🔍 DEBUG: Processing URL {i}: {url}")
|
| 920 |
+
|
| 921 |
+
# STEP 0: Check if this is a YouTube URL - handle specially
|
| 922 |
+
if _is_youtube_url(url):
|
| 923 |
+
print(f"🎥 DEBUG: Detected YouTube URL, using caption-based verification: {url}")
|
| 924 |
+
try:
|
| 925 |
+
result = await _verify_youtube_video(url, claim_context, claim_date)
|
| 926 |
+
results.append(result)
|
| 927 |
+
print(f"🔍 DEBUG: YouTube verification result: {result}")
|
| 928 |
+
continue # Skip the rest of the URL processing
|
| 929 |
+
except Exception as e:
|
| 930 |
+
print(f"❌ DEBUG: YouTube verification failed: {e}")
|
| 931 |
+
import traceback
|
| 932 |
+
print(traceback.format_exc())
|
| 933 |
+
# Fall through to regular video processing as fallback
|
| 934 |
+
|
| 935 |
+
# STEP 1: For social media URLs, use yt-dlp to fetch the actual media first
|
| 936 |
+
# This determines the REAL media type, not just what the LLM guessed
|
| 937 |
+
url_lower = url.lower()
|
| 938 |
+
is_social_media = any(domain in url_lower for domain in [
|
| 939 |
+
'twitter.com', 'x.com', 'instagram.com', 'tiktok.com',
|
| 940 |
+
'facebook.com', 'youtube.com', 'youtu.be'
|
| 941 |
+
])
|
| 942 |
+
|
| 943 |
+
extracted_media = None
|
| 944 |
+
if is_social_media:
|
| 945 |
+
print(f"🔍 DEBUG: Detected social media URL, extracting media with yt-dlp: {url}")
|
| 946 |
+
try:
|
| 947 |
+
# Use yt-dlp to extract media and determine actual type
|
| 948 |
+
extracted_media = await _extract_media_from_url(url)
|
| 949 |
+
if extracted_media:
|
| 950 |
+
actual_type = extracted_media.get("type") # "image" or "video"
|
| 951 |
+
media_path = extracted_media.get("path")
|
| 952 |
+
temp_dir = extracted_media.get("temp_dir")
|
| 953 |
+
|
| 954 |
+
print(f"🔍 DEBUG: yt-dlp extracted {actual_type} from URL: {media_path}")
|
| 955 |
+
|
| 956 |
+
# Route based on ACTUAL media type, not LLM's guess
|
| 957 |
+
if actual_type == "image":
|
| 958 |
+
result = await image_verifier.verify(
|
| 959 |
+
image_path=media_path,
|
| 960 |
+
claim_context=claim_context,
|
| 961 |
+
claim_date=claim_date
|
| 962 |
+
)
|
| 963 |
+
else: # video
|
| 964 |
+
result = await video_verifier.verify(
|
| 965 |
+
video_path=media_path,
|
| 966 |
+
claim_context=claim_context,
|
| 967 |
+
claim_date=claim_date
|
| 968 |
+
)
|
| 969 |
+
|
| 970 |
+
result["source"] = "url"
|
| 971 |
+
results.append(result)
|
| 972 |
+
|
| 973 |
+
# Add to cleanup list
|
| 974 |
+
if media_path:
|
| 975 |
+
temp_files_to_cleanup.append(media_path)
|
| 976 |
+
if temp_dir:
|
| 977 |
+
temp_files_to_cleanup.append(temp_dir)
|
| 978 |
+
|
| 979 |
+
continue # Skip the old routing logic below
|
| 980 |
+
else:
|
| 981 |
+
print(f"⚠️ DEBUG: yt-dlp extraction returned None, falling back to direct URL")
|
| 982 |
+
except Exception as e:
|
| 983 |
+
print(f"⚠️ DEBUG: Failed to extract media from URL with yt-dlp: {e}, falling back to direct URL")
|
| 984 |
+
import traceback
|
| 985 |
+
print(traceback.format_exc())
|
| 986 |
+
# Fall through to old logic
|
| 987 |
+
|
| 988 |
+
# STEP 2: Fallback to old routing (for direct image/video URLs or if yt-dlp fails)
|
| 989 |
if verification_type == "image":
|
| 990 |
print(f"🔍 DEBUG: Calling image_verifier.verify for URL")
|
| 991 |
result = await image_verifier.verify(
|
|
|
|
| 1176 |
print(f"🔍 DEBUG: Exception type: {type(e).__name__}")
|
| 1177 |
raise HTTPException(status_code=500, detail=str(e))
|
| 1178 |
|
| 1179 |
+
@app.get("/mongodb/search-similar")
|
| 1180 |
+
async def search_similar_rumours(
|
| 1181 |
+
query: str,
|
| 1182 |
+
similarity_threshold: float = 0.6,
|
| 1183 |
+
limit: int = 5
|
| 1184 |
+
):
|
| 1185 |
+
"""
|
| 1186 |
+
Search for rumours similar to the query text
|
| 1187 |
+
|
| 1188 |
+
Args:
|
| 1189 |
+
query: Search query text
|
| 1190 |
+
similarity_threshold: Minimum similarity score (0.0 to 1.0, default: 0.6)
|
| 1191 |
+
limit: Maximum number of results to return (default: 5)
|
| 1192 |
+
|
| 1193 |
+
Returns:
|
| 1194 |
+
List of similar rumours with similarity scores
|
| 1195 |
+
"""
|
| 1196 |
+
try:
|
| 1197 |
+
if not mongodb_service:
|
| 1198 |
+
raise HTTPException(
|
| 1199 |
+
status_code=503,
|
| 1200 |
+
detail="MongoDB service is not available. Check MONGO_CONNECTION_STRING environment variable."
|
| 1201 |
+
)
|
| 1202 |
+
|
| 1203 |
+
if not query or not query.strip():
|
| 1204 |
+
return {
|
| 1205 |
+
"success": True,
|
| 1206 |
+
"count": 0,
|
| 1207 |
+
"results": []
|
| 1208 |
+
}
|
| 1209 |
+
|
| 1210 |
+
# Validate threshold
|
| 1211 |
+
similarity_threshold = max(0.0, min(1.0, similarity_threshold))
|
| 1212 |
+
|
| 1213 |
+
results = mongodb_service.search_similar_rumours(
|
| 1214 |
+
query=query,
|
| 1215 |
+
similarity_threshold=similarity_threshold,
|
| 1216 |
+
limit=limit
|
| 1217 |
+
)
|
| 1218 |
+
|
| 1219 |
+
return {
|
| 1220 |
+
"success": True,
|
| 1221 |
+
"count": len(results),
|
| 1222 |
+
"query": query,
|
| 1223 |
+
"similarity_threshold": similarity_threshold,
|
| 1224 |
+
"results": results
|
| 1225 |
+
}
|
| 1226 |
+
|
| 1227 |
+
except Exception as e:
|
| 1228 |
+
logger.error(f"❌ Error searching similar rumours: {e}")
|
| 1229 |
+
raise HTTPException(status_code=500, detail=str(e))
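A quick client-side example of this endpoint using the requests package (already in requirements.txt); the query text is hypothetical and the port matches the Dockerfile default:

import requests

resp = requests.get(
    "http://localhost:7860/mongodb/search-similar",
    params={"query": "5G towers cause illness", "similarity_threshold": 0.6, "limit": 5},
    timeout=30,
)
data = resp.json()
print(data["count"], "similar rumours found")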
|
| 1230 |
+
|
| 1231 |
@app.get("/health")
|
| 1232 |
async def health_check():
|
| 1233 |
return {"status": "healthy", "service": "visual-verification"}
|
|
|
|
| 1373 |
except Exception as e:
|
| 1374 |
raise HTTPException(status_code=500, detail=str(e))
|
| 1375 |
|
| 1376 |
+
from pydantic import BaseModel
|
| 1377 |
+
|
| 1378 |
+
|
| 1379 |
+
# ---------- Auth endpoints (minimal implementation) ----------
|
| 1380 |
+
|
| 1381 |
+
|
| 1382 |
+
class LoginRequest(BaseModel):
|
| 1383 |
+
email: str
|
| 1384 |
+
password: str
|
| 1385 |
+
|
| 1386 |
+
class SignupRequest(BaseModel):
|
| 1387 |
+
name: str
|
| 1388 |
+
email: str
|
| 1389 |
+
password: str
|
| 1390 |
+
phone_number: Optional[str] = None
|
| 1391 |
+
age: Optional[int] = None
|
| 1392 |
+
domain_preferences: Optional[List[str]] = []
|
| 1393 |
+
|
| 1394 |
+
class UserResponse(BaseModel):
|
| 1395 |
+
email: str
|
| 1396 |
+
id: Optional[str] = None
|
| 1397 |
+
|
| 1398 |
+
@app.post("/auth/signup")
|
| 1399 |
+
async def signup(request: SignupRequest):
|
| 1400 |
+
"""Sign up a new user"""
|
| 1401 |
+
if not mongodb_service:
|
| 1402 |
+
raise HTTPException(status_code=503, detail="MongoDB service not available")
|
| 1403 |
+
|
| 1404 |
+
try:
|
| 1405 |
+
# Hash password (in production, use bcrypt or similar)
|
| 1406 |
+
import hashlib
|
| 1407 |
+
password_hash = hashlib.sha256(request.password.encode()).hexdigest()
|
| 1408 |
+
|
| 1409 |
+
user_data = {
|
| 1410 |
+
"name": request.name,
|
| 1411 |
+
"email": request.email,
|
| 1412 |
+
"password": password_hash,
|
| 1413 |
+
"phone_number": request.phone_number,
|
| 1414 |
+
"age": request.age,
|
| 1415 |
+
"domain_preferences": request.domain_preferences or [],
|
| 1416 |
+
"created_at": None, # Will be set by MongoDB service
|
| 1417 |
+
"updated_at": None,
|
| 1418 |
+
}
|
| 1419 |
+
|
| 1420 |
+
user = mongodb_service.create_user(user_data)
|
| 1421 |
+
|
| 1422 |
+
# Generate token (in production, use JWT)
|
| 1423 |
+
token = f"mock_token_{request.email}"
|
| 1424 |
+
|
| 1425 |
+
return {
|
| 1426 |
+
"message": "User created successfully",
|
| 1427 |
+
"token": token,
|
| 1428 |
+
"user": {
|
| 1429 |
+
"name": user.get("name"),
|
| 1430 |
+
"email": user["email"],
|
| 1431 |
+
"id": user["id"],
|
| 1432 |
+
"phone_number": user.get("phone_number"),
|
| 1433 |
+
"age": user.get("age"),
|
| 1434 |
+
"domain_preferences": user.get("domain_preferences", [])
|
| 1435 |
+
}
|
| 1436 |
+
}
|
| 1437 |
+
except ValueError as e:
|
| 1438 |
+
raise HTTPException(status_code=400, detail=str(e))
|
| 1439 |
+
except Exception as e:
|
| 1440 |
+
logger.error(f"Signup error: {e}")
|
| 1441 |
+
raise HTTPException(status_code=500, detail="Failed to create user")
|
| 1442 |
+
|
| 1443 |
+
@app.post("/auth/login")
|
| 1444 |
+
async def login(request: LoginRequest):
|
| 1445 |
+
"""Login user"""
|
| 1446 |
+
if not mongodb_service:
|
| 1447 |
+
raise HTTPException(status_code=503, detail="MongoDB service not available")
|
| 1448 |
+
|
| 1449 |
+
try:
|
| 1450 |
+
user = mongodb_service.get_user_by_email(request.email)
|
| 1451 |
+
if not user:
|
| 1452 |
+
raise HTTPException(status_code=401, detail="Invalid email or password")
|
| 1453 |
+
|
| 1454 |
+
# Verify password (in production, use bcrypt or similar)
|
| 1455 |
+
import hashlib
|
| 1456 |
+
password_hash = hashlib.sha256(request.password.encode()).hexdigest()
|
| 1457 |
+
|
| 1458 |
+
if user["password"] != password_hash:
|
| 1459 |
+
raise HTTPException(status_code=401, detail="Invalid email or password")
|
| 1460 |
+
|
| 1461 |
+
# Generate token (in production, use JWT)
|
| 1462 |
+
token = f"mock_token_{request.email}"
|
| 1463 |
+
|
| 1464 |
+
return {
|
| 1465 |
+
"message": "Login successful",
|
| 1466 |
+
"token": token,
|
| 1467 |
+
"user": {
|
| 1468 |
+
"name": user.get("name"),
|
| 1469 |
+
"email": user["email"],
|
| 1470 |
+
"id": user["id"],
|
| 1471 |
+
"phone_number": user.get("phone_number"),
|
| 1472 |
+
"age": user.get("age"),
|
| 1473 |
+
"domain_preferences": user.get("domain_preferences", [])
|
| 1474 |
+
}
|
| 1475 |
+
}
|
| 1476 |
+
except HTTPException:
|
| 1477 |
+
raise
|
| 1478 |
+
except Exception as e:
|
| 1479 |
+
logger.error(f"Login error: {e}")
|
| 1480 |
+
raise HTTPException(status_code=500, detail="Failed to login")
|
| 1481 |
+
|
| 1482 |
+
@app.get("/auth/me")
|
| 1483 |
+
async def get_current_user(request: Request):
|
| 1484 |
+
"""Get current user (requires authentication in production)"""
|
| 1485 |
+
if not mongodb_service:
|
| 1486 |
+
raise HTTPException(status_code=503, detail="MongoDB service not available")
|
| 1487 |
+
|
| 1488 |
+
# In production, verify JWT token from Authorization header
|
| 1489 |
+
auth_header = request.headers.get("Authorization")
|
| 1490 |
+
if not auth_header or not auth_header.startswith("Bearer "):
|
| 1491 |
+
raise HTTPException(status_code=401, detail="Not authenticated")
|
| 1492 |
+
|
| 1493 |
+
token = auth_header.replace("Bearer ", "")
|
| 1494 |
+
|
| 1495 |
+
# Extract email from token (in production, decode JWT)
|
| 1496 |
+
if not token.startswith("mock_token_"):
|
| 1497 |
+
raise HTTPException(status_code=401, detail="Invalid token")
|
| 1498 |
+
|
| 1499 |
+
email = token.replace("mock_token_", "")
|
| 1500 |
+
|
| 1501 |
+
try:
|
| 1502 |
+
user = mongodb_service.get_user_by_email(email)
|
| 1503 |
+
if not user:
|
| 1504 |
+
raise HTTPException(status_code=401, detail="User not found")
|
| 1505 |
+
|
| 1506 |
+
# Get subscription tier from user document (preferred) or check subscription
|
| 1507 |
+
subscription_tier = user.get("subscription_tier", "Free")
|
| 1508 |
+
|
| 1509 |
+
# If not in user doc, check active subscription
|
| 1510 |
+
if subscription_tier == "Free" and user.get("id"):
|
| 1511 |
+
subscription = mongodb_service.get_user_subscription(user_id=user["id"], status="active")
|
| 1512 |
+
if subscription:
|
| 1513 |
+
subscription_tier = subscription.get("plan_name", "Free")
|
| 1514 |
+
# Update user document with subscription tier
|
| 1515 |
+
mongodb_service.update_user_subscription_tier(user["id"], subscription_tier)
|
| 1516 |
+
|
| 1517 |
+
return {
|
| 1518 |
+
"name": user.get("name"),
|
| 1519 |
+
"email": user["email"],
|
| 1520 |
+
"id": user["id"],
|
| 1521 |
+
"phone_number": user.get("phone_number"),
|
| 1522 |
+
"age": user.get("age"),
|
| 1523 |
+
"domain_preferences": user.get("domain_preferences", []),
|
| 1524 |
+
"subscription_tier": subscription_tier
|
| 1525 |
+
}
|
| 1526 |
+
except HTTPException:
|
| 1527 |
+
raise
|
| 1528 |
+
except Exception as e:
|
| 1529 |
+
logger.error(f"Get user error: {e}")
|
| 1530 |
+
raise HTTPException(status_code=500, detail="Failed to get user")
|
| 1531 |
+
|
| 1532 |
+
|
| 1533 |
+
# ---------- Chat history endpoints ----------
|
| 1534 |
+
|
| 1535 |
+
|
| 1536 |
+
class ChatSessionUpsert(BaseModel):
|
| 1537 |
+
session_id: Optional[str] = None
|
| 1538 |
+
title: Optional[str] = None
|
| 1539 |
+
user_id: Optional[str] = None
|
| 1540 |
+
anonymous_id: Optional[str] = None
|
| 1541 |
+
last_verdict: Optional[str] = None
|
| 1542 |
+
last_summary: Optional[str] = None
|
| 1543 |
+
|
| 1544 |
+
|
| 1545 |
+
class ChatTurn(BaseModel):
|
| 1546 |
+
role: str
|
| 1547 |
+
content: str
|
| 1548 |
+
created_at: Optional[Any] = None # Can be datetime, string, or None
|
| 1549 |
+
verdict: Optional[str] = None
|
| 1550 |
+
confidence: Optional[float] = None
|
| 1551 |
+
sources: Optional[Dict[str, Any]] = None
|
| 1552 |
+
attachments: Optional[List[Dict[str, Any]]] = None
|
| 1553 |
+
metadata: Optional[Dict[str, Any]] = None
|
| 1554 |
+
|
| 1555 |
+
|
| 1556 |
+
class ChatMessagesAppend(BaseModel):
|
| 1557 |
+
session_id: str
|
| 1558 |
+
user_id: Optional[str] = None
|
| 1559 |
+
anonymous_id: Optional[str] = None
|
| 1560 |
+
messages: List[ChatTurn]
|
| 1561 |
+
|
| 1562 |
+
|
| 1563 |
+
@app.get("/chat/sessions")
|
| 1564 |
+
async def list_chat_sessions(
|
| 1565 |
+
user_id: Optional[str] = None,
|
| 1566 |
+
anonymous_id: Optional[str] = None,
|
| 1567 |
+
):
|
| 1568 |
+
"""Return chat sessions for logged-in users only.
|
| 1569 |
+
|
| 1570 |
+
Anonymous users will receive an empty list since their sessions are not persisted.
|
| 1571 |
+
"""
|
| 1572 |
+
try:
|
| 1573 |
+
if not mongodb_service:
|
| 1574 |
+
raise HTTPException(status_code=503, detail="MongoDB service not available")
|
| 1575 |
+
|
| 1576 |
+
# Only return sessions for logged-in users
|
| 1577 |
+
if not user_id:
|
| 1578 |
+
logger.info(f"⏭️ No user_id provided, returning empty sessions list")
|
| 1579 |
+
return {"sessions": []}
|
| 1580 |
+
|
| 1581 |
+
logger.info(f"🔍 Loading chat sessions: user_id={user_id}")
|
| 1582 |
+
sessions = mongodb_service.get_chat_sessions(
|
| 1583 |
+
user_id=user_id,
|
| 1584 |
+
anonymous_id=None, # Don't query by anonymous_id anymore
|
| 1585 |
+
)
|
| 1586 |
+
logger.info(f"✅ Found {len(sessions)} chat sessions")
|
| 1587 |
+
return {"sessions": sessions}
|
| 1588 |
+
except Exception as e:
|
| 1589 |
+
logger.error(f"❌ Error loading chat sessions: {e}", exc_info=True)
|
| 1590 |
+
raise HTTPException(status_code=500, detail=f"Failed to load chat sessions: {str(e)}")
|
| 1591 |
+
|
| 1592 |
+
|
| 1593 |
+
@app.post("/chat/sessions")
|
| 1594 |
+
async def upsert_chat_session(payload: ChatSessionUpsert):
|
| 1595 |
+
"""Create or update a chat session.
|
| 1596 |
+
|
| 1597 |
+
Only saves sessions for logged-in users (user_id required).
|
| 1598 |
+
Anonymous sessions are not persisted to MongoDB but a session_id is still returned for UI purposes.
|
| 1599 |
+
"""
|
| 1600 |
+
try:
|
| 1601 |
+
if not mongodb_service:
|
| 1602 |
+
raise HTTPException(status_code=503, detail="MongoDB service not available")
|
| 1603 |
+
|
| 1604 |
+
data = payload.dict(exclude_unset=True)
|
| 1605 |
+
user_id = data.get("user_id")
|
| 1606 |
+
anonymous_id = data.get("anonymous_id")
|
| 1607 |
+
|
| 1608 |
+
# Only persist sessions for logged-in users
|
| 1609 |
+
if not user_id:
|
| 1610 |
+
# Still return a session_id for UI purposes, but don't persist
|
| 1611 |
+
import uuid
|
| 1612 |
+
session_id = data.get("session_id") or str(uuid.uuid4())
|
| 1613 |
+
logger.info(f"⏭️ Skipping session persistence for anonymous user (session_id={session_id})")
|
| 1614 |
+
return {
|
| 1615 |
+
"session_id": session_id,
|
| 1616 |
+
"title": data.get("title", "New Chat"),
|
| 1617 |
+
"user_id": None,
|
| 1618 |
+
"anonymous_id": anonymous_id,
|
| 1619 |
+
"created_at": None,
|
| 1620 |
+
"updated_at": None,
|
| 1621 |
+
"persisted": False,
|
| 1622 |
+
}
|
| 1623 |
+
|
| 1624 |
+
logger.info(f"🔍 Upserting chat session: {data}")
|
| 1625 |
+
|
| 1626 |
+
# Optionally migrate anonymous history on first login
|
| 1627 |
+
if user_id and anonymous_id:
|
| 1628 |
+
try:
|
| 1629 |
+
migrated = mongodb_service.migrate_anonymous_sessions(
|
| 1630 |
+
anonymous_id=anonymous_id, user_id=user_id
|
| 1631 |
+
)
|
| 1632 |
+
logger.info(f"✅ Migrated {migrated} anonymous sessions to user {user_id}")
|
| 1633 |
+
except Exception as exc:
|
| 1634 |
+
logger.error(f"Failed to migrate anonymous sessions: {exc}")
|
| 1635 |
+
|
| 1636 |
+
session_doc = mongodb_service.upsert_chat_session(data)
|
| 1637 |
+
logger.info(f"✅ Created/updated session: {session_doc.get('session_id')}")
|
| 1638 |
+
return session_doc
|
| 1639 |
+
except Exception as e:
|
| 1640 |
+
logger.error(f"❌ Error upserting chat session: {e}", exc_info=True)
|
| 1641 |
+
raise HTTPException(status_code=500, detail=f"Failed to create/update chat session: {str(e)}")
|
| 1642 |
+
|
| 1643 |
+
|
| 1644 |
+
@app.get("/chat/messages/{session_id}")
|
| 1645 |
+
async def get_chat_messages(session_id: str):
|
| 1646 |
+
"""Return all messages for a given chat session."""
|
| 1647 |
+
if not mongodb_service:
|
| 1648 |
+
raise HTTPException(status_code=503, detail="MongoDB service not available")
|
| 1649 |
+
|
| 1650 |
+
messages = mongodb_service.get_chat_messages(session_id=session_id)
|
| 1651 |
+
return {"session_id": session_id, "messages": messages}
|
| 1652 |
+
|
| 1653 |
+
|
| 1654 |
+
@app.post("/chat/messages")
|
| 1655 |
+
async def append_chat_messages(payload: ChatMessagesAppend):
|
| 1656 |
+
"""Append one or more messages to a chat session.
|
| 1657 |
+
|
| 1658 |
+
Only saves messages for logged-in users (user_id required).
|
| 1659 |
+
Anonymous messages are not persisted to MongoDB.
|
| 1660 |
+
"""
|
| 1661 |
+
if not mongodb_service:
|
| 1662 |
+
raise HTTPException(status_code=503, detail="MongoDB service not available")
|
| 1663 |
+
|
| 1664 |
+
data = payload.dict()
|
| 1665 |
+
user_id = data.get("user_id")
|
| 1666 |
+
|
| 1667 |
+
# Only persist messages for logged-in users
|
| 1668 |
+
if not user_id:
|
| 1669 |
+
logger.info(f"⏭️ Skipping message persistence for anonymous user (session_id={data['session_id']})")
|
| 1670 |
+
return {"inserted": 0, "message": "Messages not persisted for anonymous users"}
|
| 1671 |
+
|
| 1672 |
+
inserted = mongodb_service.append_chat_messages(
|
| 1673 |
+
session_id=data["session_id"],
|
| 1674 |
+
messages=[m for m in data["messages"]],
|
| 1675 |
+
user_id=user_id,
|
| 1676 |
+
anonymous_id=data.get("anonymous_id"),
|
| 1677 |
+
)
|
| 1678 |
+
logger.info(f"✅ Persisted {inserted} messages for user {user_id}")
|
| 1679 |
+
return {"inserted": inserted}
|
| 1680 |
+
|
| 1681 |
+
|
| 1682 |
+
# ---------- Subscription endpoints ----------
|
| 1683 |
+
|
| 1684 |
+
|
| 1685 |
+
class CreatePlanRequest(BaseModel):
|
| 1686 |
+
name: str
|
| 1687 |
+
amount: int # Amount in paise (smallest currency unit)
|
| 1688 |
+
currency: str = "INR"
|
| 1689 |
+
interval: int = 1
|
| 1690 |
+
period: str = "monthly" # daily, weekly, monthly, yearly
|
| 1691 |
+
description: Optional[str] = None
|
| 1692 |
+
|
| 1693 |
+
|
| 1694 |
+
class CreateSubscriptionRequest(BaseModel):
|
| 1695 |
+
plan_id: str
|
| 1696 |
+
user_id: str
|
| 1697 |
+
customer_notify: int = 1
|
| 1698 |
+
total_count: Optional[int] = None
|
| 1699 |
+
notes: Optional[Dict[str, str]] = None
|
| 1700 |
+
|
| 1701 |
+
|
| 1702 |
+
class CancelSubscriptionRequest(BaseModel):
|
| 1703 |
+
subscription_id: str
|
| 1704 |
+
cancel_at_cycle_end: bool = False
|
| 1705 |
+
|
| 1706 |
+
|
| 1707 |
+
@app.post("/subscriptions/plans")
|
| 1708 |
+
async def create_subscription_plan(request: CreatePlanRequest):
|
| 1709 |
+
"""Create a subscription plan in Razorpay (admin/one-time setup)"""
|
| 1710 |
+
try:
|
| 1711 |
+
if not razorpay_service or not razorpay_service.client:
|
| 1712 |
+
raise HTTPException(
|
| 1713 |
+
status_code=503,
|
| 1714 |
+
detail="Razorpay service not available. Check RAZORPAY_ID and RAZORPAY_KEY."
|
| 1715 |
+
)
|
| 1716 |
+
|
| 1717 |
+
plan = razorpay_service.create_plan(
|
| 1718 |
+
name=request.name,
|
| 1719 |
+
amount=request.amount,
|
| 1720 |
+
currency=request.currency,
|
| 1721 |
+
interval=request.interval,
|
| 1722 |
+
period=request.period,
|
| 1723 |
+
description=request.description
|
| 1724 |
+
)
|
| 1725 |
+
|
| 1726 |
+
return {
|
| 1727 |
+
"success": True,
|
| 1728 |
+
"plan": plan
|
| 1729 |
+
}
|
| 1730 |
+
except Exception as e:
|
| 1731 |
+
logger.error(f"❌ Failed to create subscription plan: {e}")
|
| 1732 |
+
raise HTTPException(status_code=500, detail=str(e))
|
| 1733 |
+
|
| 1734 |
+
|
| 1735 |
+
@app.get("/subscriptions/plans")
|
| 1736 |
+
async def list_subscription_plans(count: int = 10, skip: int = 0):
|
| 1737 |
+
"""List available subscription plans"""
|
| 1738 |
+
try:
|
| 1739 |
+
if not razorpay_service or not razorpay_service.client:
|
| 1740 |
+
raise HTTPException(
|
| 1741 |
+
status_code=503,
|
| 1742 |
+
detail="Razorpay service not available. Check RAZORPAY_ID and RAZORPAY_KEY."
|
| 1743 |
+
)
|
| 1744 |
+
|
| 1745 |
+
plans = razorpay_service.list_plans(count=count, skip=skip)
|
| 1746 |
+
return {
|
| 1747 |
+
"success": True,
|
| 1748 |
+
"plans": plans
|
| 1749 |
+
}
|
| 1750 |
+
except Exception as e:
|
| 1751 |
+
logger.error(f"❌ Failed to list subscription plans: {e}")
|
| 1752 |
+
raise HTTPException(status_code=500, detail=str(e))
|
| 1753 |
+
|
| 1754 |
+
|
| 1755 |
+
@app.get("/subscriptions/config")
|
| 1756 |
+
async def get_subscription_config():
|
| 1757 |
+
"""Get Razorpay public configuration (Key ID) for frontend"""
|
| 1758 |
+
try:
|
| 1759 |
+
if not config.RAZORPAY_ID:
|
| 1760 |
+
raise HTTPException(
|
| 1761 |
+
status_code=503,
|
| 1762 |
+
detail="Razorpay not configured"
|
| 1763 |
+
)
|
| 1764 |
+
|
| 1765 |
+
return {
|
| 1766 |
+
"success": True,
|
| 1767 |
+
"razorpay_key_id": config.RAZORPAY_ID
|
| 1768 |
+
}
|
| 1769 |
+
except Exception as e:
|
| 1770 |
+
logger.error(f"❌ Failed to get subscription config: {e}")
|
| 1771 |
+
raise HTTPException(status_code=500, detail=str(e))
|
| 1772 |
+
|
| 1773 |
+
|
| 1774 |
+
@app.post("/subscriptions/create")
|
| 1775 |
+
async def create_subscription(request: CreateSubscriptionRequest):
|
| 1776 |
+
"""Create a subscription for a user"""
|
| 1777 |
+
try:
|
| 1778 |
+
if not razorpay_service or not razorpay_service.client:
|
| 1779 |
+
raise HTTPException(
|
| 1780 |
+
status_code=503,
|
| 1781 |
+
detail="Razorpay service not available. Check RAZORPAY_ID and RAZORPAY_KEY."
|
| 1782 |
+
)
|
| 1783 |
+
|
| 1784 |
+
if not mongodb_service:
|
| 1785 |
+
raise HTTPException(
|
| 1786 |
+
status_code=503,
|
| 1787 |
+
detail="MongoDB service not available"
|
| 1788 |
+
)
|
| 1789 |
+
|
| 1790 |
+
# Create subscription in Razorpay
|
| 1791 |
+
subscription = razorpay_service.create_subscription(
|
| 1792 |
+
plan_id=request.plan_id,
|
| 1793 |
+
customer_notify=request.customer_notify,
|
| 1794 |
+
total_count=request.total_count,
|
| 1795 |
+
notes=request.notes
|
| 1796 |
+
)
|
| 1797 |
+
|
| 1798 |
+
# Get plan details
|
| 1799 |
+
plan = razorpay_service.get_plan(request.plan_id)
|
| 1800 |
+
|
| 1801 |
+
# Extract plan name - try multiple possible locations
|
| 1802 |
+
plan_name = "Pro" # Default
|
| 1803 |
+
if plan:
|
| 1804 |
+
# Try different possible locations for plan name
|
| 1805 |
+
plan_name_raw = (
|
| 1806 |
+
plan.get("item", {}).get("name") or
|
| 1807 |
+
plan.get("name") or
|
| 1808 |
+
request.notes.get("plan_name") if request.notes else None or
|
| 1809 |
+
"Pro"
|
| 1810 |
+
)
|
| 1811 |
+
# Normalize plan name
|
| 1812 |
+
plan_name_raw_lower = plan_name_raw.lower()
|
| 1813 |
+
if "pro" in plan_name_raw_lower:
|
| 1814 |
+
plan_name = "Pro"
|
| 1815 |
+
elif "enterprise" in plan_name_raw_lower:
|
| 1816 |
+
plan_name = "Enterprise"
|
| 1817 |
+
else:
|
| 1818 |
+
plan_name = plan_name_raw
|
| 1819 |
+
|
| 1820 |
+
# Store subscription in MongoDB
|
| 1821 |
+
from datetime import datetime
|
| 1822 |
+
subscription_data = {
|
| 1823 |
+
"user_id": request.user_id,
|
| 1824 |
+
"razorpay_subscription_id": subscription.get("id"),
|
| 1825 |
+
"razorpay_plan_id": request.plan_id,
|
| 1826 |
+
"plan_name": plan_name,
|
| 1827 |
+
"status": subscription.get("status", "created"),
|
| 1828 |
+
"amount": plan.get("item", {}).get("amount", 0) if plan else 0,
|
| 1829 |
+
"currency": plan.get("item", {}).get("currency", "INR") if plan else "INR",
|
| 1830 |
+
"current_start": subscription.get("current_start"),
|
| 1831 |
+
"current_end": subscription.get("current_end"),
|
| 1832 |
+
"next_billing_at": subscription.get("end_at"),
|
| 1833 |
+
"created_at": datetime.utcnow(),
|
| 1834 |
+
"razorpay_data": subscription # Store full Razorpay response
|
| 1835 |
+
}
|
| 1836 |
+
|
| 1837 |
+
mongodb_service.upsert_subscription(subscription_data)
|
| 1838 |
+
|
| 1839 |
+
# Update user's subscription tier immediately if status is active
|
| 1840 |
+
# Otherwise, it will be updated via webhook when payment is completed
|
| 1841 |
+
if subscription.get("status") == "active":
|
| 1842 |
+
mongodb_service.update_user_subscription_tier(request.user_id, plan_name)
|
| 1843 |
+
logger.info(f"✅ Updated user {request.user_id} subscription tier to {plan_name}")
|
| 1844 |
+
else:
|
| 1845 |
+
logger.info(f"⏳ Subscription created with status '{subscription.get('status')}'. User tier will be updated when subscription is activated via webhook.")
|
| 1846 |
+
|
| 1847 |
+
return {
|
| 1848 |
+
"success": True,
|
| 1849 |
+
"subscription_id": subscription.get("id"),
|
| 1850 |
+
"short_url": subscription.get("short_url"),
|
| 1851 |
+
"subscription": subscription
|
| 1852 |
+
}
|
| 1853 |
+
except Exception as e:
|
| 1854 |
+
logger.error(f"❌ Failed to create subscription: {e}")
|
| 1855 |
+
raise HTTPException(status_code=500, detail=str(e))
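A client-side sketch of the subscribe flow, with hypothetical ids; the plan_id would come from GET /subscriptions/plans, and the short_url in the response is Razorpay's hosted checkout link:

import requests

resp = requests.post(
    "http://localhost:7860/subscriptions/create",
    json={
        "plan_id": "plan_XXXXXXXXXXXXXX",    # hypothetical Razorpay plan id
        "user_id": "<mongo-user-id>",         # hypothetical
        "notes": {"plan_name": "Pro"},
    },
    timeout=30,
)
print(resp.json().get("short_url"))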
|
| 1856 |
+
|
| 1857 |
+
|
| 1858 |
+
@app.get("/subscriptions/status")
|
| 1859 |
+
async def get_subscription_status(user_id: Optional[str] = None):
|
| 1860 |
+
"""Get user's subscription status"""
|
| 1861 |
+
try:
|
| 1862 |
+
if not mongodb_service:
|
| 1863 |
+
raise HTTPException(
|
| 1864 |
+
status_code=503,
|
| 1865 |
+
detail="MongoDB service not available"
|
| 1866 |
+
)
|
| 1867 |
+
|
| 1868 |
+
if not user_id:
|
| 1869 |
+
return {
|
| 1870 |
+
"success": True,
|
| 1871 |
+
"subscription": None,
|
| 1872 |
+
"message": "No user_id provided"
|
| 1873 |
+
}
|
| 1874 |
+
|
| 1875 |
+
subscription = mongodb_service.get_user_subscription(user_id=user_id)
|
| 1876 |
+
|
| 1877 |
+
if subscription:
|
| 1878 |
+
# Optionally fetch latest data from Razorpay
|
| 1879 |
+
if razorpay_service and razorpay_service.client:
|
| 1880 |
+
try:
|
| 1881 |
+
razorpay_sub = razorpay_service.get_subscription(
|
| 1882 |
+
subscription.get("razorpay_subscription_id")
|
| 1883 |
+
)
|
| 1884 |
+
# Update status if changed
|
| 1885 |
+
if razorpay_sub.get("status") != subscription.get("status"):
|
| 1886 |
+
mongodb_service.update_subscription_status(
|
| 1887 |
+
subscription.get("razorpay_subscription_id"),
|
| 1888 |
+
razorpay_sub.get("status"),
|
| 1889 |
+
{
|
| 1890 |
+
"current_start": razorpay_sub.get("current_start"),
|
| 1891 |
+
"current_end": razorpay_sub.get("current_end"),
|
| 1892 |
+
"next_billing_at": razorpay_sub.get("end_at")
|
| 1893 |
+
}
|
| 1894 |
+
)
|
| 1895 |
+
subscription["status"] = razorpay_sub.get("status")
|
| 1896 |
+
except Exception as e:
|
| 1897 |
+
logger.warning(f"Failed to sync with Razorpay: {e}")
|
| 1898 |
+
|
| 1899 |
+
return {
|
| 1900 |
+
"success": True,
|
| 1901 |
+
"subscription": subscription
|
| 1902 |
+
}
|
| 1903 |
+
except Exception as e:
|
| 1904 |
+
logger.error(f"❌ Failed to get subscription status: {e}")
|
| 1905 |
+
raise HTTPException(status_code=500, detail=str(e))
|
| 1906 |
+
|
| 1907 |
+
|
| 1908 |
+
@app.post("/subscriptions/cancel")
|
| 1909 |
+
async def cancel_subscription(request: CancelSubscriptionRequest):
|
| 1910 |
+
"""Cancel user's subscription"""
|
| 1911 |
+
try:
|
| 1912 |
+
if not razorpay_service or not razorpay_service.client:
|
| 1913 |
+
raise HTTPException(
|
| 1914 |
+
status_code=503,
|
| 1915 |
+
detail="Razorpay service not available. Check RAZORPAY_ID and RAZORPAY_KEY."
|
| 1916 |
+
)
|
| 1917 |
+
|
| 1918 |
+
if not mongodb_service:
|
| 1919 |
+
raise HTTPException(
|
| 1920 |
+
status_code=503,
|
| 1921 |
+
detail="MongoDB service not available"
|
| 1922 |
+
)
|
| 1923 |
+
|
| 1924 |
+
# Cancel subscription in Razorpay
|
| 1925 |
+
subscription = razorpay_service.cancel_subscription(
|
| 1926 |
+
subscription_id=request.subscription_id,
|
| 1927 |
+
cancel_at_cycle_end=request.cancel_at_cycle_end
|
| 1928 |
+
)
|
| 1929 |
+
|
| 1930 |
+
# Update status in MongoDB
|
| 1931 |
+
mongodb_service.update_subscription_status(
|
| 1932 |
+
request.subscription_id,
|
| 1933 |
+
subscription.get("status", "cancelled"),
|
| 1934 |
+
{
|
| 1935 |
+
"current_start": subscription.get("current_start"),
|
| 1936 |
+
"current_end": subscription.get("current_end"),
|
| 1937 |
+
"next_billing_at": subscription.get("end_at")
|
| 1938 |
+
}
|
| 1939 |
+
)
|
| 1940 |
+
|
| 1941 |
+
return {
|
| 1942 |
+
"success": True,
|
| 1943 |
+
"subscription": subscription
|
| 1944 |
+
}
|
| 1945 |
+
except Exception as e:
|
| 1946 |
+
logger.error(f"❌ Failed to cancel subscription: {e}")
|
| 1947 |
+
raise HTTPException(status_code=500, detail=str(e))
|
| 1948 |
+
|
| 1949 |
+
|
| 1950 |
+
@app.post("/webhooks/razorpay")
|
| 1951 |
+
async def razorpay_webhook(request: Request):
|
| 1952 |
+
"""Handle Razorpay webhook events"""
|
| 1953 |
+
try:
|
| 1954 |
+
if not razorpay_service:
|
| 1955 |
+
raise HTTPException(
|
| 1956 |
+
status_code=503,
|
| 1957 |
+
detail="Razorpay service not available"
|
| 1958 |
+
)
|
| 1959 |
+
|
| 1960 |
+
if not mongodb_service:
|
| 1961 |
+
raise HTTPException(
|
| 1962 |
+
status_code=503,
|
| 1963 |
+
detail="MongoDB service not available"
|
| 1964 |
+
)
|
| 1965 |
+
|
| 1966 |
+
# Get raw body for signature verification
|
| 1967 |
+
body = await request.body()
|
| 1968 |
+
body_str = body.decode('utf-8')
|
| 1969 |
+
|
| 1970 |
+
# Get signature from header
|
| 1971 |
+
signature = request.headers.get("X-Razorpay-Signature", "")
|
| 1972 |
+
|
| 1973 |
+
# Verify webhook signature
|
| 1974 |
+
if not razorpay_service.verify_webhook_signature(body_str, signature):
|
| 1975 |
+
logger.warning("⚠️ Invalid webhook signature")
|
| 1976 |
+
raise HTTPException(status_code=400, detail="Invalid webhook signature")
|
| 1977 |
+
|
| 1978 |
+
# Parse webhook payload from body string
|
| 1979 |
+
webhook_data = json.loads(body_str)
|
| 1980 |
+
event = webhook_data.get("event")
|
| 1981 |
+
payload = webhook_data.get("payload", {})
|
| 1982 |
+
|
| 1983 |
+
logger.info(f"📥 Received Razorpay webhook: {event}")
|
| 1984 |
+
|
| 1985 |
+
# Handle different webhook events
|
| 1986 |
+
if event == "subscription.activated":
|
| 1987 |
+
subscription = payload.get("subscription", {}).get("entity", {})
|
| 1988 |
+
subscription_id = subscription.get("id")
|
| 1989 |
+
|
| 1990 |
+
if subscription_id:
|
| 1991 |
+
# Get subscription from DB to get user_id and plan_name
|
                sub_doc = mongodb_service.get_subscription_by_razorpay_id(subscription_id)
                if sub_doc:
                    user_id = sub_doc.get("user_id")
                    plan_name = sub_doc.get("plan_name", "Pro")

                    logger.info(f"📥 Processing subscription.activated for user {user_id}, plan {plan_name}")

                    mongodb_service.update_subscription_status(
                        subscription_id,
                        "active",
                        {
                            "current_start": subscription.get("current_start"),
                            "current_end": subscription.get("current_end"),
                            "next_billing_at": subscription.get("end_at")
                        }
                    )

                    # Update user's subscription tier
                    if user_id:
                        success = mongodb_service.update_user_subscription_tier(user_id, plan_name)
                        if success:
                            logger.info(f"✅ Successfully updated user {user_id} tier to {plan_name} via webhook")
                        else:
                            logger.error(f"❌ Failed to update user {user_id} tier to {plan_name}")
                else:
                    logger.warning(f"⚠️ Subscription {subscription_id} not found in database")

        elif event == "subscription.charged":
            subscription = payload.get("subscription", {}).get("entity", {})
            payment = payload.get("payment", {}).get("entity", {})
            subscription_id = subscription.get("id")

            if subscription_id:
                # Get subscription from DB to get user_id and plan_name
                sub_doc = mongodb_service.get_subscription_by_razorpay_id(subscription_id)
                if sub_doc:
                    user_id = sub_doc.get("user_id")
                    plan_name = sub_doc.get("plan_name", "Pro")

                    logger.info(f"📥 Processing subscription.charged for user {user_id}, plan {plan_name}")

                    # Update subscription with payment info
                    update_data = {
                        "current_start": subscription.get("current_start"),
                        "current_end": subscription.get("current_end"),
                        "next_billing_at": subscription.get("end_at"),
                        "last_payment_id": payment.get("id"),
                        "last_payment_amount": payment.get("amount"),
                        "last_payment_date": payment.get("created_at")
                    }
                    mongodb_service.update_subscription_status(
                        subscription_id,
                        subscription.get("status", "active"),
                        update_data
                    )

                    # Update user's subscription tier when payment is charged
                    if user_id and subscription.get("status") == "active":
                        success = mongodb_service.update_user_subscription_tier(user_id, plan_name)
                        if success:
                            logger.info(f"✅ Successfully updated user {user_id} tier to {plan_name} via subscription.charged webhook")
                        else:
                            logger.error(f"❌ Failed to update user {user_id} tier to {plan_name}")
                else:
                    logger.warning(f"⚠️ Subscription {subscription_id} not found in database for subscription.charged event")

        elif event == "subscription.cancelled":
            subscription = payload.get("subscription", {}).get("entity", {})
            subscription_id = subscription.get("id")

            if subscription_id:
                # Get subscription from DB to get user_id
                sub_doc = mongodb_service.get_subscription_by_razorpay_id(subscription_id)
                if sub_doc:
                    user_id = sub_doc.get("user_id")

                    mongodb_service.update_subscription_status(
                        subscription_id,
                        "cancelled",
                        {
                            "current_start": subscription.get("current_start"),
                            "current_end": subscription.get("current_end"),
                            "next_billing_at": subscription.get("end_at")
                        }
                    )

                    # Update user's subscription tier to Free
                    if user_id:
                        mongodb_service.update_user_subscription_tier(user_id, "Free")

        elif event == "payment.failed":
            payment = payload.get("payment", {}).get("entity", {})
            subscription_id = payment.get("subscription_id")

            if subscription_id:
                # Update subscription to reflect failed payment
                subscription = razorpay_service.get_subscription(subscription_id)
                mongodb_service.update_subscription_status(
                    subscription_id,
                    subscription.get("status", "pending"),
                    {
                        "last_payment_failed": True,
                        "last_payment_failure_reason": payment.get("error_description")
                    }
                )

        return {"success": True, "message": "Webhook processed"}

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"❌ Failed to process webhook: {e}")
        raise HTTPException(status_code=500, detail=str(e))


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=config.SERVICE_PORT)
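The webhook branches above only touch billing state after the endpoint has accepted the event; Razorpay webhooks are normally authenticated with an HMAC-SHA256 signature of the raw request body carried in the X-Razorpay-Signature header. As a hedged illustration (not this project's actual code; the function and secret names are placeholders), a stand-alone check looks like this:

# Hedged sketch: verify a Razorpay-style webhook signature with the standard library only.
# `webhook_secret` is a placeholder for the secret configured in the Razorpay dashboard.
import hmac
import hashlib

def is_valid_webhook_signature(raw_body: bytes, signature_header: str, webhook_secret: str) -> bool:
    # HMAC-SHA256 over the raw body, hex-encoded, as Razorpay signs webhook payloads
    expected = hmac.new(webhook_secret.encode("utf-8"), raw_body, hashlib.sha256).hexdigest()
    # constant-time comparison avoids leaking information through timing
    return hmac.compare_digest(expected, signature_header or "")

The razorpay SDK also ships an equivalent verification utility; whichever route is taken, events whose signature does not verify should be rejected before any of the database updates above run.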
requirements.txt
CHANGED
|
@@ -1,23 +1,24 @@
(The 23 previously listed packages are unchanged; the net change is the addition of razorpay.)
requests
pillow
opencv-python
fastapi
uvicorn[standard]
websockets
serpapi
python-dotenv
python-multipart
yt-dlp
google-generativeai
google-auth
google-auth-oauthlib
google-auth-httplib2
scikit-learn
numpy
pymongo
upstash-redis
google-search-results
cloudinary
torch
transformers
pytorchvideo
razorpay
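The new razorpay dependency backs the subscription and webhook code in main.py. A minimal, hedged initialization sketch (the environment variable names and subscription id are placeholders, not taken from this repository's config):

# Hedged sketch: creating the Razorpay client pulled in via requirements.txt.
import os
import razorpay

client = razorpay.Client(auth=(os.environ["RAZORPAY_KEY_ID"], os.environ["RAZORPAY_KEY_SECRET"]))
# e.g. fetch a subscription by its Razorpay id ("sub_placeholder_id" is a placeholder)
subscription = client.subscription.fetch("sub_placeholder_id")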
services/deepfake_checker.py
ADDED
|
@@ -0,0 +1,83 @@
import os
import sys
from typing import Set

try:
    import torch
    from transformers import pipeline
except ImportError:
    print("="*80)
    print("ERROR: Missing critical libraries.")
    print("Please install all required dependencies first:")
    print("pip install torch transformers")
    print("="*80)
    sys.exit(1)

# --- Configuration ---
AUDIO_FORMATS: Set[str] = {'.mp3', '.wav', '.m4a', '.flac', '.ogg'}
DEVICE = 0 if torch.cuda.is_available() else -1  # 0 for CUDA, -1 for CPU
AUDIO_MODEL_ID = "mo-thecreator/Deepfake-audio-detection"

audio_pipeline_instance = None

def get_audio_pipeline():
    """Loads the audio pipeline into memory (if not already loaded)."""
    global audio_pipeline_instance
    if audio_pipeline_instance is None:
        try:
            print(f"Loading audio model '{AUDIO_MODEL_ID}' from Hugging Face Hub...")
            audio_pipeline_instance = pipeline(
                "audio-classification",
                model=AUDIO_MODEL_ID,
                device=DEVICE
            )
            print("Audio detection pipeline loaded successfully.")
        except Exception as e:
            print(f"Error loading audio pipeline: {e}")
            print("Please ensure the model ID is correct.")
            sys.exit(1)
    return audio_pipeline_instance

def detect_audio_deepfake(file_path: str) -> bool:
    """
    Runs a pretrained audio deepfake detection model from the HF Hub.
    """
    print(f"Analyzing audio file: {os.path.basename(file_path)}")
    try:
        detector = get_audio_pipeline()
    except Exception as e:
        print(f"Failed to load audio pipeline: {e}")
        return False  # Fail safe
    try:
        results = detector(file_path)
        best_result = max(results, key=lambda x: x['score'])
        top_label = best_result['label'].lower()
        top_score = best_result['score']
        print(f"...Audio pipeline result: '{top_label}' with score {top_score:.4f}")
        is_fake = top_label in ['spoof', 'fake']
        return is_fake
    except Exception as e:
        print(f"Error during audio processing/inference: {e}")
        return False

def is_audio_deepfake(file_path: str) -> bool:
    """
    Checks if a given audio file is a deepfake.
    Args:
        file_path: The absolute or relative path to the audio file.
    Returns:
        True if the file is classified as a deepfake, False otherwise.
    Raises:
        FileNotFoundError: If the file does not exist.
        ValueError: If the file format is not supported.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found at path: {file_path}")
    ext = os.path.splitext(file_path)[1].lower()
    if ext in AUDIO_FORMATS:
        return detect_audio_deepfake(file_path)
    else:
        raise ValueError(
            f"Unsupported file format: {ext}. Supported types: {AUDIO_FORMATS}"
        )
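A short usage sketch for the module above ("sample.wav" is a placeholder path; the model is downloaded from the Hugging Face Hub on first use):

# Hedged usage sketch for services/deepfake_checker.py
from services.deepfake_checker import is_audio_deepfake

try:
    if is_audio_deepfake("sample.wav"):
        print("Audio classified as spoof/fake")
    else:
        print("Audio classified as genuine")
except (FileNotFoundError, ValueError) as err:
    print(f"Could not analyze file: {err}")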
services/educational_content_generator.py
ADDED
|
@@ -0,0 +1,533 @@
import json
import os
from typing import Dict, List, Optional, Any
import google.generativeai as genai
from upstash_redis import Redis
from config import config

class EducationalContentGenerator:
    """Service for generating educational content about misinformation detection"""

    def __init__(self):
        # Configure Gemini
        genai.configure(api_key=config.GEMINI_API_KEY)
        self.model = genai.GenerativeModel(config.GEMINI_MODEL)

        # Initialize Upstash Redis connection
        try:
            if config.UPSTASH_REDIS_URL and config.UPSTASH_REDIS_TOKEN:
                self.redis_client = Redis(
                    url=config.UPSTASH_REDIS_URL,
                    token=config.UPSTASH_REDIS_TOKEN
                )
                # Test connection
                self.redis_client.set("test", "connection")
                self.redis_client.delete("test")
                print("✅ Upstash Redis connection established")
            else:
                print("⚠️ Upstash Redis credentials not found, running without cache")
                self.redis_client = None
        except Exception as e:
            print(f"❌ Upstash Redis connection failed: {e}")
            self.redis_client = None

        # Cache TTL (Time To Live) in seconds
        self.cache_ttl = config.REDIS_TTL

        # Pre-defined content templates
        self.content_templates = {
            "red_flags": {
                "title": "How to Spot Red Flags in Misinformation",
                "categories": [
                    "Emotional Language",
                    "Suspicious URLs",
                    "Poor Grammar",
                    "Missing Sources",
                    "Outdated Information",
                    "Confirmation Bias Triggers"
                ]
            },
            "source_credibility": {
                "title": "Evaluating Source Credibility",
                "categories": [
                    "Authority Assessment",
                    "Bias Detection",
                    "Fact-checking Methodology",
                    "Peer Review Process",
                    "Transparency Standards"
                ]
            },
            "manipulation_techniques": {
                "title": "Common Manipulation Techniques",
                "categories": [
                    "Deepfakes and AI-generated Content",
                    "Outdated Images",
                    "Misleading Headlines",
                    "False Context",
                    "Social Media Manipulation",
                    "Bot Networks"
                ]
            }
        }

    def _get_cache_key(self, key: str) -> str:
        """Get the Redis cache key"""
        return f"educational:{key}"

    def _load_from_cache(self, cache_key: str) -> Optional[Dict[str, Any]]:
        """Load content from Redis cache if it exists"""
        if not self.redis_client:
            return None

        try:
            cached_data = self.redis_client.get(self._get_cache_key(cache_key))
            if cached_data:
                return json.loads(cached_data)
        except Exception as e:
            print(f"Failed to load from Redis cache {cache_key}: {e}")
        return None

    def _save_to_cache(self, cache_key: str, content: Dict[str, Any]) -> None:
        """Save content to Redis cache"""
        if not self.redis_client:
            return

        try:
            self.redis_client.setex(
                self._get_cache_key(cache_key),
                self.cache_ttl,
                json.dumps(content, ensure_ascii=False)
            )
            print(f"✅ Cached {cache_key} in Redis")
        except Exception as e:
            print(f"Failed to save to Redis cache {cache_key}: {e}")

    async def get_modules_list(self) -> Dict[str, Any]:
        """Get the list of available modules (cached in Redis)"""
        cache_key = "modules_list"
        cached = self._load_from_cache(cache_key)

        if cached:
            print(f"📦 Loading modules list from Redis cache")
            return cached

        print(f"🔄 Generating new modules list")
        # Generate modules list
        modules_data = {
            "modules": [
                {
                    "id": "red_flags",
                    "title": "How to Spot Red Flags",
                    "description": "Learn to identify warning signs in misinformation",
                    "difficulty_levels": ["beginner", "intermediate", "advanced"],
                    "estimated_time": "10-15 minutes"
                },
                {
                    "id": "source_credibility",
                    "title": "Evaluating Source Credibility",
                    "description": "Understand how to assess source reliability",
                    "difficulty_levels": ["beginner", "intermediate", "advanced"],
                    "estimated_time": "15-20 minutes"
                },
                {
                    "id": "manipulation_techniques",
                    "title": "Common Manipulation Techniques",
                    "description": "Learn about various misinformation techniques",
                    "difficulty_levels": ["intermediate", "advanced"],
                    "estimated_time": "20-25 minutes"
                }
            ]
        }

        # Save to Redis cache
        self._save_to_cache(cache_key, modules_data)
        return modules_data

    async def generate_module_content(self, module_type: str, difficulty_level: str = "beginner") -> Dict[str, Any]:
        """
        Generate educational content for a specific module (with Redis caching)

        Args:
            module_type: Type of module (red_flags, source_credibility, etc.)
            difficulty_level: beginner, intermediate, advanced

        Returns:
            Dictionary containing educational content
        """
        # Check Redis cache first
        cache_key = f"{module_type}_{difficulty_level}"
        cached_content = self._load_from_cache(cache_key)

        if cached_content:
            print(f"📦 Loading {module_type} ({difficulty_level}) from Redis cache")
            return cached_content

        print(f"🔄 Generating new content for {module_type} ({difficulty_level})")

        try:
            template = self.content_templates.get(module_type, {})
            if not template:
                return {"error": f"Unknown module type: {module_type}"}

            # Generate content using AI
            content = await self._generate_ai_content(module_type, difficulty_level, template)

            # Add interactive elements
            content["interactive_elements"] = await self._generate_interactive_elements(module_type, difficulty_level)

            # Add real-world examples
            content["examples"] = await self._generate_examples(module_type, difficulty_level)

            # Save to Redis cache
            self._save_to_cache(cache_key, content)

            return content

        except Exception as e:
            print(f"Failed to generate content: {str(e)}")
            # Return fallback content
            fallback = self._get_fallback_content(module_type, difficulty_level)
            self._save_to_cache(cache_key, fallback)
            return fallback

    async def _generate_ai_content(self, module_type: str, difficulty_level: str, template: Dict) -> Dict[str, Any]:
        """Generate AI-powered educational content"""

        prompt = f"""
        You are an expert digital literacy educator specializing in misinformation detection.
        Create comprehensive educational content for the following module:

        MODULE TYPE: {module_type}
        DIFFICULTY LEVEL: {difficulty_level}
        TEMPLATE: {json.dumps(template, indent=2)}

        Create educational content that includes:
        1. Clear explanations of concepts
        2. Step-by-step instructions
        3. Visual indicators to look for
        4. Common mistakes to avoid
        5. Practical exercises

        Respond in this JSON format:
        {{
            "title": "Module title",
            "overview": "Brief overview of what users will learn",
            "learning_objectives": ["Objective 1", "Objective 2", "Objective 3"],
            "content_sections": [
                {{
                    "title": "Section title",
                    "content": "Detailed explanation",
                    "key_points": ["Point 1", "Point 2"],
                    "visual_indicators": ["Indicator 1", "Indicator 2"],
                    "examples": ["Example 1", "Example 2"]
                }}
            ],
            "practical_tips": ["Tip 1", "Tip 2", "Tip 3"],
            "common_mistakes": ["Mistake 1", "Mistake 2"],
            "difficulty_level": "{difficulty_level}"
        }}
        """

        try:
            response = self.model.generate_content(prompt)
            response_text = response.text.strip()

            # Clean up JSON response
            if response_text.startswith('```json'):
                response_text = response_text.replace('```json', '').replace('```', '').strip()
            elif response_text.startswith('```'):
                response_text = response_text.replace('```', '').strip()

            return json.loads(response_text)

        except Exception as e:
            print(f"AI content generation failed: {e}")
            return self._get_fallback_content(module_type, difficulty_level)

    async def _generate_interactive_elements(self, module_type: str, difficulty_level: str) -> Dict[str, Any]:
        """Generate interactive learning elements"""

        prompt = f"""
        Create interactive learning elements for a {difficulty_level} level module about {module_type}.

        Generate:
        1. Quiz questions with multiple choice answers
        2. True/false statements
        3. Scenario-based questions

        Respond in JSON format:
        {{
            "quiz_questions": [
                {{
                    "question": "Question text",
                    "options": ["Option A", "Option B", "Option C", "Option D"],
                    "correct_answer": 0,
                    "explanation": "Why this answer is correct"
                }}
            ],
            "true_false": [
                {{
                    "statement": "Statement to evaluate",
                    "answer": true,
                    "explanation": "Explanation"
                }}
            ],
            "scenarios": [
                {{
                    "scenario": "Real-world scenario description",
                    "question": "What should you do?",
                    "correct_action": "Correct action",
                    "explanation": "Why this is the right approach"
                }}
            ]
        }}
        """

        try:
            response = self.model.generate_content(prompt)
            response_text = response.text.strip()

            if response_text.startswith('```json'):
                response_text = response_text.replace('```json', '').replace('```', '').strip()
            elif response_text.startswith('```'):
                response_text = response_text.replace('```', '').strip()

            return json.loads(response_text)

        except Exception as e:
            print(f"Interactive elements generation failed: {e}")
            return {"quiz_questions": [], "true_false": [], "scenarios": []}

    async def _generate_examples(self, module_type: str, difficulty_level: str) -> List[Dict[str, Any]]:
        """Generate real-world examples"""

        prompt = f"""
        Create realistic examples of {module_type} for {difficulty_level} learners.

        For each example, provide:
        1. A realistic scenario
        2. What to look for
        3. How to verify
        4. Why it's misleading

        Respond in JSON format:
        {{
            "examples": [
                {{
                    "title": "Example title",
                    "scenario": "Realistic scenario description",
                    "red_flags": ["Flag 1", "Flag 2"],
                    "verification_steps": ["Step 1", "Step 2"],
                    "explanation": "Why this is misleading",
                    "difficulty": "{difficulty_level}"
                }}
            ]
        }}
        """

        try:
            response = self.model.generate_content(prompt)
            response_text = response.text.strip()

            if response_text.startswith('```json'):
                response_text = response_text.replace('```json', '').replace('```', '').strip()
            elif response_text.startswith('```'):
                response_text = response_text.replace('```', '').strip()

            result = json.loads(response_text)
            return result.get("examples", [])

        except Exception as e:
            print(f"Examples generation failed: {e}")
            return []

    def _get_fallback_content(self, module_type: str, difficulty_level: str) -> Dict[str, Any]:
        """Fallback content when AI generation fails"""

        fallback_content = {
            "red_flags": {
                "title": "How to Spot Red Flags in Misinformation",
                "overview": "Learn to identify warning signs that content might be misleading",
                "learning_objectives": [
                    "Identify emotional manipulation techniques",
                    "Recognize suspicious URLs and sources",
                    "Spot grammatical and formatting errors",
                    "Understand confirmation bias triggers"
                ],
                "content_sections": [
                    {
                        "title": "Emotional Language",
                        "content": "Misinformation often uses strong emotional language to bypass critical thinking.",
                        "key_points": [
                            "Look for excessive use of emotional words",
                            "Be wary of content that makes you feel angry or scared",
                            "Check if emotions are being used to distract from facts"
                        ],
                        "visual_indicators": ["ALL CAPS", "Multiple exclamation marks", "Emotional imagery"],
                        "examples": ["URGENT!!!", "You won't believe this!", "This will shock you!"]
                    },
                    {
                        "title": "Suspicious URLs",
                        "content": "Fake news often uses URLs that mimic legitimate news sources.",
                        "key_points": [
                            "Check for slight misspellings in domain names",
                            "Look for unusual domain extensions",
                            "Verify the actual website matches the URL"
                        ],
                        "visual_indicators": ["typos in URLs", "unusual extensions", "redirects"],
                        "examples": ["cnn-news.com", "bbc-news.net", "reuters.info"]
                    }
                ],
                "practical_tips": [
                    "Take a deep breath before sharing emotional content",
                    "Ask yourself: 'Why do I feel this way?'",
                    "Look for factual evidence, not just emotional appeals"
                ],
                "common_mistakes": [
                    "Sharing content because it makes you angry",
                    "Ignoring red flags when content confirms your beliefs",
                    "Not checking sources when content feels 'right'"
                ],
                "difficulty_level": difficulty_level
            },
            "source_credibility": {
                "title": "Evaluating Source Credibility",
                "overview": "Learn how to assess whether a source is trustworthy and reliable",
                "learning_objectives": [
                    "Understand what makes a source credible",
                    "Identify bias in news sources",
                    "Evaluate author expertise",
                    "Check source transparency"
                ],
                "content_sections": [
                    {
                        "title": "Authority Assessment",
                        "content": "Credible sources have recognized expertise in their field.",
                        "key_points": [
                            "Check the author's credentials and background",
                            "Look for institutional affiliations",
                            "Verify expertise matches the topic"
                        ],
                        "visual_indicators": ["Author bio", "Credentials listed", "Institutional affiliation"],
                        "examples": ["PhD in relevant field", "Journalist with experience", "Academic institution"]
                    }
                ],
                "practical_tips": [
                    "Always check the 'About' page",
                    "Look for contact information",
                    "Verify claims with multiple sources"
                ],
                "common_mistakes": [
                    "Trusting sources without checking credentials",
                    "Ignoring bias in sources",
                    "Not verifying institutional affiliations"
                ],
                "difficulty_level": difficulty_level
            },
            "manipulation_techniques": {
                "title": "Common Manipulation Techniques",
                "overview": "Understand the various methods used to create and spread misinformation",
                "learning_objectives": [
                    "Recognize different manipulation techniques",
                    "Understand how AI-generated content works",
                    "Identify social media manipulation",
                    "Learn verification strategies"
                ],
                "content_sections": [
                    {
                        "title": "Deepfakes and AI-generated Content",
                        "content": "Advanced technology can create convincing fake videos and images.",
                        "key_points": [
                            "Look for unnatural facial movements",
                            "Check for inconsistencies in lighting",
                            "Verify with original sources"
                        ],
                        "visual_indicators": ["Unnatural blinking", "Lighting inconsistencies", "Audio sync issues"],
                        "examples": ["AI-generated celebrity videos", "Deepfake political speeches"]
                    }
                ],
                "practical_tips": [
                    "Use reverse image search",
                    "Check multiple angles of the same event",
                    "Verify with official sources"
                ],
                "common_mistakes": [
                    "Trusting videos without verification",
                    "Not checking for AI generation",
                    "Sharing before verification"
                ],
                "difficulty_level": difficulty_level
            }
        }

        return fallback_content.get(module_type, {
            "title": f"Educational Module: {module_type}",
            "overview": "Learn about misinformation detection",
            "learning_objectives": ["Understand basic concepts"],
            "content_sections": [],
            "practical_tips": [],
            "common_mistakes": [],
            "difficulty_level": difficulty_level
        })

    async def generate_contextual_learning(self, verification_result: Dict[str, Any]) -> Dict[str, Any]:
        """
        Generate educational content based on a specific verification result

        Args:
            verification_result: Result from fact-checking

        Returns:
            Educational content tailored to the verification result
        """
        try:
            # Extract relevant information from verification result
            verdict = verification_result.get("verdict", "uncertain")
            message = verification_result.get("message", "")
            details = verification_result.get("details", {})

            # Generate contextual learning content
            prompt = f"""
            Based on this fact-checking result, create educational content to help users learn:

            VERDICT: {verdict}
            MESSAGE: {message}
            DETAILS: {json.dumps(details, indent=2)}

            Create learning content that explains:
            1. What this result means
            2. What red flags were found (if any)
            3. How to verify similar claims in the future
            4. Key lessons learned

            Respond in JSON format:
            {{
                "learning_summary": "What users learned from this verification",
                "red_flags_found": ["List of red flags detected"],
                "verification_techniques": ["Techniques used to verify"],
                "future_tips": ["Tips for similar situations"],
                "key_lessons": ["Main takeaways"],
                "related_topics": ["Related educational topics to explore"]
            }}
            """

            response = self.model.generate_content(prompt)
            response_text = response.text.strip()

            if response_text.startswith('```json'):
                response_text = response_text.replace('```json', '').replace('```', '').strip()
            elif response_text.startswith('```'):
                response_text = response_text.replace('```', '').strip()

            return json.loads(response_text)

        except Exception as e:
            print(f"Contextual learning generation failed: {e}")
            return {
                "learning_summary": "Learn to verify information systematically",
                "red_flags_found": [],
                "verification_techniques": ["Source checking", "Cross-referencing"],
                "future_tips": ["Always verify before sharing"],
                "key_lessons": ["Critical thinking is essential"],
                "related_topics": ["Source credibility", "Fact-checking basics"]
            }
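The public methods above are coroutines, so callers need an event loop. A minimal, hedged usage sketch (assumes GEMINI_API_KEY and, optionally, the Upstash Redis credentials are present in config; nothing here is taken from the repository's own callers):

# Hedged usage sketch for services/educational_content_generator.py
import asyncio
from services.educational_content_generator import EducationalContentGenerator

async def main():
    generator = EducationalContentGenerator()
    modules = await generator.get_modules_list()
    print([m["id"] for m in modules["modules"]])
    content = await generator.generate_module_content("red_flags", "beginner")
    print(content.get("title"), "-", len(content.get("content_sections", [])), "sections")

asyncio.run(main())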
services/image_verifier.py
ADDED
|
@@ -0,0 +1,1377 @@
import os
import tempfile
from typing import Dict, Any, Optional, Tuple, List
import requests
from PIL import Image, ImageDraw, ImageFont
import io
import base64
import json
import google.generativeai as genai
# Import SerpApi client - use the correct import path from documentation
GoogleSearch = None  # type: ignore
try:
    from serpapi import GoogleSearch as _GS  # correct import per SerpApi docs
    GoogleSearch = _GS
    print("[serpapi] Successfully imported GoogleSearch from serpapi")
except Exception as e:
    print(f"[serpapi] Failed to import GoogleSearch: {e}")
    GoogleSearch = None  # client unavailable; will fall back to HTTP
from config import config


class ImageVerifier:
    def __init__(self, api_key: Optional[str] = None):
        """
        Initialize the ImageVerifier with SerpApi credentials

        Args:
            api_key: SerpApi API key. If None, will try to get from environment
        """
        self.api_key = api_key or config.SERP_API_KEY
        if not self.api_key:
            raise ValueError("SERP_API_KEY environment variable or api_key parameter is required")

        # Configure Gemini
        if config.GEMINI_API_KEY:
            genai.configure(api_key=config.GEMINI_API_KEY)
            self.gemini_model = genai.GenerativeModel(
                config.GEMINI_MODEL,
                generation_config=genai.types.GenerationConfig(
                    temperature=config.GEMINI_TEMPERATURE,
                    top_p=config.GEMINI_TOP_P,
                    max_output_tokens=config.GEMINI_MAX_TOKENS
                )
            )
        else:
            self.gemini_model = None

        # SerpApi endpoints
        self.base_url_json = "https://serpapi.com/search.json"  # for GET with image_url
        self.base_url_form = "https://serpapi.com/search.json"  # for POST form with image_content

    async def verify(self, image_path: Optional[str] = None, claim_context: str = "", claim_date: str = "", image_url: Optional[str] = None) -> Dict[str, Any]:
        """
        Verify an image using a two-stage approach:
        1. Gemini Vision analyzes the image directly for AI-generated/deepfake/manipulation
        2. Reverse image search + evidence analysis

        Args:
            image_path: Path to the image file
            claim_context: The claimed context of the image
            claim_date: The claimed date of the image
            image_url: URL of the image

        Returns:
            Dictionary with verification results and output file path
        """
        try:
            print("[verify] start", {"claim_context": claim_context, "claim_date": claim_date, "has_image_path": bool(image_path), "has_image_url": bool(image_url)})

            # STEP 0: Gemini Vision analysis of the actual image
            preliminary_analysis = await self._analyze_image_with_vision(
                image_path=image_path,
                image_url=image_url,
                claim_context=claim_context,
                claim_date=claim_date
            )
            print(f"✅ Gemini Vision analysis result: {preliminary_analysis.get('verdict', 'unknown')}")

            # STEP 1: Perform reverse image search (wrap in try/except so vision analysis can still proceed)
            search_results = None
            try:
                search_results = await self._reverse_image_search(image_path=image_path, image_url=image_url)
            except Exception as search_error:
                print(f"⚠️ Reverse image search failed (will use vision analysis only): {search_error}")
                # Continue with vision analysis only - this is fine, we have a fallback

            # STEP 2: Build evidence from SerpApi (reverse image search)
            evidence = []
            curated_analysis = None
            if search_results and (search_results.get("inline_images") or search_results.get("image_results")):
                evidence = self._collect_evidence(search_results)
                print("[verify] serpapi_counts", {
                    "image_results": len(search_results.get("image_results", [])) if isinstance(search_results, dict) else None,
                    "inline_images": len(search_results.get("inline_images", [])) if isinstance(search_results, dict) else None,
                    "status": (search_results.get("search_metadata", {}) or {}).get("status") if isinstance(search_results, dict) else None,
                })
                print("[verify] evidence_collected", {"count": len(evidence), "sample_titles": [e.get("title") for e in evidence[:3]]})

                # Ask Gemini to produce structured verdict + structured claim parse with citations
                filtered_evidence = self._rank_and_filter_evidence(evidence, claim_context, top_k=12)
                print("[verify] preparing_llm_request", {"evidence_count": len(filtered_evidence)})
                curated_analysis = self._summarize_with_gemini_structured(
                    claim_context=claim_context,
                    claim_date=claim_date,
                    evidence=filtered_evidence,
                )
            else:
                print("[verify] No reverse image search results, using vision analysis only")
                filtered_evidence = []

            # STEP 3: Synthesize vision analysis + reverse image search results
            final_response = self._synthesize_vision_and_evidence(
                preliminary_analysis=preliminary_analysis,
                curated_analysis=curated_analysis,
                evidence=filtered_evidence,
                claim_context=claim_context,
                claim_date=claim_date,
            )

            if final_response:
                return final_response

            # Fallback: use vision analysis if available, else curated analysis
            if preliminary_analysis and preliminary_analysis.get("verdict") in ["false", "true"]:
                llm = preliminary_analysis
            elif curated_analysis:
                llm = curated_analysis
            else:
                llm = None
            validator = {"passed": False, "reasons": [], "checks": {}}
            debug_details = {}
            if llm:
                print("[verify] llm_keys", list(llm.keys()))
                base_verdict = (llm.get("verdict") or "uncertain").lower()
                relation_verdict = (llm.get("relation_verdict") or base_verdict).lower()
                # Enforce policy: default to false when the claimed relation isn't supported by evidence.
                cp = (llm.get("claim_parse") or {})
                citations = (cp.get("citations") or {})
                relation_citations = citations.get("relation") or []
                has_any_evidence = bool(filtered_evidence)
                relation_supported = bool(relation_citations)

                if relation_verdict == "false":
                    verdict = "false"
                elif has_any_evidence and not relation_supported:
                    # We have evidence but none supports the claimed relation → false
                    verdict = "false"
                else:
                    verdict = base_verdict
                summary = llm.get("summary") or ""
                # Enforce reputable domain gating + cross-source agreement
                sources = llm.get("top_sources") or self._top_sources(filtered_evidence, 3)
                from urllib.parse import urlparse
                def is_reputable(url: Optional[str]) -> bool:
                    try:
                        net = urlparse(url or "").netloc
                    except Exception:
                        net = ""
                    # Reputable = not low-priority social/UGC domain
                    return bool(net and (net not in config.LOW_PRIORITY_DOMAINS))
                reputable_sources = [s for s in (sources or []) if is_reputable(s.get("link"))]
                # Relation support must come from reputable domains and have >=2 independent domains
                cp = (llm.get("claim_parse") or {})
                rel_cits = (cp.get("citations") or {}).get("relation") or []
                cited_domains = set()
                for j in rel_cits:
                    try:
                        ev = filtered_evidence[int(j)]
                        net = urlparse(ev.get("link") or "").netloc
                        if net and (net not in config.LOW_PRIORITY_DOMAINS):
                            cited_domains.add(net)
                    except Exception:
                        pass
                cross_source_ok = len(cited_domains) >= 2
                # Stronger relation test: require co-mention already validated (checks[relation_comention])
                relation_comention_ok = False
                try:
                    relation_comention_ok = bool(validator["checks"].get("relation_comention"))
                except Exception:
                    relation_comention_ok = False
                if verdict == "true":
                    if not (cross_source_ok and relation_comention_ok):
                        verdict = "uncertain"
                # If verdict is still not false, ensure at least two reputable sources overall
                if verdict == "true" and len({urlparse((s.get("link") or "")).netloc for s in reputable_sources}) < 2:
                    verdict = "uncertain"
                # Run validator: require citations for all extracted parts and relation co-mention
                validator, debug_details = self._validate_llm_parse(
                    claim_text=claim_context,
                    evidence=filtered_evidence,
                    llm=llm,
                )
                # Only downgrade true to uncertain if validator fails; never upgrade false
                if verdict == "true" and not validator.get("passed", False):
                    verdict = "uncertain"
                if verdict == "true":
                    from urllib.parse import urlparse
                    cited_idx = set()
                    cp = (llm.get("claim_parse") or {}).get("citations") or {}
                    for key, val in cp.items():
                        if isinstance(val, list):
                            if key in ["entities","roles"]:
                                for arr in val:
                                    for j in (arr or []):
                                        try:
                                            cited_idx.add(int(j))
                                        except Exception:
                                            pass
                            else:
                                for j in val:
                                    try:
                                        cited_idx.add(int(j))
                                    except Exception:
                                        pass
                    domains = set()
                    for ix in cited_idx:
                        if 0 <= ix < len(filtered_evidence):
                            lk = filtered_evidence[ix].get("link") or ""
                            try:
                                net = urlparse(lk).netloc
                            except Exception:
                                net = ""
                            if net:
                                domains.add(net)
                    print("[verify] domain_independence", {"cited_count": len(cited_idx), "domains": list(domains)})
                    if len(domains) < 2:
                        verdict = "uncertain"
                        validator.setdefault("reasons", []).append("Insufficient domain independence for true verdict")
                print("[verify] gemini_structured", {"verdict": verdict, "summary_preview": summary[:120]})
                print("[verify] validator", validator)
                print("[verify] debug_details_keys", list(debug_details.keys()))
            else:
                # Fallback minimal output
                verdict = "uncertain"
                summary = self._fallback_summary("uncertain", claim_context, claim_date, None, None, None)
                sources = self._top_sources(filtered_evidence, 3)
                print("[verify] gemini_structured_none_fallback", {"verdict": verdict, "summary_preview": summary[:120]})

            if verdict != "false":
                resp = {
                    "verdict": verdict,
                    "summary": summary,
                    "message": summary,
                    "sources": sources,
                    "claim_context": claim_context,
                    "claim_date": claim_date,
                    "validator": validator,
                }
                if config.DEBUG:
                    resp["debug"] = debug_details
                return resp

            # Generate visual counter-measure (pick first usable evidence image)
            evidence_img_url = None
            for ev in filtered_evidence:
                if ev.get("thumbnail"):
                    evidence_img_url = ev.get("thumbnail")
                    break
            if not evidence_img_url:
                for ev in filtered_evidence:
                    if ev.get("link") and isinstance(ev.get("link"), str) and ev.get("link").startswith("http"):
                        evidence_img_url = ev.get("link")
                        break
            evidence_img_url = evidence_img_url or (image_url or "")
            output_path = await self._generate_counter_measure(
                original_image_path=image_path,
                evidence_image_url=evidence_img_url,
                claim_context=claim_context,
                claim_date=claim_date,
                original_image_url=image_url,
            )
            print("[verify] counter_measure_generated", {"output_path": output_path})

            # For false verdict, ensure summary exists
            if not llm or llm.get("verdict", "").lower() != "false":
                # Force LLM to produce a false-context explanation
                llm = self._summarize_with_gemini_structured(
                    claim_context=claim_context,
                    claim_date=claim_date,
                    evidence=filtered_evidence,
                    forced_verdict="false",
                ) or {}
            summary = llm.get("summary") or self._fallback_summary("false", claim_context, claim_date, None, None, None)
            sources = llm.get("top_sources") or self._top_sources(filtered_evidence, 3)
            resp = {
                "verdict": "false",
                "summary": summary,
                "message": summary,
                "sources": sources,
                "output_path": output_path,
                "claim_context": claim_context,
                "claim_date": claim_date,
                "validator": validator,
            }
            if config.DEBUG:
                resp["debug"] = debug_details
            return resp

        except Exception as e:
            return {
                "verdict": "error",
                "summary": f"Error during verification: {str(e)}",
            }

    async def _analyze_image_with_vision(
        self,
        image_path: Optional[str] = None,
        image_url: Optional[str] = None,
        claim_context: str = "",
        claim_date: str = ""
    ) -> Dict[str, Any]:
        """
        Use Gemini Vision to analyze the actual image content for:
        - AI-generated/deepfake indicators
        - Manipulation artifacts
        - Visual inconsistencies
        - Context analysis

        Args:
            image_path: Path to the image file
            image_url: URL of the image
            claim_context: The claimed context
            claim_date: The claimed date

        Returns:
            Dictionary with preliminary analysis
        """
        try:
            if not self.gemini_model:
                return {
                    "verdict": "uncertain",
                    "verified": False,
                    "message": "Gemini Vision not available",
                    "confidence": "low",
                    "analysis_method": "vision_unavailable",
                }

            # Load the image
            import PIL.Image as PILImage
            if image_path:
                img = PILImage.open(image_path)
            elif image_url:
                img = await self._download_image(image_url)
            else:
                return {
                    "verdict": "uncertain",
                    "verified": False,
                    "message": "No image provided for vision analysis",
                    "confidence": "low",
                    "analysis_method": "vision_no_image",
                }

            prompt = f"""You are an expert image forensics analyst. Analyze this image carefully for authenticity and manipulation.

CLAIMED CONTEXT: {claim_context}
CLAIMED DATE: {claim_date}

Analyze the image for:
1. **AI-Generated/Deepfake Indicators**: Look for signs of AI generation (inconsistent lighting, unnatural textures, artifacts around faces/objects, watermarks, telltale patterns)
2. **Manipulation Artifacts**: Check for signs of editing (cloning, copy-paste, inconsistent shadows, lighting mismatches, pixelation patterns)
3. **Visual Inconsistencies**: Look for impossible physics, inconsistent perspectives, mismatched elements
4. **Context Analysis**: Does the visual content match the claimed context and date? (e.g., clothing styles, technology visible, environment)

Respond in JSON format:
{{
    "verdict": "true|false|uncertain",
    "verified": true|false,
    "message": "Clear explanation of your findings",
    "confidence": "high|medium|low",
    "ai_generated_indicators": ["list of specific indicators found"],
    "manipulation_artifacts": ["list of artifacts found"],
    "visual_inconsistencies": ["list of inconsistencies"],
    "context_match": "Does the image content match the claimed context?",
    "reasoning": "Detailed reasoning for your verdict"
}}

Be specific and cite what you see in the image. If uncertain, explain why."""

            # Use Gemini Vision to analyze the image
            response = self.gemini_model.generate_content([prompt, img])

            if not response.text:
                return {
                    "verdict": "uncertain",
                    "verified": False,
                    "message": "Gemini Vision returned no response",
                    "confidence": "low",
                    "analysis_method": "vision_no_response",
                }

            # Parse JSON response
            import json
            response_text = response.text.strip()
            if response_text.startswith("```json"):
                response_text = response_text.replace("```json", "").replace("```", "").strip()
            elif response_text.startswith("```"):
                response_text = response_text.replace("```", "").strip()

            try:
                analysis = json.loads(response_text)
                analysis["analysis_method"] = "gemini_vision"
                return analysis
            except json.JSONDecodeError:
                # Fallback: extract verdict from text
                verdict = "uncertain"
                if "false" in response_text.lower() or "fake" in response_text.lower() or "manipulated" in response_text.lower():
                    verdict = "false"
                elif "true" in response_text.lower() and "not" not in response_text.lower()[:50]:
                    verdict = "true"

                return {
                    "verdict": verdict,
                    "verified": verdict == "true",
                    "message": response_text[:500],
                    "confidence": "medium",
                    "analysis_method": "gemini_vision_fallback",
                    "raw_response": response_text,
                }

        except Exception as e:
            print(f"[vision] Error in Gemini Vision analysis: {e}")
            return {
                "verdict": "uncertain",
                "verified": False,
                "message": f"Error during vision analysis: {str(e)}",
                "confidence": "low",
                "analysis_method": "vision_error",
            }

    def _synthesize_vision_and_evidence(
        self,
        preliminary_analysis: Dict[str, Any],
        curated_analysis: Optional[Dict[str, Any]],
        evidence: List[Dict[str, Any]],
        claim_context: str,
        claim_date: str,
    ) -> Optional[Dict[str, Any]]:
        """
        Synthesize Gemini Vision analysis with reverse image search evidence.
        Similar to text verification's hybrid synthesis.
        """
        try:
            if not self.gemini_model:
                return None

            source_briefs = []
            for item in evidence[:5]:
                source_briefs.append({
                    "title": item.get("title"),
                    "snippet": item.get("snippet"),
                    "link": item.get("link"),
|
| 452 |
+
})
|
| 453 |
+
|
| 454 |
+
prompt = f"""You are an expert image verification analyst. Combine direct image analysis (Gemini Vision) with reverse image search evidence to produce a final verdict.
|
| 455 |
+
|
| 456 |
+
CLAIM: {claim_context}
|
| 457 |
+
CLAIM DATE: {claim_date}
|
| 458 |
+
|
| 459 |
+
DIRECT IMAGE ANALYSIS (Gemini Vision):
|
| 460 |
+
{json.dumps(preliminary_analysis or {}, indent=2, ensure_ascii=False)}
|
| 461 |
+
|
| 462 |
+
REVERSE IMAGE SEARCH ANALYSIS:
|
| 463 |
+
{json.dumps(curated_analysis or {}, indent=2, ensure_ascii=False)}
|
| 464 |
+
|
| 465 |
+
REVERSE IMAGE SEARCH SOURCES:
|
| 466 |
+
{json.dumps(source_briefs, indent=2, ensure_ascii=False)}
|
| 467 |
+
|
| 468 |
+
INSTRUCTIONS:
|
| 469 |
+
- Combine both analyses to make a final decision (true/false/uncertain)
|
| 470 |
+
- If vision analysis detects AI-generated/manipulated content, prioritize that
|
| 471 |
+
- If reverse image search finds contradictory evidence, factor that in
|
| 472 |
+
- If evidence is thin, keep the tone cautious
|
| 473 |
+
- Provide clear, actionable messaging for the end user
|
| 474 |
+
|
| 475 |
+
Respond ONLY in this JSON format:
|
| 476 |
+
{{
|
| 477 |
+
"verdict": "true|false|uncertain",
|
| 478 |
+
"verified": true|false,
|
| 479 |
+
"message": "Concise user-facing summary combining both analyses",
|
| 480 |
+
"confidence": "high|medium|low",
|
| 481 |
+
"reasoning": "Brief reasoning trail you followed",
|
| 482 |
+
"vision_findings": "Key findings from direct image analysis",
|
| 483 |
+
"search_findings": "Key findings from reverse image search"
|
| 484 |
+
}}"""
|
| 485 |
+
|
| 486 |
+
response = self.gemini_model.generate_content(prompt)
|
| 487 |
+
response_text = response.text.strip()
|
| 488 |
+
|
| 489 |
+
if response_text.startswith("```json"):
|
| 490 |
+
response_text = response_text.replace("```json", "").replace("```", "").strip()
|
| 491 |
+
elif response_text.startswith("```"):
|
| 492 |
+
response_text = response_text.replace("```", "").strip()
|
| 493 |
+
|
| 494 |
+
final_analysis = json.loads(response_text)
|
| 495 |
+
final_analysis.setdefault("verdict", "uncertain")
|
| 496 |
+
final_analysis.setdefault("verified", False)
|
| 497 |
+
final_analysis.setdefault("message", "Unable to synthesize final verdict.")
|
| 498 |
+
final_analysis.setdefault("confidence", "low")
|
| 499 |
+
final_analysis["analysis_method"] = "hybrid_vision_and_search"
|
| 500 |
+
|
| 501 |
+
# Build response similar to existing format
|
| 502 |
+
sources = self._top_sources(evidence, 3) if evidence else []
|
| 503 |
+
|
| 504 |
+
return {
|
| 505 |
+
"verdict": final_analysis["verdict"],
|
| 506 |
+
"summary": final_analysis["message"],
|
| 507 |
+
"message": final_analysis["message"],
|
| 508 |
+
"sources": sources,
|
| 509 |
+
"claim_context": claim_context,
|
| 510 |
+
"claim_date": claim_date,
|
| 511 |
+
"confidence": final_analysis.get("confidence", "medium"),
|
| 512 |
+
"analysis_method": "hybrid_vision_and_search",
|
| 513 |
+
"preliminary_analysis": preliminary_analysis,
|
| 514 |
+
"curated_analysis": curated_analysis,
|
| 515 |
+
}
|
| 516 |
+
|
| 517 |
+
except Exception as e:
|
| 518 |
+
print(f"Hybrid synthesis error: {e}")
|
| 519 |
+
return None
|
| 520 |
+
|
| 521 |
+
async def gather_evidence(self, image_path: Optional[str] = None, image_url: Optional[str] = None, claim_context: str = "") -> List[Dict[str, Any]]:
|
| 522 |
+
"""
|
| 523 |
+
Evidence-only helper: performs reverse image search and returns ranked/filtered evidence
|
| 524 |
+
without invoking the LLM or producing a verdict.
|
| 525 |
+
"""
|
| 526 |
+
try:
|
| 527 |
+
print("[verify] start", {"gather_only": True, "has_image_path": bool(image_path), "has_image_url": bool(image_url)})
|
| 528 |
+
search_results = await self._reverse_image_search(image_path=image_path, image_url=image_url)
|
| 529 |
+
if not search_results or (not search_results.get("inline_images") and not search_results.get("image_results")):
|
| 530 |
+
return []
|
| 531 |
+
evidence = self._collect_evidence(search_results)
|
| 532 |
+
filtered = self._rank_and_filter_evidence(evidence, claim_context, top_k=12)
|
| 533 |
+
return filtered
|
| 534 |
+
except Exception as e:
|
| 535 |
+
print(f"[gather_evidence] error: {e}")
|
| 536 |
+
return []
|
| 537 |
+
|
| 538 |
+
def _summarize_with_gemini(self, claim_context: str, claim_date: str, analysis: Dict[str, Any], forced_verdict: Optional[str] = None) -> Optional[Dict[str, Any]]:
|
| 539 |
+
try:
|
| 540 |
+
if not self.gemini_model:
|
| 541 |
+
return None
|
| 542 |
+
|
| 543 |
+
verdict = forced_verdict or analysis.get("verdict", "uncertain")
|
| 544 |
+
prompt = f"""You are a fact-checking assistant. Generate a single, concise sentence (no code blocks, no JSON)
|
| 545 |
+
that explains the verdict. Mirror the provided verdict exactly (do not change it).
|
| 546 |
+
If false, mention the most likely real context/time from evidence; if true, confirm briefly;
|
| 547 |
+
if uncertain, state uncertainty.
|
| 548 |
+
|
| 549 |
+
Claim context: {claim_context}
|
| 550 |
+
Claim date: {claim_date}
|
| 551 |
+
Verdict: {verdict}
|
| 552 |
+
Evidence (condensed): {self._top_sources(analysis.get('evidence', []), 3)}"""
|
| 553 |
+
|
| 554 |
+
response = self.gemini_model.generate_content(prompt)
|
| 555 |
+
text = response.text if response.text else None
|
| 556 |
+
|
| 557 |
+
return {"model": config.GEMINI_MODEL, "verdict": verdict, "text": text}
|
| 558 |
+
except Exception:
|
| 559 |
+
return None
|
| 560 |
+
|
| 561 |
+
def _collect_evidence(self, search_results: Dict[str, Any]) -> List[Dict[str, Any]]:
|
| 562 |
+
evidence: List[Dict[str, Any]] = []
|
| 563 |
+
for res in search_results.get("image_results", []):
|
| 564 |
+
evidence.append({
|
| 565 |
+
"title": res.get("title"),
|
| 566 |
+
"link": res.get("link"),
|
| 567 |
+
"source": res.get("source"),
|
| 568 |
+
"date": res.get("date"),
|
| 569 |
+
"thumbnail": res.get("thumbnail"),
|
| 570 |
+
"snippet": res.get("snippet"),
|
| 571 |
+
})
|
| 572 |
+
for img in search_results.get("inline_images", []):
|
| 573 |
+
evidence.append({
|
| 574 |
+
"title": img.get("title"),
|
| 575 |
+
"link": img.get("link"),
|
| 576 |
+
"source": img.get("source"),
|
| 577 |
+
"thumbnail": img.get("thumbnail"),
|
| 578 |
+
"snippet": img.get("snippet"),
|
| 579 |
+
})
|
| 580 |
+
return evidence
|
| 581 |
+
|
| 582 |
+
def _normalize_tokens(self, text: Optional[str]) -> List[str]:
|
| 583 |
+
if not text:
|
| 584 |
+
return []
|
| 585 |
+
import re
|
| 586 |
+
t = (text or "").lower()
|
| 587 |
+
stop = set(["the","a","an","and","or","for","to","of","in","on","at","with","by","from","this","that","is","are","was","were","as","it","its","their","his","her","him","she","he","they","them","we","you"])
|
| 588 |
+
toks = re.findall(r"[a-z0-9]{3,}", t)
|
| 589 |
+
return [x for x in toks if x not in stop]
|
| 590 |
+
|
| 591 |
+
def _evidence_score(self, claim_text: str, ev: Dict[str, Any]) -> float:
|
| 592 |
+
claim_tokens = set(self._normalize_tokens(claim_text))
|
| 593 |
+
ev_text = " ".join([s for s in [ev.get("title"), ev.get("snippet"), ev.get("source")] if s])
|
| 594 |
+
ev_tokens = set(self._normalize_tokens(ev_text))
|
| 595 |
+
if not claim_tokens or not ev_tokens:
|
| 596 |
+
return 0.0
|
| 597 |
+
overlap = len(claim_tokens & ev_tokens)
|
| 598 |
+
return overlap / float(len(claim_tokens))
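# Illustrative sketch (not part of the diff): the score above is simply
# |claim tokens intersected with evidence tokens| / |claim tokens|. The strings below are made up.
import re

_STOP = {"the", "a", "an", "and", "of", "in", "on"}

def _demo_tokens(text: str) -> set:
    return {t for t in re.findall(r"[a-z0-9]{3,}", (text or "").lower()) if t not in _STOP}

claim_toks = _demo_tokens("Flood in Mumbai during July 2023")      # {'flood', 'mumbai', 'during', 'july', '2023'}
ev_toks = _demo_tokens("Heavy monsoon flood hits Mumbai suburbs")  # 6 tokens
demo_score = len(claim_toks & ev_toks) / len(claim_toks)           # 2 / 5 = 0.4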
|
| 599 |
+
|
| 600 |
+
def _rank_and_filter_evidence(self, evidence: List[Dict[str, Any]], claim_text: str, top_k: int = 12) -> List[Dict[str, Any]]:
|
| 601 |
+
scored: List[Tuple[float, int, Dict[str, Any]]] = []
|
| 602 |
+
for i, ev in enumerate(evidence):
|
| 603 |
+
s = self._evidence_score(claim_text, ev)
|
| 604 |
+
# Downrank social/UGC and YouTube to prefer article pages when checking relations
|
| 605 |
+
try:
|
| 606 |
+
from urllib.parse import urlparse
|
| 607 |
+
net = urlparse((ev.get("link") or "").strip()).netloc
|
| 608 |
+
except Exception:
|
| 609 |
+
net = ""
|
| 610 |
+
if net in config.LOW_PRIORITY_DOMAINS or net in ("youtube.com", "www.youtube.com", "youtu.be"):
|
| 611 |
+
s *= 0.6
|
| 612 |
+
scored.append((s, i, ev))
|
| 613 |
+
scored.sort(key=lambda x: x[0], reverse=True)
|
| 614 |
+
seen_urls = set()
|
| 615 |
+
seen_titles = set()
|
| 616 |
+
filtered: List[Dict[str, Any]] = []
|
| 617 |
+
for s, i, ev in scored:
|
| 618 |
+
url = (ev.get("link") or "").strip()
|
| 619 |
+
title = (ev.get("title") or "").strip().lower()
|
| 620 |
+
title_key = title[:80] if title else ""
|
| 621 |
+
if url and url in seen_urls:
|
| 622 |
+
continue
|
| 623 |
+
if title_key and title_key in seen_titles:
|
| 624 |
+
continue
|
| 625 |
+
filtered.append(ev)
|
| 626 |
+
if url:
|
| 627 |
+
seen_urls.add(url)
|
| 628 |
+
if title_key:
|
| 629 |
+
seen_titles.add(title_key)
|
| 630 |
+
if len(filtered) >= top_k:
|
| 631 |
+
break
|
| 632 |
+
print("[verify] evidence_rank_filter", {"input": len(evidence), "kept": len(filtered)})
|
| 633 |
+
return filtered
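# Illustrative sketch (not part of the diff): the de-duplication pass above keeps the first
# occurrence per URL and per 80-character lowercased title prefix. Sample items are made up.
def _demo_dedup(items, top_k=12):
    seen_urls, seen_titles, kept = set(), set(), []
    for ev in items:
        url = (ev.get("link") or "").strip()
        title_key = (ev.get("title") or "").strip().lower()[:80]
        if (url and url in seen_urls) or (title_key and title_key in seen_titles):
            continue
        kept.append(ev)
        if url:
            seen_urls.add(url)
        if title_key:
            seen_titles.add(title_key)
        if len(kept) >= top_k:
            break
    return kept

# _demo_dedup([{"title": "A", "link": "https://example.com/a"},
#              {"title": "A", "link": "https://example.com/a"}]) keeps only the first item.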
|
| 634 |
+
|
| 635 |
+
def _extract_json(self, text: str) -> Dict[str, Any]:
|
| 636 |
+
# Strip common fences and attempt to locate JSON object
|
| 637 |
+
t = text.strip()
|
| 638 |
+
if t.startswith("```"):
|
| 639 |
+
t = t.split("```", 1)[1]
|
| 640 |
+
t = t.lstrip("json").lstrip("\n").strip()
|
| 641 |
+
if "```" in t:
|
| 642 |
+
t = t.split("```", 1)[0].strip()
|
| 643 |
+
# Find first '{' and last '}'
|
| 644 |
+
start = t.find('{')
|
| 645 |
+
end = t.rfind('}')
|
| 646 |
+
if start != -1 and end != -1 and end > start:
|
| 647 |
+
t = t[start:end+1]
|
| 648 |
+
import json
|
| 649 |
+
return json.loads(t)
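# Illustrative sketch (not part of the diff): how the fence-stripping above turns a
# typical fenced Gemini reply into a dict. The sample reply is made up.
import json

def _demo_extract_json(text: str) -> dict:
    t = text.strip()
    if t.startswith("```"):
        t = t.split("```", 1)[1].lstrip("json").lstrip("\n").strip()
        if "```" in t:
            t = t.split("```", 1)[0].strip()
    start, end = t.find("{"), t.rfind("}")
    if start != -1 and end > start:
        t = t[start:end + 1]
    return json.loads(t)

# _demo_extract_json('```json\n{"verdict": "false"}\n```') -> {'verdict': 'false'}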
|
| 650 |
+
|
| 651 |
+
def _summarize_with_gemini_structured(self, claim_context: str, claim_date: str,
|
| 652 |
+
evidence: List[Dict[str, Any]],
|
| 653 |
+
forced_verdict: Optional[str] = None) -> Optional[Dict[str, Any]]:
|
| 654 |
+
try:
|
| 655 |
+
if not self.gemini_model:
|
| 656 |
+
return None
|
| 657 |
+
|
| 658 |
+
prompt = f"""You are a fact-checking assistant. Use the provided evidence items (title, link, date, source, snippet) to evaluate the FULL claim text.
|
| 659 |
+
The claim can include: event/context, place, timeframe, actors/entities, quantities, and relations/attribution. You may use only the provided evidence items.
|
| 660 |
+
Respond STRICTLY as compact JSON with keys:
|
| 661 |
+
- verdict: one of 'true' | 'false' | 'uncertain'
|
| 662 |
+
- relation_verdict: one of 'true' | 'false' | 'uncertain' (whether the stated relation holds)
|
| 663 |
+
- summary: <= 2 sentences, plain text
|
| 664 |
+
- top_sources: array of up to 3 objects {{title, link}}
|
| 665 |
+
- claim_parse: {{
|
| 666 |
+
entities: array of strings,
|
| 667 |
+
roles: array of strings,
|
| 668 |
+
relation: {{ predicate: string, subject: string, object: string }},
|
| 669 |
+
timeframe: {{ year: number|null, month: number|null }},
|
| 670 |
+
location: string|null,
|
| 671 |
+
citations: {{
|
| 672 |
+
entities: array of arrays of evidence indices (per entity),
|
| 673 |
+
roles: array of arrays of evidence indices (per role),
|
| 674 |
+
relation: array of evidence indices supporting subject+predicate+object together,
|
| 675 |
+
timeframe: array of evidence indices supporting the timeframe,
|
| 676 |
+
location: array of evidence indices supporting the location
|
| 677 |
+
}}
|
| 678 |
+
}}
|
| 679 |
+
Rules:
|
| 680 |
+
- verdict 'true' ONLY if evidence supports ALL key parts: event/context, place, timeframe, AND any stated relation.
|
| 681 |
+
- relation_verdict 'false' if the evidence supports a different relation and none supports the claimed relation.
|
| 682 |
+
- verdict 'false' if relation_verdict is 'false' or if place/time contradicts the claim without supporting evidence.
|
| 683 |
+
- 'uncertain' if ANY extracted part in claim_parse has no supporting citations.
|
| 684 |
+
- relation consistency: at least one cited evidence item MUST co-mention subject and object tokens with the predicate.
|
| 685 |
+
Do not include code fences or extra text; return only the JSON object.
|
| 686 |
+
|
| 687 |
+
Claim text: {claim_context}
|
| 688 |
+
Claim date: {claim_date}
|
| 689 |
+
Forced verdict: {forced_verdict}
|
| 690 |
+
Evidence: {evidence}"""
|
| 691 |
+
|
| 692 |
+
print("[gemini] request_meta", {"model": config.GEMINI_MODEL, "temp": config.GEMINI_TEMPERATURE, "topP": config.GEMINI_TOP_P})
|
| 693 |
+
response = self.gemini_model.generate_content(prompt)
|
| 694 |
+
|
| 695 |
+
if not response.text:
|
| 696 |
+
return None
|
| 697 |
+
|
| 698 |
+
text = response.text.strip()
|
| 699 |
+
print("[gemini] structured_text_preview", text[:200])
|
| 700 |
+
parsed = self._extract_json(text)
|
| 701 |
+
print("[gemini] parsed_json_keys", list(parsed.keys()) if isinstance(parsed, dict) else type(parsed).__name__)
|
| 702 |
+
return parsed if isinstance(parsed, dict) else None
|
| 703 |
+
|
| 704 |
+
except Exception as e:
|
| 705 |
+
print(f"[gemini] error: {e}")
|
| 706 |
+
return None
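# Illustrative sketch (not part of the diff): the JSON shape that the structured prompt
# above asks Gemini to return. Every value here is a made-up placeholder.
example_structured_verdict = {
    "verdict": "false",
    "relation_verdict": "false",
    "summary": "The image shows an earlier event at a different location.",
    "top_sources": [{"title": "Archive article", "link": "https://example.com/article"}],
    "claim_parse": {
        "entities": ["<subject>", "<object>"],
        "roles": ["<role>"],
        "relation": {"predicate": "<predicate>", "subject": "<subject>", "object": "<object>"},
        "timeframe": {"year": 2023, "month": 7},
        "location": "<city>",
        "citations": {
            "entities": [[0], [1]],
            "roles": [[0]],
            "relation": [0],
            "timeframe": [1],
            "location": [1],
        },
    },
}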
|
| 707 |
+
|
| 708 |
+
def _summarize_with_gemini_majority(self, claim_context: str, claim_date: str,
|
| 709 |
+
evidence: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
|
| 710 |
+
"""
|
| 711 |
+
Simpler majority-based prompt: ask Gemini to decide true/false by which side has more supporting
|
| 712 |
+
evidence; only return uncertain if support is roughly equal/ambiguous.
|
| 713 |
+
Returns compact JSON: { verdict, clarification, corrected_relation, top_sources }
|
| 714 |
+
"""
|
| 715 |
+
try:
|
| 716 |
+
if not self.gemini_model:
|
| 717 |
+
return None
|
| 718 |
+
prompt = f"""You are a citation-driven fact-checking assistant.
|
| 719 |
+
Given a CLAIM and a list of EVIDENCE items (title, link, date, source, snippet), decide if the CLAIM itself is true or false.
|
| 720 |
+
|
| 721 |
+
STRICT adjudication rules (apply literally to the CLAIM):
|
| 722 |
+
1) Extract the relation from the CLAIM as:
|
| 723 |
+
relation: {{ predicate: string, subject: string, object: string }}
|
| 724 |
+
2) Evaluate ONLY the CLAIM's relation. Mentions of a different object (alternative person/role/event/location) are NOT support for the CLAIM.
|
| 725 |
+
3) SUPPORT only when an evidence item explicitly co-mentions the CLAIM's subject AND the CLAIM's object with the predicate in title/snippet (token-level match; paraphrases of those tokens are fine). General marital status or vague wording does NOT count as support if the CLAIM's object is not explicitly present.
|
| 726 |
+
4) CONTRADICTION when evidence explicitly supports a mutually exclusive alternative relation (e.g., same subject + predicate with a different object), or explicitly negates the CLAIM.
|
| 727 |
+
5) Social/UGC links may appear; still judge by content but prefer clearer, explicit co-mentions from any source.
|
| 728 |
+
6) Decision for the CLAIM:
|
| 729 |
+
- If SUPPORT > CONTRADICTION by a meaningful margin, verdict = "true".
|
| 730 |
+
- If CONTRADICTION > SUPPORT by a meaningful margin, verdict = "false".
|
| 731 |
+
- If neither side is clearly stronger or no explicit co-mentions exist, verdict = "uncertain".
|
| 732 |
+
7) Use only the provided EVIDENCE texts; no outside knowledge.
|
| 733 |
+
|
| 734 |
+
Output strictly as compact JSON with keys (and nothing else):
|
| 735 |
+
verdict: one of 'true' | 'false' | 'uncertain'
|
| 736 |
+
clarification: one concise sentence that answers the CLAIM directly. If verdict is 'false' or 'uncertain', state the most supported alternative relation (e.g., "<subject> was not <predicate> <object>. Instead, <subject> <predicate> <alt_object> at <context>."). Avoid hedging like "does not confirm".
|
| 737 |
+
corrected_relation: {{ predicate: string, subject: string, object: string }} | null
|
| 738 |
+
top_sources: up to 3 objects {{title, link}}
|
| 739 |
+
|
| 740 |
+
CLAIM: {claim_context}
|
| 741 |
+
CLAIM_DATE: {claim_date}
|
| 742 |
+
EVIDENCE: {evidence}
|
| 743 |
+
"""
|
| 744 |
+
print("[gemini] request_meta", {"model": config.GEMINI_MODEL, "temp": config.GEMINI_TEMPERATURE, "topP": config.GEMINI_TOP_P})
|
| 745 |
+
response = self.gemini_model.generate_content(prompt)
|
| 746 |
+
if not response.text:
|
| 747 |
+
return None
|
| 748 |
+
text = response.text.strip()
|
| 749 |
+
print("[gemini] structured_text_preview", text[:200])
|
| 750 |
+
parsed = self._extract_json(text)
|
| 751 |
+
print("[gemini] parsed_json_keys", list(parsed.keys()) if isinstance(parsed, dict) else type(parsed).__name__)
|
| 752 |
+
return parsed if isinstance(parsed, dict) else None
|
| 753 |
+
except Exception as e:
|
| 754 |
+
print(f"[gemini] error: {e}")
|
| 755 |
+
return None
|
| 756 |
+
|
| 757 |
+
def _top_sources(self, evidence: List[Dict[str, Any]], k: int) -> List[Dict[str, Any]]:
|
| 758 |
+
items = []
|
| 759 |
+
for e in evidence:
|
| 760 |
+
title = e.get("title")
|
| 761 |
+
link = e.get("link")
|
| 762 |
+
if title or link:
|
| 763 |
+
items.append({"title": title, "link": link})
|
| 764 |
+
if len(items) >= k:
|
| 765 |
+
break
|
| 766 |
+
return items
|
| 767 |
+
|
| 768 |
+
def _validate_llm_parse(self, claim_text: str, evidence: List[Dict[str, Any]], llm: Dict[str, Any]) -> Tuple[Dict[str, Any], Dict[str, Any]]:
|
| 769 |
+
checks: Dict[str, Any] = {}
|
| 770 |
+
reasons: List[str] = []
|
| 771 |
+
passed = True
|
| 772 |
+
parse = (llm or {}).get("claim_parse") or {}
|
| 773 |
+
citations = parse.get("citations") or {}
|
| 774 |
+
# Helper to get combined text for an evidence index
|
| 775 |
+
def ev_text(i: int) -> str:
|
| 776 |
+
if i < 0 or i >= len(evidence):
|
| 777 |
+
return ""
|
| 778 |
+
ev = evidence[i]
|
| 779 |
+
return " ".join([t for t in [ev.get("title"), ev.get("snippet")] if t])
|
| 780 |
+
# 1) Ensure each entities[] and roles[] item has at least one citation
|
| 781 |
+
for key in ["entities", "roles"]:
|
| 782 |
+
items = parse.get(key) or []
|
| 783 |
+
cits = citations.get(key) or []
|
| 784 |
+
ok = bool(items) and len(cits) == len(items) and all(len(lst) > 0 for lst in cits if isinstance(lst, list))
|
| 785 |
+
checks[f"{key}_citations"] = ok
|
| 786 |
+
if not ok:
|
| 787 |
+
passed = False
|
| 788 |
+
reasons.append(f"Missing citations for {key}")
|
| 789 |
+
# 2) timeframe and location citations exist if present
|
| 790 |
+
for key in ["timeframe", "location"]:
|
| 791 |
+
has_item = bool(parse.get(key))
|
| 792 |
+
if has_item:
|
| 793 |
+
ok = bool(citations.get(key)) and len(citations.get(key)) > 0
|
| 794 |
+
checks[f"{key}_citations"] = ok
|
| 795 |
+
if not ok:
|
| 796 |
+
passed = False
|
| 797 |
+
reasons.append(f"Missing citations for {key}")
|
| 798 |
+
# 2b) If location cited, require token presence in at least one cited item
|
| 799 |
+
def _tok(text: str) -> set:
|
| 800 |
+
import re
|
| 801 |
+
return set(re.findall(r"[a-z0-9]{3,}", (text or "").lower()))
|
| 802 |
+
if parse.get("location") and citations.get("location"):
|
| 803 |
+
loc_toks = _tok(str(parse.get("location") or ""))
|
| 804 |
+
loc_token_ok = False
|
| 805 |
+
for i in citations.get("location"):
|
| 806 |
+
try:
|
| 807 |
+
it = _tok(ev_text(int(i)))
|
| 808 |
+
except Exception:
|
| 809 |
+
it = set()
|
| 810 |
+
if loc_toks and (loc_toks & it):
|
| 811 |
+
loc_token_ok = True
|
| 812 |
+
break
|
| 813 |
+
checks["location_token_match"] = loc_token_ok
|
| 814 |
+
if not loc_token_ok:
|
| 815 |
+
passed = False
|
| 816 |
+
reasons.append("Location tokens not found in cited items")
|
| 817 |
+
# 3) relation citations and co-mention (subject/object in same item)
|
| 818 |
+
relation = parse.get("relation") or {}
|
| 819 |
+
subj = (relation.get("subject") or "").strip()
|
| 820 |
+
obj = (relation.get("object") or "").strip()
|
| 821 |
+
# Token-based co-mention: require at least one informative token from subject and object in same item
|
| 822 |
+
def tokens(text: str) -> List[str]:
|
| 823 |
+
import re
|
| 824 |
+
return re.findall(r"[a-z0-9]{3,}", (text or "").lower())
|
| 825 |
+
subj_toks = set(tokens(subj))
|
| 826 |
+
obj_toks = set(tokens(obj))
|
| 827 |
+
rel_indices: List[int] = citations.get("relation") or []
|
| 828 |
+
rel_ok = False
|
| 829 |
+
for idx in rel_indices:
|
| 830 |
+
txt = ev_text(int(idx))
|
| 831 |
+
tl_toks = set(tokens(txt))
|
| 832 |
+
if subj_toks and obj_toks and (subj_toks & tl_toks) and (obj_toks & tl_toks):
|
| 833 |
+
rel_ok = True
|
| 834 |
+
break
|
| 835 |
+
checks["relation_comention"] = rel_ok
|
| 836 |
+
# Allow pooled-evidence relation support via shared anchors if co-mention failed
|
| 837 |
+
pooled_ok = False
|
| 838 |
+
pooled_detail: Dict[str, Any] = {}
|
| 839 |
+
if not rel_ok:
|
| 840 |
+
try:
|
| 841 |
+
entity_list: List[str] = (parse.get("entities") or [])
|
| 842 |
+
entity_cits: List[List[int]] = (citations.get("entities") or [])
|
| 843 |
+
def _tokens(text: str) -> set:
|
| 844 |
+
import re
|
| 845 |
+
return set(re.findall(r"[a-z0-9]{3,}", (text or "").lower()))
|
| 846 |
+
# Map subject/object to entity indices by token overlap
|
| 847 |
+
def best_entity_indices(name_toks: set) -> List[int]:
|
| 848 |
+
scored: List[Tuple[int,int]] = []
|
| 849 |
+
for idx, ent in enumerate(entity_list):
|
| 850 |
+
et = _tokens(ent)
|
| 851 |
+
scored.append((len(name_toks & et), idx))
|
| 852 |
+
scored.sort(reverse=True)
|
| 853 |
+
return [i for s,i in scored if s > 0]
|
| 854 |
+
subj_toks_set = _tokens(subj)
|
| 855 |
+
obj_toks_set = _tokens(obj)
|
| 856 |
+
subj_idxs = best_entity_indices(subj_toks_set) if subj_toks_set else []
|
| 857 |
+
obj_idxs = best_entity_indices(obj_toks_set) if obj_toks_set else []
|
| 858 |
+
subj_pool: List[int] = []
|
| 859 |
+
obj_pool: List[int] = []
|
| 860 |
+
for si in subj_idxs:
|
| 861 |
+
if si < len(entity_cits) and isinstance(entity_cits[si], list):
|
| 862 |
+
for v in entity_cits[si]:
|
| 863 |
+
try:
|
| 864 |
+
subj_pool.append(int(v))
|
| 865 |
+
except Exception:
|
| 866 |
+
pass
|
| 867 |
+
for oi in obj_idxs:
|
| 868 |
+
if oi < len(entity_cits) and isinstance(entity_cits[oi], list):
|
| 869 |
+
for v in entity_cits[oi]:
|
| 870 |
+
try:
|
| 871 |
+
obj_pool.append(int(v))
|
| 872 |
+
except Exception:
|
| 873 |
+
pass
|
| 874 |
+
subj_pool = list({int(x) for x in subj_pool})
|
| 875 |
+
obj_pool = list({int(x) for x in obj_pool})
|
| 876 |
+
# Anchors from claim parse
|
| 877 |
+
anchor_year = None
|
| 878 |
+
tf = parse.get("timeframe") or {}
|
| 879 |
+
try:
|
| 880 |
+
anchor_year = int(tf.get("year")) if tf.get("year") is not None else None
|
| 881 |
+
except Exception:
|
| 882 |
+
anchor_year = None
|
| 883 |
+
anchor_month_name = None
|
| 884 |
+
try:
|
| 885 |
+
mn = int(tf.get("month")) if tf.get("month") is not None else None
|
| 886 |
+
months = ["january","february","march","april","may","june","july","august","september","october","november","december"]
|
| 887 |
+
anchor_month_name = months[mn-1] if mn and 1 <= mn <= 12 else None
|
| 888 |
+
except Exception:
|
| 889 |
+
anchor_month_name = None
|
| 890 |
+
loc_tokens = _tok(str(parse.get("location") or ""))
|
| 891 |
+
claim_event_tokens = _tok(claim_text)
|
| 892 |
+
import re
|
| 893 |
+
def item_text(idx: int) -> str:
|
| 894 |
+
return ev_text(idx)
|
| 895 |
+
def has_year(idx: int) -> bool:
|
| 896 |
+
return bool(anchor_year is not None and re.search(rf"\b{anchor_year}\b", item_text(idx) or ""))
|
| 897 |
+
def has_month(idx: int) -> bool:
|
| 898 |
+
return bool(anchor_month_name and (anchor_month_name in (item_text(idx) or "").lower()))
|
| 899 |
+
def has_loc(idx: int) -> bool:
|
| 900 |
+
return bool(loc_tokens and (loc_tokens & _tok(item_text(idx))))
|
| 901 |
+
def event_overlap(idx1: int, idx2: int) -> bool:
|
| 902 |
+
t1 = _tok(item_text(idx1))
|
| 903 |
+
t2 = _tok(item_text(idx2))
|
| 904 |
+
return bool((claim_event_tokens & t1) and (claim_event_tokens & t2))
|
| 905 |
+
def anchors_align(i: int, j: int) -> Tuple[bool, List[str]]:
|
| 906 |
+
reasons: List[str] = []
|
| 907 |
+
if has_year(i) and has_year(j):
|
| 908 |
+
reasons.append("year")
|
| 909 |
+
if has_month(i) and has_month(j):
|
| 910 |
+
reasons.append("month")
|
| 911 |
+
if has_loc(i) and has_loc(j):
|
| 912 |
+
reasons.append("location")
|
| 913 |
+
if event_overlap(i, j):
|
| 914 |
+
reasons.append("event")
|
| 915 |
+
return (len(reasons) > 0, reasons)
|
| 916 |
+
for si in subj_pool:
|
| 917 |
+
for oj in obj_pool:
|
| 918 |
+
ok, rs = anchors_align(int(si), int(oj))
|
| 919 |
+
if ok:
|
| 920 |
+
pooled_ok = True
|
| 921 |
+
pooled_detail = {"subj_idx": int(si), "obj_idx": int(oj), "anchors": rs}
|
| 922 |
+
break
|
| 923 |
+
if pooled_ok:
|
| 924 |
+
break
|
| 925 |
+
except Exception:
|
| 926 |
+
pooled_ok = False
|
| 927 |
+
checks["relation_pooled_anchor"] = pooled_ok
|
| 928 |
+
if pooled_ok:
|
| 929 |
+
checks["relation_pooled_detail"] = pooled_detail
|
| 930 |
+
if not rel_ok and not pooled_ok:
|
| 931 |
+
passed = False
|
| 932 |
+
reasons.append("Relation not supported by co-mention or pooled anchors")
|
| 933 |
+
# 4) Simple entity overlap score between claim tokens and cited items
|
| 934 |
+
import re
|
| 935 |
+
claim_tokens = set([t.lower() for t in re.findall(r"[A-Za-z]{3,}", claim_text or "")])
|
| 936 |
+
cited_indices = set()
|
| 937 |
+
for arr in (citations.get("entities") or []):
|
| 938 |
+
for i in arr:
|
| 939 |
+
try:
|
| 940 |
+
cited_indices.add(int(i))
|
| 941 |
+
except Exception:
|
| 942 |
+
pass
|
| 943 |
+
overlap_hits = 0
|
| 944 |
+
for i in cited_indices:
|
| 945 |
+
tl = ev_text(i).lower()
|
| 946 |
+
if any(tok in tl for tok in claim_tokens):
|
| 947 |
+
overlap_hits += 1
|
| 948 |
+
entity_overlap_score = overlap_hits / (len(cited_indices) or 1)
|
| 949 |
+
checks["entity_overlap_score"] = entity_overlap_score
|
| 950 |
+
# 5) Date check: allow year and optional month names from claim timeframe in cited items
|
| 951 |
+
year = None
|
| 952 |
+
month_num = None
|
| 953 |
+
tf = parse.get("timeframe") or {}
|
| 954 |
+
try:
|
| 955 |
+
year = int(tf.get("year")) if tf.get("year") is not None else None
|
| 956 |
+
except Exception:
|
| 957 |
+
year = None
|
| 958 |
+
try:
|
| 959 |
+
month_num = int(tf.get("month")) if tf.get("month") is not None else None
|
| 960 |
+
except Exception:
|
| 961 |
+
month_num = None
|
| 962 |
+
date_ok = True
|
| 963 |
+
if year is not None:
|
| 964 |
+
date_ok = False
|
| 965 |
+
for i in (citations.get("timeframe") or []):
|
| 966 |
+
try:
|
| 967 |
+
ev = evidence[int(i)]
|
| 968 |
+
except Exception:
|
| 969 |
+
continue
|
| 970 |
+
text = " ".join([t for t in [ev.get("title"), ev.get("snippet"), ev.get("date"), ev.get("source"), ev.get("link")] if t])
|
| 971 |
+
if re.search(rf"\b{year}\b", text or ""):
|
| 972 |
+
date_ok = True
|
| 973 |
+
break
|
| 974 |
+
# Month name matching if provided
|
| 975 |
+
if month_num is not None:
|
| 976 |
+
month_names = [
|
| 977 |
+
"january","february","march","april","may","june",
|
| 978 |
+
"july","august","september","october","november","december"
|
| 979 |
+
]
|
| 980 |
+
mname = month_names[month_num-1] if 1 <= month_num <= 12 else None
|
| 981 |
+
if mname and (mname in (text or "").lower()):
|
| 982 |
+
date_ok = True
|
| 983 |
+
break
|
| 984 |
+
checks["timeframe_match"] = date_ok
|
| 985 |
+
if not date_ok:
|
| 986 |
+
passed = False
|
| 987 |
+
reasons.append("Timeframe year not supported in cited items")
|
| 988 |
+
# Domains used (for logging only)
|
| 989 |
+
from urllib.parse import urlparse
|
| 990 |
+
domains = []
|
| 991 |
+
for ev in evidence:
|
| 992 |
+
try:
|
| 993 |
+
net = urlparse(ev.get("link") or "").netloc
|
| 994 |
+
except Exception:
|
| 995 |
+
net = ""
|
| 996 |
+
if net:
|
| 997 |
+
domains.append(net)
|
| 998 |
+
debug = {
|
| 999 |
+
"claim_parse": parse,
|
| 1000 |
+
"citations": citations,
|
| 1001 |
+
"domains_used": domains,
|
| 1002 |
+
}
|
| 1003 |
+
return {"passed": passed, "reasons": reasons, "checks": checks}, debug
|
| 1004 |
+
|
| 1005 |
+
def _fallback_summary(self, verdict: str, claim_context: str, claim_date: str,
|
| 1006 |
+
best_title: Optional[str], best_link: Optional[str], best_year: Optional[int]) -> str:
|
| 1007 |
+
if verdict == "false":
|
| 1008 |
+
where = best_title or "another place/time"
|
| 1009 |
+
when = str(best_year) if best_year else "an earlier date"
|
| 1010 |
+
src = best_link or "a corroborating source"
|
| 1011 |
+
return f"Claim is false. The image corresponds to {where} from {when}, not {claim_context}, {claim_date}. Source: {src}."
|
| 1012 |
+
if verdict == "true":
|
| 1013 |
+
return f"Claim is true. The available evidence supports {claim_context}, {claim_date}."
|
| 1014 |
+
return f"Claim is uncertain. Evidence is inconclusive for {claim_context}, {claim_date}."
|
| 1015 |
+
|
| 1016 |
+
def _clean_summary_text(self, text: Optional[str]) -> str:
|
| 1017 |
+
if not text:
|
| 1018 |
+
return ""
|
| 1019 |
+
t = text.strip()
|
| 1020 |
+
# Remove common code-fence wrappers
|
| 1021 |
+
if t.startswith("```"):
|
| 1022 |
+
# drop first fence
|
| 1023 |
+
t = t.split("```", 1)[1]
|
| 1024 |
+
# drop language tag if present
|
| 1025 |
+
t = t.lstrip("\n").split("\n", 1)[-1] if "\n" in t else t
|
| 1026 |
+
# drop trailing fence
|
| 1027 |
+
if "```" in t:
|
| 1028 |
+
t = t.rsplit("```", 1)[0]
|
| 1029 |
+
return t.strip()
|
| 1030 |
+
|
| 1031 |
+
async def _reverse_image_search(self, image_path: Optional[str] = None, image_url: Optional[str] = None) -> Dict[str, Any]:
|
| 1032 |
+
"""
|
| 1033 |
+
Perform reverse image search using SerpApi
|
| 1034 |
+
|
| 1035 |
+
Args:
|
| 1036 |
+
image_path: Path to the image file
|
| 1037 |
+
image_url: URL of the image
|
| 1038 |
+
|
| 1039 |
+
Returns:
|
| 1040 |
+
Search results from SerpApi
|
| 1041 |
+
"""
|
| 1042 |
+
try:
|
| 1043 |
+
if GoogleSearch is None:
|
| 1044 |
+
raise RuntimeError("google-search-results package not available. Install with: pip install google-search-results")
|
| 1045 |
+
|
| 1046 |
+
# Build params per SerpApi docs - use official client for ALL requests
|
| 1047 |
+
params: Dict[str, Any] = {
|
| 1048 |
+
"engine": "google_reverse_image",
|
| 1049 |
+
"api_key": self.api_key,
|
| 1050 |
+
}
|
| 1051 |
+
|
| 1052 |
+
if image_url:
|
| 1053 |
+
# Use image_url parameter for URLs
|
| 1054 |
+
params["image_url"] = image_url
|
| 1055 |
+
print("[serpapi] Using image_url parameter")
|
| 1056 |
+
elif image_path:
|
| 1057 |
+
# For local files, upload to Cloudinary first to get a public URL
|
| 1058 |
+
try:
|
| 1059 |
+
cloudinary_url = await self._upload_to_cloudinary(image_path)
|
| 1060 |
+
if cloudinary_url:
|
| 1061 |
+
params["image_url"] = cloudinary_url
|
| 1062 |
+
print(f"[serpapi] Using Cloudinary URL: {cloudinary_url}")
|
| 1063 |
+
else:
|
| 1064 |
+
print("[serpapi] Cloudinary upload failed, falling back to base64")
|
| 1065 |
+
# Fallback to base64 if Cloudinary fails
|
| 1066 |
+
with open(image_path, "rb") as img_file:
|
| 1067 |
+
img_data = img_file.read()
|
| 1068 |
+
img_base64 = base64.b64encode(img_data).decode("utf-8")
|
| 1069 |
+
params["image_content"] = img_base64
|
| 1070 |
+
print("[serpapi] Using image_content parameter (base64 fallback)")
|
| 1071 |
+
except Exception as e:
|
| 1072 |
+
print(f"[serpapi] Error uploading to Cloudinary: {e}")
|
| 1073 |
+
# Fallback to base64
|
| 1074 |
+
with open(image_path, "rb") as img_file:
|
| 1075 |
+
img_data = img_file.read()
|
| 1076 |
+
img_base64 = base64.b64encode(img_data).decode("utf-8")
|
| 1077 |
+
params["image_content"] = img_base64
|
| 1078 |
+
print("[serpapi] Using image_content parameter (base64 fallback)")
|
| 1079 |
+
|
| 1080 |
+
# Debug prints
|
| 1081 |
+
print("[serpapi] params", {
|
| 1082 |
+
"engine": params.get("engine"),
|
| 1083 |
+
"has_image_url": bool(params.get("image_url")),
|
| 1084 |
+
"has_image_content": bool(params.get("image_content")),
|
| 1085 |
+
"image_content_len": len(params.get("image_content", "")) if params.get("image_content") else 0,
|
| 1086 |
+
})
|
| 1087 |
+
|
| 1088 |
+
# Use different approaches based on whether we have image_url or image_content
|
| 1089 |
+
if params.get("image_url"):
|
| 1090 |
+
# For image_url, use the official client (works well)
|
| 1091 |
+
print("[serpapi] Using official GoogleSearch client for image_url")
|
| 1092 |
+
search = GoogleSearch(params) # type: ignore
|
| 1093 |
+
results = search.get_dict()
|
| 1094 |
+
print("[serpapi] Successfully got results from GoogleSearch client")
|
| 1095 |
+
return results
|
| 1096 |
+
else:
|
| 1097 |
+
# For image_content (base64), use direct HTTP POST to avoid header size issues
|
| 1098 |
+
print("[serpapi] Using direct HTTP POST for image_content (base64)")
|
| 1099 |
+
try:
|
| 1100 |
+
import requests
|
| 1101 |
+
response = requests.post(
|
| 1102 |
+
"https://serpapi.com/search?engine=google_reverse_image",
|
| 1103 |
+
data=params,
|
| 1104 |
+
timeout=60
|
| 1105 |
+
)
|
| 1106 |
+
print(f"[serpapi] HTTP POST status: {response.status_code}")
|
| 1107 |
+
response.raise_for_status()
|
| 1108 |
+
results = response.json()
|
| 1109 |
+
print("[serpapi] Successfully got results from HTTP POST")
|
| 1110 |
+
return results
|
| 1111 |
+
except Exception as http_error:
|
| 1112 |
+
print(f"[serpapi] HTTP POST failed: {http_error}")
|
| 1113 |
+
return {}
|
| 1114 |
+
|
| 1115 |
+
except Exception as e:
|
| 1116 |
+
print(f"[serpapi] Error in reverse image search: {e}")
|
| 1117 |
+
print(f"[serpapi] Error type: {type(e).__name__}")
|
| 1118 |
+
import traceback
|
| 1119 |
+
print(f"[serpapi] Traceback: {traceback.format_exc()}")
|
| 1120 |
+
return {}
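# Illustrative sketch (not part of the diff): a minimal standalone call to SerpApi's
# google_reverse_image engine with the official client, mirroring the params built above.
# The API key and image URL are placeholders.
from serpapi import GoogleSearch

params = {
    "engine": "google_reverse_image",
    "image_url": "https://example.com/photo.jpg",
    "api_key": "YOUR_SERPAPI_KEY",
}
results = GoogleSearch(params).get_dict()
inline_images = results.get("inline_images", [])
image_results = results.get("image_results", [])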
|
| 1121 |
+
|
| 1122 |
+
def _extract_year_from_text(self, text: str) -> Optional[int]:
|
| 1123 |
+
if not text:
|
| 1124 |
+
return None
|
| 1125 |
+
import re
|
| 1126 |
+
years = re.findall(r"(19\d{2}|20\d{2})", text)
|
| 1127 |
+
if not years:
|
| 1128 |
+
return None
|
| 1129 |
+
try:
|
| 1130 |
+
return int(years[0])
|
| 1131 |
+
except Exception:
|
| 1132 |
+
return None
|
| 1133 |
+
|
| 1134 |
+
def _context_mismatch(self, claim_context_lc: str, text: str) -> bool:
|
| 1135 |
+
t = (text or "").lower()
|
| 1136 |
+
if not claim_context_lc:
|
| 1137 |
+
return False
|
| 1138 |
+
# Simple heuristic: if text contains a strong, different location keyword
|
| 1139 |
+
known = {
|
| 1140 |
+
"mumbai": ["delhi", "bangalore", "chennai", "kolkata", "new york", "london"],
|
| 1141 |
+
"new york": ["mumbai", "delhi", "london", "paris", "dubai"],
|
| 1142 |
+
}
|
| 1143 |
+
for k, others in known.items():
|
| 1144 |
+
if claim_context_lc == k:
|
| 1145 |
+
if any(o in t for o in others):
|
| 1146 |
+
return True
|
| 1147 |
+
return False
|
| 1148 |
+
|
| 1149 |
+
async def _generate_counter_measure(self, original_image_path: Optional[str], evidence_image_url: str,
|
| 1150 |
+
claim_context: str, claim_date: str, original_image_url: Optional[str] = None) -> str:
|
| 1151 |
+
"""
|
| 1152 |
+
Generate a visual counter-measure image
|
| 1153 |
+
|
| 1154 |
+
Args:
|
| 1155 |
+
original_image_path: Path to the original misleading image
|
| 1156 |
+
evidence_image_url: URL of the evidence image
|
| 1157 |
+
claim_context: The claimed context
|
| 1158 |
+
claim_date: The claimed date
|
| 1159 |
+
|
| 1160 |
+
Returns:
|
| 1161 |
+
Path to the generated counter-measure image
|
| 1162 |
+
"""
|
| 1163 |
+
try:
|
| 1164 |
+
# Load original image: from path if available, else download from original_image_url
|
| 1165 |
+
if original_image_path:
|
| 1166 |
+
original_img = Image.open(original_image_path)
|
| 1167 |
+
elif original_image_url:
|
| 1168 |
+
original_img = await self._download_image(original_image_url)
|
| 1169 |
+
else:
|
| 1170 |
+
# Fallback to evidence image as placeholder
|
| 1171 |
+
original_img = await self._download_image(evidence_image_url)
|
| 1172 |
+
|
| 1173 |
+
# Download evidence image
|
| 1174 |
+
evidence_img = await self._download_image(evidence_image_url)
|
| 1175 |
+
|
| 1176 |
+
# Create counter-measure
|
| 1177 |
+
counter_measure = self._create_counter_measure_image(
|
| 1178 |
+
original_img, evidence_img, claim_context, claim_date
|
| 1179 |
+
)
|
| 1180 |
+
|
| 1181 |
+
# Save to temporary file
|
| 1182 |
+
output_path = tempfile.mktemp(suffix=".png")
|
| 1183 |
+
counter_measure.save(output_path, "PNG")
|
| 1184 |
+
|
| 1185 |
+
return output_path
|
| 1186 |
+
|
| 1187 |
+
except Exception as e:
|
| 1188 |
+
print(f"Error generating counter-measure: {e}")
|
| 1189 |
+
raise
|
| 1190 |
+
|
| 1191 |
+
async def _upload_to_cloudinary(self, image_path: str) -> Optional[str]:
|
| 1192 |
+
"""
|
| 1193 |
+
Upload image to Cloudinary and return the public URL
|
| 1194 |
+
|
| 1195 |
+
Args:
|
| 1196 |
+
image_path: Path to the source image file
|
| 1197 |
+
|
| 1198 |
+
Returns:
|
| 1199 |
+
Cloudinary public URL of the uploaded image, or None if upload fails
|
| 1200 |
+
"""
|
| 1201 |
+
try:
|
| 1202 |
+
import cloudinary
|
| 1203 |
+
import cloudinary.uploader
|
| 1204 |
+
from config import config
|
| 1205 |
+
|
| 1206 |
+
# Configure Cloudinary
|
| 1207 |
+
cloudinary.config(
|
| 1208 |
+
cloud_name=config.CLOUDINARY_CLOUD_NAME,
|
| 1209 |
+
api_key=config.CLOUDINARY_API_KEY,
|
| 1210 |
+
api_secret=config.CLOUDINARY_API_SECRET
|
| 1211 |
+
)
|
| 1212 |
+
|
| 1213 |
+
# Upload to Cloudinary with frames folder
|
| 1214 |
+
result = cloudinary.uploader.upload(
|
| 1215 |
+
image_path,
|
| 1216 |
+
folder="frames",
|
| 1217 |
+
resource_type="image"
|
| 1218 |
+
)
|
| 1219 |
+
|
| 1220 |
+
if result and result.get('secure_url'):
|
| 1221 |
+
public_url = result['secure_url']
|
| 1222 |
+
print(f"[cloudinary] Uploaded {image_path} to {public_url}")
|
| 1223 |
+
return public_url
|
| 1224 |
+
else:
|
| 1225 |
+
print("[cloudinary] Upload failed - no secure_url in response")
|
| 1226 |
+
return None
|
| 1227 |
+
|
| 1228 |
+
except Exception as e:
|
| 1229 |
+
print(f"[cloudinary] Error uploading to Cloudinary: {e}")
|
| 1230 |
+
return None
|
| 1231 |
+
|
| 1232 |
+
async def _copy_to_public_folder(self, image_path: str) -> Optional[str]:
|
| 1233 |
+
"""
|
| 1234 |
+
Copy image to public/frames folder and return the public URL
|
| 1235 |
+
|
| 1236 |
+
Args:
|
| 1237 |
+
image_path: Path to the source image file
|
| 1238 |
+
|
| 1239 |
+
Returns:
|
| 1240 |
+
Public URL of the copied image, or None if copy fails
|
| 1241 |
+
"""
|
| 1242 |
+
try:
|
| 1243 |
+
import shutil
|
| 1244 |
+
import uuid
|
| 1245 |
+
from pathlib import Path
|
| 1246 |
+
|
| 1247 |
+
# Create public/frames directory if it doesn't exist
|
| 1248 |
+
public_frames_dir = Path("public/frames")
|
| 1249 |
+
public_frames_dir.mkdir(parents=True, exist_ok=True)
|
| 1250 |
+
|
| 1251 |
+
# Generate unique filename
|
| 1252 |
+
file_extension = Path(image_path).suffix
|
| 1253 |
+
unique_filename = f"{uuid.uuid4()}{file_extension}"
|
| 1254 |
+
public_path = public_frames_dir / unique_filename
|
| 1255 |
+
|
| 1256 |
+
# Copy the file
|
| 1257 |
+
shutil.copy2(image_path, public_path)
|
| 1258 |
+
|
| 1259 |
+
# Return the public URL
|
| 1260 |
+
public_url = f"http://127.0.0.1:{config.SERVICE_PORT}/frames/{unique_filename}"
|
| 1261 |
+
print(f"[copy] Copied {image_path} to {public_path}")
|
| 1262 |
+
print(f"[copy] Public URL: {public_url}")
|
| 1263 |
+
|
| 1264 |
+
return public_url
|
| 1265 |
+
|
| 1266 |
+
except Exception as e:
|
| 1267 |
+
print(f"[copy] Error copying to public folder: {e}")
|
| 1268 |
+
return None
|
| 1269 |
+
|
| 1270 |
+
async def _download_image(self, image_url: str) -> Image.Image:
|
| 1271 |
+
"""
|
| 1272 |
+
Download an image from URL
|
| 1273 |
+
|
| 1274 |
+
Args:
|
| 1275 |
+
image_url: URL of the image to download
|
| 1276 |
+
|
| 1277 |
+
Returns:
|
| 1278 |
+
PIL Image object
|
| 1279 |
+
"""
|
| 1280 |
+
try:
|
| 1281 |
+
headers = {
|
| 1282 |
+
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0 Safari/537.36",
|
| 1283 |
+
"Referer": "https://www.google.com/",
|
| 1284 |
+
}
|
| 1285 |
+
response = requests.get(image_url, timeout=15, headers=headers, stream=True)
|
| 1286 |
+
response.raise_for_status()
|
| 1287 |
+
content_type = response.headers.get("Content-Type", "").lower()
|
| 1288 |
+
if "image" not in content_type:
|
| 1289 |
+
# Not an image (likely a webpage); return placeholder
|
| 1290 |
+
return Image.new('RGB', (300, 200), color='gray')
|
| 1291 |
+
data = response.content
|
| 1292 |
+
img = Image.open(io.BytesIO(data))
|
| 1293 |
+
return img
|
| 1294 |
+
except Exception:
|
| 1295 |
+
# Return a placeholder image if download fails
|
| 1296 |
+
return Image.new('RGB', (300, 200), color='gray')
|
| 1297 |
+
|
| 1298 |
+
def _create_counter_measure_image(self, original_img: Image.Image, evidence_img: Image.Image,
|
| 1299 |
+
claim_context: str, claim_date: str) -> Image.Image:
|
| 1300 |
+
"""
|
| 1301 |
+
Create the counter-measure image with side-by-side comparison
|
| 1302 |
+
|
| 1303 |
+
Args:
|
| 1304 |
+
original_img: The original misleading image
|
| 1305 |
+
evidence_img: The evidence image
|
| 1306 |
+
claim_context: The claimed context
|
| 1307 |
+
claim_date: The claimed date
|
| 1308 |
+
|
| 1309 |
+
Returns:
|
| 1310 |
+
Generated counter-measure image
|
| 1311 |
+
"""
|
| 1312 |
+
# Resize images to consistent dimensions
|
| 1313 |
+
target_width, target_height = 400, 300
|
| 1314 |
+
|
| 1315 |
+
original_img = original_img.resize((target_width, target_height), Image.Resampling.LANCZOS)
|
| 1316 |
+
evidence_img = evidence_img.resize((target_width, target_height), Image.Resampling.LANCZOS)
|
| 1317 |
+
|
| 1318 |
+
# Create canvas for side-by-side layout
|
| 1319 |
+
canvas_width = target_width * 2 + 50 # Extra space for padding
|
| 1320 |
+
canvas_height = target_height + 200 # Extra space for labels and watermark
|
| 1321 |
+
|
| 1322 |
+
canvas = Image.new('RGB', (canvas_width, canvas_height), 'white')
|
| 1323 |
+
draw = ImageDraw.Draw(canvas)
|
| 1324 |
+
|
| 1325 |
+
# Try to load a font, fall back to default if not available
|
| 1326 |
+
try:
|
| 1327 |
+
font_large = ImageFont.truetype("/System/Library/Fonts/Arial.ttf", 24)
|
| 1328 |
+
font_medium = ImageFont.truetype("/System/Library/Fonts/Arial.ttf", 18)
|
| 1329 |
+
font_small = ImageFont.truetype("/System/Library/Fonts/Arial.ttf", 14)
|
| 1330 |
+
except Exception:
|
| 1331 |
+
font_large = ImageFont.load_default()
|
| 1332 |
+
font_medium = ImageFont.load_default()
|
| 1333 |
+
font_small = ImageFont.load_default()
|
| 1334 |
+
|
| 1335 |
+
# Add title
|
| 1336 |
+
title = "FALSE CONTEXT DETECTED"
|
| 1337 |
+
title_bbox = draw.textbbox((0, 0), title, font=font_large)
|
| 1338 |
+
title_width = title_bbox[2] - title_bbox[0]
|
| 1339 |
+
title_x = (canvas_width - title_width) // 2
|
| 1340 |
+
draw.text((title_x, 20), title, fill='red', font=font_large)
|
| 1341 |
+
|
| 1342 |
+
# Add original image (left side)
|
| 1343 |
+
original_x = 25
|
| 1344 |
+
original_y = 80
|
| 1345 |
+
canvas.paste(original_img, (original_x, original_y))
|
| 1346 |
+
|
| 1347 |
+
# Add evidence image (right side)
|
| 1348 |
+
evidence_x = original_x + target_width + 25
|
| 1349 |
+
evidence_y = original_y
|
| 1350 |
+
canvas.paste(evidence_img, (evidence_x, evidence_y))
|
| 1351 |
+
|
| 1352 |
+
# Add labels
|
| 1353 |
+
claim_label = f"CLAIM: {claim_context}, {claim_date}"
|
| 1354 |
+
reality_label = "REALITY: Different context/earlier date"
|
| 1355 |
+
|
| 1356 |
+
draw.text((original_x, original_y - 30), claim_label, fill='red', font=font_medium)
|
| 1357 |
+
draw.text((evidence_x, evidence_y - 30), reality_label, fill='green', font=font_medium)
|
| 1358 |
+
|
| 1359 |
+
# Add watermark
|
| 1360 |
+
watermark = "FALSE CONTEXT"
|
| 1361 |
+
watermark_img = Image.new('RGBA', canvas.size, (0, 0, 0, 0))
|
| 1362 |
+
watermark_draw = ImageDraw.Draw(watermark_img)
|
| 1363 |
+
|
| 1364 |
+
# Create semi-transparent watermark
|
| 1365 |
+
watermark_bbox = watermark_draw.textbbox((0, 0), watermark, font=font_large)
|
| 1366 |
+
watermark_width = watermark_bbox[2] - watermark_bbox[0]
|
| 1367 |
+
watermark_height = watermark_bbox[3] - watermark_bbox[1]
|
| 1368 |
+
|
| 1369 |
+
watermark_x = (canvas_width - watermark_width) // 2
|
| 1370 |
+
watermark_y = (canvas_height - watermark_height) // 2
|
| 1371 |
+
|
| 1372 |
+
watermark_draw.text((watermark_x, watermark_y), watermark, fill=(255, 0, 0, 128), font=font_large)
|
| 1373 |
+
|
| 1374 |
+
# Composite watermark onto canvas
|
| 1375 |
+
canvas = Image.alpha_composite(canvas.convert('RGBA'), watermark_img).convert('RGB')
|
| 1376 |
+
|
| 1377 |
+
return canvas
|
services/input_processor.py
ADDED
|
@@ -0,0 +1,308 @@
| 1 |
+
import os
|
| 2 |
+
import re
|
| 3 |
+
import json
|
| 4 |
+
from typing import Dict, List, Optional, Union, Tuple
|
| 5 |
+
import google.generativeai as genai
|
| 6 |
+
import tempfile
|
| 7 |
+
from config import config
|
| 8 |
+
|
| 9 |
+
class InputProcessor:
|
| 10 |
+
"""
|
| 11 |
+
Intelligent input processor that converts chatbot input into structured verification requests
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
def __init__(self):
|
| 15 |
+
# Configure Gemini
|
| 16 |
+
genai.configure(api_key=config.GEMINI_API_KEY)
|
| 17 |
+
self.model = genai.GenerativeModel(
|
| 18 |
+
config.GEMINI_MODEL,
|
| 19 |
+
generation_config=genai.types.GenerationConfig(
|
| 20 |
+
temperature=config.GEMINI_TEMPERATURE,
|
| 21 |
+
top_p=config.GEMINI_TOP_P,
|
| 22 |
+
max_output_tokens=config.GEMINI_MAX_TOKENS
|
| 23 |
+
)
|
| 24 |
+
)
|
| 25 |
+
|
| 26 |
+
self.system_prompt = """You are an intelligent input processor for a visual verification service.
|
| 27 |
+
|
| 28 |
+
Your task is to analyze user input and extract:
|
| 29 |
+
1. Image/video/audio content (files, URLs, or descriptions)
|
| 30 |
+
2. Claim context (what the user is claiming)
|
| 31 |
+
3. Claim date (when the claim was made)
|
| 32 |
+
4. Type of verification needed (image, video, audio, or text)
|
| 33 |
+
|
| 34 |
+
Return a JSON response with this structure:
|
| 35 |
+
{
|
| 36 |
+
"verification_type": "image" or "video" or "audio" or "text",
|
| 37 |
+
"content": {
|
| 38 |
+
"files": ["list of file paths if files provided"],
|
| 39 |
+
"urls": ["list of image/video/audio URLs"],
|
| 40 |
+
"descriptions": ["list of text descriptions"],
|
| 41 |
+
"text": "the text claim to verify (if verification_type is text)"
|
| 42 |
+
},
|
| 43 |
+
"claim_context": "extracted or inferred claim context",
|
| 44 |
+
"claim_date": "extracted or inferred date"
|
| 45 |
+
}
|
| 46 |
+
|
| 47 |
+
Rules:
|
| 48 |
+
- If multiple images/videos/audio files are mentioned, separate them clearly
|
| 49 |
+
- Extract URLs from text using regex patterns
|
| 50 |
+
- Infer context from surrounding text if not explicitly stated
|
| 51 |
+
- If no date is mentioned, leave it blank
|
| 52 |
+
- Handle mixed content types appropriately"""
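# Illustrative sketch (not part of the diff): the structured request the prompt above asks
# for, filled in for a hypothetical chat message. All values are made up.
example_structured_request = {
    "verification_type": "image",
    "content": {
        "files": [],
        "urls": ["https://example.com/viral-photo.jpg"],
        "descriptions": ["Photo said to show a flood in Mumbai"],
        "text": None,
    },
    "claim_context": "Photo said to show a flood in Mumbai",
    "claim_date": "",
}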
|
| 53 |
+
|
| 54 |
+
async def process_input(
|
| 55 |
+
self,
|
| 56 |
+
text_input: Optional[str] = None,
|
| 57 |
+
files: Optional[List] = None
|
| 58 |
+
) -> Dict:
|
| 59 |
+
"""
|
| 60 |
+
Process chatbot input and return structured verification request
|
| 61 |
+
"""
|
| 62 |
+
try:
|
| 63 |
+
print(f"🔍 DEBUG: InputProcessor.process_input called")
|
| 64 |
+
print(f"🔍 DEBUG: text_input = {text_input}")
|
| 65 |
+
print(f"🔍 DEBUG: files = {files}")
|
| 66 |
+
print(f"🔍 DEBUG: files type = {type(files)}")
|
| 67 |
+
|
| 68 |
+
# Prepare input for LLM analysis
|
| 69 |
+
print(f"🔍 DEBUG: Preparing input text for LLM analysis")
|
| 70 |
+
input_text = self._prepare_input_text(text_input, files)
|
| 71 |
+
print(f"🔍 DEBUG: Prepared input_text = {input_text}")
|
| 72 |
+
|
| 73 |
+
# Get LLM analysis
|
| 74 |
+
print(f"🔍 DEBUG: Calling LLM analysis")
|
| 75 |
+
llm_response = await self._analyze_with_llm(input_text)
|
| 76 |
+
print(f"🔍 DEBUG: LLM response = {llm_response}")
|
| 77 |
+
|
| 78 |
+
# Parse and validate LLM response
|
| 79 |
+
print(f"🔍 DEBUG: Parsing LLM response")
|
| 80 |
+
parsed_response = self._parse_llm_response(llm_response)
|
| 81 |
+
print(f"🔍 DEBUG: Parsed response = {parsed_response}")
|
| 82 |
+
|
| 83 |
+
            # Post-process and enhance the response
            print(f"🔍 DEBUG: Post-processing response")
            final_response = await self._post_process_response(parsed_response, files)

            # PATCH: If verification_type is 'video' but all files have audio extensions, reassign to 'audio'
            audio_exts = ['.mp3', '.wav', '.ogg', '.flac', '.m4a']
            content_files = final_response.get('content', {}).get('files', [])
            if (
                final_response.get('verification_type') == 'video' and
                content_files and
                all(any(f.lower().endswith(e) for e in audio_exts) for f in content_files)
            ):
                print(f"🔍 PATCH: Rewriting 'verification_type' from 'video' to 'audio' (all files are audio)")
                final_response['verification_type'] = 'audio'
            print(f"🔍 DEBUG: Final response = {final_response}")
            return final_response

        except Exception as e:
            print(f"❌ DEBUG: Exception in InputProcessor.process_input: {e}")
            print(f"❌ DEBUG: Exception type: {type(e).__name__}")
            import traceback
            print(f"❌ DEBUG: Traceback: {traceback.format_exc()}")
            return {
                "error": f"Failed to process input: {str(e)}",
                "verification_type": "unknown",
                "content": {"files": [], "urls": [], "descriptions": []},
                "claim_context": "Unknown context",
                "claim_date": "Unknown date",
            }

    def _prepare_input_text(self, text_input: Optional[str], files: Optional[List]) -> str:
        """Prepare input text for LLM analysis"""
        print(f"🔍 DEBUG: _prepare_input_text called with text_input={text_input}, files={files}")
        input_parts = []

        if text_input:
            input_parts.append(f"Text input: {text_input}")
            print(f"🔍 DEBUG: Added text input: {text_input}")

        if files:
            file_info = []
            for i, file in enumerate(files):
                file_info.append(f"File {i+1}: {file.filename} ({file.content_type})")
                print(f"🔍 DEBUG: Added file {i+1}: {file.filename} ({file.content_type})")
            input_parts.append(f"Files provided: {'; '.join(file_info)}")

        if not input_parts:
            input_parts.append("No text or files provided")
            print(f"🔍 DEBUG: No input parts, using default message")

        result = "\n".join(input_parts)
        print(f"🔍 DEBUG: Final prepared input text: {result}")
        return result

    async def _analyze_with_llm(self, input_text: str) -> str:
        """Use Gemini to analyze the input"""
        try:
            print(f"🔍 DEBUG: _analyze_with_llm called with input_text: {input_text}")
            prompt = f"{self.system_prompt}\n\nUser input: {input_text}"
            print(f"🔍 DEBUG: Generated prompt: {prompt}")
            response = self.model.generate_content(prompt)
            print(f"🔍 DEBUG: LLM response text: {response.text}")
            return response.text
        except Exception as e:
            print(f"❌ DEBUG: LLM analysis failed: {e}")
            print(f"🔍 DEBUG: Falling back to rule-based parsing")
            # Fallback to rule-based parsing if LLM fails
            return self._fallback_parsing(input_text)

    def _fallback_parsing(self, input_text: str) -> str:
        """Fallback parsing when LLM is unavailable"""
        print(f"🔍 DEBUG: _fallback_parsing called with input_text: {input_text}")

        # Extract URLs using regex
        url_pattern = r'https?://[^\s<>"]+|www\.[^\s<>"]+'
        urls = re.findall(url_pattern, input_text)
        print(f"🔍 DEBUG: Extracted URLs: {urls}")

        # Simple content type detection
        verification_type = "text"  # default for text-only queries

        # Check for video platform URLs first
        video_platforms = [
            'instagram.com/reels/', 'instagram.com/p/', 'instagram.com/tv/',
            'youtube.com/watch', 'youtu.be/', 'youtube.com/shorts/',
            'tiktok.com/', 'vm.tiktok.com/',
            'twitter.com/', 'x.com/', 't.co/',
            'facebook.com/', 'fb.watch/',
            'vimeo.com/', 'twitch.tv/', 'dailymotion.com/',
            'imgur.com/', 'soundcloud.com/', 'mixcloud.com/',
            'lbry.tv/', 'odysee.com/', 't.me/'
        ]

        # Check for image platform URLs
        image_platforms = [
            'instagram.com/p/', 'imgur.com/', 'flickr.com/',
            'pinterest.com/', 'unsplash.com/', 'pexels.com/'
        ]

        # Check for direct file extensions
        if any(ext in input_text.lower() for ext in ['.mp4', '.avi', '.mov', '.mkv', '.webm', 'video']):
            verification_type = "video"
        elif any(ext in input_text.lower() for ext in ['.jpg', '.jpeg', '.png', '.gif', '.webp', 'image', 'photo', 'picture']):
            verification_type = "image"
        elif any(ext in input_text.lower() for ext in ['.mp3', '.wav', '.ogg', '.flac', '.m4a', 'audio']):
            verification_type = "audio"
        # Check for video platform URLs
        elif any(platform in input_text.lower() for platform in video_platforms):
            verification_type = "video"
        # Check for image platform URLs
        elif any(platform in input_text.lower() for platform in image_platforms):
            verification_type = "image"

        print(f"🔍 DEBUG: Detected verification_type: {verification_type}")

        # Extract date patterns
        date_pattern = r'\d{1,2}[/-]\d{1,2}[/-]\d{2,4}|\d{4}[/-]\d{1,2}[/-]\d{1,2}'
        dates = re.findall(date_pattern, input_text)
        claim_date = dates[0] if dates else "Unknown date"
        print(f"🔍 DEBUG: Extracted dates: {dates}, using: {claim_date}")

        # Clean up the input text for better processing
        clean_text = input_text.replace("Text input: ", "").strip()

        result = {
            "verification_type": verification_type,
            "content": {
                "files": [],
                "urls": urls,
                "descriptions": [clean_text],
                "text": clean_text if verification_type == "text" else None
            },
            "claim_context": clean_text,
            "claim_date": claim_date,
        }
        print(f"🔍 DEBUG: Fallback parsing result: {result}")
        return json.dumps(result)

    def _parse_llm_response(self, llm_response: str) -> Dict:
        """Parse and validate LLM response"""
        try:
            print(f"🔍 DEBUG: _parse_llm_response called with llm_response: {llm_response}")
            # Extract JSON from response
            json_match = re.search(r'\{.*\}', llm_response, re.DOTALL)
            if json_match:
                print(f"🔍 DEBUG: Found JSON match: {json_match.group()}")
                parsed = json.loads(json_match.group())
                print(f"🔍 DEBUG: Parsed JSON: {parsed}")
            else:
                print(f"❌ DEBUG: No JSON found in response")
                raise ValueError("No JSON found in response")

            # Validate required fields
            required_fields = ["verification_type", "content", "claim_context", "claim_date"]
            for field in required_fields:
                if field not in parsed:
                    print(f"❌ DEBUG: Missing required field: {field}")
                    raise ValueError(f"Missing required field: {field}")

            print(f"🔍 DEBUG: Successfully parsed and validated response")
            return parsed

        except Exception as e:
            print(f"❌ DEBUG: Failed to parse LLM response: {e}")
            print(f"🔍 DEBUG: Returning safe defaults")
            # Return safe defaults if parsing fails
            return {
                "verification_type": "image",
                "content": {"files": [], "urls": [], "descriptions": []},
                "claim_context": "Unknown context",
                "claim_date": "Unknown date",
            }

    async def _post_process_response(self, parsed_response: Dict, files: Optional[List]) -> Dict:
        """Post-process the parsed response and add file information"""
        print(f"🔍 DEBUG: _post_process_response called with parsed_response: {parsed_response}, files: {files}")

        # Add actual file information if files were provided
        if files:
            print(f"🔍 DEBUG: Processing {len(files)} files")
            file_paths = []
            for i, file in enumerate(files):
                print(f"🔍 DEBUG: Saving file {i}: {file.filename}")
                # Save file temporarily and get path
                temp_path = await self._save_temp_file(file)
                if temp_path:
                    file_paths.append(temp_path)
                    print(f"🔍 DEBUG: Saved file {i} to: {temp_path}")
                else:
                    print(f"❌ DEBUG: Failed to save file {i}")

            parsed_response["content"]["files"] = file_paths
            print(f"🔍 DEBUG: Updated files list: {file_paths}")
        else:
            print(f"🔍 DEBUG: No files to process")

        print(f"🔍 DEBUG: Final post-processed response: {parsed_response}")
        return parsed_response

    async def _save_temp_file(self, file) -> Optional[str]:
        """Save uploaded file temporarily and return path"""
        try:
            print(f"🔍 DEBUG: _save_temp_file called for file: {file.filename}")
            # Create temp file
            import os
            suffix = os.path.splitext(file.filename)[1] if file.filename else ""
            print(f"🔍 DEBUG: Using suffix: {suffix}")
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
                content = await file.read()
                print(f"🔍 DEBUG: Read {len(content)} bytes from file")
                temp_file.write(content)
                temp_path = temp_file.name
            print(f"🔍 DEBUG: Saved temp file to: {temp_path}")
            return temp_path
        except Exception as e:
            print(f"❌ DEBUG: Failed to save temp file: {e}")
            return None

    def cleanup_temp_files(self, file_paths: List[str]):
        """Clean up temporary files"""
        for path in file_paths:
            try:
                if os.path.exists(path):
                    os.unlink(path)
            except Exception as e:
                print(f"Failed to cleanup temp file {path}: {e}")

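For orientation, here is a sketch of the shape this processor hands to the rest of the pipeline when the rule-based fallback above runs; the sample claim and URL are made up for illustration and are not part of the module. `_fallback_parsing` returns this dictionary serialised with `json.dumps`.

# Illustrative shape only (assumed sample input, not committed code):
# what _fallback_parsing would produce for
#   "Did this clip air on 12/05/2024? https://youtu.be/abc123"
# The youtu.be/ URL routes the claim to the video branch and the date
# regex captures 12/05/2024.
expected = {
    "verification_type": "video",
    "content": {
        "files": [],
        "urls": ["https://youtu.be/abc123"],
        "descriptions": ["Did this clip air on 12/05/2024? https://youtu.be/abc123"],
        "text": None,
    },
    "claim_context": "Did this clip air on 12/05/2024? https://youtu.be/abc123",
    "claim_date": "12/05/2024",
}
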
services/mongodb_service.py
ADDED
@@ -0,0 +1,684 @@
"""
MongoDB Service for Backend
Handles MongoDB operations for debunk posts
"""

import os
import logging
from typing import List, Dict, Any, Optional
from pymongo import MongoClient
from pymongo.errors import ConnectionFailure
from dotenv import load_dotenv

load_dotenv()

# Setup logging
logger = logging.getLogger(__name__)

class MongoDBService:
    """MongoDB service for backend operations"""

    def __init__(self, connection_string: Optional[str] = None):
        """Initialize MongoDB connection

        Args:
            connection_string: MongoDB connection string. If None, uses MONGO_CONNECTION_STRING env var
        """
        self.connection_string = connection_string or os.getenv('MONGO_CONNECTION_STRING')

        if not self.connection_string:
            raise ValueError("MongoDB connection string is required. Set MONGO_CONNECTION_STRING environment variable.")

        self.client = None
        self.db = None
        self.collection = None
        self.chat_sessions = None
        self.chat_messages = None

        self._connect()

    def _connect(self):
        """Establish MongoDB connection"""
        try:
            self.client = MongoClient(self.connection_string)
            # Test connection
            self.client.admin.command('ping')

            # Use 'aegis' database
            self.db = self.client["aegis"]
            self.collection = self.db["debunk_posts"]

            # Additional collections used by other features
            self.chat_sessions = self.db["chat_sessions"]
            self.chat_messages = self.db["chat_messages"]
            self.subscriptions = self.db["subscriptions"]
            self.users = self.db["users"]

            logger.info("✅ Successfully connected to MongoDB")

        except ConnectionFailure as e:
            logger.error(f"❌ Failed to connect to MongoDB: {e}")
            raise

    def get_recent_posts(self, limit: int = 5) -> List[Dict[str, Any]]:
        """Get recent debunk posts from MongoDB

        Args:
            limit: Maximum number of posts to return

        Returns:
            List of recent debunk posts
        """
        try:
            logger.info(f"🔍 DEBUG: Starting get_recent_posts with limit={limit}")
            logger.info(f"🔍 DEBUG: Collection name: {self.collection.name}")
            logger.info(f"🔍 DEBUG: Database name: {self.db.name}")

            # Check if collection exists and has documents
            total_count = self.collection.count_documents({})
            logger.info(f"🔍 DEBUG: Total documents in collection: {total_count}")

            if total_count == 0:
                logger.warning("⚠️ DEBUG: Collection is empty!")
                return []

            # Get sample document to check structure
            sample_doc = self.collection.find_one()
            if sample_doc:
                logger.info(f"🔍 DEBUG: Sample document keys: {list(sample_doc.keys())}")
                logger.info(f"🔍 DEBUG: Sample document _id: {sample_doc.get('_id')}")
                logger.info(f"🔍 DEBUG: Sample document stored_at: {sample_doc.get('stored_at')}")
            else:
                logger.warning("⚠️ DEBUG: No sample document found!")

            posts = list(self.collection
                         .find()
                         .sort("stored_at", -1)
                         .limit(limit))

            logger.info(f"🔍 DEBUG: Raw query returned {len(posts)} posts")

            # Convert ObjectId to string for JSON serialization
            for i, post in enumerate(posts):
                if '_id' in post:
                    post['_id'] = str(post['_id'])
                logger.info(f"🔍 DEBUG: Post {i+1} keys: {list(post.keys())}")
                logger.info(f"🔍 DEBUG: Post {i+1} stored_at: {post.get('stored_at')}")

            logger.info(f"📋 Retrieved {len(posts)} recent debunk posts")
            return posts

        except Exception as e:
            logger.error(f"❌ Failed to get recent posts: {e}")
            logger.error(f"🔍 DEBUG: Exception type: {type(e).__name__}")
            logger.error(f"🔍 DEBUG: Exception details: {str(e)}")
            return []

    def search_similar_rumours(self, query: str, similarity_threshold: float = 0.6, limit: int = 5) -> List[Dict[str, Any]]:
        """Search for rumours similar to the query text using TF-IDF similarity

        Args:
            query: Search query text
            similarity_threshold: Minimum similarity score (0.0 to 1.0)
            limit: Maximum number of results to return

        Returns:
            List of similar rumours with similarity scores
        """
        try:
            from sklearn.feature_extraction.text import TfidfVectorizer
            from sklearn.metrics.pairwise import cosine_similarity
            import re

            if not query or not query.strip():
                logger.warning("⚠️ Empty query provided")
                return []

            logger.info(f"🔍 Searching for rumours similar to: {query[:50]}...")

            # Get all rumours from database
            all_posts = list(self.collection.find())

            if not all_posts:
                logger.warning("⚠️ No rumours found in database")
                return []

            # Extract claim text from each post
            claims = []
            posts_data = []

            for post in all_posts:
                # Extract claim text - try multiple fields
                claim_text = (
                    post.get('claim') or
                    post.get('summary') or
                    ""
                )

                # Handle nested claim structure
                if isinstance(claim_text, dict):
                    claim_text = claim_text.get('text') or claim_text.get('claim_text') or ""

                if claim_text and claim_text.strip():
                    claims.append(claim_text)
                    posts_data.append(post)

            if not claims:
                logger.warning("⚠️ No claims found in posts")
                return []

            # Preprocess query
            def preprocess_text(text: str) -> str:
                text = text.lower()
                text = re.sub(r'[^\w\s]', ' ', text)
                text = ' '.join(text.split())
                return text

            query_processed = preprocess_text(query)

            # Calculate TF-IDF similarity
            try:
                vectorizer = TfidfVectorizer(
                    stop_words='english',
                    ngram_range=(1, 2),
                    max_features=500,
                    lowercase=True
                )

                # Combine query and claims for vectorization
                all_texts = [query_processed] + [preprocess_text(c) for c in claims]
                tfidf_matrix = vectorizer.fit_transform(all_texts)

                # Calculate similarity between query and each claim
                query_vector = tfidf_matrix[0:1]
                claims_matrix = tfidf_matrix[1:]

                similarities = cosine_similarity(query_vector, claims_matrix)[0]

            except Exception as e:
                logger.error(f"❌ TF-IDF calculation failed: {e}")
                # Fallback to simple word overlap
                similarities = []
                query_words = set(query_processed.split())
                for claim in claims:
                    claim_words = set(preprocess_text(claim).split())
                    if not query_words or not claim_words:
                        similarities.append(0.0)
                    else:
                        intersection = query_words.intersection(claim_words)
                        union = query_words.union(claim_words)
                        similarities.append(len(intersection) / len(union) if union else 0.0)

            # Filter by threshold and sort by similarity
            results = []
            for i, (post, similarity) in enumerate(zip(posts_data, similarities)):
                if similarity >= similarity_threshold:
                    # Convert ObjectId to string
                    if '_id' in post:
                        post['_id'] = str(post['_id'])

                    result = {
                        **post,
                        'similarity_score': float(similarity)
                    }
                    results.append(result)

            # Sort by similarity score (descending) and limit
            results.sort(key=lambda x: x.get('similarity_score', 0), reverse=True)
            results = results[:limit]

            logger.info(f"✅ Found {len(results)} similar rumours (threshold: {similarity_threshold})")
            return results

        except Exception as e:
            logger.error(f"❌ Failed to search similar rumours: {e}")
            import traceback
            logger.error(traceback.format_exc())
            return []

    # ---------- Chat sessions & messages ----------

    def get_chat_sessions(
        self,
        user_id: Optional[str] = None,
        anonymous_id: Optional[str] = None,
        limit: int = 50,
    ) -> List[Dict[str, Any]]:
        """Return chat sessions for a given user or anonymous visitor."""
        if self.chat_sessions is None:
            return []

        query: Dict[str, Any] = {}
        if user_id:
            query["user_id"] = user_id
        if anonymous_id and not user_id:
            # For anonymous visitors we only look at sessions that have not yet been
            # attached to a concrete user id.
            query["anonymous_id"] = anonymous_id
            query["user_id"] = None

        cursor = (
            self.chat_sessions.find(query)
            .sort("updated_at", -1)
            .limit(limit)
        )
        sessions: List[Dict[str, Any]] = []
        for doc in cursor:
            doc["session_id"] = str(doc.get("session_id") or doc.get("_id"))
            doc["_id"] = str(doc["_id"])
            sessions.append(doc)
        return sessions

    def migrate_anonymous_sessions(self, anonymous_id: str, user_id: str) -> int:
        """Attach existing anonymous sessions to a logged-in user.

        This keeps history when a visitor later signs in.
        """
        if self.chat_sessions is None or not anonymous_id or not user_id:
            return 0

        result = self.chat_sessions.update_many(
            {"anonymous_id": anonymous_id, "user_id": None},
            {"$set": {"user_id": user_id}},
        )
        return int(getattr(result, "modified_count", 0))

    def upsert_chat_session(self, payload: Dict[str, Any]) -> Dict[str, Any]:
        """Create or update a chat session document.

        Expected keys in payload: session_id (optional), user_id, anonymous_id,
        title, last_verdict, last_summary.
        """
        if self.chat_sessions is None:
            raise RuntimeError("chat_sessions collection not initialised")

        from datetime import datetime

        session_id = payload.get("session_id")
        now = datetime.utcnow()

        base_updates: Dict[str, Any] = {
            "title": payload.get("title") or "New Chat",
            "user_id": payload.get("user_id"),
            "anonymous_id": payload.get("anonymous_id"),
            "last_verdict": payload.get("last_verdict"),
            "last_summary": payload.get("last_summary"),
            "updated_at": now,
        }

        if session_id:
            doc = self.chat_sessions.find_one_and_update(
                {"session_id": session_id},
                {"$set": base_updates},
                upsert=True,
                return_document=True,
            )
        else:
            doc_to_insert = {
                **base_updates,
                "session_id": payload.get("session_id") or os.urandom(12).hex(),
                "created_at": now,
            }
            inserted = self.chat_sessions.insert_one(doc_to_insert)
            doc = self.chat_sessions.find_one({"_id": inserted.inserted_id})

        doc["_id"] = str(doc["_id"])
        doc["session_id"] = str(doc.get("session_id"))
        return doc

    def append_chat_messages(
        self,
        session_id: str,
        messages: List[Dict[str, Any]],
        user_id: Optional[str] = None,
        anonymous_id: Optional[str] = None,
    ) -> int:
        """Append one or more messages to a given session."""
        if self.chat_messages is None:
            raise RuntimeError("chat_messages collection not initialised")

        from datetime import datetime

        docs = []
        for msg in messages:
            docs.append(
                {
                    "session_id": session_id,
                    "user_id": user_id,
                    "anonymous_id": anonymous_id,
                    "role": msg.get("role"),
                    "content": msg.get("content"),
                    "attachments": msg.get("attachments") or [],
                    "verdict": msg.get("verdict"),
                    "confidence": msg.get("confidence"),
                    "sources": msg.get("sources"),
                    "created_at": msg.get("created_at") or datetime.utcnow(),
                    "metadata": msg.get("metadata") or {},
                }
            )

        if not docs:
            return 0

        result = self.chat_messages.insert_many(docs)
        return len(getattr(result, "inserted_ids", []))

    def get_chat_messages(
        self, session_id: str, limit: int = 100
    ) -> List[Dict[str, Any]]:
        """Return messages for a particular session ordered by time."""
        if self.chat_messages is None:
            return []

        cursor = (
            self.chat_messages.find({"session_id": session_id})
            .sort("created_at", 1)
            .limit(limit)
        )
        docs: List[Dict[str, Any]] = []
        for doc in cursor:
            doc["_id"] = str(doc["_id"])
            docs.append(doc)
        return docs

    # ---------- Subscription management ----------

    def upsert_subscription(self, subscription_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Create or update a subscription document

        Expected keys in subscription_data:
        - user_id: User ID
        - razorpay_subscription_id: Razorpay subscription ID
        - razorpay_plan_id: Razorpay plan ID
        - plan_name: Plan name (e.g., "Pro")
        - status: Subscription status (e.g., "active", "cancelled", "expired")
        - amount: Subscription amount
        - currency: Currency code
        - current_start: Current billing cycle start
        - current_end: Current billing cycle end
        - next_billing_at: Next billing date
        - created_at: Subscription creation date
        - updated_at: Last update date
        """
        if self.subscriptions is None:
            raise RuntimeError("subscriptions collection not initialised")

        from datetime import datetime

        razorpay_subscription_id = subscription_data.get("razorpay_subscription_id")
        if not razorpay_subscription_id:
            raise ValueError("razorpay_subscription_id is required")

        now = datetime.utcnow()

        # Prepare update data
        update_data = {
            **subscription_data,
            "updated_at": now,
        }

        # Set created_at only if creating new subscription
        existing = self.subscriptions.find_one(
            {"razorpay_subscription_id": razorpay_subscription_id}
        )

        if not existing:
            update_data["created_at"] = subscription_data.get("created_at") or now

        # Upsert subscription
        result = self.subscriptions.find_one_and_update(
            {"razorpay_subscription_id": razorpay_subscription_id},
            {"$set": update_data},
            upsert=True,
            return_document=True
        )

        if result:
            result["_id"] = str(result["_id"])
            logger.info(f"✅ Upserted subscription: {razorpay_subscription_id}")

            # Update user's subscription tier if user_id is present
            user_id = subscription_data.get("user_id")
            status = subscription_data.get("status")
            plan_name = subscription_data.get("plan_name", "Free")

            if user_id:
                if status == "active":
                    success = self.update_user_subscription_tier(user_id, plan_name)
                    if success:
                        logger.info(f"✅ Updated user {user_id} subscription tier to {plan_name} via upsert_subscription")
                elif status in ["cancelled", "expired", "paused", "ended"]:
                    success = self.update_user_subscription_tier(user_id, "Free")
                    if success:
                        logger.info(f"✅ Updated user {user_id} subscription tier to Free (status: {status})")

        return result

    def get_user_subscription(
        self,
        user_id: str,
        status: Optional[str] = None
    ) -> Optional[Dict[str, Any]]:
        """
        Get user's active subscription

        Args:
            user_id: User ID
            status: Filter by status (e.g., "active"). If None, returns most recent

        Returns:
            Subscription document or None
        """
        if self.subscriptions is None:
            return None

        query = {"user_id": user_id}
        if status:
            query["status"] = status

        subscription = self.subscriptions.find_one(
            query,
            sort=[("created_at", -1)]
        )

        if subscription:
            subscription["_id"] = str(subscription["_id"])

        return subscription

    def update_subscription_status(
        self,
        razorpay_subscription_id: str,
        status: str,
        additional_data: Optional[Dict[str, Any]] = None
    ) -> Optional[Dict[str, Any]]:
        """
        Update subscription status from webhook events

        Args:
            razorpay_subscription_id: Razorpay subscription ID
            status: New status
            additional_data: Additional fields to update

        Returns:
            Updated subscription document or None
        """
        if self.subscriptions is None:
            return None

        from datetime import datetime

        update_data = {
            "status": status,
            "updated_at": datetime.utcnow()
        }

        if additional_data:
            update_data.update(additional_data)

        result = self.subscriptions.find_one_and_update(
            {"razorpay_subscription_id": razorpay_subscription_id},
            {"$set": update_data},
            return_document=True
        )

        if result:
            result["_id"] = str(result["_id"])
            logger.info(f"✅ Updated subscription status: {razorpay_subscription_id} -> {status}")

            # Update user's subscription tier
            user_id = result.get("user_id")
            if user_id:
                plan_name = result.get("plan_name", "Free")
                if status == "active":
                    self.update_user_subscription_tier(user_id, plan_name)
                elif status in ["cancelled", "expired", "paused"]:
                    self.update_user_subscription_tier(user_id, "Free")

        return result

    def get_subscription_by_razorpay_id(
        self,
        razorpay_subscription_id: str
    ) -> Optional[Dict[str, Any]]:
        """
        Get subscription by Razorpay subscription ID

        Args:
            razorpay_subscription_id: Razorpay subscription ID

        Returns:
            Subscription document or None
        """
        if self.subscriptions is None:
            return None

        subscription = self.subscriptions.find_one(
            {"razorpay_subscription_id": razorpay_subscription_id}
        )

        if subscription:
            subscription["_id"] = str(subscription["_id"])

        return subscription

    def create_user(self, user_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Create a new user in MongoDB

        Args:
            user_data: User data including email, password (hashed), domain_preferences, etc.

        Returns:
            Created user document
        """
        if self.users is None:
            raise RuntimeError("users collection not initialised")

        from datetime import datetime
        from bson import ObjectId

        # Check if user already exists
        existing = self.users.find_one({"email": user_data["email"]})
        if existing:
            raise ValueError("Email already registered")

        user_doc = {
            **user_data,
            "created_at": datetime.utcnow(),
            "updated_at": datetime.utcnow(),
        }

        result = self.users.insert_one(user_doc)
        user_doc["_id"] = str(result.inserted_id)
        user_doc["id"] = str(result.inserted_id)

        logger.info(f"✅ Created user: {user_data['email']}")
        return user_doc

    def get_user_by_email(self, email: str) -> Optional[Dict[str, Any]]:
        """
        Get user by email

        Args:
            email: User email

        Returns:
            User document or None
        """
        if self.users is None:
            return None

        user = self.users.find_one({"email": email})
        if user:
            user["_id"] = str(user["_id"])
            user["id"] = str(user["_id"])

        return user

    def get_user_by_id(self, user_id: str) -> Optional[Dict[str, Any]]:
        """
        Get user by ID

        Args:
            user_id: User ID

        Returns:
            User document or None
        """
        if self.users is None:
            return None

        from bson import ObjectId

        try:
            user = self.users.find_one({"_id": ObjectId(user_id)})
            if user:
                user["_id"] = str(user["_id"])
                user["id"] = str(user["_id"])
            return user
        except Exception as e:
            logger.error(f"Error getting user by ID: {e}")
            return None

    def update_user_subscription_tier(self, user_id: str, subscription_tier: str) -> bool:
        """
        Update user's subscription tier in user collection

        Args:
            user_id: User ID
            subscription_tier: Subscription tier (Free, Pro, Enterprise)

        Returns:
            True if updated successfully, False otherwise
        """
        if self.users is None:
            return False

        from datetime import datetime
        from bson import ObjectId

        try:
            result = self.users.update_one(
                {"_id": ObjectId(user_id)},
                {
                    "$set": {
                        "subscription_tier": subscription_tier,
                        "updated_at": datetime.utcnow()
                    }
                }
            )
            if result.modified_count > 0:
                logger.info(f"✅ Updated user {user_id} subscription tier to {subscription_tier}")
                return True
            return False
        except Exception as e:
            logger.error(f"Error updating user subscription tier: {e}")
            return False

    def close(self):
        """Close MongoDB connection"""
        if self.client:
            self.client.close()
            logger.info("🔌 MongoDB connection closed")

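A minimal usage sketch of the service above. The connection string, query text, and session values are assumptions made for illustration, not part of the committed code; it assumes MONGO_CONNECTION_STRING points at a reachable cluster.

# Hedged usage sketch for MongoDBService (sample values are made up)
from services.mongodb_service import MongoDBService

db = MongoDBService()  # reads MONGO_CONNECTION_STRING from the environment

# Similarity search over stored debunk posts (TF-IDF, with the word-overlap fallback)
matches = db.search_similar_rumours("5G towers spread the virus", similarity_threshold=0.3)
for post in matches:
    print(post["similarity_score"], post.get("claim") or post.get("summary"))

# Chat history: create or refresh a session, then attach messages to it
session = db.upsert_chat_session({
    "anonymous_id": "visitor-123",
    "title": "Is this flood photo real?",
    "last_verdict": "misleading",
})
db.append_chat_messages(
    session_id=session["session_id"],
    messages=[{"role": "user", "content": "Is this flood photo real?"}],
    anonymous_id="visitor-123",
)

db.close()
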
services/razorpay_service.py
ADDED
@@ -0,0 +1,322 @@
"""
Razorpay Service for Subscription Management
Handles Razorpay API interactions for subscription payments
"""

import logging
import hmac
import hashlib
from typing import Dict, Any, Optional
import razorpay
from config import config

logger = logging.getLogger(__name__)


class RazorpayService:
    """Service for handling Razorpay subscription operations"""

    def __init__(self):
        """Initialize Razorpay client"""
        if not config.RAZORPAY_ID or not config.RAZORPAY_KEY:
            logger.warning("⚠️ Razorpay credentials not configured. Subscription features will not work.")
            self.client = None
        else:
            try:
                # Initialize Razorpay client with explicit base URL
                # Test mode uses different base URL, but SDK handles this automatically
                self.client = razorpay.Client(auth=(config.RAZORPAY_ID, config.RAZORPAY_KEY))
                logger.info(f"✅ Razorpay client initialized with Key ID: {config.RAZORPAY_ID[:8]}...")
            except Exception as e:
                logger.error(f"❌ Failed to initialize Razorpay client: {e}")
                self.client = None

    def create_plan(
        self,
        name: str,
        amount: int,
        currency: str = "INR",
        interval: int = 1,
        period: str = "monthly",
        description: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Create a subscription plan in Razorpay

        Args:
            name: Plan name
            amount: Amount in smallest currency unit (paise for INR)
            currency: Currency code (default: INR)
            interval: Billing interval (default: 1)
            period: Billing period - 'daily', 'weekly', 'monthly', 'yearly' (default: monthly)
            description: Plan description

        Returns:
            Dict containing plan details from Razorpay
        """
        if not self.client:
            raise ValueError("Razorpay client not initialized. Check RAZORPAY_ID and RAZORPAY_KEY.")

        try:
            plan_data = {
                "period": period,
                "interval": interval,
                "item": {
                    "name": name,
                    "amount": amount,
                    "currency": currency,
                    "description": description or f"{name} subscription plan"
                }
            }

            logger.debug(f"Creating plan with data: {plan_data}")
            # Try creating plan - note: some accounts may need subscriptions enabled first
            plan = self.client.plan.create(plan_data)
            logger.info(f"✅ Created Razorpay plan: {plan.get('id')}")
            return plan
        except razorpay.errors.BadRequestError as e:
            error_msg = str(e)
            logger.error(f"❌ BadRequestError creating plan '{name}': {error_msg}")
            # Check if it's a "URL not found" error which indicates subscriptions might not be enabled
            if "not found" in error_msg.lower() or "url" in error_msg.lower():
                logger.error(f"   This error typically means:")
                logger.error(f"   1. Subscriptions feature is NOT enabled on your Razorpay account")
                logger.error(f"   2. You need to enable subscriptions in Razorpay Dashboard")
                logger.error(f"   3. Go to: Razorpay Dashboard > Settings > Subscriptions")
                logger.error(f"   4. Or contact Razorpay support to enable subscriptions")
            # Check if plan already exists
            elif "already exists" in error_msg.lower() or "duplicate" in error_msg.lower():
                logger.warning(f"⚠️ Plan '{name}' may already exist")
            raise
        except razorpay.errors.ServerError as e:
            logger.error(f"❌ ServerError creating plan '{name}': {e}")
            raise
        except Exception as e:
            error_type = type(e).__name__
            error_msg = str(e)
            logger.error(f"❌ Failed to create Razorpay plan '{name}' ({error_type}): {error_msg}")
            # Log more details if available
            if hasattr(e, 'status_code'):
                logger.error(f"   Status code: {e.status_code}")
            if hasattr(e, 'error'):
                logger.error(f"   Error details: {e.error}")
            raise

    def create_subscription(
        self,
        plan_id: str,
        customer_notify: int = 1,
        total_count: Optional[int] = None,
        start_at: Optional[int] = None,
        end_at: Optional[int] = None,
        notes: Optional[Dict[str, str]] = None
    ) -> Dict[str, Any]:
        """
        Create a subscription for a user

        Args:
            plan_id: Razorpay plan ID
            customer_notify: Whether to notify customer (1 or 0)
            total_count: Total number of billing cycles (None for infinite - will use end_at instead)
            start_at: Unix timestamp for subscription start (None for immediate)
            end_at: Unix timestamp for subscription end (used if total_count is None for infinite subscriptions)
            notes: Additional notes/metadata

        Returns:
            Dict containing subscription details from Razorpay
        """
        if not self.client:
            raise ValueError("Razorpay client not initialized. Check RAZORPAY_ID and RAZORPAY_KEY.")

        try:
            subscription_data = {
                "plan_id": plan_id,
                "customer_notify": customer_notify,
            }

            # Razorpay requires either total_count or end_at
            # If end_at is provided, start_at is also required
            # start_at must be in the future (add 60 seconds buffer to account for clock differences)
            import time
            current_time = int(time.time())
            # Add 60 seconds buffer to ensure start_at is always in the future
            future_start_time = current_time + 60

            if total_count is not None:
                subscription_data["total_count"] = total_count
            elif end_at is not None:
                subscription_data["end_at"] = end_at
                # If end_at is set but start_at is not, set start_at to 60 seconds in the future
                if start_at is None:
                    subscription_data["start_at"] = future_start_time
            else:
                # Set both start_at and end_at for infinite subscriptions
                subscription_data["start_at"] = future_start_time
                subscription_data["end_at"] = future_start_time + (10 * 365 * 24 * 60 * 60)  # 10 years
                logger.info("ℹ️ No total_count or end_at provided, setting start_at to 60 seconds in future and end_at to 10 years from start (infinite subscription)")

            # Override start_at if explicitly provided (but ensure it's in the future)
            if start_at is not None:
                if start_at <= current_time:
                    # If provided start_at is in the past, add 60 seconds buffer
                    subscription_data["start_at"] = current_time + 60
                    logger.warning(f"⚠️ Provided start_at was in the past, adjusted to {subscription_data['start_at']}")
                else:
                    subscription_data["start_at"] = start_at

            if notes:
                subscription_data["notes"] = notes

            subscription = self.client.subscription.create(subscription_data)
            logger.info(f"✅ Created Razorpay subscription: {subscription.get('id')}")
            return subscription
        except Exception as e:
            logger.error(f"❌ Failed to create Razorpay subscription: {e}")
            raise

    def get_subscription(self, subscription_id: str) -> Dict[str, Any]:
        """
        Get subscription details from Razorpay

        Args:
            subscription_id: Razorpay subscription ID

        Returns:
            Dict containing subscription details
        """
        if not self.client:
            raise ValueError("Razorpay client not initialized. Check RAZORPAY_ID and RAZORPAY_KEY.")

        try:
            subscription = self.client.subscription.fetch(subscription_id)
            return subscription
        except Exception as e:
            logger.error(f"❌ Failed to fetch subscription {subscription_id}: {e}")
            raise

    def cancel_subscription(
        self,
        subscription_id: str,
        cancel_at_cycle_end: bool = False
    ) -> Dict[str, Any]:
        """
        Cancel a subscription

        Args:
            subscription_id: Razorpay subscription ID
            cancel_at_cycle_end: If True, cancel at end of current cycle

        Returns:
            Dict containing updated subscription details
        """
        if not self.client:
            raise ValueError("Razorpay client not initialized. Check RAZORPAY_ID and RAZORPAY_KEY.")

        try:
            if cancel_at_cycle_end:
                subscription = self.client.subscription.cancel(
                    subscription_id,
                    {"cancel_at_cycle_end": 1}
                )
            else:
                subscription = self.client.subscription.cancel(subscription_id)

            logger.info(f"✅ Cancelled subscription: {subscription_id}")
            return subscription
        except Exception as e:
            logger.error(f"❌ Failed to cancel subscription {subscription_id}: {e}")
            raise

    def verify_webhook_signature(
        self,
        payload: str,
        signature: str
    ) -> bool:
        """
        Verify Razorpay webhook signature

        Args:
            payload: Raw webhook payload (string)
            signature: Webhook signature from X-Razorpay-Signature header

        Returns:
            True if signature is valid, False otherwise
        """
        if not config.RAZORPAY_WEBHOOK_SECRET:
            logger.warning("⚠️ RAZORPAY_WEBHOOK_SECRET not set. Webhook verification skipped.")
            return True  # Allow if secret not configured (for development)

        try:
            expected_signature = hmac.new(
                config.RAZORPAY_WEBHOOK_SECRET.encode('utf-8'),
                payload.encode('utf-8'),
                hashlib.sha256
            ).hexdigest()

            return hmac.compare_digest(expected_signature, signature)
        except Exception as e:
            logger.error(f"❌ Webhook signature verification failed: {e}")
            return False

    def get_plan(self, plan_id: str) -> Dict[str, Any]:
        """
        Get plan details from Razorpay

        Args:
            plan_id: Razorpay plan ID

        Returns:
            Dict containing plan details
        """
        if not self.client:
            raise ValueError("Razorpay client not initialized. Check RAZORPAY_ID and RAZORPAY_KEY.")

        try:
            plan = self.client.plan.fetch(plan_id)
            return plan
        except Exception as e:
            logger.error(f"❌ Failed to fetch plan {plan_id}: {e}")
            raise

    def list_plans(self, count: int = 10, skip: int = 0) -> Dict[str, Any]:
        """
        List all plans

        Args:
            count: Number of plans to fetch
            skip: Number of plans to skip

        Returns:
            Dict containing list of plans
        """
        if not self.client:
            raise ValueError("Razorpay client not initialized. Check RAZORPAY_ID and RAZORPAY_KEY.")

        try:
            # Try to list plans - this may fail if no plans exist or API endpoint is different
            plans = self.client.plan.all({"count": count, "skip": skip})
            return plans
        except razorpay.errors.BadRequestError as e:
            error_msg = str(e).lower()
            logger.error(f"❌ BadRequestError listing plans: {e}")
            # Check if it's a "not found" error which might mean subscriptions aren't enabled
            if "not found" in error_msg or "url" in error_msg:
                logger.warning("⚠️ Subscriptions API endpoint not found. This might mean:")
                logger.warning("   1. Subscriptions feature is not enabled on your Razorpay account")
                logger.warning("   2. Your API keys don't have subscription permissions")
                logger.warning("   3. You need to enable subscriptions in Razorpay Dashboard")
                # Return empty structure if it's a "not found" type error
                return {"items": [], "count": 0}
            raise
        except razorpay.errors.ServerError as e:
            logger.error(f"❌ ServerError listing plans: {e}")
            raise
        except Exception as e:
            error_type = type(e).__name__
            error_msg = str(e)
            logger.error(f"❌ Failed to list plans ({error_type}): {error_msg}")
            # If it's a "not found" error, return empty list instead of raising
            if "not found" in error_msg.lower() or "404" in error_msg:
                logger.warning("⚠️ No plans found or endpoint not available, returning empty list")
                return {"items": [], "count": 0}
            raise

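A small test-style sketch of the signature check above. It assumes RAZORPAY_WEBHOOK_SECRET is set in config; the payload body is made up, and in production Razorpay delivers the real signature in the X-Razorpay-Signature header as the docstring notes.

# Hedged sketch: exercises verify_webhook_signature() with a signature computed
# the same way Razorpay does (HMAC-SHA256 hex digest over the raw request body).
import hashlib
import hmac

from config import config
from services.razorpay_service import RazorpayService

service = RazorpayService()
raw_body = '{"event": "subscription.activated", "payload": {}}'  # made-up payload

good_signature = hmac.new(
    config.RAZORPAY_WEBHOOK_SECRET.encode("utf-8"),
    raw_body.encode("utf-8"),
    hashlib.sha256,
).hexdigest()

assert service.verify_webhook_signature(raw_body, good_signature)
assert not service.verify_webhook_signature(raw_body, "tampered-signature")
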
services/text_fact_checker.py
ADDED
@@ -0,0 +1,905 @@
import requests
import json
from typing import Dict, List, Optional, Any
import google.generativeai as genai
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from config import config


class TextFactChecker:
    """Service for fact-checking textual claims using Google Custom Search API with fact-checking sites"""

    def __init__(self):
        self.api_key = config.GOOGLE_API_KEY
        self.search_engine_id = config.GOOGLE_FACT_CHECK_CX
        self.base_url = "https://www.googleapis.com/customsearch/v1"

        # Configure Gemini for analysis
        if not config.GEMINI_API_KEY:
            print("⚠️ WARNING: GEMINI_API_KEY not set. Gemini features will not work.")
        else:
            try:
                genai.configure(api_key=config.GEMINI_API_KEY)
                self.model = genai.GenerativeModel(config.GEMINI_MODEL)
                print(f"✅ Gemini configured with model: {config.GEMINI_MODEL}")
            except Exception as e:
                print(f"❌ Failed to configure Gemini: {e}")
                raise

        if not self.api_key:
            raise ValueError("Google Custom Search API key is required")
        if not self.search_engine_id:
            raise ValueError("Google Custom Search Engine ID (cx) is required")

    async def verify(self, text_input: str, claim_context: str = "Unknown context", claim_date: str = "Unknown date") -> Dict[str, Any]:
        """
        Verify a textual claim using a three-phase approach:
        1. Immediate Gemini read-through for a quick, reference-free baseline
        2. Curated SERP (fact-check) harvesting with structured analysis
        3. A final Gemini synthesis that reasons over BOTH the baseline and SERP data

        Args:
            text_input: The text claim to verify
            claim_context: Context about the claim
            claim_date: Date when the claim was made

        Returns:
            Dictionary containing verification results
        """
        try:
            print(f"🔍 DEBUG: TextFactChecker.verify called")
            print(f"🔍 DEBUG: text_input = {text_input}")
            print(f"🔍 DEBUG: claim_context = {claim_context}")
            print(f"🔍 DEBUG: claim_date = {claim_date}")
            print(f"Starting verification for: {text_input}")

            # STEP 0: quick general-knowledge pass (baseline)
            preliminary_analysis = await self._verify_with_general_knowledge(
                text_input, claim_context, claim_date
            )
            print(f"🔍 DEBUG: preliminary_analysis = {preliminary_analysis}")

            # STEP 1: Search for fact-checked claims in curated sources
            search_results = await self._search_claims(text_input)
            print(f"🔍 DEBUG: search_results = {search_results}")

            curated_analysis = None
            if search_results:
                # Analyze the search results with Gemini
                curated_analysis = self._analyze_results(search_results, text_input)

            final_response = self._synthesize_final_response(
                text_input=text_input,
                claim_context=claim_context,
                claim_date=claim_date,
                preliminary_analysis=preliminary_analysis,
                curated_analysis=curated_analysis,
                search_results=search_results or []
            )

            if final_response:
                return final_response

            # Fallback ladder: curated -> preliminary -> default error
            if curated_analysis:
                return self._build_simple_response(
                    curated_analysis,
                    text_input,
                    claim_context,
                    claim_date,
                    search_results or [],
                    method_label="curated_sources_only",
                    extra_details={
                        "preliminary_analysis": preliminary_analysis,
                        "curated_analysis": curated_analysis,
                    },
                )

            if preliminary_analysis:
                return self._build_simple_response(
                    preliminary_analysis,
                    text_input,
                    claim_context,
                    claim_date,
                    search_results or [],
                    method_label="general_knowledge_only",
                    extra_details={"preliminary_analysis": preliminary_analysis},
                )

            return {
                "verified": False,
                "verdict": "error",
                "message": "Unable to generate a verification response.",
                "details": {
                    "claim_text": text_input,
                    "claim_context": claim_context,
                    "claim_date": claim_date,
                    "fact_checks": search_results or [],
                    "analysis": {},
                    "verification_method": "unavailable",
                },
            }

        except Exception as e:
            print(f"❌ Error in verify: {e}")
            return {
                "verified": False,
                "verdict": "error",
                "message": f"Error during fact-checking: {str(e)}",
                "details": {
                    "claim_text": text_input,
                    "claim_context": claim_context,
                    "claim_date": claim_date,
                    "error": str(e)
                }
            }

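For orientation, a minimal usage sketch of the three-phase flow above. It is illustrative only: it assumes `services` is importable as a package, that the Google Custom Search and Gemini keys are configured via `config`, and the claim text is a placeholder.

# Illustrative only, not part of the commit: assumes GOOGLE_API_KEY,
# GOOGLE_FACT_CHECK_CX and GEMINI_API_KEY are configured in config.
import asyncio

from services.text_fact_checker import TextFactChecker


async def main() -> None:
    checker = TextFactChecker()
    result = await checker.verify(
        "Example claim to check",        # placeholder claim
        claim_context="Shared on social media",
        claim_date="2024-01-01",
    )
    # Every path returns the same envelope: verdict/message plus a details payload
    print(result["verdict"], "-", result["message"])
    print("method:", result["details"].get("verification_method"))


if __name__ == "__main__":
    asyncio.run(main())
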
    async def _search_claims(self, query: str) -> List[Dict[str, Any]]:
        """
        Search for fact-checked claims using Google Custom Search API with LLM-powered fallback strategies

        Args:
            query: The search query

        Returns:
            List of search results
        """
        # Try the original query first
        results = await self._perform_search(query)

        # If no results, use LLM to create alternative queries
        if not results:
            print("No results found, using LLM to create alternative queries...")

            alternative_queries = self._create_alternative_queries(query)
            print(f"Generated alternative queries: {alternative_queries}")

            # _create_alternative_queries returns a list, so try each alternative in turn
            for alternative_query in alternative_queries:
                results = await self._perform_search(alternative_query)
                if results:
                    print(f"Found {len(results)} results with alternative query")
                    break
            else:
                print("No results found with alternative query")
        return results

    async def _perform_search(self, query: str) -> List[Dict[str, Any]]:
        """
        Perform a single search request

        Args:
            query: The search query

        Returns:
            List of search results
        """
        params = {
            "q": query,
            "key": self.api_key,
            "cx": self.search_engine_id,
            "num": 10  # Limit results to 10 for better performance
        }

        try:
            print(f"Making request to: {self.base_url}")
            print(f"Params: {params}")

            response = requests.get(self.base_url, params=params, timeout=30)
            print(f"Response status: {response.status_code}")
            print(f"Response text: {response.text}")

            response.raise_for_status()

            data = response.json()
            items = data.get("items", [])

            return items

        except requests.exceptions.RequestException as e:
            raise Exception(f"API request failed: {str(e)}")
        except json.JSONDecodeError as e:
            raise Exception(f"Failed to parse API response: {str(e)}")
        except Exception as e:
            raise Exception(f"Search error: {str(e)}")

    def _create_alternative_queries(self, query: str) -> List[str]:
        """
        Use LLM to create alternative search queries (broader and simpler)

        Args:
            query: Original query

        Returns:
            List of alternative queries to try
        """
        prompt = f"""
        You are a search query optimizer. Given a fact-checking query that returned no results, create alternative queries that might find relevant information.

        ORIGINAL QUERY: "{query}"

        Create an alternative query:
        1. A BROADER query that removes specific assumptions and focuses on key entities/events

        Examples:
        - "Is it true the CEO of Astronomer resigned because of toxic workplace allegations?"
          → Broader: "Astronomer CEO resignation"

        - "Did Apple release a new iPhone with 5G in 2023?"
          → Broader: "Apple iPhone 2023 release"

        Respond in this exact JSON format:
        {{
            "broader_query": "your broader query here"
        }}
        """

        try:
            response = self.model.generate_content(prompt)
            response_text = response.text.strip()

            # Try to parse JSON response
            if response_text.startswith('```json'):
                response_text = response_text.replace('```json', '').replace('```', '').strip()
            elif response_text.startswith('```'):
                response_text = response_text.replace('```', '').strip()

            alternatives = json.loads(response_text)

            # Return both alternatives
            queries = []
            if alternatives.get("broader_query") and alternatives["broader_query"] != query:
                queries.append(alternatives["broader_query"])
            if alternatives.get("simpler_query") and alternatives["simpler_query"] != query:
                queries.append(alternatives["simpler_query"])

            return queries

        except Exception as e:
            print(f"Failed to create alternative queries with LLM: {e}")
            # Fall back to the original query so the caller always receives a list
            return [query]

    def _analyze_results(self, results: List[Dict[str, Any]], original_text: str) -> Dict[str, Any]:
        """
        Analyze the search results using Gemini AI to determine overall verdict

        Args:
            results: List of search results from the API
            original_text: The original text being verified

        Returns:
            Analysis results including verdict and message
        """
        if not results:
            return {
                "verified": False,
                "verdict": "no_content",
                "message": "No fact-checked information found for this claim"
            }

        # Filter relevant results
        relevant_results = []
        for result in results:
            title = result.get("title", "").lower()
            snippet = result.get("snippet", "").lower()
            original_lower = original_text.lower()

            # Check if the result is relevant to our original text
            relevance_score = self._calculate_relevance(result, original_text)

            print(f"Relevance score for '{title[:50]}...': {relevance_score:.3f}")
            if relevance_score > 0.05:  # Very low threshold to catch all relevant results
                relevant_results.append(result)

        if not relevant_results:
            return {
                "verified": False,
                "verdict": "no_content",
                "message": "No relevant fact-checked information found for this specific claim"
            }

        # Use Gemini to analyze the results
        try:
            analysis = self._analyze_with_gemini(original_text, relevant_results)
            return analysis
        except Exception as e:
            print(f"Gemini analysis failed: {str(e)}")
            # Fallback to simple analysis
            return self._fallback_analysis(relevant_results)

    def _calculate_relevance(self, result: Dict[str, Any], original_text: str) -> float:
        """
        Calculate relevance score using TF-IDF similarity with multiple components

        Args:
            result: Search result dictionary
            original_text: Original text being verified

        Returns:
            Relevance score between 0 and 1
        """
        score = 0.0

        # 1. Title relevance (60% weight)
        title = result.get("title", "")
        if title:
            title_score = self._tfidf_similarity(title, original_text)
            score += title_score * 0.6

        # 2. Snippet relevance (40% weight)
        snippet = result.get("snippet", "")
        if snippet:
            snippet_score = self._tfidf_similarity(snippet, original_text)
            score += snippet_score * 0.4

        # 3. Fact-check specific bonus (adds up to 0.1)
        factcheck_score = self._has_factcheck_data(result)
        score += factcheck_score * 0.1

        return min(1.0, score)

    def _tfidf_similarity(self, text1: str, text2: str) -> float:
        """
        Calculate TF-IDF cosine similarity between two texts

        Args:
            text1: First text
            text2: Second text

        Returns:
            Similarity score between 0 and 1
        """
        if not text1.strip() or not text2.strip():
            return 0.0

        try:
            # Preprocess texts
            texts = [self._preprocess_text(text1), self._preprocess_text(text2)]

            # Create TF-IDF vectors
            vectorizer = TfidfVectorizer(
                stop_words='english',
                ngram_range=(1, 2),  # Include bigrams
                max_features=500,
                lowercase=True
            )

            tfidf_matrix = vectorizer.fit_transform(texts)

            # Calculate cosine similarity
            similarity = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]

            return float(similarity)

        except Exception as e:
            print(f"TF-IDF calculation failed: {e}")
            # Fallback to simple word overlap
            return self._simple_word_overlap(text1, text2)

    def _preprocess_text(self, text: str) -> str:
        """
        Preprocess text for TF-IDF analysis

        Args:
            text: Raw text

        Returns:
            Preprocessed text
        """
        import re

        # Convert to lowercase
        text = text.lower()

        # Remove special characters but keep spaces
        text = re.sub(r'[^\w\s]', ' ', text)

        # Remove extra whitespace
        text = ' '.join(text.split())

        return text

    def _simple_word_overlap(self, text1: str, text2: str) -> float:
        """
        Fallback similarity calculation using word overlap

        Args:
            text1: First text
            text2: Second text

        Returns:
            Similarity score between 0 and 1
        """
        words1 = set(text1.lower().split())
        words2 = set(text2.lower().split())

        if not words1 or not words2:
            return 0.0

        intersection = words1.intersection(words2)
        union = words1.union(words2)

        return len(intersection) / len(union) if union else 0.0

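As a quick illustration of the scoring used above, two texts with overlapping wording score noticeably higher than unrelated ones. This standalone sketch mirrors the vectorizer settings in _tfidf_similarity; the example strings are invented.

# Standalone illustration of the TF-IDF cosine-similarity scoring used above.
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

claim = "astronomer ceo resigned over workplace allegations"
snippet = "Fact check: did the Astronomer CEO resign?"

vectorizer = TfidfVectorizer(stop_words="english", ngram_range=(1, 2),
                             max_features=500, lowercase=True)
tfidf = vectorizer.fit_transform([claim, snippet])

score = cosine_similarity(tfidf[0:1], tfidf[1:2])[0][0]
# Related texts typically land well above the 0.05 relevance threshold
print(f"similarity: {score:.3f}")
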
    def _has_factcheck_data(self, result: Dict[str, Any]) -> float:
        """
        Check if result has fact-check specific metadata

        Args:
            result: Search result dictionary

        Returns:
            1.0 if has fact-check data, 0.0 otherwise
        """
        # Check for ClaimReview metadata
        pagemap = result.get("pagemap", {})
        claim_review = pagemap.get("ClaimReview", [])

        if claim_review:
            return 1.0

        # Check for fact-check related keywords in URL or title
        url = result.get("link", "").lower()
        title = result.get("title", "").lower()

        factcheck_keywords = [
            "fact-check", "factcheck", "snopes", "politifact",
            "factcrescendo", "boomlive", "newschecker", "afp"
        ]

        for keyword in factcheck_keywords:
            if keyword in url or keyword in title:
                return 1.0

        return 0.0

    def _analyze_with_gemini(self, original_text: str, results: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Use Gemini AI to analyze fact-check results and determine verdict

        Args:
            original_text: The original claim being verified
            results: List of relevant search results

        Returns:
            Analysis results with verdict and message
        """
        # Prepare the prompt
        results_text = ""
        for i, result in enumerate(results[:5], 1):  # Limit to top 5 results
            title = result.get("title", "")
            snippet = result.get("snippet", "")
            link = result.get("link", "")
            results_text += f"{i}. Title: {title}\n Snippet: {snippet}\n Link: {link}\n\n"

        prompt = f"""
        You are a fact-checking expert. Analyze the following claim against the provided fact-checking sources.

        CLAIM TO VERIFY: "{original_text}"

        FACT-CHECKING SOURCES:
        {results_text}

        STEP-BY-STEP ANALYSIS:
        1. What does each source say ACTUALLY HAPPENED?
        2. What does each source say was FAKE or MISLEADING?
        3. Based on the evidence, what is the most likely truth about the claim?

        Think through this systematically and provide your analysis.

        IMPORTANT INSTRUCTIONS FOR YOUR RESPONSE:
        - When referring to sources in your message, DO NOT use specific numbers like "Source 1", "Source 3", or "Sources 2, 4, and 5"
        - Instead, use generic references like "the sources", "multiple sources", "one source", "several sources"
        - Example: Instead of "Sources 3, 4, and 5 confirm..." say "Multiple sources confirm..." or "The sources confirm..."

        Respond in this exact JSON format:
        {{
            "verdict": "true|false|mixed|uncertain",
            "verified": true|false,
            "message": "Your explanation here",
            "confidence": "high|medium|low",
            "reasoning": "Your step-by-step reasoning process"
        }}
        """

        try:
            response = self.model.generate_content(prompt)
            response_text = response.text.strip()

            # Try to parse JSON response
            if response_text.startswith('```json'):
                response_text = response_text.replace('```json', '').replace('```', '').strip()
            elif response_text.startswith('```'):
                response_text = response_text.replace('```', '').strip()

            analysis = json.loads(response_text)

            # Ensure required fields
            analysis.setdefault("verdict", "uncertain")
            analysis.setdefault("verified", False)
            analysis.setdefault("message", "Analysis completed")
            analysis.setdefault("confidence", "medium")
            analysis.setdefault("reasoning", "Analysis completed")

            # Add metadata
            analysis["relevant_results_count"] = len(results)
            analysis["analysis_method"] = "gemini"

            return analysis

        except json.JSONDecodeError as e:
            print(f"Failed to parse Gemini response as JSON: {e}")
            print(f"Raw response: {response_text}")
            return self._fallback_analysis(results)
        except Exception as e:
            print(f"Gemini analysis error: {e}")
            return self._fallback_analysis(results)

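The same Markdown-fence stripping precedes every json.loads call on a Gemini response in this module. A small helper along these lines could centralize it; this is a hypothetical refactor sketch, not part of the commit.

# Hypothetical helper, not part of this commit: strips optional Markdown code
# fences from a model response before JSON parsing.
import json
from typing import Any, Dict


def parse_fenced_json(response_text: str) -> Dict[str, Any]:
    text = response_text.strip()
    if text.startswith("```json"):
        text = text.replace("```json", "").replace("```", "").strip()
    elif text.startswith("```"):
        text = text.replace("```", "").strip()
    return json.loads(text)


print(parse_fenced_json('```json\n{"verdict": "uncertain"}\n```'))
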
    def _format_source_summary(self, results: List[Dict[str, Any]]) -> str:
        """Create a short, human readable summary of the surfaced sources."""
        if not results:
            return "No vetted sources surfaced yet."

        highlights = []
        for result in results[:3]:
            title = result.get("title") or "Unknown source"
            outlet = result.get("displayLink")
            summary = title
            if outlet:
                summary += f" ({outlet})"
            highlights.append(summary)

        return "Sources surfaced: " + "; ".join(highlights)

    def _fallback_analysis(self, results: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Fallback analysis when Gemini fails

        Args:
            results: List of search results

        Returns:
            Basic analysis results
        """
        summary = self._format_source_summary(results)

        return {
            "verified": False,
            "verdict": "uncertain",
            "message": f"Could not verify this claim yet. {summary}",
            "confidence": "low",
            "relevant_results_count": len(results),
            "analysis_method": "fallback"
        }

    async def _verify_with_general_knowledge(self, text_input: str, claim_context: str, claim_date: str) -> Dict[str, Any]:
        """
        Verify a claim using Gemini's general knowledge base directly (no curated sources)
        This is used as a fallback when curated sources don't have enough information

        Args:
            text_input: The text claim to verify
            claim_context: Context about the claim
            claim_date: Date when the claim was made

        Returns:
            Analysis results with verdict and message
        """
        from datetime import datetime
        current_date = datetime.now().strftime("%B %d, %Y")

        prompt = f"""
        You are a fact-checking expert AI with access to current information as of {current_date}.

        CLAIM TO VERIFY: "{text_input}"
        CONTEXT: {claim_context if claim_context != "Unknown context" else "No additional context provided"}
        CLAIM DATE: {claim_date if claim_date != "Unknown date" else "Unknown"}

        Your task is to verify this claim using your knowledge base. Since this is a direct factual question that may not be covered by news articles:

        1. **Use your most recent training data** to answer the question directly
        2. If this is about current events, political positions, or time-sensitive facts, be especially careful to provide the MOST CURRENT information
        3. If you're uncertain about recent changes, acknowledge that
        4. Always answer based on the most recent information you have

        Provide a clear, direct answer. Think step-by-step:
        - What does the claim assert?
        - Based on your knowledge (as of your training cutoff and any recent data you have), is this true or false?
        - If it's a time-sensitive claim, what is the current status?

        Respond in this exact JSON format:
        {{
            "verdict": "true|false|mixed|uncertain",
            "verified": true|false,
            "message": "Your clear, direct answer explaining whether the claim is true or false and why",
            "confidence": "high|medium|low",
            "reasoning": "Your step-by-step reasoning process",
            "knowledge_cutoff_note": "Optional note if the answer might be outdated or if recent changes are possible"
        }}

        IMPORTANT: For current events or political positions, provide the MOST RECENT information you have access to.
        """

        try:
            response = self.model.generate_content(prompt)
            response_text = response.text.strip()

            # Try to parse JSON response
            if response_text.startswith('```json'):
                response_text = response_text.replace('```json', '').replace('```', '').strip()
            elif response_text.startswith('```'):
                response_text = response_text.replace('```', '').strip()

            analysis = json.loads(response_text)

            # Ensure required fields
            analysis.setdefault("verdict", "uncertain")
            analysis.setdefault("verified", False)
            analysis.setdefault("message", "Analysis completed using general knowledge")
            analysis.setdefault("confidence", "medium")
            analysis.setdefault("reasoning", "Direct verification using AI knowledge base")

            # Add metadata
            analysis["analysis_method"] = "general_knowledge"
            analysis["verification_date"] = current_date

            print(f"✅ General knowledge verification result: {analysis['verdict']}")
            return analysis

        except json.JSONDecodeError as e:
            print(f"Failed to parse Gemini general knowledge response as JSON: {e}")
            print(f"Raw response: {response_text[:500]}")
            # Try to extract plain text answer
            return {
                "verified": False,
                "verdict": "uncertain",
                "message": response_text if response_text else "Unable to verify using general knowledge",
                "confidence": "low",
                "analysis_method": "general_knowledge",
                "error": "JSON parsing failed, used plain text response"
            }
        except Exception as e:
            print(f"General knowledge verification error: {e}")
            return {
                "verified": False,
                "verdict": "error",
                "message": f"Error during general knowledge verification: {str(e)}",
                "confidence": "low",
                "analysis_method": "general_knowledge"
            }

    def _extract_verdict_from_content(self, content: str) -> str:
        """
        Extract verdict from search result content

        Args:
            content: Combined title and snippet text

        Returns:
            Verdict string
        """
        content_lower = content.lower()

        # Look for verdict indicators
        if any(word in content_lower for word in ["false", "misleading", "incorrect", "debunked", "not true"]):
            return "false"
        elif any(word in content_lower for word in ["true", "accurate", "correct", "verified", "confirmed", "is true", "is correct"]):
            return "true"
        elif any(word in content_lower for word in ["partially", "mixed", "somewhat", "half"]):
            return "mixed"
        elif any(word in content_lower for word in ["unverified", "unproven", "uncertain", "disputed"]):
            return "uncertain"
        else:
            return "unknown"

    def _analyze_verdicts(self, verdicts: List[str]) -> Dict[str, Any]:
        """
        Analyze verdicts to determine overall result

        Args:
            verdicts: List of verdict strings

        Returns:
            Analysis of verdicts
        """
        if not verdicts:
            return {
                "verified": False,
                "verdict": "uncertain",
                "message": "No verdicts found"
            }

        true_count = verdicts.count("true")
        false_count = verdicts.count("false")
        mixed_count = verdicts.count("mixed")
        uncertain_count = verdicts.count("uncertain")
        unknown_count = verdicts.count("unknown")

        total = len(verdicts)

        # Determine overall verdict
        if false_count > 0:
            overall_verdict = "false"
            verified = False
        elif true_count > 0 and false_count == 0:
            overall_verdict = "true"
            verified = True
        elif mixed_count > 0:
            overall_verdict = "mixed"
            verified = False
        elif uncertain_count > 0:
            overall_verdict = "uncertain"
            verified = False
        else:
            overall_verdict = "unknown"
            verified = False

        return {
            "verified": verified,
            "verdict": overall_verdict,
            "true_count": true_count,
            "false_count": false_count,
            "mixed_count": mixed_count,
            "uncertain_count": uncertain_count,
            "unknown_count": unknown_count,
            "total_verdicts": total
        }

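To make the precedence in _analyze_verdicts concrete: a single "false" outweighs any number of "true" verdicts, then "true" beats "mixed", and so on down to "unknown". A hypothetical mini-walkthrough, not part of the commit:

# Hypothetical walkthrough of the precedence implemented in _analyze_verdicts.
verdicts = ["true", "false", "true", "unknown"]

if verdicts.count("false") > 0:
    overall = "false"
elif verdicts.count("true") > 0:
    overall = "true"
elif verdicts.count("mixed") > 0:
    overall = "mixed"
elif verdicts.count("uncertain") > 0:
    overall = "uncertain"
else:
    overall = "unknown"

print(overall)  # -> "false", despite two "true" verdicts
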
    def _build_message(self, analysis: Dict[str, Any], results: List[Dict[str, Any]]) -> str:
        """
        Build a human-readable message based on the analysis

        Args:
            analysis: Analysis results
            results: Relevant search results

        Returns:
            Formatted message
        """
        verdict = analysis["verdict"]
        total_verdicts = analysis["total_verdicts"]
        relevant_results_count = len(results)

        base_messages = {
            "true": "This claim appears to be TRUE based on fact-checking sources.",
            "false": "This claim appears to be FALSE based on fact-checking sources.",
            "mixed": "This claim has MIXED evidence - some parts are true, others are false.",
            "uncertain": "This claim is UNCERTAIN - insufficient evidence to determine accuracy.",
            "unknown": "This claim needs further investigation - verdict unclear from available sources.",
            "no_content": "No fact-checked information found for this claim."
        }

        message = base_messages.get(verdict, "Unable to determine claim accuracy.")

        # Add details about sources
        if relevant_results_count > 0:
            message += f" Found {relevant_results_count} relevant fact-check(s) with {total_verdicts} total verdicts."

        # Add top sources
        top_sources = []
        for result in results[:3]:  # Show top 3 sources
            title = result.get("title", "Unknown")
            link = result.get("link", "")
            if title not in top_sources and link:
                top_sources.append(f"{title}")

        if top_sources:
            message += f" Sources include: {', '.join(top_sources[:3])}."

        return message

    def _synthesize_final_response(
        self,
        text_input: str,
        claim_context: str,
        claim_date: str,
        preliminary_analysis: Optional[Dict[str, Any]],
        curated_analysis: Optional[Dict[str, Any]],
        search_results: List[Dict[str, Any]],
    ) -> Optional[Dict[str, Any]]:
        """
        Ask Gemini to reconcile preliminary + curated evidence into a single user-facing verdict.
        """
        try:
            source_briefs = []
            for item in search_results[:5]:
                source_briefs.append(
                    {
                        "title": item.get("title"),
                        "snippet": item.get("snippet"),
                        "outlet": item.get("displayLink"),
                        "link": item.get("link"),
                    }
                )

            prompt = f"""
            You are an AI fact-checking editor. Combine the baseline assessment and curated sources to produce the final answer.

            CLAIM: "{text_input}"
            CONTEXT: {claim_context}
            CLAIM DATE: {claim_date}

            BASELINE ANALYSIS (Gemini quick look):
            {json.dumps(preliminary_analysis or {}, indent=2, ensure_ascii=False)}

            CURATED FACT-CHECK ANALYSIS:
            {json.dumps(curated_analysis or {}, indent=2, ensure_ascii=False)}

            FACT-CHECK SOURCES:
            {json.dumps(source_briefs, indent=2, ensure_ascii=False)}

            INSTRUCTIONS:
            - Make a reasoned decision (true/false/mixed/uncertain) based on the above.
            - If evidence is thin, keep the tone cautious and say it is unverified/uncertain but mention what was found.
            - Refer to sources generically (e.g., "one BBC article", "multiple outlets") — never number them.
            - Provide clear, actionable messaging for the end user.

            Respond ONLY in this JSON format:
            {{
                "verdict": "true|false|mixed|uncertain",
                "verified": true|false,
                "message": "Concise user-facing summary referencing evidence in plain language",
                "confidence": "high|medium|low",
                "reasoning": "Brief reasoning trail you followed",
                "tone": "confident|balanced|cautious"
            }}
            """
            response = self.model.generate_content(prompt)
            response_text = response.text.strip()

            if response_text.startswith("```json"):
                response_text = response_text.replace("```json", "").replace("```", "").strip()
            elif response_text.startswith("```"):
                response_text = response_text.replace("```", "").strip()

            final_analysis = json.loads(response_text)
            final_analysis.setdefault("verdict", "uncertain")
            final_analysis.setdefault("verified", False)
            final_analysis.setdefault("message", "Unable to synthesize final verdict.")
            final_analysis.setdefault("confidence", "low")
            final_analysis.setdefault("reasoning", "")
            final_analysis.setdefault("tone", "cautious")
            final_analysis["analysis_method"] = "hybrid_synthesis"

            return self._build_simple_response(
                final_analysis,
                text_input,
                claim_context,
                claim_date,
                search_results,
                method_label="hybrid_synthesis",
                extra_details={
                    "preliminary_analysis": preliminary_analysis,
                    "curated_analysis": curated_analysis,
                    "source_highlights": source_briefs,
                },
            )
        except Exception as e:
            print(f"Hybrid synthesis error: {e}")
            return None

    def _build_simple_response(
        self,
        analysis: Dict[str, Any],
        text_input: str,
        claim_context: str,
        claim_date: str,
        search_results: List[Dict[str, Any]],
        method_label: str,
        extra_details: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        details = {
            "claim_text": text_input,
            "claim_context": claim_context,
            "claim_date": claim_date,
            "fact_checks": search_results,
            "analysis": analysis,
            "verification_method": method_label,
        }
        if extra_details:
            details.update(extra_details)

        return {
            "verified": analysis.get("verified", False),
            "verdict": analysis.get("verdict", "uncertain"),
            "message": analysis.get("message", "No message produced."),
            "details": details,
        }
services/video_verifier.py
ADDED
|
@@ -0,0 +1,1310 @@
import os
import tempfile
from typing import Dict, Any, Optional, List, Tuple
import cv2
import requests
from PIL import Image, ImageDraw, ImageFont
import subprocess
import json
import asyncio

from .image_verifier import ImageVerifier
from .youtube_api import YouTubeDataAPI
from config import config
import time

class VideoVerifier:
    def __init__(self, api_key: Optional[str] = None):
        """
        Initialize the VideoVerifier with SerpApi credentials

        Args:
            api_key: SerpApi API key. If None, will try to get from environment
        """
        self.api_key = api_key or config.SERP_API_KEY
        if not self.api_key:
            raise ValueError("SERP_API_KEY environment variable or api_key parameter is required")

        # Initialize image verifier for frame analysis
        self.image_verifier = ImageVerifier(api_key)

        # Initialize YouTube Data API client
        self.youtube_api = YouTubeDataAPI(api_key)

        # Video processing parameters
        self.frame_interval = 4  # Extract frame every 4 seconds
        self.clip_duration = 5  # Duration of misleading clip in seconds

    async def verify(self, video_path: Optional[str] = None, claim_context: str = "", claim_date: str = "", video_url: Optional[str] = None) -> Dict[str, Any]:
        """
        Verify a video and generate a visual counter-measure video if false context is detected

        Args:
            video_path: Path to the video file
            claim_context: The claimed context of the video
            claim_date: The claimed date of the video

        Returns:
            Dictionary with verification results and output file path
        """
        try:
            # Track whether yt-dlp was used for the download; needed by the retry check below
            used_ytdlp = False

            # If a video URL is supplied, determine the best verification approach
            if video_url and not video_path:
                # Check if it's a YouTube URL and use API verification
                if self._is_youtube_url(video_url):
                    return await self._verify_youtube_video(video_url, claim_context, claim_date)

                # Check if it's a supported platform for yt-dlp
                if self._is_supported_platform(video_url):
                    return await self._verify_with_ytdlp(video_url, claim_context, claim_date)

                # For unsupported platforms, try direct download first; if not a real video, fallback to yt-dlp
                try:
                    video_path = await self._download_video(video_url)
                except Exception as direct_err:
                    # Always attempt yt-dlp as fallback when available
                    try:
                        video_path = await self._download_with_ytdlp(video_url)
                        used_ytdlp = True
                    except Exception as ytdlp_err:
                        # Return the more informative error
                        raise RuntimeError(f"Direct download failed: {direct_err}; yt-dlp failed: {ytdlp_err}")

            # Extract key frames from video
            frames = await self._extract_key_frames(video_path)

            # If extraction failed and we have a URL, try yt-dlp fallback once
            if (not frames) and video_url and config.USE_STREAM_DOWNLOADER and not used_ytdlp:
                video_path = await self._download_with_ytdlp(video_url)
                used_ytdlp = True
                frames = await self._extract_key_frames(video_path)

            if not frames:
                return {
                    "verified": False,
                    "message": "Could not extract frames from video",
                    "details": {"error": "Frame extraction failed"}
                }

            # STEP 0: Analyze frames with Gemini Vision first (direct frame analysis)
            preliminary_vision_analysis = await self._analyze_frames_with_vision(
                frames, claim_context, claim_date
            )
            print(f"✅ Gemini Vision analysis result: {preliminary_vision_analysis.get('overall_verdict', 'unknown')}")

            # STEP 1: Analyze frames with reverse image search (existing approach)
            # Wrap in try/except so vision analysis can still proceed if search fails
            reverse_search_analysis = None
            try:
                reverse_search_analysis = await self._analyze_frames(frames, claim_context, claim_date)
            except Exception as search_error:
                print(f"⚠️ Reverse image search analysis failed (will use vision analysis only): {search_error}")
                # Continue with vision analysis only

            # STEP 2: Synthesize vision analysis + reverse image search results
            if reverse_search_analysis:
                final_analysis = self._synthesize_video_analyses(
                    preliminary_vision_analysis=preliminary_vision_analysis,
                    reverse_search_analysis=reverse_search_analysis,
                    frames=frames,
                    claim_context=claim_context,
                    claim_date=claim_date,
                )

                if final_analysis:
                    analysis = final_analysis
                else:
                    # Fallback: use vision analysis if synthesis fails
                    if preliminary_vision_analysis.get("overall_verdict") in ["false", "true"]:
                        analysis = preliminary_vision_analysis
                    else:
                        analysis = reverse_search_analysis
            else:
                # No reverse search results, use vision analysis only
                print("⚠️ Using vision analysis only (reverse image search unavailable)")
                analysis = preliminary_vision_analysis

            if analysis.get("overall_verdict") != "false":
                return {
                    "verified": analysis.get("overall_verdict") == "true",
                    "message": analysis.get("overall_summary") or "No decisive false context detected in video frames",
                    "details": {
                        "frames_analyzed": len(frames),
                        "overall_verdict": analysis.get("overall_verdict"),
                        "frame_summaries": analysis.get("frame_summaries", []),
                    }
                }

            # Generate video counter-measure only if we have a specific false frame
            false_ctx = analysis.get("false_context_frame")
            if not false_ctx:
                return {
                    "verified": False,
                    "message": analysis.get("overall_summary") or "False context inferred but no specific frame identified for counter-measure.",
                    "details": {
                        "frames_analyzed": len(frames),
                        "overall_verdict": analysis.get("overall_verdict"),
                        "frame_summaries": analysis.get("frame_summaries", []),
                    }
                }
            output_path = await self._generate_video_counter_measure(
                video_path, false_ctx, claim_context, claim_date
            )

            result: Dict[str, Any] = {
                "verified": True,
                "message": "False context detected and video counter-measure generated",
                "output_path": output_path,
                "false_context_frame": analysis.get("false_context_frame"),
                "details": {
                    "frames_analyzed": len(frames),
                    "claim_context": claim_context,
                    "claim_date": claim_date
                }
            }
            # Attempt Cloudinary cleanup (best-effort) before responding
            await self._cloudinary_cleanup_prefix(config.CLOUDINARY_FOLDER or "frames")
            return result

        except Exception as e:
            return {
                "verified": False,
                "message": f"Error during video verification: {str(e)}",
                "details": {"error": str(e)}
            }

    async def _download_video(self, url: str) -> str:
        try:
            resp = requests.get(url, stream=True, timeout=30)
            resp.raise_for_status()
            content_type = (resp.headers.get("Content-Type") or "").lower()
            looks_like_video = ("video" in content_type) or url.lower().endswith((".mp4", ".mov", ".mkv", ".webm", ".m4v"))
            if not looks_like_video:
                raise RuntimeError(f"URL is not a direct video (content-type={content_type})")
            suffix = ".mp4"
            tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
            bytes_written = 0
            for chunk in resp.iter_content(chunk_size=1 << 14):
                if chunk:
                    tmp.write(chunk)
                    bytes_written += len(chunk)
            tmp.close()
            # Heuristic: reject tiny files that aren't valid containers
            if bytes_written < 200 * 1024:  # 200KB
                os.unlink(tmp.name)
                raise RuntimeError("Downloaded file too small to be a valid video")
            return tmp.name
        except Exception as e:
            raise RuntimeError(f"Failed to download video: {e}")

    async def _download_with_ytdlp(self, url: str) -> str:
        try:
            # Resolve yt-dlp binary
            ytdlp_bin = self._resolve_ytdlp_bin()
            tmp_dir = tempfile.mkdtemp()
            out_path = os.path.join(tmp_dir, "video.%(ext)s")
            cmd = [
                ytdlp_bin,
                "-f", "best[height<=720]/best[height<=480]/best",
                "--no-warnings",
                "--no-call-home",
                "--no-progress",
                "--restrict-filenames",
                "--socket-timeout", "30",
                "--retries", "3",
                "--fragment-retries", "3",
                "--user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
                "--extractor-retries", "3",
                "-o", out_path,
                url,
            ]
            proc = await asyncio.create_subprocess_exec(
                *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
            )
            try:
                await asyncio.wait_for(proc.communicate(), timeout=config.STREAM_DOWNLOAD_TIMEOUT)
            except asyncio.TimeoutError:
                proc.kill()
                raise RuntimeError("yt-dlp timed out")
            if proc.returncode != 0:
                # capture stderr for diagnostics
                raise RuntimeError("yt-dlp failed (non-zero exit)")
            # Resolve resulting file (first mp4 in dir)
            for fname in os.listdir(tmp_dir):
                if fname.lower().endswith((".mp4", ".mkv", ".webm", ".mov")):
                    return os.path.join(tmp_dir, fname)
            raise RuntimeError("yt-dlp produced no playable file")
        except Exception as e:
            raise RuntimeError(f"yt-dlp error: {e}")

    def _resolve_ytdlp_bin(self) -> str:
        # Prefer configured path if executable, else try PATH
        cand = config.YTDLP_BIN or "yt-dlp"
        if os.path.isabs(cand) and os.path.isfile(cand) and os.access(cand, os.X_OK):
            return cand
        from shutil import which
        found = which(cand) or which("yt-dlp")
        if not found:
            raise RuntimeError("yt-dlp not found on PATH; install yt-dlp or set YTDLP_BIN")
        return found

    def _is_youtube_url(self, url: str) -> bool:
        """
        Check if the URL is a YouTube URL

        Args:
            url: URL to check

        Returns:
            True if it's a YouTube URL, False otherwise
        """
        youtube_domains = [
            'youtube.com',
            'www.youtube.com',
            'youtu.be',
            'www.youtu.be',
            'm.youtube.com'
        ]

        url_lower = url.lower()
        return any(domain in url_lower for domain in youtube_domains)

    def _is_supported_platform(self, url: str) -> bool:
        """
        Check if the URL is from a platform supported by yt-dlp

        Args:
            url: URL to check

        Returns:
            True if it's a supported platform, False otherwise
        """
        supported_domains = [
            # Video platforms
            'instagram.com', 'www.instagram.com',
            'tiktok.com', 'www.tiktok.com', 'vm.tiktok.com',
            'twitter.com', 'x.com', 'www.twitter.com', 'www.x.com',
            'facebook.com', 'www.facebook.com', 'fb.watch',
            'vimeo.com', 'www.vimeo.com',
            'twitch.tv', 'www.twitch.tv',
            'dailymotion.com', 'www.dailymotion.com',
            'youtube.com', 'www.youtube.com', 'youtu.be', 'www.youtu.be',

            # Image platforms
            'imgur.com', 'www.imgur.com',
            'flickr.com', 'www.flickr.com',

            # Audio platforms
            'soundcloud.com', 'www.soundcloud.com',
            'mixcloud.com', 'www.mixcloud.com',

            # Alternative platforms
            'lbry.tv', 'odysee.com', 'www.odysee.com',
            'telegram.org', 't.me',
            'linkedin.com', 'www.linkedin.com',

            # Other platforms
            'streamable.com', 'www.streamable.com',
            'rumble.com', 'www.rumble.com',
            'bitchute.com', 'www.bitchute.com',
            'peertube.tv', 'www.peertube.tv'
        ]

        url_lower = url.lower()
        return any(domain in url_lower for domain in supported_domains)

| 316 |
+
async def _verify_with_ytdlp(self, url: str, claim_context: str, claim_date: str) -> Dict[str, Any]:
|
| 317 |
+
"""
|
| 318 |
+
Verify a video from supported platforms using yt-dlp + visual analysis
|
| 319 |
+
|
| 320 |
+
Args:
|
| 321 |
+
url: Video URL from supported platform
|
| 322 |
+
claim_context: The claimed context of the video
|
| 323 |
+
claim_date: The claimed date of the video
|
| 324 |
+
|
| 325 |
+
Returns:
|
| 326 |
+
Dictionary with verification results
|
| 327 |
+
"""
|
| 328 |
+
try:
|
| 329 |
+
print(f"🔍 DEBUG: Verifying video with yt-dlp: {url}")
|
| 330 |
+
|
| 331 |
+
# Download video using yt-dlp
|
| 332 |
+
video_path = await self._download_with_ytdlp(url)
|
| 333 |
+
|
| 334 |
+
# Extract frames for visual verification
|
| 335 |
+
frames = await self._extract_key_frames(video_path)
|
| 336 |
+
|
| 337 |
+
if frames:
|
| 338 |
+
# Perform visual analysis on frames
|
| 339 |
+
visual_analysis = await self._analyze_frames_visually(frames, claim_context, claim_date)
|
| 340 |
+
|
| 341 |
+
# Get platform info
|
| 342 |
+
platform = self._get_platform_name(url)
|
| 343 |
+
|
| 344 |
+
return {
|
| 345 |
+
'verified': visual_analysis.get('verified', True),
|
| 346 |
+
'message': f"✅ Video verified from {platform}: {visual_analysis.get('message', 'Visual analysis completed')}",
|
| 347 |
+
'details': {
|
| 348 |
+
'verification_method': 'ytdlp_plus_visual',
|
| 349 |
+
'platform': platform,
|
| 350 |
+
'url': url,
|
| 351 |
+
'claim_context': claim_context,
|
| 352 |
+
'claim_date': claim_date,
|
| 353 |
+
'visual_analysis': visual_analysis.get('details', {}),
|
| 354 |
+
'frames_analyzed': len(frames)
|
| 355 |
+
},
|
| 356 |
+
'reasoning': f"Video verified from {platform} using yt-dlp and visual analysis. {visual_analysis.get('reasoning', '')}",
|
| 357 |
+
'sources': [url]
|
| 358 |
+
}
|
| 359 |
+
else:
|
| 360 |
+
# Fallback to basic verification if frames can't be extracted
|
| 361 |
+
platform = self._get_platform_name(url)
|
| 362 |
+
return {
|
| 363 |
+
'verified': True,
|
| 364 |
+
'message': f"✅ Video verified from {platform} (basic verification - frame extraction failed)",
|
| 365 |
+
'details': {
|
| 366 |
+
'verification_method': 'ytdlp_basic',
|
| 367 |
+
'platform': platform,
|
| 368 |
+
'url': url,
|
| 369 |
+
'claim_context': claim_context,
|
| 370 |
+
'claim_date': claim_date,
|
| 371 |
+
'limitation': 'Visual frame analysis unavailable'
|
| 372 |
+
},
|
| 373 |
+
'reasoning': f"Video verified from {platform} using yt-dlp. Visual analysis was not possible due to frame extraction issues.",
|
| 374 |
+
'sources': [url]
|
| 375 |
+
}
|
| 376 |
+
|
| 377 |
+
except Exception as e:
|
| 378 |
+
platform = self._get_platform_name(url)
|
| 379 |
+
return {
|
| 380 |
+
'verified': False,
|
| 381 |
+
'message': f'Error during {platform} video verification: {str(e)}',
|
| 382 |
+
'details': {'error': str(e), 'platform': platform},
|
| 383 |
+
'reasoning': f'An error occurred while verifying the {platform} video: {str(e)}',
|
| 384 |
+
'sources': [url]
|
| 385 |
+
}
|
| 386 |
+
|
| 387 |
+
def _get_platform_name(self, url: str) -> str:
|
| 388 |
+
"""Get the platform name from URL"""
|
| 389 |
+
url_lower = url.lower()
|
| 390 |
+
|
| 391 |
+
if 'instagram.com' in url_lower:
|
| 392 |
+
return 'Instagram'
|
| 393 |
+
elif 'tiktok.com' in url_lower or 'vm.tiktok.com' in url_lower:
|
| 394 |
+
return 'TikTok'
|
| 395 |
+
elif 'twitter.com' in url_lower or 'x.com' in url_lower:
|
| 396 |
+
return 'Twitter/X'
|
| 397 |
+
elif 'facebook.com' in url_lower or 'fb.watch' in url_lower:
|
| 398 |
+
return 'Facebook'
|
| 399 |
+
elif 'vimeo.com' in url_lower:
|
| 400 |
+
return 'Vimeo'
|
| 401 |
+
elif 'twitch.tv' in url_lower:
|
| 402 |
+
return 'Twitch'
|
| 403 |
+
elif 'dailymotion.com' in url_lower:
|
| 404 |
+
return 'DailyMotion'
|
| 405 |
+
elif 'imgur.com' in url_lower:
|
| 406 |
+
return 'Imgur'
|
| 407 |
+
elif 'soundcloud.com' in url_lower:
|
| 408 |
+
return 'SoundCloud'
|
| 409 |
+
elif 'mixcloud.com' in url_lower:
|
| 410 |
+
return 'Mixcloud'
|
| 411 |
+
elif 'lbry.tv' in url_lower or 'odysee.com' in url_lower:
|
| 412 |
+
return 'LBRY/Odysee'
|
| 413 |
+
elif 'telegram.org' in url_lower or 't.me' in url_lower:
|
| 414 |
+
return 'Telegram'
|
| 415 |
+
elif 'linkedin.com' in url_lower:
|
| 416 |
+
return 'LinkedIn'
|
| 417 |
+
else:
|
| 418 |
+
return 'Unknown Platform'
|
| 419 |
+
|
| 420 |
+
async def _verify_youtube_video(self, url: str, claim_context: str, claim_date: str) -> Dict[str, Any]:
|
| 421 |
+
"""
|
| 422 |
+
Verify a YouTube video using hybrid approach: API metadata + yt-dlp for visual analysis
|
| 423 |
+
|
| 424 |
+
Args:
|
| 425 |
+
url: YouTube URL
|
| 426 |
+
claim_context: The claimed context of the video
|
| 427 |
+
claim_date: The claimed date of the video
|
| 428 |
+
|
| 429 |
+
Returns:
|
| 430 |
+
Dictionary with verification results
|
| 431 |
+
"""
|
| 432 |
+
try:
|
| 433 |
+
# Step 1: Use YouTube Data API to verify the video exists and get metadata
|
| 434 |
+
verification_result = self.youtube_api.verify_video_exists(url)
|
| 435 |
+
|
| 436 |
+
if not verification_result.get('verified'):
|
| 437 |
+
return {
|
| 438 |
+
'verified': False,
|
| 439 |
+
'message': f'YouTube video verification failed: {verification_result.get("message", "Unknown error")}',
|
| 440 |
+
'details': verification_result.get('details', {}),
|
| 441 |
+
'reasoning': f'The video could not be verified through YouTube Data API. {verification_result.get("message", "Unknown error")}',
|
| 442 |
+
'sources': [url]
|
| 443 |
+
}
|
| 444 |
+
|
| 445 |
+
# Step 2: Video exists, now try to download for visual analysis
|
| 446 |
+
video_details = verification_result.get('details', {})
|
| 447 |
+
|
| 448 |
+
try:
|
| 449 |
+
# Attempt to download video for frame analysis
|
| 450 |
+
print(f"🔍 DEBUG: Attempting to download video for visual analysis: {url}")
|
| 451 |
+
video_path = await self._download_with_ytdlp(url)
|
| 452 |
+
|
| 453 |
+
# Extract frames for visual verification
|
| 454 |
+
frames = await self._extract_key_frames(video_path)
|
| 455 |
+
|
| 456 |
+
if frames:
|
| 457 |
+
# Perform visual analysis on frames
|
| 458 |
+
visual_analysis = await self._analyze_frames_visually(frames, claim_context, claim_date)
|
| 459 |
+
|
| 460 |
+
# Combine metadata + visual analysis
|
| 461 |
+
return {
|
| 462 |
+
'verified': visual_analysis.get('verified', True),
|
| 463 |
+
'message': f"✅ Video verified with visual analysis: '{video_details.get('title', 'Unknown Title')}' by {video_details.get('channel_title', 'Unknown Channel')}\n\n{visual_analysis.get('message', '')}",
|
| 464 |
+
'details': {
|
| 465 |
+
'verification_method': 'hybrid_youtube_api_plus_visual',
|
| 466 |
+
'video_id': video_details.get('video_id'),
|
| 467 |
+
'title': video_details.get('title'),
|
| 468 |
+
'channel_title': video_details.get('channel_title'),
|
| 469 |
+
'published_at': video_details.get('published_at'),
|
| 470 |
+
'duration': video_details.get('duration'),
|
| 471 |
+
'view_count': video_details.get('view_count'),
|
| 472 |
+
'thumbnail_url': video_details.get('thumbnail_url'),
|
| 473 |
+
'claim_context': claim_context,
|
| 474 |
+
'claim_date': claim_date,
|
| 475 |
+
'visual_analysis': visual_analysis.get('details', {}),
|
| 476 |
+
'frames_analyzed': len(frames)
|
| 477 |
+
},
|
| 478 |
+
'reasoning': f"Video verified through YouTube Data API and visual analysis. {visual_analysis.get('reasoning', '')}",
|
| 479 |
+
'sources': [url]
|
| 480 |
+
}
|
| 481 |
+
else:
|
| 482 |
+
# Fallback to metadata-only verification
|
| 483 |
+
print(f"⚠️ DEBUG: Could not extract frames, falling back to metadata verification")
|
| 484 |
+
return self._create_metadata_only_response(video_details, claim_context, claim_date, url)
|
| 485 |
+
|
| 486 |
+
except Exception as download_error:
|
| 487 |
+
# Fallback to metadata-only verification if download fails
|
| 488 |
+
print(f"⚠️ DEBUG: Video download failed: {download_error}, falling back to metadata verification")
|
| 489 |
+
return self._create_metadata_only_response(video_details, claim_context, claim_date, url)
|
| 490 |
+
|
| 491 |
+
except Exception as e:
|
| 492 |
+
return {
|
| 493 |
+
'verified': False,
|
| 494 |
+
'message': f'Error during YouTube video verification: {str(e)}',
|
| 495 |
+
'details': {'error': str(e)},
|
| 496 |
+
'reasoning': f'An error occurred while verifying the YouTube video: {str(e)}',
|
| 497 |
+
'sources': [url]
|
| 498 |
+
}
|
| 499 |
+
|
| 500 |
+
def _create_metadata_only_response(self, video_details: Dict[str, Any], claim_context: str, claim_date: str, url: str) -> Dict[str, Any]:
|
| 501 |
+
"""Create a metadata-only verification response when visual analysis fails"""
|
| 502 |
+
verification_message = f"✅ Video verified (metadata only): '{video_details.get('title', 'Unknown Title')}' by {video_details.get('channel_title', 'Unknown Channel')}"
|
| 503 |
+
|
| 504 |
+
# Add context analysis if available
|
| 505 |
+
if claim_context and claim_context.lower() != "the user wants to verify the content of the provided youtube video.":
|
| 506 |
+
verification_message += f"\n\n📝 Claim Context: {claim_context}"
|
| 507 |
+
verification_message += f"\n⚠️ Note: Visual content analysis unavailable - only metadata verification performed"
|
| 508 |
+
|
| 509 |
+
if claim_date and claim_date.strip():
|
| 510 |
+
verification_message += f"\n📅 Claimed Date: {claim_date}"
|
| 511 |
+
|
| 512 |
+
verification_message += f"\n📊 Video Stats: {video_details.get('view_count', 'Unknown')} views, Published: {video_details.get('published_at', 'Unknown')}"
|
| 513 |
+
|
| 514 |
+
return {
|
| 515 |
+
'verified': True,
|
| 516 |
+
'message': verification_message,
|
| 517 |
+
'details': {
|
| 518 |
+
'verification_method': 'youtube_data_api_metadata_only',
|
| 519 |
+
'video_id': video_details.get('video_id'),
|
| 520 |
+
'title': video_details.get('title'),
|
| 521 |
+
'channel_title': video_details.get('channel_title'),
|
| 522 |
+
'published_at': video_details.get('published_at'),
|
| 523 |
+
'duration': video_details.get('duration'),
|
| 524 |
+
'view_count': video_details.get('view_count'),
|
| 525 |
+
'thumbnail_url': video_details.get('thumbnail_url'),
|
| 526 |
+
'claim_context': claim_context,
|
| 527 |
+
'claim_date': claim_date,
|
| 528 |
+
'limitation': 'Visual content analysis unavailable'
|
| 529 |
+
},
|
| 530 |
+
'reasoning': f"Video verified through YouTube Data API metadata only. Visual content analysis was not possible due to download limitations.",
|
| 531 |
+
'sources': [url]
|
| 532 |
+
}
|
| 533 |
+
|
| 534 |
+
async def _analyze_frames_visually(self, frames: List[Tuple[str, float]], claim_context: str, claim_date: str) -> Dict[str, Any]:
|
| 535 |
+
"""
|
| 536 |
+
Analyze extracted frames for visual verification
|
| 537 |
+
|
| 538 |
+
Args:
|
| 539 |
+
frames: List of (frame_path, timestamp) tuples
|
| 540 |
+
claim_context: The claimed context
|
| 541 |
+
claim_date: The claimed date
|
| 542 |
+
|
| 543 |
+
Returns:
|
| 544 |
+
Dictionary with visual analysis results
|
| 545 |
+
"""
|
| 546 |
+
try:
|
| 547 |
+
# Analyze each frame using the image verifier
|
| 548 |
+
frame_analyses = []
|
| 549 |
+
|
| 550 |
+
for frame_path, timestamp in frames:
|
| 551 |
+
try:
|
| 552 |
+
frame_result = await self.image_verifier.verify(
|
| 553 |
+
image_path=frame_path,
|
| 554 |
+
claim_context=f"{claim_context} (Frame at {timestamp}s)",
|
| 555 |
+
claim_date=claim_date
|
| 556 |
+
)
|
| 557 |
+
frame_analyses.append({
|
| 558 |
+
'timestamp': timestamp,
|
| 559 |
+
'result': frame_result
|
| 560 |
+
})
|
| 561 |
+
except Exception as e:
|
| 562 |
+
print(f"⚠️ DEBUG: Frame analysis failed for {timestamp}s: {e}")
|
| 563 |
+
continue
|
| 564 |
+
|
| 565 |
+
if not frame_analyses:
|
| 566 |
+
return {
|
| 567 |
+
'verified': False,
|
| 568 |
+
'message': 'No frames could be analyzed',
|
| 569 |
+
'details': {'error': 'All frame analyses failed'},
|
| 570 |
+
'reasoning': 'Visual analysis failed for all extracted frames'
|
| 571 |
+
}
|
| 572 |
+
|
| 573 |
+
# Determine overall verification result
|
| 574 |
+
verified_count = sum(1 for analysis in frame_analyses if analysis['result'].get('verified', False))
|
| 575 |
+
total_frames = len(frame_analyses)
|
| 576 |
+
|
| 577 |
+
if verified_count == 0:
|
| 578 |
+
verification_status = False
|
| 579 |
+
message = f"❌ Visual analysis found no supporting evidence in {total_frames} frames"
|
| 580 |
+
elif verified_count == total_frames:
|
| 581 |
+
verification_status = True
|
| 582 |
+
message = f"✅ Visual analysis confirmed claim in all {total_frames} frames"
|
| 583 |
+
else:
|
| 584 |
+
verification_status = True # Partial verification
|
| 585 |
+
message = f"⚠️ Visual analysis partially confirmed claim in {verified_count}/{total_frames} frames"
|
| 586 |
+
|
| 587 |
+
return {
|
| 588 |
+
'verified': verification_status,
|
| 589 |
+
'message': message,
|
| 590 |
+
'details': {
|
| 591 |
+
'frames_analyzed': total_frames,
|
| 592 |
+
'verified_frames': verified_count,
|
| 593 |
+
'frame_results': frame_analyses
|
| 594 |
+
},
|
| 595 |
+
'reasoning': f"Analyzed {total_frames} video frames. {verified_count} frames supported the claim."
|
| 596 |
+
}
|
| 597 |
+
|
| 598 |
+
except Exception as e:
|
| 599 |
+
return {
|
| 600 |
+
'verified': False,
|
| 601 |
+
'message': f'Visual analysis failed: {str(e)}',
|
| 602 |
+
'details': {'error': str(e)},
|
| 603 |
+
'reasoning': f'Error during visual frame analysis: {str(e)}'
|
| 604 |
+
}
|
| 605 |
+
|
| 606 |
+
async def _extract_key_frames(self, video_path: str) -> List[Tuple[str, float]]:
|
| 607 |
+
"""
|
| 608 |
+
Extract key frames from video at regular intervals
|
| 609 |
+
|
| 610 |
+
Args:
|
| 611 |
+
video_path: Path to the video file
|
| 612 |
+
|
| 613 |
+
Returns:
|
| 614 |
+
List of tuples (frame_path, timestamp)
|
| 615 |
+
"""
|
| 616 |
+
try:
|
| 617 |
+
frames = []
|
| 618 |
+
cap = cv2.VideoCapture(video_path)
|
| 619 |
+
|
| 620 |
+
if not cap.isOpened():
|
| 621 |
+
print(f"Error: Could not open video file {video_path}")
|
| 622 |
+
return []
|
| 623 |
+
|
| 624 |
+
# Get video properties
|
| 625 |
+
fps = cap.get(cv2.CAP_PROP_FPS)
|
| 626 |
+
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
| 627 |
+
duration = total_frames / fps if fps > 0 else 0
|
| 628 |
+
|
| 629 |
+
frame_interval_frames = int(fps * self.frame_interval)
|
| 630 |
+
|
| 631 |
+
frame_count = 0
|
| 632 |
+
saved_count = 0
|
| 633 |
+
|
| 634 |
+
while True:
|
| 635 |
+
ret, frame = cap.read()
|
| 636 |
+
if not ret:
|
| 637 |
+
break
|
| 638 |
+
|
| 639 |
+
# Save frame at regular intervals
|
| 640 |
+
if frame_count % frame_interval_frames == 0:
|
| 641 |
+
timestamp = frame_count / fps
|
| 642 |
+
# Save frame into public/frames for local static serving
|
| 643 |
+
out_dir = os.path.join("public", "frames")
|
| 644 |
+
os.makedirs(out_dir, exist_ok=True)
|
| 645 |
+
frame_file = f"frame_{int(timestamp*1000)}.jpg"
|
| 646 |
+
frame_path = os.path.join(out_dir, frame_file)
|
| 647 |
+
cv2.imwrite(frame_path, frame, [int(cv2.IMWRITE_JPEG_QUALITY), 85])
|
| 648 |
+
frames.append((frame_path, timestamp))
|
| 649 |
+
saved_count += 1
|
| 650 |
+
|
| 651 |
+
# Limit number of frames to analyze
|
| 652 |
+
if saved_count >= 10: # Max 10 frames
|
| 653 |
+
break
|
| 654 |
+
|
| 655 |
+
frame_count += 1
|
| 656 |
+
|
| 657 |
+
cap.release()
|
| 658 |
+
return frames
|
| 659 |
+
|
| 660 |
+
except Exception as e:
|
| 661 |
+
print(f"Error extracting frames: {e}")
|
| 662 |
+
return []
|
| 663 |
+
|
| 664 |
+
async def _analyze_frames_with_vision(
|
| 665 |
+
self,
|
| 666 |
+
frames: List[Tuple[str, float]],
|
| 667 |
+
claim_context: str,
|
| 668 |
+
claim_date: str
|
| 669 |
+
) -> Dict[str, Any]:
|
| 670 |
+
"""
|
| 671 |
+
Analyze video frames directly with Gemini Vision (first pass).
|
| 672 |
+
Detects AI-generated/deepfake/manipulation in frames.
|
| 673 |
+
|
| 674 |
+
Args:
|
| 675 |
+
frames: List of (frame_path, timestamp) tuples
|
| 676 |
+
claim_context: The claimed context
|
| 677 |
+
claim_date: The claimed date
|
| 678 |
+
|
| 679 |
+
Returns:
|
| 680 |
+
Dictionary with preliminary vision analysis
|
| 681 |
+
"""
|
| 682 |
+
try:
|
| 683 |
+
if not self.image_verifier.gemini_model:
|
| 684 |
+
return {
|
| 685 |
+
"overall_verdict": "uncertain",
|
| 686 |
+
"overall_summary": "Gemini Vision not available",
|
| 687 |
+
"frame_analyses": [],
|
| 688 |
+
"analysis_method": "vision_unavailable",
|
| 689 |
+
}
|
| 690 |
+
|
| 691 |
+
frame_analyses = []
|
| 692 |
+
for frame_path, timestamp in frames:
|
| 693 |
+
try:
|
| 694 |
+
# Use image verifier's vision analysis method
|
| 695 |
+
vision_result = await self.image_verifier._analyze_image_with_vision(
|
| 696 |
+
image_path=frame_path,
|
| 697 |
+
image_url=None,
|
| 698 |
+
claim_context=f"{claim_context} (Frame at {timestamp}s)",
|
| 699 |
+
claim_date=claim_date
|
| 700 |
+
)
|
| 701 |
+
frame_analyses.append({
|
| 702 |
+
"timestamp": timestamp,
|
| 703 |
+
"frame_path": frame_path,
|
| 704 |
+
"vision_analysis": vision_result,
|
| 705 |
+
})
|
| 706 |
+
except Exception as e:
|
| 707 |
+
print(f"⚠️ Vision analysis failed for frame at {timestamp}s: {e}")
|
| 708 |
+
continue
|
| 709 |
+
|
| 710 |
+
if not frame_analyses:
|
| 711 |
+
return {
|
| 712 |
+
"overall_verdict": "uncertain",
|
| 713 |
+
"overall_summary": "No frames could be analyzed with vision",
|
| 714 |
+
"frame_analyses": [],
|
| 715 |
+
"analysis_method": "vision_no_frames",
|
| 716 |
+
}
|
| 717 |
+
|
| 718 |
+
# Aggregate vision results across frames
|
| 719 |
+
false_count = sum(1 for fa in frame_analyses if fa["vision_analysis"].get("verdict") == "false")
|
| 720 |
+
true_count = sum(1 for fa in frame_analyses if fa["vision_analysis"].get("verdict") == "true")
|
| 721 |
+
uncertain_count = len(frame_analyses) - false_count - true_count
|
| 722 |
+
|
| 723 |
+
# Determine overall verdict
|
| 724 |
+
if false_count > true_count and false_count > uncertain_count:
|
| 725 |
+
overall_verdict = "false"
|
| 726 |
+
overall_summary = f"Vision analysis detected manipulation/AI-generated content in {false_count}/{len(frame_analyses)} frames"
|
| 727 |
+
elif true_count > false_count and true_count > uncertain_count:
|
| 728 |
+
overall_verdict = "true"
|
| 729 |
+
overall_summary = f"Vision analysis found authentic content in {true_count}/{len(frame_analyses)} frames"
|
| 730 |
+
else:
|
| 731 |
+
overall_verdict = "uncertain"
|
| 732 |
+
overall_summary = f"Vision analysis inconclusive: {true_count} true, {false_count} false, {uncertain_count} uncertain across {len(frame_analyses)} frames"
|
| 733 |
+
|
| 734 |
+
return {
|
| 735 |
+
"overall_verdict": overall_verdict,
|
| 736 |
+
"overall_summary": overall_summary,
|
| 737 |
+
"frame_analyses": frame_analyses,
|
| 738 |
+
"false_count": false_count,
|
| 739 |
+
"true_count": true_count,
|
| 740 |
+
"uncertain_count": uncertain_count,
|
| 741 |
+
"analysis_method": "gemini_vision",
|
| 742 |
+
}
|
| 743 |
+
|
| 744 |
+
except Exception as e:
|
| 745 |
+
print(f"[vision] Error in frame vision analysis: {e}")
|
| 746 |
+
return {
|
| 747 |
+
"overall_verdict": "uncertain",
|
| 748 |
+
"overall_summary": f"Error during vision analysis: {str(e)}",
|
| 749 |
+
"frame_analyses": [],
|
| 750 |
+
"analysis_method": "vision_error",
|
| 751 |
+
}
|
| 752 |
+
|
| 753 |
+
def _synthesize_video_analyses(
|
| 754 |
+
self,
|
| 755 |
+
preliminary_vision_analysis: Dict[str, Any],
|
| 756 |
+
reverse_search_analysis: Dict[str, Any],
|
| 757 |
+
frames: List[Tuple[str, float]],
|
| 758 |
+
claim_context: str,
|
| 759 |
+
claim_date: str,
|
| 760 |
+
) -> Optional[Dict[str, Any]]:
|
| 761 |
+
"""
|
| 762 |
+
Synthesize Gemini Vision analysis with reverse image search results.
|
| 763 |
+
"""
|
| 764 |
+
try:
|
| 765 |
+
if not self.image_verifier.gemini_model:
|
| 766 |
+
return None
|
| 767 |
+
|
| 768 |
+
prompt = f"""You are an expert video verification analyst. Combine direct frame analysis (Gemini Vision) with reverse image search evidence to produce a final verdict.
|
| 769 |
+
|
| 770 |
+
CLAIM: {claim_context}
|
| 771 |
+
CLAIM DATE: {claim_date}
|
| 772 |
+
|
| 773 |
+
DIRECT FRAME ANALYSIS (Gemini Vision):
|
| 774 |
+
{json.dumps(preliminary_vision_analysis or {}, indent=2, ensure_ascii=False)}
|
| 775 |
+
|
| 776 |
+
REVERSE IMAGE SEARCH ANALYSIS:
|
| 777 |
+
{json.dumps(reverse_search_analysis or {}, indent=2, ensure_ascii=False)}
|
| 778 |
+
|
| 779 |
+
TOTAL FRAMES ANALYZED: {len(frames)}
|
| 780 |
+
|
| 781 |
+
INSTRUCTIONS:
|
| 782 |
+
- Combine both analyses to make a final decision (true/false/uncertain)
|
| 783 |
+
- If vision analysis detects AI-generated/manipulated content in multiple frames, prioritize that
|
| 784 |
+
- If reverse image search finds contradictory evidence, factor that in
|
| 785 |
+
- Consider consistency across frames
|
| 786 |
+
- If evidence is thin, keep the tone cautious
|
| 787 |
+
- Provide clear, actionable messaging for the end user
|
| 788 |
+
|
| 789 |
+
Respond ONLY in this JSON format:
|
| 790 |
+
{{
|
| 791 |
+
"overall_verdict": "true|false|uncertain",
|
| 792 |
+
"overall_summary": "Concise user-facing summary combining both analyses",
|
| 793 |
+
"confidence": "high|medium|low",
|
| 794 |
+
"reasoning": "Brief reasoning trail you followed",
|
| 795 |
+
"vision_findings": "Key findings from direct frame analysis",
|
| 796 |
+
"search_findings": "Key findings from reverse image search"
|
| 797 |
+
}}"""
|
| 798 |
+
|
| 799 |
+
response = self.image_verifier.gemini_model.generate_content(prompt)
|
| 800 |
+
response_text = response.text.strip()
|
| 801 |
+
|
| 802 |
+
if response_text.startswith("```json"):
|
| 803 |
+
response_text = response_text.replace("```json", "").replace("```", "").strip()
|
| 804 |
+
elif response_text.startswith("```"):
|
| 805 |
+
response_text = response_text.replace("```", "").strip()
|
| 806 |
+
|
| 807 |
+
final_analysis = json.loads(response_text)
|
| 808 |
+
final_analysis.setdefault("overall_verdict", "uncertain")
|
| 809 |
+
final_analysis.setdefault("overall_summary", "Unable to synthesize final verdict.")
|
| 810 |
+
final_analysis.setdefault("confidence", "low")
|
| 811 |
+
final_analysis["analysis_method"] = "hybrid_vision_and_search"
|
| 812 |
+
|
| 813 |
+
# Preserve frame summaries and sources from reverse search
|
| 814 |
+
final_analysis["frame_summaries"] = reverse_search_analysis.get("frame_summaries", [])
|
| 815 |
+
final_analysis["consolidated_sources"] = reverse_search_analysis.get("consolidated_sources", [])
|
| 816 |
+
final_analysis["preliminary_vision_analysis"] = preliminary_vision_analysis
|
| 817 |
+
final_analysis["reverse_search_analysis"] = reverse_search_analysis
|
| 818 |
+
|
| 819 |
+
return final_analysis
|
| 820 |
+
|
| 821 |
+
except Exception as e:
|
| 822 |
+
print(f"Video hybrid synthesis error: {e}")
|
| 823 |
+
return None
|
| 824 |
+
|
| 825 |
+
async def _analyze_frames(self, frames: List[Tuple[str, float]],
|
| 826 |
+
claim_context: str, claim_date: str) -> Dict[str, Any]:
|
| 827 |
+
"""
|
| 828 |
+
Analyze extracted frames for false context
|
| 829 |
+
|
| 830 |
+
Args:
|
| 831 |
+
frames: List of (frame_path, timestamp) tuples
|
| 832 |
+
claim_context: The claimed context
|
| 833 |
+
claim_date: The claimed date
|
| 834 |
+
|
| 835 |
+
Returns:
|
| 836 |
+
Aggregated analysis with overall verdict and optional false frame
|
| 837 |
+
"""
|
| 838 |
+
frame_summaries: List[Dict[str, Any]] = []
|
| 839 |
+
false_hit: Optional[Dict[str, Any]] = None
|
| 840 |
+
true_hit: Optional[Dict[str, Any]] = None
|
| 841 |
+
saw_false_validated = False
|
| 842 |
+
saw_true_validated = False
|
| 843 |
+
# 1) Per-frame: only gather evidence; defer verdict to a single final pass
|
| 844 |
+
all_evidence: List[Dict[str, Any]] = []
|
| 845 |
+
for frame_path, timestamp in frames:
|
| 846 |
+
try:
|
| 847 |
+
# Upload frame to Cloudinary if configured, else local static URL
|
| 848 |
+
frame_url = None
|
| 849 |
+
if config.CLOUDINARY_CLOUD_NAME and (config.CLOUDINARY_UPLOAD_PRESET or (config.CLOUDINARY_API_KEY and config.CLOUDINARY_API_SECRET)):
|
| 850 |
+
frame_url = await self._upload_frame_cloudinary(frame_path)
|
| 851 |
+
if not frame_url:
|
| 852 |
+
# fallback local (note: SerpApi can't access localhost; cloudinary is preferred)
|
| 853 |
+
from urllib.parse import quote
|
| 854 |
+
rel = frame_path.replace(os.path.join("public", ''), "") if frame_path.startswith("public"+os.sep) else os.path.basename(frame_path)
|
| 855 |
+
frame_url = f"http://127.0.0.1:{config.SERVICE_PORT}/static/{quote(rel)}"
|
| 856 |
+
print("[video] analyze_frame", {"ts": timestamp, "path": frame_path})
|
| 857 |
+
# Gather evidence only for this frame
|
| 858 |
+
ev = await self.image_verifier.gather_evidence(
|
| 859 |
+
image_path=None, image_url=frame_url, claim_context=claim_context
|
| 860 |
+
)
|
| 861 |
+
all_evidence.extend(ev or [])
|
| 862 |
+
# Populate a placeholder entry per frame (no verdict yet)
|
| 863 |
+
frame_entry = {
|
| 864 |
+
"timestamp": timestamp,
|
| 865 |
+
"verdict": None,
|
| 866 |
+
"summary": None,
|
| 867 |
+
"sources": None,
|
| 868 |
+
"frame_path": frame_path,
|
| 869 |
+
"validator": None,
|
| 870 |
+
"details": {"evidence": ev or []},
|
| 871 |
+
}
|
| 872 |
+
# Compute rule-based confidence (0..1)
|
| 873 |
+
conf = 0.2
|
| 874 |
+
reasons: List[str] = []
|
| 875 |
+
checks = {}
|
| 876 |
+
if frame_entry["verdict"] == "true":
|
| 877 |
+
if checks.get("relation_comention"):
|
| 878 |
+
conf += 0.3; reasons.append("relation_comention")
|
| 879 |
+
if frame_entry["verdict"] == "false":
|
| 880 |
+
if not checks.get("relation_comention"):
|
| 881 |
+
conf += 0.25; reasons.append("no_relation_support")
|
| 882 |
+
if checks.get("timeframe_citations") or checks.get("timeframe_match"):
|
| 883 |
+
conf += 0.15; reasons.append("timeframe_match")
|
| 884 |
+
eos = checks.get("entity_overlap_score")
|
| 885 |
+
try:
|
| 886 |
+
if eos is not None and float(eos) >= 0.7:
|
| 887 |
+
conf += 0.1; reasons.append("entity_overlap")
|
| 888 |
+
except Exception:
|
| 889 |
+
pass
|
| 890 |
+
# Penalize if sources dominated by low-priority domains
|
| 891 |
+
low_priority_hits = 0
|
| 892 |
+
total_sources = 0
|
| 893 |
+
try:
|
| 894 |
+
from urllib.parse import urlparse
|
| 895 |
+
for s in (frame_entry.get("sources") or []):
|
| 896 |
+
total_sources += 1
|
| 897 |
+
net = urlparse((s.get("link") or "")).netloc
|
| 898 |
+
if net in config.LOW_PRIORITY_DOMAINS:
|
| 899 |
+
low_priority_hits += 1
|
| 900 |
+
except Exception:
|
| 901 |
+
pass
|
| 902 |
+
if total_sources > 0 and low_priority_hits / float(total_sources) >= 0.5:
|
| 903 |
+
conf -= 0.2; reasons.append("low_priority_sources")
|
| 904 |
+
if conf < 0.0: conf = 0.0
|
| 905 |
+
if conf > 1.0: conf = 1.0
|
| 906 |
+
frame_entry["confidence"] = conf
|
| 907 |
+
frame_entry["confidence_reasons"] = reasons
|
| 908 |
+
print("[video] frame_result", {"ts": timestamp, "verdict": frame_entry["verdict"], "passed": (frame_entry.get("validator") or {}).get("passed")})
|
| 909 |
+
# No per-frame debug when gathering evidence only
|
| 910 |
+
frame_summaries.append(frame_entry)
|
| 911 |
+
# No per-frame validator flags when gathering evidence only
|
| 912 |
+
if false_hit is None:
|
| 913 |
+
false_hit = {
|
| 914 |
+
"timestamp": timestamp,
|
| 915 |
+
"frame_path": frame_path,
|
| 916 |
+
"evidence_image": None,
|
| 917 |
+
"details": {"evidence": ev or []},
|
| 918 |
+
}
|
| 919 |
+
if true_hit is None:
|
| 920 |
+
true_hit = {
|
| 921 |
+
"timestamp": timestamp,
|
| 922 |
+
"frame_path": frame_path,
|
| 923 |
+
"details": {"evidence": ev or []},
|
| 924 |
+
}
|
| 925 |
+
|
| 926 |
+
except Exception as e:
|
| 927 |
+
print(f"Error analyzing frame {frame_path}: {e}")
|
| 928 |
+
# Keep files even on error for debugging
|
| 929 |
+
|
| 930 |
+
# 2) Single final pass: send aggregated evidence to image verifier's Gemini summarizer
|
| 931 |
+
# Reuse image verifier's structured summarizer for a consolidated verdict
|
| 932 |
+
# Use the simple majority-based summarizer per product rule
|
| 933 |
+
final_llm = self.image_verifier._summarize_with_gemini_majority(
|
| 934 |
+
claim_context=claim_context,
|
| 935 |
+
claim_date=claim_date,
|
| 936 |
+
evidence=all_evidence[:24], # cap to keep prompt manageable
|
| 937 |
+
) or {}
|
| 938 |
+
final_verdict = (final_llm.get("verdict") or "uncertain").lower()
|
| 939 |
+
# Prefer LLM clarification if present; else fallback to previous summary
|
| 940 |
+
final_summary = final_llm.get("clarification") or final_llm.get("summary") or "Consolidated evidence analyzed."
|
| 941 |
+
|
| 942 |
+
# Deterministic co-mention vote to override ambiguous LLM outcomes
|
| 943 |
+
def _tokens(text: str) -> List[str]:
|
| 944 |
+
import re
|
| 945 |
+
return re.findall(r"[a-z0-9]{3,}", (text or "").lower())
|
| 946 |
+
|
| 947 |
+
def _split_relation(claim: str) -> Tuple[List[str], List[str]]:
|
| 948 |
+
# Heuristic: split on ' with ' to get subject vs object; fallback to all tokens as subject
|
| 949 |
+
cl = (claim or "").strip()
|
| 950 |
+
i = cl.lower().find(" with ")
|
| 951 |
+
if i != -1:
|
| 952 |
+
subj = cl[:i].strip()
|
| 953 |
+
obj = cl[i+6:].strip().split(".")[0]
|
| 954 |
+
else:
|
| 955 |
+
subj = cl
|
| 956 |
+
obj = ""
|
| 957 |
+
return list(set(_tokens(subj))), list(set(_tokens(obj)))
|
| 958 |
+
|
| 959 |
+
def _evidence_text(ev: Dict[str, Any]) -> str:
|
| 960 |
+
return " ".join([t for t in [ev.get("title"), ev.get("snippet"), ev.get("source")] if t])
|
| 961 |
+
|
| 962 |
+
subj_toks, obj_toks = _split_relation(claim_context)
|
| 963 |
+
support = 0
|
| 964 |
+
contra = 0
|
| 965 |
+
for ev in all_evidence[:24]:
|
| 966 |
+
txt_toks = set(_tokens(_evidence_text(ev)))
|
| 967 |
+
if not txt_toks:
|
| 968 |
+
continue
|
| 969 |
+
subj_hit = bool(subj_toks and (set(subj_toks) & txt_toks))
|
| 970 |
+
obj_hit = bool(obj_toks and (set(obj_toks) & txt_toks))
|
| 971 |
+
if subj_hit and obj_hit:
|
| 972 |
+
support += 1
|
| 973 |
+
elif subj_hit and obj_toks:
|
| 974 |
+
# mentions subject but not the claimed object → treat as contradiction to the claimed relation
|
| 975 |
+
contra += 1
|
| 976 |
+
|
| 977 |
+
# Apply override rules: prioritize clear majority; else keep LLM
|
| 978 |
+
if support == 0 and contra > 0:
|
| 979 |
+
final_verdict = "false" # keep LLM clarification
|
| 980 |
+
elif support > contra and (support - contra) >= 1:
|
| 981 |
+
final_verdict = "true" # keep LLM clarification
|
| 982 |
+
elif contra > support and (contra - support) >= 1:
|
| 983 |
+
final_verdict = "false" # keep LLM clarification
|
| 984 |
+
# else keep LLM's verdict/summary
|
| 985 |
+
|
| 986 |
+
return {
|
| 987 |
+
"overall_verdict": final_verdict,
|
| 988 |
+
"overall_summary": final_summary,
|
| 989 |
+
"frame_summaries": frame_summaries,
|
| 990 |
+
"consolidated_sources": final_llm.get("top_sources") or self.image_verifier._top_sources(all_evidence, 3),
|
| 991 |
+
}
|
| 992 |
+
|
| 993 |
+
async def _upload_frame_cloudinary(self, frame_path: str) -> Optional[str]:
|
| 994 |
+
try:
|
| 995 |
+
import hashlib
|
| 996 |
+
import requests
|
| 997 |
+
cloud = config.CLOUDINARY_CLOUD_NAME
|
| 998 |
+
folder = config.CLOUDINARY_FOLDER.strip('/')
|
| 999 |
+
# Unsigned upload if preset provided
|
| 1000 |
+
if config.CLOUDINARY_UPLOAD_PRESET:
|
| 1001 |
+
url = f"https://api.cloudinary.com/v1_1/{cloud}/image/upload"
|
| 1002 |
+
with open(frame_path, 'rb') as f:
|
| 1003 |
+
files = {"file": f}
|
| 1004 |
+
data = {"upload_preset": config.CLOUDINARY_UPLOAD_PRESET, "folder": folder}
|
| 1005 |
+
r = requests.post(url, files=files, data=data, timeout=30)
|
| 1006 |
+
r.raise_for_status()
|
| 1007 |
+
return r.json().get("secure_url")
|
| 1008 |
+
# Signed upload
|
| 1009 |
+
ts = str(int(time.time()))
|
| 1010 |
+
params_to_sign = {"timestamp": ts, "folder": folder}
|
| 1011 |
+
to_sign = "&".join([f"{k}={v}" for k, v in sorted(params_to_sign.items())]) + config.CLOUDINARY_API_SECRET
|
| 1012 |
+
signature = hashlib.sha1(to_sign.encode('utf-8')).hexdigest()
|
| 1013 |
+
url = f"https://api.cloudinary.com/v1_1/{cloud}/image/upload"
|
| 1014 |
+
with open(frame_path, 'rb') as f:
|
| 1015 |
+
files = {"file": f}
|
| 1016 |
+
data = {
|
| 1017 |
+
"api_key": config.CLOUDINARY_API_KEY,
|
| 1018 |
+
"timestamp": ts,
|
| 1019 |
+
"signature": signature,
|
| 1020 |
+
"folder": folder,
|
| 1021 |
+
}
|
| 1022 |
+
r = requests.post(url, files=files, data=data, timeout=30)
|
| 1023 |
+
r.raise_for_status()
|
| 1024 |
+
return r.json().get("secure_url")
|
| 1025 |
+
except Exception as e:
|
| 1026 |
+
print(f"Cloudinary upload failed: {e}")
|
| 1027 |
+
return None
|
| 1028 |
+
|
| 1029 |
+
async def _generate_video_counter_measure(self, video_path: str,
|
| 1030 |
+
false_context_frame: Dict[str, Any],
|
| 1031 |
+
claim_context: str, claim_date: str) -> str:
|
| 1032 |
+
"""
|
| 1033 |
+
Generate a video counter-measure
|
| 1034 |
+
|
| 1035 |
+
Args:
|
| 1036 |
+
video_path: Path to the original video
|
| 1037 |
+
false_context_frame: Information about the false context frame
|
| 1038 |
+
claim_context: The claimed context
|
| 1039 |
+
claim_date: The claimed date
|
| 1040 |
+
|
| 1041 |
+
Returns:
|
| 1042 |
+
Path to the generated counter-measure video
|
| 1043 |
+
"""
|
| 1044 |
+
try:
|
| 1045 |
+
# Create temporary directory for video processing
|
| 1046 |
+
temp_dir = tempfile.mkdtemp()
|
| 1047 |
+
|
| 1048 |
+
# Generate video components
|
| 1049 |
+
title_clip = await self._create_title_clip(temp_dir, claim_context, claim_date)
|
| 1050 |
+
misleading_clip = await self._create_misleading_clip(
|
| 1051 |
+
video_path, false_context_frame["timestamp"], temp_dir
|
| 1052 |
+
)
|
| 1053 |
+
debunk_clip = await self._create_debunk_clip(
|
| 1054 |
+
temp_dir, false_context_frame, claim_context, claim_date
|
| 1055 |
+
)
|
| 1056 |
+
verdict_clip = await self._create_verdict_clip(temp_dir)
|
| 1057 |
+
|
| 1058 |
+
# Concatenate all clips
|
| 1059 |
+
output_path = await self._concatenate_clips(
|
| 1060 |
+
[title_clip, misleading_clip, debunk_clip, verdict_clip],
|
| 1061 |
+
temp_dir
|
| 1062 |
+
)
|
| 1063 |
+
|
| 1064 |
+
# Clean up temporary files
|
| 1065 |
+
self._cleanup_temp_files(temp_dir)
|
| 1066 |
+
|
| 1067 |
+
# Attempt Cloudinary cleanup (best-effort) before responding
|
| 1068 |
+
await self._cloudinary_cleanup_prefix(config.CLOUDINARY_FOLDER or "frames")
|
| 1069 |
+
return output_path
|
| 1070 |
+
|
| 1071 |
+
except Exception as e:
|
| 1072 |
+
print(f"Error generating video counter-measure: {e}")
|
| 1073 |
+
raise
|
| 1074 |
+
|
| 1075 |
+
async def _create_title_clip(self, temp_dir: str, claim_context: str, claim_date: str) -> str:
|
| 1076 |
+
"""Create title clip with claim information"""
|
| 1077 |
+
try:
|
| 1078 |
+
# Create title image
|
| 1079 |
+
img = Image.new('RGB', (800, 400), 'white')
|
| 1080 |
+
draw = ImageDraw.Draw(img)
|
| 1081 |
+
|
| 1082 |
+
try:
|
| 1083 |
+
font_large = ImageFont.truetype("/System/Library/Fonts/Arial.ttf", 36)
|
| 1084 |
+
font_medium = ImageFont.truetype("/System/Library/Fonts/Arial.ttf", 24)
|
| 1085 |
+
except:
|
| 1086 |
+
font_large = ImageFont.load_default()
|
| 1087 |
+
font_medium = ImageFont.load_default()
|
| 1088 |
+
|
| 1089 |
+
# Add title
|
| 1090 |
+
title = "FALSE CONTEXT DETECTED"
|
| 1091 |
+
title_bbox = draw.textbbox((0, 0), title, font=font_large)
|
| 1092 |
+
title_width = title_bbox[2] - title_bbox[0]
|
| 1093 |
+
title_x = (800 - title_width) // 2
|
| 1094 |
+
draw.text((title_x, 100), title, fill='red', font=font_large)
|
| 1095 |
+
|
| 1096 |
+
# Add claim details
|
| 1097 |
+
claim_text = f"Claim: {claim_context}, {claim_date}"
|
| 1098 |
+
claim_bbox = draw.textbbox((0, 0), claim_text, font=font_medium)
|
| 1099 |
+
claim_width = claim_bbox[2] - claim_bbox[0]
|
| 1100 |
+
claim_x = (800 - claim_width) // 2
|
| 1101 |
+
draw.text((claim_x, 200), claim_text, fill='black', font=font_medium)
|
| 1102 |
+
|
| 1103 |
+
# Save image
|
| 1104 |
+
title_img_path = os.path.join(temp_dir, "title.png")
|
| 1105 |
+
img.save(title_img_path)
|
| 1106 |
+
|
| 1107 |
+
# Convert to video clip
|
| 1108 |
+
title_video_path = os.path.join(temp_dir, "title.mp4")
|
| 1109 |
+
await self._image_to_video(title_img_path, title_video_path, duration=3)
|
| 1110 |
+
|
| 1111 |
+
return title_video_path
|
| 1112 |
+
|
| 1113 |
+
except Exception as e:
|
| 1114 |
+
print(f"Error creating title clip: {e}")
|
| 1115 |
+
raise
|
| 1116 |
+
|
| 1117 |
+
async def _create_misleading_clip(self, video_path: str, timestamp: float, temp_dir: str) -> str:
|
| 1118 |
+
"""Create clip from original misleading video"""
|
| 1119 |
+
try:
|
| 1120 |
+
# Calculate frame numbers for 5-second clip
|
| 1121 |
+
cap = cv2.VideoCapture(video_path)
|
| 1122 |
+
fps = cap.get(cv2.CAP_PROP_FPS)
|
| 1123 |
+
cap.release()
|
| 1124 |
+
|
| 1125 |
+
start_frame = int(timestamp * fps) - int(self.clip_duration / 2 * fps)
|
| 1126 |
+
start_frame = max(0, start_frame)
|
| 1127 |
+
|
| 1128 |
+
# Extract clip using ffmpeg
|
| 1129 |
+
clip_path = os.path.join(temp_dir, "misleading_clip.mp4")
|
| 1130 |
+
|
| 1131 |
+
start_time = max(0, timestamp - self.clip_duration / 2)
|
| 1132 |
+
|
| 1133 |
+
cmd = [
|
| 1134 |
+
'ffmpeg', '-i', video_path,
|
| 1135 |
+
'-ss', str(start_time),
|
| 1136 |
+
'-t', str(self.clip_duration),
|
| 1137 |
+
'-c', 'copy',
|
| 1138 |
+
'-y', clip_path
|
| 1139 |
+
]
|
| 1140 |
+
|
| 1141 |
+
process = await asyncio.create_subprocess_exec(
|
| 1142 |
+
*cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
|
| 1143 |
+
)
|
| 1144 |
+
await process.communicate()
|
| 1145 |
+
|
| 1146 |
+
if process.returncode != 0:
|
| 1147 |
+
raise Exception("FFmpeg failed to create misleading clip")
|
| 1148 |
+
|
| 1149 |
+
return clip_path
|
| 1150 |
+
|
| 1151 |
+
except Exception as e:
|
| 1152 |
+
print(f"Error creating misleading clip: {e}")
|
| 1153 |
+
raise
|
| 1154 |
+
|
| 1155 |
+
async def _create_debunk_clip(self, temp_dir: str, false_context_frame: Dict[str, Any],
|
| 1156 |
+
claim_context: str, claim_date: str) -> str:
|
| 1157 |
+
"""Create debunk scene clip with side-by-side comparison"""
|
| 1158 |
+
try:
|
| 1159 |
+
# Create debunk image using image verifier's counter-measure
|
| 1160 |
+
debunk_img_path = await self.image_verifier._generate_counter_measure(
|
| 1161 |
+
false_context_frame["frame_path"],
|
| 1162 |
+
false_context_frame["evidence_image"],
|
| 1163 |
+
claim_context,
|
| 1164 |
+
claim_date
|
| 1165 |
+
)
|
| 1166 |
+
|
| 1167 |
+
# Move to temp directory
|
| 1168 |
+
final_debunk_img = os.path.join(temp_dir, "debunk.png")
|
| 1169 |
+
os.rename(debunk_img_path, final_debunk_img)
|
| 1170 |
+
|
| 1171 |
+
# Convert to video clip
|
| 1172 |
+
debunk_video_path = os.path.join(temp_dir, "debunk.mp4")
|
| 1173 |
+
await self._image_to_video(final_debunk_img, debunk_video_path, duration=5)
|
| 1174 |
+
|
| 1175 |
+
return debunk_video_path
|
| 1176 |
+
|
| 1177 |
+
except Exception as e:
|
| 1178 |
+
print(f"Error creating debunk clip: {e}")
|
| 1179 |
+
raise
|
| 1180 |
+
|
| 1181 |
+
async def _create_verdict_clip(self, temp_dir: str) -> str:
|
| 1182 |
+
"""Create verdict clip with conclusion"""
|
| 1183 |
+
try:
|
| 1184 |
+
# Create verdict image
|
| 1185 |
+
img = Image.new('RGB', (800, 400), 'white')
|
| 1186 |
+
draw = ImageDraw.Draw(img)
|
| 1187 |
+
|
| 1188 |
+
try:
|
| 1189 |
+
font_large = ImageFont.truetype("/System/Library/Fonts/Arial.ttf", 36)
|
| 1190 |
+
font_medium = ImageFont.truetype("/System/Library/Fonts/Arial.ttf", 24)
|
| 1191 |
+
except:
|
| 1192 |
+
font_large = ImageFont.load_default()
|
| 1193 |
+
font_medium = ImageFont.load_default()
|
| 1194 |
+
|
| 1195 |
+
# Add verdict
|
| 1196 |
+
verdict = "VERDICT: FALSE CONTEXT"
|
| 1197 |
+
verdict_bbox = draw.textbbox((0, 0), verdict, font=font_large)
|
| 1198 |
+
verdict_width = verdict_bbox[2] - verdict_bbox[0]
|
| 1199 |
+
verdict_x = (800 - verdict_width) // 2
|
| 1200 |
+
draw.text((verdict_x, 100), verdict, fill='red', font=font_large)
|
| 1201 |
+
|
| 1202 |
+
# Add explanation
|
| 1203 |
+
explanation = "This content is being used in a false context"
|
| 1204 |
+
explanation_bbox = draw.textbbox((0, 0), explanation, font=font_medium)
|
| 1205 |
+
explanation_width = explanation_bbox[2] - explanation_bbox[0]
|
| 1206 |
+
explanation_x = (800 - explanation_width) // 2
|
| 1207 |
+
draw.text((explanation_x, 200), explanation, fill='black', font=font_medium)
|
| 1208 |
+
|
| 1209 |
+
# Save image
|
| 1210 |
+
verdict_img_path = os.path.join(temp_dir, "verdict.png")
|
| 1211 |
+
img.save(verdict_img_path)
|
| 1212 |
+
|
| 1213 |
+
# Convert to video clip
|
| 1214 |
+
verdict_video_path = os.path.join(temp_dir, "verdict.mp4")
|
| 1215 |
+
await self._image_to_video(verdict_img_path, verdict_video_path, duration=3)
|
| 1216 |
+
|
| 1217 |
+
return verdict_video_path
|
| 1218 |
+
|
| 1219 |
+
except Exception as e:
|
| 1220 |
+
print(f"Error creating verdict clip: {e}")
|
| 1221 |
+
raise
|
| 1222 |
+
|
| 1223 |
+
async def _image_to_video(self, image_path: str, video_path: str, duration: int) -> None:
|
| 1224 |
+
"""Convert image to video clip using FFmpeg"""
|
| 1225 |
+
try:
|
| 1226 |
+
cmd = [
|
| 1227 |
+
'ffmpeg', '-loop', '1',
|
| 1228 |
+
'-i', image_path,
|
| 1229 |
+
'-c:v', 'libx264',
|
| 1230 |
+
'-t', str(duration),
|
| 1231 |
+
'-pix_fmt', 'yuv420p',
|
| 1232 |
+
'-y', video_path
|
| 1233 |
+
]
|
| 1234 |
+
|
| 1235 |
+
process = await asyncio.create_subprocess_exec(
|
| 1236 |
+
*cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
|
| 1237 |
+
)
|
| 1238 |
+
await process.communicate()
|
| 1239 |
+
|
| 1240 |
+
if process.returncode != 0:
|
| 1241 |
+
raise Exception("FFmpeg failed to convert image to video")
|
| 1242 |
+
|
| 1243 |
+
except Exception as e:
|
| 1244 |
+
print(f"Error converting image to video: {e}")
|
| 1245 |
+
raise
|
| 1246 |
+
|
| 1247 |
+
async def _concatenate_clips(self, clip_paths: List[str], temp_dir: str) -> str:
|
| 1248 |
+
"""Concatenate multiple video clips into one"""
|
| 1249 |
+
try:
|
| 1250 |
+
# Create file list for FFmpeg
|
| 1251 |
+
file_list_path = os.path.join(temp_dir, "clips.txt")
|
| 1252 |
+
with open(file_list_path, 'w') as f:
|
| 1253 |
+
for clip_path in clip_paths:
|
| 1254 |
+
f.write(f"file '{clip_path}'\n")
|
| 1255 |
+
|
| 1256 |
+
# Concatenate clips
|
| 1257 |
+
output_path = tempfile.mktemp(suffix=".mp4")
|
| 1258 |
+
|
| 1259 |
+
cmd = [
|
| 1260 |
+
'ffmpeg', '-f', 'concat',
|
| 1261 |
+
'-safe', '0',
|
| 1262 |
+
'-i', file_list_path,
|
| 1263 |
+
'-c', 'copy',
|
| 1264 |
+
'-y', output_path
|
| 1265 |
+
]
|
| 1266 |
+
|
| 1267 |
+
process = await asyncio.create_subprocess_exec(
|
| 1268 |
+
*cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
|
| 1269 |
+
)
|
| 1270 |
+
await process.communicate()
|
| 1271 |
+
|
| 1272 |
+
if process.returncode != 0:
|
| 1273 |
+
raise Exception("FFmpeg failed to concatenate clips")
|
| 1274 |
+
|
| 1275 |
+
return output_path
|
| 1276 |
+
|
| 1277 |
+
except Exception as e:
|
| 1278 |
+
print(f"Error concatenating clips: {e}")
|
| 1279 |
+
raise
|
| 1280 |
+
|
| 1281 |
+
def _cleanup_temp_files(self, temp_dir: str) -> None:
|
| 1282 |
+
"""Clean up temporary files and directory"""
|
| 1283 |
+
try:
|
| 1284 |
+
import shutil
|
| 1285 |
+
shutil.rmtree(temp_dir)
|
| 1286 |
+
except Exception as e:
|
| 1287 |
+
print(f"Error cleaning up temp files: {e}")
|
| 1288 |
+
|
| 1289 |
+
async def _cloudinary_cleanup_prefix(self, prefix: str) -> None:
|
| 1290 |
+
try:
|
| 1291 |
+
if not (config.CLOUDINARY_CLOUD_NAME and (config.CLOUDINARY_API_KEY and config.CLOUDINARY_API_SECRET)):
|
| 1292 |
+
return
|
| 1293 |
+
# List and delete all resources under the folder prefix (rate-limited; best-effort)
|
| 1294 |
+
import requests
|
| 1295 |
+
from requests.auth import HTTPBasicAuth
|
| 1296 |
+
cloud = config.CLOUDINARY_CLOUD_NAME
|
| 1297 |
+
auth = HTTPBasicAuth(config.CLOUDINARY_API_KEY, config.CLOUDINARY_API_SECRET)
|
| 1298 |
+
list_url = f"https://api.cloudinary.com/v1_1/{cloud}/resources/image"
|
| 1299 |
+
params = {"prefix": prefix, "max_results": 100}
|
| 1300 |
+
r = requests.get(list_url, params=params, auth=auth, timeout=20)
|
| 1301 |
+
if r.status_code != 200:
|
| 1302 |
+
return
|
| 1303 |
+
data = r.json()
|
| 1304 |
+
public_ids = [res.get("public_id") for res in data.get("resources", []) if res.get("public_id")]
|
| 1305 |
+
if not public_ids:
|
| 1306 |
+
return
|
| 1307 |
+
del_url = f"https://api.cloudinary.com/v1_1/{cloud}/resources/image/delete_by_ids"
|
| 1308 |
+
requests.post(del_url, data={"public_ids": ",".join(public_ids)}, auth=auth, timeout=20)
|
| 1309 |
+
except Exception as e:
|
| 1310 |
+
print(f"Cloudinary cleanup failed: {e}")
|
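Note: the hunk above only shows the verifier's private helpers, so here is a minimal usage sketch of how they chain together. It assumes the enclosing class is exported as VideoVerifier from services/video_verifier.py and can be constructed without arguments; neither the class name nor its constructor appears in this excerpt, so treat both as assumptions.

import asyncio
from services.video_verifier import VideoVerifier  # assumed class/module name

async def main() -> None:
    verifier = VideoVerifier()  # assumed no-arg constructor
    url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
    claim_context = "Claimed to show flooding in Chennai"
    claim_date = "2024-08-01"

    # Routing mirrors the helpers in this hunk: YouTube URLs take the hybrid
    # Data API + yt-dlp path, other supported platforms go through yt-dlp only.
    if verifier._is_youtube_url(url):
        result = await verifier._verify_youtube_video(url, claim_context, claim_date)
    elif verifier._is_supported_platform(url):
        result = await verifier._verify_with_ytdlp(url, claim_context, claim_date)
    else:
        result = {"verified": False, "message": "Unsupported URL"}

    print(result["verified"], result["message"])

if __name__ == "__main__":
    asyncio.run(main())

Both branches ultimately download the video, extract up to ten key frames with OpenCV, and run the per-frame visual analysis shown above, so the sketch exercises the same code paths regardless of platform.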
services/websocket_service.py
ADDED
|
@@ -0,0 +1,239 @@
|
| 1 |
+
"""
|
| 2 |
+
WebSocket Service for Real-time Updates
|
| 3 |
+
Handles WebSocket connections and MongoDB Change Streams for real-time data updates
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import asyncio
|
| 7 |
+
import json
|
| 8 |
+
import logging
|
| 9 |
+
from typing import Set, Dict, Any, Optional
|
| 10 |
+
from fastapi import WebSocket, WebSocketDisconnect
|
| 11 |
+
from pymongo import MongoClient
|
| 12 |
+
from pymongo.errors import ConnectionFailure
|
| 13 |
+
import os
|
| 14 |
+
from dotenv import load_dotenv
|
| 15 |
+
|
| 16 |
+
load_dotenv()
|
| 17 |
+
|
| 18 |
+
# Setup logging
|
| 19 |
+
logger = logging.getLogger(__name__)
|
| 20 |
+
|
| 21 |
+
class ConnectionManager:
|
| 22 |
+
"""Manages WebSocket connections"""
|
| 23 |
+
|
| 24 |
+
def __init__(self):
|
| 25 |
+
self.active_connections: Set[WebSocket] = set()
|
| 26 |
+
self.connection_data: Dict[WebSocket, Dict[str, Any]] = {}
|
| 27 |
+
|
| 28 |
+
async def connect(self, websocket: WebSocket, client_info: Optional[Dict[str, Any]] = None):
|
| 29 |
+
"""Accept a new WebSocket connection"""
|
| 30 |
+
await websocket.accept()
|
| 31 |
+
self.active_connections.add(websocket)
|
| 32 |
+
self.connection_data[websocket] = client_info or {}
|
| 33 |
+
logger.info(f"✅ WebSocket connected. Total connections: {len(self.active_connections)}")
|
| 34 |
+
|
| 35 |
+
def disconnect(self, websocket: WebSocket):
|
| 36 |
+
"""Remove a WebSocket connection"""
|
| 37 |
+
if websocket in self.active_connections:
|
| 38 |
+
self.active_connections.remove(websocket)
|
| 39 |
+
if websocket in self.connection_data:
|
| 40 |
+
del self.connection_data[websocket]
|
| 41 |
+
logger.info(f"🔌 WebSocket disconnected. Total connections: {len(self.active_connections)}")
|
| 42 |
+
|
| 43 |
+
async def send_personal_message(self, message: str, websocket: WebSocket):
|
| 44 |
+
"""Send a message to a specific WebSocket connection"""
|
| 45 |
+
try:
|
| 46 |
+
await websocket.send_text(message)
|
| 47 |
+
except Exception as e:
|
| 48 |
+
logger.error(f"❌ Failed to send personal message: {e}")
|
| 49 |
+
self.disconnect(websocket)
|
| 50 |
+
|
| 51 |
+
async def broadcast(self, message: str):
|
| 52 |
+
"""Broadcast a message to all connected WebSocket clients"""
|
| 53 |
+
if not self.active_connections:
|
| 54 |
+
logger.warning("⚠️ No active connections to broadcast to")
|
| 55 |
+
return
|
| 56 |
+
|
| 57 |
+
disconnected = set()
|
| 58 |
+
for connection in self.active_connections:
|
| 59 |
+
try:
|
| 60 |
+
await connection.send_text(message)
|
| 61 |
+
except Exception as e:
|
| 62 |
+
logger.error(f"❌ Failed to broadcast to connection: {e}")
|
| 63 |
+
disconnected.add(connection)
|
| 64 |
+
|
| 65 |
+
# Clean up disconnected connections
|
| 66 |
+
for connection in disconnected:
|
| 67 |
+
self.disconnect(connection)
|
| 68 |
+
|
| 69 |
+
logger.info(f"📡 Broadcasted message to {len(self.active_connections)} connections")
|
| 70 |
+
|
| 71 |
+
class MongoDBChangeStreamService:
|
| 72 |
+
"""Service to monitor MongoDB changes and notify WebSocket clients"""
|
| 73 |
+
|
| 74 |
+
def __init__(self, connection_string: Optional[str] = None):
|
| 75 |
+
"""Initialize MongoDB connection for change streams"""
|
| 76 |
+
self.connection_string = connection_string or os.getenv('MONGO_CONNECTION_STRING')
|
| 77 |
+
|
| 78 |
+
if not self.connection_string:
|
| 79 |
+
raise ValueError("MongoDB connection string is required. Set MONGO_CONNECTION_STRING environment variable.")
|
| 80 |
+
|
| 81 |
+
self.client = None
|
| 82 |
+
self.db = None
|
| 83 |
+
self.collection = None
|
| 84 |
+
self.change_stream = None
|
| 85 |
+
self.is_running = False
|
| 86 |
+
|
| 87 |
+
self._connect()
|
| 88 |
+
|
| 89 |
+
def _connect(self):
|
| 90 |
+
"""Establish MongoDB connection"""
|
| 91 |
+
try:
|
| 92 |
+
self.client = MongoClient(self.connection_string)
|
| 93 |
+
# Test connection
|
| 94 |
+
self.client.admin.command('ping')
|
| 95 |
+
|
| 96 |
+
# Use 'aegis' database
|
| 97 |
+
self.db = self.client['aegis']
|
| 98 |
+
self.collection = self.db['debunk_posts']
|
| 99 |
+
|
| 100 |
+
logger.info("✅ MongoDB Change Stream service connected successfully")
|
| 101 |
+
|
| 102 |
+
except ConnectionFailure as e:
|
| 103 |
+
logger.error(f"❌ Failed to connect to MongoDB for change streams: {e}")
|
| 104 |
+
raise
|
| 105 |
+
|
| 106 |
+
async def start_change_stream(self, connection_manager: ConnectionManager):
|
| 107 |
+
"""Start monitoring MongoDB changes and broadcast to WebSocket clients"""
|
| 108 |
+
if self.is_running:
|
| 109 |
+
logger.warning("⚠️ Change stream is already running")
|
| 110 |
+
return
|
| 111 |
+
|
| 112 |
+
try:
|
| 113 |
+
# Check if MongoDB supports change streams (replica set)
|
| 114 |
+
try:
|
| 115 |
+
# Try to create change stream to watch for insertions
|
| 116 |
+
self.change_stream = self.collection.watch([
|
| 117 |
+
{
|
| 118 |
+
'$match': {
|
| 119 |
+
'operationType': 'insert'
|
| 120 |
+
}
|
| 121 |
+
}
|
+                ])
+
+                self.is_running = True
+                logger.info("🔄 Started MongoDB change stream monitoring")
+
+                # Process change stream events
+                async def process_changes():
+                    try:
+                        while self.is_running:
+                            if self.change_stream:
+                                # Check for new changes (non-blocking)
+                                try:
+                                    change = self.change_stream.try_next()
+                                    if change:
+                                        await self._handle_change(change, connection_manager)
+                                    else:
+                                        # No changes, sleep briefly
+                                        await asyncio.sleep(0.5)
+                                except Exception as e:
+                                    logger.error(f"❌ Error processing change: {e}")
+                                    await asyncio.sleep(1)  # Brief pause on error
+                                    continue
+                            else:
+                                await asyncio.sleep(1)
+
+                    except Exception as e:
+                        logger.error(f"❌ Error in change stream processing: {e}")
+                    finally:
+                        self.is_running = False
+
+                # Start the change stream processing in the background
+                asyncio.create_task(process_changes())
+
+            except Exception as change_stream_error:
+                logger.warning(f"⚠️ MongoDB change streams not available: {change_stream_error}")
+                logger.info("🔄 Change streams require MongoDB replica set. WebSocket will work for manual updates.")
+                # Don't fail completely, just disable change streams
+                self.is_running = False
+                self.change_stream = None
+
+        except Exception as e:
+            logger.error(f"❌ Failed to start change stream: {e}")
+            self.is_running = False
+            # Don't raise the exception, allow WebSocket to work without change streams
+
+    async def _handle_change(self, change: Dict[str, Any], connection_manager: ConnectionManager):
+        """Handle a MongoDB change event"""
+        try:
+            logger.info(f"🔄 MongoDB change detected: {change.get('operationType')}")
+
+            # Extract the new document
+            new_document = change.get('fullDocument')
+            if not new_document:
+                logger.warning("⚠️ No full document in change event")
+                return
+
+            # Convert ObjectId to string for JSON serialization
+            if '_id' in new_document:
+                new_document['_id'] = str(new_document['_id'])
+
+            # Create the broadcast message
+            message = {
+                "type": "new_post",
+                "data": {
+                    "post": new_document,
+                    "timestamp": change.get('clusterTime'),
+                    "operation": change.get('operationType')
+                }
+            }
+
+            # Broadcast to all connected clients (serialize datetimes/ObjectIds)
+            await connection_manager.broadcast(json.dumps(message, default=str))
+            logger.info(f"📡 Broadcasted new post to {len(connection_manager.active_connections)} clients")
+
+        except Exception as e:
+            logger.error(f"❌ Error handling MongoDB change: {e}")
+
+    def stop_change_stream(self):
+        """Stop the MongoDB change stream"""
+        self.is_running = False
+        if self.change_stream:
+            self.change_stream.close()
+            self.change_stream = None
+        logger.info("🛑 Stopped MongoDB change stream")
+
+
+    def close(self):
+        """Close MongoDB connection"""
+        self.stop_change_stream()
+        if self.client:
+            self.client.close()
+        logger.info("🔌 MongoDB Change Stream service connection closed")
+
+# Global instances
+connection_manager = ConnectionManager()
+mongodb_change_service = None
+
+async def initialize_mongodb_change_stream():
+    """Initialize the MongoDB change stream service"""
+    global mongodb_change_service
+
+    try:
+        mongodb_change_service = MongoDBChangeStreamService()
+        await mongodb_change_service.start_change_stream(connection_manager)
+        logger.info("✅ MongoDB Change Stream service initialized successfully")
+        return mongodb_change_service
+    except Exception as e:
+        logger.error(f"❌ Failed to initialize MongoDB Change Stream service: {e}")
+        return None
+
+async def cleanup_mongodb_change_stream():
+    """Cleanup the MongoDB change stream service"""
+    global mongodb_change_service
+
+    if mongodb_change_service:
+        mongodb_change_service.close()
+        mongodb_change_service = None
+        logger.info("🧹 MongoDB Change Stream service cleaned up")
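Since the module exposes initialize_mongodb_change_stream() and cleanup_mongodb_change_stream() as module-level coroutines, the natural place to call them is at application startup and shutdown. Below is a minimal wiring sketch, not part of this commit; the import path and the use of a FastAPI lifespan hook are assumptions for illustration.

# Hypothetical wiring sketch (assumed import path and lifespan hook; not in this commit)
from contextlib import asynccontextmanager
from fastapi import FastAPI

from services.websocket_service import (
    initialize_mongodb_change_stream,
    cleanup_mongodb_change_stream,
)

@asynccontextmanager
async def lifespan(app: FastAPI):
    # Start watching the collection when the app boots.
    await initialize_mongodb_change_stream()
    yield
    # Close the change stream and the MongoDB client on shutdown.
    await cleanup_mongodb_change_stream()

app = FastAPI(lifespan=lifespan)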
services/youtube_api.py
ADDED
@@ -0,0 +1,211 @@
+import os
+import requests
+from typing import Dict, Any, Optional
+from config import config
+
+class YouTubeDataAPI:
+    """
+    YouTube Data API v3 integration for video verification
+    """
+
+    def __init__(self, api_key: Optional[str] = None):
+        """
+        Initialize YouTube Data API client
+
+        Args:
+            api_key: Google API key. If None, will try to get from environment
+        """
+        self.api_key = api_key or config.GOOGLE_API_KEY
+        if not self.api_key:
+            raise ValueError("GOOGLE_API_KEY environment variable or api_key parameter is required")
+
+        self.base_url = "https://www.googleapis.com/youtube/v3"
+
+    def extract_video_id(self, url: str) -> Optional[str]:
+        """
+        Extract video ID from YouTube URL
+
+        Args:
+            url: YouTube URL (various formats supported)
+
+        Returns:
+            Video ID or None if not found
+        """
+        import re
+
+        # YouTube URL patterns
+        patterns = [
+            r'(?:youtube\.com\/watch\?v=|youtu\.be\/|youtube\.com\/embed\/)([a-zA-Z0-9_-]{11})',
+            r'youtube\.com\/v\/([a-zA-Z0-9_-]{11})',
+            r'youtube\.com\/shorts\/([a-zA-Z0-9_-]{11})'
+        ]
+
+        for pattern in patterns:
+            match = re.search(pattern, url)
+            if match:
+                return match.group(1)
+
+        return None
+
+    def get_video_info(self, video_id: str) -> Dict[str, Any]:
+        """
+        Get video information from YouTube Data API
+
+        Args:
+            video_id: YouTube video ID
+
+        Returns:
+            Dictionary with video information
+        """
+        try:
+            url = f"{self.base_url}/videos"
+            params = {
+                'key': self.api_key,
+                'id': video_id,
+                'part': 'snippet,statistics,contentDetails'
+            }
+
+            response = requests.get(url, params=params, timeout=30)
+            response.raise_for_status()
+
+            data = response.json()
+
+            if not data.get('items'):
+                return {
+                    'success': False,
+                    'error': 'Video not found or not accessible'
+                }
+
+            video = data['items'][0]
+            snippet = video.get('snippet', {})
+            statistics = video.get('statistics', {})
+            content_details = video.get('contentDetails', {})
+
+            return {
+                'success': True,
+                'video_id': video_id,
+                'title': snippet.get('title', 'Unknown Title'),
+                'description': snippet.get('description', ''),
+                'channel_title': snippet.get('channelTitle', 'Unknown Channel'),
+                'published_at': snippet.get('publishedAt', ''),
+                'duration': content_details.get('duration', ''),
+                'view_count': statistics.get('viewCount', '0'),
+                'like_count': statistics.get('likeCount', '0'),
+                'comment_count': statistics.get('commentCount', '0'),
+                'tags': snippet.get('tags', []),
+                'category_id': snippet.get('categoryId', ''),
+                'thumbnail_url': snippet.get('thumbnails', {}).get('high', {}).get('url', ''),
+                'raw_data': video
+            }
+
+        except requests.exceptions.RequestException as e:
+            return {
+                'success': False,
+                'error': f'API request failed: {str(e)}'
+            }
+        except Exception as e:
+            return {
+                'success': False,
+                'error': f'Unexpected error: {str(e)}'
+            }
+
+    def search_videos(self, query: str, max_results: int = 10) -> Dict[str, Any]:
+        """
+        Search for videos using YouTube Data API
+
+        Args:
+            query: Search query
+            max_results: Maximum number of results to return
+
+        Returns:
+            Dictionary with search results
+        """
+        try:
+            url = f"{self.base_url}/search"
+            params = {
+                'key': self.api_key,
+                'q': query,
+                'part': 'snippet',
+                'type': 'video',
+                'maxResults': max_results,
+                'order': 'relevance'
+            }
+
+            response = requests.get(url, params=params, timeout=30)
+            response.raise_for_status()
+
+            data = response.json()
+
+            videos = []
+            for item in data.get('items', []):
+                snippet = item.get('snippet', {})
+                videos.append({
+                    'video_id': item.get('id', {}).get('videoId', ''),
+                    'title': snippet.get('title', ''),
+                    'description': snippet.get('description', ''),
+                    'channel_title': snippet.get('channelTitle', ''),
+                    'published_at': snippet.get('publishedAt', ''),
+                    'thumbnail_url': snippet.get('thumbnails', {}).get('high', {}).get('url', '')
+                })
+
+            return {
+                'success': True,
+                'videos': videos,
+                'total_results': data.get('pageInfo', {}).get('totalResults', 0)
+            }
+
+        except requests.exceptions.RequestException as e:
+            return {
+                'success': False,
+                'error': f'API request failed: {str(e)}'
+            }
+        except Exception as e:
+            return {
+                'success': False,
+                'error': f'Unexpected error: {str(e)}'
+            }
+
+    def verify_video_exists(self, url: str) -> Dict[str, Any]:
+        """
+        Verify if a YouTube video exists and is accessible
+
+        Args:
+            url: YouTube URL
+
+        Returns:
+            Dictionary with verification results
+        """
+        video_id = self.extract_video_id(url)
+
+        if not video_id:
+            return {
+                'verified': False,
+                'message': 'Invalid YouTube URL format',
+                'details': {'error': 'Could not extract video ID from URL'}
+            }
+
+        video_info = self.get_video_info(video_id)
+
+        if not video_info.get('success'):
+            return {
+                'verified': False,
+                'message': f'Video verification failed: {video_info.get("error", "Unknown error")}',
+                'details': {
+                    'video_id': video_id,
+                    'error': video_info.get('error', 'Unknown error')
+                }
+            }
+
+        return {
+            'verified': True,
+            'message': f'Video verified successfully: "{video_info["title"]}" by {video_info["channel_title"]}',
+            'details': {
+                'video_id': video_id,
+                'title': video_info['title'],
+                'channel_title': video_info['channel_title'],
+                'published_at': video_info['published_at'],
+                'duration': video_info['duration'],
+                'view_count': video_info['view_count'],
+                'thumbnail_url': video_info['thumbnail_url']
+            }
+        }
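For reference, a minimal usage sketch of the new YouTubeDataAPI class; the import path, the example URL, and the search query are placeholders, and a valid GOOGLE_API_KEY must be available via config or the environment.

# Hypothetical usage sketch (assumed import path; requires a valid GOOGLE_API_KEY)
from services.youtube_api import YouTubeDataAPI

yt = YouTubeDataAPI()  # or YouTubeDataAPI(api_key="...")

result = yt.verify_video_exists("https://www.youtube.com/watch?v=dQw4w9WgXcQ")
if result["verified"]:
    details = result["details"]
    print(f'{details["title"]} by {details["channel_title"]} ({details["view_count"]} views)')
else:
    print(result["message"])

# Keyword search returns snippet-level results using the same API key.
search = yt.search_videos("example search query", max_results=5)
for video in search.get("videos", []):
    print(video["video_id"], video["title"])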
services/youtube_caption.py
ADDED
@@ -0,0 +1,141 @@
+# pip install yt-dlp
+
+import yt_dlp
+import os
+import re
+from pathlib import Path
+
+def get_youtube_transcript_ytdlp(video_url, output_file="transcript.txt"):
+    """
+    Extract YouTube transcript using yt-dlp
+    Works perfectly in India - yt-dlp handles all signature/blocking issues
+    """
+
+    print("[*] Starting transcript extraction with yt-dlp...")
+
+    # Extract video ID for reference
+    video_id_match = re.search(r'v=([^&]*)', video_url)
+    video_id = video_id_match.group(1) if video_id_match else 'unknown'
+
+    print(f"[+] Video ID: {video_id}")
+
+    # Normalize URL to just the video (remove playlist parameters)
+    normalized_url = f"https://www.youtube.com/watch?v={video_id}"
+    print(f"[+] Normalized URL: {normalized_url}")
+
+    try:
+        # Create temp directory for subtitles
+        temp_dir = "temp_subs"
+        os.makedirs(temp_dir, exist_ok=True)
+
+        # Setup yt-dlp options
+        ydl_opts = {
+            'writeautomaticsub': True,  # Download auto-generated subtitles
+            'subtitlesformat': 'vtt',  # Format (can also be 'json3', 'srt', 'ass')
+            'skip_download': True,  # Only download subs, not video
+            'noplaylist': True,  # Only download the video, not the playlist
+            'outtmpl': os.path.join(temp_dir, '%(id)s'),  # Output template
+            'quiet': False,  # Show progress
+            'no_warnings': False,
+            'sub_langs': 'en',  # Only English subtitles
+        }
+
+        print("[*] Downloading subtitles...")
+
+        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+            info = ydl.extract_info(normalized_url, download=True)  # Use normalized URL
+
+        print("[+] Subtitles downloaded successfully")
+
+        # Find the subtitle file
+        subtitle_file = None
+        for file in os.listdir(temp_dir):
+            if video_id in file and (file.endswith('.vtt') or file.endswith('.srt')):
+                subtitle_file = os.path.join(temp_dir, file)
+                print(f"[+] Found subtitle file: {file}")
+                break
+
+        if not subtitle_file or not os.path.exists(subtitle_file):
+            print("[ERROR] Subtitle file not found")
+            print(f"[DEBUG] Files in {temp_dir}: {os.listdir(temp_dir)}")
+            return None
+
+        # Read and parse the subtitle file
+        print("[*] Parsing subtitle file...")
+
+        transcript_lines = []
+
+        if subtitle_file.endswith('.vtt'):
+            # Parse VTT format
+            with open(subtitle_file, 'r', encoding='utf-8') as f:
+                lines = f.readlines()
+
+            for line in lines:
+                line = line.strip()
+                # Skip headers, timestamps, and empty lines
+                if line and not line.startswith('WEBVTT') and not '-->' in line and line:
+                    transcript_lines.append(line)
+
+        elif subtitle_file.endswith('.srt'):
+            # Parse SRT format
+            with open(subtitle_file, 'r', encoding='utf-8') as f:
+                lines = f.readlines()
+
+            for line in lines:
+                line = line.strip()
+                # Skip sequence numbers and timestamps
+                if line and not line[0].isdigit() and not '-->' in line and line:
+                    transcript_lines.append(line)
+
+        if not transcript_lines:
+            print("[ERROR] No text extracted from subtitle file")
+            return None
+
+        # Combine into full transcript
+        full_text = "\n".join(transcript_lines)
+
+        # Save to output file
+        print(f"[*] Saving transcript to {output_file}...")
+        with open(output_file, 'w', encoding='utf-8') as f:
+            f.write(full_text)
+
+        # Cleanup temp directory
+        import shutil
+        shutil.rmtree(temp_dir)
+
+        print(f"\n✓ SUCCESS!")
+        print(f"  File: {output_file}")
+        print(f"  Total characters: {len(full_text)}")
+        print(f"  Total lines: {len(transcript_lines)}")
+
+        return full_text
+
+    except Exception as e:
+        print(f"[ERROR] {str(e)}")
+        import traceback
+        traceback.print_exc()
+        return None
+
+
+# ==================== MAIN ====================
+
+if __name__ == "__main__":
+
+    print("=" * 70)
+    print("YouTube Transcript Extractor - yt-dlp VERSION (WORKS IN INDIA!)")
+    print("=" * 70)
+
+    video_url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
+
+    print(f"\nTarget video: {video_url}\n")
+
+    transcript = get_youtube_transcript_ytdlp(video_url)
+
+    if transcript:
+        print("\n" + "=" * 70)
+        print("TRANSCRIPT PREVIEW (First 800 characters)")
+        print("=" * 70)
+        print(transcript[:800])
+        print("\n...")
+    else:
+        print("\n[FAILED] Could not extract transcript")
utils/file_utils.py
ADDED
@@ -0,0 +1,145 @@
+import os
+import tempfile
+import shutil
+from pathlib import Path
+from typing import List
+from fastapi import UploadFile
+
+async def save_upload_file(upload_file: UploadFile) -> str:
+    """
+    Save an uploaded file to a temporary location
+
+    Args:
+        upload_file: FastAPI UploadFile object
+
+    Returns:
+        Path to the saved temporary file
+    """
+    try:
+        # Create temporary file with appropriate extension
+        suffix = Path(upload_file.filename).suffix if upload_file.filename else ""
+        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
+
+        # Write uploaded content to temporary file
+        content = await upload_file.read()
+        temp_file.write(content)
+        temp_file.close()
+
+        return temp_file.name
+
+    except Exception as e:
+        print(f"Error saving uploaded file: {e}")
+        raise
+
+def cleanup_temp_files(file_paths: List[str]) -> None:
+    """
+    Clean up temporary files
+
+    Args:
+        file_paths: List of file paths to delete
+    """
+    for file_path in file_paths:
+        try:
+            if os.path.exists(file_path):
+                os.unlink(file_path)
+                print(f"Cleaned up temporary file: {file_path}")
+        except Exception as e:
+            print(f"Error cleaning up file {file_path}: {e}")
+
+def cleanup_temp_directories(dir_paths: List[str]) -> None:
+    """
+    Clean up temporary directories
+
+    Args:
+        dir_paths: List of directory paths to delete
+    """
+    for dir_path in dir_paths:
+        try:
+            if os.path.exists(dir_path):
+                shutil.rmtree(dir_path)
+                print(f"Cleaned up temporary directory: {dir_path}")
+        except Exception as e:
+            print(f"Error cleaning up directory {dir_path}: {e}")
+
+def get_file_extension(filename: str) -> str:
+    """
+    Get file extension from filename
+
+    Args:
+        filename: Name of the file
+
+    Returns:
+        File extension (including the dot)
+    """
+    return Path(filename).suffix.lower()
+
+def is_valid_image_file(filename: str) -> bool:
+    """
+    Check if filename represents a valid image file
+
+    Args:
+        filename: Name of the file
+
+    Returns:
+        True if valid image file
+    """
+    valid_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp'}
+    return get_file_extension(filename) in valid_extensions
+
+def is_valid_video_file(filename: str) -> bool:
+    """
+    Check if filename represents a valid video file
+
+    Args:
+        filename: Name of the file
+
+    Returns:
+        True if valid video file
+    """
+    valid_extensions = {'.mp4', '.avi', '.mov', '.wmv', '.flv', '.webm', '.mkv', '.m4v'}
+    return get_file_extension(filename) in valid_extensions
+
+def create_temp_directory() -> str:
+    """
+    Create a temporary directory
+
+    Returns:
+        Path to the created temporary directory
+    """
+    return tempfile.mkdtemp()
+
+def get_file_size(file_path: str) -> int:
+    """
+    Get file size in bytes
+
+    Args:
+        file_path: Path to the file
+
+    Returns:
+        File size in bytes
+    """
+    try:
+        return os.path.getsize(file_path)
+    except OSError:
+        return 0
+
+def format_file_size(size_bytes: int) -> str:
+    """
+    Format file size in human-readable format
+
+    Args:
+        size_bytes: File size in bytes
+
+    Returns:
+        Formatted file size string
+    """
+    if size_bytes == 0:
+        return "0B"
+
+    size_names = ["B", "KB", "MB", "GB", "TB"]
+    i = 0
+    while size_bytes >= 1024 and i < len(size_names) - 1:
+        size_bytes /= 1024.0
+        i += 1
+
+    return f"{size_bytes:.1f}{size_names[i]}"
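A minimal sketch of how these helpers compose inside an upload endpoint; the route path, the validation behaviour, and the import usage are assumptions for illustration, not part of this commit.

# Hypothetical endpoint sketch (assumed route and wiring; not in this commit)
from fastapi import FastAPI, HTTPException, UploadFile

from utils.file_utils import (
    save_upload_file,
    cleanup_temp_files,
    is_valid_image_file,
    get_file_size,
    format_file_size,
)

app = FastAPI()

@app.post("/verify/image")
async def verify_image(file: UploadFile):
    if not file.filename or not is_valid_image_file(file.filename):
        raise HTTPException(status_code=400, detail="Unsupported image type")

    temp_path = await save_upload_file(file)  # copy the upload to a temp file
    try:
        size = format_file_size(get_file_size(temp_path))
        # ... hand temp_path to the image verifier here ...
        return {"filename": file.filename, "size": size}
    finally:
        cleanup_temp_files([temp_path])  # always remove the temp copy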