Harshilforworks committed on
Commit
1cee84c
·
verified ·
1 Parent(s): fca78a4

Upload 6 files

Files changed (6)
  1. .dockerignore +19 -0
  2. .gitignore +24 -0
  3. Dockerfile +26 -0
  4. add_sample_data.py +257 -0
  5. config.py +136 -0
  6. main.py +509 -0
.dockerignore ADDED
@@ -0,0 +1,19 @@
+ __pycache__/
+ *.pyc
+ *.pyo
+ *.pyd
+ env/
+ venv/
+ .venv/
+ .git/
+ .gitignore
+ dist/
+ build/
+ node_modules/
+ *.log
+ .env
+ public/frames/*
+ *.sqlite3
+ *.db
+ uploads/
+ __pycache__/
.gitignore ADDED
@@ -0,0 +1,24 @@
+ # Backend-specific ignores
+ __pycache__/
+ *.py[cod]
+
+ # Local env files
+ .env
+ .env.local
+
+ # Virtualenvs (in case devs create them here)
+ venv/
+ env/
+ .venv/
+
+ # Generated frames and uploads
+ public/frames/
+ uploads/
+
+ # Logs and temp
+ *.log
+ tmp/
+ temp/
+
+ # Docker artifacts
+ *.tar
Dockerfile ADDED
@@ -0,0 +1,26 @@
+ FROM python:3.9
+
+ # Install system dependencies required by some Python packages (OpenCV, Pillow, ffmpeg)
+ RUN apt-get update \
+     && apt-get install -y --no-install-recommends \
+         build-essential \
+         ffmpeg \
+         libsm6 \
+         libxext6 \
+         libxrender1 \
+         libgl1 \
+         git \
+     && rm -rf /var/lib/apt/lists/*
+
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV PATH="/home/user/.local/bin:$PATH"
+
+ WORKDIR /app
+
+ COPY --chown=user ./requirements.txt requirements.txt
+ RUN pip install --no-cache-dir --upgrade pip \
+     && pip install --no-cache-dir -r requirements.txt
+
+ COPY --chown=user . /app
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
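
A quick way to sanity-check an image built from this Dockerfile is to hit the service's /health endpoint (defined in main.py below) once the container is up. A minimal sketch in Python, assuming the requests package is installed and the build/run commands in the comment were used (the image tag aegis-backend is hypothetical):

    # Assumes: docker build -t aegis-backend .
    #          docker run -p 7860:7860 aegis-backend
    import requests

    resp = requests.get("http://localhost:7860/health", timeout=5)
    resp.raise_for_status()
    print(resp.json())  # expected: {"status": "healthy", "service": "visual-verification"}
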
add_sample_data.py ADDED
@@ -0,0 +1,257 @@
+ #!/usr/bin/env python3
+ """
+ Script to add sample rumour data to MongoDB for testing real-time updates
+ """
+
+ import os
+ import sys
+ import asyncio
+ from datetime import datetime, timedelta
+ from pymongo import MongoClient
+ from pymongo.errors import DuplicateKeyError
+ from dotenv import load_dotenv
+
+ # Load environment variables
+ load_dotenv()
+
+ def get_mongo_client():
+     """Get MongoDB client connection"""
+     connection_string = os.getenv('MONGO_CONNECTION_STRING')
+     if not connection_string:
+         raise ValueError("MONGO_CONNECTION_STRING environment variable not set")
+
+     client = MongoClient(connection_string)
+     # Test connection
+     client.admin.command('ping')
+     return client
+
+ def add_sample_rumours():
+     """Add sample rumour data to MongoDB"""
+
+     client = get_mongo_client()
+     db = client['aegis']
+     collection = db['debunk_posts']
+
+     # Sample rumour data with unique post_ids
+     sample_rumours = [
+         {
+             "post_id": "sample_rumour_001",
+             "claim": "Scientists have discovered a new planet that could support human life",
+             "summary": "Recent astronomical observations suggest the possibility of a habitable exoplanet",
+             "platform": "Twitter",
+             "Post_link": "https://twitter.com/example/status/123456789",
+             "verification": {
+                 "verdict": "true",
+                 "message": "This claim is accurate based on NASA's recent findings",
+                 "reasoning": "The discovery was confirmed by multiple telescopes and peer-reviewed research",
+                 "verification_date": datetime.now() - timedelta(hours=2),
+                 "sources": {
+                     "count": 3,
+                     "links": [
+                         "https://www.nasa.gov/feature/nasa-discovers-new-exoplanet",
+                         "https://www.nature.com/articles/space-discovery-2024",
+                         "https://www.scientificamerican.com/article/new-habitable-planet"
+                     ],
+                     "titles": [
+                         "NASA Discovers New Exoplanet",
+                         "Nature: Space Discovery 2024",
+                         "Scientific American: New Habitable Planet Found"
+                     ]
+                 }
+             },
+             "stored_at": datetime.now() - timedelta(hours=2)
+         },
+         {
+             "post_id": "sample_rumour_002",
+             "claim": "Breaking: Major tech company announces they're shutting down all services",
+             "summary": "A viral post claims a major technology company is discontinuing all its services",
+             "platform": "Facebook",
+             "Post_link": "https://facebook.com/example/posts/987654321",
+             "verification": {
+                 "verdict": "false",
+                 "message": "This is completely false and has been debunked by the company",
+                 "reasoning": "The company's official channels have confirmed this is a hoax. No such announcement was made.",
+                 "verification_date": datetime.now() - timedelta(hours=1, minutes=30),
+                 "sources": {
+                     "count": 2,
+                     "links": [
+                         "https://company.com/official-statement",
+                         "https://techcrunch.com/company-denies-shutdown-rumors"
+                     ],
+                     "titles": [
+                         "Official Company Statement",
+                         "TechCrunch: Company Denies Shutdown Rumors"
+                     ]
+                 }
+             },
+             "stored_at": datetime.now() - timedelta(hours=1, minutes=30)
+         },
+         {
+             "post_id": "sample_rumour_003",
+             "claim": "New study shows that coffee increases life expectancy by 5 years",
+             "summary": "A recent research paper claims significant health benefits from coffee consumption",
+             "platform": "Instagram",
+             "Post_link": "https://instagram.com/p/coffee-study-2024",
+             "verification": {
+                 "verdict": "mostly true",
+                 "message": "While coffee does have health benefits, the 5-year claim is exaggerated",
+                 "reasoning": "Studies show moderate coffee consumption has health benefits, but the specific 5-year claim is not supported by the research cited.",
+                 "verification_date": datetime.now() - timedelta(minutes=45),
+                 "sources": {
+                     "count": 4,
+                     "links": [
+                         "https://www.nejm.org/journal/coffee-health-study",
+                         "https://www.mayoclinic.org/coffee-health-benefits",
+                         "https://www.hsph.harvard.edu/coffee-research",
+                         "https://www.healthline.com/coffee-life-expectancy-study"
+                     ],
+                     "titles": [
+                         "NEJM: Coffee Health Study",
+                         "Mayo Clinic: Coffee Health Benefits",
+                         "Harvard: Coffee Research",
+                         "Healthline: Coffee Life Expectancy Study"
+                     ]
+                 }
+             },
+             "stored_at": datetime.now() - timedelta(minutes=45)
+         },
+         {
+             "post_id": "sample_rumour_004",
+             "claim": "Local restaurant caught serving expired food to customers",
+             "summary": "Social media posts allege a popular local restaurant is serving expired ingredients",
+             "platform": "Reddit",
+             "Post_link": "https://reddit.com/r/localnews/expired-food-restaurant",
+             "verification": {
+                 "verdict": "disputed",
+                 "message": "The claims are under investigation by health authorities",
+                 "reasoning": "Health department inspection is ongoing. Some allegations have been confirmed, others are disputed by the restaurant management.",
+                 "verification_date": datetime.now() - timedelta(minutes=20),
+                 "sources": {
+                     "count": 3,
+                     "links": [
+                         "https://healthdept.gov/inspection-reports",
+                         "https://localnews.com/restaurant-investigation",
+                         "https://restaurant.com/official-response"
+                     ],
+                     "titles": [
+                         "Health Department Inspection Reports",
+                         "Local News: Restaurant Investigation",
+                         "Restaurant Official Response"
+                     ]
+                 }
+             },
+             "stored_at": datetime.now() - timedelta(minutes=20)
+         },
+         {
+             "post_id": "sample_rumour_005",
+             "claim": "Mysterious lights spotted in the sky over the city last night",
+             "summary": "Multiple reports of unusual lights in the night sky",
+             "platform": "TikTok",
+             "Post_link": "https://tiktok.com/@user/video/mysterious-lights-city",
+             "verification": {
+                 "verdict": "unverified",
+                 "message": "Unable to verify the source or authenticity of these reports",
+                 "reasoning": "No official explanation has been provided. Could be various phenomena including aircraft, drones, or natural occurrences.",
+                 "verification_date": datetime.now() - timedelta(minutes=10),
+                 "sources": {
+                     "count": 2,
+                     "links": [
+                         "https://weather.gov/sky-conditions-report",
+                         "https://faa.gov/flight-tracker-archive"
+                     ],
+                     "titles": [
+                         "Weather Service: Sky Conditions Report",
+                         "FAA: Flight Tracker Archive"
+                     ]
+                 }
+             },
+             "stored_at": datetime.now() - timedelta(minutes=10)
+         }
+     ]
+
+     print("🔄 Adding sample rumour data to MongoDB...")
+
+     added_count = 0
+     skipped_count = 0
+
+     for rumour in sample_rumours:
+         try:
+             # Try to insert the document
+             result = collection.insert_one(rumour)
+             print(f"✅ Added rumour: {rumour['post_id']} - {rumour['claim'][:50]}...")
+             added_count += 1
+
+         except DuplicateKeyError:
+             print(f"⚠️ Skipped rumour (already exists): {rumour['post_id']}")
+             skipped_count += 1
+
+         except Exception as e:
+             print(f"❌ Error adding rumour {rumour['post_id']}: {e}")
+
+     print(f"\n📊 Summary:")
+     print(f"   ✅ Added: {added_count} rumours")
+     print(f"   ⚠️ Skipped: {skipped_count} rumours")
+     print(f"   📝 Total in database: {collection.count_documents({})} rumours")
+
+     # Close connection
+     client.close()
+     print("\n🔌 MongoDB connection closed")
+
+ def test_realtime_update():
+     """Add a new rumour to test real-time updates"""
+
+     client = get_mongo_client()
+     db = client['aegis']
+     collection = db['debunk_posts']
+
+     # Create a new rumour with current timestamp
+     new_rumour = {
+         "post_id": f"test_realtime_{int(datetime.now().timestamp())}",
+         "claim": "Test real-time update: This is a new rumour added for testing WebSocket functionality",
+         "summary": "This rumour was added to test the real-time WebSocket update system",
+         "platform": "Test Platform",
+         "Post_link": "https://example.com/test-realtime-update",
+         "verification": {
+             "verdict": "true",
+             "message": "This is a test rumour for real-time updates",
+             "reasoning": "Added programmatically to verify WebSocket functionality",
+             "verification_date": datetime.now(),
+             "sources": {
+                 "count": 1,
+                 "links": ["https://example.com/test-source"],
+                 "titles": ["Test Source"]
+             }
+         },
+         "stored_at": datetime.now()
+     }
+
+     print("🔄 Adding test rumour for real-time update...")
+
+     try:
+         result = collection.insert_one(new_rumour)
+         print(f"✅ Test rumour added successfully!")
+         print(f"   📝 Post ID: {new_rumour['post_id']}")
+         print(f"   📅 Added at: {new_rumour['stored_at']}")
+         print(f"   🔍 MongoDB ID: {result.inserted_id}")
+         print("\n💡 Check your frontend - you should see this new rumour appear automatically!")
+
+     except Exception as e:
+         print(f"❌ Error adding test rumour: {e}")
+
+     # Close connection
+     client.close()
+     print("\n🔌 MongoDB connection closed")
+
+ if __name__ == "__main__":
+     print("🚀 MongoDB Sample Data Script")
+     print("=" * 50)
+
+     if len(sys.argv) > 1 and sys.argv[1] == "test":
+         test_realtime_update()
+     else:
+         add_sample_rumours()
+
+     print("\n✨ Script completed!")
+     print("\n💡 Usage:")
+     print("   python add_sample_data.py        # Add sample rumours")
+     print("   python add_sample_data.py test   # Add test rumour for real-time updates")
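
One caveat worth flagging in the script above: insert_one only raises DuplicateKeyError if the collection actually enforces uniqueness, and MongoDB does not index post_id on its own. A minimal sketch of the one-time setup the script appears to assume (whether such an index already exists in this deployment is an assumption):

    import os
    from dotenv import load_dotenv
    from pymongo import MongoClient

    load_dotenv()
    client = MongoClient(os.getenv("MONGO_CONNECTION_STRING"))
    # Without a unique index, re-running the script silently inserts duplicates
    # instead of hitting the DuplicateKeyError branch above.
    client["aegis"]["debunk_posts"].create_index("post_id", unique=True)
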
config.py ADDED
@@ -0,0 +1,136 @@
+ import os
+ from typing import Optional
+ from dotenv import load_dotenv
+
+ load_dotenv()
+
+ class Config:
+     """Configuration class for the Visual Verification Service"""
+
+     # API Configuration
+     SERP_API_KEY: Optional[str] = os.getenv("SERP_API_KEY")
+     SERPAPI_BASE_URL: str = "https://serpapi.com/search"
+     GEMINI_API_KEY: Optional[str] = os.getenv("GEMINI_API_KEY")
+     GEMINI_MODEL: str = os.getenv("GEMINI_MODEL", "gemini-1.5-flash")
+     GEMINI_TEMPERATURE: float = float(os.getenv("GEMINI_TEMPERATURE", "0.1"))
+     GEMINI_TOP_P: float = float(os.getenv("GEMINI_TOP_P", "0.8"))
+     GEMINI_MAX_TOKENS: int = int(os.getenv("GEMINI_MAX_TOKENS", "1000000"))
+
+     # Google Custom Search API Configuration (replaces deprecated Fact Check Tools API)
+     GOOGLE_FACT_CHECK_API_KEY: Optional[str] = os.getenv("GOOGLE_FACT_CHECK_API_KEY")
+     GOOGLE_FACT_CHECK_CX: Optional[str] = os.getenv("GOOGLE_FACT_CHECK_CX")
+
+     # Low-priority (social/UGC) domains to downrank (override via LOW_PRIORITY_DOMAINS)
+     LOW_PRIORITY_DOMAINS: set = set((os.getenv(
+         "LOW_PRIORITY_DOMAINS",
+         ",".join([
+             "twitter.com","www.twitter.com","x.com","www.x.com",
+             "reddit.com","www.reddit.com",
+             "facebook.com","www.facebook.com","m.facebook.com",
+             "instagram.com","www.instagram.com",
+             "tiktok.com","www.tiktok.com",
+             "threads.net","www.threads.net"
+         ])
+     ) or "").split(","))
+
+     # Analysis thresholds (kept configurable to avoid hardcoding)
+     CONTEXT_SIM_THRESHOLD: float = float(os.getenv("CONTEXT_SIM_THRESHOLD", "0.6"))
+
+     # Streaming downloader (yt-dlp) integration
+     # If true, prefer yt-dlp for any video_url (works for YouTube/Instagram/Twitter/etc.)
+     USE_STREAM_DOWNLOADER: bool = os.getenv("USE_STREAM_DOWNLOADER", "true").lower() == "true"
+     # Binary path for yt-dlp (auto-resolved in code if not absolute)
+     YTDLP_BIN: str = os.getenv("YTDLP_BIN", "yt-dlp")
+     STREAM_DOWNLOAD_TIMEOUT: int = int(os.getenv("STREAM_DOWNLOAD_TIMEOUT", "120"))
+     # Optional comma-separated list of domains to always treat as streaming
+     STREAMING_DOMAINS: str = os.getenv("STREAMING_DOMAINS", "youtube.com,youtu.be,instagram.com,twitter.com,x.com,tiktok.com,facebook.com,fb.watch")
+
+     # Cloudinary
+     CLOUDINARY_CLOUD_NAME: Optional[str] = os.getenv("CLOUDINARY_CLOUD_NAME")
+     CLOUDINARY_API_KEY: Optional[str] = os.getenv("CLOUDINARY_API_KEY")
+     CLOUDINARY_API_SECRET: Optional[str] = os.getenv("CLOUDINARY_API_SECRET")
+     CLOUDINARY_UPLOAD_PRESET: Optional[str] = os.getenv("CLOUDINARY_UPLOAD_PRESET")
+     CLOUDINARY_FOLDER: str = os.getenv("CLOUDINARY_FOLDER", "frames")
+
+     # Service Configuration
+     SERVICE_HOST: str = os.getenv("SERVICE_HOST", "0.0.0.0")
+     SERVICE_PORT: int = int(os.getenv("SERVICE_PORT", "8000"))
+
+     # File Processing Configuration
+     MAX_FILE_SIZE: int = int(os.getenv("MAX_FILE_SIZE", "50")) * 1024 * 1024  # 50MB default
+     ALLOWED_IMAGE_EXTENSIONS: set = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp'}
+     ALLOWED_VIDEO_EXTENSIONS: set = {'.mp4', '.avi', '.mov', '.wmv', '.flv', '.webm', '.mkv', '.m4v'}
+
+     # Video Processing Configuration
+     FRAME_EXTRACTION_INTERVAL: int = int(os.getenv("FRAME_INTERVAL", "4"))  # seconds
+     MAX_FRAMES_TO_ANALYZE: int = int(os.getenv("MAX_FRAMES", "10"))
+     CLIP_DURATION: int = int(os.getenv("CLIP_DURATION", "5"))  # seconds
+
+     # Image Processing Configuration
+     COUNTER_MEASURE_WIDTH: int = int(os.getenv("IMAGE_WIDTH", "400"))
+     COUNTER_MEASURE_HEIGHT: int = int(os.getenv("IMAGE_HEIGHT", "300"))
+
+     # Temporary Storage Configuration
+     TEMP_DIR: str = os.getenv("TEMP_DIR", "/tmp")
+     CLEANUP_INTERVAL: int = int(os.getenv("CLEANUP_INTERVAL", "3600"))  # seconds
+
+     # Logging Configuration
+     LOG_LEVEL: str = os.getenv("LOG_LEVEL", "INFO")
+     LOG_FORMAT: str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+
+     # Debug Configuration
+     DEBUG: bool = os.getenv("DEBUG", "false").lower() == "true"
+
+     # Redis Configuration
+     REDIS_HOST: str = os.getenv("REDIS_HOST", "localhost")
+     REDIS_PORT: int = int(os.getenv("REDIS_PORT", "6379"))
+     REDIS_DB: int = int(os.getenv("REDIS_DB", "0"))
+     REDIS_TTL: int = int(os.getenv("REDIS_TTL", "86400"))  # 24 hours in seconds
+
+     @classmethod
+     def validate(cls) -> bool:
+         """Validate configuration values"""
+         if not cls.SERP_API_KEY:
+             print("Warning: SERP_API_KEY not set. Service will not function without it.")
+             return False
+
+         if not cls.GOOGLE_FACT_CHECK_API_KEY:
+             print("Warning: GOOGLE_FACT_CHECK_API_KEY not set. Text fact-checking will not function without it.")
+             return False
+
+         if not cls.GOOGLE_FACT_CHECK_CX:
+             print("Warning: GOOGLE_FACT_CHECK_CX not set. Text fact-checking will not function without it.")
+             return False
+
+         if cls.MAX_FILE_SIZE <= 0:
+             print("Error: MAX_FILE_SIZE must be positive")
+             return False
+
+         if cls.FRAME_EXTRACTION_INTERVAL <= 0:
+             print("Error: FRAME_EXTRACTION_INTERVAL must be positive")
+             return False
+
+         if cls.CLIP_DURATION <= 0:
+             print("Error: CLIP_DURATION must be positive")
+             return False
+
+         return True
+
+     @classmethod
+     def get_allowed_extensions(cls) -> set:
+         """Get all allowed file extensions"""
+         return cls.ALLOWED_IMAGE_EXTENSIONS.union(cls.ALLOWED_VIDEO_EXTENSIONS)
+
+     @classmethod
+     def is_image_file(cls, filename: str) -> bool:
+         """Check if file is a valid image"""
+         from pathlib import Path
+         return Path(filename).suffix.lower() in cls.ALLOWED_IMAGE_EXTENSIONS
+
+     @classmethod
+     def is_video_file(cls, filename: str) -> bool:
+         """Check if file is a valid video"""
+         from pathlib import Path
+         return Path(filename).suffix.lower() in cls.ALLOWED_VIDEO_EXTENSIONS
+
+ # Global configuration instance
+ config = Config()
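
For reference, a minimal sketch of how this module is presumably consumed by the rest of the service; the call site is an assumption, but every attribute and method used here is defined above:

    from config import Config, config

    # Fail fast if required keys are missing (validate() prints its own warnings).
    if not Config.validate():
        raise SystemExit("Configuration invalid; see warnings above")

    print(config.is_video_file("clip.mp4"))         # True
    print(config.is_image_file("photo.webp"))       # True
    print(sorted(config.get_allowed_extensions()))  # image + video extensions combined
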
main.py ADDED
@@ -0,0 +1,509 @@
+ from fastapi import FastAPI, File, UploadFile, HTTPException, Form, WebSocket, WebSocketDisconnect
+ from typing import Optional, List, Dict, Any
+ from fastapi.responses import FileResponse
+ from fastapi.middleware.cors import CORSMiddleware
+ from fastapi.staticfiles import StaticFiles
+ import uvicorn
+ import os
+ import tempfile
+ from pathlib import Path
+ import asyncio
+ import logging
+ import json
+
+ from services.image_verifier import ImageVerifier
+ from services.video_verifier import VideoVerifier
+ from services.input_processor import InputProcessor
+ from services.text_fact_checker import TextFactChecker
+ from services.educational_content_generator import EducationalContentGenerator
+ from services.mongodb_service import MongoDBService
+ from services.websocket_service import connection_manager, initialize_mongodb_change_stream, cleanup_mongodb_change_stream
+ from utils.file_utils import save_upload_file, cleanup_temp_files
+
+ app = FastAPI(
+     title="Visual Verification Service",
+     description="A service to verify images/videos and generate visual counter-measures",
+     version="1.0.0"
+ )
+
+ # Setup logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ # Add CORS middleware
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ # Mount static directory for local assets (e.g., extracted frames)
+ os.makedirs("public/frames", exist_ok=True)
+ app.mount("/static", StaticFiles(directory="public"), name="static")
+
+ # Initialize verifiers and input processor
+ image_verifier = ImageVerifier()
+ video_verifier = VideoVerifier()
+ input_processor = InputProcessor()
+ text_fact_checker = TextFactChecker()
+ educational_generator = EducationalContentGenerator()
+
+ # Initialize MongoDB service
+ mongodb_service = None
+ try:
+     mongodb_service = MongoDBService()
+ except Exception as e:
+     print(f"Warning: MongoDB service initialization failed: {e}")
+
+ # Initialize MongoDB change service (will be set in startup event)
+ mongodb_change_service = None
+
+ @app.on_event("startup")
+ async def startup_event():
+     """Initialize services on startup"""
+     global mongodb_change_service
+     try:
+         mongodb_change_service = await initialize_mongodb_change_stream()
+         logger.info("✅ All services initialized successfully")
+     except Exception as e:
+         logger.error(f"❌ Failed to initialize services: {e}")
+
+ @app.on_event("shutdown")
+ async def shutdown_event():
+     """Cleanup services on shutdown"""
+     try:
+         await cleanup_mongodb_change_stream()
+         logger.info("🧹 All services cleaned up successfully")
+     except Exception as e:
+         logger.error(f"❌ Error during cleanup: {e}")
+
+ @app.websocket("/ws")
+ async def websocket_endpoint(websocket: WebSocket):
+     """WebSocket endpoint for real-time updates"""
+     await connection_manager.connect(websocket, {"connected_at": asyncio.get_event_loop().time()})
+     logger.info(f"✅ WebSocket client connected. Total connections: {len(connection_manager.active_connections)}")
+
+     try:
+         while True:
+             try:
+                 # Wait for incoming messages with a timeout
+                 data = await asyncio.wait_for(websocket.receive_text(), timeout=30.0)
+
+                 # Echo back a response (optional)
+                 await connection_manager.send_personal_message(
+                     json.dumps({"type": "pong", "message": "Connection active"}),
+                     websocket
+                 )
+             except asyncio.TimeoutError:
+                 # Send a ping to keep connection alive
+                 await connection_manager.send_personal_message(
+                     json.dumps({"type": "ping", "message": "Keep alive"}),
+                     websocket
+                 )
+             except Exception as e:
+                 logger.error(f"❌ Error in WebSocket message handling: {e}")
+                 break
+
+     except WebSocketDisconnect:
+         logger.info("🔌 WebSocket client disconnected normally")
+         connection_manager.disconnect(websocket)
+     except Exception as e:
+         logger.error(f"❌ WebSocket error: {e}")
+         connection_manager.disconnect(websocket)
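
A minimal client sketch for exercising the /ws endpoint above, assuming the third-party websockets package and a server on localhost:7860. Any text message triggers the pong branch; after 30 idle seconds the handler sends a ping keep-alive, so the second recv() may block for up to that long:

    import asyncio
    import json
    import websockets

    async def main():
        async with websockets.connect("ws://localhost:7860/ws") as ws:
            await ws.send("hello")              # any text elicits a pong
            print(json.loads(await ws.recv()))  # {"type": "pong", ...}
            print(json.loads(await ws.recv()))  # a keep-alive ping or a broadcast update

    asyncio.run(main())
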
+
+ @app.get("/")
+ async def root():
+     return {"message": "Visual Verification Service is running"}
+
+ @app.post("/verify/image")
+ async def verify_image(
+     file: Optional[UploadFile] = File(None),
+     image_url: Optional[str] = Form(None),
+     claim_context: str = Form("Unknown context"),
+     claim_date: str = Form("Unknown date")
+ ):
+     """
+     Verify a single image and generate a visual counter-measure
+     """
+     try:
+         # Save uploaded file temporarily
+         temp_file_path = None
+         if file is not None:
+             temp_file_path = await save_upload_file(file)
+
+         # Verify image
+         result = await image_verifier.verify(
+             image_path=temp_file_path,
+             claim_context=claim_context,
+             claim_date=claim_date,
+             image_url=image_url
+         )
+
+         # Clean up temp file
+         if temp_file_path:
+             cleanup_temp_files([temp_file_path])
+
+         return result
+
+     except Exception as e:
+         # Clean up on error
+         if 'temp_file_path' in locals() and temp_file_path:
+             cleanup_temp_files([temp_file_path])
+         raise HTTPException(status_code=500, detail=str(e))
+
+ @app.post("/verify/video")
+ async def verify_video(
+     file: Optional[UploadFile] = File(None),
+     video_url: Optional[str] = Form(None),
+     claim_context: str = Form("Unknown context"),
+     claim_date: str = Form("Unknown date")
+ ):
+     """
+     Verify a video and generate a visual counter-measure video
+     """
+     try:
+         # Save uploaded file temporarily
+         temp_file_path = None
+         if file is not None:
+             temp_file_path = await save_upload_file(file)
+
+         # Verify video
+         result = await video_verifier.verify(
+             video_path=temp_file_path,
+             claim_context=claim_context,
+             claim_date=claim_date,
+             video_url=video_url
+         )
+
+         # Clean up temp file
+         if temp_file_path:
+             cleanup_temp_files([temp_file_path])
+
+         return result
+
+     except Exception as e:
+         # Clean up on error
+         if 'temp_file_path' in locals() and temp_file_path:
+             cleanup_temp_files([temp_file_path])
+         raise HTTPException(status_code=500, detail=str(e))
+
+ @app.post("/verify/text")
+ async def verify_text(
+     text_input: str = Form(...),
+     claim_context: str = Form("Unknown context"),
+     claim_date: str = Form("Unknown date")
+ ):
+     """
+     Verify a textual claim using Google's Fact Check Tools API
+     """
+     try:
+         # Verify text claim
+         result = await text_fact_checker.verify(
+             text_input=text_input,
+             claim_context=claim_context,
+             claim_date=claim_date
+         )
+
+         return result
+
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
+
+ @app.post("/chatbot/verify")
+ async def chatbot_verify(
+     text_input: Optional[str] = Form(None),
+     files: Optional[List[UploadFile]] = File(None)
+ ):
+     """
+     Chatbot-friendly endpoint that intelligently processes input and routes to appropriate verification
+     """
+     try:
+         # Process input with LLM
+         processed_input = await input_processor.process_input(
+             text_input=text_input,
+             files=files
+         )
+
+         if "error" in processed_input:
+             return {"error": processed_input["error"]}
+
+         verification_type = processed_input["verification_type"]
+         content = processed_input["content"]
+         claim_context = processed_input["claim_context"]
+         claim_date = processed_input["claim_date"]
+
+         results = []
+         temp_files_to_cleanup = []
+
+         # Handle text-only verification
+         if verification_type == "text" and content.get("text"):
+             result = await text_fact_checker.verify(
+                 text_input=content["text"],
+                 claim_context=claim_context,
+                 claim_date=claim_date
+             )
+             result["source"] = "text_input"
+             results.append(result)
+
+         # Process files if any
+         for file_path in content["files"]:
+             temp_files_to_cleanup.append(file_path)
+
+             if verification_type == "image":
+                 result = await image_verifier.verify(
+                     image_path=file_path,
+                     claim_context=claim_context,
+                     claim_date=claim_date
+                 )
+             else:  # video
+                 result = await video_verifier.verify(
+                     video_path=file_path,
+                     claim_context=claim_context,
+                     claim_date=claim_date
+                 )
+
+             result["source"] = "uploaded_file"
+             results.append(result)
+
+         # Process URLs if any
+         for url in content["urls"]:
+             if verification_type == "image":
+                 result = await image_verifier.verify(
+                     image_url=url,
+                     claim_context=claim_context,
+                     claim_date=claim_date
+                 )
+             else:  # video
+                 result = await video_verifier.verify(
+                     video_url=url,
+                     claim_context=claim_context,
+                     claim_date=claim_date
+                 )
+
+             result["source"] = "url"
+             results.append(result)
+
+         # Clean up temp files
+         if temp_files_to_cleanup:
+             input_processor.cleanup_temp_files(temp_files_to_cleanup)
+
+         # Build a single concise chatbot message
+         overall = _aggregate_verdicts(results)
+         # Prefer consolidated video summary when present, else per-item message
+         candidates: List[str] = []
+         for r in results:
+             if isinstance(r, dict):
+                 details = r.get("details") or {}
+                 if isinstance(details, dict) and details.get("overall_summary"):
+                     candidates.append(str(details.get("overall_summary")))
+                 elif r.get("message"):
+                     candidates.append(str(r.get("message")))
+         best_msg = max(candidates, key=len) if candidates else ""
+         # Avoid duplication by detecting if clarification already begins with a verdict phrase
+         verdict_prefixes = [
+             "this claim is true:",
+             "this claim is false:",
+             "this claim is uncertain:",
+             "this claim has mixed evidence:",
+             "the claim is true:",
+             "the claim is false:",
+             "the claim is uncertain:",
+             "result:",
+         ]
+         prefix_map = {
+             "true": "This claim is true:",
+             "false": "This claim is false:",
+             "uncertain": "This claim is uncertain:",
+             "mixed": "This claim has mixed evidence:",
+             "no_content": "No verifiable content found:",
+         }
+         prefix = prefix_map.get(overall, "Result:")
+         lower_msg = (best_msg or "").strip().lower()
+         if best_msg and any(lower_msg.startswith(p) for p in verdict_prefixes):
+             final_message = best_msg.strip()
+         else:
+             final_message = f"{prefix} {best_msg}" if best_msg else prefix
+         return {
+             "message": final_message,
+             "verdict": overall,
+             "details": {
+                 "results": results,
+                 "verification_type": verification_type,
+                 "claim_context": claim_context,
+                 "claim_date": claim_date
+             }
+         }
+
+     except Exception as e:
+         # Clean up any temp files on error
+         if 'temp_files_to_cleanup' in locals():
+             input_processor.cleanup_temp_files(temp_files_to_cleanup)
+         raise HTTPException(status_code=500, detail=str(e))
+
+ def _aggregate_verdicts(results: List[Dict]) -> str:
+     """Aggregate individual verification results into overall verdict.
+
+     Supports image results (with 'verdict'), video results (with details.overall_verdict),
+     and text results (with 'verdict').
+     """
+     if not results:
+         return "no_content"
+
+     normalized: List[str] = []
+     for r in results:
+         # Prefer explicit boolean 'verified' if present
+         if "verified" in r and isinstance(r.get("verified"), bool):
+             v = "true" if r.get("verified") else "false"
+         else:
+             v = r.get("verdict")
+             if not v:
+                 details = r.get("details") or {}
+                 v = details.get("overall_verdict")
+         normalized.append((v or "unknown").lower())
+
+     # If any false, overall is false
+     if "false" in normalized:
+         return "false"
+
+     # If any uncertain, overall is uncertain
+     if "uncertain" in normalized:
+         return "uncertain"
+
+     # If all true, overall is true
+     if all(v == "true" for v in normalized):
+         return "true"
+
+     return "mixed"
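
A worked example of the precedence this function implements (any false wins, then uncertain, then all-true, else mixed); the input list is hypothetical and shows only the fields the function reads:

    results = [
        {"verdict": "true"},
        {"details": {"overall_verdict": "uncertain"}},  # read when no top-level verdict
        {"verified": True},                             # boolean 'verified' takes precedence
    ]
    print(_aggregate_verdicts(results))  # -> "uncertain" (no "false", one "uncertain")
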
+
+ @app.get("/mongodb/recent-posts")
+ async def get_recent_debunk_posts(limit: int = 5):
+     """
+     Get recent debunk posts from MongoDB
+
+     Args:
+         limit: Maximum number of posts to return (default: 5)
+
+     Returns:
+         List of recent debunk posts
+     """
+     try:
+         print(f"🔍 DEBUG: Endpoint called with limit={limit}")
+         print(f"🔍 DEBUG: MongoDB service available: {mongodb_service is not None}")
+
+         if not mongodb_service:
+             print("❌ DEBUG: MongoDB service is None!")
+             raise HTTPException(
+                 status_code=503,
+                 detail="MongoDB service is not available. Check MONGO_CONNECTION_STRING environment variable."
+             )
+
+         print("🔍 DEBUG: Calling mongodb_service.get_recent_posts()")
+         posts = mongodb_service.get_recent_posts(limit)
+         print(f"🔍 DEBUG: Service returned {len(posts)} posts")
+
+         if posts:
+             print(f"🔍 DEBUG: First post keys: {list(posts[0].keys())}")
+             print(f"🔍 DEBUG: First post _id: {posts[0].get('_id')}")
+         else:
+             print("⚠️ DEBUG: No posts returned from service")
+
+         result = {
+             "success": True,
+             "count": len(posts),
+             "posts": posts
+         }
+
+         print(f"🔍 DEBUG: Returning result with {len(posts)} posts")
+         return result
+
+     except HTTPException:
+         # Let the deliberate 503 above pass through instead of being rewrapped as a 500
+         raise
+     except Exception as e:
+         print(f"❌ DEBUG: Exception in endpoint: {e}")
+         print(f"🔍 DEBUG: Exception type: {type(e).__name__}")
+         raise HTTPException(status_code=500, detail=str(e))
+
+ @app.get("/health")
+ async def health_check():
+     return {"status": "healthy", "service": "visual-verification"}
+
+ # Educational Content API Endpoints
+ @app.get("/educational/modules")
+ async def get_educational_modules():
+     """Get list of available educational modules"""
+     try:
+         modules_data = await educational_generator.get_modules_list()
+         return modules_data
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
+
+ @app.get("/educational/modules/{module_id}")
+ async def get_module_content(
+     module_id: str,
+     difficulty_level: str = "beginner"
+ ):
+     """Get educational content for a specific module"""
+     try:
+         content = await educational_generator.generate_module_content(
+             module_id, difficulty_level
+         )
+         return content
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
+
+ @app.post("/educational/contextual-learning")
+ async def get_contextual_learning(verification_result: Dict[str, Any]):
+     """Generate educational content based on verification result"""
+     try:
+         content = await educational_generator.generate_contextual_learning(
+             verification_result
+         )
+         return content
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
+
+ @app.post("/educational/clear-cache")
+ async def clear_educational_cache():
+     """Clear all educational content from Redis cache"""
+     try:
+         if educational_generator.redis_client:
+             # Get all educational cache keys
+             keys = educational_generator.redis_client.keys("educational:*")
+             if keys:
+                 educational_generator.redis_client.delete(*keys)
+                 return {"message": f"Cleared {len(keys)} cache entries", "keys": keys}
+             else:
+                 return {"message": "No cache entries found"}
+         else:
+             return {"message": "Redis not available"}
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
+
+ @app.get("/educational/cache-status")
+ async def get_cache_status():
+     """Get status of educational content cache"""
+     try:
+         if educational_generator.redis_client:
+             keys = educational_generator.redis_client.keys("educational:*")
+             cache_info = {}
+             for key in keys:
+                 ttl = educational_generator.redis_client.ttl(key)
+                 cache_info[key] = {
+                     "ttl": ttl,
+                     "exists": ttl > 0
+                 }
+             return {
+                 "redis_connected": True,
+                 "total_keys": len(keys),
+                 "cache_info": cache_info
+             }
+         else:
+             return {"redis_connected": False, "message": "Redis not available"}
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
+
+ if __name__ == "__main__":
+     uvicorn.run(app, host="0.0.0.0", port=7860)
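
Finally, a minimal sketch of calling the /chatbot/verify endpoint from a client, assuming the requests package and a server on localhost:7860; the sample claim is made up:

    import requests

    resp = requests.post(
        "http://localhost:7860/chatbot/verify",
        data={"text_input": "Coffee increases life expectancy by 5 years"},
        # files=[("files", open("photo.jpg", "rb"))],  # optionally attach media
        timeout=120,
    )
    resp.raise_for_status()
    body = resp.json()
    print(body["verdict"], "-", body["message"])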