Harshilforworks committed on
Commit be26322 · verified · 1 Parent(s): 400c540

Major reconstruction mode

Dockerfile CHANGED
@@ -1,32 +1,61 @@
-FROM python:3.13
-
-# Install system dependencies required by some Python packages (OpenCV, Pillow, ffmpeg)
-RUN apt-get update \
-    && apt-get install -y --no-install-recommends \
-    build-essential \
-    ffmpeg \
-    libsm6 \
-    libxext6 \
-    libxrender1 \
-    libgl1 \
-    git \
-    && rm -rf /var/lib/apt/lists/*
-
-RUN useradd -m -u 1000 user
-USER user
-ENV PATH="/home/user/.local/bin:$PATH"
-
-WORKDIR /app
-
-COPY --chown=user ./requirements.txt requirements.txt
-RUN pip install --no-cache-dir --upgrade pip \
-    && pip install --no-cache-dir -r requirements.txt
-
-COPY --chown=user . /app
-
-# Set default environment variables for service
-ENV SERVICE_HOST=0.0.0.0
-ENV SERVICE_PORT=7860
-
-# Use shell form to allow environment variable expansion
-CMD sh -c "uvicorn main:app --host ${SERVICE_HOST} --port ${SERVICE_PORT}"
+# Use Python 3.13 full version (not slim) for Hugging Face deployment
+FROM python:3.13
+
+# Set environment variables
+ENV PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PIP_NO_CACHE_DIR=1 \
+    PIP_DISABLE_PIP_VERSION_CHECK=1
+
+# Install system dependencies required by the application
+# Including OpenCV, PyTorch, ffmpeg, and other multimedia libraries; libgl1 replaces libgl1-mesa-glx, which recent Debian base images no longer ship
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    ffmpeg \
+    libsm6 \
+    libxext6 \
+    libxrender1 \
+    libgomp1 \
+    libgl1 \
+    libglib2.0-0 \
+    git \
+    wget \
+    curl \
+    ca-certificates \
+    && rm -rf /var/lib/apt/lists/*
+
+# Create non-root user for Hugging Face Spaces
+RUN useradd -m -u 1000 user
+
+# Set up working directory
+WORKDIR /app
+
+# Copy requirements first for better Docker layer caching
+COPY --chown=user:user requirements.txt .
+
+# Install Python dependencies as root to avoid permission issues
+RUN pip install --upgrade pip setuptools wheel && \
+    pip install -r requirements.txt
+
+# Switch to non-root user
+USER user
+
+# Set PATH for user-installed packages
+ENV PATH="/home/user/.local/bin:$PATH"
+
+# Copy application code
+COPY --chown=user:user . .
+
+# Expose port for Hugging Face Spaces (default: 7860)
+EXPOSE 7860
+
+# Set default environment variables for Hugging Face deployment
+ENV SERVICE_HOST=0.0.0.0 \
+    SERVICE_PORT=7860
+
+# Health check endpoint
+HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
+    CMD curl -f http://localhost:7860/health || exit 1
+
+# Run the application (shell form so ${SERVICE_HOST} and ${SERVICE_PORT} expand)
+CMD uvicorn main:app --host ${SERVICE_HOST} --port ${SERVICE_PORT} --workers 1
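The new HEALTHCHECK relies on the /health route that main.py already exposes (it returns {"status": "healthy", ...}). A minimal sketch of probing that same endpoint from the host, assuming the container publishes the default port 7860; handy when diagnosing why a health check reports unhealthy:

```python
# Minimal health probe mirroring the Dockerfile HEALTHCHECK.
# Assumes the service is reachable at localhost:7860 (the default SERVICE_PORT).
import json
import sys
import urllib.request


def check_health(url: str = "http://localhost:7860/health", timeout: float = 10.0) -> bool:
    """Return True if /health answers 200 with status == "healthy", else False."""
    try:
        with urllib.request.urlopen(url, timeout=timeout) as resp:
            payload = json.loads(resp.read().decode("utf-8"))
            return resp.status == 200 and payload.get("status") == "healthy"
    except (OSError, ValueError):
        # Connection refused, timeout, non-JSON body, etc.
        return False


if __name__ == "__main__":
    sys.exit(0 if check_health() else 1)
```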
add_sample_data.py CHANGED
@@ -1,409 +1,409 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Script to add sample rumour data to MongoDB for testing real-time updates
4
+ """
5
+
6
+ import os
7
+ import sys
8
+ import asyncio
9
+ from datetime import datetime, timedelta
10
+ from pymongo import MongoClient
11
+ from pymongo.errors import DuplicateKeyError
12
+ from dotenv import load_dotenv
13
+
14
+ # Load environment variables
15
+ load_dotenv()
16
+
17
+ def get_mongo_client():
18
+ """Get MongoDB client connection"""
19
+ connection_string = os.getenv('MONGO_CONNECTION_STRING')
20
+ if not connection_string:
21
+ raise ValueError("MONGO_CONNECTION_STRING environment variable not set")
22
+
23
+ client = MongoClient(connection_string)
24
+ # Test connection
25
+ client.admin.command('ping')
26
+ return client
27
+
28
+ def add_sample_rumours():
29
+ """Add sample rumour data to MongoDB"""
30
+
31
+ client = get_mongo_client()
32
+ db = client['aegis']
33
+ collection = db['debunk_posts']
34
+
35
+ # Sample rumour data with unique post_ids
36
+ sample_rumours = [
37
+ {
38
+ "post_id": "sample_rumour_001",
39
+ "claim": "Scientists have discovered a new planet that could support human life",
40
+ "summary": "Recent astronomical observations suggest the possibility of a habitable exoplanet",
41
+ "platform": "Twitter",
42
+ "Post_link": "https://twitter.com/example/status/123456789",
43
+ "verification": {
44
+ "verdict": "true",
45
+ "message": "This claim is accurate based on NASA's recent findings",
46
+ "reasoning": "The discovery was confirmed by multiple telescopes and peer-reviewed research",
47
+ "verification_date": datetime.now() - timedelta(hours=2),
48
+ "sources": {
49
+ "count": 3,
50
+ "links": [
51
+ "https://www.nasa.gov/feature/nasa-discovers-new-exoplanet",
52
+ "https://www.nature.com/articles/space-discovery-2024",
53
+ "https://www.scientificamerican.com/article/new-habitable-planet"
54
+ ],
55
+ "titles": [
56
+ "NASA Discovers New Exoplanet",
57
+ "Nature: Space Discovery 2024",
58
+ "Scientific American: New Habitable Planet Found"
59
+ ]
60
+ }
61
+ },
62
+ "stored_at": datetime.now() - timedelta(hours=2)
63
+ },
64
+ {
65
+ "post_id": "sample_rumour_002",
66
+ "claim": "Breaking: Major tech company announces they're shutting down all services",
67
+ "summary": "A viral post claims a major technology company is discontinuing all its services",
68
+ "platform": "Facebook",
69
+ "Post_link": "https://facebook.com/example/posts/987654321",
70
+ "verification": {
71
+ "verdict": "false",
72
+ "message": "This is completely false and has been debunked by the company",
73
+ "reasoning": "The company's official channels have confirmed this is a hoax. No such announcement was made.",
74
+ "verification_date": datetime.now() - timedelta(hours=1, minutes=30),
75
+ "sources": {
76
+ "count": 2,
77
+ "links": [
78
+ "https://company.com/official-statement",
79
+ "https://techcrunch.com/company-denies-shutdown-rumors"
80
+ ],
81
+ "titles": [
82
+ "Official Company Statement",
83
+ "TechCrunch: Company Denies Shutdown Rumors"
84
+ ]
85
+ }
86
+ },
87
+ "stored_at": datetime.now() - timedelta(hours=1, minutes=30)
88
+ },
89
+ {
90
+ "post_id": "sample_rumour_003",
91
+ "claim": "New study shows that coffee increases life expectancy by 5 years",
92
+ "summary": "A recent research paper claims significant health benefits from coffee consumption",
93
+ "platform": "Instagram",
94
+ "Post_link": "https://instagram.com/p/coffee-study-2024",
95
+ "verification": {
96
+ "verdict": "mostly true",
97
+ "message": "While coffee does have health benefits, the 5-year claim is exaggerated",
98
+ "reasoning": "Studies show moderate coffee consumption has health benefits, but the specific 5-year claim is not supported by the research cited.",
99
+ "verification_date": datetime.now() - timedelta(minutes=45),
100
+ "sources": {
101
+ "count": 4,
102
+ "links": [
103
+ "https://www.nejm.org/journal/coffee-health-study",
104
+ "https://www.mayoclinic.org/coffee-health-benefits",
105
+ "https://www.hsph.harvard.edu/coffee-research",
106
+ "https://www.healthline.com/coffee-life-expectancy-study"
107
+ ],
108
+ "titles": [
109
+ "NEJM: Coffee Health Study",
110
+ "Mayo Clinic: Coffee Health Benefits",
111
+ "Harvard: Coffee Research",
112
+ "Healthline: Coffee Life Expectancy Study"
113
+ ]
114
+ }
115
+ },
116
+ "stored_at": datetime.now() - timedelta(minutes=45)
117
+ },
118
+ {
119
+ "post_id": "sample_rumour_004",
120
+ "claim": "Local restaurant caught serving expired food to customers",
121
+ "summary": "Social media posts allege a popular local restaurant is serving expired ingredients",
122
+ "platform": "Reddit",
123
+ "Post_link": "https://reddit.com/r/localnews/expired-food-restaurant",
124
+ "verification": {
125
+ "verdict": "disputed",
126
+ "message": "The claims are under investigation by health authorities",
127
+ "reasoning": "Health department inspection is ongoing. Some allegations have been confirmed, others are disputed by the restaurant management.",
128
+ "verification_date": datetime.now() - timedelta(minutes=20),
129
+ "sources": {
130
+ "count": 3,
131
+ "links": [
132
+ "https://healthdept.gov/inspection-reports",
133
+ "https://localnews.com/restaurant-investigation",
134
+ "https://restaurant.com/official-response"
135
+ ],
136
+ "titles": [
137
+ "Health Department Inspection Reports",
138
+ "Local News: Restaurant Investigation",
139
+ "Restaurant Official Response"
140
+ ]
141
+ }
142
+ },
143
+ "stored_at": datetime.now() - timedelta(minutes=20)
144
+ },
145
+ {
146
+ "post_id": "sample_rumour_005",
147
+ "claim": "Mysterious lights spotted in the sky over the city last night",
148
+ "summary": "Multiple reports of unusual lights in the night sky",
149
+ "platform": "TikTok",
150
+ "Post_link": "https://tiktok.com/@user/video/mysterious-lights-city",
151
+ "verification": {
152
+ "verdict": "unverified",
153
+ "message": "Unable to verify the source or authenticity of these reports",
154
+ "reasoning": "No official explanation has been provided. Could be various phenomena including aircraft, drones, or natural occurrences.",
155
+ "verification_date": datetime.now() - timedelta(minutes=10),
156
+ "sources": {
157
+ "count": 2,
158
+ "links": [
159
+ "https://weather.gov/sky-conditions-report",
160
+ "https://faa.gov/flight-tracker-archive"
161
+ ],
162
+ "titles": [
163
+ "Weather Service: Sky Conditions Report",
164
+ "FAA: Flight Tracker Archive"
165
+ ]
166
+ }
167
+ },
168
+ "stored_at": datetime.now() - timedelta(minutes=10)
169
+ },
170
+ {
171
+ "post_id": "sample_rumour_006",
172
+ "claim": "Viral deepfake shows the president announcing an unexpected policy change",
173
+ "summary": "A widely shared video appears to show a surprise announcement from the president",
174
+ "platform": "YouTube",
175
+ "Post_link": "https://youtube.com/watch?v=deepfake-announcement",
176
+ "verification": {
177
+ "verdict": "false",
178
+ "message": "The clip is a deepfake; official channels have no record of this announcement",
179
+ "reasoning": "Audio-visual artifacts and mismatch with verified schedule indicate synthetic media",
180
+ "verification_date": datetime.now() - timedelta(minutes=5),
181
+ "sources": {
182
+ "count": 2,
183
+ "links": [
184
+ "https://whitehouse.gov/schedule",
185
+ "https://journal.example.com/deepfake-analysis"
186
+ ],
187
+ "titles": [
188
+ "Official Schedule",
189
+ "Deepfake Analysis"
190
+ ]
191
+ }
192
+ },
193
+ "stored_at": datetime.now() - timedelta(minutes=5)
194
+ },
195
+ {
196
+ "post_id": "sample_rumour_007",
197
+ "claim": "Wildfire evacuation map shows entire county under immediate threat",
198
+ "summary": "A map circulating online claims an entire county is being evacuated",
199
+ "platform": "Telegram",
200
+ "Post_link": "https://t.me/channel/wildfire-map",
201
+ "verification": {
202
+ "verdict": "disputed",
203
+ "message": "Only specific zones are under watch; no county-wide evacuation order",
204
+ "reasoning": "Emergency management alerts list partial warnings, not blanket evacuations",
205
+ "verification_date": datetime.now() - timedelta(minutes=8),
206
+ "sources": {
207
+ "count": 2,
208
+ "links": [
209
+ "https://alerts.example.gov/region-updates",
210
+ "https://county.gov/emergency"
211
+ ],
212
+ "titles": [
213
+ "Regional Alerts",
214
+ "County Emergency Updates"
215
+ ]
216
+ }
217
+ },
218
+ "stored_at": datetime.now() - timedelta(minutes=8)
219
+ },
220
+ {
221
+ "post_id": "sample_rumour_008",
222
+ "claim": "Celebrity X claimed in 2015 that vaccines are a government tracking program",
223
+ "summary": "A screenshot attributes an anti-vaccine quote to a well-known actor",
224
+ "platform": "Threads",
225
+ "Post_link": "https://www.threads.net/@user/post/abc123",
226
+ "verification": {
227
+ "verdict": "false",
228
+ "message": "No credible source supports this quote; likely fabricated image",
229
+ "reasoning": "Archive search and press records show no such statement from the celebrity",
230
+ "verification_date": datetime.now() - timedelta(minutes=12),
231
+ "sources": {
232
+ "count": 3,
233
+ "links": [
234
+ "https://archive.org/celebrity-press",
235
+ "https://newsdb.example.com/search",
236
+ "https://snopes.com/fact-check/celebrity-misattributed-quote"
237
+ ],
238
+ "titles": [
239
+ "Press Archive",
240
+ "News Database",
241
+ "Fact Check"
242
+ ]
243
+ }
244
+ },
245
+ "stored_at": datetime.now() - timedelta(minutes=12)
246
+ },
247
+ {
248
+ "post_id": "sample_rumour_009",
249
+ "claim": "Nationwide vaccine recall announced due to severe side effects",
250
+ "summary": "Posts claim an emergency recall affecting all batches",
251
+ "platform": "WhatsApp",
252
+ "Post_link": "https://example.com/forwarded-message",
253
+ "verification": {
254
+ "verdict": "false",
255
+ "message": "No regulatory recall issued; official notices contradict the claim",
256
+ "reasoning": "Regulatory databases list no recall matching the description",
257
+ "verification_date": datetime.now() - timedelta(minutes=25),
258
+ "sources": {
259
+ "count": 2,
260
+ "links": [
261
+ "https://fda.gov/recalls",
262
+ "https://who.int/medical-product-alerts"
263
+ ],
264
+ "titles": [
265
+ "FDA Recalls",
266
+ "WHO Alerts"
267
+ ]
268
+ }
269
+ },
270
+ "stored_at": datetime.now() - timedelta(minutes=25)
271
+ },
272
+ {
273
+ "post_id": "sample_rumour_010",
274
+ "claim": "Earthquake predicted to hit the capital city at 7 PM tonight",
275
+ "summary": "A viral message predicts an exact time for a major quake",
276
+ "platform": "TikTok",
277
+ "Post_link": "https://tiktok.com/@user/video/quake-prediction",
278
+ "verification": {
279
+ "verdict": "false",
280
+ "message": "Earthquakes cannot be predicted with exact timing using current science",
281
+ "reasoning": "Seismology consensus rejects precise short-term predictions",
282
+ "verification_date": datetime.now() - timedelta(minutes=18),
283
+ "sources": {
284
+ "count": 2,
285
+ "links": [
286
+ "https://usgs.gov/faqs/can-you-predict-earthquakes",
287
+ "https://seismo.org/position-on-prediction"
288
+ ],
289
+ "titles": [
290
+ "USGS FAQs",
291
+ "Seismology Position"
292
+ ]
293
+ }
294
+ },
295
+ "stored_at": datetime.now() - timedelta(minutes=18)
296
+ },
297
+ {
298
+ "post_id": "sample_rumour_011",
299
+ "claim": "Poll shows 98% support for Candidate Y after overnight update",
300
+ "summary": "Graphic claims near-unanimous polling shift in one night",
301
+ "platform": "X",
302
+ "Post_link": "https://x.com/example/status/shifted-poll",
303
+ "verification": {
304
+ "verdict": "uncertain",
305
+ "message": "No reputable pollster has published this figure; methodology unclear",
306
+ "reasoning": "Source lacks sampling details; awaiting official releases",
307
+ "verification_date": datetime.now() - timedelta(minutes=30),
308
+ "sources": {
309
+ "count": 2,
310
+ "links": [
311
+ "https://fivethirtyeight.com/polls/",
312
+ "https://aapor.org/methods-standards"
313
+ ],
314
+ "titles": [
315
+ "Polling Aggregator",
316
+ "Survey Standards"
317
+ ]
318
+ }
319
+ },
320
+ "stored_at": datetime.now() - timedelta(minutes=30)
321
+ }
322
+ ]
323
+
324
+ print("🔄 Adding sample rumour data to MongoDB...")
325
+
326
+ added_count = 0
327
+ skipped_count = 0
328
+
329
+ for rumour in sample_rumours:
330
+ try:
331
+ # Try to insert the document
332
+ result = collection.insert_one(rumour)
333
+ print(f"✅ Added rumour: {rumour['post_id']} - {rumour['claim'][:50]}...")
334
+ added_count += 1
335
+
336
+ except DuplicateKeyError:
337
+ print(f"⚠️ Skipped rumour (already exists): {rumour['post_id']}")
338
+ skipped_count += 1
339
+
340
+ except Exception as e:
341
+ print(f"❌ Error adding rumour {rumour['post_id']}: {e}")
342
+
343
+ print(f"\n📊 Summary:")
344
+ print(f" ✅ Added: {added_count} rumours")
345
+ print(f" ⚠️ Skipped: {skipped_count} rumours")
346
+ print(f" 📝 Total in database: {collection.count_documents({})} rumours")
347
+
348
+ # Close connection
349
+ client.close()
350
+ print("\n🔌 MongoDB connection closed")
351
+
352
+ def test_realtime_update():
353
+ """Add a new rumour to test real-time updates"""
354
+
355
+ client = get_mongo_client()
356
+ db = client['aegis']
357
+ collection = db['debunk_posts']
358
+
359
+ # Create a new rumour with current timestamp
360
+ new_rumour = {
361
+ "post_id": f"test_realtime_{int(datetime.now().timestamp())}",
362
+ "claim": "Test real-time update: This is a new rumour added for testing WebSocket functionality",
363
+ "summary": "This rumour was added to test the real-time WebSocket update system",
364
+ "platform": "Test Platform",
365
+ "Post_link": "https://example.com/test-realtime-update",
366
+ "verification": {
367
+ "verdict": "true",
368
+ "message": "This is a test rumour for real-time updates",
369
+ "reasoning": "Added programmatically to verify WebSocket functionality",
370
+ "verification_date": datetime.now(),
371
+ "sources": {
372
+ "count": 1,
373
+ "links": ["https://example.com/test-source"],
374
+ "titles": ["Test Source"]
375
+ }
376
+ },
377
+ "stored_at": datetime.now()
378
+ }
379
+
380
+ print("🔄 Adding test rumour for real-time update...")
381
+
382
+ try:
383
+ result = collection.insert_one(new_rumour)
384
+ print(f"✅ Test rumour added successfully!")
385
+ print(f" 📝 Post ID: {new_rumour['post_id']}")
386
+ print(f" 📅 Added at: {new_rumour['stored_at']}")
387
+ print(f" 🔍 MongoDB ID: {result.inserted_id}")
388
+ print("\n💡 Check your frontend - you should see this new rumour appear automatically!")
389
+
390
+ except Exception as e:
391
+ print(f"❌ Error adding test rumour: {e}")
392
+
393
+ # Close connection
394
+ client.close()
395
+ print("\n🔌 MongoDB connection closed")
396
+
397
+ if __name__ == "__main__":
398
+ print("🚀 MongoDB Sample Data Script")
399
+ print("=" * 50)
400
+
401
+ if len(sys.argv) > 1 and sys.argv[1] == "test":
402
+ test_realtime_update()
403
+ else:
404
+ add_sample_rumours()
405
+
406
+ print("\n✨ Script completed!")
407
+ print("\n💡 Usage:")
408
+ print(" python add_sample_data.py # Add sample rumours")
409
+ print(" python add_sample_data.py test # Add test rumour for real-time updates")
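Note that the DuplicateKeyError branch above only fires if the debunk_posts collection has a unique index on post_id; the script itself never creates one. A one-off sketch of setting that index up, assuming the same MONGO_CONNECTION_STRING, database, and collection names used by the script:

```python
# One-time setup so re-running add_sample_data.py raises DuplicateKeyError
# instead of inserting duplicate post_ids.
# Assumes the same MONGO_CONNECTION_STRING / 'aegis' / 'debunk_posts' names as above.
import os

from dotenv import load_dotenv
from pymongo import ASCENDING, MongoClient

load_dotenv()
client = MongoClient(os.environ["MONGO_CONNECTION_STRING"])
client["aegis"]["debunk_posts"].create_index(
    [("post_id", ASCENDING)], unique=True, name="post_id_unique"
)
client.close()
```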
config.py CHANGED
@@ -11,7 +11,7 @@ class Config:
     SERP_API_KEY: Optional[str] = os.getenv("SERP_API_KEY")
     SERPAPI_BASE_URL: str = "https://serpapi.com/search"
     GEMINI_API_KEY: Optional[str] = os.getenv("GEMINI_API_KEY")
-    GEMINI_MODEL: str = os.getenv("GEMINI_MODEL", "gemini-2.0-flash")
+    GEMINI_MODEL: str = os.getenv("GEMINI_MODEL", "gemini-2.5-flash")
     GEMINI_TEMPERATURE: float = float(os.getenv("GEMINI_TEMPERATURE", "0.1"))
     GEMINI_TOP_P: float = float(os.getenv("GEMINI_TOP_P", "0.8"))
     GEMINI_MAX_TOKENS: int = int(os.getenv("GEMINI_MAX_TOKENS", "1000000"))
@@ -85,6 +85,11 @@ class Config:
     UPSTASH_REDIS_TOKEN: Optional[str] = os.getenv("UPSTASH_REDIS_TOKEN")
     REDIS_TTL: int = int(os.getenv("REDIS_TTL", "86400")) # 24 hours in seconds
 
+    # Razorpay Configuration
+    RAZORPAY_ID: Optional[str] = os.getenv("RAZORPAY_ID")
+    RAZORPAY_KEY: Optional[str] = os.getenv("RAZORPAY_KEY")
+    RAZORPAY_WEBHOOK_SECRET: Optional[str] = os.getenv("RAZORPAY_WEBHOOK_SECRET")
+
     @classmethod
     def validate(cls) -> bool:
         """Validate configuration values"""
main.py CHANGED
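main.py (below) instantiates a RazorpayService around the new credentials, but services/razorpay_service.py is not part of this commit. As an assumption only, a wrapper would typically pass RAZORPAY_ID / RAZORPAY_KEY (key id and key secret) to the official razorpay SDK client roughly like this:

```python
# Illustrative sketch only: services/razorpay_service.py is not included in this diff.
# Assumes RAZORPAY_ID / RAZORPAY_KEY hold the Razorpay key id and key secret.
from typing import Optional

import razorpay

from config import config


def build_razorpay_client() -> Optional[razorpay.Client]:
    """Return a configured razorpay.Client, or None when credentials are missing."""
    if not (config.RAZORPAY_ID and config.RAZORPAY_KEY):
        # main.py likewise tolerates a missing Razorpay setup at startup
        return None
    return razorpay.Client(auth=(config.RAZORPAY_ID, config.RAZORPAY_KEY))
```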
@@ -1,4 +1,4 @@
-from fastapi import FastAPI, File, UploadFile, HTTPException, Form, WebSocket, WebSocketDisconnect
 from typing import Optional, List, Dict, Any
 from fastapi.responses import FileResponse
 from fastapi.middleware.cors import CORSMiddleware
@@ -21,9 +21,13 @@ from services.text_fact_checker import TextFactChecker
 from services.educational_content_generator import EducationalContentGenerator
 from services.mongodb_service import MongoDBService
 from services.websocket_service import connection_manager, initialize_mongodb_change_stream, cleanup_mongodb_change_stream
 from utils.file_utils import save_upload_file, cleanup_temp_files
 from config import config
 from services.deepfake_checker import detect_audio_deepfake
 
 app = FastAPI(
     title="Visual Verification Service",
@@ -36,9 +40,18 @@ logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
 # Add CORS middleware
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=["*"],
     allow_credentials=True,
     allow_methods=["*"],
     allow_headers=["*"],
@@ -65,15 +78,145 @@ try:
 except Exception as e:
     print(f"Warning: MongoDB service initialization failed: {e}")
 
 # Initialize MongoDB change service (will be set in startup event)
 mongodb_change_service = None
 
 @app.on_event("startup")
 async def startup_event():
     """Initialize services on startup"""
     global mongodb_change_service
     try:
         mongodb_change_service = await initialize_mongodb_change_stream()
         logger.info("✅ All services initialized successfully")
     except Exception as e:
         logger.error(f"❌ Failed to initialize services: {e}")
@@ -224,6 +367,407 @@ async def verify_text(
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 
 @app.post("/chatbot/verify")
 async def chatbot_verify(
     text_input: Optional[str] = Form(None),
@@ -313,11 +857,11 @@ async def chatbot_verify(
     try:
         gemini_prompt = f"""
 You are an assistant for audio authenticity analysis.
-File name: {os.path.basename(file_path)}
 {('User question: ' + claim_context) if claim_context else ''}
 The audio has been analyzed and the result is: {'deepfake' if deepfake else 'NOT deepfake'}.
 Compose a clear, friendly, 1-2 line summary verdict for the user, tailored to the above context/result (do not answer with JSON or code, just a natural response).
 Avoid repeating 'deepfake detection' technical language; be concise and direct.
 """
         gemini_response = input_processor_for_audio.model.generate_content(gemini_prompt)
         ai_message = None
@@ -373,6 +917,75 @@ Avoid repeating 'deepfake detection' technical language; be concise and direct.
     print(f"🔍 DEBUG: Processing {len(urls_list)} URLs")
     for i, url in enumerate(urls_list):
         print(f"🔍 DEBUG: Processing URL {i}: {url}")
         if verification_type == "image":
             print(f"🔍 DEBUG: Calling image_verifier.verify for URL")
             result = await image_verifier.verify(
@@ -563,6 +1176,58 @@ async def get_recent_debunk_posts(limit: int = 5):
         print(f"🔍 DEBUG: Exception type: {type(e).__name__}")
         raise HTTPException(status_code=500, detail=str(e))
 
 @app.get("/health")
 async def health_check():
     return {"status": "healthy", "service": "visual-verification"}
@@ -708,5 +1373,736 @@ async def get_cache_status():
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 
 if __name__ == "__main__":
     uvicorn.run(app, host="0.0.0.0", port=config.SERVICE_PORT)
1
+ from fastapi import FastAPI, File, UploadFile, HTTPException, Form, WebSocket, WebSocketDisconnect, Request
2
  from typing import Optional, List, Dict, Any
3
  from fastapi.responses import FileResponse
4
  from fastapi.middleware.cors import CORSMiddleware
 
21
  from services.educational_content_generator import EducationalContentGenerator
22
  from services.mongodb_service import MongoDBService
23
  from services.websocket_service import connection_manager, initialize_mongodb_change_stream, cleanup_mongodb_change_stream
24
+ from services.razorpay_service import RazorpayService
25
+ import razorpay.errors
26
  from utils.file_utils import save_upload_file, cleanup_temp_files
27
  from config import config
28
  from services.deepfake_checker import detect_audio_deepfake
29
+ from services.youtube_caption import get_youtube_transcript_ytdlp
30
+ import google.generativeai as genai
31
 
32
  app = FastAPI(
33
  title="Visual Verification Service",
 
40
  logger = logging.getLogger(__name__)
41
 
42
  # Add CORS middleware
43
+ # Note: When allow_credentials=True, you cannot use allow_origins=["*"]
44
+ # Must specify exact origins
45
+ # Chrome extensions make requests from background scripts which bypass CORS,
46
+ # but we include common origins for web frontend access
47
  app.add_middleware(
48
  CORSMiddleware,
49
+ allow_origins=[
50
+ "http://localhost:5173",
51
+ "http://127.0.0.1:5173",
52
+ "http://localhost:3000",
53
+ "http://127.0.0.1:3000",
54
+ ],
55
  allow_credentials=True,
56
  allow_methods=["*"],
57
  allow_headers=["*"],
 
78
  except Exception as e:
79
  print(f"Warning: MongoDB service initialization failed: {e}")
80
 
81
+ # Initialize Razorpay service
82
+ razorpay_service = None
83
+ try:
84
+ razorpay_service = RazorpayService()
85
+ except Exception as e:
86
+ print(f"Warning: Razorpay service initialization failed: {e}")
87
+
88
  # Initialize MongoDB change service (will be set in startup event)
89
  mongodb_change_service = None
90
 
91
+ async def initialize_subscription_plans():
92
+ """Initialize subscription plans in Razorpay if they don't exist"""
93
+ if not razorpay_service or not razorpay_service.client:
94
+ logger.warning("⚠️ Razorpay service not available. Skipping plan initialization.")
95
+ return
96
+
97
+ # First, test Razorpay connection by trying to fetch account details or make a simple API call
98
+ try:
99
+ # Try to verify credentials work by attempting a simple operation
100
+ # We'll skip listing plans if it fails and just try to create
101
+ logger.info("🔍 Testing Razorpay API connection...")
102
+ except Exception as e:
103
+ logger.error(f"❌ Razorpay API connection test failed: {e}")
104
+ logger.warning("⚠️ Skipping plan initialization due to API connection issues")
105
+ return
106
+
107
+ try:
108
+ # Try to list existing plans, but don't fail if it errors
109
+ existing_plan_names = set()
110
+ try:
111
+ existing_plans = razorpay_service.list_plans(count=100)
112
+ if existing_plans and existing_plans.get("items"):
113
+ existing_plan_names = {
114
+ p.get("item", {}).get("name")
115
+ for p in existing_plans.get("items", [])
116
+ if p.get("item", {}).get("name")
117
+ }
118
+ logger.info(f"📋 Found {len(existing_plan_names)} existing plans")
119
+ except Exception as list_error:
120
+ error_msg = str(list_error).lower()
121
+ if "not found" in error_msg or "404" in error_msg:
122
+ logger.info("ℹ️ No existing plans found (this is normal for new accounts)")
123
+ else:
124
+ logger.warning(f"⚠️ Could not list existing plans: {list_error}")
125
+ # Continue anyway - we'll try to create plans and handle duplicates
126
+
127
+ plans_to_create = [
128
+ {
129
+ "name": "Plan 1",
130
+ "amount": 100, # 1 INR in paise
131
+ "currency": "INR",
132
+ "interval": 1,
133
+ "period": "monthly",
134
+ "description": "Plan 1 - Monthly Subscription (1 INR)"
135
+ },
136
+ {
137
+ "name": "Plan 2",
138
+ "amount": 200, # 2 INR in paise
139
+ "currency": "INR",
140
+ "interval": 1,
141
+ "period": "monthly",
142
+ "description": "Plan 2 - Monthly Subscription (2 INR)"
143
+ },
144
+ {
145
+ "name": "Plan 3",
146
+ "amount": 300, # 3 INR in paise
147
+ "currency": "INR",
148
+ "interval": 1,
149
+ "period": "monthly",
150
+ "description": "Plan 3 - Monthly Subscription (3 INR)"
151
+ }
152
+ ]
153
+
154
+ created_count = 0
155
+ skipped_count = 0
156
+ error_count = 0
157
+
158
+ for plan_data in plans_to_create:
159
+ plan_name = plan_data["name"]
160
+
161
+ # Check if plan already exists
162
+ if plan_name in existing_plan_names:
163
+ logger.info(f"⏭️ Plan {plan_name} already exists, skipping")
164
+ skipped_count += 1
165
+ continue
166
+
167
+ try:
168
+ logger.info(f"🔄 Creating plan: {plan_name}...")
169
+ plan = razorpay_service.create_plan(**plan_data)
170
+ logger.info(f"✅ Created subscription plan: {plan_name} (ID: {plan.get('id')})")
171
+ created_count += 1
172
+ except razorpay.errors.BadRequestError as e:
173
+ error_msg = str(e).lower()
174
+ # Check if error is due to plan already existing (duplicate)
175
+ if "already exists" in error_msg or "duplicate" in error_msg:
176
+ logger.info(f"⏭️ Plan {plan_name} already exists (detected during creation), skipping")
177
+ skipped_count += 1
178
+ else:
179
+ logger.error(f"❌ BadRequestError creating plan {plan_name}: {e}")
180
+ error_count += 1
181
+ except Exception as e:
182
+ error_msg = str(e).lower()
183
+ # Check if error is due to plan already existing (duplicate)
184
+ if "already exists" in error_msg or "duplicate" in error_msg:
185
+ logger.info(f"⏭️ Plan {plan_name} already exists (detected during creation), skipping")
186
+ skipped_count += 1
187
+ elif "not found" in error_msg or "404" in error_msg:
188
+ logger.error(f"❌ API endpoint not found for plan {plan_name}. Check Razorpay credentials and API access.")
189
+ logger.error(f" Error details: {e}")
190
+ error_count += 1
191
+ else:
192
+ logger.error(f"❌ Failed to create plan {plan_name}: {e}")
193
+ error_count += 1
194
+
195
+ if created_count > 0:
196
+ logger.info(f"✅ Successfully created {created_count} subscription plans")
197
+ if skipped_count > 0:
198
+ logger.info(f"⏭️ Skipped {skipped_count} plans (already exist)")
199
+ if error_count > 0:
200
+ logger.warning(f"⚠️ {error_count} plans failed to create. Check Razorpay credentials and API permissions.")
201
+ if created_count == 0 and skipped_count == 0 and error_count > 0:
202
+ logger.error("❌ All plan creation attempts failed. Please verify:")
203
+ logger.error(" 1. RAZORPAY_ID and RAZORPAY_KEY are correct")
204
+ logger.error(" 2. API keys have subscription/plan creation permissions")
205
+ logger.error(" 3. Razorpay account has subscriptions feature enabled")
206
+
207
+ except Exception as e:
208
+ logger.error(f"❌ Failed to initialize subscription plans: {e}")
209
+ import traceback
210
+ logger.error(traceback.format_exc())
211
+
212
  @app.on_event("startup")
213
  async def startup_event():
214
  """Initialize services on startup"""
215
  global mongodb_change_service
216
  try:
217
  mongodb_change_service = await initialize_mongodb_change_stream()
218
+ # Initialize subscription plans
219
+ await initialize_subscription_plans()
220
  logger.info("✅ All services initialized successfully")
221
  except Exception as e:
222
  logger.error(f"❌ Failed to initialize services: {e}")
 
367
  except Exception as e:
368
  raise HTTPException(status_code=500, detail=str(e))
369
 
370
+ async def _extract_media_from_url(url: str) -> Optional[Dict[str, Any]]:
371
+ """
372
+ Use yt-dlp to extract media from a URL and determine if it's an image or video.
373
+
374
+ Returns:
375
+ Dict with "type" ("image" or "video") and "path" (local file path), or None if fails
376
+ """
377
+ try:
378
+ from shutil import which
379
+ import subprocess
380
+ import tempfile
381
+
382
+ # Resolve yt-dlp binary
383
+ ytdlp_bin = config.YTDLP_BIN or "yt-dlp"
384
+ found = which(ytdlp_bin) or which("yt-dlp")
385
+ if not found:
386
+ print("[extract_media] yt-dlp not found")
387
+ return None
388
+
389
+ # Create temp directory
390
+ temp_dir = tempfile.mkdtemp(prefix="media_extract_")
391
+
392
+ # First, get info about the media
393
+ info_cmd = [found, url, "--dump-json", "--no-playlist"]
394
+ result = subprocess.run(
395
+ info_cmd,
396
+ capture_output=True,
397
+ text=True,
398
+ timeout=30
399
+ )
400
+
401
+ if result.returncode != 0:
402
+ print(f"[extract_media] yt-dlp info failed: {result.stderr}")
403
+ return None
404
+
405
+ info = json.loads(result.stdout)
406
+
407
+ # Determine media type
408
+ ext = info.get("ext", "").lower()
409
+ is_video = ext in ["mp4", "webm", "mkv", "avi", "mov", "flv", "m4v"]
410
+ is_image = ext in ["jpg", "jpeg", "png", "gif", "webp", "bmp"]
411
+
412
+ if not is_video and not is_image:
413
+ # Check formats to determine type
414
+ formats = info.get("formats", [])
415
+ has_video_codec = any(f.get("vcodec") != "none" for f in formats)
416
+ has_audio_codec = any(f.get("acodec") != "none" for f in formats)
417
+
418
+ if has_video_codec:
419
+ is_video = True
420
+ elif not has_audio_codec and not has_video_codec:
421
+ # Likely an image
422
+ is_image = True
423
+
424
+ media_type = "video" if is_video else "image"
425
+
426
+ # Download the media
427
+ output_template = os.path.join(temp_dir, f"media.%(ext)s")
428
+ download_cmd = [
429
+ found,
430
+ url,
431
+ "-o", output_template,
432
+ "--no-playlist",
433
+ ]
434
+
435
+ # For images, prefer best quality; for videos, get best format
436
+ if is_image:
437
+ download_cmd.extend(["--format", "best"])
438
+ else:
439
+ download_cmd.extend(["--format", "best[ext=mp4]/best"])
440
+
441
+ result = subprocess.run(
442
+ download_cmd,
443
+ capture_output=True,
444
+ text=True,
445
+ timeout=60
446
+ )
447
+
448
+ if result.returncode != 0:
449
+ print(f"[extract_media] yt-dlp download failed: {result.stderr}")
450
+ return None
451
+
452
+ # Find the downloaded file
453
+ downloaded_files = [f for f in os.listdir(temp_dir) if os.path.isfile(os.path.join(temp_dir, f))]
454
+ if not downloaded_files:
455
+ print("[extract_media] No file downloaded")
456
+ return None
457
+
458
+ media_path = os.path.join(temp_dir, downloaded_files[0])
459
+
460
+ return {
461
+ "type": media_type,
462
+ "path": media_path,
463
+ "temp_dir": temp_dir # Keep for cleanup
464
+ }
465
+
466
+ except Exception as e:
467
+ print(f"[extract_media] Error: {e}")
468
+ import traceback
469
+ print(traceback.format_exc())
470
+ return None
471
+
472
+
473
+ def _is_youtube_url(url: str) -> bool:
474
+ """Check if URL is a YouTube URL"""
475
+ url_lower = url.lower()
476
+ youtube_domains = ['youtube.com', 'youtu.be', 'www.youtube.com', 'www.youtu.be', 'm.youtube.com']
477
+ return any(domain in url_lower for domain in youtube_domains)
478
+
479
+
480
+ async def _generate_claims_summary(claim_results: List[Dict[str, Any]], gemini_model) -> str:
481
+ """Generate a comprehensive summary of all claim verification results using Gemini"""
482
+ try:
483
+ # Prepare claims data for Gemini
484
+ claims_data = []
485
+ for i, result in enumerate(claim_results, 1):
486
+ claims_data.append({
487
+ "number": i,
488
+ "claim": result.get("claim_text", ""),
489
+ "verdict": result.get("verdict", "uncertain"),
490
+ "explanation": result.get("message", "No explanation available")
491
+ })
492
+
493
+ prompt = f"""You are a fact-checking summary writer. Based on the following verified claims from a YouTube video, create a comprehensive, user-friendly summary.
494
+
495
+ CLAIM VERIFICATION RESULTS:
496
+ {json.dumps(claims_data, indent=2)}
497
+
498
+ Your task is to create a clear, concise summary that:
499
+ 1. Lists each claim with its verdict (TRUE/FALSE/MIXED/UNCERTAIN)
500
+ 2. Explains WHY each claim is true or false in simple terms
501
+ 3. Highlights the most important findings
502
+ 4. Provides an overall assessment of the video's factual accuracy
503
+
504
+ Format your response as a well-structured summary that is easy to read. Use clear sections and bullet points where appropriate.
505
+
506
+ IMPORTANT:
507
+ - Be concise but thorough
508
+ - Explain the reasoning for each verdict
509
+ - Focus on the most significant false or misleading claims
510
+ - Keep the tone professional and informative
511
+ - Do NOT use markdown formatting, just plain text with clear structure
512
+
513
+ Return ONLY the summary text, no JSON or code blocks."""
514
+
515
+ response = gemini_model.generate_content(prompt)
516
+ response_text = response.text.strip()
517
+
518
+ # Clean up response if needed
519
+ if response_text.startswith('```'):
520
+ response_text = re.sub(r'^```[a-z]*\n?', '', response_text, flags=re.IGNORECASE)
521
+ response_text = re.sub(r'```$', '', response_text, flags=re.IGNORECASE).strip()
522
+
523
+ print(f"✅ Generated comprehensive summary")
524
+ return response_text
525
+
526
+ except Exception as e:
527
+ print(f"❌ Error generating summary with Gemini: {e}")
528
+ import traceback
529
+ print(traceback.format_exc())
530
+ # Fallback to simple concatenation
531
+ summary_parts = []
532
+ summary_parts.append(f"Analyzed {len(claim_results)} controversial claim(s) from the video transcript:\n")
533
+
534
+ for i, result in enumerate(claim_results, 1):
535
+ claim_text = result.get("claim_text", "")
536
+ verdict = result.get("verdict", "uncertain")
537
+ message = result.get("message", "No explanation available")
538
+
539
+ claim_display = claim_text[:150] + "..." if len(claim_text) > 150 else claim_text
540
+
541
+ verdict_label = {
542
+ "true": "✅ TRUE",
543
+ "false": "❌ FALSE",
544
+ "mixed": "⚠️ MIXED",
545
+ "uncertain": "❓ UNCERTAIN",
546
+ "error": "⚠️ ERROR"
547
+ }.get(verdict, "❓ UNCERTAIN")
548
+
549
+ summary_parts.append(f"\n{i}. {verdict_label}: {claim_display}")
550
+ summary_parts.append(f" Explanation: {message}")
551
+
552
+ return "\n".join(summary_parts)
553
+
554
+
555
+ async def _extract_claims_from_captions(captions: str, gemini_model) -> List[str]:
556
+ """Extract top 5 controversial claims from video captions using Gemini"""
557
+ try:
558
+ prompt = f"""You are a fact-checking assistant. Analyze the following video transcript and extract the TOP 5 MOST CONTROVERSIAL and verifiable claims that were mentioned in the video.
559
+
560
+ VIDEO TRANSCRIPT:
561
+ {captions}
562
+
563
+ Your task is to identify the 5 MOST controversial, factual claims that can be verified. Prioritize:
564
+ - Claims about events, statistics, or facts that are controversial or disputed
565
+ - Claims about people, organizations, or institutions that are potentially misleading
566
+ - Claims that are specific enough to be fact-checked and are likely to be false or disputed
567
+ - Claims that have significant impact or are widely discussed
568
+
569
+ Ignore:
570
+ - General opinions or subjective statements
571
+ - Questions or hypothetical scenarios
572
+ - Vague statements without specific claims
573
+ - Small talk or filler content
574
+
575
+ IMPORTANT: Return EXACTLY 5 claims (or fewer if the video doesn't contain 5 verifiable controversial claims). Rank them by controversy/importance.
576
+
577
+ Return ONLY a JSON object in this exact format:
578
+ {{
579
+ "claims": [
580
+ "Claim 1 text here (most controversial)",
581
+ "Claim 2 text here",
582
+ "Claim 3 text here",
583
+ "Claim 4 text here",
584
+ "Claim 5 text here"
585
+ ]
586
+ }}
587
+
588
+ Return ONLY the JSON object, no other text or explanation."""
589
+
590
+ response = gemini_model.generate_content(prompt)
591
+ response_text = response.text.strip()
592
+
593
+ # Clean up response if needed
594
+ if response_text.startswith('```json'):
595
+ response_text = response_text.replace('```json', '').replace('```', '').strip()
596
+ elif response_text.startswith('```'):
597
+ response_text = response_text.replace('```', '').strip()
598
+
599
+ # Parse JSON response
600
+ parsed = json.loads(response_text)
601
+ claims = parsed.get("claims", [])
602
+
603
+ # Filter out empty claims and limit to 5
604
+ claims = [c.strip() for c in claims if c and c.strip()][:5]
605
+
606
+ print(f"✅ Extracted {len(claims)} claims from video captions")
607
+ return claims
608
+
609
+ except Exception as e:
610
+ print(f"❌ Error extracting claims from captions: {e}")
611
+ import traceback
612
+ print(traceback.format_exc())
613
+ return []
614
+
615
+
616
+ async def _verify_youtube_video(url: str, claim_context: str, claim_date: str) -> Dict[str, Any]:
617
+ """Verify a YouTube video by extracting captions, extracting claims, and verifying each claim"""
618
+ import tempfile
619
+ import asyncio
620
+
621
+ try:
622
+ print(f"🎥 Starting YouTube video verification for: {url}")
623
+
624
+ # Step 1: Extract captions
625
+ print(f"📝 Extracting captions from YouTube video...")
626
+ # Create a temporary file for the transcript output
627
+ with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as temp_file:
628
+ temp_output_file = temp_file.name
629
+
630
+ # Run the synchronous function in an executor to avoid blocking
631
+ loop = asyncio.get_running_loop()
632
+ captions = await loop.run_in_executor(
633
+ None,
634
+ get_youtube_transcript_ytdlp,
635
+ url,
636
+ temp_output_file
637
+ )
638
+
639
+ # Clean up the temporary output file if it was created
640
+ try:
641
+ if os.path.exists(temp_output_file):
642
+ os.unlink(temp_output_file)
643
+ except Exception as cleanup_error:
644
+ print(f"⚠️ Warning: Could not clean up temp file {temp_output_file}: {cleanup_error}")
645
+
646
+ if not captions:
647
+ return {
648
+ "verified": False,
649
+ "verdict": "error",
650
+ "message": "Could not extract captions from the YouTube video. The video may not have captions available.",
651
+ "details": {
652
+ "video_url": url,
653
+ "error": "Caption extraction failed"
654
+ },
655
+ "source": "youtube_url"
656
+ }
657
+
658
+ print(f"✅ Extracted {len(captions)} characters of captions")
659
+
660
+ # Step 2: Extract claims using Gemini
661
+ print(f"🔍 Extracting controversial claims from captions...")
662
+ genai.configure(api_key=config.GEMINI_API_KEY)
663
+ gemini_model = genai.GenerativeModel(config.GEMINI_MODEL)
664
+
665
+ claims = await _extract_claims_from_captions(captions, gemini_model)
666
+
667
+ if not claims:
668
+ return {
669
+ "verified": False,
670
+ "verdict": "uncertain",
671
+ "message": "No verifiable claims were found in the video transcript. The video may contain only opinions, questions, or non-factual content.",
672
+ "details": {
673
+ "video_url": url,
674
+ "captions_length": len(captions),
675
+ "claims_extracted": 0
676
+ },
677
+ "source": "youtube_url"
678
+ }
679
+
680
+ print(f"✅ Extracted {len(claims)} claims, starting verification...")
681
+
682
+ # Step 3: Verify each claim
683
+ claim_results = []
684
+ for i, claim in enumerate(claims, 1):
685
+ print(f"🔍 Verifying claim {i}/{len(claims)}: {claim[:100]}...")
686
+ try:
687
+ verification_result = await text_fact_checker.verify(
688
+ text_input=claim,
689
+ claim_context=f"Claim from YouTube video: {url}",
690
+ claim_date=claim_date
691
+ )
692
+ verification_result["claim_text"] = claim
693
+ verification_result["claim_index"] = i
694
+ claim_results.append(verification_result)
695
+ except Exception as e:
696
+ print(f"❌ Error verifying claim {i}: {e}")
697
+ claim_results.append({
698
+ "claim_text": claim,
699
+ "claim_index": i,
700
+ "verified": False,
701
+ "verdict": "error",
702
+ "message": f"Error during verification: {str(e)}"
703
+ })
704
+
705
+ # Step 4: Combine results
706
+ print(f"📊 Combining {len(claim_results)} claim verification results...")
707
+
708
+ # Aggregate verdicts
709
+ verdicts = [r.get("verdict", "uncertain") for r in claim_results]
710
+ true_count = verdicts.count("true")
711
+ false_count = verdicts.count("false")
712
+ uncertain_count = verdicts.count("uncertain")
713
+ mixed_count = verdicts.count("mixed")
714
+ error_count = verdicts.count("error")
715
+
716
+ # Determine overall verdict
717
+ if false_count > 0:
718
+ overall_verdict = "false"
719
+ verified = False
720
+ elif true_count > 0 and false_count == 0:
721
+ overall_verdict = "true"
722
+ verified = True
723
+ elif mixed_count > 0:
724
+ overall_verdict = "mixed"
725
+ verified = False
726
+ elif uncertain_count > 0:
727
+ overall_verdict = "uncertain"
728
+ verified = False
729
+ else:
730
+ overall_verdict = "error"
731
+ verified = False
732
+
733
+ # Step 5: Generate comprehensive summary using Gemini
734
+ print(f"📝 Generating comprehensive summary with Gemini...")
735
+ combined_message = await _generate_claims_summary(claim_results, gemini_model)
736
+
737
+ return {
738
+ "verified": verified,
739
+ "verdict": overall_verdict,
740
+ "message": combined_message,
741
+ "details": {
742
+ "video_url": url,
743
+ "captions_length": len(captions),
744
+ "total_claims": len(claims),
745
+ "claims_verified": true_count,
746
+ "claims_false": false_count,
747
+ "claims_mixed": mixed_count,
748
+ "claims_uncertain": uncertain_count,
749
+ "claims_error": error_count,
750
+ "claim_results": claim_results
751
+ },
752
+ "source": "youtube_url"
753
+ }
754
+
755
+ except Exception as e:
756
+ print(f"❌ Error verifying YouTube video: {e}")
757
+ import traceback
758
+ print(traceback.format_exc())
759
+ return {
760
+ "verified": False,
761
+ "verdict": "error",
762
+ "message": f"Error processing YouTube video: {str(e)}",
763
+ "details": {
764
+ "video_url": url,
765
+ "error": str(e)
766
+ },
767
+ "source": "youtube_url"
768
+ }
769
+
770
+
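The overall verdict above follows a strict precedence: any debunked claim marks the whole video false, otherwise any verified claim marks it true, then mixed, then uncertain, with "error" only when every claim failed verification. A minimal standalone sketch of that aggregation (an illustrative helper, not part of the service code above):

import collections

def aggregate_verdicts(verdicts: list[str]) -> str:
    """Collapse per-claim verdicts into one overall verdict, most damaging first."""
    counts = collections.Counter(verdicts)
    if counts.get("false", 0) > 0:
        return "false"
    if counts.get("true", 0) > 0:
        return "true"
    if counts.get("mixed", 0) > 0:
        return "mixed"
    if counts.get("uncertain", 0) > 0:
        return "uncertain"
    return "error"

# Example: one debunked claim outweighs several verified ones.
print(aggregate_verdicts(["true", "true", "false", "uncertain"]))  # -> "false"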
771
  @app.post("/chatbot/verify")
772
  async def chatbot_verify(
773
  text_input: Optional[str] = Form(None),
 
857
  try:
858
  gemini_prompt = f"""
859
  You are an assistant for audio authenticity analysis.
 
860
  {('User question: ' + claim_context) if claim_context else ''}
861
  The audio has been analyzed and the result is: {'deepfake' if deepfake else 'NOT deepfake'}.
862
  Compose a clear, friendly, 1-2 line summary verdict for the user, tailored to the above context/result (do not answer with JSON or code, just a natural response).
863
  Avoid repeating 'deepfake detection' technical language; be concise and direct.
864
+ Do NOT mention file names or file paths in your response.
865
  """
866
  gemini_response = input_processor_for_audio.model.generate_content(gemini_prompt)
867
  ai_message = None
 
917
  print(f"🔍 DEBUG: Processing {len(urls_list)} URLs")
918
  for i, url in enumerate(urls_list):
919
  print(f"🔍 DEBUG: Processing URL {i}: {url}")
920
+
921
+ # STEP 0: Check if this is a YouTube URL - handle specially
922
+ if _is_youtube_url(url):
923
+ print(f"🎥 DEBUG: Detected YouTube URL, using caption-based verification: {url}")
924
+ try:
925
+ result = await _verify_youtube_video(url, claim_context, claim_date)
926
+ results.append(result)
927
+ print(f"🔍 DEBUG: YouTube verification result: {result}")
928
+ continue # Skip the rest of the URL processing
929
+ except Exception as e:
930
+ print(f"❌ DEBUG: YouTube verification failed: {e}")
931
+ import traceback
932
+ print(traceback.format_exc())
933
+ # Fall through to regular video processing as fallback
934
+
935
+ # STEP 1: For social media URLs, use yt-dlp to fetch the actual media first
936
+ # This determines the REAL media type, not just what the LLM guessed
937
+ url_lower = url.lower()
938
+ is_social_media = any(domain in url_lower for domain in [
939
+ 'twitter.com', 'x.com', 'instagram.com', 'tiktok.com',
940
+ 'facebook.com', 'youtube.com', 'youtu.be'
941
+ ])
942
+
943
+ extracted_media = None
944
+ if is_social_media:
945
+ print(f"🔍 DEBUG: Detected social media URL, extracting media with yt-dlp: {url}")
946
+ try:
947
+ # Use yt-dlp to extract media and determine actual type
948
+ extracted_media = await _extract_media_from_url(url)
949
+ if extracted_media:
950
+ actual_type = extracted_media.get("type") # "image" or "video"
951
+ media_path = extracted_media.get("path")
952
+ temp_dir = extracted_media.get("temp_dir")
953
+
954
+ print(f"🔍 DEBUG: yt-dlp extracted {actual_type} from URL: {media_path}")
955
+
956
+ # Route based on ACTUAL media type, not LLM's guess
957
+ if actual_type == "image":
958
+ result = await image_verifier.verify(
959
+ image_path=media_path,
960
+ claim_context=claim_context,
961
+ claim_date=claim_date
962
+ )
963
+ else: # video
964
+ result = await video_verifier.verify(
965
+ video_path=media_path,
966
+ claim_context=claim_context,
967
+ claim_date=claim_date
968
+ )
969
+
970
+ result["source"] = "url"
971
+ results.append(result)
972
+
973
+ # Add to cleanup list
974
+ if media_path:
975
+ temp_files_to_cleanup.append(media_path)
976
+ if temp_dir:
977
+ temp_files_to_cleanup.append(temp_dir)
978
+
979
+ continue # Skip the old routing logic below
980
+ else:
981
+ print(f"⚠️ DEBUG: yt-dlp extraction returned None, falling back to direct URL")
982
+ except Exception as e:
983
+ print(f"⚠️ DEBUG: Failed to extract media from URL with yt-dlp: {e}, falling back to direct URL")
984
+ import traceback
985
+ print(traceback.format_exc())
986
+ # Fall through to old logic
987
+
988
+ # STEP 2: Fallback to old routing (for direct image/video URLs or if yt-dlp fails)
989
  if verification_type == "image":
990
  print(f"🔍 DEBUG: Calling image_verifier.verify for URL")
991
  result = await image_verifier.verify(
 
1176
  print(f"🔍 DEBUG: Exception type: {type(e).__name__}")
1177
  raise HTTPException(status_code=500, detail=str(e))
1178
 
1179
+ @app.get("/mongodb/search-similar")
1180
+ async def search_similar_rumours(
1181
+ query: str,
1182
+ similarity_threshold: float = 0.6,
1183
+ limit: int = 5
1184
+ ):
1185
+ """
1186
+ Search for rumours similar to the query text
1187
+
1188
+ Args:
1189
+ query: Search query text
1190
+ similarity_threshold: Minimum similarity score (0.0 to 1.0, default: 0.6)
1191
+ limit: Maximum number of results to return (default: 5)
1192
+
1193
+ Returns:
1194
+ List of similar rumours with similarity scores
1195
+ """
1196
+ try:
1197
+ if not mongodb_service:
1198
+ raise HTTPException(
1199
+ status_code=503,
1200
+ detail="MongoDB service is not available. Check MONGO_CONNECTION_STRING environment variable."
1201
+ )
1202
+
1203
+ if not query or not query.strip():
1204
+ return {
1205
+ "success": True,
1206
+ "count": 0,
1207
+ "results": []
1208
+ }
1209
+
1210
+ # Validate threshold
1211
+ similarity_threshold = max(0.0, min(1.0, similarity_threshold))
1212
+
1213
+ results = mongodb_service.search_similar_rumours(
1214
+ query=query,
1215
+ similarity_threshold=similarity_threshold,
1216
+ limit=limit
1217
+ )
1218
+
1219
+ return {
1220
+ "success": True,
1221
+ "count": len(results),
1222
+ "query": query,
1223
+ "similarity_threshold": similarity_threshold,
1224
+ "results": results
1225
+ }
1226
+
1227
+ except Exception as e:
1228
+ logger.error(f"❌ Error searching similar rumours: {e}")
1229
+ raise HTTPException(status_code=500, detail=str(e))
1230
+
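A minimal sketch of how a client might query this endpoint, assuming the service is reachable at http://localhost:7860 (the default port) and using an example query string:

import requests

resp = requests.get(
    "http://localhost:7860/mongodb/search-similar",
    params={"query": "5G towers cause illness", "similarity_threshold": 0.6, "limit": 5},
    timeout=30,
)
resp.raise_for_status()
data = resp.json()
print(f"Found {data['count']} similar rumour(s)")
for rumour in data["results"]:
    print(rumour)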
1231
  @app.get("/health")
1232
  async def health_check():
1233
  return {"status": "healthy", "service": "visual-verification"}
 
1373
  except Exception as e:
1374
  raise HTTPException(status_code=500, detail=str(e))
1375
 
1376
+ from pydantic import BaseModel
1377
+
1378
+
1379
+ # ---------- Auth endpoints (minimal implementation) ----------
1380
+
1381
+
1382
+ class LoginRequest(BaseModel):
1383
+ email: str
1384
+ password: str
1385
+
1386
+ class SignupRequest(BaseModel):
1387
+ name: str
1388
+ email: str
1389
+ password: str
1390
+ phone_number: Optional[str] = None
1391
+ age: Optional[int] = None
1392
+ domain_preferences: Optional[List[str]] = []
1393
+
1394
+ class UserResponse(BaseModel):
1395
+ email: str
1396
+ id: Optional[str] = None
1397
+
1398
+ @app.post("/auth/signup")
1399
+ async def signup(request: SignupRequest):
1400
+ """Sign up a new user"""
1401
+ if not mongodb_service:
1402
+ raise HTTPException(status_code=503, detail="MongoDB service not available")
1403
+
1404
+ try:
1405
+ # Hash password (in production, use bcrypt or similar)
1406
+ import hashlib
1407
+ password_hash = hashlib.sha256(request.password.encode()).hexdigest()
1408
+
1409
+ user_data = {
1410
+ "name": request.name,
1411
+ "email": request.email,
1412
+ "password": password_hash,
1413
+ "phone_number": request.phone_number,
1414
+ "age": request.age,
1415
+ "domain_preferences": request.domain_preferences or [],
1416
+ "created_at": None, # Will be set by MongoDB service
1417
+ "updated_at": None,
1418
+ }
1419
+
1420
+ user = mongodb_service.create_user(user_data)
1421
+
1422
+ # Generate token (in production, use JWT)
1423
+ token = f"mock_token_{request.email}"
1424
+
1425
+ return {
1426
+ "message": "User created successfully",
1427
+ "token": token,
1428
+ "user": {
1429
+ "name": user.get("name"),
1430
+ "email": user["email"],
1431
+ "id": user["id"],
1432
+ "phone_number": user.get("phone_number"),
1433
+ "age": user.get("age"),
1434
+ "domain_preferences": user.get("domain_preferences", [])
1435
+ }
1436
+ }
1437
+ except ValueError as e:
1438
+ raise HTTPException(status_code=400, detail=str(e))
1439
+ except Exception as e:
1440
+ logger.error(f"Signup error: {e}")
1441
+ raise HTTPException(status_code=500, detail="Failed to create user")
1442
+
1443
+ @app.post("/auth/login")
1444
+ async def login(request: LoginRequest):
1445
+ """Login user"""
1446
+ if not mongodb_service:
1447
+ raise HTTPException(status_code=503, detail="MongoDB service not available")
1448
+
1449
+ try:
1450
+ user = mongodb_service.get_user_by_email(request.email)
1451
+ if not user:
1452
+ raise HTTPException(status_code=401, detail="Invalid email or password")
1453
+
1454
+ # Verify password (in production, use bcrypt or similar)
1455
+ import hashlib
1456
+ password_hash = hashlib.sha256(request.password.encode()).hexdigest()
1457
+
1458
+ if user["password"] != password_hash:
1459
+ raise HTTPException(status_code=401, detail="Invalid email or password")
1460
+
1461
+ # Generate token (in production, use JWT)
1462
+ token = f"mock_token_{request.email}"
1463
+
1464
+ return {
1465
+ "message": "Login successful",
1466
+ "token": token,
1467
+ "user": {
1468
+ "name": user.get("name"),
1469
+ "email": user["email"],
1470
+ "id": user["id"],
1471
+ "phone_number": user.get("phone_number"),
1472
+ "age": user.get("age"),
1473
+ "domain_preferences": user.get("domain_preferences", [])
1474
+ }
1475
+ }
1476
+ except HTTPException:
1477
+ raise
1478
+ except Exception as e:
1479
+ logger.error(f"Login error: {e}")
1480
+ raise HTTPException(status_code=500, detail="Failed to login")
1481
+
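The SHA-256 hashing in the signup and login handlers is only a placeholder, as the inline comments note. A minimal sketch of what the production swap to bcrypt might look like (bcrypt is an extra dependency not listed in requirements.txt here):

import bcrypt

def hash_password(password: str) -> str:
    """Hash a password with a per-user random salt."""
    return bcrypt.hashpw(password.encode("utf-8"), bcrypt.gensalt()).decode("utf-8")

def verify_password(password: str, stored_hash: str) -> bool:
    """Check a login attempt against the stored bcrypt hash."""
    return bcrypt.checkpw(password.encode("utf-8"), stored_hash.encode("utf-8"))

# Example flow: hash at signup, verify at login.
stored = hash_password("s3cret-passphrase")
assert verify_password("s3cret-passphrase", stored)
assert not verify_password("wrong-guess", stored)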
1482
+ @app.get("/auth/me")
1483
+ async def get_current_user(request: Request):
1484
+ """Get current user (requires authentication in production)"""
1485
+ if not mongodb_service:
1486
+ raise HTTPException(status_code=503, detail="MongoDB service not available")
1487
+
1488
+ # In production, verify JWT token from Authorization header
1489
+ auth_header = request.headers.get("Authorization")
1490
+ if not auth_header or not auth_header.startswith("Bearer "):
1491
+ raise HTTPException(status_code=401, detail="Not authenticated")
1492
+
1493
+ token = auth_header.replace("Bearer ", "")
1494
+
1495
+ # Extract email from token (in production, decode JWT)
1496
+ if not token.startswith("mock_token_"):
1497
+ raise HTTPException(status_code=401, detail="Invalid token")
1498
+
1499
+ email = token.replace("mock_token_", "")
1500
+
1501
+ try:
1502
+ user = mongodb_service.get_user_by_email(email)
1503
+ if not user:
1504
+ raise HTTPException(status_code=401, detail="User not found")
1505
+
1506
+ # Get subscription tier from user document (preferred) or check subscription
1507
+ subscription_tier = user.get("subscription_tier", "Free")
1508
+
1509
+ # If not in user doc, check active subscription
1510
+ if subscription_tier == "Free" and user.get("id"):
1511
+ subscription = mongodb_service.get_user_subscription(user_id=user["id"], status="active")
1512
+ if subscription:
1513
+ subscription_tier = subscription.get("plan_name", "Free")
1514
+ # Update user document with subscription tier
1515
+ mongodb_service.update_user_subscription_tier(user["id"], subscription_tier)
1516
+
1517
+ return {
1518
+ "name": user.get("name"),
1519
+ "email": user["email"],
1520
+ "id": user["id"],
1521
+ "phone_number": user.get("phone_number"),
1522
+ "age": user.get("age"),
1523
+ "domain_preferences": user.get("domain_preferences", []),
1524
+ "subscription_tier": subscription_tier
1525
+ }
1526
+ except HTTPException:
1527
+ raise
1528
+ except Exception as e:
1529
+ logger.error(f"Get user error: {e}")
1530
+ raise HTTPException(status_code=500, detail="Failed to get user")
1531
+
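Likewise, the mock_token_<email> scheme used by signup, login, and /auth/me is a stand-in. A minimal sketch of issuing and verifying a signed token with PyJWT (another extra dependency), assuming a JWT_SECRET environment variable:

import os
from datetime import datetime, timedelta, timezone

import jwt  # PyJWT

JWT_SECRET = os.getenv("JWT_SECRET", "change-me")

def issue_token(email: str) -> str:
    """Create a signed token that expires after 12 hours."""
    payload = {"sub": email, "exp": datetime.now(timezone.utc) + timedelta(hours=12)}
    return jwt.encode(payload, JWT_SECRET, algorithm="HS256")

def email_from_token(token: str) -> str:
    """Decode and validate the token, returning the email it was issued for."""
    decoded = jwt.decode(token, JWT_SECRET, algorithms=["HS256"])
    return decoded["sub"]

token = issue_token("user@example.com")
print(email_from_token(token))  # -> user@example.com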
1532
+
1533
+ # ---------- Chat history endpoints ----------
1534
+
1535
+
1536
+ class ChatSessionUpsert(BaseModel):
1537
+ session_id: Optional[str] = None
1538
+ title: Optional[str] = None
1539
+ user_id: Optional[str] = None
1540
+ anonymous_id: Optional[str] = None
1541
+ last_verdict: Optional[str] = None
1542
+ last_summary: Optional[str] = None
1543
+
1544
+
1545
+ class ChatTurn(BaseModel):
1546
+ role: str
1547
+ content: str
1548
+ created_at: Optional[Any] = None # Can be datetime, string, or None
1549
+ verdict: Optional[str] = None
1550
+ confidence: Optional[float] = None
1551
+ sources: Optional[Dict[str, Any]] = None
1552
+ attachments: Optional[List[Dict[str, Any]]] = None
1553
+ metadata: Optional[Dict[str, Any]] = None
1554
+
1555
+
1556
+ class ChatMessagesAppend(BaseModel):
1557
+ session_id: str
1558
+ user_id: Optional[str] = None
1559
+ anonymous_id: Optional[str] = None
1560
+ messages: List[ChatTurn]
1561
+
1562
+
1563
+ @app.get("/chat/sessions")
1564
+ async def list_chat_sessions(
1565
+ user_id: Optional[str] = None,
1566
+ anonymous_id: Optional[str] = None,
1567
+ ):
1568
+ """Return chat sessions for logged-in users only.
1569
+
1570
+ Anonymous users will receive an empty list since their sessions are not persisted.
1571
+ """
1572
+ try:
1573
+ if not mongodb_service:
1574
+ raise HTTPException(status_code=503, detail="MongoDB service not available")
1575
+
1576
+ # Only return sessions for logged-in users
1577
+ if not user_id:
1578
+ logger.info(f"⏭️ No user_id provided, returning empty sessions list")
1579
+ return {"sessions": []}
1580
+
1581
+ logger.info(f"🔍 Loading chat sessions: user_id={user_id}")
1582
+ sessions = mongodb_service.get_chat_sessions(
1583
+ user_id=user_id,
1584
+ anonymous_id=None, # Don't query by anonymous_id anymore
1585
+ )
1586
+ logger.info(f"✅ Found {len(sessions)} chat sessions")
1587
+ return {"sessions": sessions}
1588
+ except Exception as e:
1589
+ logger.error(f"❌ Error loading chat sessions: {e}", exc_info=True)
1590
+ raise HTTPException(status_code=500, detail=f"Failed to load chat sessions: {str(e)}")
1591
+
1592
+
1593
+ @app.post("/chat/sessions")
1594
+ async def upsert_chat_session(payload: ChatSessionUpsert):
1595
+ """Create or update a chat session.
1596
+
1597
+ Only saves sessions for logged-in users (user_id required).
1598
+ Anonymous sessions are not persisted to MongoDB but a session_id is still returned for UI purposes.
1599
+ """
1600
+ try:
1601
+ if not mongodb_service:
1602
+ raise HTTPException(status_code=503, detail="MongoDB service not available")
1603
+
1604
+ data = payload.dict(exclude_unset=True)
1605
+ user_id = data.get("user_id")
1606
+ anonymous_id = data.get("anonymous_id")
1607
+
1608
+ # Only persist sessions for logged-in users
1609
+ if not user_id:
1610
+ # Still return a session_id for UI purposes, but don't persist
1611
+ import uuid
1612
+ session_id = data.get("session_id") or str(uuid.uuid4())
1613
+ logger.info(f"⏭️ Skipping session persistence for anonymous user (session_id={session_id})")
1614
+ return {
1615
+ "session_id": session_id,
1616
+ "title": data.get("title", "New Chat"),
1617
+ "user_id": None,
1618
+ "anonymous_id": anonymous_id,
1619
+ "created_at": None,
1620
+ "updated_at": None,
1621
+ "persisted": False,
1622
+ }
1623
+
1624
+ logger.info(f"🔍 Upserting chat session: {data}")
1625
+
1626
+ # Optionally migrate anonymous history on first login
1627
+ if user_id and anonymous_id:
1628
+ try:
1629
+ migrated = mongodb_service.migrate_anonymous_sessions(
1630
+ anonymous_id=anonymous_id, user_id=user_id
1631
+ )
1632
+ logger.info(f"✅ Migrated {migrated} anonymous sessions to user {user_id}")
1633
+ except Exception as exc:
1634
+ logger.error(f"Failed to migrate anonymous sessions: {exc}")
1635
+
1636
+ session_doc = mongodb_service.upsert_chat_session(data)
1637
+ logger.info(f"✅ Created/updated session: {session_doc.get('session_id')}")
1638
+ return session_doc
1639
+ except Exception as e:
1640
+ logger.error(f"❌ Error upserting chat session: {e}", exc_info=True)
1641
+ raise HTTPException(status_code=500, detail=f"Failed to create/update chat session: {str(e)}")
1642
+
1643
+
1644
+ @app.get("/chat/messages/{session_id}")
1645
+ async def get_chat_messages(session_id: str):
1646
+ """Return all messages for a given chat session."""
1647
+ if not mongodb_service:
1648
+ raise HTTPException(status_code=503, detail="MongoDB service not available")
1649
+
1650
+ messages = mongodb_service.get_chat_messages(session_id=session_id)
1651
+ return {"session_id": session_id, "messages": messages}
1652
+
1653
+
1654
+ @app.post("/chat/messages")
1655
+ async def append_chat_messages(payload: ChatMessagesAppend):
1656
+ """Append one or more messages to a chat session.
1657
+
1658
+ Only saves messages for logged-in users (user_id required).
1659
+ Anonymous messages are not persisted to MongoDB.
1660
+ """
1661
+ if not mongodb_service:
1662
+ raise HTTPException(status_code=503, detail="MongoDB service not available")
1663
+
1664
+ data = payload.dict()
1665
+ user_id = data.get("user_id")
1666
+
1667
+ # Only persist messages for logged-in users
1668
+ if not user_id:
1669
+ logger.info(f"⏭️ Skipping message persistence for anonymous user (session_id={data['session_id']})")
1670
+ return {"inserted": 0, "message": "Messages not persisted for anonymous users"}
1671
+
1672
+ inserted = mongodb_service.append_chat_messages(
1673
+ session_id=data["session_id"],
1674
+ messages=[m for m in data["messages"]],
1675
+ user_id=user_id,
1676
+ anonymous_id=data.get("anonymous_id"),
1677
+ )
1678
+ logger.info(f"✅ Persisted {inserted} messages for user {user_id}")
1679
+ return {"inserted": inserted}
1680
+
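A minimal sketch of how a logged-in client might drive these chat-history endpoints end to end, assuming the service runs at http://localhost:7860 and that "example-user-id" stands in for a real id returned by /auth/login:

import requests

BASE = "http://localhost:7860"
user_id = "example-user-id"  # placeholder; normally returned by /auth/login

# 1. Create (or update) a session.
session = requests.post(
    f"{BASE}/chat/sessions",
    json={"title": "Flood rumour check", "user_id": user_id},
).json()
session_id = session["session_id"]

# 2. Append a user turn and the assistant's reply.
requests.post(f"{BASE}/chat/messages", json={
    "session_id": session_id,
    "user_id": user_id,
    "messages": [
        {"role": "user", "content": "Is the viral flood photo real?"},
        {"role": "assistant", "content": "The photo is from an earlier event, not the current one.", "verdict": "false"},
    ],
})

# 3. Read the history back.
history = requests.get(f"{BASE}/chat/messages/{session_id}").json()
print(len(history["messages"]), "message(s) stored")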
1681
+
1682
+ # ---------- Subscription endpoints ----------
1683
+
1684
+
1685
+ class CreatePlanRequest(BaseModel):
1686
+ name: str
1687
+ amount: int # Amount in paise (smallest currency unit)
1688
+ currency: str = "INR"
1689
+ interval: int = 1
1690
+ period: str = "monthly" # daily, weekly, monthly, yearly
1691
+ description: Optional[str] = None
1692
+
1693
+
1694
+ class CreateSubscriptionRequest(BaseModel):
1695
+ plan_id: str
1696
+ user_id: str
1697
+ customer_notify: int = 1
1698
+ total_count: Optional[int] = None
1699
+ notes: Optional[Dict[str, str]] = None
1700
+
1701
+
1702
+ class CancelSubscriptionRequest(BaseModel):
1703
+ subscription_id: str
1704
+ cancel_at_cycle_end: bool = False
1705
+
1706
+
1707
+ @app.post("/subscriptions/plans")
1708
+ async def create_subscription_plan(request: CreatePlanRequest):
1709
+ """Create a subscription plan in Razorpay (admin/one-time setup)"""
1710
+ try:
1711
+ if not razorpay_service or not razorpay_service.client:
1712
+ raise HTTPException(
1713
+ status_code=503,
1714
+ detail="Razorpay service not available. Check RAZORPAY_ID and RAZORPAY_KEY."
1715
+ )
1716
+
1717
+ plan = razorpay_service.create_plan(
1718
+ name=request.name,
1719
+ amount=request.amount,
1720
+ currency=request.currency,
1721
+ interval=request.interval,
1722
+ period=request.period,
1723
+ description=request.description
1724
+ )
1725
+
1726
+ return {
1727
+ "success": True,
1728
+ "plan": plan
1729
+ }
1730
+ except Exception as e:
1731
+ logger.error(f"❌ Failed to create subscription plan: {e}")
1732
+ raise HTTPException(status_code=500, detail=str(e))
1733
+
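A minimal sketch of a one-time admin call to this endpoint, assuming a hypothetical ₹499/month "Pro" plan; Razorpay amounts are in paise, so ₹499 is sent as 49900:

import requests

resp = requests.post(
    "http://localhost:7860/subscriptions/plans",
    json={
        "name": "Pro",
        "amount": 49900,      # paise, i.e. ₹499.00
        "currency": "INR",
        "interval": 1,
        "period": "monthly",
        "description": "Pro tier, billed monthly",
    },
    timeout=30,
)
print(resp.json())  # contains the Razorpay plan object on success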
1734
+
1735
+ @app.get("/subscriptions/plans")
1736
+ async def list_subscription_plans(count: int = 10, skip: int = 0):
1737
+ """List available subscription plans"""
1738
+ try:
1739
+ if not razorpay_service or not razorpay_service.client:
1740
+ raise HTTPException(
1741
+ status_code=503,
1742
+ detail="Razorpay service not available. Check RAZORPAY_ID and RAZORPAY_KEY."
1743
+ )
1744
+
1745
+ plans = razorpay_service.list_plans(count=count, skip=skip)
1746
+ return {
1747
+ "success": True,
1748
+ "plans": plans
1749
+ }
1750
+ except Exception as e:
1751
+ logger.error(f"❌ Failed to list subscription plans: {e}")
1752
+ raise HTTPException(status_code=500, detail=str(e))
1753
+
1754
+
1755
+ @app.get("/subscriptions/config")
1756
+ async def get_subscription_config():
1757
+ """Get Razorpay public configuration (Key ID) for frontend"""
1758
+ try:
1759
+ if not config.RAZORPAY_ID:
1760
+ raise HTTPException(
1761
+ status_code=503,
1762
+ detail="Razorpay not configured"
1763
+ )
1764
+
1765
+ return {
1766
+ "success": True,
1767
+ "razorpay_key_id": config.RAZORPAY_ID
1768
+ }
1769
+ except Exception as e:
1770
+ logger.error(f"❌ Failed to get subscription config: {e}")
1771
+ raise HTTPException(status_code=500, detail=str(e))
1772
+
1773
+
1774
+ @app.post("/subscriptions/create")
1775
+ async def create_subscription(request: CreateSubscriptionRequest):
1776
+ """Create a subscription for a user"""
1777
+ try:
1778
+ if not razorpay_service or not razorpay_service.client:
1779
+ raise HTTPException(
1780
+ status_code=503,
1781
+ detail="Razorpay service not available. Check RAZORPAY_ID and RAZORPAY_KEY."
1782
+ )
1783
+
1784
+ if not mongodb_service:
1785
+ raise HTTPException(
1786
+ status_code=503,
1787
+ detail="MongoDB service not available"
1788
+ )
1789
+
1790
+ # Create subscription in Razorpay
1791
+ subscription = razorpay_service.create_subscription(
1792
+ plan_id=request.plan_id,
1793
+ customer_notify=request.customer_notify,
1794
+ total_count=request.total_count,
1795
+ notes=request.notes
1796
+ )
1797
+
1798
+ # Get plan details
1799
+ plan = razorpay_service.get_plan(request.plan_id)
1800
+
1801
+ # Extract plan name - try multiple possible locations
1802
+ plan_name = "Pro" # Default
1803
+ if plan:
1804
+ # Try different possible locations for plan name
1805
+ plan_name_raw = (
1806
+ plan.get("item", {}).get("name") or
1807
+ plan.get("name") or
1808
+ (request.notes.get("plan_name") if request.notes else None) or
1809
+ "Pro"
1810
+ )
1811
+ # Normalize plan name
1812
+ plan_name_raw_lower = plan_name_raw.lower()
1813
+ if "pro" in plan_name_raw_lower:
1814
+ plan_name = "Pro"
1815
+ elif "enterprise" in plan_name_raw_lower:
1816
+ plan_name = "Enterprise"
1817
+ else:
1818
+ plan_name = plan_name_raw
1819
+
1820
+ # Store subscription in MongoDB
1821
+ from datetime import datetime
1822
+ subscription_data = {
1823
+ "user_id": request.user_id,
1824
+ "razorpay_subscription_id": subscription.get("id"),
1825
+ "razorpay_plan_id": request.plan_id,
1826
+ "plan_name": plan_name,
1827
+ "status": subscription.get("status", "created"),
1828
+ "amount": plan.get("item", {}).get("amount", 0) if plan else 0,
1829
+ "currency": plan.get("item", {}).get("currency", "INR") if plan else "INR",
1830
+ "current_start": subscription.get("current_start"),
1831
+ "current_end": subscription.get("current_end"),
1832
+ "next_billing_at": subscription.get("end_at"),
1833
+ "created_at": datetime.utcnow(),
1834
+ "razorpay_data": subscription # Store full Razorpay response
1835
+ }
1836
+
1837
+ mongodb_service.upsert_subscription(subscription_data)
1838
+
1839
+ # Update user's subscription tier immediately if status is active
1840
+ # Otherwise, it will be updated via webhook when payment is completed
1841
+ if subscription.get("status") == "active":
1842
+ mongodb_service.update_user_subscription_tier(request.user_id, plan_name)
1843
+ logger.info(f"✅ Updated user {request.user_id} subscription tier to {plan_name}")
1844
+ else:
1845
+ logger.info(f"⏳ Subscription created with status '{subscription.get('status')}'. User tier will be updated when subscription is activated via webhook.")
1846
+
1847
+ return {
1848
+ "success": True,
1849
+ "subscription_id": subscription.get("id"),
1850
+ "short_url": subscription.get("short_url"),
1851
+ "subscription": subscription
1852
+ }
1853
+ except Exception as e:
1854
+ logger.error(f"❌ Failed to create subscription: {e}")
1855
+ raise HTTPException(status_code=500, detail=str(e))
1856
+
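A minimal sketch of the client-side flow against these endpoints: fetch the public key from /subscriptions/config, create the subscription, then hand the returned id (or short_url) to Razorpay Checkout on the frontend. The plan id and user id below are placeholders:

import requests

BASE = "http://localhost:7860"

key_id = requests.get(f"{BASE}/subscriptions/config").json()["razorpay_key_id"]
created = requests.post(f"{BASE}/subscriptions/create", json={
    "plan_id": "plan_XXXXXXXXXXXX",   # placeholder Razorpay plan id
    "user_id": "example-user-id",
    "notes": {"plan_name": "Pro"},
}).json()

print("Open checkout with key", key_id, "and subscription", created["subscription_id"])
# Alternatively, redirect the user to created["short_url"], which Razorpay hosts.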
1857
+
1858
+ @app.get("/subscriptions/status")
1859
+ async def get_subscription_status(user_id: Optional[str] = None):
1860
+ """Get user's subscription status"""
1861
+ try:
1862
+ if not mongodb_service:
1863
+ raise HTTPException(
1864
+ status_code=503,
1865
+ detail="MongoDB service not available"
1866
+ )
1867
+
1868
+ if not user_id:
1869
+ return {
1870
+ "success": True,
1871
+ "subscription": None,
1872
+ "message": "No user_id provided"
1873
+ }
1874
+
1875
+ subscription = mongodb_service.get_user_subscription(user_id=user_id)
1876
+
1877
+ if subscription:
1878
+ # Optionally fetch latest data from Razorpay
1879
+ if razorpay_service and razorpay_service.client:
1880
+ try:
1881
+ razorpay_sub = razorpay_service.get_subscription(
1882
+ subscription.get("razorpay_subscription_id")
1883
+ )
1884
+ # Update status if changed
1885
+ if razorpay_sub.get("status") != subscription.get("status"):
1886
+ mongodb_service.update_subscription_status(
1887
+ subscription.get("razorpay_subscription_id"),
1888
+ razorpay_sub.get("status"),
1889
+ {
1890
+ "current_start": razorpay_sub.get("current_start"),
1891
+ "current_end": razorpay_sub.get("current_end"),
1892
+ "next_billing_at": razorpay_sub.get("end_at")
1893
+ }
1894
+ )
1895
+ subscription["status"] = razorpay_sub.get("status")
1896
+ except Exception as e:
1897
+ logger.warning(f"Failed to sync with Razorpay: {e}")
1898
+
1899
+ return {
1900
+ "success": True,
1901
+ "subscription": subscription
1902
+ }
1903
+ except Exception as e:
1904
+ logger.error(f"❌ Failed to get subscription status: {e}")
1905
+ raise HTTPException(status_code=500, detail=str(e))
1906
+
1907
+
1908
+ @app.post("/subscriptions/cancel")
1909
+ async def cancel_subscription(request: CancelSubscriptionRequest):
1910
+ """Cancel user's subscription"""
1911
+ try:
1912
+ if not razorpay_service or not razorpay_service.client:
1913
+ raise HTTPException(
1914
+ status_code=503,
1915
+ detail="Razorpay service not available. Check RAZORPAY_ID and RAZORPAY_KEY."
1916
+ )
1917
+
1918
+ if not mongodb_service:
1919
+ raise HTTPException(
1920
+ status_code=503,
1921
+ detail="MongoDB service not available"
1922
+ )
1923
+
1924
+ # Cancel subscription in Razorpay
1925
+ subscription = razorpay_service.cancel_subscription(
1926
+ subscription_id=request.subscription_id,
1927
+ cancel_at_cycle_end=request.cancel_at_cycle_end
1928
+ )
1929
+
1930
+ # Update status in MongoDB
1931
+ mongodb_service.update_subscription_status(
1932
+ request.subscription_id,
1933
+ subscription.get("status", "cancelled"),
1934
+ {
1935
+ "current_start": subscription.get("current_start"),
1936
+ "current_end": subscription.get("current_end"),
1937
+ "next_billing_at": subscription.get("end_at")
1938
+ }
1939
+ )
1940
+
1941
+ return {
1942
+ "success": True,
1943
+ "subscription": subscription
1944
+ }
1945
+ except Exception as e:
1946
+ logger.error(f"❌ Failed to cancel subscription: {e}")
1947
+ raise HTTPException(status_code=500, detail=str(e))
1948
+
1949
+
1950
+ @app.post("/webhooks/razorpay")
1951
+ async def razorpay_webhook(request: Request):
1952
+ """Handle Razorpay webhook events"""
1953
+ try:
1954
+ if not razorpay_service:
1955
+ raise HTTPException(
1956
+ status_code=503,
1957
+ detail="Razorpay service not available"
1958
+ )
1959
+
1960
+ if not mongodb_service:
1961
+ raise HTTPException(
1962
+ status_code=503,
1963
+ detail="MongoDB service not available"
1964
+ )
1965
+
1966
+ # Get raw body for signature verification
1967
+ body = await request.body()
1968
+ body_str = body.decode('utf-8')
1969
+
1970
+ # Get signature from header
1971
+ signature = request.headers.get("X-Razorpay-Signature", "")
1972
+
1973
+ # Verify webhook signature
1974
+ if not razorpay_service.verify_webhook_signature(body_str, signature):
1975
+ logger.warning("⚠️ Invalid webhook signature")
1976
+ raise HTTPException(status_code=400, detail="Invalid webhook signature")
1977
+
1978
+ # Parse webhook payload from body string
1979
+ webhook_data = json.loads(body_str)
1980
+ event = webhook_data.get("event")
1981
+ payload = webhook_data.get("payload", {})
1982
+
1983
+ logger.info(f"📥 Received Razorpay webhook: {event}")
1984
+
1985
+ # Handle different webhook events
1986
+ if event == "subscription.activated":
1987
+ subscription = payload.get("subscription", {}).get("entity", {})
1988
+ subscription_id = subscription.get("id")
1989
+
1990
+ if subscription_id:
1991
+ # Get subscription from DB to get user_id and plan_name
1992
+ sub_doc = mongodb_service.get_subscription_by_razorpay_id(subscription_id)
1993
+ if sub_doc:
1994
+ user_id = sub_doc.get("user_id")
1995
+ plan_name = sub_doc.get("plan_name", "Pro")
1996
+
1997
+ logger.info(f"📥 Processing subscription.activated for user {user_id}, plan {plan_name}")
1998
+
1999
+ mongodb_service.update_subscription_status(
2000
+ subscription_id,
2001
+ "active",
2002
+ {
2003
+ "current_start": subscription.get("current_start"),
2004
+ "current_end": subscription.get("current_end"),
2005
+ "next_billing_at": subscription.get("end_at")
2006
+ }
2007
+ )
2008
+
2009
+ # Update user's subscription tier
2010
+ if user_id:
2011
+ success = mongodb_service.update_user_subscription_tier(user_id, plan_name)
2012
+ if success:
2013
+ logger.info(f"✅ Successfully updated user {user_id} tier to {plan_name} via webhook")
2014
+ else:
2015
+ logger.error(f"❌ Failed to update user {user_id} tier to {plan_name}")
2016
+ else:
2017
+ logger.warning(f"⚠️ Subscription {subscription_id} not found in database")
2018
+
2019
+ elif event == "subscription.charged":
2020
+ subscription = payload.get("subscription", {}).get("entity", {})
2021
+ payment = payload.get("payment", {}).get("entity", {})
2022
+ subscription_id = subscription.get("id")
2023
+
2024
+ if subscription_id:
2025
+ # Get subscription from DB to get user_id and plan_name
2026
+ sub_doc = mongodb_service.get_subscription_by_razorpay_id(subscription_id)
2027
+ if sub_doc:
2028
+ user_id = sub_doc.get("user_id")
2029
+ plan_name = sub_doc.get("plan_name", "Pro")
2030
+
2031
+ logger.info(f"📥 Processing subscription.charged for user {user_id}, plan {plan_name}")
2032
+
2033
+ # Update subscription with payment info
2034
+ update_data = {
2035
+ "current_start": subscription.get("current_start"),
2036
+ "current_end": subscription.get("current_end"),
2037
+ "next_billing_at": subscription.get("end_at"),
2038
+ "last_payment_id": payment.get("id"),
2039
+ "last_payment_amount": payment.get("amount"),
2040
+ "last_payment_date": payment.get("created_at")
2041
+ }
2042
+ mongodb_service.update_subscription_status(
2043
+ subscription_id,
2044
+ subscription.get("status", "active"),
2045
+ update_data
2046
+ )
2047
+
2048
+ # Update user's subscription tier when payment is charged
2049
+ if user_id and subscription.get("status") == "active":
2050
+ success = mongodb_service.update_user_subscription_tier(user_id, plan_name)
2051
+ if success:
2052
+ logger.info(f"✅ Successfully updated user {user_id} tier to {plan_name} via subscription.charged webhook")
2053
+ else:
2054
+ logger.error(f"❌ Failed to update user {user_id} tier to {plan_name}")
2055
+ else:
2056
+ logger.warning(f"⚠️ Subscription {subscription_id} not found in database for subscription.charged event")
2057
+
2058
+ elif event == "subscription.cancelled":
2059
+ subscription = payload.get("subscription", {}).get("entity", {})
2060
+ subscription_id = subscription.get("id")
2061
+
2062
+ if subscription_id:
2063
+ # Get subscription from DB to get user_id
2064
+ sub_doc = mongodb_service.get_subscription_by_razorpay_id(subscription_id)
2065
+ if sub_doc:
2066
+ user_id = sub_doc.get("user_id")
2067
+
2068
+ mongodb_service.update_subscription_status(
2069
+ subscription_id,
2070
+ "cancelled",
2071
+ {
2072
+ "current_start": subscription.get("current_start"),
2073
+ "current_end": subscription.get("current_end"),
2074
+ "next_billing_at": subscription.get("end_at")
2075
+ }
2076
+ )
2077
+
2078
+ # Update user's subscription tier to Free
2079
+ if user_id:
2080
+ mongodb_service.update_user_subscription_tier(user_id, "Free")
2081
+
2082
+ elif event == "payment.failed":
2083
+ payment = payload.get("payment", {}).get("entity", {})
2084
+ subscription_id = payment.get("subscription_id")
2085
+
2086
+ if subscription_id:
2087
+ # Update subscription to reflect failed payment
2088
+ subscription = razorpay_service.get_subscription(subscription_id)
2089
+ mongodb_service.update_subscription_status(
2090
+ subscription_id,
2091
+ subscription.get("status", "pending"),
2092
+ {
2093
+ "last_payment_failed": True,
2094
+ "last_payment_failure_reason": payment.get("error_description")
2095
+ }
2096
+ )
2097
+
2098
+ return {"success": True, "message": "Webhook processed"}
2099
+
2100
+ except HTTPException:
2101
+ raise
2102
+ except Exception as e:
2103
+ logger.error(f"❌ Failed to process webhook: {e}")
2104
+ raise HTTPException(status_code=500, detail=str(e))
2105
+
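The signature check above is delegated to razorpay_service.verify_webhook_signature. Razorpay webhook signatures are typically verified as an HMAC-SHA256 of the raw request body keyed with the webhook secret, compared against the X-Razorpay-Signature header; a minimal sketch of that check, assuming the secret is exposed as a RAZORPAY_WEBHOOK_SECRET environment variable:

import hashlib
import hmac
import os

def verify_razorpay_signature(raw_body: str, signature: str) -> bool:
    """Return True if the header signature matches the HMAC-SHA256 of the body."""
    secret = os.getenv("RAZORPAY_WEBHOOK_SECRET", "")
    expected = hmac.new(secret.encode("utf-8"), raw_body.encode("utf-8"), hashlib.sha256).hexdigest()
    # Constant-time comparison avoids leaking information through timing.
    return hmac.compare_digest(expected, signature)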
2106
+
2107
  if __name__ == "__main__":
2108
  uvicorn.run(app, host="0.0.0.0", port=config.SERVICE_PORT)
requirements.txt CHANGED
@@ -1,23 +1,24 @@
1
- requests
2
- pillow
3
- opencv-python
4
- fastapi
5
- uvicorn[standard]
6
- websockets
7
- serpapi
8
- python-dotenv
9
- python-multipart
10
- yt-dlp
11
- google-generativeai
12
- google-auth
13
- google-auth-oauthlib
14
- google-auth-httplib2
15
- scikit-learn
16
- numpy
17
- pymongo
18
- upstash-redis
19
- google-search-results
20
- cloudinary
21
- torch
22
- transformers
23
- pytorchvideo
 
 
1
+ requests
2
+ pillow
3
+ opencv-python
4
+ fastapi
5
+ uvicorn[standard]
6
+ websockets
7
+ serpapi
8
+ python-dotenv
9
+ python-multipart
10
+ yt-dlp
11
+ google-generativeai
12
+ google-auth
13
+ google-auth-oauthlib
14
+ google-auth-httplib2
15
+ scikit-learn
16
+ numpy
17
+ pymongo
18
+ upstash-redis
19
+ google-search-results
20
+ cloudinary
21
+ torch
22
+ transformers
23
+ pytorchvideo
24
+ razorpay
services/deepfake_checker.py ADDED
@@ -0,0 +1,83 @@
1
+ import os
2
+ import sys
3
+ from typing import Set
4
+
5
+ try:
6
+ import torch
7
+ from transformers import pipeline
8
+ except ImportError:
9
+ print("="*80)
10
+ print("ERROR: Missing critical libraries.")
11
+ print("Please install all required dependencies first:")
12
+ print("pip install torch transformers")
13
+ print("="*80)
14
+ sys.exit(1)
15
+
16
+ # --- Configuration ---
17
+ AUDIO_FORMATS: Set[str] = {'.mp3', '.wav', '.m4a', '.flac', '.ogg'}
18
+ DEVICE = 0 if torch.cuda.is_available() else -1 # 0 for CUDA, -1 for CPU
19
+ AUDIO_MODEL_ID = "mo-thecreator/Deepfake-audio-detection"
20
+
21
+ audio_pipeline_instance = None
22
+
23
+ def get_audio_pipeline():
24
+ """Loads the audio pipeline into memory (if not already loaded)."""
25
+ global audio_pipeline_instance
26
+ if audio_pipeline_instance is None:
27
+ try:
28
+ print(f"Loading audio model '{AUDIO_MODEL_ID}' from Hugging Face Hub...")
29
+ audio_pipeline_instance = pipeline(
30
+ "audio-classification",
31
+ model=AUDIO_MODEL_ID,
32
+ device=DEVICE
33
+ )
34
+ print("Audio detection pipeline loaded successfully.")
35
+ except Exception as e:
36
+ print(f"Error loading audio pipeline: {e}")
37
+ print("Please ensure the model ID is correct.")
38
+ sys.exit(1)
39
+ return audio_pipeline_instance
40
+
41
+ def detect_audio_deepfake(file_path: str) -> bool:
42
+ """
43
+ Runs a pretrained audio deepfake detection model from the HF Hub.
44
+ """
45
+ print(f"Analyzing audio file: {os.path.basename(file_path)}")
46
+ try:
47
+ detector = get_audio_pipeline()
48
+ except Exception as e:
49
+ print(f"Failed to load audio pipeline: {e}")
50
+ return False # Fail safe
51
+ try:
52
+ results = detector(file_path)
53
+ best_result = max(results, key=lambda x: x['score'])
54
+ top_label = best_result['label'].lower()
55
+ top_score = best_result['score']
56
+ print(f"...Audio pipeline result: '{top_label}' with score {top_score:.4f}")
57
+ is_fake = top_label in ['spoof', 'fake']
58
+ return is_fake
59
+ except Exception as e:
60
+ print(f"Error during audio processing/inference: {e}")
61
+ return False
62
+
63
+ def is_audio_deepfake(file_path: str) -> bool:
64
+ """
65
+ Checks if a given audio file is a deepfake.
66
+ Args:
67
+ file_path: The absolute or relative path to the audio file.
68
+ Returns:
69
+ True if the file is classified as a deepfake, False otherwise.
70
+ Raises:
71
+ FileNotFoundError: If the file does not exist.
72
+ ValueError: If the file format is not supported.
73
+ """
74
+ if not os.path.exists(file_path):
75
+ raise FileNotFoundError(f"File not found at path: {file_path}")
76
+ ext = os.path.splitext(file_path)[1].lower()
77
+ if ext in AUDIO_FORMATS:
78
+ return detect_audio_deepfake(file_path)
79
+ else:
80
+ raise ValueError(
81
+ f"Unsupported file format: {ext}. Supported types: {AUDIO_FORMATS}"
82
+ )
83
+
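A minimal usage sketch for this module, assuming a local sample.wav exists and that the model weights can be downloaded from the Hugging Face Hub on first use:

from services.deepfake_checker import is_audio_deepfake

try:
    if is_audio_deepfake("sample.wav"):
        print("Audio flagged as a likely deepfake")
    else:
        print("Audio looks genuine")
except (FileNotFoundError, ValueError) as exc:
    print(f"Could not analyse file: {exc}")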
services/educational_content_generator.py ADDED
@@ -0,0 +1,533 @@
1
+ import json
2
+ import os
3
+ from typing import Dict, List, Optional, Any
4
+ import google.generativeai as genai
5
+ from upstash_redis import Redis
6
+ from config import config
7
+
8
+ class EducationalContentGenerator:
9
+ """Service for generating educational content about misinformation detection"""
10
+
11
+ def __init__(self):
12
+ # Configure Gemini
13
+ genai.configure(api_key=config.GEMINI_API_KEY)
14
+ self.model = genai.GenerativeModel(config.GEMINI_MODEL)
15
+
16
+ # Initialize Upstash Redis connection
17
+ try:
18
+ if config.UPSTASH_REDIS_URL and config.UPSTASH_REDIS_TOKEN:
19
+ self.redis_client = Redis(
20
+ url=config.UPSTASH_REDIS_URL,
21
+ token=config.UPSTASH_REDIS_TOKEN
22
+ )
23
+ # Test connection
24
+ self.redis_client.set("test", "connection")
25
+ self.redis_client.delete("test")
26
+ print("✅ Upstash Redis connection established")
27
+ else:
28
+ print("⚠️ Upstash Redis credentials not found, running without cache")
29
+ self.redis_client = None
30
+ except Exception as e:
31
+ print(f"❌ Upstash Redis connection failed: {e}")
32
+ self.redis_client = None
33
+
34
+ # Cache TTL (Time To Live) in seconds
35
+ self.cache_ttl = config.REDIS_TTL
36
+
37
+ # Pre-defined content templates
38
+ self.content_templates = {
39
+ "red_flags": {
40
+ "title": "How to Spot Red Flags in Misinformation",
41
+ "categories": [
42
+ "Emotional Language",
43
+ "Suspicious URLs",
44
+ "Poor Grammar",
45
+ "Missing Sources",
46
+ "Outdated Information",
47
+ "Confirmation Bias Triggers"
48
+ ]
49
+ },
50
+ "source_credibility": {
51
+ "title": "Evaluating Source Credibility",
52
+ "categories": [
53
+ "Authority Assessment",
54
+ "Bias Detection",
55
+ "Fact-checking Methodology",
56
+ "Peer Review Process",
57
+ "Transparency Standards"
58
+ ]
59
+ },
60
+ "manipulation_techniques": {
61
+ "title": "Common Manipulation Techniques",
62
+ "categories": [
63
+ "Deepfakes and AI-generated Content",
64
+ "Outdated Images",
65
+ "Misleading Headlines",
66
+ "False Context",
67
+ "Social Media Manipulation",
68
+ "Bot Networks"
69
+ ]
70
+ }
71
+ }
72
+
73
+ def _get_cache_key(self, key: str) -> str:
74
+ """Get the Redis cache key"""
75
+ return f"educational:{key}"
76
+
77
+ def _load_from_cache(self, cache_key: str) -> Optional[Dict[str, Any]]:
78
+ """Load content from Redis cache if it exists"""
79
+ if not self.redis_client:
80
+ return None
81
+
82
+ try:
83
+ cached_data = self.redis_client.get(self._get_cache_key(cache_key))
84
+ if cached_data:
85
+ return json.loads(cached_data)
86
+ except Exception as e:
87
+ print(f"Failed to load from Redis cache {cache_key}: {e}")
88
+ return None
89
+
90
+ def _save_to_cache(self, cache_key: str, content: Dict[str, Any]) -> None:
91
+ """Save content to Redis cache"""
92
+ if not self.redis_client:
93
+ return
94
+
95
+ try:
96
+ self.redis_client.setex(
97
+ self._get_cache_key(cache_key),
98
+ self.cache_ttl,
99
+ json.dumps(content, ensure_ascii=False)
100
+ )
101
+ print(f"✅ Cached {cache_key} in Redis")
102
+ except Exception as e:
103
+ print(f"Failed to save to Redis cache {cache_key}: {e}")
104
+
105
+ async def get_modules_list(self) -> Dict[str, Any]:
106
+ """Get the list of available modules (cached in Redis)"""
107
+ cache_key = "modules_list"
108
+ cached = self._load_from_cache(cache_key)
109
+
110
+ if cached:
111
+ print(f"📦 Loading modules list from Redis cache")
112
+ return cached
113
+
114
+ print(f"🔄 Generating new modules list")
115
+ # Generate modules list
116
+ modules_data = {
117
+ "modules": [
118
+ {
119
+ "id": "red_flags",
120
+ "title": "How to Spot Red Flags",
121
+ "description": "Learn to identify warning signs in misinformation",
122
+ "difficulty_levels": ["beginner", "intermediate", "advanced"],
123
+ "estimated_time": "10-15 minutes"
124
+ },
125
+ {
126
+ "id": "source_credibility",
127
+ "title": "Evaluating Source Credibility",
128
+ "description": "Understand how to assess source reliability",
129
+ "difficulty_levels": ["beginner", "intermediate", "advanced"],
130
+ "estimated_time": "15-20 minutes"
131
+ },
132
+ {
133
+ "id": "manipulation_techniques",
134
+ "title": "Common Manipulation Techniques",
135
+ "description": "Learn about various misinformation techniques",
136
+ "difficulty_levels": ["intermediate", "advanced"],
137
+ "estimated_time": "20-25 minutes"
138
+ }
139
+ ]
140
+ }
141
+
142
+ # Save to Redis cache
143
+ self._save_to_cache(cache_key, modules_data)
144
+ return modules_data
145
+
146
+ async def generate_module_content(self, module_type: str, difficulty_level: str = "beginner") -> Dict[str, Any]:
147
+ """
148
+ Generate educational content for a specific module (with Redis caching)
149
+
150
+ Args:
151
+ module_type: Type of module (red_flags, source_credibility, etc.)
152
+ difficulty_level: beginner, intermediate, advanced
153
+
154
+ Returns:
155
+ Dictionary containing educational content
156
+ """
157
+ # Check Redis cache first
158
+ cache_key = f"{module_type}_{difficulty_level}"
159
+ cached_content = self._load_from_cache(cache_key)
160
+
161
+ if cached_content:
162
+ print(f"📦 Loading {module_type} ({difficulty_level}) from Redis cache")
163
+ return cached_content
164
+
165
+ print(f"🔄 Generating new content for {module_type} ({difficulty_level})")
166
+
167
+ try:
168
+ template = self.content_templates.get(module_type, {})
169
+ if not template:
170
+ return {"error": f"Unknown module type: {module_type}"}
171
+
172
+ # Generate content using AI
173
+ content = await self._generate_ai_content(module_type, difficulty_level, template)
174
+
175
+ # Add interactive elements
176
+ content["interactive_elements"] = await self._generate_interactive_elements(module_type, difficulty_level)
177
+
178
+ # Add real-world examples
179
+ content["examples"] = await self._generate_examples(module_type, difficulty_level)
180
+
181
+ # Save to Redis cache
182
+ self._save_to_cache(cache_key, content)
183
+
184
+ return content
185
+
186
+ except Exception as e:
187
+ print(f"Failed to generate content: {str(e)}")
188
+ # Return fallback content
189
+ fallback = self._get_fallback_content(module_type, difficulty_level)
190
+ self._save_to_cache(cache_key, fallback)
191
+ return fallback
192
+
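A minimal sketch of calling the generator directly, assuming GEMINI_API_KEY (and optionally the Upstash Redis credentials) are configured in the environment so the constructor can initialise Gemini and the cache:

import asyncio

from services.educational_content_generator import EducationalContentGenerator

async def main() -> None:
    generator = EducationalContentGenerator()

    modules = await generator.get_modules_list()
    print([m["id"] for m in modules["modules"]])

    # Second call with the same module/level should come back from the Redis cache.
    content = await generator.generate_module_content("red_flags", "beginner")
    print(content.get("title"))

asyncio.run(main())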
193
+ async def _generate_ai_content(self, module_type: str, difficulty_level: str, template: Dict) -> Dict[str, Any]:
194
+ """Generate AI-powered educational content"""
195
+
196
+ prompt = f"""
197
+ You are an expert digital literacy educator specializing in misinformation detection.
198
+ Create comprehensive educational content for the following module:
199
+
200
+ MODULE TYPE: {module_type}
201
+ DIFFICULTY LEVEL: {difficulty_level}
202
+ TEMPLATE: {json.dumps(template, indent=2)}
203
+
204
+ Create educational content that includes:
205
+ 1. Clear explanations of concepts
206
+ 2. Step-by-step instructions
207
+ 3. Visual indicators to look for
208
+ 4. Common mistakes to avoid
209
+ 5. Practical exercises
210
+
211
+ Respond in this JSON format:
212
+ {{
213
+ "title": "Module title",
214
+ "overview": "Brief overview of what users will learn",
215
+ "learning_objectives": ["Objective 1", "Objective 2", "Objective 3"],
216
+ "content_sections": [
217
+ {{
218
+ "title": "Section title",
219
+ "content": "Detailed explanation",
220
+ "key_points": ["Point 1", "Point 2"],
221
+ "visual_indicators": ["Indicator 1", "Indicator 2"],
222
+ "examples": ["Example 1", "Example 2"]
223
+ }}
224
+ ],
225
+ "practical_tips": ["Tip 1", "Tip 2", "Tip 3"],
226
+ "common_mistakes": ["Mistake 1", "Mistake 2"],
227
+ "difficulty_level": "{difficulty_level}"
228
+ }}
229
+ """
230
+
231
+ try:
232
+ response = self.model.generate_content(prompt)
233
+ response_text = response.text.strip()
234
+
235
+ # Clean up JSON response
236
+ if response_text.startswith('```json'):
237
+ response_text = response_text.replace('```json', '').replace('```', '').strip()
238
+ elif response_text.startswith('```'):
239
+ response_text = response_text.replace('```', '').strip()
240
+
241
+ return json.loads(response_text)
242
+
243
+ except Exception as e:
244
+ print(f"AI content generation failed: {e}")
245
+ return self._get_fallback_content(module_type, difficulty_level)
246
+
247
+ async def _generate_interactive_elements(self, module_type: str, difficulty_level: str) -> Dict[str, Any]:
248
+ """Generate interactive learning elements"""
249
+
250
+ prompt = f"""
251
+ Create interactive learning elements for a {difficulty_level} level module about {module_type}.
252
+
253
+ Generate:
254
+ 1. Quiz questions with multiple choice answers
255
+ 2. True/false statements
256
+ 3. Scenario-based questions
257
+
258
+ Respond in JSON format:
259
+ {{
260
+ "quiz_questions": [
261
+ {{
262
+ "question": "Question text",
263
+ "options": ["Option A", "Option B", "Option C", "Option D"],
264
+ "correct_answer": 0,
265
+ "explanation": "Why this answer is correct"
266
+ }}
267
+ ],
268
+ "true_false": [
269
+ {{
270
+ "statement": "Statement to evaluate",
271
+ "answer": true,
272
+ "explanation": "Explanation"
273
+ }}
274
+ ],
275
+ "scenarios": [
276
+ {{
277
+ "scenario": "Real-world scenario description",
278
+ "question": "What should you do?",
279
+ "correct_action": "Correct action",
280
+ "explanation": "Why this is the right approach"
281
+ }}
282
+ ]
283
+ }}
284
+ """
285
+
286
+ try:
287
+ response = self.model.generate_content(prompt)
288
+ response_text = response.text.strip()
289
+
290
+ if response_text.startswith('```json'):
291
+ response_text = response_text.replace('```json', '').replace('```', '').strip()
292
+ elif response_text.startswith('```'):
293
+ response_text = response_text.replace('```', '').strip()
294
+
295
+ return json.loads(response_text)
296
+
297
+ except Exception as e:
298
+ print(f"Interactive elements generation failed: {e}")
299
+ return {"quiz_questions": [], "true_false": [], "scenarios": []}
300
+
301
+ async def _generate_examples(self, module_type: str, difficulty_level: str) -> List[Dict[str, Any]]:
302
+ """Generate real-world examples"""
303
+
304
+ prompt = f"""
305
+ Create realistic examples of {module_type} for {difficulty_level} learners.
306
+
307
+ For each example, provide:
308
+ 1. A realistic scenario
309
+ 2. What to look for
310
+ 3. How to verify
311
+ 4. Why it's misleading
312
+
313
+ Respond in JSON format:
314
+ {{
315
+ "examples": [
316
+ {{
317
+ "title": "Example title",
318
+ "scenario": "Realistic scenario description",
319
+ "red_flags": ["Flag 1", "Flag 2"],
320
+ "verification_steps": ["Step 1", "Step 2"],
321
+ "explanation": "Why this is misleading",
322
+ "difficulty": "{difficulty_level}"
323
+ }}
324
+ ]
325
+ }}
326
+ """
327
+
328
+ try:
329
+ response = self.model.generate_content(prompt)
330
+ response_text = response.text.strip()
331
+
332
+ if response_text.startswith('```json'):
333
+ response_text = response_text.replace('```json', '').replace('```', '').strip()
334
+ elif response_text.startswith('```'):
335
+ response_text = response_text.replace('```', '').strip()
336
+
337
+ result = json.loads(response_text)
338
+ return result.get("examples", [])
339
+
340
+ except Exception as e:
341
+ print(f"Examples generation failed: {e}")
342
+ return []
343
+
344
+ def _get_fallback_content(self, module_type: str, difficulty_level: str) -> Dict[str, Any]:
345
+ """Fallback content when AI generation fails"""
346
+
347
+ fallback_content = {
348
+ "red_flags": {
349
+ "title": "How to Spot Red Flags in Misinformation",
350
+ "overview": "Learn to identify warning signs that content might be misleading",
351
+ "learning_objectives": [
352
+ "Identify emotional manipulation techniques",
353
+ "Recognize suspicious URLs and sources",
354
+ "Spot grammatical and formatting errors",
355
+ "Understand confirmation bias triggers"
356
+ ],
357
+ "content_sections": [
358
+ {
359
+ "title": "Emotional Language",
360
+ "content": "Misinformation often uses strong emotional language to bypass critical thinking.",
361
+ "key_points": [
362
+ "Look for excessive use of emotional words",
363
+ "Be wary of content that makes you feel angry or scared",
364
+ "Check if emotions are being used to distract from facts"
365
+ ],
366
+ "visual_indicators": ["ALL CAPS", "Multiple exclamation marks", "Emotional imagery"],
367
+ "examples": ["URGENT!!!", "You won't believe this!", "This will shock you!"]
368
+ },
369
+ {
370
+ "title": "Suspicious URLs",
371
+ "content": "Fake news often uses URLs that mimic legitimate news sources.",
372
+ "key_points": [
373
+ "Check for slight misspellings in domain names",
374
+ "Look for unusual domain extensions",
375
+ "Verify the actual website matches the URL"
376
+ ],
377
+ "visual_indicators": ["typos in URLs", "unusual extensions", "redirects"],
378
+ "examples": ["cnn-news.com", "bbc-news.net", "reuters.info"]
379
+ }
380
+ ],
381
+ "practical_tips": [
382
+ "Take a deep breath before sharing emotional content",
383
+ "Ask yourself: 'Why do I feel this way?'",
384
+ "Look for factual evidence, not just emotional appeals"
385
+ ],
386
+ "common_mistakes": [
387
+ "Sharing content because it makes you angry",
388
+ "Ignoring red flags when content confirms your beliefs",
389
+ "Not checking sources when content feels 'right'"
390
+ ],
391
+ "difficulty_level": difficulty_level
392
+ },
393
+ "source_credibility": {
394
+ "title": "Evaluating Source Credibility",
395
+ "overview": "Learn how to assess whether a source is trustworthy and reliable",
396
+ "learning_objectives": [
397
+ "Understand what makes a source credible",
398
+ "Identify bias in news sources",
399
+ "Evaluate author expertise",
400
+ "Check source transparency"
401
+ ],
402
+ "content_sections": [
403
+ {
404
+ "title": "Authority Assessment",
405
+ "content": "Credible sources have recognized expertise in their field.",
406
+ "key_points": [
407
+ "Check the author's credentials and background",
408
+ "Look for institutional affiliations",
409
+ "Verify expertise matches the topic"
410
+ ],
411
+ "visual_indicators": ["Author bio", "Credentials listed", "Institutional affiliation"],
412
+ "examples": ["PhD in relevant field", "Journalist with experience", "Academic institution"]
413
+ }
414
+ ],
415
+ "practical_tips": [
416
+ "Always check the 'About' page",
417
+ "Look for contact information",
418
+ "Verify claims with multiple sources"
419
+ ],
420
+ "common_mistakes": [
421
+ "Trusting sources without checking credentials",
422
+ "Ignoring bias in sources",
423
+ "Not verifying institutional affiliations"
424
+ ],
425
+ "difficulty_level": difficulty_level
426
+ },
427
+ "manipulation_techniques": {
428
+ "title": "Common Manipulation Techniques",
429
+ "overview": "Understand the various methods used to create and spread misinformation",
430
+ "learning_objectives": [
431
+ "Recognize different manipulation techniques",
432
+ "Understand how AI-generated content works",
433
+ "Identify social media manipulation",
434
+ "Learn verification strategies"
435
+ ],
436
+ "content_sections": [
437
+ {
438
+ "title": "Deepfakes and AI-generated Content",
439
+ "content": "Advanced technology can create convincing fake videos and images.",
440
+ "key_points": [
441
+ "Look for unnatural facial movements",
442
+ "Check for inconsistencies in lighting",
443
+ "Verify with original sources"
444
+ ],
445
+ "visual_indicators": ["Unnatural blinking", "Lighting inconsistencies", "Audio sync issues"],
446
+ "examples": ["AI-generated celebrity videos", "Deepfake political speeches"]
447
+ }
448
+ ],
449
+ "practical_tips": [
450
+ "Use reverse image search",
451
+ "Check multiple angles of the same event",
452
+ "Verify with official sources"
453
+ ],
454
+ "common_mistakes": [
455
+ "Trusting videos without verification",
456
+ "Not checking for AI generation",
457
+ "Sharing before verification"
458
+ ],
459
+ "difficulty_level": difficulty_level
460
+ }
461
+ }
462
+
463
+ return fallback_content.get(module_type, {
464
+ "title": f"Educational Module: {module_type}",
465
+ "overview": "Learn about misinformation detection",
466
+ "learning_objectives": ["Understand basic concepts"],
467
+ "content_sections": [],
468
+ "practical_tips": [],
469
+ "common_mistakes": [],
470
+ "difficulty_level": difficulty_level
471
+ })
472
+
473
+ async def generate_contextual_learning(self, verification_result: Dict[str, Any]) -> Dict[str, Any]:
474
+ """
475
+ Generate educational content based on a specific verification result
476
+
477
+ Args:
478
+ verification_result: Result from fact-checking
479
+
480
+ Returns:
481
+ Educational content tailored to the verification result
482
+ """
483
+ try:
484
+ # Extract relevant information from verification result
485
+ verdict = verification_result.get("verdict", "uncertain")
486
+ message = verification_result.get("message", "")
487
+ details = verification_result.get("details", {})
488
+
489
+ # Generate contextual learning content
490
+ prompt = f"""
491
+ Based on this fact-checking result, create educational content to help users learn:
492
+
493
+ VERDICT: {verdict}
494
+ MESSAGE: {message}
495
+ DETAILS: {json.dumps(details, indent=2)}
496
+
497
+ Create learning content that explains:
498
+ 1. What this result means
499
+ 2. What red flags were found (if any)
500
+ 3. How to verify similar claims in the future
501
+ 4. Key lessons learned
502
+
503
+ Respond in JSON format:
504
+ {{
505
+ "learning_summary": "What users learned from this verification",
506
+ "red_flags_found": ["List of red flags detected"],
507
+ "verification_techniques": ["Techniques used to verify"],
508
+ "future_tips": ["Tips for similar situations"],
509
+ "key_lessons": ["Main takeaways"],
510
+ "related_topics": ["Related educational topics to explore"]
511
+ }}
512
+ """
513
+
514
+ response = self.model.generate_content(prompt)
515
+ response_text = response.text.strip()
516
+
517
+ if response_text.startswith('```json'):
518
+ response_text = response_text.replace('```json', '').replace('```', '').strip()
519
+ elif response_text.startswith('```'):
520
+ response_text = response_text.replace('```', '').strip()
521
+
522
+ return json.loads(response_text)
523
+
524
+ except Exception as e:
525
+ print(f"Contextual learning generation failed: {e}")
526
+ return {
527
+ "learning_summary": "Learn to verify information systematically",
528
+ "red_flags_found": [],
529
+ "verification_techniques": ["Source checking", "Cross-referencing"],
530
+ "future_tips": ["Always verify before sharing"],
531
+ "key_lessons": ["Critical thinking is essential"],
532
+ "related_topics": ["Source credibility", "Fact-checking basics"]
533
+ }
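+ # Example (illustrative sketch): how a caller might request contextual learning
+ # content after a fact-check. The class name `EducationService`, the variable names,
+ # and the sample verification result below are placeholders, not part of this module's API.
+ #
+ #   import asyncio
+ #   service = EducationService()  # assumed class name for this service
+ #   sample = {"verdict": "false", "message": "Image is from 2019, not 2024", "details": {}}
+ #   learning = asyncio.run(service.generate_contextual_learning(sample))
+ #   print(learning["learning_summary"], learning["key_lessons"])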
services/image_verifier.py ADDED
@@ -0,0 +1,1377 @@
1
+ import os
2
+ import tempfile
3
+ from typing import Dict, Any, Optional, Tuple, List
4
+ import requests
5
+ from PIL import Image, ImageDraw, ImageFont
6
+ import io
7
+ import base64
8
+ import json
9
+ import google.generativeai as genai
10
+ # Import SerpApi client - use the correct import path from documentation
11
+ GoogleSearch = None # type: ignore
12
+ try:
13
+ from serpapi import GoogleSearch as _GS # correct import per SerpApi docs
14
+ GoogleSearch = _GS
15
+ print("[serpapi] Successfully imported GoogleSearch from serpapi")
16
+ except Exception as e:
17
+ print(f"[serpapi] Failed to import GoogleSearch: {e}")
18
+ GoogleSearch = None # client unavailable; will fall back to HTTP
19
+ from config import config
20
+
21
+
22
+ class ImageVerifier:
23
+ def __init__(self, api_key: Optional[str] = None):
24
+ """
25
+ Initialize the ImageVerifier with SerpApi credentials
26
+
27
+ Args:
28
+ api_key: SerpApi API key. If None, will try to get from environment
29
+ """
30
+ self.api_key = api_key or config.SERP_API_KEY
31
+ if not self.api_key:
32
+ raise ValueError("SERP_API_KEY environment variable or api_key parameter is required")
33
+
34
+ # Configure Gemini
35
+ if config.GEMINI_API_KEY:
36
+ genai.configure(api_key=config.GEMINI_API_KEY)
37
+ self.gemini_model = genai.GenerativeModel(
38
+ config.GEMINI_MODEL,
39
+ generation_config=genai.types.GenerationConfig(
40
+ temperature=config.GEMINI_TEMPERATURE,
41
+ top_p=config.GEMINI_TOP_P,
42
+ max_output_tokens=config.GEMINI_MAX_TOKENS
43
+ )
44
+ )
45
+ else:
46
+ self.gemini_model = None
47
+
48
+ # SerpApi endpoints
49
+ self.base_url_json = "https://serpapi.com/search.json" # for GET with image_url
50
+ self.base_url_form = "https://serpapi.com/search.json" # for POST form with image_content
51
+
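+ # Example (sketch): instantiating the verifier. SERP_API_KEY must be available via
+ # config or passed explicitly; GEMINI_API_KEY is optional but enables the Gemini
+ # Vision analysis path (otherwise self.gemini_model stays None).
+ #
+ #   verifier = ImageVerifier()                          # uses config.SERP_API_KEY
+ #   verifier = ImageVerifier(api_key="<serpapi-key>")   # placeholder key for illustration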
52
+ async def verify(self, image_path: Optional[str] = None, claim_context: str = "", claim_date: str = "", image_url: Optional[str] = None) -> Dict[str, Any]:
53
+ """
54
+ Verify an image using a two-stage approach:
55
+ 1. Gemini Vision analyzes the image directly for signs of AI generation, deepfakes, or manipulation
56
+ 2. Reverse image search + evidence analysis
57
+
58
+ Args:
59
+ image_path: Path to the image file
60
+ claim_context: The claimed context of the image
61
+ claim_date: The claimed date of the image
62
+ image_url: URL of the image
63
+
64
+ Returns:
65
+ Dictionary with verification results and output file path
66
+ """
67
+ try:
68
+ print("[verify] start", {"claim_context": claim_context, "claim_date": claim_date, "has_image_path": bool(image_path), "has_image_url": bool(image_url)})
69
+
70
+ # STEP 0: Gemini Vision analysis of the actual image
71
+ preliminary_analysis = await self._analyze_image_with_vision(
72
+ image_path=image_path,
73
+ image_url=image_url,
74
+ claim_context=claim_context,
75
+ claim_date=claim_date
76
+ )
77
+ print(f"✅ Gemini Vision analysis result: {preliminary_analysis.get('verdict', 'unknown')}")
78
+
79
+ # STEP 1: Perform reverse image search (wrap in try/except so vision analysis can still proceed)
80
+ search_results = None
81
+ try:
82
+ search_results = await self._reverse_image_search(image_path=image_path, image_url=image_url)
83
+ except Exception as search_error:
84
+ print(f"⚠️ Reverse image search failed (will use vision analysis only): {search_error}")
85
+ # Continue with vision analysis only - this is fine, we have a fallback
86
+
87
+ # STEP 2: Build evidence from SerpApi (reverse image search)
88
+ evidence = []
89
+ curated_analysis = None
90
+ if search_results and (search_results.get("inline_images") or search_results.get("image_results")):
91
+ evidence = self._collect_evidence(search_results)
92
+ print("[verify] serpapi_counts", {
93
+ "image_results": len(search_results.get("image_results", [])) if isinstance(search_results, dict) else None,
94
+ "inline_images": len(search_results.get("inline_images", [])) if isinstance(search_results, dict) else None,
95
+ "status": (search_results.get("search_metadata", {}) or {}).get("status") if isinstance(search_results, dict) else None,
96
+ })
97
+ print("[verify] evidence_collected", {"count": len(evidence), "sample_titles": [e.get("title") for e in evidence[:3]]})
98
+
99
+ # Ask Gemini to produce structured verdict + structured claim parse with citations
100
+ filtered_evidence = self._rank_and_filter_evidence(evidence, claim_context, top_k=12)
101
+ print("[verify] preparing_llm_request", {"evidence_count": len(filtered_evidence)})
102
+ curated_analysis = self._summarize_with_gemini_structured(
103
+ claim_context=claim_context,
104
+ claim_date=claim_date,
105
+ evidence=filtered_evidence,
106
+ )
107
+ else:
108
+ print("[verify] No reverse image search results, using vision analysis only")
109
+ filtered_evidence = []
110
+
111
+ # STEP 3: Synthesize vision analysis + reverse image search results
112
+ final_response = self._synthesize_vision_and_evidence(
113
+ preliminary_analysis=preliminary_analysis,
114
+ curated_analysis=curated_analysis,
115
+ evidence=filtered_evidence,
116
+ claim_context=claim_context,
117
+ claim_date=claim_date,
118
+ )
119
+
120
+ if final_response:
121
+ return final_response
122
+
123
+ # Fallback: use vision analysis if available, else curated analysis
124
+ if preliminary_analysis and preliminary_analysis.get("verdict") in ["false", "true"]:
125
+ llm = preliminary_analysis
126
+ elif curated_analysis:
127
+ llm = curated_analysis
128
+ else:
129
+ llm = None
130
+ validator = {"passed": False, "reasons": [], "checks": {}}
131
+ debug_details = {}
132
+ if llm:
133
+ print("[verify] llm_keys", list(llm.keys()))
134
+ base_verdict = (llm.get("verdict") or "uncertain").lower()
135
+ relation_verdict = (llm.get("relation_verdict") or base_verdict).lower()
136
+ # Enforce policy: default to false when the claimed relation isn't supported by evidence.
137
+ cp = (llm.get("claim_parse") or {})
138
+ citations = (cp.get("citations") or {})
139
+ relation_citations = citations.get("relation") or []
140
+ has_any_evidence = bool(filtered_evidence)
141
+ relation_supported = bool(relation_citations)
142
+
143
+ if relation_verdict == "false":
144
+ verdict = "false"
145
+ elif has_any_evidence and not relation_supported:
146
+ # We have evidence but none supports the claimed relation → false
147
+ verdict = "false"
148
+ else:
149
+ verdict = base_verdict
150
+ summary = llm.get("summary") or ""
151
+ # Enforce reputable domain gating + cross-source agreement
152
+ sources = llm.get("top_sources") or self._top_sources(filtered_evidence, 3)
153
+ from urllib.parse import urlparse
154
+ def is_reputable(url: Optional[str]) -> bool:
155
+ try:
156
+ net = urlparse(url or "").netloc
157
+ except Exception:
158
+ net = ""
159
+ # Reputable = not low-priority social/UGC domain
160
+ return bool(net and (net not in config.LOW_PRIORITY_DOMAINS))
161
+ reputable_sources = [s for s in (sources or []) if is_reputable(s.get("link"))]
162
+ # Relation support must come from reputable domains and have >=2 independent domains
163
+ cp = (llm.get("claim_parse") or {})
164
+ rel_cits = (cp.get("citations") or {}).get("relation") or []
165
+ cited_domains = set()
166
+ for j in rel_cits:
167
+ try:
168
+ ev = filtered_evidence[int(j)]
169
+ net = urlparse(ev.get("link") or "").netloc
170
+ if net and (net not in config.LOW_PRIORITY_DOMAINS):
171
+ cited_domains.add(net)
172
+ except Exception:
173
+ pass
174
+ cross_source_ok = len(cited_domains) >= 2
175
+ # Run the validator before using its checks: require citations for all extracted parts and relation co-mention
+ validator, debug_details = self._validate_llm_parse(
+ claim_text=claim_context,
+ evidence=filtered_evidence,
+ llm=llm,
+ )
+ # Stronger relation test: require co-mention validated above (checks[relation_comention])
+ relation_comention_ok = False
+ try:
+ relation_comention_ok = bool(validator["checks"].get("relation_comention"))
+ except Exception:
+ relation_comention_ok = False
181
+ if verdict == "true":
182
+ if not (cross_source_ok and relation_comention_ok):
183
+ verdict = "uncertain"
184
+ # If verdict is still not false, ensure at least two reputable sources overall
185
+ if verdict == "true" and len({urlparse((s.get("link") or "")).netloc for s in reputable_sources}) < 2:
186
+ verdict = "uncertain"
187
+ # Validator was already run above, before the relation co-mention check
193
+ # Only downgrade true to uncertain if validator fails; never upgrade false
194
+ if verdict == "true" and not validator.get("passed", False):
195
+ verdict = "uncertain"
196
+ if verdict == "true":
197
+ from urllib.parse import urlparse
198
+ cited_idx = set()
199
+ cp = (llm.get("claim_parse") or {}).get("citations") or {}
200
+ for key, val in cp.items():
201
+ if isinstance(val, list):
202
+ if key in ["entities","roles"]:
203
+ for arr in val:
204
+ for j in (arr or []):
205
+ try:
206
+ cited_idx.add(int(j))
207
+ except Exception:
208
+ pass
209
+ else:
210
+ for j in val:
211
+ try:
212
+ cited_idx.add(int(j))
213
+ except Exception:
214
+ pass
215
+ domains = set()
216
+ for ix in cited_idx:
217
+ if 0 <= ix < len(filtered_evidence):
218
+ lk = filtered_evidence[ix].get("link") or ""
219
+ try:
220
+ net = urlparse(lk).netloc
221
+ except Exception:
222
+ net = ""
223
+ if net:
224
+ domains.add(net)
225
+ print("[verify] domain_independence", {"cited_count": len(cited_idx), "domains": list(domains)})
226
+ if len(domains) < 2:
227
+ verdict = "uncertain"
228
+ validator.setdefault("reasons", []).append("Insufficient domain independence for true verdict")
229
+ print("[verify] gemini_structured", {"verdict": verdict, "summary_preview": summary[:120]})
230
+ print("[verify] validator", validator)
231
+ print("[verify] debug_details_keys", list(debug_details.keys()))
232
+ else:
233
+ # Fallback minimal output
234
+ verdict = "uncertain"
235
+ summary = self._fallback_summary("uncertain", claim_context, claim_date, None, None, None)
236
+ sources = self._top_sources(filtered_evidence, 3)
237
+ print("[verify] gemini_structured_none_fallback", {"verdict": verdict, "summary_preview": summary[:120]})
238
+
239
+ if verdict != "false":
240
+ resp = {
241
+ "verdict": verdict,
242
+ "summary": summary,
243
+ "message": summary,
244
+ "sources": sources,
245
+ "claim_context": claim_context,
246
+ "claim_date": claim_date,
247
+ "validator": validator,
248
+ }
249
+ if config.DEBUG:
250
+ resp["debug"] = debug_details
251
+ return resp
252
+
253
+ # Generate visual counter-measure (pick first usable evidence image)
254
+ evidence_img_url = None
255
+ for ev in filtered_evidence:
256
+ if ev.get("thumbnail"):
257
+ evidence_img_url = ev.get("thumbnail")
258
+ break
259
+ if not evidence_img_url:
260
+ for ev in filtered_evidence:
261
+ if ev.get("link") and isinstance(ev.get("link"), str) and ev.get("link").startswith("http"):
262
+ evidence_img_url = ev.get("link")
263
+ break
264
+ evidence_img_url = evidence_img_url or (image_url or "")
265
+ output_path = await self._generate_counter_measure(
266
+ original_image_path=image_path,
267
+ evidence_image_url=evidence_img_url,
268
+ claim_context=claim_context,
269
+ claim_date=claim_date,
270
+ original_image_url=image_url,
271
+ )
272
+ print("[verify] counter_measure_generated", {"output_path": output_path})
273
+
274
+ # For false verdict, ensure summary exists
275
+ if not llm or llm.get("verdict", "").lower() != "false":
276
+ # Force LLM to produce a false-context explanation
277
+ llm = self._summarize_with_gemini_structured(
278
+ claim_context=claim_context,
279
+ claim_date=claim_date,
280
+ evidence=filtered_evidence,
281
+ forced_verdict="false",
282
+ ) or {}
283
+ summary = llm.get("summary") or self._fallback_summary("false", claim_context, claim_date, None, None, None)
284
+ sources = llm.get("top_sources") or self._top_sources(filtered_evidence, 3)
285
+ resp = {
286
+ "verdict": "false",
287
+ "summary": summary,
288
+ "message": summary,
289
+ "sources": sources,
290
+ "output_path": output_path,
291
+ "claim_context": claim_context,
292
+ "claim_date": claim_date,
293
+ "validator": validator,
294
+ }
295
+ if config.DEBUG:
296
+ resp["debug"] = debug_details
297
+ return resp
298
+
299
+ except Exception as e:
300
+ return {
301
+ "verdict": "error",
302
+ "summary": f"Error during verification: {str(e)}",
303
+ }
304
+
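+ # Example (sketch): calling verify() from async code; the URL and claim values
+ # below are placeholders for illustration.
+ #
+ #   result = await verifier.verify(
+ #       image_url="https://example.com/photo.jpg",
+ #       claim_context="Flood in Mumbai",
+ #       claim_date="July 2024",
+ #   )
+ #   print(result["verdict"], result.get("summary"))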
305
+ async def _analyze_image_with_vision(
306
+ self,
307
+ image_path: Optional[str] = None,
308
+ image_url: Optional[str] = None,
309
+ claim_context: str = "",
310
+ claim_date: str = ""
311
+ ) -> Dict[str, Any]:
312
+ """
313
+ Use Gemini Vision to analyze the actual image content for:
314
+ - AI-generated/deepfake indicators
315
+ - Manipulation artifacts
316
+ - Visual inconsistencies
317
+ - Context analysis
318
+
319
+ Args:
320
+ image_path: Path to the image file
321
+ image_url: URL of the image
322
+ claim_context: The claimed context
323
+ claim_date: The claimed date
324
+
325
+ Returns:
326
+ Dictionary with preliminary analysis
327
+ """
328
+ try:
329
+ if not self.gemini_model:
330
+ return {
331
+ "verdict": "uncertain",
332
+ "verified": False,
333
+ "message": "Gemini Vision not available",
334
+ "confidence": "low",
335
+ "analysis_method": "vision_unavailable",
336
+ }
337
+
338
+ # Load the image
339
+ import PIL.Image as PILImage
340
+ if image_path:
341
+ img = PILImage.open(image_path)
342
+ elif image_url:
343
+ img = await self._download_image(image_url)
344
+ else:
345
+ return {
346
+ "verdict": "uncertain",
347
+ "verified": False,
348
+ "message": "No image provided for vision analysis",
349
+ "confidence": "low",
350
+ "analysis_method": "vision_no_image",
351
+ }
352
+
353
+ prompt = f"""You are an expert image forensics analyst. Analyze this image carefully for authenticity and manipulation.
354
+
355
+ CLAIMED CONTEXT: {claim_context}
356
+ CLAIMED DATE: {claim_date}
357
+
358
+ Analyze the image for:
359
+ 1. **AI-Generated/Deepfake Indicators**: Look for signs of AI generation (inconsistent lighting, unnatural textures, artifacts around faces/objects, watermarks, telltale patterns)
360
+ 2. **Manipulation Artifacts**: Check for signs of editing (cloning, copy-paste, inconsistent shadows, lighting mismatches, pixelation patterns)
361
+ 3. **Visual Inconsistencies**: Look for impossible physics, inconsistent perspectives, mismatched elements
362
+ 4. **Context Analysis**: Does the visual content match the claimed context and date? (e.g., clothing styles, technology visible, environment)
363
+
364
+ Respond in JSON format:
365
+ {{
366
+ "verdict": "true|false|uncertain",
367
+ "verified": true|false,
368
+ "message": "Clear explanation of your findings",
369
+ "confidence": "high|medium|low",
370
+ "ai_generated_indicators": ["list of specific indicators found"],
371
+ "manipulation_artifacts": ["list of artifacts found"],
372
+ "visual_inconsistencies": ["list of inconsistencies"],
373
+ "context_match": "Does the image content match the claimed context?",
374
+ "reasoning": "Detailed reasoning for your verdict"
375
+ }}
376
+
377
+ Be specific and cite what you see in the image. If uncertain, explain why."""
378
+
379
+ # Use Gemini Vision to analyze the image
380
+ response = self.gemini_model.generate_content([prompt, img])
381
+
382
+ if not response.text:
383
+ return {
384
+ "verdict": "uncertain",
385
+ "verified": False,
386
+ "message": "Gemini Vision returned no response",
387
+ "confidence": "low",
388
+ "analysis_method": "vision_no_response",
389
+ }
390
+
391
+ # Parse JSON response
392
+ import json
393
+ response_text = response.text.strip()
394
+ if response_text.startswith("```json"):
395
+ response_text = response_text.replace("```json", "").replace("```", "").strip()
396
+ elif response_text.startswith("```"):
397
+ response_text = response_text.replace("```", "").strip()
398
+
399
+ try:
400
+ analysis = json.loads(response_text)
401
+ analysis["analysis_method"] = "gemini_vision"
402
+ return analysis
403
+ except json.JSONDecodeError:
404
+ # Fallback: extract verdict from text
405
+ verdict = "uncertain"
406
+ if "false" in response_text.lower() or "fake" in response_text.lower() or "manipulated" in response_text.lower():
407
+ verdict = "false"
408
+ elif "true" in response_text.lower() and "not" not in response_text.lower()[:50]:
409
+ verdict = "true"
410
+
411
+ return {
412
+ "verdict": verdict,
413
+ "verified": verdict == "true",
414
+ "message": response_text[:500],
415
+ "confidence": "medium",
416
+ "analysis_method": "gemini_vision_fallback",
417
+ "raw_response": response_text,
418
+ }
419
+
420
+ except Exception as e:
421
+ print(f"[vision] Error in Gemini Vision analysis: {e}")
422
+ return {
423
+ "verdict": "uncertain",
424
+ "verified": False,
425
+ "message": f"Error during vision analysis: {str(e)}",
426
+ "confidence": "low",
427
+ "analysis_method": "vision_error",
428
+ }
429
+
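+ # Example (sketch): the vision stage can also be exercised on its own; the URL
+ # below is a placeholder.
+ #
+ #   prelim = await verifier._analyze_image_with_vision(
+ #       image_url="https://example.com/photo.jpg",
+ #       claim_context="Flood in Mumbai",
+ #       claim_date="July 2024",
+ #   )
+ #   prelim["verdict"]  # "true" | "false" | "uncertain"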
430
+ def _synthesize_vision_and_evidence(
431
+ self,
432
+ preliminary_analysis: Dict[str, Any],
433
+ curated_analysis: Optional[Dict[str, Any]],
434
+ evidence: List[Dict[str, Any]],
435
+ claim_context: str,
436
+ claim_date: str,
437
+ ) -> Optional[Dict[str, Any]]:
438
+ """
439
+ Synthesize Gemini Vision analysis with reverse image search evidence.
440
+ Similar to text verification's hybrid synthesis.
441
+ """
442
+ try:
443
+ if not self.gemini_model:
444
+ return None
445
+
446
+ source_briefs = []
447
+ for item in evidence[:5]:
448
+ source_briefs.append({
449
+ "title": item.get("title"),
450
+ "snippet": item.get("snippet"),
451
+ "link": item.get("link"),
452
+ })
453
+
454
+ prompt = f"""You are an expert image verification analyst. Combine direct image analysis (Gemini Vision) with reverse image search evidence to produce a final verdict.
455
+
456
+ CLAIM: {claim_context}
457
+ CLAIM DATE: {claim_date}
458
+
459
+ DIRECT IMAGE ANALYSIS (Gemini Vision):
460
+ {json.dumps(preliminary_analysis or {}, indent=2, ensure_ascii=False)}
461
+
462
+ REVERSE IMAGE SEARCH ANALYSIS:
463
+ {json.dumps(curated_analysis or {}, indent=2, ensure_ascii=False)}
464
+
465
+ REVERSE IMAGE SEARCH SOURCES:
466
+ {json.dumps(source_briefs, indent=2, ensure_ascii=False)}
467
+
468
+ INSTRUCTIONS:
469
+ - Combine both analyses to make a final decision (true/false/uncertain)
470
+ - If vision analysis detects AI-generated/manipulated content, prioritize that
471
+ - If reverse image search finds contradictory evidence, factor that in
472
+ - If evidence is thin, keep the tone cautious
473
+ - Provide clear, actionable messaging for the end user
474
+
475
+ Respond ONLY in this JSON format:
476
+ {{
477
+ "verdict": "true|false|uncertain",
478
+ "verified": true|false,
479
+ "message": "Concise user-facing summary combining both analyses",
480
+ "confidence": "high|medium|low",
481
+ "reasoning": "Brief reasoning trail you followed",
482
+ "vision_findings": "Key findings from direct image analysis",
483
+ "search_findings": "Key findings from reverse image search"
484
+ }}"""
485
+
486
+ response = self.gemini_model.generate_content(prompt)
487
+ response_text = response.text.strip()
488
+
489
+ if response_text.startswith("```json"):
490
+ response_text = response_text.replace("```json", "").replace("```", "").strip()
491
+ elif response_text.startswith("```"):
492
+ response_text = response_text.replace("```", "").strip()
493
+
494
+ final_analysis = json.loads(response_text)
495
+ final_analysis.setdefault("verdict", "uncertain")
496
+ final_analysis.setdefault("verified", False)
497
+ final_analysis.setdefault("message", "Unable to synthesize final verdict.")
498
+ final_analysis.setdefault("confidence", "low")
499
+ final_analysis["analysis_method"] = "hybrid_vision_and_search"
500
+
501
+ # Build response similar to existing format
502
+ sources = self._top_sources(evidence, 3) if evidence else []
503
+
504
+ return {
505
+ "verdict": final_analysis["verdict"],
506
+ "summary": final_analysis["message"],
507
+ "message": final_analysis["message"],
508
+ "sources": sources,
509
+ "claim_context": claim_context,
510
+ "claim_date": claim_date,
511
+ "confidence": final_analysis.get("confidence", "medium"),
512
+ "analysis_method": "hybrid_vision_and_search",
513
+ "preliminary_analysis": preliminary_analysis,
514
+ "curated_analysis": curated_analysis,
515
+ }
516
+
517
+ except Exception as e:
518
+ print(f"Hybrid synthesis error: {e}")
519
+ return None
520
+
521
+ async def gather_evidence(self, image_path: Optional[str] = None, image_url: Optional[str] = None, claim_context: str = "") -> List[Dict[str, Any]]:
522
+ """
523
+ Evidence-only helper: performs reverse image search and returns ranked/filtered evidence
524
+ without invoking the LLM or producing a verdict.
525
+ """
526
+ try:
527
+ print("[verify] start", {"gather_only": True, "has_image_path": bool(image_path), "has_image_url": bool(image_url)})
528
+ search_results = await self._reverse_image_search(image_path=image_path, image_url=image_url)
529
+ if not search_results or (not search_results.get("inline_images") and not search_results.get("image_results")):
530
+ return []
531
+ evidence = self._collect_evidence(search_results)
532
+ filtered = self._rank_and_filter_evidence(evidence, claim_context, top_k=12)
533
+ return filtered
534
+ except Exception as e:
535
+ print(f"[gather_evidence] error: {e}")
536
+ return []
537
+
538
+ def _summarize_with_gemini(self, claim_context: str, claim_date: str, analysis: Dict[str, Any], forced_verdict: Optional[str] = None) -> Optional[Dict[str, Any]]:
539
+ try:
540
+ if not self.gemini_model:
541
+ return None
542
+
543
+ verdict = forced_verdict or analysis.get("verdict", "uncertain")
544
+ prompt = f"""You are a fact-checking assistant. Generate a single, concise sentence (no code blocks, no JSON)
545
+ that explains the verdict. Mirror the provided verdict exactly (do not change it).
546
+ If false, mention the most likely real context/time from evidence; if true, confirm briefly;
547
+ if uncertain, state uncertainty.
548
+
549
+ Claim context: {claim_context}
550
+ Claim date: {claim_date}
551
+ Verdict: {verdict}
552
+ Evidence (condensed): {self._top_sources(analysis.get('evidence', []), 3)}"""
553
+
554
+ response = self.gemini_model.generate_content(prompt)
555
+ text = response.text if response.text else None
556
+
557
+ return {"model": config.GEMINI_MODEL, "verdict": verdict, "text": text}
558
+ except Exception:
559
+ return None
560
+
561
+ def _collect_evidence(self, search_results: Dict[str, Any]) -> List[Dict[str, Any]]:
562
+ evidence: List[Dict[str, Any]] = []
563
+ for res in search_results.get("image_results", []):
564
+ evidence.append({
565
+ "title": res.get("title"),
566
+ "link": res.get("link"),
567
+ "source": res.get("source"),
568
+ "date": res.get("date"),
569
+ "thumbnail": res.get("thumbnail"),
570
+ "snippet": res.get("snippet"),
571
+ })
572
+ for img in search_results.get("inline_images", []):
573
+ evidence.append({
574
+ "title": img.get("title"),
575
+ "link": img.get("link"),
576
+ "source": img.get("source"),
577
+ "thumbnail": img.get("thumbnail"),
578
+ "snippet": img.get("snippet"),
579
+ })
580
+ return evidence
581
+
582
+ def _normalize_tokens(self, text: Optional[str]) -> List[str]:
583
+ if not text:
584
+ return []
585
+ import re
586
+ t = (text or "").lower()
587
+ stop = set(["the","a","an","and","or","for","to","of","in","on","at","with","by","from","this","that","is","are","was","were","as","it","its","their","his","her","him","she","he","they","them","we","you"])
588
+ toks = re.findall(r"[a-z0-9]{3,}", t)
589
+ return [x for x in toks if x not in stop]
590
+
591
+ def _evidence_score(self, claim_text: str, ev: Dict[str, Any]) -> float:
592
+ claim_tokens = set(self._normalize_tokens(claim_text))
593
+ ev_text = " ".join([s for s in [ev.get("title"), ev.get("snippet"), ev.get("source")] if s])
594
+ ev_tokens = set(self._normalize_tokens(ev_text))
595
+ if not claim_tokens or not ev_tokens:
596
+ return 0.0
597
+ overlap = len(claim_tokens & ev_tokens)
598
+ return overlap / float(len(claim_tokens))
599
+
600
+ def _rank_and_filter_evidence(self, evidence: List[Dict[str, Any]], claim_text: str, top_k: int = 12) -> List[Dict[str, Any]]:
601
+ scored: List[Tuple[float, int, Dict[str, Any]]] = []
602
+ for i, ev in enumerate(evidence):
603
+ s = self._evidence_score(claim_text, ev)
604
+ # Downrank social/UGC and YouTube to prefer article pages when checking relations
605
+ try:
606
+ from urllib.parse import urlparse
607
+ net = urlparse((ev.get("link") or "").strip()).netloc
608
+ except Exception:
609
+ net = ""
610
+ if net in config.LOW_PRIORITY_DOMAINS or net in ("youtube.com", "www.youtube.com", "youtu.be"):
611
+ s *= 0.6
612
+ scored.append((s, i, ev))
613
+ scored.sort(key=lambda x: x[0], reverse=True)
614
+ seen_urls = set()
615
+ seen_titles = set()
616
+ filtered: List[Dict[str, Any]] = []
617
+ for s, i, ev in scored:
618
+ url = (ev.get("link") or "").strip()
619
+ title = (ev.get("title") or "").strip().lower()
620
+ title_key = title[:80] if title else ""
621
+ if url and url in seen_urls:
622
+ continue
623
+ if title_key and title_key in seen_titles:
624
+ continue
625
+ filtered.append(ev)
626
+ if url:
627
+ seen_urls.add(url)
628
+ if title_key:
629
+ seen_titles.add(title_key)
630
+ if len(filtered) >= top_k:
631
+ break
632
+ print("[verify] evidence_rank_filter", {"input": len(evidence), "kept": len(filtered)})
633
+ return filtered
634
+
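+ # Example (sketch): ranking relies on _evidence_score, a plain token-overlap ratio.
+ # For the claim "Flood in Mumbai July 2024" and an evidence item titled
+ # "Mumbai flood photos, July 2024", every informative claim token appears in the
+ # evidence text, so the score is 1.0; unrelated items score near 0 and drop below
+ # the top_k cut-off. The claim and title below are illustrative only.
+ #
+ #   score = verifier._evidence_score(
+ #       "Flood in Mumbai July 2024",
+ #       {"title": "Mumbai flood photos, July 2024", "snippet": "", "source": ""},
+ #   )  # -> 1.0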
635
+ def _extract_json(self, text: str) -> Dict[str, Any]:
636
+ # Strip common fences and attempt to locate JSON object
637
+ t = text.strip()
638
+ if t.startswith("```"):
639
+ t = t.split("```", 1)[1]
640
+ t = t.lstrip("json").lstrip("\n").strip()
641
+ if "```" in t:
642
+ t = t.split("```", 1)[0].strip()
643
+ # Find first '{' and last '}'
644
+ start = t.find('{')
645
+ end = t.rfind('}')
646
+ if start != -1 and end != -1 and end > start:
647
+ t = t[start:end+1]
648
+ import json
649
+ return json.loads(t)
650
+
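+ # Example (sketch): _extract_json tolerates code-fenced model output, e.g.
+ #
+ #   verifier._extract_json('```json\n{"verdict": "false"}\n```')
+ #   # -> {"verdict": "false"}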
651
+ def _summarize_with_gemini_structured(self, claim_context: str, claim_date: str,
652
+ evidence: List[Dict[str, Any]],
653
+ forced_verdict: Optional[str] = None) -> Optional[Dict[str, Any]]:
654
+ try:
655
+ if not self.gemini_model:
656
+ return None
657
+
658
+ prompt = f"""You are a fact-checking assistant. Use the provided evidence items (title, link, date, source, snippet) to evaluate the FULL claim text.
659
+ The claim can include: event/context, place, timeframe, actors/entities, quantities, and relations/attribution. You may use only the provided evidence items.
660
+ Respond STRICTLY as compact JSON with keys:
661
+ - verdict: one of 'true' | 'false' | 'uncertain'
662
+ - relation_verdict: one of 'true' | 'false' | 'uncertain' (whether the stated relation holds)
663
+ - summary: <= 2 sentences, plain text
664
+ - top_sources: array of up to 3 objects {{title, link}}
665
+ - claim_parse: {{
666
+ entities: array of strings,
667
+ roles: array of strings,
668
+ relation: {{ predicate: string, subject: string, object: string }},
669
+ timeframe: {{ year: number|null, month: number|null }},
670
+ location: string|null,
671
+ citations: {{
672
+ entities: array of arrays of evidence indices (per entity),
673
+ roles: array of arrays of evidence indices (per role),
674
+ relation: array of evidence indices supporting subject+predicate+object together,
675
+ timeframe: array of evidence indices supporting the timeframe,
676
+ location: array of evidence indices supporting the location
677
+ }}
678
+ }}
679
+ Rules:
680
+ - verdict 'true' ONLY if evidence supports ALL key parts: event/context, place, timeframe, AND any stated relation.
681
+ - relation_verdict 'false' if the evidence supports a different relation and none supports the claimed relation.
682
+ - verdict 'false' if relation_verdict is 'false' or if place/time contradicts the claim without supporting evidence.
683
+ - 'uncertain' if ANY extracted part in claim_parse has no supporting citations.
684
+ - relation consistency: at least one cited evidence item MUST co-mention subject and object tokens with the predicate.
685
+ Do not include code fences or extra text; return only the JSON object.
686
+
687
+ Claim text: {claim_context}
688
+ Claim date: {claim_date}
689
+ Forced verdict: {forced_verdict}
690
+ Evidence: {evidence}"""
691
+
692
+ print("[gemini] request_meta", {"model": config.GEMINI_MODEL, "temp": config.GEMINI_TEMPERATURE, "topP": config.GEMINI_TOP_P})
693
+ response = self.gemini_model.generate_content(prompt)
694
+
695
+ if not response.text:
696
+ return None
697
+
698
+ text = response.text.strip()
699
+ print("[gemini] structured_text_preview", text[:200])
700
+ parsed = self._extract_json(text)
701
+ print("[gemini] parsed_json_keys", list(parsed.keys()) if isinstance(parsed, dict) else type(parsed).__name__)
702
+ return parsed if isinstance(parsed, dict) else None
703
+
704
+ except Exception as e:
705
+ print(f"[gemini] error: {e}")
706
+ return None
707
+
708
+ def _summarize_with_gemini_majority(self, claim_context: str, claim_date: str,
709
+ evidence: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
710
+ """
711
+ Simpler majority-based prompt: ask Gemini to decide true/false by which side has more supporting
712
+ evidence; only return uncertain if support is roughly equal/ambiguous.
713
+ Returns compact JSON: { verdict, clarification, corrected_relation, top_sources }
714
+ """
715
+ try:
716
+ if not self.gemini_model:
717
+ return None
718
+ prompt = f"""You are a citation-driven fact-checking assistant.
719
+ Given a CLAIM and a list of EVIDENCE items (title, link, date, source, snippet), decide if the CLAIM itself is true or false.
720
+
721
+ STRICT adjudication rules (apply literally to the CLAIM):
722
+ 1) Extract the relation from the CLAIM as:
723
+ relation: {{ predicate: string, subject: string, object: string }}
724
+ 2) Evaluate ONLY the CLAIM's relation. Mentions of a different object (alternative person/role/event/location) are NOT support for the CLAIM.
725
+ 3) SUPPORT only when an evidence item explicitly co-mentions the CLAIM's subject AND the CLAIM's object with the predicate in title/snippet (token-level match; paraphrases of those tokens are fine). General marital status or vague wording does NOT count as support if the CLAIM's object is not explicitly present.
726
+ 4) CONTRADICTION when evidence explicitly supports a mutually exclusive alternative relation (e.g., same subject + predicate with a different object), or explicitly negates the CLAIM.
727
+ 5) Social/UGC links may appear; still judge by content but prefer clearer, explicit co-mentions from any source.
728
+ 6) Decision for the CLAIM:
729
+ - If SUPPORT > CONTRADICTION by a meaningful margin, verdict = "true".
730
+ - If CONTRADICTION > SUPPORT by a meaningful margin, verdict = "false".
731
+ - If neither side is clearly stronger or no explicit co-mentions exist, verdict = "uncertain".
732
+ 7) Use only the provided EVIDENCE texts; no outside knowledge.
733
+
734
+ Output strictly as compact JSON with keys (and nothing else):
735
+ verdict: one of 'true' | 'false' | 'uncertain'
736
+ clarification: one concise sentence that answers the CLAIM directly. If verdict is 'false' or 'uncertain', state the most supported alternative relation (e.g., "<subject> was not <predicate> <object>. Instead, <subject> <predicate> <alt_object> at <context>."). Avoid hedging like "does not confirm".
737
+ corrected_relation: {{ predicate: string, subject: string, object: string }} | null
738
+ top_sources: up to 3 objects {{title, link}}
739
+
740
+ CLAIM: {claim_context}
741
+ CLAIM_DATE: {claim_date}
742
+ EVIDENCE: {evidence}
743
+ """
744
+ print("[gemini] request_meta", {"model": config.GEMINI_MODEL, "temp": config.GEMINI_TEMPERATURE, "topP": config.GEMINI_TOP_P})
745
+ response = self.gemini_model.generate_content(prompt)
746
+ if not response.text:
747
+ return None
748
+ text = response.text.strip()
749
+ print("[gemini] structured_text_preview", text[:200])
750
+ parsed = self._extract_json(text)
751
+ print("[gemini] parsed_json_keys", list(parsed.keys()) if isinstance(parsed, dict) else type(parsed).__name__)
752
+ return parsed if isinstance(parsed, dict) else None
753
+ except Exception as e:
754
+ print(f"[gemini] error: {e}")
755
+ return None
756
+
757
+ def _top_sources(self, evidence: List[Dict[str, Any]], k: int) -> List[Dict[str, Any]]:
758
+ items = []
759
+ for e in evidence:
760
+ title = e.get("title")
761
+ link = e.get("link")
762
+ if title or link:
763
+ items.append({"title": title, "link": link})
764
+ if len(items) >= k:
765
+ break
766
+ return items
767
+
768
+ def _validate_llm_parse(self, claim_text: str, evidence: List[Dict[str, Any]], llm: Dict[str, Any]) -> Tuple[Dict[str, Any], Dict[str, Any]]:
769
+ checks: Dict[str, Any] = {}
770
+ reasons: List[str] = []
771
+ passed = True
772
+ parse = (llm or {}).get("claim_parse") or {}
773
+ citations = parse.get("citations") or {}
774
+ # Helper to get combined text for an evidence index
775
+ def ev_text(i: int) -> str:
776
+ if i < 0 or i >= len(evidence):
777
+ return ""
778
+ ev = evidence[i]
779
+ return " ".join([t for t in [ev.get("title"), ev.get("snippet")] if t])
780
+ # 1) Ensure each entities[] and roles[] item has at least one citation
781
+ for key in ["entities", "roles"]:
782
+ items = parse.get(key) or []
783
+ cits = citations.get(key) or []
784
+ ok = bool(items) and len(cits) == len(items) and all(len(lst) > 0 for lst in cits if isinstance(lst, list))
785
+ checks[f"{key}_citations"] = ok
786
+ if not ok:
787
+ passed = False
788
+ reasons.append(f"Missing citations for {key}")
789
+ # 2) timeframe and location citations exist if present
790
+ for key in ["timeframe", "location"]:
791
+ has_item = bool(parse.get(key))
792
+ if has_item:
793
+ ok = bool(citations.get(key)) and len(citations.get(key)) > 0
794
+ checks[f"{key}_citations"] = ok
795
+ if not ok:
796
+ passed = False
797
+ reasons.append(f"Missing citations for {key}")
798
+ # 2b) If location cited, require token presence in at least one cited item
799
+ def _tok(text: str) -> set:
800
+ import re
801
+ return set(re.findall(r"[a-z0-9]{3,}", (text or "").lower()))
802
+ if parse.get("location") and citations.get("location"):
803
+ loc_toks = _tok(str(parse.get("location") or ""))
804
+ loc_token_ok = False
805
+ for i in citations.get("location"):
806
+ try:
807
+ it = _tok(ev_text(int(i)))
808
+ except Exception:
809
+ it = set()
810
+ if loc_toks and (loc_toks & it):
811
+ loc_token_ok = True
812
+ break
813
+ checks["location_token_match"] = loc_token_ok
814
+ if not loc_token_ok:
815
+ passed = False
816
+ reasons.append("Location tokens not found in cited items")
817
+ # 3) relation citations and co-mention (subject/object in same item)
818
+ relation = parse.get("relation") or {}
819
+ subj = (relation.get("subject") or "").strip()
820
+ obj = (relation.get("object") or "").strip()
821
+ # Token-based co-mention: require at least one informative token from subject and object in same item
822
+ def tokens(text: str) -> List[str]:
823
+ import re
824
+ return re.findall(r"[a-z0-9]{3,}", (text or "").lower())
825
+ subj_toks = set(tokens(subj))
826
+ obj_toks = set(tokens(obj))
827
+ rel_indices: List[int] = citations.get("relation") or []
828
+ rel_ok = False
829
+ for idx in rel_indices:
830
+ txt = ev_text(int(idx))
831
+ tl_toks = set(tokens(txt))
832
+ if subj_toks and obj_toks and (subj_toks & tl_toks) and (obj_toks & tl_toks):
833
+ rel_ok = True
834
+ break
835
+ checks["relation_comention"] = rel_ok
836
+ # Allow pooled-evidence relation support via shared anchors if co-mention failed
837
+ pooled_ok = False
838
+ pooled_detail: Dict[str, Any] = {}
839
+ if not rel_ok:
840
+ try:
841
+ entity_list: List[str] = (parse.get("entities") or [])
842
+ entity_cits: List[List[int]] = (citations.get("entities") or [])
843
+ def _tokens(text: str) -> set:
844
+ import re
845
+ return set(re.findall(r"[a-z0-9]{3,}", (text or "").lower()))
846
+ # Map subject/object to entity indices by token overlap
847
+ def best_entity_indices(name_toks: set) -> List[int]:
848
+ scored: List[Tuple[int,int]] = []
849
+ for idx, ent in enumerate(entity_list):
850
+ et = _tokens(ent)
851
+ scored.append((len(name_toks & et), idx))
852
+ scored.sort(reverse=True)
853
+ return [i for s,i in scored if s > 0]
854
+ subj_toks_set = _tokens(subj)
855
+ obj_toks_set = _tokens(obj)
856
+ subj_idxs = best_entity_indices(subj_toks_set) if subj_toks_set else []
857
+ obj_idxs = best_entity_indices(obj_toks_set) if obj_toks_set else []
858
+ subj_pool: List[int] = []
859
+ obj_pool: List[int] = []
860
+ for si in subj_idxs:
861
+ if si < len(entity_cits) and isinstance(entity_cits[si], list):
862
+ for v in entity_cits[si]:
863
+ try:
864
+ subj_pool.append(int(v))
865
+ except Exception:
866
+ pass
867
+ for oi in obj_idxs:
868
+ if oi < len(entity_cits) and isinstance(entity_cits[oi], list):
869
+ for v in entity_cits[oi]:
870
+ try:
871
+ obj_pool.append(int(v))
872
+ except Exception:
873
+ pass
874
+ subj_pool = list({int(x) for x in subj_pool})
875
+ obj_pool = list({int(x) for x in obj_pool})
876
+ # Anchors from claim parse
877
+ anchor_year = None
878
+ tf = parse.get("timeframe") or {}
879
+ try:
880
+ anchor_year = int(tf.get("year")) if tf.get("year") is not None else None
881
+ except Exception:
882
+ anchor_year = None
883
+ anchor_month_name = None
884
+ try:
885
+ mn = int(tf.get("month")) if tf.get("month") is not None else None
886
+ months = ["january","february","march","april","may","june","july","august","september","october","november","december"]
887
+ anchor_month_name = months[mn-1] if mn and 1 <= mn <= 12 else None
888
+ except Exception:
889
+ anchor_month_name = None
890
+ loc_tokens = _tok(str(parse.get("location") or ""))
891
+ claim_event_tokens = _tok(claim_text)
892
+ import re
893
+ def item_text(idx: int) -> str:
894
+ return ev_text(idx)
895
+ def has_year(idx: int) -> bool:
896
+ return bool(anchor_year is not None and re.search(rf"\b{anchor_year}\b", item_text(idx) or ""))
897
+ def has_month(idx: int) -> bool:
898
+ return bool(anchor_month_name and (anchor_month_name in (item_text(idx) or "").lower()))
899
+ def has_loc(idx: int) -> bool:
900
+ return bool(loc_tokens and (loc_tokens & _tok(item_text(idx))))
901
+ def event_overlap(idx1: int, idx2: int) -> bool:
902
+ t1 = _tok(item_text(idx1))
903
+ t2 = _tok(item_text(idx2))
904
+ return bool((claim_event_tokens & t1) and (claim_event_tokens & t2))
905
+ def anchors_align(i: int, j: int) -> Tuple[bool, List[str]]:
906
+ reasons: List[str] = []
907
+ if has_year(i) and has_year(j):
908
+ reasons.append("year")
909
+ if has_month(i) and has_month(j):
910
+ reasons.append("month")
911
+ if has_loc(i) and has_loc(j):
912
+ reasons.append("location")
913
+ if event_overlap(i, j):
914
+ reasons.append("event")
915
+ return (len(reasons) > 0, reasons)
916
+ for si in subj_pool:
917
+ for oj in obj_pool:
918
+ ok, rs = anchors_align(int(si), int(oj))
919
+ if ok:
920
+ pooled_ok = True
921
+ pooled_detail = {"subj_idx": int(si), "obj_idx": int(oj), "anchors": rs}
922
+ break
923
+ if pooled_ok:
924
+ break
925
+ except Exception:
926
+ pooled_ok = False
927
+ checks["relation_pooled_anchor"] = pooled_ok
928
+ if pooled_ok:
929
+ checks["relation_pooled_detail"] = pooled_detail
930
+ if not rel_ok and not pooled_ok:
931
+ passed = False
932
+ reasons.append("Relation not supported by co-mention or pooled anchors")
933
+ # 4) Simple entity overlap score between claim tokens and cited items
934
+ import re
935
+ claim_tokens = set([t.lower() for t in re.findall(r"[A-Za-z]{3,}", claim_text or "")])
936
+ cited_indices = set()
937
+ for arr in (citations.get("entities") or []):
938
+ for i in arr:
939
+ try:
940
+ cited_indices.add(int(i))
941
+ except Exception:
942
+ pass
943
+ overlap_hits = 0
944
+ for i in cited_indices:
945
+ tl = ev_text(i).lower()
946
+ if any(tok in tl for tok in claim_tokens):
947
+ overlap_hits += 1
948
+ entity_overlap_score = overlap_hits / (len(cited_indices) or 1)
949
+ checks["entity_overlap_score"] = entity_overlap_score
950
+ # 5) Date check: allow year and optional month names from claim timeframe in cited items
951
+ year = None
952
+ month_num = None
953
+ tf = parse.get("timeframe") or {}
954
+ try:
955
+ year = int(tf.get("year")) if tf.get("year") is not None else None
956
+ except Exception:
957
+ year = None
958
+ try:
959
+ month_num = int(tf.get("month")) if tf.get("month") is not None else None
960
+ except Exception:
961
+ month_num = None
962
+ date_ok = True
963
+ if year is not None:
964
+ date_ok = False
965
+ for i in (citations.get("timeframe") or []):
966
+ try:
967
+ ev = evidence[int(i)]
968
+ except Exception:
969
+ continue
970
+ text = " ".join([t for t in [ev.get("title"), ev.get("snippet"), ev.get("date"), ev.get("source"), ev.get("link")] if t])
971
+ if re.search(rf"\b{year}\b", text or ""):
972
+ date_ok = True
973
+ break
974
+ # Month name matching if provided
975
+ if month_num is not None:
976
+ month_names = [
977
+ "january","february","march","april","may","june",
978
+ "july","august","september","october","november","december"
979
+ ]
980
+ mname = month_names[month_num-1] if 1 <= month_num <= 12 else None
981
+ if mname and (mname in (text or "").lower()):
982
+ date_ok = True
983
+ break
984
+ checks["timeframe_match"] = date_ok
985
+ if not date_ok:
986
+ passed = False
987
+ reasons.append("Timeframe year not supported in cited items")
988
+ # Domains used (for logging only)
989
+ from urllib.parse import urlparse
990
+ domains = []
991
+ for ev in evidence:
992
+ try:
993
+ net = urlparse(ev.get("link") or "").netloc
994
+ except Exception:
995
+ net = ""
996
+ if net:
997
+ domains.append(net)
998
+ debug = {
999
+ "claim_parse": parse,
1000
+ "citations": citations,
1001
+ "domains_used": domains,
1002
+ }
1003
+ return {"passed": passed, "reasons": reasons, "checks": checks}, debug
1004
+
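+ # Note: this validator is intentionally conservative. Upstream, a "true" LLM
+ # verdict is downgraded to "uncertain" whenever any extracted claim part lacks
+ # citations, the relation fails both the co-mention and pooled-anchor tests, or
+ # the claimed timeframe year is not found in the cited items; "false" verdicts
+ # are never upgraded.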
1005
+ def _fallback_summary(self, verdict: str, claim_context: str, claim_date: str,
1006
+ best_title: Optional[str], best_link: Optional[str], best_year: Optional[int]) -> str:
1007
+ if verdict == "false":
1008
+ where = best_title or "another place/time"
1009
+ when = str(best_year) if best_year else "an earlier date"
1010
+ src = best_link or "a corroborating source"
1011
+ return f"Claim is false. The image corresponds to {where} from {when}, not {claim_context}, {claim_date}. Source: {src}."
1012
+ if verdict == "true":
1013
+ return f"Claim is true. The available evidence supports {claim_context}, {claim_date}."
1014
+ return f"Claim is uncertain. Evidence is inconclusive for {claim_context}, {claim_date}."
1015
+
1016
+ def _clean_summary_text(self, text: Optional[str]) -> str:
1017
+ if not text:
1018
+ return ""
1019
+ t = text.strip()
1020
+ # Remove common code-fence wrappers
1021
+ if t.startswith("```"):
1022
+ # drop first fence
1023
+ t = t.split("```", 1)[1]
1024
+ # drop language tag if present
1025
+ t = t.lstrip("\n").split("\n", 1)[-1] if "\n" in t else t
1026
+ # drop trailing fence
1027
+ if "```" in t:
1028
+ t = t.rsplit("```", 1)[0]
1029
+ return t.strip()
1030
+
1031
+ async def _reverse_image_search(self, image_path: Optional[str] = None, image_url: Optional[str] = None) -> Dict[str, Any]:
1032
+ """
1033
+ Perform reverse image search using SerpApi
1034
+
1035
+ Args:
1036
+ image_path: Path to the image file
1037
+ image_url: URL of the image
1038
+
1039
+ Returns:
1040
+ Search results from SerpApi
1041
+ """
1042
+ try:
1043
+ if GoogleSearch is None:
1044
+ raise RuntimeError("google-search-results package not available. Install with: pip install google-search-results")
1045
+
1046
+ # Build params per SerpApi docs - use official client for ALL requests
1047
+ params: Dict[str, Any] = {
1048
+ "engine": "google_reverse_image",
1049
+ "api_key": self.api_key,
1050
+ }
1051
+
1052
+ if image_url:
1053
+ # Use image_url parameter for URLs
1054
+ params["image_url"] = image_url
1055
+ print("[serpapi] Using image_url parameter")
1056
+ elif image_path:
1057
+ # For local files, upload to Cloudinary first to get a public URL
1058
+ try:
1059
+ cloudinary_url = await self._upload_to_cloudinary(image_path)
1060
+ if cloudinary_url:
1061
+ params["image_url"] = cloudinary_url
1062
+ print(f"[serpapi] Using Cloudinary URL: {cloudinary_url}")
1063
+ else:
1064
+ print("[serpapi] Cloudinary upload failed, falling back to base64")
1065
+ # Fallback to base64 if Cloudinary fails
1066
+ with open(image_path, "rb") as img_file:
1067
+ img_data = img_file.read()
1068
+ img_base64 = base64.b64encode(img_data).decode("utf-8")
1069
+ params["image_content"] = img_base64
1070
+ print("[serpapi] Using image_content parameter (base64 fallback)")
1071
+ except Exception as e:
1072
+ print(f"[serpapi] Error uploading to Cloudinary: {e}")
1073
+ # Fallback to base64
1074
+ with open(image_path, "rb") as img_file:
1075
+ img_data = img_file.read()
1076
+ img_base64 = base64.b64encode(img_data).decode("utf-8")
1077
+ params["image_content"] = img_base64
1078
+ print("[serpapi] Using image_content parameter (base64 fallback)")
1079
+
1080
+ # Debug prints
1081
+ print("[serpapi] params", {
1082
+ "engine": params.get("engine"),
1083
+ "has_image_url": bool(params.get("image_url")),
1084
+ "has_image_content": bool(params.get("image_content")),
1085
+ "image_content_len": len(params.get("image_content", "")) if params.get("image_content") else 0,
1086
+ })
1087
+
1088
+ # Use different approaches based on whether we have image_url or image_content
1089
+ if params.get("image_url"):
1090
+ # For image_url, use the official client (works well)
1091
+ print("[serpapi] Using official GoogleSearch client for image_url")
1092
+ search = GoogleSearch(params) # type: ignore
1093
+ results = search.get_dict()
1094
+ print("[serpapi] Successfully got results from GoogleSearch client")
1095
+ return results
1096
+ else:
1097
+ # For image_content (base64), use direct HTTP POST to avoid header size issues
1098
+ print("[serpapi] Using direct HTTP POST for image_content (base64)")
1099
+ try:
1100
+ import requests
1101
+ response = requests.post(
1102
+ "https://serpapi.com/search?engine=google_reverse_image",
1103
+ data=params,
1104
+ timeout=60
1105
+ )
1106
+ print(f"[serpapi] HTTP POST status: {response.status_code}")
1107
+ response.raise_for_status()
1108
+ results = response.json()
1109
+ print("[serpapi] Successfully got results from HTTP POST")
1110
+ return results
1111
+ except Exception as http_error:
1112
+ print(f"[serpapi] HTTP POST failed: {http_error}")
1113
+ return {}
1114
+
1115
+ except Exception as e:
1116
+ print(f"[serpapi] Error in reverse image search: {e}")
1117
+ print(f"[serpapi] Error type: {type(e).__name__}")
1118
+ import traceback
1119
+ print(f"[serpapi] Traceback: {traceback.format_exc()}")
1120
+ return {}
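+ # Example (sketch): the two request paths above correspond to
+ #
+ #   GoogleSearch({"engine": "google_reverse_image", "api_key": key,
+ #                 "image_url": url}).get_dict()
+ #
+ # for public image URLs, and a plain HTTP POST of the same params to
+ # https://serpapi.com/search?engine=google_reverse_image when only base64
+ # image_content is available, avoiding header/URL size issues with the large payload.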
1121
+
1122
+ def _extract_year_from_text(self, text: str) -> Optional[int]:
1123
+ if not text:
1124
+ return None
1125
+ import re
1126
+ years = re.findall(r"(19\d{2}|20\d{2})", text)
1127
+ if not years:
1128
+ return None
1129
+ try:
1130
+ return int(years[0])
1131
+ except Exception:
1132
+ return None
1133
+
1134
+ def _context_mismatch(self, claim_context_lc: str, text: str) -> bool:
1135
+ t = (text or "").lower()
1136
+ if not claim_context_lc:
1137
+ return False
1138
+ # Simple heuristic: if text contains a strong, different location keyword
1139
+ known = {
1140
+ "mumbai": ["delhi", "bangalore", "chennai", "kolkata", "new york", "london"],
1141
+ "new york": ["mumbai", "delhi", "london", "paris", "dubai"],
1142
+ }
1143
+ for k, others in known.items():
1144
+ if claim_context_lc == k:
1145
+ if any(o in t for o in others):
1146
+ return True
1147
+ return False
1148
+
1149
+ async def _generate_counter_measure(self, original_image_path: Optional[str], evidence_image_url: str,
1150
+ claim_context: str, claim_date: str, original_image_url: Optional[str] = None) -> str:
1151
+ """
1152
+ Generate a visual counter-measure image
1153
+
1154
+ Args:
1155
+ original_image_path: Path to the original misleading image
1156
+ evidence_image_url: URL of the evidence image
1157
+ claim_context: The claimed context
1158
+ claim_date: The claimed date
1159
+
1160
+ Returns:
1161
+ Path to the generated counter-measure image
1162
+ """
1163
+ try:
1164
+ # Load original image: from path if available, else download from original_image_url
1165
+ if original_image_path:
1166
+ original_img = Image.open(original_image_path)
1167
+ elif original_image_url:
1168
+ original_img = await self._download_image(original_image_url)
1169
+ else:
1170
+ # Fallback to evidence image as placeholder
1171
+ original_img = await self._download_image(evidence_image_url)
1172
+
1173
+ # Download evidence image
1174
+ evidence_img = await self._download_image(evidence_image_url)
1175
+
1176
+ # Create counter-measure
1177
+ counter_measure = self._create_counter_measure_image(
1178
+ original_img, evidence_img, claim_context, claim_date
1179
+ )
1180
+
1181
+ # Save to temporary file
1182
+ output_path = tempfile.mktemp(suffix=".png")
1183
+ counter_measure.save(output_path, "PNG")
1184
+
1185
+ return output_path
1186
+
1187
+ except Exception as e:
1188
+ print(f"Error generating counter-measure: {e}")
1189
+ raise
1190
+
1191
+ async def _upload_to_cloudinary(self, image_path: str) -> Optional[str]:
1192
+ """
1193
+ Upload image to Cloudinary and return the public URL
1194
+
1195
+ Args:
1196
+ image_path: Path to the source image file
1197
+
1198
+ Returns:
1199
+ Cloudinary public URL of the uploaded image, or None if upload fails
1200
+ """
1201
+ try:
1202
+ import cloudinary
1203
+ import cloudinary.uploader
1204
+ from config import config
1205
+
1206
+ # Configure Cloudinary
1207
+ cloudinary.config(
1208
+ cloud_name=config.CLOUDINARY_CLOUD_NAME,
1209
+ api_key=config.CLOUDINARY_API_KEY,
1210
+ api_secret=config.CLOUDINARY_API_SECRET
1211
+ )
1212
+
1213
+ # Upload to Cloudinary with frames folder
1214
+ result = cloudinary.uploader.upload(
1215
+ image_path,
1216
+ folder="frames",
1217
+ resource_type="image"
1218
+ )
1219
+
1220
+ if result and result.get('secure_url'):
1221
+ public_url = result['secure_url']
1222
+ print(f"[cloudinary] Uploaded {image_path} to {public_url}")
1223
+ return public_url
1224
+ else:
1225
+ print("[cloudinary] Upload failed - no secure_url in response")
1226
+ return None
1227
+
1228
+ except Exception as e:
1229
+ print(f"[cloudinary] Error uploading to Cloudinary: {e}")
1230
+ return None
1231
+
1232
+ async def _copy_to_public_folder(self, image_path: str) -> Optional[str]:
1233
+ """
1234
+ Copy image to public/frames folder and return the public URL
1235
+
1236
+ Args:
1237
+ image_path: Path to the source image file
1238
+
1239
+ Returns:
1240
+ Public URL of the copied image, or None if copy fails
1241
+ """
1242
+ try:
1243
+ import shutil
1244
+ import uuid
1245
+ from pathlib import Path
1246
+
1247
+ # Create public/frames directory if it doesn't exist
1248
+ public_frames_dir = Path("public/frames")
1249
+ public_frames_dir.mkdir(parents=True, exist_ok=True)
1250
+
1251
+ # Generate unique filename
1252
+ file_extension = Path(image_path).suffix
1253
+ unique_filename = f"{uuid.uuid4()}{file_extension}"
1254
+ public_path = public_frames_dir / unique_filename
1255
+
1256
+ # Copy the file
1257
+ shutil.copy2(image_path, public_path)
1258
+
1259
+ # Return the public URL
1260
+ public_url = f"http://127.0.0.1:{config.SERVICE_PORT}/frames/{unique_filename}"
1261
+ print(f"[copy] Copied {image_path} to {public_path}")
1262
+ print(f"[copy] Public URL: {public_url}")
1263
+
1264
+ return public_url
1265
+
1266
+ except Exception as e:
1267
+ print(f"[copy] Error copying to public folder: {e}")
1268
+ return None
1269
+
1270
+ async def _download_image(self, image_url: str) -> Image.Image:
1271
+ """
1272
+ Download an image from URL
1273
+
1274
+ Args:
1275
+ image_url: URL of the image to download
1276
+
1277
+ Returns:
1278
+ PIL Image object
1279
+ """
1280
+ try:
1281
+ headers = {
1282
+ "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0 Safari/537.36",
1283
+ "Referer": "https://www.google.com/",
1284
+ }
1285
+ response = requests.get(image_url, timeout=15, headers=headers, stream=True)
1286
+ response.raise_for_status()
1287
+ content_type = response.headers.get("Content-Type", "").lower()
1288
+ if "image" not in content_type:
1289
+ # Not an image (likely a webpage); return placeholder
1290
+ return Image.new('RGB', (300, 200), color='gray')
1291
+ data = response.content
1292
+ img = Image.open(io.BytesIO(data))
1293
+ return img
1294
+ except Exception:
1295
+ # Return a placeholder image if download fails
1296
+ return Image.new('RGB', (300, 200), color='gray')
1297
+
1298
+ def _create_counter_measure_image(self, original_img: Image.Image, evidence_img: Image.Image,
1299
+ claim_context: str, claim_date: str) -> Image.Image:
1300
+ """
1301
+ Create the counter-measure image with side-by-side comparison
1302
+
1303
+ Args:
1304
+ original_img: The original misleading image
1305
+ evidence_img: The evidence image
1306
+ claim_context: The claimed context
1307
+ claim_date: The claimed date
1308
+
1309
+ Returns:
1310
+ Generated counter-measure image
1311
+ """
1312
+ # Resize images to consistent dimensions
1313
+ target_width, target_height = 400, 300
1314
+
1315
+ original_img = original_img.resize((target_width, target_height), Image.Resampling.LANCZOS)
1316
+ evidence_img = evidence_img.resize((target_width, target_height), Image.Resampling.LANCZOS)
1317
+
1318
+ # Create canvas for side-by-side layout
1319
+ canvas_width = target_width * 2 + 50 # Extra space for padding
1320
+ canvas_height = target_height + 200 # Extra space for labels and watermark
1321
+
1322
+ canvas = Image.new('RGB', (canvas_width, canvas_height), 'white')
1323
+ draw = ImageDraw.Draw(canvas)
1324
+
1325
+ # Try to load a font, fall back to default if not available
1326
+ try:
1327
+ font_large = ImageFont.truetype("/System/Library/Fonts/Arial.ttf", 24)
1328
+ font_medium = ImageFont.truetype("/System/Library/Fonts/Arial.ttf", 18)
1329
+ font_small = ImageFont.truetype("/System/Library/Fonts/Arial.ttf", 14)
1330
+ except:
1331
+ font_large = ImageFont.load_default()
1332
+ font_medium = ImageFont.load_default()
1333
+ font_small = ImageFont.load_default()
1334
+
1335
+ # Add title
1336
+ title = "FALSE CONTEXT DETECTED"
1337
+ title_bbox = draw.textbbox((0, 0), title, font=font_large)
1338
+ title_width = title_bbox[2] - title_bbox[0]
1339
+ title_x = (canvas_width - title_width) // 2
1340
+ draw.text((title_x, 20), title, fill='red', font=font_large)
1341
+
1342
+ # Add original image (left side)
1343
+ original_x = 25
1344
+ original_y = 80
1345
+ canvas.paste(original_img, (original_x, original_y))
1346
+
1347
+ # Add evidence image (right side)
1348
+ evidence_x = original_x + target_width + 25
1349
+ evidence_y = original_y
1350
+ canvas.paste(evidence_img, (evidence_x, evidence_y))
1351
+
1352
+ # Add labels
1353
+ claim_label = f"CLAIM: {claim_context}, {claim_date}"
1354
+ reality_label = "REALITY: Different context/earlier date"
1355
+
1356
+ draw.text((original_x, original_y - 30), claim_label, fill='red', font=font_medium)
1357
+ draw.text((evidence_x, evidence_y - 30), reality_label, fill='green', font=font_medium)
1358
+
1359
+ # Add watermark
1360
+ watermark = "FALSE CONTEXT"
1361
+ watermark_img = Image.new('RGBA', canvas.size, (0, 0, 0, 0))
1362
+ watermark_draw = ImageDraw.Draw(watermark_img)
1363
+
1364
+ # Create semi-transparent watermark
1365
+ watermark_bbox = watermark_draw.textbbox((0, 0), watermark, font=font_large)
1366
+ watermark_width = watermark_bbox[2] - watermark_bbox[0]
1367
+ watermark_height = watermark_bbox[3] - watermark_bbox[1]
1368
+
1369
+ watermark_x = (canvas_width - watermark_width) // 2
1370
+ watermark_y = (canvas_height - watermark_height) // 2
1371
+
1372
+ watermark_draw.text((watermark_x, watermark_y), watermark, fill=(255, 0, 0, 128), font=font_large)
1373
+
1374
+ # Composite watermark onto canvas
1375
+ canvas = Image.alpha_composite(canvas.convert('RGBA'), watermark_img).convert('RGB')
1376
+
1377
+ return canvas
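For readers following the compositing logic above, here is a minimal, self-contained Pillow sketch of the same semi-transparent watermark technique; the canvas size, text, and output filename are illustrative placeholders, not part of this commit.

from PIL import Image, ImageDraw, ImageFont

base = Image.new('RGB', (850, 500), 'white')            # stand-in for the side-by-side comparison canvas
overlay = Image.new('RGBA', base.size, (0, 0, 0, 0))    # fully transparent layer for the watermark
draw = ImageDraw.Draw(overlay)
font = ImageFont.load_default()
draw.text((320, 240), "FALSE CONTEXT", fill=(255, 0, 0, 128), font=font)  # half-opaque red text
stamped = Image.alpha_composite(base.convert('RGBA'), overlay).convert('RGB')
stamped.save("counter_measure_demo.png")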
services/input_processor.py ADDED
@@ -0,0 +1,308 @@
1
+ import os
2
+ import re
3
+ import json
4
+ from typing import Dict, List, Optional, Union, Tuple
5
+ import google.generativeai as genai
6
+ import tempfile
7
+ from config import config
8
+
9
+ class InputProcessor:
10
+ """
11
+ Intelligent input processor that converts chatbot input into structured verification requests
12
+ """
13
+
14
+ def __init__(self):
15
+ # Configure Gemini
16
+ genai.configure(api_key=config.GEMINI_API_KEY)
17
+ self.model = genai.GenerativeModel(
18
+ config.GEMINI_MODEL,
19
+ generation_config=genai.types.GenerationConfig(
20
+ temperature=config.GEMINI_TEMPERATURE,
21
+ top_p=config.GEMINI_TOP_P,
22
+ max_output_tokens=config.GEMINI_MAX_TOKENS
23
+ )
24
+ )
25
+
26
+ self.system_prompt = """You are an intelligent input processor for a visual verification service.
27
+
28
+ Your task is to analyze user input and extract:
29
+ 1. Image/video/audio content (files, URLs, or descriptions)
30
+ 2. Claim context (what the user is claiming)
31
+ 3. Claim date (when the claim was made)
32
+ 4. Type of verification needed (image, video, audio, or text)
33
+
34
+ Return a JSON response with this structure:
35
+ {
36
+ "verification_type": "image" or "video" or "audio" or "text",
37
+ "content": {
38
+ "files": ["list of file paths if files provided"],
39
+ "urls": ["list of image/video/audio URLs"],
40
+ "descriptions": ["list of text descriptions"],
41
+ "text": "the text claim to verify (if verification_type is text)"
42
+ },
43
+ "claim_context": "extracted or inferred claim context",
44
+ "claim_date": "extracted or inferred date"
45
+ }
46
+
47
+ Rules:
48
+ - If multiple images/videos/audio files are mentioned, separate them clearly
49
+ - Extract URLs from text using regex patterns
50
+ - Infer context from surrounding text if not explicitly stated
51
 + - If no date is mentioned, leave it blank
52
+ - Handle mixed content types appropriately"""
53
+
54
+ async def process_input(
55
+ self,
56
+ text_input: Optional[str] = None,
57
+ files: Optional[List] = None
58
+ ) -> Dict:
59
+ """
60
+ Process chatbot input and return structured verification request
61
+ """
62
+ try:
63
+ print(f"🔍 DEBUG: InputProcessor.process_input called")
64
+ print(f"🔍 DEBUG: text_input = {text_input}")
65
+ print(f"🔍 DEBUG: files = {files}")
66
+ print(f"🔍 DEBUG: files type = {type(files)}")
67
+
68
+ # Prepare input for LLM analysis
69
+ print(f"🔍 DEBUG: Preparing input text for LLM analysis")
70
+ input_text = self._prepare_input_text(text_input, files)
71
+ print(f"🔍 DEBUG: Prepared input_text = {input_text}")
72
+
73
+ # Get LLM analysis
74
+ print(f"🔍 DEBUG: Calling LLM analysis")
75
+ llm_response = await self._analyze_with_llm(input_text)
76
+ print(f"🔍 DEBUG: LLM response = {llm_response}")
77
+
78
+ # Parse and validate LLM response
79
+ print(f"🔍 DEBUG: Parsing LLM response")
80
+ parsed_response = self._parse_llm_response(llm_response)
81
+ print(f"🔍 DEBUG: Parsed response = {parsed_response}")
82
+
83
+ # Post-process and enhance the response
84
+ print(f"🔍 DEBUG: Post-processing response")
85
+ final_response = await self._post_process_response(parsed_response, files)
86
+
87
+ # PATCH: If verification_type is 'video' but all files have audio extensions, reassign to 'audio'
88
+ audio_exts = ['.mp3', '.wav', '.ogg', '.flac', '.m4a']
89
+ content_files = final_response.get('content', {}).get('files', [])
90
+ if (
91
+ final_response.get('verification_type') == 'video' and
92
+ content_files and
93
+ all(any(f.lower().endswith(e) for e in audio_exts) for f in content_files)
94
+ ):
95
+ print(f"🔍 PATCH: Rewriting 'verification_type' from 'video' to 'audio' (all files are audio)")
96
+ final_response['verification_type'] = 'audio'
97
+ print(f"🔍 DEBUG: Final response = {final_response}")
98
+ return final_response
99
+
100
+ except Exception as e:
101
+ print(f"❌ DEBUG: Exception in InputProcessor.process_input: {e}")
102
+ print(f"❌ DEBUG: Exception type: {type(e).__name__}")
103
+ import traceback
104
+ print(f"❌ DEBUG: Traceback: {traceback.format_exc()}")
105
+ return {
106
+ "error": f"Failed to process input: {str(e)}",
107
+ "verification_type": "unknown",
108
+ "content": {"files": [], "urls": [], "descriptions": []},
109
+ "claim_context": "Unknown context",
110
+ "claim_date": "Unknown date",
111
+ }
112
+
113
+ def _prepare_input_text(self, text_input: Optional[str], files: Optional[List]) -> str:
114
+ """Prepare input text for LLM analysis"""
115
+ print(f"🔍 DEBUG: _prepare_input_text called with text_input={text_input}, files={files}")
116
+ input_parts = []
117
+
118
+ if text_input:
119
+ input_parts.append(f"Text input: {text_input}")
120
+ print(f"🔍 DEBUG: Added text input: {text_input}")
121
+
122
+ if files:
123
+ file_info = []
124
+ for i, file in enumerate(files):
125
+ file_info.append(f"File {i+1}: {file.filename} ({file.content_type})")
126
+ print(f"🔍 DEBUG: Added file {i+1}: {file.filename} ({file.content_type})")
127
+ input_parts.append(f"Files provided: {'; '.join(file_info)}")
128
+
129
+ if not input_parts:
130
+ input_parts.append("No text or files provided")
131
+ print(f"🔍 DEBUG: No input parts, using default message")
132
+
133
+ result = "\n".join(input_parts)
134
+ print(f"🔍 DEBUG: Final prepared input text: {result}")
135
+ return result
136
+
137
+ async def _analyze_with_llm(self, input_text: str) -> str:
138
+ """Use Gemini to analyze the input"""
139
+ try:
140
+ print(f"🔍 DEBUG: _analyze_with_llm called with input_text: {input_text}")
141
+ prompt = f"{self.system_prompt}\n\nUser input: {input_text}"
142
+ print(f"🔍 DEBUG: Generated prompt: {prompt}")
143
+ response = self.model.generate_content(prompt)
144
+ print(f"🔍 DEBUG: LLM response text: {response.text}")
145
+ return response.text
146
+ except Exception as e:
147
+ print(f"❌ DEBUG: LLM analysis failed: {e}")
148
+ print(f"🔍 DEBUG: Falling back to rule-based parsing")
149
+ # Fallback to rule-based parsing if LLM fails
150
+ return self._fallback_parsing(input_text)
151
+
152
+ def _fallback_parsing(self, input_text: str) -> str:
153
+ """Fallback parsing when LLM is unavailable"""
154
+ print(f"🔍 DEBUG: _fallback_parsing called with input_text: {input_text}")
155
+
156
+ # Extract URLs using regex
157
+ url_pattern = r'https?://[^\s<>"]+|www\.[^\s<>"]+'
158
+ urls = re.findall(url_pattern, input_text)
159
+ print(f"🔍 DEBUG: Extracted URLs: {urls}")
160
+
161
+ # Simple content type detection
162
+ verification_type = "text" # default for text-only queries
163
+
164
+ # Check for video platform URLs first
165
+ video_platforms = [
166
+ 'instagram.com/reels/', 'instagram.com/p/', 'instagram.com/tv/',
167
+ 'youtube.com/watch', 'youtu.be/', 'youtube.com/shorts/',
168
+ 'tiktok.com/', 'vm.tiktok.com/',
169
+ 'twitter.com/', 'x.com/', 't.co/',
170
+ 'facebook.com/', 'fb.watch/',
171
+ 'vimeo.com/', 'twitch.tv/', 'dailymotion.com/',
172
+ 'imgur.com/', 'soundcloud.com/', 'mixcloud.com/',
173
+ 'lbry.tv/', 'odysee.com/', 't.me/'
174
+ ]
175
+
176
+ # Check for image platform URLs
177
+ image_platforms = [
178
+ 'instagram.com/p/', 'imgur.com/', 'flickr.com/',
179
+ 'pinterest.com/', 'unsplash.com/', 'pexels.com/'
180
+ ]
181
+
182
+ # Check for direct file extensions
183
+ if any(ext in input_text.lower() for ext in ['.mp4', '.avi', '.mov', '.mkv', '.webm', 'video']):
184
+ verification_type = "video"
185
+ elif any(ext in input_text.lower() for ext in ['.jpg', '.jpeg', '.png', '.gif', '.webp', 'image', 'photo', 'picture']):
186
+ verification_type = "image"
187
+ elif any(ext in input_text.lower() for ext in ['.mp3', '.wav', '.ogg', '.flac', '.m4a', 'audio']):
188
+ verification_type = "audio"
189
+ # Check for video platform URLs
190
+ elif any(platform in input_text.lower() for platform in video_platforms):
191
+ verification_type = "video"
192
+ # Check for image platform URLs
193
+ elif any(platform in input_text.lower() for platform in image_platforms):
194
+ verification_type = "image"
195
+
196
+ print(f"🔍 DEBUG: Detected verification_type: {verification_type}")
197
+
198
+ # Extract date patterns
199
+ date_pattern = r'\d{1,2}[/-]\d{1,2}[/-]\d{2,4}|\d{4}[/-]\d{1,2}[/-]\d{1,2}'
200
+ dates = re.findall(date_pattern, input_text)
201
+ claim_date = dates[0] if dates else "Unknown date"
202
+ print(f"🔍 DEBUG: Extracted dates: {dates}, using: {claim_date}")
203
+
204
+ # Clean up the input text for better processing
205
+ clean_text = input_text.replace("Text input: ", "").strip()
206
+
207
+ result = {
208
+ "verification_type": verification_type,
209
+ "content": {
210
+ "files": [],
211
+ "urls": urls,
212
+ "descriptions": [clean_text],
213
+ "text": clean_text if verification_type == "text" else None
214
+ },
215
+ "claim_context": clean_text,
216
+ "claim_date": claim_date,
217
+ }
218
+ print(f"🔍 DEBUG: Fallback parsing result: {result}")
219
+ return json.dumps(result)
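A quick illustration of the same fallback regexes in isolation (the sample sentence is made up):

import re

text = "This clip https://youtu.be/abc123 was supposedly filmed on 12/07/2024"
urls = re.findall(r'https?://[^\s<>"]+|www\.[^\s<>"]+', text)
dates = re.findall(r'\d{1,2}[/-]\d{1,2}[/-]\d{2,4}|\d{4}[/-]\d{1,2}[/-]\d{1,2}', text)
print(urls)   # ['https://youtu.be/abc123']
print(dates)  # ['12/07/2024']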
220
+
221
+ def _parse_llm_response(self, llm_response: str) -> Dict:
222
+ """Parse and validate LLM response"""
223
+ try:
224
+ print(f"🔍 DEBUG: _parse_llm_response called with llm_response: {llm_response}")
225
+ # Extract JSON from response
226
+ json_match = re.search(r'\{.*\}', llm_response, re.DOTALL)
227
+ if json_match:
228
+ print(f"🔍 DEBUG: Found JSON match: {json_match.group()}")
229
+ parsed = json.loads(json_match.group())
230
+ print(f"🔍 DEBUG: Parsed JSON: {parsed}")
231
+ else:
232
+ print(f"❌ DEBUG: No JSON found in response")
233
+ raise ValueError("No JSON found in response")
234
+
235
+ # Validate required fields
236
+ required_fields = ["verification_type", "content", "claim_context", "claim_date"]
237
+ for field in required_fields:
238
+ if field not in parsed:
239
+ print(f"❌ DEBUG: Missing required field: {field}")
240
+ raise ValueError(f"Missing required field: {field}")
241
+
242
+ print(f"🔍 DEBUG: Successfully parsed and validated response")
243
+ return parsed
244
+
245
+ except Exception as e:
246
+ print(f"❌ DEBUG: Failed to parse LLM response: {e}")
247
+ print(f"🔍 DEBUG: Returning safe defaults")
248
+ # Return safe defaults if parsing fails
249
+ return {
250
+ "verification_type": "image",
251
+ "content": {"files": [], "urls": [], "descriptions": []},
252
+ "claim_context": "Unknown context",
253
+ "claim_date": "Unknown date",
254
+ }
255
+
256
+ async def _post_process_response(self, parsed_response: Dict, files: Optional[List]) -> Dict:
257
+ """Post-process the parsed response and add file information"""
258
+ print(f"🔍 DEBUG: _post_process_response called with parsed_response: {parsed_response}, files: {files}")
259
+
260
+ # Add actual file information if files were provided
261
+ if files:
262
+ print(f"🔍 DEBUG: Processing {len(files)} files")
263
+ file_paths = []
264
+ for i, file in enumerate(files):
265
+ print(f"🔍 DEBUG: Saving file {i}: {file.filename}")
266
+ # Save file temporarily and get path
267
+ temp_path = await self._save_temp_file(file)
268
+ if temp_path:
269
+ file_paths.append(temp_path)
270
+ print(f"🔍 DEBUG: Saved file {i} to: {temp_path}")
271
+ else:
272
+ print(f"❌ DEBUG: Failed to save file {i}")
273
+
274
+ parsed_response["content"]["files"] = file_paths
275
+ print(f"🔍 DEBUG: Updated files list: {file_paths}")
276
+ else:
277
+ print(f"🔍 DEBUG: No files to process")
278
+
279
+ print(f"🔍 DEBUG: Final post-processed response: {parsed_response}")
280
+ return parsed_response
281
+
282
+ async def _save_temp_file(self, file) -> Optional[str]:
283
+ """Save uploaded file temporarily and return path"""
284
+ try:
285
+ print(f"🔍 DEBUG: _save_temp_file called for file: {file.filename}")
286
+ # Create temp file
287
+ import os
288
+ suffix = os.path.splitext(file.filename)[1] if file.filename else ""
289
+ print(f"🔍 DEBUG: Using suffix: {suffix}")
290
+ with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
291
+ content = await file.read()
292
+ print(f"🔍 DEBUG: Read {len(content)} bytes from file")
293
+ temp_file.write(content)
294
+ temp_path = temp_file.name
295
+ print(f"🔍 DEBUG: Saved temp file to: {temp_path}")
296
+ return temp_path
297
+ except Exception as e:
298
+ print(f"❌ DEBUG: Failed to save temp file: {e}")
299
+ return None
300
+
301
+ def cleanup_temp_files(self, file_paths: List[str]):
302
+ """Clean up temporary files"""
303
+ for path in file_paths:
304
+ try:
305
+ if os.path.exists(path):
306
+ os.unlink(path)
307
+ except Exception as e:
308
+ print(f"Failed to cleanup temp file {path}: {e}")
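A hedged usage sketch for the class above, assuming GEMINI_API_KEY and the other config values are set; the query string is illustrative only:

import asyncio
from services.input_processor import InputProcessor

async def demo():
    processor = InputProcessor()
    result = await processor.process_input(
        text_input="Is this photo really from 12/07/2024? https://example.com/flood.jpg"
    )
    print(result["verification_type"], result["content"]["urls"], result["claim_date"])

asyncio.run(demo())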
services/mongodb_service.py ADDED
@@ -0,0 +1,684 @@
1
+ """
2
+ MongoDB Service for Backend
3
+ Handles MongoDB operations for debunk posts
4
+ """
5
+
6
+ import os
7
+ import logging
8
+ from typing import List, Dict, Any, Optional
9
+ from pymongo import MongoClient
10
+ from pymongo.errors import ConnectionFailure
11
+ from dotenv import load_dotenv
12
+
13
+ load_dotenv()
14
+
15
+ # Setup logging
16
+ logger = logging.getLogger(__name__)
17
+
18
+ class MongoDBService:
19
+ """MongoDB service for backend operations"""
20
+
21
+ def __init__(self, connection_string: Optional[str] = None):
22
+ """Initialize MongoDB connection
23
+
24
+ Args:
25
+ connection_string: MongoDB connection string. If None, uses MONGO_CONNECTION_STRING env var
26
+ """
27
+ self.connection_string = connection_string or os.getenv('MONGO_CONNECTION_STRING')
28
+
29
+ if not self.connection_string:
30
+ raise ValueError("MongoDB connection string is required. Set MONGO_CONNECTION_STRING environment variable.")
31
+
32
+ self.client = None
33
+ self.db = None
34
+ self.collection = None
35
+ self.chat_sessions = None
36
+ self.chat_messages = None
37
+
38
+ self._connect()
39
+
40
+ def _connect(self):
41
+ """Establish MongoDB connection"""
42
+ try:
43
+ self.client = MongoClient(self.connection_string)
44
+ # Test connection
45
+ self.client.admin.command('ping')
46
+
47
+ # Use 'aegis' database
48
+ self.db = self.client["aegis"]
49
+ self.collection = self.db["debunk_posts"]
50
+
51
+ # Additional collections used by other features
52
+ self.chat_sessions = self.db["chat_sessions"]
53
+ self.chat_messages = self.db["chat_messages"]
54
+ self.subscriptions = self.db["subscriptions"]
55
+ self.users = self.db["users"]
56
+
57
+ logger.info("✅ Successfully connected to MongoDB")
58
+
59
+ except ConnectionFailure as e:
60
+ logger.error(f"❌ Failed to connect to MongoDB: {e}")
61
+ raise
62
+
63
+ def get_recent_posts(self, limit: int = 5) -> List[Dict[str, Any]]:
64
+ """Get recent debunk posts from MongoDB
65
+
66
+ Args:
67
+ limit: Maximum number of posts to return
68
+
69
+ Returns:
70
+ List of recent debunk posts
71
+ """
72
+ try:
73
+ logger.info(f"🔍 DEBUG: Starting get_recent_posts with limit={limit}")
74
+ logger.info(f"🔍 DEBUG: Collection name: {self.collection.name}")
75
+ logger.info(f"🔍 DEBUG: Database name: {self.db.name}")
76
+
77
+ # Check if collection exists and has documents
78
+ total_count = self.collection.count_documents({})
79
+ logger.info(f"🔍 DEBUG: Total documents in collection: {total_count}")
80
+
81
+ if total_count == 0:
82
+ logger.warning("⚠️ DEBUG: Collection is empty!")
83
+ return []
84
+
85
+ # Get sample document to check structure
86
+ sample_doc = self.collection.find_one()
87
+ if sample_doc:
88
+ logger.info(f"🔍 DEBUG: Sample document keys: {list(sample_doc.keys())}")
89
+ logger.info(f"🔍 DEBUG: Sample document _id: {sample_doc.get('_id')}")
90
+ logger.info(f"🔍 DEBUG: Sample document stored_at: {sample_doc.get('stored_at')}")
91
+ else:
92
+ logger.warning("⚠️ DEBUG: No sample document found!")
93
+
94
+ posts = list(self.collection
95
+ .find()
96
+ .sort("stored_at", -1)
97
+ .limit(limit))
98
+
99
+ logger.info(f"🔍 DEBUG: Raw query returned {len(posts)} posts")
100
+
101
+ # Convert ObjectId to string for JSON serialization
102
+ for i, post in enumerate(posts):
103
+ if '_id' in post:
104
+ post['_id'] = str(post['_id'])
105
+ logger.info(f"🔍 DEBUG: Post {i+1} keys: {list(post.keys())}")
106
+ logger.info(f"🔍 DEBUG: Post {i+1} stored_at: {post.get('stored_at')}")
107
+
108
+ logger.info(f"📋 Retrieved {len(posts)} recent debunk posts")
109
+ return posts
110
+
111
+ except Exception as e:
112
+ logger.error(f"❌ Failed to get recent posts: {e}")
113
+ logger.error(f"🔍 DEBUG: Exception type: {type(e).__name__}")
114
+ logger.error(f"🔍 DEBUG: Exception details: {str(e)}")
115
+ return []
116
+
117
+ def search_similar_rumours(self, query: str, similarity_threshold: float = 0.6, limit: int = 5) -> List[Dict[str, Any]]:
118
+ """Search for rumours similar to the query text using TF-IDF similarity
119
+
120
+ Args:
121
+ query: Search query text
122
+ similarity_threshold: Minimum similarity score (0.0 to 1.0)
123
+ limit: Maximum number of results to return
124
+
125
+ Returns:
126
+ List of similar rumours with similarity scores
127
+ """
128
+ try:
129
+ from sklearn.feature_extraction.text import TfidfVectorizer
130
+ from sklearn.metrics.pairwise import cosine_similarity
131
+ import re
132
+
133
+ if not query or not query.strip():
134
+ logger.warning("⚠️ Empty query provided")
135
+ return []
136
+
137
+ logger.info(f"🔍 Searching for rumours similar to: {query[:50]}...")
138
+
139
+ # Get all rumours from database
140
+ all_posts = list(self.collection.find())
141
+
142
+ if not all_posts:
143
+ logger.warning("⚠️ No rumours found in database")
144
+ return []
145
+
146
+ # Extract claim text from each post
147
+ claims = []
148
+ posts_data = []
149
+
150
+ for post in all_posts:
151
+ # Extract claim text - try multiple fields
152
+ claim_text = (
153
+ post.get('claim') or
154
+ post.get('summary') or
155
+ ""
156
+ )
157
+
158
+ # Handle nested claim structure
159
+ if isinstance(claim_text, dict):
160
+ claim_text = claim_text.get('text') or claim_text.get('claim_text') or ""
161
+
162
+ if claim_text and claim_text.strip():
163
+ claims.append(claim_text)
164
+ posts_data.append(post)
165
+
166
+ if not claims:
167
+ logger.warning("⚠️ No claims found in posts")
168
+ return []
169
+
170
+ # Preprocess query
171
+ def preprocess_text(text: str) -> str:
172
+ text = text.lower()
173
+ text = re.sub(r'[^\w\s]', ' ', text)
174
+ text = ' '.join(text.split())
175
+ return text
176
+
177
+ query_processed = preprocess_text(query)
178
+
179
+ # Calculate TF-IDF similarity
180
+ try:
181
+ vectorizer = TfidfVectorizer(
182
+ stop_words='english',
183
+ ngram_range=(1, 2),
184
+ max_features=500,
185
+ lowercase=True
186
+ )
187
+
188
+ # Combine query and claims for vectorization
189
+ all_texts = [query_processed] + [preprocess_text(c) for c in claims]
190
+ tfidf_matrix = vectorizer.fit_transform(all_texts)
191
+
192
+ # Calculate similarity between query and each claim
193
+ query_vector = tfidf_matrix[0:1]
194
+ claims_matrix = tfidf_matrix[1:]
195
+
196
+ similarities = cosine_similarity(query_vector, claims_matrix)[0]
197
+
198
+ except Exception as e:
199
+ logger.error(f"❌ TF-IDF calculation failed: {e}")
200
+ # Fallback to simple word overlap
201
+ similarities = []
202
+ query_words = set(query_processed.split())
203
+ for claim in claims:
204
+ claim_words = set(preprocess_text(claim).split())
205
+ if not query_words or not claim_words:
206
+ similarities.append(0.0)
207
+ else:
208
+ intersection = query_words.intersection(claim_words)
209
+ union = query_words.union(claim_words)
210
+ similarities.append(len(intersection) / len(union) if union else 0.0)
211
+
212
+ # Filter by threshold and sort by similarity
213
+ results = []
214
+ for i, (post, similarity) in enumerate(zip(posts_data, similarities)):
215
+ if similarity >= similarity_threshold:
216
+ # Convert ObjectId to string
217
+ if '_id' in post:
218
+ post['_id'] = str(post['_id'])
219
+
220
+ result = {
221
+ **post,
222
+ 'similarity_score': float(similarity)
223
+ }
224
+ results.append(result)
225
+
226
+ # Sort by similarity score (descending) and limit
227
+ results.sort(key=lambda x: x.get('similarity_score', 0), reverse=True)
228
+ results = results[:limit]
229
+
230
+ logger.info(f"✅ Found {len(results)} similar rumours (threshold: {similarity_threshold})")
231
+ return results
232
+
233
+ except Exception as e:
234
+ logger.error(f"❌ Failed to search similar rumours: {e}")
235
+ import traceback
236
+ logger.error(traceback.format_exc())
237
+ return []
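For reference, a stripped-down sketch of the TF-IDF/cosine-similarity ranking that search_similar_rumours performs over stored claims (the sample claims are invented):

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

query = "5g towers are spreading the virus"
claims = ["5G towers are spreading COVID-19", "Drinking hot water cures COVID-19"]

vectorizer = TfidfVectorizer(stop_words='english', ngram_range=(1, 2), lowercase=True)
matrix = vectorizer.fit_transform([query] + claims)      # row 0 is the query
scores = cosine_similarity(matrix[0:1], matrix[1:])[0]   # one score per stored claim
for claim, score in sorted(zip(claims, scores), key=lambda pair: -pair[1]):
    print(f"{score:.2f}  {claim}")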
238
+
239
+ # ---------- Chat sessions & messages ----------
240
+
241
+ def get_chat_sessions(
242
+ self,
243
+ user_id: Optional[str] = None,
244
+ anonymous_id: Optional[str] = None,
245
+ limit: int = 50,
246
+ ) -> List[Dict[str, Any]]:
247
+ """Return chat sessions for a given user or anonymous visitor."""
248
+ if self.chat_sessions is None:
249
+ return []
250
+
251
+ query: Dict[str, Any] = {}
252
+ if user_id:
253
+ query["user_id"] = user_id
254
+ if anonymous_id and not user_id:
255
+ # For anonymous visitors we only look at sessions that have not yet been
256
+ # attached to a concrete user id.
257
+ query["anonymous_id"] = anonymous_id
258
+ query["user_id"] = None
259
+
260
+ cursor = (
261
+ self.chat_sessions.find(query)
262
+ .sort("updated_at", -1)
263
+ .limit(limit)
264
+ )
265
+ sessions: List[Dict[str, Any]] = []
266
+ for doc in cursor:
267
+ doc["session_id"] = str(doc.get("session_id") or doc.get("_id"))
268
+ doc["_id"] = str(doc["_id"])
269
+ sessions.append(doc)
270
+ return sessions
271
+
272
+ def migrate_anonymous_sessions(self, anonymous_id: str, user_id: str) -> int:
273
+ """Attach existing anonymous sessions to a logged-in user.
274
+
275
+ This keeps history when a visitor later signs in.
276
+ """
277
+ if self.chat_sessions is None or not anonymous_id or not user_id:
278
+ return 0
279
+
280
+ result = self.chat_sessions.update_many(
281
+ {"anonymous_id": anonymous_id, "user_id": None},
282
+ {"$set": {"user_id": user_id}},
283
+ )
284
+ return int(getattr(result, "modified_count", 0))
285
+
286
+ def upsert_chat_session(self, payload: Dict[str, Any]) -> Dict[str, Any]:
287
+ """Create or update a chat session document.
288
+
289
+ Expected keys in payload: session_id (optional), user_id, anonymous_id,
290
+ title, last_verdict, last_summary.
291
+ """
292
+ if self.chat_sessions is None:
293
+ raise RuntimeError("chat_sessions collection not initialised")
294
+
295
+ from datetime import datetime
296
+
297
+ session_id = payload.get("session_id")
298
+ now = datetime.utcnow()
299
+
300
+ base_updates: Dict[str, Any] = {
301
+ "title": payload.get("title") or "New Chat",
302
+ "user_id": payload.get("user_id"),
303
+ "anonymous_id": payload.get("anonymous_id"),
304
+ "last_verdict": payload.get("last_verdict"),
305
+ "last_summary": payload.get("last_summary"),
306
+ "updated_at": now,
307
+ }
308
+
309
+ if session_id:
310
+ doc = self.chat_sessions.find_one_and_update(
311
+ {"session_id": session_id},
312
+ {"$set": base_updates},
313
+ upsert=True,
314
+ return_document=True,
315
+ )
316
+ else:
317
+ doc_to_insert = {
318
+ **base_updates,
319
+ "session_id": payload.get("session_id") or os.urandom(12).hex(),
320
+ "created_at": now,
321
+ }
322
+ inserted = self.chat_sessions.insert_one(doc_to_insert)
323
+ doc = self.chat_sessions.find_one({"_id": inserted.inserted_id})
324
+
325
+ doc["_id"] = str(doc["_id"])
326
+ doc["session_id"] = str(doc.get("session_id"))
327
+ return doc
328
+
329
+ def append_chat_messages(
330
+ self,
331
+ session_id: str,
332
+ messages: List[Dict[str, Any]],
333
+ user_id: Optional[str] = None,
334
+ anonymous_id: Optional[str] = None,
335
+ ) -> int:
336
+ """Append one or more messages to a given session."""
337
+ if self.chat_messages is None:
338
+ raise RuntimeError("chat_messages collection not initialised")
339
+
340
+ from datetime import datetime
341
+
342
+ docs = []
343
+ for msg in messages:
344
+ docs.append(
345
+ {
346
+ "session_id": session_id,
347
+ "user_id": user_id,
348
+ "anonymous_id": anonymous_id,
349
+ "role": msg.get("role"),
350
+ "content": msg.get("content"),
351
+ "attachments": msg.get("attachments") or [],
352
+ "verdict": msg.get("verdict"),
353
+ "confidence": msg.get("confidence"),
354
+ "sources": msg.get("sources"),
355
+ "created_at": msg.get("created_at") or datetime.utcnow(),
356
+ "metadata": msg.get("metadata") or {},
357
+ }
358
+ )
359
+
360
+ if not docs:
361
+ return 0
362
+
363
+ result = self.chat_messages.insert_many(docs)
364
+ return len(getattr(result, "inserted_ids", []))
365
+
366
+ def get_chat_messages(
367
+ self, session_id: str, limit: int = 100
368
+ ) -> List[Dict[str, Any]]:
369
+ """Return messages for a particular session ordered by time."""
370
+ if self.chat_messages is None:
371
+ return []
372
+
373
+ cursor = (
374
+ self.chat_messages.find({"session_id": session_id})
375
+ .sort("created_at", 1)
376
+ .limit(limit)
377
+ )
378
+ docs: List[Dict[str, Any]] = []
379
+ for doc in cursor:
380
+ doc["_id"] = str(doc["_id"])
381
+ docs.append(doc)
382
+ return docs
383
+
384
+ # ---------- Subscription management ----------
385
+
386
+ def upsert_subscription(self, subscription_data: Dict[str, Any]) -> Dict[str, Any]:
387
+ """
388
+ Create or update a subscription document
389
+
390
+ Expected keys in subscription_data:
391
+ - user_id: User ID
392
+ - razorpay_subscription_id: Razorpay subscription ID
393
+ - razorpay_plan_id: Razorpay plan ID
394
+ - plan_name: Plan name (e.g., "Pro")
395
+ - status: Subscription status (e.g., "active", "cancelled", "expired")
396
+ - amount: Subscription amount
397
+ - currency: Currency code
398
+ - current_start: Current billing cycle start
399
+ - current_end: Current billing cycle end
400
+ - next_billing_at: Next billing date
401
+ - created_at: Subscription creation date
402
+ - updated_at: Last update date
403
+ """
404
+ if self.subscriptions is None:
405
+ raise RuntimeError("subscriptions collection not initialised")
406
+
407
+ from datetime import datetime
408
+
409
+ razorpay_subscription_id = subscription_data.get("razorpay_subscription_id")
410
+ if not razorpay_subscription_id:
411
+ raise ValueError("razorpay_subscription_id is required")
412
+
413
+ now = datetime.utcnow()
414
+
415
+ # Prepare update data
416
+ update_data = {
417
+ **subscription_data,
418
+ "updated_at": now,
419
+ }
420
+
421
+ # Set created_at only if creating new subscription
422
+ existing = self.subscriptions.find_one(
423
+ {"razorpay_subscription_id": razorpay_subscription_id}
424
+ )
425
+
426
+ if not existing:
427
+ update_data["created_at"] = subscription_data.get("created_at") or now
428
+
429
+ # Upsert subscription
430
+ result = self.subscriptions.find_one_and_update(
431
+ {"razorpay_subscription_id": razorpay_subscription_id},
432
+ {"$set": update_data},
433
+ upsert=True,
434
+ return_document=True
435
+ )
436
+
437
+ if result:
438
+ result["_id"] = str(result["_id"])
439
+ logger.info(f"✅ Upserted subscription: {razorpay_subscription_id}")
440
+
441
+ # Update user's subscription tier if user_id is present
442
+ user_id = subscription_data.get("user_id")
443
+ status = subscription_data.get("status")
444
+ plan_name = subscription_data.get("plan_name", "Free")
445
+
446
+ if user_id:
447
+ if status == "active":
448
+ success = self.update_user_subscription_tier(user_id, plan_name)
449
+ if success:
450
+ logger.info(f"✅ Updated user {user_id} subscription tier to {plan_name} via upsert_subscription")
451
+ elif status in ["cancelled", "expired", "paused", "ended"]:
452
+ success = self.update_user_subscription_tier(user_id, "Free")
453
+ if success:
454
+ logger.info(f"✅ Updated user {user_id} subscription tier to Free (status: {status})")
455
+
456
+ return result
457
+
458
+ def get_user_subscription(
459
+ self,
460
+ user_id: str,
461
+ status: Optional[str] = None
462
+ ) -> Optional[Dict[str, Any]]:
463
+ """
464
+ Get user's active subscription
465
+
466
+ Args:
467
+ user_id: User ID
468
+ status: Filter by status (e.g., "active"). If None, returns most recent
469
+
470
+ Returns:
471
+ Subscription document or None
472
+ """
473
+ if self.subscriptions is None:
474
+ return None
475
+
476
+ query = {"user_id": user_id}
477
+ if status:
478
+ query["status"] = status
479
+
480
+ subscription = self.subscriptions.find_one(
481
+ query,
482
+ sort=[("created_at", -1)]
483
+ )
484
+
485
+ if subscription:
486
+ subscription["_id"] = str(subscription["_id"])
487
+
488
+ return subscription
489
+
490
+ def update_subscription_status(
491
+ self,
492
+ razorpay_subscription_id: str,
493
+ status: str,
494
+ additional_data: Optional[Dict[str, Any]] = None
495
+ ) -> Optional[Dict[str, Any]]:
496
+ """
497
+ Update subscription status from webhook events
498
+
499
+ Args:
500
+ razorpay_subscription_id: Razorpay subscription ID
501
+ status: New status
502
+ additional_data: Additional fields to update
503
+
504
+ Returns:
505
+ Updated subscription document or None
506
+ """
507
+ if self.subscriptions is None:
508
+ return None
509
+
510
+ from datetime import datetime
511
+
512
+ update_data = {
513
+ "status": status,
514
+ "updated_at": datetime.utcnow()
515
+ }
516
+
517
+ if additional_data:
518
+ update_data.update(additional_data)
519
+
520
+ result = self.subscriptions.find_one_and_update(
521
+ {"razorpay_subscription_id": razorpay_subscription_id},
522
+ {"$set": update_data},
523
+ return_document=True
524
+ )
525
+
526
+ if result:
527
+ result["_id"] = str(result["_id"])
528
+ logger.info(f"✅ Updated subscription status: {razorpay_subscription_id} -> {status}")
529
+
530
+ # Update user's subscription tier
531
+ user_id = result.get("user_id")
532
+ if user_id:
533
+ plan_name = result.get("plan_name", "Free")
534
+ if status == "active":
535
+ self.update_user_subscription_tier(user_id, plan_name)
536
+ elif status in ["cancelled", "expired", "paused"]:
537
+ self.update_user_subscription_tier(user_id, "Free")
538
+
539
+ return result
540
+
541
+ def get_subscription_by_razorpay_id(
542
+ self,
543
+ razorpay_subscription_id: str
544
+ ) -> Optional[Dict[str, Any]]:
545
+ """
546
+ Get subscription by Razorpay subscription ID
547
+
548
+ Args:
549
+ razorpay_subscription_id: Razorpay subscription ID
550
+
551
+ Returns:
552
+ Subscription document or None
553
+ """
554
+ if self.subscriptions is None:
555
+ return None
556
+
557
+ subscription = self.subscriptions.find_one(
558
+ {"razorpay_subscription_id": razorpay_subscription_id}
559
+ )
560
+
561
+ if subscription:
562
+ subscription["_id"] = str(subscription["_id"])
563
+
564
+ return subscription
565
+
566
+ def create_user(self, user_data: Dict[str, Any]) -> Dict[str, Any]:
567
+ """
568
+ Create a new user in MongoDB
569
+
570
+ Args:
571
+ user_data: User data including email, password (hashed), domain_preferences, etc.
572
+
573
+ Returns:
574
+ Created user document
575
+ """
576
+ if self.users is None:
577
+ raise RuntimeError("users collection not initialised")
578
+
579
+ from datetime import datetime
580
+ from bson import ObjectId
581
+
582
+ # Check if user already exists
583
+ existing = self.users.find_one({"email": user_data["email"]})
584
+ if existing:
585
+ raise ValueError("Email already registered")
586
+
587
+ user_doc = {
588
+ **user_data,
589
+ "created_at": datetime.utcnow(),
590
+ "updated_at": datetime.utcnow(),
591
+ }
592
+
593
+ result = self.users.insert_one(user_doc)
594
+ user_doc["_id"] = str(result.inserted_id)
595
+ user_doc["id"] = str(result.inserted_id)
596
+
597
+ logger.info(f"✅ Created user: {user_data['email']}")
598
+ return user_doc
599
+
600
+ def get_user_by_email(self, email: str) -> Optional[Dict[str, Any]]:
601
+ """
602
+ Get user by email
603
+
604
+ Args:
605
+ email: User email
606
+
607
+ Returns:
608
+ User document or None
609
+ """
610
+ if self.users is None:
611
+ return None
612
+
613
+ user = self.users.find_one({"email": email})
614
+ if user:
615
+ user["_id"] = str(user["_id"])
616
+ user["id"] = str(user["_id"])
617
+
618
+ return user
619
+
620
+ def get_user_by_id(self, user_id: str) -> Optional[Dict[str, Any]]:
621
+ """
622
+ Get user by ID
623
+
624
+ Args:
625
+ user_id: User ID
626
+
627
+ Returns:
628
+ User document or None
629
+ """
630
+ if self.users is None:
631
+ return None
632
+
633
+ from bson import ObjectId
634
+
635
+ try:
636
+ user = self.users.find_one({"_id": ObjectId(user_id)})
637
+ if user:
638
+ user["_id"] = str(user["_id"])
639
+ user["id"] = str(user["_id"])
640
+ return user
641
+ except Exception as e:
642
+ logger.error(f"Error getting user by ID: {e}")
643
+ return None
644
+
645
+ def update_user_subscription_tier(self, user_id: str, subscription_tier: str) -> bool:
646
+ """
647
+ Update user's subscription tier in user collection
648
+
649
+ Args:
650
+ user_id: User ID
651
+ subscription_tier: Subscription tier (Free, Pro, Enterprise)
652
+
653
+ Returns:
654
+ True if updated successfully, False otherwise
655
+ """
656
+ if self.users is None:
657
+ return False
658
+
659
+ from datetime import datetime
660
+ from bson import ObjectId
661
+
662
+ try:
663
+ result = self.users.update_one(
664
+ {"_id": ObjectId(user_id)},
665
+ {
666
+ "$set": {
667
+ "subscription_tier": subscription_tier,
668
+ "updated_at": datetime.utcnow()
669
+ }
670
+ }
671
+ )
672
+ if result.modified_count > 0:
673
+ logger.info(f"✅ Updated user {user_id} subscription tier to {subscription_tier}")
674
+ return True
675
+ return False
676
+ except Exception as e:
677
+ logger.error(f"Error updating user subscription tier: {e}")
678
+ return False
679
+
680
+ def close(self):
681
+ """Close MongoDB connection"""
682
+ if self.client:
683
+ self.client.close()
684
+ logger.info("🔌 MongoDB connection closed")
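A minimal usage sketch for the service above, assuming MONGO_CONNECTION_STRING is set in the environment:

from services.mongodb_service import MongoDBService

db = MongoDBService()                # connects to the 'aegis' database and pings it
for post in db.get_recent_posts(limit=3):
    print(post.get("stored_at"), post.get("claim"))
matches = db.search_similar_rumours("5G towers spread COVID", similarity_threshold=0.4)
print(f"{len(matches)} similar rumours found")
db.close()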
services/razorpay_service.py ADDED
@@ -0,0 +1,322 @@
1
+ """
2
+ Razorpay Service for Subscription Management
3
+ Handles Razorpay API interactions for subscription payments
4
+ """
5
+
6
+ import logging
7
+ import hmac
8
+ import hashlib
9
+ from typing import Dict, Any, Optional
10
+ import razorpay
11
+ from config import config
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
+ class RazorpayService:
17
+ """Service for handling Razorpay subscription operations"""
18
+
19
+ def __init__(self):
20
+ """Initialize Razorpay client"""
21
+ if not config.RAZORPAY_ID or not config.RAZORPAY_KEY:
22
+ logger.warning("⚠️ Razorpay credentials not configured. Subscription features will not work.")
23
+ self.client = None
24
+ else:
25
+ try:
26
+ # Initialize Razorpay client with explicit base URL
27
+ # Test mode uses different base URL, but SDK handles this automatically
28
+ self.client = razorpay.Client(auth=(config.RAZORPAY_ID, config.RAZORPAY_KEY))
29
+ logger.info(f"✅ Razorpay client initialized with Key ID: {config.RAZORPAY_ID[:8]}...")
30
+ except Exception as e:
31
+ logger.error(f"❌ Failed to initialize Razorpay client: {e}")
32
+ self.client = None
33
+
34
+ def create_plan(
35
+ self,
36
+ name: str,
37
+ amount: int,
38
+ currency: str = "INR",
39
+ interval: int = 1,
40
+ period: str = "monthly",
41
+ description: Optional[str] = None
42
+ ) -> Dict[str, Any]:
43
+ """
44
+ Create a subscription plan in Razorpay
45
+
46
+ Args:
47
+ name: Plan name
48
+ amount: Amount in smallest currency unit (paise for INR)
49
+ currency: Currency code (default: INR)
50
+ interval: Billing interval (default: 1)
51
+ period: Billing period - 'daily', 'weekly', 'monthly', 'yearly' (default: monthly)
52
+ description: Plan description
53
+
54
+ Returns:
55
+ Dict containing plan details from Razorpay
56
+ """
57
+ if not self.client:
58
+ raise ValueError("Razorpay client not initialized. Check RAZORPAY_ID and RAZORPAY_KEY.")
59
+
60
+ try:
61
+ plan_data = {
62
+ "period": period,
63
+ "interval": interval,
64
+ "item": {
65
+ "name": name,
66
+ "amount": amount,
67
+ "currency": currency,
68
+ "description": description or f"{name} subscription plan"
69
+ }
70
+ }
71
+
72
+ logger.debug(f"Creating plan with data: {plan_data}")
73
+ # Try creating plan - note: some accounts may need subscriptions enabled first
74
+ plan = self.client.plan.create(plan_data)
75
+ logger.info(f"✅ Created Razorpay plan: {plan.get('id')}")
76
+ return plan
77
+ except razorpay.errors.BadRequestError as e:
78
+ error_msg = str(e)
79
+ logger.error(f"❌ BadRequestError creating plan '{name}': {error_msg}")
80
+ # Check if it's a "URL not found" error which indicates subscriptions might not be enabled
81
+ if "not found" in error_msg.lower() or "url" in error_msg.lower():
82
+ logger.error(f" This error typically means:")
83
+ logger.error(f" 1. Subscriptions feature is NOT enabled on your Razorpay account")
84
+ logger.error(f" 2. You need to enable subscriptions in Razorpay Dashboard")
85
+ logger.error(f" 3. Go to: Razorpay Dashboard > Settings > Subscriptions")
86
+ logger.error(f" 4. Or contact Razorpay support to enable subscriptions")
87
+ # Check if plan already exists
88
+ elif "already exists" in error_msg.lower() or "duplicate" in error_msg.lower():
89
+ logger.warning(f"⚠️ Plan '{name}' may already exist")
90
+ raise
91
+ except razorpay.errors.ServerError as e:
92
+ logger.error(f"❌ ServerError creating plan '{name}': {e}")
93
+ raise
94
+ except Exception as e:
95
+ error_type = type(e).__name__
96
+ error_msg = str(e)
97
+ logger.error(f"❌ Failed to create Razorpay plan '{name}' ({error_type}): {error_msg}")
98
+ # Log more details if available
99
+ if hasattr(e, 'status_code'):
100
+ logger.error(f" Status code: {e.status_code}")
101
+ if hasattr(e, 'error'):
102
+ logger.error(f" Error details: {e.error}")
103
+ raise
104
+
105
+ def create_subscription(
106
+ self,
107
+ plan_id: str,
108
+ customer_notify: int = 1,
109
+ total_count: Optional[int] = None,
110
+ start_at: Optional[int] = None,
111
+ end_at: Optional[int] = None,
112
+ notes: Optional[Dict[str, str]] = None
113
+ ) -> Dict[str, Any]:
114
+ """
115
+ Create a subscription for a user
116
+
117
+ Args:
118
+ plan_id: Razorpay plan ID
119
+ customer_notify: Whether to notify customer (1 or 0)
120
+ total_count: Total number of billing cycles (None for infinite - will use end_at instead)
121
+ start_at: Unix timestamp for subscription start (None for immediate)
122
+ end_at: Unix timestamp for subscription end (used if total_count is None for infinite subscriptions)
123
+ notes: Additional notes/metadata
124
+
125
+ Returns:
126
+ Dict containing subscription details from Razorpay
127
+ """
128
+ if not self.client:
129
+ raise ValueError("Razorpay client not initialized. Check RAZORPAY_ID and RAZORPAY_KEY.")
130
+
131
+ try:
132
+ subscription_data = {
133
+ "plan_id": plan_id,
134
+ "customer_notify": customer_notify,
135
+ }
136
+
137
+ # Razorpay requires either total_count or end_at
138
+ # If end_at is provided, start_at is also required
139
+ # start_at must be in the future (add 60 seconds buffer to account for clock differences)
140
+ import time
141
+ current_time = int(time.time())
142
+ # Add 60 seconds buffer to ensure start_at is always in the future
143
+ future_start_time = current_time + 60
144
+
145
+ if total_count is not None:
146
+ subscription_data["total_count"] = total_count
147
+ elif end_at is not None:
148
+ subscription_data["end_at"] = end_at
149
+ # If end_at is set but start_at is not, set start_at to 60 seconds in the future
150
+ if start_at is None:
151
+ subscription_data["start_at"] = future_start_time
152
+ else:
153
+ # Set both start_at and end_at for infinite subscriptions
154
+ subscription_data["start_at"] = future_start_time
155
+ subscription_data["end_at"] = future_start_time + (10 * 365 * 24 * 60 * 60) # 10 years
156
+ logger.info("ℹ️ No total_count or end_at provided, setting start_at to 60 seconds in future and end_at to 10 years from start (infinite subscription)")
157
+
158
+ # Override start_at if explicitly provided (but ensure it's in the future)
159
+ if start_at is not None:
160
+ if start_at <= current_time:
161
+ # If provided start_at is in the past, add 60 seconds buffer
162
+ subscription_data["start_at"] = current_time + 60
163
+ logger.warning(f"⚠️ Provided start_at was in the past, adjusted to {subscription_data['start_at']}")
164
+ else:
165
+ subscription_data["start_at"] = start_at
166
+
167
+ if notes:
168
+ subscription_data["notes"] = notes
169
+
170
+ subscription = self.client.subscription.create(subscription_data)
171
+ logger.info(f"✅ Created Razorpay subscription: {subscription.get('id')}")
172
+ return subscription
173
+ except Exception as e:
174
+ logger.error(f"❌ Failed to create Razorpay subscription: {e}")
175
+ raise
176
+
177
+ def get_subscription(self, subscription_id: str) -> Dict[str, Any]:
178
+ """
179
+ Get subscription details from Razorpay
180
+
181
+ Args:
182
+ subscription_id: Razorpay subscription ID
183
+
184
+ Returns:
185
+ Dict containing subscription details
186
+ """
187
+ if not self.client:
188
+ raise ValueError("Razorpay client not initialized. Check RAZORPAY_ID and RAZORPAY_KEY.")
189
+
190
+ try:
191
+ subscription = self.client.subscription.fetch(subscription_id)
192
+ return subscription
193
+ except Exception as e:
194
+ logger.error(f"❌ Failed to fetch subscription {subscription_id}: {e}")
195
+ raise
196
+
197
+ def cancel_subscription(
198
+ self,
199
+ subscription_id: str,
200
+ cancel_at_cycle_end: bool = False
201
+ ) -> Dict[str, Any]:
202
+ """
203
+ Cancel a subscription
204
+
205
+ Args:
206
+ subscription_id: Razorpay subscription ID
207
+ cancel_at_cycle_end: If True, cancel at end of current cycle
208
+
209
+ Returns:
210
+ Dict containing updated subscription details
211
+ """
212
+ if not self.client:
213
+ raise ValueError("Razorpay client not initialized. Check RAZORPAY_ID and RAZORPAY_KEY.")
214
+
215
+ try:
216
+ if cancel_at_cycle_end:
217
+ subscription = self.client.subscription.cancel(
218
+ subscription_id,
219
+ {"cancel_at_cycle_end": 1}
220
+ )
221
+ else:
222
+ subscription = self.client.subscription.cancel(subscription_id)
223
+
224
+ logger.info(f"✅ Cancelled subscription: {subscription_id}")
225
+ return subscription
226
+ except Exception as e:
227
+ logger.error(f"❌ Failed to cancel subscription {subscription_id}: {e}")
228
+ raise
229
+
230
+ def verify_webhook_signature(
231
+ self,
232
+ payload: str,
233
+ signature: str
234
+ ) -> bool:
235
+ """
236
+ Verify Razorpay webhook signature
237
+
238
+ Args:
239
+ payload: Raw webhook payload (string)
240
+ signature: Webhook signature from X-Razorpay-Signature header
241
+
242
+ Returns:
243
+ True if signature is valid, False otherwise
244
+ """
245
+ if not config.RAZORPAY_WEBHOOK_SECRET:
246
+ logger.warning("⚠️ RAZORPAY_WEBHOOK_SECRET not set. Webhook verification skipped.")
247
+ return True # Allow if secret not configured (for development)
248
+
249
+ try:
250
+ expected_signature = hmac.new(
251
+ config.RAZORPAY_WEBHOOK_SECRET.encode('utf-8'),
252
+ payload.encode('utf-8'),
253
+ hashlib.sha256
254
+ ).hexdigest()
255
+
256
+ return hmac.compare_digest(expected_signature, signature)
257
+ except Exception as e:
258
+ logger.error(f"❌ Webhook signature verification failed: {e}")
259
+ return False
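As a sanity check, a matching test signature for verify_webhook_signature can be produced the same way; the secret and payload below are placeholders:

import hashlib
import hmac

secret = "whsec_testonly"                               # would come from RAZORPAY_WEBHOOK_SECRET
payload = '{"event": "subscription.activated"}'         # raw request body as a string
signature = hmac.new(secret.encode('utf-8'), payload.encode('utf-8'), hashlib.sha256).hexdigest()
# service.verify_webhook_signature(payload, signature) returns True when the
# configured webhook secret equals the same value.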
260
+
261
+ def get_plan(self, plan_id: str) -> Dict[str, Any]:
262
+ """
263
+ Get plan details from Razorpay
264
+
265
+ Args:
266
+ plan_id: Razorpay plan ID
267
+
268
+ Returns:
269
+ Dict containing plan details
270
+ """
271
+ if not self.client:
272
+ raise ValueError("Razorpay client not initialized. Check RAZORPAY_ID and RAZORPAY_KEY.")
273
+
274
+ try:
275
+ plan = self.client.plan.fetch(plan_id)
276
+ return plan
277
+ except Exception as e:
278
+ logger.error(f"❌ Failed to fetch plan {plan_id}: {e}")
279
+ raise
280
+
281
+ def list_plans(self, count: int = 10, skip: int = 0) -> Dict[str, Any]:
282
+ """
283
+ List all plans
284
+
285
+ Args:
286
+ count: Number of plans to fetch
287
+ skip: Number of plans to skip
288
+
289
+ Returns:
290
+ Dict containing list of plans
291
+ """
292
+ if not self.client:
293
+ raise ValueError("Razorpay client not initialized. Check RAZORPAY_ID and RAZORPAY_KEY.")
294
+
295
+ try:
296
+ # Try to list plans - this may fail if no plans exist or API endpoint is different
297
+ plans = self.client.plan.all({"count": count, "skip": skip})
298
+ return plans
299
+ except razorpay.errors.BadRequestError as e:
300
+ error_msg = str(e).lower()
301
+ logger.error(f"❌ BadRequestError listing plans: {e}")
302
+ # Check if it's a "not found" error which might mean subscriptions aren't enabled
303
+ if "not found" in error_msg or "url" in error_msg:
304
+ logger.warning("⚠️ Subscriptions API endpoint not found. This might mean:")
305
+ logger.warning(" 1. Subscriptions feature is not enabled on your Razorpay account")
306
+ logger.warning(" 2. Your API keys don't have subscription permissions")
307
+ logger.warning(" 3. You need to enable subscriptions in Razorpay Dashboard")
308
+ # Return empty structure if it's a "not found" type error
309
 + return {"items": [], "count": 0}
 + # Re-raise other BadRequestErrors so the caller is not silently handed None
 + raise
310
+ except razorpay.errors.ServerError as e:
311
+ logger.error(f"❌ ServerError listing plans: {e}")
312
+ raise
313
+ except Exception as e:
314
+ error_type = type(e).__name__
315
+ error_msg = str(e)
316
+ logger.error(f"❌ Failed to list plans ({error_type}): {error_msg}")
317
+ # If it's a "not found" error, return empty list instead of raising
318
+ if "not found" in error_msg.lower() or "404" in error_msg:
319
+ logger.warning("⚠️ No plans found or endpoint not available, returning empty list")
320
+ return {"items": [], "count": 0}
321
+ raise
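A hedged end-to-end sketch of the service above; the plan name, amount (in paise, per the create_plan docstring), and cycle count are examples only:

from services.razorpay_service import RazorpayService

svc = RazorpayService()              # needs RAZORPAY_ID / RAZORPAY_KEY in config
plan = svc.create_plan(name="Pro", amount=49900, currency="INR", period="monthly")
sub = svc.create_subscription(plan_id=plan["id"], total_count=12)
print(sub["id"], sub["status"])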
322
+
services/text_fact_checker.py ADDED
@@ -0,0 +1,905 @@
1
+ import requests
2
+ import json
3
+ from typing import Dict, List, Optional, Any
4
+ import google.generativeai as genai
5
+ from sklearn.feature_extraction.text import TfidfVectorizer
6
+ from sklearn.metrics.pairwise import cosine_similarity
7
+ import numpy as np
8
+ from config import config
9
+
10
+
11
+ class TextFactChecker:
12
+ """Service for fact-checking textual claims using Google Custom Search API with fact-checking sites"""
13
+
14
+ def __init__(self):
15
+ self.api_key = config.GOOGLE_API_KEY
16
+ self.search_engine_id = config.GOOGLE_FACT_CHECK_CX
17
+ self.base_url = "https://www.googleapis.com/customsearch/v1"
18
+
19
+ # Configure Gemini for analysis
20
+ if not config.GEMINI_API_KEY:
21
+ print("⚠️ WARNING: GEMINI_API_KEY not set. Gemini features will not work.")
22
+ else:
23
+ try:
24
+ genai.configure(api_key=config.GEMINI_API_KEY)
25
+ self.model = genai.GenerativeModel(config.GEMINI_MODEL)
26
+ print(f"✅ Gemini configured with model: {config.GEMINI_MODEL}")
27
+ except Exception as e:
28
+ print(f"❌ Failed to configure Gemini: {e}")
29
+ raise
30
+
31
+ if not self.api_key:
32
+ raise ValueError("Google Custom Search API key is required")
33
+ if not self.search_engine_id:
34
+ raise ValueError("Google Custom Search Engine ID (cx) is required")
35
+
36
+ async def verify(self, text_input: str, claim_context: str = "Unknown context", claim_date: str = "Unknown date") -> Dict[str, Any]:
37
+ """
38
+ Verify a textual claim using a three-phase approach:
39
+ 1. Immediate Gemini read-through for a quick, reference-free baseline
40
+ 2. Curated SERP (fact-check) harvesting with structured analysis
41
+ 3. A final Gemini synthesis that reasons over BOTH the baseline and SERP data
42
+
43
+ Args:
44
+ text_input: The text claim to verify
45
+ claim_context: Context about the claim
46
+ claim_date: Date when the claim was made
47
+
48
+ Returns:
49
+ Dictionary containing verification results
50
+ """
51
+ try:
52
+ print(f"🔍 DEBUG: TextFactChecker.verify called")
53
+ print(f"🔍 DEBUG: text_input = {text_input}")
54
+ print(f"🔍 DEBUG: claim_context = {claim_context}")
55
+ print(f"🔍 DEBUG: claim_date = {claim_date}")
56
+ print(f"Starting verification for: {text_input}")
57
+
58
+ # STEP 0: quick general-knowledge pass (baseline)
59
+ preliminary_analysis = await self._verify_with_general_knowledge(
60
+ text_input, claim_context, claim_date
61
+ )
62
+ print(f"🔍 DEBUG: preliminary_analysis = {preliminary_analysis}")
63
+
64
+ # STEP 1: Search for fact-checked claims in curated sources
65
+ search_results = await self._search_claims(text_input)
66
+ print(f"🔍 DEBUG: search_results = {search_results}")
67
+
68
+ curated_analysis = None
69
+ if search_results:
70
+ # Analyze the search results with Gemini
71
+ curated_analysis = self._analyze_results(search_results, text_input)
72
+
73
+ final_response = self._synthesize_final_response(
74
+ text_input=text_input,
75
+ claim_context=claim_context,
76
+ claim_date=claim_date,
77
+ preliminary_analysis=preliminary_analysis,
78
+ curated_analysis=curated_analysis,
79
+ search_results=search_results or []
80
+ )
81
+
82
+ if final_response:
83
+ return final_response
84
+
85
+ # Fallback ladder: curated -> preliminary -> default error
86
+ if curated_analysis:
87
+ return self._build_simple_response(
88
+ curated_analysis,
89
+ text_input,
90
+ claim_context,
91
+ claim_date,
92
+ search_results or [],
93
+ method_label="curated_sources_only",
94
+ extra_details={
95
+ "preliminary_analysis": preliminary_analysis,
96
+ "curated_analysis": curated_analysis,
97
+ },
98
+ )
99
+
100
+ if preliminary_analysis:
101
+ return self._build_simple_response(
102
+ preliminary_analysis,
103
+ text_input,
104
+ claim_context,
105
+ claim_date,
106
+ search_results or [],
107
+ method_label="general_knowledge_only",
108
+ extra_details={"preliminary_analysis": preliminary_analysis},
109
+ )
110
+
111
+ return {
112
+ "verified": False,
113
+ "verdict": "error",
114
+ "message": "Unable to generate a verification response.",
115
+ "details": {
116
+ "claim_text": text_input,
117
+ "claim_context": claim_context,
118
+ "claim_date": claim_date,
119
+ "fact_checks": search_results or [],
120
+ "analysis": {},
121
+ "verification_method": "unavailable",
122
+ },
123
+ }
124
+
125
+ except Exception as e:
126
+ print(f"❌ Error in verify: {e}")
127
+ return {
128
+ "verified": False,
129
+ "verdict": "error",
130
+ "message": f"Error during fact-checking: {str(e)}",
131
+ "details": {
132
+ "claim_text": text_input,
133
+ "claim_context": claim_context,
134
+ "claim_date": claim_date,
135
+ "error": str(e)
136
+ }
137
+ }
138
+
139
+ async def _search_claims(self, query: str) -> List[Dict[str, Any]]:
140
+ """
141
+ Search for fact-checked claims using Google Custom Search API with LLM-powered fallback strategies
142
+
143
+ Args:
144
+ query: The search query
145
+
146
+ Returns:
147
+ List of search results
148
+ """
149
+ # Try the original query first
150
+ results = await self._perform_search(query)
151
+
152
+ # If no results, use LLM to create alternative queries
153
+ if not results:
154
+ print("No results found, using LLM to create alternative queries...")
155
+
156
+ alternative_queries = self._create_alternative_queries(query)
157
+ print(f"Generated alternative queries: {alternative_queries}")
158
+
159
+ results = await self._perform_search(alternative_queries[0]) if alternative_queries else []
160
+ if results:
161
+ print(f"Found {len(results)} results with alternative query")
162
+ else:
163
+ print("No results found with alternative query")
164
+ return results
165
+
166
+ async def _perform_search(self, query: str) -> List[Dict[str, Any]]:
167
+ """
168
+ Perform a single search request
169
+
170
+ Args:
171
+ query: The search query
172
+
173
+ Returns:
174
+ List of search results
175
+ """
176
+ params = {
177
+ "q": query,
178
+ "key": self.api_key,
179
+ "cx": self.search_engine_id,
180
+ "num": 10 # Limit results to 10 for better performance
181
+ }
182
+
183
+ try:
184
+ print(f"Making request to: {self.base_url}")
185
+ print(f"Params: {params}")
186
+
187
+ response = requests.get(self.base_url, params=params, timeout=30)
188
+ print(f"Response status: {response.status_code}")
189
+ print(f"Response text: {response.text}")
190
+
191
+ response.raise_for_status()
192
+
193
+ data = response.json()
194
+ items = data.get("items", [])
195
+
196
+ return items
197
+
198
+ except requests.exceptions.RequestException as e:
199
+ raise Exception(f"API request failed: {str(e)}")
200
+ except json.JSONDecodeError as e:
201
+ raise Exception(f"Failed to parse API response: {str(e)}")
202
+ except Exception as e:
203
+ raise Exception(f"Search error: {str(e)}")
204
+
205
+ def _create_alternative_queries(self, query: str) -> List[str]:
206
+ """
207
+ Use the LLM to create alternative search queries (currently a single broader query)
208
+
209
+ Args:
210
+ query: Original query
211
+
212
+ Returns:
213
+ List of alternative queries to try
214
+ """
215
+ prompt = f"""
216
+ You are a search query optimizer. Given a fact-checking query that returned no results, create alternative queries that might find relevant information.
217
+
218
+ ORIGINAL QUERY: "{query}"
219
+
220
+ Create an alternative query:
221
+ 1. A BROADER query that removes specific assumptions and focuses on key entities/events
222
+
223
+ Examples:
224
+ - "Is it true the CEO of Astronomer resigned because of toxic workplace allegations?"
225
+ → Broader: "Astronomer CEO resignation"
226
+
227
+ - "Did Apple release a new iPhone with 5G in 2023?"
228
+ → Broader: "Apple iPhone 2023 release"
229
+
230
+ Respond in this exact JSON format:
231
+ {{
232
+ "broader_query": "your broader query here",
233
+ }}
234
+ """
235
+
236
+ try:
237
+ response = self.model.generate_content(prompt)
238
+ response_text = response.text.strip()
239
+
240
+ # Try to parse JSON response
241
+ if response_text.startswith('```json'):
242
+ response_text = response_text.replace('```json', '').replace('```', '').strip()
243
+ elif response_text.startswith('```'):
244
+ response_text = response_text.replace('```', '').strip()
245
+
246
+ alternatives = json.loads(response_text)
247
+
248
+ # Collect any alternative queries produced (the prompt currently only yields a broader query)
249
+ queries = []
250
+ if alternatives.get("broader_query") and alternatives["broader_query"] != query:
251
+ queries.append(alternatives["broader_query"])
252
+ if alternatives.get("simpler_query") and alternatives["simpler_query"] != query:
253
+ queries.append(alternatives["simpler_query"])
254
+
255
+ return queries
256
+
257
+ except Exception as e:
258
+ print(f"Failed to create alternative queries with LLM: {e}")
259
+
260
+ def _analyze_results(self, results: List[Dict[str, Any]], original_text: str) -> Dict[str, Any]:
261
+ """
262
+ Analyze the search results using Gemini AI to determine overall verdict
263
+
264
+ Args:
265
+ results: List of search results from the API
266
+ original_text: The original text being verified
267
+
268
+ Returns:
269
+ Analysis results including verdict and message
270
+ """
271
+ if not results:
272
+ return {
273
+ "verified": False,
274
+ "verdict": "no_content",
275
+ "message": "No fact-checked information found for this claim"
276
+ }
277
+
278
+ # Filter relevant results
279
+ relevant_results = []
280
+ for result in results:
281
+ title = result.get("title", "").lower()
282
+ snippet = result.get("snippet", "").lower()
283
+ original_lower = original_text.lower()
284
+
285
+ # Check if the result is relevant to our original text
286
+ relevance_score = self._calculate_relevance(result, original_text)
287
+
288
+ print(f"Relevance score for '{title[:50]}...': {relevance_score:.3f}")
289
+ if relevance_score > 0.05: # Very low threshold to catch all relevant results
290
+ relevant_results.append(result)
291
+
292
+ if not relevant_results:
293
+ return {
294
+ "verified": False,
295
+ "verdict": "no_content",
296
+ "message": "No relevant fact-checked information found for this specific claim"
297
+ }
298
+
299
+ # Use Gemini to analyze the results
300
+ try:
301
+ analysis = self._analyze_with_gemini(original_text, relevant_results)
302
+ return analysis
303
+ except Exception as e:
304
+ print(f"Gemini analysis failed: {str(e)}")
305
+ # Fallback to simple analysis
306
+ return self._fallback_analysis(relevant_results)
307
+
308
+ def _calculate_relevance(self, result: Dict[str, Any], original_text: str) -> float:
309
+ """
310
+ Calculate relevance score using TF-IDF similarity with multiple components
311
+
312
+ Args:
313
+ result: Search result dictionary
314
+ original_text: Original text being verified
315
+
316
+ Returns:
317
+ Relevance score between 0 and 1
318
+ """
319
+ score = 0.0
320
+
321
+ # 1. Title relevance (60% weight)
322
+ title = result.get("title", "")
323
+ if title:
324
+ title_score = self._tfidf_similarity(title, original_text)
325
+ score += title_score * 0.6
326
+
327
+ # 2. Snippet relevance (40% weight)
328
+ snippet = result.get("snippet", "")
329
+ if snippet:
330
+ snippet_score = self._tfidf_similarity(snippet, original_text)
331
+ score += snippet_score * 0.4
332
+
333
+ # 3. Fact-check specific bonus (10% weight)
334
+ factcheck_score = self._has_factcheck_data(result)
335
+ score += factcheck_score * 0.1
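+ # The three weights above sum to 1.1, so the combined score is clamped to 1.0 below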
336
+
337
+ return min(1.0, score)
338
+
339
+ def _tfidf_similarity(self, text1: str, text2: str) -> float:
340
+ """
341
+ Calculate TF-IDF cosine similarity between two texts
342
+
343
+ Args:
344
+ text1: First text
345
+ text2: Second text
346
+
347
+ Returns:
348
+ Similarity score between 0 and 1
349
+ """
350
+ if not text1.strip() or not text2.strip():
351
+ return 0.0
352
+
353
+ try:
354
+ # Preprocess texts
355
+ texts = [self._preprocess_text(text1), self._preprocess_text(text2)]
356
+
357
+ # Create TF-IDF vectors
358
+ vectorizer = TfidfVectorizer(
359
+ stop_words='english',
360
+ ngram_range=(1, 2), # Include bigrams
361
+ max_features=500,
362
+ lowercase=True
363
+ )
364
+
365
+ tfidf_matrix = vectorizer.fit_transform(texts)
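+ # Row 0 holds the search-result text (text1), row 1 holds the claim being verified (text2)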
366
+
367
+ # Calculate cosine similarity
368
+ similarity = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]
369
+
370
+ return float(similarity)
371
+
372
+ except Exception as e:
373
+ print(f"TF-IDF calculation failed: {e}")
374
+ # Fallback to simple word overlap
375
+ return self._simple_word_overlap(text1, text2)
376
+
377
+ def _preprocess_text(self, text: str) -> str:
378
+ """
379
+ Preprocess text for TF-IDF analysis
380
+
381
+ Args:
382
+ text: Raw text
383
+
384
+ Returns:
385
+ Preprocessed text
386
+ """
387
+ import re
388
+
389
+ # Convert to lowercase
390
+ text = text.lower()
391
+
392
+ # Remove special characters but keep spaces
393
+ text = re.sub(r'[^\w\s]', ' ', text)
394
+
395
+ # Remove extra whitespace
396
+ text = ' '.join(text.split())
397
+
398
+ return text
399
+
400
+ def _simple_word_overlap(self, text1: str, text2: str) -> float:
401
+ """
402
+ Fallback similarity calculation using word overlap
403
+
404
+ Args:
405
+ text1: First text
406
+ text2: Second text
407
+
408
+ Returns:
409
+ Similarity score between 0 and 1
410
+ """
411
+ words1 = set(text1.lower().split())
412
+ words2 = set(text2.lower().split())
413
+
414
+ if not words1 or not words2:
415
+ return 0.0
416
+
417
+ intersection = words1.intersection(words2)
418
+ union = words1.union(words2)
419
+
420
+ return len(intersection) / len(union) if union else 0.0
421
+
422
+ def _has_factcheck_data(self, result: Dict[str, Any]) -> float:
423
+ """
424
+ Check if result has fact-check specific metadata
425
+
426
+ Args:
427
+ result: Search result dictionary
428
+
429
+ Returns:
430
+ 1.0 if has fact-check data, 0.0 otherwise
431
+ """
432
+ # Check for ClaimReview metadata
433
+ pagemap = result.get("pagemap", {})
434
+ claim_review = pagemap.get("ClaimReview", [])
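+ # "ClaimReview" is the schema.org fact-check markup that some results expose via the pagemap field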
435
+
436
+ if claim_review:
437
+ return 1.0
438
+
439
+ # Check for fact-check related keywords in URL or title
440
+ url = result.get("link", "").lower()
441
+ title = result.get("title", "").lower()
442
+
443
+ factcheck_keywords = [
444
+ "fact-check", "factcheck", "snopes", "politifact",
445
+ "factcrescendo", "boomlive", "newschecker", "afp"
446
+ ]
447
+
448
+ for keyword in factcheck_keywords:
449
+ if keyword in url or keyword in title:
450
+ return 1.0
451
+
452
+ return 0.0
453
+
454
+ def _analyze_with_gemini(self, original_text: str, results: List[Dict[str, Any]]) -> Dict[str, Any]:
455
+ """
456
+ Use Gemini AI to analyze fact-check results and determine verdict
457
+
458
+ Args:
459
+ original_text: The original claim being verified
460
+ results: List of relevant search results
461
+
462
+ Returns:
463
+ Analysis results with verdict and message
464
+ """
465
+ # Prepare the prompt
466
+ results_text = ""
467
+ for i, result in enumerate(results[:5], 1): # Limit to top 5 results
468
+ title = result.get("title", "")
469
+ snippet = result.get("snippet", "")
470
+ link = result.get("link", "")
471
+ results_text += f"{i}. Title: {title}\n Snippet: {snippet}\n Link: {link}\n\n"
472
+
473
+ prompt = f"""
474
+ You are a fact-checking expert. Analyze the following claim against the provided fact-checking sources.
475
+
476
+ CLAIM TO VERIFY: "{original_text}"
477
+
478
+ FACT-CHECKING SOURCES:
479
+ {results_text}
480
+
481
+ STEP-BY-STEP ANALYSIS:
482
+ 1. What does each source say ACTUALLY HAPPENED?
483
+ 2. What does each source say was FAKE or MISLEADING?
484
+ 3. Based on the evidence, what is the most likely truth about the claim?
485
+
486
+ Think through this systematically and provide your analysis.
487
+
488
+ IMPORTANT INSTRUCTIONS FOR YOUR RESPONSE:
489
+ - When referring to sources in your message, DO NOT use specific numbers like "Source 1", "Source 3", or "Sources 2, 4, and 5"
490
+ - Instead, use generic references like "the sources", "multiple sources", "one source", "several sources"
491
+ - Example: Instead of "Sources 3, 4, and 5 confirm..." say "Multiple sources confirm..." or "The sources confirm..."
492
+
493
+ Respond in this exact JSON format:
494
+ {{
495
+ "verdict": "true|false|mixed|uncertain",
496
+ "verified": true|false,
497
+ "message": "Your explanation here",
498
+ "confidence": "high|medium|low",
499
+ "reasoning": "Your step-by-step reasoning process"
500
+ }}
501
+ """
502
+
503
+ try:
504
+ response = self.model.generate_content(prompt)
505
+ response_text = response.text.strip()
506
+
507
+ # Try to parse JSON response
508
+ if response_text.startswith('```json'):
509
+ response_text = response_text.replace('```json', '').replace('```', '').strip()
510
+ elif response_text.startswith('```'):
511
+ response_text = response_text.replace('```', '').strip()
512
+
513
+ analysis = json.loads(response_text)
514
+
515
+ # Ensure required fields
516
+ analysis.setdefault("verdict", "uncertain")
517
+ analysis.setdefault("verified", False)
518
+ analysis.setdefault("message", "Analysis completed")
519
+ analysis.setdefault("confidence", "medium")
520
+ analysis.setdefault("reasoning", "Analysis completed")
521
+
522
+ # Add metadata
523
+ analysis["relevant_results_count"] = len(results)
524
+ analysis["analysis_method"] = "gemini"
525
+
526
+ return analysis
527
+
528
+ except json.JSONDecodeError as e:
529
+ print(f"Failed to parse Gemini response as JSON: {e}")
530
+ print(f"Raw response: {response_text}")
531
+ return self._fallback_analysis(results)
532
+ except Exception as e:
533
+ print(f"Gemini analysis error: {e}")
534
+ return self._fallback_analysis(results)
535
+
536
+ def _format_source_summary(self, results: List[Dict[str, Any]]) -> str:
537
+ """Create a short, human readable summary of the surfaced sources."""
538
+ if not results:
539
+ return "No vetted sources surfaced yet."
540
+
541
+ highlights = []
542
+ for result in results[:3]:
543
+ title = result.get("title") or "Unknown source"
544
+ outlet = result.get("displayLink")
545
+ summary = title
546
+ if outlet:
547
+ summary += f" ({outlet})"
548
+ highlights.append(summary)
549
+
550
+ return "Sources surfaced: " + "; ".join(highlights)
551
+
552
+ def _fallback_analysis(self, results: List[Dict[str, Any]]) -> Dict[str, Any]:
553
+ """
554
+ Fallback analysis when Gemini fails
555
+
556
+ Args:
557
+ results: List of search results
558
+
559
+ Returns:
560
+ Basic analysis results
561
+ """
562
+ summary = self._format_source_summary(results)
563
+
564
+ return {
565
+ "verified": False,
566
+ "verdict": "uncertain",
567
+ "message": f"Could not verify this claim yet. {summary}",
568
+ "confidence": "low",
569
+ "relevant_results_count": len(results),
570
+ "analysis_method": "fallback"
571
+ }
572
+
573
+ async def _verify_with_general_knowledge(self, text_input: str, claim_context: str, claim_date: str) -> Dict[str, Any]:
574
+ """
575
+ Verify a claim using Gemini's general knowledge base directly (no curated sources)
576
+ This is used as a fallback when curated sources don't have enough information
577
+
578
+ Args:
579
+ text_input: The text claim to verify
580
+ claim_context: Context about the claim
581
+ claim_date: Date when the claim was made
582
+
583
+ Returns:
584
+ Analysis results with verdict and message
585
+ """
586
+ from datetime import datetime
587
+ current_date = datetime.now().strftime("%B %d, %Y")
588
+
589
+ prompt = f"""
590
+ You are a fact-checking expert AI with access to current information as of {current_date}.
591
+
592
+ CLAIM TO VERIFY: "{text_input}"
593
+ CONTEXT: {claim_context if claim_context != "Unknown context" else "No additional context provided"}
594
+ CLAIM DATE: {claim_date if claim_date != "Unknown date" else "Unknown"}
595
+
596
+ Your task is to verify this claim using your knowledge base. Since this is a direct factual question that may not be covered by news articles:
597
+
598
+ 1. **Use your most recent training data** to answer the question directly
599
+ 2. If this is about current events, political positions, or time-sensitive facts, be especially careful to provide the MOST CURRENT information
600
+ 3. If you're uncertain about recent changes, acknowledge that
601
+ 4. Always answer based on the most recent information you have
602
+
603
+ Provide a clear, direct answer. Think step-by-step:
604
+ - What does the claim assert?
605
+ - Based on your knowledge (as of your training cutoff and any recent data you have), is this true or false?
606
+ - If it's a time-sensitive claim, what is the current status?
607
+
608
+ Respond in this exact JSON format:
609
+ {{
610
+ "verdict": "true|false|mixed|uncertain",
611
+ "verified": true|false,
612
+ "message": "Your clear, direct answer explaining whether the claim is true or false and why",
613
+ "confidence": "high|medium|low",
614
+ "reasoning": "Your step-by-step reasoning process",
615
+ "knowledge_cutoff_note": "Optional note if the answer might be outdated or if recent changes are possible"
616
+ }}
617
+
618
+ IMPORTANT: For current events or political positions, provide the MOST RECENT information you have access to.
619
+ """
620
+
621
+ try:
622
+ response = self.model.generate_content(prompt)
623
+ response_text = response.text.strip()
624
+
625
+ # Try to parse JSON response
626
+ if response_text.startswith('```json'):
627
+ response_text = response_text.replace('```json', '').replace('```', '').strip()
628
+ elif response_text.startswith('```'):
629
+ response_text = response_text.replace('```', '').strip()
630
+
631
+ analysis = json.loads(response_text)
632
+
633
+ # Ensure required fields
634
+ analysis.setdefault("verdict", "uncertain")
635
+ analysis.setdefault("verified", False)
636
+ analysis.setdefault("message", "Analysis completed using general knowledge")
637
+ analysis.setdefault("confidence", "medium")
638
+ analysis.setdefault("reasoning", "Direct verification using AI knowledge base")
639
+
640
+ # Add metadata
641
+ analysis["analysis_method"] = "general_knowledge"
642
+ analysis["verification_date"] = current_date
643
+
644
+ print(f"✅ General knowledge verification result: {analysis['verdict']}")
645
+ return analysis
646
+
647
+ except json.JSONDecodeError as e:
648
+ print(f"Failed to parse Gemini general knowledge response as JSON: {e}")
649
+ print(f"Raw response: {response_text[:500]}")
650
+ # Try to extract plain text answer
651
+ return {
652
+ "verified": False,
653
+ "verdict": "uncertain",
654
+ "message": response_text if response_text else "Unable to verify using general knowledge",
655
+ "confidence": "low",
656
+ "analysis_method": "general_knowledge",
657
+ "error": "JSON parsing failed, used plain text response"
658
+ }
659
+ except Exception as e:
660
+ print(f"General knowledge verification error: {e}")
661
+ return {
662
+ "verified": False,
663
+ "verdict": "error",
664
+ "message": f"Error during general knowledge verification: {str(e)}",
665
+ "confidence": "low",
666
+ "analysis_method": "general_knowledge"
667
+ }
668
+
669
+ def _extract_verdict_from_content(self, content: str) -> str:
670
+ """
671
+ Extract verdict from search result content
672
+
673
+ Args:
674
+ content: Combined title and snippet text
675
+
676
+ Returns:
677
+ Verdict string
678
+ """
679
+ content_lower = content.lower()
680
+
681
+ # Look for verdict indicators
682
+ if any(word in content_lower for word in ["false", "misleading", "incorrect", "debunked", "not true"]):
683
+ return "false"
684
+ elif any(word in content_lower for word in ["true", "accurate", "correct", "verified", "confirmed", "is true", "is correct"]):
685
+ return "true"
686
+ elif any(word in content_lower for word in ["partially", "mixed", "somewhat", "half"]):
687
+ return "mixed"
688
+ elif any(word in content_lower for word in ["unverified", "unproven", "uncertain", "disputed"]):
689
+ return "uncertain"
690
+ else:
691
+ return "unknown"
692
+
693
+ def _analyze_verdicts(self, verdicts: List[str]) -> Dict[str, Any]:
694
+ """
695
+ Analyze verdicts to determine overall result
696
+
697
+ Args:
698
+ verdicts: List of verdict strings
699
+
700
+ Returns:
701
+ Analysis of verdicts
702
+ """
703
+ if not verdicts:
704
+ return {
705
+ "verified": False,
706
+ "verdict": "uncertain",
707
+ "message": "No verdicts found"
708
+ }
709
+
710
+ true_count = verdicts.count("true")
711
+ false_count = verdicts.count("false")
712
+ mixed_count = verdicts.count("mixed")
713
+ uncertain_count = verdicts.count("uncertain")
714
+ unknown_count = verdicts.count("unknown")
715
+
716
+ total = len(verdicts)
717
+
718
+ # Determine overall verdict
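+ # Precedence: any "false" verdict wins; otherwise any "true", then "mixed", then "uncertain"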
719
+ if false_count > 0:
720
+ overall_verdict = "false"
721
+ verified = False
722
+ elif true_count > 0 and false_count == 0:
723
+ overall_verdict = "true"
724
+ verified = True
725
+ elif mixed_count > 0:
726
+ overall_verdict = "mixed"
727
+ verified = False
728
+ elif uncertain_count > 0:
729
+ overall_verdict = "uncertain"
730
+ verified = False
731
+ else:
732
+ overall_verdict = "unknown"
733
+ verified = False
734
+
735
+ return {
736
+ "verified": verified,
737
+ "verdict": overall_verdict,
738
+ "true_count": true_count,
739
+ "false_count": false_count,
740
+ "mixed_count": mixed_count,
741
+ "uncertain_count": uncertain_count,
742
+ "unknown_count": unknown_count,
743
+ "total_verdicts": total
744
+ }
745
+
746
+ def _build_message(self, analysis: Dict[str, Any], results: List[Dict[str, Any]]) -> str:
747
+ """
748
+ Build a human-readable message based on the analysis
749
+
750
+ Args:
751
+ analysis: Analysis results
752
+ results: Relevant search results
753
+
754
+ Returns:
755
+ Formatted message
756
+ """
757
+ verdict = analysis["verdict"]
758
+ total_verdicts = analysis["total_verdicts"]
759
+ relevant_results_count = len(results)
760
+
761
+ base_messages = {
762
+ "true": "This claim appears to be TRUE based on fact-checking sources.",
763
+ "false": "This claim appears to be FALSE based on fact-checking sources.",
764
+ "mixed": "This claim has MIXED evidence - some parts are true, others are false.",
765
+ "uncertain": "This claim is UNCERTAIN - insufficient evidence to determine accuracy.",
766
+ "unknown": "This claim needs further investigation - verdict unclear from available sources.",
767
+ "no_content": "No fact-checked information found for this claim."
768
+ }
769
+
770
+ message = base_messages.get(verdict, "Unable to determine claim accuracy.")
771
+
772
+ # Add details about sources
773
+ if relevant_results_count > 0:
774
+ message += f" Found {relevant_results_count} relevant fact-check(s) with {total_verdicts} total verdicts."
775
+
776
+ # Add top sources
777
+ top_sources = []
778
+ for result in results[:3]: # Show top 3 sources
779
+ title = result.get("title", "Unknown")
780
+ link = result.get("link", "")
781
+ if title not in top_sources and link:
782
+ top_sources.append(f"{title}")
783
+
784
+ if top_sources:
785
+ message += f" Sources include: {', '.join(top_sources[:3])}."
786
+
787
+ return message
788
+
789
+ def _synthesize_final_response(
790
+ self,
791
+ text_input: str,
792
+ claim_context: str,
793
+ claim_date: str,
794
+ preliminary_analysis: Optional[Dict[str, Any]],
795
+ curated_analysis: Optional[Dict[str, Any]],
796
+ search_results: List[Dict[str, Any]],
797
+ ) -> Optional[Dict[str, Any]]:
798
+ """
799
+ Ask Gemini to reconcile preliminary + curated evidence into a single user-facing verdict.
800
+ """
801
+ try:
802
+ source_briefs = []
803
+ for item in search_results[:5]:
804
+ source_briefs.append(
805
+ {
806
+ "title": item.get("title"),
807
+ "snippet": item.get("snippet"),
808
+ "outlet": item.get("displayLink"),
809
+ "link": item.get("link"),
810
+ }
811
+ )
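+ # Only the top five surfaced sources are summarised into the synthesis prompt to keep it compact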
812
+
813
+ prompt = f"""
814
+ You are an AI fact-checking editor. Combine the baseline assessment and curated sources to produce the final answer.
815
+
816
+ CLAIM: "{text_input}"
817
+ CONTEXT: {claim_context}
818
+ CLAIM DATE: {claim_date}
819
+
820
+ BASELINE ANALYSIS (Gemini quick look):
821
+ {json.dumps(preliminary_analysis or {}, indent=2, ensure_ascii=False)}
822
+
823
+ CURATED FACT-CHECK ANALYSIS:
824
+ {json.dumps(curated_analysis or {}, indent=2, ensure_ascii=False)}
825
+
826
+ FACT-CHECK SOURCES:
827
+ {json.dumps(source_briefs, indent=2, ensure_ascii=False)}
828
+
829
+ INSTRUCTIONS:
830
+ - Make a reasoned decision (true/false/mixed/uncertain) based on the above.
831
+ - If evidence is thin, keep the tone cautious and say it is unverified/uncertain but mention what was found.
832
+ - Refer to sources generically (e.g., "one BBC article", "multiple outlets") — never number them.
833
+ - Provide clear, actionable messaging for the end user.
834
+
835
+ Respond ONLY in this JSON format:
836
+ {{
837
+ "verdict": "true|false|mixed|uncertain",
838
+ "verified": true|false,
839
+ "message": "Concise user-facing summary referencing evidence in plain language",
840
+ "confidence": "high|medium|low",
841
+ "reasoning": "Brief reasoning trail you followed",
842
+ "tone": "confident|balanced|cautious"
843
+ }}
844
+ """
845
+ response = self.model.generate_content(prompt)
846
+ response_text = response.text.strip()
847
+
848
+ if response_text.startswith("```json"):
849
+ response_text = response_text.replace("```json", "").replace("```", "").strip()
850
+ elif response_text.startswith("```"):
851
+ response_text = response_text.replace("```", "").strip()
852
+
853
+ final_analysis = json.loads(response_text)
854
+ final_analysis.setdefault("verdict", "uncertain")
855
+ final_analysis.setdefault("verified", False)
856
+ final_analysis.setdefault("message", "Unable to synthesize final verdict.")
857
+ final_analysis.setdefault("confidence", "low")
858
+ final_analysis.setdefault("reasoning", "")
859
+ final_analysis.setdefault("tone", "cautious")
860
+ final_analysis["analysis_method"] = "hybrid_synthesis"
861
+
862
+ return self._build_simple_response(
863
+ final_analysis,
864
+ text_input,
865
+ claim_context,
866
+ claim_date,
867
+ search_results,
868
+ method_label="hybrid_synthesis",
869
+ extra_details={
870
+ "preliminary_analysis": preliminary_analysis,
871
+ "curated_analysis": curated_analysis,
872
+ "source_highlights": source_briefs,
873
+ },
874
+ )
875
+ except Exception as e:
876
+ print(f"Hybrid synthesis error: {e}")
877
+ return None
878
+
879
+ def _build_simple_response(
880
+ self,
881
+ analysis: Dict[str, Any],
882
+ text_input: str,
883
+ claim_context: str,
884
+ claim_date: str,
885
+ search_results: List[Dict[str, Any]],
886
+ method_label: str,
887
+ extra_details: Optional[Dict[str, Any]] = None,
888
+ ) -> Dict[str, Any]:
889
+ details = {
890
+ "claim_text": text_input,
891
+ "claim_context": claim_context,
892
+ "claim_date": claim_date,
893
+ "fact_checks": search_results,
894
+ "analysis": analysis,
895
+ "verification_method": method_label,
896
+ }
897
+ if extra_details:
898
+ details.update(extra_details)
899
+
900
+ return {
901
+ "verified": analysis.get("verified", False),
902
+ "verdict": analysis.get("verdict", "uncertain"),
903
+ "message": analysis.get("message", "No message produced."),
904
+ "details": details,
905
+ }
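A minimal usage sketch for the new service (not part of the commit). It assumes GOOGLE_API_KEY, GOOGLE_FACT_CHECK_CX and GEMINI_API_KEY are already populated in `config`, as the constructor above requires; the claim text and dates are purely illustrative.

# Hypothetical caller exercising TextFactChecker.verify
import asyncio
from services.text_fact_checker import TextFactChecker

async def main():
    checker = TextFactChecker()
    result = await checker.verify(
        "Example claim seen in a forwarded message",
        claim_context="Shared on social media, original source unknown",
        claim_date="2024-01-01",
    )
    # Every branch of verify() returns verdict, verified, message and details
    print(result["verdict"], result["verified"])
    print(result["message"])

asyncio.run(main())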
services/video_verifier.py ADDED
@@ -0,0 +1,1310 @@
1
+ import os
2
+ import tempfile
3
+ from typing import Dict, Any, Optional, List, Tuple
4
+ import cv2
5
+ import requests
6
+ from PIL import Image, ImageDraw, ImageFont
7
+ import subprocess
8
+ import json
9
+ import asyncio
10
+
11
+ from .image_verifier import ImageVerifier
12
+ from .youtube_api import YouTubeDataAPI
13
+ from config import config
14
+ import time
15
+
16
+ class VideoVerifier:
17
+ def __init__(self, api_key: Optional[str] = None):
18
+ """
19
+ Initialize the VideoVerifier with SerpApi credentials
20
+
21
+ Args:
22
+ api_key: SerpApi API key. If None, will try to get from environment
23
+ """
24
+ self.api_key = api_key or config.SERP_API_KEY
25
+ if not self.api_key:
26
+ raise ValueError("SERP_API_KEY environment variable or api_key parameter is required")
27
+
28
+ # Initialize image verifier for frame analysis
29
+ self.image_verifier = ImageVerifier(api_key)
30
+
31
+ # Initialize YouTube Data API client
32
+ self.youtube_api = YouTubeDataAPI(api_key)
33
+
34
+ # Video processing parameters
35
+ self.frame_interval = 4 # Extract frame every 4 seconds
36
+ self.clip_duration = 5 # Duration of misleading clip in seconds
37
+
38
+ async def verify(self, video_path: Optional[str] = None, claim_context: str = "", claim_date: str = "", video_url: Optional[str] = None) -> Dict[str, Any]:
39
+ """
40
+ Verify a video and generate a visual counter-measure video if false context is detected
41
+
42
+ Args:
43
+ video_path: Path to the video file
44
+ claim_context: The claimed context of the video
45
+ claim_date: The claimed date of the video
46
+
47
+ Returns:
48
+ Dictionary with verification results and output file path
49
+ """
50
+ try:
51
+ used_ytdlp = False  # Track whether yt-dlp has already been used, so the later fallback is not repeated
+ # If a video URL is supplied, determine the best verification approach
52
+ if video_url and not video_path:
53
+ # Check if it's a YouTube URL and use API verification
54
+ if self._is_youtube_url(video_url):
55
+ return await self._verify_youtube_video(video_url, claim_context, claim_date)
56
+
57
+ # Check if it's a supported platform for yt-dlp
58
+ if self._is_supported_platform(video_url):
59
+ return await self._verify_with_ytdlp(video_url, claim_context, claim_date)
60
+
61
+ # For unsupported platforms, try direct download first; if not a real video, fallback to yt-dlp
62
+ try:
63
+ video_path = await self._download_video(video_url)
64
+ except Exception as direct_err:
65
+ # Always attempt yt-dlp as fallback when available
66
+ try:
67
+ video_path = await self._download_with_ytdlp(video_url)
68
+ used_ytdlp = True
69
+ except Exception as ytdlp_err:
70
+ # Return the more informative error
71
+ raise RuntimeError(f"Direct download failed: {direct_err}; yt-dlp failed: {ytdlp_err}")
72
+
73
+ # Extract key frames from video
74
+ frames = await self._extract_key_frames(video_path)
75
+
76
+ # If extraction failed and we have a URL, try yt-dlp fallback once
77
+ if (not frames) and video_url and config.USE_STREAM_DOWNLOADER and not used_ytdlp:
78
+ video_path = await self._download_with_ytdlp(video_url)
79
+ used_ytdlp = True
80
+ frames = await self._extract_key_frames(video_path)
81
+
82
+ if not frames:
83
+ return {
84
+ "verified": False,
85
+ "message": "Could not extract frames from video",
86
+ "details": {"error": "Frame extraction failed"}
87
+ }
88
+
89
+ # STEP 0: Analyze frames with Gemini Vision first (direct frame analysis)
90
+ preliminary_vision_analysis = await self._analyze_frames_with_vision(
91
+ frames, claim_context, claim_date
92
+ )
93
+ print(f"✅ Gemini Vision analysis result: {preliminary_vision_analysis.get('overall_verdict', 'unknown')}")
94
+
95
+ # STEP 1: Analyze frames with reverse image search (existing approach)
96
+ # Wrap in try/except so vision analysis can still proceed if search fails
97
+ reverse_search_analysis = None
98
+ try:
99
+ reverse_search_analysis = await self._analyze_frames(frames, claim_context, claim_date)
100
+ except Exception as search_error:
101
+ print(f"⚠️ Reverse image search analysis failed (will use vision analysis only): {search_error}")
102
+ # Continue with vision analysis only
103
+
104
+ # STEP 2: Synthesize vision analysis + reverse image search results
105
+ if reverse_search_analysis:
106
+ final_analysis = self._synthesize_video_analyses(
107
+ preliminary_vision_analysis=preliminary_vision_analysis,
108
+ reverse_search_analysis=reverse_search_analysis,
109
+ frames=frames,
110
+ claim_context=claim_context,
111
+ claim_date=claim_date,
112
+ )
113
+
114
+ if final_analysis:
115
+ analysis = final_analysis
116
+ else:
117
+ # Fallback: use vision analysis if synthesis fails
118
+ if preliminary_vision_analysis.get("overall_verdict") in ["false", "true"]:
119
+ analysis = preliminary_vision_analysis
120
+ else:
121
+ analysis = reverse_search_analysis
122
+ else:
123
+ # No reverse search results, use vision analysis only
124
+ print("⚠️ Using vision analysis only (reverse image search unavailable)")
125
+ analysis = preliminary_vision_analysis
126
+
127
+ if analysis.get("overall_verdict") != "false":
128
+ return {
129
+ "verified": analysis.get("overall_verdict") == "true",
130
+ "message": analysis.get("overall_summary") or "No decisive false context detected in video frames",
131
+ "details": {
132
+ "frames_analyzed": len(frames),
133
+ "overall_verdict": analysis.get("overall_verdict"),
134
+ "frame_summaries": analysis.get("frame_summaries", []),
135
+ }
136
+ }
137
+
138
+ # Generate video counter-measure only if we have a specific false frame
139
+ false_ctx = analysis.get("false_context_frame")
140
+ if not false_ctx:
141
+ return {
142
+ "verified": False,
143
+ "message": analysis.get("overall_summary") or "False context inferred but no specific frame identified for counter-measure.",
144
+ "details": {
145
+ "frames_analyzed": len(frames),
146
+ "overall_verdict": analysis.get("overall_verdict"),
147
+ "frame_summaries": analysis.get("frame_summaries", []),
148
+ }
149
+ }
150
+ output_path = await self._generate_video_counter_measure(
151
+ video_path, false_ctx, claim_context, claim_date
152
+ )
153
+
154
+ result: Dict[str, Any] = {
155
+ "verified": True,
156
+ "message": "False context detected and video counter-measure generated",
157
+ "output_path": output_path,
158
+ "false_context_frame": analysis.get("false_context_frame"),
159
+ "details": {
160
+ "frames_analyzed": len(frames),
161
+ "claim_context": claim_context,
162
+ "claim_date": claim_date
163
+ }
164
+ }
165
+ # Attempt Cloudinary cleanup (best-effort) before responding
166
+ await self._cloudinary_cleanup_prefix(config.CLOUDINARY_FOLDER or "frames")
167
+ return result
168
+
169
+ except Exception as e:
170
+ return {
171
+ "verified": False,
172
+ "message": f"Error during video verification: {str(e)}",
173
+ "details": {"error": str(e)}
174
+ }
175
+
176
+ async def _download_video(self, url: str) -> str:
177
+ try:
178
+ resp = requests.get(url, stream=True, timeout=30)
179
+ resp.raise_for_status()
180
+ content_type = (resp.headers.get("Content-Type") or "").lower()
181
+ looks_like_video = ("video" in content_type) or url.lower().endswith((".mp4", ".mov", ".mkv", ".webm", ".m4v"))
182
+ if not looks_like_video:
183
+ raise RuntimeError(f"URL is not a direct video (content-type={content_type})")
184
+ suffix = ".mp4"
185
+ tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
186
+ bytes_written = 0
187
+ for chunk in resp.iter_content(chunk_size=1 << 14):
188
+ if chunk:
189
+ tmp.write(chunk)
190
+ bytes_written += len(chunk)
191
+ tmp.close()
192
+ # Heuristic: reject tiny files that aren't valid containers
193
+ if bytes_written < 200 * 1024: # 200KB
194
+ os.unlink(tmp.name)
195
+ raise RuntimeError("Downloaded file too small to be a valid video")
196
+ return tmp.name
197
+ except Exception as e:
198
+ raise RuntimeError(f"Failed to download video: {e}")
199
+
200
+ async def _download_with_ytdlp(self, url: str) -> str:
201
+ try:
202
+ # Resolve yt-dlp binary
203
+ ytdlp_bin = self._resolve_ytdlp_bin()
204
+ tmp_dir = tempfile.mkdtemp()
205
+ out_path = os.path.join(tmp_dir, "video.%(ext)s")
206
+ cmd = [
207
+ ytdlp_bin,
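+ # Format selector: prefer streams at or below 720p to keep the download small, falling back to best available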
208
+ "-f", "best[height<=720]/best[height<=480]/best",
209
+ "--no-warnings",
210
+ "--no-call-home",
211
+ "--no-progress",
212
+ "--restrict-filenames",
213
+ "--socket-timeout", "30",
214
+ "--retries", "3",
215
+ "--fragment-retries", "3",
216
+ "--user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
217
+ "--extractor-retries", "3",
218
+ "-o", out_path,
219
+ url,
220
+ ]
221
+ proc = await asyncio.create_subprocess_exec(
222
+ *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
223
+ )
224
+ try:
225
+ await asyncio.wait_for(proc.communicate(), timeout=config.STREAM_DOWNLOAD_TIMEOUT)
226
+ except asyncio.TimeoutError:
227
+ proc.kill()
228
+ raise RuntimeError("yt-dlp timed out")
229
+ if proc.returncode != 0:
230
+ # capture stderr for diagnostics
231
+ raise RuntimeError("yt-dlp failed (non-zero exit)")
232
+ # Resolve resulting file (first mp4 in dir)
233
+ for fname in os.listdir(tmp_dir):
234
+ if fname.lower().endswith((".mp4", ".mkv", ".webm", ".mov")):
235
+ return os.path.join(tmp_dir, fname)
236
+ raise RuntimeError("yt-dlp produced no playable file")
237
+ except Exception as e:
238
+ raise RuntimeError(f"yt-dlp error: {e}")
239
+
240
+ def _resolve_ytdlp_bin(self) -> str:
241
+ # Prefer configured path if executable, else try PATH
242
+ cand = config.YTDLP_BIN or "yt-dlp"
243
+ if os.path.isabs(cand) and os.path.isfile(cand) and os.access(cand, os.X_OK):
244
+ return cand
245
+ from shutil import which
246
+ found = which(cand) or which("yt-dlp")
247
+ if not found:
248
+ raise RuntimeError("yt-dlp not found on PATH; install yt-dlp or set YTDLP_BIN")
249
+ return found
250
+
251
+ def _is_youtube_url(self, url: str) -> bool:
252
+ """
253
+ Check if the URL is a YouTube URL
254
+
255
+ Args:
256
+ url: URL to check
257
+
258
+ Returns:
259
+ True if it's a YouTube URL, False otherwise
260
+ """
261
+ youtube_domains = [
262
+ 'youtube.com',
263
+ 'www.youtube.com',
264
+ 'youtu.be',
265
+ 'www.youtu.be',
266
+ 'm.youtube.com'
267
+ ]
268
+
269
+ url_lower = url.lower()
270
+ return any(domain in url_lower for domain in youtube_domains)
271
+
272
+ def _is_supported_platform(self, url: str) -> bool:
273
+ """
274
+ Check if the URL is from a platform supported by yt-dlp
275
+
276
+ Args:
277
+ url: URL to check
278
+
279
+ Returns:
280
+ True if it's a supported platform, False otherwise
281
+ """
282
+ supported_domains = [
283
+ # Video platforms
284
+ 'instagram.com', 'www.instagram.com',
285
+ 'tiktok.com', 'www.tiktok.com', 'vm.tiktok.com',
286
+ 'twitter.com', 'x.com', 'www.twitter.com', 'www.x.com',
287
+ 'facebook.com', 'www.facebook.com', 'fb.watch',
288
+ 'vimeo.com', 'www.vimeo.com',
289
+ 'twitch.tv', 'www.twitch.tv',
290
+ 'dailymotion.com', 'www.dailymotion.com',
291
+ 'youtube.com', 'www.youtube.com', 'youtu.be', 'www.youtu.be',
292
+
293
+ # Image platforms
294
+ 'imgur.com', 'www.imgur.com',
295
+ 'flickr.com', 'www.flickr.com',
296
+
297
+ # Audio platforms
298
+ 'soundcloud.com', 'www.soundcloud.com',
299
+ 'mixcloud.com', 'www.mixcloud.com',
300
+
301
+ # Alternative platforms
302
+ 'lbry.tv', 'odysee.com', 'www.odysee.com',
303
+ 'telegram.org', 't.me',
304
+ 'linkedin.com', 'www.linkedin.com',
305
+
306
+ # Other platforms
307
+ 'streamable.com', 'www.streamable.com',
308
+ 'rumble.com', 'www.rumble.com',
309
+ 'bitchute.com', 'www.bitchute.com',
310
+ 'peertube.tv', 'www.peertube.tv'
311
+ ]
312
+
313
+ url_lower = url.lower()
314
+ return any(domain in url_lower for domain in supported_domains)
315
+
316
+ async def _verify_with_ytdlp(self, url: str, claim_context: str, claim_date: str) -> Dict[str, Any]:
317
+ """
318
+ Verify a video from supported platforms using yt-dlp + visual analysis
319
+
320
+ Args:
321
+ url: Video URL from supported platform
322
+ claim_context: The claimed context of the video
323
+ claim_date: The claimed date of the video
324
+
325
+ Returns:
326
+ Dictionary with verification results
327
+ """
328
+ try:
329
+ print(f"🔍 DEBUG: Verifying video with yt-dlp: {url}")
330
+
331
+ # Download video using yt-dlp
332
+ video_path = await self._download_with_ytdlp(url)
333
+
334
+ # Extract frames for visual verification
335
+ frames = await self._extract_key_frames(video_path)
336
+
337
+ if frames:
338
+ # Perform visual analysis on frames
339
+ visual_analysis = await self._analyze_frames_visually(frames, claim_context, claim_date)
340
+
341
+ # Get platform info
342
+ platform = self._get_platform_name(url)
343
+
344
+ return {
345
+ 'verified': visual_analysis.get('verified', True),
346
+ 'message': f"✅ Video verified from {platform}: {visual_analysis.get('message', 'Visual analysis completed')}",
347
+ 'details': {
348
+ 'verification_method': 'ytdlp_plus_visual',
349
+ 'platform': platform,
350
+ 'url': url,
351
+ 'claim_context': claim_context,
352
+ 'claim_date': claim_date,
353
+ 'visual_analysis': visual_analysis.get('details', {}),
354
+ 'frames_analyzed': len(frames)
355
+ },
356
+ 'reasoning': f"Video verified from {platform} using yt-dlp and visual analysis. {visual_analysis.get('reasoning', '')}",
357
+ 'sources': [url]
358
+ }
359
+ else:
360
+ # Fallback to basic verification if frames can't be extracted
361
+ platform = self._get_platform_name(url)
362
+ return {
363
+ 'verified': True,
364
+ 'message': f"✅ Video verified from {platform} (basic verification - frame extraction failed)",
365
+ 'details': {
366
+ 'verification_method': 'ytdlp_basic',
367
+ 'platform': platform,
368
+ 'url': url,
369
+ 'claim_context': claim_context,
370
+ 'claim_date': claim_date,
371
+ 'limitation': 'Visual frame analysis unavailable'
372
+ },
373
+ 'reasoning': f"Video verified from {platform} using yt-dlp. Visual analysis was not possible due to frame extraction issues.",
374
+ 'sources': [url]
375
+ }
376
+
377
+ except Exception as e:
378
+ platform = self._get_platform_name(url)
379
+ return {
380
+ 'verified': False,
381
+ 'message': f'Error during {platform} video verification: {str(e)}',
382
+ 'details': {'error': str(e), 'platform': platform},
383
+ 'reasoning': f'An error occurred while verifying the {platform} video: {str(e)}',
384
+ 'sources': [url]
385
+ }
386
+
387
+ def _get_platform_name(self, url: str) -> str:
388
+ """Get the platform name from URL"""
389
+ url_lower = url.lower()
390
+
391
+ if 'instagram.com' in url_lower:
392
+ return 'Instagram'
393
+ elif 'tiktok.com' in url_lower or 'vm.tiktok.com' in url_lower:
394
+ return 'TikTok'
395
+ elif 'twitter.com' in url_lower or 'x.com' in url_lower:
396
+ return 'Twitter/X'
397
+ elif 'facebook.com' in url_lower or 'fb.watch' in url_lower:
398
+ return 'Facebook'
399
+ elif 'vimeo.com' in url_lower:
400
+ return 'Vimeo'
401
+ elif 'twitch.tv' in url_lower:
402
+ return 'Twitch'
403
+ elif 'dailymotion.com' in url_lower:
404
+ return 'DailyMotion'
405
+ elif 'imgur.com' in url_lower:
406
+ return 'Imgur'
407
+ elif 'soundcloud.com' in url_lower:
408
+ return 'SoundCloud'
409
+ elif 'mixcloud.com' in url_lower:
410
+ return 'Mixcloud'
411
+ elif 'lbry.tv' in url_lower or 'odysee.com' in url_lower:
412
+ return 'LBRY/Odysee'
413
+ elif 'telegram.org' in url_lower or 't.me' in url_lower:
414
+ return 'Telegram'
415
+ elif 'linkedin.com' in url_lower:
416
+ return 'LinkedIn'
417
+ else:
418
+ return 'Unknown Platform'
419
+
420
+ async def _verify_youtube_video(self, url: str, claim_context: str, claim_date: str) -> Dict[str, Any]:
421
+ """
422
+ Verify a YouTube video using hybrid approach: API metadata + yt-dlp for visual analysis
423
+
424
+ Args:
425
+ url: YouTube URL
426
+ claim_context: The claimed context of the video
427
+ claim_date: The claimed date of the video
428
+
429
+ Returns:
430
+ Dictionary with verification results
431
+ """
432
+ try:
433
+ # Step 1: Use YouTube Data API to verify the video exists and get metadata
434
+ verification_result = self.youtube_api.verify_video_exists(url)
435
+
436
+ if not verification_result.get('verified'):
437
+ return {
438
+ 'verified': False,
439
+ 'message': f'YouTube video verification failed: {verification_result.get("message", "Unknown error")}',
440
+ 'details': verification_result.get('details', {}),
441
+ 'reasoning': f'The video could not be verified through YouTube Data API. {verification_result.get("message", "Unknown error")}',
442
+ 'sources': [url]
443
+ }
444
+
445
+ # Step 2: Video exists, now try to download for visual analysis
446
+ video_details = verification_result.get('details', {})
447
+
448
+ try:
449
+ # Attempt to download video for frame analysis
450
+ print(f"🔍 DEBUG: Attempting to download video for visual analysis: {url}")
451
+ video_path = await self._download_with_ytdlp(url)
452
+
453
+ # Extract frames for visual verification
454
+ frames = await self._extract_key_frames(video_path)
455
+
456
+ if frames:
457
+ # Perform visual analysis on frames
458
+ visual_analysis = await self._analyze_frames_visually(frames, claim_context, claim_date)
459
+
460
+ # Combine metadata + visual analysis
461
+ return {
462
+ 'verified': visual_analysis.get('verified', True),
463
+ 'message': f"✅ Video verified with visual analysis: '{video_details.get('title', 'Unknown Title')}' by {video_details.get('channel_title', 'Unknown Channel')}\n\n{visual_analysis.get('message', '')}",
464
+ 'details': {
465
+ 'verification_method': 'hybrid_youtube_api_plus_visual',
466
+ 'video_id': video_details.get('video_id'),
467
+ 'title': video_details.get('title'),
468
+ 'channel_title': video_details.get('channel_title'),
469
+ 'published_at': video_details.get('published_at'),
470
+ 'duration': video_details.get('duration'),
471
+ 'view_count': video_details.get('view_count'),
472
+ 'thumbnail_url': video_details.get('thumbnail_url'),
473
+ 'claim_context': claim_context,
474
+ 'claim_date': claim_date,
475
+ 'visual_analysis': visual_analysis.get('details', {}),
476
+ 'frames_analyzed': len(frames)
477
+ },
478
+ 'reasoning': f"Video verified through YouTube Data API and visual analysis. {visual_analysis.get('reasoning', '')}",
479
+ 'sources': [url]
480
+ }
481
+ else:
482
+ # Fallback to metadata-only verification
483
+ print(f"⚠️ DEBUG: Could not extract frames, falling back to metadata verification")
484
+ return self._create_metadata_only_response(video_details, claim_context, claim_date, url)
485
+
486
+ except Exception as download_error:
487
+ # Fallback to metadata-only verification if download fails
488
+ print(f"⚠️ DEBUG: Video download failed: {download_error}, falling back to metadata verification")
489
+ return self._create_metadata_only_response(video_details, claim_context, claim_date, url)
490
+
491
+ except Exception as e:
492
+ return {
493
+ 'verified': False,
494
+ 'message': f'Error during YouTube video verification: {str(e)}',
495
+ 'details': {'error': str(e)},
496
+ 'reasoning': f'An error occurred while verifying the YouTube video: {str(e)}',
497
+ 'sources': [url]
498
+ }
499
+
500
+ def _create_metadata_only_response(self, video_details: Dict[str, Any], claim_context: str, claim_date: str, url: str) -> Dict[str, Any]:
501
+ """Create a metadata-only verification response when visual analysis fails"""
502
+ verification_message = f"✅ Video verified (metadata only): '{video_details.get('title', 'Unknown Title')}' by {video_details.get('channel_title', 'Unknown Channel')}"
503
+
504
+ # Add context analysis if available
505
+ if claim_context and claim_context.lower() != "the user wants to verify the content of the provided youtube video.":
506
+ verification_message += f"\n\n📝 Claim Context: {claim_context}"
507
+ verification_message += f"\n⚠️ Note: Visual content analysis unavailable - only metadata verification performed"
508
+
509
+ if claim_date and claim_date.strip():
510
+ verification_message += f"\n📅 Claimed Date: {claim_date}"
511
+
512
+ verification_message += f"\n📊 Video Stats: {video_details.get('view_count', 'Unknown')} views, Published: {video_details.get('published_at', 'Unknown')}"
513
+
514
+ return {
515
+ 'verified': True,
516
+ 'message': verification_message,
517
+ 'details': {
518
+ 'verification_method': 'youtube_data_api_metadata_only',
519
+ 'video_id': video_details.get('video_id'),
520
+ 'title': video_details.get('title'),
521
+ 'channel_title': video_details.get('channel_title'),
522
+ 'published_at': video_details.get('published_at'),
523
+ 'duration': video_details.get('duration'),
524
+ 'view_count': video_details.get('view_count'),
525
+ 'thumbnail_url': video_details.get('thumbnail_url'),
526
+ 'claim_context': claim_context,
527
+ 'claim_date': claim_date,
528
+ 'limitation': 'Visual content analysis unavailable'
529
+ },
530
+ 'reasoning': f"Video verified through YouTube Data API metadata only. Visual content analysis was not possible due to download limitations.",
531
+ 'sources': [url]
532
+ }
533
+
534
+ async def _analyze_frames_visually(self, frames: List[Tuple[str, float]], claim_context: str, claim_date: str) -> Dict[str, Any]:
535
+ """
536
+ Analyze extracted frames for visual verification
537
+
538
+ Args:
539
+ frames: List of (frame_path, timestamp) tuples
540
+ claim_context: The claimed context
541
+ claim_date: The claimed date
542
+
543
+ Returns:
544
+ Dictionary with visual analysis results
545
+ """
546
+ try:
547
+ # Analyze each frame using the image verifier
548
+ frame_analyses = []
549
+
550
+ for frame_path, timestamp in frames:
551
+ try:
552
+ frame_result = await self.image_verifier.verify(
553
+ image_path=frame_path,
554
+ claim_context=f"{claim_context} (Frame at {timestamp}s)",
555
+ claim_date=claim_date
556
+ )
557
+ frame_analyses.append({
558
+ 'timestamp': timestamp,
559
+ 'result': frame_result
560
+ })
561
+ except Exception as e:
562
+ print(f"⚠️ DEBUG: Frame analysis failed for {timestamp}s: {e}")
563
+ continue
564
+
565
+ if not frame_analyses:
566
+ return {
567
+ 'verified': False,
568
+ 'message': 'No frames could be analyzed',
569
+ 'details': {'error': 'All frame analyses failed'},
570
+ 'reasoning': 'Visual analysis failed for all extracted frames'
571
+ }
572
+
573
+ # Determine overall verification result
574
+ verified_count = sum(1 for analysis in frame_analyses if analysis['result'].get('verified', False))
575
+ total_frames = len(frame_analyses)
576
+
577
+ if verified_count == 0:
578
+ verification_status = False
579
+ message = f"❌ Visual analysis found no supporting evidence in {total_frames} frames"
580
+ elif verified_count == total_frames:
581
+ verification_status = True
582
+ message = f"✅ Visual analysis confirmed claim in all {total_frames} frames"
583
+ else:
584
+ verification_status = True # Partial verification
585
+ message = f"⚠️ Visual analysis partially confirmed claim in {verified_count}/{total_frames} frames"
586
+
587
+ return {
588
+ 'verified': verification_status,
589
+ 'message': message,
590
+ 'details': {
591
+ 'frames_analyzed': total_frames,
592
+ 'verified_frames': verified_count,
593
+ 'frame_results': frame_analyses
594
+ },
595
+ 'reasoning': f"Analyzed {total_frames} video frames. {verified_count} frames supported the claim."
596
+ }
597
+
598
+ except Exception as e:
599
+ return {
600
+ 'verified': False,
601
+ 'message': f'Visual analysis failed: {str(e)}',
602
+ 'details': {'error': str(e)},
603
+ 'reasoning': f'Error during visual frame analysis: {str(e)}'
604
+ }
605
+
606
+ async def _extract_key_frames(self, video_path: str) -> List[Tuple[str, float]]:
607
+ """
608
+ Extract key frames from video at regular intervals
609
+
610
+ Args:
611
+ video_path: Path to the video file
612
+
613
+ Returns:
614
+ List of tuples (frame_path, timestamp)
615
+ """
616
+ try:
617
+ frames = []
618
+ cap = cv2.VideoCapture(video_path)
619
+
620
+ if not cap.isOpened():
621
+ print(f"Error: Could not open video file {video_path}")
622
+ return []
623
+
624
+ # Get video properties
625
+ fps = cap.get(cv2.CAP_PROP_FPS)
626
+ total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
627
+ duration = total_frames / fps if fps > 0 else 0
628
+
629
+ frame_interval_frames = int(fps * self.frame_interval)
630
+
631
+ frame_count = 0
632
+ saved_count = 0
633
+
634
+ while True:
635
+ ret, frame = cap.read()
636
+ if not ret:
637
+ break
638
+
639
+ # Save frame at regular intervals
640
+ if frame_count % frame_interval_frames == 0:
641
+ timestamp = frame_count / fps
642
+ # Save frame into public/frames for local static serving
643
+ out_dir = os.path.join("public", "frames")
644
+ os.makedirs(out_dir, exist_ok=True)
645
+ frame_file = f"frame_{int(timestamp*1000)}.jpg"
646
+ frame_path = os.path.join(out_dir, frame_file)
647
+ cv2.imwrite(frame_path, frame, [int(cv2.IMWRITE_JPEG_QUALITY), 85])
648
+ frames.append((frame_path, timestamp))
649
+ saved_count += 1
650
+
651
+ # Limit number of frames to analyze
652
+ if saved_count >= 10: # Max 10 frames
653
+ break
654
+
655
+ frame_count += 1
656
+
657
+ cap.release()
658
+ return frames
659
+
660
+ except Exception as e:
661
+ print(f"Error extracting frames: {e}")
662
+ return []
663
+
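A quick worked check of the sampling arithmetic above; the fps and interval values here are illustrative assumptions (`self.frame_interval` is configured elsewhere in the class):

```python
# Illustrative numbers only: a 30 fps video sampled every 5 seconds, capped at 10 frames.
fps = 30.0
frame_interval = 5                                    # assumed value of self.frame_interval (seconds)
frame_interval_frames = int(fps * frame_interval)     # 150 source frames between saved frames

sampled = [(idx, idx / fps) for idx in range(0, 10 * frame_interval_frames, frame_interval_frames)]
print(sampled[:3])  # [(0, 0.0), (150, 5.0), (300, 10.0)] -> at most 10 frames covering ~45s of video
```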
664
+ async def _analyze_frames_with_vision(
665
+ self,
666
+ frames: List[Tuple[str, float]],
667
+ claim_context: str,
668
+ claim_date: str
669
+ ) -> Dict[str, Any]:
670
+ """
671
+ Analyze video frames directly with Gemini Vision (first pass).
672
+ Detects AI-generated/deepfake/manipulation in frames.
673
+
674
+ Args:
675
+ frames: List of (frame_path, timestamp) tuples
676
+ claim_context: The claimed context
677
+ claim_date: The claimed date
678
+
679
+ Returns:
680
+ Dictionary with preliminary vision analysis
681
+ """
682
+ try:
683
+ if not self.image_verifier.gemini_model:
684
+ return {
685
+ "overall_verdict": "uncertain",
686
+ "overall_summary": "Gemini Vision not available",
687
+ "frame_analyses": [],
688
+ "analysis_method": "vision_unavailable",
689
+ }
690
+
691
+ frame_analyses = []
692
+ for frame_path, timestamp in frames:
693
+ try:
694
+ # Use image verifier's vision analysis method
695
+ vision_result = await self.image_verifier._analyze_image_with_vision(
696
+ image_path=frame_path,
697
+ image_url=None,
698
+ claim_context=f"{claim_context} (Frame at {timestamp}s)",
699
+ claim_date=claim_date
700
+ )
701
+ frame_analyses.append({
702
+ "timestamp": timestamp,
703
+ "frame_path": frame_path,
704
+ "vision_analysis": vision_result,
705
+ })
706
+ except Exception as e:
707
+ print(f"⚠️ Vision analysis failed for frame at {timestamp}s: {e}")
708
+ continue
709
+
710
+ if not frame_analyses:
711
+ return {
712
+ "overall_verdict": "uncertain",
713
+ "overall_summary": "No frames could be analyzed with vision",
714
+ "frame_analyses": [],
715
+ "analysis_method": "vision_no_frames",
716
+ }
717
+
718
+ # Aggregate vision results across frames
719
+ false_count = sum(1 for fa in frame_analyses if fa["vision_analysis"].get("verdict") == "false")
720
+ true_count = sum(1 for fa in frame_analyses if fa["vision_analysis"].get("verdict") == "true")
721
+ uncertain_count = len(frame_analyses) - false_count - true_count
722
+
723
+ # Determine overall verdict
724
+ if false_count > true_count and false_count > uncertain_count:
725
+ overall_verdict = "false"
726
+ overall_summary = f"Vision analysis detected manipulation/AI-generated content in {false_count}/{len(frame_analyses)} frames"
727
+ elif true_count > false_count and true_count > uncertain_count:
728
+ overall_verdict = "true"
729
+ overall_summary = f"Vision analysis found authentic content in {true_count}/{len(frame_analyses)} frames"
730
+ else:
731
+ overall_verdict = "uncertain"
732
+ overall_summary = f"Vision analysis inconclusive: {true_count} true, {false_count} false, {uncertain_count} uncertain across {len(frame_analyses)} frames"
733
+
734
+ return {
735
+ "overall_verdict": overall_verdict,
736
+ "overall_summary": overall_summary,
737
+ "frame_analyses": frame_analyses,
738
+ "false_count": false_count,
739
+ "true_count": true_count,
740
+ "uncertain_count": uncertain_count,
741
+ "analysis_method": "gemini_vision",
742
+ }
743
+
744
+ except Exception as e:
745
+ print(f"[vision] Error in frame vision analysis: {e}")
746
+ return {
747
+ "overall_verdict": "uncertain",
748
+ "overall_summary": f"Error during vision analysis: {str(e)}",
749
+ "frame_analyses": [],
750
+ "analysis_method": "vision_error",
751
+ }
752
+
753
+ def _synthesize_video_analyses(
754
+ self,
755
+ preliminary_vision_analysis: Dict[str, Any],
756
+ reverse_search_analysis: Dict[str, Any],
757
+ frames: List[Tuple[str, float]],
758
+ claim_context: str,
759
+ claim_date: str,
760
+ ) -> Optional[Dict[str, Any]]:
761
+ """
762
+ Synthesize Gemini Vision analysis with reverse image search results.
763
+ """
764
+ try:
765
+ if not self.image_verifier.gemini_model:
766
+ return None
767
+
768
+ prompt = f"""You are an expert video verification analyst. Combine direct frame analysis (Gemini Vision) with reverse image search evidence to produce a final verdict.
769
+
770
+ CLAIM: {claim_context}
771
+ CLAIM DATE: {claim_date}
772
+
773
+ DIRECT FRAME ANALYSIS (Gemini Vision):
774
+ {json.dumps(preliminary_vision_analysis or {}, indent=2, ensure_ascii=False)}
775
+
776
+ REVERSE IMAGE SEARCH ANALYSIS:
777
+ {json.dumps(reverse_search_analysis or {}, indent=2, ensure_ascii=False)}
778
+
779
+ TOTAL FRAMES ANALYZED: {len(frames)}
780
+
781
+ INSTRUCTIONS:
782
+ - Combine both analyses to make a final decision (true/false/uncertain)
783
+ - If vision analysis detects AI-generated/manipulated content in multiple frames, prioritize that
784
+ - If reverse image search finds contradictory evidence, factor that in
785
+ - Consider consistency across frames
786
+ - If evidence is thin, keep the tone cautious
787
+ - Provide clear, actionable messaging for the end user
788
+
789
+ Respond ONLY in this JSON format:
790
+ {{
791
+ "overall_verdict": "true|false|uncertain",
792
+ "overall_summary": "Concise user-facing summary combining both analyses",
793
+ "confidence": "high|medium|low",
794
+ "reasoning": "Brief reasoning trail you followed",
795
+ "vision_findings": "Key findings from direct frame analysis",
796
+ "search_findings": "Key findings from reverse image search"
797
+ }}"""
798
+
799
+ response = self.image_verifier.gemini_model.generate_content(prompt)
800
+ response_text = response.text.strip()
801
+
802
+ if response_text.startswith("```json"):
803
+ response_text = response_text.replace("```json", "").replace("```", "").strip()
804
+ elif response_text.startswith("```"):
805
+ response_text = response_text.replace("```", "").strip()
806
+
807
+ final_analysis = json.loads(response_text)
808
+ final_analysis.setdefault("overall_verdict", "uncertain")
809
+ final_analysis.setdefault("overall_summary", "Unable to synthesize final verdict.")
810
+ final_analysis.setdefault("confidence", "low")
811
+ final_analysis["analysis_method"] = "hybrid_vision_and_search"
812
+
813
+ # Preserve frame summaries and sources from reverse search
814
+ final_analysis["frame_summaries"] = reverse_search_analysis.get("frame_summaries", [])
815
+ final_analysis["consolidated_sources"] = reverse_search_analysis.get("consolidated_sources", [])
816
+ final_analysis["preliminary_vision_analysis"] = preliminary_vision_analysis
817
+ final_analysis["reverse_search_analysis"] = reverse_search_analysis
818
+
819
+ return final_analysis
820
+
821
+ except Exception as e:
822
+ print(f"Video hybrid synthesis error: {e}")
823
+ return None
824
+
825
+ async def _analyze_frames(self, frames: List[Tuple[str, float]],
826
+ claim_context: str, claim_date: str) -> Dict[str, Any]:
827
+ """
828
+ Analyze extracted frames for false context
829
+
830
+ Args:
831
+ frames: List of (frame_path, timestamp) tuples
832
+ claim_context: The claimed context
833
+ claim_date: The claimed date
834
+
835
+ Returns:
836
+ Aggregated analysis with overall verdict and optional false frame
837
+ """
838
+ frame_summaries: List[Dict[str, Any]] = []
839
+ false_hit: Optional[Dict[str, Any]] = None
840
+ true_hit: Optional[Dict[str, Any]] = None
841
+ saw_false_validated = False
842
+ saw_true_validated = False
843
+ # 1) Per-frame: only gather evidence; defer verdict to a single final pass
844
+ all_evidence: List[Dict[str, Any]] = []
845
+ for frame_path, timestamp in frames:
846
+ try:
847
+ # Upload frame to Cloudinary if configured, else local static URL
848
+ frame_url = None
849
+ if config.CLOUDINARY_CLOUD_NAME and (config.CLOUDINARY_UPLOAD_PRESET or (config.CLOUDINARY_API_KEY and config.CLOUDINARY_API_SECRET)):
850
+ frame_url = await self._upload_frame_cloudinary(frame_path)
851
+ if not frame_url:
852
+ # fallback local (note: SerpApi can't access localhost; cloudinary is preferred)
853
+ from urllib.parse import quote
854
+ rel = frame_path.replace(os.path.join("public", ''), "") if frame_path.startswith("public"+os.sep) else os.path.basename(frame_path)
855
+ frame_url = f"http://127.0.0.1:{config.SERVICE_PORT}/static/{quote(rel)}"
856
+ print("[video] analyze_frame", {"ts": timestamp, "path": frame_path})
857
+ # Gather evidence only for this frame
858
+ ev = await self.image_verifier.gather_evidence(
859
+ image_path=None, image_url=frame_url, claim_context=claim_context
860
+ )
861
+ all_evidence.extend(ev or [])
862
+ # Populate a placeholder entry per frame (no verdict yet)
863
+ frame_entry = {
864
+ "timestamp": timestamp,
865
+ "verdict": None,
866
+ "summary": None,
867
+ "sources": None,
868
+ "frame_path": frame_path,
869
+ "validator": None,
870
+ "details": {"evidence": ev or []},
871
+ }
872
+ # Compute rule-based confidence (0..1)
873
+ conf = 0.2
874
+ reasons: List[str] = []
875
+ checks = {}
876
+ if frame_entry["verdict"] == "true":
877
+ if checks.get("relation_comention"):
878
+ conf += 0.3; reasons.append("relation_comention")
879
+ if frame_entry["verdict"] == "false":
880
+ if not checks.get("relation_comention"):
881
+ conf += 0.25; reasons.append("no_relation_support")
882
+ if checks.get("timeframe_citations") or checks.get("timeframe_match"):
883
+ conf += 0.15; reasons.append("timeframe_match")
884
+ eos = checks.get("entity_overlap_score")
885
+ try:
886
+ if eos is not None and float(eos) >= 0.7:
887
+ conf += 0.1; reasons.append("entity_overlap")
888
+ except Exception:
889
+ pass
890
+ # Penalize if sources dominated by low-priority domains
891
+ low_priority_hits = 0
892
+ total_sources = 0
893
+ try:
894
+ from urllib.parse import urlparse
895
+ for s in (frame_entry.get("sources") or []):
896
+ total_sources += 1
897
+ net = urlparse((s.get("link") or "")).netloc
898
+ if net in config.LOW_PRIORITY_DOMAINS:
899
+ low_priority_hits += 1
900
+ except Exception:
901
+ pass
902
+ if total_sources > 0 and low_priority_hits / float(total_sources) >= 0.5:
903
+ conf -= 0.2; reasons.append("low_priority_sources")
904
+ if conf < 0.0: conf = 0.0
905
+ if conf > 1.0: conf = 1.0
906
+ frame_entry["confidence"] = conf
907
+ frame_entry["confidence_reasons"] = reasons
908
+ print("[video] frame_result", {"ts": timestamp, "verdict": frame_entry["verdict"], "passed": (frame_entry.get("validator") or {}).get("passed")})
909
+ # No per-frame debug when gathering evidence only
910
+ frame_summaries.append(frame_entry)
911
+ # No per-frame validator flags when gathering evidence only
912
+ if false_hit is None:
913
+ false_hit = {
914
+ "timestamp": timestamp,
915
+ "frame_path": frame_path,
916
+ "evidence_image": None,
917
+ "details": {"evidence": ev or []},
918
+ }
919
+ if true_hit is None:
920
+ true_hit = {
921
+ "timestamp": timestamp,
922
+ "frame_path": frame_path,
923
+ "details": {"evidence": ev or []},
924
+ }
925
+
926
+ except Exception as e:
927
+ print(f"Error analyzing frame {frame_path}: {e}")
928
+ # Keep files even on error for debugging
929
+
930
+ # 2) Single final pass: send aggregated evidence to image verifier's Gemini summarizer
931
+ # Reuse image verifier's structured summarizer for a consolidated verdict
932
+ # Use the simple majority-based summarizer per product rule
933
+ final_llm = self.image_verifier._summarize_with_gemini_majority(
934
+ claim_context=claim_context,
935
+ claim_date=claim_date,
936
+ evidence=all_evidence[:24], # cap to keep prompt manageable
937
+ ) or {}
938
+ final_verdict = (final_llm.get("verdict") or "uncertain").lower()
939
+ # Prefer LLM clarification if present; else fallback to previous summary
940
+ final_summary = final_llm.get("clarification") or final_llm.get("summary") or "Consolidated evidence analyzed."
941
+
942
+ # Deterministic co-mention vote to override ambiguous LLM outcomes
943
+ def _tokens(text: str) -> List[str]:
944
+ import re
945
+ return re.findall(r"[a-z0-9]{3,}", (text or "").lower())
946
+
947
+ def _split_relation(claim: str) -> Tuple[List[str], List[str]]:
948
+ # Heuristic: split on ' with ' to get subject vs object; fallback to all tokens as subject
949
+ cl = (claim or "").strip()
950
+ i = cl.lower().find(" with ")
951
+ if i != -1:
952
+ subj = cl[:i].strip()
953
+ obj = cl[i+6:].strip().split(".")[0]
954
+ else:
955
+ subj = cl
956
+ obj = ""
957
+ return list(set(_tokens(subj))), list(set(_tokens(obj)))
958
+
959
+ def _evidence_text(ev: Dict[str, Any]) -> str:
960
+ return " ".join([t for t in [ev.get("title"), ev.get("snippet"), ev.get("source")] if t])
961
+
962
+ subj_toks, obj_toks = _split_relation(claim_context)
963
+ support = 0
964
+ contra = 0
965
+ for ev in all_evidence[:24]:
966
+ txt_toks = set(_tokens(_evidence_text(ev)))
967
+ if not txt_toks:
968
+ continue
969
+ subj_hit = bool(subj_toks and (set(subj_toks) & txt_toks))
970
+ obj_hit = bool(obj_toks and (set(obj_toks) & txt_toks))
971
+ if subj_hit and obj_hit:
972
+ support += 1
973
+ elif subj_hit and obj_toks:
974
+ # mentions subject but not the claimed object → treat as contradiction to the claimed relation
975
+ contra += 1
976
+
977
+ # Apply override rules: prioritize clear majority; else keep LLM
978
+ if support == 0 and contra > 0:
979
+ final_verdict = "false" # keep LLM clarification
980
+ elif support > contra and (support - contra) >= 1:
981
+ final_verdict = "true" # keep LLM clarification
982
+ elif contra > support and (contra - support) >= 1:
983
+ final_verdict = "false" # keep LLM clarification
984
+ # else keep LLM's verdict/summary
985
+
986
+ return {
987
+ "overall_verdict": final_verdict,
988
+ "overall_summary": final_summary,
989
+ "frame_summaries": frame_summaries,
990
+ "consolidated_sources": final_llm.get("top_sources") or self.image_verifier._top_sources(all_evidence, 3),
991
+ }
992
+
993
+ async def _upload_frame_cloudinary(self, frame_path: str) -> Optional[str]:
994
+ try:
995
+ import hashlib
996
+ import requests
997
+ cloud = config.CLOUDINARY_CLOUD_NAME
998
+ folder = config.CLOUDINARY_FOLDER.strip('/')
999
+ # Unsigned upload if preset provided
1000
+ if config.CLOUDINARY_UPLOAD_PRESET:
1001
+ url = f"https://api.cloudinary.com/v1_1/{cloud}/image/upload"
1002
+ with open(frame_path, 'rb') as f:
1003
+ files = {"file": f}
1004
+ data = {"upload_preset": config.CLOUDINARY_UPLOAD_PRESET, "folder": folder}
1005
+ r = requests.post(url, files=files, data=data, timeout=30)
1006
+ r.raise_for_status()
1007
+ return r.json().get("secure_url")
1008
+ # Signed upload
1009
+ ts = str(int(time.time()))
1010
+ params_to_sign = {"timestamp": ts, "folder": folder}
1011
+ to_sign = "&".join([f"{k}={v}" for k, v in sorted(params_to_sign.items())]) + config.CLOUDINARY_API_SECRET
1012
+ signature = hashlib.sha1(to_sign.encode('utf-8')).hexdigest()
1013
+ url = f"https://api.cloudinary.com/v1_1/{cloud}/image/upload"
1014
+ with open(frame_path, 'rb') as f:
1015
+ files = {"file": f}
1016
+ data = {
1017
+ "api_key": config.CLOUDINARY_API_KEY,
1018
+ "timestamp": ts,
1019
+ "signature": signature,
1020
+ "folder": folder,
1021
+ }
1022
+ r = requests.post(url, files=files, data=data, timeout=30)
1023
+ r.raise_for_status()
1024
+ return r.json().get("secure_url")
1025
+ except Exception as e:
1026
+ print(f"Cloudinary upload failed: {e}")
1027
+ return None
1028
+
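For reference, a tiny worked example of the signature string built in the signed-upload branch above (values are made up; the scheme mirrors the code: sorted `key=value` pairs joined with `&`, API secret appended, SHA-1 hex digest):

```python
import hashlib

# Illustrative values only; never hard-code a real API secret.
params_to_sign = {"timestamp": "1700000000", "folder": "frames"}
api_secret = "MY_SECRET"

# Cloudinary signing scheme: sorted key=value pairs joined by '&', secret appended, SHA-1 hex digest.
to_sign = "&".join(f"{k}={v}" for k, v in sorted(params_to_sign.items())) + api_secret
signature = hashlib.sha1(to_sign.encode("utf-8")).hexdigest()

print(to_sign)     # folder=frames&timestamp=1700000000MY_SECRET
print(signature)   # 40-character hex string sent as the 'signature' form field
```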
1029
+ async def _generate_video_counter_measure(self, video_path: str,
1030
+ false_context_frame: Dict[str, Any],
1031
+ claim_context: str, claim_date: str) -> str:
1032
+ """
1033
+ Generate a video counter-measure
1034
+
1035
+ Args:
1036
+ video_path: Path to the original video
1037
+ false_context_frame: Information about the false context frame
1038
+ claim_context: The claimed context
1039
+ claim_date: The claimed date
1040
+
1041
+ Returns:
1042
+ Path to the generated counter-measure video
1043
+ """
1044
+ try:
1045
+ # Create temporary directory for video processing
1046
+ temp_dir = tempfile.mkdtemp()
1047
+
1048
+ # Generate video components
1049
+ title_clip = await self._create_title_clip(temp_dir, claim_context, claim_date)
1050
+ misleading_clip = await self._create_misleading_clip(
1051
+ video_path, false_context_frame["timestamp"], temp_dir
1052
+ )
1053
+ debunk_clip = await self._create_debunk_clip(
1054
+ temp_dir, false_context_frame, claim_context, claim_date
1055
+ )
1056
+ verdict_clip = await self._create_verdict_clip(temp_dir)
1057
+
1058
+ # Concatenate all clips
1059
+ output_path = await self._concatenate_clips(
1060
+ [title_clip, misleading_clip, debunk_clip, verdict_clip],
1061
+ temp_dir
1062
+ )
1063
+
1064
+ # Clean up temporary files
1065
+ self._cleanup_temp_files(temp_dir)
1066
+
1067
+ # Attempt Cloudinary cleanup (best-effort) before responding
1068
+ await self._cloudinary_cleanup_prefix(config.CLOUDINARY_FOLDER or "frames")
1069
+ return output_path
1070
+
1071
+ except Exception as e:
1072
+ print(f"Error generating video counter-measure: {e}")
1073
+ raise
1074
+
1075
+ async def _create_title_clip(self, temp_dir: str, claim_context: str, claim_date: str) -> str:
1076
+ """Create title clip with claim information"""
1077
+ try:
1078
+ # Create title image
1079
+ img = Image.new('RGB', (800, 400), 'white')
1080
+ draw = ImageDraw.Draw(img)
1081
+
1082
+ try:
1083
+ font_large = ImageFont.truetype("/System/Library/Fonts/Arial.ttf", 36)
1084
+ font_medium = ImageFont.truetype("/System/Library/Fonts/Arial.ttf", 24)
1085
+ except Exception:
1086
+ font_large = ImageFont.load_default()
1087
+ font_medium = ImageFont.load_default()
1088
+
1089
+ # Add title
1090
+ title = "FALSE CONTEXT DETECTED"
1091
+ title_bbox = draw.textbbox((0, 0), title, font=font_large)
1092
+ title_width = title_bbox[2] - title_bbox[0]
1093
+ title_x = (800 - title_width) // 2
1094
+ draw.text((title_x, 100), title, fill='red', font=font_large)
1095
+
1096
+ # Add claim details
1097
+ claim_text = f"Claim: {claim_context}, {claim_date}"
1098
+ claim_bbox = draw.textbbox((0, 0), claim_text, font=font_medium)
1099
+ claim_width = claim_bbox[2] - claim_bbox[0]
1100
+ claim_x = (800 - claim_width) // 2
1101
+ draw.text((claim_x, 200), claim_text, fill='black', font=font_medium)
1102
+
1103
+ # Save image
1104
+ title_img_path = os.path.join(temp_dir, "title.png")
1105
+ img.save(title_img_path)
1106
+
1107
+ # Convert to video clip
1108
+ title_video_path = os.path.join(temp_dir, "title.mp4")
1109
+ await self._image_to_video(title_img_path, title_video_path, duration=3)
1110
+
1111
+ return title_video_path
1112
+
1113
+ except Exception as e:
1114
+ print(f"Error creating title clip: {e}")
1115
+ raise
1116
+
1117
+ async def _create_misleading_clip(self, video_path: str, timestamp: float, temp_dir: str) -> str:
1118
+ """Create clip from original misleading video"""
1119
+ try:
1120
+ # Calculate frame numbers for 5-second clip
1121
+ cap = cv2.VideoCapture(video_path)
1122
+ fps = cap.get(cv2.CAP_PROP_FPS)
1123
+ cap.release()
1124
+
1125
+ start_frame = int(timestamp * fps) - int(self.clip_duration / 2 * fps)
1126
+ start_frame = max(0, start_frame)
1127
+
1128
+ # Extract clip using ffmpeg
1129
+ clip_path = os.path.join(temp_dir, "misleading_clip.mp4")
1130
+
1131
+ start_time = max(0, timestamp - self.clip_duration / 2)
1132
+
1133
+ cmd = [
1134
+ 'ffmpeg', '-i', video_path,
1135
+ '-ss', str(start_time),
1136
+ '-t', str(self.clip_duration),
1137
+ '-c', 'copy',
1138
+ '-y', clip_path
1139
+ ]
1140
+
1141
+ process = await asyncio.create_subprocess_exec(
1142
+ *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
1143
+ )
1144
+ await process.communicate()
1145
+
1146
+ if process.returncode != 0:
1147
+ raise Exception("FFmpeg failed to create misleading clip")
1148
+
1149
+ return clip_path
1150
+
1151
+ except Exception as e:
1152
+ print(f"Error creating misleading clip: {e}")
1153
+ raise
1154
+
1155
+ async def _create_debunk_clip(self, temp_dir: str, false_context_frame: Dict[str, Any],
1156
+ claim_context: str, claim_date: str) -> str:
1157
+ """Create debunk scene clip with side-by-side comparison"""
1158
+ try:
1159
+ # Create debunk image using image verifier's counter-measure
1160
+ debunk_img_path = await self.image_verifier._generate_counter_measure(
1161
+ false_context_frame["frame_path"],
1162
+ false_context_frame["evidence_image"],
1163
+ claim_context,
1164
+ claim_date
1165
+ )
1166
+
1167
+ # Move to temp directory
1168
+ final_debunk_img = os.path.join(temp_dir, "debunk.png")
1169
+ os.rename(debunk_img_path, final_debunk_img)
1170
+
1171
+ # Convert to video clip
1172
+ debunk_video_path = os.path.join(temp_dir, "debunk.mp4")
1173
+ await self._image_to_video(final_debunk_img, debunk_video_path, duration=5)
1174
+
1175
+ return debunk_video_path
1176
+
1177
+ except Exception as e:
1178
+ print(f"Error creating debunk clip: {e}")
1179
+ raise
1180
+
1181
+ async def _create_verdict_clip(self, temp_dir: str) -> str:
1182
+ """Create verdict clip with conclusion"""
1183
+ try:
1184
+ # Create verdict image
1185
+ img = Image.new('RGB', (800, 400), 'white')
1186
+ draw = ImageDraw.Draw(img)
1187
+
1188
+ try:
1189
+ font_large = ImageFont.truetype("/System/Library/Fonts/Arial.ttf", 36)
1190
+ font_medium = ImageFont.truetype("/System/Library/Fonts/Arial.ttf", 24)
1191
+ except Exception:
1192
+ font_large = ImageFont.load_default()
1193
+ font_medium = ImageFont.load_default()
1194
+
1195
+ # Add verdict
1196
+ verdict = "VERDICT: FALSE CONTEXT"
1197
+ verdict_bbox = draw.textbbox((0, 0), verdict, font=font_large)
1198
+ verdict_width = verdict_bbox[2] - verdict_bbox[0]
1199
+ verdict_x = (800 - verdict_width) // 2
1200
+ draw.text((verdict_x, 100), verdict, fill='red', font=font_large)
1201
+
1202
+ # Add explanation
1203
+ explanation = "This content is being used in a false context"
1204
+ explanation_bbox = draw.textbbox((0, 0), explanation, font=font_medium)
1205
+ explanation_width = explanation_bbox[2] - explanation_bbox[0]
1206
+ explanation_x = (800 - explanation_width) // 2
1207
+ draw.text((explanation_x, 200), explanation, fill='black', font=font_medium)
1208
+
1209
+ # Save image
1210
+ verdict_img_path = os.path.join(temp_dir, "verdict.png")
1211
+ img.save(verdict_img_path)
1212
+
1213
+ # Convert to video clip
1214
+ verdict_video_path = os.path.join(temp_dir, "verdict.mp4")
1215
+ await self._image_to_video(verdict_img_path, verdict_video_path, duration=3)
1216
+
1217
+ return verdict_video_path
1218
+
1219
+ except Exception as e:
1220
+ print(f"Error creating verdict clip: {e}")
1221
+ raise
1222
+
1223
+ async def _image_to_video(self, image_path: str, video_path: str, duration: int) -> None:
1224
+ """Convert image to video clip using FFmpeg"""
1225
+ try:
1226
+ cmd = [
1227
+ 'ffmpeg', '-loop', '1',
1228
+ '-i', image_path,
1229
+ '-c:v', 'libx264',
1230
+ '-t', str(duration),
1231
+ '-pix_fmt', 'yuv420p',
1232
+ '-y', video_path
1233
+ ]
1234
+
1235
+ process = await asyncio.create_subprocess_exec(
1236
+ *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
1237
+ )
1238
+ await process.communicate()
1239
+
1240
+ if process.returncode != 0:
1241
+ raise Exception("FFmpeg failed to convert image to video")
1242
+
1243
+ except Exception as e:
1244
+ print(f"Error converting image to video: {e}")
1245
+ raise
1246
+
1247
+ async def _concatenate_clips(self, clip_paths: List[str], temp_dir: str) -> str:
1248
+ """Concatenate multiple video clips into one"""
1249
+ try:
1250
+ # Create file list for FFmpeg
1251
+ file_list_path = os.path.join(temp_dir, "clips.txt")
1252
+ with open(file_list_path, 'w') as f:
1253
+ for clip_path in clip_paths:
1254
+ f.write(f"file '{clip_path}'\n")
1255
+
1256
+ # Concatenate clips
1257
+ output_path = tempfile.mktemp(suffix=".mp4")
1258
+
1259
+ cmd = [
1260
+ 'ffmpeg', '-f', 'concat',
1261
+ '-safe', '0',
1262
+ '-i', file_list_path,
1263
+ '-c', 'copy',
1264
+ '-y', output_path
1265
+ ]
1266
+
1267
+ process = await asyncio.create_subprocess_exec(
1268
+ *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
1269
+ )
1270
+ await process.communicate()
1271
+
1272
+ if process.returncode != 0:
1273
+ raise Exception("FFmpeg failed to concatenate clips")
1274
+
1275
+ return output_path
1276
+
1277
+ except Exception as e:
1278
+ print(f"Error concatenating clips: {e}")
1279
+ raise
1280
+
1281
+ def _cleanup_temp_files(self, temp_dir: str) -> None:
1282
+ """Clean up temporary files and directory"""
1283
+ try:
1284
+ import shutil
1285
+ shutil.rmtree(temp_dir)
1286
+ except Exception as e:
1287
+ print(f"Error cleaning up temp files: {e}")
1288
+
1289
+ async def _cloudinary_cleanup_prefix(self, prefix: str) -> None:
1290
+ try:
1291
+ if not (config.CLOUDINARY_CLOUD_NAME and (config.CLOUDINARY_API_KEY and config.CLOUDINARY_API_SECRET)):
1292
+ return
1293
+ # List and delete all resources under the folder prefix (rate-limited; best-effort)
1294
+ import requests
1295
+ from requests.auth import HTTPBasicAuth
1296
+ cloud = config.CLOUDINARY_CLOUD_NAME
1297
+ auth = HTTPBasicAuth(config.CLOUDINARY_API_KEY, config.CLOUDINARY_API_SECRET)
1298
+ list_url = f"https://api.cloudinary.com/v1_1/{cloud}/resources/image"
1299
+ params = {"prefix": prefix, "max_results": 100}
1300
+ r = requests.get(list_url, params=params, auth=auth, timeout=20)
1301
+ if r.status_code != 200:
1302
+ return
1303
+ data = r.json()
1304
+ public_ids = [res.get("public_id") for res in data.get("resources", []) if res.get("public_id")]
1305
+ if not public_ids:
1306
+ return
1307
+ del_url = f"https://api.cloudinary.com/v1_1/{cloud}/resources/image/delete_by_ids"
1308
+ requests.post(del_url, data={"public_ids": ",".join(public_ids)}, auth=auth, timeout=20)
1309
+ except Exception as e:
1310
+ print(f"Cloudinary cleanup failed: {e}")
services/websocket_service.py ADDED
@@ -0,0 +1,239 @@
1
+ """
2
+ WebSocket Service for Real-time Updates
3
+ Handles WebSocket connections and MongoDB Change Streams for real-time data updates
4
+ """
5
+
6
+ import asyncio
7
+ import json
8
+ import logging
9
+ from typing import Set, Dict, Any, Optional
10
+ from fastapi import WebSocket, WebSocketDisconnect
11
+ from pymongo import MongoClient
12
+ from pymongo.errors import ConnectionFailure
13
+ import os
14
+ from dotenv import load_dotenv
15
+
16
+ load_dotenv()
17
+
18
+ # Setup logging
19
+ logger = logging.getLogger(__name__)
20
+
21
+ class ConnectionManager:
22
+ """Manages WebSocket connections"""
23
+
24
+ def __init__(self):
25
+ self.active_connections: Set[WebSocket] = set()
26
+ self.connection_data: Dict[WebSocket, Dict[str, Any]] = {}
27
+
28
+ async def connect(self, websocket: WebSocket, client_info: Optional[Dict[str, Any]] = None):
29
+ """Accept a new WebSocket connection"""
30
+ await websocket.accept()
31
+ self.active_connections.add(websocket)
32
+ self.connection_data[websocket] = client_info or {}
33
+ logger.info(f"✅ WebSocket connected. Total connections: {len(self.active_connections)}")
34
+
35
+ def disconnect(self, websocket: WebSocket):
36
+ """Remove a WebSocket connection"""
37
+ if websocket in self.active_connections:
38
+ self.active_connections.remove(websocket)
39
+ if websocket in self.connection_data:
40
+ del self.connection_data[websocket]
41
+ logger.info(f"🔌 WebSocket disconnected. Total connections: {len(self.active_connections)}")
42
+
43
+ async def send_personal_message(self, message: str, websocket: WebSocket):
44
+ """Send a message to a specific WebSocket connection"""
45
+ try:
46
+ await websocket.send_text(message)
47
+ except Exception as e:
48
+ logger.error(f"❌ Failed to send personal message: {e}")
49
+ self.disconnect(websocket)
50
+
51
+ async def broadcast(self, message: str):
52
+ """Broadcast a message to all connected WebSocket clients"""
53
+ if not self.active_connections:
54
+ logger.warning("⚠️ No active connections to broadcast to")
55
+ return
56
+
57
+ disconnected = set()
58
+ for connection in self.active_connections:
59
+ try:
60
+ await connection.send_text(message)
61
+ except Exception as e:
62
+ logger.error(f"❌ Failed to broadcast to connection: {e}")
63
+ disconnected.add(connection)
64
+
65
+ # Clean up disconnected connections
66
+ for connection in disconnected:
67
+ self.disconnect(connection)
68
+
69
+ logger.info(f"📡 Broadcasted message to {len(self.active_connections)} connections")
70
+
71
+ class MongoDBChangeStreamService:
72
+ """Service to monitor MongoDB changes and notify WebSocket clients"""
73
+
74
+ def __init__(self, connection_string: Optional[str] = None):
75
+ """Initialize MongoDB connection for change streams"""
76
+ self.connection_string = connection_string or os.getenv('MONGO_CONNECTION_STRING')
77
+
78
+ if not self.connection_string:
79
+ raise ValueError("MongoDB connection string is required. Set MONGO_CONNECTION_STRING environment variable.")
80
+
81
+ self.client = None
82
+ self.db = None
83
+ self.collection = None
84
+ self.change_stream = None
85
+ self.is_running = False
86
+
87
+ self._connect()
88
+
89
+ def _connect(self):
90
+ """Establish MongoDB connection"""
91
+ try:
92
+ self.client = MongoClient(self.connection_string)
93
+ # Test connection
94
+ self.client.admin.command('ping')
95
+
96
+ # Use 'aegis' database
97
+ self.db = self.client['aegis']
98
+ self.collection = self.db['debunk_posts']
99
+
100
+ logger.info("✅ MongoDB Change Stream service connected successfully")
101
+
102
+ except ConnectionFailure as e:
103
+ logger.error(f"❌ Failed to connect to MongoDB for change streams: {e}")
104
+ raise
105
+
106
+ async def start_change_stream(self, connection_manager: ConnectionManager):
107
+ """Start monitoring MongoDB changes and broadcast to WebSocket clients"""
108
+ if self.is_running:
109
+ logger.warning("⚠️ Change stream is already running")
110
+ return
111
+
112
+ try:
113
+ # Check if MongoDB supports change streams (replica set)
114
+ try:
115
+ # Try to create change stream to watch for insertions
116
+ self.change_stream = self.collection.watch([
117
+ {
118
+ '$match': {
119
+ 'operationType': 'insert'
120
+ }
121
+ }
122
+ ])
123
+
124
+ self.is_running = True
125
+ logger.info("🔄 Started MongoDB change stream monitoring")
126
+
127
+ # Process change stream events
128
+ async def process_changes():
129
+ try:
130
+ while self.is_running:
131
+ if self.change_stream:
132
+ # Check for new changes (non-blocking)
133
+ try:
134
+ change = self.change_stream.try_next()
135
+ if change:
136
+ await self._handle_change(change, connection_manager)
137
+ else:
138
+ # No changes, sleep briefly
139
+ await asyncio.sleep(0.5)
140
+ except Exception as e:
141
+ logger.error(f"❌ Error processing change: {e}")
142
+ await asyncio.sleep(1) # Brief pause on error
143
+ continue
144
+ else:
145
+ await asyncio.sleep(1)
146
+
147
+ except Exception as e:
148
+ logger.error(f"❌ Error in change stream processing: {e}")
149
+ finally:
150
+ self.is_running = False
151
+
152
+ # Start the change stream processing in the background
153
+ asyncio.create_task(process_changes())
154
+
155
+ except Exception as change_stream_error:
156
+ logger.warning(f"⚠️ MongoDB change streams not available: {change_stream_error}")
157
+ logger.info("🔄 Change streams require MongoDB replica set. WebSocket will work for manual updates.")
158
+ # Don't fail completely, just disable change streams
159
+ self.is_running = False
160
+ self.change_stream = None
161
+
162
+ except Exception as e:
163
+ logger.error(f"❌ Failed to start change stream: {e}")
164
+ self.is_running = False
165
+ # Don't raise the exception, allow WebSocket to work without change streams
166
+
167
+ async def _handle_change(self, change: Dict[str, Any], connection_manager: ConnectionManager):
168
+ """Handle a MongoDB change event"""
169
+ try:
170
+ logger.info(f"🔄 MongoDB change detected: {change.get('operationType')}")
171
+
172
+ # Extract the new document
173
+ new_document = change.get('fullDocument')
174
+ if not new_document:
175
+ logger.warning("⚠️ No full document in change event")
176
+ return
177
+
178
+ # Convert ObjectId to string for JSON serialization
179
+ if '_id' in new_document:
180
+ new_document['_id'] = str(new_document['_id'])
181
+
182
+ # Create the broadcast message
183
+ message = {
184
+ "type": "new_post",
185
+ "data": {
186
+ "post": new_document,
187
+ "timestamp": change.get('clusterTime'),
188
+ "operation": change.get('operationType')
189
+ }
190
+ }
191
+
192
+ # Broadcast to all connected clients (serialize datetimes/ObjectIds)
193
+ await connection_manager.broadcast(json.dumps(message, default=str))
194
+ logger.info(f"📡 Broadcasted new post to {len(connection_manager.active_connections)} clients")
195
+
196
+ except Exception as e:
197
+ logger.error(f"❌ Error handling MongoDB change: {e}")
198
+
199
+ def stop_change_stream(self):
200
+ """Stop the MongoDB change stream"""
201
+ self.is_running = False
202
+ if self.change_stream:
203
+ self.change_stream.close()
204
+ self.change_stream = None
205
+ logger.info("🛑 Stopped MongoDB change stream")
206
+
207
+
208
+ def close(self):
209
+ """Close MongoDB connection"""
210
+ self.stop_change_stream()
211
+ if self.client:
212
+ self.client.close()
213
+ logger.info("🔌 MongoDB Change Stream service connection closed")
214
+
215
+ # Global instances
216
+ connection_manager = ConnectionManager()
217
+ mongodb_change_service = None
218
+
219
+ async def initialize_mongodb_change_stream():
220
+ """Initialize the MongoDB change stream service"""
221
+ global mongodb_change_service
222
+
223
+ try:
224
+ mongodb_change_service = MongoDBChangeStreamService()
225
+ await mongodb_change_service.start_change_stream(connection_manager)
226
+ logger.info("✅ MongoDB Change Stream service initialized successfully")
227
+ return mongodb_change_service
228
+ except Exception as e:
229
+ logger.error(f"❌ Failed to initialize MongoDB Change Stream service: {e}")
230
+ return None
231
+
232
+ async def cleanup_mongodb_change_stream():
233
+ """Cleanup the MongoDB change stream service"""
234
+ global mongodb_change_service
235
+
236
+ if mongodb_change_service:
237
+ mongodb_change_service.close()
238
+ mongodb_change_service = None
239
+ logger.info("🧹 MongoDB Change Stream service cleaned up")
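As a usage sketch (the endpoint path and the app wiring below are assumptions, not part of this commit), the manager and change-stream service are intended to be plugged into FastAPI roughly like this:

```python
# Hypothetical wiring in main.py; the endpoint path and lifecycle hooks are assumptions, not part of this diff.
from fastapi import FastAPI, WebSocket, WebSocketDisconnect

from services.websocket_service import (
    connection_manager,
    initialize_mongodb_change_stream,
    cleanup_mongodb_change_stream,
)

app = FastAPI()

@app.on_event("startup")
async def startup() -> None:
    # Starts change-stream monitoring in the background (degrades gracefully without a replica set)
    await initialize_mongodb_change_stream()

@app.on_event("shutdown")
async def shutdown() -> None:
    await cleanup_mongodb_change_stream()

@app.websocket("/ws/posts")
async def posts_feed(websocket: WebSocket) -> None:
    await connection_manager.connect(websocket)
    try:
        while True:
            # Keep the socket open; new-post broadcasts arrive via the change stream
            await websocket.receive_text()
    except WebSocketDisconnect:
        connection_manager.disconnect(websocket)
```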
services/youtube_api.py ADDED
@@ -0,0 +1,211 @@
1
+ import os
2
+ import requests
3
+ from typing import Dict, Any, Optional
4
+ from config import config
5
+
6
+ class YouTubeDataAPI:
7
+ """
8
+ YouTube Data API v3 integration for video verification
9
+ """
10
+
11
+ def __init__(self, api_key: Optional[str] = None):
12
+ """
13
+ Initialize YouTube Data API client
14
+
15
+ Args:
16
+ api_key: Google API key. If None, will try to get from environment
17
+ """
18
+ self.api_key = api_key or config.GOOGLE_API_KEY
19
+ if not self.api_key:
20
+ raise ValueError("GOOGLE_API_KEY environment variable or api_key parameter is required")
21
+
22
+ self.base_url = "https://www.googleapis.com/youtube/v3"
23
+
24
+ def extract_video_id(self, url: str) -> Optional[str]:
25
+ """
26
+ Extract video ID from YouTube URL
27
+
28
+ Args:
29
+ url: YouTube URL (various formats supported)
30
+
31
+ Returns:
32
+ Video ID or None if not found
33
+ """
34
+ import re
35
+
36
+ # YouTube URL patterns
37
+ patterns = [
38
+ r'(?:youtube\.com\/watch\?v=|youtu\.be\/|youtube\.com\/embed\/)([a-zA-Z0-9_-]{11})',
39
+ r'youtube\.com\/v\/([a-zA-Z0-9_-]{11})',
40
+ r'youtube\.com\/shorts\/([a-zA-Z0-9_-]{11})'
41
+ ]
42
+
43
+ for pattern in patterns:
44
+ match = re.search(pattern, url)
45
+ if match:
46
+ return match.group(1)
47
+
48
+ return None
49
+
50
+ def get_video_info(self, video_id: str) -> Dict[str, Any]:
51
+ """
52
+ Get video information from YouTube Data API
53
+
54
+ Args:
55
+ video_id: YouTube video ID
56
+
57
+ Returns:
58
+ Dictionary with video information
59
+ """
60
+ try:
61
+ url = f"{self.base_url}/videos"
62
+ params = {
63
+ 'key': self.api_key,
64
+ 'id': video_id,
65
+ 'part': 'snippet,statistics,contentDetails'
66
+ }
67
+
68
+ response = requests.get(url, params=params, timeout=30)
69
+ response.raise_for_status()
70
+
71
+ data = response.json()
72
+
73
+ if not data.get('items'):
74
+ return {
75
+ 'success': False,
76
+ 'error': 'Video not found or not accessible'
77
+ }
78
+
79
+ video = data['items'][0]
80
+ snippet = video.get('snippet', {})
81
+ statistics = video.get('statistics', {})
82
+ content_details = video.get('contentDetails', {})
83
+
84
+ return {
85
+ 'success': True,
86
+ 'video_id': video_id,
87
+ 'title': snippet.get('title', 'Unknown Title'),
88
+ 'description': snippet.get('description', ''),
89
+ 'channel_title': snippet.get('channelTitle', 'Unknown Channel'),
90
+ 'published_at': snippet.get('publishedAt', ''),
91
+ 'duration': content_details.get('duration', ''),
92
+ 'view_count': statistics.get('viewCount', '0'),
93
+ 'like_count': statistics.get('likeCount', '0'),
94
+ 'comment_count': statistics.get('commentCount', '0'),
95
+ 'tags': snippet.get('tags', []),
96
+ 'category_id': snippet.get('categoryId', ''),
97
+ 'thumbnail_url': snippet.get('thumbnails', {}).get('high', {}).get('url', ''),
98
+ 'raw_data': video
99
+ }
100
+
101
+ except requests.exceptions.RequestException as e:
102
+ return {
103
+ 'success': False,
104
+ 'error': f'API request failed: {str(e)}'
105
+ }
106
+ except Exception as e:
107
+ return {
108
+ 'success': False,
109
+ 'error': f'Unexpected error: {str(e)}'
110
+ }
111
+
112
+ def search_videos(self, query: str, max_results: int = 10) -> Dict[str, Any]:
113
+ """
114
+ Search for videos using YouTube Data API
115
+
116
+ Args:
117
+ query: Search query
118
+ max_results: Maximum number of results to return
119
+
120
+ Returns:
121
+ Dictionary with search results
122
+ """
123
+ try:
124
+ url = f"{self.base_url}/search"
125
+ params = {
126
+ 'key': self.api_key,
127
+ 'q': query,
128
+ 'part': 'snippet',
129
+ 'type': 'video',
130
+ 'maxResults': max_results,
131
+ 'order': 'relevance'
132
+ }
133
+
134
+ response = requests.get(url, params=params, timeout=30)
135
+ response.raise_for_status()
136
+
137
+ data = response.json()
138
+
139
+ videos = []
140
+ for item in data.get('items', []):
141
+ snippet = item.get('snippet', {})
142
+ videos.append({
143
+ 'video_id': item.get('id', {}).get('videoId', ''),
144
+ 'title': snippet.get('title', ''),
145
+ 'description': snippet.get('description', ''),
146
+ 'channel_title': snippet.get('channelTitle', ''),
147
+ 'published_at': snippet.get('publishedAt', ''),
148
+ 'thumbnail_url': snippet.get('thumbnails', {}).get('high', {}).get('url', '')
149
+ })
150
+
151
+ return {
152
+ 'success': True,
153
+ 'videos': videos,
154
+ 'total_results': data.get('pageInfo', {}).get('totalResults', 0)
155
+ }
156
+
157
+ except requests.exceptions.RequestException as e:
158
+ return {
159
+ 'success': False,
160
+ 'error': f'API request failed: {str(e)}'
161
+ }
162
+ except Exception as e:
163
+ return {
164
+ 'success': False,
165
+ 'error': f'Unexpected error: {str(e)}'
166
+ }
167
+
168
+ def verify_video_exists(self, url: str) -> Dict[str, Any]:
169
+ """
170
+ Verify if a YouTube video exists and is accessible
171
+
172
+ Args:
173
+ url: YouTube URL
174
+
175
+ Returns:
176
+ Dictionary with verification results
177
+ """
178
+ video_id = self.extract_video_id(url)
179
+
180
+ if not video_id:
181
+ return {
182
+ 'verified': False,
183
+ 'message': 'Invalid YouTube URL format',
184
+ 'details': {'error': 'Could not extract video ID from URL'}
185
+ }
186
+
187
+ video_info = self.get_video_info(video_id)
188
+
189
+ if not video_info.get('success'):
190
+ return {
191
+ 'verified': False,
192
+ 'message': f'Video verification failed: {video_info.get("error", "Unknown error")}',
193
+ 'details': {
194
+ 'video_id': video_id,
195
+ 'error': video_info.get('error', 'Unknown error')
196
+ }
197
+ }
198
+
199
+ return {
200
+ 'verified': True,
201
+ 'message': f'Video verified successfully: "{video_info["title"]}" by {video_info["channel_title"]}',
202
+ 'details': {
203
+ 'video_id': video_id,
204
+ 'title': video_info['title'],
205
+ 'channel_title': video_info['channel_title'],
206
+ 'published_at': video_info['published_at'],
207
+ 'duration': video_info['duration'],
208
+ 'view_count': video_info['view_count'],
209
+ 'thumbnail_url': video_info['thumbnail_url']
210
+ }
211
+ }
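A quick usage sketch of the class above (the example URL is arbitrary; the key is read from `GOOGLE_API_KEY` via `config`):

```python
# Minimal usage sketch; assumes GOOGLE_API_KEY is available via config/environment.
from services.youtube_api import YouTubeDataAPI

yt = YouTubeDataAPI()
result = yt.verify_video_exists("https://www.youtube.com/watch?v=dQw4w9WgXcQ")

if result["verified"]:
    details = result["details"]
    print(f"{details['title']} by {details['channel_title']} ({details['view_count']} views)")
else:
    print(result["message"])
```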
services/youtube_caption.py ADDED
@@ -0,0 +1,141 @@
1
+ # pip install yt-dlp
2
+
3
+ import yt_dlp
4
+ import os
5
+ import re
6
+ from pathlib import Path
7
+
8
+ def get_youtube_transcript_ytdlp(video_url, output_file="transcript.txt"):
9
+ """
10
+ Extract YouTube transcript using yt-dlp
11
+ Uses yt-dlp, which handles YouTube's signature/blocking checks, so it works reliably in regions (e.g., India) where other transcript libraries are often blocked
12
+ """
13
+
14
+ print("[*] Starting transcript extraction with yt-dlp...")
15
+
16
+ # Extract video ID for reference
17
+ video_id_match = re.search(r'v=([^&]*)', video_url)
18
+ video_id = video_id_match.group(1) if video_id_match else 'unknown'
19
+
20
+ print(f"[+] Video ID: {video_id}")
21
+
22
+ # Normalize URL to just the video (remove playlist parameters)
23
+ normalized_url = f"https://www.youtube.com/watch?v={video_id}"
24
+ print(f"[+] Normalized URL: {normalized_url}")
25
+
26
+ try:
27
+ # Create temp directory for subtitles
28
+ temp_dir = "temp_subs"
29
+ os.makedirs(temp_dir, exist_ok=True)
30
+
31
+ # Setup yt-dlp options
32
+ ydl_opts = {
33
+ 'writeautomaticsub': True, # Download auto-generated subtitles
34
+ 'subtitlesformat': 'vtt', # Format (can also be 'json3', 'srt', 'ass')
35
+ 'skip_download': True, # Only download subs, not video
36
+ 'noplaylist': True, # Only download the video, not the playlist
37
+ 'outtmpl': os.path.join(temp_dir, '%(id)s'), # Output template
38
+ 'quiet': False, # Show progress
39
+ 'no_warnings': False,
40
+ 'subtitleslangs': ['en'], # Restrict to English subtitles (yt-dlp expects a list of language codes)
41
+ }
42
+
43
+ print("[*] Downloading subtitles...")
44
+
45
+ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
46
+ info = ydl.extract_info(normalized_url, download=True) # Use normalized URL
47
+
48
+ print("[+] Subtitles downloaded successfully")
49
+
50
+ # Find the subtitle file
51
+ subtitle_file = None
52
+ for file in os.listdir(temp_dir):
53
+ if video_id in file and (file.endswith('.vtt') or file.endswith('.srt')):
54
+ subtitle_file = os.path.join(temp_dir, file)
55
+ print(f"[+] Found subtitle file: {file}")
56
+ break
57
+
58
+ if not subtitle_file or not os.path.exists(subtitle_file):
59
+ print("[ERROR] Subtitle file not found")
60
+ print(f"[DEBUG] Files in {temp_dir}: {os.listdir(temp_dir)}")
61
+ return None
62
+
63
+ # Read and parse the subtitle file
64
+ print("[*] Parsing subtitle file...")
65
+
66
+ transcript_lines = []
67
+
68
+ if subtitle_file.endswith('.vtt'):
69
+ # Parse VTT format
70
+ with open(subtitle_file, 'r', encoding='utf-8') as f:
71
+ lines = f.readlines()
72
+
73
+ for line in lines:
74
+ line = line.strip()
75
+ # Skip headers, timestamps, and empty lines
76
+ if line and not line.startswith('WEBVTT') and '-->' not in line:
77
+ transcript_lines.append(line)
78
+
79
+ elif subtitle_file.endswith('.srt'):
80
+ # Parse SRT format
81
+ with open(subtitle_file, 'r', encoding='utf-8') as f:
82
+ lines = f.readlines()
83
+
84
+ for line in lines:
85
+ line = line.strip()
86
+ # Skip sequence numbers and timestamps
87
+ if line and not line[0].isdigit() and '-->' not in line:
88
+ transcript_lines.append(line)
89
+
90
+ if not transcript_lines:
91
+ print("[ERROR] No text extracted from subtitle file")
92
+ return None
93
+
94
+ # Combine into full transcript
95
+ full_text = "\n".join(transcript_lines)
96
+
97
+ # Save to output file
98
+ print(f"[*] Saving transcript to {output_file}...")
99
+ with open(output_file, 'w', encoding='utf-8') as f:
100
+ f.write(full_text)
101
+
102
+ # Cleanup temp directory
103
+ import shutil
104
+ shutil.rmtree(temp_dir)
105
+
106
+ print(f"\n✓ SUCCESS!")
107
+ print(f" File: {output_file}")
108
+ print(f" Total characters: {len(full_text)}")
109
+ print(f" Total lines: {len(transcript_lines)}")
110
+
111
+ return full_text
112
+
113
+ except Exception as e:
114
+ print(f"[ERROR] {str(e)}")
115
+ import traceback
116
+ traceback.print_exc()
117
+ return None
118
+
119
+
120
+ # ==================== MAIN ====================
121
+
122
+ if __name__ == "__main__":
123
+
124
+ print("=" * 70)
125
+ print("YouTube Transcript Extractor - yt-dlp VERSION (WORKS IN INDIA!)")
126
+ print("=" * 70)
127
+
128
+ video_url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
129
+
130
+ print(f"\nTarget video: {video_url}\n")
131
+
132
+ transcript = get_youtube_transcript_ytdlp(video_url)
133
+
134
+ if transcript:
135
+ print("\n" + "=" * 70)
136
+ print("TRANSCRIPT PREVIEW (First 800 characters)")
137
+ print("=" * 70)
138
+ print(transcript[:800])
139
+ print("\n...")
140
+ else:
141
+ print("\n[FAILED] Could not extract transcript")
utils/file_utils.py ADDED
@@ -0,0 +1,145 @@
1
+ import os
2
+ import tempfile
3
+ import shutil
4
+ from pathlib import Path
5
+ from typing import List
6
+ from fastapi import UploadFile
7
+
8
+ async def save_upload_file(upload_file: UploadFile) -> str:
9
+ """
10
+ Save an uploaded file to a temporary location
11
+
12
+ Args:
13
+ upload_file: FastAPI UploadFile object
14
+
15
+ Returns:
16
+ Path to the saved temporary file
17
+ """
18
+ try:
19
+ # Create temporary file with appropriate extension
20
+ suffix = Path(upload_file.filename).suffix if upload_file.filename else ""
21
+ temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
22
+
23
+ # Write uploaded content to temporary file
24
+ content = await upload_file.read()
25
+ temp_file.write(content)
26
+ temp_file.close()
27
+
28
+ return temp_file.name
29
+
30
+ except Exception as e:
31
+ print(f"Error saving uploaded file: {e}")
32
+ raise
33
+
34
+ def cleanup_temp_files(file_paths: List[str]) -> None:
35
+ """
36
+ Clean up temporary files
37
+
38
+ Args:
39
+ file_paths: List of file paths to delete
40
+ """
41
+ for file_path in file_paths:
42
+ try:
43
+ if os.path.exists(file_path):
44
+ os.unlink(file_path)
45
+ print(f"Cleaned up temporary file: {file_path}")
46
+ except Exception as e:
47
+ print(f"Error cleaning up file {file_path}: {e}")
48
+
49
+ def cleanup_temp_directories(dir_paths: List[str]) -> None:
50
+ """
51
+ Clean up temporary directories
52
+
53
+ Args:
54
+ dir_paths: List of directory paths to delete
55
+ """
56
+ for dir_path in dir_paths:
57
+ try:
58
+ if os.path.exists(dir_path):
59
+ shutil.rmtree(dir_path)
60
+ print(f"Cleaned up temporary directory: {dir_path}")
61
+ except Exception as e:
62
+ print(f"Error cleaning up directory {dir_path}: {e}")
63
+
64
+ def get_file_extension(filename: str) -> str:
65
+ """
66
+ Get file extension from filename
67
+
68
+ Args:
69
+ filename: Name of the file
70
+
71
+ Returns:
72
+ File extension (including the dot)
73
+ """
74
+ return Path(filename).suffix.lower()
75
+
76
+ def is_valid_image_file(filename: str) -> bool:
77
+ """
78
+ Check if filename represents a valid image file
79
+
80
+ Args:
81
+ filename: Name of the file
82
+
83
+ Returns:
84
+ True if valid image file
85
+ """
86
+ valid_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp'}
87
+ return get_file_extension(filename) in valid_extensions
88
+
89
+ def is_valid_video_file(filename: str) -> bool:
90
+ """
91
+ Check if filename represents a valid video file
92
+
93
+ Args:
94
+ filename: Name of the file
95
+
96
+ Returns:
97
+ True if valid video file
98
+ """
99
+ valid_extensions = {'.mp4', '.avi', '.mov', '.wmv', '.flv', '.webm', '.mkv', '.m4v'}
100
+ return get_file_extension(filename) in valid_extensions
101
+
102
+ def create_temp_directory() -> str:
103
+ """
104
+ Create a temporary directory
105
+
106
+ Returns:
107
+ Path to the created temporary directory
108
+ """
109
+ return tempfile.mkdtemp()
110
+
111
+ def get_file_size(file_path: str) -> int:
112
+ """
113
+ Get file size in bytes
114
+
115
+ Args:
116
+ file_path: Path to the file
117
+
118
+ Returns:
119
+ File size in bytes
120
+ """
121
+ try:
122
+ return os.path.getsize(file_path)
123
+ except OSError:
124
+ return 0
125
+
126
+ def format_file_size(size_bytes: int) -> str:
127
+ """
128
+ Format file size in human-readable format
129
+
130
+ Args:
131
+ size_bytes: File size in bytes
132
+
133
+ Returns:
134
+ Formatted file size string
135
+ """
136
+ if size_bytes == 0:
137
+ return "0B"
138
+
139
+ size_names = ["B", "KB", "MB", "GB", "TB"]
140
+ i = 0
141
+ while size_bytes >= 1024 and i < len(size_names) - 1:
142
+ size_bytes /= 1024.0
143
+ i += 1
144
+
145
+ return f"{size_bytes:.1f}{size_names[i]}"