import gradio as gr import sqlite3 import pandas as pd from huggingface_hub import hf_hub_download, HfApi import os import time import json # ===== CONFIGURATION ===== TARGET_LANGUAGES = ['de', 'en', 'es', 'fr', 'it', 'ja', 'nl', 'pl', 'pt', 'ru', 'zh'] INDEXED_REPO_ID = "cstr/conceptnet-de-indexed" INDEXED_DB_FILENAME = "conceptnet-de-indexed.db" PROGRESS_FILENAME = "indexing_progress.json" CONCEPTNET_BASE = "http://conceptnet.io" # ========================= print(f"šŸŒ Languages: {', '.join([l.upper() for l in TARGET_LANGUAGES])}") HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN") or os.environ.get("HF_API_TOKEN") def log_progress(message, level="INFO"): timestamp = time.strftime("%H:%M:%S") prefix = {"INFO": "ā„¹ļø ", "SUCCESS": "āœ…", "ERROR": "āŒ", "WARN": "āš ļø ", "DEBUG": "šŸ”"}.get(level, "") print(f"[{timestamp}] {prefix} {message}") def check_remote_progress(): if not HF_TOKEN: return {"indexing_complete": False} try: api = HfApi() api.repo_info(repo_id=INDEXED_REPO_ID, repo_type="dataset", token=HF_TOKEN) progress_path = hf_hub_download(repo_id=INDEXED_REPO_ID, filename=PROGRESS_FILENAME, repo_type="dataset", token=HF_TOKEN) with open(progress_path, 'r') as f: return json.load(f) except: return {"indexing_complete": False} def create_indexed_database(): progress = check_remote_progress() if progress.get("indexing_complete"): try: return hf_hub_download(repo_id=INDEXED_REPO_ID, filename=INDEXED_DB_FILENAME, repo_type="dataset", token=HF_TOKEN) except: pass return None DB_PATH = create_indexed_database() def get_db_connection(): conn = sqlite3.connect(DB_PATH, check_same_thread=False) conn.execute("PRAGMA cache_size = -256000") return conn log_progress("Database loaded", "SUCCESS") def get_semantic_profile(word, lang='en', progress=gr.Progress()): """FIXED: Use full URL for relations!""" log_progress(f"Profile: {word} ({lang})", "INFO") progress(0, desc="Starting...") if not word or lang not in TARGET_LANGUAGES: return "āš ļø Invalid input" word = word.strip().lower().replace(' ', '_') like_path = f"{CONCEPTNET_BASE}/c/{lang}/{word}%" # FIXED: Use FULL URLs for relations! relations = [ ("IsA", f"{CONCEPTNET_BASE}/r/IsA"), ("PartOf", f"{CONCEPTNET_BASE}/r/PartOf"), ("HasA", f"{CONCEPTNET_BASE}/r/HasA"), ("UsedFor", f"{CONCEPTNET_BASE}/r/UsedFor"), ("CapableOf", f"{CONCEPTNET_BASE}/r/CapableOf"), ("Causes", f"{CONCEPTNET_BASE}/r/Causes"), ("HasProperty", f"{CONCEPTNET_BASE}/r/HasProperty"), ("Synonym", f"{CONCEPTNET_BASE}/r/Synonym"), ("Antonym", f"{CONCEPTNET_BASE}/r/Antonym"), ("AtLocation", f"{CONCEPTNET_BASE}/r/AtLocation"), ("RelatedTo", f"{CONCEPTNET_BASE}/r/RelatedTo"), ("DerivedFrom", f"{CONCEPTNET_BASE}/r/DerivedFrom"), ] output_md = f"# 🧠 Semantic Profile: '{word}' ({lang.upper()})\n\n" try: with get_db_connection() as conn: cursor = conn.cursor() progress(0.05, desc="Finding nodes...") # Find nodes cursor.execute("SELECT id, label FROM node WHERE id LIKE ? LIMIT 5", (like_path,)) nodes = cursor.fetchall() if not nodes: return f"# 🧠 '{word}'\n\nāš ļø Not found" log_progress(f"Found {len(nodes)} nodes", "SUCCESS") for node_id, label in nodes[:3]: output_md += f"**Node:** `{node_id}` → **{label}**\n" output_md += "\n" total = 0 for i, (rel_name, rel_url) in enumerate(relations): progress((i + 1) / len(relations), desc=f"Querying {rel_name}...") output_md += f"## {rel_name}\n\n" found = False # Outgoing - use exact match on rel_id (FAST!) start = time.time() cursor.execute(""" SELECT en.label, e.weight FROM edge e JOIN node en ON e.end_id = en.id WHERE e.start_id LIKE ? AND e.rel_id = ? ORDER BY e.weight DESC LIMIT 7 """, (like_path, rel_url)) results = cursor.fetchall() elapsed = time.time() - start log_progress(f" {rel_name} outgoing: {len(results)} in {elapsed:.3f}s", "DEBUG") for label, weight in results: output_md += f"- **{word}** {rel_name} → *{label}* `[{weight:.3f}]`\n" found = True total += 1 # Incoming start = time.time() cursor.execute(""" SELECT s.label, e.weight FROM edge e JOIN node s ON e.start_id = s.id WHERE e.end_id LIKE ? AND e.rel_id = ? ORDER BY e.weight DESC LIMIT 7 """, (like_path, rel_url)) results = cursor.fetchall() elapsed = time.time() - start log_progress(f" {rel_name} incoming: {len(results)} in {elapsed:.3f}s", "DEBUG") for label, weight in results: output_md += f"- *{label}* {rel_name} → **{word}** `[{weight:.3f}]`\n" found = True total += 1 if not found: output_md += "*No results*\n" output_md += "\n" progress(1.0, desc="Complete!") output_md += f"---\n**Total relations:** {total}\n" log_progress(f"Profile complete: {total} relations", "SUCCESS") return output_md except Exception as e: log_progress(f"Error: {e}", "ERROR") import traceback traceback.print_exc() return f"**āŒ Error:** {e}" def run_query(start_node, relation, end_node, limit, progress=gr.Progress()): """Query builder - FIXED with full URLs""" log_progress(f"Query: start={start_node}, rel={relation}, end={end_node}", "INFO") progress(0, desc="Building...") query = """ SELECT e.id, s.id, r.label, en.id, e.weight, s.label, en.label FROM edge e JOIN relation r ON e.rel_id = r.id JOIN node s ON e.start_id = s.id JOIN node en ON e.end_id = en.id WHERE 1=1 """ params = [] try: with get_db_connection() as conn: progress(0.3, desc="Adding filters...") # Start node if start_node and start_node.strip(): if start_node.startswith('http://'): pattern = f"{start_node}%" else: pattern = f"{CONCEPTNET_BASE}/c/en/{start_node}%" query += " AND s.id LIKE ?" params.append(pattern) # Relation - FIXED: use full URL! if relation and relation.strip(): if relation.startswith('http://'): rel_value = relation elif relation.startswith('/r/'): rel_value = f"{CONCEPTNET_BASE}{relation}" else: rel_value = f"{CONCEPTNET_BASE}/r/{relation}" query += " AND r.id = ?" params.append(rel_value) log_progress(f"Relation: {rel_value}", "DEBUG") # End node if end_node and end_node.strip(): if end_node.startswith('http://'): pattern = f"{end_node}%" else: pattern = f"{CONCEPTNET_BASE}/c/en/{end_node}%" query += " AND en.id LIKE ?" params.append(pattern) query += " ORDER BY e.weight DESC LIMIT ?" params.append(limit) progress(0.6, desc="Executing...") start_time = time.time() df = pd.read_sql_query(query, conn, params=params) elapsed = time.time() - start_time log_progress(f"Query done: {len(df)} rows in {elapsed:.2f}s", "SUCCESS") progress(1.0, desc="Done!") if df.empty: return pd.DataFrame(), f"āš ļø No results ({elapsed:.2f}s)" df.columns = ['edge_id', 'start_id', 'relation', 'end_id', 'weight', 'start_label', 'end_label'] return df, f"āœ… {len(df)} results in {elapsed:.2f}s" except Exception as e: log_progress(f"Error: {e}", "ERROR") import traceback traceback.print_exc() return pd.DataFrame(), f"āŒ {e}" def run_raw_query(sql_query): if not sql_query.strip().upper().startswith("SELECT"): return pd.DataFrame(), "āŒ Only SELECT" try: with get_db_connection() as conn: start = time.time() df = pd.read_sql_query(sql_query, conn) elapsed = time.time() - start return df, f"āœ… {len(df)} rows in {elapsed:.3f}s" except Exception as e: return pd.DataFrame(), f"āŒ {e}" def get_schema_info(): md = f"# šŸ“š Schema\n\n" md += f"**Repo:** [{INDEXED_REPO_ID}](https://huggingface.co/datasets/{INDEXED_REPO_ID})\n\n" md += "## Key Discovery\n\n" md += "āœ… **Relations use FULL URLs:** `http://conceptnet.io/r/IsA` (not `/r/IsA`)\n\n" md += "āœ… **Nodes use FULL URLs:** `http://conceptnet.io/c/en/dog` (not `/c/en/dog`)\n\n" try: with get_db_connection() as conn: cursor = conn.cursor() md += "## Common Relations\n\n" cursor.execute("SELECT id, label FROM relation ORDER BY label LIMIT 20") for rel_id, label in cursor.fetchall(): md += f"- **{label}:** `{rel_id}`\n" md += "\n## Tables\n\n" cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'") for table, in cursor.fetchall(): cursor.execute(f"SELECT COUNT(*) FROM {table}") md += f"- **{table}:** {cursor.fetchone()[0]:,} rows\n" except Exception as e: md += f"\nError: {e}\n" return md # UI with gr.Blocks(title="ConceptNet Explorer", theme=gr.themes.Soft()) as demo: gr.Markdown("# 🧠 ConceptNet Explorer") gr.Markdown(f"**Languages:** {', '.join([l.upper() for l in TARGET_LANGUAGES])} | āœ… **FIXED:** Using full URLs for relations!") with gr.Tabs(): with gr.TabItem("šŸ” Semantic Profile"): gr.Markdown("**Explore semantic relations for any word**") with gr.Row(): word_input = gr.Textbox( label="Word", placeholder="dog", value="dog", info="Enter a word" ) lang_input = gr.Dropdown( choices=TARGET_LANGUAGES, value="en", label="Language" ) semantic_btn = gr.Button("šŸ” Get Semantic Profile", variant="primary", size="lg") semantic_output = gr.Markdown() gr.Examples( examples=[ ["dog", "en"], ["hund", "de"], ["perro", "es"], ["chien", "fr"], ], inputs=[word_input, lang_input] ) with gr.TabItem("⚔ Query Builder"): gr.Markdown("**Build custom relationship queries**") with gr.Row(): start_input = gr.Textbox(label="Start Node", placeholder="dog") rel_input = gr.Textbox(label="Relation", placeholder="IsA", value="IsA") end_input = gr.Textbox(label="End Node", placeholder="") limit_slider = gr.Slider(label="Limit", minimum=1, maximum=200, value=50) query_btn = gr.Button("ā–¶ļø Run Query", variant="primary", size="lg") status_output = gr.Markdown() results_output = gr.DataFrame(wrap=True) with gr.TabItem("šŸ’» Raw SQL"): gr.Markdown("**Execute custom SQL queries**") raw_sql_input = gr.Textbox( label="SQL Query", value=f"SELECT e.*, r.label FROM edge e JOIN relation r ON e.rel_id = r.id WHERE e.start_id = '{CONCEPTNET_BASE}/c/en/dog' LIMIT 10", lines=3 ) raw_btn = gr.Button("ā–¶ļø Execute") raw_status = gr.Markdown() raw_results = gr.DataFrame() with gr.TabItem("šŸ“Š Schema"): schema_btn = gr.Button("šŸ“Š Load Schema") schema_output = gr.Markdown() gr.Markdown( "---\n" "**Fixed:** Relations now use full URLs (`http://conceptnet.io/r/IsA`) | " "**Performance:** Exact match on rel_id = fast queries!" ) semantic_btn.click(get_semantic_profile, [word_input, lang_input], semantic_output) query_btn.click(run_query, [start_input, rel_input, end_input, limit_slider], [results_output, status_output]) raw_btn.click(run_raw_query, raw_sql_input, [raw_results, raw_status]) schema_btn.click(get_schema_info, None, schema_output) if __name__ == "__main__": log_progress("APP READY - Relations use full URLs now!", "SUCCESS") demo.launch(ssr_mode=False)