import gradio as gr import sqlite3 import pandas as pd from huggingface_hub import hf_hub_download import os import traceback # --- 1. Download and Cache the Database --- print("Downloading database (if not already cached)...") REPO_ID = "ysenarath/conceptnet-sqlite" DB_FILENAME = "data/conceptnet-v5.7.0.db" # This is the critical caching step for HF Spaces DB_PATH = hf_hub_download( repo_id=REPO_ID, filename=DB_FILENAME, repo_type="dataset" ) print(f"Database is available at: {DB_PATH}") # --- 2. Database Helper Functions --- def get_db_connection(): """ Creates a new read-only connection to the SQLite database. This is safer for Gradio's multi-threading. """ try: # Connect in read-only mode db_uri = f"file:{DB_PATH}?mode=ro" conn = sqlite3.connect(db_uri, uri=True) return conn except Exception as e: print(f"Error connecting to DB: {e}") traceback.print_exc() return None def get_schema_info(): """ Dynamically queries the SQLite database to get its schema. """ print("Getting schema info...") schema_md = "# Database Schema\n\n" try: with get_db_connection() as conn: cursor = conn.cursor() cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';") tables = cursor.fetchall() if not tables: return "Could not find any tables in the database." for table in tables: table_name = table[0] schema_md += f"## Table: `{table_name}`\n\n" schema_md += "| Column Name | Data Type | Not Null | Primary Key |\n" schema_md += "|:------------|:----------|:---------|:------------|\n" cursor.execute(f"PRAGMA table_info({table_name});") columns = cursor.fetchall() for col in columns: # col structure: (cid, name, type, notnull, dflt_value, pk) name, dtype, notnull, pk = col[1], col[2], col[3], col[5] schema_md += f"| `{name}` | `{dtype}` | {bool(notnull)} | {bool(pk)} |\n" schema_md += "\n" return schema_md except Exception as e: print(f"Error in get_schema_info: {e}") traceback.print_exc() return f"An error occurred while fetching schema: {e}" def run_query(start_node, relation, end_node, limit): """ Runs an efficient query by filtering on text paths first, then joining by indexed IDs. *** FIX ***: This now queries the `node.path` column, not `node.label`. """ print(f"Running query: start='{start_node}', rel='{relation}', end='{end_node}'") select_clause = """ SELECT e.id AS edge_id, s.path AS start_path, r.label AS relation_label, en.path AS end_path, e.weight, e.dataset, e.surface_text, s.label AS start_label_text, en.label AS end_label_text FROM edge AS e JOIN relation AS r ON e.rel_id = r.id JOIN node AS s ON e.start_id = s.id JOIN node AS en ON e.end_id = en.id """ where_conditions = [] params = [] try: # --- FIX: Query `node.path` for nodes, `relation.label` for relations --- if start_node: op = "LIKE" if "%" in start_node else "=" where_conditions.append(f"e.start_id IN (SELECT id FROM node WHERE path {op} ?)") params.append(start_node) if relation: op = "LIKE" if "%" in relation else "=" where_conditions.append(f"e.rel_id IN (SELECT id FROM relation WHERE label {op} ?)") params.append(relation) if end_node: op = "LIKE" if "%" in end_node else "=" where_conditions.append(f"e.end_id IN (SELECT id FROM node WHERE path {op} ?)") params.append(end_node) # --- End of Fix --- if not where_conditions: where_clause = " WHERE 1=1" # Get random edges if no filter else: where_clause = " WHERE " + " AND ".join(where_conditions) query = select_clause + where_clause + " LIMIT ?" params.append(limit) print(f"Executing SQL: {query}") print(f"With params: {params}") with get_db_connection() as conn: df = pd.read_sql_query(query, conn, params=params) if df.empty: return pd.DataFrame(), "Query ran successfully but returned no results. (This is common if your query is very specific)." return df, "Query successful!" except Exception as e: print(f"Error in run_query: {e}") traceback.print_exc() err_msg = f"**Query Failed!**\n\n`{e}`" return pd.DataFrame(), err_msg def run_raw_query(sql_query): """ Executes a raw, read-only SQL query against the database. DANGEROUS but useful for debugging. """ print(f"Running raw query: {sql_query}") # Basic safety check for read-only if not sql_query.strip().upper().startswith("SELECT"): return pd.DataFrame(), "**Error:** Only `SELECT` statements are allowed for safety." try: with get_db_connection() as conn: df = pd.read_sql_query(sql_query, conn) if df.empty: return df, "Query ran successfully but returned no results." return df, "Raw query successful!" except Exception as e: print(f"Error in run_raw_query: {e}") traceback.print_exc() return pd.DataFrame(), f"**Query Failed!**\n\n`{e}`" # --- 3. Build the Gradio UI --- with gr.Blocks(title="ConceptNet SQLite Explorer") as demo: gr.Markdown(f"# ConceptNet SQLite Explorer (DB: `{DB_FILENAME}`)") gr.Markdown(f"**Note:** Initial query might take a few seconds as the database warms up.") with gr.Tabs(): with gr.TabItem("Query Builder"): gr.Markdown( "**Run a query against the database.**\n" "This query joins the `edge`, `node`, and `relation` tables to find connections.\n" "You can use SQL wildcards like `%` (e.g., `/c/en/dog%` or `/r/Is%`)." ) with gr.Row(): start_input = gr.Textbox(label="Start Node Path", placeholder="/c/en/dog") rel_input = gr.Textbox(label="Relation Path", placeholder="/r/IsA") end_input = gr.Textbox(label="End Node Path", placeholder="/c/en/animal") limit_slider = gr.Slider(label="Limit", minimum=1, maximum=200, value=10, step=1) query_btn = gr.Button("Run Query", variant="primary") status_output = gr.Markdown("Status will appear here...") results_output = gr.DataFrame(label="Query Results", interactive=False) with gr.TabItem("Raw SQL Query"): gr.Markdown("**Danger Zone:** Run a raw `SELECT` query against the database. Use the Schema Explorer tab to see table/column names.") raw_sql_input = gr.Textbox( label="Raw SQL Query", placeholder="SELECT s.path, s.label FROM node AS s WHERE s.label = 'dog' LIMIT 5", lines=5 ) raw_query_btn = gr.Button("Run Raw SQL", variant="secondary") raw_status_output = gr.Markdown("Status will appear here...") raw_results_output = gr.DataFrame(label="Raw Query Results", interactive=False) with gr.TabItem("Schema Explorer"): gr.Markdown("Click the button to see all tables and columns in the database. This helps you write correct queries.") schema_btn = gr.Button("Show Database Schema", variant="secondary") schema_output = gr.Markdown("Schema will appear here...") # --- 4. Connect UI Elements to Functions --- query_btn.click( fn=run_query, inputs=[start_input, rel_input, end_input, limit_slider], outputs=[results_output, status_output], api_name="run_query" ) raw_query_btn.click( fn=run_raw_query, inputs=[raw_sql_input], outputs=[raw_results_output, raw_status_output], api_name="run_raw_query" ) schema_btn.click( fn=get_schema_info, inputs=None, outputs=schema_output, api_name="get_schema" ) if __name__ == "__main__": # Removed experimental ssr_mode=False demo.launch(ssr_mode=False)