conceptnet_db / app.py
cstr's picture
Update app.py
bb71fb2 verified
raw
history blame
8.76 kB
import gradio as gr
import sqlite3
import pandas as pd
from huggingface_hub import hf_hub_download
import os
import traceback
# --- 1. Download and Cache the Database ---
print("Downloading database (if not already cached)...")
REPO_ID = "ysenarath/conceptnet-sqlite"
DB_FILENAME = "data/conceptnet-v5.7.0.db"
# This is the critical caching step for HF Spaces
DB_PATH = hf_hub_download(
repo_id=REPO_ID,
filename=DB_FILENAME,
repo_type="dataset"
)
print(f"Database is available at: {DB_PATH}")
# --- 2. Database Helper Functions ---
def get_db_connection():
"""
Creates a new read-only connection to the SQLite database.
This is safer for Gradio's multi-threading.
"""
try:
# Connect in read-only mode
db_uri = f"file:{DB_PATH}?mode=ro"
conn = sqlite3.connect(db_uri, uri=True)
return conn
except Exception as e:
print(f"Error connecting to DB: {e}")
traceback.print_exc()
return None
def get_schema_info():
"""
Dynamically queries the SQLite database to get its schema.
"""
print("Getting schema info...")
schema_md = "# Database Schema\n\n"
try:
with get_db_connection() as conn:
cursor = conn.cursor()
cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';")
tables = cursor.fetchall()
if not tables:
return "Could not find any tables in the database."
for table in tables:
table_name = table[0]
schema_md += f"## Table: `{table_name}`\n\n"
schema_md += "| Column Name | Data Type | Not Null | Primary Key |\n"
schema_md += "|:------------|:----------|:---------|:------------|\n"
cursor.execute(f"PRAGMA table_info({table_name});")
columns = cursor.fetchall()
for col in columns:
# col structure: (cid, name, type, notnull, dflt_value, pk)
name, dtype, notnull, pk = col[1], col[2], col[3], col[5]
schema_md += f"| `{name}` | `{dtype}` | {bool(notnull)} | {bool(pk)} |\n"
schema_md += "\n"
return schema_md
except Exception as e:
print(f"Error in get_schema_info: {e}")
traceback.print_exc()
return f"An error occurred while fetching schema: {e}"
def run_query(start_node, relation, end_node, limit):
"""
Runs an efficient query by filtering on text paths first,
then joining by indexed IDs.
*** FIX ***: This now queries the `node.path` column, not `node.label`.
"""
print(f"Running query: start='{start_node}', rel='{relation}', end='{end_node}'")
select_clause = """
SELECT
e.id AS edge_id,
s.path AS start_path,
r.label AS relation_label,
en.path AS end_path,
e.weight,
e.dataset,
e.surface_text,
s.label AS start_label_text,
en.label AS end_label_text
FROM
edge AS e
JOIN
relation AS r ON e.rel_id = r.id
JOIN
node AS s ON e.start_id = s.id
JOIN
node AS en ON e.end_id = en.id
"""
where_conditions = []
params = []
try:
# --- FIX: Query `node.path` for nodes, `relation.label` for relations ---
if start_node:
op = "LIKE" if "%" in start_node else "="
where_conditions.append(f"e.start_id IN (SELECT id FROM node WHERE path {op} ?)")
params.append(start_node)
if relation:
op = "LIKE" if "%" in relation else "="
where_conditions.append(f"e.rel_id IN (SELECT id FROM relation WHERE label {op} ?)")
params.append(relation)
if end_node:
op = "LIKE" if "%" in end_node else "="
where_conditions.append(f"e.end_id IN (SELECT id FROM node WHERE path {op} ?)")
params.append(end_node)
# --- End of Fix ---
if not where_conditions:
where_clause = " WHERE 1=1" # Get random edges if no filter
else:
where_clause = " WHERE " + " AND ".join(where_conditions)
query = select_clause + where_clause + " LIMIT ?"
params.append(limit)
print(f"Executing SQL: {query}")
print(f"With params: {params}")
with get_db_connection() as conn:
df = pd.read_sql_query(query, conn, params=params)
if df.empty:
return pd.DataFrame(), "Query ran successfully but returned no results. (This is common if your query is very specific)."
return df, "Query successful!"
except Exception as e:
print(f"Error in run_query: {e}")
traceback.print_exc()
err_msg = f"**Query Failed!**\n\n`{e}`"
return pd.DataFrame(), err_msg
def run_raw_query(sql_query):
"""
Executes a raw, read-only SQL query against the database.
DANGEROUS but useful for debugging.
"""
print(f"Running raw query: {sql_query}")
# Basic safety check for read-only
if not sql_query.strip().upper().startswith("SELECT"):
return pd.DataFrame(), "**Error:** Only `SELECT` statements are allowed for safety."
try:
with get_db_connection() as conn:
df = pd.read_sql_query(sql_query, conn)
if df.empty:
return df, "Query ran successfully but returned no results."
return df, "Raw query successful!"
except Exception as e:
print(f"Error in run_raw_query: {e}")
traceback.print_exc()
return pd.DataFrame(), f"**Query Failed!**\n\n`{e}`"
# --- 3. Build the Gradio UI ---
with gr.Blocks(title="ConceptNet SQLite Explorer") as demo:
gr.Markdown(f"# ConceptNet SQLite Explorer (DB: `{DB_FILENAME}`)")
gr.Markdown(f"**Note:** Initial query might take a few seconds as the database warms up.")
with gr.Tabs():
with gr.TabItem("Query Builder"):
gr.Markdown(
"**Run a query against the database.**\n"
"This query joins the `edge`, `node`, and `relation` tables to find connections.\n"
"You can use SQL wildcards like `%` (e.g., `/c/en/dog%` or `/r/Is%`)."
)
with gr.Row():
start_input = gr.Textbox(label="Start Node Path", placeholder="/c/en/dog")
rel_input = gr.Textbox(label="Relation Path", placeholder="/r/IsA")
end_input = gr.Textbox(label="End Node Path", placeholder="/c/en/animal")
limit_slider = gr.Slider(label="Limit", minimum=1, maximum=200, value=10, step=1)
query_btn = gr.Button("Run Query", variant="primary")
status_output = gr.Markdown("Status will appear here...")
results_output = gr.DataFrame(label="Query Results", interactive=False)
with gr.TabItem("Raw SQL Query"):
gr.Markdown("**Danger Zone:** Run a raw `SELECT` query against the database. Use the Schema Explorer tab to see table/column names.")
raw_sql_input = gr.Textbox(
label="Raw SQL Query",
placeholder="SELECT s.path, s.label FROM node AS s WHERE s.label = 'dog' LIMIT 5",
lines=5
)
raw_query_btn = gr.Button("Run Raw SQL", variant="secondary")
raw_status_output = gr.Markdown("Status will appear here...")
raw_results_output = gr.DataFrame(label="Raw Query Results", interactive=False)
with gr.TabItem("Schema Explorer"):
gr.Markdown("Click the button to see all tables and columns in the database. This helps you write correct queries.")
schema_btn = gr.Button("Show Database Schema", variant="secondary")
schema_output = gr.Markdown("Schema will appear here...")
# --- 4. Connect UI Elements to Functions ---
query_btn.click(
fn=run_query,
inputs=[start_input, rel_input, end_input, limit_slider],
outputs=[results_output, status_output],
api_name="run_query"
)
raw_query_btn.click(
fn=run_raw_query,
inputs=[raw_sql_input],
outputs=[raw_results_output, raw_status_output],
api_name="run_raw_query"
)
schema_btn.click(
fn=get_schema_info,
inputs=None,
outputs=schema_output,
api_name="get_schema"
)
if __name__ == "__main__":
# Removed experimental ssr_mode=False
demo.launch(ssr_mode=False)