Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import sqlite3
|
| 3 |
import pandas as pd
|
| 4 |
-
from huggingface_hub import hf_hub_download, HfApi
|
| 5 |
import os
|
| 6 |
import time
|
| 7 |
import shutil
|
|
@@ -18,17 +18,15 @@ LOCAL_DB_PATH = "/tmp/conceptnet-indexed.db"
|
|
| 18 |
|
| 19 |
print(f"🌍 Filtering to: {', '.join([l.upper() for l in TARGET_LANGUAGES])}")
|
| 20 |
|
| 21 |
-
# Get HF token
|
| 22 |
-
HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN")
|
| 23 |
-
if not HF_TOKEN:
|
| 24 |
-
try:
|
| 25 |
-
HF_TOKEN = HfFolder.get_token()
|
| 26 |
-
except:
|
| 27 |
-
pass
|
| 28 |
|
| 29 |
if not HF_TOKEN:
|
| 30 |
print("⚠️ WARNING: No HF_TOKEN found!")
|
| 31 |
print(" Add HF_TOKEN in Space settings to enable checkpointing")
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
# Original database
|
| 34 |
ORIGINAL_REPO_ID = "ysenarath/conceptnet-sqlite"
|
|
@@ -163,6 +161,8 @@ def update_remote_progress(completed_indices, analyzed_tables=None, database_upl
|
|
| 163 |
|
| 164 |
except Exception as e:
|
| 165 |
log_progress(f"Failed to update progress: {e}", "ERROR")
|
|
|
|
|
|
|
| 166 |
return False
|
| 167 |
|
| 168 |
def upload_database_checkpoint(message=""):
|
|
@@ -180,7 +180,8 @@ def upload_database_checkpoint(message=""):
|
|
| 180 |
|
| 181 |
db_size = os.path.getsize(LOCAL_DB_PATH) / (2**30)
|
| 182 |
log_progress(f"Uploading database checkpoint ({db_size:.2f} GB)...", "CHECKPOINT")
|
| 183 |
-
|
|
|
|
| 184 |
log_progress(f" This may take 5-10 minutes...", "INFO")
|
| 185 |
|
| 186 |
start = time.time()
|
|
@@ -195,7 +196,8 @@ def upload_database_checkpoint(message=""):
|
|
| 195 |
)
|
| 196 |
|
| 197 |
elapsed = time.time() - start
|
| 198 |
-
|
|
|
|
| 199 |
|
| 200 |
return True
|
| 201 |
|
|
@@ -257,8 +259,8 @@ def create_indexed_database():
|
|
| 257 |
)
|
| 258 |
shutil.copy2(remote_db, LOCAL_DB_PATH)
|
| 259 |
log_progress("Downloaded partial database", "SUCCESS")
|
| 260 |
-
except:
|
| 261 |
-
log_progress("No partial database
|
| 262 |
|
| 263 |
if not os.path.exists(LOCAL_DB_PATH):
|
| 264 |
# Download and copy original
|
|
@@ -292,6 +294,7 @@ def create_indexed_database():
|
|
| 292 |
cursor.execute("PRAGMA journal_mode = WAL")
|
| 293 |
cursor.execute("PRAGMA synchronous = NORMAL")
|
| 294 |
cursor.execute("PRAGMA cache_size = -512000")
|
|
|
|
| 295 |
|
| 296 |
# PHASE 1: Create Indices
|
| 297 |
log_progress("="*60, "INFO")
|
|
@@ -331,7 +334,7 @@ def create_indexed_database():
|
|
| 331 |
)
|
| 332 |
|
| 333 |
# Upload checkpoint
|
| 334 |
-
upload_database_checkpoint(f"Checkpoint: {idx_name} created")
|
| 335 |
|
| 336 |
except Exception as e:
|
| 337 |
log_progress(f"Failed to create {idx_name}: {e}", "ERROR")
|
|
@@ -356,10 +359,13 @@ def create_indexed_database():
|
|
| 356 |
|
| 357 |
log_progress(f"[{i}/{len(tables)}] Analyzing table: {table}", "INFO")
|
| 358 |
|
| 359 |
-
# Get table size
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
|
|
|
|
|
|
|
|
|
| 363 |
|
| 364 |
start = time.time()
|
| 365 |
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import sqlite3
|
| 3 |
import pandas as pd
|
| 4 |
+
from huggingface_hub import hf_hub_download, HfApi
|
| 5 |
import os
|
| 6 |
import time
|
| 7 |
import shutil
|
|
|
|
| 18 |
|
| 19 |
print(f"🌍 Filtering to: {', '.join([l.upper() for l in TARGET_LANGUAGES])}")
|
| 20 |
|
| 21 |
+
# Get HF token (multiple methods for compatibility)
|
| 22 |
+
HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN") or os.environ.get("HF_API_TOKEN")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
if not HF_TOKEN:
|
| 25 |
print("⚠️ WARNING: No HF_TOKEN found!")
|
| 26 |
print(" Add HF_TOKEN in Space settings to enable checkpointing")
|
| 27 |
+
print(" Go to: Settings > Variables and secrets > HF_TOKEN")
|
| 28 |
+
else:
|
| 29 |
+
print(f"✅ HF_TOKEN found (length: {len(HF_TOKEN)})")
|
| 30 |
|
| 31 |
# Original database
|
| 32 |
ORIGINAL_REPO_ID = "ysenarath/conceptnet-sqlite"
|
|
|
|
| 161 |
|
| 162 |
except Exception as e:
|
| 163 |
log_progress(f"Failed to update progress: {e}", "ERROR")
|
| 164 |
+
import traceback
|
| 165 |
+
traceback.print_exc()
|
| 166 |
return False
|
| 167 |
|
| 168 |
def upload_database_checkpoint(message=""):
|
|
|
|
| 180 |
|
| 181 |
db_size = os.path.getsize(LOCAL_DB_PATH) / (2**30)
|
| 182 |
log_progress(f"Uploading database checkpoint ({db_size:.2f} GB)...", "CHECKPOINT")
|
| 183 |
+
if message:
|
| 184 |
+
log_progress(f" {message}", "INFO")
|
| 185 |
log_progress(f" This may take 5-10 minutes...", "INFO")
|
| 186 |
|
| 187 |
start = time.time()
|
|
|
|
| 196 |
)
|
| 197 |
|
| 198 |
elapsed = time.time() - start
|
| 199 |
+
speed_mbps = (db_size * 8) / elapsed if elapsed > 0 else 0
|
| 200 |
+
log_progress(f"Database uploaded in {elapsed:.1f}s ({speed_mbps:.1f} Mbps)", "SUCCESS")
|
| 201 |
|
| 202 |
return True
|
| 203 |
|
|
|
|
| 259 |
)
|
| 260 |
shutil.copy2(remote_db, LOCAL_DB_PATH)
|
| 261 |
log_progress("Downloaded partial database", "SUCCESS")
|
| 262 |
+
except Exception as e:
|
| 263 |
+
log_progress(f"No partial database: {e}", "INFO")
|
| 264 |
|
| 265 |
if not os.path.exists(LOCAL_DB_PATH):
|
| 266 |
# Download and copy original
|
|
|
|
| 294 |
cursor.execute("PRAGMA journal_mode = WAL")
|
| 295 |
cursor.execute("PRAGMA synchronous = NORMAL")
|
| 296 |
cursor.execute("PRAGMA cache_size = -512000")
|
| 297 |
+
cursor.execute("PRAGMA temp_store = MEMORY")
|
| 298 |
|
| 299 |
# PHASE 1: Create Indices
|
| 300 |
log_progress("="*60, "INFO")
|
|
|
|
| 334 |
)
|
| 335 |
|
| 336 |
# Upload checkpoint
|
| 337 |
+
upload_database_checkpoint(f"Checkpoint: {idx_name} created ({i}/{len(indices_to_create)})")
|
| 338 |
|
| 339 |
except Exception as e:
|
| 340 |
log_progress(f"Failed to create {idx_name}: {e}", "ERROR")
|
|
|
|
| 359 |
|
| 360 |
log_progress(f"[{i}/{len(tables)}] Analyzing table: {table}", "INFO")
|
| 361 |
|
| 362 |
+
# Get table size
|
| 363 |
+
try:
|
| 364 |
+
cursor.execute(f"SELECT COUNT(*) FROM {table}")
|
| 365 |
+
row_count = cursor.fetchone()[0]
|
| 366 |
+
log_progress(f" Table has {row_count:,} rows", "INFO")
|
| 367 |
+
except:
|
| 368 |
+
log_progress(f" Could not count rows", "WARN")
|
| 369 |
|
| 370 |
start = time.time()
|
| 371 |
|