cstr committed on
Commit
33e2835
·
verified ·
1 Parent(s): a12e87b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -17
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import gradio as gr
2
  import sqlite3
3
  import pandas as pd
4
- from huggingface_hub import hf_hub_download, HfApi, HfFolder
5
  import os
6
  import time
7
  import shutil
@@ -18,17 +18,15 @@ LOCAL_DB_PATH = "/tmp/conceptnet-indexed.db"
18
 
19
  print(f"🌍 Filtering to: {', '.join([l.upper() for l in TARGET_LANGUAGES])}")
20
 
21
- # Get HF token
22
- HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN")
23
- if not HF_TOKEN:
24
- try:
25
- HF_TOKEN = HfFolder.get_token()
26
- except:
27
- pass
28
 
29
  if not HF_TOKEN:
30
  print("⚠️ WARNING: No HF_TOKEN found!")
31
  print(" Add HF_TOKEN in Space settings to enable checkpointing")
 
 
 
32
 
33
  # Original database
34
  ORIGINAL_REPO_ID = "ysenarath/conceptnet-sqlite"
@@ -163,6 +161,8 @@ def update_remote_progress(completed_indices, analyzed_tables=None, database_upl
163
 
164
  except Exception as e:
165
  log_progress(f"Failed to update progress: {e}", "ERROR")
 
 
166
  return False
167
 
168
  def upload_database_checkpoint(message=""):
@@ -180,7 +180,8 @@ def upload_database_checkpoint(message=""):
180
 
181
  db_size = os.path.getsize(LOCAL_DB_PATH) / (2**30)
182
  log_progress(f"Uploading database checkpoint ({db_size:.2f} GB)...", "CHECKPOINT")
183
- log_progress(f" {message}", "INFO")
 
184
  log_progress(f" This may take 5-10 minutes...", "INFO")
185
 
186
  start = time.time()
@@ -195,7 +196,8 @@ def upload_database_checkpoint(message=""):
195
  )
196
 
197
  elapsed = time.time() - start
198
- log_progress(f"Database uploaded in {elapsed:.1f}s ({db_size*8/elapsed:.1f} Mbps)", "SUCCESS")
 
199
 
200
  return True
201
 
@@ -257,8 +259,8 @@ def create_indexed_database():
257
  )
258
  shutil.copy2(remote_db, LOCAL_DB_PATH)
259
  log_progress("Downloaded partial database", "SUCCESS")
260
- except:
261
- log_progress("No partial database, starting from original", "INFO")
262
 
263
  if not os.path.exists(LOCAL_DB_PATH):
264
  # Download and copy original
@@ -292,6 +294,7 @@ def create_indexed_database():
292
  cursor.execute("PRAGMA journal_mode = WAL")
293
  cursor.execute("PRAGMA synchronous = NORMAL")
294
  cursor.execute("PRAGMA cache_size = -512000")
 
295
 
296
  # PHASE 1: Create Indices
297
  log_progress("="*60, "INFO")
@@ -331,7 +334,7 @@ def create_indexed_database():
331
  )
332
 
333
  # Upload checkpoint
334
- upload_database_checkpoint(f"Checkpoint: {idx_name} created")
335
 
336
  except Exception as e:
337
  log_progress(f"Failed to create {idx_name}: {e}", "ERROR")
@@ -356,10 +359,13 @@ def create_indexed_database():
356
 
357
  log_progress(f"[{i}/{len(tables)}] Analyzing table: {table}", "INFO")
358
 
359
- # Get table size for progress estimation
360
- cursor.execute(f"SELECT COUNT(*) FROM {table}")
361
- row_count = cursor.fetchone()[0]
362
- log_progress(f" Table has {row_count:,} rows", "INFO")
 
 
 
363
 
364
  start = time.time()
365
 
 
1
  import gradio as gr
2
  import sqlite3
3
  import pandas as pd
4
+ from huggingface_hub import hf_hub_download, HfApi
5
  import os
6
  import time
7
  import shutil
 
18
 
19
  print(f"🌍 Filtering to: {', '.join([l.upper() for l in TARGET_LANGUAGES])}")
20
 
21
+ # Get HF token (multiple methods for compatibility)
22
+ HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN") or os.environ.get("HF_API_TOKEN")
 
 
 
 
 
23
 
24
  if not HF_TOKEN:
25
  print("⚠️ WARNING: No HF_TOKEN found!")
26
  print(" Add HF_TOKEN in Space settings to enable checkpointing")
27
+ print(" Go to: Settings > Variables and secrets > HF_TOKEN")
28
+ else:
29
+ print(f"✅ HF_TOKEN found (length: {len(HF_TOKEN)})")
30
 
31
  # Original database
32
  ORIGINAL_REPO_ID = "ysenarath/conceptnet-sqlite"
 
161
 
162
  except Exception as e:
163
  log_progress(f"Failed to update progress: {e}", "ERROR")
164
+ import traceback
165
+ traceback.print_exc()
166
  return False
167
 
168
  def upload_database_checkpoint(message=""):
 
180
 
181
  db_size = os.path.getsize(LOCAL_DB_PATH) / (2**30)
182
  log_progress(f"Uploading database checkpoint ({db_size:.2f} GB)...", "CHECKPOINT")
183
+ if message:
184
+ log_progress(f" {message}", "INFO")
185
  log_progress(f" This may take 5-10 minutes...", "INFO")
186
 
187
  start = time.time()
 
196
  )
197
 
198
  elapsed = time.time() - start
199
+ speed_mbps = (db_size * 8) / elapsed if elapsed > 0 else 0
200
+ log_progress(f"Database uploaded in {elapsed:.1f}s ({speed_mbps:.1f} Mbps)", "SUCCESS")
201
 
202
  return True
203
 
 
259
  )
260
  shutil.copy2(remote_db, LOCAL_DB_PATH)
261
  log_progress("Downloaded partial database", "SUCCESS")
262
+ except Exception as e:
263
+ log_progress(f"No partial database: {e}", "INFO")
264
 
265
  if not os.path.exists(LOCAL_DB_PATH):
266
  # Download and copy original
 
294
  cursor.execute("PRAGMA journal_mode = WAL")
295
  cursor.execute("PRAGMA synchronous = NORMAL")
296
  cursor.execute("PRAGMA cache_size = -512000")
297
+ cursor.execute("PRAGMA temp_store = MEMORY")
298
 
299
  # PHASE 1: Create Indices
300
  log_progress("="*60, "INFO")
 
334
  )
335
 
336
  # Upload checkpoint
337
+ upload_database_checkpoint(f"Checkpoint: {idx_name} created ({i}/{len(indices_to_create)})")
338
 
339
  except Exception as e:
340
  log_progress(f"Failed to create {idx_name}: {e}", "ERROR")
 
359
 
360
  log_progress(f"[{i}/{len(tables)}] Analyzing table: {table}", "INFO")
361
 
362
+ # Get table size
363
+ try:
364
+ cursor.execute(f"SELECT COUNT(*) FROM {table}")
365
+ row_count = cursor.fetchone()[0]
366
+ log_progress(f" Table has {row_count:,} rows", "INFO")
367
+ except:
368
+ log_progress(f" Could not count rows", "WARN")
369
 
370
  start = time.time()
371