shaun3141 committed on
Commit
64ab15a
·
0 Parent(s):

Reorganize project structure: move HF/Gradio files to hf_space/ subfolder

Browse files

- Move all Hugging Face Space deployment files to hf_space/ directory
- Update upload scripts and setup scripts to work from new location
- Update README.md with new paths and project structure
- Follows industry best practices for organizing deployment code

Files changed (8) hide show
  1. .dockerignore +19 -0
  2. Dockerfile +37 -0
  3. app.py +378 -0
  4. check_build_status.py +88 -0
  5. requirements.txt +15 -0
  6. setup_hf_space.py +99 -0
  7. upload_notebook.py +43 -0
  8. upload_to_space.sh +35 -0
.dockerignore ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__
2
+ *.pyc
3
+ *.pyo
4
+ *.pyd
5
+ .Python
6
+ *.so
7
+ *.egg
8
+ *.egg-info
9
+ dist
10
+ build
11
+ .env
12
+ .venv
13
+ venv/
14
+ ENV/
15
+ .git
16
+ .gitignore
17
+ .DS_Store
18
+ *.log
19
+
Dockerfile ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11-slim

# System packages: what Dev Mode needs plus everyday developer tools.
RUN apt-get update \
 && apt-get install -y \
      bash \
      curl \
      ffmpeg \
      git \
      git-lfs \
      htop \
      nano \
      procps \
      vim \
      wget \
 && rm -rf /var/lib/apt/lists/*

# Python dependencies are installed before the application code is copied in.
COPY requirements.txt /tmp/requirements.txt
RUN pip install --no-cache-dir -r /tmp/requirements.txt

# Application directory, with its contents owned by uid 1000.
WORKDIR /app
COPY --chown=1000:1000 . /app

# Give uid 1000 a home directory and ownership of both it and /app.
RUN mkdir -p /home/user \
 && chown -R 1000:1000 /home/user /app

ENV HOME=/home/user

# Run as the unprivileged user from here on.
USER 1000

# Port the Gradio app listens on.
EXPOSE 7860

# Dev Mode requires a CMD instruction.
CMD ["python", "app.py"]
37
+
app.py ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Caribbean Voices Hackathon - Audio Transcription Gradio App
3
+ A framework for running experiments and sharing work with others
4
+ """
5
+ import gradio as gr
6
+ import torch
7
+ import librosa
8
+ import numpy as np
9
+ import pandas as pd
10
+ from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
11
+ import time
12
+ import os
13
+ from pathlib import Path
14
+ import json
15
+
16
# Display name -> Hugging Face Hub checkpoint, for experimentation.
# load_model() passes each checkpoint to both Wav2Vec2Processor and
# Wav2Vec2ForCTC, so every entry must be a CTC fine-tuned checkpoint.
AVAILABLE_MODELS = {
    "Wav2Vec2 Base (960h)": "facebook/wav2vec2-base-960h",
    "Wav2Vec2 Large (960h)": "facebook/wav2vec2-large-960h",
    # "facebook/wav2vec2-base" is the pre-trained-only checkpoint; the model
    # fine-tuned on 100h of LibriSpeech is published with the "-100h" suffix.
    "Wav2Vec2 Base (100h)": "facebook/wav2vec2-base-100h",
}

# Single-slot model cache, filled in by load_model().
current_model_name = None  # Hub checkpoint id of the cached model
current_processor = None
current_model = None
device = "cuda" if torch.cuda.is_available() else "cpu"
28
+
29
def load_model(model_key):
    """Return the ``(processor, model)`` pair for *model_key*, loading it if needed.

    A single model is cached in the module-level ``current_*`` globals; asking
    for a different model replaces the cached one.

    Args:
        model_key: A key of ``AVAILABLE_MODELS``.

    Returns:
        Tuple ``(processor, model)``. The model has been moved to ``device``
        and put in eval mode.

    Raises:
        KeyError: If *model_key* is not a key of ``AVAILABLE_MODELS``.
    """
    global current_model_name, current_processor, current_model

    model_path = AVAILABLE_MODELS[model_key]

    # Only reload if model changed
    if current_model_name != model_path:
        print(f"Loading model: {model_path}")
        # Build the new pair in locals and commit it in one step: if any call
        # below raises, the cache still holds the previous, matching pair.
        processor = Wav2Vec2Processor.from_pretrained(model_path)
        model = Wav2Vec2ForCTC.from_pretrained(model_path)
        model.to(device)
        model.eval()
        current_processor, current_model = processor, model
        current_model_name = model_path
        print(f"Model loaded on {device}")

    return current_processor, current_model
46
+
47
def transcribe_audio(audio_file, model_choice, max_seconds=30):
    """Transcribe a single audio file.

    Args:
        audio_file: Path of the audio file, or ``None`` when nothing was
            uploaded.
        model_choice: Key of ``AVAILABLE_MODELS`` selecting the model.
        max_seconds: Audio beyond this many seconds is cut off before
            inference.

    Returns:
        Tuple ``(transcription, info_markdown)``. When *audio_file* is
        ``None`` the tuple is ``(prompt, None)``. Failures are reported in
        both elements as error text; this function does not raise.
    """
    if audio_file is None:
        return "Please upload an audio file.", None

    sample_rate = 16000  # rate the audio is resampled to on load

    try:
        processor, model = load_model(model_choice)

        # Load audio
        speech_array, _ = librosa.load(audio_file, sr=sample_rate)
        if len(speech_array) == 0:
            # Report this clearly instead of letting the model fail on an
            # empty tensor with an obscure message.
            raise ValueError("audio file contains no samples")

        # Truncate if needed
        max_len = int(max_seconds * sample_rate)
        if len(speech_array) > max_len:
            speech_array = speech_array[:max_len]
            duration_warning = f"⚠️ Audio truncated to {max_seconds} seconds"
        else:
            duration_warning = ""

        # Prepare inputs
        inputs = processor(
            speech_array,
            sampling_rate=sample_rate,
            return_tensors="pt",
            padding=True,
        )
        inputs = {k: v.to(device) for k, v in inputs.items()}

        # Inference, timed with a monotonic clock (covers decoding as well)
        start_time = time.perf_counter()
        with torch.no_grad():
            logits = model(**inputs).logits

        predicted_ids = torch.argmax(logits, dim=-1)
        transcription = processor.batch_decode(
            predicted_ids,
            skip_special_tokens=True,
        )[0]
        inference_time = time.perf_counter() - start_time

        # Non-zero: empty audio was rejected above.
        audio_duration = len(speech_array) / sample_rate

        info_text = "\n".join([
            "",
            f"**Model:** {model_choice}",
            f"**Audio Duration:** {audio_duration:.2f}s",
            f"**Inference Time:** {inference_time:.3f}s",
            f"**Real-time Factor:** {inference_time / audio_duration:.2f}x",
            duration_warning,
            "",
        ])

        return transcription.strip(), info_text

    except Exception as e:
        # UI boundary: show the failure to the user rather than raising.
        return f"Error: {str(e)}", f"❌ Error occurred: {str(e)}"
110
+
111
def batch_transcribe(audio_files, model_choice, max_seconds=30):
    """Transcribe multiple audio files and export the results as CSV.

    Args:
        audio_files: Uploaded files; each item is either a path string or an
            object whose ``.name`` attribute is the path. ``None`` or an
            empty list means nothing was uploaded.
        model_choice: Key of ``AVAILABLE_MODELS`` selecting the model.
        max_seconds: Audio beyond this many seconds is cut off before
            inference.

    Returns:
        Tuple ``(dataframe, csv_path, summary_markdown)``. When there is
        nothing to process, or the batch as a whole fails, the first two
        elements are ``None`` and the third carries the message. A failure
        on one file is recorded in that file's "Transcription" cell and the
        batch continues.
    """
    if not audio_files:
        # Always three values: callers wire this function to three outputs.
        return None, None, "Please upload audio files."

    import tempfile  # stdlib; only needed for the CSV export below

    sample_rate = 16000
    max_len = int(max_seconds * sample_rate)

    try:
        processor, model = load_model(model_choice)

        results = []
        start_time = time.perf_counter()

        for idx, audio_file in enumerate(audio_files):
            # Accept both plain path strings and objects exposing `.name`.
            audio_path = getattr(audio_file, "name", audio_file)
            filename = Path(audio_path).stem if audio_path else f"file_{idx}"

            try:
                # Load audio
                speech_array, _ = librosa.load(audio_path, sr=sample_rate)

                # Truncate if needed (a no-op for shorter audio)
                speech_array = speech_array[:max_len]

                # Prepare inputs
                inputs = processor(
                    speech_array,
                    sampling_rate=sample_rate,
                    return_tensors="pt",
                    padding=True,
                )
                inputs = {k: v.to(device) for k, v in inputs.items()}

                # Inference
                with torch.no_grad():
                    logits = model(**inputs).logits

                predicted_ids = torch.argmax(logits, dim=-1)
                transcription = processor.batch_decode(
                    predicted_ids,
                    skip_special_tokens=True,
                )[0].strip()
            except Exception as e:
                # One bad file must not abort the rest of the batch.
                transcription = f"Error: {str(e)}"

            results.append({
                "File": filename,
                "Transcription": transcription,
            })

        total_time = time.perf_counter() - start_time
        df = pd.DataFrame(results)

        # Create CSV file under a unique name, so two batches finishing in
        # the same second cannot overwrite each other.
        with tempfile.NamedTemporaryFile(
            mode="w",
            prefix="batch_results_",
            suffix=".csv",
            delete=False,
            newline="",
            encoding="utf-8",
        ) as csv_file:
            df.to_csv(csv_file, index=False)
        csv_path = csv_file.name

        summary = "\n".join([
            "",
            "**Batch Processing Complete**",
            f"- Files processed: {len(results)}",
            f"- Total time: {total_time:.2f}s",
            f"- Average time per file: {total_time/len(results):.2f}s",
            "",
        ])

        return df, csv_path, summary

    except Exception as e:
        # UI boundary: report the failure in the summary slot.
        return None, None, f"❌ Error: {str(e)}"
182
+
183
def compare_models(audio_file, max_seconds=30):
    """Run every model in ``AVAILABLE_MODELS`` on one audio file.

    Args:
        audio_file: Path of the audio file, or ``None`` when nothing was
            uploaded.
        max_seconds: Forwarded to ``transcribe_audio`` for each model.

    Returns:
        A Markdown report with one section per model, or a prompt string
        when *audio_file* is ``None``.
    """
    if audio_file is None:
        return "Please upload an audio file to compare models."

    # Collect the report piece by piece and join once at the end.
    sections = ["## Model Comparison Results\n\n"]

    for name in AVAILABLE_MODELS:
        try:
            text, details = transcribe_audio(audio_file, name, max_seconds)
        except Exception as exc:
            text, details = f"Error: {str(exc)}", ""

        sections.append(f"### {name}\n")
        sections.append(f"**Transcription:** {text}\n")
        sections.append(f"{details}\n\n")

    return "".join(sections)
213
+
214
# Custom CSS for better UI
css = """
.gradio-container {
    font-family: 'Inter', sans-serif;
}
.main-header {
    text-align: center;
    padding: 20px;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white;
    border-radius: 10px;
    margin-bottom: 20px;
}
"""


def _model_dropdown():
    """Create the model selector, preselecting the first available model."""
    model_names = list(AVAILABLE_MODELS.keys())
    return gr.Dropdown(
        choices=model_names,
        value=model_names[0],
        label="Select Model",
    )


def _max_seconds_slider():
    """Create the max-audio-length slider (5-60 s, default 30, step 5)."""
    return gr.Slider(
        minimum=5,
        maximum=60,
        value=30,
        step=5,
        label="Max Audio Length (seconds)",
    )


# Create Gradio interface
with gr.Blocks(css=css, title="Caribbean Voices - Audio Transcription") as demo:
    gr.Markdown("""
    <div class="main-header">
    <h1>🎀 Caribbean Voices Hackathon</h1>
    <p>Audio Transcription Experimentation Framework</p>
    </div>
    """)

    with gr.Tabs():
        # Tab 1: Single File Transcription
        with gr.Tab("🎯 Single File Transcription"):
            gr.Markdown("### Upload a single audio file to transcribe")
            with gr.Row():
                with gr.Column():
                    audio_input = gr.Audio(
                        label="Upload Audio File",
                        type="filepath",
                        sources=["upload", "microphone"],
                    )
                    model_choice = _model_dropdown()
                    max_seconds = _max_seconds_slider()
                    transcribe_btn = gr.Button("Transcribe", variant="primary")

                with gr.Column():
                    transcription_output = gr.Textbox(
                        label="Transcription",
                        lines=5,
                        placeholder="Transcription will appear here...",
                    )
                    info_output = gr.Markdown(label="Processing Info")

            transcribe_btn.click(
                fn=transcribe_audio,
                inputs=[audio_input, model_choice, max_seconds],
                outputs=[transcription_output, info_output],
            )

        # Tab 2: Batch Processing
        with gr.Tab("📦 Batch Processing"):
            gr.Markdown("### Upload multiple audio files for batch transcription")
            with gr.Row():
                with gr.Column():
                    batch_audio_input = gr.File(
                        label="Upload Audio Files",
                        file_count="multiple",
                        file_types=["audio"],
                    )
                    batch_model_choice = _model_dropdown()
                    batch_max_seconds = _max_seconds_slider()
                    batch_btn = gr.Button("Process Batch", variant="primary")

                with gr.Column():
                    batch_results = gr.Dataframe(
                        label="Results",
                        headers=["File", "Transcription"],
                        wrap=True,
                    )
                    batch_summary = gr.Markdown()
                    download_csv = gr.File(label="Download Results CSV")

            # batch_transcribe returns (dataframe, csv path, summary), in
            # that order.
            batch_btn.click(
                fn=batch_transcribe,
                inputs=[batch_audio_input, batch_model_choice, batch_max_seconds],
                outputs=[batch_results, download_csv, batch_summary],
            )

        # Tab 3: Model Comparison
        with gr.Tab("⚖️ Model Comparison"):
            gr.Markdown("### Compare transcription results across different models")
            with gr.Row():
                with gr.Column():
                    compare_audio_input = gr.Audio(
                        label="Upload Audio File",
                        type="filepath",
                        sources=["upload", "microphone"],
                    )
                    compare_max_seconds = _max_seconds_slider()
                    compare_btn = gr.Button("Compare Models", variant="primary")

                with gr.Column():
                    comparison_output = gr.Markdown(label="Comparison Results")

            compare_btn.click(
                fn=compare_models,
                inputs=[compare_audio_input, compare_max_seconds],
                outputs=[comparison_output],
            )

        # Tab 4: About & Documentation
        with gr.Tab("📚 About"):
            gr.Markdown("""
            ## Caribbean Voices Hackathon - Audio Transcription Platform

            ### Features
            - **Single File Transcription**: Quick transcription of individual audio files
            - **Batch Processing**: Process multiple files at once with CSV export
            - **Model Comparison**: Compare results across different Wav2Vec2 models
            - **Real-time Metrics**: See inference time and real-time factors

            ### Available Models
            - **Wav2Vec2 Base (960h)**: Fast, general-purpose model
            - **Wav2Vec2 Large (960h)**: More accurate, slower inference
            - **Wav2Vec2 Base (100h)**: Smaller model, faster inference

            ### Usage Tips
            1. For best results, use clear audio recordings
            2. Audio files are automatically resampled to 16kHz
            3. Long audio files are truncated to the max length setting
            4. Batch processing results can be downloaded as CSV

            ### Technical Details
            - Framework: PyTorch + Transformers
            - Audio Processing: Librosa
            - Interface: Gradio
            - Device: Automatically uses GPU if available

            ### Sharing
            Share this Space with collaborators by sharing the URL!
            """)

if __name__ == "__main__":
    # Listen on all interfaces, on the port the Dockerfile exposes.
    demo.launch(server_name="0.0.0.0", server_port=7860)
378
+
check_build_status.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Monitor Hugging Face Space build status
4
+ """
5
+ import json
6
+ import time
7
+ import urllib.request
8
+ from datetime import datetime
9
+
10
SPACE_ID = "shaun3141/caribbean-voices-hackathon"
API_URL = f"https://huggingface.co/api/spaces/{SPACE_ID}"


def get_space_status():
    """Get the current Space status from the Hugging Face API.

    Returns:
        On success, a dict with keys ``'stage'``, ``'hardware'``,
        ``'siblings'`` (number of files) and ``'last_modified'``. On any
        failure, ``{'error': message}``. This function does not raise.
    """
    try:
        # Bounded wait: without a timeout a stalled connection would block
        # the caller indefinitely.
        with urllib.request.urlopen(API_URL, timeout=10) as response:
            data = json.loads(response.read())

        # `or {}` / `or []` also cover fields that are present but null.
        runtime = data.get('runtime') or {}
        hardware = runtime.get('hardware') or {}
        return {
            'stage': runtime.get('stage', 'unknown'),
            'hardware': hardware.get('current', 'None'),
            'siblings': len(data.get('siblings') or []),
            'last_modified': data.get('lastModified', 'unknown'),
        }
    except Exception as e:
        # Callers branch on the 'error' key instead of catching exceptions.
        return {'error': str(e)}
27
+
28
def format_status(status):
    """Format a status dict for display as a single line.

    Args:
        status: Either ``{'error': message}`` or a dict with ``'stage'``,
            ``'hardware'`` and ``'siblings'`` keys.

    Returns:
        The error line, or ``"<emoji> Status: ... | Hardware: ... | Files: ..."``.
    """
    if 'error' in status:
        return f"❌ Error: {status['error']}"

    stage = status['stage']
    stage_emoji = {
        'BUILDING': '🔨',
        'RUNNING': '✅',
        'STOPPED': '⏸️',
        'PAUSED': '⏸️',
        'SLEEPING': '😴',
        'ERROR': '❌',
    }

    if stage in stage_emoji:
        emoji = stage_emoji[stage]
    elif str(stage).endswith('_ERROR'):
        # Failure stages are reported with a prefix (BUILD_ERROR,
        # RUNTIME_ERROR, CONFIG_ERROR); show them all as errors.
        emoji = '❌'
    else:
        emoji = '❓'

    return f"{emoji} Status: {stage} | Hardware: {status['hardware']} | Files: {status['siblings']}"
45
+
46
def monitor_build(check_interval=10, max_checks=60):
    """Monitor build status with periodic checks.

    Polls ``get_space_status()`` and prints one line per check. Stops early
    when the Space is running or has reached a failed/stopped stage;
    otherwise gives up after *max_checks* checks.

    Args:
        check_interval: Seconds to sleep between checks.
        max_checks: Maximum number of checks.
    """
    space_url = f"https://huggingface.co/spaces/{SPACE_ID}"
    # Stages after which waiting longer cannot help.
    terminal_stages = {'ERROR', 'STOPPED', 'NO_APP_FILE'}

    print(f"🔍 Monitoring build status for: {SPACE_ID}")
    print(f"📊 Checking every {check_interval} seconds (max {max_checks} checks)")
    print("=" * 60)

    for i in range(max_checks):
        status = get_space_status()
        timestamp = datetime.now().strftime("%H:%M:%S")

        print(f"[{timestamp}] {format_status(status)}")

        stage = status.get('stage')
        if 'error' in status:
            print("⚠️ Could not fetch status. Retrying...")
        elif stage == 'RUNNING':
            print("\n🎉 Build complete! Your Space is now running!")
            print(f"🌐 View it at: {space_url}")
            break
        elif stage in terminal_stages or str(stage).endswith('_ERROR'):
            # Failures are reported as BUILD_ERROR / RUNTIME_ERROR /
            # CONFIG_ERROR; matching only 'ERROR' would poll until
            # max_checks on a broken build.
            print(f"\n⚠️ Build ended with status: {stage}")
            print("Check the build logs for details:")
            print(space_url)
            break

        # No point sleeping after the final check.
        if i < max_checks - 1:
            time.sleep(check_interval)

    print("\n" + "=" * 60)
    print("Monitoring complete. Check build logs for details:")
    print(space_url)
76
+
77
if __name__ == "__main__":
    import sys

    # "--once" prints a single status snapshot; any other invocation polls
    # until the build finishes or the check limit is reached.
    if len(sys.argv) > 1 and sys.argv[1] == "--once":
        status = get_space_status()
        print(format_status(status))
        print(f"\n🌐 Space URL: https://huggingface.co/spaces/{SPACE_ID}")
    else:
        # Monitor continuously
        monitor_build()
88
+
requirements.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ transformers>=4.30.0
2
+ librosa>=0.10.0
3
+ soundfile>=0.12.0
4
+ pandas>=2.0.0
5
+ torch>=2.0.0
6
+ torchaudio>=2.0.0
7
+ huggingface_hub>=0.20.0
8
+ gradio>=4.0.0
9
+ numpy>=1.24.0
10
+ datasets>=2.14.0
11
+ scikit-learn>=1.3.0
12
+ # ESPnet for OWSM models (optional - install if using ESPnet version)
13
+ # espnet>=202301
14
+ # espnet_model_zoo>=0.1.0
15
+
setup_hf_space.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Script to sync StarterNotebook.ipynb to a Hugging Face Space with Dev Mode support
4
+ """
5
+ import os
6
+ import sys
7
+ from pathlib import Path
8
+ from huggingface_hub import HfApi, whoami
9
+
10
def main():
    """Create (or reuse) the Docker Space and upload the deployment files.

    Works from any directory: it changes into this script's directory first
    and resolves every file relative to it.

    Exits with status 1 when the user is not logged in, a required file is
    missing, the Space cannot be created, or any upload fails.
    """
    # Check if logged in
    try:
        user_info = whoami()
        print(f"✅ Logged in as: {user_info['name']}")
        username = user_info['name']
    except Exception:
        print("❌ Not logged in to Hugging Face")
        print("\nPlease log in first by running:")
        print("   hf auth login")
        print("\nOr get a token from: https://huggingface.co/settings/tokens")
        sys.exit(1)

    # Set up space details
    space_id = f"{username}/caribbean-voices-hackathon"
    space_url = f"https://huggingface.co/spaces/{space_id}"

    # Get script directory and ensure we're in hf_space/
    script_dir = Path(__file__).parent.resolve()
    os.chdir(script_dir)

    # Single list driving both the pre-flight check and the upload:
    # (local path, path in the Space repo, required?)
    # app.py is required because the Dockerfile's CMD runs `python app.py`.
    deploy_files = [
        (Path("Dockerfile"), "Dockerfile", True),
        (Path("requirements.txt"), "requirements.txt", True),
        (Path("app.py"), "app.py", True),
        (Path("../StarterNotebook.ipynb"), "StarterNotebook.ipynb", True),
        (Path(".dockerignore"), ".dockerignore", False),
    ]

    # Check all required files exist
    missing_files = [
        repo_path
        for local_file, repo_path, required in deploy_files
        if required and not local_file.exists()
    ]
    if missing_files:
        print(f"❌ Missing required files: {', '.join(missing_files)}")
        sys.exit(1)

    print(f"\n📦 Creating/updating Docker Space: {space_id}")
    print("   (Docker Space required for Dev Mode)")

    api = HfApi()

    # Create the space as Docker type (required for Dev Mode)
    try:
        api.create_repo(
            repo_id=space_id,
            repo_type="space",
            space_sdk="docker",  # Docker SDK required for Dev Mode
            exist_ok=True,
        )
        print(f"✅ Space created/verified: {space_url}")
    except Exception as e:
        print(f"❌ Error creating space: {e}")
        sys.exit(1)

    # Upload every file; keep going after a failure so one run reports all
    # problems, but remember the failures for the final verdict.
    print("\n📤 Uploading files...")
    failed_uploads = []
    for local_file, repo_path, _required in deploy_files:
        if not local_file.exists():
            print(f"   ⚠️ Skipping {repo_path} (file not found)")
            continue
        try:
            api.upload_file(
                path_or_fileobj=str(local_file),
                path_in_repo=repo_path,
                repo_id=space_id,
                repo_type="space",
            )
            print(f"   ✅ Uploaded: {repo_path}")
        except Exception as e:
            failed_uploads.append(repo_path)
            print(f"   ⚠️ Warning uploading {repo_path}: {e}")

    if failed_uploads:
        # Do not claim success when part of the deployment is missing.
        print(f"\n❌ Failed to upload: {', '.join(failed_uploads)}")
        sys.exit(1)

    print("\n✅ Files uploaded successfully!")
    print(f"\n🌐 View your space at: {space_url}")
    print("\n📝 Next steps to enable Dev Mode:")
    print(f"   1. Go to: {space_url}")
    print("   2. Click on the Space settings")
    print("   3. Enable 'Dev Mode' from the interface")
    print("   4. Connect via SSH or VS Code Remote (instructions in Dev Mode modal)")
    print("\n💡 Note: Dev Mode requires PRO or Team & Enterprise plan")
    print("\n✨ Done! Your notebook is now synced to Hugging Face Spaces.")
    print("\nTo update files in the future, run this script again.")


if __name__ == "__main__":
    main()
99
+
upload_notebook.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Quick script to upload StarterNotebook.ipynb to Hugging Face Space
4
+ Run this script from the hf_space/ directory
5
+ """
6
+ import subprocess
7
+ import sys
8
+ from pathlib import Path
9
+
10
SPACE_ID = "shaun3141/caribbean-voices-hackathon"
# Relative to the directory containing this script.
NOTEBOOK = "../StarterNotebook.ipynb"


def main():
    """Upload the starter notebook to the Space using the ``hf`` CLI.

    Exits with status 1 when the notebook is missing, the ``hf`` command is
    not installed, or the upload fails.
    """
    # Resolve against this script's location rather than the current
    # directory, so the script behaves the same wherever it is run from.
    notebook_path = (Path(__file__).resolve().parent / NOTEBOOK).resolve()

    if not notebook_path.exists():
        print(f"❌ Notebook not found: {notebook_path}")
        sys.exit(1)

    print(f"📤 Uploading {NOTEBOOK} to {SPACE_ID}...")

    try:
        # Output is captured so that stderr can be shown if the upload fails.
        subprocess.run(
            [
                "hf", "upload", SPACE_ID,
                str(notebook_path),
                notebook_path.name,  # destination path inside the repo
                "--repo-type", "space",
            ],
            check=True,
            capture_output=True,
            text=True,
        )
        print("✅ Upload successful!")
        print(f"🌐 View your space at: https://huggingface.co/spaces/{SPACE_ID}")
        print("\n💡 The Space will automatically rebuild after upload.")
    except subprocess.CalledProcessError as e:
        print(f"❌ Upload failed: {e}")
        print(f"Error output: {e.stderr}")
        sys.exit(1)
    except FileNotFoundError:
        print("❌ 'hf' command not found. Make sure Hugging Face CLI is installed.")
        print("   Install with: pipx install huggingface_hub")
        sys.exit(1)


if __name__ == "__main__":
    main()
43
+
upload_to_space.sh ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Simple script to upload files to Hugging Face Space.
# It changes into its own directory first, so it can be run from anywhere.

# Abort on the first failing command (and on unset variables), so a failed
# upload is never followed by the "Upload complete!" message.
set -euo pipefail

SPACE_ID="shaun3141/caribbean-voices-hackathon"

# Get the directory where this script is located
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR"

# upload LABEL PATH -- announce and upload one file, then print a blank line.
upload() {
    local label="$1"
    local path="$2"
    echo "Uploading ${label}..."
    hf upload "$SPACE_ID" "$path" --repo-type space
    echo ""
}

echo "📤 Uploading files to Hugging Face Space: $SPACE_ID"
echo ""

# Upload key files
upload "app.py (Gradio app)" app.py
upload "Dockerfile" Dockerfile
upload "requirements.txt" requirements.txt
upload "StarterNotebook.ipynb" ../StarterNotebook.ipynb

echo "✅ Upload complete!"
echo "🌐 View your space at: https://huggingface.co/spaces/$SPACE_ID"
echo ""
echo "💡 Note: The Space will automatically rebuild after uploads."
35
+