Spaces:
Running
Running
Refactor cache system to simplify input/output handling and improve error management
Browse files
app.py
CHANGED
|
@@ -33,70 +33,44 @@ class TransliterationCache:
|
|
| 33 |
self.cache_file = cache_file
|
| 34 |
self.cache = {}
|
| 35 |
self.load_cache()
|
| 36 |
-
|
| 37 |
def load_cache(self):
|
| 38 |
-
"""Load existing cache from file"""
|
| 39 |
if os.path.exists(self.cache_file):
|
| 40 |
try:
|
| 41 |
with open(self.cache_file, 'r', encoding='utf-8') as f:
|
| 42 |
-
reader = csv.
|
| 43 |
for row in reader:
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
'corrected_output': row.get('corrected_output', ''),
|
| 48 |
-
'timestamp': row['timestamp'],
|
| 49 |
-
'usage_count': int(row.get('usage_count', 1))
|
| 50 |
-
}
|
| 51 |
except Exception as e:
|
| 52 |
print(f"Error loading cache: {e}")
|
| 53 |
-
|
| 54 |
def save_cache(self):
|
| 55 |
-
"""Save cache to file"""
|
| 56 |
try:
|
| 57 |
with open(self.cache_file, 'w', encoding='utf-8', newline='') as f:
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
writer.writeheader()
|
| 61 |
-
|
| 62 |
-
for key, data in self.cache.items():
|
| 63 |
input_text, direction = key.rsplit('_', 1)
|
| 64 |
-
writer.writerow(
|
| 65 |
-
'input': input_text,
|
| 66 |
-
'direction': direction,
|
| 67 |
-
'output': data['output'],
|
| 68 |
-
'corrected_output': data.get('corrected_output', ''),
|
| 69 |
-
'timestamp': data['timestamp'],
|
| 70 |
-
'usage_count': data['usage_count']
|
| 71 |
-
})
|
| 72 |
except Exception as e:
|
| 73 |
print(f"Error saving cache: {e}")
|
| 74 |
-
|
| 75 |
def get(self, input_text, direction):
|
| 76 |
-
"""Get cached result if exists"""
|
| 77 |
key = f"{input_text}_{direction}"
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
self.save_cache()
|
| 81 |
-
return self.cache[key]['output']
|
| 82 |
-
return None
|
| 83 |
-
|
| 84 |
def set(self, input_text, direction, output):
|
| 85 |
-
"""Cache a new result"""
|
| 86 |
key = f"{input_text}_{direction}"
|
| 87 |
-
self.cache[key] =
|
| 88 |
-
'output': output,
|
| 89 |
-
'corrected_output': '',
|
| 90 |
-
'timestamp': datetime.now().isoformat(),
|
| 91 |
-
'usage_count': 1
|
| 92 |
-
}
|
| 93 |
self.save_cache()
|
| 94 |
-
|
| 95 |
def update_correction(self, input_text, direction, corrected_output):
|
| 96 |
-
"""Update with user correction"""
|
| 97 |
key = f"{input_text}_{direction}"
|
| 98 |
if key in self.cache:
|
| 99 |
-
self.cache[key]
|
| 100 |
self.save_cache()
|
| 101 |
return True
|
| 102 |
return False
|
|
@@ -201,9 +175,9 @@ def transliterate(text, direction):
|
|
| 201 |
def save_correction(input_text, direction, corrected_output):
|
| 202 |
"""Save user correction to cache"""
|
| 203 |
if cache_system.update_correction(input_text, direction, corrected_output):
|
| 204 |
-
return "
|
| 205 |
else:
|
| 206 |
-
return "
|
| 207 |
|
| 208 |
# Arabic keyboard layout
|
| 209 |
arabic_keys = [
|
|
@@ -240,11 +214,11 @@ def create_interface():
|
|
| 240 |
with gr.Blocks(title="Darija Transliterator", theme=gr.themes.Soft()) as demo:
|
| 241 |
gr.Markdown(
|
| 242 |
"""
|
| 243 |
-
#
|
| 244 |
Convert between Latin script and Arabic script for Moroccan Darija
|
| 245 |
|
| 246 |
-
|
| 247 |
-
|
| 248 |
"""
|
| 249 |
)
|
| 250 |
|
|
@@ -276,55 +250,64 @@ def create_interface():
|
|
| 276 |
)
|
| 277 |
|
| 278 |
# Arabic Keyboard
|
| 279 |
-
gr.Markdown("### Arabic Keyboard
|
| 280 |
-
gr.Markdown("*Click letters to edit the output text above*")
|
| 281 |
-
|
| 282 |
-
#
|
| 283 |
-
|
| 284 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 285 |
with gr.Group():
|
| 286 |
for row in arabic_keys:
|
| 287 |
with gr.Row():
|
| 288 |
for char in row:
|
| 289 |
btn = gr.Button(char, size="sm", scale=1)
|
| 290 |
-
# Use JavaScript to prevent form submission and add character
|
| 291 |
btn.click(
|
| 292 |
fn=None,
|
| 293 |
-
js=f"""
|
| 294 |
-
(output_text) => {{
|
| 295 |
-
return output_text + '{char}';
|
| 296 |
-
}}
|
| 297 |
-
""",
|
| 298 |
inputs=[output_text],
|
| 299 |
outputs=[output_text],
|
| 300 |
show_progress=False,
|
| 301 |
queue=False
|
| 302 |
)
|
| 303 |
-
|
| 304 |
with gr.Row():
|
| 305 |
space_btn = gr.Button("Space", size="sm", scale=2)
|
| 306 |
-
backspace_btn = gr.Button("
|
| 307 |
-
clear_output_btn = gr.Button("
|
| 308 |
-
|
| 309 |
# Keyboard utility buttons with JavaScript
|
| 310 |
space_btn.click(
|
| 311 |
fn=None,
|
| 312 |
-
js="(output_text) => output_text + ' '",
|
| 313 |
inputs=[output_text],
|
| 314 |
outputs=[output_text],
|
| 315 |
show_progress=False,
|
| 316 |
queue=False
|
| 317 |
)
|
| 318 |
-
|
| 319 |
backspace_btn.click(
|
| 320 |
fn=None,
|
| 321 |
-
js="(output_text) => output_text.slice(0, -1)",
|
| 322 |
inputs=[output_text],
|
| 323 |
outputs=[output_text],
|
| 324 |
show_progress=False,
|
| 325 |
queue=False
|
| 326 |
)
|
| 327 |
-
|
| 328 |
clear_output_btn.click(
|
| 329 |
fn=None,
|
| 330 |
js="() => ''",
|
|
@@ -332,16 +315,42 @@ def create_interface():
|
|
| 332 |
show_progress=False,
|
| 333 |
queue=False
|
| 334 |
)
|
| 335 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 336 |
# Correction system
|
| 337 |
with gr.Group():
|
| 338 |
-
gr.Markdown("###
|
| 339 |
correction_status = gr.Textbox(
|
| 340 |
label="Status",
|
| 341 |
interactive=False,
|
| 342 |
visible=False
|
| 343 |
)
|
| 344 |
-
save_correction_btn = gr.Button("
|
| 345 |
|
| 346 |
# Event handlers
|
| 347 |
translate_btn.click(
|
|
@@ -397,10 +406,10 @@ def create_interface():
|
|
| 397 |
This model transliterates Moroccan Darija between Latin and Arabic scripts using a CTC-based neural network.
|
| 398 |
|
| 399 |
**Features:**
|
| 400 |
-
-
|
| 401 |
-
-
|
| 402 |
-
-
|
| 403 |
-
-
|
| 404 |
|
| 405 |
**How to help:**
|
| 406 |
1. Use the Arabic keyboard to correct outputs
|
|
|
|
| 33 |
self.cache_file = cache_file
|
| 34 |
self.cache = {}
|
| 35 |
self.load_cache()
|
| 36 |
+
|
| 37 |
def load_cache(self):
|
| 38 |
+
"""Load existing cache from file (input/output pairs only)"""
|
| 39 |
if os.path.exists(self.cache_file):
|
| 40 |
try:
|
| 41 |
with open(self.cache_file, 'r', encoding='utf-8') as f:
|
| 42 |
+
reader = csv.reader(f)
|
| 43 |
for row in reader:
|
| 44 |
+
if len(row) == 3:
|
| 45 |
+
key = f"{row[0]}_{row[1]}"
|
| 46 |
+
self.cache[key] = row[2]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
except Exception as e:
|
| 48 |
print(f"Error loading cache: {e}")
|
| 49 |
+
|
| 50 |
def save_cache(self):
|
| 51 |
+
"""Save cache to file (input/output pairs only)"""
|
| 52 |
try:
|
| 53 |
with open(self.cache_file, 'w', encoding='utf-8', newline='') as f:
|
| 54 |
+
writer = csv.writer(f)
|
| 55 |
+
for key, output in self.cache.items():
|
|
|
|
|
|
|
|
|
|
| 56 |
input_text, direction = key.rsplit('_', 1)
|
| 57 |
+
writer.writerow([input_text, direction, output])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
except Exception as e:
|
| 59 |
print(f"Error saving cache: {e}")
|
| 60 |
+
|
| 61 |
def get(self, input_text, direction):
|
|
|
|
| 62 |
key = f"{input_text}_{direction}"
|
| 63 |
+
return self.cache.get(key, None)
|
| 64 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
def set(self, input_text, direction, output):
|
|
|
|
| 66 |
key = f"{input_text}_{direction}"
|
| 67 |
+
self.cache[key] = output
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
self.save_cache()
|
| 69 |
+
|
| 70 |
def update_correction(self, input_text, direction, corrected_output):
|
|
|
|
| 71 |
key = f"{input_text}_{direction}"
|
| 72 |
if key in self.cache:
|
| 73 |
+
self.cache[key] = corrected_output
|
| 74 |
self.save_cache()
|
| 75 |
return True
|
| 76 |
return False
|
|
|
|
| 175 |
def save_correction(input_text, direction, corrected_output):
|
| 176 |
"""Save user correction to cache"""
|
| 177 |
if cache_system.update_correction(input_text, direction, corrected_output):
|
| 178 |
+
return "Correction saved! Thank you for improving the model."
|
| 179 |
else:
|
| 180 |
+
return "Could not save correction."
|
| 181 |
|
| 182 |
# Arabic keyboard layout
|
| 183 |
arabic_keys = [
|
|
|
|
| 214 |
with gr.Blocks(title="Darija Transliterator", theme=gr.themes.Soft()) as demo:
|
| 215 |
gr.Markdown(
|
| 216 |
"""
|
| 217 |
+
# Darija Transliterator
|
| 218 |
Convert between Latin script and Arabic script for Moroccan Darija
|
| 219 |
|
| 220 |
+
**Smart Caching**: Results are cached for faster responses and data collection
|
| 221 |
+
**Arabic Keyboard**: Built-in Arabic keyboard for corrections
|
| 222 |
"""
|
| 223 |
)
|
| 224 |
|
|
|
|
| 250 |
)
|
| 251 |
|
| 252 |
# Arabic Keyboard
|
| 253 |
+
gr.Markdown("### Arabic Keyboard")
|
| 254 |
+
gr.Markdown("*Click letters to edit the output text above, or use your keyboard*")
|
| 255 |
+
|
| 256 |
+
# JavaScript for inserting at cursor and mapping Latin keys to Arabic
|
| 257 |
+
insert_js = """
|
| 258 |
+
(output_text, evt) => {
|
| 259 |
+
const textarea = document.querySelector('textarea[aria-label="Output"]');
|
| 260 |
+
if (!textarea) return output_text;
|
| 261 |
+
const start = textarea.selectionStart;
|
| 262 |
+
const end = textarea.selectionEnd;
|
| 263 |
+
const before = output_text.slice(0, start);
|
| 264 |
+
const after = output_text.slice(end);
|
| 265 |
+
return before + evt + after;
|
| 266 |
+
}
|
| 267 |
+
"""
|
| 268 |
+
# Latin to Arabic mapping for keyboard
|
| 269 |
+
latin_to_arabic = {
|
| 270 |
+
'a': 'ا', 'b': 'ب', 't': 'ت', 'j': 'ج', 'h': 'ح', 'd': 'د', 'r': 'ر', 'z': 'ز',
|
| 271 |
+
's': 'س', 'c': 'ص', 'f': 'ف', 'q': 'ق', 'k': 'ك', 'l': 'ل', 'm': 'م', 'n': 'ن',
|
| 272 |
+
'w': 'و', 'y': 'ي', 'g': 'غ', '3': 'ع', '7': 'ح', '5': 'خ', '6': 'ط', '9': 'ق',
|
| 273 |
+
'e': 'ء', 'o': 'ؤ', 'i': 'ئ', 'x': 'ش', 'v': 'ظ', 'p': 'ة', 'u': 'ى',
|
| 274 |
+
}
|
| 275 |
+
|
| 276 |
with gr.Group():
|
| 277 |
for row in arabic_keys:
|
| 278 |
with gr.Row():
|
| 279 |
for char in row:
|
| 280 |
btn = gr.Button(char, size="sm", scale=1)
|
|
|
|
| 281 |
btn.click(
|
| 282 |
fn=None,
|
| 283 |
+
js=f"(output_text) => {{ const textarea = document.querySelector('textarea[aria-label=\"Output\"]'); if (!textarea) return output_text; const start = textarea.selectionStart; const end = textarea.selectionEnd; const before = output_text.slice(0, start); const after = output_text.slice(end); setTimeout(()=>{{textarea.selectionStart=textarea.selectionEnd=start+1}},0); return before + '{char}' + after; }}",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 284 |
inputs=[output_text],
|
| 285 |
outputs=[output_text],
|
| 286 |
show_progress=False,
|
| 287 |
queue=False
|
| 288 |
)
|
|
|
|
| 289 |
with gr.Row():
|
| 290 |
space_btn = gr.Button("Space", size="sm", scale=2)
|
| 291 |
+
backspace_btn = gr.Button("Backspace", size="sm", scale=2)
|
| 292 |
+
clear_output_btn = gr.Button("Clear Output", size="sm", scale=2)
|
| 293 |
+
|
| 294 |
# Keyboard utility buttons with JavaScript
|
| 295 |
space_btn.click(
|
| 296 |
fn=None,
|
| 297 |
+
js="(output_text) => { const textarea = document.querySelector('textarea[aria-label=\"Output\"]'); if (!textarea) return output_text; const start = textarea.selectionStart; const end = textarea.selectionEnd; const before = output_text.slice(0, start); const after = output_text.slice(end); setTimeout(()=>{textarea.selectionStart=textarea.selectionEnd=start+1},0); return before + ' ' + after; }",
|
| 298 |
inputs=[output_text],
|
| 299 |
outputs=[output_text],
|
| 300 |
show_progress=False,
|
| 301 |
queue=False
|
| 302 |
)
|
|
|
|
| 303 |
backspace_btn.click(
|
| 304 |
fn=None,
|
| 305 |
+
js="(output_text) => { const textarea = document.querySelector('textarea[aria-label=\"Output\"]'); if (!textarea) return output_text; const start = textarea.selectionStart; const end = textarea.selectionEnd; if (start === 0 && end === 0) return output_text; const before = output_text.slice(0, start-1); const after = output_text.slice(end); setTimeout(()=>{textarea.selectionStart=textarea.selectionEnd=start-1},0); return before + after; }",
|
| 306 |
inputs=[output_text],
|
| 307 |
outputs=[output_text],
|
| 308 |
show_progress=False,
|
| 309 |
queue=False
|
| 310 |
)
|
|
|
|
| 311 |
clear_output_btn.click(
|
| 312 |
fn=None,
|
| 313 |
js="() => ''",
|
|
|
|
| 315 |
show_progress=False,
|
| 316 |
queue=False
|
| 317 |
)
|
| 318 |
+
|
| 319 |
+
# Add JS event for mapping Latin keys to Arabic in output_text
|
| 320 |
+
output_text.elem_id = "output_textbox"
|
| 321 |
+
gr.HTML("""
|
| 322 |
+
<script>
|
| 323 |
+
(() => {
|
| 324 |
+
const latinToArabic = {"a":"ا","b":"ب","t":"ت","j":"ج","h":"ح","d":"د","r":"ر","z":"ز","s":"س","c":"ص","f":"ف","q":"ق","k":"ك","l":"ل","m":"م","n":"ن","w":"و","y":"ي","g":"غ","3":"ع","7":"ح","5":"خ","6":"ط","9":"ق","e":"ء","o":"ؤ","i":"ئ","x":"ش","v":"ظ","p":"ة","u":"ى"};
|
| 325 |
+
document.addEventListener('keydown', function(e) {
|
| 326 |
+
const textarea = document.querySelector('textarea[aria-label="Output"]');
|
| 327 |
+
if (!textarea || document.activeElement !== textarea) return;
|
| 328 |
+
if (e.ctrlKey || e.altKey || e.metaKey) return;
|
| 329 |
+
const key = e.key.toLowerCase();
|
| 330 |
+
if (latinToArabic[key]) {
|
| 331 |
+
e.preventDefault();
|
| 332 |
+
const start = textarea.selectionStart;
|
| 333 |
+
const end = textarea.selectionEnd;
|
| 334 |
+
const before = textarea.value.slice(0, start);
|
| 335 |
+
const after = textarea.value.slice(end);
|
| 336 |
+
textarea.value = before + latinToArabic[key] + after;
|
| 337 |
+
textarea.selectionStart = textarea.selectionEnd = start + 1;
|
| 338 |
+
textarea.dispatchEvent(new Event('input', {bubbles:true}));
|
| 339 |
+
}
|
| 340 |
+
});
|
| 341 |
+
})();
|
| 342 |
+
</script>
|
| 343 |
+
""")
|
| 344 |
+
|
| 345 |
# Correction system
|
| 346 |
with gr.Group():
|
| 347 |
+
gr.Markdown("### Correction System")
|
| 348 |
correction_status = gr.Textbox(
|
| 349 |
label="Status",
|
| 350 |
interactive=False,
|
| 351 |
visible=False
|
| 352 |
)
|
| 353 |
+
save_correction_btn = gr.Button("Save Correction", variant="secondary")
|
| 354 |
|
| 355 |
# Event handlers
|
| 356 |
translate_btn.click(
|
|
|
|
| 406 |
This model transliterates Moroccan Darija between Latin and Arabic scripts using a CTC-based neural network.
|
| 407 |
|
| 408 |
**Features:**
|
| 409 |
+
- **Smart Caching**: Frequently used translations are cached for speed
|
| 410 |
+
- **Data Collection**: Your usage helps improve the model
|
| 411 |
+
- **Arabic Keyboard**: Easy text editing and correction
|
| 412 |
+
- **Correction System**: Help improve translations by saving corrections
|
| 413 |
|
| 414 |
**How to help:**
|
| 415 |
1. Use the Arabic keyboard to correct outputs
|