# SemanticMap / app.py
import os
import csv
import json
import random
import base64
import mimetypes
from typing import Any
from pathlib import Path
from dotenv import load_dotenv
import requests
import gradio as gr
from openai import OpenAI
from country_list import countries_for_language
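# SemanticMap is an annotation dashboard for collecting spoken Google Maps queries.
# Annotators are shown scenarios, record a voice query, upload a screenshot of the
# Google Maps result, and submit a transcript plus metadata to a sheet backend.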
def load_scenarios() -> list[str]:
"""Load scenarios from a CSV file.
Returns:
list[str]: List of scenario descriptions.
"""
# load scenarios from CSV file
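    # the CSV is expected to have a header row; the scenario text is read from the second column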
with open(Path(__file__).parent / "static" / "scenarios.csv", "r") as f:
reader = csv.reader(f)
_ = next(reader)
scenarios = [row[1] for row in reader if row]
return scenarios
def fetch_user_scenarios(username: str) -> list[int] | None:
"""Fetches the list of scenarios for a given user from the sheet.
Args:
username (str): Username.
Returns:
list[int] | None: List of scenario indices for the user, or None.
"""
try:
response = requests.get(os.getenv("PROGRESS_ENDPOINT_URL", ""), params={"user": username})
response.raise_for_status()
data = response.json()
if data.get("result") == "success":
return data.get("scenarios", [])
else:
print(f"Error fetching user data server: {data.get('error')}")
return None
except Exception as e:
print(f"Request failed: {e}")
return None
def load_user(request: gr.Request) -> tuple[str, list[int]]:
"""Load the user and their scenarios.
Args:
request (gr.Request): Gradio request object containing the username.
Returns:
tuple[str, list[int]]: Tuple containing the username and their scenarios.
"""
username = request.username
    # fall back to an empty list if the progress fetch fails (fetch_user_scenarios returns None on error)
    user_scenarios = fetch_user_scenarios(username) or []
return (username, user_scenarios)
def init_interface(
username: str, user_scenarios: list[int]
) -> tuple[dict[str, Any], str, dict[str, Any], str, int, str, str]:
"""Initialize the Gradio interface with the welcome message and scenario.
Args:
username (str): The username of the user.
user_scenarios (list[int]): List of scenario indices the user has completed.
Returns:
tuple: Gradio updates for the interface components.
- loader_ui: Update to hide the loading UI.
- status: Update to show the loader message.
- main_ui: Update to show the main UI.
- welcome_text: Welcome message for the user.
- scenario_idx: Randomly selected scenario index.
- scenario_text: The scenario text for the user to act out.
- progress_bar: Visual progress bar showing completed scenarios.
"""
welcome_msg = f"""## ๐Ÿ‘‹ Welcome {username}!
[Logout](/logout)
### Please follow these steps to contribute to our dataset:
You will be presented with a series of scenarios. For each scenario, please:
1. **Formulate Your Query**: Read the scenario and imagine how you would ask Google Maps about it using voice input. <u>Be as creative as you like</u>! You can try different phrasings, expressions, or styles that feel natural.
2. **Record Your Query**: Once you're ready, hit “record” and at the same time, speak your query to Google Maps. Once recording starts, the scenario will be hidden, so make sure your query is ready in your mind!
3. **Take a Screenshot**: After recording, capture the Google Maps results page that shows the system's response and upload it.
4. **Evaluate the Result**: Tell us whether Google Maps gave a satisfactory response. If not, please explain why.
5. **Provide a Transcript**: Post-edit and submit a written version of what you said in your query.
6. **Share Your Background**: Specify the variety of English that you identify with. Optionally, provide your Australian postcode for additional context.
For more details on the steps, please refer to the [annotation guidelines](https://docs.google.com/document/d/10YfIDRbE5uLEP-rvUPHgFSM5dg8GXxBLZ-i4Y1I_0hw/edit?usp=sharing).
"""
# load all scenarios and filter out those already completed by the user
all_scenarios = load_scenarios()
# handle case where user has completed all scenarios
if len(user_scenarios) >= len(all_scenarios):
complete_msg = (
f"๐ŸŽ‰ Congratulations {username}! You have completed all scenarios. Thank you for your contributions!"
)
return (
gr.update(visible=True),
complete_msg,
gr.update(visible=False),
None,
None,
None,
None,
)
# select random scenario that the user has not completed
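    # the loop is guaranteed to terminate because the all-completed case was handled above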
scenario_idx = None
while scenario_idx is None or scenario_idx in user_scenarios:
scenario_idx = random.randint(0, len(all_scenarios) - 1)
scenario_text = all_scenarios[scenario_idx]
print(f"Selected scenario {scenario_idx}: {scenario_text}")
# give visual progress bar
def make_progress_bar(done: int, total: int) -> str:
percent = done / total * 100 if total > 0 else 0
return f"""
<div style="width: 100%; border: 1px solid white; border-radius: 8px; padding: 8px;">
<div style="text-align: center; margin-bottom: 4px; font-weight: bold;">
{int(percent)}% ({done}/{total} scenarios completed)
</div>
<div style="width: 100%; background-color: #eee; border-radius: 5px; overflow: hidden;">
<div style="width: {percent}%; background-color: #4CAF50; height: 20px;"></div>
</div>
</div>
"""
progress_bar = make_progress_bar(len(user_scenarios), len(all_scenarios))
return (
gr.update(visible=False),
None,
gr.update(visible=True),
welcome_msg,
scenario_idx,
scenario_text,
progress_bar,
)
def validate_inputs(
audio: str,
transcript: str,
country: str,
image: str,
expected: str,
satisfiable: str,
actual: str | None = None,
postcode: str | None = None,
) -> tuple[bool, str]:
"""Validate the inputs provided by the user.
Args:
audio (str): Path to the audio file.
transcript (str): Transcription of the audio.
country (str): Country of origin of the user.
image (str): Path to the image file.
expected (str): Expected output from Google Maps.
satisfiable (str): Whether the Google Maps results satisfied the query.
actual (str | None, optional): Actual output from Google Maps, if applicable. Defaults to None.
postcode (str | None, optional): Australian postcode, if provided. Defaults to None.
Returns:
tuple[bool, str]: A tuple containing a boolean indicating success or failure, and a message.
"""
errors = []
if not audio:
errors.append("audio")
if not transcript.strip():
errors.append("transcript")
if not country:
errors.append("country")
if not image:
errors.append("image")
if not expected.strip():
errors.append("expected output")
if not satisfiable:
errors.append("satisfiability")
if satisfiable == "No" and not actual.strip():
errors.append("actual output")
if postcode:
if not postcode.isdigit() or len(postcode) != 4:
errors.append("postcode (must be a 4-digit number)")
elif not (200 <= int(postcode) <= 7999):
errors.append("postcode (must be a valid Australian postcode between 200 and 7999)")
if errors:
return False, f"โŒ Submission failed. Missing: {', '.join(errors)}"
else:
return True, "โœ… Submission successful! Thank you for your contribution."
def encode_file_as_json(path: str, name_override: str | None = None) -> dict | None:
"""Encode a file as a base64 JSON object.
Args:
path (str): Path to the file to encode.
        name_override (str | None, optional): Replacement filename, since temporary audio files default to `test.wav`. Defaults to None.
Returns:
dict | None: A dictionary containing the base64 encoded file, its MIME type, and its name, or None if the file does not exist.
"""
if path and os.path.exists(path):
with open(path, "rb") as f:
file_bytes = f.read()
base64_str = base64.b64encode(file_bytes).decode("utf-8")
mime_type, _ = mimetypes.guess_type(path)
return {
"base64": base64_str,
"type": mime_type or "application/octet-stream",
"name": name_override or os.path.basename(path),
}
return None
def upload_response(
username: str,
scenario_idx: int,
scenario_text: str,
audio_path: str,
transcript: str,
country: str,
image_path: str,
expected: str,
satisfiable: str,
actual: str | None = None,
postcode: str | None = None,
) -> dict:
"""Upload the user's response to the server.
Args:
username (str): Username of the user.
scenario_idx (int): Index of the scenario.
scenario_text (str): Text of the scenario.
audio_path (str): Path to the audio file.
transcript (str): Transcript of the audio.
country (str): Country of origin of the user.
image_path (str): Path to the image file.
expected (str): Expected output from Google Maps.
satisfiable (str): Whether the Google Maps results satisfied the query.
actual (str | None, optional): Actual output from Google Maps, if applicable. Defaults to None.
postcode (str | None, optional): Australian postcode, if provided. Defaults to None.
Returns:
dict: Response from the server indicating success or failure.
"""
image_obj = encode_file_as_json(image_path)
audio_obj = encode_file_as_json(audio_path, name_override=f"{username}_scenario_{scenario_idx}.wav")
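    # the audio and screenshot are embedded in the form payload as JSON-encoded base64 objects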
payload = {
"username": username,
"scenario_idx": scenario_idx,
"scenario_text": scenario_text,
"audio_url": json.dumps(audio_obj),
"transcript": transcript,
"country": country,
"screenshot_url": json.dumps(image_obj),
"expected": expected,
"satisfiable": satisfiable,
"actual": actual,
"postcode": postcode,
}
try:
response = requests.post(os.getenv("UPLOAD_ENDPOINT_URL", ""), data=payload)
response.raise_for_status()
return response.json()
except requests.RequestException as e:
return {"result": "error", "error": str(e)}
def handle_submission(
username: str,
scenario_idx: int,
scenario_text: str,
audio_path: str,
transcript: str,
country: str,
image_path: str,
expected: str,
satisfiable: str,
user_scenarios: list[int],
actual: str | None = None,
postcode: str | None = None,
) -> list[int]:
"""Handle the submission of user data.
Args:
username (str): Username of the user.
scenario_idx (int): Index of the scenario.
scenario_text (str): Text of the scenario.
audio_path (str): Path to the audio file.
transcript (str): Transcript of the audio.
country (str): Country of origin of the user.
image_path (str): Path to the image file.
expected (str): Expected output from Google Maps.
satisfiable (str): Whether the Google Maps results satisfied the query.
user_scenarios (list[int]): List of scenario indices the user has completed.
actual (str | None, optional): Actual output from Google Maps, if applicable. Defaults to None.
postcode (str | None, optional): Australian postcode, if provided. Defaults to None.
Returns:
list[int]: Updated list of scenario indices the user has completed.
"""
valid, msg = validate_inputs(audio_path, transcript, country, image_path, expected, satisfiable, actual, postcode)
    if not valid:
        gr.Warning(msg)
        # return the unchanged list so the user_scenarios state is not overwritten with None
        return user_scenarios
response = upload_response(
username,
scenario_idx,
scenario_text,
audio_path,
transcript,
country,
image_path,
expected,
satisfiable,
actual,
postcode,
)
print(response)
if response.get("result") == "success":
user_scenarios.append(scenario_idx)
gr.Info(msg)
return user_scenarios
def transcribe_audio(audio_path: str) -> str | None:
"""Transcribe the audio file using OpenAI's Whisper model.
Args:
audio_path (str): Path to the audio file.
Returns:
str | None: Transcription of the audio, or None if an error occurs.
"""
    if not audio_path or not os.path.exists(audio_path):
        return None
    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
    try:
        # open the file in a context manager so the handle is closed after the request
        with open(audio_path, "rb") as audio_file:
            response = client.audio.transcriptions.create(model="whisper-1", file=audio_file, language="en")
        transcription = getattr(response, "text", "")
        return transcription.strip()
except Exception as e:
print(f"Error during transcription: {e}")
return None
if __name__ == "__main__":
load_dotenv()
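    # expected environment variables (e.g. from a .env file):
    #   PROGRESS_ENDPOINT_URL - endpoint that returns a user's completed scenario indices
    #   UPLOAD_ENDPOINT_URL   - endpoint that receives submitted annotations
    #   OPENAI_API_KEY        - used for Whisper transcription of recordings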
with gr.Blocks() as demo:
username = gr.State()
user_scenarios = gr.State([])
with gr.Row() as loader_ui:
status = gr.Image(
value=Path(__file__).parent / "static" / "loading.gif",
show_label=False,
interactive=False,
height=50,
show_download_button=False,
)
with gr.Column(visible=False) as main_ui:
gr.Markdown(f"# ๐Ÿ—บ๏ธ SemanticMap Annotation Dashboard")
welcome_text = gr.Markdown()
progress_display = gr.HTML()
with gr.Row():
scenario_idx_input = gr.Textbox(value=None, visible=False)
scenario_input = gr.Textbox(
label="Scenario",
value=None,
interactive=False,
info="Read this scenario. How would you describe it to Google Maps?",
max_lines=6,
)
with gr.Row(visible=False) as upload_ui:
upload_status = gr.Image(
value=Path(__file__).parent / "static" / "loading.gif",
show_label=False,
interactive=False,
height=50,
show_download_button=False,
)
with gr.Row() as form_ui:
with gr.Column():
audio_input = gr.Audio(type="filepath", label="Audio Upload (User Recording)")
transcript_input = gr.Textbox(
label="Transcript",
placeholder="Enter the transcript here...",
info="What did you say in the audio recording?",
)
country_select = gr.Dropdown(
label="Variety of English",
info="Which national variety of English do you identify with?",
choices=sorted([name for _, name in countries_for_language("en")]),
value=None,
interactive=True,
)
postcode_input = gr.Textbox(
label="Australian Postcode (Optional)",
placeholder="Enter your postcode (optional)",
info="If you want to provide your postcode, please enter it here. This is optional.",
)
# transcribe audio after upload
audio_input.input(fn=transcribe_audio, inputs=audio_input, outputs=transcript_input)
# hide scenario text when user starts recording
audio_input.start_recording(fn=lambda: gr.update(visible=False), outputs=scenario_input)
# show scenario text when user stops recording
audio_input.stop_recording(fn=lambda: gr.update(visible=True), outputs=scenario_input)
with gr.Column():
image_input = gr.Image(type="filepath", label="Image Upload (Google Maps Screenshot)", height=600)
expected_output = gr.Textbox(label="What did you expect Google Maps to return?")
satisfiable_radio = gr.Radio(
label="Did the Google Maps results satisfy your query?",
choices=["Yes", "No"],
)
actual_output = gr.Textbox(
label="What did Google Maps return instead?", visible=False, interactive=True
)
satisfiable_radio.change(
fn=lambda x: gr.update(visible=(x == "No")),
inputs=satisfiable_radio,
outputs=actual_output,
)
with gr.Row() as button_row:
skip_button = gr.Button("Skip Scenario", variant="secondary")
submit_button = gr.Button("Submit", variant="primary")
submit_button.click(
                # on submit, show the upload status UI and hide the form and buttons
fn=lambda: (gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)),
outputs=[upload_ui, form_ui, button_row],
).then(
# then handle the submission
fn=handle_submission,
inputs=[
username,
scenario_idx_input,
scenario_input,
audio_input,
transcript_input,
country_select,
image_input,
expected_output,
satisfiable_radio,
user_scenarios,
actual_output,
postcode_input,
],
outputs=[user_scenarios],
).then(
                # then re-initialize the interface with the updated in-memory progress and pick a new scenario
init_interface,
inputs=[username, user_scenarios],
outputs=[
loader_ui,
status,
main_ui,
welcome_text,
scenario_idx_input,
scenario_input,
progress_display,
],
).then(
                # then clear form components, keeping the country select and postcode
fn=lambda: (
gr.update(value=None),
gr.update(value=None),
gr.update(value=None),
gr.update(value=None),
gr.update(value=None),
gr.update(value=None),
),
outputs=[audio_input, transcript_input, image_input, expected_output, satisfiable_radio, actual_output],
).then(
                # then hide the upload UI and show the form and buttons again
fn=lambda: (gr.update(visible=False), gr.update(visible=True), gr.update(visible=True)),
outputs=[upload_ui, form_ui, button_row],
)
skip_button.click(
                # on skip, show the upload status UI and hide the form and buttons
fn=lambda: (gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)),
outputs=[upload_ui, form_ui, button_row],
).then(
                # then re-initialize the interface with the current progress and pick a new scenario
init_interface,
inputs=[username, user_scenarios],
outputs=[
loader_ui,
status,
main_ui,
welcome_text,
scenario_idx_input,
scenario_input,
progress_display,
],
).then(
                # then clear form components, keeping the country select and postcode
fn=lambda: (
gr.update(value=None),
gr.update(value=None),
gr.update(value=None),
gr.update(value=None),
gr.update(value=None),
gr.update(value=None),
),
outputs=[audio_input, transcript_input, image_input, expected_output, satisfiable_radio, actual_output],
).then(
                # then hide the upload UI and show the form and buttons again
fn=lambda: (gr.update(visible=False), gr.update(visible=True), gr.update(visible=True)),
outputs=[upload_ui, form_ui, button_row],
)
demo.load(load_user, None, outputs=[username, user_scenarios]).then(
init_interface,
inputs=[username, user_scenarios],
outputs=[loader_ui, status, main_ui, welcome_text, scenario_idx_input, scenario_input, progress_display],
)
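    # USERS and PASSWORD are parallel comma-separated lists; each zipped (user, password) pair is a valid login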
users = os.getenv("USERS", "").split(",")
passwords = os.getenv("PASSWORD", "").split(",")
demo.launch(auth=list(zip(users, passwords)), ssr_mode=False)