Final_Assignment

Sleeping

App Files Files Community

Final_Assignment / tools /audio.py

tdziwok

first commit

31af2b2 6 months ago

raw

history blame contribute delete

2.15 kB

	import requests
	from smolagents import tool
	import openai
	import base64

	@tool
	def analyse_audio(audio_url: str) -> str:
	    """
	    Analyse the provided audio file, and return a description or transcription of the contents.
	    Only files served over https by agents-course-unit4-scoring.hf.space are fetched. If the URL
	    is refused or the download fails, a short error message is returned instead of an analysis.

	    Args:
	        audio_url (str): The URL of the audio file to be analysed. Usually with an audio extension like mp3, aac, etc.

	    Returns:
	        str: description or transcription of the contents of the provided audio
	    """
	    # Imported locally so the tool function stays self-contained.
	    from urllib.parse import urlsplit

	    allowed_host = "agents-course-unit4-scoring.hf.space"
	    refusal = "the requested URL is not whitelisted, refusing to fetch data"

	    # some security: compare the parsed scheme and host rather than searching for the
	    # whitelisted origin as a substring. A substring test also accepts URLs of other
	    # hosts that merely mention the origin in their path or query string.
	    try:
	        target = urlsplit(audio_url)
	        host = target.hostname
	    except ValueError:
	        # Malformed URLs (e.g. a broken IPv6 literal) cannot be on the whitelist.
	        return refusal
	    if target.scheme != "https" or host != allowed_host:
	        return refusal

	    # Bound the wait so an unresponsive server cannot hang the tool, and report
	    # transport failures in the same error-string style used for bad status codes.
	    # NOTE(review): requests follows redirects by default; confirm whether redirects
	    # leaving the whitelisted host should be rejected as well.
	    try:
	        resp = requests.get(audio_url, timeout=30)
	    except requests.RequestException as exc:
	        return f"failed to fetch the requested audio file: {exc}"
	    if resp.status_code != 200:
	        return f"failed to fetch the requested audio file: (status={resp.status_code})\n{resp.text}"

	    # Derive the audio format from the Content-Type header: drop any parameters
	    # (e.g. "; charset=...") and normalise case before matching. Anything that is
	    # not recognisably WAV keeps the previous behaviour of being sent as mp3.
	    mime = resp.headers.get("content-type") or ""
	    media_type = mime.split(";", 1)[0].strip().lower()
	    wav_types = {"audio/wav", "audio/x-wav", "audio/wave", "audio/vnd.wave"}
	    audio_format = "wav" if media_type in wav_types else "mp3"

	    # The API expects the audio payload as a base64 string, not raw bytes.
	    audio_b64 = base64.b64encode(resp.content).decode("utf-8")

	    # Create the message for the audio-capable GPT-4o model
	    response = openai.chat.completions.create(
	        model="gpt-4o-audio-preview",
	        messages=[
	            {
	                "role": "user",
	                "content": [
	                    {"type": "text", "text": "Please analyze the contents of this audio file. Provide a short (two sentence) description of the contents, and then output your analysis. The analysis should be in the most appropriate format. e.g. if the audio is a conversation, a transcription (indicating who says what) is best, for a monologue, maybe a simple transcription is best. if it's nature noises, describe what they are, the likely locations, etc."},
	                    {
	                        "type": "input_audio",
	                        "input_audio": {
	                            "data": audio_b64,
	                            "format": audio_format,
	                        },
	                    },
	                ],
	            }
	        ],
	        max_tokens=500,
	    )

	    return response.choices[0].message.content