Spaces:

alfulanny
/

huggingface_ai_final

Running

App Files Files Community

alfulanny committed 4 days ago

Commit

0106d0b

verified ·

1 Parent(s): f86436f

Create vision_tools.py

Browse files

Files changed (1) hide show

vision_tools.py +81 -0

vision_tools.py ADDED Viewed

	@@ -0,0 +1,81 @@

+"""
+Vision tools: image captioning using Hugging Face Inference API with a local fallback.
+Functions:
+- `caption_image(path)`: returns a short caption for an image file.
+"""
+from typing import Optional
+import os
+import logging
+logger = logging.getLogger(__name__)
+try:
+    from huggingface_hub import InferenceApi
+except Exception:
+    InferenceApi = None
+try:
+    from transformers import pipeline
+except Exception:
+    pipeline = None
+try:
+    from smolagents import tool
+except Exception:
+    tool = None
+def _caption_from_output(out: object) -> str:
+    """Extract the caption text from an image-to-text result.
+
+    Accepts either a single result or a list of results (only the first entry
+    is used). A dict carrying ``"error"`` but no ``"generated_text"`` raises
+    ``RuntimeError`` so the caller can treat it as a failed attempt instead of
+    returning the error payload as if it were a caption.
+    """
+    first = out[0] if isinstance(out, list) and out else out
+    if isinstance(first, dict):
+        if "generated_text" in first:
+            return str(first["generated_text"]).strip()
+        if "error" in first:
+            raise RuntimeError(str(first["error"]))
+    return str(first)
+def caption_image(path: str, model: str = "nlpconnect/vit-gpt2-image-captioning") -> str:
+    """Caption the image file at `path`.
+
+    The Hugging Face Inference API is tried first when the ``HF_TOKEN``
+    environment variable is set and ``huggingface_hub`` imported successfully;
+    otherwise, or if that attempt fails, a local ``transformers`` pipeline is
+    tried when available.
+
+    Args:
+        path: Filesystem path of the image to caption.
+        model: Hugging Face model id of the image-to-text model.
+
+    Returns:
+        The caption with surrounding whitespace removed, ``"(file not found)"``
+        when `path` is not an existing regular file, or
+        ``"(image captioning unavailable)"`` when no backend produced a result.
+        This function never raises for backend failures; they are logged as
+        warnings.
+    """
+    # isfile (not exists) so a directory path is rejected up front instead of
+    # failing later inside both backends.
+    if not os.path.isfile(path):
+        return "(file not found)"
+    hf_token = os.environ.get("HF_TOKEN")
+    # Try Inference API first
+    if hf_token and InferenceApi is not None:
+        try:
+            client = InferenceApi(repo_id=model, token=hf_token)
+            with open(path, "rb") as f:
+                image_bytes = f.read()
+            # Binary content must go through `data=`; `inputs=` is embedded in
+            # the JSON payload and a file object cannot be serialised there.
+            return _caption_from_output(client(data=image_bytes))
+        except Exception as e:
+            logger.warning("HF Inference image captioning failed: %s", e)
+    # Local pipeline fallback (may not be installed or suitable for large models)
+    if pipeline is not None:
+        try:
+            pipe = pipeline("image-to-text", model=model)
+            return _caption_from_output(pipe(path))
+        except Exception as e:
+            logger.warning("Local pipeline image captioning failed: %s", e)
+    return "(image captioning unavailable)"
+# Export a smolagents-wrapped tool if possible; otherwise expose the plain
+# function under the same name so importers always find `caption_image_tool`.
+if tool is not None:
+    try:
+        @tool
+        def caption_image_tool(path: str, model: str = "nlpconnect/vit-gpt2-image-captioning") -> str:
+            """Generate a short caption describing the image stored at a local file path.
+
+            Args:
+                path: Path to the image file on the local filesystem.
+                model: Hugging Face model id of the image-to-text model to use.
+
+            Returns:
+                The caption text, or a parenthesised status string when captioning fails.
+            """
+            # The docstring above is required: smolagents builds the tool's
+            # description and argument schema from it.
+            return caption_image(path, model=model)
+    except Exception as e:
+        # Best effort: keep the module importable, but say why the wrapper is missing.
+        logger.warning("Could not wrap caption_image as a smolagents tool: %s", e)
+        caption_image_tool = caption_image
+else:
+    caption_image_tool = caption_image
+__all__ = ["caption_image", "caption_image_tool"]