Spaces: Build error
```python
import os

# The cache env var must be set before transformers is imported,
# otherwise the default cache location has already been resolved.
os.makedirs("/app/cache", exist_ok=True)
os.environ["TRANSFORMERS_CACHE"] = "/app/cache"

import logging

import torch
from fastapi import FastAPI, HTTPException, BackgroundTasks
from fastapi.middleware.cors import CORSMiddleware
from transformers import AutoTokenizer, AutoModelForCausalLM

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(title="Medical LLaMA API")

# Add CORS middleware
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # note: browsers ignore a wildcard origin when allow_credentials=True
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Populated at startup
tokenizer = None
model = None

# Check GPU availability
def check_gpu():
    if torch.cuda.is_available():
        logger.info(f"GPU available: {torch.cuda.get_device_name(0)}")
        return True
    logger.warning("No GPU available, using CPU")
    return False

# Initialize the model on the proper device
def init_model():
    try:
        device = "cuda" if check_gpu() else "cpu"
        model_path = os.getenv("MODEL_PATH", "./model/medical_llama_3b")
        logger.info(f"Loading model from {model_path}")
        tokenizer = AutoTokenizer.from_pretrained(model_path, cache_dir="/app/cache")
        model = AutoModelForCausalLM.from_pretrained(
            model_path,
            torch_dtype=torch.float16 if device == "cuda" else torch.float32,
            device_map="auto",
            cache_dir="/app/cache",
        )
        return tokenizer, model
    except Exception as e:
        logger.error(f"Error loading model: {str(e)}")
        raise

# Rest of your existing code...

@app.on_event("startup")  # this decorator was missing, so the handler never ran
async def startup_event():
    logger.info("Starting up application...")
    try:
        global tokenizer, model
        tokenizer, model = init_model()
        logger.info("Model loaded successfully")
    except Exception as e:
        logger.error(f"Failed to load model: {str(e)}")
        raise  # fail fast instead of serving requests without a model
```
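Recent FastAPI releases deprecate `@app.on_event("startup")` in favor of a lifespan handler, so if your Space pins a newer FastAPI you may want that form instead. A minimal sketch with the same model-loading logic (only the hook changes):

```python
from contextlib import asynccontextmanager

from fastapi import FastAPI

@asynccontextmanager
async def lifespan(app: FastAPI):
    # Load the model once at startup and keep it on app.state
    app.state.tokenizer, app.state.model = init_model()
    yield
    # resources could be released here at shutdown

app = FastAPI(title="Medical LLaMA API", lifespan=lifespan)
```

Endpoints would then read the model from `request.app.state` rather than module globals.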
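The snippet stops at `# Rest of your existing code...`, so the actual endpoints aren't shown. For completeness, a hypothetical inference endpoint that uses the globals set at startup might look like the sketch below; the route name, request schema, and generation settings are illustrative assumptions, not your original code:

```python
from pydantic import BaseModel

class GenerateRequest(BaseModel):  # hypothetical request schema
    prompt: str
    max_new_tokens: int = 256

@app.post("/generate")  # hypothetical route
async def generate(req: GenerateRequest):
    if model is None or tokenizer is None:
        raise HTTPException(status_code=503, detail="Model not loaded")
    # Move inputs to the model's device (with device_map="auto" on a
    # single GPU, model.device is that GPU)
    inputs = tokenizer(req.prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=req.max_new_tokens)
    return {"completion": tokenizer.decode(outputs[0], skip_special_tokens=True)}
```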