NextPlace / MLBaseModelDriver.py
ckoozzzu's picture
Upload folder using huggingface_hub
94dd1f0 verified
Raw
History Blame Contribute Delete
6.11 kB
import datetime
import importlib.util
import math
import pickle
import sys
from typing import Any, Optional, Tuple, TypedDict

import pandas as pd
import torch
from huggingface_hub import hf_hub_download
class ProcessedSynapse(TypedDict):
    """
    Data container class representing the data shape of the synapse coming into `run_inference`.

    Every value is declared ``Optional``, so consumers must be prepared for ``None``
    in any field. In this file only `beds`, `sqft`, `property_type`, `price` and
    `days_on_market` are read by `MLBaseModelDriver`.
    """

    id: Optional[str]
    nextplace_id: Optional[str]
    property_id: Optional[str]
    listing_id: Optional[str]
    address: Optional[str]
    city: Optional[str]
    state: Optional[str]
    zip_code: Optional[str]
    price: Optional[float]  # model feature (passed through the scaler)
    beds: Optional[int]  # model feature
    baths: Optional[float]
    sqft: Optional[int]  # model feature (passed through the scaler)
    lot_size: Optional[int]
    year_built: Optional[int]
    days_on_market: Optional[int]  # used to turn predicted days on market into a sale date
    latitude: Optional[float]
    longitude: Optional[float]
    property_type: Optional[str]  # model feature (cast to int)
    last_sale_date: Optional[str]
    hoa_dues: Optional[float]
    query_date: Optional[str]
class MLBaseModelDriver:
    """
    Driver that loads the base model and serves predictions.

    This class must do two things
    1) The constructor must load the model
    2) This class must implement a method called `run_inference` that takes the input data and returns a tuple
       of float, str representing the predicted sale price and the predicted sale date.
    """

    # Hugging Face repository hosting the weights, pickled artifacts and model class.
    _REPO_ID = "ckoozzzu/NextPlace"
    # Column order fed to the network; its length is used as the model's `input_dim`.
    _FEATURE_COLUMNS = ("beds", "sqft", "property_type", "price")
    # Columns handed to the fitted scaler, in the order the scaler is called with.
    _SCALED_COLUMNS = ("sqft", "price")

    def __init__(self):
        self.model, self.label_encoder, self.scaler = self.load_model()

    def load_model(self) -> Tuple[Any, Any, Any]:
        """
        load the model and model parameters
        :return: model, label encoder, and scaler
        """
        print("Loading model...")
        model_file, scaler_file, label_encoders_file, model_class_file = self._download_model_files()
        model_class = self._import_model_class(model_class_file)
        model = model_class(input_dim=len(self._FEATURE_COLUMNS))

        # NOTE(security): `weights_only=False` and the `pickle.load` calls below can
        # execute arbitrary code embedded in the downloaded files. They are kept
        # as-is because the artifact format is not visible here; only use with a
        # trusted repository (consider `weights_only=True` if the checkpoint is a
        # plain state dict).
        # `map_location="cpu"` lets a checkpoint saved on a GPU load on CPU-only
        # hosts; `load_state_dict` copies the values into the model's own tensors.
        state_dict = torch.load(model_file, map_location="cpu", weights_only=False)
        model.load_state_dict(state_dict)
        model.eval()

        # Load additional artifacts
        with open(scaler_file, 'rb') as f:
            scaler = pickle.load(f)
        with open(label_encoders_file, 'rb') as f:
            label_encoders = pickle.load(f)

        print("Model Loaded.")
        return model, label_encoders, scaler

    def _download_model_files(self) -> Tuple[str, str, str, str]:
        """
        download files from hugging face
        :return: local paths of the model weights, scaler, label encoder and model class file
        """
        repo_id = self._REPO_ID
        model_file = hf_hub_download(repo_id=repo_id, filename="model_files/real_estate_model.pth")
        scaler_file = hf_hub_download(repo_id=repo_id, filename="model_files/scaler.pkl")
        label_encoders_file = hf_hub_download(repo_id=repo_id, filename="model_files/label_encoder.pkl")
        model_class_file = hf_hub_download(repo_id=repo_id, filename="MLBaseModel.py")
        return model_file, scaler_file, label_encoders_file, model_class_file

    def _import_model_class(self, model_class_file: str) -> type:
        """
        import the module at `model_class_file` and return its `MLBaseModel` class
        :param model_class_file: file path to the model class
        :return: the `MLBaseModel` class object (not an instance)
        :raises ImportError: if no import spec/loader can be built for the file
        :raises AttributeError: if the module does not define `MLBaseModel`
        """
        # Reference docs here: https://docs.python.org/3/library/importlib.html#importlib.util.spec_from_loader
        # NOTE(security): this executes Python source downloaded from the hub.
        module_name = "MLBaseModel"
        spec = importlib.util.spec_from_file_location(module_name, model_class_file)
        if spec is None or spec.loader is None:
            raise ImportError(f"Cannot create an import spec for '{model_class_file}'")

        model_module = importlib.util.module_from_spec(spec)
        # Register before executing so the module can be resolved by name while it runs.
        sys.modules[module_name] = model_module
        try:
            spec.loader.exec_module(model_module)
        except Exception:
            # Do not leave a half-initialised module registered.
            sys.modules.pop(module_name, None)
            raise

        model_class = getattr(model_module, "MLBaseModel", None)
        if model_class is None:
            raise AttributeError("The module does not contain a class named 'MLBaseModel'")
        return model_class

    def run_inference(self, input_data: "ProcessedSynapse") -> Tuple[float, str]:
        """
        run inference using the MLBaseModel
        :param input_data: synapse from the validator
        :return: the predicted sale price and the predicted sale date formatted as YYYY-MM-DD
        """
        input_tensor = self._preprocess_input(input_data)
        with torch.no_grad():
            prediction = self.model(input_tensor)
            # The first (only) row holds [sale price, days on market].
            predicted_sale_price, predicted_days_on_market = prediction[0].detach().cpu().numpy()
        predicted_days_on_market = math.floor(predicted_days_on_market)
        # `.get` so an absent key is handled the same way as an explicit None.
        predicted_sale_date = self._sale_date_predictor(
            input_data.get('days_on_market'), predicted_days_on_market
        )
        return float(predicted_sale_price), predicted_sale_date.strftime("%Y-%m-%d")

    def _sale_date_predictor(self, days_on_market: Optional[int], predicted_days_on_market: int) -> datetime.date:
        """
        convert predicted days on market to a sale date
        :param days_on_market: number of days this home has been on the market; None is treated as 0
        :param predicted_days_on_market: the predicted number of days for this home on the market
        :return: the predicted sale date (tomorrow when the prediction has already elapsed)
        """
        today = datetime.date.today()
        # The synapse declares this field Optional; comparing None with an int would raise.
        elapsed_days = 0 if days_on_market is None else days_on_market
        if elapsed_days < predicted_days_on_market:
            return today + datetime.timedelta(days=predicted_days_on_market - elapsed_days)
        return today + datetime.timedelta(days=1)

    def _preprocess_input(self, data: "ProcessedSynapse") -> torch.Tensor:
        """
        preprocess the input for inference
        :param data: synapse from the validator
        :return: float32 tensor with one row and the columns beds, sqft, property_type, price
        """
        df = pd.DataFrame([data])
        default_beds = 3
        default_sqft = 1500.0
        default_property_type = '6'

        # Coerce to numeric first so a None/unparseable value becomes NaN in a float
        # column (instead of leaving an object column that torch cannot convert).
        df['beds'] = pd.to_numeric(df['beds'], errors='coerce').fillna(default_beds)
        df['sqft'] = pd.to_numeric(df['sqft'], errors='coerce').fillna(default_sqft)
        df['property_type'] = df['property_type'].fillna(default_property_type).astype(int)

        scaled_columns = list(self._SCALED_COLUMNS)
        df[scaled_columns] = self.scaler.transform(df[scaled_columns])

        features = df[list(self._FEATURE_COLUMNS)].to_numpy(dtype="float64")
        return torch.tensor(features, dtype=torch.float32)