Spaces:
Paused
Paused
Upload 7 files
Browse files- 591_rental_analysis.ipynb +10 -0
- analyzer.py +373 -0
- main.py +179 -0
- requirements.txt +14 -0
- scraper.py +253 -0
- utils.py +150 -0
- visualizer.py +402 -0
591_rental_analysis.ipynb
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [],
|
| 3 |
+
"metadata": {
|
| 4 |
+
"language_info": {
|
| 5 |
+
"name": "python"
|
| 6 |
+
}
|
| 7 |
+
},
|
| 8 |
+
"nbformat": 4,
|
| 9 |
+
"nbformat_minor": 5
|
| 10 |
+
}
|
analyzer.py
ADDED
|
@@ -0,0 +1,373 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# �� Copilot �ͦ�
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
from typing import Dict, List, Tuple
|
| 5 |
+
import json
|
| 6 |
+
from transformers import pipeline, AutoTokenizer, AutoModel
|
| 7 |
+
from datasets import Dataset
|
| 8 |
+
import re
|
| 9 |
+
|
| 10 |
+
class RentalDataAnalyzer:
|
| 11 |
+
"""���θ�Ƥ��R��"""
|
| 12 |
+
|
| 13 |
+
def __init__(self, data_path: str = None):
|
| 14 |
+
"""
|
| 15 |
+
��l�Ƥ��R��
|
| 16 |
+
|
| 17 |
+
Args:
|
| 18 |
+
data_path: ����ɮ��|
|
| 19 |
+
"""
|
| 20 |
+
self.data_path = data_path
|
| 21 |
+
self.df = None
|
| 22 |
+
self.analysis_results = {}
|
| 23 |
+
|
| 24 |
+
# ��l��Hugging Face�ҫ��Ω��r���R
|
| 25 |
+
self.sentiment_analyzer = None
|
| 26 |
+
self.text_classifier = None
|
| 27 |
+
|
| 28 |
+
def load_data(self, data_path: str = None) -> pd.DataFrame:
    """Load rental data from a JSON or CSV file into ``self.df``.

    Args:
        data_path: Path to the data file; when given, it overrides the
            path supplied at construction time.

    Returns:
        The loaded DataFrame, or ``None`` when loading fails.
    """
    if data_path:
        self.data_path = data_path

    try:
        if self.data_path.endswith('.json'):
            with open(self.data_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            self.df = pd.DataFrame(data)
        elif self.data_path.endswith('.csv'):
            # utf-8-sig tolerates a BOM written by Excel/Windows tools.
            self.df = pd.read_csv(self.data_path, encoding='utf-8-sig')
        else:
            raise ValueError("Unsupported file format")

        print(f"Successfully loaded {len(self.df)} records")
        return self.df

    except Exception as e:
        # Broad catch is deliberate: callers treat None as "load failed".
        print(f"Error while loading data: {e}")
        return None
|
| 49 |
+
|
| 50 |
+
def clean_data(self) -> pd.DataFrame:
    """Clean the loaded rental data in place.

    Drops duplicate listings, coerces price/area to numeric, derives a
    price-per-ping column, and strips price outliers via the IQR rule.

    Returns:
        The cleaned DataFrame, or ``None`` when no data has been loaded.
    """
    if self.df is None:
        print("No data loaded yet")
        return None

    print("Starting data cleaning...")

    # Remove duplicate listings (same title/address/price combination).
    original_count = len(self.df)
    self.df = self.df.drop_duplicates(subset=['title', 'address', 'price'])
    print(f"Removed {original_count - len(self.df)} duplicate records")

    # Coerce price to numeric and drop non-positive prices.
    self.df['price'] = pd.to_numeric(self.df['price'], errors='coerce')
    self.df = self.df[self.df['price'] > 0]

    # Coerce area (ping) to numeric.
    self.df['area'] = pd.to_numeric(self.df['area'], errors='coerce')

    # Derive price per ping; NaN when the area is missing or non-positive.
    self.df['price_per_ping'] = self.df.apply(
        lambda row: row['price'] / row['area'] if row['area'] > 0 else np.nan,
        axis=1
    )

    # Strip price outliers using the IQR rule.
    self.df = self.remove_outliers(self.df, 'price')

    print(f"{len(self.df)} valid records remain after cleaning")
    return self.df
|
| 81 |
+
|
| 82 |
+
def remove_outliers(self, df: pd.DataFrame, column: str) -> pd.DataFrame:
    """Return *df* with rows outside the 1.5*IQR fences on *column* removed.

    Rows whose *column* value is NaN are also dropped, because they fail
    both fence comparisons.
    """
    q1 = df[column].quantile(0.25)
    q3 = df[column].quantile(0.75)
    iqr = q3 - q1

    lower_bound = q1 - 1.5 * iqr
    upper_bound = q3 + 1.5 * iqr

    # Report how many rows fall strictly outside the fences.
    outliers_count = len(df[(df[column] < lower_bound) | (df[column] > upper_bound)])
    print(f"Removed {outliers_count} outliers from {column}")

    return df[(df[column] >= lower_bound) & (df[column] <= upper_bound)]
|
| 95 |
+
|
| 96 |
+
def basic_statistics(self) -> Dict:
    """Compute summary statistics for price, area and price-per-ping.

    Returns an empty dict when no (cleaned) data is available; the
    result is also cached under ``analysis_results['basic_stats']``.
    """
    if self.df is None or len(self.df) == 0:
        return {}

    price = self.df['price']
    area = self.df['area']
    per_ping = self.df['price_per_ping']

    price_stats = {
        'mean': round(price.mean(), 2),
        'median': round(price.median(), 2),
        'std': round(price.std(), 2),
        'min': price.min(),
        'max': price.max(),
        'q25': round(price.quantile(0.25), 2),
        'q75': round(price.quantile(0.75), 2),
    }

    # Area/per-ping blocks degrade to {} when the column is entirely NaN.
    area_stats = {}
    if not area.isna().all():
        area_stats = {
            'mean': round(area.mean(), 2),
            'median': round(area.median(), 2),
            'min': area.min(),
            'max': area.max(),
        }

    per_ping_stats = {}
    if not per_ping.isna().all():
        per_ping_stats = {
            'mean': round(per_ping.mean(), 2),
            'median': round(per_ping.median(), 2),
            'min': round(per_ping.min(), 2),
            'max': round(per_ping.max(), 2),
        }

    stats = {
        'total_properties': len(self.df),
        'price_stats': price_stats,
        'area_stats': area_stats,
        'price_per_ping_stats': per_ping_stats,
    }

    self.analysis_results['basic_stats'] = stats
    return stats
|
| 128 |
+
|
| 129 |
+
def price_distribution_analysis(self) -> Dict:
    """Bucket listings into rent ranges and report counts and percentages."""
    if self.df is None or len(self.df) == 0:
        return {}

    # Half-open monthly-rent buckets (right=False -> [lo, hi)).
    bins = [0, 15000, 20000, 25000, 30000, 40000, float('inf')]
    labels = ['<15K', '15-20K', '20-25K', '25-30K', '30-40K', '>40K']
    self.df['price_range'] = pd.cut(self.df['price'], bins=bins, labels=labels, right=False)

    counts = self.df['price_range'].value_counts().sort_index()
    shares = (counts / len(self.df) * 100).round(2)

    result = {
        'ranges': counts.index.tolist(),
        'counts': counts.values.tolist(),
        'percentages': shares.tolist(),
    }

    self.analysis_results['price_distribution'] = result
    return result
|
| 150 |
+
|
| 151 |
+
def area_analysis(self) -> Dict:
|
| 152 |
+
"""�W�Ƥ��R"""
|
| 153 |
+
if self.df is None or len(self.df) == 0 or self.df['area'].isna().all():
|
| 154 |
+
return {}
|
| 155 |
+
|
| 156 |
+
# �w�q�W�ư϶�
|
| 157 |
+
area_bins = [0, 20, 30, 40, 50, float('inf')]
|
| 158 |
+
area_labels = ['<20�W', '20-30�W', '30-40�W', '40-50�W', '>50�W']
|
| 159 |
+
|
| 160 |
+
self.df['area_range'] = pd.cut(self.df['area'], bins=area_bins, labels=area_labels, right=False)
|
| 161 |
+
|
| 162 |
+
area_distribution = self.df['area_range'].value_counts().sort_index()
|
| 163 |
+
|
| 164 |
+
area_dict = {
|
| 165 |
+
'ranges': area_distribution.index.tolist(),
|
| 166 |
+
'counts': area_distribution.values.tolist(),
|
| 167 |
+
'percentages': (area_distribution / len(self.df) * 100).round(2).tolist()
|
| 168 |
+
}
|
| 169 |
+
|
| 170 |
+
self.analysis_results['area_analysis'] = area_dict
|
| 171 |
+
return area_dict
|
| 172 |
+
|
| 173 |
+
def setup_huggingface_models(self):
|
| 174 |
+
"""�]�mHugging Face�ҫ�"""
|
| 175 |
+
try:
|
| 176 |
+
print("���JHugging Face�ҫ�...")
|
| 177 |
+
|
| 178 |
+
# ���J���屡�P���R�ҫ�
|
| 179 |
+
self.sentiment_analyzer = pipeline(
|
| 180 |
+
"sentiment-analysis",
|
| 181 |
+
model="ckiplab/bert-base-chinese-ws",
|
| 182 |
+
return_all_scores=True
|
| 183 |
+
)
|
| 184 |
+
|
| 185 |
+
print("Hugging Face�ҫ����J����")
|
| 186 |
+
except Exception as e:
|
| 187 |
+
print(f"���JHugging Face�ҫ��ɵo�Ϳ��~: {e}")
|
| 188 |
+
|
| 189 |
+
def analyze_descriptions(self) -> Dict:
    """Run keyword-frequency analysis over the raw listing descriptions.

    Returns {} when there is no data or no ``raw_info`` column; otherwise
    caches and returns the keyword frequencies and description count.
    """
    if self.df is None or 'raw_info' not in self.df.columns:
        return {}

    descriptions = self.df['raw_info'].dropna().tolist()
    if not descriptions:
        return {}

    result = {
        'keywords_frequency': self.analyze_keywords(descriptions),
        'total_descriptions': len(descriptions),
    }

    self.analysis_results['description_analysis'] = result
    return result
|
| 209 |
+
|
| 210 |
+
def analyze_keywords(self, descriptions: List[str]) -> Dict:
    """Count how many descriptions mention each rental-related keyword.

    Returns the ten most frequent keywords, sorted by count descending.

    NOTE(review): the keyword literals below are mojibake (mis-decoded
    Chinese terms) inherited from the source file; they are kept
    byte-for-byte to preserve runtime behavior — TODO restore the
    original Big5/UTF-8 text.
    """
    keywords = [
        '�B', '��', '�q��', '���x', '������', '�z�O',
        '�ĥ�', '�q��', '�w�R', '�K�Q', '�ͬ�����', '�ǰ�',
        '���s', '���C', '�a��', '�a�q', '�N��', '�~���'
    ]

    hits = dict.fromkeys(keywords, 0)
    for text in descriptions:
        for kw in keywords:
            if kw in text:
                hits[kw] += 1

    # Keep only the ten most frequent keywords, highest count first.
    ranked = sorted(hits.items(), key=lambda item: item[1], reverse=True)
    return dict(ranked[:10])
|
| 230 |
+
|
| 231 |
+
def correlation_analysis(self) -> Dict:
    """Compute pairwise Pearson correlations between the numeric columns.

    Only columns that exist and contain at least one non-NaN value are
    considered; at least two such columns are required.
    """
    if self.df is None or len(self.df) == 0:
        return {}

    candidates = ['price', 'area', 'price_per_ping']
    usable = [c for c in candidates
              if c in self.df.columns and not self.df[c].isna().all()]

    if len(usable) < 2:
        return {}

    matrix = self.df[usable].corr()

    # Upper triangle only, so each pair is reported once.
    pairs = {}
    for i, first in enumerate(usable):
        for second in usable[i + 1:]:
            pairs[f"{first}_vs_{second}"] = round(matrix.loc[first, second], 3)

    self.analysis_results['correlation'] = pairs
    return pairs
|
| 252 |
+
|
| 253 |
+
def generate_insights(self) -> List[str]:
|
| 254 |
+
"""�ͦ����R�}��"""
|
| 255 |
+
insights = []
|
| 256 |
+
|
| 257 |
+
if 'basic_stats' in self.analysis_results:
|
| 258 |
+
stats = self.analysis_results['basic_stats']
|
| 259 |
+
insights.append(f"�@��� {stats['total_properties']} ���ŦX�����Ϊ���")
|
| 260 |
+
insights.append(f"���������� {stats['price_stats']['mean']:,} ��")
|
| 261 |
+
insights.append(f"��������Ƭ� {stats['price_stats']['median']:,} ��")
|
| 262 |
+
|
| 263 |
+
if stats['price_stats']['mean'] > stats['price_stats']['median']:
|
| 264 |
+
insights.append("���������V�k���סA�s�b�����������������")
|
| 265 |
+
|
| 266 |
+
if 'price_distribution' in self.analysis_results:
|
| 267 |
+
dist = self.analysis_results['price_distribution']
|
| 268 |
+
max_range_idx = dist['percentages'].index(max(dist['percentages']))
|
| 269 |
+
most_common_range = dist['ranges'][max_range_idx]
|
| 270 |
+
percentage = dist['percentages'][max_range_idx]
|
| 271 |
+
insights.append(f"�̱`���������϶��O {most_common_range}�A�� {percentage}%")
|
| 272 |
+
|
| 273 |
+
if 'area_analysis' in self.analysis_results:
|
| 274 |
+
area = self.analysis_results['area_analysis']
|
| 275 |
+
if area:
|
| 276 |
+
max_area_idx = area['percentages'].index(max(area['percentages']))
|
| 277 |
+
most_common_area = area['ranges'][max_area_idx]
|
| 278 |
+
insights.append(f"�̱`�����W�ư϶��O {most_common_area}")
|
| 279 |
+
|
| 280 |
+
return insights
|
| 281 |
+
|
| 282 |
+
def run_full_analysis(self) -> Dict:
|
| 283 |
+
"""���槹����R"""
|
| 284 |
+
print("�}�l���槹����R...")
|
| 285 |
+
|
| 286 |
+
# �έp
|
| 287 |
+
basic_stats = self.basic_statistics()
|
| 288 |
+
print("? �έp���R����")
|
| 289 |
+
|
| 290 |
+
# �����������R
|
| 291 |
+
price_dist = self.price_distribution_analysis()
|
| 292 |
+
print("? �����������R����")
|
| 293 |
+
|
| 294 |
+
# �W�Ƥ��R
|
| 295 |
+
area_analysis = self.area_analysis()
|
| 296 |
+
print("? �W�Ƥ��R����")
|
| 297 |
+
|
| 298 |
+
# �y�z��r���R
|
| 299 |
+
desc_analysis = self.analyze_descriptions()
|
| 300 |
+
print("? �y�z��r���R����")
|
| 301 |
+
|
| 302 |
+
# �����ʤ��R
|
| 303 |
+
correlation = self.correlation_analysis()
|
| 304 |
+
print("? �����ʤ��R����")
|
| 305 |
+
|
| 306 |
+
# �ͦ��}��
|
| 307 |
+
insights = self.generate_insights()
|
| 308 |
+
print("? �}��ͦ�����")
|
| 309 |
+
|
| 310 |
+
self.analysis_results['insights'] = insights
|
| 311 |
+
|
| 312 |
+
return self.analysis_results
|
| 313 |
+
|
| 314 |
+
def save_analysis_results(self, filename: str = "analysis_results.json"):
    """Write ``self.analysis_results`` as UTF-8 JSON under ``output/``.

    Bug fix: the original ignored *filename* and wrote to a hard-coded
    literal path; the parameter is now honored.

    Args:
        filename: File name (not a path) created inside ``output/``.
    """
    try:
        path = f"output/{filename}"
        with open(path, 'w', encoding='utf-8') as f:
            json.dump(self.analysis_results, f, ensure_ascii=False, indent=2)
        print(f"Analysis results saved to {path}")
    except Exception as e:
        # Best-effort save: report and continue, matching the original.
        print(f"Error while saving analysis results: {e}")
|
| 322 |
+
|
| 323 |
+
def print_summary(self):
|
| 324 |
+
"""�L�X���R�K�n"""
|
| 325 |
+
if not self.analysis_results:
|
| 326 |
+
print("�S�����R���G�i���")
|
| 327 |
+
return
|
| 328 |
+
|
| 329 |
+
print("\n" + "="*50)
|
| 330 |
+
print("���������s�ϯ��Υ������R���i")
|
| 331 |
+
print("="*50)
|
| 332 |
+
|
| 333 |
+
if 'insights' in self.analysis_results:
|
| 334 |
+
print("\n? ���n�}��:")
|
| 335 |
+
for i, insight in enumerate(self.analysis_results['insights'], 1):
|
| 336 |
+
print(f"{i}. {insight}")
|
| 337 |
+
|
| 338 |
+
if 'basic_stats' in self.analysis_results:
|
| 339 |
+
stats = self.analysis_results['basic_stats']
|
| 340 |
+
print(f"\n? �����έp:")
|
| 341 |
+
print(f" ��������: {stats['price_stats']['mean']:,} ��")
|
| 342 |
+
print(f" �����: {stats['price_stats']['median']:,} ��")
|
| 343 |
+
print(f" �̧C����: {stats['price_stats']['min']:,} ��")
|
| 344 |
+
print(f" �̰�����: {stats['price_stats']['max']:,} ��")
|
| 345 |
+
print(f" �зǮt: {stats['price_stats']['std']:,} ��")
|
| 346 |
+
|
| 347 |
+
if 'price_distribution' in self.analysis_results:
|
| 348 |
+
print(f"\n? ��������:")
|
| 349 |
+
dist = self.analysis_results['price_distribution']
|
| 350 |
+
for range_name, count, percentage in zip(dist['ranges'], dist['counts'], dist['percentages']):
|
| 351 |
+
print(f" {range_name}: {count} �� ({percentage}%)")
|
| 352 |
+
|
| 353 |
+
print("\n" + "="*50)
|
| 354 |
+
|
| 355 |
+
if __name__ == "__main__":
|
| 356 |
+
# ���դ��R��
|
| 357 |
+
analyzer = RentalDataAnalyzer()
|
| 358 |
+
|
| 359 |
+
# ���J���
|
| 360 |
+
df = analyzer.load_data("output/rental_data.csv")
|
| 361 |
+
|
| 362 |
+
if df is not None:
|
| 363 |
+
# �M�~���
|
| 364 |
+
analyzer.clean_data()
|
| 365 |
+
|
| 366 |
+
# ���槹����R
|
| 367 |
+
results = analyzer.run_full_analysis()
|
| 368 |
+
|
| 369 |
+
# �x�s���G
|
| 370 |
+
analyzer.save_analysis_results()
|
| 371 |
+
|
| 372 |
+
# ��ܺK�n
|
| 373 |
+
analyzer.print_summary()
|
main.py
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# �� Copilot �ͦ�
|
| 2 |
+
"""
|
| 3 |
+
591���θ�Ƥ��R�� - �D�{��
|
| 4 |
+
���������s�ϯ��Υ������R�u��
|
| 5 |
+
|
| 6 |
+
���{����X�F�������ΡB��Ƥ��R�M��ı�ƥ\��A
|
| 7 |
+
�M���Ω���R591���κ������θ�ơC
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
import os
|
| 11 |
+
import sys
|
| 12 |
+
import argparse
|
| 13 |
+
from datetime import datetime
|
| 14 |
+
|
| 15 |
+
# �[�J�۹���|
|
| 16 |
+
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
| 17 |
+
|
| 18 |
+
from scraper import Rent591Scraper
|
| 19 |
+
from analyzer import RentalDataAnalyzer
|
| 20 |
+
from visualizer import RentalDataVisualizer
|
| 21 |
+
from utils import log_message, create_output_directories, get_current_timestamp
|
| 22 |
+
|
| 23 |
+
class RentalAnalysisApp:
|
| 24 |
+
"""591���Τ��R���ε{���D���O"""
|
| 25 |
+
|
| 26 |
+
def __init__(self):
|
| 27 |
+
self.scraper = Rent591Scraper()
|
| 28 |
+
self.analyzer = RentalDataAnalyzer()
|
| 29 |
+
self.visualizer = RentalDataVisualizer()
|
| 30 |
+
self.timestamp = get_current_timestamp()
|
| 31 |
+
|
| 32 |
+
def run_full_pipeline(self, max_pages: int = 5, skip_scraping: bool = False):
|
| 33 |
+
"""���槹�㪺���R�y�{"""
|
| 34 |
+
print("? 591���θ�Ƥ��R���Ұ�")
|
| 35 |
+
print("=" * 50)
|
| 36 |
+
|
| 37 |
+
# �Ыؿ�X�ؿ�
|
| 38 |
+
create_output_directories()
|
| 39 |
+
|
| 40 |
+
# �B�J1: ��ƪ���
|
| 41 |
+
if not skip_scraping:
|
| 42 |
+
log_message("�}�l����591����...")
|
| 43 |
+
rental_data = self.scraper.scrape_rental_data(max_pages=max_pages)
|
| 44 |
+
|
| 45 |
+
if not rental_data:
|
| 46 |
+
log_message("������������ơA�{���פ�", "ERROR")
|
| 47 |
+
return False
|
| 48 |
+
|
| 49 |
+
log_message(f"���\���� {len(rental_data)} �����")
|
| 50 |
+
|
| 51 |
+
# �x�s��l���
|
| 52 |
+
self.scraper.save_data(rental_data, f"raw_data_{self.timestamp}.json")
|
| 53 |
+
|
| 54 |
+
# �ഫ��CSV
|
| 55 |
+
df = self.scraper.to_dataframe(rental_data)
|
| 56 |
+
csv_filename = f"output/rental_data_{self.timestamp}.csv"
|
| 57 |
+
df.to_csv(csv_filename, index=False, encoding='utf-8-sig')
|
| 58 |
+
log_message(f"��Ƥw�x�s��CSV: {csv_filename}")
|
| 59 |
+
|
| 60 |
+
# �ϥγ̷s������ɮ�
|
| 61 |
+
data_file = csv_filename
|
| 62 |
+
else:
|
| 63 |
+
# �M��̷s������ɮ�
|
| 64 |
+
data_files = [f for f in os.listdir("output") if f.startswith("rental_data") and f.endswith(".csv")]
|
| 65 |
+
if not data_files:
|
| 66 |
+
log_message("�䤣��{������ɮסA�Х����檦��", "ERROR")
|
| 67 |
+
return False
|
| 68 |
+
data_file = f"output/{sorted(data_files)[-1]}"
|
| 69 |
+
log_message(f"�ϥβ{������ɮ�: {data_file}")
|
| 70 |
+
|
| 71 |
+
# �B�J2: ��Ƥ��R
|
| 72 |
+
log_message("�}�l��Ƥ��R...")
|
| 73 |
+
|
| 74 |
+
# ���J���
|
| 75 |
+
self.analyzer.load_data(data_file)
|
| 76 |
+
|
| 77 |
+
# �M�~���
|
| 78 |
+
cleaned_df = self.analyzer.clean_data()
|
| 79 |
+
if cleaned_df is None or len(cleaned_df) == 0:
|
| 80 |
+
log_message("��ƲM�~��S�����ĸ��", "ERROR")
|
| 81 |
+
return False
|
| 82 |
+
|
| 83 |
+
# ���槹����R
|
| 84 |
+
analysis_results = self.analyzer.run_full_analysis()
|
| 85 |
+
|
| 86 |
+
# �x�s���R���G
|
| 87 |
+
results_filename = f"analysis_results_{self.timestamp}.json"
|
| 88 |
+
self.analyzer.save_analysis_results(results_filename)
|
| 89 |
+
|
| 90 |
+
# ��ܤ��R�K�n
|
| 91 |
+
self.analyzer.print_summary()
|
| 92 |
+
|
| 93 |
+
# �B�J3: ��Ƶ�ı��
|
| 94 |
+
log_message("�}�l�ͦ���ı�ƹϪ�...")
|
| 95 |
+
|
| 96 |
+
# �]�m��ı�ƾ�
|
| 97 |
+
self.visualizer.df = cleaned_df
|
| 98 |
+
self.visualizer.analysis_results = analysis_results
|
| 99 |
+
|
| 100 |
+
# �ͦ��Ҧ��Ϫ�
|
| 101 |
+
self.visualizer.generate_all_visualizations()
|
| 102 |
+
|
| 103 |
+
# �ЫغK�n���i
|
| 104 |
+
summary_filename = f"output/summary_report_{self.timestamp}.png"
|
| 105 |
+
self.visualizer.create_summary_report(summary_filename)
|
| 106 |
+
|
| 107 |
+
log_message("���R�����I", "SUCCESS")
|
| 108 |
+
self.print_completion_summary()
|
| 109 |
+
|
| 110 |
+
return True
|
| 111 |
+
|
| 112 |
+
def print_completion_summary(self):
|
| 113 |
+
"""�L�X�����K�n"""
|
| 114 |
+
print("\n" + "? ���R�����I" + "?")
|
| 115 |
+
print("=" * 50)
|
| 116 |
+
print("? ��X�ɮ�:")
|
| 117 |
+
print(f" �u�w�w ��l���: output/raw_data_{self.timestamp}.json")
|
| 118 |
+
print(f" �u�w�w �M�~���: output/rental_data_{self.timestamp}.csv")
|
| 119 |
+
print(f" �u�w�w ���R���G: output/analysis_results_{self.timestamp}.json")
|
| 120 |
+
print(f" �u�w�w �K�n���i: output/summary_report_{self.timestamp}.png")
|
| 121 |
+
print(" �u�w�w �Ϫ��ɮ�:")
|
| 122 |
+
print(" �x �u�w�w output/price_distribution.png")
|
| 123 |
+
print(" �x �u�w�w output/price_ranges.png")
|
| 124 |
+
print(" �x �u�w�w output/area_analysis.png")
|
| 125 |
+
print(" �x �u�w�w output/price_per_ping.png")
|
| 126 |
+
print(" �x �|�w�w output/keywords_analysis.png")
|
| 127 |
+
print(" �|�w�w ���ʦ������O: output/dashboard.html")
|
| 128 |
+
print("\n? ����: ���} dashboard.html �i�d�ݤ��ʦ����R���G")
|
| 129 |
+
print("=" * 50)
|
| 130 |
+
|
| 131 |
+
def main():
|
| 132 |
+
"""�D���"""
|
| 133 |
+
parser = argparse.ArgumentParser(description='591���θ�Ƥ��R��')
|
| 134 |
+
parser.add_argument('--max-pages', type=int, default=5,
|
| 135 |
+
help='�̤j�������� (�w�]: 5)')
|
| 136 |
+
parser.add_argument('--skip-scraping', action='store_true',
|
| 137 |
+
help='���L���ΡA�ϥβ{����ƶi����R')
|
| 138 |
+
parser.add_argument('--analysis-only', action='store_true',
|
| 139 |
+
help='�Ȱ�����R�A�����s�������')
|
| 140 |
+
|
| 141 |
+
args = parser.parse_args()
|
| 142 |
+
|
| 143 |
+
try:
|
| 144 |
+
app = RentalAnalysisApp()
|
| 145 |
+
|
| 146 |
+
if args.analysis_only:
|
| 147 |
+
# �Ȥ��R�Ҧ�
|
| 148 |
+
log_message("����Ȥ��R�Ҧ�...")
|
| 149 |
+
success = app.run_full_pipeline(max_pages=0, skip_scraping=True)
|
| 150 |
+
else:
|
| 151 |
+
# ����y�{
|
| 152 |
+
success = app.run_full_pipeline(
|
| 153 |
+
max_pages=args.max_pages,
|
| 154 |
+
skip_scraping=args.skip_scraping
|
| 155 |
+
)
|
| 156 |
+
|
| 157 |
+
if success:
|
| 158 |
+
log_message("�{�����榨�\�����I", "SUCCESS")
|
| 159 |
+
return 0
|
| 160 |
+
else:
|
| 161 |
+
log_message("�{�����楢��", "ERROR")
|
| 162 |
+
return 1
|
| 163 |
+
|
| 164 |
+
except KeyboardInterrupt:
|
| 165 |
+
log_message("�ϥΪ̤��_�{������", "WARNING")
|
| 166 |
+
return 1
|
| 167 |
+
except Exception as e:
|
| 168 |
+
log_message(f"�{������ɵo�ͥ��w�����~: {e}", "ERROR")
|
| 169 |
+
return 1
|
| 170 |
+
|
| 171 |
+
if __name__ == "__main__":
|
| 172 |
+
# �]�m�{����T
|
| 173 |
+
print("? 591���θ�Ƥ��R��")
|
| 174 |
+
print("? �ؼаϰ�: ���������s��")
|
| 175 |
+
print("? ��������: 2�СB��h�B�q��j��")
|
| 176 |
+
print("? ��X Hugging Face �ͺA�t��")
|
| 177 |
+
print("-" * 50)
|
| 178 |
+
|
| 179 |
+
exit_code = main()
|
requirements.txt
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Generated by Copilot (original comment was mojibake-corrupted Chinese)
|
| 2 |
+
requests>=2.31.0
|
| 3 |
+
beautifulsoup4>=4.12.0
|
| 4 |
+
pandas>=2.0.0
|
| 5 |
+
numpy>=1.24.0
|
| 6 |
+
matplotlib>=3.7.0
|
| 7 |
+
seaborn>=0.12.0
|
| 8 |
+
transformers>=4.30.0
|
| 9 |
+
datasets>=2.14.0
|
| 10 |
+
plotly>=5.15.0
|
| 11 |
+
jupyter>=1.0.0
|
| 12 |
+
lxml>=4.9.0
|
| 13 |
+
selenium>=4.10.0
|
| 14 |
+
webdriver-manager>=3.8.0
|
scraper.py
ADDED
|
@@ -0,0 +1,253 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# �� Copilot �ͦ�
|
| 2 |
+
import requests
|
| 3 |
+
import time
|
| 4 |
+
import json
|
| 5 |
+
import pandas as pd
|
| 6 |
+
from bs4 import BeautifulSoup
|
| 7 |
+
from selenium import webdriver
|
| 8 |
+
from selenium.webdriver.common.by import By
|
| 9 |
+
from selenium.webdriver.chrome.service import Service
|
| 10 |
+
from selenium.webdriver.chrome.options import Options
|
| 11 |
+
from webdriver_manager.chrome import ChromeDriverManager
|
| 12 |
+
import re
|
| 13 |
+
from typing import List, Dict, Optional
|
| 14 |
+
|
| 15 |
+
class Rent591Scraper:
|
| 16 |
+
"""591����������O"""
|
| 17 |
+
|
| 18 |
+
def __init__(self):
|
| 19 |
+
self.base_url = "https://rent.591.com.tw"
|
| 20 |
+
self.headers = {
|
| 21 |
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
|
| 22 |
+
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
| 23 |
+
'Accept-Language': 'zh-TW,zh;q=0.9,en;q=0.8',
|
| 24 |
+
'Accept-Encoding': 'gzip, deflate, br',
|
| 25 |
+
'Connection': 'keep-alive',
|
| 26 |
+
'Upgrade-Insecure-Requests': '1',
|
| 27 |
+
}
|
| 28 |
+
self.session = requests.Session()
|
| 29 |
+
self.session.headers.update(self.headers)
|
| 30 |
+
|
| 31 |
+
def setup_driver(self):
|
| 32 |
+
"""�]�mChrome WebDriver"""
|
| 33 |
+
chrome_options = Options()
|
| 34 |
+
chrome_options.add_argument('--headless') # �L�Y�Ҧ�
|
| 35 |
+
chrome_options.add_argument('--no-sandbox')
|
| 36 |
+
chrome_options.add_argument('--disable-dev-shm-usage')
|
| 37 |
+
chrome_options.add_argument('--disable-gpu')
|
| 38 |
+
chrome_options.add_argument('--window-size=1920,1080')
|
| 39 |
+
chrome_options.add_argument(f'--user-agent={self.headers["User-Agent"]}')
|
| 40 |
+
|
| 41 |
+
service = Service(ChromeDriverManager().install())
|
| 42 |
+
driver = webdriver.Chrome(service=service, options=chrome_options)
|
| 43 |
+
return driver
|
| 44 |
+
|
| 45 |
+
def get_csrf_token(self, driver):
|
| 46 |
+
"""���CSRF Token"""
|
| 47 |
+
try:
|
| 48 |
+
# �X�ݭ������token
|
| 49 |
+
driver.get("https://rent.591.com.tw/")
|
| 50 |
+
time.sleep(2)
|
| 51 |
+
|
| 52 |
+
# ���ձq����������token
|
| 53 |
+
token_element = driver.find_element(By.NAME, "csrf-token")
|
| 54 |
+
if token_element:
|
| 55 |
+
return token_element.get_attribute("content")
|
| 56 |
+
|
| 57 |
+
# �p�G�S���A���ձqcookies�����
|
| 58 |
+
cookies = driver.get_cookies()
|
| 59 |
+
for cookie in cookies:
|
| 60 |
+
if 'token' in cookie['name'].lower():
|
| 61 |
+
return cookie['value']
|
| 62 |
+
|
| 63 |
+
except Exception as e:
|
| 64 |
+
print(f"���token����: {e}")
|
| 65 |
+
|
| 66 |
+
return None
|
| 67 |
+
|
| 68 |
+
def scrape_rental_data(self, max_pages: int = 10) -> List[Dict]:
|
| 69 |
+
"""
|
| 70 |
+
��������
|
| 71 |
+
|
| 72 |
+
Args:
|
| 73 |
+
max_pages: �̤j��������
|
| 74 |
+
|
| 75 |
+
Returns:
|
| 76 |
+
���θ�ƦC��
|
| 77 |
+
"""
|
| 78 |
+
driver = self.setup_driver()
|
| 79 |
+
all_data = []
|
| 80 |
+
|
| 81 |
+
try:
|
| 82 |
+
# �ؼ�URL�Ѽ�
|
| 83 |
+
params = {
|
| 84 |
+
'region': '17', # ������
|
| 85 |
+
'section': '247', # ���s��
|
| 86 |
+
'kind': '1', # ��h���a
|
| 87 |
+
'layout': '2', # 2��
|
| 88 |
+
'shape': '2' # �q��j��
|
| 89 |
+
}
|
| 90 |
+
|
| 91 |
+
for page in range(1, max_pages + 1):
|
| 92 |
+
print(f"���b������ {page} ��...")
|
| 93 |
+
|
| 94 |
+
# �c��URL
|
| 95 |
+
params['page'] = page
|
| 96 |
+
url = f"{self.base_url}/list?" + "&".join([f"{k}={v}" for k, v in params.items()])
|
| 97 |
+
|
| 98 |
+
driver.get(url)
|
| 99 |
+
time.sleep(3) # ���ݭ������J
|
| 100 |
+
|
| 101 |
+
# �ˬd�O�_�����
|
| 102 |
+
rental_items = driver.find_elements(By.CSS_SELECTOR, '.rent-item')
|
| 103 |
+
if not rental_items:
|
| 104 |
+
print(f"�� {page} ���S������ơA�����")
|
| 105 |
+
break
|
| 106 |
+
|
| 107 |
+
page_data = self.parse_page_data(driver)
|
| 108 |
+
all_data.extend(page_data)
|
| 109 |
+
|
| 110 |
+
print(f"�� {page} ����� {len(page_data)} �����")
|
| 111 |
+
|
| 112 |
+
# �קK�Q��IP�A�[�J����
|
| 113 |
+
time.sleep(2)
|
| 114 |
+
|
| 115 |
+
except Exception as e:
|
| 116 |
+
print(f"������Ʈɵo�Ϳ��~: {e}")
|
| 117 |
+
finally:
|
| 118 |
+
driver.quit()
|
| 119 |
+
|
| 120 |
+
return all_data
|
| 121 |
+
|
| 122 |
+
def parse_page_data(self, driver) -> List[Dict]:
|
| 123 |
+
"""�ѪR�歶���"""
|
| 124 |
+
page_data = []
|
| 125 |
+
|
| 126 |
+
try:
|
| 127 |
+
# �������HTML
|
| 128 |
+
soup = BeautifulSoup(driver.page_source, 'html.parser')
|
| 129 |
+
|
| 130 |
+
# �d�䯲�ζ���
|
| 131 |
+
rental_items = soup.find_all('div', class_='rent-item')
|
| 132 |
+
|
| 133 |
+
for item in rental_items:
|
| 134 |
+
try:
|
| 135 |
+
rental_info = self.extract_rental_info(item)
|
| 136 |
+
if rental_info:
|
| 137 |
+
page_data.append(rental_info)
|
| 138 |
+
except Exception as e:
|
| 139 |
+
print(f"�ѪR�浧��Ʈɵo�Ϳ��~: {e}")
|
| 140 |
+
continue
|
| 141 |
+
|
| 142 |
+
except Exception as e:
|
| 143 |
+
print(f"�ѪR������Ʈɵo�Ϳ��~: {e}")
|
| 144 |
+
|
| 145 |
+
return page_data
|
| 146 |
+
|
| 147 |
+
def extract_rental_info(self, item) -> Optional[Dict]:
    """Extract one listing's fields from a single rent-item element.

    Returns a dict with title/price/address/area/floor/link/raw_info,
    or None when the element cannot be parsed.
    """
    try:
        def node_text(node, fallback):
            # Stripped text of a tag, or the fallback when the tag is absent.
            return node.get_text(strip=True) if node else fallback

        title = node_text(item.find('h3', class_='rent-item-title'), "N/A")
        price_raw = node_text(item.find('div', class_='rent-item-price'), "0")
        address = node_text(item.find('div', class_='rent-item-address'), "N/A")
        details = node_text(item.find('div', class_='rent-item-info'), "")

        # Build an absolute link only when the anchor has a non-empty href.
        anchor = item.find('a')
        href = anchor.get('href') if anchor else None
        link = self.base_url + href if href else ""

        return {
            'title': title,
            'price': self.extract_price(price_raw),
            'address': address,
            'area': self.extract_area(details),
            'floor': self.extract_floor(details),
            'link': link,
            'raw_info': details
        }

    except Exception as e:
        print(f"�������θ�T�ɵo�Ϳ��~: {e}")
        return None
|
| 188 |
+
|
| 189 |
+
def extract_price(self, price_text: str) -> int:
    """Extract the numeric rent value from a price string.

    Commas are treated as thousands separators and stripped before
    matching, so the original `[\\d,]+` pattern (searched after comma
    removal) is simplified to `\\d+` with identical results.

    Returns:
        The first run of digits as an int, or 0 when no digits exist
        or the input is not a string.
    """
    try:
        price_match = re.search(r'\d+', price_text.replace(',', ''))
        if price_match:
            return int(price_match.group())
    except (TypeError, AttributeError):
        # Narrowed from a bare except: only non-string inputs fall through.
        pass
    return 0
|
| 199 |
+
|
| 200 |
+
def extract_area(self, info_text: str) -> float:
    """Pull the ping figure out of the free-form info string; 0.0 if absent.

    NOTE(review): the unit character in the pattern is mojibake-damaged
    in the source; it is preserved verbatim pending re-encoding.
    """
    try:
        found = re.search(r'(\d+(?:\.\d+)?)\s*�W', info_text)
        return float(found.group(1)) if found else 0.0
    except:
        return 0.0
|
| 209 |
+
|
| 210 |
+
def extract_floor(self, info_text: str) -> str:
    """Return the floor portion of the info string (digits + unit), or "N/A".

    NOTE(review): the floor-unit character in the pattern is mojibake-damaged
    in the source; preserved verbatim pending re-encoding.
    """
    try:
        found = re.search(r'(\d+)��', info_text)
        if found:
            return f"{found.group(1)}��"
    except:
        pass
    return "N/A"
|
| 219 |
+
|
| 220 |
+
def save_data(self, data: List[Dict], filename: str = "rental_data.json"):
    """Persist scraped listings as UTF-8 JSON under the output/ directory.

    Args:
        data: Parsed listing dicts to serialize.
        filename: Target file name inside "output/".

    Any I/O failure is caught and reported rather than raised.
    """
    try:
        # BUG FIX: the path previously ignored the *filename* argument
        # (it wrote to a hard-coded literal); honor the caller's name.
        path = f"output/{filename}"
        with open(path, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
        print(f"��Ƥw�x�s�� {path}")
    except Exception as e:
        print(f"�x�s��Ʈɵo�Ϳ��~: {e}")
|
| 228 |
+
|
| 229 |
+
def to_dataframe(self, data: List[Dict]) -> pd.DataFrame:
    """Wrap the scraped listing records in a pandas DataFrame."""
    frame = pd.DataFrame(data)
    return frame
|
| 232 |
+
|
| 233 |
+
if __name__ == "__main__":
    # Ad-hoc entry point: scrape a few pages and dump JSON + CSV snapshots.
    scraper = Rent591Scraper()
    print("�}�l����591����...")

    rental_data = scraper.scrape_rental_data(max_pages=5)

    if not rental_data:
        print("�S�������������")
    else:
        print(f"�`�@������ {len(rental_data)} �����")

        # Raw JSON dump first, then a CSV export via DataFrame.
        scraper.save_data(rental_data)

        df = scraper.to_dataframe(rental_data)
        df.to_csv("output/rental_data.csv", index=False, encoding='utf-8-sig')

        print("��ƪ��������I")
        print(df.head())
|
utils.py
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# �� Copilot �ͦ�
|
| 2 |
+
import time
|
| 3 |
+
import json
|
| 4 |
+
from datetime import datetime
|
| 5 |
+
|
| 6 |
+
def log_message(message: str, level: str = "INFO"):
    """Print *message* prefixed with a timestamp and severity level."""
    now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print(f"[{now}] {level}: {message}")
|
| 10 |
+
|
| 11 |
+
def save_json(data, filename: str, output_dir: str = "output"):
    """Save *data* as pretty-printed UTF-8 JSON under *output_dir*.

    Args:
        data: Any JSON-serializable object.
        filename: File name to write inside *output_dir*.
        output_dir: Destination directory (must already exist).

    Returns:
        True on success, False on any failure (logged, not raised).
    """
    try:
        # BUG FIX: the path previously ignored the *filename* argument
        # (it interpolated a hard-coded literal); build it from both parts.
        filepath = f"{output_dir}/{filename}"
        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
        log_message(f"��Ƥw�x�s�� {filepath}")
        return True
    except Exception as e:
        log_message(f"�x�s��Ʈɵo�Ϳ��~: {e}", "ERROR")
        return False
|
| 22 |
+
|
| 23 |
+
def load_json(filename: str, output_dir: str = "output"):
    """Load a UTF-8 JSON file named *filename* from *output_dir*.

    Returns:
        The decoded object, or None on any failure (logged, not raised).
    """
    try:
        # BUG FIX: the path previously ignored the *filename* argument
        # (it interpolated a hard-coded literal); build it from both parts.
        filepath = f"{output_dir}/{filename}"
        with open(filepath, 'r', encoding='utf-8') as f:
            data = json.load(f)
        log_message(f"���\���J {filepath}")
        return data
    except Exception as e:
        log_message(f"���J��Ʈɵo�Ϳ��~: {e}", "ERROR")
        return None
|
| 34 |
+
|
| 35 |
+
def format_currency(amount: float) -> str:
    """Render an amount as a whole number; add thousands separators from 10k up."""
    spec = ",.0f" if amount >= 10000 else ".0f"
    return format(amount, spec)
|
| 41 |
+
|
| 42 |
+
def safe_divide(numerator: float, denominator: float, default: float = 0.0) -> float:
    """Divide *numerator* by *denominator*, returning *default* on failure.

    A zero denominator or non-numeric input yields *default* instead of
    raising. The original bare ``except`` is narrowed so genuine bugs
    (e.g. KeyboardInterrupt) are no longer swallowed.
    """
    try:
        if denominator == 0:
            return default
        return numerator / denominator
    except (TypeError, ZeroDivisionError):
        return default
|
| 50 |
+
|
| 51 |
+
def clean_text(text: str) -> str:
|
| 52 |
+
"""�M�z��r���e"""
|
| 53 |
+
if not text:
|
| 54 |
+
return ""
|
| 55 |
+
|
| 56 |
+
# �����h�l�ť�
|
| 57 |
+
text = " ".join(text.split())
|
| 58 |
+
|
| 59 |
+
# �����S���r�š]�O�d����B�^��B�Ʀr�M�`�μ��I�^
|
| 60 |
+
import re
|
| 61 |
+
text = re.sub(r'[^\u4e00-\u9fff\w\s.,!?;:()�]�^�i�j�u�v\-]', '', text)
|
| 62 |
+
|
| 63 |
+
return text.strip()
|
| 64 |
+
|
| 65 |
+
def retry_request(func, max_retries: int = 3, delay: float = 1.0):
    """Invoke *func*, retrying with exponential backoff on failure.

    Re-raises the last exception once all attempts are exhausted.
    """
    wait = delay
    for attempt in range(1, max_retries + 1):
        try:
            return func()
        except Exception as e:
            if attempt == max_retries:
                raise e
            log_message(f"�ШD���ѡA{wait}���᭫��... (���� {attempt}/{max_retries})", "WARNING")
            time.sleep(wait)
            wait *= 2  # exponential backoff
|
| 76 |
+
|
| 77 |
+
def validate_price(price_str: str) -> bool:
    """Return True when *price_str* contains a rent in the 5,000-100,000 range.

    Commas are stripped before matching (so `[\\d,]+` simplifies to `\\d+`
    with identical results). The original bare ``except`` is narrowed to
    the input-type failures it was actually guarding against.
    """
    import re
    try:
        price_match = re.search(r'\d+', price_str.replace(',', ''))
    except (TypeError, AttributeError):
        # Non-string input (e.g. None) is simply invalid.
        return False
    if price_match:
        # Plausible monthly-rent range: 5000 - 100000.
        return 5000 <= int(price_match.group()) <= 100000
    return False
|
| 90 |
+
|
| 91 |
+
def validate_area(area_str: str) -> bool:
    """Return True when *area_str* contains a size in the plausible 10-100 range.

    The original bare ``except`` is narrowed to the non-string-input
    failure it was actually guarding against.
    """
    import re
    try:
        area_match = re.search(r'(\d+(?:\.\d+)?)', area_str)
    except TypeError:
        # Non-string input (e.g. None) is simply invalid.
        return False
    if area_match:
        # Plausible size range: 10 - 100 ping.
        return 10 <= float(area_match.group(1)) <= 100
    return False
|
| 103 |
+
|
| 104 |
+
def create_output_directories():
    """Ensure the standard output directory tree exists.

    Logs only directories that were actually missing. ``exist_ok=True``
    closes the race between the existence check and the creation (the
    original plain ``makedirs`` could raise if another process created
    the directory in between).
    """
    import os
    directories = ['output', 'output/images', 'output/data', 'output/reports']

    for directory in directories:
        if not os.path.exists(directory):
            os.makedirs(directory, exist_ok=True)
            log_message(f"�Ыإؿ�: {directory}")
|
| 113 |
+
|
| 114 |
+
def get_current_timestamp() -> str:
    """Current local time as a compact YYYYMMDD_HHMMSS string."""
    return format(datetime.now(), "%Y%m%d_%H%M%S")
|
| 117 |
+
|
| 118 |
+
def calculate_statistics(data_list):
    """Summary statistics for a numeric sequence.

    Returns count/mean/median/std/min/max/q25/q75 as plain Python
    numbers, or an empty dict for empty input.
    """
    if not data_list:
        return {}

    import numpy as np

    values = np.array(data_list)

    summary = {'count': len(values)}
    for name, fn in (('mean', np.mean), ('median', np.median), ('std', np.std),
                     ('min', np.min), ('max', np.max)):
        summary[name] = float(fn(values))
    summary['q25'] = float(np.percentile(values, 25))
    summary['q75'] = float(np.percentile(values, 75))
    return summary
|
| 137 |
+
|
| 138 |
+
def progress_bar(current: int, total: int, length: int = 50):
    """Render an in-place text progress bar; prints a newline on completion."""
    if total == 0:
        return

    done = int(length * current // total)
    remaining = length - done
    pct = (current / total) * 100
    bar = '�i' * done + '-' * remaining

    print(f'\r�i��: |{bar}| {pct:.1f}% ({current}/{total})', end='', flush=True)

    if current >= total:
        print()  # finish the line once complete
|
visualizer.py
ADDED
|
@@ -0,0 +1,402 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# �� Copilot �ͦ�
|
| 2 |
+
import matplotlib.pyplot as plt
|
| 3 |
+
import seaborn as sns
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import numpy as np
|
| 6 |
+
import plotly.express as px
|
| 7 |
+
import plotly.graph_objects as go
|
| 8 |
+
from plotly.subplots import make_subplots
|
| 9 |
+
import json
|
| 10 |
+
from typing import Dict, List
|
| 11 |
+
|
| 12 |
+
# Configure fonts so CJK axis labels and titles render correctly,
# and keep the minus sign displayable with these fonts.
plt.rcParams['font.sans-serif'] = ['Microsoft JhengHei', 'SimHei', 'Arial Unicode MS']
plt.rcParams['axes.unicode_minus'] = False
|
| 15 |
+
|
| 16 |
+
class RentalDataVisualizer:
    """Render static (matplotlib) and interactive (plotly) charts for
    scraped rental data.

    Works from two optional inputs: a listings DataFrame (``df``) and a
    precomputed analysis-results dict (``analysis_results``); each plot
    method degrades gracefully when its input is missing.
    """

    def __init__(self, df: pd.DataFrame = None, analysis_results: Dict = None):
        """Initialize the visualizer.

        Args:
            df: Listings DataFrame (expected columns include 'price',
                and optionally 'area' and 'price_per_ping' — TODO confirm
                against the scraper's output schema).
            analysis_results: Dict of analysis results (keys such as
                'price_distribution', 'basic_stats', 'insights').
        """
        self.df = df
        self.analysis_results = analysis_results
        # Qualitative palette reused across bar/pie charts.
        self.colors = px.colors.qualitative.Set3

    def load_data(self, data_path: str):
        """Load the listings DataFrame from a CSV file (utf-8-sig)."""
        try:
            if data_path.endswith('.csv'):
                self.df = pd.read_csv(data_path, encoding='utf-8-sig')
            else:
                # Only CSV input is supported.
                raise ValueError("�д���CSV�榡������ɮ�")
            print(f"���\���J {len(self.df)} ����ƥΩ��ı��")
        except Exception as e:
            print(f"���J��Ʈɵo�Ϳ��~: {e}")

    def load_analysis_results(self, results_path: str):
        """Load the precomputed analysis-results dict from a JSON file."""
        try:
            with open(results_path, 'r', encoding='utf-8') as f:
                self.analysis_results = json.load(f)
            print("���R���G���J���\")
        except Exception as e:
            print(f"���J���R���G�ɵo�Ϳ��~: {e}")

    def plot_price_distribution(self, save_path: str = "output/price_distribution.png"):
        """Plot a histogram and boxplot of listing prices, saved as PNG."""
        if self.df is None or 'price' not in self.df.columns:
            print("�L�kø�s���������ϡG�ʤָ��")
            return

        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

        # Left: histogram of prices.
        ax1.hist(self.df['price'], bins=20, alpha=0.7, color='skyblue', edgecolor='black')
        ax1.set_xlabel('���� (��)')
        ax1.set_ylabel('����ƶq')
        ax1.set_title('�������������')
        ax1.grid(True, alpha=0.3)

        # Right: boxplot of the same prices.
        ax2.boxplot(self.df['price'], vert=True, patch_artist=True,
                    boxprops=dict(facecolor='lightgreen', alpha=0.7))
        ax2.set_ylabel('���� (��)')
        ax2.set_title('���������c�ι�')
        ax2.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        plt.close()
        print(f"���������Ϥw�x�s: {save_path}")

    def plot_price_ranges(self, save_path: str = "output/price_ranges.png"):
        """Plot bar + pie charts of price-range buckets from analysis results.

        Requires ``analysis_results['price_distribution']`` with 'ranges',
        'counts' and 'percentages' lists.
        """
        if not self.analysis_results or 'price_distribution' not in self.analysis_results:
            print("�L�kø�s�����϶��ϡG�ʤ֤��R���G")
            return

        dist_data = self.analysis_results['price_distribution']

        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

        # Left: counts per price bucket.
        bars = ax1.bar(dist_data['ranges'], dist_data['counts'],
                       color=self.colors[:len(dist_data['ranges'])], alpha=0.8)
        ax1.set_xlabel('�����϶�')
        ax1.set_ylabel('����ƶq')
        ax1.set_title('�U�����϶�����ƶq')
        ax1.tick_params(axis='x', rotation=45)

        # Annotate each bar with its count.
        for bar, count in zip(bars, dist_data['counts']):
            height = bar.get_height()
            ax1.text(bar.get_x() + bar.get_width()/2., height + 0.5,
                     f'{count}', ha='center', va='bottom')

        # Right: share of each bucket as a pie.
        ax2.pie(dist_data['percentages'], labels=dist_data['ranges'], autopct='%1.1f%%',
                colors=self.colors[:len(dist_data['ranges'])], startangle=90)
        ax2.set_title('�����϶���Ҥ���')

        plt.tight_layout()
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        plt.close()
        print(f"�����϶��Ϥw�x�s: {save_path}")

    def plot_area_analysis(self, save_path: str = "output/area_analysis.png"):
        """Plot size-vs-price scatter (with linear trend) and a size histogram."""
        if self.df is None or 'area' not in self.df.columns:
            print("�L�kø�s�W�Ƥ��R�ϡG�ʤָ��")
            return

        # Drop rows with missing sizes.
        area_data = self.df['area'].dropna()

        if len(area_data) == 0:
            print("�L�kø�s�W�Ƥ��R�ϡG�S�����Ī��W�Ƹ��")
            return

        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

        # Left: size vs price scatter, only when both columns exist.
        if 'price' in self.df.columns:
            valid_data = self.df.dropna(subset=['area', 'price'])
            if len(valid_data) > 0:
                ax1.scatter(valid_data['area'], valid_data['price'],
                            alpha=0.6, color='coral', s=50)
                ax1.set_xlabel('�W��')
                ax1.set_ylabel('���� (��)')
                ax1.set_title('�W�ƻP�������Y')
                ax1.grid(True, alpha=0.3)

                # Overlay a degree-1 least-squares trend line.
                z = np.polyfit(valid_data['area'], valid_data['price'], 1)
                p = np.poly1d(z)
                ax1.plot(valid_data['area'], p(valid_data['area']), "r--", alpha=0.8)

        # Right: distribution of sizes.
        ax2.hist(area_data, bins=15, alpha=0.7, color='lightgreen', edgecolor='black')
        ax2.set_xlabel('�W��')
        ax2.set_ylabel('����ƶq')
        ax2.set_title('�W�Ƥ���')
        ax2.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        plt.close()
        print(f"�W�Ƥ��R�Ϥw�x�s: {save_path}")

    def plot_price_per_ping(self, save_path: str = "output/price_per_ping.png"):
        """Plot a histogram and boxplot of per-unit-size prices."""
        if self.df is None or 'price_per_ping' not in self.df.columns:
            print("�L�kø�s�C�W�����ϡG�ʤָ��")
            return

        price_per_ping_data = self.df['price_per_ping'].dropna()

        if len(price_per_ping_data) == 0:
            print("�L�kø�s�C�W�����ϡG�S�����Ī��C�W�������")
            return

        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

        # Left: histogram of per-ping prices.
        ax1.hist(price_per_ping_data, bins=20, alpha=0.7, color='gold', edgecolor='black')
        ax1.set_xlabel('�C�W���� (��/�W)')
        ax1.set_ylabel('����ƶq')
        ax1.set_title('�C�W��������')
        ax1.grid(True, alpha=0.3)

        # Right: boxplot of the same values.
        ax2.boxplot(price_per_ping_data, vert=True, patch_artist=True,
                    boxprops=dict(facecolor='orange', alpha=0.7))
        ax2.set_ylabel('�C�W���� (��/�W)')
        ax2.set_title('�C�W�����c�ι�')
        ax2.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        plt.close()
        print(f"�C�W�����Ϥw�x�s: {save_path}")

    def plot_keywords_analysis(self, save_path: str = "output/keywords_analysis.png"):
        """Plot a horizontal bar chart of keyword frequencies from the
        description analysis (zero-frequency keywords are dropped)."""
        if not self.analysis_results or 'description_analysis' not in self.analysis_results:
            print("�L�kø�s����r���R�ϡG�ʤ֤��R���G")
            return

        desc_analysis = self.analysis_results['description_analysis']
        if 'keywords_frequency' not in desc_analysis:
            print("�L�kø�s����r���R�ϡG�ʤ�����r���")
            return

        keywords_data = desc_analysis['keywords_frequency']

        # Keep only keywords that actually occurred.
        filtered_keywords = {k: v for k, v in keywords_data.items() if v > 0}

        if not filtered_keywords:
            print("�S������������r���")
            return

        keywords = list(filtered_keywords.keys())
        frequencies = list(filtered_keywords.values())

        plt.figure(figsize=(12, 8))
        bars = plt.barh(keywords, frequencies, color=self.colors[:len(keywords)])
        plt.xlabel('�X�{����')
        plt.ylabel('����r')
        plt.title('����y�z����r�W�v���R')
        plt.grid(True, alpha=0.3, axis='x')

        # Annotate each bar with its frequency.
        for bar, freq in zip(bars, frequencies):
            width = bar.get_width()
            plt.text(width + 0.1, bar.get_y() + bar.get_height()/2.,
                     f'{freq}', ha='left', va='center')

        plt.tight_layout()
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        plt.close()
        print(f"����r���R�Ϥw�x�s: {save_path}")

    def create_interactive_dashboard(self, save_path: str = "output/dashboard.html"):
        """Build a 2x2 plotly dashboard and write it as a standalone HTML file."""
        if self.df is None:
            print("�L�k�Ыػ����O�G�ʤָ��")
            return

        # 2x2 grid: price histogram / size-vs-price / price buckets / per-ping.
        fig = make_subplots(
            rows=2, cols=2,
            subplot_titles=('��������', '�W��vs����', '�����϶�����', '�C�W��������'),
            specs=[[{"secondary_y": False}, {"secondary_y": False}],
                   [{"type": "bar"}, {"secondary_y": False}]]
        )

        # 1. Price histogram.
        fig.add_trace(
            go.Histogram(x=self.df['price'], name='��������', nbinsx=20,
                         marker_color='skyblue', opacity=0.7),
            row=1, col=1
        )

        # 2. Size vs price scatter (only when both columns have data).
        if 'area' in self.df.columns:
            valid_data = self.df.dropna(subset=['area', 'price'])
            if len(valid_data) > 0:
                fig.add_trace(
                    go.Scatter(x=valid_data['area'], y=valid_data['price'],
                               mode='markers', name='�W��vs����',
                               marker=dict(color='coral', size=8, opacity=0.6)),
                    row=1, col=2
                )

        # 3. Price-bucket bar chart from precomputed analysis results.
        if self.analysis_results and 'price_distribution' in self.analysis_results:
            dist_data = self.analysis_results['price_distribution']
            fig.add_trace(
                go.Bar(x=dist_data['ranges'], y=dist_data['counts'],
                       name='�����϶�', marker_color='lightgreen'),
                row=2, col=1
            )

        # 4. Per-ping price histogram.
        if 'price_per_ping' in self.df.columns:
            price_per_ping_data = self.df['price_per_ping'].dropna()
            if len(price_per_ping_data) > 0:
                fig.add_trace(
                    go.Histogram(x=price_per_ping_data, name='�C�W����', nbinsx=15,
                                 marker_color='gold', opacity=0.7),
                    row=2, col=2
                )

        # Overall layout.
        fig.update_layout(
            title_text="���������s�ϯ��Υ������R�����O",
            title_x=0.5,
            height=800,
            showlegend=False
        )

        # Per-subplot axis titles.
        fig.update_xaxes(title_text="���� (��)", row=1, col=1)
        fig.update_yaxes(title_text="����ƶq", row=1, col=1)
        fig.update_xaxes(title_text="�W��", row=1, col=2)
        fig.update_yaxes(title_text="���� (��)", row=1, col=2)
        fig.update_xaxes(title_text="�����϶�", row=2, col=1)
        fig.update_yaxes(title_text="����ƶq", row=2, col=1)
        fig.update_xaxes(title_text="�C�W���� (��/�W)", row=2, col=2)
        fig.update_yaxes(title_text="����ƶq", row=2, col=2)

        # Write the interactive figure to disk.
        fig.write_html(save_path)
        print(f"���ʦ������O�w�x�s: {save_path}")

    def generate_all_visualizations(self):
        """Generate every static chart plus the interactive dashboard."""
        print("�}�l�ͦ���ı�ƹϪ�...")

        # Static charts.
        self.plot_price_distribution()
        self.plot_price_ranges()
        self.plot_area_analysis()
        self.plot_price_per_ping()
        self.plot_keywords_analysis()

        # Interactive dashboard.
        self.create_interactive_dashboard()

        print("�Ҧ���ı�ƹϪ��ͦ������I")

    def create_summary_report(self, save_path: str = "output/summary_report.png"):
        """Render a text-only summary report of the analysis as a PNG image.

        Requires ``analysis_results['basic_stats']`` with a 'price_stats'
        dict; 'area_stats', 'price_per_ping_stats' and top-level 'insights'
        are appended when present.
        """
        if not self.analysis_results or 'basic_stats' not in self.analysis_results:
            print("�L�k�ЫغK�n���i�G�ʤ֤��R���G")
            return

        fig, ax = plt.subplots(figsize=(12, 8))
        ax.axis('off')

        # Report title.
        fig.suptitle('���������s�ϯ��Υ������R�K�n���i', fontsize=20, fontweight='bold', y=0.95)

        stats = self.analysis_results['basic_stats']

        # Core price statistics block.
        report_text = f"""

? �������p
? �`�����: {stats['total_properties']} ��
? ��ƽd��: 2�СB��h�B�q��j��

? �����έp
? ��������: {stats['price_stats']['mean']:,} ��
? ����Ư���: {stats['price_stats']['median']:,} ��
? �̧C����: {stats['price_stats']['min']:,} ��
? �̰�����: {stats['price_stats']['max']:,} ��
? �зǮt: {stats['price_stats']['std']:,} ��

? �����S�x
? �Ĥ@�|�����: {stats['price_stats']['q25']:,} ��
? �ĤT�|�����: {stats['price_stats']['q75']:,} ��
"""

        # Append size statistics when available.
        if 'area_stats' in stats and stats['area_stats']:
            area_stats = stats['area_stats']
            report_text += f"""
? �W�Ʋέp
? �����W��: {area_stats['mean']} �W
? ����ƩW��: {area_stats['median']} �W
? �̤p�W��: {area_stats['min']} �W
? �̤j�W��: {area_stats['max']} �W
"""

        # Append per-ping price statistics when available.
        if 'price_per_ping_stats' in stats and stats['price_per_ping_stats']:
            pp_stats = stats['price_per_ping_stats']
            report_text += f"""
? �C�W�����έp
? �����C�W����: {pp_stats['mean']:,} ��/�W
? ����ƨC�W����: {pp_stats['median']:,} ��/�W
? �̧C�C�W����: {pp_stats['min']:,} ��/�W
? �̰��C�W����: {pp_stats['max']:,} ��/�W
"""

        # Append key insights when available.
        if 'insights' in self.analysis_results:
            report_text += "\n\n? ���n�}��\n"
            for i, insight in enumerate(self.analysis_results['insights'], 1):
                report_text += f"? {insight}\n"

        # Draw the assembled text inside a rounded box.
        ax.text(0.05, 0.95, report_text, transform=ax.transAxes, fontsize=12,
                verticalalignment='top', fontfamily='monospace',
                bbox=dict(boxstyle="round,pad=0.5", facecolor="lightblue", alpha=0.8))

        plt.tight_layout()
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        plt.close()
        print(f"�K�n���i�w�x�s: {save_path}")
|
| 389 |
+
|
| 390 |
+
if __name__ == "__main__":
    # Smoke-test the visualizer against previously exported artifacts.
    visualizer = RentalDataVisualizer()

    # Pull in the scraped CSV and the precomputed analysis JSON.
    visualizer.load_data("output/rental_data.csv")
    visualizer.load_analysis_results("output/analysis_results.json")

    # Produce every chart, then the one-page summary.
    visualizer.generate_all_visualizations()
    visualizer.create_summary_report()
|