import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import glob

def _tithi_tmax_stats(file_path, lunar_map):
    """Aggregate one weather CSV's TMAX readings by tithi.

    Args:
        file_path: Path to a CSV containing ``DATE`` and ``TMAX`` columns.
        lunar_map: Mapping from parsed date to tithi, used to label each row.

    Returns:
        A ``(stats, n_rows)`` tuple. ``stats`` is a DataFrame indexed by
        tithi with ``sum`` and ``count`` columns, or ``None`` when no row has
        both a TMAX value and a tithi. ``n_rows`` is the number of rows that
        went into the aggregation.

    Raises:
        Exception: Whatever pandas raises for an unreadable file, missing
            columns, or unparseable dates is propagated to the caller.
    """
    df = pd.read_csv(file_path, usecols=['DATE', 'TMAX'])
    df['DATE'] = pd.to_datetime(df['DATE'])

    # Only rows with a temperature reading contribute.
    df = df.dropna(subset=['TMAX'])

    # Dates absent from the lunar table map to NaN and are dropped.
    df['tithi'] = df['DATE'].map(lunar_map)
    df = df.dropna(subset=['tithi'])

    if df.empty:
        return None, 0

    stats = df.groupby('tithi')['TMAX'].agg(['sum', 'count'])
    return stats, len(df)


def analyze_temperature(data_dir, lunar_file, max_files=100):
    """Average daily TMAX per lunar day (tithi) over a sample of weather files.

    Reads the date -> tithi table from ``lunar_file``, accumulates TMAX sums
    and counts for tithis 1..30 across the weather CSVs in ``data_dir``, then
    writes ``temperature_by_tithi.csv`` and ``temperature_plot.png`` next to
    this script.

    Args:
        data_dir: Directory containing per-station ``*.csv`` files, each
            expected to have ``DATE`` and ``TMAX`` columns. TMAX is treated
            as tenths of degrees Celsius (assumption inherited from the
            original code; confirm against the data source).
        lunar_file: CSV with ``date`` and ``tithi`` columns.
        max_files: Maximum number of weather files to process. Files are
            taken in sorted-path order so the sample is reproducible;
            ``None`` processes every file.

    Returns:
        None. Results are written to disk and progress is printed.
    """
    print("Loading lunar data...")
    lunar_df = pd.read_csv(lunar_file)
    lunar_df['date'] = pd.to_datetime(lunar_df['date'])
    lunar_map = lunar_df.set_index('date')['tithi'].to_dict()

    # Accumulators for TMAX
    tithi_temp_sum = {i: 0.0 for i in range(1, 31)}
    tithi_temp_count = {i: 0 for i in range(1, 31)}

    # glob returns paths in arbitrary order; sort so that "the first
    # max_files files" is the same subset on every run and every machine.
    files = sorted(glob.glob(os.path.join(data_dir, "*.csv")))
    files_to_process = files[:max_files]

    print(f"Found {len(files)} weather files. Processing first {len(files_to_process)}...")

    processed_count = 0
    total_records = 0
    skipped = []  # (path, error) for files that could not be used

    for file_path in files_to_process:
        try:
            stats, n_rows = _tithi_tmax_stats(file_path, lunar_map)
            if stats is None:
                continue

            # Stage this file's contribution first so that a failure part-way
            # through never leaves half a file in the global accumulators.
            file_sum = {}
            file_count = {}
            for tithi, t_sum, t_count in zip(stats.index, stats['sum'], stats['count']):
                tithi = int(tithi)
                if 1 <= tithi <= 30:
                    file_sum[tithi] = file_sum.get(tithi, 0.0) + t_sum
                    # Keep counts as plain ints (iterating rows would
                    # upcast them to float alongside the float sums).
                    file_count[tithi] = file_count.get(tithi, 0) + int(t_count)
        except Exception as e:
            # Best-effort: some files might not have a TMAX column or may be
            # malformed. Skip them, but remember why instead of hiding it.
            skipped.append((file_path, e))
            continue

        for tithi, t_sum in file_sum.items():
            tithi_temp_sum[tithi] += t_sum
            tithi_temp_count[tithi] += file_count[tithi]

        total_records += n_rows
        processed_count += 1

        if processed_count % 10 == 0:
            print(f"Processed {processed_count} files...")

    print(f"Finished processing. Total records used: {total_records}")

    if skipped:
        print(f"Skipped {len(skipped)} files that could not be read or parsed.")
        for path, err in skipped[:5]:
            print(f"  {path}: {err}")

    # Calculate averages
    results = []
    for tithi in range(1, 31):
        total_t = tithi_temp_sum[tithi]
        count_t = tithi_temp_count[tithi]
        # A tithi with no observations has no average; NaN keeps it from
        # being reported (and plotted) as a genuine 0 degree reading.
        avg_t = total_t / count_t if count_t > 0 else float('nan')
        results.append({
            'tithi': tithi,
            'avg_tmax_tenths_c': avg_t,
            'avg_tmax_c': avg_t / 10.0,
            'count': count_t
        })

    results_df = pd.DataFrame(results)

    # Save results
    output_path = os.path.join(os.path.dirname(__file__), 'temperature_by_tithi.csv')
    results_df.to_csv(output_path, index=False)
    print(f"Results saved to {output_path}")

    # Plotting
    fig = plt.figure(figsize=(12, 6))
    plt.plot(results_df['tithi'], results_df['avg_tmax_c'], marker='s', linestyle='-', color='red')
    plt.title(f'Average Max Temperature by Lunar Day (Tithi) - Sample of {processed_count} Stations')
    plt.xlabel('Lunar Day (Tithi) [1-30]')
    plt.ylabel('Avg Max Temp (°C)')
    plt.grid(True, alpha=0.3)
    plt.xticks(range(1, 31))

    # Add Moon Phases markers
    plt.axvline(x=1, color='k', linestyle='--', alpha=0.5, label='New Moon')
    plt.axvline(x=15, color='k', linestyle='--', alpha=0.5, label='Full Moon')
    # Second new-moon marker is left unlabeled so the legend does not list
    # 'New Moon' twice.
    plt.axvline(x=30, color='k', linestyle='--', alpha=0.5)
    plt.legend()

    plot_path = os.path.join(os.path.dirname(__file__), 'temperature_plot.png')
    plt.savefig(plot_path)
    # Release the figure; pyplot keeps figures alive until explicitly closed.
    plt.close(fig)
    print(f"Plot saved to {plot_path}")

if __name__ == "__main__":
    # Script entry point: the weather directory and the lunar table are
    # looked up next to this file, and up to 200 weather files are analyzed.
    script_dir = os.path.dirname(__file__)

    analyze_temperature(
        os.path.join(script_dir, 'daily-summaries-latest'),
        os.path.join(script_dir, 'lunar_data.csv'),
        max_files=200,
    )