#!/usr/bin/env python3
"""
Project 32: Historical Predictions Evaluation
Statistical analysis of famous astrological predictions vs outcomes.
"""

import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

OUTPUT_DIR = Path(__file__).parent
DATA_FILE = OUTPUT_DIR / 'predictions_data.csv'

def main():
    """Run the full analysis pipeline.

    Loads predictions_data.csv from the script directory, tests overall
    prediction accuracy against a 50/50 chance baseline with a one-sided
    binomial test, breaks accuracy down by category and by time horizon,
    then delegates to create_plots() and generate_report().
    """
    print("Project 32: Historical Predictions Analysis")
    print("-" * 50)

    # Load Data
    if not DATA_FILE.exists():
        print("Error: predictions_data.csv not found.")
        return

    df = pd.read_csv(DATA_FILE)
    if df.empty:
        # Guard: an empty CSV would otherwise raise ZeroDivisionError
        # when computing the accuracy rate below.
        print("Error: predictions_data.csv contains no rows.")
        return

    df['Result_Bool'] = df['Result'] == 'Success'

    total_n = len(df)
    # int() coerces the numpy integer from .sum() to a plain Python int
    # for clean interop with scipy and f-string output.
    success_n = int(df['Result_Bool'].sum())
    accuracy = success_n / total_n

    print(f"Total Predictions: {total_n}")
    print(f"Successful: {success_n}")
    print(f"Accuracy Rate: {accuracy:.2%}")

    # 1. Binomial Test vs Random Chance (50/50)
    # H0: p <= 0.5
    # H1: p > 0.5
    binom_res = stats.binomtest(success_n, total_n, p=0.5, alternative='greater')
    print(f"Binomial Test P-Value: {binom_res.pvalue:.4f}")
    if binom_res.pvalue < 0.05:
        print(">> Result is statistically significant (Better than chance).")
    else:
        print(">> Result is NOT statistically significant (Indistinguishable from chance).")

    # 2. Accuracy by Category
    print("\n--- Accuracy by Category ---")
    cat_acc = df.groupby('Category')['Result_Bool'].agg(['count', 'mean'])
    cat_acc = cat_acc.sort_values('mean', ascending=False)
    print(cat_acc)

    # 3. Accuracy by Time Delta (Buckets)
    print("\n--- Accuracy by Time Horizon ---")
    # Left bin edge of -1 captures same-year predictions (delta == 0).
    df['Horizon'] = pd.cut(df['Time_Delta_Years'],
                           bins=[-1, 1, 10, 500],
                           labels=['Short Term (<2y)', 'Medium (2-10y)', 'Long Term (>10y)'])

    # observed=False keeps empty horizon buckets visible in the output table.
    time_acc = df.groupby('Horizon', observed=False)['Result_Bool'].agg(['count', 'mean'])
    print(time_acc)

    # --- Visualizations ---
    create_plots(df, cat_acc, time_acc)

    # --- Generate Report ---
    generate_report(df, accuracy, binom_res.pvalue, cat_acc, time_acc)

def create_plots(df, cat_acc, time_acc):
    """Save the two analysis figures as PNGs into OUTPUT_DIR.

    Args:
        df: Full predictions DataFrame (needs Result, Time_Delta_Years,
            Category columns).
        cat_acc: Per-category table with 'count' and 'mean' columns.
        time_acc: Per-horizon table (currently unused; kept for symmetry
            with generate_report's signature).
    """
    sns.set_theme(style="whitegrid")

    # 1. Bar Chart: Accuracy by Category
    plt.figure(figsize=(10, 6))
    # seaborn >= 0.13 deprecates palette= without hue=; passing the x
    # variable as hue with legend=False produces the same colored bars.
    ax1 = sns.barplot(x=cat_acc.index, y=cat_acc['mean'],
                      hue=cat_acc.index, legend=False, palette='viridis')
    plt.title('Prediction Accuracy by Category')
    plt.ylabel('Success Rate')
    plt.ylim(0, 1)
    plt.axhline(0.5, color='red', linestyle='--', label='Chance (50%)')
    plt.legend()
    # Add sample-size labels above each bar.
    for i, p in enumerate(ax1.patches):
        count = cat_acc['count'].iloc[i]
        ax1.annotate(f'n={count}',
                    (p.get_x() + p.get_width() / 2., p.get_height()),
                    ha = 'center', va = 'center',
                    xytext = (0, 9),
                    textcoords = 'offset points')
    plt.tight_layout()  # prevent long category labels from being clipped
    plt.savefig(OUTPUT_DIR / 'accuracy_by_category.png')
    plt.close()

    # 2. Scatter/Swarm: Horizon vs Result
    plt.figure(figsize=(10, 6))
    # Add jitter
    sns.stripplot(data=df, x='Result', y='Time_Delta_Years', hue='Category',
                  size=10, jitter=0.2, alpha=0.8, palette='deep')
    plt.yscale('log') # Log scale because Nostradamus 200y outliers
    plt.title('Prediction Horizon vs Outcome (Log Scale)')
    plt.ylabel('Years in Advance (Log Scale)')
    plt.tight_layout()
    plt.savefig(OUTPUT_DIR / 'horizon_vs_outcome.png')
    plt.close()

def generate_report(df, acc, p_val, cat_df, time_df):
    """Write RESULTS.md (markdown summary of the analysis) into OUTPUT_DIR.

    Args:
        df: Full predictions DataFrame (needs Source, Year_Made, Target_Year,
            Event_Description, Result columns).
        acc: Overall success rate in [0, 1].
        p_val: One-sided binomial-test p-value vs 50/50 chance.
        cat_df: Per-category table with 'count' and 'mean' columns.
        time_df: Per-horizon table with 'count' and 'mean' columns.
    """
    # Explicit encoding avoids the platform-dependent default
    # (e.g. cp1252 on Windows) mangling non-ASCII event descriptions.
    with open(OUTPUT_DIR / 'RESULTS.md', 'w', encoding='utf-8') as f:
        f.write("# Project 32: Historical Predictions Evaluation\n\n")
        f.write("## Overview\n")
        f.write(f"This analysis evaluated **{len(df)}** famous astrological predictions from 1555 to 2022.\n\n")

        f.write("## Key Findings\n")
        f.write(f"- **Overall Accuracy**: {acc:.1%}\n")
        f.write(f"- **Statistical Significance**: p={p_val:.4f}\n")

        sig_text = "significantly better than random chance." if p_val < 0.05 else "not statistically distinguishable from a coin flip."
        f.write(f"The aggregate performance of these historical predictions is **{sig_text}**\n\n")

        f.write("## Performance by Category\n")
        f.write("| Category | N | Accuracy |\n")
        f.write("|----------|---|----------|\n")
        for cat, row in cat_df.iterrows():
            f.write(f"| {cat} | {int(row['count'])} | {row['mean']:.1%} |\n")

        f.write("\n## Performance by Time Horizon\n")
        f.write("| Horizon | N | Accuracy |\n")
        f.write("|---------|---|----------|\n")
        for hor, row in time_df.iterrows():
            n = int(row['count'])
            # Empty buckets (kept by groupby observed=False) have NaN mean;
            # render N/A instead of a literal "nan%" cell.
            acc_cell = f"{row['mean']:.1%}" if n else "N/A"
            f.write(f"| {hor} | {n} | {acc_cell} |\n")

        f.write("\n## The Dataset\n")
        # NOTE(review): DataFrame.to_markdown requires the `tabulate`
        # package at runtime — confirm it is in the environment.
        f.write(df[['Source', 'Year_Made', 'Target_Year', 'Event_Description', 'Result']].to_markdown(index=False))

if __name__ == "__main__":
    main()