# visualize_results.py · The Big Astrology Book of Research

#!/usr/bin/env python3
import pandas as pd
import numpy as np
import swisseph as swe
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Filesystem layout: the input CSV is read from, and every figure is written
# to, the directory that contains this script.
OUTPUT_DIR = Path(__file__).parent
DATA_FILE = OUTPUT_DIR.joinpath("new_couples_wikidata.csv")

# No explicit ephemeris directory is configured.
# NOTE(review): passing None presumably selects the library's default
# ephemeris lookup -- confirm against the pyswisseph documentation.
swe.set_ephe_path(None)

# Bodies used for the synastry grid: display name -> Swiss Ephemeris body
# constant. The Moon is not included; "Node" is the mean lunar node.
# Insertion order here fixes the order of PLANET_LIST.
PLANETS = dict(
    Sun=swe.SUN,
    Mercury=swe.MERCURY,
    Venus=swe.VENUS,
    Mars=swe.MARS,
    Jupiter=swe.JUPITER,
    Saturn=swe.SATURN,
    Uranus=swe.URANUS,
    Neptune=swe.NEPTUNE,
    Pluto=swe.PLUTO,
    Node=swe.MEAN_NODE,
)
PLANET_LIST = list(PLANETS)

def get_positions(date_str):
    """Return the angular position of every body in PLANETS for a date.

    The date is evaluated with an hour argument of 12.0 (noon), since only a
    calendar date -- no time of day -- is parsed from the input.

    Args:
        date_str: Calendar date formatted as ``YYYY-MM-DD``.

    Returns:
        A dict mapping each PLANETS name to the first value returned by
        ``swe.calc_ut`` for that body, converted from degrees to radians;
        or ``None`` when the date cannot be parsed or the ephemeris lookup
        fails.
    """
    try:
        dt = datetime.strptime(date_str, "%Y-%m-%d")
    except (TypeError, ValueError):
        # TypeError: non-string input (e.g. a NaN cell from pandas);
        # ValueError: string that does not match the expected format.
        return None

    try:
        jd = swe.julday(dt.year, dt.month, dt.day, 12.0)
        return {
            name: np.deg2rad(swe.calc_ut(jd, pid)[0][0])
            for name, pid in PLANETS.items()
        }
    except Exception:
        # Deliberately best-effort: any ephemeris failure means "no
        # positions" for this date. Unlike a bare ``except:``, this lets
        # KeyboardInterrupt and SystemExit propagate.
        return None

def _save_current_figure(filename):
    """Save the active matplotlib figure into OUTPUT_DIR, report it, close it."""
    plt.savefig(OUTPUT_DIR / filename)
    print(f"Saved {filename}")
    # Close the figure so figures do not pile up in pyplot's global state.
    plt.close()


def main():
    """Compute synastry features per couple and write four diagnostic plots.

    Reads DATA_FILE, using the columns ``start_date``, ``end_date``,
    ``p1_birth_date`` and ``p2_birth_date``. For every usable row it records
    the relationship duration in years, an 'Ended'/'Ongoing' status, and the
    cosine of the angular difference for each ordered pair of bodies in
    PLANET_LIST (+1 = conjunction, -1 = opposition).

    Rows are skipped when the start date is missing, a relationship date
    cannot be parsed, the duration is negative, or positions are unavailable
    for either partner.

    Writes into OUTPUT_DIR:
        viz_mars_mars_dist.png, viz_top_factors.png,
        viz_personal_heatmap.png, viz_age_artifact.png
    """
    # Imported here so `stats` is bound even when main() is called from an
    # importing module; the file's only other import of it sits under the
    # ``__main__`` guard and does not run on import.
    from scipy import stats

    print("Loading data...")
    df = pd.read_csv(DATA_FILE)

    results = []
    skipped_bad_dates = 0
    # One reference instant for every ongoing relationship in this run.
    now = datetime.now()
    print(f"Processing {len(df)} couples...")

    for _, row in df.iterrows():
        # Duration Calc
        start = row['start_date']
        end = row['end_date']
        if pd.isna(start):
            continue

        try:
            start_dt = datetime.strptime(str(start), "%Y-%m-%d")
            end_dt = None if pd.isna(end) else datetime.strptime(str(end), "%Y-%m-%d")
        except ValueError:
            # Unparseable relationship date: skip the row, but keep count so
            # the loss is visible instead of silent.
            skipped_bad_dates += 1
            continue

        if end_dt is not None:
            duration = (end_dt - start_dt).days / 365.25
            status = 'Ended'
        else:
            duration = (now - start_dt).days / 365.25
            status = 'Ongoing'

        if duration < 0:
            continue

        # Synastry Calc
        p1_pos = get_positions(row['p1_birth_date'])
        p2_pos = get_positions(row['p2_birth_date'])
        if not p1_pos or not p2_pos:
            continue

        feat = {'duration': duration, 'status': status}
        for p1 in PLANET_LIST:
            for p2 in PLANET_LIST:
                # Agnostic Cosine: +1 Conj, -1 Opp
                feat[f"{p1}-{p2}"] = np.cos(p1_pos[p1] - p2_pos[p2])
        results.append(feat)

    res_df = pd.DataFrame(results)
    print(f"Computed features for {len(res_df)} couples.")
    if skipped_bad_dates:
        print(f"Skipped {skipped_bad_dates} rows with unparseable relationship dates.")

    if res_df.empty:
        # Without rows there are no 'status'/'duration' columns to analyse.
        print("No usable couples found; nothing to plot.")
        return

    # Completed relationships only; used for the duration correlations below.
    ended_df = res_df[res_df['status'] == 'Ended']

    # Long vs short term = top / bottom quartile of ALL valid durations
    # (ended and ongoing alike). NOTE: ongoing durations are measured up to
    # "now", so they understate the eventual length of those relationships.
    q_high = res_df['duration'].quantile(0.75)
    q_low = res_df['duration'].quantile(0.25)

    long_term = res_df[res_df['duration'] > q_high]
    short_term = res_df[res_df['duration'] < q_low]

    print(f"Long Term > {q_high:.1f}y (n={len(long_term)})")
    print(f"Short Term < {q_low:.1f}y (n={len(short_term)})")

    # --- Plot 1: The Mars Effect (Distribution) ---
    plt.figure(figsize=(10, 6))
    sns.kdeplot(data=long_term, x='Mars-Mars', label='Long Term', fill=True, color='green', alpha=0.3)
    sns.kdeplot(data=short_term, x='Mars-Mars', label='Short Term', fill=True, color='red', alpha=0.3)
    plt.title('Distribution of Mars-Mars Synastry\n(Long vs Short Relationships)')
    plt.xlabel('Cosine Similarity (+1=Conjunction, -1=Opposition)')
    plt.legend()
    plt.xlim(-1.1, 1.1)
    plt.grid(True, alpha=0.3)
    _save_current_figure('viz_mars_mars_dist.png')

    # --- Plot 2: Top factors Bar Chart (Mean Difference) ---
    # Mean difference (long - short) and Welch t-test p-value for every pair.
    diffs = []
    pairs = [c for c in res_df.columns if '-' in c]
    for col in pairs:
        _, p_value = stats.ttest_ind(long_term[col], short_term[col], equal_var=False)
        mean_diff = long_term[col].mean() - short_term[col].mean()
        diffs.append({'pair': col, 'diff': mean_diff, 'p': p_value})

    diff_df = pd.DataFrame(diffs).sort_values('diff', ascending=False)

    # Five largest differences followed by the five smallest.
    viz_df = pd.concat([diff_df.head(5), diff_df.tail(5)])

    plt.figure(figsize=(12, 8))
    colors = ['green' if x > 0 else 'red' for x in viz_df['diff']]
    sns.barplot(data=viz_df, x='diff', y='pair', palette=colors)
    plt.title('Top Synastry Factors Differentiating Long vs Short Relationships\n(Mean Difference in Cosine)')
    plt.xlabel('Difference (Long Mean - Short Mean)\nPositive = Conjunction Favors Longevity')
    plt.axvline(0, color='black', linewidth=1)
    _save_current_figure('viz_top_factors.png')

    # --- Plot 3: Personal Planet Heatmap (Correlation with Duration) ---
    # Bodies shown: Sun, Mercury, Venus, Mars, plus Jupiter and Saturn.
    # The Moon is absent because PLANETS does not compute it.
    personal_planets = ['Sun', 'Mercury', 'Venus', 'Mars', 'Jupiter', 'Saturn']

    matrix = pd.DataFrame(index=personal_planets, columns=personal_planets, dtype=float)
    for p1 in personal_planets:
        for p2 in personal_planets:
            key = f"{p1}-{p2}"
            if key in res_df.columns:
                # Correlation with Duration (Ended only)
                matrix.loc[p1, p2] = ended_df[key].corr(ended_df['duration'])

    plt.figure(figsize=(8, 6))
    sns.heatmap(matrix.astype(float), annot=True, cmap='RdBu_r', center=0, fmt='.3f')
    plt.title('Correlation with Longevity (Personal Planets)\nRed=Conjunction Good, Blue=Opposition Good')
    _save_current_figure('viz_personal_heatmap.png')

    # --- Plot 4: Age Artifact Demo (Pluto-Pluto) ---
    plt.figure(figsize=(10, 6))
    sns.regplot(data=ended_df, x='Pluto-Pluto', y='duration', scatter_kws={'alpha':0.1}, line_kws={'color':'red'})
    plt.title('The Age Artifact: Pluto-Pluto Conjunction vs Longevity\n(Partners of same generation live longer together)')
    plt.xlabel('Pluto-Pluto Cosine')
    plt.ylabel('Duration (Years)')
    _save_current_figure('viz_age_artifact.png')

# Script entry point.
if __name__ == "__main__":
    # This import binds `stats` as a module-level name only when the file is
    # executed as a script; it does not run when the module is imported.
    from scipy import stats
    main()