# cosine_analysis.py · The Big Astrology Book of Research

#!/usr/bin/env python3
"""
Project 34: Solar Return Annual Predictions (Cosine Analysis)
-------------------------------------------------------------
Objective: Be agnostic. Use cosines of angle differences of pairs from the 
12 celestial features (Sun, Moon, Mercury, Venus, Mars, Jupiter, Saturn, 
Uranus, Neptune, Pluto, Mean Node, Lilith).

Methodology:
1. Load event data from solar_return_data.csv.
2. For each event year:
   a. Calculate Solar Return chart (assuming 12:00 birth time if unknown).
   b. Calculate positions of 12 bodies.
   c. Calculate 66 pair-wise Cosines (cos(A-B)).
3. Statistical Analysis:
   a. For each Event Type (Death, Marriage, etc.), compare pair means to the rest of the dataset.
   b. Identify significant deviations.
"""

import pandas as pd
import numpy as np
import swisseph as swe
from datetime import datetime, timedelta
from pathlib import Path
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns

# Constants
# Swiss Ephemeris body identifiers for the 12 points analysed, keyed by the
# display name used in feature labels. Insertion order matters: it fixes the
# order of PLANET_LIST and therefore the "P1-P2" names of the pair features.
PLANETS = dict(
    Sun=swe.SUN,
    Moon=swe.MOON,
    Mercury=swe.MERCURY,
    Venus=swe.VENUS,
    Mars=swe.MARS,
    Jupiter=swe.JUPITER,
    Saturn=swe.SATURN,
    Uranus=swe.URANUS,
    Neptune=swe.NEPTUNE,
    Pluto=swe.PLUTO,
    Node=swe.MEAN_NODE,    # Mean Node
    Lilith=swe.MEAN_APOG,  # Mean Lilith (Black Moon)
)

# Body names in the same order as PLANETS.
PLANET_LIST = [*PLANETS]

# Input data and all generated outputs live next to this script.
OUTPUT_DIR = Path(__file__).parent
DATA_FILE = OUTPUT_DIR.joinpath('solar_return_data.csv')

# Setup Swiss Ephemeris
# NOTE(review): passing None presumably selects the library's default
# ephemeris path -- confirm against the pyswisseph documentation.
swe.set_ephe_path(None)

def get_solar_return_date(birth_date_str, target_year):
    """
    Calculate the JD of the Solar Return for the target year.

    The Solar Return is taken to be the moment at which the Sun's position
    (first value returned by ``swe.calc_ut`` for ``swe.SUN``) matches the
    value it had at birth. Birth time is assumed to be 12:00:00 UTC.

    Args:
        birth_date_str: Birth date formatted as "YYYY-MM-DD".
        target_year: Calendar year in which to search for the return.

    Returns:
        The Julian Day of the return, or None when ``birth_date_str`` is not
        a string in the expected format (this includes missing values such
        as None or NaN coming from an empty CSV cell). If the search has not
        reached the tolerance after 10 iterations, the last estimate is
        returned.
    """
    # Parse Birth Date.
    # strptime raises ValueError for a malformed string and TypeError for a
    # non-string value; both mean "no usable birth date".
    try:
        bd = datetime.strptime(birth_date_str, "%Y-%m-%d")
    except (TypeError, ValueError):
        return None

    # Natal Sun position, assuming birth at 12:00 UTC.
    jd_birth = swe.julday(bd.year, bd.month, bd.day, 12.0)
    natal_sun = swe.calc_ut(jd_birth, swe.SUN)[0][0]

    # First guess: same month/day at noon in the target year. The return is
    # usually within +/- 1 day of the birthday, so this starts close.
    jd_iter = swe.julday(target_year, bd.month, bd.day, 12.0)

    # Iterative search for the exact return.
    for _ in range(10):  # 10 iterations is plenty for high precision
        sun_pos = swe.calc_ut(jd_iter, swe.SUN)[0][0]

        # Signed difference, folded into [-180, 180] to handle the 360 wrap.
        diff = natal_sun - sun_pos
        while diff > 180:
            diff -= 360
        while diff < -180:
            diff += 360

        if abs(diff) < 0.00001:  # 1e-5 degrees, ~0.036 seconds of arc
            break

        # The Sun moves roughly 1 degree per day, so the remaining error in
        # degrees is used directly as the correction in days.
        jd_iter += diff

    return jd_iter

def analyze_chart_cosines(jd):
    """
    Build the pairwise cosine features for the chart at Julian Day `jd`.

    Looks up every body in PLANETS via swe.calc_ut, then returns a dict
    mapping "P1-P2" (e.g. "Sun-Moon") to cos(P1 - P2) for each unordered
    pair of bodies, taken in PLANET_LIST order (66 pairs for 12 bodies).
    """
    # First value of each calc_ut result, converted from degrees to radians
    # so it can be fed straight into np.cos.
    radians_by_body = {
        body: np.deg2rad(swe.calc_ut(jd, body_id)[0][0])
        for body, body_id in PLANETS.items()
    }

    cosines = {}
    # Pair each body with every body that follows it in PLANET_LIST.
    for idx, first in enumerate(PLANET_LIST):
        for second in PLANET_LIST[idx + 1:]:
            separation = radians_by_body[first] - radians_by_body[second]
            cosines[f"{first}-{second}"] = np.cos(separation)

    return cosines

def main():
    """
    Run the full Solar Return cosine analysis.

    Steps:
    1. Load events from DATA_FILE (columns read: name, birth_date, year,
       event_type).
    2. For every event whose Solar Return can be computed, build one row of
       pairwise cosine features.
    3. For each event type in a fixed list, run a Welch t-test
       (``equal_var=False``) on every pair column against the rest of the
       dataset and report the three smallest p-values.
    4. Write a Markdown table to RESULTS_COSINE.md in OUTPUT_DIR and plot the
       single most significant finding.

    Progress is printed to stdout. Returns None; returns early (without
    writing results) when the data file is missing or no chart could be
    computed.
    """
    print("Loading data...")
    if not DATA_FILE.exists():
        print(f"Error: {DATA_FILE} not found. Please run the data generation step first or locate the file.")
        return

    df = pd.read_csv(DATA_FILE)
    print(f"Loaded {len(df)} events.")

    results = []

    print("Calculating Solar Returns and Cosine Features...")
    for _, row in df.iterrows():
        name = row['name']
        bdate = row['birth_date']
        year = row['year']
        event = row['event_type']

        # Best effort per row: one bad record is reported and skipped rather
        # than aborting the whole run.
        try:
            jd_return = get_solar_return_date(bdate, year)
            if jd_return is None:
                continue

            features = analyze_chart_cosines(jd_return)
            features['event_type'] = event
            features['year'] = year
            features['name'] = name

            results.append(features)
        except Exception as e:
            print(f"Error processing {name} {year}: {e}")

    results_df = pd.DataFrame(results)
    print(f"Processed {len(results_df)} valid charts.")

    # An empty frame has no 'event_type' column, so everything below would
    # fail with a KeyError; stop here with a clear message instead.
    if results_df.empty:
        print("No valid charts could be computed; nothing to analyze.")
        return

    # --- Analysis ---

    print("\nEvent Counts:")
    print(results_df['event_type'].value_counts())

    main_events = ['death', 'marriage', 'award', 'crisis', 'divorce']

    # Feature columns are strictly "P1-P2" in PLANET_LIST order; the list does
    # not depend on the event, so it is built once.
    pair_cols = [
        f"{first}-{second}"
        for idx, first in enumerate(PLANET_LIST)
        for second in PLANET_LIST[idx + 1:]
    ]

    output_lines = []
    output_lines.append("# Project 34: Solar Return Cosine Analysis Results")
    output_lines.append("\n## Methodology")
    output_lines.append("Analyzing **cosine similarity** (+1 Conj, -1 Opp) for 66 planetary pairs in Solar Return charts.")
    output_lines.append(f"Dataset: {len(results_df)} events.")
    output_lines.append("\n## Top Correlations by Event Type")
    output_lines.append("| Event | Pair | Mean Diff | P-Value | Interpretation |")
    output_lines.append("|---|---|---|---|---|")

    significant_findings = []

    for event in main_events:
        print(f"\nAnalyzing: {event.upper()}")
        is_event = results_df['event_type'] == event
        event_df = results_df[is_event]
        rest_df = results_df[results_df['event_type'] != event]

        # The t-test needs a usable sample on both sides. With fewer than two
        # rows in the comparison group every p-value is NaN and the ranking
        # below would be meaningless.
        if len(event_df) < 5 or len(rest_df) < 2:
            print("  Not enough data.")
            continue

        # NOTE: p-values are reported as-is; no multiple-comparison correction
        # is applied across the pairs and event types tested.
        local_results = []
        for pair in pair_cols:
            _, p_val = stats.ttest_ind(event_df[pair], rest_df[pair], equal_var=False)
            mean_event = event_df[pair].mean()
            mean_rest = rest_df[pair].mean()

            local_results.append({
                'pair': pair,
                'diff': mean_event - mean_rest,
                'p': p_val,
                'mean_event': mean_event,
                'mean_rest': mean_rest,
            })

        # Sort by significance
        local_results.sort(key=lambda x: x['p'])

        # Report top 3
        for r in local_results[:3]:
            sig = "**" if r['p'] < 0.05 else ""
            print(f"  {r['pair']}: Diff={r['diff']:+.2f}, p={r['p']:.4f} {sig}")

            if r['p'] < 0.10:  # Looser threshold for inclusion in the report
                # Higher mean cosine than the rest leans towards +1
                # (conjunction); lower leans towards -1 (opposition).
                interp = "Conjunction" if r['mean_event'] > r['mean_rest'] else "Opposition"
                output_lines.append(f"| {event} | {r['pair']} | {r['diff']:+.2f} | {sig}{r['p']:.4f}{sig} | {interp} tendency |")
                significant_findings.append({'event': event, 'pair': r['pair'], 'p': r['p'], 'diff': r['diff']})

    # Save Results
    with open(OUTPUT_DIR / 'RESULTS_COSINE.md', 'w', encoding='utf-8') as f:
        f.write("\n".join(output_lines))

    # Visualize top finding (smallest p-value across all event types)
    if significant_findings:
        top = min(significant_findings, key=lambda x: x['p'])
        plot_pair_distribution(results_df, top['event'], top['pair'])

def plot_pair_distribution(df, target_event, pair):
    """
    Plot and save KDE curves of one pair's cosine: target event vs. the rest.

    Rows of `df` are split on whether their 'event_type' equals
    `target_event`; the `pair` column of each subset is drawn as a filled
    KDE. The figure is saved as cosine_analysis_plot.png in OUTPUT_DIR.
    """
    plt.figure(figsize=(10, 6))

    # Select each subset once and reuse it for both the curve and its count.
    event_values = df[df['event_type'] == target_event][pair]
    other_values = df[df['event_type'] != target_event][pair]

    # Target event first, then the comparison group drawn more transparent.
    sns.kdeplot(
        event_values,
        label=f'{target_event} (n={len(event_values)})',
        fill=True,
    )
    sns.kdeplot(
        other_values,
        label=f'Rest (n={len(other_values)})',
        fill=True,
        alpha=0.3,
    )

    plt.title(f'Solar Return {pair} Cosine Distribution: {target_event.title()}')
    plt.xlabel(f'{pair} Cosine (+1=Conj, -1=Opp)')
    plt.legend()
    plt.grid(True, alpha=0.3)

    plot_path = OUTPUT_DIR / 'cosine_analysis_plot.png'
    plt.savefig(plot_path)
    print(f"Saved plot for {pair} ({target_event})")

# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()