import pandas as pd
import numpy as np
import swisseph as swe
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
# --- Configuration ---
# The input CSV is looked up in the same directory as this script.
OUTPUT_DIR = Path(__file__).parent
DATA_FILE = OUTPUT_DIR.joinpath("new_couples_wikidata.csv")
# NOTE(review): passing None presumably leaves Swiss Ephemeris on its default
# ephemeris lookup -- confirm against the pyswisseph documentation.
swe.set_ephe_path(None)
# Celestial bodies used for the synastry features: display name -> swisseph id.
PLANETS = {
    "Sun": swe.SUN,
    # The Moon is included for this visualization on the assumption that a
    # noon position is roughly adequate for a general distribution.
    "Moon": swe.MOON,
    "Mercury": swe.MERCURY,
    "Venus": swe.VENUS,
    "Mars": swe.MARS,
    "Jupiter": swe.JUPITER,
    "Saturn": swe.SATURN,
}
# Note: the Moon moves ~13 deg/day, so a noon position can be off by up to
# +/- 6.5 deg when the actual birth time is unknown.
# For a broad distribution analysis (KDE) over thousands of couples, this noise
# may flatten the curve, but it should not shift the mean significantly as long
# as the errors are random.
def get_positions(date_str):
    """Return the noon position of every body in PLANETS for a given date.

    Args:
        date_str: Calendar date formatted as ``YYYY-MM-DD``.

    Returns:
        A dict mapping each PLANETS name to an angle in radians, computed for
        hour 12.0 of that date, or ``None`` when the date cannot be parsed
        (including non-string input such as NaN) or the ephemeris call fails.
    """
    try:
        dt = datetime.strptime(date_str, "%Y-%m-%d")
        # Only the date is known, so every position is evaluated at noon.
        jd = swe.julday(dt.year, dt.month, dt.day, 12.0)
        # calc_ut(...)[0][0] is a value in degrees (the ecliptic longitude per
        # the pyswisseph docs); it is converted to radians for np.cos later.
        return {
            name: np.deg2rad(swe.calc_ut(jd, pid)[0][0])
            for name, pid in PLANETS.items()
        }
    except Exception:
        # Best-effort lookup: any data or ephemeris error means "no position".
        # Unlike a bare ``except``, this lets KeyboardInterrupt/SystemExit
        # propagate so a long batch run can still be interrupted.
        return None
# (partner-1 body, partner-2 body) pairs for which a synastry feature is built.
_SYNASTRY_PAIRS = (
    ('Sun', 'Moon'),
    ('Venus', 'Mars'),
    ('Mars', 'Mars'),
    ('Sun', 'Sun'),
    ('Venus', 'Saturn'),  # Classic "Glue" aspect?
)


def _couple_features(row, now):
    """Build the feature dict for one couple, or return None to skip the row.

    Args:
        row: One record providing ``start_date``, ``end_date``,
            ``p1_birth_date`` and ``p2_birth_date``.
        now: Reference datetime used as the end point of ongoing relationships.

    Returns:
        A dict with ``duration`` (years), ``status`` and one cosine value per
        entry of ``_SYNASTRY_PAIRS``; ``None`` when the row has no start date,
        falls outside the accepted duration range, or lacks usable positions.

    Raises:
        KeyError: If an expected column is missing from ``row``.
        ValueError: If a start or end date is not formatted ``YYYY-MM-DD``.
    """
    if pd.isna(row['start_date']):
        return None
    start_dt = datetime.strptime(str(row['start_date']), "%Y-%m-%d")

    if pd.isna(row['end_date']):
        # No end date: treat the relationship as ongoing and measure it up to
        # the reference time of this run.
        duration = (now - start_dt).days / 365.25
        status = 'Ongoing'
    else:
        end_dt = datetime.strptime(str(row['end_date']), "%Y-%m-%d")
        duration = (end_dt - start_dt).days / 365.25
        status = 'Ended'

    # Discard spans shorter than 0.1 years (including negative ones) or longer
    # than 80 years.
    if duration < 0.1 or duration > 80:
        return None

    p1_pos = get_positions(row['p1_birth_date'])
    p2_pos = get_positions(row['p2_birth_date'])
    if not p1_pos or not p2_pos:
        return None

    feat = {'duration': duration, 'status': status}
    for p1, p2 in _SYNASTRY_PAIRS:
        # Cosine of the angular difference: +1 when the two bodies coincide,
        # -1 when they are opposite.
        feat[f"{p1}-{p2}"] = np.cos(p1_pos[p1] - p2_pos[p2])
    return feat


def _plot_distributions(combined):
    """Draw the per-aspect KDE comparison and save it as a PNG in OUTPUT_DIR."""
    key_aspects = ['Sun-Moon', 'Venus-Mars', 'Mars-Mars', 'Sun-Sun']
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    for ax, aspect in zip(axes.flatten(), key_aspects):
        # common_norm=False normalises each group separately, so the curves
        # stay comparable even when the group sizes differ.
        sns.kdeplot(data=combined, x=aspect, hue='Group', fill=True, ax=ax,
                    common_norm=False,
                    palette={'Long Term': 'green', 'Short Term': 'red'},
                    alpha=0.3)
        ax.set_title(f"Distribution of {aspect} Synastry")
        ax.set_xlabel("Cosine Similarity (-1=Opp, +1=Conj)")
        ax.set_ylabel("Density")
        ax.axvline(0, color='gray', linestyle='--', alpha=0.5)
        ax.set_xlim(-1.1, 1.1)
    fig.suptitle("Synastry Distributions: Long Term vs Short Term Relationships\n(Are 'Good' Aspects more common in Long marriages?)", fontsize=16)
    fig.tight_layout()
    fig.savefig(OUTPUT_DIR / 'aspect_distributions_kde.png')
    # Release the figure once it is on disk.
    plt.close(fig)
    print("Saved aspect_distributions_kde.png")


def main():
    """Compare synastry distributions of long and short relationships.

    Reads DATA_FILE, derives a duration and a set of cosine features for every
    usable couple, keeps the couples above the 75th and below the 25th
    duration percentile, and saves a 2x2 grid of KDE plots contrasting the two
    groups. Prints a message and returns early when the data file is missing
    or when no couple survives the filtering.
    """
    print("Loading data for KDE Visualization...")
    if not DATA_FILE.exists():
        print("Data file not found.")
        return

    df = pd.read_csv(DATA_FILE)
    print(f"Processing {len(df)} couples...")

    # One reference time for the whole run keeps ongoing durations consistent.
    now = datetime.now()
    data_list = []
    for _, row in df.iterrows():
        try:
            feat = _couple_features(row, now)
        except (KeyError, TypeError, ValueError):
            # Best-effort: rows with missing fields or malformed dates are
            # skipped, but interrupts and unrelated errors are not swallowed.
            continue
        if feat is not None:
            data_list.append(feat)

    res_df = pd.DataFrame(data_list)
    print(f"Processed {len(res_df)} couples.")
    if res_df.empty:
        # An empty frame has no 'duration' column, so stop before the
        # quantile computation would fail.
        print("No usable couples found; nothing to plot.")
        return

    # Define Long vs Short Term using the quartiles of the whole dataset, so
    # that only the extremes are compared.
    q_high = res_df['duration'].quantile(0.75)
    q_low = res_df['duration'].quantile(0.25)
    long_term = res_df[res_df['duration'] > q_high].copy()
    short_term = res_df[res_df['duration'] < q_low].copy()
    long_term['Group'] = 'Long Term'
    short_term['Group'] = 'Short Term'
    combined = pd.concat([long_term, short_term])
    print(f"Long Term (> {q_high:.1f}y): {len(long_term)}")
    print(f"Short Term (< {q_low:.1f}y): {len(short_term)}")

    _plot_distributions(combined)
# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()