#!/usr/bin/env python3
"""
Project 7: ML Classification - Outer Planets and Personality
=============================================================
Tests if ML can identify patterns between OUTER PLANET positions and
personality using REAL personality data from Open-Source Psychometrics Project.
DATA SOURCES (REAL):
- Open-Source Psychometrics Project (OSPP) Big Five data
https://openpsychometrics.org/_rawdata/
- Birth year allows calculation of outer planet positions
- Jupiter, Saturn, Uranus, Neptune, Pluto signs from birth year
WHY OUTER PLANETS:
- These planets move slowly (1-30 years per sign)
- Birth YEAR is sufficient to determine their approximate positions
- OSPP provides birth year but not full date (privacy)
METHODOLOGY:
1. Download real OSPP Big Five test data with birth years
2. Calculate outer planet positions for each birth year
3. Train ML models to predict Big Five traits from planet positions
4. Compare against random baseline
"""
import numpy as np
import pandas as pd
import swisseph as swe
from scipy import stats
from datetime import datetime
import matplotlib.pyplot as plt
from pathlib import Path
import zipfile
import io
import warnings
warnings.filterwarnings('ignore')
try:
import requests
REQUESTS_AVAILABLE = True
except ImportError:
REQUESTS_AVAILABLE = False
try:
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
SKLEARN_AVAILABLE = True
except ImportError:
SKLEARN_AVAILABLE = False
# All cached data and output artifacts go next to this script.
OUTPUT_DIR = Path(__file__).parent

# NOTE(review): None uses pyswisseph's default ephemeris search path —
# confirm ephemeris data files (or the built-in fallback) are available.
swe.set_ephe_path(None)

# Outer planets only - these can be determined from birth year
# (each spends years per zodiac sign, so the birth YEAR pins the sign).
OUTER_PLANETS = {
    swe.JUPITER: 'Jupiter',  # ~1 year per sign
    swe.SATURN: 'Saturn',    # ~2.5 years per sign
    swe.URANUS: 'Uranus',    # ~7 years per sign
    swe.NEPTUNE: 'Neptune',  # ~14 years per sign
    swe.PLUTO: 'Pluto'       # ~12-30 years per sign
}
def get_outer_planet_positions(year):
    """Return ecliptic longitudes of the outer planets at mid-year.

    Positions are evaluated at July 1st, 12:00 UT of *year*; that is
    accurate enough because the outer planets move slowly.

    Args:
        year: Calendar year (int).

    Returns:
        dict mapping planet name -> ecliptic longitude in degrees.
    """
    # Julian day for July 1, noon UT, of the requested year.
    julian_day = swe.julday(year, 7, 1, 12.0)
    # calc_ut returns (coordinate tuple, flag); element [0][0] is the
    # ecliptic longitude in degrees.
    return {
        planet_name: swe.calc_ut(julian_day, planet_id)[0][0]
        for planet_id, planet_name in OUTER_PLANETS.items()
    }
def extract_outer_planet_features(year):
    """Build an ML feature dict from outer-planet positions for a birth year.

    Features produced (in insertion order):
      * ``<Planet>_sign``: zodiac sign index 0-11
      * ``<Planet>_lon``: raw ecliptic longitude in degrees
      * ``outer_element_<el>``: count of outer planets per classical element
      * ``<P1>_<P2>_angle``: pairwise separation folded into [0, 180] degrees

    Args:
        year: Birth year (int).

    Returns:
        dict of feature name -> numeric value.
    """
    positions = get_outer_planet_positions(year)
    features = {}
    # Sign index (0-11) and raw longitude for each planet.
    for planet, longitude in positions.items():
        features[f'{planet}_sign'] = int(longitude / 30) % 12
        features[f'{planet}_lon'] = longitude
    # Count how many outer planets fall in each classical element.
    element_names = {0: 'fire', 1: 'earth', 2: 'air', 3: 'water'}
    for element in element_names.values():
        features[f'outer_element_{element}'] = 0
    for planet, longitude in positions.items():
        sign_index = int(longitude / 30) % 12
        features[f'outer_element_{element_names[sign_index % 4]}'] += 1
    # Pairwise angular separations, folded into [0, 180].
    names = list(positions)
    for idx, first in enumerate(names):
        for second in names[idx + 1:]:
            separation = abs(positions[first] - positions[second]) % 360
            if separation > 180:
                separation = 360 - separation
            features[f'{first}_{second}_angle'] = separation
    return features
def load_ospp_data():
    """
    Download and process REAL OSPP Big Five data.

    Source: https://openpsychometrics.org/_rawdata/
    The BIG5 dataset contains responses to the Big Five Inventory.

    Each respondent is scored on the five traits (sum of 10 items each,
    reverse-keying where required; score range 10-50). A birth year is
    estimated from the reported age, and outer-planet features are
    attached per respondent.

    Returns:
        pandas.DataFrame with 'id', 'birth_year', 'age', the five trait
        scores, and the features from extract_outer_planet_features().

    Raises:
        ImportError: if the `requests` library is not installed.
        Exception: re-raised download/extraction failures.
    """
    print("=" * 60)
    print("DOWNLOADING REAL OSPP BIG FIVE DATA")
    print("Source: openpsychometrics.org")
    print("=" * 60)
    if not REQUESTS_AVAILABLE:
        raise ImportError("requests library required. Install with: pip install requests")
    # OSPP Big Five dataset URL
    url = "https://openpsychometrics.org/_rawdata/BIG5.zip"
    cache_file = OUTPUT_DIR / "BIG5_data.csv"
    # Check for cached data first so repeat runs skip the download.
    if cache_file.exists():
        print(f"Loading cached data from {cache_file}")
        df_raw = pd.read_csv(cache_file, sep='\t')
    else:
        print(f"Downloading from {url}...")
        try:
            response = requests.get(url, timeout=60)
            response.raise_for_status()
            # Extract the tab-separated data file from the zip archive.
            with zipfile.ZipFile(io.BytesIO(response.content)) as z:
                csv_files = [f for f in z.namelist() if f.endswith('.csv')]
                if not csv_files:
                    raise ValueError("No CSV file found in zip")
                with z.open(csv_files[0]) as f:
                    df_raw = pd.read_csv(f, sep='\t')
            # Cache for future runs
            df_raw.to_csv(cache_file, sep='\t', index=False)
            print(f"Cached data to {cache_file}")
        except Exception as e:
            print(f"Download failed: {e}")
            print("Please download manually from https://openpsychometrics.org/_rawdata/")
            raise
    print(f"Raw dataset: {len(df_raw):,} responses")
    print(f"Columns: {list(df_raw.columns)[:10]}...")
    # OSPP Big Five uses 50 items (10 per trait), labeled E1-E10, N1-N10,
    # A1-A10, C1-C10, O1-O10, each scored 1-5. Some items are reverse-keyed.
    reverse_keyed = {
        'E': [2, 4, 6, 8, 10],  # E2, E4, E6, E8, E10 are reverse
        'N': [2, 4],            # N2, N4 are reverse
        'A': [1, 3, 5, 7],      # A1, A3, A5, A7 are reverse
        'C': [2, 4, 6, 8],      # C2, C4, C6, C8 are reverse
        'O': [2, 4, 6]          # O2, O4, O6 are reverse
    }
    trait_names = {
        'E': 'Extraversion',
        'N': 'Neuroticism',
        'A': 'Agreeableness',
        'C': 'Conscientiousness',
        'O': 'Openness'
    }
    records = []
    # Planet features depend only on the birth YEAR, so memoize them per
    # distinct year instead of recomputing ephemeris positions per row
    # (the raw dataset has ~1M rows but fewer than 90 eligible years).
    planet_feature_cache = {}
    for idx, row in df_raw.iterrows():
        # OSPP provides 'age' but not a full birth date (privacy).
        age = row.get('age', np.nan)
        if pd.isna(age) or age < 10 or age > 100:
            continue
        # Estimate birth year from age (assuming data from ~2018)
        birth_year = 2018 - int(age)
        # Only use birth years where outer planets are meaningful
        if birth_year < 1920 or birth_year > 2005:
            continue
        # Sum the 10 items per trait, reverse-keying where needed; any
        # missing item invalidates the whole respondent.
        traits = {}
        valid = True
        for trait_code, trait_name in trait_names.items():
            score = 0
            for i in range(1, 11):
                col = f'{trait_code}{i}'
                if col not in row or pd.isna(row[col]):
                    valid = False
                    break
                item_score = row[col]
                if i in reverse_keyed.get(trait_code, []):
                    item_score = 6 - item_score  # Reverse score (1-5 scale)
                score += item_score
            if not valid:
                break
            traits[trait_name] = score  # Score range: 10-50
        if not valid:
            continue
        # Get outer planet features (cached per birth year)
        if birth_year not in planet_feature_cache:
            planet_feature_cache[birth_year] = extract_outer_planet_features(birth_year)
        records.append({
            'id': idx,
            'birth_year': birth_year,
            'age': age,
            **traits,
            **planet_feature_cache[birth_year]
        })
    df = pd.DataFrame(records)
    print(f"\nProcessed {len(df):,} valid personality assessments")
    print(f"Birth years: {df['birth_year'].min()} - {df['birth_year'].max()}")
    print(f"\nBig Five Score Distributions (REAL DATA):")
    for trait in ['Extraversion', 'Neuroticism', 'Agreeableness', 'Conscientiousness', 'Openness']:
        print(f" {trait}: mean={df[trait].mean():.1f}, sd={df[trait].std():.1f}, range={df[trait].min():.0f}-{df[trait].max():.0f}")
    return df
def run_ml_classification(df):
    """Run ML classification tests for all Big Five traits.

    For each trait, trains a RandomForest to predict an above/below-median
    split from outer-planet features, then reports holdout accuracy vs the
    majority-class baseline, 5-fold CV accuracy, and a one-sided binomial
    test. Also prints the age confound inherent in outer-planet positions.

    Args:
        df: DataFrame from load_ospp_data().

    Returns:
        (results, all_importances): flat metrics dict keyed by
        '<Trait>_<metric>', and a dict of per-trait feature-importance
        DataFrames sorted by importance.
    """
    print("\n" + "=" * 60)
    print("MACHINE LEARNING CLASSIFICATION")
    print("Predictors: Jupiter, Saturn, Uranus, Neptune, Pluto positions")
    print("=" * 60)
    if not SKLEARN_AVAILABLE:
        print("sklearn not available")
        return {}, {}
    # Feature columns (outer planets only)
    feature_cols = [c for c in df.columns if any(p in c for p in
                    ['Jupiter', 'Saturn', 'Uranus', 'Neptune', 'Pluto', 'outer_element'])]
    print(f"\nFeatures used ({len(feature_cols)}):")
    for col in feature_cols[:10]:
        print(f" - {col}")
    if len(feature_cols) > 10:
        print(f" ... and {len(feature_cols) - 10} more")
    # CRITICAL: slow outer planets track birth year, hence age — check it.
    print("\n" + "-" * 60)
    print("CHECKING AGE CONFOUND")
    print("-" * 60)
    age_corrs = {}
    for col in ['Uranus_sign', 'Neptune_sign', 'Pluto_sign']:
        r, p = stats.spearmanr(df[col], df['age'])
        age_corrs[col] = (r, p)
        print(f" {col} vs age: r={r:.3f}, p={p:.4f}")
    print("\n WARNING: Outer planet signs are strongly correlated with age!")
    print(" Uranus, Neptune, Pluto signs are essentially age proxies.")
    print(" Any 'predictive' power is likely due to age, not astrology.")
    X = df[feature_cols].values
    # Standardize features (not required by trees, kept for comparability).
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    results = {}
    all_importances = {}
    # Test all Big Five traits
    big_five_traits = ['Extraversion', 'Openness', 'Conscientiousness', 'Agreeableness', 'Neuroticism']
    from scipy.stats import binomtest
    for i, trait in enumerate(big_five_traits, 1):
        print(f"\n{i}. {trait.upper()} PREDICTION")
        # Check age-trait correlation first
        r_age, p_age = stats.spearmanr(df['age'], df[trait])
        print(f" Age vs {trait}: r={r_age:.3f}, p={p_age:.4f}")
        # Median split for binary classification
        median_val = df[trait].median()
        y = (df[trait] > median_val).astype(int).values
        # Hold out 20% for testing
        X_train, X_test, y_train, y_test = train_test_split(
            X_scaled, y, test_size=0.2, random_state=42
        )
        # Random Forest
        rf = RandomForestClassifier(n_estimators=100, random_state=42, max_depth=5)
        rf.fit(X_train, y_train)
        y_pred = rf.predict(X_test)
        rf_acc = accuracy_score(y_test, y_pred)
        # Majority-class baseline accuracy.
        baseline_acc = max(y_test.mean(), 1 - y_test.mean())
        print(f" Accuracy: {rf_acc:.4f}")
        print(f" Baseline: {baseline_acc:.4f}")
        print(f" Difference: {(rf_acc - baseline_acc)*100:+.2f}%")
        # Cross-validation
        cv_scores = cross_val_score(rf, X_scaled, y, cv=5)
        print(f" CV: {cv_scores.mean():.4f} (±{cv_scores.std()*2:.4f})")
        # One-sided binomial test against the baseline rate. Count the
        # correct predictions directly instead of reconstructing the count
        # from the rounded accuracy (int(acc * n) can truncate by one).
        n_correct = int((y_pred == y_test).sum())
        try:
            result = binomtest(n_correct, len(y_test), baseline_acc, alternative='greater')
            p_value = result.pvalue
        except ValueError:
            # Degenerate inputs (e.g. empty test fold): treat as no evidence.
            p_value = 1.0
        print(f" P-value: {p_value:.4f}")
        print(f" Significant: {'Yes' if p_value < 0.05 else 'No'}")
        results[f'{trait}_accuracy'] = rf_acc
        results[f'{trait}_baseline'] = baseline_acc
        results[f'{trait}_cv_mean'] = cv_scores.mean()
        results[f'{trait}_cv_std'] = cv_scores.std()
        results[f'{trait}_p_value'] = p_value
        results[f'{trait}_age_corr'] = r_age
        # Feature importance
        importances = pd.DataFrame({
            'feature': feature_cols,
            'importance': rf.feature_importances_
        }).sort_values('importance', ascending=False)
        all_importances[trait] = importances
    # Summary
    print("\n" + "=" * 60)
    print("SUMMARY: ALL BIG FIVE TRAITS (UNCORRECTED)")
    print("=" * 60)
    print(f"\n{'Trait':<20} {'Accuracy':>10} {'Baseline':>10} {'Diff':>8} {'P-value':>10} {'Age r':>8}")
    print("-" * 75)
    for trait in big_five_traits:
        acc = results[f'{trait}_accuracy']
        base = results[f'{trait}_baseline']
        diff = (acc - base) * 100
        p = results[f'{trait}_p_value']
        age_r = results[f'{trait}_age_corr']
        print(f"{trait:<20} {acc:>10.4f} {base:>10.4f} {diff:>+7.2f}% {p:>10.4f} {age_r:>+8.3f}")
    return results, all_importances
def run_age_controlled_analysis(df):
    """Run ML analysis controlling for age by using age-matched groups.

    Repeats the classification inside narrow age bands so outer-planet
    positions can no longer act as age proxies. Reports 5-fold CV accuracy
    per trait per band and the overall average.

    Note: an unreachable duplicate of run_ml_classification()'s body that
    previously followed the return statement has been removed (dead code).

    Args:
        df: DataFrame from load_ospp_data().

    Returns:
        dict mapping trait name -> list of per-band dicts with keys
        'age_band', 'n', 'cv_mean', 'cv_std'.
    """
    print("\n" + "=" * 60)
    print("AGE-CONTROLLED ANALYSIS")
    print("Testing within narrow age bands to remove age confound")
    print("=" * 60)
    if not SKLEARN_AVAILABLE:
        return {}
    # Feature columns (outer planets only)
    feature_cols = [c for c in df.columns if any(p in c for p in
                    ['Jupiter', 'Saturn', 'Uranus', 'Neptune', 'Pluto', 'outer_element'])]
    big_five_traits = ['Extraversion', 'Openness', 'Conscientiousness', 'Agreeableness', 'Neuroticism']
    # Narrow age bands: within a band, planet positions vary little with age.
    age_bands = [(18, 25), (25, 35), (35, 45), (45, 55), (55, 70)]
    controlled_results = {}
    for trait in big_five_traits:
        band_results = []
        for age_min, age_max in age_bands:
            subset = df[(df['age'] >= age_min) & (df['age'] < age_max)]
            # Skip bands too small for a stable 5-fold CV estimate.
            if len(subset) < 500:
                continue
            X = subset[feature_cols].values
            scaler = StandardScaler()
            X_scaled = scaler.fit_transform(X)
            # Median split within the band for binary classification.
            median_val = subset[trait].median()
            y = (subset[trait] > median_val).astype(int).values
            rf = RandomForestClassifier(n_estimators=100, random_state=42, max_depth=5)
            cv_scores = cross_val_score(rf, X_scaled, y, cv=5)
            band_results.append({
                'age_band': f"{age_min}-{age_max}",
                'n': len(subset),
                'cv_mean': cv_scores.mean(),
                'cv_std': cv_scores.std()
            })
        controlled_results[trait] = band_results
    # Print results
    print(f"\n{'Trait':<18} {'Age Band':>10} {'N':>7} {'CV Accuracy':>14}")
    print("-" * 55)
    for trait in big_five_traits:
        for i, result in enumerate(controlled_results[trait]):
            trait_label = trait if i == 0 else ""
            print(f"{trait_label:<18} {result['age_band']:>10} {result['n']:>7} {result['cv_mean']:.4f} (±{result['cv_std']*2:.4f})")
    # Overall assessment
    print("\n" + "-" * 55)
    all_cv = []
    for trait in big_five_traits:
        for result in controlled_results[trait]:
            all_cv.append(result['cv_mean'])
    avg_cv = np.mean(all_cv)
    print(f"Average CV accuracy across all age bands: {avg_cv:.4f}")
    print(f"Expected by chance: 0.5000")
    print(f"Difference: {(avg_cv - 0.5)*100:+.2f}%")
    if abs(avg_cv - 0.5) < 0.02:
        print("\nCONCLUSION: When controlling for age, outer planet positions")
        print("show NO predictive power for personality traits.")
    return controlled_results
def analyze_planet_trait_correlations(df):
    """Report Spearman correlations between planet sign indices and traits.

    Tests every (planet sign, trait) pair, prints the significant ones
    (p < 0.05) plus a multiple-testing summary, and returns all pairs.

    Args:
        df: DataFrame with '<Planet>_sign' columns and Big Five trait columns.

    Returns:
        pandas.DataFrame with columns Planet, Trait, r, p, significant.
    """
    print("\n" + "=" * 60)
    print("CORRELATION ANALYSIS: Planet Signs vs Big Five")
    print("=" * 60)
    traits = ['Extraversion', 'Neuroticism', 'Agreeableness', 'Conscientiousness', 'Openness']
    planets = ['Jupiter', 'Saturn', 'Uranus', 'Neptune', 'Pluto']
    tested_pairs = []
    for body in planets:
        sign_values = df[f'{body}_sign']
        for trait in traits:
            rho, p_val = stats.spearmanr(sign_values, df[trait])
            tested_pairs.append({
                'Planet': body,
                'Trait': trait,
                'r': rho,
                'p': p_val,
                'significant': p_val < 0.05
            })
    corr_df = pd.DataFrame(tested_pairs)
    sig_corrs = corr_df[corr_df['significant']]
    print("\nSignificant correlations (p < 0.05):")
    if sig_corrs.empty:
        print(" None found")
    else:
        for _, entry in sig_corrs.iterrows():
            print(f" {entry['Planet']} sign vs {entry['Trait']}: r={entry['r']:.3f}, p={entry['p']:.4f}")
    print(f"\nTotal correlations tested: {len(tested_pairs)}")
    print(f"Significant at p<0.05: {len(sig_corrs)}")
    print(f"Expected by chance (5%): {len(tested_pairs) * 0.05:.1f}")
    return corr_df
def create_visualizations(df, results, all_importances):
    """Create visualizations.

    Renders a 2x3 panel figure summarizing the (uncorrected) ML results
    and saves it as 'ml_classification.png' in OUTPUT_DIR.

    Args:
        df: DataFrame from load_ospp_data().
        results: flat metrics dict from run_ml_classification().
        all_importances: per-trait feature-importance DataFrames from
            run_ml_classification().
    """
    print("\nCreating visualizations...")
    fig, axes = plt.subplots(2, 3, figsize=(16, 10))
    traits = ['Extraversion', 'Openness', 'Conscientiousness', 'Agreeableness', 'Neuroticism']
    # Plot 1: Accuracy vs Baseline
    ax1 = axes[0, 0]
    accuracies = [results[f'{t}_accuracy'] for t in traits]
    baselines = [results[f'{t}_baseline'] for t in traits]
    x = np.arange(len(traits))
    width = 0.35
    ax1.bar(x - width/2, accuracies, width, label='Model', color='steelblue', alpha=0.7)
    ax1.bar(x + width/2, baselines, width, label='Baseline', color='coral', alpha=0.7)
    ax1.axhline(y=0.5, color='black', linestyle='--', alpha=0.5)
    ax1.set_ylabel('Accuracy')
    ax1.set_title('Model vs Baseline (Outer Planets Only)')
    ax1.set_xticks(x)
    ax1.set_xticklabels([t[:4] for t in traits], rotation=45)
    # NOTE(review): tight y-limits assume near-chance accuracies; bars
    # outside 0.45-0.55 would be visually clipped.
    ax1.set_ylim(0.45, 0.55)
    ax1.legend()
    ax1.grid(True, alpha=0.3)
    # Plot 2: P-values (green below the 0.05 threshold, red above)
    ax2 = axes[0, 1]
    p_values = [results[f'{t}_p_value'] for t in traits]
    colors = ['green' if p < 0.05 else 'red' for p in p_values]
    ax2.bar(range(len(traits)), p_values, color=colors, alpha=0.7)
    ax2.axhline(y=0.05, color='red', linestyle='--', label='p=0.05')
    ax2.set_ylabel('P-value')
    ax2.set_title('Statistical Significance')
    ax2.set_xticks(range(len(traits)))
    ax2.set_xticklabels([t[:4] for t in traits], rotation=45)
    ax2.legend()
    ax2.grid(True, alpha=0.3)
    # Plot 3: Feature importance (Extraversion), top 10 features only
    ax3 = axes[0, 2]
    top10 = all_importances['Extraversion'].head(10)
    ax3.barh(range(10), top10['importance'].values, color='steelblue', alpha=0.7)
    ax3.set_yticks(range(10))
    ax3.set_yticklabels(top10['feature'].values, fontsize=8)
    ax3.set_xlabel('Importance')
    ax3.set_title('Top Features (Extraversion)')
    ax3.grid(True, alpha=0.3)
    # Plot 4: Birth year distribution
    ax4 = axes[1, 0]
    ax4.hist(df['birth_year'], bins=30, color='steelblue', alpha=0.7, edgecolor='black')
    ax4.set_xlabel('Birth Year')
    ax4.set_ylabel('Count')
    ax4.set_title('Birth Year Distribution (OSPP-style)')
    ax4.grid(True, alpha=0.3)
    # Plot 5: Planet positions by birth year — illustrates how slowly the
    # outer planets change sign (i.e. why they proxy for age).
    ax5 = axes[1, 1]
    years = range(1950, 2006)
    for planet in ['Jupiter', 'Saturn', 'Uranus', 'Neptune', 'Pluto']:
        signs = [int(get_outer_planet_positions(y)[planet] / 30) % 12 for y in years]
        ax5.plot(years, signs, label=planet, alpha=0.7)
    ax5.set_xlabel('Birth Year')
    ax5.set_ylabel('Sign (0=Aries, 11=Pisces)')
    ax5.set_title('Outer Planet Signs by Year')
    ax5.legend(fontsize=8)
    ax5.grid(True, alpha=0.3)
    # Plot 6: Difference from baseline (percentage points)
    ax6 = axes[1, 2]
    diffs = [(results[f'{t}_accuracy'] - results[f'{t}_baseline'])*100 for t in traits]
    colors = ['green' if d > 0 else 'red' for d in diffs]
    ax6.bar(range(len(traits)), diffs, color=colors, alpha=0.7)
    ax6.axhline(y=0, color='black', linestyle='-', alpha=0.5)
    ax6.set_ylabel('Accuracy - Baseline (%)')
    ax6.set_title('Deviation from Chance')
    ax6.set_xticks(range(len(traits)))
    ax6.set_xticklabels([t[:4] for t in traits], rotation=45)
    ax6.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.savefig(OUTPUT_DIR / 'ml_classification.png', dpi=150)
    plt.close()
    print(f"Saved visualization to {OUTPUT_DIR / 'ml_classification.png'}")
def main():
    """Run the full pipeline: load data, run ML tests, and save outputs.

    Steps: download/cache OSPP data, run the uncorrected classification,
    run the age-controlled analysis (the valid test), run correlation
    analysis, render plots, print a final interpretation, and save CSV
    results into OUTPUT_DIR.
    """
    print("=" * 70)
    print("PROJECT 7: OUTER PLANETS AND PERSONALITY")
    print("Testing Jupiter, Saturn, Uranus, Neptune, Pluto vs Big Five")
    print("Using REAL OSPP Big Five data")
    print("=" * 70)
    # Load data
    df = load_ospp_data()
    # Run ML classification (uncorrected)
    results, all_importances = run_ml_classification(df)
    # Run age-controlled analysis (THE KEY TEST)
    controlled_results = run_age_controlled_analysis(df)
    # Correlation analysis
    corr_df = analyze_planet_trait_correlations(df)
    # Visualizations
    create_visualizations(df, results, all_importances)
    # Final conclusion
    print("\n" + "=" * 60)
    print("FINAL CONCLUSION")
    print("=" * 60)
    traits = ['Extraversion', 'Openness', 'Conscientiousness', 'Agreeableness', 'Neuroticism']
    # Average CV accuracy across all (trait, age-band) cells; defaults to
    # chance (0.5) if the age-controlled analysis produced no results.
    all_controlled_cv = []
    for trait in traits:
        for result in controlled_results.get(trait, []):
            all_controlled_cv.append(result['cv_mean'])
    avg_controlled = np.mean(all_controlled_cv) if all_controlled_cv else 0.5
    print(f"\nSample size: {len(df):,} real OSPP personality assessments")
    print(f"\nUNCORRECTED RESULTS (confounded by age):")
    significant = sum(1 for t in traits if results[f'{t}_p_value'] < 0.05)
    # "Above baseline" uses a 0.5 percentage-point margin to ignore noise.
    above_baseline = sum(1 for t in traits if results[f'{t}_accuracy'] > results[f'{t}_baseline'] + 0.005)
    print(f" Traits above baseline: {above_baseline}/5")
    print(f" Statistically significant: {significant}/5")
    print(f" BUT: Outer planets are perfect proxies for birth year (age)")
    print(f"\nAGE-CONTROLLED RESULTS (the valid test):")
    print(f" Average CV accuracy within age bands: {avg_controlled:.4f}")
    print(f" Expected by chance: 0.5000")
    print(f" Difference: {(avg_controlled - 0.5)*100:+.2f}%")
    print("\n" + "-" * 60)
    print("INTERPRETATION:")
    print("-" * 60)
    print("The uncorrected analysis shows apparent 'predictive power' because:")
    print(" 1. Outer planet signs are perfectly correlated with birth year")
    print(" 2. Personality traits genuinely change with age")
    print(" 3. The model is simply detecting age effects, not astrological ones")
    print("\nWhen controlling for age (testing within narrow age bands),")
    print("outer planet positions show NO predictive power for personality.")
    print("\nCONCLUSION: There is no evidence that outer planet positions")
    print("influence Big Five personality traits beyond age effects.")
    # Save results
    pd.DataFrame([results]).to_csv(OUTPUT_DIR / 'analysis_results.csv', index=False)
    corr_df.to_csv(OUTPUT_DIR / 'correlations.csv', index=False)
    # Save controlled results (one row per trait/age-band combination)
    controlled_rows = []
    for trait in traits:
        for result in controlled_results.get(trait, []):
            controlled_rows.append({'trait': trait, **result})
    pd.DataFrame(controlled_rows).to_csv(OUTPUT_DIR / 'age_controlled_results.csv', index=False)
    print(f"\nResults saved to {OUTPUT_DIR}")
if __name__ == '__main__':
    # Run the full analysis pipeline only when executed as a script.
    main()