import pandas as pd
import numpy as np
# Path to the Stata file whose 'cm*' variables are inspected.
f = '../wls_b_15_2.dta'


def find_candidates(df, labels, columns, low=440, high=500):
    """Return columns whose numeric median lies within ``[low, high]``.

    Each column is coerced to numeric (non-numeric entries become NaN and
    are dropped) before its median is taken.  Columns with no numeric
    values at all are ignored.

    Args:
        df: DataFrame holding the columns to inspect.
        labels: Mapping of column name to its variable label.
        columns: Iterable of column names to check, in report order.
        low: Inclusive lower bound for the median.
        high: Inclusive upper bound for the median.

    Returns:
        A list of ``(column, label, median)`` tuples, in the order the
        columns were given.
    """
    found = []
    for col in columns:
        try:
            vals = pd.to_numeric(df[col], errors='coerce').dropna()
            median = None if vals.empty else vals.median()
            label = labels[col]
        except (KeyError, TypeError, ValueError) as exc:
            # Best-effort scan: one bad column must not abort the rest,
            # but say so instead of hiding the failure.
            print(f"Skipping {col}: {exc}")
            continue
        if median is not None and low <= median <= high:
            found.append((col, label, median))
    return found


print(f"--- Inspecting CM variables in {f} ---")
try:
    # Only the header is needed to obtain the variable labels; the context
    # manager guarantees the file handle is released afterwards.
    with pd.read_stata(f, iterator=True) as reader:
        labels = reader.variable_labels()
    cm_vars = [col for col in labels if col.lower().startswith('cm')]
    print(f"Found {len(cm_vars)} variables starting with 'cm'")
    if cm_vars:
        # NOTE(review): read_stata's default convert_categoricals=True turns
        # value-labelled codes into label strings, which the numeric coercion
        # then drops -- confirm that this is the intended treatment.
        df = pd.read_stata(f, columns=cm_vars)
        # Per the original note, births in 1937-1940 correspond to values
        # 444 to 492; the accepted median window is slightly wider (440-500).
        candidates = find_candidates(df, labels, cm_vars)
        print("\nCandidates for Birth Century Month (Median 440-500):")
        for name, label, median in candidates:
            print(f"{name}: {label} (Median: {median})")
except Exception as e:
    # Top-level boundary of a diagnostic script: report the problem
    # (e.g. missing or unreadable file) instead of a traceback.
    print(e)