import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import scipy.stats as stats
import statsmodels.formula.api as smf
from statsmodels.stats.anova import anova_lm
import warnings
warnings.filterwarnings("ignore")

# Load the raw extract; the ".gz" suffix lets pandas decompress it on the fly.
data = pd.read_csv('data.csv.gz')
data.head()

# Columns that are not used anywhere further down in the analysis.
unused_columns = [
    'SAMPLE', 'SERIAL', 'CBSERIAL', 'CLUSTER', 'STRATA',
    'GQ', 'PERWT', 'EDUCD', 'ADJUST', 'INCTOT',
]
filtered_data = data.drop(unused_columns, axis=1)
filtered_data.head()

filtered_data.shape

(3422888, 6)

# Quick look at column types and missing-value counts.
filtered_data.dtypes

filtered_data.isna().sum()

# Keep only rows with PERNUM == 1 (presumably one person record per
# household -- confirm against the codebook), then discard the marker column.
is_first_person = filtered_data['PERNUM'].eq(1)
filtered_data = (
    filtered_data.loc[is_first_person]
    .reset_index(drop=True)
    .drop(columns=['PERNUM'])
)
filtered_data.head()

# Retain strictly positive household incomes and exclude the 9999999
# placeholder value (presumably the "not applicable" code -- confirm).
household_income = filtered_data['HHINCOME']
has_usable_income = household_income.gt(0) & household_income.ne(9999999)
filtered_data = filtered_data.loc[has_usable_income].reset_index(drop=True)
filtered_data.head()

# Numeric state FIPS code -> state name (50 states plus the District of Columbia).
FIPS_TO_STATE = {
    1: 'Alabama',
    2: 'Alaska',
    4: 'Arizona',
    5: 'Arkansas',
    6: 'California',
    8: 'Colorado',
    9: 'Connecticut',
    10: 'Delaware',
    11: 'District of Columbia',
    12: 'Florida',
    13: 'Georgia',
    15: 'Hawaii',
    16: 'Idaho',
    17: 'Illinois',
    18: 'Indiana',
    19: 'Iowa',
    20: 'Kansas',
    21: 'Kentucky',
    22: 'Louisiana',
    23: 'Maine',
    24: 'Maryland',
    25: 'Massachusetts',
    26: 'Michigan',
    27: 'Minnesota',
    28: 'Mississippi',
    29: 'Missouri',
    30: 'Montana',
    31: 'Nebraska',
    32: 'Nevada',
    33: 'New Hampshire',
    34: 'New Jersey',
    35: 'New Mexico',
    36: 'New York',
    37: 'North Carolina',
    38: 'North Dakota',
    39: 'Ohio',
    40: 'Oklahoma',
    41: 'Oregon',
    42: 'Pennsylvania',
    44: 'Rhode Island',
    45: 'South Carolina',
    46: 'South Dakota',
    47: 'Tennessee',
    48: 'Texas',
    49: 'Utah',
    50: 'Vermont',
    51: 'Virginia',
    53: 'Washington',
    54: 'West Virginia',
    55: 'Wisconsin',
    56: 'Wyoming',
}
# Translate FIPS codes to state names; codes missing from FIPS_TO_STATE become NaN.
state_names = filtered_data['STATEFIP'].map(FIPS_TO_STATE)
filtered_data['STATE'] = state_names
filtered_data = filtered_data.drop(columns=['STATEFIP'])
filtered_data.head()

def recode_educ(code):
    """Collapse a numeric EDUC code into one of three education groups.

    Codes of 6 or below become 'HS or Lower', codes of 10 or above become
    'BA or Higher', and any other value falls through to 'Some College'.
    """
    if code >= 10:
        return 'BA or Higher'
    if code <= 6:
        return 'HS or Lower'
    return 'Some College'

# Derive the three-level education group, then remove the raw code column.
filtered_data['EDUCGROUP'] = filtered_data['EDUC'].map(recode_educ)
filtered_data = filtered_data.drop(columns=['EDUC'])
filtered_data.head()

# YEAR is not used by any later step.
filtered_data = filtered_data.drop(columns=['YEAR'])
filtered_data.head()

# Size of each education group, as raw row counts and as percentage shares.
educ_group = filtered_data['EDUCGROUP']
educ_counts = educ_group.value_counts()
educ_pct = educ_group.value_counts(normalize=True).mul(100)
educ_pct_df = pd.DataFrame({'Count': educ_counts, 'Percent (%)': educ_pct})
educ_pct_df

# Unweighted descriptive statistics of household income per group.
income_by_group = filtered_data.groupby('EDUCGROUP')['HHINCOME']
income_by_group.describe().round()


def _hhwt_weighted_income(rows):
    """Return the HHWT-weighted average of HHINCOME over the given rows."""
    return np.average(rows['HHINCOME'], weights=rows['HHWT'])


weighted_means = (
    filtered_data.groupby('EDUCGROUP')
    .apply(_hhwt_weighted_income)
    .rename('Weighted Mean Income ($)')
    .reset_index()
)
weighted_means

# Overlaid, HHWT-weighted histograms of household income, one per education
# group. Incomes are capped at $500k so the long right tail does not flatten
# the plot; density=True normalizes each group's histogram separately.
fig, ax = plt.subplots(figsize=(7, 5))
for grp, color in [('HS or Lower', '#E74C3C'), ('Some College', '#F39C12'), ('BA or Higher', '#2ECC71')]:
    subset = filtered_data[filtered_data['EDUCGROUP'] == grp]
    # Deliberately NOT named `data`: that name holds the raw DataFrame read
    # from data.csv.gz, and rebinding it here would silently replace it with
    # the last group's income Series.
    capped_income = subset['HHINCOME'].clip(upper=500000)
    weights = subset['HHWT']
    ax.hist(capped_income, bins=60, alpha=0.5, color=color, label=grp, density=True, weights=weights)

ax.set_title('Weighted Income Distribution by Education Group', fontweight='bold')
ax.set_xlabel('Household Income (capped at $500k)')
# Show tick values in thousands of dollars, e.g. 250000 -> "$250k".
ax.xaxis.set_major_formatter(mticker.FuncFormatter(lambda x, _: f'${x/1000:.0f}k'))
ax.legend()
plt.tight_layout()
plt.show()

# Notched box plots of (unweighted) household income per education group,
# with incomes capped at $500k.
fig, ax = plt.subplots(figsize=(7, 5))
group_order = ['HS or Lower', 'Some College', 'BA or Higher']
groups = [
    filtered_data.loc[filtered_data['EDUCGROUP'] == name, 'HHINCOME'].clip(upper=500000)
    for name in group_order
]
box_style = {'facecolor': '#AED6F1'}
median_style = {'color': 'navy', 'linewidth': 2}
ax.boxplot(
    groups,
    labels=group_order,
    patch_artist=True,
    notch=True,
    boxprops=box_style,
    medianprops=median_style,
)

ax.set_title('Income by Education Group', fontweight='bold')
ax.set_ylabel('Household Income (capped at $500k)')
# Tick values in thousands of dollars.
ax.yaxis.set_major_formatter(mticker.FuncFormatter(lambda x, _: f'${x/1000:.0f}k'))
plt.tight_layout()
plt.show()

# Row counts per state and education group (one column per group).
state_educ = filtered_data.groupby(['STATE', 'EDUCGROUP']).size().unstack(fill_value=0)

# Take the per-state totals ONCE, while the frame still holds only count
# columns. Calling state_educ.sum(axis=1) again after 'pct_BA' has been added
# would fold that percentage into the denominator and understate 'pct_HS'.
state_totals = state_educ.sum(axis=1)
state_educ['pct_BA'] = (state_educ['BA or Higher'] / state_totals * 100).round(1)
state_educ['pct_HS'] = (state_educ['HS or Lower'] / state_totals * 100).round(1)

# Stacked horizontal bars, states ordered by their BA-or-higher share.
state_educ[['pct_BA','pct_HS']].sort_values('pct_BA').plot(
    kind='barh',
    stacked=True,
    figsize=(13,12)
)

plt.xlabel('Percent of Population')
plt.ylabel('State')
plt.title('Education Composition by State')
plt.legend(title='Education Level')
plt.show()

# HHWT-weighted mean household income for every (state, education group) pair,
# rounded to whole dollars.
by_state_and_group = filtered_data.groupby(['STATE', 'EDUCGROUP'])
summary = by_state_and_group.apply(lambda g: np.average(g['HHINCOME'], weights=g['HHWT']))
summary = summary.round(0).rename('weighted_mean_income').reset_index()
summary.head()

# One row per state with the two groups being compared side by side.
compared_groups = ['HS or Lower', 'BA or Higher']
compared_rows = summary.loc[summary['EDUCGROUP'].isin(compared_groups)]
pivot = compared_rows.pivot(
    index='STATE',
    columns='EDUCGROUP',
    values='weighted_mean_income',
).reset_index()
pivot.columns.name = None

# Premium = BA-or-higher mean income minus HS-or-lower mean income.
pivot['EDUCATION_PREMIUM'] = pivot['BA or Higher'].sub(pivot['HS or Lower'])

# Largest premium first; RANK is simply the 1-based position in that ordering.
pivot_sorted = (
    pivot.sort_values('EDUCATION_PREMIUM', ascending=False)
    .reset_index(drop=True)
)
pivot_sorted['RANK'] = pivot_sorted.index + 1
pivot_sorted.head()

# Headline statistics of the state-level education premium.
premium = pivot['EDUCATION_PREMIUM']
lowest_state = pivot.loc[premium.idxmin(), 'STATE']
highest_state = pivot.loc[premium.idxmax(), 'STATE']

print(f"Mean premium:   ${premium.mean():,.0f}")
print(f"Median premium: ${premium.median():,.0f}")
print(f"Std deviation:  ${premium.std():,.0f}")
print(f"Min premium:    ${premium.min():,.0f}  ({lowest_state})")
print(f"Max premium:    ${premium.max():,.0f}  ({highest_state})")

Mean premium:   $76,841
Median premium: $73,508
Std deviation:  $16,325
Min premium:    $51,552  (Wyoming)
Max premium:    $120,429  (District of Columbia)

# Two-letter postal abbreviations used to label the scatter points. Slicing
# the first two characters of the state name is ambiguous: "Ne" would label
# Nebraska, Nevada and all four "New ..." states, "Mi" four more.
STATE_ABBR = {
    'Alabama': 'AL', 'Alaska': 'AK', 'Arizona': 'AZ', 'Arkansas': 'AR',
    'California': 'CA', 'Colorado': 'CO', 'Connecticut': 'CT', 'Delaware': 'DE',
    'District of Columbia': 'DC', 'Florida': 'FL', 'Georgia': 'GA', 'Hawaii': 'HI',
    'Idaho': 'ID', 'Illinois': 'IL', 'Indiana': 'IN', 'Iowa': 'IA',
    'Kansas': 'KS', 'Kentucky': 'KY', 'Louisiana': 'LA', 'Maine': 'ME',
    'Maryland': 'MD', 'Massachusetts': 'MA', 'Michigan': 'MI', 'Minnesota': 'MN',
    'Mississippi': 'MS', 'Missouri': 'MO', 'Montana': 'MT', 'Nebraska': 'NE',
    'Nevada': 'NV', 'New Hampshire': 'NH', 'New Jersey': 'NJ', 'New Mexico': 'NM',
    'New York': 'NY', 'North Carolina': 'NC', 'North Dakota': 'ND', 'Ohio': 'OH',
    'Oklahoma': 'OK', 'Oregon': 'OR', 'Pennsylvania': 'PA', 'Rhode Island': 'RI',
    'South Carolina': 'SC', 'South Dakota': 'SD', 'Tennessee': 'TN', 'Texas': 'TX',
    'Utah': 'UT', 'Vermont': 'VT', 'Virginia': 'VA', 'Washington': 'WA',
    'West Virginia': 'WV', 'Wisconsin': 'WI', 'Wyoming': 'WY',
}

# Scatter of state-level weighted mean income: HS-or-lower (x) against
# BA-or-higher (y), both in thousands of dollars.
fig, ax = plt.subplots(figsize=(7, 5))
hs_income_k = pivot['HS or Lower'] / 1000
ba_income_k = pivot['BA or Higher'] / 1000
ax.scatter(hs_income_k, ba_income_k,
           s=80, color='#8E44AD', alpha=0.75, edgecolors='white')
for state_name, hs_k, ba_k in zip(pivot['STATE'], hs_income_k, ba_income_k):
    # Fall back to the old two-character prefix for any name not in the table.
    point_label = STATE_ABBR.get(state_name, state_name[:2])
    ax.annotate(point_label, (hs_k, ba_k), fontsize=7, ha='center', va='bottom')

# 45-degree reference line spanning the current axis limits; points above it
# are states where the BA-or-higher mean exceeds the HS-or-lower mean.
lims = [min(ax.get_xlim()[0], ax.get_ylim()[0]), max(ax.get_xlim()[1], ax.get_ylim()[1])]
ax.plot(lims, lims, 'k--', alpha=0.5, label='Equal income line')
ax.set_xlabel('Weighted Mean Income — HS or Lower ($k)', fontsize=12)
ax.set_ylabel('Weighted Mean Income — BA or Higher ($k)', fontsize=12)
ax.set_title('State-Level Income: HS vs BA Households', fontsize=13, fontweight='bold')
ax.legend()
plt.tight_layout()
plt.show()

# Horizontal bar chart of the nominal education premium, smallest at the
# bottom, with bars colored along a red-yellow-green scale by premium size.
fig, ax = plt.subplots(figsize=(13, 10))
sorted_p = pivot.sort_values('EDUCATION_PREMIUM')
nominal_premium = sorted_p['EDUCATION_PREMIUM']

cmap = plt.cm.RdYlGn
norm = plt.Normalize(nominal_premium.min(), nominal_premium.max())
bar_colors = [cmap(scaled) for scaled in map(norm, nominal_premium)]
ax.barh(sorted_p['STATE'], nominal_premium / 1000, color=bar_colors)

# Dashed vertical marker at the median premium across states.
median_val = nominal_premium.median()
ax.axvline(
    median_val / 1000,
    color='black',
    linestyle='--',
    alpha=0.5,
    label=f'Median: ${median_val/1000:.1f}k',
)

ax.set_xlabel('Education Premium ($k)', fontsize=12)
ax.set_title(
    'Education Premium by State (BA or Higher vs HS or Lower)',
    fontsize=13,
    fontweight='bold',
)
ax.legend(fontsize=11)
plt.tight_layout()
plt.show()

# Cost-of-living index by state. The values are used as a percentage divisor
# further down, so 100 acts as the baseline.
# NOTE(review): the source and reference year of these figures are not
# recorded in this file -- confirm and cite.
COLI = {
    'Alabama': 88.0,
    'Alaska': 123.8,
    'Arizona': 111.5,
    'Arkansas': 88.7,
    'California': 144.8,
    'Colorado': 102.0,
    'Connecticut': 112.3,
    'Delaware': 100.8,
    'District of Columbia': 141.9,
    'Florida': 102.8,
    'Georgia': 91.3,
    'Hawaii': 186.9,
    'Idaho': 102.0,
    'Illinois': 94.4,
    'Indiana': 90.5,
    'Iowa': 89.7,
    'Kansas': 87.0,
    'Kentucky': 93.0,
    'Louisiana': 92.2,
    'Maine': 112.1,
    'Maryland': 115.3,
    'Massachusetts': 145.9,
    'Michigan': 90.4,
    'Minnesota': 95.1,
    'Mississippi': 87.9,
    'Missouri': 88.7,
    'Montana': 94.9,
    'Nebraska': 93.1,
    'Nevada': 101.3,
    'New Hampshire': 112.6,
    'New Jersey': 114.6,
    'New Mexico': 93.3,
    'New York': 123.3,
    'North Carolina': 97.8,
    'North Dakota': 91.9,
    'Ohio': 94.2,
    'Oklahoma': 85.7,
    'Oregon': 112.0,
    'Pennsylvania': 95.1,
    'Rhode Island': 112.2,
    'South Carolina': 95.9,
    'South Dakota': 92.2,
    'Tennessee': 90.5,
    'Texas': 92.7,
    'Utah': 104.9,
    'Vermont': 114.4,
    'Virginia': 100.7,
    'Washington': 114.2,
    'West Virginia': 84.1,
    'Wisconsin': 97.0,
    'Wyoming': 95.5,
}

# Attach each state's cost-of-living index and deflate the nominal premium by
# it (index 100 leaves the premium unchanged).
pivot['COLI'] = pivot['STATE'].map(COLI)
pivot['REAL_PREMIUM'] = (pivot['EDUCATION_PREMIUM'] / pivot['COLI']) * 100

# Rank 1 = largest premium. method='min' gives tied states the same integer
# rank; the default ('average') produces fractional ranks for ties, which
# astype(int) would silently truncate.
# NOTE: a state missing from COLI yields a NaN rank, and astype(int) raises on NaN.
pivot['NOMINAL_RANK'] = pivot['EDUCATION_PREMIUM'].rank(ascending=False, method='min').astype(int)
pivot['REAL_RANK'] = pivot['REAL_PREMIUM'].rank(ascending=False, method='min').astype(int)
# Positive shift = the state ranks better (closer to 1) after the adjustment.
pivot['RANK_SHIFT'] = pivot['NOMINAL_RANK'] - pivot['REAL_RANK']

pivot_coli = pivot.sort_values('REAL_PREMIUM', ascending=False).reset_index(drop=True)
pivot_coli[['STATE', 'EDUCATION_PREMIUM', 'COLI', 'REAL_PREMIUM', 'NOMINAL_RANK', 'REAL_RANK', 'RANK_SHIFT']].head()

# Horizontal bar chart of the COLI-adjusted premium, smallest at the bottom,
# colored along a red-yellow-green scale by premium size.
fig, ax = plt.subplots(figsize=(13, 10))
sorted_real = pivot.sort_values('REAL_PREMIUM')
real_premium = sorted_real['REAL_PREMIUM']

cmap = plt.cm.RdYlGn
norm = plt.Normalize(real_premium.min(), real_premium.max())
bar_colors = [cmap(scaled) for scaled in map(norm, real_premium)]
ax.barh(sorted_real['STATE'], real_premium / 1000, color=bar_colors)

# Dashed vertical marker at the median adjusted premium across states.
median_val = real_premium.median()
ax.axvline(
    median_val / 1000,
    color='black',
    linestyle='--',
    alpha=0.5,
    label=f'Median: ${median_val/1000:.1f}k',
)

ax.set_xlabel('Education Premium ($k, COLI-adjusted)', fontsize=12)
ax.set_title(
    'COLI-Adjusted Education Premium by State (BA or Higher vs HS or Lower)',
    fontsize=13,
    fontweight='bold',
)
ax.legend(fontsize=11)
plt.tight_layout()
plt.show()

# Side-by-side comparison: nominal premium (left) and COLI-adjusted premium
# (right). Each panel is sorted and color-scaled independently.
fig, axes = plt.subplots(1, 2, figsize=(18, 10))
left_ax, right_ax = axes
cmap = plt.cm.RdYlGn

# --- Left panel: nominal premium ---
sorted_nominal = pivot.sort_values('EDUCATION_PREMIUM')
nominal_values = sorted_nominal['EDUCATION_PREMIUM']
nominal_median = nominal_values.median()
norm_n = plt.Normalize(nominal_values.min(), nominal_values.max())
nominal_colors = [cmap(norm_n(v)) for v in nominal_values]
left_ax.barh(sorted_nominal['STATE'], nominal_values / 1000, color=nominal_colors)
left_ax.axvline(
    nominal_median / 1000,
    color='black',
    linestyle='--',
    alpha=0.5,
    label=f"Median: ${nominal_median/1000:.1f}k",
)
left_ax.set_title('Nominal Education Premium', fontweight='bold', fontsize=12)
left_ax.set_xlabel('Education Premium ($k)')
left_ax.legend()

# --- Right panel: COLI-adjusted premium ---
sorted_real = pivot.sort_values('REAL_PREMIUM')
real_values = sorted_real['REAL_PREMIUM']
real_median = real_values.median()
norm_r = plt.Normalize(real_values.min(), real_values.max())
real_colors = [cmap(norm_r(v)) for v in real_values]
right_ax.barh(sorted_real['STATE'], real_values / 1000, color=real_colors)
right_ax.axvline(
    real_median / 1000,
    color='black',
    linestyle='--',
    alpha=0.5,
    label=f"Median: ${real_median/1000:.1f}k",
)
right_ax.set_title('COLI-Adjusted Education Premium', fontweight='bold', fontsize=12)
right_ax.set_xlabel('Education Premium ($k, COLI-adjusted)')
right_ax.legend()

plt.suptitle(
    'Education Premium: Nominal vs Cost-of-Living Adjusted',
    fontsize=14,
    fontweight='bold',
)
plt.tight_layout()
plt.show()

# States singled out for the hypothesis comparison, in display order.
hypothesis_states = ['California', 'Washington', 'Hawaii', 'Arkansas', 'Mississippi', 'Iowa']
is_hypothesis_state = pivot['STATE'].isin(hypothesis_states)
highlight = pivot.loc[is_hypothesis_state].copy()
# Re-order the rows so they follow the order of hypothesis_states.
highlight = highlight.set_index('STATE').loc[hypothesis_states].reset_index()

# Grouped bars: nominal and COLI-adjusted premium next to each other per state.
fig, ax = plt.subplots(figsize=(7, 5))
x = np.arange(len(highlight))
width = 0.35
half_width = width/2
nominal_k = highlight['EDUCATION_PREMIUM'] / 1000
adjusted_k = highlight['REAL_PREMIUM'] / 1000

ax.bar(x - half_width, nominal_k, width,
       label='Nominal Premium', color='#3498DB', alpha=0.8)
ax.bar(x + half_width, adjusted_k, width,
       label='COLI-Adjusted Premium', color='#E67E22', alpha=0.8)

ax.set_xticks(x)
ax.set_xticklabels(highlight['STATE'], rotation=15, ha='right')
ax.set_ylabel('Education Premium ($k)')
ax.set_title(
    'Nominal vs COLI-Adjusted Premium\nfor Hypothesis States',
    fontsize=13,
    fontweight='bold',
)
ax.legend()
plt.tight_layout()
plt.show()

# Natural log of household income; zeros are turned into NaN first so that
# np.log never sees a zero.
loggable_income = filtered_data['HHINCOME'].replace(0, np.nan)
filtered_data['log_income'] = np.log(loggable_income)
filtered_data.head()

# Unweighted OLS with state, education group and their interaction, followed
# by a Type II ANOVA table.
interaction_formula = 'log_income ~ C(STATE) + C(EDUCGROUP) + C(STATE):C(EDUCGROUP)'
interaction_model = smf.ols(interaction_formula, data=filtered_data).fit()
anova_table = anova_lm(interaction_model, typ=2)
anova_table

# QQ plot of a random subsample of the interaction model's residuals against
# a normal distribution. The sample is seeded so the figure is reproducible,
# and its size is capped at the number of residuals available so sampling
# without replacement cannot fail on a smaller dataset.
qq_sample_size = min(10000, len(interaction_model.resid))
residuals = interaction_model.resid.sample(n=qq_sample_size, random_state=0)
stats.probplot(residuals, dist="norm", plot=plt)
plt.title('QQ Plot of Residuals (Log Income)')
plt.show()

# Additive (no interaction) unweighted OLS and its Type II ANOVA table.
two_way_formula = 'log_income ~ C(EDUCGROUP) + C(STATE)'
two_way_anova_model = smf.ols(two_way_formula, data=filtered_data).fit()
anova_table = anova_lm(two_way_anova_model, typ=2)
anova_table

# Weighted least squares using HHWT as the observation weights.
household_weights = filtered_data['HHWT']

# Model 1: education group only.
model1 = smf.wls("log_income ~ C(EDUCGROUP)", data=filtered_data, weights=household_weights).fit()
model1.summary()

# Model 2: education group plus state.
model2 = smf.wls("log_income ~ C(EDUCGROUP) + C(STATE)", data=filtered_data, weights=household_weights).fit()
model2.summary()

# Fit statistics of the two WLS models side by side.
fitted_models = [model1, model2]
results_df = pd.DataFrame({
    'Model': ['Model 1: Education Only', 'Model 2: Education + State'],
    'R-squared': [round(m.rsquared, 4) for m in fitted_models],
    'Adj. R-squared': [round(m.rsquared_adj, 4) for m in fitted_models],
    'AIC': [round(m.aic, 1) for m in fitted_models],
    'N': [int(m.nobs) for m in fitted_models],
})
results_df

def _five_states_by(column, ascending):
    """Return STATE and `column` for the first five rows of `pivot` sorted by `column`."""
    ordered = pivot.sort_values(column, ascending=ascending)
    return ordered[["STATE", column]].head(5).reset_index(drop=True)


# Five largest nominal premiums.
_five_states_by("EDUCATION_PREMIUM", ascending=False)

# Five smallest nominal premiums.
_five_states_by("EDUCATION_PREMIUM", ascending=True)

# Five largest COLI-adjusted premiums.
_five_states_by("REAL_PREMIUM", ascending=False)

# Five smallest COLI-adjusted premiums.
_five_states_by("REAL_PREMIUM", ascending=True)

Group	EDUC Codes	Description
HS or Lower	0–6	HS diploma/GED or lower
Some College	7–9	Some college, no bachelor's degree
BA or Higher	10–11	Bachelor's degree or higher

	YEAR	SAMPLE	SERIAL	CBSERIAL	HHWT	CLUSTER	ADJUST	STATEFIP	STRATA	GQ	HHINCOME	PERNUM	PERWT	EDUC	EDUCD	INCTOT
0	2024	202401	1	2024010000060	41.0	2024000000011	1.01525	1	250001	3	9999999	1	41.0	2	25	18500
1	2024	202401	2	2024010000094	52.0	2024000000021	1.01525	1	260001	3	9999999	1	52.0	6	64	0
2	2024	202401	3	2024010000146	31.0	2024000000031	1.01525	1	140101	3	9999999	1	31.0	6	63	27100
3	2024	202401	4	2024010000156	4.0	2024000000041	1.01525	1	280201	4	9999999	1	4.0	7	71	1000
4	2024	202401	5	2024010000182	19.0	2024000000051	1.01525	1	80001	3	9999999	1	19.0	6	63	0

	YEAR	HHWT	STATEFIP	HHINCOME	EDUC
0	2024	11.0	1	91200	6
1	2024	61.0	1	134000	11
2	2024	67.0	1	33300	6
3	2024	199.0	1	207100	10
4	2024	68.0	1	195600	6

	YEAR	HHWT	HHINCOME	EDUC	STATE
0	2024	11.0	91200	6	Alabama
1	2024	61.0	134000	11	Alabama
2	2024	67.0	33300	6	Alabama
3	2024	199.0	207100	10	Alabama
4	2024	68.0	195600	6	Alabama

	count	mean	std	min	25%	50%	75%	max
EDUCGROUP
BA or Higher	544891.0	167532.0	157446.0	1.0	72000.0	125000.0	205700.0	2811900.0
HS or Lower	495770.0	75415.0	77044.0	1.0	29100.0	56200.0	98000.0	1914000.0
Some College	291550.0	97332.0	90272.0	1.0	42000.0	76600.0	125000.0	2523200.0

🎓 DegreeDelta: Quantifying the Income Advantage of College Degrees Across U.S. States

Group Members: Dylan Dsouza, Jaden Goelkel, Sam Genous, Anna Rosenbaum and Wanchen Yang

1. Statement of the Problem

Hypotheses

2. Relevance and Motivation

3. Data Source

4. Data Description

5. Data Wrangling

5.1 Imports

5.2 Data Loading

5.3 Data Cleaning

5.4 Data Preprocessing

6. Exploratory Data Analysis

6.1 Education Group Composition

6.2 Income Distribution by Education Group

6.3 Education Group Composition by State

6.4 Education Premium by State

6.5 Cost-of-Living Adjustment

7. Previous Research

8. Analyses Performed

8.1 Log Transformation

8.2 Full Interaction ANOVA

8.3 QQ Plot of Residuals

8.4 Two-Way ANOVA

8.5 Weighted Linear Regression

8.6 Model Fit Comparison

9. Results

9.1 Statistical Significance of the Education Premium

9.2 State-Level Magnitude of the Education Premium

9.3 Cost-of-Living Adjusted Education Premium

9.4 Hypothesis Evaluation

10. Limitations

11. References

	0
YEAR	int64
HHWT	float64
STATEFIP	int64
HHINCOME	int64
PERNUM	int64
EDUC	int64

	Count	Percent (%)
EDUCGROUP
BA or Higher	544891	40.901254
HS or Lower	495770	37.214075
Some College	291550	21.884671

	EDUCGROUP	Weighted Mean Income ($)
0	BA or Higher	162724.626449
1	HS or Lower	74593.605160
2	Some College	94876.639716

	STATE	EDUCGROUP	weighted_mean_income
0	Alabama	BA or Higher	134943.0
1	Alabama	HS or Lower	62850.0
2	Alabama	Some College	81694.0
3	Alaska	BA or Higher	159980.0
4	Alaska	HS or Lower	85137.0

	STATE	BA or Higher	HS or Lower	EDUCATION_PREMIUM	RANK
0	District of Columbia	197105.0	76676.0	120429.0	1
1	New York	185397.0	74426.0	110971.0	2
2	California	198066.0	88157.0	109909.0	3
3	Massachusetts	193158.0	84899.0	108259.0	4
4	Connecticut	193260.0	85438.0	107822.0	5

	STATE	EDUCATION_PREMIUM	COLI	REAL_PREMIUM	NOMINAL_RANK	REAL_RANK	RANK_SHIFT
0	Virginia	100801.0	100.7	100100.297915	7	1	6
1	Connecticut	107822.0	112.3	96012.466607	5	2	3
2	Illinois	88373.0	94.4	93615.466102	10	3	7
3	Texas	86230.0	92.7	93020.496224	11	4	7
4	Georgia	84497.0	91.3	92548.740416	13	5	8

	sum_sq	df	F	PR(>F)
C(STATE)	1.597602e+04	50.0	329.934088	0.0
C(EDUCGROUP)	1.772140e+05	2.0	91494.828804	0.0
C(STATE):C(EDUCGROUP)	2.068829e+03	100.0	21.362559	0.0
Residual	1.290014e+06	1332058.0	NaN	NaN

	sum_sq	df	F	PR(>F)
C(EDUCGROUP)	1.772140e+05	2.0	91355.188807	0.0
C(STATE)	1.597602e+04	50.0	329.430541	0.0
Residual	1.292083e+06	1332158.0	NaN	NaN

Dep. Variable:	log_income	R-squared:	0.122
Model:	WLS	Adj. R-squared:	0.122
Method:	Least Squares	F-statistic:	9.229e+04
Date:	Mon, 09 Mar 2026	Prob (F-statistic):	0.00
Time:	21:47:44	Log-Likelihood:	-2.0912e+06
No. Observations:	1332211	AIC:	4.182e+06
Df Residuals:	1332208	BIC:	4.182e+06
Df Model:	2
Covariance Type:	nonrobust

	coef	std err	t	P>|t|	[0.025	0.975]
Intercept	11.6281	0.001	8455.792	0.000	11.625	11.631
C(EDUCGROUP)[T.HS or Lower]	-0.8378	0.002	-424.771	0.000	-0.842	-0.834
C(EDUCGROUP)[T.Some College]	-0.5413	0.002	-236.034	0.000	-0.546	-0.537

Omnibus:	613334.827	Durbin-Watson:	1.981
Prob(Omnibus):	0.000	Jarque-Bera (JB):	10204424.115
Skew:	-1.799	Prob(JB):	0.00
Kurtosis:	16.073	Cond. No.	3.55

	coef	std err	t	P>|t|	[0.025	0.975]
Intercept	11.4500	0.007	1622.391	0.000	11.436	11.464
C(EDUCGROUP)[T.HS or Lower]	-0.8190	0.002	-415.206	0.000	-0.823	-0.815
C(EDUCGROUP)[T.Some College]	-0.5258	0.002	-229.598	0.000	-0.530	-0.521
C(STATE)[T.Alaska]	0.3096	0.020	15.368	0.000	0.270	0.349
C(STATE)[T.Arizona]	0.1892	0.009	20.927	0.000	0.171	0.207
C(STATE)[T.Arkansas]	-0.0115	0.011	-1.011	0.312	-0.034	0.011
C(STATE)[T.California]	0.3321	0.007	44.547	0.000	0.317	0.347
C(STATE)[T.Colorado]	0.2485	0.009	26.438	0.000	0.230	0.267
C(STATE)[T.Connecticut]	0.2879	0.011	26.685	0.000	0.267	0.309
C(STATE)[T.Delaware]	0.2101	0.017	12.377	0.000	0.177	0.243
C(STATE)[T.District of Columbia]	0.1492	0.019	7.957	0.000	0.112	0.186
C(STATE)[T.Florida]	0.1292	0.008	16.782	0.000	0.114	0.144
C(STATE)[T.Georgia]	0.1353	0.008	15.959	0.000	0.119	0.152
C(STATE)[T.Hawaii]	0.3276	0.016	20.756	0.000	0.297	0.359
C(STATE)[T.Idaho]	0.1722	0.013	12.855	0.000	0.146	0.198
C(STATE)[T.Illinois]	0.1615	0.008	19.615	0.000	0.145	0.178
C(STATE)[T.Indiana]	0.1111	0.009	12.114	0.000	0.093	0.129
C(STATE)[T.Iowa]	0.1146	0.011	10.379	0.000	0.093	0.136
C(STATE)[T.Kansas]	0.1148	0.011	10.056	0.000	0.092	0.137
C(STATE)[T.Kentucky]	0.0044	0.010	0.433	0.665	-0.015	0.024
C(STATE)[T.Louisiana]	-0.0544	0.010	-5.377	0.000	-0.074	-0.035
C(STATE)[T.Maine]	0.1008	0.014	6.974	0.000	0.072	0.129
C(STATE)[T.Maryland]	0.3145	0.009	33.211	0.000	0.296	0.333
C(STATE)[T.Massachusetts]	0.3017	0.009	33.033	0.000	0.284	0.320
C(STATE)[T.Michigan]	0.0744	0.009	8.745	0.000	0.058	0.091
C(STATE)[T.Minnesota]	0.2056	0.009	21.657	0.000	0.187	0.224
C(STATE)[T.Mississippi]	-0.0860	0.012	-7.449	0.000	-0.109	-0.063
C(STATE)[T.Missouri]	0.0736	0.009	7.894	0.000	0.055	0.092
C(STATE)[T.Montana]	0.0928	0.016	5.769	0.000	0.061	0.124
C(STATE)[T.Nebraska]	0.0952	0.013	7.347	0.000	0.070	0.121
C(STATE)[T.Nevada]	0.2030	0.011	17.921	0.000	0.181	0.225
C(STATE)[T.New Hampshire]	0.3022	0.015	20.302	0.000	0.273	0.331
C(STATE)[T.New Jersey]	0.3490	0.009	39.952	0.000	0.332	0.366
C(STATE)[T.New Mexico]	-0.0206	0.013	-1.604	0.109	-0.046	0.005
C(STATE)[T.New York]	0.1711	0.008	21.894	0.000	0.156	0.186
C(STATE)[T.North Carolina]	0.0749	0.008	8.922	0.000	0.058	0.091
C(STATE)[T.North Dakota]	0.1198	0.018	6.583	0.000	0.084	0.155
C(STATE)[T.Ohio]	0.0843	0.008	10.186	0.000	0.068	0.100
C(STATE)[T.Oklahoma]	0.0022	0.010	0.212	0.832	-0.018	0.023

Omnibus:	622883.291	Durbin-Watson:	2.000
Prob(Omnibus):	0.000	Jarque-Bera (JB):	10602156.692
Skew:	-1.829	Prob(JB):	0.00
Kurtosis:	16.327	Cond. No.	65.9

	Model	R-squared	Adj. R-squared	AIC	N
0	Model 1: Education Only	0.1217	0.1217	4182333.4	1332211
1	Model 2: Education + State	0.1310	0.1310	4168190.3	1332211

	STATE	EDUCATION_PREMIUM
0	Wyoming	51552.0
1	Montana	55647.0
2	Hawaii	56057.0
3	Iowa	56411.0
4	South Dakota	58429.0

	STATE	REAL_PREMIUM
0	Hawaii	29993.044409
1	Wyoming	53981.151832
2	Oregon	57599.107143
3	Montana	58637.513172
4	Alaska	60454.765751