import pandas as pd

# Load the happiness / GDP table (columns used below: "Felicidad", "GDP").
data = pd.read_csv("A1.2 Felicidad y GDP.csv")

# Two descending rankings of the same rows: one by happiness score,
# one by GDP.
sort_felicidad = data.sort_values(by="Felicidad", ascending=False)
sort_GDP = data.sort_values(by="GDP", ascending=False)

# Show the ten highest rows of each ranking, happiness first.
for ranking in (sort_felicidad, sort_GDP):
    print(ranking.head(10))

          Pais  Felicidad           GDP
0      Finland     7.8210  2.718370e+11
1      Denmark     7.6362  3.560850e+11
2      Iceland     7.5575  2.171808e+10
3  Switzerland     7.5116  7.522480e+11
4  Netherlands     7.4149  9.138650e+11
5   Luxembourg     7.4040  7.335313e+10
6       Sweden     7.3843  5.414870e+11
7       Norway     7.3651  3.621980e+11
8       Israel     7.3638  4.071010e+11
9  New Zealand     7.1998  2.117350e+11
               Pais  Felicidad           GDP
15    United States     6.9768  2.089370e+13
70            China     5.5853  1.468770e+13
52            Japan     6.0389  5.040110e+12
13          Germany     7.0341  3.846410e+12
16   United Kingdom     6.9425  2.756900e+12
130           India     3.7771  2.667690e+12
19           France     6.6867  2.630320e+12
29            Italy     6.4667  1.892570e+12
14           Canada     7.0251  1.645420e+12
57      South Korea     5.9351  1.637900e+12

%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
x = np.log10(data["GDP"])
y = data.Felicidad
plt.scatter(x, y, alpha=0.75)
plt.title("GDP v.s. Felicidad")
plt.ylabel("Felicidad")
plt.xlabel("GDP [Log10]")
plt.show()

# Simple linear regression coefficients computed by hand:
#   B1 = sum((x - mean(x)) * (y - mean(y))) / sum((x - mean(x))**2)
#   B0 = mean(y) - B1 * mean(x)
x_prom = x.mean()
y_prom = y.mean()

# Deviations from the mean, named once and reused in both sums.
x_dev = x - x_prom
y_dev = y - y_prom

B1_num = sum(x_dev * y_dev)
B1_den = sum(x_dev ** 2)
B1 = B1_num / B1_den
B0 = y_prom - B1 * x_prom

print("B0 =", B0)
print("B1 =", B1)

B0 = -1.3023500570747277
B1 = 0.6281284658810408

# Fitted values from the hand-computed intercept (B0) and slope (B1).
yHat = B0 + B1 * x

# Same scatter plot as before, with the fitted regression line drawn
# on top in red.
plt.scatter(x, y, alpha=0.75)
plt.plot(x, yHat, c="r", linewidth=3, alpha=0.75)
plt.xlabel("GDP [Log10]")
plt.ylabel("Felicidad")
plt.title("GDP v.s. Felicidad")
plt.show()

# Residual sum of squares: squared gaps between the observed and the
# fitted happiness values, added up.
residuals = y - yHat
RSS = sum(residuals ** 2)
print("RSS =", RSS)

RSS = 131.3738317732635

import scipy.stats as st

n = len(x)
dof = n - 2                      # residual degrees of freedom
Sxx = sum((x - x_prom) ** 2)     # sum of squared deviations of x

# Standard error of the slope: sqrt(RSS / ((n - 2) * Sxx)).
SEB1 = np.sqrt(RSS / (dof * Sxx))
print("SE =", SEB1)

# Upper endpoint of the central 95% interval of a t distribution with
# n - 2 degrees of freedom; used as the multiplier for the slope's CI.
_, per = st.t.interval(confidence=0.95, df=dof)

CIlow = B1 - per * SEB1
CIhigh = B1 + per * SEB1
print("Intervalo de confianza: (",CIlow,",",CIhigh,")")

SE = 0.09983378435340727
Intervalo de confianza: ( 0.4307393313073311 , 0.8255176004547504 )

# Goodness-of-fit measures for the hand-computed regression.

# Residual standard error: sqrt(RSS / (n - 2)).
RSE = np.sqrt(RSS / (n - 2))

# Total sum of squares of y around its mean, and the share of it
# that the fit accounts for (R^2 = 1 - RSS / TSS).
TSS = sum((y - y_prom) ** 2)
R2 = 1 - RSS / TSS

print("Residual standard error =", RSE)
print("R^2 =", R2)

Residual standard error = 0.9721807858537376
R^2 = 0.22166361654970657

import statsmodels.api as sm

# Cross-check of the manual calculation: fit the same regression of y
# on x with statsmodels.  add_constant supplies the intercept column.
design = sm.add_constant(x)
model = sm.OLS(y, design)
results = model.fit()

print(results.summary())
print(results.pvalues)
# Square root of the fitted model's scale attribute, printed as the RSE.
print("RSE:", results.scale ** 0.5)

                            OLS Regression Results                            
==============================================================================
Dep. Variable:              Felicidad   R-squared:                       0.222
Model:                            OLS   Adj. R-squared:                  0.216
Method:                 Least Squares   F-statistic:                     39.59
Date:                Mon, 18 Aug 2025   Prob (F-statistic):           3.83e-09
Time:                        00:59:44   Log-Likelihood:                -195.09
No. Observations:                 141   AIC:                             394.2
Df Residuals:                     139   BIC:                             400.1
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -1.3024      1.094     -1.191      0.236      -3.465       0.860
GDP            0.6281      0.100      6.292      0.000       0.431       0.826
==============================================================================
Omnibus:                        2.648   Durbin-Watson:                   0.455
Prob(Omnibus):                  0.266   Jarque-Bera (JB):                2.523
Skew:                          -0.326   Prob(JB):                        0.283
Kurtosis:                       2.944   Cond. No.                         148.
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
const    2.357975e-01
GDP      3.825718e-09
dtype: float64
RSE: 0.9721807858537376

A1.2 Regresión lineal simple