This notebook demonstrates how to perform kernel regression manually in Python. While Statsmodels provides a library for kernel regression, doing kernel regression by hand can help us better understand how we arrive at the final result.

First I will show how kernel regression is done using Statsmodels. Next I will show how it is done by hand, and finally I will overlay both plots to show that the results are the same.

To begin with, let's look at kernel regression using Statsmodels.

Kernel Regression by Statsmodels

We generate the y values using a lambda function. You can change the lambda function around to see what happens. The x values, i.e. the independent variable, are controlled by new_x, where we have displaced the x values to show that you can have irregularly spaced data.

Generating Fake Data
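Below is a minimal sketch of what the data generation might look like; the particular lambda function, the amount of jitter, and the noise level are all arbitrary choices you can change:

```python
import numpy as np

np.random.seed(42)  # make the fake data reproducible

# Underlying function we want to recover; change it to see what happens
f = lambda x: np.sin(x) + 0.3 * x

# Evenly spaced x values, displaced by random jitter so the
# independent variable is not on a perfectly regular grid
x = np.linspace(0, 10, 100)
new_x = x + np.random.normal(0, 0.1, size=x.shape)

# Noisy observations of f at the displaced points
y = f(new_x) + np.random.normal(0, 0.2, size=new_x.shape)
```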

Let us plot the data. We are going to be using Plotly Express throughout this article for all the plotting.
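A quick scatter plot of the generated data:

```python
import plotly.express as px

fig = px.scatter(x=new_x, y=y, title='Generated data')
fig.show()
```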

Our goal is to fit a curve to the above data points using regression. How can we go about it? Using Statsmodels, it is fairly simple.
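A minimal sketch of the fit, assuming the variable names from the data generation above. We pass reg_type='lc' so that Statsmodels uses the local-constant (Nadaraya-Watson) estimator, which is the one we will reproduce by hand:

```python
from statsmodels.nonparametric.kernel_regression import KernelReg

# var_type='c': the single independent variable is continuous
# reg_type='lc': local-constant (Nadaraya-Watson) regression
model = KernelReg(endog=y, exog=new_x, var_type='c', reg_type='lc')

# fit() returns two arrays: predicted values and marginal effects
y_pred_sm, marginal_effects = model.fit(new_x)
```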

Output of Kernel Regression

The output of kernel regression in the Statsmodels nonparametric regression module consists of two arrays:
1) The predicted y values
2) The marginal effects

The marginal effects are essentially the first derivative of the predicted values with respect to the independent variable for a univariate regression problem. More on marginal effects can be found in the Statsmodels documentation.

Kernel Regression by Hand in Python

To do kernel regression by hand, we need to understand a few things. First, here are some of the properties of the kernel.

1) The kernel is symmetric, i.e.

$$ K(x) = K(-x)$$

2) The area under the kernel function is equal to 1, meaning $$\int\limits_{-\infty}^{\infty} K(x) dx = 1 $$

We are going to use a Gaussian kernel to solve this problem. The Gaussian kernel has the form:

$$K(x) = \dfrac{1}{b \sqrt{2\pi}} e^{-\dfrac{(x - x_i)^2}{2b^2}}$$

Where $b$ is the bandwidth, $x_i$ are the points of the independent variable, and $x$ is the range of values over which we define the kernel function. In our case $x_i$ comes from new_x.
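As a quick numerical sanity check, we can verify both properties for this kernel (the bandwidth used here is an arbitrary choice):

```python
# Gaussian kernel centred at x_i with bandwidth b
def kernel(x, xi, b):
    return (1 / (b * np.sqrt(2 * np.pi))) * np.exp(-((x - xi) ** 2) / (2 * b ** 2))

b_check = 0.5  # arbitrary bandwidth, just for this check
grid = np.linspace(-10, 10, 10001)
dx = grid[1] - grid[0]

print(np.allclose(kernel(grid, 0, b_check), kernel(-grid, 0, b_check)))  # symmetry -> True
print((kernel(grid, 0, b_check) * dx).sum())                             # area -> ~1.0
```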

Step 1: Calculate the Kernel for a single input x point

We first calculate the kernel values for a single input point $x_i$ and display them in a dataframe.
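A minimal sketch of that step, reusing the kernel function defined above; the dataframe name is illustrative, and we take the bandwidth from model.bw so the by-hand results stay comparable to Statsmodels:

```python
import pandas as pd

xi = new_x[0]      # a single input point
b = model.bw[0]    # reuse the bandwidth statsmodels selected

# Kernel values over the x range for this one point
single_kernel_df = pd.DataFrame({'x': x, 'K(x)': kernel(x, xi, b)})
single_kernel_df.head()
```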

Visualizing the Kernels for all the input x points

We want to visualize the kernel $K(x)$ for each $x_i$. Below we calculate the kernel function values and store them in a dictionary called kernel_fns, which is converted to a dataframe kernels_df. We then use Plotly Express to plot each kernel function.
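A sketch of that computation and plot:

```python
# One kernel per input point, keyed by the point's value
kernel_fns = {f'x_i = {xi:.2f}': kernel(x, xi, b) for xi in new_x}
kernels_df = pd.DataFrame(kernel_fns, index=x)

# Each column of kernels_df becomes one curve
fig = px.line(kernels_df)
fig.show()
```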

Step 2: Calculate the weights for each input x value

We will need to calculate the weight for a single input. The weight is calculated using the expression below:

$$w(x, x_i) = \dfrac{K(x - x_i)}{\sum\limits_{j=1}^{n} K(x - x_j)}$$

The above equation gives the weights for the $i^{th}$ element of new_x, where $x$ ranges over all the elements of new_x. The denominator is summed over all the points in new_x. What is interesting to note here is that you use the kernels for all input points to calculate the weights. The equation above essentially normalizes the weights so that they lie between 0 and 1 and sum to 1.

The equation above has been implemented in the function weights, which takes a single input point and returns a row of weights. It does this by looping over all the input points while applying the above equation.
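A sketch of what the weights function might look like (the exact implementation is an assumption):

```python
def weights(xi, input_points, b):
    """Row of weights for a single input point xi."""
    # Kernel value of xi relative to every input point (the denominator terms)
    kernel_vals = np.array([kernel(xi, xj, b) for xj in input_points])
    # Normalize so the row sums to 1
    return kernel_vals / kernel_vals.sum()

print(weights(new_x[0], new_x, b).sum())  # -> 1.0
```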

Step 3: Calculate the y pred value for a single input point

We get the predicted value for the $i^{th}$ point from:

$$\hat{y}_{i} = y_1 w_{i1} + y_2 w_{i2} + y_3 w_{i3} +...+ y_n w_{in}$$

This equation is implemented in the function single_y_pred. We take a dot product of the row of weights we get from the weights function and the y values from our fake data. The equation above represents that dot product.
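A minimal sketch of single_y_pred:

```python
def single_y_pred(xi, input_points, y, b):
    """Predicted value at xi: dot product of its weight row with y."""
    return np.dot(weights(xi, input_points, b), y)
```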

Step 4: Calculate the y pred values for all the input points

The code below loops over all the input points, calculates the predicted values, and appends them to Y_pred. Once we have the predicted values, all we need to do is visualize them.
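A sketch of that loop:

```python
Y_pred = []
for xi in new_x:
    Y_pred.append(single_y_pred(xi, new_x, y, b))
Y_pred = np.array(Y_pred)
```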

Step 5: Visualize the difference between the two methods

Now that we have calculated the predicted values manually, we can compare our regression curve to the one we get from Statsmodels. We overlay the two fits and find that they match.
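A sketch of the overlay, using plotly.graph_objects to combine the traces; the curves coincide here because we reused the bandwidth that Statsmodels selected and the same local-constant estimator:

```python
import plotly.graph_objects as go

order = np.argsort(new_x)  # sort so the line traces are smooth

fig = go.Figure()
fig.add_trace(go.Scatter(x=new_x, y=y, mode='markers', name='data'))
fig.add_trace(go.Scatter(x=new_x[order], y=y_pred_sm[order], mode='lines', name='statsmodels'))
fig.add_trace(go.Scatter(x=new_x[order], y=Y_pred[order], mode='lines', name='by hand'))
fig.show()
```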

Conclusion

This article shows how we can understand the inner workings of the kernel regression algorithm through a simple example with generated data. If you learned something from this article, do like and share it.

Thank you for reading!
