File size: 3,302 Bytes
1fcb538
9131bec
 
1fcb538
931207e
 
ba93ad8
61a9e50
 
 
617fdc7
1f08540
 
4c6f386
 
cc7b71f
96f9a87
 
 
9131bec
96f9a87
 
 
432ab81
96f9a87
 
 
 
 
617fdc7
96f9a87
9131bec
bea8b09
 
96f9a87
9131bec
16e4f76
e741bd4
 
 
 
 
 
 
 
 
16e4f76
 
 
 
 
 
7bc22ec
16e4f76
 
 
 
 
 
 
 
3b0e8de
 
 
3e06692
 
 
 
 
16e4f76
3b0e8de
 
9131bec
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import streamlit as st
import numpy as np
import matplotlib.pyplot as plt

# Page header: title, section heading, a motivating quote and the general
# form of the model that is fitted further down.
st.title('Fitting simple models with JAX')
st.header('A quadratic regression example')

# Italic quote followed by a markdown link to its source.
st.markdown('*\"Parametrised models are simply functions that depend on inputs and trainable parameters. There is no fundamental difference between the two, except that trainable parameters are shared across training samples whereas the input varies from sample to sample.\"* [(Yann LeCun, Deep learning course)](https://atcold.github.io/pytorch-Deep-Learning/en/week02/02-1/#Parametrised-models)')

# h(x, w) = sum_k w_k * phi_k(x): a weighted sum of K feature functions phi_k.
st.latex(r'''h(\boldsymbol x, \boldsymbol w)= \sum_{k=1}^{K}\boldsymbol w_{k} \phi_{k}(\boldsymbol x)''')


# Sidebar inputs: sample size, noise level and which loss formula to show.
number_of_observations = st.sidebar.slider(
    'Number of observations',
    min_value=50,
    max_value=150,
    value=100,
)
noise_standard_deviation = st.sidebar.slider(
    'Standard deviation of the noise',
    min_value=0.0,
    max_value=2.0,
    value=1.0,
)
cost_function = st.sidebar.radio(
    'What cost function you want to use for the fitting?',
    options=('RMSE-Loss', 'Huber-Loss'),
)

# Fixed seed: the same sidebar settings always produce the same data set.
np.random.seed(2)

# Design matrix with columns [1, x, x**2]; x is drawn from np.random.random.
X = np.ones((number_of_observations, 3))
X[:, 1] = np.random.random(number_of_observations)
X[:, 2] = X[:, 1] ** 2

w = np.array([3.0, -20.0, 32.0])  # coefficients

# Occasional outliers: a Bernoulli(0.03) draw per observation, scaled by 8.
# NOTE: this draw must stay before the randn() call below so the sequence of
# random numbers consumed from the seeded generator is unchanged.
additional_noise = 8 * np.random.binomial(1, 0.03, size=number_of_observations)

# Targets = quadratic signal + scaled standard-normal noise + outliers.
y = (
    X.dot(w)
    + noise_standard_deviation * np.random.randn(number_of_observations)
    + additional_noise
)


# Scatter plot of the observations: second column of X (x) against y.
fig, ax = plt.subplots(dpi=320)
ax.set_xlim((0, 1))
ax.set_ylim((-5, 26))
ax.scatter(X[:, 1], y, c='#e76254', edgecolors='firebrick')

st.pyplot(fig)
# pyplot keeps every figure it creates registered until it is explicitly
# closed. This script is executed again on each interaction, so without the
# close() call the figures (and their memory) would pile up across reruns.
plt.close(fig)

# Section introducing gradient-based training: heading, quote, update rule.
st.subheader('Train a model')

# Italic quote followed by a markdown link to its source.
st.markdown(
    '*"A Gradient Based Method is a method/algorithm that finds the minima '
    'of a function, assuming that one can easily compute the gradient of '
    'that function. It assumes that the function is continuous and '
    'differentiable almost everywhere (it need not be differentiable '
    'everywhere)."* '
    '[(Yann LeCun, Deep learning course)]'
    '(https://atcold.github.io/pytorch-Deep-Learning/en/week02/02-1/#Parametrised-models)'
)

st.markdown(
    'Using gradient descent we find the minima of the loss adjusting the '
    'weights in each step given the following formula:'
)

# Update rule: w <- w - eta * d loss(X, y, w) / d w
st.latex(
    r'\bf{w}\leftarrow \bf{w}-\eta '
    r'\frac{\partial\ell(\bf{X},\bf{y}, \bf{w})}{\partial \bf{w}}'
)


# Show the formula(s) that belong to the loss picked in the sidebar.
if cost_function == 'RMSE-Loss':
    st.write('You selected the RMSE loss function.')
    # Three equivalent spellings of the same loss, rendered one after another.
    for formula in (
        r'\ell(X, y, w)=\frac{1}{m}||Xw - y||_{2}^2',
        r'\ell(X, y, w)=\frac{1}{m}\big(\sqrt{(Xw - y)\cdot(Xw - y)}\big)^2',
        r'\ell(X, y, w)= \frac{1}{m}\sum_1^m (\hat{y}_i - y_i)^2',
    ):
        st.latex(formula)
else:
    # Every selection other than 'RMSE-Loss' ends up here.
    st.write("You selected the Huber loss function.")
    # Piecewise definition: quadratic up to delta, linear beyond it.
    st.latex('\n'.join((
        '',
        r'\ell_{H} = ',
        r'\begin{cases} ',
        r'      (y^{(i)}-\hat{y}^{(i)})^2 & \text{for }\quad |y^{(i)}-\hat{y}^{(i)}|\leq \delta \\',
        r'      2\delta|y^{(i)}-\hat{y}^{(i)}| - \delta^2 & \text{otherwise}',
        r'\end{cases}',
    )))

st.markdown('The training loop:')

# Snippet shown to the reader only; it is displayed, never executed here.
# It is kept flush-left with 4-space indentation so that the rendered code
# block is valid Python that can be copied as-is.
code = '''NUM_ITER = 1000
# initialize parameters
w = np.array([3., -2., -8.])
for i in range(NUM_ITER):
    # update parameter
    w -= learning_rate * grad_loss(w)'''

st.code(code, language='python')

# Display the first five rows of the design matrix X.
st.write(X[:5, :])