Loss Functions#
References#
https://en.wikipedia.org/wiki/Loss_functions_for_classification
https://www.cs.cornell.edu/courses/cs4780/2018fa/lectures/lecturenote10.html
[1]:
import numpy as np
from sklearn import metrics
import matplotlib.pyplot as plt
plt.style.use("fivethirtyeight")
Loss vs Cost Function#
The loss L measures the error of a single prediction, while the cost J averages the loss over all N training examples:
\begin{align} \text{Cost Function } J(h_w(X), y) = \frac{1}{N} \sum_{i=1}^{N} L(h_w(x_i), y_i) \end{align}
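As a toy illustration (a minimal sketch; the loss and helper names below are made up for this example), the cost is just the mean of the per-example losses:

[ ]:
# hypothetical per-example loss L and the cost J built from it
def example_loss(y_hat, y):          # L: error of a single prediction
    return (y_hat - y) ** 2

def cost(loss_fn, y_hat, y):         # J: average of L over all N examples
    return np.mean([loss_fn(p, t) for p, t in zip(y_hat, y)])

cost(example_loss, [1.0, 2.0, 3.0], [1.0, 2.5, 2.0])   # (0 + 0.25 + 1.0) / 3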
Classification Loss#
Hinge Loss#
\begin{align} L(h_w(x_i), y_i) = \max(1 - h_w(x_i) y_i, 0)^p \end{align}
| p | Use case |
|---|---|
| p = 1 | SVM (standard hinge loss) |
| p = 2 | Squared hinge loss SVM |
Generally used for the standard SVM.
[23]:
def hinge_loss(y_true, y_pred, p):
    # max(1 - y * h(x), 0) ** p, element-wise; np.c_ stacks the margin term
    # next to zeros so np.max(..., axis=1) takes the element-wise maximum with 0
    return np.power(np.max(np.c_[1 - (y_true * y_pred), np.zeros_like(y_true)], axis=1), p)
Correct classification
[29]:
hinge_loss(-1, -1, 1), hinge_loss(-1, -1, 2)
[29]:
(array([0]), array([0]))
Incorrect Classification
[31]:
hinge_loss(-1, 1, 1), hinge_loss(1, -1, 1)
[31]:
(array([2]), array([2]))
[41]:
y_true = np.array([1, -1, 1, -1, 1, -1, 1, -1, 1])
y_pred = np.array([-1, -1, 1, -1, 1, -1, -1, -1, 1])
metrics.hinge_loss(y_true, y_pred), hinge_loss(y_true, y_pred, 1).mean()
[41]:
(0.4444444444444444, 0.4444444444444444)
[42]:
metrics.hinge_loss([-1], [-1]), metrics.hinge_loss([-1], [1])
[42]:
(0.0, 2.0)
Log Loss#
\begin{align} L(h_w(x_i), y_i) = \log(1 + e^{- y_i h_w(x_i)}) \end{align}
Used for Logistic Regression
[43]:
def logistic_loss(y_true, y_pred):
    # margin-based logistic loss: log(1 + exp(-y * h(x))), labels in {-1, +1}
    return np.log(1 + np.exp(-(y_true * y_pred)))

def log_loss(y_true, y_pred):
    # binary cross-entropy: labels in {0, 1}, y_pred a probability in (0, 1);
    # note the leading minus sign so the loss is non-negative
    return -((y_true * np.log(y_pred)) + ((1 - y_true) * np.log(1 - y_pred)))
Correct classification
[45]:
logistic_loss(-1, -1), logistic_loss(1, 1)
[45]:
(0.31326168751822286, 0.31326168751822286)
Incorrect Classification
[46]:
logistic_loss(-1, 1), logistic_loss(1, -1)
[46]:
(1.3132616875182228, 1.3132616875182228)
[55]:
y_true = np.array([1, -1, 1, -1, 1, -1, 1, -1, 1])
y_pred = np.array([-1, -1, 1, -1, 1, -1, -1, -1, 1])
logistic_loss(y_true, y_pred).mean()
[55]:
0.535483909740445
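The two functions above are two views of the same loss: mapping the ±1 labels to {0, 1} and pushing the raw score through a sigmoid, the binary cross-entropy reproduces the margin-based logistic loss. A quick sketch of that check (the `sigmoid` helper and the example scores are introduced here just for the illustration):

[ ]:
def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

scores = np.array([2.0, -0.5, 1.2, -3.0])   # raw scores h_w(x)
labels = np.array([1, -1, -1, 1])           # labels in {-1, +1}

margin_form = logistic_loss(labels, scores)               # log(1 + exp(-y h))
prob_form = log_loss((labels + 1) // 2, sigmoid(scores))  # cross-entropy on sigmoid(h)
np.allclose(margin_form, prob_form)                       # expected: True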
Exponential Loss#
\begin{align} L(h_w(x_i), y_i) = e^{-h_w(x_i) y_i} \end{align}
Used in AdaBoost
Very aggressive: the loss for a misprediction grows exponentially with how confidently wrong the prediction is (a comparison with the other margin losses follows at the end of this section)
[56]:
def exp_loss(y_true, y_pred):
return np.exp(-(y_pred * y_true))
Correct classification
[57]:
exp_loss(-1, -1), exp_loss(1, 1)
[57]:
(0.36787944117144233, 0.36787944117144233)
Incorrect Classification
[58]:
exp_loss(-1, 1), exp_loss(1, -1)
[58]:
(2.718281828459045, 2.718281828459045)
[59]:
y_true = np.array([1, -1, 1, -1, 1, -1, 1, -1, 1])
y_pred = np.array([-1, -1, 1, -1, 1, -1, -1, -1, 1])
exp_loss(y_true, y_pred).mean()
[59]:
0.8901910827909096
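To see how much more aggressive this is than the hinge or logistic loss, compare the three at a few strongly misclassified points (margins well below zero), reusing the functions defined earlier; the margin values are made up for the illustration:

[ ]:
margins = np.array([-1.0, -2.0, -4.0])   # y * h_w(x); negative means misclassified
ones = np.ones_like(margins)             # fix the label at +1, so the margin equals the prediction

# hinge grows linearly, logistic roughly linearly, exponential blows up
hinge_loss(ones, margins, 1), logistic_loss(ones, margins), exp_loss(ones, margins)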
Zero-One Loss#
\begin{align} L(h_w(x_i), y_i) &= \delta\big(\operatorname{sign}(h_w(x_i)) \ne y_i\big) \\ \delta &= \begin{cases} 1 & \text{if } \operatorname{sign}(h_w(x_i)) \ne y_i \text{, i.e. misclassification} \\ 0 & \text{otherwise} \end{cases} \end{align}
This is the actual classification error: it simply counts misclassifications. It is non-convex and non-differentiable, which is why the convex losses above are used as surrogates in practice.
[60]:
def zero_one_loss(y_true, y_pred):
    # 1 if the sign of the prediction disagrees with the label, else 0
    return np.int32(np.sign(y_pred) != y_true)
Correct classification
[61]:
zero_one_loss(-1, -1), zero_one_loss(1, 1)
[61]:
(0, 0)
Incorrect Classification
[62]:
zero_one_loss(-1, 1), zero_one_loss(1, -1)
[62]:
(1, 1)
[63]:
y_true = np.array([1, -1, 1, -1, 1, -1, 1, -1, 1])
y_pred = np.array([-1, -1, 1, -1, 1, -1, -1, -1, 1])
zero_one_loss(y_true, y_pred).mean()
[63]:
0.2222222222222222
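As a cross-check (treating the ±1 predictions directly as predicted class labels), sklearn's `metrics.zero_one_loss` reports the same fraction of misclassified samples as the function above:

[ ]:
# fraction of misclassified samples: sklearn vs the implementation above
metrics.zero_one_loss(y_true, y_pred), zero_one_loss(y_true, y_pred).mean()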
Graphical Representation#
[70]:
n_samples = 100
x = np.linspace(-2, 2, n_samples)   # x plays the role of the margin y * h_w(x)
y = np.ones_like(x)                 # true labels fixed at +1

fig, ax = plt.subplots(1, 1, figsize=(5, 5))
ax.plot(x, logistic_loss(y, x), label='Logistic Loss')
ax.plot(x, exp_loss(y, x), label='Exponential Loss')
ax.plot(x, zero_one_loss(y, x), label='Zero-One Loss')
ax.plot(x, hinge_loss(y, x, 1), label='Hinge Loss')
plt.legend()
plt.show()

Regression Loss#
Squared Loss#
\begin{align} L(h_w(x_i), y_i) &= (h_w(x_i) - y_i)^2 \end{align}
Also known as Ordinary Least Squares (OLS)
Estimates the mean label (illustrated below)
Easily differentiable
Strongly affected by noisy data and outliers
[17]:
def squared_loss(y_true, y_pred):
    # element-wise (h(x) - y)^2
    return np.square(y_pred - y_true)
[72]:
y_true = np.array([12, 11, 10, 12.3, 11.2, 10.3])
y_pred = np.array([11.9, 11, 9.6, 12, 11, 10.1])
squared_loss(y_true, y_pred).mean(), metrics.mean_squared_error(y_true, y_pred)
[72]:
(0.05666666666666679, 0.05666666666666679)
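To make the "estimates the mean label" point concrete: among all constant predictions, the one minimizing the average squared loss is the sample mean. A small grid-search sketch (the target values and the candidate grid are made up for the illustration):

[ ]:
targets = np.array([1.0, 2.0, 3.0, 4.0, 10.0])             # note the outlier at 10
candidates = np.linspace(0, 10, 1001)                       # candidate constant predictions
avg_sq = [squared_loss(targets, c).mean() for c in candidates]
candidates[np.argmin(avg_sq)], targets.mean()               # both should land at 4.0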
Absolute Loss#
\begin{align} L(h_w(x_i), y_i) &= |h_w(x_i) - y_i| \end{align}
Estimates the median label (illustrated below)
Less sensitive to noise and outliers
[73]:
def absolute_loss(y_true, y_pred):
    # element-wise |h(x) - y|
    return np.abs(y_pred - y_true)
[74]:
y_true = np.array([12, 11, 10, 12.3, 11.2, 10.3])
y_pred = np.array([11.9, 11, 9.6, 12, 11, 10.1])
absolute_loss(y_true, y_pred).mean(), metrics.mean_absolute_error(y_true, y_pred)
[74]:
(0.20000000000000018, 0.20000000000000018)
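Running the same experiment with the absolute loss, the best constant prediction moves from the mean to the median, which is exactly why it shrugs off the outlier. This reuses `targets` and `candidates` from the squared-loss sketch above:

[ ]:
avg_abs = [absolute_loss(targets, c).mean() for c in candidates]
candidates[np.argmin(avg_abs)], np.median(targets)          # both should land at 3.0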
Graphical Representation#
[84]:
n_samples = 50
x = np.linspace(-3, 3, n_samples)   # x plays the role of the residual h_w(x) - y
y = np.zeros_like(x)                # true labels fixed at 0

fig, ax = plt.subplots(1, 1, figsize=(5, 5))
ax.plot(x, squared_loss(y, x), label='Squared Loss')
ax.plot(x, absolute_loss(y, x), label='Absolute Loss')
plt.legend()
plt.show()
