This website works better with desktop in both themes, for mobile devices please change to light theme.

# Distance Metrics

[6]:

import numpy as np
import matplotlib.pyplot as plt
from matplotlib import style

# The bare 'seaborn' style name was deprecated in Matplotlib 3.6 (renamed to
# 'seaborn-v0_8') and is rejected by newer releases, so prefer the new name
# and fall back to the old one on installations that predate the rename.
style.use('seaborn-v0_8' if 'seaborn-v0_8' in style.available else 'seaborn')

%matplotlib inline

[7]:

# Three 2-D sample points, one per row: (2, 0), (0, 2) and (4, -1).
m = np.array([(2, 0), (0, 2), (4, -1)])

[15]:

# Keep the coordinates as (3, 1) column slices of m: x is column 0, y is column 1.
x, y = m[:, :1], m[:, 1:]

# Faint reference lines through the origin (y = 0 and x = 0).
for draw_reference_line in (plt.axhline, plt.axvline):
    draw_reference_line(c='k', alpha=0.4)

plt.scatter(x, y)
plt.show()

[16]:

x,y = m[:,[0]],m[:,[1]]
plt.axhline(alpha=0.4, c='k')
plt.axvline(alpha=0.4, c='k')
# Draw each point as an arrow from the origin. angles='xy', scale_units='xy'
# and scale=1 make the arrow components data coordinates, so every arrow tip
# lands exactly on its point instead of being sized relative to the plot width.
plt.quiver(*np.zeros_like(m.T), x, y, angles='xy', scale_units='xy', scale=1)
# quiver only autoscales to the arrow tails (all at the origin here), so widen
# the limits by hand to keep the arrow tips inside the axes.
pad = 0.5
plt.xlim(min(x.min(), 0) - pad, max(x.max(), 0) + pad)
plt.ylim(min(y.min(), 0) - pad, max(y.max(), 0) + pad)
plt.show()


## Minkowski distance

$$d(x, z) = \left(\sum_{i=1}^{n} |x_i - z_i|^p\right)^{\frac{1}{p}}$$

| $p$ | distance metric |
|---|---|
| $p = 1$ | Manhattan |
| $p = 2$ | Euclidean |
| $p \rightarrow \infty$ | max (Chebyshev) |

[17]:

from scipy.spatial import minkowski_distance

[24]:

# First two rows of m: the pair of points compared in the cells below.
tuple(m[:2])

[24]:

(array([2, 0]), array([0, 2]))

[20]:

# p = 1 is the Manhattan distance: |2 - 0| + |0 - 2| = 4.
minkowski_distance(x=m[0], y=m[1], p=1)

[20]:

4.0

[21]:

# p = 2 is the Euclidean distance: sqrt(2^2 + 2^2) = sqrt(8), about 2.83.
minkowski_distance(x=m[0], y=m[1], p=2)

[21]:

2.8284271247461903

[23]:

# p = infinity keeps only the largest coordinate difference: max(|2 - 0|, |0 - 2|) = 2.
minkowski_distance(x=m[0], y=m[1], p=np.inf)

[23]:

2.0


## Manhattan distance

$$\vec{v_1} = [x_1,y_1]$$

$$\vec{v_2} = [x_2,y_2]$$

$$d = |{x_2} - {x_1}| + |{y_2} - {y_1}|$$

• total distance between two points when movement is restricted to axis-aligned steps, so the path is not a straight line

• like walking from one building to another along a grid of city blocks

[14]:

from sklearn.metrics.pairwise import manhattan_distances

[73]:

# Show the points next to their pairwise Manhattan distance matrix
# (entry [i, j] is the distance between rows i and j of m).
pairwise_manhattan = manhattan_distances(m)
m, pairwise_manhattan

[73]:

(array([[ 2,  0],
[ 0,  2],
[ 4, -1]]),
array([[0., 4., 3.],
[4., 0., 7.],
[3., 7., 0.]]))

[74]:

def manhattan_distances_scratch(v1,v2):
    """Return the Manhattan (L1) distance between two vectors.

    The distance is the sum of the absolute coordinate-wise differences,
    ``sum(|v2_i - v1_i|)``.

    Parameters
    ----------
    v1, v2 : array_like
        Vectors of matching (or broadcastable) shape. Plain lists and tuples
        are accepted as well as NumPy arrays.

    Returns
    -------
    NumPy scalar
        The summed absolute difference; integer inputs give an integer result.
    """
    # Coerce first so that list/tuple arguments support element-wise subtraction.
    start = np.asarray(v1)
    end = np.asarray(v2)
    return np.abs(end - start).sum()

[75]:

# |0 - 2| + |2 - 0| = 4, matching entry [0, 1] of the sklearn matrix above.
manhattan_distances_scratch(v1=m[0], v2=m[1])

[75]:

4

[76]:

# |4 - 2| + |-1 - 0| = 3, matching entry [0, 2] of the sklearn matrix above.
manhattan_distances_scratch(v1=m[0], v2=m[2])

[76]:

3

[78]:

# |4 - 0| + |-1 - 2| = 7, matching entry [1, 2] of the sklearn matrix above.
manhattan_distances_scratch(v1=m[1], v2=m[2])

[78]:

7


## Euclidean distance

$$\vec{v_1} = [x_1,y_1]$$

$$\vec{v_2} = [x_2,y_2]$$

$$d = ||\vec{v_2} - \vec{v_1}||_2 = \sqrt{(\vec{v_2} - \vec{v_1}) \cdot (\vec{v_2} - \vec{v_1})}$$

$$d = \sqrt{({x_2} - {x_1})^2 + ({y_2} - {y_1})^2}$$

• direct distance between two points

• like flying from one city to another

[8]:

from sklearn.metrics.pairwise import euclidean_distances

[66]:

# Show the points next to their pairwise Euclidean distance matrix
# (entry [i, j] is the distance between rows i and j of m).
pairwise_euclidean = euclidean_distances(m)
m, pairwise_euclidean

[66]:

(array([[ 2,  0],
[ 0,  2],
[ 4, -1]]),
array([[0.        , 2.82842712, 2.23606798],
[2.82842712, 0.        , 5.        ],
[2.23606798, 5.        , 0.        ]]))

[59]:

# First two rows of m: the pair of points used in the first scratch call below.
tuple(m[:2])

[59]:

(array([2, 0]), array([0, 2]))

[69]:

def euclidean_distances_scratch(v1,v2):
    """Return the Euclidean (L2) distance between two vectors.

    The distance is ``sqrt(sum((v2_i - v1_i) ** 2))``.

    Parameters
    ----------
    v1, v2 : array_like
        Vectors of matching (or broadcastable) shape. Plain lists and tuples
        are accepted as well as NumPy arrays.

    Returns
    -------
    numpy.float64
        The straight-line distance between the two vectors.
    """
    # Work in floating point: squaring narrow integer dtypes (e.g. int8) wraps
    # around silently and would give a wrong distance.
    start = np.asarray(v1, dtype=float)
    end = np.asarray(v2, dtype=float)
    squared_differences = np.square(end - start)
    return np.sqrt(squared_differences.sum())

[70]:

# sqrt((0 - 2)^2 + (2 - 0)^2) = sqrt(8), about 2.828; entry [0, 1] of the sklearn matrix above.
euclidean_distances_scratch(v1=m[0], v2=m[1])

[70]:

2.8284271247461903

[71]:

# sqrt((4 - 2)^2 + (-1 - 0)^2) = sqrt(5), about 2.236; entry [0, 2] of the sklearn matrix above.
euclidean_distances_scratch(v1=m[0], v2=m[2])

[71]:

2.23606797749979

[72]:

# sqrt((4 - 0)^2 + (-1 - 2)^2) = sqrt(25) = 5; entry [1, 2] of the sklearn matrix above.
euclidean_distances_scratch(v1=m[1], v2=m[2])

[72]:

5.0


## Cosine distance & Cosine Similarity

$$\vec{A} \cdot \vec{B} = ||\vec{A}|| \, ||\vec{B}|| \cos(\theta)$$

$\theta$ = angle between $\vec{A}$ and $\vec{B}$

Cosine similarity = $$cosine(A,B) = \frac{\vec{A} \cdot \vec{B}}{||\vec{A}|| \, ||\vec{B}||} = \frac{\sum{A_i B_i}}{\sqrt{\sum{A_i^2}}\sqrt{\sum{B_i^2}}}$$

Cosine distance = 1 - cosine similarity

• compares the direction (angle) of two vectors, ignoring their magnitudes

[5]:

from sklearn.metrics.pairwise import cosine_similarity

[6]:

# Show the points next to their pairwise cosine similarity matrix
# (entry [i, j] is the similarity between rows i and j of m).
pairwise_cosine = cosine_similarity(m)
m, pairwise_cosine

[6]:

(array([[ 2,  0],
[ 0,  2],
[ 4, -1]]),
array([[ 1.        ,  0.        ,  0.9701425 ],
[ 0.        ,  1.        , -0.24253563],
[ 0.9701425 , -0.24253563,  1.        ]]))

[42]:

def cosine_similarity_scratch(v1,v2):
    """Return the cosine similarity between two vectors.

    The similarity is the dot product divided by the product of the
    Euclidean norms: ``sum(v1_i * v2_i) / (||v1|| * ||v2||)``.

    Parameters
    ----------
    v1, v2 : array_like
        Vectors of matching shape. Plain lists and tuples are accepted as
        well as NumPy arrays.

    Returns
    -------
    float
        The cosine of the angle between the vectors, or 0.0 when either
        vector has zero norm (the angle is undefined there).
    """
    # Work in floating point so that products of narrow integer dtypes cannot
    # wrap around, and so that list/tuple arguments multiply element-wise.
    a = np.asarray(v1, dtype=float)
    b = np.asarray(v2, dtype=float)
    norm_product = np.linalg.norm(a) * np.linalg.norm(b)
    # A zero vector would make this 0 / 0 (nan plus a RuntimeWarning); report
    # 0.0 instead, the value sklearn's cosine_similarity gives for zero rows.
    if norm_product == 0:
        return 0.0
    return (a * b).sum() / norm_product

[45]:

# (2*0 + 0*2) / (2 * 2) = 0: the two vectors are perpendicular.
cosine_similarity_scratch(v1=m[0], v2=m[1])

[45]:

0.0

[43]:

# (2*4 + 0*(-1)) / (2 * sqrt(17)) = 8 / (2 * sqrt(17)), about 0.970.
cosine_similarity_scratch(v1=m[0], v2=m[2])

[43]:

0.9701425001453319

[47]:

# (0*4 + 2*(-1)) / (2 * sqrt(17)) = -2 / (2 * sqrt(17)), about -0.243.
cosine_similarity_scratch(v1=m[1], v2=m[2])

[47]:

-0.24253562503633297