PROJECT : Handwritten Digits Classifier#

WORK IN PROGRESS

[208]:

import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.datasets import load_digits
from sklearn.metrics import ConfusionMatrixDisplay, classification_report
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

Loading Data#

The data comes from scikit-learn's built-in handwritten digits dataset (`load_digits`).

[2]:

# Load scikit-learn's bundled digits dataset (a dict-like object; its keys are
# listed in the next cell).
digits = load_digits()

[3]:

# List the fields available on the loaded dataset object.
digits.keys()

[3]:

dict_keys(['data', 'target', 'frame', 'feature_names', 'target_names', 'images', 'DESCR'])

[10]:

# One row per image: the pixel feature columns followed by the label in 'target'.
df = pd.DataFrame(digits.data, columns=digits.feature_names).assign(target=digits.target)

df.head()

[10]:

	pixel_0_2	pixel_0_3	pixel_0_4	pixel_0_5	pixel_1_1	...	pixel_7_2	pixel_7_3	pixel_7_4	pixel_7_5	pixel_7_6	target
0	5.0	13.0	9.0	1.0	0.0	...	6.0	13.0	10.0	0.0	0.0	0
1	0.0	12.0	13.0	5.0	0.0	...	0.0	11.0	16.0	10.0	0.0	1
2	0.0	4.0	15.0	12.0	0.0	...	0.0	3.0	11.0	16.0	9.0	2
3	7.0	15.0	13.0	1.0	8.0	...	7.0	13.0	13.0	9.0	0.0	3
4	0.0	1.0	11.0	0.0	0.0	...	0.0	2.0	16.0	4.0	0.0	4

5 rows × 65 columns

[105]:

# External image of a handwritten digit, classified at the end of the notebook.
# It is later flattened to 64 features, so it is expected to be 8x8 pixels.
TEST_DIGIT_PATH = '/opt/datasetsRepo/digits/4.png'

test_digit_raw = mpimg.imread(TEST_DIGIT_PATH)
if test_digit_raw.ndim == 3:
    # Average the colour channels only. A fourth (alpha) channel encodes
    # opacity, not brightness, and would skew the grayscale values.
    test_digit = test_digit_raw[..., :3].mean(axis=2)
else:
    # Already a single-channel (grayscale) image; nothing to collapse.
    test_digit = test_digit_raw

[29]:

# Number of samples per digit class, to check how balanced the dataset is.
df['target'].value_counts()

[29]:

3    183
5    182
1    182
6    181
4    181
9    180
7    179
0    178
2    177
8    174
Name: target, dtype: int64

Visualize Digits#

[23]:

# Render the first sample labelled 1 as an 8x8 grayscale image.
pixel_columns = [name for name in df.columns if 'pixel' in name]
first_sample_of_1 = df.loc[df.target == 1, pixel_columns].iloc[0]
plt.imshow(first_sample_of_1.to_numpy().reshape(8, 8), cmap='gray')

[23]:

<matplotlib.image.AxesImage at 0x7f692be81b80>

../_images/ClassificationProjects_DigitsDataModelling_9_1.png

[24]:

# Render the first sample labelled 0 as an 8x8 grayscale image.
pixel_columns = [name for name in df.columns if 'pixel' in name]
first_sample_of_0 = df.loc[df.target == 0, pixel_columns].iloc[0]
plt.imshow(first_sample_of_0.to_numpy().reshape(8, 8), cmap='gray')

[24]:

<matplotlib.image.AxesImage at 0x7f692bdf17f0>

../_images/ClassificationProjects_DigitsDataModelling_10_1.png

[25]:

# Render the first sample labelled 4 as an 8x8 grayscale image.
pixel_columns = [name for name in df.columns if 'pixel' in name]
first_sample_of_4 = df.loc[df.target == 4, pixel_columns].iloc[0]
plt.imshow(first_sample_of_4.to_numpy().reshape(8, 8), cmap='gray')

[25]:

<matplotlib.image.AxesImage at 0x7f692bd623d0>

../_images/ClassificationProjects_DigitsDataModelling_11_1.png

Dataset preparation#

[162]:

# Hold out 10% of the samples for testing. The split is stratified on the label
# so both parts keep the same class proportions, and seeded for repeatability.
pixel_features = df[[col for col in df.columns if 'pixel' in col]]
x_train, x_test, y_train, y_test = train_test_split(
    pixel_features,
    df.target,
    test_size=0.1,
    random_state=30,
    stratify=df.target,
)

Support Vector Machine Model Building#

[173]:

# Fit a support vector classifier with an RBF kernel on the training split.
# fit() returns the estimator itself, so chaining binds the same fitted object.
svc_model = SVC(kernel='rbf').fit(x_train, y_train)
svc_model

[173]:

SVC()

training accuracy#

[174]:

# Mean accuracy on the data the model was fitted on.
svc_model.score(X=x_train, y=y_train)

[174]:

0.9962894248608535

testing accuracy#

[175]:

# Mean accuracy on the held-out test split.
svc_model.score(X=x_test, y=y_test)

[175]:

0.9944444444444445

classification report#

[176]:

from sklearn.metrics import ConfusionMatrixDisplay, classification_report

# plot_confusion_matrix was deprecated in scikit-learn 1.0 and removed in 1.2,
# so importing it unconditionally raises ImportError on current releases.
try:
    from sklearn.metrics import plot_confusion_matrix
except ImportError:
    # ConfusionMatrixDisplay.from_estimator accepts the same
    # (estimator, X, y) arguments, so existing calls keep working.
    plot_confusion_matrix = ConfusionMatrixDisplay.from_estimator

[177]:

# Per-class precision / recall / F1 on the held-out test split.
y_test_pred = svc_model.predict(x_test)
test_report = classification_report(y_test, y_test_pred)
print(test_report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        18
           1       1.00      1.00      1.00        18
           2       1.00      1.00      1.00        18
           3       1.00      1.00      1.00        18
           4       1.00      1.00      1.00        18
           5       1.00      1.00      1.00        18
           6       1.00      1.00      1.00        18
           7       0.95      1.00      0.97        18
           8       1.00      1.00      1.00        18
           9       1.00      0.94      0.97        18

    accuracy                           0.99       180
   macro avg       0.99      0.99      0.99       180
weighted avg       0.99      0.99      0.99       180

[178]:

# Confusion matrix of the fitted model on the held-out test split.
# ConfusionMatrixDisplay.from_estimator replaces plot_confusion_matrix, which
# was removed in scikit-learn 1.2.
ConfusionMatrixDisplay.from_estimator(svc_model, x_test, y_test)

[178]:

<sklearn.metrics._plot.confusion_matrix.ConfusionMatrixDisplay at 0x7f692a4c9580>

../_images/ClassificationProjects_DigitsDataModelling_23_1.png

Test with real image#

[179]:

# Preview the external test image before classifying it.
plt.imshow(X=test_digit, cmap='gray')

[179]:

<matplotlib.image.AxesImage at 0x7f692a297580>

../_images/ClassificationProjects_DigitsDataModelling_25_1.png

[180]:

# mpimg.imread returns PNG pixels as floats in [0, 1], whereas the model was
# trained on load_digits intensities in the range 0..16. Rescale the image so
# the sample lies in the same feature space as the training data.
DIGITS_MAX_INTENSITY = 16

# Wrap the flattened image in a DataFrame carrying the training column names:
# the model was fitted on a DataFrame, and a bare array triggers a
# feature-names warning in recent scikit-learn versions. This also fails
# loudly if the image does not flatten to the expected number of features.
test_features = pd.DataFrame(
    test_digit.reshape(1, -1) * DIGITS_MAX_INTENSITY,
    columns=x_train.columns,
)
svc_model.predict(test_features)

[180]:

array([4])

PROJECT : Handwritten Digits Classifier

Contents

PROJECT : Handwritten Digits Classifier#

Loading Data#

Visualize Digits#

Dataset preparation#

Support Vector Machine Model Building#

training accuracy#

testing accuracy#

classification report#

Test with real image#