Recognizing Handwritten Digits with Deep Learning on the Full Kaggle Dataset
I trained models written from scratch as well as pre-trained models
- We import all the required packages and data
 - We take a look at our data
 - Next we convert our DataFrame into image files. It consists of 785 columns: one column is the label of the digit and the other 784 columns are the pixel values (28 * 28 = 784 pixels)
 - See how each image is mapped in terms of pixels
 - Let's make our dataloaders
 - Here we use a ResNet-50 model: we freeze the model and train only the last layer for three epochs, then unfreeze the layers and train the whole model
 
> "I trained the model with models written from scratch as well as pre trained models"
- toc: true
- branch: master
- badges: true
- comments: true
- categories: [fastpages, jupyter]
- image: images/MNIST/mnist.png
- hide: false
- search_exclude: true
- metadata_key1: metadata_value1
- metadata_key2: metadata_value2
# This Python environment is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
import os
# Print the full path of every file found anywhere below the Kaggle input directory.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for full_path in (os.path.join(root, name) for name in names):
        print(full_path)
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session
We will use fastai for this project. fastai is a framework built on top of PyTorch that makes developing deep learning models easier
from fastai import *
from fastai.vision import *
import os
# to easier work with paths
from pathlib import Path
# to read and manipulate .csv-files
import pandas as pd
# Folder holding the competition CSV files (Kaggle exposes inputs under ../input/).
INPUT = Path("../input/digit-recognizer")
# Training data: the first column is read later as the digit label, the rest as pixel values.
train_df = pd.read_csv(INPUT/"train.csv")
train_df.head(3)
# Test data: every column of a row is later treated as pixel data.
test_df = pd.read_csv(INPUT/"test.csv")
test_df.head(3)
# Print column dtypes and memory usage of the test frame.
test_df.info()
# Target folders for the image files generated from the CSV data.
TRAIN = Path("../train")
TEST = Path("../test")

# One sub-folder per digit class (0-9); training images are later saved into
# the folder named after their label.
# exist_ok=True keeps re-runs safe when the folders already exist, while real
# failures (e.g. permission errors) are raised instead of being silently
# swallowed by a bare `except: pass`.
for index in range(10):
    os.makedirs(TRAIN/str(index), exist_ok=True)

# Single flat folder for the unlabelled test images.
os.makedirs(TEST, exist_ok=True)
import numpy as np
# import PIL to display images and to create images from arrays
from PIL import Image
def saveDigit(digit, filepath):
    """Save one digit's pixel values as a 28x28 image file.

    Args:
        digit: Array holding 784 pixel values; it is reshaped to 28x28 and
            cast to unsigned 8-bit integers before being turned into an image.
        filepath: Destination path handed to PIL's ``Image.save``.
    """
    pixels = digit.reshape(28, 28).astype(np.uint8)
    Image.fromarray(pixels).save(filepath)
# Write every training row to TRAIN/<label>/<row index>.jpg.
for index, row in train_df.iterrows():
    # Each row is a Series indexed by column name, so use .iloc for positional
    # access: integer keys such as row[0] on a label-indexed Series are
    # deprecated in pandas.
    label, digit = row.iloc[0], row.iloc[1:]

    # The label doubles as the name of the class sub-folder.
    folder = TRAIN/str(label)
    filename = f"{index}.jpg"
    filepath = folder/filename

    # saveDigit expects a plain array rather than a Series.
    digit = digit.values

    saveDigit(digit, filepath)
# Write every test row to TEST/<row index>.jpg; the whole row is treated as pixel data.
for index, digit in test_df.iterrows():
    folder, filename = TEST, f"{index}.jpg"
    filepath = folder / filename

    # Hand saveDigit the underlying array rather than the Series.
    digit = digit.values
    saveDigit(digit, filepath)
Let's see one of our images
# NOTE(review): `subdirectory` is not used anywhere in the visible code.
subdirectory=str(0)
path = TEST
# os.listdir returns names in arbitrary order, so index 2 picks an arbitrary test image.
images = os.listdir(path)
image = Image.open(path/images[2])
# Bare expression: presumably here so the notebook renders the image as cell output.
image
import matplotlib.pyplot as plt
# Load one of the generated test images (directory listing order is arbitrary).
image_path = TEST / os.listdir(TEST)[9]
image = Image.open(image_path)
image_array = np.asarray(image)

fig, ax = plt.subplots(figsize=(15, 15))
img = ax.imshow(image_array, cmap='gray')

# Annotate each of the 28x28 cells with its pixel value divided by 255.
# Cells above 0.5 get black text and the rest white text, so the numbers stay
# readable against the grayscale background.
for x, y in np.ndindex(28, 28):
    value = round(image_array[y, x] / 255.0, 2)
    if value > 0.5:
        color = 'black'
    else:
        color = 'white'
    ax.annotate(text=value, xy=(x, y), ha='center', va='center', color=color)

plt.axis('off')
plt.show()
# Build the fastai dataloaders from the per-digit folders under TRAIN.
# valid_pct=0.1 holds out 10% of the images for validation; bs=256 is the batch size.
# NOTE(review): `size` is not one of from_folder's named parameters in the fastai v2 docs — confirm it has any effect here.
data = ImageDataLoaders.from_folder(
    path = TRAIN,  
    valid_pct = 0.1,
    bs = 256,
    size = 28,
    
)
from fastai.callback.fp16 import *
# ResNet-50 based learner that reports accuracy; to_fp16() switches training to mixed precision.
learn = cnn_learner(data, resnet50, metrics=accuracy).to_fp16()
# Train for 3 epochs with the pretrained layers frozen, then unfreeze and train for 12 more epochs.
learn.fine_tune(12, freeze_epochs=3)
# Collect the test image paths; predictions come back in the order of this listing.
path=TEST
f=os.listdir(TEST)
new=[str(path)+'/'+s for s in f]
# Wrap the file paths in a test dataloader built from the learner's dataloaders.
test_dl=learn.dls.test_dl(new)
# NOTE(review): `y` is not used in the visible code.
class_score,y=learn.get_preds(dl=test_dl)
class_score
# Reduce along axis 1 to the index of the highest score per image.
# Presumably class_score is (n_images, n_classes) and the index equals the digit — TODO confirm.
class_score = np.argmax(class_score, axis=1)
# Show the predicted class of the second listed image as a plain Python number.
class_score[1].item()
Let's submit our predictions
# Peek at the first and last rows of the sample submission file.
sample_submission = pd.read_csv(INPUT / "sample_submission.csv")
for preview in (sample_submission.head(2), sample_submission.tail(2)):
    display(preview)

# Each test image is named "<row index>.jpg": strip the extension, convert to
# int, and add 1 because ImageId starts at 1 in the submission file.
# NOTE(review): this re-lists TEST and relies on the listing order matching the
# one used when the predictions in class_score were produced.
ImageId = [int(os.path.splitext(name)[0]) + 1 for name in os.listdir(TEST)]

submission = pd.DataFrame({"ImageId": ImageId, "Label": class_score})

# Rows are written in directory-listing order; sorting by ImageId is left disabled.
# submission.sort_values(by=["ImageId"], inplace = True)
submission.to_csv("submission.csv", index=False)

for preview in (submission.head(3), submission.tail(3)):
    display(preview)