Data: the Hindi MNIST dataset, available at https://www.kaggle.com/datasets/imbikramsaha/hindi-mnist
The Methodology will remain the same as discussed in the chapter.
- Define the baseline model first.
- Define the dataloaders and other parameters required for implementing Stochastic Gradient Descent.
- Define the loss function and the accuracy metric
- Fit your model using FastAI's libraries and check whether it beats the baseline model.
- Make Improvements.
Resources:
- Chapter link: https://course.fast.ai/Lessons/lesson3.html
- Video based on the 2020 course where this part of the problem is discussed: https://www.youtube.com/watch?v=p50s63nPq9I&t=6605s
from fastai.vision.all import *
from fastbook import *
matplotlib.rc('image', cmap='Greys')
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
import os
# Report how many files each input directory contains.
# The original incremented a manual counter over `filenames`; os.walk already
# hands us the full list per directory, so `len(filenames)` is the count.
for dirname, _, filenames in os.walk('/kaggle/input'):
    print(f"Read {len(filenames)} files from the directory- {dirname}")
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session
# Dataset locations on Kaggle; the "test" folder serves as the validation split.
train_dir = "/kaggle/input/hindi-mnist/Hindi-MNIST/train"
valid_dir = "/kaggle/input/hindi-mnist/Hindi-MNIST/test"
train_path = Path(train_dir)
valid_path = Path(valid_dir)
# One list of image-file paths per digit class; the class directories sort as 0..9.
# Hoisted: the original called `train_path.ls().sorted()` once per digit (ten
# directory listings + sorts); one listing is enough.
digit_dirs = train_path.ls().sorted()
zeroes = digit_dirs[0].ls()
ones = digit_dirs[1].ls()
twos = digit_dirs[2].ls()
threes = digit_dirs[3].ls()
fours = digit_dirs[4].ls()
fives = digit_dirs[5].ls()
sixes = digit_dirs[6].ls()
sevens = digit_dirs[7].ls()
eights = digit_dirs[8].ls()
nines = digit_dirs[9].ls()
# Open one sample image of the digit 6 to inspect the raw data.
im = Image.open(sixes[0])
# Bare expression: renders the PIL image in the notebook output.
im
# Peek at a small patch of raw pixel values (rows/cols 4-9).
tensor(im)[4:10,4:10]
# Image dimensions — 32x32 per the dataset.  NOTE(review): plain PIL images
# have .size, not .shape; .shape here presumably comes from fastai's patching.
im.shape
The size of the images is 32 x 32 pixels.
# Convert the sample image to a tensor so a sub-region can be shown numerically.
im_t = tensor(im)
# Render a patch of pixel values as a styled DataFrame: small font plus a
# grey background gradient so the digit's stroke is visible in the numbers.
df = pd.DataFrame(im_t[4:15,4:22])
df.style.set_properties(**{'font-size':'6pt'}).background_gradient('Greys')
# Map each digit label (0-9) to the list of its training image paths.
Dict = {0: zeroes,1:ones, 2: twos, 3: threes, 4: fours, 5: fives, 6: sixes, 7:sevens, 8:eights, 9:nines}
# Hoisted: the original re-listed and re-sorted the validation directory on
# every loop iteration; one listing is enough.
valid_dirs = valid_path.ls().sorted()
train_tensors = []
valid_tensors = []
for key in Dict:
    # One list of (32, 32) image tensors per digit class, train and valid.
    train_tensors.append([tensor(Image.open(o)) for o in Dict[key]])
    valid_tensors.append([tensor(Image.open(o)) for o in valid_dirs[key].ls()])
# Sanity check: 10 classes each, plus the image counts for class 0.
len(train_tensors), len(valid_tensors), len(train_tensors[0]), len(valid_tensors[0])
# Stack each class's image list into one (N, 32, 32) float tensor scaled to
# [0, 1].  Comprehension replaces the original append loop.
stacked_train_tensors = [torch.stack(ts).float()/255 for ts in train_tensors]
print(len(stacked_train_tensors))
print(stacked_train_tensors[0].shape)
# Per-class mean image ("ideal digit"), shape (32, 32) — the baseline model's
# template for each class.  Comprehension replaces the original append loop.
stacked_train_tensors_mean = [(torch.stack(ts).float()/255).mean(0) for ts in train_tensors]
print(len(stacked_train_tensors_mean))
# Same stacking/normalisation for the validation split: one (N, 32, 32)
# float tensor in [0, 1] per class.
stacked_valid_tensors = [torch.stack(ts).float()/255 for ts in valid_tensors]
print(len(stacked_valid_tensors))
print(stacked_valid_tensors[0].shape)
# Confirm the mean image is a single 32x32 map.
stacked_train_tensors_mean[0].shape
# Visualise the "average 4" template...
show_image(stacked_train_tensors_mean[4])
# ...and one concrete training example of a 4 for comparison.
show_image(train_tensors[4][0])
# Distance of a sample "4" to the mean-4 template under two metrics:
# mean absolute difference (L1) and root-mean-square difference.
sample_four = train_tensors[4][0]
mean_four = stacked_train_tensors_mean[4]
dist_4_abs = (sample_four - mean_four).abs().mean()
dist_4_sqr = ((sample_four - mean_four)**2).mean().sqrt()
dist_4_abs, dist_4_sqr
# Same two metrics for a sample "3" against the mean-4 template —
# expected to come out larger than the distances above.
sample_three = train_tensors[3][0]
dist_3_abs = (sample_three - mean_four).abs().mean()
dist_3_sqr = ((sample_three - mean_four)**2).mean().sqrt()
dist_3_abs, dist_3_sqr
def rms_error(a, b):
    """Root-mean-square difference, reduced over the last two (pixel) axes."""
    squared = (a - b) ** 2
    return squared.mean((-1, -2)).sqrt()
# Sanity check the baseline idea: the sample "4" should be closest to the
# mean-4 template under each metric (RMS, L1, MSE).
for digit in range(10):
    print(rms_error(train_tensors[4][0], stacked_train_tensors_mean[digit]))
for digit in range(10):
    print(F.l1_loss(train_tensors[4][0].float(), stacked_train_tensors_mean[digit]))
for digit in range(10):
    print(F.mse_loss(train_tensors[4][0].float(), stacked_train_tensors_mean[digit]))
# Shapes: full validation stack for "4" vs the single mean-4 template.
print(stacked_valid_tensors[4].shape), print(stacked_train_tensors_mean[4].shape)
# Broadcasting: rms_error reduces over the last two axes, so a (N, 32, 32)
# batch against a (32, 32) template yields one error per validation image.
error = rms_error(stacked_valid_tensors[4], stacked_train_tensors_mean[4])
error.shape, error[0:15]
def predict_input(input_tensor):
    """Baseline classifier: return the digit whose mean template is nearest.

    input_tensor: one (32, 32) image or an (N, 32, 32) batch; rms_error
    broadcasts over the batch dimension.
    Returns the argmin class index — a scalar, or shape (N,) for a batch.
    """
    # (10,) or (10, N) error matrix: one row of RMS errors per class template.
    errors = torch.stack([rms_error(input_tensor, mean)
                          for mean in stacked_train_tensors_mean])
    # argmin across axis 0 picks the class with the smallest error.
    return torch.argmin(errors, 0)
# Predict every validation "9"; y holds one predicted class per image.
y = predict_input(stacked_valid_tensors[9])
y, y.shape
# Baseline accuracy on the nines alone.
(y == 9).float().mean()
# Baseline accuracy per digit class: predict every validation image of class i
# and measure how often the nearest-template prediction equals i.
# Comprehension replaces the original append loop; the commented-out
# experimentation lines are dropped.
accuracies = [(predict_input(stacked_valid_tensors[i]) == i).float().mean()
              for i in range(10)]
accuracies
# Overall baseline accuracy: unweighted mean of the per-class accuracies.
print('baseline model accuracy:', torch.stack(accuracies).mean())
stacked_train_tensors[0][0].shape # one image from digit 0
# One row per image: each 32x32 image is flattened to 32*32 = 1024 pixels.
# torch.cat accepts the existing list directly — the original first copied it
# into a new list (`lst`) for no benefit.
train_x = torch.cat(stacked_train_tensors).view(-1, 32*32)
train_x.shape
# Training label column, shape (N, 1): label d repeated once per image of
# digit d, concatenated in class order.  One torch.cat over a list replaces
# the original loop that re-concatenated (and re-copied) the accumulator on
# every iteration, and torch.full replaces the numpy round-trip.
y_tensor = torch.cat(
    [torch.full((len(t),), d, dtype=torch.float)
     for d, t in enumerate(stacked_train_tensors)]
).unsqueeze(1)
y_tensor
# Classification targets must be integer class indices.
y_tensor = y_tensor.type(torch.LongTensor)
y_tensor.shape
# Pair every flattened training image with its label for the DataLoader.
dset = list(zip(train_x,y_tensor))
# One row per image: each 32x32 validation image flattened to 1024 pixels.
# As with train_x, cat takes the existing list — no intermediate copy needed.
valid_x = torch.cat(stacked_valid_tensors).view(-1, 32*32)
valid_x.shape
# Validation label column, built the same single-cat way as the training
# labels.  NOTE(review): unlike y_tensor this stays float, matching the
# original; the loss functions below call .long() on the target themselves.
valid_y_tensor = torch.cat(
    [torch.full((len(t),), d, dtype=torch.float)
     for d, t in enumerate(stacked_valid_tensors)]
).unsqueeze(1)
valid_dset = list(zip(valid_x,valid_y_tensor))
valid_y_tensor.shape
def init_params(size, std=1.0):
    """Return a random normal tensor of `size`, scaled by `std`, with gradient tracking on."""
    params = torch.randn(size) * std
    return params.requires_grad_()
# Randomly initialised weights (1024 -> 1) and bias for a single linear unit.
# NOTE(review): this single-output layer mirrors the chapter's binary example;
# the nn.Sequential model defined below is the actual 10-class model.
weights = init_params((32*32,1))
bias = init_params(1)
weights.shape, bias.shape
# Manual prediction for the first image: elementwise product + sum == dot product.
(train_x[0]*weights.T).sum() + bias
def linear1(xb):
    """Hand-rolled linear layer: batch `xb` times the weight matrix, plus bias."""
    product = xb @ weights
    return product + bias
# Predictions for the whole training set with the random initial weights.
preds = linear1(train_x)
preds
train_x.shape
preds.shape
# Mini-batch loaders: shuffle the training data each epoch; batch size 256.
dl_train = DataLoader(dset, batch_size=256, shuffle=True)
dl_valid = DataLoader(valid_dset, batch_size=256)
dls = DataLoaders(dl_train, dl_valid)
# Number of training batches per epoch.
len(dls.train)
def loss_func(predictions, targets):
    """Chapter-style binary loss: squash raw scores with sigmoid, then score
    each item by its distance from the correct side (1-p for positives,
    p for negatives) and average."""
    probs = predictions.sigmoid()
    per_item = torch.where(targets==1, 1-probs, probs)
    return per_item.mean()
def accuracy_metric(prediction, y):
    """Share of samples whose highest-scoring class equals the label."""
    predicted = torch.argmax(prediction, axis=1)  # index of the max score per row
    hits = (predicted == y.T).float()
    return hits.mean()
# Two-layer MLP: 1024 pixels -> 30 hidden units (ReLU) -> 10 class scores.
model = nn.Sequential(
    nn.Linear(32*32, 30), # 1024 input features and 30 output features
    nn.ReLU(),
    nn.Linear(30,10),
)
# First attempt: train with the chapter's simple binary-style loss.
# NOTE(review): that loss treats the 10-column output as if it were binary;
# the later cross-entropy learner is the proper multi-class setup.
learn_loss_func = Learner(dls, model, loss_func=loss_func, opt_func=SGD, metrics=accuracy_metric)
learn_loss_func.fit(n_epoch=10, lr=0.1)
def softmax_loss(prediction, y):
    """Hand-built softmax loss: mean (1 - p_correct) over the batch.

    prediction: (batch, 10) raw scores; y: (batch,) or (batch, 1) labels.
    BUG FIX: the original returned the mean probability of the correct class
    itself, which SGD would *minimize* — training to make the correct class
    LESS likely.  Returning 1 - p_correct keeps "lower is better".
    BUG FIX: a (batch, 1) target made the fancy index broadcast to a
    (batch, batch) matrix; squeeze it to (batch,) first, matching loss_entropy.
    """
    soft_m = torch.softmax(prediction, dim=1)
    targets = y.long()
    if targets.ndim > 1:
        targets = targets.squeeze(1)
    # Pick each row's probability for its own label.
    p_correct = soft_m[torch.arange(len(targets)), targets]
    return (1 - p_correct).mean()
# Second attempt: same architecture trained with the hand-built softmax loss.
# NOTE(review): the same `model` instance is reused across learners, so this
# fit continues from the previous learner's weights rather than from scratch.
learn_softmax = Learner(dls, model, loss_func=softmax_loss, opt_func=SGD, metrics=accuracy_metric)
learn_softmax.fit(n_epoch=10, lr=0.1)
def loss_entropy(pred, y):
#print(y.shape)
y = y.long()
if y.ndim > 1:
y = y.squeeze()
# print(y.shape)
return F.cross_entropy(pred, y)
# Final attempt: train (longer) with proper cross-entropy loss.
learn_entropy = Learner(dls, model, loss_func=loss_entropy, opt_func=SGD, metrics=accuracy_metric)
learn_entropy.fit(n_epoch=30, lr=0.1)
# Compare validation accuracy across the three losses; recorder.values holds
# per-epoch rows and itemgot(2) pulls the accuracy column from each row.
plt.plot(L(learn_loss_func.recorder.values).itemgot(2), label='w/ simple_loss');
plt.plot(L(learn_entropy.recorder.values).itemgot(2), label='w/ entropy');
plt.plot(L(learn_softmax.recorder.values).itemgot(2), label='w/ softmax');
plt.title('accuracy');
plt.legend(loc='best');
plt.xlabel('epoch');
# Spot-check: labels vs current model outputs on a slice of training rows.
y_tensor[10000:10010]
model(train_x)[10000:10010]
# Pull the trained model out of the cross-entropy learner and inspect it.
m = learn_entropy.model
m
# First linear layer's parameters: weight matrix and bias vector.
w, b = m[0].parameters()
# Visualise each hidden unit's incoming weights as a 32x32 image.
for unit in range(w.shape[0]):
    show_image(w[unit].view(32,32))