final submission ready...moved notebooks
This commit is contained in:
511
notebooks/Final-Submission.ipynb
Normal file
511
notebooks/Final-Submission.ipynb
Normal file
@@ -0,0 +1,511 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "21b10b99",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Task 1: Load Dataset\n",
|
||||
"Load images from disk and count per class to verify dataset integrity"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d318d1f0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
import os

# Root of the dataset: one subfolder per vehicle class.
data_dir = '../data/raw/vehicle_classification'

total_count = 0

# Tally how many image files each class folder holds.
for class_name in os.listdir(data_dir):
    class_path = os.path.join(data_dir, class_name)
    if not os.path.isdir(class_path):
        continue  # skip stray non-directory entries (e.g. .DS_Store)
    count = len(os.listdir(class_path))
    total_count += count
    print(f"{class_name}: {count} images")

print(f"Total Count: {total_count} images")
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "64122ad4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Check out sample image from dataset"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5604ace3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
from PIL import Image
import matplotlib.pyplot as plt

# Quick visual sanity check: show the first image of the first class folder.
first_class = os.listdir(data_dir)[0]
class_folder = os.path.join(data_dir, first_class)
first_image_path = os.path.join(class_folder, os.listdir(class_folder)[0])

img = Image.open(first_image_path)
print(f"Size: {img.size}")
print(f"Mode: {img.mode}")
plt.imshow(img)
plt.title(first_class)
plt.show()
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c19ec00a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Ensure that all images are RGB, all of same resolution"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3cedd586",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
# Collect every distinct (width, height) and colour mode in the dataset to
# confirm all images share one resolution and are RGB.
sizes = set()
modes = set()

for class_name in os.listdir(data_dir):
    class_path = os.path.join(data_dir, class_name)
    if not os.path.isdir(class_path):
        continue
    for img_name in os.listdir(class_path):
        # PIL opens lazily and keeps the file handle alive; the original
        # never closed these, which can exhaust file descriptors on large
        # datasets. The context manager closes each file immediately.
        with Image.open(os.path.join(class_path, img_name)) as img:
            sizes.add(img.size)
            modes.add(img.mode)

print(f"Unique sizes: {sizes}")
print(f"Unique modes: {modes}")
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "88ac961b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Accelerate torch with GPU or MPS if available (credit: Claude)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8f556b22",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
import torch

def _pick_device() -> torch.device:
    """Return the fastest available backend: CUDA GPU > Apple MPS > CPU."""
    if torch.cuda.is_available():
        DEVICE = torch.device('cuda')
        print(f'GPU: {torch.cuda.get_device_name(0)}')
        return DEVICE
    if torch.backends.mps.is_available():
        print('Apple Silicon (MPS)')
        return torch.device('mps')
    print('CPU')
    return torch.device('cpu')

# Global device used by every later cell (model, tensors, pin_memory flag).
DEVICE = _pick_device()
print(f'Running on: {DEVICE}')
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3ad97919",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Task 2: Split Dataset 80:20 (Train / Test)\n",
|
||||
"\n",
|
||||
"Augmentation applied to training set only — test set kept clean for fair evaluation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f68c1a25",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
import math
from torchvision import datasets, transforms
from torch.utils.data import random_split, DataLoader

# Shared normalization: maps [0, 1] tensors to [-1, 1], as in the PyTorch tutorial.
_NORM = dict(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])

# Augmentation goes on the training view only; the test view stays clean so
# evaluation is fair.
train_transform = transforms.Compose([
    transforms.Resize((64, 64)),                           # enforce 64x64 input
    transforms.RandomHorizontalFlip(),                     # random mirroring
    transforms.RandomRotation(20),                         # rotate up to +/-20 degrees
    transforms.ColorJitter(brightness=0.3, contrast=0.3),  # vary lighting
    transforms.ToTensor(),
    transforms.Normalize(**_NORM),
])

test_transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize(**_NORM),
])

# Two views of the same folder: augmented for training, plain for testing.
train_full = datasets.ImageFolder(root=data_dir, transform=train_transform)
test_full = datasets.ImageFolder(root=data_dir, transform=test_transform)

# 80% of samples train, the remainder test.
train_size = math.floor(len(train_full) * 0.8)
test_size = len(train_full) - train_size

# Fixed seed makes the shuffled permutation — and hence the split — reproducible.
torch.manual_seed(42)
indices = torch.randperm(len(train_full)).tolist()

train_indices = indices[:train_size]  # first 80% of the shuffled order
test_indices = indices[train_size:]   # remaining 20%

# Subsets index into the matching transform view of the same underlying files.
train_dataset = torch.utils.data.Subset(train_full, train_indices)
test_dataset = torch.utils.data.Subset(test_full, test_indices)

print(f"Train: {len(train_dataset)}, Test: {len(test_dataset)}")
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2eede814",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Credit: Claude: load dataset into batches (64 is standard), and dedicate n worker threads to the process (min 1, preferably 4)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e1539eaa",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
# Up to 4 parallel loader workers, but never more than the machine has cores.
NUM_WORKERS = min(4, os.cpu_count() or 1)
# Pinned host memory speeds up host-to-GPU copies; only relevant under CUDA.
PIN_MEMORY = (DEVICE.type == 'cuda')

print(NUM_WORKERS)

loader_opts = dict(batch_size=64, num_workers=NUM_WORKERS, pin_memory=PIN_MEMORY)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_opts)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_opts)

classes = train_full.classes  # class names in label-index order
print(classes)
print(len(classes))
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e7255041",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Task 3: CNN Architecture\n",
|
||||
"Model takes a batch of (3, 64, 64) images and outputs 8 class scores"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d1b7d9ca",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
import torch.nn as nn

class Net(nn.Module):
    """Small three-block CNN for 64x64 RGB vehicle images.

    Input:  (batch, 3, 64, 64) tensors.
    Output: (batch, len(classes)) raw class scores (logits) —
            CrossEntropyLoss applies the softmax downstream.
    """

    def __init__(self):
        super(Net, self).__init__()

        # Feature extractor: three conv blocks, each halving the spatial
        # resolution (64 -> 32 -> 16 -> 8) while increasing channels
        # (3 -> 32 -> 64 -> 128).
        self.features = nn.Sequential(
            # Block 1: 64x64 -> 32x32

            # kernel_size=3 is a 3x3 filter patch;
            # padding=1 keeps the 64x64 spatial size unchanged through the conv.
            nn.Conv2d(3, 32, kernel_size=3, padding=1), # 3 RGB input channels -> 32 filters; 1 pixel of zero padding preserves size

            nn.BatchNorm2d(32), # channel count mirrors the conv output
            nn.ReLU(), # activation fn
            nn.MaxPool2d(2,2), # 2x2 window, stride 2: spatial size halved -> 32x32

            # Block 2: 32x32 -> 16x16, same pattern

            nn.Conv2d(32, 64, kernel_size=3, padding=1), # double the filters -> detect more complex features
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2,2),

            # Block 3: 16x16 -> 8x8, same pattern

            nn.Conv2d(64, 128, kernel_size=3, padding=1), # double the filters again -> even more complex features
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2,2),

        )

        # Fully-connected head mapping flattened conv features to class scores.
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear (128 * 8 *8, 512), # flattened size 128 channels * 8 * 8 spatial; 512 hidden units chosen to learn detail without overfitting
            nn.ReLU(),
            nn.Dropout(0.5), # randomly zero 50% of activations during training -> curbs memorization/overfitting
            nn.Linear(512, len(classes)) # one score per vehicle class
        )

    def forward(self, x):
        """Run a batch through the conv blocks, then the classifier head."""
        x = self.features(x) # extract spatial features via conv blocks
        x = self.classifier(x) # flatten and score each vehicle class
        return x

model = Net().to(DEVICE)
device = DEVICE  # lower-case alias used by the training/eval cells below

print(model)
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "22e71032",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Loss fn and optimizer"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "54d11a04",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
import torch.optim as optim

# Cross-entropy combines log-softmax with negative log-likelihood, so the
# network can output raw logits directly.
criterion = nn.CrossEntropyLoss()

# Adam: adaptive per-parameter learning rates with built-in momentum tracking.
optimizer = optim.Adam(model.parameters(), lr=0.001)
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "572d80e3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Task 4: Train Model\n",
|
||||
"\n",
|
||||
"Track loss and accuracy per epoch — stored in lists for plotting"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "374d0590",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
train_losses, train_accs = [], []  # per-epoch history for the curves plot

NUM_EPOCHS = 30

for epoch in range(NUM_EPOCHS):
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()              # clear gradients from the previous step
        outputs = model(inputs)            # forward pass
        loss = criterion(outputs, labels)  # compare against ground truth
        loss.backward()                    # backprop: gradient of the loss
        optimizer.step()                   # Adam weight update from the gradients

        running_loss += loss.item()           # accumulate scalar loss
        _, predicted = torch.max(outputs, 1)  # argmax over class scores
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    epoch_loss = running_loss / len(train_loader)  # mean batch loss
    epoch_acc = 100 * correct / total              # accuracy over the epoch

    train_losses.append(epoch_loss)
    train_accs.append(epoch_acc)

    print(f'Epoch {epoch+1}: Loss={epoch_loss:.3f}, Accuracy={epoch_acc:.2f}%')

print('Finished Training')
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "26ab705a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Task 6 (Bonus): Plot Loss & Accuracy Curves\n",
|
||||
"\n",
|
||||
"Visualises how loss decreased and accuracy improved across 30 epochs [credit: Claude]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c71ee0ff",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))

# Panel spec: (axis, data series, line colour, title, y-axis label).
panels = [
    (ax1, train_losses, 'steelblue', 'Training Loss', 'Loss'),
    (ax2, train_accs, 'darkorange', 'Training Accuracy', 'Accuracy (%)'),
]
for ax, series, color, title, ylabel in panels:
    ax.plot(series, color=color, linewidth=2)
    ax.set_title(title)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(ylabel)
    ax.grid(True, alpha=0.3)

# Shared title, then save the figure before showing it.
plt.suptitle('Training Curves', fontsize=14, fontweight='bold')
plt.tight_layout()
os.makedirs('../results', exist_ok=True)
plt.savefig('../results/training_curves.png', dpi=150, bbox_inches='tight')
plt.show()
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b3bfda75",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Save Model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2bf2b9a2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
# Persist only the learned parameters (state_dict) — the lightweight,
# recommended PyTorch serialization format.
PATH = '../models/final-classifier.pth'
os.makedirs('../models', exist_ok=True)
torch.save(model.state_dict(), PATH)
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "057d5d72",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Task 5: Final Accuracy\n",
|
||||
"Evaluate on both train and test sets with Dropout disabled (model.eval())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9e54f566",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
model.eval()  # inference mode: Dropout disabled, BatchNorm uses running stats

def _accuracy_counts(loader):
    """Return (correct, total) prediction counts over `loader`, without gradients."""
    correct, total = 0, 0
    with torch.no_grad():  # forward pass only — no gradient bookkeeping needed
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)  # argmax = predicted class
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return correct, total

# Evaluate on both splits with the identical procedure.
train_correct, train_total = _accuracy_counts(train_loader)
test_correct, test_total = _accuracy_counts(test_loader)

print(f'Final Train Accuracy : {100 * train_correct / train_total:.2f}%')
print(f'Final Test Accuracy : {100 * test_correct / test_total:.2f}%')
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "60666242",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Credit Claude: Testing accuracy per class"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8cc7ed40",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
# Per-class test accuracy: shows which vehicle classes the model confuses.
correct_pred = {classname: 0 for classname in classes}  # correct predictions per class
total_pred = {classname: 0 for classname in classes}    # total test images seen per class

model.eval()  # inference mode: Dropout off, BatchNorm uses running stats
with torch.no_grad():
    for data in test_loader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predictions = torch.max(outputs, 1)  # predicted class index per sample
        for label, prediction in zip(labels, predictions):
            if label == prediction:
                correct_pred[classes[label]] += 1
            total_pred[classes[label]] += 1

for classname, correct_count in correct_pred.items():
    total = total_pred[classname]
    if total == 0:
        # Guard: the random 80/20 split can leave a rare class with zero
        # test samples; the original raised ZeroDivisionError here.
        print(f'Accuracy for class: {classname:10s} has no test samples')
        continue
    accuracy = 100 * float(correct_count) / total
    print(f'Accuracy for class: {classname:10s} is {accuracy:.1f}%')
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.15"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Reference in New Issue
Block a user