02_Images and Convolutional Neural Networks
{Datacamp}
1) Overview:
Content:
Handling images with PyTorch.
Image dataset.
Data augmentation.
Data augmentation in PyTorch.
Convolutional Neural Networks.
The convolutional layer.
Building convolutional networks.
Training image classifiers.
Choosing augmentations.
Dataset with augmentations.
Image classifier training loop.
Evaluating image classifiers.
Multi-class model evaluation.
Analyzing metrics per class.
2) Detail:
Handling Images with PyTorch
# Load an image-classification dataset from a folder-per-class directory layout.
from torchvision.datasets import ImageFolder  # fixed: module is "datasets", not "dataset"
from torchvision import transforms

# Convert PIL images to tensors, then resize every image to 128x128.
train_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((128, 128)),
])
dataset_train = ImageFolder("data/clouds_train", transform=train_transforms)
Display Images
# Display one training image with matplotlib.
from torch.utils.data import DataLoader  # fixed: class name is DataLoader, not Dataloader
import matplotlib.pyplot as plt

dataloader_train = DataLoader(
    dataset_train,
    shuffle=True,
    batch_size=1,  # one image per batch so we can display a single sample
)

# Grab one (image, label) pair from the loader.
image, label = next(iter(dataloader_train))
print(image.shape)  # (batch, channels, height, width) — presumably (1, 3, 128, 128)
# Drop the batch dim and move channels last: (C, H, W) -> (H, W, C) for matplotlib.
image = image.squeeze().permute(1, 2, 0)
print(image.shape)
plt.imshow(image)  # fixed: imshow() needs the image as an argument
plt.show()
Display Augmentation
# Training-time data augmentation: random flips, rotations, and contrast changes.
train_transforms = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(45),  # fixed: RandomRotation requires a degrees argument
    transforms.RandomAutocontrast(),
    transforms.ToTensor(),
    transforms.Resize((128, 128)),
])
dataset_train = ImageFolder(
    "data/clouds_train",  # fixed path typo: was "data/clouds_chatrain"
    transform=train_transforms,
)
Convolutional Neural Network
#nn.Conv2d(3, 32, kernel_size = 3, padding = 1)
#nn.MaxPool2d(kernel_size = 2)
class Net(nn.Module):
    """Small CNN classifier: two conv/ELU/pool stages, then a linear head.

    The flattened feature size 64*16*16 assumes 64x64 RGB input
    (two stride-2 pools: 64 -> 32 -> 16). NOTE(review): the training
    transforms above resize to 128x128, which would give 64*32*32 —
    confirm which input size the course intends.
    """

    def __init__(self, num_classes):
        super().__init__()
        # fixed: the original was missing commas between the Sequential layers
        self.feature_extractor = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ELU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ELU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Flatten(),
        )
        self.classifier = nn.Linear(64 * 16 * 16, num_classes)

    def forward(self, x):
        # Extract conv features, then map to per-class logits.
        x = self.feature_extractor(x)
        x = self.classifier(x)
        return x
Convolutional Neural Network
#nn.Conv2d(3, 32, kernel_size = 3, padding = 1)
#nn.MaxPool2d(kernel_size = 2)
class Net(nn.Module):
    """CNN for multi-class image classification (duplicate of the block above).

    Feature extractor: Conv(3->32) + ELU + pool, Conv(32->64) + ELU + pool,
    flatten; classifier: single linear layer. 64*16*16 implies 64x64 input
    (two /2 pools) — TODO confirm against the 128x128 train resize above.
    """

    def __init__(self, num_classes):
        super().__init__()
        # fixed: commas were missing between layers, which is a syntax error
        self.feature_extractor = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ELU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ELU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Flatten(),
        )
        self.classifier = nn.Linear(64 * 16 * 16, num_classes)

    def forward(self, x):
        # Features -> logits; no softmax here (CrossEntropyLoss expects logits).
        x = self.feature_extractor(x)
        x = self.classifier(x)
        return x
Cross-Entropy loss
criterion = nn.CrossEntropyLoss()
Training
# Train the 7-class cloud classifier with Adam for ten epochs.
net = Net(num_classes=7)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.01)

for epoch in range(10):
    # One full pass over the training set per epoch.
    for batch_images, batch_labels in dataloader_train:
        optimizer.zero_grad()                         # clear gradients from the previous step
        logits = net(batch_images)
        batch_loss = criterion(logits, batch_labels)
        batch_loss.backward()                         # backpropagate
        optimizer.step()                              # update parameters
Evaluation
# Evaluation transforms: no data augmentation at test time.
# NOTE(review): test images are resized to 64x64 while training used 128x128 —
# 64x64 is what the 64*16*16 classifier head expects; confirm the intended size.
test_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((64, 64)),
])
dataset_test = ImageFolder(  # fixed: class is ImageFolder, not Image_Folder
    "clouds_test",
    transform=test_transforms,  # fixed typo: was test_trasnforms
)
Metrics
Precision and Recall: Multi-class Classification
Precision: Fraction of cumulus-predictions that were correct.
Recall: Fraction of all cumulus examples correctly predicted.
Averaging multi-class metrics
With 7 classes, we have 7 precision and 7 recall scores
We can analyze them per-class, or aggregate:
Micro average: global calculation - used for imbalanced datasets.
Macro average: mean of per-class metrics - care about performance on small classes.
Weighted average: weighted mean of per-class metrics - consider errors in larger classes as more important.
from torchmetrics import Recall


def _multiclass_recall(average):
    """Build a 7-class recall metric with the given averaging mode."""
    return Recall(task="multiclass", num_classes=7, average=average)


recall_per_class = _multiclass_recall(None)        # one recall score per class
recall_micro = _multiclass_recall("micro")         # global calculation
recall_macro = _multiclass_recall("macro")         # unweighted mean over classes
recall_weighted = _multiclass_recall("weighted")   # class-frequency-weighted mean
Evaluation Loop
# Evaluate macro-averaged precision and recall over the test set.
from torchmetrics import Precision, Recall

metric_precision = Precision(task="multiclass", num_classes=7, average="macro")
metric_recall = Recall(task="multiclass", num_classes=7, average="macro")

net.eval()  # switch to inference mode (affects dropout/batchnorm layers)
with torch.no_grad():
    for images, labels in dataloader_test:
        outputs = net(images)
        _, preds = torch.max(outputs, 1)  # fixed: was torch.max(output, 1) — undefined name
        metric_precision(preds, labels)   # accumulate per-batch statistics
        metric_recall(preds, labels)

precision = metric_precision.compute()  # aggregate over all batches
recall = metric_recall.compute()
Analyze Performance per Class
# Per-class recall: average=None returns one score for each of the 7 classes.
metric_recall = Recall(task="multiclass", num_classes=7, average=None)

net.eval()
with torch.no_grad():
    for images, labels in dataloader_test:
        outputs = net(images)
        _, preds = torch.max(outputs, 1)  # fixed: was torch.max(output, 1) — undefined name
        metric_recall(preds, labels)

recall = metric_recall.compute()

# Map each class name to its recall via the dataset's class-to-index mapping.
# To check class indices: dataset_test.class_to_idx
recall_per_class = {k: recall[v].item() for k, v in dataset_test.class_to_idx.items()}