04_Multi-Output And Multi-Input Architectures
{Datacamp}
1) Overview:
Content:
Multi-input models.
Two-input dataset.
Two-input model.
Training two-input model.
Multi-output models.
Two-output Dataset and DataLoader.
Two-output model architecture.
Training multi-output models.
Evaluation of multi-output models and loss weighting.
Multi-output model evaluation.
Loss weighting.
Wrap-up.
2) Detail:
Multi-input Model
Two-input Dataset.
from PIL import Image
from torch.utils.data import Dataset

class OmniglotDataset(Dataset):
    def __init__(self, transform, samples):
        self.transform = transform
        self.samples = samples

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        # Each sample: (image path, one-hot alphabet vector, character label)
        img_path, alphabet, label = self.samples[idx]
        img = Image.open(img_path).convert('L')  # load as grayscale
        img = self.transform(img)
        return img, alphabet, label
print(samples[0])
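For orientation, a minimal sketch of how one entry of samples could be built; the path, alphabet index, and label below are hypothetical, and the 30-dim one-hot vector matches the alphabet_layer input size used later:
import torch

alphabet_idx = 5                          # hypothetical alphabet index (0..29)
alphabet_onehot = torch.zeros(30)         # 30 alphabets in this Omniglot subset
alphabet_onehot[alphabet_idx] = 1.0
sample = ('path/to/char.png', alphabet_onehot, 42)  # hypothetical path and label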
Tensor Concatenation
x = torch.tensor([[1, 2, 3]])
y = torch.tensor([[4, 5, 6]])
torch.cat((x, y), dim=0)
# tensor([[1, 2, 3],
#         [4, 5, 6]])   -> shape (2, 3): stacked along rows
torch.cat((x, y), dim=1)
# tensor([[1, 2, 3, 4, 5, 6]])   -> shape (1, 6): joined along columns
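Concatenation requires the non-concatenated dimensions to match, which is exactly the situation in the two-input model below: both embeddings share the batch dimension. A small sketch with dummy tensors:
a = torch.randn(3, 128)                # e.g. image embeddings, batch of 3
b = torch.randn(3, 8)                  # e.g. alphabet embeddings, batch of 3
print(torch.cat((a, b), dim=1).shape)  # torch.Size([3, 136])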
Two-input Architecture
import torch
import torch.nn as nn

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # Image branch: 1x64x64 grayscale image -> 128-dim embedding
        self.image_layer = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, padding=1),
            nn.MaxPool2d(kernel_size=2),
            nn.ELU(),
            nn.Flatten(),
            nn.Linear(16*32*32, 128),
        )
        # Alphabet branch: 30-dim one-hot vector -> 8-dim embedding
        self.alphabet_layer = nn.Sequential(
            nn.Linear(30, 8),
            nn.ELU(),
        )
        # Classifier over the concatenated embeddings (964 character classes)
        self.classifier = nn.Sequential(
            nn.Linear(128 + 8, 964),
        )

    def forward(self, x_image, x_alphabet):
        x_image = self.image_layer(x_image)
        x_alphabet = self.alphabet_layer(x_alphabet)
        # Concatenate the two embeddings along the feature dimension
        x = torch.cat((x_image, x_alphabet), dim=1)
        return self.classifier(x)
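A quick shape check with dummy tensors (my own sketch, not course code) confirms the Linear sizes: a 64x64 input halves to 32x32 after pooling, giving 16*32*32 flattened features:
net = Net()
x_image = torch.randn(3, 1, 64, 64)    # batch of 3 grayscale 64x64 images
x_alphabet = torch.zeros(3, 30)        # batch of 3 one-hot alphabet vectors
print(net(x_image, x_alphabet).shape)  # torch.Size([3, 964])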
Training Loop
import torch.optim as optim

net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.01)

for epoch in range(10):
    for img, alpha, labels in dataloader_train:
        optimizer.zero_grad()
        outputs = net(img, alpha)          # two inputs: image and alphabet vector
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
DataLoader
from torch.utils.data import DataLoader
from torchvision import transforms

dataset_train = OmniglotDataset(
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize((64, 64)),
    ]),
    samples=samples,
)
dataloader_train = DataLoader(
    dataset_train, shuffle=True, batch_size=3,
)
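To sanity-check the pipeline, one can pull a single batch; the shapes below assume 64x64 images, 30-dim one-hot alphabet vectors, and batch_size=3:
img, alpha, labels = next(iter(dataloader_train))
print(img.shape)     # torch.Size([3, 1, 64, 64])
print(alpha.shape)   # torch.Size([3, 30])
print(labels.shape)  # torch.Size([3])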
Multi-output Model
Two-output Dataset
The Dataset is unchanged from the two-input case: __getitem__ still returns (img, alphabet, label). What changes is how the tuple is used: before, alphabet was a model input; now both alphabet and label serve as targets for the two output heads.
print(samples[0])
Two-output Architecture
class Net(nn.Module):
    def __init__(self, num_alpha, num_char):
        super(Net, self).__init__()
        # Shared image backbone: 1x64x64 grayscale image -> 128-dim embedding
        self.image_layer = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, padding=1),
            nn.MaxPool2d(kernel_size=2),
            nn.ELU(),
            nn.Flatten(),
            nn.Linear(16*32*32, 128),
        )
        # Two heads on top of the shared embedding (30 alphabets, 964 characters)
        self.classifier_alpha = nn.Linear(128, num_alpha)
        self.classifier_char = nn.Linear(128, num_char)

    def forward(self, x):
        x_image = self.image_layer(x)
        output_alpha = self.classifier_alpha(x_image)
        output_char = self.classifier_char(x_image)
        return output_alpha, output_char
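Again a dummy forward pass (my own sketch) to confirm that both heads share one input and return separate logits:
net = Net(num_alpha=30, num_char=964)
x = torch.randn(3, 1, 64, 64)           # dummy batch of grayscale images
out_alpha, out_char = net(x)
print(out_alpha.shape, out_char.shape)  # torch.Size([3, 30]) torch.Size([3, 964])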
Training Loop
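The loop reuses net, criterion, and optimizer; a minimal setup, assuming the two-headed Net above and the same SGD settings as the two-input model:
net = Net(num_alpha=30, num_char=964)
criterion = nn.CrossEntropyLoss()   # applied separately to each head
optimizer = optim.SGD(net.parameters(), lr=0.01)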
for epoch in range(10):
    for images, labels_alpha, labels_char in dataloader_train:
        optimizer.zero_grad()
        outputs_alpha, outputs_char = net(images)
        loss_alpha = criterion(outputs_alpha, labels_alpha)
        loss_char = criterion(outputs_char, labels_char)
        # Equal weighting: simply sum the two per-task losses
        loss = loss_alpha + loss_char
        loss.backward()
        optimizer.step()
Model Evaluation
from torchmetrics import Accuracy

acc_alpha = Accuracy(task="multiclass", num_classes=30)
acc_char = Accuracy(task="multiclass", num_classes=964)

net.eval()
with torch.no_grad():
    for images, labels_alpha, labels_char in dataloader_test:
        outputs_alpha, outputs_char = net(images)
        _, pred_alpha = torch.max(outputs_alpha, 1)
        _, pred_char = torch.max(outputs_char, 1)
        acc_alpha(pred_alpha, labels_alpha)  # accumulates batch statistics
        acc_char(pred_char, labels_char)
print(f"Alphabet: {acc_alpha.compute()}")
print(f"Character: {acc_char.compute()}")
Loss Weighting
## Scale more important loss
loss = loss_alpha + loss_char * 2
## Assign weights that sum to 1
loss = 0.33 * loss_alpha + 0.67 * loss_char
Warning: Losses on different scales
Losses must be on the same scale before they are weighted and added.
Ex:
Predict house price => MSE loss.
Predict quality: low, medium, high => CrossEntropy loss.
Cross-entropy loss is typically in the single digits, while MSE can reach tens of thousands, so the price loss would dominate and the model would effectively ignore the quality task.
Solution: normalize both losses before weighting and adding.
# Divide each loss by its maximum so both land on a comparable [0, 1] scale
loss_price = loss_price / torch.max(loss_price)
loss_quality = loss_quality / torch.max(loss_quality)
loss = 0.7 * loss_price + 0.3 * loss_quality
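A numeric sketch of the scale problem (made-up values, not real results):
loss_price = torch.tensor(25000.0)   # MSE on prices: large magnitude
loss_quality = torch.tensor(1.2)     # cross-entropy: single digits
naive = loss_price + loss_quality    # ~25001.2: quality contributes almost nothing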