class BB_model(nn.Module):
    """Two-head network on a ResNet-34 backbone.

    One head classifies the road sign (4 classes); the other regresses the
    bounding box (4 coordinates). Both heads share the pooled 512-d feature.
    """

    def __init__(self):
        super(BB_model, self).__init__()
        resnet = models.resnet34(pretrained=True)
        # Keep only the convolutional trunk (drop avgpool + fc).
        layers = list(resnet.children())[:8]
        # Split the backbone in two so intermediate features can be tapped.
        self.features1 = nn.Sequential(*layers[:6])
        self.features2 = nn.Sequential(*layers[6:])
        self.classifier = nn.Sequential(nn.BatchNorm1d(512), nn.Linear(512, 4))
        self.bb = nn.Sequential(nn.BatchNorm1d(512), nn.Linear(512, 4))

    def forward(self, x):
        x = self.features1(x)
        x = self.features2(x)
        x = F.relu(x)
        # Use the functional pooling API instead of constructing a fresh
        # nn.AdaptiveAvgPool2d module on every forward pass (same result,
        # no per-call module allocation).
        x = F.adaptive_avg_pool2d(x, (1, 1))
        x = x.view(x.shape[0], -1)
        return self.classifier(x), self.bb(x)

# 7. Training
# For the loss we need both a classification loss and a bounding-box
# regression loss, so we use a combination of cross-entropy and L1 loss
# (the sum of absolute differences between true and predicted coordinates).
# The L1 loss is scaled down by 1000 so that the classification and
# regression terms are in a similar range. Apart from that it is a standard
# PyTorch training loop (using the GPU):
def update_optimizer(optimizer, lr):
    """Set the learning rate of every parameter group to `lr`."""
    for param_group in optimizer.param_groups:
        param_group["lr"] = lr


def train_epocs(model, optimizer, train_dl, val_dl, epochs=10, C=1000):
    """Train `model` for `epochs` epochs on `train_dl`, validating on `val_dl`.

    Loss = cross-entropy (classification, summed over the batch)
         + L1 bounding-box loss / C.
    `C` rescales the regression term so both losses have similar magnitude.
    Returns the final epoch's mean training loss per sample.
    """
    idx = 0
    for i in range(epochs):
        model.train()
        total = 0
        sum_loss = 0
        for x, y_class, y_bb in train_dl:
            batch = y_class.shape[0]
            x = x.cuda().float()
            y_class = y_class.cuda()
            y_bb = y_bb.cuda().float()
            out_class, out_bb = model(x)
            loss_class = F.cross_entropy(out_class, y_class, reduction="sum")
            loss_bb = F.l1_loss(out_bb, y_bb, reduction="none").sum(1)
            loss_bb = loss_bb.sum()
            loss = loss_class + loss_bb / C
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            idx += 1
            total += batch
            sum_loss += loss.item()
        train_loss = sum_loss / total
        # BUG FIX: the original called val_metrics(model, valid_dl, C), using
        # the *global* valid_dl and silently ignoring the val_dl argument.
        val_loss, val_acc = val_metrics(model, val_dl, C)
        print("train_loss %.3f val_loss %.3f val_acc %.3f" % (train_loss, val_loss, val_acc))
    return sum_loss / total


def val_metrics(model, valid_dl, C=1000):
    """Evaluate `model` on `valid_dl`.

    Returns (mean combined loss per sample, classification accuracy).
    """
    model.eval()
    total = 0
    sum_loss = 0
    correct = 0
    # Gradients are not needed for evaluation; no_grad saves time and memory
    # and does not change the computed values.
    with torch.no_grad():
        for x, y_class, y_bb in valid_dl:
            batch = y_class.shape[0]
            x = x.cuda().float()
            y_class = y_class.cuda()
            y_bb = y_bb.cuda().float()
            out_class, out_bb = model(x)
            loss_class = F.cross_entropy(out_class, y_class, reduction="sum")
            loss_bb = F.l1_loss(out_bb, y_bb, reduction="none").sum(1)
            loss_bb = loss_bb.sum()
            loss = loss_class + loss_bb / C
            _, pred = torch.max(out_class, 1)
            correct += pred.eq(y_class).sum().item()
            sum_loss += loss.item()
            total += batch
    return sum_loss / total, correct / total


if __name__ == "__main__":
    model = BB_model().cuda()
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = torch.optim.Adam(parameters, lr=0.006)
    train_epocs(model, optimizer, train_dl, valid_dl, epochs=15)

# 8. Testing
# Now that training is done, we can pick a random image and test our model
# on it. Even though we only have a fairly small number of training images,
# we end up with a pretty decent prediction on the test image.
使用手机拍摄真实照片并测试模型将是一项有趣的练习。另一个有趣的实验是:在不使用任何数据增强的情况下训练模型,并将其与使用数据增强训练的模型进行比较。
# Resize the test image to the same size used during training.
im = read_image('./road_signs/images_resized/road789.png')
im = cv2.resize(im, (int(1.49 * 300), 300))
cv2.imwrite('./road_signs/road_signs_test/road789.jpg',
            cv2.cvtColor(im, cv2.COLOR_RGB2BGR))

# Build a one-item test Dataset; the bbox and label are dummies — only the
# image matters for prediction.
test_ds = RoadDataset(
    pd.DataFrame([{'path': './road_signs/road_signs_test/road789.jpg'}])['path'],
    pd.DataFrame([{'bb': np.array([0, 0, 0, 0])}])['bb'],
    pd.DataFrame([{'y': [0]}])['y'],
)
x, y_class, y_bb = test_ds[0]
xx = torch.FloatTensor(x[None,])
xx.shape

# Prediction.
# BUG FIX: switch the model to eval mode first — the BatchNorm1d heads cannot
# run in training mode on a batch of size 1, and we must not update BN
# running statistics with the test image. no_grad skips gradient tracking.
model.eval()
with torch.no_grad():
    out_class, out_bb = model(xx.cuda())
out_class, out_bb

文章插图
总结:现在我们已经介绍了目标检测的基本原理,并从头开始实现了它。您可以将这些想法扩展到多目标的情况,并尝试 RCNN 和 YOLO 等更复杂的模型!
推荐阅读
- 16个优秀的开源微信小程序项目,接单赚钱利器!
- 让Java起飞的技术...
- 即将到来的 Vue 3 “Vapor Mode”
- 学会使用Java的远程调试工具,解决难题
- Oracle数据库调优实战:优化SQL查询的黄金法则!
- JVM的调优常用参数
- API请求重试的8种方法,你用哪种?
- 利用Docker简化机器学习应用程序的部署和可扩展性
- 2024年的后端和Web开发趋势
- 警惕“应用推荐”背后的信贷陷阱
