Building an architecture to detect the class and position of multiple entities in a single image. Based on the convolutional YOLOv3 architecture. Paper:
Dataset Preparation
pascal_root = Path('PascalData')
j = json.load((pascal_root/'PASCAL_VOC'/'pascal_train2007.json').open())
j.keys()
dict_keys(['images', 'type', 'annotations', 'categories'])
draw_text[source]
draw_text(ax,xy,txt,sz=14)
Draw the class label txt above the bounding box at position xy, using font size sz
ax = show_img(im)
b = bb_hw(im0_a[0])
draw_rect(ax, b)
draw_text(ax, b[:2], cats[im0_a[1]])
draw_idx(12)
torch.Size([3, 333, 500])
Architecture
get_res_body[source]
get_res_body(size,n_in,c_out=10)
Return the body of a resnet with given size and n_in input channels
res_body = get_res_body(34, 3)
res_body(torch.randn(1,3, 224, 224)).shape
torch.Size([1, 512, 7, 7])
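We now have to take the output of the ResNet body and turn it into k predictions of size (4 + c) each, where k is the number of anchor boxes: 4 outputs for the bounding box coordinates and c activations representing the predicted class. (In the head printed below, each prediction layer has a 4-channel bbox_conv and an 11-channel class_conv.)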
ssd_head = SSDHead(9, 10)
ssd = GenericModel(res_body, ssd_head)
ssd
GenericModel(
(body): Sequential(
(0): ConvBatchLayer(
(conv): AutoConv(
(conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
)
(bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): Identity()
)
(1): ConvBatchLayer(
(conv): AutoConv(
(conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): Identity()
)
(2): ConvBatchLayer(
(conv): AutoConv(
(conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): Identity()
)
(3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
(4): Sequential(
(0): BaseRes(
(blocks): Sequential(
(0): ConvBatchLayer(
(conv): AutoConv(
(conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): Identity()
)
(1): ReLU()
(2): ConvBatchLayer(
(conv): AutoConv(
(conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): Identity()
)
)
(identity): Identity()
(activation): ReLU()
)
(1): BaseRes(
(blocks): Sequential(
(0): ConvBatchLayer(
(conv): AutoConv(
(conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): Identity()
)
(1): ReLU()
(2): ConvBatchLayer(
(conv): AutoConv(
(conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): Identity()
)
)
(identity): Identity()
(activation): ReLU()
)
(2): BaseRes(
(blocks): Sequential(
(0): ConvBatchLayer(
(conv): AutoConv(
(conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): Identity()
)
(1): ReLU()
(2): ConvBatchLayer(
(conv): AutoConv(
(conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): Identity()
)
)
(identity): Identity()
(activation): ReLU()
)
)
(5): Sequential(
(0): BaseRes(
(blocks): Sequential(
(0): ConvBatchLayer(
(conv): AutoConv(
(conv): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): Identity()
)
(1): ReLU()
(2): ConvBatchLayer(
(conv): AutoConv(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): Identity()
)
)
(identity): Sequential(
(0): AvgPool2d(kernel_size=2, stride=2, padding=0)
(1): ConvBatchLayer(
(conv): AutoConv(
(conv): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1))
)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): Identity()
)
)
(activation): ReLU()
)
(1): BaseRes(
(blocks): Sequential(
(0): ConvBatchLayer(
(conv): AutoConv(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): Identity()
)
(1): ReLU()
(2): ConvBatchLayer(
(conv): AutoConv(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): Identity()
)
)
(identity): Identity()
(activation): ReLU()
)
(2): BaseRes(
(blocks): Sequential(
(0): ConvBatchLayer(
(conv): AutoConv(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): Identity()
)
(1): ReLU()
(2): ConvBatchLayer(
(conv): AutoConv(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): Identity()
)
)
(identity): Identity()
(activation): ReLU()
)
(3): BaseRes(
(blocks): Sequential(
(0): ConvBatchLayer(
(conv): AutoConv(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): Identity()
)
(1): ReLU()
(2): ConvBatchLayer(
(conv): AutoConv(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): Identity()
)
)
(identity): Identity()
(activation): ReLU()
)
)
(6): Sequential(
(0): BaseRes(
(blocks): Sequential(
(0): ConvBatchLayer(
(conv): AutoConv(
(conv): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): Identity()
)
(1): ReLU()
(2): ConvBatchLayer(
(conv): AutoConv(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): Identity()
)
)
(identity): Sequential(
(0): AvgPool2d(kernel_size=2, stride=2, padding=0)
(1): ConvBatchLayer(
(conv): AutoConv(
(conv): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1))
)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): Identity()
)
)
(activation): ReLU()
)
(1): BaseRes(
(blocks): Sequential(
(0): ConvBatchLayer(
(conv): AutoConv(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): Identity()
)
(1): ReLU()
(2): ConvBatchLayer(
(conv): AutoConv(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): Identity()
)
)
(identity): Identity()
(activation): ReLU()
)
(2): BaseRes(
(blocks): Sequential(
(0): ConvBatchLayer(
(conv): AutoConv(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): Identity()
)
(1): ReLU()
(2): ConvBatchLayer(
(conv): AutoConv(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): Identity()
)
)
(identity): Identity()
(activation): ReLU()
)
(3): BaseRes(
(blocks): Sequential(
(0): ConvBatchLayer(
(conv): AutoConv(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): Identity()
)
(1): ReLU()
(2): ConvBatchLayer(
(conv): AutoConv(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): Identity()
)
)
(identity): Identity()
(activation): ReLU()
)
(4): BaseRes(
(blocks): Sequential(
(0): ConvBatchLayer(
(conv): AutoConv(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): Identity()
)
(1): ReLU()
(2): ConvBatchLayer(
(conv): AutoConv(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): Identity()
)
)
(identity): Identity()
(activation): ReLU()
)
(5): BaseRes(
(blocks): Sequential(
(0): ConvBatchLayer(
(conv): AutoConv(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): Identity()
)
(1): ReLU()
(2): ConvBatchLayer(
(conv): AutoConv(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): Identity()
)
)
(identity): Identity()
(activation): ReLU()
)
)
(7): Sequential(
(0): BaseRes(
(blocks): Sequential(
(0): ConvBatchLayer(
(conv): AutoConv(
(conv): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
)
(bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): Identity()
)
(1): ReLU()
(2): ConvBatchLayer(
(conv): AutoConv(
(conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): Identity()
)
)
(identity): Sequential(
(0): AvgPool2d(kernel_size=2, stride=2, padding=0)
(1): ConvBatchLayer(
(conv): AutoConv(
(conv): Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1))
)
(bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): Identity()
)
)
(activation): ReLU()
)
(1): BaseRes(
(blocks): Sequential(
(0): ConvBatchLayer(
(conv): AutoConv(
(conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): Identity()
)
(1): ReLU()
(2): ConvBatchLayer(
(conv): AutoConv(
(conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): Identity()
)
)
(identity): Identity()
(activation): ReLU()
)
(2): BaseRes(
(blocks): Sequential(
(0): ConvBatchLayer(
(conv): AutoConv(
(conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): Identity()
)
(1): ReLU()
(2): ConvBatchLayer(
(conv): AutoConv(
(conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): Identity()
)
)
(identity): Identity()
(activation): ReLU()
)
)
)
(head): SSDHead(
(relu): ReLU()
(drop): Dropout(p=0.1, inplace=False)
(initial_layer): StandardConv(
(layers): Sequential(
(0): ConvBatchLayer(
(conv): AutoConv(
(conv): Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): Identity()
)
(1): Dropout(p=0.1, inplace=False)
)
)
(conv_to_4): StandardConv(
(layers): Sequential(
(0): ConvBatchLayer(
(conv): AutoConv(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): Identity()
)
(1): Dropout(p=0.1, inplace=False)
)
)
(conv_to_2): StandardConv(
(layers): Sequential(
(0): ConvBatchLayer(
(conv): AutoConv(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): Identity()
)
(1): Dropout(p=0.1, inplace=False)
)
)
(conv_to_1): StandardConv(
(layers): Sequential(
(0): ConvBatchLayer(
(conv): AutoConv(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): Identity()
)
(1): Dropout(p=0.1, inplace=False)
)
)
(out_conv_4): PredictionConv(
(class_conv): AutoConv(
(conv): Conv2d(256, 11, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(bbox_conv): AutoConv(
(conv): Conv2d(256, 4, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
)
(out_conv_2): PredictionConv(
(class_conv): AutoConv(
(conv): Conv2d(256, 11, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(bbox_conv): AutoConv(
(conv): Conv2d(256, 4, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
)
(out_conv_1): PredictionConv(
(class_conv): AutoConv(
(conv): Conv2d(256, 11, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(bbox_conv): AutoConv(
(conv): Conv2d(256, 4, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
)
)
)
!python notebook2script.py SingleShotDetector.ipynb
Converted SingleShotDetector.ipynb to ModernArchitecturesFromPyTorch/nb_SingleShotDetector.py