A ResNet architecture fully implemented from scratch, following the original paper: https://arxiv.org/pdf/1512.03385.pdf
Helper¶
Nested Modules¶
We first need new classes that support architectures that aren't straight passes through a fixed sequence of layers. In PyTorch this is normally handled by autograd in the forward pass; since we define the gradients in each module by hand, we need to be a bit more clever.
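To see why this matters, here is a minimal standalone sketch (deliberately not using this notebook's actual Module API) of the gradient-mixing problem a skip connection creates: the output depends on the input through two paths, so the upstream gradient has to be summed across both branches in the hand-written backward pass.

import numpy as np

class Square:
    "Toy layer: out = x**2"
    def forward(self, x):
        self.x = x
        return x ** 2
    def backward(self, grad_out):
        return grad_out * 2 * self.x

class Skip:
    "out = f(x) + x, so dL/dx = f.backward(dL/dout) + dL/dout"
    def __init__(self, inner): self.inner = inner
    def forward(self, x):
        return self.inner.forward(x) + x
    def backward(self, grad_out):
        # gradient flows through the inner layer AND the identity branch
        return self.inner.backward(grad_out) + grad_out

skip = Skip(Square())
x = np.array([1.0, 2.0, 3.0])
out = skip.forward(x)                  # x**2 + x
grad = skip.backward(np.ones_like(x))  # 2*x + 1
print(grad)                            # [3. 5. 7.]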
Testing the gradients and the outputs:
m = SequentialModel(TestMixingGrads(), Linear(25, 10, False))
db = get_mnist_databunch()
lf = CrossEntropy()
optimizer = adam_opt()
m
learn = Learner(m, lf, optimizer, db)
run = Runner(learn, [CheckGrad()])  # CheckGrad verifies our hand-defined gradients
run.fit(1, 0.1)
Refactored Conv Layers¶
Before we can start building ResNets, we first define a few helper modules that abstract away the repeated layer patterns:
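The main helper is the ConvBatch module used in the ResNet stem below. As a rough sketch of the pattern it abstracts, written here with torch.nn for brevity (the notebook's own version composes its from-scratch Conv and BatchNorm modules instead):

import torch.nn as nn

# Convolution followed by batch norm; the conv bias is redundant when
# batch norm follows, so it is disabled.
def conv_batch(c_in, c_out, kernel_size=3, stride=1):
    return nn.Sequential(
        nn.Conv2d(c_in, c_out, kernel_size, stride=stride,
                  padding=kernel_size // 2, bias=False),
        nn.BatchNorm2d(c_out),
    )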
ResBlocks¶
The fully built-up ResNet blocks that implement the skip connections characteristic of a ResNet. The ResLayer below stacks them into a stage:
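For reference, a minimal sketch of a basic two-convolution residual block from the paper, again in torch.nn style (reusing the conv_batch sketch above). The notebook's BasicResBlock is assumed to follow this shape; expansion is 1 for basic blocks and 4 for bottleneck blocks, which is why ResLayer multiplies channel counts by block.expansion.

import torch.nn as nn

class BasicBlockSketch(nn.Module):
    expansion = 1
    def __init__(self, c_in, c_out, stride=1):
        super().__init__()
        self.convs = nn.Sequential(
            conv_batch(c_in, c_out, stride=stride),
            nn.ReLU(),
            conv_batch(c_out, c_out),
        )
        # When the shape changes, project the identity with a strided 1x1 conv
        # so it can still be added to the conv branch
        self.shortcut = (nn.Identity() if stride == 1 and c_in == c_out
                         else conv_batch(c_in, c_out, kernel_size=1, stride=stride))
        self.act = nn.ReLU()
    def forward(self, x):
        return self.act(self.convs(x) + self.shortcut(x))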
class ResLayer(NestedModel):
    "A sequence of `n` residual blocks taking `n_in` channels to `n_out`"
    def __init__(self, block, n, n_in, n_out, *args, **kwargs):
        self.block, self.n, self.n_in, self.n_out = block, n, n_in, n_out
        # The first block downsamples (stride 2) whenever the channel count
        # changes; the remaining n-1 blocks preserve shape
        downsampling = 2 if n_in != n_out else 1
        layers = [ResBlock(n_in, n_out, block, stride=downsampling),
                  *[ResBlock(n_out * block.expansion, n_out, block, stride=1)
                    for i in range(n - 1)]]
        self.layers = SequentialModel(*layers)

    def __repr__(self): return f'ResLayer(\n{self.layers}\n)'
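As a quick illustration, a hypothetical ResLayer instantiation (with BasicResBlock as defined earlier in the notebook): because the channel count changes from 64 to 128, the first block uses stride 2 and the second keeps the resolution.

layer = ResLayer(BasicResBlock, n=2, n_in=64, n_out=128)
layer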
ResNet¶
class ResNet(NestedModel):
    "Class to create ResNet architectures of dynamic sizing"
    def __init__(self, block, layer_sizes=[64, 128, 256, 512], depths=[2, 2, 2, 2], c_in=3,
                 c_out=1000, im_size=(28, 28), activation=ReLU, *args, **kwargs):
        self.layer_sizes = layer_sizes
        # Stem: 7x7 stride-2 conv followed by a stride-2 max pool, as in the paper
        gate = [
            Reshape(c_in, im_size[0], im_size[1]),
            ConvBatch(c_in, self.layer_sizes[0], stride=2, kernel_size=7),
            activation(),
            Pool(max_pool, ks=3, stride=2, padding=Padding(1))
        ]
        # Body: one ResLayer per stage; (n_in, n_out) pairs step through the sizes
        self.conv_sizes = list(zip(self.layer_sizes, self.layer_sizes[1:]))
        body = [
            ResLayer(block, depths[0], self.layer_sizes[0], self.layer_sizes[0],
                     *args, Activation=activation, **kwargs),
            *[ResLayer(block, n, n_in * block.expansion, n_out, Activation=activation)
              for (n_in, n_out), n in zip(self.conv_sizes, depths[1:])]
        ]
        # Head: average pool, flatten, and a final linear classifier
        tail = [
            Pool(avg_pool, ks=1, stride=1, padding=None),
            Flatten(),
            Linear(self.layer_sizes[-1] * block.expansion, c_out, relu_after=False)
        ]
        self.layers = SequentialModel(*gate, *body, *tail)

    def __repr__(self): return f'ResNet: \n{self.layers}'
res = ResNet(BasicResBlock)
res
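The GetResnet calls below use a small factory defined elsewhere in the notebook that maps a standard depth to its block type and per-stage depths. A minimal sketch of what such a factory might look like, assuming a hypothetical BottleneckResBlock name for the expansion-4 block; the per-stage depths follow Table 1 of the paper:

def get_resnet_sketch(depth, c_in=3, c_out=1000):
    # depth -> (block type, blocks per stage), per He et al. 2015, Table 1
    configs = {
        18:  (BasicResBlock,      [2, 2, 2, 2]),
        34:  (BasicResBlock,      [3, 4, 6, 3]),
        50:  (BottleneckResBlock, [3, 4, 6, 3]),
        101: (BottleneckResBlock, [3, 4, 23, 3]),
        152: (BottleneckResBlock, [3, 8, 36, 3]),
    }
    block, depths = configs[depth]
    return ResNet(block, depths=depths, c_in=c_in, c_out=c_out)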
Testing out the ResNet architectures:
GetResnet(18, c_in=1, c_out=10)
GetResnet(34, c_in=1, c_out=10)
GetResnet(50, c_in=1, c_out=10)
GetResnet(101, c_in=1, c_out=10)
GetResnet(152, c_in=1, c_out=10)
run = get_runner(model=GetResnet(18, c_in=1, c_out=10))