Improved and refactored FCN modules as well as Parameters, Optimizers and a Training Loop

Better Cross Entropy

logsumexp[source]

logsumexp(inp)

Helper function that computes log(sum(exp(x))) in a numerically stable way for log_softmax

log_softmax_improved[source]

log_softmax_improved(inp)

Improved log_softmax that uses the LogSumExp trick to avoid overflow when exponentiating large activations

def logsumexp(inp):
    # Subtract each row's max before exponentiating so exp() cannot overflow,
    # then add it back: log(sum(exp(x))) = a + log(sum(exp(x - a)))
    a = inp.max(dim=1).values
    return a + ((inp-a[:,None]).exp().sum(-1)).log()

def log_softmax_improved(inp):
    # log_softmax(x) = x - logsumexp(x), applied row-wise
    return inp - logsumexp(inp).unsqueeze(-1)
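
As a quick sanity check, the stable version can be compared against PyTorch's built-in log_softmax on inputs large enough that a naive exp().sum().log() would overflow (a hypothetical snippet, assuming standard torch imports):

import torch
import torch.nn.functional as F

inp = torch.randn(4, 10) * 100  # magnitudes where a naive exp() returns inf
assert torch.allclose(log_softmax_improved(inp), F.log_softmax(inp, dim=-1))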

nll[source]

nll(inp, targ)

Computes the negative log likelihood

cross_entropy[source]

cross_entropy(inp, targ)

Uses log_softmax_improved and nll to compute cross entropy loss
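
The bodies of nll and cross_entropy are not listed above; a minimal sketch consistent with their signatures and with the gradient used in CrossEntropy.bwd below (an assumption, with targ taken to be integer class indices) would be:

def nll(inp, targ):
    # Pick out the log-probability of the correct class in each row, average, and negate
    return -inp[range(targ.shape[0]), targ.long()].mean()

def cross_entropy(inp, targ):
    return nll(log_softmax_improved(inp), targ)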

Initial Train Loop

accuracy[source]

accuracy(preds, targ)

Compute accuracy of preds with respect to targ

fit[source]

fit(epochs, model, loss_func, train, valid)

Fit function 1: Initial training loop with manual parameter updates and gradient zeroing

def fit(epochs, model, loss_func, train, valid):
    for epoch in range(epochs):
        # `train` holds the inputs and `valid` the targets (xt and yt below)
        for batch in range(math.ceil(len(train)/bs)):
            start = batch*bs
            end = batch*bs + bs
            train_batch = train[start:end]
            valid_batch = valid[start:end]

            loss = loss_func(model(train_batch), valid_batch)
            loss_func.backward()
            model.backward()

            # Manual optimization step: update each learnable parameter,
            # then zero its gradient for the next batch
            with torch.no_grad():
                for l in model.layers:
                    if hasattr(l, 'w'):
                        l.w -= l.w.g * lr
                        l.b -= l.b.g * lr
                        l.w.g = 0
                        l.b.g = 0
        print(f'Epoch {epoch+1}, Accuracy: {accuracy(model(xt), yt)}')

Parameters

class Parameter[source]

Parameter(data=None, requires_grad=True)

Defines a base class for all parameters that need to be learned by the model
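
The class body is not listed; a minimal sketch that matches the usage below and the way Linear accesses .d and .g later on (an assumption, not the exported code) could look like:

class Parameter():
    "Defines a base class for all parameters that need to be learned by the model"
    def __init__(self, data=None, requires_grad=True):
        self.d = data                        # the underlying tensor
        self.requires_grad = requires_grad
        self.g = 0.                          # gradient, filled in by each layer's bwd
    def update(self, grad): self.g = grad    # store the gradient computed in bwd
    def zero_grad(self): self.g = 0.
    def __repr__(self): return f'Parameter: {self.d.shape}, Requires grad: {self.requires_grad}'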

l_data = torch.zeros([4,6])
s_data = torch.randn([4,2])
param_l = Parameter(l_data)
param_s = Parameter(s_data)
param_l
Parameter: torch.Size([4, 6]), Requires grad: True
param_s
Parameter: torch.Size([4, 2]), Requires grad: True

Sequential Model

class SequentialModel[source]

SequentialModel(*args)

Model for executing forward and backward passes on a given list of layers

class SequentialModel():
    "Model for executing forward and backward passes on a given list of `layers`"
    def __init__(self, *args):
        self.layers = list(args)
        self.training = True

    def __repr__(self):
        "Prints out all modules of model"
        res = ["(Layer" + str(i+1) + "): " + str(m) for i,m in enumerate(self.layers)]
        return "\n".join(res)

    def __call__(self, x):
        "Execute forward pass on `x` through `self.layers`"
        for l in self.layers: x = l(x)
        return x

    def backward(self):
        "Execute backward pass through `self.layers` in reverse order"
        for l in reversed(self.layers): l.backward()

    def parameters(self):
        "Yield every parameter from each layer in `self.layers`"
        for l in self.layers:
            for p in l.parameters(): yield p

FCN Layers Redo

class Module[source]

Module()

Base class for every layer operation in a sequential network
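
The Module body itself is not listed; a minimal sketch consistent with how CrossEntropy and Linear below implement forward and bwd (an assumption) could be:

class Module():
    "Base class for every layer operation in a sequential network"
    def __call__(self, *args):
        self.args = args                     # remember the inputs for the backward pass
        self.out = self.forward(*args)
        return self.out
    def forward(self, *args): raise NotImplementedError
    def backward(self): self.bwd(self.out, *self.args)
    def parameters(self):
        # Yield any Parameter attributes; layers like ReLU simply yield nothing
        for v in vars(self).values():
            if isinstance(v, Parameter): yield v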

class CrossEntropy[source]

CrossEntropy() :: Module

Cross entropy loss layer: computes the loss in the forward pass and the gradient of the loss with respect to the logits in the backward pass

class CrossEntropy(Module):
    def forward(self, inp, targ):
        return cross_entropy(inp, targ)

    def bwd(self, loss, inp, targ):
        # Gradient of cross entropy w.r.t. the logits: (softmax(inp) - onehot(targ)) / batch size
        inp_s = softmax(inp)
        inp_s[range(targ.shape[0]), targ.long()] -= 1
        inp.g = inp_s / targ.shape[0]

class Linear[source]

Linear(in_d, out_d, relu_after, req_grad=True) :: Module

Linear layer: computes xb @ w + b, with weights initialized according to whether a ReLU follows

class Linear(Module):
    def __init__(self, in_d, out_d, relu_after, req_grad=True):
        super().__init__()
        # `relu_after` selects the weight initialization depending on whether a ReLU follows
        self.w = Parameter(get_weight(in_d, out_d, relu_after), req_grad)
        self.b = Parameter(torch.zeros(out_d), req_grad)

    def forward(self, xb): return xb @ self.w.d + self.b.d

    def bwd(self, out, inp):
        inp.g = out.g @ self.w.d.t()     # gradient w.r.t. the input
        self.w.update(inp.t() @ out.g)   # gradient w.r.t. the weights
        self.b.update(out.g.sum(0))      # gradient w.r.t. the bias

    def __repr__(self): return f'Linear({self.w.d.shape[0]}, {self.w.d.shape[1]})'

class ReLU[source]

ReLU() :: Module

ReLU activation, shifted down by 0.5 to roughly re-center the outputs around zero

class ReLU(Module):
    # Non in-place clamp so the input stored for the backward pass is not mutated;
    # the 0.5 shift roughly re-centers the activations
    def forward(self, x): return x.clamp_min(0.)-0.5
    def bwd(self, out, inp):
        inp.g = (inp>0).float() * out.g
    def __repr__(self): return 'ReLU()'
model = SequentialModel(Linear(n_in,50, True), ReLU(), Linear(50,n_out, False))
model
(Layer1): Linear(784, 50)
(Layer2): ReLU()
(Layer3): Linear(50, 10)
for p in model.parameters(): print(p)
Parameter: torch.Size([784, 50]), Requires grad: True
Parameter: torch.Size([50]), Requires grad: True
Parameter: torch.Size([50, 10]), Requires grad: True
Parameter: torch.Size([10]), Requires grad: True

Optimizer

class Optimizer[source]

Optimizer(params, lr)

Basic SGD optimizer: updates each parameter with its gradient scaled by the learning rate and can zero all gradients
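
The class body is not listed; given how step and zero_grad are used below, a minimal SGD sketch (an assumption, built on the Parameter fields sketched earlier) might be:

class Optimizer():
    def __init__(self, params, lr): self.params, self.lr = list(params), lr
    def step(self):
        # Vanilla SGD: move each parameter against its gradient
        for p in self.params:
            if p.requires_grad: p.d -= p.g * self.lr
    def zero_grad(self):
        for p in self.params: p.g = 0.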

optim = Optimizer(model.parameters(), 0.5)
def fit(epochs, model, optim, loss_func, train, valid):
    "Fit function 2: Added easier optimization steps"
    for epoch in range(epochs):
        # `train` holds the inputs and `valid` the targets (xt and yt below)
        for batch in range(math.ceil(len(train)/bs)):
            start = batch*bs
            end = batch*bs + bs
            train_batch = train[start:end]
            valid_batch = valid[start:end]

            loss = loss_func(model(train_batch), valid_batch)
            loss_func.backward()
            model.backward()

            optim.step()
            optim.zero_grad()

        print(f'Epoch {epoch+1}, Accuracy: {accuracy(model(xt), yt)}')
fit(3, model, optim, CrossEntropy(), xt, yt)
Epoch 1, Accuracy: 0.8959000110626221
Epoch 2, Accuracy: 0.9216799736022949
Epoch 3, Accuracy: 0.9330599904060364

get_model[source]

get_model(lr)

Helper function that returns a basic fully connected network together with its optimizer and loss function, given a learning rate lr
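
A plausible body, given how the function is used below (a sketch assuming the same n_in/n_out globals and architecture as the model above):

def get_model(lr):
    model = SequentialModel(Linear(n_in, 50, True), ReLU(), Linear(50, n_out, False))
    return model, Optimizer(model.parameters(), lr), CrossEntropy()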

Databunch

class Dataset[source]

Dataset(x, y)

Container class to store and get input and target values from a dataset
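
A minimal sketch consistent with the slicing the DataLoader below relies on (indexing returns an (inputs, targets) pair):

class Dataset():
    def __init__(self, x, y): self.x, self.y = x, y
    def __len__(self): return len(self.x)
    def __getitem__(self, i): return self.x[i], self.y[i]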

class DataLoader():
    "Container class to iterate over a dataset in batches of a given batch size"
    def __init__(self, ds, bs):
        "Provide a dataset, `ds`, and batch size, `bs`"
        self.ds, self.bs = ds,bs
    def __iter__(self):
        for i in range(0, len(self.ds), self.bs): yield self.ds[i:i+self.bs]
ds = Dataset(xt, yt)
dl = DataLoader(ds, bs)
def fit(epochs, model, optim, loss_func, data_loader):
    "Training Loop 3: Refactored out for easy databunch usage"
    for epoch in range(epochs):
        for xb, yb in data_loader:
            loss = loss_func(model(xb), yb)
            loss_func.backward()
            model.backward()
            
            optim.step()
            optim.zero_grad()
            
        print(f'Epoch {epoch+1}, Accuracy: {accuracy(model(xt), yt)}')
m, o, lf = get_model(0.5)
fit(3, m, o, lf, dl)
Epoch 1, Accuracy: 0.9178799986839294
Epoch 2, Accuracy: 0.9384199976921082
Epoch 3, Accuracy: 0.9474200010299683

Random Data

class Batcher[source]

Batcher(ds, bs, random)

Batch sampler that yields batches of indices over ds, shuffled when the random arg is set to True
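
A minimal sketch matching the index batches shown below (an assumption, using torch.randperm for the shuffling):

class Batcher():
    def __init__(self, ds, bs, random=False): self.n, self.bs, self.random = len(ds), bs, random
    def __iter__(self):
        idxs = torch.randperm(self.n) if self.random else torch.arange(self.n)
        for i in range(0, self.n, self.bs): yield idxs[i:i+self.bs]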

small_ds = Dataset(*ds[:10])
random = Batcher(small_ds, 4, True)
not_random = Batcher(small_ds, 4, False)
[r for r in random]
[tensor([6, 5, 1, 7]), tensor([4, 3, 8, 2]), tensor([0, 9])]
[nr for nr in not_random]
[tensor([0, 1, 2, 3]), tensor([4, 5, 6, 7]), tensor([8, 9])]

collate[source]

collate(b)

Combines the list of (input, target) pairs b into single input and target tensors
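
A minimal sketch, assuming b is a list of (input, target) pairs as produced by indexing the Dataset above:

def collate(b):
    xs, ys = zip(*b)
    return torch.stack(xs), torch.stack(ys)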

class DataLoader[source]

DataLoader(ds, batcher, collate_fcn)

Refactored DataLoader that draws index batches from a batcher and collates them into single input and target tensors for the model
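
A sketch of the refactored loader, assuming the Dataset, Batcher, and collate pieces above:

class DataLoader():
    def __init__(self, ds, batcher, collate_fcn):
        self.ds, self.batcher, self.collate_fcn = ds, batcher, collate_fcn
    def __iter__(self):
        # Draw a batch of indices, gather the samples, and collate them into tensors
        for b in self.batcher: yield self.collate_fcn([self.ds[i] for i in b])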

Validation

fit[source]

fit(epochs, model, optim, loss_func, train, valid)

Fit function 4: Added a validation loop, toggling of the model's training status, and printing of accuracy and loss
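
The final version is not listed here; a sketch consistent with the description and the metrics printed below (an assumption, not the exported code):

def fit(epochs, model, optim, loss_func, train, valid):
    for epoch in range(epochs):
        model.training = True
        for xb, yb in train:
            loss = loss_func(model(xb), yb)
            loss_func.backward()
            model.backward()
            optim.step()
            optim.zero_grad()

        # Validation pass: no parameter updates, just accumulate metrics
        model.training = False
        tot_loss, tot_acc, count = 0., 0., 0
        for xb, yb in valid:
            preds = model(xb)
            n = len(xb)
            tot_loss += loss_func(preds, yb) * n
            tot_acc  += accuracy(preds, yb) * n
            count    += n
        print(f'Epoch {epoch+1}, Accuracy: {tot_acc/count}, Loss: {tot_loss/count}')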

get_datasets[source]

get_datasets()

Helper function to return the training and validation DataLoaders

train, valid = get_datasets()
m, o, lf = get_model(0.1)
fit(3, m, o, lf, train, valid)
Epoch 1, Accuracy: 0.9465565085411072, Loss: 0.18650124967098236
Epoch 2, Accuracy: 0.9217754602432251, Loss: 0.24221312999725342
Epoch 3, Accuracy: 0.9668591022491455, Loss: 0.11605237424373627