Supplementary code for the Build a Large Language Model From Scratch book by Sebastian Raschka

Code repository: https://github.com/rasbt/LLMs-from-scratch

Exercise A.1

The Python Setup Tips document in this repository contains additional recommendations and tips to set up your Python environment.

Exercise A.2

The Installing Libraries Used In This Book document and directory contain utilities to check whether your environment is set up correctly.

Exercise A.3

import torch

class NeuralNetwork(torch.nn.Module):
    """Multilayer perceptron with two hidden layers of 30 and 20 units.

    Args:
        num_inputs: Number of input features fed to the first linear layer.
        num_outputs: Number of units produced by the final linear layer.
    """

    def __init__(self, num_inputs, num_outputs):
        super().__init__()

        nn = torch.nn
        # The position of each module in this list determines its index
        # inside the Sequential container (linear layers sit at 0, 2 and 4).
        stack = [
            nn.Linear(num_inputs, 30),   # 1st hidden layer
            nn.ReLU(),
            nn.Linear(30, 20),           # 2nd hidden layer
            nn.ReLU(),
            nn.Linear(20, num_outputs),  # output layer
        ]
        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the resulting logits.

        No activation is applied after the output layer.
        """
        return self.layers(x)
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[1], line 1
----> 1 import torch
      3 class NeuralNetwork(torch.nn.Module):
      4     def __init__(self, num_inputs, num_outputs):

ModuleNotFoundError: No module named 'torch'
# Build the network with 2 input features and 2 output units.
model = NeuralNetwork(2, 2)

# Add up the element counts of every parameter tensor that has
# requires_grad set.
num_params = sum(
    param.numel()
    for param in model.parameters()
    if param.requires_grad
)
print("Total number of trainable model parameters:", num_params)
Total number of trainable model parameters: 752

Exercise A.4

import torch

# Two random matrices with matching inner dimensions: (100, 200) @ (200, 300).
a = torch.rand(100, 200)
b = torch.rand(200, 300)
# Time the matrix product on the device the tensors were created on
# (no device is specified above, so presumably the CPU default).
%timeit a @ b
63.8 µs ± 8.7 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
# Move both operands to the "cuda" device and repeat the same measurement.
# NOTE(review): this cell needs a CUDA-capable setup; .to("cuda") fails otherwise.
# NOTE(review): CUDA operations run asynchronously with respect to the host, so
# this timing may not capture full kernel execution time - consider calling
# torch.cuda.synchronize() inside the timed statement if exact numbers matter.
a, b = a.to("cuda"), b.to("cuda")
%timeit a @ b
13.8 µs ± 425 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)