Supplementary code for the *Build a Large Language Model From Scratch* book by Sebastian Raschka. Code repository: https://github.com/rasbt/LLMs-from-scratch
# Load And Use Finetuned Model
This notebook contains the minimal code needed to load the model that was instruction-finetuned and saved in chapter 7 via `ch07.ipynb`.
```python
from importlib.metadata import version

pkgs = [
    "tiktoken",  # Tokenizer
    "torch",     # Deep learning library
]
for p in pkgs:
    print(f"{p} version: {version(p)}")
```
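If this cell raises a `PackageNotFoundError`, the corresponding package is not installed in the active environment; install it first, for example via `pip install tiktoken torch`, and restart the kernel before re-running the notebook.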
```python
from pathlib import Path

finetuned_model_path = Path("gpt2-medium355M-sft.pth")
if not finetuned_model_path.exists():
    print(
        f"Could not find '{finetuned_model_path}'.\n"
        "Run the `ch07.ipynb` notebook to finetune and save the finetuned model."
    )
```
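If you prefer the notebook to stop immediately when the checkpoint is missing, instead of only printing a warning, a minimal fail-fast variant is sketched below (an optional addition, not part of the original notebook):

```python
# Optional fail-fast check (a sketch, not part of the original notebook):
# raising here avoids a more confusing error later in torch.load
if not finetuned_model_path.exists():
    raise FileNotFoundError(
        f"Could not find '{finetuned_model_path}'. "
        "Run ch07.ipynb to finetune and save the model first."
    )
```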
```python
from previous_chapters import GPTModel

BASE_CONFIG = {
    "vocab_size": 50257,     # Vocabulary size
    "context_length": 1024,  # Context length
    "drop_rate": 0.0,        # Dropout rate
    "qkv_bias": True         # Query-key-value bias
}

model_configs = {
    "gpt2-small (124M)": {"emb_dim": 768, "n_layers": 12, "n_heads": 12},
    "gpt2-medium (355M)": {"emb_dim": 1024, "n_layers": 24, "n_heads": 16},
    "gpt2-large (774M)": {"emb_dim": 1280, "n_layers": 36, "n_heads": 20},
    "gpt2-xl (1558M)": {"emb_dim": 1600, "n_layers": 48, "n_heads": 25},
}

CHOOSE_MODEL = "gpt2-medium (355M)"

BASE_CONFIG.update(model_configs[CHOOSE_MODEL])

model_size = CHOOSE_MODEL.split(" ")[-1].lstrip("(").rstrip(")")  # e.g., "355M"

model = GPTModel(BASE_CONFIG)
```
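As a quick sanity check (not part of the original notebook), you can count the model's parameters; note that the printed total typically comes out higher than the nominal 355M because this from-scratch `GPTModel` keeps separate input and output embedding matrices rather than tying their weights:

```python
# Optional sanity check (a sketch, not part of the original notebook)
total_params = sum(p.numel() for p in model.parameters())
print(f"Total number of parameters: {total_params:,}")
```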
```python
import torch

# Load the finetuned weights saved by ch07.ipynb; weights_only=True restricts
# unpickling to tensor data, which is the safer way to load checkpoints
model.load_state_dict(torch.load(
    finetuned_model_path,
    map_location=torch.device("cpu"),
    weights_only=True
))
model.eval();
```
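Inference runs fine on CPU for a single short prompt, but if a GPU is available you can move the model over; the input token IDs passed to `generate` later would then need to be moved to the same device (a sketch, not part of the original notebook):

```python
# Optional: run on GPU if available (a sketch, not part of the original notebook)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# Note: inputs must then live on the same device, e.g.,
# text_to_token_ids(prompt, tokenizer).to(device)
```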
```python
import tiktoken

tokenizer = tiktoken.get_encoding("gpt2")
```
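A quick way to verify the tokenizer works (not part of the original notebook) is an encode/decode round trip; `allowed_special` permits the `<|endoftext|>` token, whose ID 50256 is used as the stop token further below:

```python
# Round-trip check (a sketch, not part of the original notebook)
ids = tokenizer.encode("Hello, world! <|endoftext|>", allowed_special={"<|endoftext|>"})
print(ids)                    # token IDs; <|endoftext|> encodes to 50256
print(tokenizer.decode(ids))  # recovers the original string
```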
prompt = """Below is an instruction that describes a task. Write a response
that appropriately completes the request.
### Instruction:
Convert the active sentence to passive: 'The chef cooks the meal every day.'
"""
```python
from previous_chapters import (
    generate,
    text_to_token_ids,
    token_ids_to_text
)


def extract_response(response_text, input_text):
    # Strip the echoed prompt and the "### Response:" marker so that
    # only the model's answer remains
    return response_text[len(input_text):].replace("### Response:", "").strip()
```
```python
torch.manual_seed(123)

token_ids = generate(
    model=model,
    idx=text_to_token_ids(prompt, tokenizer),
    max_new_tokens=35,
    context_size=BASE_CONFIG["context_length"],
    eos_id=50256  # stop early if the <|endoftext|> token is generated
)
response = token_ids_to_text(token_ids, tokenizer)
response = extract_response(response, prompt)
print(response)
```
```
The meal is cooked every day by the chef.
```
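The call above decodes deterministically (greedy decoding, plus a fixed seed). If your copy of `generate` in `previous_chapters` supports the `temperature` and `top_k` arguments introduced in chapter 5 (an assumption about your local version), you can sample more varied phrasings:

```python
# Assumes generate() accepts temperature/top_k, as in the book's chapter 5 version
token_ids = generate(
    model=model,
    idx=text_to_token_ids(prompt, tokenizer),
    max_new_tokens=35,
    context_size=BASE_CONFIG["context_length"],
    temperature=1.0,  # > 0 switches from greedy decoding to sampling
    top_k=10,         # sample only from the 10 most likely next tokens
    eos_id=50256
)
print(extract_response(token_ids_to_text(token_ids, tokenizer), prompt))
```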