rasbt-LLMs-from-scratch/tool_calling_experiment/config.py

53 lines
1.1 KiB
Python

# GPT-2 Configurations
# These match the standard GPT-2 sizes.
# Each `vocab_size` below holds the base GPT-2 value (50257); it is updated at runtime/initialization to 50259 to include the two special tokens.
# "124M" GPT-2 configuration: 12 layers, 12 heads, embedding width 768
# (768 / 12 = 64, so emb_dim divides evenly across n_heads).
GPT_CONFIG_124M = {
    # Base vocabulary only; the two special tokens (50257 + 2 = 50259)
    # are accounted for when the value is updated at runtime.
    "vocab_size": 50257,
    "context_length": 1024,
    "emb_dim": 768,
    "n_heads": 12,
    "n_layers": 12,
    "drop_rate": 0.1,
    "qkv_bias": True,
}
# "355M" GPT-2 configuration: 24 layers, 16 heads, embedding width 1024
# (1024 / 16 = 64, so emb_dim divides evenly across n_heads).
GPT_CONFIG_355M = {
    # Base vocabulary only; special tokens are not counted here.
    "vocab_size": 50257,
    "context_length": 1024,
    "emb_dim": 1024,
    "n_heads": 16,
    "n_layers": 24,
    "drop_rate": 0.1,
    "qkv_bias": True,
}
# "774M" GPT-2 configuration: 36 layers, 20 heads, embedding width 1280
# (1280 / 20 = 64, so emb_dim divides evenly across n_heads).
GPT_CONFIG_774M = {
    # Base vocabulary only; special tokens are not counted here.
    "vocab_size": 50257,
    "context_length": 1024,
    "emb_dim": 1280,
    "n_heads": 20,
    "n_layers": 36,
    "drop_rate": 0.1,
    "qkv_bias": True,
}
# "1558M" GPT-2 configuration: 48 layers, 25 heads, embedding width 1600
# (1600 / 25 = 64, so emb_dim divides evenly across n_heads).
GPT_CONFIG_1558M = {
    # Base vocabulary only; special tokens are not counted here.
    "vocab_size": 50257,
    "context_length": 1024,
    "emb_dim": 1600,
    "n_heads": 25,
    "n_layers": 48,
    "drop_rate": 0.1,
    "qkv_bias": True,
}
# Special tokens for tool calling.
# Their IDs are appended directly after the 50257-entry base vocabulary,
# so the first special token takes ID 50257.
SPECIAL_TOKENS = {
    "<CODE_START>": 50257,
    "<CODE_END>": 50258,
}

# Vocabulary size once the special tokens are included. Derived from
# SPECIAL_TOKENS (highest assigned ID + 1) rather than hard-coded, so adding
# a token cannot leave this value stale. Currently evaluates to 50259.
FINAL_VOCAB_SIZE = max(SPECIAL_TOKENS.values()) + 1