Fix Training Pad Token (#1678)
Previously the pad token was set to the character "0" rather than to token id 0 (which is <unk> in the case of llama).
This commit is contained in:
parent
80c2f25131
commit
e3968f7dd0
1 changed file with 1 addition and 1 deletion
|
@ -243,7 +243,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
|
||||||
return
|
return
|
||||||
|
|
||||||
gradient_accumulation_steps = batch_size // micro_batch_size
|
gradient_accumulation_steps = batch_size // micro_batch_size
|
||||||
shared.tokenizer.pad_token = 0
|
shared.tokenizer.pad_token_id = 0
|
||||||
shared.tokenizer.padding_side = "left"
|
shared.tokenizer.padding_side = "left"
|
||||||
|
|
||||||
def tokenize(prompt):
|
def tokenize(prompt):
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue