Fix Training Pad Token (#1678)
Previously the pad token was set to the character "0" rather than to token id 0 (which is <unk> in the case of llama).
This commit is contained in:
parent
80c2f25131
commit
e3968f7dd0
1 changed file with 1 addition and 1 deletion
|
@ -243,7 +243,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
|
||||||
return
|
return
|
||||||
|
|
||||||
gradient_accumulation_steps = batch_size // micro_batch_size
|
gradient_accumulation_steps = batch_size // micro_batch_size
|
||||||
shared.tokenizer.pad_token = 0
|
shared.tokenizer.pad_token_id = 0
|
||||||
shared.tokenizer.padding_side = "left"
|
shared.tokenizer.padding_side = "left"
|
||||||
|
|
||||||
def tokenize(prompt):
|
def tokenize(prompt):
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue