Jinja templates for Instruct and Chat (#4874)

This commit is contained in:
oobabooga 2023-12-12 17:23:14 -03:00 committed by GitHub
parent aab0dd962d
commit 39d2fe1ed9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
71 changed files with 1774 additions and 518 deletions

View file

@@ -120,10 +120,9 @@ def encode(prompt, add_special_tokens=True, add_bos_token=True, truncation_lengt
input_ids = np.array(input_ids).reshape(1, len(input_ids))
else:
input_ids = shared.tokenizer.encode(str(prompt), return_tensors='pt', add_special_tokens=add_special_tokens)
# This is a hack for making replies more creative.
if not add_bos_token and input_ids[0][0] == shared.tokenizer.bos_token_id:
input_ids = input_ids[:, 1:]
if not add_bos_token:
while len(input_ids[0]) > 0 and input_ids[0][0] == shared.tokenizer.bos_token_id:
input_ids = input_ids[:, 1:]
# Handling truncation
if truncation_length is not None: