transformers: add use_flash_attention_2 option (#4373)

This commit is contained in:
feng lui 2023-11-05 00:59:33 +08:00 committed by GitHub
parent add359379e
commit 4766a57352
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 9 additions and 1 deletion

View file

@@ -126,6 +126,10 @@ def huggingface_loader(model_name):
'torch_dtype': torch.bfloat16 if shared.args.bf16 else torch.float16,
'use_safetensors': True if shared.args.force_safetensors else None
}
if shared.args.use_flash_attention_2:
params['use_flash_attention_2'] = True
config = AutoConfig.from_pretrained(path_to_model, trust_remote_code=params['trust_remote_code'])
if 'chatglm' in model_name.lower():