transformers: add use_flash_attention_2 option (#4373)

2023-11-05 00:59:33 +08:00 · 2023-11-05 00:59:33 +08:00 · 4766a57352
commit 4766a57352
parent add359379e
6 changed files with 9 additions and 1 deletion
--- a/modules/models.py
+++ b/modules/models.py
@@ -126,6 +126,10 @@ def huggingface_loader(model_name):
        'torch_dtype': torch.bfloat16 if shared.args.bf16 else torch.float16,
        'use_safetensors': True if shared.args.force_safetensors else None
    }
+
+    if shared.args.use_flash_attention_2:
+        params['use_flash_attention_2'] = True
+
    config = AutoConfig.from_pretrained(path_to_model, trust_remote_code=params['trust_remote_code'])

    if 'chatglm' in model_name.lower():