transformers: add use_flash_attention_2 option (#4373)
This commit is contained in:
parent
add359379e
commit
4766a57352
6 changed files with 9 additions and 1 deletion
@@ -9,7 +9,6 @@ loaders_and_params = OrderedDict({
     'Transformers': [
         'cpu_memory',
         'gpu_memory',
         'trust_remote_code',
         'load_in_8bit',
         'bf16',
         'cpu',
@@ -21,6 +20,7 @@ loaders_and_params = OrderedDict({
         'compute_dtype',
         'trust_remote_code',
         'use_fast',
+        'use_flash_attention_2',
         'alpha_value',
         'rope_freq_base',
         'compress_pos_emb',
|
Loading…
Add table
Add a link
Reference in a new issue