transformers: add use_flash_attention_2 option (#4373)

This commit is contained in:
feng lui 2023-11-05 00:59:33 +08:00 committed by GitHub
parent add359379e
commit 4766a57352
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 9 additions and 1 deletion

View file

@ -9,7 +9,6 @@ loaders_and_params = OrderedDict({
'Transformers': [
'cpu_memory',
'gpu_memory',
'trust_remote_code',
'load_in_8bit',
'bf16',
'cpu',
@ -21,6 +20,7 @@ loaders_and_params = OrderedDict({
'compute_dtype',
'trust_remote_code',
'use_fast',
'use_flash_attention_2',
'alpha_value',
'rope_freq_base',
'compress_pos_emb',