Set use_fast=True by default, create --no_use_fast flag

This increases tokens/second for HF loaders.
This commit is contained in:
oobabooga 2023-11-16 19:45:05 -08:00
parent b2ce8dc7ee
commit 8b66d83aa9
6 changed files with 20 additions and 19 deletions

View file

@@ -19,7 +19,7 @@ loaders_and_params = OrderedDict({
'quant_type',
'compute_dtype',
'trust_remote_code',
-'use_fast',
+'no_use_fast',
'use_flash_attention_2',
'alpha_value',
'rope_freq_base',
@@ -34,7 +34,7 @@ loaders_and_params = OrderedDict({
'rope_freq_base',
'compress_pos_emb',
'cfg_cache',
-'use_fast',
+'no_use_fast',
'exllama_HF_info',
],
'ExLlamav2_HF': [
@@ -45,7 +45,7 @@ loaders_and_params = OrderedDict({
'cache_8bit',
'alpha_value',
'compress_pos_emb',
-'use_fast',
+'no_use_fast',
],
'ExLlama': [
'gpu_split',
@@ -78,7 +78,7 @@ loaders_and_params = OrderedDict({
'disk',
'auto_devices',
'trust_remote_code',
-'use_fast',
+'no_use_fast',
'autogptq_info',
],
'GPTQ-for-LLaMa': [
@@ -86,7 +86,7 @@ loaders_and_params = OrderedDict({
'groupsize',
'model_type',
'pre_layer',
-'use_fast',
+'no_use_fast',
'gptq_for_llama_info',
],
'llama.cpp': [
@@ -119,7 +119,7 @@ loaders_and_params = OrderedDict({
'compress_pos_emb',
'numa',
'cfg_cache',
-'use_fast',
+'no_use_fast',
'logits_all',
'llamacpp_HF_info',
],
@@ -139,7 +139,7 @@ loaders_and_params = OrderedDict({
'max_seq_len',
'no_inject_fused_attention',
'trust_remote_code',
-'use_fast',
+'no_use_fast',
]
})