Add --no_use_cuda_fp16 param for AutoGPTQ

2023-06-23 12:22:56 -03:00 · 2023-06-23 12:22:56 -03:00 · 3ae9af01aa
commit 3ae9af01aa
parent 5646690769
5 changed files with 6 additions and 2 deletions
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -147,6 +147,7 @@ parser.add_argument('--autogptq', action='store_true', help='DEPRECATED')
 parser.add_argument('--triton', action='store_true', help='Use triton.')
 parser.add_argument('--no_inject_fused_attention', action='store_true', help='Do not use fused attention (lowers VRAM requirements).')
 parser.add_argument('--no_inject_fused_mlp', action='store_true', help='Triton mode only: Do not use fused MLP (lowers VRAM requirements).')
+parser.add_argument('--no_use_cuda_fp16', action='store_true', help='This can make models faster on some systems.')
 parser.add_argument('--desc_act', action='store_true', help='For models that don\'t have a quantize_config.json, this parameter is used to define whether to set desc_act or not in BaseQuantizeConfig.')

 # ExLlama