Add HQQ quant loader (#4888)

--------- Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com>
2023-12-18 19:23:16 -05:00 · 2023-12-18 19:23:16 -05:00 · 674be9a09a
commit 674be9a09a
parent 64a57d9dc2
16 changed files with 79 additions and 0 deletions
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -144,6 +144,9 @@ parser.add_argument('--pre_layer', type=int, nargs='+', help='The number of laye
 parser.add_argument('--checkpoint', type=str, help='The path to the quantized checkpoint file. If not specified, it will be automatically detected.')
 parser.add_argument('--monkey-patch', action='store_true', help='Apply the monkey patch for using LoRAs with quantized models.')

+# HQQ
+parser.add_argument('--hqq-backend', type=str, default='PYTORCH_COMPILE', help='Backend for the HQQ loader. Valid options: PYTORCH, PYTORCH_COMPILE, ATEN.')
+
 # DeepSpeed
 parser.add_argument('--deepspeed', action='store_true', help='Enable the use of DeepSpeed ZeRO-3 for inference via the Transformers integration.')
 parser.add_argument('--nvme-offload-dir', type=str, help='DeepSpeed: Directory to use for ZeRO-3 NVME offloading.')
@@ -246,6 +249,8 @@ def fix_loader_name(name):
        return 'AutoAWQ'
    elif name in ['quip#', 'quip-sharp', 'quipsharp', 'quip_sharp']:
        return 'QuIP#'
+    elif name in ['hqq']:
+        return 'HQQ'


 def add_extension(name, last=False):