Add HQQ quant loader (#4888)
--------- Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com>
This commit is contained in:
parent
64a57d9dc2
commit
674be9a09a
16 changed files with 79 additions and 0 deletions
|
@@ -144,6 +144,9 @@ parser.add_argument('--pre_layer', type=int, nargs='+', help='The number of laye
|
|||
parser.add_argument('--checkpoint', type=str, help='The path to the quantized checkpoint file. If not specified, it will be automatically detected.')
|
||||
parser.add_argument('--monkey-patch', action='store_true', help='Apply the monkey patch for using LoRAs with quantized models.')
|
||||
|
||||
# HQQ
|
||||
parser.add_argument('--hqq-backend', type=str, default='PYTORCH_COMPILE', help='Backend for the HQQ loader. Valid options: PYTORCH, PYTORCH_COMPILE, ATEN.')
|
||||
|
||||
# DeepSpeed
|
||||
parser.add_argument('--deepspeed', action='store_true', help='Enable the use of DeepSpeed ZeRO-3 for inference via the Transformers integration.')
|
||||
parser.add_argument('--nvme-offload-dir', type=str, help='DeepSpeed: Directory to use for ZeRO-3 NVME offloading.')
|
||||
|
@@ -246,6 +249,8 @@ def fix_loader_name(name):
|
|||
return 'AutoAWQ'
|
||||
elif name in ['quip#', 'quip-sharp', 'quipsharp', 'quip_sharp']:
|
||||
return 'QuIP#'
|
||||
elif name in ['hqq']:
|
||||
return 'HQQ'
|
||||
|
||||
|
||||
def add_extension(name, last=False):
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue