Change GPTQ triton default settings
This commit is contained in:
parent
e03b873460
commit
7438f4f6ba
3 changed files with 10 additions and 10 deletions
|
@@ -79,14 +79,14 @@ def _load_quant(model, checkpoint, wbits, groupsize=-1, faster_kernel=False, exc
|
|||
model.load_state_dict(torch.load(checkpoint), strict=False)
|
||||
|
||||
if is_triton:
|
||||
if not shared.args.no_quant_attn:
|
||||
if shared.args.quant_attn:
|
||||
quant.make_quant_attn(model)
|
||||
if eval and not shared.args.no_fused_mlp:
|
||||
if eval and shared.args.fused_mlp:
|
||||
quant.make_fused_mlp(model)
|
||||
|
||||
if not shared.args.no_warmup_autotune:
|
||||
if shared.args.warmup_autotune:
|
||||
quant.autotune_warmup_linear(model, transpose=not eval)
|
||||
if eval and not shared.args.no_fused_mlp:
|
||||
if eval and shared.args.fused_mlp:
|
||||
quant.autotune_warmup_fused(model)
|
||||
|
||||
model.seqlen = 2048
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue