Remove flexgen support

This commit is contained in:
oobabooga 2023-07-25 15:15:29 -07:00
parent 5134d5b1c6
commit 75c2dd38cf
8 changed files with 3 additions and 233 deletions

View file

@ -56,7 +56,6 @@ def load_model(model_name, loader=None):
'GPTQ-for-LLaMa': GPTQ_loader,
'llama.cpp': llamacpp_loader,
'llamacpp_HF': llamacpp_HF_loader,
'FlexGen': flexgen_loader,
'RWKV': RWKV_loader,
'ExLlama': ExLlama_loader,
'ExLlama_HF': ExLlama_HF_loader
@ -221,32 +220,6 @@ def huggingface_loader(model_name):
return model
def flexgen_loader(model_name):
    """Build and return a FlexGen ``OptLM`` for ``facebook/<model_name>``.

    The execution environment is created from ``shared.args.disk_cache_dir``
    and the offloading policy is assembled from ``shared.args.percent``,
    ``shared.args.pin_weight`` and ``shared.args.compress_weight``. Weights
    are looked up under ``shared.args.model_dir``.

    FlexGen is imported lazily so the dependency is only needed when this
    loader is actually selected.
    """
    from flexgen.flex_opt import CompressionConfig, ExecutionEnv, OptLM, Policy

    args = shared.args

    # Execution environment backed by the configured disk cache directory.
    execution_env = ExecutionEnv.create(args.disk_cache_dir)

    # The first six entries of --percent are forwarded positionally.
    # NOTE(review): presumably the GPU/CPU split for weights, cache and
    # activations -- confirm against FlexGen's Policy definition.
    offload_percents = [args.percent[idx] for idx in range(6)]

    # 4-bit, group-size-64 asymmetric compression; the two configs differ
    # only in the dimension along which groups are formed.
    weight_compression = CompressionConfig(
        num_bits=4,
        group_size=64,
        group_dim=0,
        symmetric=False,
    )
    cache_compression = CompressionConfig(
        num_bits=4,
        group_size=64,
        group_dim=2,
        symmetric=False,
    )

    # NOTE(review): the two leading 1s look like the GPU batch size and the
    # number of GPU batches -- verify against the FlexGen version in use.
    offload_policy = Policy(
        1,
        1,
        *offload_percents,
        overlap=True,
        sep_layer=True,
        pin_weight=args.pin_weight,
        cpu_cache_compute=False,
        attn_sparsity=1.0,
        compress_weight=args.compress_weight,
        comp_weight_config=weight_compression,
        compress_cache=False,
        comp_cache_config=cache_compression,
    )

    return OptLM(
        f"facebook/{model_name}",
        execution_env,
        args.model_dir,
        offload_policy,
    )
def RWKV_loader(model_name):
from modules.RWKV import RWKVModel, RWKVTokenizer