diff --git a/modules/quantized_LLaMA.py b/modules/quantized_LLaMA.py index fa7f15c..e9352f9 100644 --- a/modules/quantized_LLaMA.py +++ b/modules/quantized_LLaMA.py @@ -42,7 +42,7 @@ def load_quantized_LLaMA(model_name): model = load_quant(path_to_model, str(pt_path), bits) - # Multi-GPU setup + # Multiple GPUs or GPU+CPU if shared.args.gpu_memory: max_memory = {} for i in range(len(shared.args.gpu_memory)):