Add llama-cpp-python wheels with tensor cores support (#5003)

2023-12-19 17:30:53 -03:00 · 2023-12-19 17:30:53 -03:00 · de138b8ba6
commit de138b8ba6
parent 0a299d5959
9 changed files with 69 additions and 21 deletions
--- a/modules/llamacpp_model.py
+++ b/modules/llamacpp_model.py
@@ -19,12 +19,21 @@ try:
 except:
    llama_cpp_cuda = None

+try:
+    import llama_cpp_cuda_tensorcores
+except:
+    llama_cpp_cuda_tensorcores = None
+

 def llama_cpp_lib():
-    if (shared.args.cpu and llama_cpp is not None) or llama_cpp_cuda is None:
+    if shared.args.cpu and llama_cpp is not None:
        return llama_cpp
-    else:
+    elif shared.args.tensorcores and llama_cpp_cuda_tensorcores is not None:
+        return llama_cpp_cuda_tensorcores
+    elif llama_cpp_cuda is not None:
        return llama_cpp_cuda
+    else:
+        return llama_cpp


 def ban_eos_logits_processor(eos_token, input_ids, logits):