Spaces:

gaunernst
/

kv-cache-calculator

Running

gaunernst committed on Apr 3

Commit

0e5a78d

1 Parent(s): 36a071f

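Add a warning when ctx_len is larger than the model's max_position_embeddings. Try to fix HF token handling (strip whitespace and pass None when the token is empty).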

Files changed (1) hide show

app.py CHANGED Viewed

@@ -3,11 +3,12 @@ from transformers import AutoConfig
 def calculate(name: str, ctx_len: int, num_users: int, dtype: str, hf_token: str):
     try:
         cfg = AutoConfig.from_pretrained(
             name,
             trust_remote_code=True,
-            token=hf_token,
         )
     except Exception as e:
         raise gr.Error(e)
@@ -22,6 +23,10 @@ def calculate(name: str, ctx_len: int, num_users: int, dtype: str, hf_token: str
         ["num_layers", num_layers],
         ["max_ctx_len", cfg.max_position_embeddings],
     ]
     # TODO: show attention type, show calculation
     if use_mla:

 def calculate(name: str, ctx_len: int, num_users: int, dtype: str, hf_token: str):
+    hf_token = hf_token.strip()
     try:
         cfg = AutoConfig.from_pretrained(
             name,
             trust_remote_code=True,
+            token=hf_token or None,
         )
     except Exception as e:
         raise gr.Error(e)
         ["num_layers", num_layers],
         ["max_ctx_len", cfg.max_position_embeddings],
     ]
+    if ctx_len > cfg.max_position_embeddings:
+        gr.Warning(
+            "Requested context length is larger than the max value supported by the model"
+        )
     # TODO: show attention type, show calculation
     if use_mla: