Spaces:
Running
Running
Add a warning when ctx_len exceeds the model's max context length. Try to fix HF token handling (strip whitespace; pass None when the token is empty).
Browse files
app.py
CHANGED
|
@@ -3,11 +3,12 @@ from transformers import AutoConfig
|
|
| 3 |
|
| 4 |
|
| 5 |
def calculate(name: str, ctx_len: int, num_users: int, dtype: str, hf_token: str):
|
|
|
|
| 6 |
try:
|
| 7 |
cfg = AutoConfig.from_pretrained(
|
| 8 |
name,
|
| 9 |
trust_remote_code=True,
|
| 10 |
-
token=hf_token,
|
| 11 |
)
|
| 12 |
except Exception as e:
|
| 13 |
raise gr.Error(e)
|
|
@@ -22,6 +23,10 @@ def calculate(name: str, ctx_len: int, num_users: int, dtype: str, hf_token: str
|
|
| 22 |
["num_layers", num_layers],
|
| 23 |
["max_ctx_len", cfg.max_position_embeddings],
|
| 24 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
# TODO: show attention type, show calculation
|
| 27 |
if use_mla:
|
|
|
|
| 3 |
|
| 4 |
|
| 5 |
def calculate(name: str, ctx_len: int, num_users: int, dtype: str, hf_token: str):
|
| 6 |
+
hf_token = hf_token.strip()
|
| 7 |
try:
|
| 8 |
cfg = AutoConfig.from_pretrained(
|
| 9 |
name,
|
| 10 |
trust_remote_code=True,
|
| 11 |
+
token=hf_token or None,
|
| 12 |
)
|
| 13 |
except Exception as e:
|
| 14 |
raise gr.Error(e)
|
|
|
|
| 23 |
["num_layers", num_layers],
|
| 24 |
["max_ctx_len", cfg.max_position_embeddings],
|
| 25 |
]
|
| 26 |
+
if ctx_len > cfg.max_position_embeddings:
|
| 27 |
+
gr.Warning(
|
| 28 |
+
"Requested context length is larger than the max value supported by the model"
|
| 29 |
+
)
|
| 30 |
|
| 31 |
# TODO: show attention type, show calculation
|
| 32 |
if use_mla:
|