Update app.py
app.py CHANGED
@@ -190,16 +190,16 @@ with gr.Blocks(css=STYLE) as hf_endpoint:
     with gr.Column(elem_classes=["group-border"]):
         with gr.Row():
             with gr.Column():
-                gr.Markdown("""
+                gr.Markdown("""### Hugging Face account ID (name)""")
                 hf_account_input = gr.Textbox(show_label=False, elem_classes=["no-label", "small-big"])

             with gr.Column():
-                gr.Markdown("
+                gr.Markdown("### Hugging Face access token")
                 hf_token_input = gr.Textbox(show_label=False, type="password", elem_classes=["no-label", "small-big"])

         with gr.Row():
             with gr.Column():
-                gr.Markdown("""
+                gr.Markdown("""### Target model

 Model from the Hugging Face hub""")
                 repository_selector = gr.Textbox(
@@ -210,7 +210,7 @@ Model from the Hugging Face hub""")
                 )

             with gr.Column():
-                gr.Markdown("""
+                gr.Markdown("""### Target model version(branch)

 Branch name of the Model""")
                 revision_selector = gr.Textbox(
@@ -222,14 +222,14 @@ Branch name of the Model""")

     with gr.Column(elem_classes=["group-border"]):
         with gr.Column():
-            gr.Markdown("""
+            gr.Markdown("""### Endpoint name

 Name for your new endpoint""")
             endpoint_name_input = gr.Textbox(show_label=False, elem_classes=["no-label", "small-big"])

         with gr.Row():
             with gr.Column():
-                gr.Markdown("""
+                gr.Markdown("""### Cloud Provider""")
                 provider_selector = gr.Dropdown(
                     choices=providers.keys(),
                     interactive=True,
@@ -238,7 +238,7 @@ Name for your new endpoint""")
                 )

             with gr.Column():
-                gr.Markdown("""
+                gr.Markdown("""### Cloud Region""")
                 region_selector = gr.Dropdown(
                     [],
                     value="",
@@ -249,7 +249,7 @@ Name for your new endpoint""")

         with gr.Row(visible=False):
             with gr.Column():
-                gr.Markdown("
+                gr.Markdown("### Task")
                 task_selector = gr.Textbox(
                     value="Text Generation",
                     interactive=False,
@@ -258,7 +258,7 @@ Name for your new endpoint""")
                 )

             with gr.Column():
-                gr.Markdown("
+                gr.Markdown("### Framework")
                 framework_selector = gr.Textbox(
                     value="PyTorch",
                     interactive=False,
@@ -267,7 +267,7 @@ Name for your new endpoint""")
                 )

             with gr.Column():
-                gr.Markdown("""
+                gr.Markdown("""### Compute Instance Type""")
                 compute_selector = gr.Dropdown(
                     [],
                     value="",
@@ -279,7 +279,7 @@ Name for your new endpoint""")
         with gr.Row():
             with gr.Row(scale=1):
                 with gr.Column():
-                    gr.Markdown("""
+                    gr.Markdown("""### Min Number of Nodes""")
                     min_node_selector = gr.Number(
                         value=1,
                         interactive=True,
@@ -288,7 +288,7 @@ Name for your new endpoint""")
                     )

                 with gr.Column():
-                    gr.Markdown("""
+                    gr.Markdown("""### Max Number of Nodes""")
                     max_node_selector = gr.Number(
                         value=1,
                         interactive=True,
@@ -297,7 +297,7 @@ Name for your new endpoint""")
                     )

             with gr.Column(scale=2):
-                gr.Markdown("""
+                gr.Markdown("""### Security Level""")
                 security_selector = gr.Radio(
                     choices=["Protected", "Public", "Private"],
                     value="Public",
@@ -308,14 +308,14 @@ Name for your new endpoint""")

     with gr.Column(elem_classes=["group-border"]):
         with gr.Column():
-            gr.Markdown("""
+            gr.Markdown("""### Container Type

 Text Generation Inference is an optimized container for text generation task""")
             _ = gr.Textbox("Text Generation Inference", show_label=False, elem_classes=["no-label", "small-big"])

         with gr.Row():
             with gr.Column():
-                gr.Markdown("""
+                gr.Markdown("""### Custom Cuda Kernels

 TGI uses custom kernels to speed up inference for some models. You can try disabling them if you encounter issues.""")
                 _ = gr.Dropdown(
@@ -327,7 +327,7 @@ TGI uses custom kernels to speed up inference for some models. You can try disab
                 )

             with gr.Column():
-                gr.Markdown("""
+                gr.Markdown("""### Quantization

 Quantization can reduce the model size and improve latency, with little degradation in model accuracy.""")
                 _ = gr.Dropdown(
@@ -340,7 +340,7 @@ Quantization can reduce the model size and improve latency, with little degradat

         with gr.Row():
             with gr.Column():
-                gr.Markdown("""
+                gr.Markdown("""### Max Input Length (per Query)

 Increasing this value can impact the amount of RAM required. Some models can only handle a finite range of sequences.""")
                 _ = gr.Number(
@@ -351,7 +351,7 @@ Increasing this value can impact the amount of RAM required. Some models can onl
                 )

             with gr.Column():
-                gr.Markdown("""
+                gr.Markdown("""### Max Number of Tokens (per Query)

 The larger this value, the more memory each request will consume and the less effective batching can be.""")
                 _ = gr.Number(
@@ -363,7 +363,7 @@ The larger this value, the more memory each request will consume and the less ef

         with gr.Row():
             with gr.Column():
-                gr.Markdown("""
+                gr.Markdown("""### Max Batch Prefill Tokens

 Number of prefill tokens used during continuous batching. It can be useful to adjust this number since the prefill operation is memory-intensive and compute-bound.""")
                 _ = gr.Number(
@@ -374,7 +374,7 @@ Number of prefill tokens used during continuous batching. It can be useful to ad
                 )

             with gr.Column():
-                gr.Markdown("""
+                gr.Markdown("""### Max Batch Total Tokens

 Number of tokens that can be passed before forcing waiting queries to be put on the batch. A value of 1000 can fit 10 queries of 100 tokens or a single query of 1000 tokens.""")
                 _ = gr.Number(