update
- util.py +2 -8
- vocab/__init__.py +1 -1
util.py CHANGED
@@ -72,6 +72,7 @@ def tokenize_pair(text, tokenizer_type_1, tokenizer_type_2):
     return pos_tokens_1, table_df_1, pos_tokens_2, table_df_2


+@lru_cache
 def basic_count(tokenizer_type):
     tokenizer = load_tokener(tokenizer_type)
     stats = iter_vocab(tokenizer, tokenizer_type)
@@ -113,7 +114,6 @@ def on_load(url_params, request: gr.Request):
     """
     onLoad
     """
-
     text = None
     tokenizer_type_1 = None
     tokenizer_type_2 = None
@@ -122,11 +122,7 @@ def on_load(url_params, request: gr.Request):
     except:
         url_params = {}
     if request:
-        try:
-            logger.info(str(request.headers))
-            logger.info(str(request.query_params))
-        except:
-            pass
+        logger.info(str(request.headers))
         client_ip = request.client.host
         # local_ip = socket.gethostbyname(socket.gethostbyname(""))
         # headers = request.kwargs['headers']
@@ -139,8 +135,6 @@ def on_load(url_params, request: gr.Request):
         tokenizer_type_1 = url_params.get("tokenizer1", default_tokenizer_type_1)
         tokenizer_type_2 = url_params.get("tokenizer2", default_tokenizer_type_2)
         text = url_params.get("text", default_user_input)
-
-
         logger.info(f"client_ip: {client_ip}; params: {url_params}")
     return text, tokenizer_type_1, tokenizer_type_2
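Taken together, the util.py hunks do two things: memoize basic_count with @lru_cache, and drop the try/except wrapper around the request logging in favor of a single logger.info(str(request.headers)) call. Since the diff adds no import line, from functools import lru_cache presumably already exists in util.py. A minimal sketch of the caching behavior, with load_tokener and iter_vocab replaced by hypothetical stubs so the example is self-contained:

from functools import lru_cache

# Hypothetical stand-ins for the repo's load_tokener / iter_vocab,
# only here to make the sketch runnable.
def load_tokener(tokenizer_type):
    print(f"loading {tokenizer_type} ...")  # the expensive step in the real app
    return object()

def iter_vocab(tokenizer, tokenizer_type):
    return {"vocab_size": 0}  # placeholder stats

@lru_cache
def basic_count(tokenizer_type):
    # tokenizer_type is a hashable str, so it serves as the cache key:
    # the first call computes, repeated calls return the cached result.
    tokenizer = load_tokener(tokenizer_type)
    return iter_vocab(tokenizer, tokenizer_type)

basic_count("chatglm2_6b")  # prints "loading chatglm2_6b ..."
basic_count("chatglm2_6b")  # cache hit: no reload, same dict returned

Note that bare @lru_cache (no parentheses, unbounded cache) requires Python 3.8+; on older versions it would have to be written @lru_cache(maxsize=None).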
vocab/__init__.py CHANGED
@@ -75,7 +75,7 @@ all_tokenizers = [
     # ##### glm series
     "glm_chinese",
     "chatglm_6b",
-    "
+    "chatglm2_6b",
     #
     # #### llama / alpaca series
     "llama",  # '中文单字': 700, '中文多字': 0
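The vocab/__init__.py hunk simply registers "chatglm2_6b" in the all_tokenizers list (the removed line 78 is truncated to a lone quote in this view, so its original content is not recoverable here). A hedged sketch of how such a registry list is typically consumed, assuming each entry maps to a vocab.<name> submodule exposing a tokenizer attribute, which this diff does not confirm:

import importlib

all_tokenizers = [
    "glm_chinese",
    "chatglm_6b",
    "chatglm2_6b",  # newly registered in this commit
    "llama",
]

def load_tokener(tokenizer_type):
    # Hypothetical lookup: validate the name against the registry, then
    # lazily import the matching vocab submodule (layout assumed, not
    # shown in this diff).
    if tokenizer_type not in all_tokenizers:
        raise ValueError(f"unknown tokenizer: {tokenizer_type!r}")
    module = importlib.import_module(f"vocab.{tokenizer_type}")
    return module.tokenizer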