|
|
|
|
@ -32,68 +32,70 @@ class MockXinferenceClass(object):
|
|
|
|
|
response = Response()
|
|
|
|
|
if 'v1/models/' in url:
|
|
|
|
|
# get model uid
|
|
|
|
|
model_uid = url.split('/')[-1]
|
|
|
|
|
model_uid = url.split('/')[-1] or ''
|
|
|
|
|
if not re.match(r'[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}', model_uid) and \
|
|
|
|
|
model_uid not in ['generate', 'chat', 'embedding', 'rerank']:
|
|
|
|
|
response.status_code = 404
|
|
|
|
|
response._content = b'{}'
|
|
|
|
|
return response
|
|
|
|
|
|
|
|
|
|
# check if url is valid
|
|
|
|
|
if not re.match(r'^(https?):\/\/[^\s\/$.?#].[^\s]*$', url):
|
|
|
|
|
response.status_code = 404
|
|
|
|
|
response._content = b'{}'
|
|
|
|
|
return response
|
|
|
|
|
|
|
|
|
|
if model_uid in ['generate', 'chat']:
|
|
|
|
|
response.status_code = 200
|
|
|
|
|
response._content = b'''{
|
|
|
|
|
"model_type": "LLM",
|
|
|
|
|
"address": "127.0.0.1:43877",
|
|
|
|
|
"accelerators": [
|
|
|
|
|
"0",
|
|
|
|
|
"1"
|
|
|
|
|
],
|
|
|
|
|
"model_name": "chatglm3-6b",
|
|
|
|
|
"model_lang": [
|
|
|
|
|
"en"
|
|
|
|
|
],
|
|
|
|
|
"model_ability": [
|
|
|
|
|
"generate",
|
|
|
|
|
"chat"
|
|
|
|
|
],
|
|
|
|
|
"model_description": "latest chatglm3",
|
|
|
|
|
"model_format": "pytorch",
|
|
|
|
|
"model_size_in_billions": 7,
|
|
|
|
|
"quantization": "none",
|
|
|
|
|
"model_hub": "huggingface",
|
|
|
|
|
"revision": null,
|
|
|
|
|
"context_length": 2048,
|
|
|
|
|
"replica": 1
|
|
|
|
|
}'''
|
|
|
|
|
"model_type": "LLM",
|
|
|
|
|
"address": "127.0.0.1:43877",
|
|
|
|
|
"accelerators": [
|
|
|
|
|
"0",
|
|
|
|
|
"1"
|
|
|
|
|
],
|
|
|
|
|
"model_name": "chatglm3-6b",
|
|
|
|
|
"model_lang": [
|
|
|
|
|
"en"
|
|
|
|
|
],
|
|
|
|
|
"model_ability": [
|
|
|
|
|
"generate",
|
|
|
|
|
"chat"
|
|
|
|
|
],
|
|
|
|
|
"model_description": "latest chatglm3",
|
|
|
|
|
"model_format": "pytorch",
|
|
|
|
|
"model_size_in_billions": 7,
|
|
|
|
|
"quantization": "none",
|
|
|
|
|
"model_hub": "huggingface",
|
|
|
|
|
"revision": null,
|
|
|
|
|
"context_length": 2048,
|
|
|
|
|
"replica": 1
|
|
|
|
|
}'''
|
|
|
|
|
return response
|
|
|
|
|
|
|
|
|
|
elif model_uid == 'embedding':
|
|
|
|
|
response.status_code = 200
|
|
|
|
|
response._content = b'''{
|
|
|
|
|
"model_type": "embedding",
|
|
|
|
|
"address": "127.0.0.1:43877",
|
|
|
|
|
"accelerators": [
|
|
|
|
|
"0",
|
|
|
|
|
"1"
|
|
|
|
|
],
|
|
|
|
|
"model_name": "bge",
|
|
|
|
|
"model_lang": [
|
|
|
|
|
"en"
|
|
|
|
|
],
|
|
|
|
|
"revision": null,
|
|
|
|
|
"max_tokens": 512
|
|
|
|
|
}'''
|
|
|
|
|
"model_type": "embedding",
|
|
|
|
|
"address": "127.0.0.1:43877",
|
|
|
|
|
"accelerators": [
|
|
|
|
|
"0",
|
|
|
|
|
"1"
|
|
|
|
|
],
|
|
|
|
|
"model_name": "bge",
|
|
|
|
|
"model_lang": [
|
|
|
|
|
"en"
|
|
|
|
|
],
|
|
|
|
|
"revision": null,
|
|
|
|
|
"max_tokens": 512
|
|
|
|
|
}'''
|
|
|
|
|
return response
|
|
|
|
|
|
|
|
|
|
elif 'v1/cluster/auth' in url:
|
|
|
|
|
response.status_code = 200
|
|
|
|
|
response._content = b'''{
|
|
|
|
|
"auth": true
|
|
|
|
|
}'''
|
|
|
|
|
"auth": true
|
|
|
|
|
}'''
|
|
|
|
|
return response
|
|
|
|
|
|
|
|
|
|
def _check_cluster_authenticated(self):
|
|
|
|
|
|