|
|
|
@ -1,10 +1,10 @@
|
|
|
|
from os import path
|
|
|
|
|
|
|
|
from threading import Lock
|
|
|
|
from threading import Lock
|
|
|
|
from time import time
|
|
|
|
from time import time
|
|
|
|
|
|
|
|
|
|
|
|
from requests.adapters import HTTPAdapter
|
|
|
|
from requests.adapters import HTTPAdapter
|
|
|
|
from requests.exceptions import ConnectionError, MissingSchema, Timeout
|
|
|
|
from requests.exceptions import ConnectionError, MissingSchema, Timeout
|
|
|
|
from requests.sessions import Session
|
|
|
|
from requests.sessions import Session
|
|
|
|
|
|
|
|
from yarl import URL
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class XinferenceModelExtraParameter:
|
|
|
|
class XinferenceModelExtraParameter:
|
|
|
|
@ -55,7 +55,10 @@ class XinferenceHelper:
|
|
|
|
get xinference model extra parameter like model_format and model_handle_type
|
|
|
|
get xinference model extra parameter like model_format and model_handle_type
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
url = path.join(server_url, 'v1/models', model_uid)
|
|
|
|
if not model_uid or not model_uid.strip() or not server_url or not server_url.strip():
|
|
|
|
|
|
|
|
raise RuntimeError('model_uid is empty')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
url = str(URL(server_url) / 'v1' / 'models' / model_uid)
|
|
|
|
|
|
|
|
|
|
|
|
# this method is surrounded by a lock, and default requests may hang forever, so we just set an Adapter with max_retries=3
|
|
|
|
# this method is surrounded by a lock, and default requests may hang forever, so we just set an Adapter with max_retries=3
|
|
|
|
session = Session()
|
|
|
|
session = Session()
|
|
|
|
@ -66,7 +69,6 @@ class XinferenceHelper:
|
|
|
|
response = session.get(url, timeout=10)
|
|
|
|
response = session.get(url, timeout=10)
|
|
|
|
except (MissingSchema, ConnectionError, Timeout) as e:
|
|
|
|
except (MissingSchema, ConnectionError, Timeout) as e:
|
|
|
|
raise RuntimeError(f'get xinference model extra parameter failed, url: {url}, error: {e}')
|
|
|
|
raise RuntimeError(f'get xinference model extra parameter failed, url: {url}, error: {e}')
|
|
|
|
|
|
|
|
|
|
|
|
if response.status_code != 200:
|
|
|
|
if response.status_code != 200:
|
|
|
|
raise RuntimeError(f'get xinference model extra parameter failed, status code: {response.status_code}, response: {response.text}')
|
|
|
|
raise RuntimeError(f'get xinference model extra parameter failed, status code: {response.status_code}, response: {response.text}')
|
|
|
|
|
|
|
|
|
|
|
|
|