Conn.list_models() Show timeout in LLM customization course

mohamedali.toufahi · June 29, 2025, 9:00pm

The course uses NeMo services. I get an error each time I run the cell below, so I cannot complete my course. This is urgent because I lose access tomorrow.

import os

from nemollm.api import NemoLLM
from llm_utils.nemo_service_models import NemoServiceBaseModel
api_key = os.getenv('NGC_API_KEY')
api_host = os.getenv('API_HOST')
conn = NemoLLM(
    api_host=api_host,
    api_key=api_key
)
response = conn.list_models()
models = {}

for model in response['models']:
    name = model.get('name')
    features = model.get('features')
    models[name] = features

ServerSideError Traceback (most recent call last)
Cell In[4], line 1
----> 1 response = conn.list_models()
2 models = {}
4 for model in response['models']:

File /usr/local/lib/python3.12/site-packages/nemollm/api.py:139, in NemoLLM.list_models(self)
136 _thread_context.session = create_session()
138 response = _thread_context.session.get(url, headers=self.headers, timeout=REQUESTS_TIMEOUT_SECS)
--> 139 NemoLLM.handle_response(response)
140 return response.json()

File /usr/local/lib/python3.12/site-packages/nemollm/api.py:130, in NemoLLM.handle_response(response, stream)
126 raise ClientSideError(status_code=status_code, reason=response.reason, decoded_content=decoded_content)
128 # server side errors
129 else:
--> 130 raise ServerSideError(status_code=status_code, reason=response.reason, decoded_content=decoded_content)

ServerSideError: Request failed with HTTP Status Code 500 INTERNAL SERVER ERROR Solution: Server is unable to handle your request right now. Please retry your request after a brief wait. If this problem persist, please contact NeMo LLM team Full response:

requests.exceptions.ConnectTimeout: HTTPSConnectionPool(host='api.llm.ngc.nvidia.com', port=443): Max retries exceeded with url: /v1/models (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7f9fc82cbf50>, 'Connection to api.llm.ngc.nvidia.com timed out. (connect timeout=None)')) // Werkzeug Debugger

ConnectTimeout

requests.exceptions.ConnectTimeout: HTTPSConnectionPool(host='api.llm.ngc.nvidia.com', port=443): Max retries exceeded with url: /v1/models (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7f9fc82cbf50>, 'Connection to api.llm.ngc.nvidia.com timed out. (connect timeout=None)'))

Traceback (most recent call last)

File "/usr/local/lib/python3.12/site-packages/urllib3/connection.py", line 196, in `_new_conn`

        """Establish a socket connection and set nodelay settings on it.

        :return: New socket connection.

"""

        try:

            sock = connection.create_connection(

                (self._dns_host, self.port),

                self.timeout,

                source_address=self.source_address,

                socket_options=self.socket_options,

File "/usr/local/lib/python3.12/site-packages/urllib3/util/connection.py", line 85, in `create_connection`

            if sock is not None:

                sock.close()

    if err is not None:

        try:

            raise err
            ^^^^^^^^^

        finally:

            # Break explicitly a reference cycle

            err = None

    else:

        raise OSError("getaddrinfo returns an empty list")

File "/usr/local/lib/python3.12/site-packages/urllib3/util/connection.py", line 73, in `create_connection`

            if timeout is not _DEFAULT_TIMEOUT:

                sock.settimeout(timeout)

            if source_address:

                sock.bind(source_address)

            sock.connect(sa)
            ^^^^^^^^^^^^^^^^

            # Break explicitly a reference cycle

            err = None

            return sock

        except OSError as _:

The above exception was the direct cause of the following exception:

File "/usr/local/lib/python3.12/site-packages/urllib3/connectionpool.py", line 789, in `urlopen`

            # it will also try to release it and we'll have a double-release

            # mess.

            response_conn = conn if not release_conn else None

            # Make the request on the HTTPConnection object

            response = self._make_request(

                conn,

                method,

                url,

                timeout=timeout_obj,

                body=body,

File "/usr/local/lib/python3.12/site-packages/urllib3/connectionpool.py", line 490, in `_make_request`

            # then there

            if isinstance(

                new_e, (OSError, NewConnectionError, TimeoutError, SSLError)

            ) and (conn and conn.proxy and not conn.has_connected_to_proxy):

                new_e = _wrap_proxy_error(new_e, conn.proxy.scheme)

            raise new_e
            ^^^^^^^^^^^

        # conn.request() calls http.client.*.request, not the method in

        # urllib3.request. It also calls makefile (recv) on the socket.

        try:

            conn.request(

File "/usr/local/lib/python3.12/site-packages/urllib3/connectionpool.py", line 466, in `_make_request`

        conn.timeout = Timeout.resolve_default_timeout(timeout_obj.connect_timeout)

        try:

            # Trigger any extra validation we need to do.

            try:

                self._validate_conn(conn)
                ^^^^^^^^^^^^^^^^^^^^^^^^^

            except (SocketTimeout, BaseSSLError) as e:

                self._raise_timeout(err=e, url=url, timeout_value=conn.timeout)

                raise

        # _validate_conn() starts the connection to an HTTPS proxy

File "/usr/local/lib/python3.12/site-packages/urllib3/connectionpool.py", line 1095, in `_validate_conn`

"""

        super()._validate_conn(conn)

        # Force connect early to allow us to validate the connection.

        if conn.is_closed:

            conn.connect()
            ^^^^^^^^^^^^^^

        # TODO revise this, see https://github.com/urllib3/urllib3/issues/2791

        if not conn.is_verified and not conn.proxy_is_verified:

            warnings.warn(

File "/usr/local/lib/python3.12/site-packages/urllib3/connection.py", line 615, in `connect`

        self.ca_cert_dir = ca_cert_dir and os.path.expanduser(ca_cert_dir)

        self.ca_cert_data = ca_cert_data

    def connect(self) -> None:

        sock: socket.socket | ssl.SSLSocket

        self.sock = sock = self._new_conn()
                           ^^^^^^^^^^^^^^^^

        server_hostname: str = self.host

        tls_in_tls = False

        # Do we need to establish a tunnel?

        if self._tunnel_host is not None:

File "/usr/local/lib/python3.12/site-packages/urllib3/connection.py", line 205, in `_new_conn`

                socket_options=self.socket_options,

        except socket.gaierror as e:

            raise NameResolutionError(self.host, self, e) from e

        except SocketTimeout as e:

            raise ConnectTimeoutError(
            ^^^^^^^^

                self,

                f"Connection to {self.host} timed out. (connect timeout={self.timeout})",

            ) from e

        except OSError as e:

The above exception was the direct cause of the following exception:

File "/usr/local/lib/python3.12/site-packages/requests/adapters.py", line 667, in `send`

            pass

        else:

            timeout = TimeoutSauce(connect=timeout, read=timeout)

        try:

            resp = conn.urlopen(

                method=request.method,

                url=url,

                body=request.body,

                headers=request.headers,

                redirect=False,

File "/usr/local/lib/python3.12/site-packages/urllib3/connectionpool.py", line 843, in `urlopen`

            ) and (conn and conn.proxy and not conn.has_connected_to_proxy):

                new_e = _wrap_proxy_error(new_e, conn.proxy.scheme)

            elif isinstance(new_e, (OSError, HTTPException)):

                new_e = ProtocolError("Connection aborted.", new_e)

            retries = retries.increment(

                method, url, error=new_e, _pool=self, _stacktrace=sys.exc_info()[2]

            retries.sleep()

            # Keep track of the error for the retry warning.

File "/usr/local/lib/python3.12/site-packages/urllib3/util/retry.py", line 519, in `increment`

            history=history,

        if new_retry.is_exhausted():

            reason = error or ResponseError(cause)

            raise MaxRetryError(_pool, url, reason) from reason  # type: ignore[arg-type]
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

        log.debug("Incremented Retry for (url='%s'): %r", url, new_retry)

        return new_retry

During handling of the above exception, another exception occurred:

File "/usr/local/lib/python3.12/site-packages/flask/app.py", line 1498, in `__call__`

    ) -> cabc.Iterable[bytes]:

        """The WSGI server calls the Flask application object as the

        WSGI application. This calls :meth:`wsgi_app`, which can be

        wrapped to apply middleware.

"""

        return self.wsgi_app(environ, start_response)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

File "/usr/local/lib/python3.12/site-packages/flask/app.py", line 1476, in `wsgi_app`

            try:

                ctx.push()

                response = self.full_dispatch_request()

            except Exception as e:

                error = e

                response = self.handle_exception(e)
                           ^^^^^^^^^^^^^^^^^^^^^^^^

            except:  # noqa: B001

                error = sys.exc_info()[1]

                raise

            return response(environ, start_response)

        finally:

File "/usr/local/lib/python3.12/site-packages/flask/app.py", line 1473, in `wsgi_app`

        ctx = self.request_context(environ)

        error: BaseException | None = None

        try:

            try:

                ctx.push()

                response = self.full_dispatch_request()
                           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^

            except Exception as e:

                error = e

                response = self.handle_exception(e)

            except:  # noqa: B001

                error = sys.exc_info()[1]

File "/usr/local/lib/python3.12/site-packages/flask/app.py", line 882, in `full_dispatch_request`

            request_started.send(self, _async_wrapper=self.ensure_sync)

            rv = self.preprocess_request()

            if rv is None:

                rv = self.dispatch_request()

        except Exception as e:

            rv = self.handle_user_exception(e)
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

        return self.finalize_request(rv)

    def finalize_request(

        self,

        rv: ft.ResponseReturnValue | HTTPException,

File "/usr/local/lib/python3.12/site-packages/flask/app.py", line 880, in `full_dispatch_request`

        try:

            request_started.send(self, _async_wrapper=self.ensure_sync)

            rv = self.preprocess_request()

            if rv is None:

                rv = self.dispatch_request()
                     ^^^^^^^^^^^^^^^^^^^^^^^

        except Exception as e:

            rv = self.handle_user_exception(e)

        return self.finalize_request(rv)

    def finalize_request(

File "/usr/local/lib/python3.12/site-packages/flask/app.py", line 865, in `dispatch_request`

            and req.method == "OPTIONS"

):

            return self.make_default_options_response()

        # otherwise dispatch to the handler for that endpoint

        view_args: dict[str, t.Any] = req.view_args  # type: ignore[assignment]

        return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args)  # type: ignore[no-any-return]
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

    def full_dispatch_request(self) -> Response:

        """Dispatches the request and on top of that performs request

        pre and postprocessing as well as HTTP exception catching and

        error handling.

File "//app.py", line 40, in `proxy`

    # Create a mutable copy of the headers, excluding certain headers

    headers = {key: value for key, value in request.headers.items() if key not in ['Host', 'Content-Length']}

    headers['Authorization'] = f'Bearer {api_key}'

    headers['Accept'] = 'application/json'  # Set Accept header to application/json

    response = requests.request(

        method=request.method,

        url=url,

        headers=headers,

        data=request.get_data(),

        allow_redirects=False,

File "/usr/local/lib/python3.12/site-packages/requests/api.py", line 59, in `request`

    # By using the 'with' statement we are sure the session is closed, thus we

    # avoid leaving sockets open which can trigger a ResourceWarning in some

    # cases, and look like a memory leak in others.

    with sessions.Session() as session:

        return session.request(method=method, url=url, **kwargs)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

def get(url, params=None, **kwargs):

    r"""Sends a GET request.

File "/usr/local/lib/python3.12/site-packages/requests/sessions.py", line 589, in `request`

        send_kwargs = {

            "timeout": timeout,

            "allow_redirects": allow_redirects,

        send_kwargs.update(settings)

        resp = self.send(prep, **send_kwargs)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

        return resp

    def get(self, url, **kwargs):

        r"""Sends a GET request. Returns :class:`Response` object.

File "/usr/local/lib/python3.12/site-packages/requests/sessions.py", line 703, in `send`

        # Start time (approximately) of the request

        start = preferred_clock()

        # Send the request

        r = adapter.send(request, **kwargs)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

        # Total elapsed time of the request (approximately)

        elapsed = preferred_clock() - start

        r.elapsed = timedelta(seconds=elapsed)

File "/usr/local/lib/python3.12/site-packages/requests/adapters.py", line 688, in `send`

        except MaxRetryError as e:

            if isinstance(e.reason, ConnectTimeoutError):

                # TODO: Remove this in 3.0.0: see #2811

                if not isinstance(e.reason, NewConnectionError):

                    raise ConnectTimeout(e, request=request)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

            if isinstance(e.reason, ResponseError):

                raise RetryError(e, request=request)

            if isinstance(e.reason, _ProxyError):

requests.exceptions.ConnectTimeout: HTTPSConnectionPool(host='api.llm.ngc.nvidia.com', port=443): Max retries exceeded with url: /v1/models (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7f9fc82cbf50>, 'Connection to api.llm.ngc.nvidia.com timed out. (connect timeout=None)'))

This is the Copy/Paste friendly version of the traceback.

Traceback (most recent call last): File "/usr/local/lib/python3.12/site-packages/urllib3/connection.py", line 196, in _new_conn sock = connection.create_connection( ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/usr/local/lib/python3.12/site-packages/urllib3/util/connection.py", line 85, in create_connection raise err File "/usr/local/lib/python3.12/site-packages/urllib3/util/connection.py", line 73, in create_connection sock.connect(sa) TimeoutError: [Errno 110] Operation timed out

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
File "/usr/local/lib/python3.12/site-packages/urllib3/connectionpool.py", line 789, in urlopen
response = self._make_request(
^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/site-packages/urllib3/connectionpool.py", line 490, in _make_request
raise new_e
File "/usr/local/lib/python3.12/site-packages/urllib3/connectionpool.py", line 466, in _make_request
self._validate_conn(conn)
File "/usr/local/lib/python3.12/site-packages/urllib3/connectionpool.py", line 1095, in _validate_conn
conn.connect()
File "/usr/local/lib/python3.12/site-packages/urllib3/connection.py", line 615, in connect
self.sock = sock = self._new_conn()
^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/site-packages/urllib3/connection.py", line 205, in _new_conn
raise ConnectTimeoutError(
urllib3.exceptions.ConnectTimeoutError: (<urllib3.connection.HTTPSConnection object at 0x7f9fc82cbf50>, 'Connection to api.llm.ngc.nvidia.com timed out. (connect timeout=None)')

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
File "/usr/local/lib/python3.12/site-packages/requests/adapters.py", line 667, in send
resp = conn.urlopen(
^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/site-packages/urllib3/connectionpool.py", line 843, in urlopen
retries = retries.increment(
^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/site-packages/urllib3/util/retry.py", line 519, in increment
raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='api.llm.ngc.nvidia.com', port=443): Max retries exceeded with url: /v1/models (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7f9fc82cbf50>, 'Connection to api.llm.ngc.nvidia.com timed out. (connect timeout=None)'))

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "/usr/local/lib/python3.12/site-packages/flask/app.py", line 1498, in __call__
return self.wsgi_app(environ, start_response)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/site-packages/flask/app.py", line 1476, in wsgi_app
response = self.handle_exception(e)
^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/site-packages/flask/app.py", line 1473, in wsgi_app
response = self.full_dispatch_request()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/site-packages/flask/app.py", line 882, in full_dispatch_request
rv = self.handle_user_exception(e)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/site-packages/flask/app.py", line 880, in full_dispatch_request
rv = self.dispatch_request()
^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/site-packages/flask/app.py", line 865, in dispatch_request
return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args) # type: ignore[no-any-return]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "//app.py", line 40, in proxy
response = requests.request(
^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/site-packages/requests/api.py", line 59, in request
return session.request(method=method, url=url, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/site-packages/requests/sessions.py", line 589, in request
resp = self.send(prep, **send_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/site-packages/requests/sessions.py", line 703, in send
r = adapter.send(request, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/site-packages/requests/adapters.py", line 688, in send
raise ConnectTimeout(e, request=request)
requests.exceptions.ConnectTimeout: HTTPSConnectionPool(host='api.llm.ngc.nvidia.com', port=443): Max retries exceeded with url: /v1/models (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7f9fc82cbf50>, 'Connection to api.llm.ngc.nvidia.com timed out. (connect timeout=None)'))

The debugger caught an exception in your WSGI application. You can now look at the traceback which led to the error. If you enable JavaScript you can also use additional features such as code execution (if the evalex feature is enabled), automatic pasting of the exceptions and much more.

Brought to you by DON'T PANIC, your friendly Werkzeug powered traceback interpreter.

<div class="pin-prompt">
  <div class="inner">
    <h3>Console Locked</h3>
    <p>
      The console is locked and needs to be unlocked by entering the PIN.
      You can find the PIN printed out on the standard output of your
      shell that runs the server.
    <form>
      <p>PIN:
        <input type=text name=pin size=14>
        <input type=submit name=btn value="Confirm Pin">
    </form>
  </div>
</div>

Topic		Replies	Views
Error while using response = conn.list_models() in the llm customization course NVIDIA NeMo Microservices	3	35	June 29, 2025
Not getting response for this model since yesterday meta/llama-3.1-405b-instruct model AI Foundation Models and Endpoints llama-31-405b-instruct , llama	0	60	December 13, 2024
Getting port=443 Max retries exceeded with url: https://api.bionemo.ngc.nvidia.com/v1 BioNeMo	2	69	April 20, 2025
ChatNVIDIA - HTTPError: 404 Client Error: Not Found Models nim	5	377	September 22, 2024
API connect Models nim , llama-31-8b-instruct , llama	1	134	September 20, 2024
Rag LLM Timed out AI Foundation Models and Endpoints	0	15	December 28, 2024
OpenAI Compatible API does not work Models llama-31-8b-instruct , llama-31-70b-instruct	6	427	August 26, 2024
NVIDIA NIM API / openai.API: Error code: 402,Cloud credits expired - Please contact NVIDIA representatives Models nim , llama-31-405b-instruct , llama	8	444	January 19, 2025
Result of nvidia nims in openai SDK and API inconsistent AI Foundation Models and Endpoints nim , llama-31-405b-instruct , llama	0	28	January 7, 2025
Llama-3.2-nv-embedqa-1b-v2 402 Payment required Models llama	1	26	June 10, 2025

Conn.list_models() Show timeout in LLM customization course

ConnectTimeout

Traceback (most recent call last)

File "/usr/local/lib/python3.12/site-packages/urllib3/connection.py", line 196, in _new_conn

File "/usr/local/lib/python3.12/site-packages/urllib3/util/connection.py", line 85, in create_connection

File "/usr/local/lib/python3.12/site-packages/urllib3/util/connection.py", line 73, in create_connection

File "/usr/local/lib/python3.12/site-packages/urllib3/connectionpool.py", line 789, in urlopen

File "/usr/local/lib/python3.12/site-packages/urllib3/connectionpool.py", line 490, in _make_request

File "/usr/local/lib/python3.12/site-packages/urllib3/connectionpool.py", line 466, in _make_request

File "/usr/local/lib/python3.12/site-packages/urllib3/connectionpool.py", line 1095, in _validate_conn

File "/usr/local/lib/python3.12/site-packages/urllib3/connection.py", line 615, in connect

File "/usr/local/lib/python3.12/site-packages/urllib3/connection.py", line 205, in _new_conn

File "/usr/local/lib/python3.12/site-packages/requests/adapters.py", line 667, in send

File "/usr/local/lib/python3.12/site-packages/urllib3/connectionpool.py", line 843, in urlopen

File "/usr/local/lib/python3.12/site-packages/urllib3/util/retry.py", line 519, in increment

File "/usr/local/lib/python3.12/site-packages/flask/app.py", line 1498, in __call__

File "/usr/local/lib/python3.12/site-packages/flask/app.py", line 1476, in wsgi_app

File "/usr/local/lib/python3.12/site-packages/flask/app.py", line 1473, in wsgi_app

File "/usr/local/lib/python3.12/site-packages/flask/app.py", line 882, in full_dispatch_request

File "/usr/local/lib/python3.12/site-packages/flask/app.py", line 880, in full_dispatch_request

File "/usr/local/lib/python3.12/site-packages/flask/app.py", line 865, in dispatch_request

File "//app.py", line 40, in proxy

File "/usr/local/lib/python3.12/site-packages/requests/api.py", line 59, in request

File "/usr/local/lib/python3.12/site-packages/requests/sessions.py", line 589, in request

File "/usr/local/lib/python3.12/site-packages/requests/sessions.py", line 703, in send

File "/usr/local/lib/python3.12/site-packages/requests/adapters.py", line 688, in send

Related topics