Hello,
When I run inference, it starts to generate and then suddenly I get an error message:
Server error: Value out of range: -1224647480
return self._loop.run_until_complete(task)
File "/opt/conda/lib/python3.11/asyncio/base_events.py", line 641, in run_until_complete
self.run_forever()
File "/opt/conda/lib/python3.11/asyncio/base_events.py", line 608, in run_forever
self._run_once()
File "/opt/conda/lib/python3.11/asyncio/base_events.py", line 1936, in _run_once
handle._run()
File "/opt/conda/lib/python3.11/asyncio/events.py", line 84, in _run
self._context.run(self._callback, *self._args)
File "/opt/conda/lib/python3.11/site-packages/grpc_interceptor/server.py", line 165, in invoke_intercept_method
return await self.intercept(
File "/opt/conda/lib/python3.11/site-packages/text_generation_server/interceptor.py", line 21, in intercept
return await response
File "/opt/conda/lib/python3.11/site-packages/opentelemetry/instrumentation/grpc/_aio_server.py", line 120, in _unary_interceptor
raise error
File "/opt/conda/lib/python3.11/site-packages/opentelemetry/instrumentation/grpc/_aio_server.py", line 111, in _unary_interceptor
return await behavior(request_or_iterator, context)
File "/opt/conda/lib/python3.11/site-packages/text_generation_server/server.py", line 184, in Decode
return generate_pb2.DecodeResponse(
ValueError: Value out of range: -1224647480
2025-07-25T19:20:38.239491Z ERROR batch{batch_size=1}:decode:decode{size=1}:decode{size=1}: text_generation_router_v3::client: backends/v3/src/client/mod.rs:54: Server error: Value out of range: -1224647480
2025-07-25T19:20:38.240931Z ERROR compat_generate{default_return_full_text=true compute_type=Extension(ComputeType("1-nvidia-geforce-rtx-3090"))}:generate_stream{parameters=GenerateParameters { best_of: None, temperature: Some(0.7), repetition_penalty: Some(1.0), frequency_penalty: None, top_k: Some(10), top_p: Some(0.999), typical_p: Some(0.95), do_sample: false, max_new_tokens: Some(256), return_full_text: Some(false), stop: , truncate: None, watermark: false, details: true, decoder_input_details: false, seed: None, top_n_tokens: None, grammar: None, adapter_id: None }}:async_stream:generate_stream:schedule:infer:send_error: text_generation_router_v3::backend: backends/v3/src/backend.rs:488: Request failed during generation: Server error: Value out of range: -1224647480
Exception in thread Thread-1 (wrapped_llm_predict):
Traceback (most recent call last):
File "/home/workbench/.conda/envs/api-env/lib/python3.10/site-packages/text_generation/client.py", line 259, in generate_stream
response = StreamResponse(**json_payload)
File "/home/workbench/.conda/envs/api-env/lib/python3.10/site-packages/pydantic/main.py", line 253, in __init__
File "/home/workbench/.conda/envs/api-env/lib/python3.10/site-packages/langchain/llms/base.py", line 1053, in _generate
self._call(prompt, stop=stop, run_manager=run_manager, **kwargs)
File "/home/workbench/.conda/envs/api-env/lib/python3.10/site-packages/langchain/llms/huggingface_text_gen_inference.py", line 194, in _call
for chunk in self._stream(prompt, stop, run_manager, **kwargs):
File "/home/workbench/.conda/envs/api-env/lib/python3.10/site-packages/langchain/llms/huggingface_text_gen_inference.py", line 240, in _stream
for res in self.client.generate_stream(prompt, **invocation_params):
File "/home/workbench/.conda/envs/api-env/lib/python3.10/site-packages/text_generation/client.py", line 262, in generate_stream
raise parse_error(resp.status_code, json_payload)
text_generation.errors.GenerationError: Request failed during generation: Server error: Value out of range: -1224647480