Traceback (most recent call last):
File "/cm/shared/project/FineTuning-as-a-service/microservices/data_curation/venv/lib/python3.10/site-packages/uvicorn/protocols/http/h11_impl.py", line 403, in run_asgi
result = await app( # type: ignore[func-returns-value]
File "/cm/shared/project/FineTuning-as-a-service/microservices/data_curation/venv/lib/python3.10/site-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__
return await self.app(scope, receive, send)
File "/cm/shared/project/FineTuning-as-a-service/microservices/data_curation/venv/lib/python3.10/site-packages/fastapi/applications.py", line 1054, in __call__
await super().__call__(scope, receive, send)
File "/cm/shared/project/FineTuning-as-a-service/microservices/data_curation/venv/lib/python3.10/site-packages/starlette/applications.py", line 112, in __call__
await self.middleware_stack(scope, receive, send)
File "/cm/shared/project/FineTuning-as-a-service/microservices/data_curation/venv/lib/python3.10/site-packages/starlette/middleware/errors.py", line 187, in __call__
raise exc
File "/cm/shared/project/FineTuning-as-a-service/microservices/data_curation/venv/lib/python3.10/site-packages/starlette/middleware/errors.py", line 165, in __call__
await self.app(scope, receive, _send)
File "/cm/shared/project/FineTuning-as-a-service/microservices/data_curation/venv/lib/python3.10/site-packages/starlette/middleware/exceptions.py", line 62, in __call__
await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
File "/cm/shared/project/FineTuning-as-a-service/microservices/data_curation/venv/lib/python3.10/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
raise exc
File "/cm/shared/project/FineTuning-as-a-service/microservices/data_curation/venv/lib/python3.10/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
await app(scope, receive, sender)
File "/cm/shared/project/FineTuning-as-a-service/microservices/data_curation/venv/lib/python3.10/site-packages/starlette/routing.py", line 714, in __call__
await self.middleware_stack(scope, receive, send)
File "/cm/shared/project/FineTuning-as-a-service/microservices/data_curation/venv/lib/python3.10/site-packages/starlette/routing.py", line 734, in app
await route.handle(scope, receive, send)
File "/cm/shared/project/FineTuning-as-a-service/microservices/data_curation/venv/lib/python3.10/site-packages/starlette/routing.py", line 288, in handle
await self.app(scope, receive, send)
File "/cm/shared/project/FineTuning-as-a-service/microservices/data_curation/venv/lib/python3.10/site-packages/starlette/routing.py", line 76, in app
await wrap_app_handling_exceptions(app, request)(scope, receive, send)
File "/cm/shared/project/FineTuning-as-a-service/microservices/data_curation/venv/lib/python3.10/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
raise exc
File "/cm/shared/project/FineTuning-as-a-service/microservices/data_curation/venv/lib/python3.10/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
await app(scope, receive, sender)
File "/cm/shared/project/FineTuning-as-a-service/microservices/data_curation/venv/lib/python3.10/site-packages/starlette/routing.py", line 73, in app
response = await f(request)
File "/cm/shared/project/FineTuning-as-a-service/microservices/data_curation/venv/lib/python3.10/site-packages/fastapi/routing.py", line 301, in app
raw_response = await run_endpoint_function(
File "/cm/shared/project/FineTuning-as-a-service/microservices/data_curation/venv/lib/python3.10/site-packages/fastapi/routing.py", line 214, in run_endpoint_function
return await run_in_threadpool(dependant.call, **values)
File "/cm/shared/project/FineTuning-as-a-service/microservices/data_curation/venv/lib/python3.10/site-packages/starlette/concurrency.py", line 37, in run_in_threadpool
return await anyio.to_thread.run_sync(func)
File "/cm/shared/project/FineTuning-as-a-service/microservices/data_curation/venv/lib/python3.10/site-packages/anyio/to_thread.py", line 56, in run_sync
return await get_async_backend().run_sync_in_worker_thread(
File "/cm/shared/project/FineTuning-as-a-service/microservices/data_curation/venv/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 2461, in run_sync_in_worker_thread
return await future
File "/cm/shared/project/FineTuning-as-a-service/microservices/data_curation/venv/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 962, in run
result = context.run(func, *args)
File "/cm/shared/project/FineTuning-as-a-service/microservices/data_curation/app.py", line 17, in data_curation
main()
File "/cm/shared/project/FineTuning-as-a-service/microservices/data_curation/main.py", line 253, in main
blend_and_shuffle(args, dataset_paths, dataset_weights, target_size)
File "/cm/shared/project/FineTuning-as-a-service/microservices/data_curation/main.py", line 226, in blend_and_shuffle
blended_dataset = shuffle(blended_dataset)
File "/cm/shared/project/FineTuning-as-a-service/microservices/data_curation/venv/lib/python3.10/site-packages/nemo_curator/modules/base.py", line 84, in __call__
return self.call(dataset)
File "/cm/shared/project/FineTuning-as-a-service/microservices/data_curation/venv/lib/python3.10/site-packages/nemo_curator/modules/dataset_ops.py", line 47, in call
return self.shuffle_deterministic(dataset)
File "/cm/shared/project/FineTuning-as-a-service/microservices/data_curation/venv/lib/python3.10/site-packages/nemo_curator/modules/dataset_ops.py", line 54, in shuffle_deterministic
dataset.df[self.rand_col] = dataset.df.map_partitions(self._add_rand_col)
File "/cm/shared/project/FineTuning-as-a-service/microservices/data_curation/venv/lib/python3.10/site-packages/dask/dataframe/dask_expr/_collection.py", line 3053, in __setitem__
out = self.assign(**{key: value})
File "/cm/shared/project/FineTuning-as-a-service/microservices/data_curation/venv/lib/python3.10/site-packages/dask/dataframe/dask_expr/_collection.py", line 2829, in assign
v = from_dask_array(v, index=result.index, meta=result._meta)
File "/cm/shared/project/FineTuning-as-a-service/microservices/data_curation/venv/lib/python3.10/site-packages/dask/dataframe/dask_expr/_collection.py", line 5088, in from_dask_array
return from_dask_array(x, columns=columns, index=index, meta=meta)
File "/cm/shared/project/FineTuning-as-a-service/microservices/data_curation/venv/lib/python3.10/site-packages/dask/dataframe/io/io.py", line 216, in from_dask_array
optimize(ensure_dict(graph), keys),
File "/cm/shared/project/FineTuning-as-a-service/microservices/data_curation/venv/lib/python3.10/site-packages/dask/array/optimization.py", line 62, in optimize
dsk = fuse_linear_task_spec(dsk, keys=keys)
File "/cm/shared/project/FineTuning-as-a-service/microservices/data_curation/venv/lib/python3.10/site-packages/dask/_task_spec.py", line 1072, in fuse_linear_task_spec
result[renamed_key] = Task.fuse(*linear_chain, key=renamed_key)
File "/cm/shared/project/FineTuning-as-a-service/microservices/data_curation/venv/lib/python3.10/site-packages/dask/_task_spec.py", line 465, in fuse
raise ValueError(f"Cannot fuse tasks with multiple outputs {leafs}")
ValueError: Cannot fuse tasks with multiple outputs {('_add_rand_col-9cd2270dfb463ece32ecc8ae97a2ebcf', 6), ('getitem-3875e6e1c4953bf8bd7f8c2bdfe227c3', 4)}