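When I run the code below, I get the error above (the full console output and traceback are included further down). Is this happening because the GPU is running out of memory? I would not expect that to be the cause, since my understanding is that when GPU memory is exhausted the GPU should fall back to using main memory rather than fail.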
Note
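I'd also like to add that this exact code ran fine on Google Colab; it fails only when I run it in Jupyter Notebook on my own machine (Windows, inside a virtual environment, as the paths in the traceback show).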
Code
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
epochs=50
model.compile(loss="binary_crossentropy",optimizer='adam',metrics=['accuracy'])
fitted_model=model.fit(X_train,y_train,epochs=epochs,validation_split=0.3, use_multiprocessing=True)
Error (Command Line)
2021-06-07 16:54:06.549654: W tensorflow/core/framework/op_kernel.cc:1755] Internal: 'cuModuleGetFunction(&function, module, kernel_name)' failed with 'CUDA_ERROR_INVALID_HANDLE'
2021-06-07 16:54:06.549706: W tensorflow/core/framework/op_kernel.cc:1755] Internal: 'cuLaunchKernel(function, gridX, gridY, gridZ, blockX, blockY, blockZ, 0, stream, params, nullptr)' failed with 'CUDA_ERROR_INVALID_HANDLE'
[I 16:55:19.628 NotebookApp] Saving file at /temp/Set2.ipynb
Traceback
---------------------------------------------------------------------------
InternalError Traceback (most recent call last)
<ipython-input-5-c072d84ff9ec> in <module>
4 epochs=50
5 model.compile(loss="binary_crossentropy",optimizer='adam',metrics=['accuracy'])
----> 6 fitted_model=model.fit(X_train,y_train,epochs=epochs,validation_split=0.3, use_multiprocessing=True)
c:\users\harvish's pc\desktop\logo detection\myvenv\lib\site-packages\tensorflow\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
1181 _r=1):
1182 callbacks.on_train_batch_begin(step)
-> 1183 tmp_logs = self.train_function(iterator)
1184 if data_handler.should_sync:
1185 context.async_wait()
c:\users\harvish's pc\desktop\logo detection\myvenv\lib\site-packages\tensorflow\python\eager\def_function.py in __call__(self, *args, **kwds)
887
888 with OptionalXlaContext(self._jit_compile):
--> 889 result = self._call(*args, **kwds)
890
891 new_tracing_count = self.experimental_get_tracing_count()
c:\users\harvish's pc\desktop\logo detection\myvenv\lib\site-packages\tensorflow\python\eager\def_function.py in _call(self, *args, **kwds)
948 # Lifting succeeded, so variables are initialized and we can run the
949 # stateless function.
--> 950 return self._stateless_fn(*args, **kwds)
951 else:
952 _, _, _, filtered_flat_args = \
c:\users\harvish's pc\desktop\logo detection\myvenv\lib\site-packages\tensorflow\python\eager\function.py in __call__(self, *args, **kwargs)
3021 (graph_function,
3022 filtered_flat_args) = self._maybe_define_function(args, kwargs)
-> 3023 return graph_function._call_flat(
3024 filtered_flat_args, captured_inputs=graph_function.captured_inputs) # pylint: disable=protected-access
3025
c:\users\harvish's pc\desktop\logo detection\myvenv\lib\site-packages\tensorflow\python\eager\function.py in _call_flat(self, args, captured_inputs, cancellation_manager)
1958 and executing_eagerly):
1959 # No tape is watching; skip to running the function.
-> 1960 return self._build_call_outputs(self._inference_function.call(
1961 ctx, args, cancellation_manager=cancellation_manager))
1962 forward_backward = self._select_forward_and_backward_functions(
c:\users\harvish's pc\desktop\logo detection\myvenv\lib\site-packages\tensorflow\python\eager\function.py in call(self, ctx, args, cancellation_manager)
589 with _InterpolateFunctionError(self):
590 if cancellation_manager is None:
--> 591 outputs = execute.execute(
592 str(self.signature.name),
593 num_outputs=self._num_outputs,
c:\users\harvish's pc\desktop\logo detection\myvenv\lib\site-packages\tensorflow\python\eager\execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
57 try:
58 ctx.ensure_initialized()
---> 59 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
60 inputs, attrs, num_outputs)
61 except core._NotOkStatusException as e:
InternalError: 'cuModuleGetFunction(&function, module, kernel_name)' failed with 'CUDA_ERROR_INVALID_HANDLE'
[[node sequential/dropout/dropout/Mul_1 (defined at <ipython-input-5-c072d84ff9ec>:6) ]] [Op:__inference_train_function_1066]
Function call stack:
train_function