Hi all,
I am using the latest version of the rapids.ai Docker image (21.06), and in a Jupyter notebook this code works with no issue:
from cuml.common.sparsefuncs import csr_row_normalize_l2
def efficient_csr_cosine_similarity(query, tfidf_matrix, matrix_normalized=False):
    """Compute the similarity of each row of ``tfidf_matrix`` to ``query``.

    The query is always L2-row-normalized (on a copy, ``inplace=False``).
    The matrix is normalized the same way unless the caller states it is
    already normalized. The result is the product of the (normalized)
    matrix with the transposed normalized query, which is the cosine
    similarity when both operands have unit-length rows.

    Args:
        query: Sparse matrix holding the query vector(s); presumably CSR,
            as required by ``csr_row_normalize_l2`` -- confirm with caller.
        tfidf_matrix: Sparse document-term matrix, one row per document.
        matrix_normalized: If true, ``tfidf_matrix`` is used as-is and is
            not normalized again.

    Returns:
        The result of ``tfidf_matrix.dot(query.T)`` on the normalized
        operands.
    """
    unit_query = csr_row_normalize_l2(query, inplace=False)
    corpus = (
        tfidf_matrix
        if matrix_normalized
        else csr_row_normalize_l2(tfidf_matrix, inplace=False)
    )
    return corpus.dot(unit_query.T)
def document_search(text_df, query, vectorizer, tfidf_matrix, top_n=3):
    """Return the ``top_n`` entries of ``text_df`` most similar to ``query``.

    The query string is vectorized with ``vectorizer``, compared against
    every row of ``tfidf_matrix`` via ``efficient_csr_cosine_similarity``,
    and the best-scoring rows are returned in descending similarity order.

    Args:
        text_df: DataFrame with a ``'workout'`` column; row positions must
            line up with the rows of ``tfidf_matrix``.
        query: Search text.
        vectorizer: Fitted vectorizer exposing ``transform``.
        tfidf_matrix: Sparse document-term matrix. It is passed on with
            ``matrix_normalized=True``, so it is NOT normalized here; the
            caller is responsible for having normalized it.
        top_n: Maximum number of results to return. ``0`` yields an empty
            result.

    Returns:
        A ``cudf.DataFrame`` with columns ``'text'`` and ``'similarity'``.

    Raises:
        ValueError: If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    query_vec = vectorizer.transform(Series([query]))
    similarities = efficient_csr_cosine_similarity(
        query_vec, tfidf_matrix, matrix_normalized=True
    )
    # Flatten the dense (n_docs, n_queries) result into a 1-D score vector.
    similarities = similarities.todense().reshape(-1)

    # Reverse first, then truncate. The previous `[-top_n:][::-1]` form
    # selected the whole array for top_n == 0, because -0 == 0 turns the
    # slice into `[0:]`. For top_n > 0 both forms give the same indices
    # in the same order.
    best_idx = similarities.argsort()[::-1][:top_n]

    return cudf.DataFrame({
        'text': text_df['workout'].iloc[best_idx],
        'similarity': similarities[best_idx],
    })
But when I run it as a plain Python script in the same Docker container, as part of a Dash application, I get this runtime error:
Traceback (most recent call last):
File "/home/robomike/code/coolstuff_gpu/Merlin/cool/app/work/app.py", line 118, in displayStuff
main_df = document_search(cool_df, 'for time', vec, tfidf_matrix)
File "/home/robomike/code/coolstuff_gpu/Merlin/cool/app/work/app.py", line 43, in document_search
best_idx = similarities.argsort()[-top_n:][::-1]
File "cupy/_core/core.pyx", line 715, in cupy._core.core.ndarray.argsort
File "cupy/_core/core.pyx", line 732, in cupy._core.core.ndarray.argsort
File "cupy/_core/_routines_sorting.pyx", line 86, in cupy._core._routines_sorting._ndarray_argsort
File "cupy/cuda/thrust.pyx", line 117, in cupy.cuda.thrust.argsort
RuntimeError: radix_sort: failed on 2nd step: cudaErrorInvalidValue: invalid argument
Attached is the .py file that causes the error: app2.py (1.4 KB)
I really don’t understand what is going wrong here. Can someone please help me?
Update: This problem also persists on rapidsai/rapidsai-nightly:21.08-cuda11.0-runtime-ubuntu20.04-py3.7