Problem copying data to and from GPU

Hello all,

I’m trying to implement a basic shift cipher to help myself learn CUDA programming. Right now the encryption kernel does nothing, I’m just trying to get the structure set up. The code should output “HELLO WORLD”, but ends up outputting “H” followed by several random characters (for instance, “HJBªúÿÿHJ”).

Below is the code I have (minus some of the unimportant CUDA error checking, which is not the problem here):

#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include <stdio.h>
#include <iostream>
#include <cstring>
#include <string>

// Shift-cipher kernel (currently a pass-through copy).
//
// Each thread handles the character at index threadIdx.x: it looks up the
// plaintext character d[t_i] in `alphabet`, writes the shifted letter to
// e[t_i], and then overwrites e[t_i] with the unmodified plaintext character,
// so the net effect is that d[t_i] is copied to e[t_i].
//
//   e        - output (ciphertext) buffer, written at index t_i
//   d        - input (plaintext) buffer, read at index t_i
//   shift    - offset added to the alphabet index before wrapping modulo 27
//   alphabet - lookup table; up to 27 entries are read
__global__ void encrypt(char *e, char *d, int shift, char *alphabet)
{
    // Index within the block only; blockIdx is not taken into account.
    int t_i = threadIdx.x;
	// Alphabet position of the plaintext character; stays 0 if no match is found.
	int a_i = 0;
	// Search the alphabet for the character to encrypt
	for (unsigned int i = 0; i < 27; i++) {
		if (alphabet[i] == d[t_i]) {
			a_i = i;
			break;
		}
	}
	// Encrypt the plaintext character
	e[t_i] = alphabet[(a_i + shift) % 27];
	// Immediately overwrite the encrypted value with the plaintext character,
	// which discards the result of the line above.
	e[t_i] = d[t_i];
}

// Host driver: copies the alphabet and plaintext to the GPU, launches the
// encrypt kernel, copies the result back to the host and prints it.
int main()
{
    // Plain text input, and initialization of alphabet and ciphertext
    const int array_size = 11;   // number of characters in "HELLO WORLD"
    const int shift = 1;
    const char* h_alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ ";   // 27 symbols: A-Z plus space
    const char* h_plaintext = "HELLO WORLD";
    // Exactly array_size bytes: the first is ' ', the rest are zero-initialized.
    // No extra byte is reserved for a terminating NUL.
    char h_ciphertext[array_size] = {' '};

    // Initialize the variable in GPU memory
    char *d_alphabet;
    char *d_plaintext;
    char *d_ciphertext;

    cudaMalloc(&d_alphabet, 27 * sizeof(char));
    cudaMalloc(&d_plaintext, array_size * sizeof(char));
    cudaMalloc(&d_ciphertext, array_size * sizeof(char));

    // Copy input and known variables to GPU memory
    cudaMemcpy(d_alphabet, h_alphabet, 27 * sizeof(char), cudaMemcpyHostToDevice);
    cudaMemcpy(d_plaintext, h_plaintext, array_size * sizeof(char), cudaMemcpyHostToDevice);

    // Encrypt in parallel.
    // The launch configuration is <<<number of blocks, threads per block>>>,
    // so this starts array_size blocks containing one thread each.
    encrypt<<<array_size, 1>>>(d_ciphertext, d_plaintext, shift, d_alphabet);

    // Wait for the kernel to finish before reading its output.
    cudaDeviceSynchronize();

    // Copy output from GPU memory back to host memory.
    cudaMemcpy(h_ciphertext, d_ciphertext, array_size * sizeof(char), cudaMemcpyDeviceToHost);
	
    // Print out result of encryption
    // "%s" reads until a NUL byte; all array_size bytes of h_ciphertext were
    // just overwritten by the copy above, so the array itself holds no terminator.
    printf("%s\n", h_ciphertext);

    // NOTE(review): cudaDeviceReset() runs before the cudaFree() calls below.
    // A reset destroys the device's allocations, so these frees presumably
    // return an error -- confirm, and consider freeing before the reset.
    cudaDeviceReset();

    cudaFree(d_alphabet);
    cudaFree(d_plaintext);
    cudaFree(d_ciphertext);

    return 0;
}

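The reason for the observed output is that t_i is always 0: the kernel is launched with one thread per block, so threadIdx.x is 0 in every block and only the first character is ever copied; the remaining bytes of the output buffer are never written. In a quick perusal of the code I did not spot any issue with the copies to/from device.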

One issue is that you have the blocks and threads values reversed in your execution configuration (encrypt<<<…>>>).

This works:

#include <cuda.h>
#include <stdio.h>

// Shift-cipher kernel, one character per thread (indexed by threadIdx.x).
//
// The character plain[threadIdx.x] is looked up in the 27-entry alphabet and
// the shifted letter is stored in cipher; the store is then overridden with
// the plaintext character, so the kernel currently copies plain to cipher.
//
//   cipher   - output buffer, one element written per thread
//   plain    - input buffer, one element read per thread
//   alphabet - lookup table; up to 27 entries are read
//   shift    - offset added to the alphabet position (wrapped modulo 27)
//
// Only threadIdx.x is used for indexing, so a single block is expected.
__global__ void encrypt(char* const       cipher,
                        const char* const plain,
                        const char* const alphabet,
                        const int         shift)
{
  const int pos = threadIdx.x;

  // Walk the alphabet until the plaintext character is found or the
  // table is exhausted.
  unsigned int k = 0;
  while (k < 27 && alphabet[k] != plain[pos]) {
    ++k;
  }

  // Position 0 is used when the character is not in the alphabet.
  const int slot = (k < 27) ? static_cast<int>(k) : 0;

  // Store the shifted letter ...
  cipher[pos] = alphabet[(slot + shift) % 27];
  // ... and then replace it with the untouched plaintext character (override).
  cipher[pos] = plain[pos];
}

// Host driver: copies the alphabet and plaintext to the GPU, runs the encrypt
// kernel with one block of plain_size threads, copies the result back,
// NUL-terminates it and prints it.
int main()
{
  // Plain text input and alphabet
  const char  h_alphabet[]  = "ABCDEFGHIJKLMNOPQRSTUVWXYZ ";
  const char  h_plaintext[] = "HELLO WORLD";

  // Sizes exclude the terminating NUL of the string literals.
  const int   alpha_size    = sizeof(h_alphabet)  - 1;
  const int   plain_size    = sizeof(h_plaintext) - 1;

  // Host output buffer: one extra byte so the result can be NUL-terminated
  // for printf("%s"). Zero-initialized so it is a valid string even if the
  // device-to-host copy fails.
  char        h_ciphertext[plain_size + 1] = {0};

  const int   shift         = 1;

  // Initialize the variable in GPU memory
  char *d_alphabet;
  char *d_plaintext;
  char *d_ciphertext;

  cudaMalloc(&d_alphabet,  alpha_size * sizeof(char));
  cudaMalloc(&d_plaintext, plain_size * sizeof(char));
  cudaMalloc(&d_ciphertext,plain_size * sizeof(char));

  // Copy input and known variables to GPU memory
  cudaMemcpy(d_alphabet,  h_alphabet,  alpha_size * sizeof(char), cudaMemcpyHostToDevice);
  cudaMemcpy(d_plaintext, h_plaintext, plain_size * sizeof(char), cudaMemcpyHostToDevice);

  // Encrypt in parallel: <<<blocks, threads per block>>> = one block with one
  // thread per plaintext character, so threadIdx.x covers 0..plain_size-1.
  encrypt<<<1,plain_size>>>(d_ciphertext, d_plaintext, d_alphabet, shift);

  // Wait for the kernel to finish before reading its output.
  cudaDeviceSynchronize();

  // Copy output from GPU memory back to host memory.
  cudaMemcpy(h_ciphertext, d_ciphertext, plain_size * sizeof(char), cudaMemcpyDeviceToHost);

  // set null
  h_ciphertext[plain_size] = '\0';

  printf("alpha_size = %d\n",alpha_size);

  // Print out result of encryption
  printf("%s\n", h_ciphertext);

  // Release device buffers before resetting the device: cudaDeviceReset()
  // destroys all allocations, so freeing afterwards would be an error.
  cudaFree(d_alphabet);
  cudaFree(d_plaintext);
  cudaFree(d_ciphertext);

  cudaDeviceReset();

  return 0;
}

Thanks allanmac, you’re right. Switching the block and thread parameters in encrypt<<<…>>> fixed it. I was remembering the parameters in the wrong order. I appreciate the help!