Issues with read/write

Hello! I have a program that writes and reads memory cells. The write function changes a number of memory cells from 0x00000000 to 0x55555556, and the read function then reads back any number of memory cells. However, whenever I try to read the cells back after writing to them, they come back as 0x00000000. I've tried using the kernel to add 1 to the number I write (which is why it is 0x55555556 instead of 0x55555555) and then doing the same thing when I read the cells, in the hope of invoking the register, but it still only returns zeros. I'd be extremely grateful if someone could help me figure out what I'm doing wrong. Thanks!

#include “cuda_runtime.h”
#include “device_launch_parameters.h”

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Forward declarations for the host-side helpers called from main.
 *
 * WritePatternToDevice: called with (c, a, arraySize) - a scratch/readback
 *   buffer, the pattern buffer, and the element count.
 * ReadMemoryFromDevice: called with (c, numMB, outputFile) - a host buffer,
 *   a cell count, and an optional output stream (NULL when no file was given).
 *
 * The pointer stars on the second prototype were lost in the original paste;
 * they are restored here to match the arguments actually passed at the call
 * site (an unsigned int* buffer and a FILE* from fopen).
 */
cudaError_t WritePatternToDevice(unsigned int *c, unsigned int *a, int size);
cudaError_t ReadMemoryFromDevice(unsigned int *c, int size, FILE *outputFile);

/*
 * Adds 1 to each of the first numElements entries of dev_a, in place.
 *
 * dev_a        array to update; it is indexed directly by the kernel, so it
 *              must point to memory the device can access.
 * numElements  number of entries to touch; threads whose global index is
 *              >= numElements do nothing.
 *
 * Expects a 1D launch (only the .x components are used) with at least
 * numElements threads in total. No shared memory is used.
 */
__global__ void
IncrementSome(unsigned int *dev_a, int numElements)
{
    // 64-bit index so blockIdx.x * blockDim.x cannot wrap for large grids.
    const long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x;

    if (i < numElements)
    {
        dev_a[i]++;
    }
}

// Entry point. Needs a command and a size on the command line (argc >= 3 per
// the check below). The w command fills a host buffer with 0x55555555 and
// hands it to WritePatternToDevice; the r command calls ReadMemoryFromDevice,
// optionally with an output file opened from argv[3].
// NOTE(review): as pasted, the argv parameter and the a / c declarations look
// like they lost their pointer stars to forum formatting (a and c are later
// assigned unsigned int* values and argv is indexed as strings) - confirm
// against the real source.
int main(int argc, char argv)
{
…cudaError_t cudaStatus;
…char command[5];
…int arraySize = 0;
…unsigned int
a;
…unsigned int
c;

…if (argc < 3)
…{
…printf("\nUsage:");
…printf("\n"Chau_text w size" where size is the number of cells (Millions) to write");
…printf("\n"Chau_text r size" where size is the number of cells to read");
// NOTE(review): no return here, unlike the usage branch at the bottom, so
// execution continues past this if/else after printing the usage text.
…}
…else
…{
// NOTE(review): both strcpy calls are unbounded; an argument longer than the
// 5-byte command buffer or the 10-byte chSize buffer overflows it.
…strcpy(command, argv[1]);
…char chSize[10];
…strcpy(chSize, argv[2]);
…int numMB = atoi(chSize);
// The size argument is scaled by one million to get the element count.
// NOTE(review): assuming 32-bit int, this multiply overflows for numMB > 2147.
…arraySize = numMB * 1000000;
…//for (int i = 0; i < argc; ++i)
…// printf("\n%s", argv[i]);

…if (strcmp(command, “w”) == 0)
…{
…printf("\nWriting %d memory cells", arraySize);
// a holds the pattern to write; c is zero-initialised by calloc.
// NOTE(review): neither allocation is checked before use, and no free of
// a or c is visible in this block.
…a = (unsigned int*) malloc(arraySize * sizeof(unsigned int));
…c = (unsigned int*) calloc(arraySize, sizeof(unsigned int));
…for (int i = 0; i < arraySize; ++i)
…a[i] = 0x55555555;

…// Write the pattern to the device.
…cudaStatus = WritePatternToDevice(c, a, arraySize);
…if (cudaStatus != cudaSuccess) {
…fprintf(stderr, “\nWritePatternToDevice failed!”);
…return 1;
…}
…}
…else if (strcmp(command, “r”) == 0)
…{
// NOTE(review): this branch prints and passes numMB (the raw argument),
// while c is allocated with arraySize (numMB * 1000000) elements.
…printf("\nReading %d cells", numMB);
…FILE * outputFile = NULL;
…c = (unsigned int*) calloc(arraySize, sizeof(unsigned int));
// An output file is opened only when exactly one extra argument is given.
// NOTE(review): the fopen result is not checked; a failed open leaves
// outputFile NULL, the same value as when no file was requested.
…if (argc == 4)
…{
…outputFile = fopen(argv[3], “w”);
…}

…// Read the device memory
…cudaStatus = ReadMemoryFromDevice(c, numMB, outputFile);
// NOTE(review): cudaStatus is not inspected in this branch.

…if (outputFile != NULL)
…fclose(outputFile);
…}
…else
…{
// Unrecognised command: print usage and exit with a failure code.
…printf("\nUsage:");
…printf("\n"Chau_text w size" where size is the number of cells (Millions) to write");
…printf("\n"Chau_text r size" where size is the number of cells to read");
…return 1;
…}
…}

…// Copy output vector from GPU buffer to host memory.
…cudaStatus = cudaMemcpy(c, dev_a, size * sizeof(unsigned int), cudaMemcpyDeviceToHost);

…cudaStatus = cudaDeviceSynchronize();
…if (cudaStatus != cudaSuccess) {
…fprintf(stderr, “cudaDeviceSynchronize returned error code %d after launching addKernel!\n”, cudaStatus);
…goto Error;
…}

…if (cudaStatus != cudaSuccess)
…{
…fprintf(stderr, “\ncudaMemcpy failed!”);
…goto Error;
…}
…else
…{
…printf("\nFirst 10 cells before write…");
…for (int i = 0; i < 10; ++i)
…printf("\n0x%08X", c[i]);
…}

…cudaStatus = cudaMemcpy(dev_a, a, size * sizeof(unsigned int), cudaMemcpyHostToDevice);

…cudaStatus = cudaDeviceSynchronize();
…if (cudaStatus != cudaSuccess) {
…fprintf(stderr, “cudaDeviceSynchronize returned error code %d after launching addKernel!\n”, cudaStatus);
…goto Error;
…}

…if (cudaStatus != cudaSuccess) {
…fprintf(stderr, “\ncudaMemcpy failed!”);
…goto Error;
…}

…int threadsPerBlock = 256;
…int blocksPerGrid = (size + threadsPerBlock - 1) / threadsPerBlock;
…printf(“CUDA kernel launch with %d blocks of %d threads\n”, blocksPerGrid, threadsPerBlock);
…IncrementSome << <blocksPerGrid, threadsPerBlock >> >(dev_a, size);

…// Copy output vector from GPU buffer to host memory.
…cudaStatus = cudaMemcpy(c, dev_a, size * sizeof(unsigned int), cudaMemcpyDeviceToHost);

You may need to simplify your example to the bare minimum and repost it using the Code tag (the last one):

code
  more code