What I am doing wrong in the code below, adapted from Linux? I get: Run-Time Check Failure #3 - The variable ‘cudaStatus’ is being used without being initialized at the begging of function code cudaError_t launchGPUHandlerThread(void) in the row if (cudaStatus != cudaSuccess) { after cudaSetDevice(0);
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <winsock2.h> // struct timeval
#include <windows.h> // timeGetTime()
#pragma comment(lib, "winmm.lib") // timeGetTime()
#include <stdlib.h>
#include <locale.h>
#include "sha256.cuh"
#define TEXT_TXT "Caster"
#define TEXT_LEN 6
#define THREADS 1500
#define BLOCKS 256
#define GPUS 1
#define DIFFICULTY 4
#define RANDOM_LEN 20
typedef unsigned long DWORD;
int gettimeofday(struct timeval* tp, void* tzp);
long long timems(void);
cudaError_t launchGPUHandlerThread(void);
__constant__ BYTE characterSet[63] = { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890" };
__global__ void initSolutionMemory(int* blockContainsSolution) {
*blockContainsSolution = -1;
}
__device__ uint64_t deviceRandomGen(uint64_t x) {
x ^= (x << 21);
x ^= (x >> 35);
x ^= (x << 4);
return x;
}
__global__ void sha256_cuda(BYTE* prefix, BYTE* solution, int* blockContainsSolution, uint64_t baseSeed) {
int i = blockIdx.x * blockDim.x + threadIdx.x;
SHA256_CTX ctx;
BYTE digest[32];
BYTE random[RANDOM_LEN];
uint64_t seed = baseSeed;
seed += (uint64_t)i;
for (int j = 0; j < RANDOM_LEN; j++) {
seed = deviceRandomGen(seed);
int randomIdx = (int)(seed % 62);
random[j] = characterSet[randomIdx];
}
sha256_init(&ctx);
sha256_update(&ctx, prefix, TEXT_LEN);
sha256_update(&ctx, random, RANDOM_LEN);
sha256_final(&ctx, digest);
for (int j = 0; j < DIFFICULTY; j++)
if (digest[j] > 0)
return;
if ((digest[DIFFICULTY] & 0xF0) > 0)
return;
if (*blockContainsSolution == 1)
return;
*blockContainsSolution = 1;
for (int j = 0; j < RANDOM_LEN; j++)
solution[j] = random[j];
}
void hostRandomGen(uint64_t* x) {
*x ^= (*x << 21);
*x ^= (*x >> 35);
*x ^= (*x << 4);
}
void pre_sha256() {
cudaMemcpyToSymbol(dev_k, host_k, sizeof(host_k), 0, cudaMemcpyHostToDevice);
}
int gettimeofday(struct timeval* tp, void* tzp) {
DWORD t;
t = timeGetTime();
tp->tv_sec = t / 1000;
tp->tv_usec = t % 1000;
return 0;
}
long long timems(void) {
struct timeval end;
gettimeofday(&end, NULL);
return end.tv_sec * 1000LL + end.tv_usec / 1000;
}
uint64_t hashesProcessed;
//pthread_mutex_t solutionLock;
BYTE* solution;
cudaError_t launchGPUHandlerThread(void) {
cudaError_t cudaStatus;
cudaSetDevice(0);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaSetDevice failed! Do you have a CUDA-capable GPU installed?");
goto Error;
}
pre_sha256();
BYTE cpuPrefix[] = { TEXT_TXT };
BYTE* d_prefix;
// Allocate GPU buffer A
cudaStatus = cudaMalloc(&d_prefix, TEXT_LEN);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMalloc failed!");
goto Error;
}
// Copy data from host memory to GPU buffer A
cudaStatus = cudaMemcpy(d_prefix, cpuPrefix, TEXT_LEN, cudaMemcpyHostToDevice);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMemcpy failed!");
goto Error;
}
BYTE* blockSolution = (BYTE*)malloc(sizeof(BYTE) * RANDOM_LEN);
BYTE* d_solution;
// Allocate GPU buffer B
cudaStatus = cudaMalloc(&d_solution, sizeof(BYTE) * RANDOM_LEN);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMalloc failed!");
goto Error;
}
int* blockContainsSolution = (int*)malloc(sizeof(int));
int* d_blockContainsSolution;
// Allocate GPU buffer C
cudaStatus = cudaMalloc(&d_blockContainsSolution, sizeof(int));
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMalloc failed!");
goto Error;
}
uint64_t rngSeed = timems();
// Launch a kernel on the GPU with one thread and one element.
initSolutionMemory << <1, 1 >> > (d_blockContainsSolution);
// Check for any errors launching the kernel
cudaStatus = cudaGetLastError();
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "initSolutionMemory launch failed: %s\n", cudaGetErrorString(cudaStatus));
goto Error;
}
while (1) {
hostRandomGen(&rngSeed);
/*hi->hashesProcessed += THREADS * BLOCKS;*/
hashesProcessed += THREADS * BLOCKS;
sha256_cuda << <THREADS, BLOCKS >> > (d_prefix, d_solution, d_blockContainsSolution, rngSeed);
// Check for any errors launching the kernel
cudaStatus = cudaGetLastError();
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "sha256_cuda launch failed: %s\n", cudaGetErrorString(cudaStatus));
goto Error;
}
cudaStatus = cudaDeviceSynchronize();
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching addKernel!\n", cudaStatus);
goto Error;
}
// Copy output vector from GPU buffer to host memory.
cudaStatus = cudaMemcpy(blockContainsSolution, d_blockContainsSolution, sizeof(int), cudaMemcpyDeviceToHost);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMemcpy failed!");
goto Error;
}
if (*blockContainsSolution == 1) {
cudaStatus = cudaMemcpy(blockSolution, d_solution, sizeof(BYTE) * RANDOM_LEN, cudaMemcpyDeviceToHost);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMemcpy failed!");
goto Error;
}
solution = blockSolution;
break;
}
if (solution) {
break;
}
}
Error:
cudaDeviceReset();
/*return NULL;*/
return cudaStatus;
}
int main()
{
setlocale(LC_NUMERIC, "");
//uint64_t** processedPtrs = (uint64_t**)malloc(sizeof(uint64_t*) * GPUS);
uint64_t processedPtrs;
long long start = timems();
hashesProcessed = 0;
processedPtrs = hashesProcessed;
cudaError_t cudaStatus = launchGPUHandlerThread();
while (1) {
uint64_t totalProcessed = 0;
totalProcessed += processedPtrs;
long long elapsed = timems() - start;
printf("Hashes %llu Seconds %f Hashes/sec %llu\r", totalProcessed, ((float)elapsed) / 1000.0, (uint64_t)((double)totalProcessed / (double)elapsed) * 1000);
if (solution) {
break;
}
}
printf("\n");
long long end = timems();
long long elapsed = end - start;
uint64_t totalProcessed = 0;
totalProcessed += processedPtrs;
printf("Solution: %.20s\n", solution);
printf("Hashes processed: %llu\n", totalProcessed);
printf("Time: %llu\n", elapsed);
printf("Hashes/sec: %llu\n", (uint64_t)((double)totalProcessed / (double)elapsed) * 1000);
//// cudaDeviceReset must be called before exiting in order for profiling and
//// tracing tools such as Nsight and Visual Profiler to show complete traces.
//cudaStatus = cudaDeviceReset();
//if (cudaStatus != cudaSuccess) {
// fprintf(stderr, "cudaDeviceReset failed!");
// return 1;
//}
return 0;
}