What do you mean by "large integers"? The result of your example fits in a signed 64-bit integer, so a plain 64-bit multiplication is sufficient:
#include <inttypes.h>
#include <stdio.h>
// Kernel: multiplies two signed 64-bit integers and stores the product in *c.
//
// Parameters:
//   c - destination for the product; dereferenced on the device, so it must
//       point to device-accessible memory.
//   a - first factor.
//   b - second factor.
//
// Launch layout: there is no thread indexing, so every launched thread writes
// the same value to *c; a <<<1,1>>> launch is sufficient.
//
// Note: the product must fit in int64_t. Signed overflow is undefined
// behavior in C++; use __mul64hi if the upper 64 bits of the full 128-bit
// product are needed.
__global__ void foo(int64_t* c, int64_t a, int64_t b)
{
    const int64_t product = a * b;
    c[0] = product;
}
// Host driver: allocates one managed int64_t, launches foo on a single thread
// to multiply two constants on the device, prints the product, and frees the
// allocation.
//
// Returns 0 on success and 1 if any CUDA runtime call or the kernel launch
// reports an error (the error string is written to stderr).
int main(){
    // Managed memory is used so the host can read the result directly after
    // synchronizing, without an explicit cudaMemcpy.
    int64_t* c = nullptr;
    cudaError_t err = cudaMallocManaged(&c, sizeof(int64_t));
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaMallocManaged failed: %s\n", cudaGetErrorString(err));
        return 1;
    }

    // Signed literals match the kernel's int64_t parameters.
    foo<<<1,1>>>(c, 1000330000LL, 7000567989LL);

    // A launch does not return an error itself: configuration errors surface
    // through cudaGetLastError(), execution errors at the next synchronization.
    err = cudaGetLastError();
    if (err == cudaSuccess) {
        // The kernel runs asynchronously; wait before reading *c on the host.
        err = cudaDeviceSynchronize();
    }
    if (err != cudaSuccess) {
        fprintf(stderr, "kernel foo failed: %s\n", cudaGetErrorString(err));
        cudaFree(c);
        return 1;
    }

    // PRId64 is the portable specifier for int64_t; "%ld" is only correct on
    // platforms where long is 64 bits wide.
    printf("%" PRId64 "\n", *c);

    err = cudaFree(c);
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaFree failed: %s\n", cudaGetErrorString(err));
        return 1;
    }
    return 0;
}
Prints: 7002878176436370000
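If you need more than 64 bits of the product, there are also integer intrinsics that return the upper half of the full result: the upper 32 bits of a 32-bit × 32-bit multiplication (`__mulhi`) and the upper 64 bits of a 64-bit × 64-bit multiplication (`__mul64hi`):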
long long int __mul64hi(long long int x, long long int y)
int __mulhi(int x, int y)
see http://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH__INTRINSIC__INT.html