Hello ! I was trying to use some inline asm to perform a vector load. This version seems to work fine:
__device__ void make_load(const float4* w)
{
float4 data0;
asm volatile ("ld.global.f32 %0, [%1];" : "=f"(data0.x) : "l"(w + 0));
asm volatile ("ld.global.f32 %0, [%1];" : "=f"(data0.y) : "l"(w + 1));
asm volatile ("ld.global.f32 %0, [%1];" : "=f"(data0.z) : "l"(w + 2));
asm volatile ("ld.global.f32 %0, [%1];" : "=f"(data0.w) : "l"(w + 3));
...
}
However, this version with the vector load gives me an “Invalid __global__ read of size 16 bytes
”:
__device__ void make_load(const float4* w)
{
float4 data0;
asm volatile ("ld.global.v4.f32 { %0, %1, %2, %3 }, [%4];" : "=f"(data0.x) , "=f"(data0.y) , "=f"(data0.z) , "=f"(data0.w) : "l"(w));
...
}
Im sure it must be either a syntax or alignment mistake on the pointer. However I have gone through some documentation and posts and Im failing to see any issues with the code.
Thanks in advance
njuffa
November 4, 2022, 8:42pm
2
Whatever is triggering that error report is something outside the posted code. Compare this, which works just fine:
#include <stdio.h>
#include <stdlib.h>
__device__ float4 make_load (const float4* w)
{
float4 data0;
asm volatile ("ld.global.v4.f32 { %0, %1, %2, %3 }, [%4];" : "=f"(data0.x) , "=f"(data0.y) , "=f"(data0.z) , "=f"(data0.w) : "l"(w));
return data0;
}
__global__ void kernel (float4 *p)
{
float4 data = make_load (p);
printf ("data = {%15.8e %15.8e %15.8e %15.8e}\n", data.x, data.y, data.z, data.w);
}
int main (void)
{
float4 hdata = {1.0f, 2.0f, 3.0f, 4.0f};
float4 *ddata = 0;
cudaMalloc ((void**)&ddata, sizeof hdata);
cudaMemcpy (ddata, &hdata, sizeof hdata, cudaMemcpyHostToDevice);
kernel<<<1,1>>>(ddata);
cudaDeviceSynchronize();
return EXIT_SUCCESS;
}
Is your data aligned to 16 bytes? Or perhaps you have to translate your address with __cvta_generic_to_global()?
@njuffa seems to be right, my issue was the typical mistake, I was trying to load from global using a constant memory address.
Why did it work without the vector load ? No clue, it should have failed as well.
Thanks !
system
Closed
November 21, 2022, 10:58am
5
This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.