Well, after I saw a bandwidthBench result it made sense to me, because the 2.5 GB/sec I am getting with CUDA transfers is considerably faster than the 850 MB/sec I am getting using OpenGL. That is why I asked if there is a way.
Exactly. I am trying to do it but I can’t make it work with 3D textures. Basically I modified Dominik’s tutorial to work with 3D textures but it doesn’t work as it should:
// w = width, h = height, z goes from 0 to d (depth)
glTexSubImage3D(GL_TEXTURE_3D, 0, 0, 0, z, w, h, 1, GL_RGBA, GL_UNSIGNED_BYTE, 0);
It should copy from the PBO (which I update prior to this call) into the proper slice of the 3D texture, but it always overwrites the slice at z=0; in other words, the z offset is ignored completely. Here is the code I am having problems with. Any advice?
#define GLEW_STATIC
#define GLUT_STATIC_LIB
#pragma comment(lib, "advapi32.lib")
#pragma comment(lib, "glew32s.lib")
#pragma comment(lib, "glutstatic.lib")
#include <stdio.h>
#include <string.h>
#include <windows.h>
#include <GL/glew.h>
#include <GL/glut.h>
/*
 * Page-granular allocation helpers built on the Win32 virtual memory API.
 *
 * valloc(size): reserves and commits `size` bytes of read/write memory in one
 *               step; evaluates to the base address, or NULL on failure.
 * vfree(ptr):   releases a region previously obtained from valloc(). The size
 *               argument must be 0 when MEM_RELEASE is used.
 */
#define valloc(size) VirtualAlloc(NULL, (size), MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE)
#define vfree(ptr) VirtualFree((ptr), 0, MEM_RELEASE)
static DWORD CPUFrequency(void)
{
DWORD freq;
HKEY hKey;
const char *key = "HARDWARE\DESCRIPTION\System\CentralProcessor\0";
DWORD buflen = 4;
RegOpenKeyExA(HKEY_LOCAL_MACHINE, key, 0, KEY_READ, &hKey);
RegQueryValueExA(hKey, "~Mhz", NULL, NULL, (LPBYTE)&freq, &buflen);
RegCloseKey(hKey);
return freq;
}
/*
 * Returns the current value of the processor's time-stamp counter.
 *
 * The function is declared naked, so the compiler emits no prologue or
 * epilogue; the body therefore has to issue its own `ret`. `rdtsc` leaves the
 * 64-bit counter in EDX:EAX, which is where the 32-bit x86 MSVC calling
 * convention expects a 64-bit integer return value, so nothing else is
 * needed between the two instructions.
 *
 * NOTE(review): this relies on MSVC inline assembly and __declspec(naked),
 * which presumably restricts it to 32-bit x86 builds - confirm the target.
 * The counter is in clock ticks; the caller converts it to time using the
 * CPU frequency.
 */
static __declspec(naked) unsigned __int64 ReadTSC(void)
{
__asm {
rdtsc
ret
}
}
int main(int argc, char *argv[])
{
const int w = 512, h = 512, d = 256;
int frame_size = w * h * sizeof(float);
int data_size = frame_size * d;
glutInit(&argc, argv);
glutCreateWindow("STREAMING TUTORIAL");
glewInit();
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
glOrtho(0, w, 0, h, -1, 1);
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();
glViewport(0, 0, w, h);
float *data1 = (float*)valloc(data_size);
GLuint texture3D;
glGenTextures(1, &texture3D);
glBindTexture(GL_TEXTURE_3D, texture3D);
glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER);
glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER);
glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_BORDER);
glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexImage3D(GL_TEXTURE_3D, 0, GL_RGBA8, w, h, d, 0, GL_RGBA, GL_BYTE, 0);
GLuint buffer;
glGenBuffers(1, &buffer);
glFinish();
unsigned __int64 t0, t1;
double tt, freq = CPUFrequency();
t0 = ReadTSC();
for (int z = 0; z < d; z++) {
glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, buffer);
glBufferData(GL_PIXEL_UNPACK_BUFFER_ARB, frame_size, NULL, GL_STREAM_DRAW);
unsigned char *data_ptr = (unsigned char *)data1 + z * (frame_size);
float *mem = (float*)glMapBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, GL_WRITE_ONLY);
if (mem == NULL) {
DebugBreak();
}
memcpy(mem, data_ptr, frame_size);
glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER_ARB);
glTexSubImage3D(GL_TEXTURE_3D, 0, 0, 0, z, w, h, 1, GL_RGBA, GL_UNSIGNED_BYTE, 0);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
}
glFinish();
t1 = ReadTSC();
tt = (double)(t1 - t0) / freq;
printf("2D = %.3f ms, %.2f MB/sec\n", tt / 1000.0, (double)data_size / tt);
glDeleteBuffers(1, &buffer);
glDeleteTextures(1, &texture3D);
vfree(data1);
return 0;
}
What am I doing wrong?