|
偏移量在CUDA host端是可以用的
Memcpy可以用任意偏移量
但是 cudaBindTexture 绑定的纹理基地址受硬件对齐要求限制(与显存按 partition 的映射方式有关),所以偏移量必须对齐到 256 Byte(具体对齐值可以通过 cudaDeviceProp::textureAlignment 查询)
例如偏移量为 64 个 float(64 × 4 = 256 Byte)就可以
#include <stdio.h>
#include <cuda_runtime.h>
// File-scope texture reference with float elements. main() binds it to a
// region of linear device memory with cudaBindTexture(), and the fetch kernel
// reads it through tex1Dfetch().
texture<float> rt;
// Copies one element per thread from the bound texture `rt` into `out`.
//
// out: device buffer with at least blockDim.x floats.
//
// Indexing uses threadIdx.x only, so every block of a multi-block launch
// would read and write the same elements. There is no bounds check: the
// caller must size both `out` and the bound texture range to the block size.
__global__ void fetch(float *out){
    const unsigned int lane = threadIdx.x;
    const float texel = tex1Dfetch(rt, lane);
    out[lane] = texel;
}
// Prints a diagnostic and leaves main() with exit status 1 when a CUDA runtime
// call fails. Buffers are not released on this path; the process is exiting.
#define CUDA_CHECK(call)                                                      \
    do {                                                                      \
        cudaError_t err_ = (call);                                            \
        if (err_ != cudaSuccess) {                                            \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,     \
                    cudaGetErrorString(err_));                                \
            return 1;                                                         \
        }                                                                     \
    } while (0)

// Demonstrates binding a texture reference at an offset into a device buffer.
//
// Fills a 512-float pinned host buffer with 0..511, copies it to the device,
// binds `rt` starting 64 floats (256 bytes) into the device buffer, launches
// `fetch` with one block of 256 threads, and prints the 256 fetched values.
// Returns 0 on success and 1 if any CUDA call fails or the requested texture
// base address is not acceptable to the device.
int main(){
    const int num = 512;                          // elements in the source buffer
    const size_t len = num * sizeof(float);
    const unsigned int grid = 256;                // threads launched == elements fetched
    const size_t out_len = grid * sizeof(float);
    const int skip = 64;                          // offset in elements: 64 * 4 = 256 bytes

    // Pinned host memory so the async copies below can really be asynchronous.
    float *in = NULL;
    CUDA_CHECK(cudaMallocHost((void**)&in, len));
    for (int i = 0; i < num; i++)
        in[i] = (float)i;

    float *d_in = NULL;
    CUDA_CHECK(cudaMalloc((void**)&d_in, len));
    CUDA_CHECK(cudaMemcpyAsync(d_in, in, len, cudaMemcpyHostToDevice, 0));

    // Ask the runtime for the byte offset it had to apply to satisfy the
    // device's texture alignment. The kernel fetches from index 0 and cannot
    // compensate, so any nonzero offset means the chosen skip is not aligned
    // on this device and the output would be wrong.
    size_t tex_offset = 0;
    CUDA_CHECK(cudaBindTexture(&tex_offset, rt, d_in + skip, out_len));
    if (tex_offset != 0) {
        fprintf(stderr,
                "texture base is misaligned by %lu bytes on this device\n",
                (unsigned long)tex_offset);
        return 1;
    }

    float *d_out = NULL;
    CUDA_CHECK(cudaMalloc((void**)&d_out, out_len));

    fetch<<<1, grid>>>(d_out);
    CUDA_CHECK(cudaGetLastError());               // launch-configuration errors

    float *out = NULL;
    CUDA_CHECK(cudaMallocHost((void**)&out, out_len));
    CUDA_CHECK(cudaMemcpyAsync(out, d_out, out_len, cudaMemcpyDeviceToHost, 0));
    // The copy into pinned memory may return before it completes; wait for
    // the stream before the host reads `out`. This also surfaces any error
    // raised while the kernel was executing.
    CUDA_CHECK(cudaStreamSynchronize(0));

    for (unsigned int i = 0; i < grid; i++)
        printf("%f\n", out[i]);

    CUDA_CHECK(cudaUnbindTexture(rt));
    CUDA_CHECK(cudaFreeHost(out));
    CUDA_CHECK(cudaFree(d_out));
    CUDA_CHECK(cudaFree(d_in));
    CUDA_CHECK(cudaFreeHost(in));
    return 0;
}
对本帖的情况,不一定要按偏移量绑定到纹理,还有很多种其他方式可以代替。
[ 本帖最后由 NvidiaCTC 于 2010-5-13 12:18 编辑 ] |
|