|
原帖由 NvidiaCTC 于 2010-4-15 22:26 发表
// Device-side global pointer. The host writes the address of a device buffer
// into it (see the cudaMemcpyToSymbol call below) so that kernels can read
// through dp without receiving the buffer as a kernel argument.
__device__ int* dp;
// allocate host memory
// NOTE(review): mem_size and num_threads are defined outside this excerpt;
// this assumes mem_size >= num_threads * sizeof(int) -- confirm.
int* h_idata = (int*) malloc( mem_size);
// initialize the memory: element i holds the value i
for( unsigned int i = 0; i < num_threads; ++i)
{
    // Index the element; assigning to h_idata itself would overwrite the
    // pointer instead of filling the buffer.
    h_idata[i] = (int) i;
}
// allocate device memory
int* d_idata;
cutilSafeCall( cudaMalloc( (void**) &d_idata, mem_size));
// copy host memory to device
cutilSafeCall( cudaMemcpy( d_idata, h_idata, mem_size,
                           cudaMemcpyHostToDevice) );
// Publish the device buffer address to the __device__ variable dp.
// What is copied is the pointer value stored in d_idata (sizeof(int*) bytes),
// not the buffer contents. dp is a device symbol, so it is written through the
// symbol API; taking &dp in host code does not give a device address that a
// plain cudaMemcpy could use as its destination.
cutilSafeCall( cudaMemcpyToSymbol (dp, &d_idata, sizeof(int*), 0, cudaMemcpyHostToDevice));
// Copies one int per thread from the buffer that the __device__ pointer dp
// refers to into g_odata.
//
// g_odata: output array in device memory, written at each thread's flat index.
//
// Preconditions (not checked here, since no element count is passed in):
//   - dp has been set by the host to a valid device buffer before launch;
//   - the launch is 1D and the total number of threads does not exceed the
//     number of elements in either g_odata or the buffer behind dp.
__global__ void
testKernel( int* g_odata)
{
    // flat 1D global thread index (equals threadIdx.x for a single-block launch)
    const unsigned int tid = blockIdx.x * blockDim.x + threadIdx.x;

    // write data to global memory
    g_odata[tid] = dp[tid];
}
哇~~多谢多谢~终于成功了。
感谢感谢,可是我还有一个不太明白的地方,cudaMemcpyToSymbol() 和cudaMemcpy() 的差别是什么? 我尝试把 cudaMemcpyToSymbol (dp, &d_idata, sizeof(int*), 0, cudaMemcpyHostToDevice) 这句换成 cudaMemcpy( &dp, &h_idata, sizeof(int *),cudaMemcpyHostToDevice) , 结果就不成了,这是为什么? |
|