/**
 * Element-wise integer addition over a fixed-size range:
 * c[i] = a[i] + b[i] for every global thread index i below 1024.
 *
 * Launch layout: only the x components of threadIdx / blockIdx / blockDim
 * are read, so the kernel is indexed as a 1D grid of 1D blocks. Each thread
 * handles at most one element; threads whose global index is 1024 or higher
 * do nothing. If fewer than 1024 threads are launched, only the elements
 * covered by the launched threads are written.
 *
 * Uses no shared memory and performs no synchronization.
 *
 * @param a First input operand; read at indices below 1024.
 * @param b Second input operand; read at indices below 1024.
 * @param c Output; written at indices below 1024.
 *
 * All three pointers are dereferenced on the device, so they must be
 * device-accessible and valid for every index that a launched thread can
 * reach (up to 1024 ints each).
 */
__global__ void mykernel(int *a, int *b, int *c) {
    // Number of elements this kernel processes (fixed by the kernel itself,
    // not by the launch configuration).
    const unsigned long long kNumElements = 1024ULL;

    // Widen to 64 bits *before* multiplying. The built-in index variables are
    // 32-bit unsigned; on a very large grid their product can exceed 2^31
    // (which would turn negative in a signed int and slip past the bounds
    // check) or wrap around at 2^32. A 64-bit unsigned index makes the guard
    // below correct for any launch configuration.
    const unsigned long long idx =
        static_cast<unsigned long long>(blockIdx.x) * blockDim.x + threadIdx.x;

    // Tail guard: surplus threads beyond the processed range exit untouched.
    if (idx < kNumElements) {
        c[idx] = a[idx] + b[idx];
    }
}