Error_t in cuda Related Queries

i wnat to make a ErrorHandling program in Cuda using a Error_t Data type in Cuda.i Wrote the whole code but it produce wrong output(snapshot Error).Following are the code of program.

Tell me the answers of queries in Progam.And why it gave me a wrong output

Thanks in Advance

#include<stdio.h>

#include<assert.h>//why we Use assert.h in our program and it?

void CudaError(const char* p);

__global__ void ErrorMsg(int *a,int *b)

{

		int inOff=blockDim.x*blockIdx.x;

		int outoff=blockDim.x*(gridDim.x-1-blockIdx.x);

		int in=inOff+threadIdx.x;

		int out=outoff+(blockDim.x-1-threadIdx.x);

		a[out]=b[in];

}

int main(int arg,char** ag)//What is purpose of Passing  argument in the Main and where we Pass the value in these parameter?

{

		int *a_h,*a_d,*b_d;

		int dimA=256*1024;

		int blocksize=256;

		int nblock=dimA/blocksize;

		size_t size=blocksize*nblock*sizeof(int);

		a_h=(int*)malloc(size);

		cudaMalloc((void**)&a_d,size);

		cudaMalloc((void**)&b_d,size);

		for(int i=0;i<dimA;i++)

		{

				a_h[i]=i;

		}

		cudaMemcpy(a_d,a_h,size,cudaMemcpyHostToDevice);

		dim3 grid(nblock);

		dim3  block(blocksize);

		ErrorMsg<<<grid,block>>>(a_d,b_d);

cudaThreadSynchronize();

		CudaError("Kernal invocation");

		cudaMemcpy(a_h,b_d,size,cudaMemcpyDeviceToHost);

		CudaError("memcpy");

		for(int i=0;i<dimA;i++)

		{

				assert(a_h[i]==dimA-1-i);

}

		cudaFree(a_d);

		cudaFree(b_d);

		free(a_h);

		printf("Correct!\n");

		return 0;

}

void CudaError(const char *msg)

{

		cudaError_t err=cudaGetLastError();

		if(cudaSuccess!=err)//What is CudaSuccess?is it Variable

		{

				fprintf(stderr,"Cuda Error:%s,%s,\n",msg,cudaGetErrorString(err));

		exit(EXIT_FAILURE);//What is the purspose of this function and can we pass other Argument rather than  EXIT_FAILURE

		}

}

How about reading The C Programming Language before trying to approach CUDA programming?