
/*
 * inplace_add: adds a rectangular block of B onto a rectangular block of A,
 * element by element (A_block += B_block).
 *
 * Each work-item handles exactly one element: global id 0 selects the row and
 * global id 1 selects the column inside the block. Work-items whose ids fall
 * outside A_row_size x A_col_size do nothing.
 *
 * Element (r, c) of a block is addressed as
 *   (r + row_start) * internal_cols + (c + col_start)
 * in the respective buffer.
 *
 * A_internal_rows, B_row_size, B_col_size and B_internal_rows are part of the
 * signature but are not read by this kernel; in particular the B block is not
 * bounds-checked against its own sizes.
 */
__kernel void inplace_add(
          __global float * A,
          unsigned int A_row_start,
          unsigned int A_col_start,
          unsigned int A_row_size,
          unsigned int A_col_size,
          unsigned int A_internal_rows,
          unsigned int A_internal_cols,
          __global const float * B,
          unsigned int B_row_start,
          unsigned int B_col_start,
          unsigned int B_row_size,
          unsigned int B_col_size,
          unsigned int B_internal_rows,
          unsigned int B_internal_cols)
{
  const size_t row = get_global_id(0);
  const size_t col = get_global_id(1);

  /* Surplus work-items outside the A block have nothing to do. */
  if (row >= A_row_size || col >= A_col_size)
    return;

  /* Linear offsets of the (row, col) element within each buffer. */
  const size_t a_offset = (row + A_row_start) * A_internal_cols
                        + (col + A_col_start);
  const size_t b_offset = (row + B_row_start) * B_internal_cols
                        + (col + B_col_start);

  A[a_offset] += B[b_offset];
}

