From 21e26ba51bfdcaf68c15a975447229ac2a21d00c Mon Sep 17 00:00:00 2001 From: fullbat Date: Sun, 3 Nov 2024 14:15:44 +0100 Subject: [PATCH] Refactor setup.py and core.pyx files in line with master --- commit/core.pyx | 118 +- commit/operator/operator_c.c | 9037 ++++++++++++++++++++++++++++++++++ setup.py | 6 +- 3 files changed, 9094 insertions(+), 67 deletions(-) diff --git a/commit/core.pyx b/commit/core.pyx index 7753b584..b2385ce5 100755 --- a/commit/core.pyx +++ b/commit/core.pyx @@ -735,16 +735,7 @@ cdef class Evaluation : logger.subinfo('') logger.info( 'Building linear operator A' ) - nF = self.DICTIONARY['IC']['nF'] # number of FIBERS - nR = self.KERNELS['wmr'].shape[0] # number of FIBER RADII - nE = self.DICTIONARY['EC']['nE'] # number of EC segments - nT = self.KERNELS['wmh'].shape[0] # number of EC TORTUOSITY values - nV = self.DICTIONARY['nV'] # number of VOXELS - nI = self.KERNELS['iso'].shape[0] # number of ISO contributions - n2 = nR * nF + nT * nE + nI * nV - self.DICTIONARY["IC"]["eval"] = np.ones( int(n2), dtype=np.uint32) - - self.A = operator.LinearOperator( self.DICTIONARY, self.KERNELS, self.THREADS, nolut=True if hasattr(self.model, 'nolut') else False ) + self.A = operator.LinearOperator( self.DICTIONARY, self.KERNELS, self.THREADS, True if hasattr(self.model, 'nolut') else False ) logger.info( f'[ {format_time(time.time() - tic)} ]' ) @@ -761,30 +752,8 @@ cdef class Evaluation : y = self.niiDWI_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'], : ].flatten().astype(np.float64) - if self.contribution_mask is not None : - - # find the voxels traversed by the tracts with zero contribution in the mask - zero_fibs = np.where(self.contribution_mask == 0)[0] - fibs = np.where(self.contribution_mask > 0)[0] - vox_zero = [] - for f in zero_fibs : - vox_zero.extend(self.DICTIONARY['IC']['v'][self.DICTIONARY['IC']['fiber'] == f]) - - # find voxel not in the mask - vox_in = [] - for f in fibs : - vox_in.extend(self.DICTIONARY['IC']['v'][self.DICTIONARY['IC']['fiber'] == f]) - - # find voxel in vox_zero but not in vox_in - vox_zero = np.array(vox_zero) - vox_in = np.array(vox_in) - vox_not_in = np.setdiff1d(vox_zero, vox_in) - - vox_sub = np.setdiff1d(vox_in, vox_not_in) - self.contribution_voxels = vox_sub - - # set the y values of the voxels not in the mask to zero - y[vox_not_in] = 0 + if self.debias_mask is not None : + y *= self.debias_mask return y @@ -1377,13 +1346,11 @@ cdef class Evaluation : self.set_verbose(0) nF = self.DICTIONARY['IC']['nF'] - nE = self.DICTIONARY['EC']['nE'] - nV = self.DICTIONARY['nV'] offset1 = nF * self.KERNELS['wmr'].shape[0] xic = self.x[:offset1] - mask = np.ones(nF, dtype=np.uint32) + mask = np.ones(offset1, dtype=np.uint32) mask[xic<0.000000000000001] = 0 self.DICTIONARY["IC"]["eval"] = mask @@ -1397,29 +1364,16 @@ cdef class Evaluation : logger.subinfo('Recomputing coefficients', indent_lvl=1, indent_char='*', with_progress=True) x_debias = self.x.copy() - - logger.debug( f'positive values of x before debias: {np.sum(x_debias>0)}' ) - logger.debug( f'positive values of mask: {np.sum(mask>0)}' ) - - x_debias[:nF] *= mask + x_debias[:offset1] *= mask x_debias[offset1:] = 0 - logger.debug( f"positive values of masked x: {np.sum(x_debias[:nF]>0)}" ) - - logger.debug( f'Shape of y: {self.get_y().size} Number of non zero values in y before: {np.sum(self.get_y()>0)}' ) - y_mask = np.asarray(self.A.dot(x_debias)) - print(f"number of non zero values in y_mask before bin: {np.sum(y_mask>0)}") # binarize y_debias y_mask[y_mask<0] = 0 y_mask[y_mask>0] = 1 - print(f"number of non zero values in y_mask after bin: {np.sum(y_mask>0)}") self.debias_mask = y_mask - logger.debug( f'Shape of y: {self.get_y().size} Number of non zero values in y after: {np.sum(self.get_y()>0)}' ) - # print the first 10 non zero values of y_debias - logger.debug( f'First 10 non zero values of y_debias: {self.get_y()[:10]}' ) with ProgressBar(disable=self.verbose!=3, hide_on_exit=True, subinfo=True) as pbar: self.x, opt_details = commit.solvers.solve(self.get_y(), self.A, self.A.T, tol_fun=tol_fun, tol_x=tol_x, max_iter=max_iter, verbose=self.verbose, x0=x0, regularisation=self.regularisation_params, confidence_array=confidence_array) @@ -1531,24 +1485,59 @@ cdef class Evaluation : niiMAP_hdr['descrip'] = 'Created with COMMIT %s'%self.get_config('version') niiMAP_hdr['db_name'] = '' - y_mea = np.reshape( self.niiDWI_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'], : ].flatten().astype(np.float32), (nV,-1) ) - y_est = np.reshape( self.A.dot(self.x), (nV,-1) ).astype(np.float32) - tmp = np.sqrt( np.mean((y_mea-y_est)**2,axis=1) ) - if self.debias_mask is not None: - y_mask = np.reshape(self.debias_mask, (nV,-1)) - # compute tmp only for the voxels of y_mea and y_est that are non zero in y_mask - idx = np.where(y_mask.flatten()>0) - tmp = np.sqrt( np.mean((y_mea.flatten()[idx]-y_est.flatten()[idx])**2) ) + nV = int(np.sum(self.debias_mask)/self.niiDWI_img.shape[3]) + ind_mask = np.where(self.debias_mask>0)[0] + vox_mask = np.reshape( self.debias_mask[ind_mask], (nV,-1) ) + + y_mea = np.reshape( self.get_y()[ind_mask], (nV,-1) ) + + y_est_ = np.asarray(self.A.dot(self.x)) + y_est = np.reshape( y_est_[ind_mask], (nV,-1) ) + + tmp = np.sqrt( np.mean((y_mea-y_est)**2,axis=1) ) + + logger.subinfo(f'RMSE: {tmp.mean():.3f} +/- {tmp.std():.3f}', indent_lvl=2, indent_char='-') + + tmp = np.sum(y_mea**2,axis=1) + idx = np.where( tmp < 1E-12 ) + tmp[ idx ] = 1 + tmp = np.sqrt( np.sum((y_mea-y_est)**2,axis=1) / tmp ) + tmp[ idx ] = 0 + logger.subinfo(f'NRMSE: {tmp.mean():.3f} +/- {tmp.std():.3f}', indent_lvl=2, indent_char='-') + + y_mea = np.reshape( self.get_y(), (self.DICTIONARY['nV'],-1) ) + y_est_ = np.asarray(self.A.dot(self.x)) + y_est = np.reshape( y_est_, (self.DICTIONARY['nV'],-1) ) + tmp = np.sqrt( np.mean((y_mea-y_est)**2,axis=1) ) + + niiMAP_img[self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz']] = tmp + niiMAP_hdr['cal_min'] = 0 + niiMAP_hdr['cal_max'] = tmp.max() + nibabel.save( niiMAP, pjoin(RESULTS_path,'fit_RMSE.nii.gz') ) + tmp = np.sum(y_mea**2,axis=1) + idx = np.where( tmp < 1E-12 ) + tmp[ idx ] = 1 + tmp = np.sqrt( np.sum((y_mea-y_est)**2,axis=1) / tmp ) + tmp[ idx ] = 0 + niiMAP_img[self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz']] = tmp + niiMAP_hdr['cal_min'] = 0 + niiMAP_hdr['cal_max'] = 1 + nibabel.save( niiMAP, pjoin(RESULTS_path,'fit_NRMSE.nii.gz') ) - logger.subinfo(f'RMSE: {tmp.mean():.3f} +/- {tmp.std():.3f}', indent_lvl=2, indent_char='-') - niiMAP_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'] ] = tmp - niiMAP_hdr['cal_min'] = 0 - niiMAP_hdr['cal_max'] = tmp.max() - nibabel.save( niiMAP, pjoin(RESULTS_path,'fit_RMSE.nii.gz') ) + else: + y_mea = np.reshape( self.niiDWI_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'], : ].flatten().astype(np.float32), (nV,-1) ) + y_est = np.reshape( self.A.dot(self.x), (nV,-1) ).astype(np.float32) + tmp = np.sqrt( np.mean((y_mea-y_est)**2,axis=1) ) + + logger.subinfo(f'RMSE: {tmp.mean():.3f} +/- {tmp.std():.3f}', indent_lvl=2, indent_char='-') + niiMAP_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'] ] = tmp + niiMAP_hdr['cal_min'] = 0 + niiMAP_hdr['cal_max'] = tmp.max() + nibabel.save( niiMAP, pjoin(RESULTS_path,'fit_RMSE.nii.gz') ) tmp = np.sum(y_mea**2,axis=1) idx = np.where( tmp < 1E-12 ) @@ -1561,6 +1550,7 @@ cdef class Evaluation : niiMAP_hdr['cal_max'] = 1 nibabel.save( niiMAP, pjoin(RESULTS_path,'fit_NRMSE.nii.gz') ) + if self.confidence_map_img is not None: confidence_array = np.reshape( self.confidence_map_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'], : ].flatten().astype(np.float32), (nV,-1) ) diff --git a/commit/operator/operator_c.c b/commit/operator/operator_c.c index e69de29b..1816bc95 100644 --- a/commit/operator/operator_c.c +++ b/commit/operator/operator_c.c @@ -0,0 +1,9037 @@ +#include +#include + +// max number of threads +#define MAX_THREADS 255 + +// global variables +int nF, n, nE, nV, nS, ndirs; +double *x, *Y; +uint32_t *ICthreads, *ECthreads, *ISOthreads; +uint8_t *ICthreadsT; +uint32_t *ECthreadsT, *ISOthreadsT; +uint32_t *ICf, *ICeval, *ICv, *ECv, *ISOv; +uint16_t *ICo, *ECo; +float *ICl; +float *wmrSFP0, *wmrSFP1, *wmrSFP2, *wmrSFP3, *wmrSFP4, *wmrSFP5, *wmrSFP6, *wmrSFP7, *wmrSFP8, *wmrSFP9, *wmrSFP10, *wmrSFP11, *wmrSFP12, *wmrSFP13, *wmrSFP14, *wmrSFP15, *wmrSFP16, *wmrSFP17, *wmrSFP18, *wmrSFP19; +float *wmhSFP0, *wmhSFP1, *wmhSFP2, *wmhSFP3, *wmhSFP4, *wmhSFP5, *wmhSFP6, *wmhSFP7, *wmhSFP8, *wmhSFP9, *wmhSFP10, *wmhSFP11, *wmhSFP12, *wmhSFP13, *wmhSFP14, *wmhSFP15, *wmhSFP16, *wmhSFP17, *wmhSFP18, *wmhSFP19; +float *isoSFP0, *isoSFP1, *isoSFP2, *isoSFP3, *isoSFP4, *isoSFP5, *isoSFP6, *isoSFP7, *isoSFP8, *isoSFP9, *isoSFP10, *isoSFP11, *isoSFP12, *isoSFP13, *isoSFP14, *isoSFP15, *isoSFP16, *isoSFP17, *isoSFP18, *isoSFP19; +uint32_t nIC, nEC, nISO; + + +// +// Compute a sub-block of the A*x MATRIX-VECTOR product +// +void* COMMIT_A__block( void *ptr ) +{ + int id = (long)ptr; + int offset; + uint32_t *eval0, *eval1, *eval2, *eval3, *eval4, *eval5, *eval6, *eval7, *eval8, *eval9, *eval10, *eval11, *eval12, *eval13, *eval14, *eval15, *eval16, *eval17, *eval18, *eval19; + double x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, w; + double *xPtr0, *xPtr1, *xPtr2, *xPtr3, *xPtr4, *xPtr5, *xPtr6, *xPtr7, *xPtr8, *xPtr9, *xPtr10, *xPtr11, *xPtr12, *xPtr13, *xPtr14, *xPtr15, *xPtr16, *xPtr17, *xPtr18, *xPtr19; + double *YPtr, *YPtrEnd; + float *SFP0ptr, *SFP1ptr, *SFP2ptr, *SFP3ptr, *SFP4ptr, *SFP5ptr, *SFP6ptr, *SFP7ptr, *SFP8ptr, *SFP9ptr, *SFP10ptr, *SFP11ptr, *SFP12ptr, *SFP13ptr, *SFP14ptr, *SFP15ptr, *SFP16ptr, *SFP17ptr, *SFP18ptr, *SFP19ptr; + uint32_t *t_v, *t_vEnd, *t_f; + uint16_t *t_o; + float *t_l; + + // intra-cellular compartments + if (nIC > 0) + { + t_v = ICv + ICthreads[id]; + t_vEnd = ICv + ICthreads[id+1]; + t_o = ICo + ICthreads[id]; + t_l = ICl + ICthreads[id]; + t_f = ICf + ICthreads[id]; + switch (nIC) + { + case 1: + while (t_v != t_vEnd) + { + xPtr0 = x + (*t_f); + eval0 = ICeval + *t_f; + x0 = *xPtr0 * (double)(*eval0); + if (x0 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + SFP0ptr = wmrSFP0 + offset; + while (YPtr != YPtrEnd) + (*YPtr++) += w * (x0 * (*SFP0ptr++)); + } + t_f++; + t_v++; + t_o++; + t_l++; + } + break; + case 2: + while (t_v != t_vEnd) + { + xPtr0 = x + (*t_f); + eval0 = ICeval + *t_f; + x0 = *xPtr0 * (double)(*eval0); + xPtr1 = xPtr0 + nF; + eval1 = eval0 + nF; + x1 = *xPtr1 * (double)(*eval1); + if (x0 != 0 || x1 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + SFP0ptr = wmrSFP0 + offset; + SFP1ptr = wmrSFP1 + offset; + while (YPtr != YPtrEnd) + (*YPtr++) += w * (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++)); + } + t_f++; + t_v++; + t_o++; + t_l++; + } + break; + case 3: + while (t_v != t_vEnd) + { + xPtr0 = x + (*t_f); + eval0 = ICeval + *t_f; + x0 = *xPtr0 * (double)(*eval0); + xPtr1 = xPtr0 + nF; + eval1 = eval0 + nF; + x1 = *xPtr1 * (double)(*eval1); + xPtr2 = xPtr1 + nF; + eval2 = eval1 + nF; + x2 = *xPtr2 * (double)(*eval2); + if (x0 != 0 || x1 != 0 || x2 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + SFP0ptr = wmrSFP0 + offset; + SFP1ptr = wmrSFP1 + offset; + SFP2ptr = wmrSFP2 + offset; + while (YPtr != YPtrEnd) + (*YPtr++) += w * (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++)); + } + t_f++; + t_v++; + t_o++; + t_l++; + } + break; + case 4: + while (t_v != t_vEnd) + { + xPtr0 = x + (*t_f); + eval0 = ICeval + *t_f; + x0 = *xPtr0 * (double)(*eval0); + xPtr1 = xPtr0 + nF; + eval1 = eval0 + nF; + x1 = *xPtr1 * (double)(*eval1); + xPtr2 = xPtr1 + nF; + eval2 = eval1 + nF; + x2 = *xPtr2 * (double)(*eval2); + xPtr3 = xPtr2 + nF; + eval3 = eval2 + nF; + x3 = *xPtr3 * (double)(*eval3); + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + SFP0ptr = wmrSFP0 + offset; + SFP1ptr = wmrSFP1 + offset; + SFP2ptr = wmrSFP2 + offset; + SFP3ptr = wmrSFP3 + offset; + while (YPtr != YPtrEnd) + (*YPtr++) += w * (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++)); + } + t_f++; + t_v++; + t_o++; + t_l++; + } + break; + case 5: + while (t_v != t_vEnd) + { + xPtr0 = x + (*t_f); + eval0 = ICeval + *t_f; + x0 = *xPtr0 * (double)(*eval0); + xPtr1 = xPtr0 + nF; + eval1 = eval0 + nF; + x1 = *xPtr1 * (double)(*eval1); + xPtr2 = xPtr1 + nF; + eval2 = eval1 + nF; + x2 = *xPtr2 * (double)(*eval2); + xPtr3 = xPtr2 + nF; + eval3 = eval2 + nF; + x3 = *xPtr3 * (double)(*eval3); + xPtr4 = xPtr3 + nF; + eval4 = eval3 + nF; + x4 = *xPtr4 * (double)(*eval4); + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + SFP0ptr = wmrSFP0 + offset; + SFP1ptr = wmrSFP1 + offset; + SFP2ptr = wmrSFP2 + offset; + SFP3ptr = wmrSFP3 + offset; + SFP4ptr = wmrSFP4 + offset; + while (YPtr != YPtrEnd) + (*YPtr++) += w * (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++)); + } + t_f++; + t_v++; + t_o++; + t_l++; + } + break; + case 6: + while (t_v != t_vEnd) + { + xPtr0 = x + (*t_f); + eval0 = ICeval + *t_f; + x0 = *xPtr0 * (double)(*eval0); + xPtr1 = xPtr0 + nF; + eval1 = eval0 + nF; + x1 = *xPtr1 * (double)(*eval1); + xPtr2 = xPtr1 + nF; + eval2 = eval1 + nF; + x2 = *xPtr2 * (double)(*eval2); + xPtr3 = xPtr2 + nF; + eval3 = eval2 + nF; + x3 = *xPtr3 * (double)(*eval3); + xPtr4 = xPtr3 + nF; + eval4 = eval3 + nF; + x4 = *xPtr4 * (double)(*eval4); + xPtr5 = xPtr4 + nF; + eval5 = eval4 + nF; + x5 = *xPtr5 * (double)(*eval5); + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + SFP0ptr = wmrSFP0 + offset; + SFP1ptr = wmrSFP1 + offset; + SFP2ptr = wmrSFP2 + offset; + SFP3ptr = wmrSFP3 + offset; + SFP4ptr = wmrSFP4 + offset; + SFP5ptr = wmrSFP5 + offset; + while (YPtr != YPtrEnd) + (*YPtr++) += w * (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++)); + } + t_f++; + t_v++; + t_o++; + t_l++; + } + break; + case 7: + while (t_v != t_vEnd) + { + xPtr0 = x + (*t_f); + eval0 = ICeval + *t_f; + x0 = *xPtr0 * (double)(*eval0); + xPtr1 = xPtr0 + nF; + eval1 = eval0 + nF; + x1 = *xPtr1 * (double)(*eval1); + xPtr2 = xPtr1 + nF; + eval2 = eval1 + nF; + x2 = *xPtr2 * (double)(*eval2); + xPtr3 = xPtr2 + nF; + eval3 = eval2 + nF; + x3 = *xPtr3 * (double)(*eval3); + xPtr4 = xPtr3 + nF; + eval4 = eval3 + nF; + x4 = *xPtr4 * (double)(*eval4); + xPtr5 = xPtr4 + nF; + eval5 = eval4 + nF; + x5 = *xPtr5 * (double)(*eval5); + xPtr6 = xPtr5 + nF; + eval6 = eval5 + nF; + x6 = *xPtr6 * (double)(*eval6); + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0 || x6 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + SFP0ptr = wmrSFP0 + offset; + SFP1ptr = wmrSFP1 + offset; + SFP2ptr = wmrSFP2 + offset; + SFP3ptr = wmrSFP3 + offset; + SFP4ptr = wmrSFP4 + offset; + SFP5ptr = wmrSFP5 + offset; + SFP6ptr = wmrSFP6 + offset; + while (YPtr != YPtrEnd) + (*YPtr++) += w * (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++) + x6 * (*SFP6ptr++)); + } + t_f++; + t_v++; + t_o++; + t_l++; + } + break; + case 8: + while (t_v != t_vEnd) + { + xPtr0 = x + (*t_f); + eval0 = ICeval + *t_f; + x0 = *xPtr0 * (double)(*eval0); + xPtr1 = xPtr0 + nF; + eval1 = eval0 + nF; + x1 = *xPtr1 * (double)(*eval1); + xPtr2 = xPtr1 + nF; + eval2 = eval1 + nF; + x2 = *xPtr2 * (double)(*eval2); + xPtr3 = xPtr2 + nF; + eval3 = eval2 + nF; + x3 = *xPtr3 * (double)(*eval3); + xPtr4 = xPtr3 + nF; + eval4 = eval3 + nF; + x4 = *xPtr4 * (double)(*eval4); + xPtr5 = xPtr4 + nF; + eval5 = eval4 + nF; + x5 = *xPtr5 * (double)(*eval5); + xPtr6 = xPtr5 + nF; + eval6 = eval5 + nF; + x6 = *xPtr6 * (double)(*eval6); + xPtr7 = xPtr6 + nF; + eval7 = eval6 + nF; + x7 = *xPtr7 * (double)(*eval7); + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0 || x6 != 0 || x7 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + SFP0ptr = wmrSFP0 + offset; + SFP1ptr = wmrSFP1 + offset; + SFP2ptr = wmrSFP2 + offset; + SFP3ptr = wmrSFP3 + offset; + SFP4ptr = wmrSFP4 + offset; + SFP5ptr = wmrSFP5 + offset; + SFP6ptr = wmrSFP6 + offset; + SFP7ptr = wmrSFP7 + offset; + while (YPtr != YPtrEnd) + (*YPtr++) += w * (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++) + x6 * (*SFP6ptr++) + x7 * (*SFP7ptr++)); + } + t_f++; + t_v++; + t_o++; + t_l++; + } + break; + case 9: + while (t_v != t_vEnd) + { + xPtr0 = x + (*t_f); + eval0 = ICeval + *t_f; + x0 = *xPtr0 * (double)(*eval0); + xPtr1 = xPtr0 + nF; + eval1 = eval0 + nF; + x1 = *xPtr1 * (double)(*eval1); + xPtr2 = xPtr1 + nF; + eval2 = eval1 + nF; + x2 = *xPtr2 * (double)(*eval2); + xPtr3 = xPtr2 + nF; + eval3 = eval2 + nF; + x3 = *xPtr3 * (double)(*eval3); + xPtr4 = xPtr3 + nF; + eval4 = eval3 + nF; + x4 = *xPtr4 * (double)(*eval4); + xPtr5 = xPtr4 + nF; + eval5 = eval4 + nF; + x5 = *xPtr5 * (double)(*eval5); + xPtr6 = xPtr5 + nF; + eval6 = eval5 + nF; + x6 = *xPtr6 * (double)(*eval6); + xPtr7 = xPtr6 + nF; + eval7 = eval6 + nF; + x7 = *xPtr7 * (double)(*eval7); + xPtr8 = xPtr7 + nF; + eval8 = eval7 + nF; + x8 = *xPtr8 * (double)(*eval8); + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0 || x6 != 0 || x7 != 0 || x8 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + SFP0ptr = wmrSFP0 + offset; + SFP1ptr = wmrSFP1 + offset; + SFP2ptr = wmrSFP2 + offset; + SFP3ptr = wmrSFP3 + offset; + SFP4ptr = wmrSFP4 + offset; + SFP5ptr = wmrSFP5 + offset; + SFP6ptr = wmrSFP6 + offset; + SFP7ptr = wmrSFP7 + offset; + SFP8ptr = wmrSFP8 + offset; + while (YPtr != YPtrEnd) + (*YPtr++) += w * (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++) + x6 * (*SFP6ptr++) + x7 * (*SFP7ptr++) + x8 * (*SFP8ptr++)); + } + t_f++; + t_v++; + t_o++; + t_l++; + } + break; + case 10: + while (t_v != t_vEnd) + { + xPtr0 = x + (*t_f); + eval0 = ICeval + *t_f; + x0 = *xPtr0 * (double)(*eval0); + xPtr1 = xPtr0 + nF; + eval1 = eval0 + nF; + x1 = *xPtr1 * (double)(*eval1); + xPtr2 = xPtr1 + nF; + eval2 = eval1 + nF; + x2 = *xPtr2 * (double)(*eval2); + xPtr3 = xPtr2 + nF; + eval3 = eval2 + nF; + x3 = *xPtr3 * (double)(*eval3); + xPtr4 = xPtr3 + nF; + eval4 = eval3 + nF; + x4 = *xPtr4 * (double)(*eval4); + xPtr5 = xPtr4 + nF; + eval5 = eval4 + nF; + x5 = *xPtr5 * (double)(*eval5); + xPtr6 = xPtr5 + nF; + eval6 = eval5 + nF; + x6 = *xPtr6 * (double)(*eval6); + xPtr7 = xPtr6 + nF; + eval7 = eval6 + nF; + x7 = *xPtr7 * (double)(*eval7); + xPtr8 = xPtr7 + nF; + eval8 = eval7 + nF; + x8 = *xPtr8 * (double)(*eval8); + xPtr9 = xPtr8 + nF; + eval9 = eval8 + nF; + x9 = *xPtr9 * (double)(*eval9); + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0 || x6 != 0 || x7 != 0 || x8 != 0 || x9 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + SFP0ptr = wmrSFP0 + offset; + SFP1ptr = wmrSFP1 + offset; + SFP2ptr = wmrSFP2 + offset; + SFP3ptr = wmrSFP3 + offset; + SFP4ptr = wmrSFP4 + offset; + SFP5ptr = wmrSFP5 + offset; + SFP6ptr = wmrSFP6 + offset; + SFP7ptr = wmrSFP7 + offset; + SFP8ptr = wmrSFP8 + offset; + SFP9ptr = wmrSFP9 + offset; + while (YPtr != YPtrEnd) + (*YPtr++) += w * (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++) + x6 * (*SFP6ptr++) + x7 * (*SFP7ptr++) + x8 * (*SFP8ptr++) + x9 * (*SFP9ptr++)); + } + t_f++; + t_v++; + t_o++; + t_l++; + } + break; + case 11: + while (t_v != t_vEnd) + { + xPtr0 = x + (*t_f); + eval0 = ICeval + *t_f; + x0 = *xPtr0 * (double)(*eval0); + xPtr1 = xPtr0 + nF; + eval1 = eval0 + nF; + x1 = *xPtr1 * (double)(*eval1); + xPtr2 = xPtr1 + nF; + eval2 = eval1 + nF; + x2 = *xPtr2 * (double)(*eval2); + xPtr3 = xPtr2 + nF; + eval3 = eval2 + nF; + x3 = *xPtr3 * (double)(*eval3); + xPtr4 = xPtr3 + nF; + eval4 = eval3 + nF; + x4 = *xPtr4 * (double)(*eval4); + xPtr5 = xPtr4 + nF; + eval5 = eval4 + nF; + x5 = *xPtr5 * (double)(*eval5); + xPtr6 = xPtr5 + nF; + eval6 = eval5 + nF; + x6 = *xPtr6 * (double)(*eval6); + xPtr7 = xPtr6 + nF; + eval7 = eval6 + nF; + x7 = *xPtr7 * (double)(*eval7); + xPtr8 = xPtr7 + nF; + eval8 = eval7 + nF; + x8 = *xPtr8 * (double)(*eval8); + xPtr9 = xPtr8 + nF; + eval9 = eval8 + nF; + x9 = *xPtr9 * (double)(*eval9); + xPtr10 = xPtr9 + nF; + eval10 = eval9 + nF; + x10 = *xPtr10 * (double)(*eval10); + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0 || x6 != 0 || x7 != 0 || x8 != 0 || x9 != 0 || x10 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + SFP0ptr = wmrSFP0 + offset; + SFP1ptr = wmrSFP1 + offset; + SFP2ptr = wmrSFP2 + offset; + SFP3ptr = wmrSFP3 + offset; + SFP4ptr = wmrSFP4 + offset; + SFP5ptr = wmrSFP5 + offset; + SFP6ptr = wmrSFP6 + offset; + SFP7ptr = wmrSFP7 + offset; + SFP8ptr = wmrSFP8 + offset; + SFP9ptr = wmrSFP9 + offset; + SFP10ptr = wmrSFP10 + offset; + while (YPtr != YPtrEnd) + (*YPtr++) += w * (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++) + x6 * (*SFP6ptr++) + x7 * (*SFP7ptr++) + x8 * (*SFP8ptr++) + x9 * (*SFP9ptr++) + x10 * (*SFP10ptr++)); + } + t_f++; + t_v++; + t_o++; + t_l++; + } + break; + case 12: + while (t_v != t_vEnd) + { + xPtr0 = x + (*t_f); + eval0 = ICeval + *t_f; + x0 = *xPtr0 * (double)(*eval0); + xPtr1 = xPtr0 + nF; + eval1 = eval0 + nF; + x1 = *xPtr1 * (double)(*eval1); + xPtr2 = xPtr1 + nF; + eval2 = eval1 + nF; + x2 = *xPtr2 * (double)(*eval2); + xPtr3 = xPtr2 + nF; + eval3 = eval2 + nF; + x3 = *xPtr3 * (double)(*eval3); + xPtr4 = xPtr3 + nF; + eval4 = eval3 + nF; + x4 = *xPtr4 * (double)(*eval4); + xPtr5 = xPtr4 + nF; + eval5 = eval4 + nF; + x5 = *xPtr5 * (double)(*eval5); + xPtr6 = xPtr5 + nF; + eval6 = eval5 + nF; + x6 = *xPtr6 * (double)(*eval6); + xPtr7 = xPtr6 + nF; + eval7 = eval6 + nF; + x7 = *xPtr7 * (double)(*eval7); + xPtr8 = xPtr7 + nF; + eval8 = eval7 + nF; + x8 = *xPtr8 * (double)(*eval8); + xPtr9 = xPtr8 + nF; + eval9 = eval8 + nF; + x9 = *xPtr9 * (double)(*eval9); + xPtr10 = xPtr9 + nF; + eval10 = eval9 + nF; + x10 = *xPtr10 * (double)(*eval10); + xPtr11 = xPtr10 + nF; + eval11 = eval10 + nF; + x11 = *xPtr11 * (double)(*eval11); + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0 || x6 != 0 || x7 != 0 || x8 != 0 || x9 != 0 || x10 != 0 || x11 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + SFP0ptr = wmrSFP0 + offset; + SFP1ptr = wmrSFP1 + offset; + SFP2ptr = wmrSFP2 + offset; + SFP3ptr = wmrSFP3 + offset; + SFP4ptr = wmrSFP4 + offset; + SFP5ptr = wmrSFP5 + offset; + SFP6ptr = wmrSFP6 + offset; + SFP7ptr = wmrSFP7 + offset; + SFP8ptr = wmrSFP8 + offset; + SFP9ptr = wmrSFP9 + offset; + SFP10ptr = wmrSFP10 + offset; + SFP11ptr = wmrSFP11 + offset; + while (YPtr != YPtrEnd) + (*YPtr++) += w * (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++) + x6 * (*SFP6ptr++) + x7 * (*SFP7ptr++) + x8 * (*SFP8ptr++) + x9 * (*SFP9ptr++) + x10 * (*SFP10ptr++) + x11 * (*SFP11ptr++)); + } + t_f++; + t_v++; + t_o++; + t_l++; + } + break; + case 13: + while (t_v != t_vEnd) + { + xPtr0 = x + (*t_f); + eval0 = ICeval + *t_f; + x0 = *xPtr0 * (double)(*eval0); + xPtr1 = xPtr0 + nF; + eval1 = eval0 + nF; + x1 = *xPtr1 * (double)(*eval1); + xPtr2 = xPtr1 + nF; + eval2 = eval1 + nF; + x2 = *xPtr2 * (double)(*eval2); + xPtr3 = xPtr2 + nF; + eval3 = eval2 + nF; + x3 = *xPtr3 * (double)(*eval3); + xPtr4 = xPtr3 + nF; + eval4 = eval3 + nF; + x4 = *xPtr4 * (double)(*eval4); + xPtr5 = xPtr4 + nF; + eval5 = eval4 + nF; + x5 = *xPtr5 * (double)(*eval5); + xPtr6 = xPtr5 + nF; + eval6 = eval5 + nF; + x6 = *xPtr6 * (double)(*eval6); + xPtr7 = xPtr6 + nF; + eval7 = eval6 + nF; + x7 = *xPtr7 * (double)(*eval7); + xPtr8 = xPtr7 + nF; + eval8 = eval7 + nF; + x8 = *xPtr8 * (double)(*eval8); + xPtr9 = xPtr8 + nF; + eval9 = eval8 + nF; + x9 = *xPtr9 * (double)(*eval9); + xPtr10 = xPtr9 + nF; + eval10 = eval9 + nF; + x10 = *xPtr10 * (double)(*eval10); + xPtr11 = xPtr10 + nF; + eval11 = eval10 + nF; + x11 = *xPtr11 * (double)(*eval11); + xPtr12 = xPtr11 + nF; + eval12 = eval11 + nF; + x12 = *xPtr12 * (double)(*eval12); + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0 || x6 != 0 || x7 != 0 || x8 != 0 || x9 != 0 || x10 != 0 || x11 != 0 || x12 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + SFP0ptr = wmrSFP0 + offset; + SFP1ptr = wmrSFP1 + offset; + SFP2ptr = wmrSFP2 + offset; + SFP3ptr = wmrSFP3 + offset; + SFP4ptr = wmrSFP4 + offset; + SFP5ptr = wmrSFP5 + offset; + SFP6ptr = wmrSFP6 + offset; + SFP7ptr = wmrSFP7 + offset; + SFP8ptr = wmrSFP8 + offset; + SFP9ptr = wmrSFP9 + offset; + SFP10ptr = wmrSFP10 + offset; + SFP11ptr = wmrSFP11 + offset; + SFP12ptr = wmrSFP12 + offset; + while (YPtr != YPtrEnd) + (*YPtr++) += w * (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++) + x6 * (*SFP6ptr++) + x7 * (*SFP7ptr++) + x8 * (*SFP8ptr++) + x9 * (*SFP9ptr++) + x10 * (*SFP10ptr++) + x11 * (*SFP11ptr++) + x12 * (*SFP12ptr++)); + } + t_f++; + t_v++; + t_o++; + t_l++; + } + break; + case 14: + while (t_v != t_vEnd) + { + xPtr0 = x + (*t_f); + eval0 = ICeval + *t_f; + x0 = *xPtr0 * (double)(*eval0); + xPtr1 = xPtr0 + nF; + eval1 = eval0 + nF; + x1 = *xPtr1 * (double)(*eval1); + xPtr2 = xPtr1 + nF; + eval2 = eval1 + nF; + x2 = *xPtr2 * (double)(*eval2); + xPtr3 = xPtr2 + nF; + eval3 = eval2 + nF; + x3 = *xPtr3 * (double)(*eval3); + xPtr4 = xPtr3 + nF; + eval4 = eval3 + nF; + x4 = *xPtr4 * (double)(*eval4); + xPtr5 = xPtr4 + nF; + eval5 = eval4 + nF; + x5 = *xPtr5 * (double)(*eval5); + xPtr6 = xPtr5 + nF; + eval6 = eval5 + nF; + x6 = *xPtr6 * (double)(*eval6); + xPtr7 = xPtr6 + nF; + eval7 = eval6 + nF; + x7 = *xPtr7 * (double)(*eval7); + xPtr8 = xPtr7 + nF; + eval8 = eval7 + nF; + x8 = *xPtr8 * (double)(*eval8); + xPtr9 = xPtr8 + nF; + eval9 = eval8 + nF; + x9 = *xPtr9 * (double)(*eval9); + xPtr10 = xPtr9 + nF; + eval10 = eval9 + nF; + x10 = *xPtr10 * (double)(*eval10); + xPtr11 = xPtr10 + nF; + eval11 = eval10 + nF; + x11 = *xPtr11 * (double)(*eval11); + xPtr12 = xPtr11 + nF; + eval12 = eval11 + nF; + x12 = *xPtr12 * (double)(*eval12); + xPtr13 = xPtr12 + nF; + eval13 = eval12 + nF; + x13 = *xPtr13 * (double)(*eval13); + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0 || x6 != 0 || x7 != 0 || x8 != 0 || x9 != 0 || x10 != 0 || x11 != 0 || x12 != 0 || x13 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + SFP0ptr = wmrSFP0 + offset; + SFP1ptr = wmrSFP1 + offset; + SFP2ptr = wmrSFP2 + offset; + SFP3ptr = wmrSFP3 + offset; + SFP4ptr = wmrSFP4 + offset; + SFP5ptr = wmrSFP5 + offset; + SFP6ptr = wmrSFP6 + offset; + SFP7ptr = wmrSFP7 + offset; + SFP8ptr = wmrSFP8 + offset; + SFP9ptr = wmrSFP9 + offset; + SFP10ptr = wmrSFP10 + offset; + SFP11ptr = wmrSFP11 + offset; + SFP12ptr = wmrSFP12 + offset; + SFP13ptr = wmrSFP13 + offset; + while (YPtr != YPtrEnd) + (*YPtr++) += w * (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++) + x6 * (*SFP6ptr++) + x7 * (*SFP7ptr++) + x8 * (*SFP8ptr++) + x9 * (*SFP9ptr++) + x10 * (*SFP10ptr++) + x11 * (*SFP11ptr++) + x12 * (*SFP12ptr++) + x13 * (*SFP13ptr++)); + } + t_f++; + t_v++; + t_o++; + t_l++; + } + break; + case 15: + while (t_v != t_vEnd) + { + xPtr0 = x + (*t_f); + eval0 = ICeval + *t_f; + x0 = *xPtr0 * (double)(*eval0); + xPtr1 = xPtr0 + nF; + eval1 = eval0 + nF; + x1 = *xPtr1 * (double)(*eval1); + xPtr2 = xPtr1 + nF; + eval2 = eval1 + nF; + x2 = *xPtr2 * (double)(*eval2); + xPtr3 = xPtr2 + nF; + eval3 = eval2 + nF; + x3 = *xPtr3 * (double)(*eval3); + xPtr4 = xPtr3 + nF; + eval4 = eval3 + nF; + x4 = *xPtr4 * (double)(*eval4); + xPtr5 = xPtr4 + nF; + eval5 = eval4 + nF; + x5 = *xPtr5 * (double)(*eval5); + xPtr6 = xPtr5 + nF; + eval6 = eval5 + nF; + x6 = *xPtr6 * (double)(*eval6); + xPtr7 = xPtr6 + nF; + eval7 = eval6 + nF; + x7 = *xPtr7 * (double)(*eval7); + xPtr8 = xPtr7 + nF; + eval8 = eval7 + nF; + x8 = *xPtr8 * (double)(*eval8); + xPtr9 = xPtr8 + nF; + eval9 = eval8 + nF; + x9 = *xPtr9 * (double)(*eval9); + xPtr10 = xPtr9 + nF; + eval10 = eval9 + nF; + x10 = *xPtr10 * (double)(*eval10); + xPtr11 = xPtr10 + nF; + eval11 = eval10 + nF; + x11 = *xPtr11 * (double)(*eval11); + xPtr12 = xPtr11 + nF; + eval12 = eval11 + nF; + x12 = *xPtr12 * (double)(*eval12); + xPtr13 = xPtr12 + nF; + eval13 = eval12 + nF; + x13 = *xPtr13 * (double)(*eval13); + xPtr14 = xPtr13 + nF; + eval14 = eval13 + nF; + x14 = *xPtr14 * (double)(*eval14); + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0 || x6 != 0 || x7 != 0 || x8 != 0 || x9 != 0 || x10 != 0 || x11 != 0 || x12 != 0 || x13 != 0 || x14 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + SFP0ptr = wmrSFP0 + offset; + SFP1ptr = wmrSFP1 + offset; + SFP2ptr = wmrSFP2 + offset; + SFP3ptr = wmrSFP3 + offset; + SFP4ptr = wmrSFP4 + offset; + SFP5ptr = wmrSFP5 + offset; + SFP6ptr = wmrSFP6 + offset; + SFP7ptr = wmrSFP7 + offset; + SFP8ptr = wmrSFP8 + offset; + SFP9ptr = wmrSFP9 + offset; + SFP10ptr = wmrSFP10 + offset; + SFP11ptr = wmrSFP11 + offset; + SFP12ptr = wmrSFP12 + offset; + SFP13ptr = wmrSFP13 + offset; + SFP14ptr = wmrSFP14 + offset; + while (YPtr != YPtrEnd) + (*YPtr++) += w * (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++) + x6 * (*SFP6ptr++) + x7 * (*SFP7ptr++) + x8 * (*SFP8ptr++) + x9 * (*SFP9ptr++) + x10 * (*SFP10ptr++) + x11 * (*SFP11ptr++) + x12 * (*SFP12ptr++) + x13 * (*SFP13ptr++) + x14 * (*SFP14ptr++)); + } + t_f++; + t_v++; + t_o++; + t_l++; + } + break; + case 16: + while (t_v != t_vEnd) + { + xPtr0 = x + (*t_f); + eval0 = ICeval + *t_f; + x0 = *xPtr0 * (double)(*eval0); + xPtr1 = xPtr0 + nF; + eval1 = eval0 + nF; + x1 = *xPtr1 * (double)(*eval1); + xPtr2 = xPtr1 + nF; + eval2 = eval1 + nF; + x2 = *xPtr2 * (double)(*eval2); + xPtr3 = xPtr2 + nF; + eval3 = eval2 + nF; + x3 = *xPtr3 * (double)(*eval3); + xPtr4 = xPtr3 + nF; + eval4 = eval3 + nF; + x4 = *xPtr4 * (double)(*eval4); + xPtr5 = xPtr4 + nF; + eval5 = eval4 + nF; + x5 = *xPtr5 * (double)(*eval5); + xPtr6 = xPtr5 + nF; + eval6 = eval5 + nF; + x6 = *xPtr6 * (double)(*eval6); + xPtr7 = xPtr6 + nF; + eval7 = eval6 + nF; + x7 = *xPtr7 * (double)(*eval7); + xPtr8 = xPtr7 + nF; + eval8 = eval7 + nF; + x8 = *xPtr8 * (double)(*eval8); + xPtr9 = xPtr8 + nF; + eval9 = eval8 + nF; + x9 = *xPtr9 * (double)(*eval9); + xPtr10 = xPtr9 + nF; + eval10 = eval9 + nF; + x10 = *xPtr10 * (double)(*eval10); + xPtr11 = xPtr10 + nF; + eval11 = eval10 + nF; + x11 = *xPtr11 * (double)(*eval11); + xPtr12 = xPtr11 + nF; + eval12 = eval11 + nF; + x12 = *xPtr12 * (double)(*eval12); + xPtr13 = xPtr12 + nF; + eval13 = eval12 + nF; + x13 = *xPtr13 * (double)(*eval13); + xPtr14 = xPtr13 + nF; + eval14 = eval13 + nF; + x14 = *xPtr14 * (double)(*eval14); + xPtr15 = xPtr14 + nF; + eval15 = eval14 + nF; + x15 = *xPtr15 * (double)(*eval15); + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0 || x6 != 0 || x7 != 0 || x8 != 0 || x9 != 0 || x10 != 0 || x11 != 0 || x12 != 0 || x13 != 0 || x14 != 0 || x15 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + SFP0ptr = wmrSFP0 + offset; + SFP1ptr = wmrSFP1 + offset; + SFP2ptr = wmrSFP2 + offset; + SFP3ptr = wmrSFP3 + offset; + SFP4ptr = wmrSFP4 + offset; + SFP5ptr = wmrSFP5 + offset; + SFP6ptr = wmrSFP6 + offset; + SFP7ptr = wmrSFP7 + offset; + SFP8ptr = wmrSFP8 + offset; + SFP9ptr = wmrSFP9 + offset; + SFP10ptr = wmrSFP10 + offset; + SFP11ptr = wmrSFP11 + offset; + SFP12ptr = wmrSFP12 + offset; + SFP13ptr = wmrSFP13 + offset; + SFP14ptr = wmrSFP14 + offset; + SFP15ptr = wmrSFP15 + offset; + while (YPtr != YPtrEnd) + (*YPtr++) += w * (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++) + x6 * (*SFP6ptr++) + x7 * (*SFP7ptr++) + x8 * (*SFP8ptr++) + x9 * (*SFP9ptr++) + x10 * (*SFP10ptr++) + x11 * (*SFP11ptr++) + x12 * (*SFP12ptr++) + x13 * (*SFP13ptr++) + x14 * (*SFP14ptr++) + x15 * (*SFP15ptr++)); + } + t_f++; + t_v++; + t_o++; + t_l++; + } + break; + case 17: + while (t_v != t_vEnd) + { + xPtr0 = x + (*t_f); + eval0 = ICeval + *t_f; + x0 = *xPtr0 * (double)(*eval0); + xPtr1 = xPtr0 + nF; + eval1 = eval0 + nF; + x1 = *xPtr1 * (double)(*eval1); + xPtr2 = xPtr1 + nF; + eval2 = eval1 + nF; + x2 = *xPtr2 * (double)(*eval2); + xPtr3 = xPtr2 + nF; + eval3 = eval2 + nF; + x3 = *xPtr3 * (double)(*eval3); + xPtr4 = xPtr3 + nF; + eval4 = eval3 + nF; + x4 = *xPtr4 * (double)(*eval4); + xPtr5 = xPtr4 + nF; + eval5 = eval4 + nF; + x5 = *xPtr5 * (double)(*eval5); + xPtr6 = xPtr5 + nF; + eval6 = eval5 + nF; + x6 = *xPtr6 * (double)(*eval6); + xPtr7 = xPtr6 + nF; + eval7 = eval6 + nF; + x7 = *xPtr7 * (double)(*eval7); + xPtr8 = xPtr7 + nF; + eval8 = eval7 + nF; + x8 = *xPtr8 * (double)(*eval8); + xPtr9 = xPtr8 + nF; + eval9 = eval8 + nF; + x9 = *xPtr9 * (double)(*eval9); + xPtr10 = xPtr9 + nF; + eval10 = eval9 + nF; + x10 = *xPtr10 * (double)(*eval10); + xPtr11 = xPtr10 + nF; + eval11 = eval10 + nF; + x11 = *xPtr11 * (double)(*eval11); + xPtr12 = xPtr11 + nF; + eval12 = eval11 + nF; + x12 = *xPtr12 * (double)(*eval12); + xPtr13 = xPtr12 + nF; + eval13 = eval12 + nF; + x13 = *xPtr13 * (double)(*eval13); + xPtr14 = xPtr13 + nF; + eval14 = eval13 + nF; + x14 = *xPtr14 * (double)(*eval14); + xPtr15 = xPtr14 + nF; + eval15 = eval14 + nF; + x15 = *xPtr15 * (double)(*eval15); + xPtr16 = xPtr15 + nF; + eval16 = eval15 + nF; + x16 = *xPtr16 * (double)(*eval16); + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0 || x6 != 0 || x7 != 0 || x8 != 0 || x9 != 0 || x10 != 0 || x11 != 0 || x12 != 0 || x13 != 0 || x14 != 0 || x15 != 0 || x16 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + SFP0ptr = wmrSFP0 + offset; + SFP1ptr = wmrSFP1 + offset; + SFP2ptr = wmrSFP2 + offset; + SFP3ptr = wmrSFP3 + offset; + SFP4ptr = wmrSFP4 + offset; + SFP5ptr = wmrSFP5 + offset; + SFP6ptr = wmrSFP6 + offset; + SFP7ptr = wmrSFP7 + offset; + SFP8ptr = wmrSFP8 + offset; + SFP9ptr = wmrSFP9 + offset; + SFP10ptr = wmrSFP10 + offset; + SFP11ptr = wmrSFP11 + offset; + SFP12ptr = wmrSFP12 + offset; + SFP13ptr = wmrSFP13 + offset; + SFP14ptr = wmrSFP14 + offset; + SFP15ptr = wmrSFP15 + offset; + SFP16ptr = wmrSFP16 + offset; + while (YPtr != YPtrEnd) + (*YPtr++) += w * (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++) + x6 * (*SFP6ptr++) + x7 * (*SFP7ptr++) + x8 * (*SFP8ptr++) + x9 * (*SFP9ptr++) + x10 * (*SFP10ptr++) + x11 * (*SFP11ptr++) + x12 * (*SFP12ptr++) + x13 * (*SFP13ptr++) + x14 * (*SFP14ptr++) + x15 * (*SFP15ptr++) + x16 * (*SFP16ptr++)); + } + t_f++; + t_v++; + t_o++; + t_l++; + } + break; + case 18: + while (t_v != t_vEnd) + { + xPtr0 = x + (*t_f); + eval0 = ICeval + *t_f; + x0 = *xPtr0 * (double)(*eval0); + xPtr1 = xPtr0 + nF; + eval1 = eval0 + nF; + x1 = *xPtr1 * (double)(*eval1); + xPtr2 = xPtr1 + nF; + eval2 = eval1 + nF; + x2 = *xPtr2 * (double)(*eval2); + xPtr3 = xPtr2 + nF; + eval3 = eval2 + nF; + x3 = *xPtr3 * (double)(*eval3); + xPtr4 = xPtr3 + nF; + eval4 = eval3 + nF; + x4 = *xPtr4 * (double)(*eval4); + xPtr5 = xPtr4 + nF; + eval5 = eval4 + nF; + x5 = *xPtr5 * (double)(*eval5); + xPtr6 = xPtr5 + nF; + eval6 = eval5 + nF; + x6 = *xPtr6 * (double)(*eval6); + xPtr7 = xPtr6 + nF; + eval7 = eval6 + nF; + x7 = *xPtr7 * (double)(*eval7); + xPtr8 = xPtr7 + nF; + eval8 = eval7 + nF; + x8 = *xPtr8 * (double)(*eval8); + xPtr9 = xPtr8 + nF; + eval9 = eval8 + nF; + x9 = *xPtr9 * (double)(*eval9); + xPtr10 = xPtr9 + nF; + eval10 = eval9 + nF; + x10 = *xPtr10 * (double)(*eval10); + xPtr11 = xPtr10 + nF; + eval11 = eval10 + nF; + x11 = *xPtr11 * (double)(*eval11); + xPtr12 = xPtr11 + nF; + eval12 = eval11 + nF; + x12 = *xPtr12 * (double)(*eval12); + xPtr13 = xPtr12 + nF; + eval13 = eval12 + nF; + x13 = *xPtr13 * (double)(*eval13); + xPtr14 = xPtr13 + nF; + eval14 = eval13 + nF; + x14 = *xPtr14 * (double)(*eval14); + xPtr15 = xPtr14 + nF; + eval15 = eval14 + nF; + x15 = *xPtr15 * (double)(*eval15); + xPtr16 = xPtr15 + nF; + eval16 = eval15 + nF; + x16 = *xPtr16 * (double)(*eval16); + xPtr17 = xPtr16 + nF; + eval17 = eval16 + nF; + x17 = *xPtr17 * (double)(*eval17); + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0 || x6 != 0 || x7 != 0 || x8 != 0 || x9 != 0 || x10 != 0 || x11 != 0 || x12 != 0 || x13 != 0 || x14 != 0 || x15 != 0 || x16 != 0 || x17 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + SFP0ptr = wmrSFP0 + offset; + SFP1ptr = wmrSFP1 + offset; + SFP2ptr = wmrSFP2 + offset; + SFP3ptr = wmrSFP3 + offset; + SFP4ptr = wmrSFP4 + offset; + SFP5ptr = wmrSFP5 + offset; + SFP6ptr = wmrSFP6 + offset; + SFP7ptr = wmrSFP7 + offset; + SFP8ptr = wmrSFP8 + offset; + SFP9ptr = wmrSFP9 + offset; + SFP10ptr = wmrSFP10 + offset; + SFP11ptr = wmrSFP11 + offset; + SFP12ptr = wmrSFP12 + offset; + SFP13ptr = wmrSFP13 + offset; + SFP14ptr = wmrSFP14 + offset; + SFP15ptr = wmrSFP15 + offset; + SFP16ptr = wmrSFP16 + offset; + SFP17ptr = wmrSFP17 + offset; + while (YPtr != YPtrEnd) + (*YPtr++) += w * (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++) + x6 * (*SFP6ptr++) + x7 * (*SFP7ptr++) + x8 * (*SFP8ptr++) + x9 * (*SFP9ptr++) + x10 * (*SFP10ptr++) + x11 * (*SFP11ptr++) + x12 * (*SFP12ptr++) + x13 * (*SFP13ptr++) + x14 * (*SFP14ptr++) + x15 * (*SFP15ptr++) + x16 * (*SFP16ptr++) + x17 * (*SFP17ptr++)); + } + t_f++; + t_v++; + t_o++; + t_l++; + } + break; + case 19: + while (t_v != t_vEnd) + { + xPtr0 = x + (*t_f); + eval0 = ICeval + *t_f; + x0 = *xPtr0 * (double)(*eval0); + xPtr1 = xPtr0 + nF; + eval1 = eval0 + nF; + x1 = *xPtr1 * (double)(*eval1); + xPtr2 = xPtr1 + nF; + eval2 = eval1 + nF; + x2 = *xPtr2 * (double)(*eval2); + xPtr3 = xPtr2 + nF; + eval3 = eval2 + nF; + x3 = *xPtr3 * (double)(*eval3); + xPtr4 = xPtr3 + nF; + eval4 = eval3 + nF; + x4 = *xPtr4 * (double)(*eval4); + xPtr5 = xPtr4 + nF; + eval5 = eval4 + nF; + x5 = *xPtr5 * (double)(*eval5); + xPtr6 = xPtr5 + nF; + eval6 = eval5 + nF; + x6 = *xPtr6 * (double)(*eval6); + xPtr7 = xPtr6 + nF; + eval7 = eval6 + nF; + x7 = *xPtr7 * (double)(*eval7); + xPtr8 = xPtr7 + nF; + eval8 = eval7 + nF; + x8 = *xPtr8 * (double)(*eval8); + xPtr9 = xPtr8 + nF; + eval9 = eval8 + nF; + x9 = *xPtr9 * (double)(*eval9); + xPtr10 = xPtr9 + nF; + eval10 = eval9 + nF; + x10 = *xPtr10 * (double)(*eval10); + xPtr11 = xPtr10 + nF; + eval11 = eval10 + nF; + x11 = *xPtr11 * (double)(*eval11); + xPtr12 = xPtr11 + nF; + eval12 = eval11 + nF; + x12 = *xPtr12 * (double)(*eval12); + xPtr13 = xPtr12 + nF; + eval13 = eval12 + nF; + x13 = *xPtr13 * (double)(*eval13); + xPtr14 = xPtr13 + nF; + eval14 = eval13 + nF; + x14 = *xPtr14 * (double)(*eval14); + xPtr15 = xPtr14 + nF; + eval15 = eval14 + nF; + x15 = *xPtr15 * (double)(*eval15); + xPtr16 = xPtr15 + nF; + eval16 = eval15 + nF; + x16 = *xPtr16 * (double)(*eval16); + xPtr17 = xPtr16 + nF; + eval17 = eval16 + nF; + x17 = *xPtr17 * (double)(*eval17); + xPtr18 = xPtr17 + nF; + eval18 = eval17 + nF; + x18 = *xPtr18 * (double)(*eval18); + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0 || x6 != 0 || x7 != 0 || x8 != 0 || x9 != 0 || x10 != 0 || x11 != 0 || x12 != 0 || x13 != 0 || x14 != 0 || x15 != 0 || x16 != 0 || x17 != 0 || x18 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + SFP0ptr = wmrSFP0 + offset; + SFP1ptr = wmrSFP1 + offset; + SFP2ptr = wmrSFP2 + offset; + SFP3ptr = wmrSFP3 + offset; + SFP4ptr = wmrSFP4 + offset; + SFP5ptr = wmrSFP5 + offset; + SFP6ptr = wmrSFP6 + offset; + SFP7ptr = wmrSFP7 + offset; + SFP8ptr = wmrSFP8 + offset; + SFP9ptr = wmrSFP9 + offset; + SFP10ptr = wmrSFP10 + offset; + SFP11ptr = wmrSFP11 + offset; + SFP12ptr = wmrSFP12 + offset; + SFP13ptr = wmrSFP13 + offset; + SFP14ptr = wmrSFP14 + offset; + SFP15ptr = wmrSFP15 + offset; + SFP16ptr = wmrSFP16 + offset; + SFP17ptr = wmrSFP17 + offset; + SFP18ptr = wmrSFP18 + offset; + while (YPtr != YPtrEnd) + (*YPtr++) += w * (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++) + x6 * (*SFP6ptr++) + x7 * (*SFP7ptr++) + x8 * (*SFP8ptr++) + x9 * (*SFP9ptr++) + x10 * (*SFP10ptr++) + x11 * (*SFP11ptr++) + x12 * (*SFP12ptr++) + x13 * (*SFP13ptr++) + x14 * (*SFP14ptr++) + x15 * (*SFP15ptr++) + x16 * (*SFP16ptr++) + x17 * (*SFP17ptr++) + x18 * (*SFP18ptr++)); + } + t_f++; + t_v++; + t_o++; + t_l++; + } + break; + case 20: + while (t_v != t_vEnd) + { + xPtr0 = x + (*t_f); + eval0 = ICeval + *t_f; + x0 = *xPtr0 * (double)(*eval0); + xPtr1 = xPtr0 + nF; + eval1 = eval0 + nF; + x1 = *xPtr1 * (double)(*eval1); + xPtr2 = xPtr1 + nF; + eval2 = eval1 + nF; + x2 = *xPtr2 * (double)(*eval2); + xPtr3 = xPtr2 + nF; + eval3 = eval2 + nF; + x3 = *xPtr3 * (double)(*eval3); + xPtr4 = xPtr3 + nF; + eval4 = eval3 + nF; + x4 = *xPtr4 * (double)(*eval4); + xPtr5 = xPtr4 + nF; + eval5 = eval4 + nF; + x5 = *xPtr5 * (double)(*eval5); + xPtr6 = xPtr5 + nF; + eval6 = eval5 + nF; + x6 = *xPtr6 * (double)(*eval6); + xPtr7 = xPtr6 + nF; + eval7 = eval6 + nF; + x7 = *xPtr7 * (double)(*eval7); + xPtr8 = xPtr7 + nF; + eval8 = eval7 + nF; + x8 = *xPtr8 * (double)(*eval8); + xPtr9 = xPtr8 + nF; + eval9 = eval8 + nF; + x9 = *xPtr9 * (double)(*eval9); + xPtr10 = xPtr9 + nF; + eval10 = eval9 + nF; + x10 = *xPtr10 * (double)(*eval10); + xPtr11 = xPtr10 + nF; + eval11 = eval10 + nF; + x11 = *xPtr11 * (double)(*eval11); + xPtr12 = xPtr11 + nF; + eval12 = eval11 + nF; + x12 = *xPtr12 * (double)(*eval12); + xPtr13 = xPtr12 + nF; + eval13 = eval12 + nF; + x13 = *xPtr13 * (double)(*eval13); + xPtr14 = xPtr13 + nF; + eval14 = eval13 + nF; + x14 = *xPtr14 * (double)(*eval14); + xPtr15 = xPtr14 + nF; + eval15 = eval14 + nF; + x15 = *xPtr15 * (double)(*eval15); + xPtr16 = xPtr15 + nF; + eval16 = eval15 + nF; + x16 = *xPtr16 * (double)(*eval16); + xPtr17 = xPtr16 + nF; + eval17 = eval16 + nF; + x17 = *xPtr17 * (double)(*eval17); + xPtr18 = xPtr17 + nF; + eval18 = eval17 + nF; + x18 = *xPtr18 * (double)(*eval18); + xPtr19 = xPtr18 + nF; + eval19 = eval18 + nF; + x19 = *xPtr19 * (double)(*eval19); + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0 || x6 != 0 || x7 != 0 || x8 != 0 || x9 != 0 || x10 != 0 || x11 != 0 || x12 != 0 || x13 != 0 || x14 != 0 || x15 != 0 || x16 != 0 || x17 != 0 || x18 != 0 || x19 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + SFP0ptr = wmrSFP0 + offset; + SFP1ptr = wmrSFP1 + offset; + SFP2ptr = wmrSFP2 + offset; + SFP3ptr = wmrSFP3 + offset; + SFP4ptr = wmrSFP4 + offset; + SFP5ptr = wmrSFP5 + offset; + SFP6ptr = wmrSFP6 + offset; + SFP7ptr = wmrSFP7 + offset; + SFP8ptr = wmrSFP8 + offset; + SFP9ptr = wmrSFP9 + offset; + SFP10ptr = wmrSFP10 + offset; + SFP11ptr = wmrSFP11 + offset; + SFP12ptr = wmrSFP12 + offset; + SFP13ptr = wmrSFP13 + offset; + SFP14ptr = wmrSFP14 + offset; + SFP15ptr = wmrSFP15 + offset; + SFP16ptr = wmrSFP16 + offset; + SFP17ptr = wmrSFP17 + offset; + SFP18ptr = wmrSFP18 + offset; + SFP19ptr = wmrSFP19 + offset; + while (YPtr != YPtrEnd) + (*YPtr++) += w * (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++) + x6 * (*SFP6ptr++) + x7 * (*SFP7ptr++) + x8 * (*SFP8ptr++) + x9 * (*SFP9ptr++) + x10 * (*SFP10ptr++) + x11 * (*SFP11ptr++) + x12 * (*SFP12ptr++) + x13 * (*SFP13ptr++) + x14 * (*SFP14ptr++) + x15 * (*SFP15ptr++) + x16 * (*SFP16ptr++) + x17 * (*SFP17ptr++) + x18 * (*SFP18ptr++) + x19 * (*SFP19ptr++)); + } + t_f++; + t_v++; + t_o++; + t_l++; + } + break; + } + } + + // extra-cellular compartments + if (nEC > 0) + { + t_v = ECv + ECthreads[id]; + t_vEnd = ECv + ECthreads[id+1]; + t_o = ECo + ECthreads[id]; + xPtr0 = x + nIC*nF + ECthreads[id]; + switch (nEC) + { + case 1: + while (t_v != t_vEnd) + { + x0 = *xPtr0++; + if (x0 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + offset = nS * (*t_o); + SFP0ptr = wmhSFP0 + offset; + while (YPtr != YPtrEnd) + (*YPtr++) += (x0 * (*SFP0ptr++)); + } + t_v++; + t_o++; + } + break; + case 2: + xPtr1 = xPtr0 + nE; + while (t_v != t_vEnd) + { + x0 = *xPtr0++; + x1 = *xPtr1++; + if (x0 != 0 || x1 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + offset = nS * (*t_o); + SFP0ptr = wmhSFP0 + offset; + SFP1ptr = wmhSFP1 + offset; + while (YPtr != YPtrEnd) + (*YPtr++) += (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++)); + } + t_v++; + t_o++; + } + break; + case 3: + xPtr1 = xPtr0 + nE; + xPtr2 = xPtr1 + nE; + while (t_v != t_vEnd) + { + x0 = *xPtr0++; + x1 = *xPtr1++; + x2 = *xPtr2++; + if (x0 != 0 || x1 != 0 || x2 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + offset = nS * (*t_o); + SFP0ptr = wmhSFP0 + offset; + SFP1ptr = wmhSFP1 + offset; + SFP2ptr = wmhSFP2 + offset; + while (YPtr != YPtrEnd) + (*YPtr++) += (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++)); + } + t_v++; + t_o++; + } + break; + case 4: + xPtr1 = xPtr0 + nE; + xPtr2 = xPtr1 + nE; + xPtr3 = xPtr2 + nE; + while (t_v != t_vEnd) + { + x0 = *xPtr0++; + x1 = *xPtr1++; + x2 = *xPtr2++; + x3 = *xPtr3++; + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + offset = nS * (*t_o); + SFP0ptr = wmhSFP0 + offset; + SFP1ptr = wmhSFP1 + offset; + SFP2ptr = wmhSFP2 + offset; + SFP3ptr = wmhSFP3 + offset; + while (YPtr != YPtrEnd) + (*YPtr++) += (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++)); + } + t_v++; + t_o++; + } + break; + case 5: + xPtr1 = xPtr0 + nE; + xPtr2 = xPtr1 + nE; + xPtr3 = xPtr2 + nE; + xPtr4 = xPtr3 + nE; + while (t_v != t_vEnd) + { + x0 = *xPtr0++; + x1 = *xPtr1++; + x2 = *xPtr2++; + x3 = *xPtr3++; + x4 = *xPtr4++; + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + offset = nS * (*t_o); + SFP0ptr = wmhSFP0 + offset; + SFP1ptr = wmhSFP1 + offset; + SFP2ptr = wmhSFP2 + offset; + SFP3ptr = wmhSFP3 + offset; + SFP4ptr = wmhSFP4 + offset; + while (YPtr != YPtrEnd) + (*YPtr++) += (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++)); + } + t_v++; + t_o++; + } + break; + case 6: + xPtr1 = xPtr0 + nE; + xPtr2 = xPtr1 + nE; + xPtr3 = xPtr2 + nE; + xPtr4 = xPtr3 + nE; + xPtr5 = xPtr4 + nE; + while (t_v != t_vEnd) + { + x0 = *xPtr0++; + x1 = *xPtr1++; + x2 = *xPtr2++; + x3 = *xPtr3++; + x4 = *xPtr4++; + x5 = *xPtr5++; + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + offset = nS * (*t_o); + SFP0ptr = wmhSFP0 + offset; + SFP1ptr = wmhSFP1 + offset; + SFP2ptr = wmhSFP2 + offset; + SFP3ptr = wmhSFP3 + offset; + SFP4ptr = wmhSFP4 + offset; + SFP5ptr = wmhSFP5 + offset; + while (YPtr != YPtrEnd) + (*YPtr++) += (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++)); + } + t_v++; + t_o++; + } + break; + case 7: + xPtr1 = xPtr0 + nE; + xPtr2 = xPtr1 + nE; + xPtr3 = xPtr2 + nE; + xPtr4 = xPtr3 + nE; + xPtr5 = xPtr4 + nE; + xPtr6 = xPtr5 + nE; + while (t_v != t_vEnd) + { + x0 = *xPtr0++; + x1 = *xPtr1++; + x2 = *xPtr2++; + x3 = *xPtr3++; + x4 = *xPtr4++; + x5 = *xPtr5++; + x6 = *xPtr6++; + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0 || x6 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + offset = nS * (*t_o); + SFP0ptr = wmhSFP0 + offset; + SFP1ptr = wmhSFP1 + offset; + SFP2ptr = wmhSFP2 + offset; + SFP3ptr = wmhSFP3 + offset; + SFP4ptr = wmhSFP4 + offset; + SFP5ptr = wmhSFP5 + offset; + SFP6ptr = wmhSFP6 + offset; + while (YPtr != YPtrEnd) + (*YPtr++) += (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++) + x6 * (*SFP6ptr++)); + } + t_v++; + t_o++; + } + break; + case 8: + xPtr1 = xPtr0 + nE; + xPtr2 = xPtr1 + nE; + xPtr3 = xPtr2 + nE; + xPtr4 = xPtr3 + nE; + xPtr5 = xPtr4 + nE; + xPtr6 = xPtr5 + nE; + xPtr7 = xPtr6 + nE; + while (t_v != t_vEnd) + { + x0 = *xPtr0++; + x1 = *xPtr1++; + x2 = *xPtr2++; + x3 = *xPtr3++; + x4 = *xPtr4++; + x5 = *xPtr5++; + x6 = *xPtr6++; + x7 = *xPtr7++; + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0 || x6 != 0 || x7 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + offset = nS * (*t_o); + SFP0ptr = wmhSFP0 + offset; + SFP1ptr = wmhSFP1 + offset; + SFP2ptr = wmhSFP2 + offset; + SFP3ptr = wmhSFP3 + offset; + SFP4ptr = wmhSFP4 + offset; + SFP5ptr = wmhSFP5 + offset; + SFP6ptr = wmhSFP6 + offset; + SFP7ptr = wmhSFP7 + offset; + while (YPtr != YPtrEnd) + (*YPtr++) += (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++) + x6 * (*SFP6ptr++) + x7 * (*SFP7ptr++)); + } + t_v++; + t_o++; + } + break; + case 9: + xPtr1 = xPtr0 + nE; + xPtr2 = xPtr1 + nE; + xPtr3 = xPtr2 + nE; + xPtr4 = xPtr3 + nE; + xPtr5 = xPtr4 + nE; + xPtr6 = xPtr5 + nE; + xPtr7 = xPtr6 + nE; + xPtr8 = xPtr7 + nE; + while (t_v != t_vEnd) + { + x0 = *xPtr0++; + x1 = *xPtr1++; + x2 = *xPtr2++; + x3 = *xPtr3++; + x4 = *xPtr4++; + x5 = *xPtr5++; + x6 = *xPtr6++; + x7 = *xPtr7++; + x8 = *xPtr8++; + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0 || x6 != 0 || x7 != 0 || x8 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + offset = nS * (*t_o); + SFP0ptr = wmhSFP0 + offset; + SFP1ptr = wmhSFP1 + offset; + SFP2ptr = wmhSFP2 + offset; + SFP3ptr = wmhSFP3 + offset; + SFP4ptr = wmhSFP4 + offset; + SFP5ptr = wmhSFP5 + offset; + SFP6ptr = wmhSFP6 + offset; + SFP7ptr = wmhSFP7 + offset; + SFP8ptr = wmhSFP8 + offset; + while (YPtr != YPtrEnd) + (*YPtr++) += (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++) + x6 * (*SFP6ptr++) + x7 * (*SFP7ptr++) + x8 * (*SFP8ptr++)); + } + t_v++; + t_o++; + } + break; + case 10: + xPtr1 = xPtr0 + nE; + xPtr2 = xPtr1 + nE; + xPtr3 = xPtr2 + nE; + xPtr4 = xPtr3 + nE; + xPtr5 = xPtr4 + nE; + xPtr6 = xPtr5 + nE; + xPtr7 = xPtr6 + nE; + xPtr8 = xPtr7 + nE; + xPtr9 = xPtr8 + nE; + while (t_v != t_vEnd) + { + x0 = *xPtr0++; + x1 = *xPtr1++; + x2 = *xPtr2++; + x3 = *xPtr3++; + x4 = *xPtr4++; + x5 = *xPtr5++; + x6 = *xPtr6++; + x7 = *xPtr7++; + x8 = *xPtr8++; + x9 = *xPtr9++; + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0 || x6 != 0 || x7 != 0 || x8 != 0 || x9 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + offset = nS * (*t_o); + SFP0ptr = wmhSFP0 + offset; + SFP1ptr = wmhSFP1 + offset; + SFP2ptr = wmhSFP2 + offset; + SFP3ptr = wmhSFP3 + offset; + SFP4ptr = wmhSFP4 + offset; + SFP5ptr = wmhSFP5 + offset; + SFP6ptr = wmhSFP6 + offset; + SFP7ptr = wmhSFP7 + offset; + SFP8ptr = wmhSFP8 + offset; + SFP9ptr = wmhSFP9 + offset; + while (YPtr != YPtrEnd) + (*YPtr++) += (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++) + x6 * (*SFP6ptr++) + x7 * (*SFP7ptr++) + x8 * (*SFP8ptr++) + x9 * (*SFP9ptr++)); + } + t_v++; + t_o++; + } + break; + case 11: + xPtr1 = xPtr0 + nE; + xPtr2 = xPtr1 + nE; + xPtr3 = xPtr2 + nE; + xPtr4 = xPtr3 + nE; + xPtr5 = xPtr4 + nE; + xPtr6 = xPtr5 + nE; + xPtr7 = xPtr6 + nE; + xPtr8 = xPtr7 + nE; + xPtr9 = xPtr8 + nE; + xPtr10 = xPtr9 + nE; + while (t_v != t_vEnd) + { + x0 = *xPtr0++; + x1 = *xPtr1++; + x2 = *xPtr2++; + x3 = *xPtr3++; + x4 = *xPtr4++; + x5 = *xPtr5++; + x6 = *xPtr6++; + x7 = *xPtr7++; + x8 = *xPtr8++; + x9 = *xPtr9++; + x10 = *xPtr10++; + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0 || x6 != 0 || x7 != 0 || x8 != 0 || x9 != 0 || x10 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + offset = nS * (*t_o); + SFP0ptr = wmhSFP0 + offset; + SFP1ptr = wmhSFP1 + offset; + SFP2ptr = wmhSFP2 + offset; + SFP3ptr = wmhSFP3 + offset; + SFP4ptr = wmhSFP4 + offset; + SFP5ptr = wmhSFP5 + offset; + SFP6ptr = wmhSFP6 + offset; + SFP7ptr = wmhSFP7 + offset; + SFP8ptr = wmhSFP8 + offset; + SFP9ptr = wmhSFP9 + offset; + SFP10ptr = wmhSFP10 + offset; + while (YPtr != YPtrEnd) + (*YPtr++) += (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++) + x6 * (*SFP6ptr++) + x7 * (*SFP7ptr++) + x8 * (*SFP8ptr++) + x9 * (*SFP9ptr++) + x10 * (*SFP10ptr++)); + } + t_v++; + t_o++; + } + break; + case 12: + xPtr1 = xPtr0 + nE; + xPtr2 = xPtr1 + nE; + xPtr3 = xPtr2 + nE; + xPtr4 = xPtr3 + nE; + xPtr5 = xPtr4 + nE; + xPtr6 = xPtr5 + nE; + xPtr7 = xPtr6 + nE; + xPtr8 = xPtr7 + nE; + xPtr9 = xPtr8 + nE; + xPtr10 = xPtr9 + nE; + xPtr11 = xPtr10 + nE; + while (t_v != t_vEnd) + { + x0 = *xPtr0++; + x1 = *xPtr1++; + x2 = *xPtr2++; + x3 = *xPtr3++; + x4 = *xPtr4++; + x5 = *xPtr5++; + x6 = *xPtr6++; + x7 = *xPtr7++; + x8 = *xPtr8++; + x9 = *xPtr9++; + x10 = *xPtr10++; + x11 = *xPtr11++; + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0 || x6 != 0 || x7 != 0 || x8 != 0 || x9 != 0 || x10 != 0 || x11 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + offset = nS * (*t_o); + SFP0ptr = wmhSFP0 + offset; + SFP1ptr = wmhSFP1 + offset; + SFP2ptr = wmhSFP2 + offset; + SFP3ptr = wmhSFP3 + offset; + SFP4ptr = wmhSFP4 + offset; + SFP5ptr = wmhSFP5 + offset; + SFP6ptr = wmhSFP6 + offset; + SFP7ptr = wmhSFP7 + offset; + SFP8ptr = wmhSFP8 + offset; + SFP9ptr = wmhSFP9 + offset; + SFP10ptr = wmhSFP10 + offset; + SFP11ptr = wmhSFP11 + offset; + while (YPtr != YPtrEnd) + (*YPtr++) += (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++) + x6 * (*SFP6ptr++) + x7 * (*SFP7ptr++) + x8 * (*SFP8ptr++) + x9 * (*SFP9ptr++) + x10 * (*SFP10ptr++) + x11 * (*SFP11ptr++)); + } + t_v++; + t_o++; + } + break; + case 13: + xPtr1 = xPtr0 + nE; + xPtr2 = xPtr1 + nE; + xPtr3 = xPtr2 + nE; + xPtr4 = xPtr3 + nE; + xPtr5 = xPtr4 + nE; + xPtr6 = xPtr5 + nE; + xPtr7 = xPtr6 + nE; + xPtr8 = xPtr7 + nE; + xPtr9 = xPtr8 + nE; + xPtr10 = xPtr9 + nE; + xPtr11 = xPtr10 + nE; + xPtr12 = xPtr11 + nE; + while (t_v != t_vEnd) + { + x0 = *xPtr0++; + x1 = *xPtr1++; + x2 = *xPtr2++; + x3 = *xPtr3++; + x4 = *xPtr4++; + x5 = *xPtr5++; + x6 = *xPtr6++; + x7 = *xPtr7++; + x8 = *xPtr8++; + x9 = *xPtr9++; + x10 = *xPtr10++; + x11 = *xPtr11++; + x12 = *xPtr12++; + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0 || x6 != 0 || x7 != 0 || x8 != 0 || x9 != 0 || x10 != 0 || x11 != 0 || x12 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + offset = nS * (*t_o); + SFP0ptr = wmhSFP0 + offset; + SFP1ptr = wmhSFP1 + offset; + SFP2ptr = wmhSFP2 + offset; + SFP3ptr = wmhSFP3 + offset; + SFP4ptr = wmhSFP4 + offset; + SFP5ptr = wmhSFP5 + offset; + SFP6ptr = wmhSFP6 + offset; + SFP7ptr = wmhSFP7 + offset; + SFP8ptr = wmhSFP8 + offset; + SFP9ptr = wmhSFP9 + offset; + SFP10ptr = wmhSFP10 + offset; + SFP11ptr = wmhSFP11 + offset; + SFP12ptr = wmhSFP12 + offset; + while (YPtr != YPtrEnd) + (*YPtr++) += (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++) + x6 * (*SFP6ptr++) + x7 * (*SFP7ptr++) + x8 * (*SFP8ptr++) + x9 * (*SFP9ptr++) + x10 * (*SFP10ptr++) + x11 * (*SFP11ptr++) + x12 * (*SFP12ptr++)); + } + t_v++; + t_o++; + } + break; + case 14: + xPtr1 = xPtr0 + nE; + xPtr2 = xPtr1 + nE; + xPtr3 = xPtr2 + nE; + xPtr4 = xPtr3 + nE; + xPtr5 = xPtr4 + nE; + xPtr6 = xPtr5 + nE; + xPtr7 = xPtr6 + nE; + xPtr8 = xPtr7 + nE; + xPtr9 = xPtr8 + nE; + xPtr10 = xPtr9 + nE; + xPtr11 = xPtr10 + nE; + xPtr12 = xPtr11 + nE; + xPtr13 = xPtr12 + nE; + while (t_v != t_vEnd) + { + x0 = *xPtr0++; + x1 = *xPtr1++; + x2 = *xPtr2++; + x3 = *xPtr3++; + x4 = *xPtr4++; + x5 = *xPtr5++; + x6 = *xPtr6++; + x7 = *xPtr7++; + x8 = *xPtr8++; + x9 = *xPtr9++; + x10 = *xPtr10++; + x11 = *xPtr11++; + x12 = *xPtr12++; + x13 = *xPtr13++; + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0 || x6 != 0 || x7 != 0 || x8 != 0 || x9 != 0 || x10 != 0 || x11 != 0 || x12 != 0 || x13 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + offset = nS * (*t_o); + SFP0ptr = wmhSFP0 + offset; + SFP1ptr = wmhSFP1 + offset; + SFP2ptr = wmhSFP2 + offset; + SFP3ptr = wmhSFP3 + offset; + SFP4ptr = wmhSFP4 + offset; + SFP5ptr = wmhSFP5 + offset; + SFP6ptr = wmhSFP6 + offset; + SFP7ptr = wmhSFP7 + offset; + SFP8ptr = wmhSFP8 + offset; + SFP9ptr = wmhSFP9 + offset; + SFP10ptr = wmhSFP10 + offset; + SFP11ptr = wmhSFP11 + offset; + SFP12ptr = wmhSFP12 + offset; + SFP13ptr = wmhSFP13 + offset; + while (YPtr != YPtrEnd) + (*YPtr++) += (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++) + x6 * (*SFP6ptr++) + x7 * (*SFP7ptr++) + x8 * (*SFP8ptr++) + x9 * (*SFP9ptr++) + x10 * (*SFP10ptr++) + x11 * (*SFP11ptr++) + x12 * (*SFP12ptr++) + x13 * (*SFP13ptr++)); + } + t_v++; + t_o++; + } + break; + case 15: + xPtr1 = xPtr0 + nE; + xPtr2 = xPtr1 + nE; + xPtr3 = xPtr2 + nE; + xPtr4 = xPtr3 + nE; + xPtr5 = xPtr4 + nE; + xPtr6 = xPtr5 + nE; + xPtr7 = xPtr6 + nE; + xPtr8 = xPtr7 + nE; + xPtr9 = xPtr8 + nE; + xPtr10 = xPtr9 + nE; + xPtr11 = xPtr10 + nE; + xPtr12 = xPtr11 + nE; + xPtr13 = xPtr12 + nE; + xPtr14 = xPtr13 + nE; + while (t_v != t_vEnd) + { + x0 = *xPtr0++; + x1 = *xPtr1++; + x2 = *xPtr2++; + x3 = *xPtr3++; + x4 = *xPtr4++; + x5 = *xPtr5++; + x6 = *xPtr6++; + x7 = *xPtr7++; + x8 = *xPtr8++; + x9 = *xPtr9++; + x10 = *xPtr10++; + x11 = *xPtr11++; + x12 = *xPtr12++; + x13 = *xPtr13++; + x14 = *xPtr14++; + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0 || x6 != 0 || x7 != 0 || x8 != 0 || x9 != 0 || x10 != 0 || x11 != 0 || x12 != 0 || x13 != 0 || x14 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + offset = nS * (*t_o); + SFP0ptr = wmhSFP0 + offset; + SFP1ptr = wmhSFP1 + offset; + SFP2ptr = wmhSFP2 + offset; + SFP3ptr = wmhSFP3 + offset; + SFP4ptr = wmhSFP4 + offset; + SFP5ptr = wmhSFP5 + offset; + SFP6ptr = wmhSFP6 + offset; + SFP7ptr = wmhSFP7 + offset; + SFP8ptr = wmhSFP8 + offset; + SFP9ptr = wmhSFP9 + offset; + SFP10ptr = wmhSFP10 + offset; + SFP11ptr = wmhSFP11 + offset; + SFP12ptr = wmhSFP12 + offset; + SFP13ptr = wmhSFP13 + offset; + SFP14ptr = wmhSFP14 + offset; + while (YPtr != YPtrEnd) + (*YPtr++) += (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++) + x6 * (*SFP6ptr++) + x7 * (*SFP7ptr++) + x8 * (*SFP8ptr++) + x9 * (*SFP9ptr++) + x10 * (*SFP10ptr++) + x11 * (*SFP11ptr++) + x12 * (*SFP12ptr++) + x13 * (*SFP13ptr++) + x14 * (*SFP14ptr++)); + } + t_v++; + t_o++; + } + break; + case 16: + xPtr1 = xPtr0 + nE; + xPtr2 = xPtr1 + nE; + xPtr3 = xPtr2 + nE; + xPtr4 = xPtr3 + nE; + xPtr5 = xPtr4 + nE; + xPtr6 = xPtr5 + nE; + xPtr7 = xPtr6 + nE; + xPtr8 = xPtr7 + nE; + xPtr9 = xPtr8 + nE; + xPtr10 = xPtr9 + nE; + xPtr11 = xPtr10 + nE; + xPtr12 = xPtr11 + nE; + xPtr13 = xPtr12 + nE; + xPtr14 = xPtr13 + nE; + xPtr15 = xPtr14 + nE; + while (t_v != t_vEnd) + { + x0 = *xPtr0++; + x1 = *xPtr1++; + x2 = *xPtr2++; + x3 = *xPtr3++; + x4 = *xPtr4++; + x5 = *xPtr5++; + x6 = *xPtr6++; + x7 = *xPtr7++; + x8 = *xPtr8++; + x9 = *xPtr9++; + x10 = *xPtr10++; + x11 = *xPtr11++; + x12 = *xPtr12++; + x13 = *xPtr13++; + x14 = *xPtr14++; + x15 = *xPtr15++; + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0 || x6 != 0 || x7 != 0 || x8 != 0 || x9 != 0 || x10 != 0 || x11 != 0 || x12 != 0 || x13 != 0 || x14 != 0 || x15 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + offset = nS * (*t_o); + SFP0ptr = wmhSFP0 + offset; + SFP1ptr = wmhSFP1 + offset; + SFP2ptr = wmhSFP2 + offset; + SFP3ptr = wmhSFP3 + offset; + SFP4ptr = wmhSFP4 + offset; + SFP5ptr = wmhSFP5 + offset; + SFP6ptr = wmhSFP6 + offset; + SFP7ptr = wmhSFP7 + offset; + SFP8ptr = wmhSFP8 + offset; + SFP9ptr = wmhSFP9 + offset; + SFP10ptr = wmhSFP10 + offset; + SFP11ptr = wmhSFP11 + offset; + SFP12ptr = wmhSFP12 + offset; + SFP13ptr = wmhSFP13 + offset; + SFP14ptr = wmhSFP14 + offset; + SFP15ptr = wmhSFP15 + offset; + while (YPtr != YPtrEnd) + (*YPtr++) += (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++) + x6 * (*SFP6ptr++) + x7 * (*SFP7ptr++) + x8 * (*SFP8ptr++) + x9 * (*SFP9ptr++) + x10 * (*SFP10ptr++) + x11 * (*SFP11ptr++) + x12 * (*SFP12ptr++) + x13 * (*SFP13ptr++) + x14 * (*SFP14ptr++) + x15 * (*SFP15ptr++)); + } + t_v++; + t_o++; + } + break; + case 17: + xPtr1 = xPtr0 + nE; + xPtr2 = xPtr1 + nE; + xPtr3 = xPtr2 + nE; + xPtr4 = xPtr3 + nE; + xPtr5 = xPtr4 + nE; + xPtr6 = xPtr5 + nE; + xPtr7 = xPtr6 + nE; + xPtr8 = xPtr7 + nE; + xPtr9 = xPtr8 + nE; + xPtr10 = xPtr9 + nE; + xPtr11 = xPtr10 + nE; + xPtr12 = xPtr11 + nE; + xPtr13 = xPtr12 + nE; + xPtr14 = xPtr13 + nE; + xPtr15 = xPtr14 + nE; + xPtr16 = xPtr15 + nE; + while (t_v != t_vEnd) + { + x0 = *xPtr0++; + x1 = *xPtr1++; + x2 = *xPtr2++; + x3 = *xPtr3++; + x4 = *xPtr4++; + x5 = *xPtr5++; + x6 = *xPtr6++; + x7 = *xPtr7++; + x8 = *xPtr8++; + x9 = *xPtr9++; + x10 = *xPtr10++; + x11 = *xPtr11++; + x12 = *xPtr12++; + x13 = *xPtr13++; + x14 = *xPtr14++; + x15 = *xPtr15++; + x16 = *xPtr16++; + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0 || x6 != 0 || x7 != 0 || x8 != 0 || x9 != 0 || x10 != 0 || x11 != 0 || x12 != 0 || x13 != 0 || x14 != 0 || x15 != 0 || x16 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + offset = nS * (*t_o); + SFP0ptr = wmhSFP0 + offset; + SFP1ptr = wmhSFP1 + offset; + SFP2ptr = wmhSFP2 + offset; + SFP3ptr = wmhSFP3 + offset; + SFP4ptr = wmhSFP4 + offset; + SFP5ptr = wmhSFP5 + offset; + SFP6ptr = wmhSFP6 + offset; + SFP7ptr = wmhSFP7 + offset; + SFP8ptr = wmhSFP8 + offset; + SFP9ptr = wmhSFP9 + offset; + SFP10ptr = wmhSFP10 + offset; + SFP11ptr = wmhSFP11 + offset; + SFP12ptr = wmhSFP12 + offset; + SFP13ptr = wmhSFP13 + offset; + SFP14ptr = wmhSFP14 + offset; + SFP15ptr = wmhSFP15 + offset; + SFP16ptr = wmhSFP16 + offset; + while (YPtr != YPtrEnd) + (*YPtr++) += (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++) + x6 * (*SFP6ptr++) + x7 * (*SFP7ptr++) + x8 * (*SFP8ptr++) + x9 * (*SFP9ptr++) + x10 * (*SFP10ptr++) + x11 * (*SFP11ptr++) + x12 * (*SFP12ptr++) + x13 * (*SFP13ptr++) + x14 * (*SFP14ptr++) + x15 * (*SFP15ptr++) + x16 * (*SFP16ptr++)); + } + t_v++; + t_o++; + } + break; + case 18: + xPtr1 = xPtr0 + nE; + xPtr2 = xPtr1 + nE; + xPtr3 = xPtr2 + nE; + xPtr4 = xPtr3 + nE; + xPtr5 = xPtr4 + nE; + xPtr6 = xPtr5 + nE; + xPtr7 = xPtr6 + nE; + xPtr8 = xPtr7 + nE; + xPtr9 = xPtr8 + nE; + xPtr10 = xPtr9 + nE; + xPtr11 = xPtr10 + nE; + xPtr12 = xPtr11 + nE; + xPtr13 = xPtr12 + nE; + xPtr14 = xPtr13 + nE; + xPtr15 = xPtr14 + nE; + xPtr16 = xPtr15 + nE; + xPtr17 = xPtr16 + nE; + while (t_v != t_vEnd) + { + x0 = *xPtr0++; + x1 = *xPtr1++; + x2 = *xPtr2++; + x3 = *xPtr3++; + x4 = *xPtr4++; + x5 = *xPtr5++; + x6 = *xPtr6++; + x7 = *xPtr7++; + x8 = *xPtr8++; + x9 = *xPtr9++; + x10 = *xPtr10++; + x11 = *xPtr11++; + x12 = *xPtr12++; + x13 = *xPtr13++; + x14 = *xPtr14++; + x15 = *xPtr15++; + x16 = *xPtr16++; + x17 = *xPtr17++; + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0 || x6 != 0 || x7 != 0 || x8 != 0 || x9 != 0 || x10 != 0 || x11 != 0 || x12 != 0 || x13 != 0 || x14 != 0 || x15 != 0 || x16 != 0 || x17 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + offset = nS * (*t_o); + SFP0ptr = wmhSFP0 + offset; + SFP1ptr = wmhSFP1 + offset; + SFP2ptr = wmhSFP2 + offset; + SFP3ptr = wmhSFP3 + offset; + SFP4ptr = wmhSFP4 + offset; + SFP5ptr = wmhSFP5 + offset; + SFP6ptr = wmhSFP6 + offset; + SFP7ptr = wmhSFP7 + offset; + SFP8ptr = wmhSFP8 + offset; + SFP9ptr = wmhSFP9 + offset; + SFP10ptr = wmhSFP10 + offset; + SFP11ptr = wmhSFP11 + offset; + SFP12ptr = wmhSFP12 + offset; + SFP13ptr = wmhSFP13 + offset; + SFP14ptr = wmhSFP14 + offset; + SFP15ptr = wmhSFP15 + offset; + SFP16ptr = wmhSFP16 + offset; + SFP17ptr = wmhSFP17 + offset; + while (YPtr != YPtrEnd) + (*YPtr++) += (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++) + x6 * (*SFP6ptr++) + x7 * (*SFP7ptr++) + x8 * (*SFP8ptr++) + x9 * (*SFP9ptr++) + x10 * (*SFP10ptr++) + x11 * (*SFP11ptr++) + x12 * (*SFP12ptr++) + x13 * (*SFP13ptr++) + x14 * (*SFP14ptr++) + x15 * (*SFP15ptr++) + x16 * (*SFP16ptr++) + x17 * (*SFP17ptr++)); + } + t_v++; + t_o++; + } + break; + case 19: + xPtr1 = xPtr0 + nE; + xPtr2 = xPtr1 + nE; + xPtr3 = xPtr2 + nE; + xPtr4 = xPtr3 + nE; + xPtr5 = xPtr4 + nE; + xPtr6 = xPtr5 + nE; + xPtr7 = xPtr6 + nE; + xPtr8 = xPtr7 + nE; + xPtr9 = xPtr8 + nE; + xPtr10 = xPtr9 + nE; + xPtr11 = xPtr10 + nE; + xPtr12 = xPtr11 + nE; + xPtr13 = xPtr12 + nE; + xPtr14 = xPtr13 + nE; + xPtr15 = xPtr14 + nE; + xPtr16 = xPtr15 + nE; + xPtr17 = xPtr16 + nE; + xPtr18 = xPtr17 + nE; + while (t_v != t_vEnd) + { + x0 = *xPtr0++; + x1 = *xPtr1++; + x2 = *xPtr2++; + x3 = *xPtr3++; + x4 = *xPtr4++; + x5 = *xPtr5++; + x6 = *xPtr6++; + x7 = *xPtr7++; + x8 = *xPtr8++; + x9 = *xPtr9++; + x10 = *xPtr10++; + x11 = *xPtr11++; + x12 = *xPtr12++; + x13 = *xPtr13++; + x14 = *xPtr14++; + x15 = *xPtr15++; + x16 = *xPtr16++; + x17 = *xPtr17++; + x18 = *xPtr18++; + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0 || x6 != 0 || x7 != 0 || x8 != 0 || x9 != 0 || x10 != 0 || x11 != 0 || x12 != 0 || x13 != 0 || x14 != 0 || x15 != 0 || x16 != 0 || x17 != 0 || x18 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + offset = nS * (*t_o); + SFP0ptr = wmhSFP0 + offset; + SFP1ptr = wmhSFP1 + offset; + SFP2ptr = wmhSFP2 + offset; + SFP3ptr = wmhSFP3 + offset; + SFP4ptr = wmhSFP4 + offset; + SFP5ptr = wmhSFP5 + offset; + SFP6ptr = wmhSFP6 + offset; + SFP7ptr = wmhSFP7 + offset; + SFP8ptr = wmhSFP8 + offset; + SFP9ptr = wmhSFP9 + offset; + SFP10ptr = wmhSFP10 + offset; + SFP11ptr = wmhSFP11 + offset; + SFP12ptr = wmhSFP12 + offset; + SFP13ptr = wmhSFP13 + offset; + SFP14ptr = wmhSFP14 + offset; + SFP15ptr = wmhSFP15 + offset; + SFP16ptr = wmhSFP16 + offset; + SFP17ptr = wmhSFP17 + offset; + SFP18ptr = wmhSFP18 + offset; + while (YPtr != YPtrEnd) + (*YPtr++) += (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++) + x6 * (*SFP6ptr++) + x7 * (*SFP7ptr++) + x8 * (*SFP8ptr++) + x9 * (*SFP9ptr++) + x10 * (*SFP10ptr++) + x11 * (*SFP11ptr++) + x12 * (*SFP12ptr++) + x13 * (*SFP13ptr++) + x14 * (*SFP14ptr++) + x15 * (*SFP15ptr++) + x16 * (*SFP16ptr++) + x17 * (*SFP17ptr++) + x18 * (*SFP18ptr++)); + } + t_v++; + t_o++; + } + break; + case 20: + xPtr1 = xPtr0 + nE; + xPtr2 = xPtr1 + nE; + xPtr3 = xPtr2 + nE; + xPtr4 = xPtr3 + nE; + xPtr5 = xPtr4 + nE; + xPtr6 = xPtr5 + nE; + xPtr7 = xPtr6 + nE; + xPtr8 = xPtr7 + nE; + xPtr9 = xPtr8 + nE; + xPtr10 = xPtr9 + nE; + xPtr11 = xPtr10 + nE; + xPtr12 = xPtr11 + nE; + xPtr13 = xPtr12 + nE; + xPtr14 = xPtr13 + nE; + xPtr15 = xPtr14 + nE; + xPtr16 = xPtr15 + nE; + xPtr17 = xPtr16 + nE; + xPtr18 = xPtr17 + nE; + xPtr19 = xPtr18 + nE; + while (t_v != t_vEnd) + { + x0 = *xPtr0++; + x1 = *xPtr1++; + x2 = *xPtr2++; + x3 = *xPtr3++; + x4 = *xPtr4++; + x5 = *xPtr5++; + x6 = *xPtr6++; + x7 = *xPtr7++; + x8 = *xPtr8++; + x9 = *xPtr9++; + x10 = *xPtr10++; + x11 = *xPtr11++; + x12 = *xPtr12++; + x13 = *xPtr13++; + x14 = *xPtr14++; + x15 = *xPtr15++; + x16 = *xPtr16++; + x17 = *xPtr17++; + x18 = *xPtr18++; + x19 = *xPtr19++; + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0 || x6 != 0 || x7 != 0 || x8 != 0 || x9 != 0 || x10 != 0 || x11 != 0 || x12 != 0 || x13 != 0 || x14 != 0 || x15 != 0 || x16 != 0 || x17 != 0 || x18 != 0 || x19 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + offset = nS * (*t_o); + SFP0ptr = wmhSFP0 + offset; + SFP1ptr = wmhSFP1 + offset; + SFP2ptr = wmhSFP2 + offset; + SFP3ptr = wmhSFP3 + offset; + SFP4ptr = wmhSFP4 + offset; + SFP5ptr = wmhSFP5 + offset; + SFP6ptr = wmhSFP6 + offset; + SFP7ptr = wmhSFP7 + offset; + SFP8ptr = wmhSFP8 + offset; + SFP9ptr = wmhSFP9 + offset; + SFP10ptr = wmhSFP10 + offset; + SFP11ptr = wmhSFP11 + offset; + SFP12ptr = wmhSFP12 + offset; + SFP13ptr = wmhSFP13 + offset; + SFP14ptr = wmhSFP14 + offset; + SFP15ptr = wmhSFP15 + offset; + SFP16ptr = wmhSFP16 + offset; + SFP17ptr = wmhSFP17 + offset; + SFP18ptr = wmhSFP18 + offset; + SFP19ptr = wmhSFP19 + offset; + while (YPtr != YPtrEnd) + (*YPtr++) += (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++) + x6 * (*SFP6ptr++) + x7 * (*SFP7ptr++) + x8 * (*SFP8ptr++) + x9 * (*SFP9ptr++) + x10 * (*SFP10ptr++) + x11 * (*SFP11ptr++) + x12 * (*SFP12ptr++) + x13 * (*SFP13ptr++) + x14 * (*SFP14ptr++) + x15 * (*SFP15ptr++) + x16 * (*SFP16ptr++) + x17 * (*SFP17ptr++) + x18 * (*SFP18ptr++) + x19 * (*SFP19ptr++)); + } + t_v++; + t_o++; + } + break; + } + } + + // isotropic compartments + if (nISO > 0) + { + t_v = ISOv + ISOthreads[id]; + t_vEnd = ISOv + ISOthreads[id+1]; + xPtr0 = x + nIC*nF + nEC*nE + ISOthreads[id]; + switch (nISO) + { + case 1: + while (t_v != t_vEnd) + { + x0 = *xPtr0++; + if (x0 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + SFP0ptr = isoSFP0; + while (YPtr != YPtrEnd) + (*YPtr++) += (x0 * (*SFP0ptr++)); + } + t_v++; + } + break; + case 2: + xPtr1 = xPtr0 + nV; + while (t_v != t_vEnd) + { + x0 = *xPtr0++; + x1 = *xPtr1++; + if (x0 != 0 || x1 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + SFP0ptr = isoSFP0; + SFP1ptr = isoSFP1; + while (YPtr != YPtrEnd) + (*YPtr++) += (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++)); + } + t_v++; + } + break; + case 3: + xPtr1 = xPtr0 + nV; + xPtr2 = xPtr1 + nV; + while (t_v != t_vEnd) + { + x0 = *xPtr0++; + x1 = *xPtr1++; + x2 = *xPtr2++; + if (x0 != 0 || x1 != 0 || x2 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + SFP0ptr = isoSFP0; + SFP1ptr = isoSFP1; + SFP2ptr = isoSFP2; + while (YPtr != YPtrEnd) + (*YPtr++) += (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++)); + } + t_v++; + } + break; + case 4: + xPtr1 = xPtr0 + nV; + xPtr2 = xPtr1 + nV; + xPtr3 = xPtr2 + nV; + while (t_v != t_vEnd) + { + x0 = *xPtr0++; + x1 = *xPtr1++; + x2 = *xPtr2++; + x3 = *xPtr3++; + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + SFP0ptr = isoSFP0; + SFP1ptr = isoSFP1; + SFP2ptr = isoSFP2; + SFP3ptr = isoSFP3; + while (YPtr != YPtrEnd) + (*YPtr++) += (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++)); + } + t_v++; + } + break; + case 5: + xPtr1 = xPtr0 + nV; + xPtr2 = xPtr1 + nV; + xPtr3 = xPtr2 + nV; + xPtr4 = xPtr3 + nV; + while (t_v != t_vEnd) + { + x0 = *xPtr0++; + x1 = *xPtr1++; + x2 = *xPtr2++; + x3 = *xPtr3++; + x4 = *xPtr4++; + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + SFP0ptr = isoSFP0; + SFP1ptr = isoSFP1; + SFP2ptr = isoSFP2; + SFP3ptr = isoSFP3; + SFP4ptr = isoSFP4; + while (YPtr != YPtrEnd) + (*YPtr++) += (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++)); + } + t_v++; + } + break; + case 6: + xPtr1 = xPtr0 + nV; + xPtr2 = xPtr1 + nV; + xPtr3 = xPtr2 + nV; + xPtr4 = xPtr3 + nV; + xPtr5 = xPtr4 + nV; + while (t_v != t_vEnd) + { + x0 = *xPtr0++; + x1 = *xPtr1++; + x2 = *xPtr2++; + x3 = *xPtr3++; + x4 = *xPtr4++; + x5 = *xPtr5++; + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + SFP0ptr = isoSFP0; + SFP1ptr = isoSFP1; + SFP2ptr = isoSFP2; + SFP3ptr = isoSFP3; + SFP4ptr = isoSFP4; + SFP5ptr = isoSFP5; + while (YPtr != YPtrEnd) + (*YPtr++) += (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++)); + } + t_v++; + } + break; + case 7: + xPtr1 = xPtr0 + nV; + xPtr2 = xPtr1 + nV; + xPtr3 = xPtr2 + nV; + xPtr4 = xPtr3 + nV; + xPtr5 = xPtr4 + nV; + xPtr6 = xPtr5 + nV; + while (t_v != t_vEnd) + { + x0 = *xPtr0++; + x1 = *xPtr1++; + x2 = *xPtr2++; + x3 = *xPtr3++; + x4 = *xPtr4++; + x5 = *xPtr5++; + x6 = *xPtr6++; + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0 || x6 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + SFP0ptr = isoSFP0; + SFP1ptr = isoSFP1; + SFP2ptr = isoSFP2; + SFP3ptr = isoSFP3; + SFP4ptr = isoSFP4; + SFP5ptr = isoSFP5; + SFP6ptr = isoSFP6; + while (YPtr != YPtrEnd) + (*YPtr++) += (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++) + x6 * (*SFP6ptr++)); + } + t_v++; + } + break; + case 8: + xPtr1 = xPtr0 + nV; + xPtr2 = xPtr1 + nV; + xPtr3 = xPtr2 + nV; + xPtr4 = xPtr3 + nV; + xPtr5 = xPtr4 + nV; + xPtr6 = xPtr5 + nV; + xPtr7 = xPtr6 + nV; + while (t_v != t_vEnd) + { + x0 = *xPtr0++; + x1 = *xPtr1++; + x2 = *xPtr2++; + x3 = *xPtr3++; + x4 = *xPtr4++; + x5 = *xPtr5++; + x6 = *xPtr6++; + x7 = *xPtr7++; + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0 || x6 != 0 || x7 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + SFP0ptr = isoSFP0; + SFP1ptr = isoSFP1; + SFP2ptr = isoSFP2; + SFP3ptr = isoSFP3; + SFP4ptr = isoSFP4; + SFP5ptr = isoSFP5; + SFP6ptr = isoSFP6; + SFP7ptr = isoSFP7; + while (YPtr != YPtrEnd) + (*YPtr++) += (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++) + x6 * (*SFP6ptr++) + x7 * (*SFP7ptr++)); + } + t_v++; + } + break; + case 9: + xPtr1 = xPtr0 + nV; + xPtr2 = xPtr1 + nV; + xPtr3 = xPtr2 + nV; + xPtr4 = xPtr3 + nV; + xPtr5 = xPtr4 + nV; + xPtr6 = xPtr5 + nV; + xPtr7 = xPtr6 + nV; + xPtr8 = xPtr7 + nV; + while (t_v != t_vEnd) + { + x0 = *xPtr0++; + x1 = *xPtr1++; + x2 = *xPtr2++; + x3 = *xPtr3++; + x4 = *xPtr4++; + x5 = *xPtr5++; + x6 = *xPtr6++; + x7 = *xPtr7++; + x8 = *xPtr8++; + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0 || x6 != 0 || x7 != 0 || x8 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + SFP0ptr = isoSFP0; + SFP1ptr = isoSFP1; + SFP2ptr = isoSFP2; + SFP3ptr = isoSFP3; + SFP4ptr = isoSFP4; + SFP5ptr = isoSFP5; + SFP6ptr = isoSFP6; + SFP7ptr = isoSFP7; + SFP8ptr = isoSFP8; + while (YPtr != YPtrEnd) + (*YPtr++) += (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++) + x6 * (*SFP6ptr++) + x7 * (*SFP7ptr++) + x8 * (*SFP8ptr++)); + } + t_v++; + } + break; + case 10: + xPtr1 = xPtr0 + nV; + xPtr2 = xPtr1 + nV; + xPtr3 = xPtr2 + nV; + xPtr4 = xPtr3 + nV; + xPtr5 = xPtr4 + nV; + xPtr6 = xPtr5 + nV; + xPtr7 = xPtr6 + nV; + xPtr8 = xPtr7 + nV; + xPtr9 = xPtr8 + nV; + while (t_v != t_vEnd) + { + x0 = *xPtr0++; + x1 = *xPtr1++; + x2 = *xPtr2++; + x3 = *xPtr3++; + x4 = *xPtr4++; + x5 = *xPtr5++; + x6 = *xPtr6++; + x7 = *xPtr7++; + x8 = *xPtr8++; + x9 = *xPtr9++; + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0 || x6 != 0 || x7 != 0 || x8 != 0 || x9 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + SFP0ptr = isoSFP0; + SFP1ptr = isoSFP1; + SFP2ptr = isoSFP2; + SFP3ptr = isoSFP3; + SFP4ptr = isoSFP4; + SFP5ptr = isoSFP5; + SFP6ptr = isoSFP6; + SFP7ptr = isoSFP7; + SFP8ptr = isoSFP8; + SFP9ptr = isoSFP9; + while (YPtr != YPtrEnd) + (*YPtr++) += (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++) + x6 * (*SFP6ptr++) + x7 * (*SFP7ptr++) + x8 * (*SFP8ptr++) + x9 * (*SFP9ptr++)); + } + t_v++; + } + break; + case 11: + xPtr1 = xPtr0 + nV; + xPtr2 = xPtr1 + nV; + xPtr3 = xPtr2 + nV; + xPtr4 = xPtr3 + nV; + xPtr5 = xPtr4 + nV; + xPtr6 = xPtr5 + nV; + xPtr7 = xPtr6 + nV; + xPtr8 = xPtr7 + nV; + xPtr9 = xPtr8 + nV; + xPtr10 = xPtr9 + nV; + while (t_v != t_vEnd) + { + x0 = *xPtr0++; + x1 = *xPtr1++; + x2 = *xPtr2++; + x3 = *xPtr3++; + x4 = *xPtr4++; + x5 = *xPtr5++; + x6 = *xPtr6++; + x7 = *xPtr7++; + x8 = *xPtr8++; + x9 = *xPtr9++; + x10 = *xPtr10++; + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0 || x6 != 0 || x7 != 0 || x8 != 0 || x9 != 0 || x10 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + SFP0ptr = isoSFP0; + SFP1ptr = isoSFP1; + SFP2ptr = isoSFP2; + SFP3ptr = isoSFP3; + SFP4ptr = isoSFP4; + SFP5ptr = isoSFP5; + SFP6ptr = isoSFP6; + SFP7ptr = isoSFP7; + SFP8ptr = isoSFP8; + SFP9ptr = isoSFP9; + SFP10ptr = isoSFP10; + while (YPtr != YPtrEnd) + (*YPtr++) += (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++) + x6 * (*SFP6ptr++) + x7 * (*SFP7ptr++) + x8 * (*SFP8ptr++) + x9 * (*SFP9ptr++) + x10 * (*SFP10ptr++)); + } + t_v++; + } + break; + case 12: + xPtr1 = xPtr0 + nV; + xPtr2 = xPtr1 + nV; + xPtr3 = xPtr2 + nV; + xPtr4 = xPtr3 + nV; + xPtr5 = xPtr4 + nV; + xPtr6 = xPtr5 + nV; + xPtr7 = xPtr6 + nV; + xPtr8 = xPtr7 + nV; + xPtr9 = xPtr8 + nV; + xPtr10 = xPtr9 + nV; + xPtr11 = xPtr10 + nV; + while (t_v != t_vEnd) + { + x0 = *xPtr0++; + x1 = *xPtr1++; + x2 = *xPtr2++; + x3 = *xPtr3++; + x4 = *xPtr4++; + x5 = *xPtr5++; + x6 = *xPtr6++; + x7 = *xPtr7++; + x8 = *xPtr8++; + x9 = *xPtr9++; + x10 = *xPtr10++; + x11 = *xPtr11++; + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0 || x6 != 0 || x7 != 0 || x8 != 0 || x9 != 0 || x10 != 0 || x11 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + SFP0ptr = isoSFP0; + SFP1ptr = isoSFP1; + SFP2ptr = isoSFP2; + SFP3ptr = isoSFP3; + SFP4ptr = isoSFP4; + SFP5ptr = isoSFP5; + SFP6ptr = isoSFP6; + SFP7ptr = isoSFP7; + SFP8ptr = isoSFP8; + SFP9ptr = isoSFP9; + SFP10ptr = isoSFP10; + SFP11ptr = isoSFP11; + while (YPtr != YPtrEnd) + (*YPtr++) += (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++) + x6 * (*SFP6ptr++) + x7 * (*SFP7ptr++) + x8 * (*SFP8ptr++) + x9 * (*SFP9ptr++) + x10 * (*SFP10ptr++) + x11 * (*SFP11ptr++)); + } + t_v++; + } + break; + case 13: + xPtr1 = xPtr0 + nV; + xPtr2 = xPtr1 + nV; + xPtr3 = xPtr2 + nV; + xPtr4 = xPtr3 + nV; + xPtr5 = xPtr4 + nV; + xPtr6 = xPtr5 + nV; + xPtr7 = xPtr6 + nV; + xPtr8 = xPtr7 + nV; + xPtr9 = xPtr8 + nV; + xPtr10 = xPtr9 + nV; + xPtr11 = xPtr10 + nV; + xPtr12 = xPtr11 + nV; + while (t_v != t_vEnd) + { + x0 = *xPtr0++; + x1 = *xPtr1++; + x2 = *xPtr2++; + x3 = *xPtr3++; + x4 = *xPtr4++; + x5 = *xPtr5++; + x6 = *xPtr6++; + x7 = *xPtr7++; + x8 = *xPtr8++; + x9 = *xPtr9++; + x10 = *xPtr10++; + x11 = *xPtr11++; + x12 = *xPtr12++; + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0 || x6 != 0 || x7 != 0 || x8 != 0 || x9 != 0 || x10 != 0 || x11 != 0 || x12 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + SFP0ptr = isoSFP0; + SFP1ptr = isoSFP1; + SFP2ptr = isoSFP2; + SFP3ptr = isoSFP3; + SFP4ptr = isoSFP4; + SFP5ptr = isoSFP5; + SFP6ptr = isoSFP6; + SFP7ptr = isoSFP7; + SFP8ptr = isoSFP8; + SFP9ptr = isoSFP9; + SFP10ptr = isoSFP10; + SFP11ptr = isoSFP11; + SFP12ptr = isoSFP12; + while (YPtr != YPtrEnd) + (*YPtr++) += (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++) + x6 * (*SFP6ptr++) + x7 * (*SFP7ptr++) + x8 * (*SFP8ptr++) + x9 * (*SFP9ptr++) + x10 * (*SFP10ptr++) + x11 * (*SFP11ptr++) + x12 * (*SFP12ptr++)); + } + t_v++; + } + break; + case 14: + xPtr1 = xPtr0 + nV; + xPtr2 = xPtr1 + nV; + xPtr3 = xPtr2 + nV; + xPtr4 = xPtr3 + nV; + xPtr5 = xPtr4 + nV; + xPtr6 = xPtr5 + nV; + xPtr7 = xPtr6 + nV; + xPtr8 = xPtr7 + nV; + xPtr9 = xPtr8 + nV; + xPtr10 = xPtr9 + nV; + xPtr11 = xPtr10 + nV; + xPtr12 = xPtr11 + nV; + xPtr13 = xPtr12 + nV; + while (t_v != t_vEnd) + { + x0 = *xPtr0++; + x1 = *xPtr1++; + x2 = *xPtr2++; + x3 = *xPtr3++; + x4 = *xPtr4++; + x5 = *xPtr5++; + x6 = *xPtr6++; + x7 = *xPtr7++; + x8 = *xPtr8++; + x9 = *xPtr9++; + x10 = *xPtr10++; + x11 = *xPtr11++; + x12 = *xPtr12++; + x13 = *xPtr13++; + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0 || x6 != 0 || x7 != 0 || x8 != 0 || x9 != 0 || x10 != 0 || x11 != 0 || x12 != 0 || x13 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + SFP0ptr = isoSFP0; + SFP1ptr = isoSFP1; + SFP2ptr = isoSFP2; + SFP3ptr = isoSFP3; + SFP4ptr = isoSFP4; + SFP5ptr = isoSFP5; + SFP6ptr = isoSFP6; + SFP7ptr = isoSFP7; + SFP8ptr = isoSFP8; + SFP9ptr = isoSFP9; + SFP10ptr = isoSFP10; + SFP11ptr = isoSFP11; + SFP12ptr = isoSFP12; + SFP13ptr = isoSFP13; + while (YPtr != YPtrEnd) + (*YPtr++) += (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++) + x6 * (*SFP6ptr++) + x7 * (*SFP7ptr++) + x8 * (*SFP8ptr++) + x9 * (*SFP9ptr++) + x10 * (*SFP10ptr++) + x11 * (*SFP11ptr++) + x12 * (*SFP12ptr++) + x13 * (*SFP13ptr++)); + } + t_v++; + } + break; + case 15: + xPtr1 = xPtr0 + nV; + xPtr2 = xPtr1 + nV; + xPtr3 = xPtr2 + nV; + xPtr4 = xPtr3 + nV; + xPtr5 = xPtr4 + nV; + xPtr6 = xPtr5 + nV; + xPtr7 = xPtr6 + nV; + xPtr8 = xPtr7 + nV; + xPtr9 = xPtr8 + nV; + xPtr10 = xPtr9 + nV; + xPtr11 = xPtr10 + nV; + xPtr12 = xPtr11 + nV; + xPtr13 = xPtr12 + nV; + xPtr14 = xPtr13 + nV; + while (t_v != t_vEnd) + { + x0 = *xPtr0++; + x1 = *xPtr1++; + x2 = *xPtr2++; + x3 = *xPtr3++; + x4 = *xPtr4++; + x5 = *xPtr5++; + x6 = *xPtr6++; + x7 = *xPtr7++; + x8 = *xPtr8++; + x9 = *xPtr9++; + x10 = *xPtr10++; + x11 = *xPtr11++; + x12 = *xPtr12++; + x13 = *xPtr13++; + x14 = *xPtr14++; + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0 || x6 != 0 || x7 != 0 || x8 != 0 || x9 != 0 || x10 != 0 || x11 != 0 || x12 != 0 || x13 != 0 || x14 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + SFP0ptr = isoSFP0; + SFP1ptr = isoSFP1; + SFP2ptr = isoSFP2; + SFP3ptr = isoSFP3; + SFP4ptr = isoSFP4; + SFP5ptr = isoSFP5; + SFP6ptr = isoSFP6; + SFP7ptr = isoSFP7; + SFP8ptr = isoSFP8; + SFP9ptr = isoSFP9; + SFP10ptr = isoSFP10; + SFP11ptr = isoSFP11; + SFP12ptr = isoSFP12; + SFP13ptr = isoSFP13; + SFP14ptr = isoSFP14; + while (YPtr != YPtrEnd) + (*YPtr++) += (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++) + x6 * (*SFP6ptr++) + x7 * (*SFP7ptr++) + x8 * (*SFP8ptr++) + x9 * (*SFP9ptr++) + x10 * (*SFP10ptr++) + x11 * (*SFP11ptr++) + x12 * (*SFP12ptr++) + x13 * (*SFP13ptr++) + x14 * (*SFP14ptr++)); + } + t_v++; + } + break; + case 16: + xPtr1 = xPtr0 + nV; + xPtr2 = xPtr1 + nV; + xPtr3 = xPtr2 + nV; + xPtr4 = xPtr3 + nV; + xPtr5 = xPtr4 + nV; + xPtr6 = xPtr5 + nV; + xPtr7 = xPtr6 + nV; + xPtr8 = xPtr7 + nV; + xPtr9 = xPtr8 + nV; + xPtr10 = xPtr9 + nV; + xPtr11 = xPtr10 + nV; + xPtr12 = xPtr11 + nV; + xPtr13 = xPtr12 + nV; + xPtr14 = xPtr13 + nV; + xPtr15 = xPtr14 + nV; + while (t_v != t_vEnd) + { + x0 = *xPtr0++; + x1 = *xPtr1++; + x2 = *xPtr2++; + x3 = *xPtr3++; + x4 = *xPtr4++; + x5 = *xPtr5++; + x6 = *xPtr6++; + x7 = *xPtr7++; + x8 = *xPtr8++; + x9 = *xPtr9++; + x10 = *xPtr10++; + x11 = *xPtr11++; + x12 = *xPtr12++; + x13 = *xPtr13++; + x14 = *xPtr14++; + x15 = *xPtr15++; + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0 || x6 != 0 || x7 != 0 || x8 != 0 || x9 != 0 || x10 != 0 || x11 != 0 || x12 != 0 || x13 != 0 || x14 != 0 || x15 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + SFP0ptr = isoSFP0; + SFP1ptr = isoSFP1; + SFP2ptr = isoSFP2; + SFP3ptr = isoSFP3; + SFP4ptr = isoSFP4; + SFP5ptr = isoSFP5; + SFP6ptr = isoSFP6; + SFP7ptr = isoSFP7; + SFP8ptr = isoSFP8; + SFP9ptr = isoSFP9; + SFP10ptr = isoSFP10; + SFP11ptr = isoSFP11; + SFP12ptr = isoSFP12; + SFP13ptr = isoSFP13; + SFP14ptr = isoSFP14; + SFP15ptr = isoSFP15; + while (YPtr != YPtrEnd) + (*YPtr++) += (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++) + x6 * (*SFP6ptr++) + x7 * (*SFP7ptr++) + x8 * (*SFP8ptr++) + x9 * (*SFP9ptr++) + x10 * (*SFP10ptr++) + x11 * (*SFP11ptr++) + x12 * (*SFP12ptr++) + x13 * (*SFP13ptr++) + x14 * (*SFP14ptr++) + x15 * (*SFP15ptr++)); + } + t_v++; + } + break; + case 17: + xPtr1 = xPtr0 + nV; + xPtr2 = xPtr1 + nV; + xPtr3 = xPtr2 + nV; + xPtr4 = xPtr3 + nV; + xPtr5 = xPtr4 + nV; + xPtr6 = xPtr5 + nV; + xPtr7 = xPtr6 + nV; + xPtr8 = xPtr7 + nV; + xPtr9 = xPtr8 + nV; + xPtr10 = xPtr9 + nV; + xPtr11 = xPtr10 + nV; + xPtr12 = xPtr11 + nV; + xPtr13 = xPtr12 + nV; + xPtr14 = xPtr13 + nV; + xPtr15 = xPtr14 + nV; + xPtr16 = xPtr15 + nV; + while (t_v != t_vEnd) + { + x0 = *xPtr0++; + x1 = *xPtr1++; + x2 = *xPtr2++; + x3 = *xPtr3++; + x4 = *xPtr4++; + x5 = *xPtr5++; + x6 = *xPtr6++; + x7 = *xPtr7++; + x8 = *xPtr8++; + x9 = *xPtr9++; + x10 = *xPtr10++; + x11 = *xPtr11++; + x12 = *xPtr12++; + x13 = *xPtr13++; + x14 = *xPtr14++; + x15 = *xPtr15++; + x16 = *xPtr16++; + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0 || x6 != 0 || x7 != 0 || x8 != 0 || x9 != 0 || x10 != 0 || x11 != 0 || x12 != 0 || x13 != 0 || x14 != 0 || x15 != 0 || x16 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + SFP0ptr = isoSFP0; + SFP1ptr = isoSFP1; + SFP2ptr = isoSFP2; + SFP3ptr = isoSFP3; + SFP4ptr = isoSFP4; + SFP5ptr = isoSFP5; + SFP6ptr = isoSFP6; + SFP7ptr = isoSFP7; + SFP8ptr = isoSFP8; + SFP9ptr = isoSFP9; + SFP10ptr = isoSFP10; + SFP11ptr = isoSFP11; + SFP12ptr = isoSFP12; + SFP13ptr = isoSFP13; + SFP14ptr = isoSFP14; + SFP15ptr = isoSFP15; + SFP16ptr = isoSFP16; + while (YPtr != YPtrEnd) + (*YPtr++) += (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++) + x6 * (*SFP6ptr++) + x7 * (*SFP7ptr++) + x8 * (*SFP8ptr++) + x9 * (*SFP9ptr++) + x10 * (*SFP10ptr++) + x11 * (*SFP11ptr++) + x12 * (*SFP12ptr++) + x13 * (*SFP13ptr++) + x14 * (*SFP14ptr++) + x15 * (*SFP15ptr++) + x16 * (*SFP16ptr++)); + } + t_v++; + } + break; + case 18: + xPtr1 = xPtr0 + nV; + xPtr2 = xPtr1 + nV; + xPtr3 = xPtr2 + nV; + xPtr4 = xPtr3 + nV; + xPtr5 = xPtr4 + nV; + xPtr6 = xPtr5 + nV; + xPtr7 = xPtr6 + nV; + xPtr8 = xPtr7 + nV; + xPtr9 = xPtr8 + nV; + xPtr10 = xPtr9 + nV; + xPtr11 = xPtr10 + nV; + xPtr12 = xPtr11 + nV; + xPtr13 = xPtr12 + nV; + xPtr14 = xPtr13 + nV; + xPtr15 = xPtr14 + nV; + xPtr16 = xPtr15 + nV; + xPtr17 = xPtr16 + nV; + while (t_v != t_vEnd) + { + x0 = *xPtr0++; + x1 = *xPtr1++; + x2 = *xPtr2++; + x3 = *xPtr3++; + x4 = *xPtr4++; + x5 = *xPtr5++; + x6 = *xPtr6++; + x7 = *xPtr7++; + x8 = *xPtr8++; + x9 = *xPtr9++; + x10 = *xPtr10++; + x11 = *xPtr11++; + x12 = *xPtr12++; + x13 = *xPtr13++; + x14 = *xPtr14++; + x15 = *xPtr15++; + x16 = *xPtr16++; + x17 = *xPtr17++; + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0 || x6 != 0 || x7 != 0 || x8 != 0 || x9 != 0 || x10 != 0 || x11 != 0 || x12 != 0 || x13 != 0 || x14 != 0 || x15 != 0 || x16 != 0 || x17 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + SFP0ptr = isoSFP0; + SFP1ptr = isoSFP1; + SFP2ptr = isoSFP2; + SFP3ptr = isoSFP3; + SFP4ptr = isoSFP4; + SFP5ptr = isoSFP5; + SFP6ptr = isoSFP6; + SFP7ptr = isoSFP7; + SFP8ptr = isoSFP8; + SFP9ptr = isoSFP9; + SFP10ptr = isoSFP10; + SFP11ptr = isoSFP11; + SFP12ptr = isoSFP12; + SFP13ptr = isoSFP13; + SFP14ptr = isoSFP14; + SFP15ptr = isoSFP15; + SFP16ptr = isoSFP16; + SFP17ptr = isoSFP17; + while (YPtr != YPtrEnd) + (*YPtr++) += (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++) + x6 * (*SFP6ptr++) + x7 * (*SFP7ptr++) + x8 * (*SFP8ptr++) + x9 * (*SFP9ptr++) + x10 * (*SFP10ptr++) + x11 * (*SFP11ptr++) + x12 * (*SFP12ptr++) + x13 * (*SFP13ptr++) + x14 * (*SFP14ptr++) + x15 * (*SFP15ptr++) + x16 * (*SFP16ptr++) + x17 * (*SFP17ptr++)); + } + t_v++; + } + break; + case 19: + xPtr1 = xPtr0 + nV; + xPtr2 = xPtr1 + nV; + xPtr3 = xPtr2 + nV; + xPtr4 = xPtr3 + nV; + xPtr5 = xPtr4 + nV; + xPtr6 = xPtr5 + nV; + xPtr7 = xPtr6 + nV; + xPtr8 = xPtr7 + nV; + xPtr9 = xPtr8 + nV; + xPtr10 = xPtr9 + nV; + xPtr11 = xPtr10 + nV; + xPtr12 = xPtr11 + nV; + xPtr13 = xPtr12 + nV; + xPtr14 = xPtr13 + nV; + xPtr15 = xPtr14 + nV; + xPtr16 = xPtr15 + nV; + xPtr17 = xPtr16 + nV; + xPtr18 = xPtr17 + nV; + while (t_v != t_vEnd) + { + x0 = *xPtr0++; + x1 = *xPtr1++; + x2 = *xPtr2++; + x3 = *xPtr3++; + x4 = *xPtr4++; + x5 = *xPtr5++; + x6 = *xPtr6++; + x7 = *xPtr7++; + x8 = *xPtr8++; + x9 = *xPtr9++; + x10 = *xPtr10++; + x11 = *xPtr11++; + x12 = *xPtr12++; + x13 = *xPtr13++; + x14 = *xPtr14++; + x15 = *xPtr15++; + x16 = *xPtr16++; + x17 = *xPtr17++; + x18 = *xPtr18++; + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0 || x6 != 0 || x7 != 0 || x8 != 0 || x9 != 0 || x10 != 0 || x11 != 0 || x12 != 0 || x13 != 0 || x14 != 0 || x15 != 0 || x16 != 0 || x17 != 0 || x18 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + SFP0ptr = isoSFP0; + SFP1ptr = isoSFP1; + SFP2ptr = isoSFP2; + SFP3ptr = isoSFP3; + SFP4ptr = isoSFP4; + SFP5ptr = isoSFP5; + SFP6ptr = isoSFP6; + SFP7ptr = isoSFP7; + SFP8ptr = isoSFP8; + SFP9ptr = isoSFP9; + SFP10ptr = isoSFP10; + SFP11ptr = isoSFP11; + SFP12ptr = isoSFP12; + SFP13ptr = isoSFP13; + SFP14ptr = isoSFP14; + SFP15ptr = isoSFP15; + SFP16ptr = isoSFP16; + SFP17ptr = isoSFP17; + SFP18ptr = isoSFP18; + while (YPtr != YPtrEnd) + (*YPtr++) += (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++) + x6 * (*SFP6ptr++) + x7 * (*SFP7ptr++) + x8 * (*SFP8ptr++) + x9 * (*SFP9ptr++) + x10 * (*SFP10ptr++) + x11 * (*SFP11ptr++) + x12 * (*SFP12ptr++) + x13 * (*SFP13ptr++) + x14 * (*SFP14ptr++) + x15 * (*SFP15ptr++) + x16 * (*SFP16ptr++) + x17 * (*SFP17ptr++) + x18 * (*SFP18ptr++)); + } + t_v++; + } + break; + case 20: + xPtr1 = xPtr0 + nV; + xPtr2 = xPtr1 + nV; + xPtr3 = xPtr2 + nV; + xPtr4 = xPtr3 + nV; + xPtr5 = xPtr4 + nV; + xPtr6 = xPtr5 + nV; + xPtr7 = xPtr6 + nV; + xPtr8 = xPtr7 + nV; + xPtr9 = xPtr8 + nV; + xPtr10 = xPtr9 + nV; + xPtr11 = xPtr10 + nV; + xPtr12 = xPtr11 + nV; + xPtr13 = xPtr12 + nV; + xPtr14 = xPtr13 + nV; + xPtr15 = xPtr14 + nV; + xPtr16 = xPtr15 + nV; + xPtr17 = xPtr16 + nV; + xPtr18 = xPtr17 + nV; + xPtr19 = xPtr18 + nV; + while (t_v != t_vEnd) + { + x0 = *xPtr0++; + x1 = *xPtr1++; + x2 = *xPtr2++; + x3 = *xPtr3++; + x4 = *xPtr4++; + x5 = *xPtr5++; + x6 = *xPtr6++; + x7 = *xPtr7++; + x8 = *xPtr8++; + x9 = *xPtr9++; + x10 = *xPtr10++; + x11 = *xPtr11++; + x12 = *xPtr12++; + x13 = *xPtr13++; + x14 = *xPtr14++; + x15 = *xPtr15++; + x16 = *xPtr16++; + x17 = *xPtr17++; + x18 = *xPtr18++; + x19 = *xPtr19++; + if (x0 != 0 || x1 != 0 || x2 != 0 || x3 != 0 || x4 != 0 || x5 != 0 || x6 != 0 || x7 != 0 || x8 != 0 || x9 != 0 || x10 != 0 || x11 != 0 || x12 != 0 || x13 != 0 || x14 != 0 || x15 != 0 || x16 != 0 || x17 != 0 || x18 != 0 || x19 != 0) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + SFP0ptr = isoSFP0; + SFP1ptr = isoSFP1; + SFP2ptr = isoSFP2; + SFP3ptr = isoSFP3; + SFP4ptr = isoSFP4; + SFP5ptr = isoSFP5; + SFP6ptr = isoSFP6; + SFP7ptr = isoSFP7; + SFP8ptr = isoSFP8; + SFP9ptr = isoSFP9; + SFP10ptr = isoSFP10; + SFP11ptr = isoSFP11; + SFP12ptr = isoSFP12; + SFP13ptr = isoSFP13; + SFP14ptr = isoSFP14; + SFP15ptr = isoSFP15; + SFP16ptr = isoSFP16; + SFP17ptr = isoSFP17; + SFP18ptr = isoSFP18; + SFP19ptr = isoSFP19; + while (YPtr != YPtrEnd) + (*YPtr++) += (x0 * (*SFP0ptr++) + x1 * (*SFP1ptr++) + x2 * (*SFP2ptr++) + x3 * (*SFP3ptr++) + x4 * (*SFP4ptr++) + x5 * (*SFP5ptr++) + x6 * (*SFP6ptr++) + x7 * (*SFP7ptr++) + x8 * (*SFP8ptr++) + x9 * (*SFP9ptr++) + x10 * (*SFP10ptr++) + x11 * (*SFP11ptr++) + x12 * (*SFP12ptr++) + x13 * (*SFP13ptr++) + x14 * (*SFP14ptr++) + x15 * (*SFP15ptr++) + x16 * (*SFP16ptr++) + x17 * (*SFP17ptr++) + x18 * (*SFP18ptr++) + x19 * (*SFP19ptr++)); + } + t_v++; + } + break; + } + } + + pthread_exit( 0 ); +} + +// +// Function called by Cython +// +void COMMIT_A( + int _nF, int _nE, int _nV, int _nS, int _ndirs, + double *_vIN, double *_vOUT, + uint32_t *_ICf, uint32_t *_ICeval, uint32_t *_ICv, uint16_t *_ICo, float *_ICl, + uint32_t *_ECv, uint16_t *_ECo, + uint32_t *_ISOv, + float *_wmrSFP, float *_wmhSFP, float *_isoSFP, + uint32_t* _ICthreads, uint32_t* _ECthreads, uint32_t* _ISOthreads, + uint32_t _nIC, uint32_t _nEC, uint32_t _nISO, uint32_t _nThreads +) +{ + nF = _nF; + nE = _nE; + nV = _nV; + nS = _nS; + ndirs = _ndirs; + + x = _vIN; + Y = _vOUT; + + ICf = _ICf; + ICeval = _ICeval; + ICv = _ICv; + ICo = _ICo; + ICl = _ICl; + ECv = _ECv; + ECo = _ECo; + ISOv = _ISOv; + + nIC = _nIC; + nEC = _nEC; + nISO = _nISO; + + switch (nIC) + { + case 1: + wmrSFP0 = _wmrSFP; + break; + case 2: + wmrSFP0 = _wmrSFP; + wmrSFP1 = wmrSFP0 + _ndirs*_nS; + break; + case 3: + wmrSFP0 = _wmrSFP; + wmrSFP1 = wmrSFP0 + _ndirs*_nS; + wmrSFP2 = wmrSFP1 + _ndirs*_nS; + break; + case 4: + wmrSFP0 = _wmrSFP; + wmrSFP1 = wmrSFP0 + _ndirs*_nS; + wmrSFP2 = wmrSFP1 + _ndirs*_nS; + wmrSFP3 = wmrSFP2 + _ndirs*_nS; + break; + case 5: + wmrSFP0 = _wmrSFP; + wmrSFP1 = wmrSFP0 + _ndirs*_nS; + wmrSFP2 = wmrSFP1 + _ndirs*_nS; + wmrSFP3 = wmrSFP2 + _ndirs*_nS; + wmrSFP4 = wmrSFP3 + _ndirs*_nS; + break; + case 6: + wmrSFP0 = _wmrSFP; + wmrSFP1 = wmrSFP0 + _ndirs*_nS; + wmrSFP2 = wmrSFP1 + _ndirs*_nS; + wmrSFP3 = wmrSFP2 + _ndirs*_nS; + wmrSFP4 = wmrSFP3 + _ndirs*_nS; + wmrSFP5 = wmrSFP4 + _ndirs*_nS; + break; + case 7: + wmrSFP0 = _wmrSFP; + wmrSFP1 = wmrSFP0 + _ndirs*_nS; + wmrSFP2 = wmrSFP1 + _ndirs*_nS; + wmrSFP3 = wmrSFP2 + _ndirs*_nS; + wmrSFP4 = wmrSFP3 + _ndirs*_nS; + wmrSFP5 = wmrSFP4 + _ndirs*_nS; + wmrSFP6 = wmrSFP5 + _ndirs*_nS; + break; + case 8: + wmrSFP0 = _wmrSFP; + wmrSFP1 = wmrSFP0 + _ndirs*_nS; + wmrSFP2 = wmrSFP1 + _ndirs*_nS; + wmrSFP3 = wmrSFP2 + _ndirs*_nS; + wmrSFP4 = wmrSFP3 + _ndirs*_nS; + wmrSFP5 = wmrSFP4 + _ndirs*_nS; + wmrSFP6 = wmrSFP5 + _ndirs*_nS; + wmrSFP7 = wmrSFP6 + _ndirs*_nS; + break; + case 9: + wmrSFP0 = _wmrSFP; + wmrSFP1 = wmrSFP0 + _ndirs*_nS; + wmrSFP2 = wmrSFP1 + _ndirs*_nS; + wmrSFP3 = wmrSFP2 + _ndirs*_nS; + wmrSFP4 = wmrSFP3 + _ndirs*_nS; + wmrSFP5 = wmrSFP4 + _ndirs*_nS; + wmrSFP6 = wmrSFP5 + _ndirs*_nS; + wmrSFP7 = wmrSFP6 + _ndirs*_nS; + wmrSFP8 = wmrSFP7 + _ndirs*_nS; + break; + case 10: + wmrSFP0 = _wmrSFP; + wmrSFP1 = wmrSFP0 + _ndirs*_nS; + wmrSFP2 = wmrSFP1 + _ndirs*_nS; + wmrSFP3 = wmrSFP2 + _ndirs*_nS; + wmrSFP4 = wmrSFP3 + _ndirs*_nS; + wmrSFP5 = wmrSFP4 + _ndirs*_nS; + wmrSFP6 = wmrSFP5 + _ndirs*_nS; + wmrSFP7 = wmrSFP6 + _ndirs*_nS; + wmrSFP8 = wmrSFP7 + _ndirs*_nS; + wmrSFP9 = wmrSFP8 + _ndirs*_nS; + break; + case 11: + wmrSFP0 = _wmrSFP; + wmrSFP1 = wmrSFP0 + _ndirs*_nS; + wmrSFP2 = wmrSFP1 + _ndirs*_nS; + wmrSFP3 = wmrSFP2 + _ndirs*_nS; + wmrSFP4 = wmrSFP3 + _ndirs*_nS; + wmrSFP5 = wmrSFP4 + _ndirs*_nS; + wmrSFP6 = wmrSFP5 + _ndirs*_nS; + wmrSFP7 = wmrSFP6 + _ndirs*_nS; + wmrSFP8 = wmrSFP7 + _ndirs*_nS; + wmrSFP9 = wmrSFP8 + _ndirs*_nS; + wmrSFP10 = wmrSFP9 + _ndirs*_nS; + break; + case 12: + wmrSFP0 = _wmrSFP; + wmrSFP1 = wmrSFP0 + _ndirs*_nS; + wmrSFP2 = wmrSFP1 + _ndirs*_nS; + wmrSFP3 = wmrSFP2 + _ndirs*_nS; + wmrSFP4 = wmrSFP3 + _ndirs*_nS; + wmrSFP5 = wmrSFP4 + _ndirs*_nS; + wmrSFP6 = wmrSFP5 + _ndirs*_nS; + wmrSFP7 = wmrSFP6 + _ndirs*_nS; + wmrSFP8 = wmrSFP7 + _ndirs*_nS; + wmrSFP9 = wmrSFP8 + _ndirs*_nS; + wmrSFP10 = wmrSFP9 + _ndirs*_nS; + wmrSFP11 = wmrSFP10 + _ndirs*_nS; + break; + case 13: + wmrSFP0 = _wmrSFP; + wmrSFP1 = wmrSFP0 + _ndirs*_nS; + wmrSFP2 = wmrSFP1 + _ndirs*_nS; + wmrSFP3 = wmrSFP2 + _ndirs*_nS; + wmrSFP4 = wmrSFP3 + _ndirs*_nS; + wmrSFP5 = wmrSFP4 + _ndirs*_nS; + wmrSFP6 = wmrSFP5 + _ndirs*_nS; + wmrSFP7 = wmrSFP6 + _ndirs*_nS; + wmrSFP8 = wmrSFP7 + _ndirs*_nS; + wmrSFP9 = wmrSFP8 + _ndirs*_nS; + wmrSFP10 = wmrSFP9 + _ndirs*_nS; + wmrSFP11 = wmrSFP10 + _ndirs*_nS; + wmrSFP12 = wmrSFP11 + _ndirs*_nS; + break; + case 14: + wmrSFP0 = _wmrSFP; + wmrSFP1 = wmrSFP0 + _ndirs*_nS; + wmrSFP2 = wmrSFP1 + _ndirs*_nS; + wmrSFP3 = wmrSFP2 + _ndirs*_nS; + wmrSFP4 = wmrSFP3 + _ndirs*_nS; + wmrSFP5 = wmrSFP4 + _ndirs*_nS; + wmrSFP6 = wmrSFP5 + _ndirs*_nS; + wmrSFP7 = wmrSFP6 + _ndirs*_nS; + wmrSFP8 = wmrSFP7 + _ndirs*_nS; + wmrSFP9 = wmrSFP8 + _ndirs*_nS; + wmrSFP10 = wmrSFP9 + _ndirs*_nS; + wmrSFP11 = wmrSFP10 + _ndirs*_nS; + wmrSFP12 = wmrSFP11 + _ndirs*_nS; + wmrSFP13 = wmrSFP12 + _ndirs*_nS; + break; + case 15: + wmrSFP0 = _wmrSFP; + wmrSFP1 = wmrSFP0 + _ndirs*_nS; + wmrSFP2 = wmrSFP1 + _ndirs*_nS; + wmrSFP3 = wmrSFP2 + _ndirs*_nS; + wmrSFP4 = wmrSFP3 + _ndirs*_nS; + wmrSFP5 = wmrSFP4 + _ndirs*_nS; + wmrSFP6 = wmrSFP5 + _ndirs*_nS; + wmrSFP7 = wmrSFP6 + _ndirs*_nS; + wmrSFP8 = wmrSFP7 + _ndirs*_nS; + wmrSFP9 = wmrSFP8 + _ndirs*_nS; + wmrSFP10 = wmrSFP9 + _ndirs*_nS; + wmrSFP11 = wmrSFP10 + _ndirs*_nS; + wmrSFP12 = wmrSFP11 + _ndirs*_nS; + wmrSFP13 = wmrSFP12 + _ndirs*_nS; + wmrSFP14 = wmrSFP13 + _ndirs*_nS; + break; + case 16: + wmrSFP0 = _wmrSFP; + wmrSFP1 = wmrSFP0 + _ndirs*_nS; + wmrSFP2 = wmrSFP1 + _ndirs*_nS; + wmrSFP3 = wmrSFP2 + _ndirs*_nS; + wmrSFP4 = wmrSFP3 + _ndirs*_nS; + wmrSFP5 = wmrSFP4 + _ndirs*_nS; + wmrSFP6 = wmrSFP5 + _ndirs*_nS; + wmrSFP7 = wmrSFP6 + _ndirs*_nS; + wmrSFP8 = wmrSFP7 + _ndirs*_nS; + wmrSFP9 = wmrSFP8 + _ndirs*_nS; + wmrSFP10 = wmrSFP9 + _ndirs*_nS; + wmrSFP11 = wmrSFP10 + _ndirs*_nS; + wmrSFP12 = wmrSFP11 + _ndirs*_nS; + wmrSFP13 = wmrSFP12 + _ndirs*_nS; + wmrSFP14 = wmrSFP13 + _ndirs*_nS; + wmrSFP15 = wmrSFP14 + _ndirs*_nS; + break; + case 17: + wmrSFP0 = _wmrSFP; + wmrSFP1 = wmrSFP0 + _ndirs*_nS; + wmrSFP2 = wmrSFP1 + _ndirs*_nS; + wmrSFP3 = wmrSFP2 + _ndirs*_nS; + wmrSFP4 = wmrSFP3 + _ndirs*_nS; + wmrSFP5 = wmrSFP4 + _ndirs*_nS; + wmrSFP6 = wmrSFP5 + _ndirs*_nS; + wmrSFP7 = wmrSFP6 + _ndirs*_nS; + wmrSFP8 = wmrSFP7 + _ndirs*_nS; + wmrSFP9 = wmrSFP8 + _ndirs*_nS; + wmrSFP10 = wmrSFP9 + _ndirs*_nS; + wmrSFP11 = wmrSFP10 + _ndirs*_nS; + wmrSFP12 = wmrSFP11 + _ndirs*_nS; + wmrSFP13 = wmrSFP12 + _ndirs*_nS; + wmrSFP14 = wmrSFP13 + _ndirs*_nS; + wmrSFP15 = wmrSFP14 + _ndirs*_nS; + wmrSFP16 = wmrSFP15 + _ndirs*_nS; + break; + case 18: + wmrSFP0 = _wmrSFP; + wmrSFP1 = wmrSFP0 + _ndirs*_nS; + wmrSFP2 = wmrSFP1 + _ndirs*_nS; + wmrSFP3 = wmrSFP2 + _ndirs*_nS; + wmrSFP4 = wmrSFP3 + _ndirs*_nS; + wmrSFP5 = wmrSFP4 + _ndirs*_nS; + wmrSFP6 = wmrSFP5 + _ndirs*_nS; + wmrSFP7 = wmrSFP6 + _ndirs*_nS; + wmrSFP8 = wmrSFP7 + _ndirs*_nS; + wmrSFP9 = wmrSFP8 + _ndirs*_nS; + wmrSFP10 = wmrSFP9 + _ndirs*_nS; + wmrSFP11 = wmrSFP10 + _ndirs*_nS; + wmrSFP12 = wmrSFP11 + _ndirs*_nS; + wmrSFP13 = wmrSFP12 + _ndirs*_nS; + wmrSFP14 = wmrSFP13 + _ndirs*_nS; + wmrSFP15 = wmrSFP14 + _ndirs*_nS; + wmrSFP16 = wmrSFP15 + _ndirs*_nS; + wmrSFP17 = wmrSFP16 + _ndirs*_nS; + break; + case 19: + wmrSFP0 = _wmrSFP; + wmrSFP1 = wmrSFP0 + _ndirs*_nS; + wmrSFP2 = wmrSFP1 + _ndirs*_nS; + wmrSFP3 = wmrSFP2 + _ndirs*_nS; + wmrSFP4 = wmrSFP3 + _ndirs*_nS; + wmrSFP5 = wmrSFP4 + _ndirs*_nS; + wmrSFP6 = wmrSFP5 + _ndirs*_nS; + wmrSFP7 = wmrSFP6 + _ndirs*_nS; + wmrSFP8 = wmrSFP7 + _ndirs*_nS; + wmrSFP9 = wmrSFP8 + _ndirs*_nS; + wmrSFP10 = wmrSFP9 + _ndirs*_nS; + wmrSFP11 = wmrSFP10 + _ndirs*_nS; + wmrSFP12 = wmrSFP11 + _ndirs*_nS; + wmrSFP13 = wmrSFP12 + _ndirs*_nS; + wmrSFP14 = wmrSFP13 + _ndirs*_nS; + wmrSFP15 = wmrSFP14 + _ndirs*_nS; + wmrSFP16 = wmrSFP15 + _ndirs*_nS; + wmrSFP17 = wmrSFP16 + _ndirs*_nS; + wmrSFP18 = wmrSFP17 + _ndirs*_nS; + break; + case 20: + wmrSFP0 = _wmrSFP; + wmrSFP1 = wmrSFP0 + _ndirs*_nS; + wmrSFP2 = wmrSFP1 + _ndirs*_nS; + wmrSFP3 = wmrSFP2 + _ndirs*_nS; + wmrSFP4 = wmrSFP3 + _ndirs*_nS; + wmrSFP5 = wmrSFP4 + _ndirs*_nS; + wmrSFP6 = wmrSFP5 + _ndirs*_nS; + wmrSFP7 = wmrSFP6 + _ndirs*_nS; + wmrSFP8 = wmrSFP7 + _ndirs*_nS; + wmrSFP9 = wmrSFP8 + _ndirs*_nS; + wmrSFP10 = wmrSFP9 + _ndirs*_nS; + wmrSFP11 = wmrSFP10 + _ndirs*_nS; + wmrSFP12 = wmrSFP11 + _ndirs*_nS; + wmrSFP13 = wmrSFP12 + _ndirs*_nS; + wmrSFP14 = wmrSFP13 + _ndirs*_nS; + wmrSFP15 = wmrSFP14 + _ndirs*_nS; + wmrSFP16 = wmrSFP15 + _ndirs*_nS; + wmrSFP17 = wmrSFP16 + _ndirs*_nS; + wmrSFP18 = wmrSFP17 + _ndirs*_nS; + wmrSFP19 = wmrSFP18 + _ndirs*_nS; + break; + } + + switch (nEC) + { + case 1: + wmhSFP0 = _wmhSFP; + break; + case 2: + wmhSFP0 = _wmhSFP; + wmhSFP1 = wmhSFP0 + _ndirs*_nS; + break; + case 3: + wmhSFP0 = _wmhSFP; + wmhSFP1 = wmhSFP0 + _ndirs*_nS; + wmhSFP2 = wmhSFP1 + _ndirs*_nS; + break; + case 4: + wmhSFP0 = _wmhSFP; + wmhSFP1 = wmhSFP0 + _ndirs*_nS; + wmhSFP2 = wmhSFP1 + _ndirs*_nS; + wmhSFP3 = wmhSFP2 + _ndirs*_nS; + break; + case 5: + wmhSFP0 = _wmhSFP; + wmhSFP1 = wmhSFP0 + _ndirs*_nS; + wmhSFP2 = wmhSFP1 + _ndirs*_nS; + wmhSFP3 = wmhSFP2 + _ndirs*_nS; + wmhSFP4 = wmhSFP3 + _ndirs*_nS; + break; + case 6: + wmhSFP0 = _wmhSFP; + wmhSFP1 = wmhSFP0 + _ndirs*_nS; + wmhSFP2 = wmhSFP1 + _ndirs*_nS; + wmhSFP3 = wmhSFP2 + _ndirs*_nS; + wmhSFP4 = wmhSFP3 + _ndirs*_nS; + wmhSFP5 = wmhSFP4 + _ndirs*_nS; + break; + case 7: + wmhSFP0 = _wmhSFP; + wmhSFP1 = wmhSFP0 + _ndirs*_nS; + wmhSFP2 = wmhSFP1 + _ndirs*_nS; + wmhSFP3 = wmhSFP2 + _ndirs*_nS; + wmhSFP4 = wmhSFP3 + _ndirs*_nS; + wmhSFP5 = wmhSFP4 + _ndirs*_nS; + wmhSFP6 = wmhSFP5 + _ndirs*_nS; + break; + case 8: + wmhSFP0 = _wmhSFP; + wmhSFP1 = wmhSFP0 + _ndirs*_nS; + wmhSFP2 = wmhSFP1 + _ndirs*_nS; + wmhSFP3 = wmhSFP2 + _ndirs*_nS; + wmhSFP4 = wmhSFP3 + _ndirs*_nS; + wmhSFP5 = wmhSFP4 + _ndirs*_nS; + wmhSFP6 = wmhSFP5 + _ndirs*_nS; + wmhSFP7 = wmhSFP6 + _ndirs*_nS; + break; + case 9: + wmhSFP0 = _wmhSFP; + wmhSFP1 = wmhSFP0 + _ndirs*_nS; + wmhSFP2 = wmhSFP1 + _ndirs*_nS; + wmhSFP3 = wmhSFP2 + _ndirs*_nS; + wmhSFP4 = wmhSFP3 + _ndirs*_nS; + wmhSFP5 = wmhSFP4 + _ndirs*_nS; + wmhSFP6 = wmhSFP5 + _ndirs*_nS; + wmhSFP7 = wmhSFP6 + _ndirs*_nS; + wmhSFP8 = wmhSFP7 + _ndirs*_nS; + break; + case 10: + wmhSFP0 = _wmhSFP; + wmhSFP1 = wmhSFP0 + _ndirs*_nS; + wmhSFP2 = wmhSFP1 + _ndirs*_nS; + wmhSFP3 = wmhSFP2 + _ndirs*_nS; + wmhSFP4 = wmhSFP3 + _ndirs*_nS; + wmhSFP5 = wmhSFP4 + _ndirs*_nS; + wmhSFP6 = wmhSFP5 + _ndirs*_nS; + wmhSFP7 = wmhSFP6 + _ndirs*_nS; + wmhSFP8 = wmhSFP7 + _ndirs*_nS; + wmhSFP9 = wmhSFP8 + _ndirs*_nS; + break; + case 11: + wmhSFP0 = _wmhSFP; + wmhSFP1 = wmhSFP0 + _ndirs*_nS; + wmhSFP2 = wmhSFP1 + _ndirs*_nS; + wmhSFP3 = wmhSFP2 + _ndirs*_nS; + wmhSFP4 = wmhSFP3 + _ndirs*_nS; + wmhSFP5 = wmhSFP4 + _ndirs*_nS; + wmhSFP6 = wmhSFP5 + _ndirs*_nS; + wmhSFP7 = wmhSFP6 + _ndirs*_nS; + wmhSFP8 = wmhSFP7 + _ndirs*_nS; + wmhSFP9 = wmhSFP8 + _ndirs*_nS; + wmhSFP10 = wmhSFP9 + _ndirs*_nS; + break; + case 12: + wmhSFP0 = _wmhSFP; + wmhSFP1 = wmhSFP0 + _ndirs*_nS; + wmhSFP2 = wmhSFP1 + _ndirs*_nS; + wmhSFP3 = wmhSFP2 + _ndirs*_nS; + wmhSFP4 = wmhSFP3 + _ndirs*_nS; + wmhSFP5 = wmhSFP4 + _ndirs*_nS; + wmhSFP6 = wmhSFP5 + _ndirs*_nS; + wmhSFP7 = wmhSFP6 + _ndirs*_nS; + wmhSFP8 = wmhSFP7 + _ndirs*_nS; + wmhSFP9 = wmhSFP8 + _ndirs*_nS; + wmhSFP10 = wmhSFP9 + _ndirs*_nS; + wmhSFP11 = wmhSFP10 + _ndirs*_nS; + break; + case 13: + wmhSFP0 = _wmhSFP; + wmhSFP1 = wmhSFP0 + _ndirs*_nS; + wmhSFP2 = wmhSFP1 + _ndirs*_nS; + wmhSFP3 = wmhSFP2 + _ndirs*_nS; + wmhSFP4 = wmhSFP3 + _ndirs*_nS; + wmhSFP5 = wmhSFP4 + _ndirs*_nS; + wmhSFP6 = wmhSFP5 + _ndirs*_nS; + wmhSFP7 = wmhSFP6 + _ndirs*_nS; + wmhSFP8 = wmhSFP7 + _ndirs*_nS; + wmhSFP9 = wmhSFP8 + _ndirs*_nS; + wmhSFP10 = wmhSFP9 + _ndirs*_nS; + wmhSFP11 = wmhSFP10 + _ndirs*_nS; + wmhSFP12 = wmhSFP11 + _ndirs*_nS; + break; + case 14: + wmhSFP0 = _wmhSFP; + wmhSFP1 = wmhSFP0 + _ndirs*_nS; + wmhSFP2 = wmhSFP1 + _ndirs*_nS; + wmhSFP3 = wmhSFP2 + _ndirs*_nS; + wmhSFP4 = wmhSFP3 + _ndirs*_nS; + wmhSFP5 = wmhSFP4 + _ndirs*_nS; + wmhSFP6 = wmhSFP5 + _ndirs*_nS; + wmhSFP7 = wmhSFP6 + _ndirs*_nS; + wmhSFP8 = wmhSFP7 + _ndirs*_nS; + wmhSFP9 = wmhSFP8 + _ndirs*_nS; + wmhSFP10 = wmhSFP9 + _ndirs*_nS; + wmhSFP11 = wmhSFP10 + _ndirs*_nS; + wmhSFP12 = wmhSFP11 + _ndirs*_nS; + wmhSFP13 = wmhSFP12 + _ndirs*_nS; + break; + case 15: + wmhSFP0 = _wmhSFP; + wmhSFP1 = wmhSFP0 + _ndirs*_nS; + wmhSFP2 = wmhSFP1 + _ndirs*_nS; + wmhSFP3 = wmhSFP2 + _ndirs*_nS; + wmhSFP4 = wmhSFP3 + _ndirs*_nS; + wmhSFP5 = wmhSFP4 + _ndirs*_nS; + wmhSFP6 = wmhSFP5 + _ndirs*_nS; + wmhSFP7 = wmhSFP6 + _ndirs*_nS; + wmhSFP8 = wmhSFP7 + _ndirs*_nS; + wmhSFP9 = wmhSFP8 + _ndirs*_nS; + wmhSFP10 = wmhSFP9 + _ndirs*_nS; + wmhSFP11 = wmhSFP10 + _ndirs*_nS; + wmhSFP12 = wmhSFP11 + _ndirs*_nS; + wmhSFP13 = wmhSFP12 + _ndirs*_nS; + wmhSFP14 = wmhSFP13 + _ndirs*_nS; + break; + case 16: + wmhSFP0 = _wmhSFP; + wmhSFP1 = wmhSFP0 + _ndirs*_nS; + wmhSFP2 = wmhSFP1 + _ndirs*_nS; + wmhSFP3 = wmhSFP2 + _ndirs*_nS; + wmhSFP4 = wmhSFP3 + _ndirs*_nS; + wmhSFP5 = wmhSFP4 + _ndirs*_nS; + wmhSFP6 = wmhSFP5 + _ndirs*_nS; + wmhSFP7 = wmhSFP6 + _ndirs*_nS; + wmhSFP8 = wmhSFP7 + _ndirs*_nS; + wmhSFP9 = wmhSFP8 + _ndirs*_nS; + wmhSFP10 = wmhSFP9 + _ndirs*_nS; + wmhSFP11 = wmhSFP10 + _ndirs*_nS; + wmhSFP12 = wmhSFP11 + _ndirs*_nS; + wmhSFP13 = wmhSFP12 + _ndirs*_nS; + wmhSFP14 = wmhSFP13 + _ndirs*_nS; + wmhSFP15 = wmhSFP14 + _ndirs*_nS; + break; + case 17: + wmhSFP0 = _wmhSFP; + wmhSFP1 = wmhSFP0 + _ndirs*_nS; + wmhSFP2 = wmhSFP1 + _ndirs*_nS; + wmhSFP3 = wmhSFP2 + _ndirs*_nS; + wmhSFP4 = wmhSFP3 + _ndirs*_nS; + wmhSFP5 = wmhSFP4 + _ndirs*_nS; + wmhSFP6 = wmhSFP5 + _ndirs*_nS; + wmhSFP7 = wmhSFP6 + _ndirs*_nS; + wmhSFP8 = wmhSFP7 + _ndirs*_nS; + wmhSFP9 = wmhSFP8 + _ndirs*_nS; + wmhSFP10 = wmhSFP9 + _ndirs*_nS; + wmhSFP11 = wmhSFP10 + _ndirs*_nS; + wmhSFP12 = wmhSFP11 + _ndirs*_nS; + wmhSFP13 = wmhSFP12 + _ndirs*_nS; + wmhSFP14 = wmhSFP13 + _ndirs*_nS; + wmhSFP15 = wmhSFP14 + _ndirs*_nS; + wmhSFP16 = wmhSFP15 + _ndirs*_nS; + break; + case 18: + wmhSFP0 = _wmhSFP; + wmhSFP1 = wmhSFP0 + _ndirs*_nS; + wmhSFP2 = wmhSFP1 + _ndirs*_nS; + wmhSFP3 = wmhSFP2 + _ndirs*_nS; + wmhSFP4 = wmhSFP3 + _ndirs*_nS; + wmhSFP5 = wmhSFP4 + _ndirs*_nS; + wmhSFP6 = wmhSFP5 + _ndirs*_nS; + wmhSFP7 = wmhSFP6 + _ndirs*_nS; + wmhSFP8 = wmhSFP7 + _ndirs*_nS; + wmhSFP9 = wmhSFP8 + _ndirs*_nS; + wmhSFP10 = wmhSFP9 + _ndirs*_nS; + wmhSFP11 = wmhSFP10 + _ndirs*_nS; + wmhSFP12 = wmhSFP11 + _ndirs*_nS; + wmhSFP13 = wmhSFP12 + _ndirs*_nS; + wmhSFP14 = wmhSFP13 + _ndirs*_nS; + wmhSFP15 = wmhSFP14 + _ndirs*_nS; + wmhSFP16 = wmhSFP15 + _ndirs*_nS; + wmhSFP17 = wmhSFP16 + _ndirs*_nS; + break; + case 19: + wmhSFP0 = _wmhSFP; + wmhSFP1 = wmhSFP0 + _ndirs*_nS; + wmhSFP2 = wmhSFP1 + _ndirs*_nS; + wmhSFP3 = wmhSFP2 + _ndirs*_nS; + wmhSFP4 = wmhSFP3 + _ndirs*_nS; + wmhSFP5 = wmhSFP4 + _ndirs*_nS; + wmhSFP6 = wmhSFP5 + _ndirs*_nS; + wmhSFP7 = wmhSFP6 + _ndirs*_nS; + wmhSFP8 = wmhSFP7 + _ndirs*_nS; + wmhSFP9 = wmhSFP8 + _ndirs*_nS; + wmhSFP10 = wmhSFP9 + _ndirs*_nS; + wmhSFP11 = wmhSFP10 + _ndirs*_nS; + wmhSFP12 = wmhSFP11 + _ndirs*_nS; + wmhSFP13 = wmhSFP12 + _ndirs*_nS; + wmhSFP14 = wmhSFP13 + _ndirs*_nS; + wmhSFP15 = wmhSFP14 + _ndirs*_nS; + wmhSFP16 = wmhSFP15 + _ndirs*_nS; + wmhSFP17 = wmhSFP16 + _ndirs*_nS; + wmhSFP18 = wmhSFP17 + _ndirs*_nS; + break; + case 20: + wmhSFP0 = _wmhSFP; + wmhSFP1 = wmhSFP0 + _ndirs*_nS; + wmhSFP2 = wmhSFP1 + _ndirs*_nS; + wmhSFP3 = wmhSFP2 + _ndirs*_nS; + wmhSFP4 = wmhSFP3 + _ndirs*_nS; + wmhSFP5 = wmhSFP4 + _ndirs*_nS; + wmhSFP6 = wmhSFP5 + _ndirs*_nS; + wmhSFP7 = wmhSFP6 + _ndirs*_nS; + wmhSFP8 = wmhSFP7 + _ndirs*_nS; + wmhSFP9 = wmhSFP8 + _ndirs*_nS; + wmhSFP10 = wmhSFP9 + _ndirs*_nS; + wmhSFP11 = wmhSFP10 + _ndirs*_nS; + wmhSFP12 = wmhSFP11 + _ndirs*_nS; + wmhSFP13 = wmhSFP12 + _ndirs*_nS; + wmhSFP14 = wmhSFP13 + _ndirs*_nS; + wmhSFP15 = wmhSFP14 + _ndirs*_nS; + wmhSFP16 = wmhSFP15 + _ndirs*_nS; + wmhSFP17 = wmhSFP16 + _ndirs*_nS; + wmhSFP18 = wmhSFP17 + _ndirs*_nS; + wmhSFP19 = wmhSFP18 + _ndirs*_nS; + break; + } + + switch (nISO) + { + case 1: + isoSFP0 = _isoSFP; + break; + case 2: + isoSFP0 = _isoSFP; + isoSFP1 = isoSFP0 + _nS; + break; + case 3: + isoSFP0 = _isoSFP; + isoSFP1 = isoSFP0 + _nS; + isoSFP2 = isoSFP1 + _nS; + break; + case 4: + isoSFP0 = _isoSFP; + isoSFP1 = isoSFP0 + _nS; + isoSFP2 = isoSFP1 + _nS; + isoSFP3 = isoSFP2 + _nS; + break; + case 5: + isoSFP0 = _isoSFP; + isoSFP1 = isoSFP0 + _nS; + isoSFP2 = isoSFP1 + _nS; + isoSFP3 = isoSFP2 + _nS; + isoSFP4 = isoSFP3 + _nS; + break; + case 6: + isoSFP0 = _isoSFP; + isoSFP1 = isoSFP0 + _nS; + isoSFP2 = isoSFP1 + _nS; + isoSFP3 = isoSFP2 + _nS; + isoSFP4 = isoSFP3 + _nS; + isoSFP5 = isoSFP4 + _nS; + break; + case 7: + isoSFP0 = _isoSFP; + isoSFP1 = isoSFP0 + _nS; + isoSFP2 = isoSFP1 + _nS; + isoSFP3 = isoSFP2 + _nS; + isoSFP4 = isoSFP3 + _nS; + isoSFP5 = isoSFP4 + _nS; + isoSFP6 = isoSFP5 + _nS; + break; + case 8: + isoSFP0 = _isoSFP; + isoSFP1 = isoSFP0 + _nS; + isoSFP2 = isoSFP1 + _nS; + isoSFP3 = isoSFP2 + _nS; + isoSFP4 = isoSFP3 + _nS; + isoSFP5 = isoSFP4 + _nS; + isoSFP6 = isoSFP5 + _nS; + isoSFP7 = isoSFP6 + _nS; + break; + case 9: + isoSFP0 = _isoSFP; + isoSFP1 = isoSFP0 + _nS; + isoSFP2 = isoSFP1 + _nS; + isoSFP3 = isoSFP2 + _nS; + isoSFP4 = isoSFP3 + _nS; + isoSFP5 = isoSFP4 + _nS; + isoSFP6 = isoSFP5 + _nS; + isoSFP7 = isoSFP6 + _nS; + isoSFP8 = isoSFP7 + _nS; + break; + case 10: + isoSFP0 = _isoSFP; + isoSFP1 = isoSFP0 + _nS; + isoSFP2 = isoSFP1 + _nS; + isoSFP3 = isoSFP2 + _nS; + isoSFP4 = isoSFP3 + _nS; + isoSFP5 = isoSFP4 + _nS; + isoSFP6 = isoSFP5 + _nS; + isoSFP7 = isoSFP6 + _nS; + isoSFP8 = isoSFP7 + _nS; + isoSFP9 = isoSFP8 + _nS; + break; + case 11: + isoSFP0 = _isoSFP; + isoSFP1 = isoSFP0 + _nS; + isoSFP2 = isoSFP1 + _nS; + isoSFP3 = isoSFP2 + _nS; + isoSFP4 = isoSFP3 + _nS; + isoSFP5 = isoSFP4 + _nS; + isoSFP6 = isoSFP5 + _nS; + isoSFP7 = isoSFP6 + _nS; + isoSFP8 = isoSFP7 + _nS; + isoSFP9 = isoSFP8 + _nS; + isoSFP10 = isoSFP9 + _nS; + break; + case 12: + isoSFP0 = _isoSFP; + isoSFP1 = isoSFP0 + _nS; + isoSFP2 = isoSFP1 + _nS; + isoSFP3 = isoSFP2 + _nS; + isoSFP4 = isoSFP3 + _nS; + isoSFP5 = isoSFP4 + _nS; + isoSFP6 = isoSFP5 + _nS; + isoSFP7 = isoSFP6 + _nS; + isoSFP8 = isoSFP7 + _nS; + isoSFP9 = isoSFP8 + _nS; + isoSFP10 = isoSFP9 + _nS; + isoSFP11 = isoSFP10 + _nS; + break; + case 13: + isoSFP0 = _isoSFP; + isoSFP1 = isoSFP0 + _nS; + isoSFP2 = isoSFP1 + _nS; + isoSFP3 = isoSFP2 + _nS; + isoSFP4 = isoSFP3 + _nS; + isoSFP5 = isoSFP4 + _nS; + isoSFP6 = isoSFP5 + _nS; + isoSFP7 = isoSFP6 + _nS; + isoSFP8 = isoSFP7 + _nS; + isoSFP9 = isoSFP8 + _nS; + isoSFP10 = isoSFP9 + _nS; + isoSFP11 = isoSFP10 + _nS; + isoSFP12 = isoSFP11 + _nS; + break; + case 14: + isoSFP0 = _isoSFP; + isoSFP1 = isoSFP0 + _nS; + isoSFP2 = isoSFP1 + _nS; + isoSFP3 = isoSFP2 + _nS; + isoSFP4 = isoSFP3 + _nS; + isoSFP5 = isoSFP4 + _nS; + isoSFP6 = isoSFP5 + _nS; + isoSFP7 = isoSFP6 + _nS; + isoSFP8 = isoSFP7 + _nS; + isoSFP9 = isoSFP8 + _nS; + isoSFP10 = isoSFP9 + _nS; + isoSFP11 = isoSFP10 + _nS; + isoSFP12 = isoSFP11 + _nS; + isoSFP13 = isoSFP12 + _nS; + break; + case 15: + isoSFP0 = _isoSFP; + isoSFP1 = isoSFP0 + _nS; + isoSFP2 = isoSFP1 + _nS; + isoSFP3 = isoSFP2 + _nS; + isoSFP4 = isoSFP3 + _nS; + isoSFP5 = isoSFP4 + _nS; + isoSFP6 = isoSFP5 + _nS; + isoSFP7 = isoSFP6 + _nS; + isoSFP8 = isoSFP7 + _nS; + isoSFP9 = isoSFP8 + _nS; + isoSFP10 = isoSFP9 + _nS; + isoSFP11 = isoSFP10 + _nS; + isoSFP12 = isoSFP11 + _nS; + isoSFP13 = isoSFP12 + _nS; + isoSFP14 = isoSFP13 + _nS; + break; + case 16: + isoSFP0 = _isoSFP; + isoSFP1 = isoSFP0 + _nS; + isoSFP2 = isoSFP1 + _nS; + isoSFP3 = isoSFP2 + _nS; + isoSFP4 = isoSFP3 + _nS; + isoSFP5 = isoSFP4 + _nS; + isoSFP6 = isoSFP5 + _nS; + isoSFP7 = isoSFP6 + _nS; + isoSFP8 = isoSFP7 + _nS; + isoSFP9 = isoSFP8 + _nS; + isoSFP10 = isoSFP9 + _nS; + isoSFP11 = isoSFP10 + _nS; + isoSFP12 = isoSFP11 + _nS; + isoSFP13 = isoSFP12 + _nS; + isoSFP14 = isoSFP13 + _nS; + isoSFP15 = isoSFP14 + _nS; + break; + case 17: + isoSFP0 = _isoSFP; + isoSFP1 = isoSFP0 + _nS; + isoSFP2 = isoSFP1 + _nS; + isoSFP3 = isoSFP2 + _nS; + isoSFP4 = isoSFP3 + _nS; + isoSFP5 = isoSFP4 + _nS; + isoSFP6 = isoSFP5 + _nS; + isoSFP7 = isoSFP6 + _nS; + isoSFP8 = isoSFP7 + _nS; + isoSFP9 = isoSFP8 + _nS; + isoSFP10 = isoSFP9 + _nS; + isoSFP11 = isoSFP10 + _nS; + isoSFP12 = isoSFP11 + _nS; + isoSFP13 = isoSFP12 + _nS; + isoSFP14 = isoSFP13 + _nS; + isoSFP15 = isoSFP14 + _nS; + isoSFP16 = isoSFP15 + _nS; + break; + case 18: + isoSFP0 = _isoSFP; + isoSFP1 = isoSFP0 + _nS; + isoSFP2 = isoSFP1 + _nS; + isoSFP3 = isoSFP2 + _nS; + isoSFP4 = isoSFP3 + _nS; + isoSFP5 = isoSFP4 + _nS; + isoSFP6 = isoSFP5 + _nS; + isoSFP7 = isoSFP6 + _nS; + isoSFP8 = isoSFP7 + _nS; + isoSFP9 = isoSFP8 + _nS; + isoSFP10 = isoSFP9 + _nS; + isoSFP11 = isoSFP10 + _nS; + isoSFP12 = isoSFP11 + _nS; + isoSFP13 = isoSFP12 + _nS; + isoSFP14 = isoSFP13 + _nS; + isoSFP15 = isoSFP14 + _nS; + isoSFP16 = isoSFP15 + _nS; + isoSFP17 = isoSFP16 + _nS; + break; + case 19: + isoSFP0 = _isoSFP; + isoSFP1 = isoSFP0 + _nS; + isoSFP2 = isoSFP1 + _nS; + isoSFP3 = isoSFP2 + _nS; + isoSFP4 = isoSFP3 + _nS; + isoSFP5 = isoSFP4 + _nS; + isoSFP6 = isoSFP5 + _nS; + isoSFP7 = isoSFP6 + _nS; + isoSFP8 = isoSFP7 + _nS; + isoSFP9 = isoSFP8 + _nS; + isoSFP10 = isoSFP9 + _nS; + isoSFP11 = isoSFP10 + _nS; + isoSFP12 = isoSFP11 + _nS; + isoSFP13 = isoSFP12 + _nS; + isoSFP14 = isoSFP13 + _nS; + isoSFP15 = isoSFP14 + _nS; + isoSFP16 = isoSFP15 + _nS; + isoSFP17 = isoSFP16 + _nS; + isoSFP18 = isoSFP17 + _nS; + break; + case 20: + isoSFP0 = _isoSFP; + isoSFP1 = isoSFP0 + _nS; + isoSFP2 = isoSFP1 + _nS; + isoSFP3 = isoSFP2 + _nS; + isoSFP4 = isoSFP3 + _nS; + isoSFP5 = isoSFP4 + _nS; + isoSFP6 = isoSFP5 + _nS; + isoSFP7 = isoSFP6 + _nS; + isoSFP8 = isoSFP7 + _nS; + isoSFP9 = isoSFP8 + _nS; + isoSFP10 = isoSFP9 + _nS; + isoSFP11 = isoSFP10 + _nS; + isoSFP12 = isoSFP11 + _nS; + isoSFP13 = isoSFP12 + _nS; + isoSFP14 = isoSFP13 + _nS; + isoSFP15 = isoSFP14 + _nS; + isoSFP16 = isoSFP15 + _nS; + isoSFP17 = isoSFP16 + _nS; + isoSFP18 = isoSFP17 + _nS; + isoSFP19 = isoSFP18 + _nS; + break; + } + + + ICthreads = _ICthreads; + ECthreads = _ECthreads; + ISOthreads = _ISOthreads; + + // Run SEPARATE THREADS to perform the multiplication + pthread_t threads[MAX_THREADS]; + int t; + for(t=0; t<_nThreads ; t++) + pthread_create( &threads[t], NULL, COMMIT_A__block, (void *) (long int)t ); + for(t=0; t<_nThreads ; t++) + pthread_join( threads[t], NULL ); + return; +} + +// +// Compute a sub-block of the At*y MATRIX-VECTOR product +// +void* COMMIT_At__block( void *ptr ) +{ + int id = (long)ptr; + int offset; + uint32_t *eval0, *eval1, *eval2, *eval3, *eval4, *eval5, *eval6, *eval7, *eval8, *eval9, *eval10, *eval11, *eval12, *eval13, *eval14, *eval15, *eval16, *eval17, *eval18, *eval19; + double x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, w, YTmp; + double *xPtr0, *xPtr1, *xPtr2, *xPtr3, *xPtr4, *xPtr5, *xPtr6, *xPtr7, *xPtr8, *xPtr9, *xPtr10, *xPtr11, *xPtr12, *xPtr13, *xPtr14, *xPtr15, *xPtr16, *xPtr17, *xPtr18, *xPtr19; + double *YPtr, *YPtrEnd; + float *SFP0ptr, *SFP1ptr, *SFP2ptr, *SFP3ptr, *SFP4ptr, *SFP5ptr, *SFP6ptr, *SFP7ptr, *SFP8ptr, *SFP9ptr, *SFP10ptr, *SFP11ptr, *SFP12ptr, *SFP13ptr, *SFP14ptr, *SFP15ptr, *SFP16ptr, *SFP17ptr, *SFP18ptr, *SFP19ptr; + uint32_t *t_v, *t_vEnd, *t_f; + uint16_t *t_o; + float *t_l; + uint8_t *t_t; + + // intra-cellular compartments + if (nIC > 0) + { + t_v = ICv; + t_vEnd = ICv + n; + t_o = ICo; + t_l = ICl; + t_f = ICf; + t_t = ICthreadsT; + switch (nIC) + { + case 1: + while (t_v != t_vEnd) + { + if (*t_t == id) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + YTmp = *YPtr; + SFP0ptr = wmrSFP0 + offset; + x0 = (*SFP0ptr++) * YTmp; + eval0 = ICeval + *t_f; + + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + } + x[*t_f] += w * x0 * (double)(*eval0); + } + t_f++; + t_v++; + t_o++; + t_l++; + t_t++; + } + break; + case 2: + while (t_v != t_vEnd) + { + if (*t_t == id) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + YTmp = *YPtr; + SFP0ptr = wmrSFP0 + offset; + SFP1ptr = wmrSFP1 + offset; + x0 = (*SFP0ptr++) * YTmp; + eval0 = ICeval + *t_f; + + x1 = (*SFP1ptr++) * YTmp; + eval1 = eval0 + nF; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + } + x[*t_f] += w * x0 * (double)(*eval0); + x[*t_f+nF] += w * x1 * (double)(*eval1); + } + t_f++; + t_v++; + t_o++; + t_l++; + t_t++; + } + break; + case 3: + while (t_v != t_vEnd) + { + if (*t_t == id) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + YTmp = *YPtr; + SFP0ptr = wmrSFP0 + offset; + SFP1ptr = wmrSFP1 + offset; + SFP2ptr = wmrSFP2 + offset; + x0 = (*SFP0ptr++) * YTmp; + eval0 = ICeval + *t_f; + + x1 = (*SFP1ptr++) * YTmp; + eval1 = eval0 + nF; + x2 = (*SFP2ptr++) * YTmp; + eval2 = eval1 + nF; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + } + x[*t_f] += w * x0 * (double)(*eval0); + x[*t_f+nF] += w * x1 * (double)(*eval1); + x[*t_f+2*nF] += w * x2 * (double)(*eval2); + } + t_f++; + t_v++; + t_o++; + t_l++; + t_t++; + } + break; + case 4: + while (t_v != t_vEnd) + { + if (*t_t == id) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + YTmp = *YPtr; + SFP0ptr = wmrSFP0 + offset; + SFP1ptr = wmrSFP1 + offset; + SFP2ptr = wmrSFP2 + offset; + SFP3ptr = wmrSFP3 + offset; + x0 = (*SFP0ptr++) * YTmp; + eval0 = ICeval + *t_f; + + x1 = (*SFP1ptr++) * YTmp; + eval1 = eval0 + nF; + x2 = (*SFP2ptr++) * YTmp; + eval2 = eval1 + nF; + x3 = (*SFP3ptr++) * YTmp; + eval3 = eval2 + nF; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + } + x[*t_f] += w * x0 * (double)(*eval0); + x[*t_f+nF] += w * x1 * (double)(*eval1); + x[*t_f+2*nF] += w * x2 * (double)(*eval2); + x[*t_f+3*nF] += w * x3 * (double)(*eval3); + } + t_f++; + t_v++; + t_o++; + t_l++; + t_t++; + } + break; + case 5: + while (t_v != t_vEnd) + { + if (*t_t == id) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + YTmp = *YPtr; + SFP0ptr = wmrSFP0 + offset; + SFP1ptr = wmrSFP1 + offset; + SFP2ptr = wmrSFP2 + offset; + SFP3ptr = wmrSFP3 + offset; + SFP4ptr = wmrSFP4 + offset; + x0 = (*SFP0ptr++) * YTmp; + eval0 = ICeval + *t_f; + + x1 = (*SFP1ptr++) * YTmp; + eval1 = eval0 + nF; + x2 = (*SFP2ptr++) * YTmp; + eval2 = eval1 + nF; + x3 = (*SFP3ptr++) * YTmp; + eval3 = eval2 + nF; + x4 = (*SFP4ptr++) * YTmp; + eval4 = eval3 + nF; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + } + x[*t_f] += w * x0 * (double)(*eval0); + x[*t_f+nF] += w * x1 * (double)(*eval1); + x[*t_f+2*nF] += w * x2 * (double)(*eval2); + x[*t_f+3*nF] += w * x3 * (double)(*eval3); + x[*t_f+4*nF] += w * x4 * (double)(*eval4); + } + t_f++; + t_v++; + t_o++; + t_l++; + t_t++; + } + break; + case 6: + while (t_v != t_vEnd) + { + if (*t_t == id) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + YTmp = *YPtr; + SFP0ptr = wmrSFP0 + offset; + SFP1ptr = wmrSFP1 + offset; + SFP2ptr = wmrSFP2 + offset; + SFP3ptr = wmrSFP3 + offset; + SFP4ptr = wmrSFP4 + offset; + SFP5ptr = wmrSFP5 + offset; + x0 = (*SFP0ptr++) * YTmp; + eval0 = ICeval + *t_f; + + x1 = (*SFP1ptr++) * YTmp; + eval1 = eval0 + nF; + x2 = (*SFP2ptr++) * YTmp; + eval2 = eval1 + nF; + x3 = (*SFP3ptr++) * YTmp; + eval3 = eval2 + nF; + x4 = (*SFP4ptr++) * YTmp; + eval4 = eval3 + nF; + x5 = (*SFP5ptr++) * YTmp; + eval5 = eval4 + nF; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + } + x[*t_f] += w * x0 * (double)(*eval0); + x[*t_f+nF] += w * x1 * (double)(*eval1); + x[*t_f+2*nF] += w * x2 * (double)(*eval2); + x[*t_f+3*nF] += w * x3 * (double)(*eval3); + x[*t_f+4*nF] += w * x4 * (double)(*eval4); + x[*t_f+5*nF] += w * x5 * (double)(*eval5); + } + t_f++; + t_v++; + t_o++; + t_l++; + t_t++; + } + break; + case 7: + while (t_v != t_vEnd) + { + if (*t_t == id) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + YTmp = *YPtr; + SFP0ptr = wmrSFP0 + offset; + SFP1ptr = wmrSFP1 + offset; + SFP2ptr = wmrSFP2 + offset; + SFP3ptr = wmrSFP3 + offset; + SFP4ptr = wmrSFP4 + offset; + SFP5ptr = wmrSFP5 + offset; + SFP6ptr = wmrSFP6 + offset; + x0 = (*SFP0ptr++) * YTmp; + eval0 = ICeval + *t_f; + + x1 = (*SFP1ptr++) * YTmp; + eval1 = eval0 + nF; + x2 = (*SFP2ptr++) * YTmp; + eval2 = eval1 + nF; + x3 = (*SFP3ptr++) * YTmp; + eval3 = eval2 + nF; + x4 = (*SFP4ptr++) * YTmp; + eval4 = eval3 + nF; + x5 = (*SFP5ptr++) * YTmp; + eval5 = eval4 + nF; + x6 = (*SFP6ptr++) * YTmp; + eval6 = eval5 + nF; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + x6 += (*SFP6ptr++) * YTmp; + } + x[*t_f] += w * x0 * (double)(*eval0); + x[*t_f+nF] += w * x1 * (double)(*eval1); + x[*t_f+2*nF] += w * x2 * (double)(*eval2); + x[*t_f+3*nF] += w * x3 * (double)(*eval3); + x[*t_f+4*nF] += w * x4 * (double)(*eval4); + x[*t_f+5*nF] += w * x5 * (double)(*eval5); + x[*t_f+6*nF] += w * x6 * (double)(*eval6); + } + t_f++; + t_v++; + t_o++; + t_l++; + t_t++; + } + break; + case 8: + while (t_v != t_vEnd) + { + if (*t_t == id) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + YTmp = *YPtr; + SFP0ptr = wmrSFP0 + offset; + SFP1ptr = wmrSFP1 + offset; + SFP2ptr = wmrSFP2 + offset; + SFP3ptr = wmrSFP3 + offset; + SFP4ptr = wmrSFP4 + offset; + SFP5ptr = wmrSFP5 + offset; + SFP6ptr = wmrSFP6 + offset; + SFP7ptr = wmrSFP7 + offset; + x0 = (*SFP0ptr++) * YTmp; + eval0 = ICeval + *t_f; + + x1 = (*SFP1ptr++) * YTmp; + eval1 = eval0 + nF; + x2 = (*SFP2ptr++) * YTmp; + eval2 = eval1 + nF; + x3 = (*SFP3ptr++) * YTmp; + eval3 = eval2 + nF; + x4 = (*SFP4ptr++) * YTmp; + eval4 = eval3 + nF; + x5 = (*SFP5ptr++) * YTmp; + eval5 = eval4 + nF; + x6 = (*SFP6ptr++) * YTmp; + eval6 = eval5 + nF; + x7 = (*SFP7ptr++) * YTmp; + eval7 = eval6 + nF; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + x6 += (*SFP6ptr++) * YTmp; + x7 += (*SFP7ptr++) * YTmp; + } + x[*t_f] += w * x0 * (double)(*eval0); + x[*t_f+nF] += w * x1 * (double)(*eval1); + x[*t_f+2*nF] += w * x2 * (double)(*eval2); + x[*t_f+3*nF] += w * x3 * (double)(*eval3); + x[*t_f+4*nF] += w * x4 * (double)(*eval4); + x[*t_f+5*nF] += w * x5 * (double)(*eval5); + x[*t_f+6*nF] += w * x6 * (double)(*eval6); + x[*t_f+7*nF] += w * x7 * (double)(*eval7); + } + t_f++; + t_v++; + t_o++; + t_l++; + t_t++; + } + break; + case 9: + while (t_v != t_vEnd) + { + if (*t_t == id) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + YTmp = *YPtr; + SFP0ptr = wmrSFP0 + offset; + SFP1ptr = wmrSFP1 + offset; + SFP2ptr = wmrSFP2 + offset; + SFP3ptr = wmrSFP3 + offset; + SFP4ptr = wmrSFP4 + offset; + SFP5ptr = wmrSFP5 + offset; + SFP6ptr = wmrSFP6 + offset; + SFP7ptr = wmrSFP7 + offset; + SFP8ptr = wmrSFP8 + offset; + x0 = (*SFP0ptr++) * YTmp; + eval0 = ICeval + *t_f; + + x1 = (*SFP1ptr++) * YTmp; + eval1 = eval0 + nF; + x2 = (*SFP2ptr++) * YTmp; + eval2 = eval1 + nF; + x3 = (*SFP3ptr++) * YTmp; + eval3 = eval2 + nF; + x4 = (*SFP4ptr++) * YTmp; + eval4 = eval3 + nF; + x5 = (*SFP5ptr++) * YTmp; + eval5 = eval4 + nF; + x6 = (*SFP6ptr++) * YTmp; + eval6 = eval5 + nF; + x7 = (*SFP7ptr++) * YTmp; + eval7 = eval6 + nF; + x8 = (*SFP8ptr++) * YTmp; + eval8 = eval7 + nF; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + x6 += (*SFP6ptr++) * YTmp; + x7 += (*SFP7ptr++) * YTmp; + x8 += (*SFP8ptr++) * YTmp; + } + x[*t_f] += w * x0 * (double)(*eval0); + x[*t_f+nF] += w * x1 * (double)(*eval1); + x[*t_f+2*nF] += w * x2 * (double)(*eval2); + x[*t_f+3*nF] += w * x3 * (double)(*eval3); + x[*t_f+4*nF] += w * x4 * (double)(*eval4); + x[*t_f+5*nF] += w * x5 * (double)(*eval5); + x[*t_f+6*nF] += w * x6 * (double)(*eval6); + x[*t_f+7*nF] += w * x7 * (double)(*eval7); + x[*t_f+8*nF] += w * x8 * (double)(*eval8); + } + t_f++; + t_v++; + t_o++; + t_l++; + t_t++; + } + break; + case 10: + while (t_v != t_vEnd) + { + if (*t_t == id) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + YTmp = *YPtr; + SFP0ptr = wmrSFP0 + offset; + SFP1ptr = wmrSFP1 + offset; + SFP2ptr = wmrSFP2 + offset; + SFP3ptr = wmrSFP3 + offset; + SFP4ptr = wmrSFP4 + offset; + SFP5ptr = wmrSFP5 + offset; + SFP6ptr = wmrSFP6 + offset; + SFP7ptr = wmrSFP7 + offset; + SFP8ptr = wmrSFP8 + offset; + SFP9ptr = wmrSFP9 + offset; + x0 = (*SFP0ptr++) * YTmp; + eval0 = ICeval + *t_f; + + x1 = (*SFP1ptr++) * YTmp; + eval1 = eval0 + nF; + x2 = (*SFP2ptr++) * YTmp; + eval2 = eval1 + nF; + x3 = (*SFP3ptr++) * YTmp; + eval3 = eval2 + nF; + x4 = (*SFP4ptr++) * YTmp; + eval4 = eval3 + nF; + x5 = (*SFP5ptr++) * YTmp; + eval5 = eval4 + nF; + x6 = (*SFP6ptr++) * YTmp; + eval6 = eval5 + nF; + x7 = (*SFP7ptr++) * YTmp; + eval7 = eval6 + nF; + x8 = (*SFP8ptr++) * YTmp; + eval8 = eval7 + nF; + x9 = (*SFP9ptr++) * YTmp; + eval9 = eval8 + nF; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + x6 += (*SFP6ptr++) * YTmp; + x7 += (*SFP7ptr++) * YTmp; + x8 += (*SFP8ptr++) * YTmp; + x9 += (*SFP9ptr++) * YTmp; + } + x[*t_f] += w * x0 * (double)(*eval0); + x[*t_f+nF] += w * x1 * (double)(*eval1); + x[*t_f+2*nF] += w * x2 * (double)(*eval2); + x[*t_f+3*nF] += w * x3 * (double)(*eval3); + x[*t_f+4*nF] += w * x4 * (double)(*eval4); + x[*t_f+5*nF] += w * x5 * (double)(*eval5); + x[*t_f+6*nF] += w * x6 * (double)(*eval6); + x[*t_f+7*nF] += w * x7 * (double)(*eval7); + x[*t_f+8*nF] += w * x8 * (double)(*eval8); + x[*t_f+9*nF] += w * x9 * (double)(*eval9); + } + t_f++; + t_v++; + t_o++; + t_l++; + t_t++; + } + break; + case 11: + while (t_v != t_vEnd) + { + if (*t_t == id) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + YTmp = *YPtr; + SFP0ptr = wmrSFP0 + offset; + SFP1ptr = wmrSFP1 + offset; + SFP2ptr = wmrSFP2 + offset; + SFP3ptr = wmrSFP3 + offset; + SFP4ptr = wmrSFP4 + offset; + SFP5ptr = wmrSFP5 + offset; + SFP6ptr = wmrSFP6 + offset; + SFP7ptr = wmrSFP7 + offset; + SFP8ptr = wmrSFP8 + offset; + SFP9ptr = wmrSFP9 + offset; + SFP10ptr = wmrSFP10 + offset; + x0 = (*SFP0ptr++) * YTmp; + eval0 = ICeval + *t_f; + + x1 = (*SFP1ptr++) * YTmp; + eval1 = eval0 + nF; + x2 = (*SFP2ptr++) * YTmp; + eval2 = eval1 + nF; + x3 = (*SFP3ptr++) * YTmp; + eval3 = eval2 + nF; + x4 = (*SFP4ptr++) * YTmp; + eval4 = eval3 + nF; + x5 = (*SFP5ptr++) * YTmp; + eval5 = eval4 + nF; + x6 = (*SFP6ptr++) * YTmp; + eval6 = eval5 + nF; + x7 = (*SFP7ptr++) * YTmp; + eval7 = eval6 + nF; + x8 = (*SFP8ptr++) * YTmp; + eval8 = eval7 + nF; + x9 = (*SFP9ptr++) * YTmp; + eval9 = eval8 + nF; + x10 = (*SFP10ptr++) * YTmp; + eval10 = eval9 + nF; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + x6 += (*SFP6ptr++) * YTmp; + x7 += (*SFP7ptr++) * YTmp; + x8 += (*SFP8ptr++) * YTmp; + x9 += (*SFP9ptr++) * YTmp; + x10 += (*SFP10ptr++) * YTmp; + } + x[*t_f] += w * x0 * (double)(*eval0); + x[*t_f+nF] += w * x1 * (double)(*eval1); + x[*t_f+2*nF] += w * x2 * (double)(*eval2); + x[*t_f+3*nF] += w * x3 * (double)(*eval3); + x[*t_f+4*nF] += w * x4 * (double)(*eval4); + x[*t_f+5*nF] += w * x5 * (double)(*eval5); + x[*t_f+6*nF] += w * x6 * (double)(*eval6); + x[*t_f+7*nF] += w * x7 * (double)(*eval7); + x[*t_f+8*nF] += w * x8 * (double)(*eval8); + x[*t_f+9*nF] += w * x9 * (double)(*eval9); + x[*t_f+10*nF] += w * x10 * (double)(*eval10); + } + t_f++; + t_v++; + t_o++; + t_l++; + t_t++; + } + break; + case 12: + while (t_v != t_vEnd) + { + if (*t_t == id) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + YTmp = *YPtr; + SFP0ptr = wmrSFP0 + offset; + SFP1ptr = wmrSFP1 + offset; + SFP2ptr = wmrSFP2 + offset; + SFP3ptr = wmrSFP3 + offset; + SFP4ptr = wmrSFP4 + offset; + SFP5ptr = wmrSFP5 + offset; + SFP6ptr = wmrSFP6 + offset; + SFP7ptr = wmrSFP7 + offset; + SFP8ptr = wmrSFP8 + offset; + SFP9ptr = wmrSFP9 + offset; + SFP10ptr = wmrSFP10 + offset; + SFP11ptr = wmrSFP11 + offset; + x0 = (*SFP0ptr++) * YTmp; + eval0 = ICeval + *t_f; + + x1 = (*SFP1ptr++) * YTmp; + eval1 = eval0 + nF; + x2 = (*SFP2ptr++) * YTmp; + eval2 = eval1 + nF; + x3 = (*SFP3ptr++) * YTmp; + eval3 = eval2 + nF; + x4 = (*SFP4ptr++) * YTmp; + eval4 = eval3 + nF; + x5 = (*SFP5ptr++) * YTmp; + eval5 = eval4 + nF; + x6 = (*SFP6ptr++) * YTmp; + eval6 = eval5 + nF; + x7 = (*SFP7ptr++) * YTmp; + eval7 = eval6 + nF; + x8 = (*SFP8ptr++) * YTmp; + eval8 = eval7 + nF; + x9 = (*SFP9ptr++) * YTmp; + eval9 = eval8 + nF; + x10 = (*SFP10ptr++) * YTmp; + eval10 = eval9 + nF; + x11 = (*SFP11ptr++) * YTmp; + eval11 = eval10 + nF; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + x6 += (*SFP6ptr++) * YTmp; + x7 += (*SFP7ptr++) * YTmp; + x8 += (*SFP8ptr++) * YTmp; + x9 += (*SFP9ptr++) * YTmp; + x10 += (*SFP10ptr++) * YTmp; + x11 += (*SFP11ptr++) * YTmp; + } + x[*t_f] += w * x0 * (double)(*eval0); + x[*t_f+nF] += w * x1 * (double)(*eval1); + x[*t_f+2*nF] += w * x2 * (double)(*eval2); + x[*t_f+3*nF] += w * x3 * (double)(*eval3); + x[*t_f+4*nF] += w * x4 * (double)(*eval4); + x[*t_f+5*nF] += w * x5 * (double)(*eval5); + x[*t_f+6*nF] += w * x6 * (double)(*eval6); + x[*t_f+7*nF] += w * x7 * (double)(*eval7); + x[*t_f+8*nF] += w * x8 * (double)(*eval8); + x[*t_f+9*nF] += w * x9 * (double)(*eval9); + x[*t_f+10*nF] += w * x10 * (double)(*eval10); + x[*t_f+11*nF] += w * x11 * (double)(*eval11); + } + t_f++; + t_v++; + t_o++; + t_l++; + t_t++; + } + break; + case 13: + while (t_v != t_vEnd) + { + if (*t_t == id) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + YTmp = *YPtr; + SFP0ptr = wmrSFP0 + offset; + SFP1ptr = wmrSFP1 + offset; + SFP2ptr = wmrSFP2 + offset; + SFP3ptr = wmrSFP3 + offset; + SFP4ptr = wmrSFP4 + offset; + SFP5ptr = wmrSFP5 + offset; + SFP6ptr = wmrSFP6 + offset; + SFP7ptr = wmrSFP7 + offset; + SFP8ptr = wmrSFP8 + offset; + SFP9ptr = wmrSFP9 + offset; + SFP10ptr = wmrSFP10 + offset; + SFP11ptr = wmrSFP11 + offset; + SFP12ptr = wmrSFP12 + offset; + x0 = (*SFP0ptr++) * YTmp; + eval0 = ICeval + *t_f; + + x1 = (*SFP1ptr++) * YTmp; + eval1 = eval0 + nF; + x2 = (*SFP2ptr++) * YTmp; + eval2 = eval1 + nF; + x3 = (*SFP3ptr++) * YTmp; + eval3 = eval2 + nF; + x4 = (*SFP4ptr++) * YTmp; + eval4 = eval3 + nF; + x5 = (*SFP5ptr++) * YTmp; + eval5 = eval4 + nF; + x6 = (*SFP6ptr++) * YTmp; + eval6 = eval5 + nF; + x7 = (*SFP7ptr++) * YTmp; + eval7 = eval6 + nF; + x8 = (*SFP8ptr++) * YTmp; + eval8 = eval7 + nF; + x9 = (*SFP9ptr++) * YTmp; + eval9 = eval8 + nF; + x10 = (*SFP10ptr++) * YTmp; + eval10 = eval9 + nF; + x11 = (*SFP11ptr++) * YTmp; + eval11 = eval10 + nF; + x12 = (*SFP12ptr++) * YTmp; + eval12 = eval11 + nF; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + x6 += (*SFP6ptr++) * YTmp; + x7 += (*SFP7ptr++) * YTmp; + x8 += (*SFP8ptr++) * YTmp; + x9 += (*SFP9ptr++) * YTmp; + x10 += (*SFP10ptr++) * YTmp; + x11 += (*SFP11ptr++) * YTmp; + x12 += (*SFP12ptr++) * YTmp; + } + x[*t_f] += w * x0 * (double)(*eval0); + x[*t_f+nF] += w * x1 * (double)(*eval1); + x[*t_f+2*nF] += w * x2 * (double)(*eval2); + x[*t_f+3*nF] += w * x3 * (double)(*eval3); + x[*t_f+4*nF] += w * x4 * (double)(*eval4); + x[*t_f+5*nF] += w * x5 * (double)(*eval5); + x[*t_f+6*nF] += w * x6 * (double)(*eval6); + x[*t_f+7*nF] += w * x7 * (double)(*eval7); + x[*t_f+8*nF] += w * x8 * (double)(*eval8); + x[*t_f+9*nF] += w * x9 * (double)(*eval9); + x[*t_f+10*nF] += w * x10 * (double)(*eval10); + x[*t_f+11*nF] += w * x11 * (double)(*eval11); + x[*t_f+12*nF] += w * x12 * (double)(*eval12); + } + t_f++; + t_v++; + t_o++; + t_l++; + t_t++; + } + break; + case 14: + while (t_v != t_vEnd) + { + if (*t_t == id) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + YTmp = *YPtr; + SFP0ptr = wmrSFP0 + offset; + SFP1ptr = wmrSFP1 + offset; + SFP2ptr = wmrSFP2 + offset; + SFP3ptr = wmrSFP3 + offset; + SFP4ptr = wmrSFP4 + offset; + SFP5ptr = wmrSFP5 + offset; + SFP6ptr = wmrSFP6 + offset; + SFP7ptr = wmrSFP7 + offset; + SFP8ptr = wmrSFP8 + offset; + SFP9ptr = wmrSFP9 + offset; + SFP10ptr = wmrSFP10 + offset; + SFP11ptr = wmrSFP11 + offset; + SFP12ptr = wmrSFP12 + offset; + SFP13ptr = wmrSFP13 + offset; + x0 = (*SFP0ptr++) * YTmp; + eval0 = ICeval + *t_f; + + x1 = (*SFP1ptr++) * YTmp; + eval1 = eval0 + nF; + x2 = (*SFP2ptr++) * YTmp; + eval2 = eval1 + nF; + x3 = (*SFP3ptr++) * YTmp; + eval3 = eval2 + nF; + x4 = (*SFP4ptr++) * YTmp; + eval4 = eval3 + nF; + x5 = (*SFP5ptr++) * YTmp; + eval5 = eval4 + nF; + x6 = (*SFP6ptr++) * YTmp; + eval6 = eval5 + nF; + x7 = (*SFP7ptr++) * YTmp; + eval7 = eval6 + nF; + x8 = (*SFP8ptr++) * YTmp; + eval8 = eval7 + nF; + x9 = (*SFP9ptr++) * YTmp; + eval9 = eval8 + nF; + x10 = (*SFP10ptr++) * YTmp; + eval10 = eval9 + nF; + x11 = (*SFP11ptr++) * YTmp; + eval11 = eval10 + nF; + x12 = (*SFP12ptr++) * YTmp; + eval12 = eval11 + nF; + x13 = (*SFP13ptr++) * YTmp; + eval13 = eval12 + nF; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + x6 += (*SFP6ptr++) * YTmp; + x7 += (*SFP7ptr++) * YTmp; + x8 += (*SFP8ptr++) * YTmp; + x9 += (*SFP9ptr++) * YTmp; + x10 += (*SFP10ptr++) * YTmp; + x11 += (*SFP11ptr++) * YTmp; + x12 += (*SFP12ptr++) * YTmp; + x13 += (*SFP13ptr++) * YTmp; + } + x[*t_f] += w * x0 * (double)(*eval0); + x[*t_f+nF] += w * x1 * (double)(*eval1); + x[*t_f+2*nF] += w * x2 * (double)(*eval2); + x[*t_f+3*nF] += w * x3 * (double)(*eval3); + x[*t_f+4*nF] += w * x4 * (double)(*eval4); + x[*t_f+5*nF] += w * x5 * (double)(*eval5); + x[*t_f+6*nF] += w * x6 * (double)(*eval6); + x[*t_f+7*nF] += w * x7 * (double)(*eval7); + x[*t_f+8*nF] += w * x8 * (double)(*eval8); + x[*t_f+9*nF] += w * x9 * (double)(*eval9); + x[*t_f+10*nF] += w * x10 * (double)(*eval10); + x[*t_f+11*nF] += w * x11 * (double)(*eval11); + x[*t_f+12*nF] += w * x12 * (double)(*eval12); + x[*t_f+13*nF] += w * x13 * (double)(*eval13); + } + t_f++; + t_v++; + t_o++; + t_l++; + t_t++; + } + break; + case 15: + while (t_v != t_vEnd) + { + if (*t_t == id) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + YTmp = *YPtr; + SFP0ptr = wmrSFP0 + offset; + SFP1ptr = wmrSFP1 + offset; + SFP2ptr = wmrSFP2 + offset; + SFP3ptr = wmrSFP3 + offset; + SFP4ptr = wmrSFP4 + offset; + SFP5ptr = wmrSFP5 + offset; + SFP6ptr = wmrSFP6 + offset; + SFP7ptr = wmrSFP7 + offset; + SFP8ptr = wmrSFP8 + offset; + SFP9ptr = wmrSFP9 + offset; + SFP10ptr = wmrSFP10 + offset; + SFP11ptr = wmrSFP11 + offset; + SFP12ptr = wmrSFP12 + offset; + SFP13ptr = wmrSFP13 + offset; + SFP14ptr = wmrSFP14 + offset; + x0 = (*SFP0ptr++) * YTmp; + eval0 = ICeval + *t_f; + + x1 = (*SFP1ptr++) * YTmp; + eval1 = eval0 + nF; + x2 = (*SFP2ptr++) * YTmp; + eval2 = eval1 + nF; + x3 = (*SFP3ptr++) * YTmp; + eval3 = eval2 + nF; + x4 = (*SFP4ptr++) * YTmp; + eval4 = eval3 + nF; + x5 = (*SFP5ptr++) * YTmp; + eval5 = eval4 + nF; + x6 = (*SFP6ptr++) * YTmp; + eval6 = eval5 + nF; + x7 = (*SFP7ptr++) * YTmp; + eval7 = eval6 + nF; + x8 = (*SFP8ptr++) * YTmp; + eval8 = eval7 + nF; + x9 = (*SFP9ptr++) * YTmp; + eval9 = eval8 + nF; + x10 = (*SFP10ptr++) * YTmp; + eval10 = eval9 + nF; + x11 = (*SFP11ptr++) * YTmp; + eval11 = eval10 + nF; + x12 = (*SFP12ptr++) * YTmp; + eval12 = eval11 + nF; + x13 = (*SFP13ptr++) * YTmp; + eval13 = eval12 + nF; + x14 = (*SFP14ptr++) * YTmp; + eval14 = eval13 + nF; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + x6 += (*SFP6ptr++) * YTmp; + x7 += (*SFP7ptr++) * YTmp; + x8 += (*SFP8ptr++) * YTmp; + x9 += (*SFP9ptr++) * YTmp; + x10 += (*SFP10ptr++) * YTmp; + x11 += (*SFP11ptr++) * YTmp; + x12 += (*SFP12ptr++) * YTmp; + x13 += (*SFP13ptr++) * YTmp; + x14 += (*SFP14ptr++) * YTmp; + } + x[*t_f] += w * x0 * (double)(*eval0); + x[*t_f+nF] += w * x1 * (double)(*eval1); + x[*t_f+2*nF] += w * x2 * (double)(*eval2); + x[*t_f+3*nF] += w * x3 * (double)(*eval3); + x[*t_f+4*nF] += w * x4 * (double)(*eval4); + x[*t_f+5*nF] += w * x5 * (double)(*eval5); + x[*t_f+6*nF] += w * x6 * (double)(*eval6); + x[*t_f+7*nF] += w * x7 * (double)(*eval7); + x[*t_f+8*nF] += w * x8 * (double)(*eval8); + x[*t_f+9*nF] += w * x9 * (double)(*eval9); + x[*t_f+10*nF] += w * x10 * (double)(*eval10); + x[*t_f+11*nF] += w * x11 * (double)(*eval11); + x[*t_f+12*nF] += w * x12 * (double)(*eval12); + x[*t_f+13*nF] += w * x13 * (double)(*eval13); + x[*t_f+14*nF] += w * x14 * (double)(*eval14); + } + t_f++; + t_v++; + t_o++; + t_l++; + t_t++; + } + break; + case 16: + while (t_v != t_vEnd) + { + if (*t_t == id) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + YTmp = *YPtr; + SFP0ptr = wmrSFP0 + offset; + SFP1ptr = wmrSFP1 + offset; + SFP2ptr = wmrSFP2 + offset; + SFP3ptr = wmrSFP3 + offset; + SFP4ptr = wmrSFP4 + offset; + SFP5ptr = wmrSFP5 + offset; + SFP6ptr = wmrSFP6 + offset; + SFP7ptr = wmrSFP7 + offset; + SFP8ptr = wmrSFP8 + offset; + SFP9ptr = wmrSFP9 + offset; + SFP10ptr = wmrSFP10 + offset; + SFP11ptr = wmrSFP11 + offset; + SFP12ptr = wmrSFP12 + offset; + SFP13ptr = wmrSFP13 + offset; + SFP14ptr = wmrSFP14 + offset; + SFP15ptr = wmrSFP15 + offset; + x0 = (*SFP0ptr++) * YTmp; + eval0 = ICeval + *t_f; + + x1 = (*SFP1ptr++) * YTmp; + eval1 = eval0 + nF; + x2 = (*SFP2ptr++) * YTmp; + eval2 = eval1 + nF; + x3 = (*SFP3ptr++) * YTmp; + eval3 = eval2 + nF; + x4 = (*SFP4ptr++) * YTmp; + eval4 = eval3 + nF; + x5 = (*SFP5ptr++) * YTmp; + eval5 = eval4 + nF; + x6 = (*SFP6ptr++) * YTmp; + eval6 = eval5 + nF; + x7 = (*SFP7ptr++) * YTmp; + eval7 = eval6 + nF; + x8 = (*SFP8ptr++) * YTmp; + eval8 = eval7 + nF; + x9 = (*SFP9ptr++) * YTmp; + eval9 = eval8 + nF; + x10 = (*SFP10ptr++) * YTmp; + eval10 = eval9 + nF; + x11 = (*SFP11ptr++) * YTmp; + eval11 = eval10 + nF; + x12 = (*SFP12ptr++) * YTmp; + eval12 = eval11 + nF; + x13 = (*SFP13ptr++) * YTmp; + eval13 = eval12 + nF; + x14 = (*SFP14ptr++) * YTmp; + eval14 = eval13 + nF; + x15 = (*SFP15ptr++) * YTmp; + eval15 = eval14 + nF; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + x6 += (*SFP6ptr++) * YTmp; + x7 += (*SFP7ptr++) * YTmp; + x8 += (*SFP8ptr++) * YTmp; + x9 += (*SFP9ptr++) * YTmp; + x10 += (*SFP10ptr++) * YTmp; + x11 += (*SFP11ptr++) * YTmp; + x12 += (*SFP12ptr++) * YTmp; + x13 += (*SFP13ptr++) * YTmp; + x14 += (*SFP14ptr++) * YTmp; + x15 += (*SFP15ptr++) * YTmp; + } + x[*t_f] += w * x0 * (double)(*eval0); + x[*t_f+nF] += w * x1 * (double)(*eval1); + x[*t_f+2*nF] += w * x2 * (double)(*eval2); + x[*t_f+3*nF] += w * x3 * (double)(*eval3); + x[*t_f+4*nF] += w * x4 * (double)(*eval4); + x[*t_f+5*nF] += w * x5 * (double)(*eval5); + x[*t_f+6*nF] += w * x6 * (double)(*eval6); + x[*t_f+7*nF] += w * x7 * (double)(*eval7); + x[*t_f+8*nF] += w * x8 * (double)(*eval8); + x[*t_f+9*nF] += w * x9 * (double)(*eval9); + x[*t_f+10*nF] += w * x10 * (double)(*eval10); + x[*t_f+11*nF] += w * x11 * (double)(*eval11); + x[*t_f+12*nF] += w * x12 * (double)(*eval12); + x[*t_f+13*nF] += w * x13 * (double)(*eval13); + x[*t_f+14*nF] += w * x14 * (double)(*eval14); + x[*t_f+15*nF] += w * x15 * (double)(*eval15); + } + t_f++; + t_v++; + t_o++; + t_l++; + t_t++; + } + break; + case 17: + while (t_v != t_vEnd) + { + if (*t_t == id) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + YTmp = *YPtr; + SFP0ptr = wmrSFP0 + offset; + SFP1ptr = wmrSFP1 + offset; + SFP2ptr = wmrSFP2 + offset; + SFP3ptr = wmrSFP3 + offset; + SFP4ptr = wmrSFP4 + offset; + SFP5ptr = wmrSFP5 + offset; + SFP6ptr = wmrSFP6 + offset; + SFP7ptr = wmrSFP7 + offset; + SFP8ptr = wmrSFP8 + offset; + SFP9ptr = wmrSFP9 + offset; + SFP10ptr = wmrSFP10 + offset; + SFP11ptr = wmrSFP11 + offset; + SFP12ptr = wmrSFP12 + offset; + SFP13ptr = wmrSFP13 + offset; + SFP14ptr = wmrSFP14 + offset; + SFP15ptr = wmrSFP15 + offset; + SFP16ptr = wmrSFP16 + offset; + x0 = (*SFP0ptr++) * YTmp; + eval0 = ICeval + *t_f; + + x1 = (*SFP1ptr++) * YTmp; + eval1 = eval0 + nF; + x2 = (*SFP2ptr++) * YTmp; + eval2 = eval1 + nF; + x3 = (*SFP3ptr++) * YTmp; + eval3 = eval2 + nF; + x4 = (*SFP4ptr++) * YTmp; + eval4 = eval3 + nF; + x5 = (*SFP5ptr++) * YTmp; + eval5 = eval4 + nF; + x6 = (*SFP6ptr++) * YTmp; + eval6 = eval5 + nF; + x7 = (*SFP7ptr++) * YTmp; + eval7 = eval6 + nF; + x8 = (*SFP8ptr++) * YTmp; + eval8 = eval7 + nF; + x9 = (*SFP9ptr++) * YTmp; + eval9 = eval8 + nF; + x10 = (*SFP10ptr++) * YTmp; + eval10 = eval9 + nF; + x11 = (*SFP11ptr++) * YTmp; + eval11 = eval10 + nF; + x12 = (*SFP12ptr++) * YTmp; + eval12 = eval11 + nF; + x13 = (*SFP13ptr++) * YTmp; + eval13 = eval12 + nF; + x14 = (*SFP14ptr++) * YTmp; + eval14 = eval13 + nF; + x15 = (*SFP15ptr++) * YTmp; + eval15 = eval14 + nF; + x16 = (*SFP16ptr++) * YTmp; + eval16 = eval15 + nF; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + x6 += (*SFP6ptr++) * YTmp; + x7 += (*SFP7ptr++) * YTmp; + x8 += (*SFP8ptr++) * YTmp; + x9 += (*SFP9ptr++) * YTmp; + x10 += (*SFP10ptr++) * YTmp; + x11 += (*SFP11ptr++) * YTmp; + x12 += (*SFP12ptr++) * YTmp; + x13 += (*SFP13ptr++) * YTmp; + x14 += (*SFP14ptr++) * YTmp; + x15 += (*SFP15ptr++) * YTmp; + x16 += (*SFP16ptr++) * YTmp; + } + x[*t_f] += w * x0 * (double)(*eval0); + x[*t_f+nF] += w * x1 * (double)(*eval1); + x[*t_f+2*nF] += w * x2 * (double)(*eval2); + x[*t_f+3*nF] += w * x3 * (double)(*eval3); + x[*t_f+4*nF] += w * x4 * (double)(*eval4); + x[*t_f+5*nF] += w * x5 * (double)(*eval5); + x[*t_f+6*nF] += w * x6 * (double)(*eval6); + x[*t_f+7*nF] += w * x7 * (double)(*eval7); + x[*t_f+8*nF] += w * x8 * (double)(*eval8); + x[*t_f+9*nF] += w * x9 * (double)(*eval9); + x[*t_f+10*nF] += w * x10 * (double)(*eval10); + x[*t_f+11*nF] += w * x11 * (double)(*eval11); + x[*t_f+12*nF] += w * x12 * (double)(*eval12); + x[*t_f+13*nF] += w * x13 * (double)(*eval13); + x[*t_f+14*nF] += w * x14 * (double)(*eval14); + x[*t_f+15*nF] += w * x15 * (double)(*eval15); + x[*t_f+16*nF] += w * x16 * (double)(*eval16); + } + t_f++; + t_v++; + t_o++; + t_l++; + t_t++; + } + break; + case 18: + while (t_v != t_vEnd) + { + if (*t_t == id) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + YTmp = *YPtr; + SFP0ptr = wmrSFP0 + offset; + SFP1ptr = wmrSFP1 + offset; + SFP2ptr = wmrSFP2 + offset; + SFP3ptr = wmrSFP3 + offset; + SFP4ptr = wmrSFP4 + offset; + SFP5ptr = wmrSFP5 + offset; + SFP6ptr = wmrSFP6 + offset; + SFP7ptr = wmrSFP7 + offset; + SFP8ptr = wmrSFP8 + offset; + SFP9ptr = wmrSFP9 + offset; + SFP10ptr = wmrSFP10 + offset; + SFP11ptr = wmrSFP11 + offset; + SFP12ptr = wmrSFP12 + offset; + SFP13ptr = wmrSFP13 + offset; + SFP14ptr = wmrSFP14 + offset; + SFP15ptr = wmrSFP15 + offset; + SFP16ptr = wmrSFP16 + offset; + SFP17ptr = wmrSFP17 + offset; + x0 = (*SFP0ptr++) * YTmp; + eval0 = ICeval + *t_f; + + x1 = (*SFP1ptr++) * YTmp; + eval1 = eval0 + nF; + x2 = (*SFP2ptr++) * YTmp; + eval2 = eval1 + nF; + x3 = (*SFP3ptr++) * YTmp; + eval3 = eval2 + nF; + x4 = (*SFP4ptr++) * YTmp; + eval4 = eval3 + nF; + x5 = (*SFP5ptr++) * YTmp; + eval5 = eval4 + nF; + x6 = (*SFP6ptr++) * YTmp; + eval6 = eval5 + nF; + x7 = (*SFP7ptr++) * YTmp; + eval7 = eval6 + nF; + x8 = (*SFP8ptr++) * YTmp; + eval8 = eval7 + nF; + x9 = (*SFP9ptr++) * YTmp; + eval9 = eval8 + nF; + x10 = (*SFP10ptr++) * YTmp; + eval10 = eval9 + nF; + x11 = (*SFP11ptr++) * YTmp; + eval11 = eval10 + nF; + x12 = (*SFP12ptr++) * YTmp; + eval12 = eval11 + nF; + x13 = (*SFP13ptr++) * YTmp; + eval13 = eval12 + nF; + x14 = (*SFP14ptr++) * YTmp; + eval14 = eval13 + nF; + x15 = (*SFP15ptr++) * YTmp; + eval15 = eval14 + nF; + x16 = (*SFP16ptr++) * YTmp; + eval16 = eval15 + nF; + x17 = (*SFP17ptr++) * YTmp; + eval17 = eval16 + nF; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + x6 += (*SFP6ptr++) * YTmp; + x7 += (*SFP7ptr++) * YTmp; + x8 += (*SFP8ptr++) * YTmp; + x9 += (*SFP9ptr++) * YTmp; + x10 += (*SFP10ptr++) * YTmp; + x11 += (*SFP11ptr++) * YTmp; + x12 += (*SFP12ptr++) * YTmp; + x13 += (*SFP13ptr++) * YTmp; + x14 += (*SFP14ptr++) * YTmp; + x15 += (*SFP15ptr++) * YTmp; + x16 += (*SFP16ptr++) * YTmp; + x17 += (*SFP17ptr++) * YTmp; + } + x[*t_f] += w * x0 * (double)(*eval0); + x[*t_f+nF] += w * x1 * (double)(*eval1); + x[*t_f+2*nF] += w * x2 * (double)(*eval2); + x[*t_f+3*nF] += w * x3 * (double)(*eval3); + x[*t_f+4*nF] += w * x4 * (double)(*eval4); + x[*t_f+5*nF] += w * x5 * (double)(*eval5); + x[*t_f+6*nF] += w * x6 * (double)(*eval6); + x[*t_f+7*nF] += w * x7 * (double)(*eval7); + x[*t_f+8*nF] += w * x8 * (double)(*eval8); + x[*t_f+9*nF] += w * x9 * (double)(*eval9); + x[*t_f+10*nF] += w * x10 * (double)(*eval10); + x[*t_f+11*nF] += w * x11 * (double)(*eval11); + x[*t_f+12*nF] += w * x12 * (double)(*eval12); + x[*t_f+13*nF] += w * x13 * (double)(*eval13); + x[*t_f+14*nF] += w * x14 * (double)(*eval14); + x[*t_f+15*nF] += w * x15 * (double)(*eval15); + x[*t_f+16*nF] += w * x16 * (double)(*eval16); + x[*t_f+17*nF] += w * x17 * (double)(*eval17); + } + t_f++; + t_v++; + t_o++; + t_l++; + t_t++; + } + break; + case 19: + while (t_v != t_vEnd) + { + if (*t_t == id) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + YTmp = *YPtr; + SFP0ptr = wmrSFP0 + offset; + SFP1ptr = wmrSFP1 + offset; + SFP2ptr = wmrSFP2 + offset; + SFP3ptr = wmrSFP3 + offset; + SFP4ptr = wmrSFP4 + offset; + SFP5ptr = wmrSFP5 + offset; + SFP6ptr = wmrSFP6 + offset; + SFP7ptr = wmrSFP7 + offset; + SFP8ptr = wmrSFP8 + offset; + SFP9ptr = wmrSFP9 + offset; + SFP10ptr = wmrSFP10 + offset; + SFP11ptr = wmrSFP11 + offset; + SFP12ptr = wmrSFP12 + offset; + SFP13ptr = wmrSFP13 + offset; + SFP14ptr = wmrSFP14 + offset; + SFP15ptr = wmrSFP15 + offset; + SFP16ptr = wmrSFP16 + offset; + SFP17ptr = wmrSFP17 + offset; + SFP18ptr = wmrSFP18 + offset; + x0 = (*SFP0ptr++) * YTmp; + eval0 = ICeval + *t_f; + + x1 = (*SFP1ptr++) * YTmp; + eval1 = eval0 + nF; + x2 = (*SFP2ptr++) * YTmp; + eval2 = eval1 + nF; + x3 = (*SFP3ptr++) * YTmp; + eval3 = eval2 + nF; + x4 = (*SFP4ptr++) * YTmp; + eval4 = eval3 + nF; + x5 = (*SFP5ptr++) * YTmp; + eval5 = eval4 + nF; + x6 = (*SFP6ptr++) * YTmp; + eval6 = eval5 + nF; + x7 = (*SFP7ptr++) * YTmp; + eval7 = eval6 + nF; + x8 = (*SFP8ptr++) * YTmp; + eval8 = eval7 + nF; + x9 = (*SFP9ptr++) * YTmp; + eval9 = eval8 + nF; + x10 = (*SFP10ptr++) * YTmp; + eval10 = eval9 + nF; + x11 = (*SFP11ptr++) * YTmp; + eval11 = eval10 + nF; + x12 = (*SFP12ptr++) * YTmp; + eval12 = eval11 + nF; + x13 = (*SFP13ptr++) * YTmp; + eval13 = eval12 + nF; + x14 = (*SFP14ptr++) * YTmp; + eval14 = eval13 + nF; + x15 = (*SFP15ptr++) * YTmp; + eval15 = eval14 + nF; + x16 = (*SFP16ptr++) * YTmp; + eval16 = eval15 + nF; + x17 = (*SFP17ptr++) * YTmp; + eval17 = eval16 + nF; + x18 = (*SFP18ptr++) * YTmp; + eval18 = eval17 + nF; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + x6 += (*SFP6ptr++) * YTmp; + x7 += (*SFP7ptr++) * YTmp; + x8 += (*SFP8ptr++) * YTmp; + x9 += (*SFP9ptr++) * YTmp; + x10 += (*SFP10ptr++) * YTmp; + x11 += (*SFP11ptr++) * YTmp; + x12 += (*SFP12ptr++) * YTmp; + x13 += (*SFP13ptr++) * YTmp; + x14 += (*SFP14ptr++) * YTmp; + x15 += (*SFP15ptr++) * YTmp; + x16 += (*SFP16ptr++) * YTmp; + x17 += (*SFP17ptr++) * YTmp; + x18 += (*SFP18ptr++) * YTmp; + } + x[*t_f] += w * x0 * (double)(*eval0); + x[*t_f+nF] += w * x1 * (double)(*eval1); + x[*t_f+2*nF] += w * x2 * (double)(*eval2); + x[*t_f+3*nF] += w * x3 * (double)(*eval3); + x[*t_f+4*nF] += w * x4 * (double)(*eval4); + x[*t_f+5*nF] += w * x5 * (double)(*eval5); + x[*t_f+6*nF] += w * x6 * (double)(*eval6); + x[*t_f+7*nF] += w * x7 * (double)(*eval7); + x[*t_f+8*nF] += w * x8 * (double)(*eval8); + x[*t_f+9*nF] += w * x9 * (double)(*eval9); + x[*t_f+10*nF] += w * x10 * (double)(*eval10); + x[*t_f+11*nF] += w * x11 * (double)(*eval11); + x[*t_f+12*nF] += w * x12 * (double)(*eval12); + x[*t_f+13*nF] += w * x13 * (double)(*eval13); + x[*t_f+14*nF] += w * x14 * (double)(*eval14); + x[*t_f+15*nF] += w * x15 * (double)(*eval15); + x[*t_f+16*nF] += w * x16 * (double)(*eval16); + x[*t_f+17*nF] += w * x17 * (double)(*eval17); + x[*t_f+18*nF] += w * x18 * (double)(*eval18); + } + t_f++; + t_v++; + t_o++; + t_l++; + t_t++; + } + break; + case 20: + while (t_v != t_vEnd) + { + if (*t_t == id) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + YTmp = *YPtr; + SFP0ptr = wmrSFP0 + offset; + SFP1ptr = wmrSFP1 + offset; + SFP2ptr = wmrSFP2 + offset; + SFP3ptr = wmrSFP3 + offset; + SFP4ptr = wmrSFP4 + offset; + SFP5ptr = wmrSFP5 + offset; + SFP6ptr = wmrSFP6 + offset; + SFP7ptr = wmrSFP7 + offset; + SFP8ptr = wmrSFP8 + offset; + SFP9ptr = wmrSFP9 + offset; + SFP10ptr = wmrSFP10 + offset; + SFP11ptr = wmrSFP11 + offset; + SFP12ptr = wmrSFP12 + offset; + SFP13ptr = wmrSFP13 + offset; + SFP14ptr = wmrSFP14 + offset; + SFP15ptr = wmrSFP15 + offset; + SFP16ptr = wmrSFP16 + offset; + SFP17ptr = wmrSFP17 + offset; + SFP18ptr = wmrSFP18 + offset; + SFP19ptr = wmrSFP19 + offset; + x0 = (*SFP0ptr++) * YTmp; + eval0 = ICeval + *t_f; + + x1 = (*SFP1ptr++) * YTmp; + eval1 = eval0 + nF; + x2 = (*SFP2ptr++) * YTmp; + eval2 = eval1 + nF; + x3 = (*SFP3ptr++) * YTmp; + eval3 = eval2 + nF; + x4 = (*SFP4ptr++) * YTmp; + eval4 = eval3 + nF; + x5 = (*SFP5ptr++) * YTmp; + eval5 = eval4 + nF; + x6 = (*SFP6ptr++) * YTmp; + eval6 = eval5 + nF; + x7 = (*SFP7ptr++) * YTmp; + eval7 = eval6 + nF; + x8 = (*SFP8ptr++) * YTmp; + eval8 = eval7 + nF; + x9 = (*SFP9ptr++) * YTmp; + eval9 = eval8 + nF; + x10 = (*SFP10ptr++) * YTmp; + eval10 = eval9 + nF; + x11 = (*SFP11ptr++) * YTmp; + eval11 = eval10 + nF; + x12 = (*SFP12ptr++) * YTmp; + eval12 = eval11 + nF; + x13 = (*SFP13ptr++) * YTmp; + eval13 = eval12 + nF; + x14 = (*SFP14ptr++) * YTmp; + eval14 = eval13 + nF; + x15 = (*SFP15ptr++) * YTmp; + eval15 = eval14 + nF; + x16 = (*SFP16ptr++) * YTmp; + eval16 = eval15 + nF; + x17 = (*SFP17ptr++) * YTmp; + eval17 = eval16 + nF; + x18 = (*SFP18ptr++) * YTmp; + eval18 = eval17 + nF; + x19 = (*SFP19ptr++) * YTmp; + eval19 = eval18 + nF; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + x6 += (*SFP6ptr++) * YTmp; + x7 += (*SFP7ptr++) * YTmp; + x8 += (*SFP8ptr++) * YTmp; + x9 += (*SFP9ptr++) * YTmp; + x10 += (*SFP10ptr++) * YTmp; + x11 += (*SFP11ptr++) * YTmp; + x12 += (*SFP12ptr++) * YTmp; + x13 += (*SFP13ptr++) * YTmp; + x14 += (*SFP14ptr++) * YTmp; + x15 += (*SFP15ptr++) * YTmp; + x16 += (*SFP16ptr++) * YTmp; + x17 += (*SFP17ptr++) * YTmp; + x18 += (*SFP18ptr++) * YTmp; + x19 += (*SFP19ptr++) * YTmp; + } + x[*t_f] += w * x0 * (double)(*eval0); + x[*t_f+nF] += w * x1 * (double)(*eval1); + x[*t_f+2*nF] += w * x2 * (double)(*eval2); + x[*t_f+3*nF] += w * x3 * (double)(*eval3); + x[*t_f+4*nF] += w * x4 * (double)(*eval4); + x[*t_f+5*nF] += w * x5 * (double)(*eval5); + x[*t_f+6*nF] += w * x6 * (double)(*eval6); + x[*t_f+7*nF] += w * x7 * (double)(*eval7); + x[*t_f+8*nF] += w * x8 * (double)(*eval8); + x[*t_f+9*nF] += w * x9 * (double)(*eval9); + x[*t_f+10*nF] += w * x10 * (double)(*eval10); + x[*t_f+11*nF] += w * x11 * (double)(*eval11); + x[*t_f+12*nF] += w * x12 * (double)(*eval12); + x[*t_f+13*nF] += w * x13 * (double)(*eval13); + x[*t_f+14*nF] += w * x14 * (double)(*eval14); + x[*t_f+15*nF] += w * x15 * (double)(*eval15); + x[*t_f+16*nF] += w * x16 * (double)(*eval16); + x[*t_f+17*nF] += w * x17 * (double)(*eval17); + x[*t_f+18*nF] += w * x18 * (double)(*eval18); + x[*t_f+19*nF] += w * x19 * (double)(*eval19); + } + t_f++; + t_v++; + t_o++; + t_l++; + t_t++; + } + break; + } + } + + // extra-cellular compartments + if (nEC > 0) + { + t_v = ECv + ECthreadsT[id]; + t_vEnd = ECv + ECthreadsT[id+1]; + t_o = ECo + ECthreadsT[id]; + xPtr0 = x + nIC*nF + ECthreadsT[id]; + switch (nEC) + { + case 1: + while (t_v != t_vEnd) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + offset = nS * (*t_o); + YTmp = *YPtr; + SFP0ptr = wmhSFP0 + offset; + x0 = (*SFP0ptr++) * YTmp; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + } + (*xPtr0++) += x0; + t_v++; + t_o++; + } + break; + case 2: + xPtr1 = xPtr0 + nE; + while (t_v != t_vEnd) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + offset = nS * (*t_o); + YTmp = *YPtr; + SFP0ptr = wmhSFP0 + offset; + SFP1ptr = wmhSFP1 + offset; + x0 = (*SFP0ptr++) * YTmp; + x1 = (*SFP1ptr++) * YTmp; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + } + (*xPtr0++) += x0; + (*xPtr1++) += x1; + t_v++; + t_o++; + } + break; + case 3: + xPtr1 = xPtr0 + nE; + xPtr2 = xPtr1 + nE; + while (t_v != t_vEnd) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + offset = nS * (*t_o); + YTmp = *YPtr; + SFP0ptr = wmhSFP0 + offset; + SFP1ptr = wmhSFP1 + offset; + SFP2ptr = wmhSFP2 + offset; + x0 = (*SFP0ptr++) * YTmp; + x1 = (*SFP1ptr++) * YTmp; + x2 = (*SFP2ptr++) * YTmp; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + } + (*xPtr0++) += x0; + (*xPtr1++) += x1; + (*xPtr2++) += x2; + t_v++; + t_o++; + } + break; + case 4: + xPtr1 = xPtr0 + nE; + xPtr2 = xPtr1 + nE; + xPtr3 = xPtr2 + nE; + while (t_v != t_vEnd) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + offset = nS * (*t_o); + YTmp = *YPtr; + SFP0ptr = wmhSFP0 + offset; + SFP1ptr = wmhSFP1 + offset; + SFP2ptr = wmhSFP2 + offset; + SFP3ptr = wmhSFP3 + offset; + x0 = (*SFP0ptr++) * YTmp; + x1 = (*SFP1ptr++) * YTmp; + x2 = (*SFP2ptr++) * YTmp; + x3 = (*SFP3ptr++) * YTmp; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + } + (*xPtr0++) += x0; + (*xPtr1++) += x1; + (*xPtr2++) += x2; + (*xPtr3++) += x3; + t_v++; + t_o++; + } + break; + case 5: + xPtr1 = xPtr0 + nE; + xPtr2 = xPtr1 + nE; + xPtr3 = xPtr2 + nE; + xPtr4 = xPtr3 + nE; + while (t_v != t_vEnd) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + offset = nS * (*t_o); + YTmp = *YPtr; + SFP0ptr = wmhSFP0 + offset; + SFP1ptr = wmhSFP1 + offset; + SFP2ptr = wmhSFP2 + offset; + SFP3ptr = wmhSFP3 + offset; + SFP4ptr = wmhSFP4 + offset; + x0 = (*SFP0ptr++) * YTmp; + x1 = (*SFP1ptr++) * YTmp; + x2 = (*SFP2ptr++) * YTmp; + x3 = (*SFP3ptr++) * YTmp; + x4 = (*SFP4ptr++) * YTmp; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + } + (*xPtr0++) += x0; + (*xPtr1++) += x1; + (*xPtr2++) += x2; + (*xPtr3++) += x3; + (*xPtr4++) += x4; + t_v++; + t_o++; + } + break; + case 6: + xPtr1 = xPtr0 + nE; + xPtr2 = xPtr1 + nE; + xPtr3 = xPtr2 + nE; + xPtr4 = xPtr3 + nE; + xPtr5 = xPtr4 + nE; + while (t_v != t_vEnd) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + offset = nS * (*t_o); + YTmp = *YPtr; + SFP0ptr = wmhSFP0 + offset; + SFP1ptr = wmhSFP1 + offset; + SFP2ptr = wmhSFP2 + offset; + SFP3ptr = wmhSFP3 + offset; + SFP4ptr = wmhSFP4 + offset; + SFP5ptr = wmhSFP5 + offset; + x0 = (*SFP0ptr++) * YTmp; + x1 = (*SFP1ptr++) * YTmp; + x2 = (*SFP2ptr++) * YTmp; + x3 = (*SFP3ptr++) * YTmp; + x4 = (*SFP4ptr++) * YTmp; + x5 = (*SFP5ptr++) * YTmp; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + } + (*xPtr0++) += x0; + (*xPtr1++) += x1; + (*xPtr2++) += x2; + (*xPtr3++) += x3; + (*xPtr4++) += x4; + (*xPtr5++) += x5; + t_v++; + t_o++; + } + break; + case 7: + xPtr1 = xPtr0 + nE; + xPtr2 = xPtr1 + nE; + xPtr3 = xPtr2 + nE; + xPtr4 = xPtr3 + nE; + xPtr5 = xPtr4 + nE; + xPtr6 = xPtr5 + nE; + while (t_v != t_vEnd) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + offset = nS * (*t_o); + YTmp = *YPtr; + SFP0ptr = wmhSFP0 + offset; + SFP1ptr = wmhSFP1 + offset; + SFP2ptr = wmhSFP2 + offset; + SFP3ptr = wmhSFP3 + offset; + SFP4ptr = wmhSFP4 + offset; + SFP5ptr = wmhSFP5 + offset; + SFP6ptr = wmhSFP6 + offset; + x0 = (*SFP0ptr++) * YTmp; + x1 = (*SFP1ptr++) * YTmp; + x2 = (*SFP2ptr++) * YTmp; + x3 = (*SFP3ptr++) * YTmp; + x4 = (*SFP4ptr++) * YTmp; + x5 = (*SFP5ptr++) * YTmp; + x6 = (*SFP6ptr++) * YTmp; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + x6 += (*SFP6ptr++) * YTmp; + } + (*xPtr0++) += x0; + (*xPtr1++) += x1; + (*xPtr2++) += x2; + (*xPtr3++) += x3; + (*xPtr4++) += x4; + (*xPtr5++) += x5; + (*xPtr6++) += x6; + t_v++; + t_o++; + } + break; + case 8: + xPtr1 = xPtr0 + nE; + xPtr2 = xPtr1 + nE; + xPtr3 = xPtr2 + nE; + xPtr4 = xPtr3 + nE; + xPtr5 = xPtr4 + nE; + xPtr6 = xPtr5 + nE; + xPtr7 = xPtr6 + nE; + while (t_v != t_vEnd) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + offset = nS * (*t_o); + YTmp = *YPtr; + SFP0ptr = wmhSFP0 + offset; + SFP1ptr = wmhSFP1 + offset; + SFP2ptr = wmhSFP2 + offset; + SFP3ptr = wmhSFP3 + offset; + SFP4ptr = wmhSFP4 + offset; + SFP5ptr = wmhSFP5 + offset; + SFP6ptr = wmhSFP6 + offset; + SFP7ptr = wmhSFP7 + offset; + x0 = (*SFP0ptr++) * YTmp; + x1 = (*SFP1ptr++) * YTmp; + x2 = (*SFP2ptr++) * YTmp; + x3 = (*SFP3ptr++) * YTmp; + x4 = (*SFP4ptr++) * YTmp; + x5 = (*SFP5ptr++) * YTmp; + x6 = (*SFP6ptr++) * YTmp; + x7 = (*SFP7ptr++) * YTmp; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + x6 += (*SFP6ptr++) * YTmp; + x7 += (*SFP7ptr++) * YTmp; + } + (*xPtr0++) += x0; + (*xPtr1++) += x1; + (*xPtr2++) += x2; + (*xPtr3++) += x3; + (*xPtr4++) += x4; + (*xPtr5++) += x5; + (*xPtr6++) += x6; + (*xPtr7++) += x7; + t_v++; + t_o++; + } + break; + case 9: + xPtr1 = xPtr0 + nE; + xPtr2 = xPtr1 + nE; + xPtr3 = xPtr2 + nE; + xPtr4 = xPtr3 + nE; + xPtr5 = xPtr4 + nE; + xPtr6 = xPtr5 + nE; + xPtr7 = xPtr6 + nE; + xPtr8 = xPtr7 + nE; + while (t_v != t_vEnd) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + offset = nS * (*t_o); + YTmp = *YPtr; + SFP0ptr = wmhSFP0 + offset; + SFP1ptr = wmhSFP1 + offset; + SFP2ptr = wmhSFP2 + offset; + SFP3ptr = wmhSFP3 + offset; + SFP4ptr = wmhSFP4 + offset; + SFP5ptr = wmhSFP5 + offset; + SFP6ptr = wmhSFP6 + offset; + SFP7ptr = wmhSFP7 + offset; + SFP8ptr = wmhSFP8 + offset; + x0 = (*SFP0ptr++) * YTmp; + x1 = (*SFP1ptr++) * YTmp; + x2 = (*SFP2ptr++) * YTmp; + x3 = (*SFP3ptr++) * YTmp; + x4 = (*SFP4ptr++) * YTmp; + x5 = (*SFP5ptr++) * YTmp; + x6 = (*SFP6ptr++) * YTmp; + x7 = (*SFP7ptr++) * YTmp; + x8 = (*SFP8ptr++) * YTmp; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + x6 += (*SFP6ptr++) * YTmp; + x7 += (*SFP7ptr++) * YTmp; + x8 += (*SFP8ptr++) * YTmp; + } + (*xPtr0++) += x0; + (*xPtr1++) += x1; + (*xPtr2++) += x2; + (*xPtr3++) += x3; + (*xPtr4++) += x4; + (*xPtr5++) += x5; + (*xPtr6++) += x6; + (*xPtr7++) += x7; + (*xPtr8++) += x8; + t_v++; + t_o++; + } + break; + case 10: + xPtr1 = xPtr0 + nE; + xPtr2 = xPtr1 + nE; + xPtr3 = xPtr2 + nE; + xPtr4 = xPtr3 + nE; + xPtr5 = xPtr4 + nE; + xPtr6 = xPtr5 + nE; + xPtr7 = xPtr6 + nE; + xPtr8 = xPtr7 + nE; + xPtr9 = xPtr8 + nE; + while (t_v != t_vEnd) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + offset = nS * (*t_o); + YTmp = *YPtr; + SFP0ptr = wmhSFP0 + offset; + SFP1ptr = wmhSFP1 + offset; + SFP2ptr = wmhSFP2 + offset; + SFP3ptr = wmhSFP3 + offset; + SFP4ptr = wmhSFP4 + offset; + SFP5ptr = wmhSFP5 + offset; + SFP6ptr = wmhSFP6 + offset; + SFP7ptr = wmhSFP7 + offset; + SFP8ptr = wmhSFP8 + offset; + SFP9ptr = wmhSFP9 + offset; + x0 = (*SFP0ptr++) * YTmp; + x1 = (*SFP1ptr++) * YTmp; + x2 = (*SFP2ptr++) * YTmp; + x3 = (*SFP3ptr++) * YTmp; + x4 = (*SFP4ptr++) * YTmp; + x5 = (*SFP5ptr++) * YTmp; + x6 = (*SFP6ptr++) * YTmp; + x7 = (*SFP7ptr++) * YTmp; + x8 = (*SFP8ptr++) * YTmp; + x9 = (*SFP9ptr++) * YTmp; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + x6 += (*SFP6ptr++) * YTmp; + x7 += (*SFP7ptr++) * YTmp; + x8 += (*SFP8ptr++) * YTmp; + x9 += (*SFP9ptr++) * YTmp; + } + (*xPtr0++) += x0; + (*xPtr1++) += x1; + (*xPtr2++) += x2; + (*xPtr3++) += x3; + (*xPtr4++) += x4; + (*xPtr5++) += x5; + (*xPtr6++) += x6; + (*xPtr7++) += x7; + (*xPtr8++) += x8; + (*xPtr9++) += x9; + t_v++; + t_o++; + } + break; + case 11: + xPtr1 = xPtr0 + nE; + xPtr2 = xPtr1 + nE; + xPtr3 = xPtr2 + nE; + xPtr4 = xPtr3 + nE; + xPtr5 = xPtr4 + nE; + xPtr6 = xPtr5 + nE; + xPtr7 = xPtr6 + nE; + xPtr8 = xPtr7 + nE; + xPtr9 = xPtr8 + nE; + xPtr10 = xPtr9 + nE; + while (t_v != t_vEnd) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + offset = nS * (*t_o); + YTmp = *YPtr; + SFP0ptr = wmhSFP0 + offset; + SFP1ptr = wmhSFP1 + offset; + SFP2ptr = wmhSFP2 + offset; + SFP3ptr = wmhSFP3 + offset; + SFP4ptr = wmhSFP4 + offset; + SFP5ptr = wmhSFP5 + offset; + SFP6ptr = wmhSFP6 + offset; + SFP7ptr = wmhSFP7 + offset; + SFP8ptr = wmhSFP8 + offset; + SFP9ptr = wmhSFP9 + offset; + SFP10ptr = wmhSFP10 + offset; + x0 = (*SFP0ptr++) * YTmp; + x1 = (*SFP1ptr++) * YTmp; + x2 = (*SFP2ptr++) * YTmp; + x3 = (*SFP3ptr++) * YTmp; + x4 = (*SFP4ptr++) * YTmp; + x5 = (*SFP5ptr++) * YTmp; + x6 = (*SFP6ptr++) * YTmp; + x7 = (*SFP7ptr++) * YTmp; + x8 = (*SFP8ptr++) * YTmp; + x9 = (*SFP9ptr++) * YTmp; + x10 = (*SFP10ptr++) * YTmp; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + x6 += (*SFP6ptr++) * YTmp; + x7 += (*SFP7ptr++) * YTmp; + x8 += (*SFP8ptr++) * YTmp; + x9 += (*SFP9ptr++) * YTmp; + x10 += (*SFP10ptr++) * YTmp; + } + (*xPtr0++) += x0; + (*xPtr1++) += x1; + (*xPtr2++) += x2; + (*xPtr3++) += x3; + (*xPtr4++) += x4; + (*xPtr5++) += x5; + (*xPtr6++) += x6; + (*xPtr7++) += x7; + (*xPtr8++) += x8; + (*xPtr9++) += x9; + (*xPtr10++) += x10; + t_v++; + t_o++; + } + break; + case 12: + xPtr1 = xPtr0 + nE; + xPtr2 = xPtr1 + nE; + xPtr3 = xPtr2 + nE; + xPtr4 = xPtr3 + nE; + xPtr5 = xPtr4 + nE; + xPtr6 = xPtr5 + nE; + xPtr7 = xPtr6 + nE; + xPtr8 = xPtr7 + nE; + xPtr9 = xPtr8 + nE; + xPtr10 = xPtr9 + nE; + xPtr11 = xPtr10 + nE; + while (t_v != t_vEnd) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + offset = nS * (*t_o); + YTmp = *YPtr; + SFP0ptr = wmhSFP0 + offset; + SFP1ptr = wmhSFP1 + offset; + SFP2ptr = wmhSFP2 + offset; + SFP3ptr = wmhSFP3 + offset; + SFP4ptr = wmhSFP4 + offset; + SFP5ptr = wmhSFP5 + offset; + SFP6ptr = wmhSFP6 + offset; + SFP7ptr = wmhSFP7 + offset; + SFP8ptr = wmhSFP8 + offset; + SFP9ptr = wmhSFP9 + offset; + SFP10ptr = wmhSFP10 + offset; + SFP11ptr = wmhSFP11 + offset; + x0 = (*SFP0ptr++) * YTmp; + x1 = (*SFP1ptr++) * YTmp; + x2 = (*SFP2ptr++) * YTmp; + x3 = (*SFP3ptr++) * YTmp; + x4 = (*SFP4ptr++) * YTmp; + x5 = (*SFP5ptr++) * YTmp; + x6 = (*SFP6ptr++) * YTmp; + x7 = (*SFP7ptr++) * YTmp; + x8 = (*SFP8ptr++) * YTmp; + x9 = (*SFP9ptr++) * YTmp; + x10 = (*SFP10ptr++) * YTmp; + x11 = (*SFP11ptr++) * YTmp; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + x6 += (*SFP6ptr++) * YTmp; + x7 += (*SFP7ptr++) * YTmp; + x8 += (*SFP8ptr++) * YTmp; + x9 += (*SFP9ptr++) * YTmp; + x10 += (*SFP10ptr++) * YTmp; + x11 += (*SFP11ptr++) * YTmp; + } + (*xPtr0++) += x0; + (*xPtr1++) += x1; + (*xPtr2++) += x2; + (*xPtr3++) += x3; + (*xPtr4++) += x4; + (*xPtr5++) += x5; + (*xPtr6++) += x6; + (*xPtr7++) += x7; + (*xPtr8++) += x8; + (*xPtr9++) += x9; + (*xPtr10++) += x10; + (*xPtr11++) += x11; + t_v++; + t_o++; + } + break; + case 13: + xPtr1 = xPtr0 + nE; + xPtr2 = xPtr1 + nE; + xPtr3 = xPtr2 + nE; + xPtr4 = xPtr3 + nE; + xPtr5 = xPtr4 + nE; + xPtr6 = xPtr5 + nE; + xPtr7 = xPtr6 + nE; + xPtr8 = xPtr7 + nE; + xPtr9 = xPtr8 + nE; + xPtr10 = xPtr9 + nE; + xPtr11 = xPtr10 + nE; + xPtr12 = xPtr11 + nE; + while (t_v != t_vEnd) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + offset = nS * (*t_o); + YTmp = *YPtr; + SFP0ptr = wmhSFP0 + offset; + SFP1ptr = wmhSFP1 + offset; + SFP2ptr = wmhSFP2 + offset; + SFP3ptr = wmhSFP3 + offset; + SFP4ptr = wmhSFP4 + offset; + SFP5ptr = wmhSFP5 + offset; + SFP6ptr = wmhSFP6 + offset; + SFP7ptr = wmhSFP7 + offset; + SFP8ptr = wmhSFP8 + offset; + SFP9ptr = wmhSFP9 + offset; + SFP10ptr = wmhSFP10 + offset; + SFP11ptr = wmhSFP11 + offset; + SFP12ptr = wmhSFP12 + offset; + x0 = (*SFP0ptr++) * YTmp; + x1 = (*SFP1ptr++) * YTmp; + x2 = (*SFP2ptr++) * YTmp; + x3 = (*SFP3ptr++) * YTmp; + x4 = (*SFP4ptr++) * YTmp; + x5 = (*SFP5ptr++) * YTmp; + x6 = (*SFP6ptr++) * YTmp; + x7 = (*SFP7ptr++) * YTmp; + x8 = (*SFP8ptr++) * YTmp; + x9 = (*SFP9ptr++) * YTmp; + x10 = (*SFP10ptr++) * YTmp; + x11 = (*SFP11ptr++) * YTmp; + x12 = (*SFP12ptr++) * YTmp; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + x6 += (*SFP6ptr++) * YTmp; + x7 += (*SFP7ptr++) * YTmp; + x8 += (*SFP8ptr++) * YTmp; + x9 += (*SFP9ptr++) * YTmp; + x10 += (*SFP10ptr++) * YTmp; + x11 += (*SFP11ptr++) * YTmp; + x12 += (*SFP12ptr++) * YTmp; + } + (*xPtr0++) += x0; + (*xPtr1++) += x1; + (*xPtr2++) += x2; + (*xPtr3++) += x3; + (*xPtr4++) += x4; + (*xPtr5++) += x5; + (*xPtr6++) += x6; + (*xPtr7++) += x7; + (*xPtr8++) += x8; + (*xPtr9++) += x9; + (*xPtr10++) += x10; + (*xPtr11++) += x11; + (*xPtr12++) += x12; + t_v++; + t_o++; + } + break; + case 14: + xPtr1 = xPtr0 + nE; + xPtr2 = xPtr1 + nE; + xPtr3 = xPtr2 + nE; + xPtr4 = xPtr3 + nE; + xPtr5 = xPtr4 + nE; + xPtr6 = xPtr5 + nE; + xPtr7 = xPtr6 + nE; + xPtr8 = xPtr7 + nE; + xPtr9 = xPtr8 + nE; + xPtr10 = xPtr9 + nE; + xPtr11 = xPtr10 + nE; + xPtr12 = xPtr11 + nE; + xPtr13 = xPtr12 + nE; + while (t_v != t_vEnd) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + offset = nS * (*t_o); + YTmp = *YPtr; + SFP0ptr = wmhSFP0 + offset; + SFP1ptr = wmhSFP1 + offset; + SFP2ptr = wmhSFP2 + offset; + SFP3ptr = wmhSFP3 + offset; + SFP4ptr = wmhSFP4 + offset; + SFP5ptr = wmhSFP5 + offset; + SFP6ptr = wmhSFP6 + offset; + SFP7ptr = wmhSFP7 + offset; + SFP8ptr = wmhSFP8 + offset; + SFP9ptr = wmhSFP9 + offset; + SFP10ptr = wmhSFP10 + offset; + SFP11ptr = wmhSFP11 + offset; + SFP12ptr = wmhSFP12 + offset; + SFP13ptr = wmhSFP13 + offset; + x0 = (*SFP0ptr++) * YTmp; + x1 = (*SFP1ptr++) * YTmp; + x2 = (*SFP2ptr++) * YTmp; + x3 = (*SFP3ptr++) * YTmp; + x4 = (*SFP4ptr++) * YTmp; + x5 = (*SFP5ptr++) * YTmp; + x6 = (*SFP6ptr++) * YTmp; + x7 = (*SFP7ptr++) * YTmp; + x8 = (*SFP8ptr++) * YTmp; + x9 = (*SFP9ptr++) * YTmp; + x10 = (*SFP10ptr++) * YTmp; + x11 = (*SFP11ptr++) * YTmp; + x12 = (*SFP12ptr++) * YTmp; + x13 = (*SFP13ptr++) * YTmp; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + x6 += (*SFP6ptr++) * YTmp; + x7 += (*SFP7ptr++) * YTmp; + x8 += (*SFP8ptr++) * YTmp; + x9 += (*SFP9ptr++) * YTmp; + x10 += (*SFP10ptr++) * YTmp; + x11 += (*SFP11ptr++) * YTmp; + x12 += (*SFP12ptr++) * YTmp; + x13 += (*SFP13ptr++) * YTmp; + } + (*xPtr0++) += x0; + (*xPtr1++) += x1; + (*xPtr2++) += x2; + (*xPtr3++) += x3; + (*xPtr4++) += x4; + (*xPtr5++) += x5; + (*xPtr6++) += x6; + (*xPtr7++) += x7; + (*xPtr8++) += x8; + (*xPtr9++) += x9; + (*xPtr10++) += x10; + (*xPtr11++) += x11; + (*xPtr12++) += x12; + (*xPtr13++) += x13; + t_v++; + t_o++; + } + break; + case 15: + xPtr1 = xPtr0 + nE; + xPtr2 = xPtr1 + nE; + xPtr3 = xPtr2 + nE; + xPtr4 = xPtr3 + nE; + xPtr5 = xPtr4 + nE; + xPtr6 = xPtr5 + nE; + xPtr7 = xPtr6 + nE; + xPtr8 = xPtr7 + nE; + xPtr9 = xPtr8 + nE; + xPtr10 = xPtr9 + nE; + xPtr11 = xPtr10 + nE; + xPtr12 = xPtr11 + nE; + xPtr13 = xPtr12 + nE; + xPtr14 = xPtr13 + nE; + while (t_v != t_vEnd) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + offset = nS * (*t_o); + YTmp = *YPtr; + SFP0ptr = wmhSFP0 + offset; + SFP1ptr = wmhSFP1 + offset; + SFP2ptr = wmhSFP2 + offset; + SFP3ptr = wmhSFP3 + offset; + SFP4ptr = wmhSFP4 + offset; + SFP5ptr = wmhSFP5 + offset; + SFP6ptr = wmhSFP6 + offset; + SFP7ptr = wmhSFP7 + offset; + SFP8ptr = wmhSFP8 + offset; + SFP9ptr = wmhSFP9 + offset; + SFP10ptr = wmhSFP10 + offset; + SFP11ptr = wmhSFP11 + offset; + SFP12ptr = wmhSFP12 + offset; + SFP13ptr = wmhSFP13 + offset; + SFP14ptr = wmhSFP14 + offset; + x0 = (*SFP0ptr++) * YTmp; + x1 = (*SFP1ptr++) * YTmp; + x2 = (*SFP2ptr++) * YTmp; + x3 = (*SFP3ptr++) * YTmp; + x4 = (*SFP4ptr++) * YTmp; + x5 = (*SFP5ptr++) * YTmp; + x6 = (*SFP6ptr++) * YTmp; + x7 = (*SFP7ptr++) * YTmp; + x8 = (*SFP8ptr++) * YTmp; + x9 = (*SFP9ptr++) * YTmp; + x10 = (*SFP10ptr++) * YTmp; + x11 = (*SFP11ptr++) * YTmp; + x12 = (*SFP12ptr++) * YTmp; + x13 = (*SFP13ptr++) * YTmp; + x14 = (*SFP14ptr++) * YTmp; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + x6 += (*SFP6ptr++) * YTmp; + x7 += (*SFP7ptr++) * YTmp; + x8 += (*SFP8ptr++) * YTmp; + x9 += (*SFP9ptr++) * YTmp; + x10 += (*SFP10ptr++) * YTmp; + x11 += (*SFP11ptr++) * YTmp; + x12 += (*SFP12ptr++) * YTmp; + x13 += (*SFP13ptr++) * YTmp; + x14 += (*SFP14ptr++) * YTmp; + } + (*xPtr0++) += x0; + (*xPtr1++) += x1; + (*xPtr2++) += x2; + (*xPtr3++) += x3; + (*xPtr4++) += x4; + (*xPtr5++) += x5; + (*xPtr6++) += x6; + (*xPtr7++) += x7; + (*xPtr8++) += x8; + (*xPtr9++) += x9; + (*xPtr10++) += x10; + (*xPtr11++) += x11; + (*xPtr12++) += x12; + (*xPtr13++) += x13; + (*xPtr14++) += x14; + t_v++; + t_o++; + } + break; + case 16: + xPtr1 = xPtr0 + nE; + xPtr2 = xPtr1 + nE; + xPtr3 = xPtr2 + nE; + xPtr4 = xPtr3 + nE; + xPtr5 = xPtr4 + nE; + xPtr6 = xPtr5 + nE; + xPtr7 = xPtr6 + nE; + xPtr8 = xPtr7 + nE; + xPtr9 = xPtr8 + nE; + xPtr10 = xPtr9 + nE; + xPtr11 = xPtr10 + nE; + xPtr12 = xPtr11 + nE; + xPtr13 = xPtr12 + nE; + xPtr14 = xPtr13 + nE; + xPtr15 = xPtr14 + nE; + while (t_v != t_vEnd) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + offset = nS * (*t_o); + YTmp = *YPtr; + SFP0ptr = wmhSFP0 + offset; + SFP1ptr = wmhSFP1 + offset; + SFP2ptr = wmhSFP2 + offset; + SFP3ptr = wmhSFP3 + offset; + SFP4ptr = wmhSFP4 + offset; + SFP5ptr = wmhSFP5 + offset; + SFP6ptr = wmhSFP6 + offset; + SFP7ptr = wmhSFP7 + offset; + SFP8ptr = wmhSFP8 + offset; + SFP9ptr = wmhSFP9 + offset; + SFP10ptr = wmhSFP10 + offset; + SFP11ptr = wmhSFP11 + offset; + SFP12ptr = wmhSFP12 + offset; + SFP13ptr = wmhSFP13 + offset; + SFP14ptr = wmhSFP14 + offset; + SFP15ptr = wmhSFP15 + offset; + x0 = (*SFP0ptr++) * YTmp; + x1 = (*SFP1ptr++) * YTmp; + x2 = (*SFP2ptr++) * YTmp; + x3 = (*SFP3ptr++) * YTmp; + x4 = (*SFP4ptr++) * YTmp; + x5 = (*SFP5ptr++) * YTmp; + x6 = (*SFP6ptr++) * YTmp; + x7 = (*SFP7ptr++) * YTmp; + x8 = (*SFP8ptr++) * YTmp; + x9 = (*SFP9ptr++) * YTmp; + x10 = (*SFP10ptr++) * YTmp; + x11 = (*SFP11ptr++) * YTmp; + x12 = (*SFP12ptr++) * YTmp; + x13 = (*SFP13ptr++) * YTmp; + x14 = (*SFP14ptr++) * YTmp; + x15 = (*SFP15ptr++) * YTmp; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + x6 += (*SFP6ptr++) * YTmp; + x7 += (*SFP7ptr++) * YTmp; + x8 += (*SFP8ptr++) * YTmp; + x9 += (*SFP9ptr++) * YTmp; + x10 += (*SFP10ptr++) * YTmp; + x11 += (*SFP11ptr++) * YTmp; + x12 += (*SFP12ptr++) * YTmp; + x13 += (*SFP13ptr++) * YTmp; + x14 += (*SFP14ptr++) * YTmp; + x15 += (*SFP15ptr++) * YTmp; + } + (*xPtr0++) += x0; + (*xPtr1++) += x1; + (*xPtr2++) += x2; + (*xPtr3++) += x3; + (*xPtr4++) += x4; + (*xPtr5++) += x5; + (*xPtr6++) += x6; + (*xPtr7++) += x7; + (*xPtr8++) += x8; + (*xPtr9++) += x9; + (*xPtr10++) += x10; + (*xPtr11++) += x11; + (*xPtr12++) += x12; + (*xPtr13++) += x13; + (*xPtr14++) += x14; + (*xPtr15++) += x15; + t_v++; + t_o++; + } + break; + case 17: + xPtr1 = xPtr0 + nE; + xPtr2 = xPtr1 + nE; + xPtr3 = xPtr2 + nE; + xPtr4 = xPtr3 + nE; + xPtr5 = xPtr4 + nE; + xPtr6 = xPtr5 + nE; + xPtr7 = xPtr6 + nE; + xPtr8 = xPtr7 + nE; + xPtr9 = xPtr8 + nE; + xPtr10 = xPtr9 + nE; + xPtr11 = xPtr10 + nE; + xPtr12 = xPtr11 + nE; + xPtr13 = xPtr12 + nE; + xPtr14 = xPtr13 + nE; + xPtr15 = xPtr14 + nE; + xPtr16 = xPtr15 + nE; + while (t_v != t_vEnd) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + offset = nS * (*t_o); + YTmp = *YPtr; + SFP0ptr = wmhSFP0 + offset; + SFP1ptr = wmhSFP1 + offset; + SFP2ptr = wmhSFP2 + offset; + SFP3ptr = wmhSFP3 + offset; + SFP4ptr = wmhSFP4 + offset; + SFP5ptr = wmhSFP5 + offset; + SFP6ptr = wmhSFP6 + offset; + SFP7ptr = wmhSFP7 + offset; + SFP8ptr = wmhSFP8 + offset; + SFP9ptr = wmhSFP9 + offset; + SFP10ptr = wmhSFP10 + offset; + SFP11ptr = wmhSFP11 + offset; + SFP12ptr = wmhSFP12 + offset; + SFP13ptr = wmhSFP13 + offset; + SFP14ptr = wmhSFP14 + offset; + SFP15ptr = wmhSFP15 + offset; + SFP16ptr = wmhSFP16 + offset; + x0 = (*SFP0ptr++) * YTmp; + x1 = (*SFP1ptr++) * YTmp; + x2 = (*SFP2ptr++) * YTmp; + x3 = (*SFP3ptr++) * YTmp; + x4 = (*SFP4ptr++) * YTmp; + x5 = (*SFP5ptr++) * YTmp; + x6 = (*SFP6ptr++) * YTmp; + x7 = (*SFP7ptr++) * YTmp; + x8 = (*SFP8ptr++) * YTmp; + x9 = (*SFP9ptr++) * YTmp; + x10 = (*SFP10ptr++) * YTmp; + x11 = (*SFP11ptr++) * YTmp; + x12 = (*SFP12ptr++) * YTmp; + x13 = (*SFP13ptr++) * YTmp; + x14 = (*SFP14ptr++) * YTmp; + x15 = (*SFP15ptr++) * YTmp; + x16 = (*SFP16ptr++) * YTmp; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + x6 += (*SFP6ptr++) * YTmp; + x7 += (*SFP7ptr++) * YTmp; + x8 += (*SFP8ptr++) * YTmp; + x9 += (*SFP9ptr++) * YTmp; + x10 += (*SFP10ptr++) * YTmp; + x11 += (*SFP11ptr++) * YTmp; + x12 += (*SFP12ptr++) * YTmp; + x13 += (*SFP13ptr++) * YTmp; + x14 += (*SFP14ptr++) * YTmp; + x15 += (*SFP15ptr++) * YTmp; + x16 += (*SFP16ptr++) * YTmp; + } + (*xPtr0++) += x0; + (*xPtr1++) += x1; + (*xPtr2++) += x2; + (*xPtr3++) += x3; + (*xPtr4++) += x4; + (*xPtr5++) += x5; + (*xPtr6++) += x6; + (*xPtr7++) += x7; + (*xPtr8++) += x8; + (*xPtr9++) += x9; + (*xPtr10++) += x10; + (*xPtr11++) += x11; + (*xPtr12++) += x12; + (*xPtr13++) += x13; + (*xPtr14++) += x14; + (*xPtr15++) += x15; + (*xPtr16++) += x16; + t_v++; + t_o++; + } + break; + case 18: + xPtr1 = xPtr0 + nE; + xPtr2 = xPtr1 + nE; + xPtr3 = xPtr2 + nE; + xPtr4 = xPtr3 + nE; + xPtr5 = xPtr4 + nE; + xPtr6 = xPtr5 + nE; + xPtr7 = xPtr6 + nE; + xPtr8 = xPtr7 + nE; + xPtr9 = xPtr8 + nE; + xPtr10 = xPtr9 + nE; + xPtr11 = xPtr10 + nE; + xPtr12 = xPtr11 + nE; + xPtr13 = xPtr12 + nE; + xPtr14 = xPtr13 + nE; + xPtr15 = xPtr14 + nE; + xPtr16 = xPtr15 + nE; + xPtr17 = xPtr16 + nE; + while (t_v != t_vEnd) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + offset = nS * (*t_o); + YTmp = *YPtr; + SFP0ptr = wmhSFP0 + offset; + SFP1ptr = wmhSFP1 + offset; + SFP2ptr = wmhSFP2 + offset; + SFP3ptr = wmhSFP3 + offset; + SFP4ptr = wmhSFP4 + offset; + SFP5ptr = wmhSFP5 + offset; + SFP6ptr = wmhSFP6 + offset; + SFP7ptr = wmhSFP7 + offset; + SFP8ptr = wmhSFP8 + offset; + SFP9ptr = wmhSFP9 + offset; + SFP10ptr = wmhSFP10 + offset; + SFP11ptr = wmhSFP11 + offset; + SFP12ptr = wmhSFP12 + offset; + SFP13ptr = wmhSFP13 + offset; + SFP14ptr = wmhSFP14 + offset; + SFP15ptr = wmhSFP15 + offset; + SFP16ptr = wmhSFP16 + offset; + SFP17ptr = wmhSFP17 + offset; + x0 = (*SFP0ptr++) * YTmp; + x1 = (*SFP1ptr++) * YTmp; + x2 = (*SFP2ptr++) * YTmp; + x3 = (*SFP3ptr++) * YTmp; + x4 = (*SFP4ptr++) * YTmp; + x5 = (*SFP5ptr++) * YTmp; + x6 = (*SFP6ptr++) * YTmp; + x7 = (*SFP7ptr++) * YTmp; + x8 = (*SFP8ptr++) * YTmp; + x9 = (*SFP9ptr++) * YTmp; + x10 = (*SFP10ptr++) * YTmp; + x11 = (*SFP11ptr++) * YTmp; + x12 = (*SFP12ptr++) * YTmp; + x13 = (*SFP13ptr++) * YTmp; + x14 = (*SFP14ptr++) * YTmp; + x15 = (*SFP15ptr++) * YTmp; + x16 = (*SFP16ptr++) * YTmp; + x17 = (*SFP17ptr++) * YTmp; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + x6 += (*SFP6ptr++) * YTmp; + x7 += (*SFP7ptr++) * YTmp; + x8 += (*SFP8ptr++) * YTmp; + x9 += (*SFP9ptr++) * YTmp; + x10 += (*SFP10ptr++) * YTmp; + x11 += (*SFP11ptr++) * YTmp; + x12 += (*SFP12ptr++) * YTmp; + x13 += (*SFP13ptr++) * YTmp; + x14 += (*SFP14ptr++) * YTmp; + x15 += (*SFP15ptr++) * YTmp; + x16 += (*SFP16ptr++) * YTmp; + x17 += (*SFP17ptr++) * YTmp; + } + (*xPtr0++) += x0; + (*xPtr1++) += x1; + (*xPtr2++) += x2; + (*xPtr3++) += x3; + (*xPtr4++) += x4; + (*xPtr5++) += x5; + (*xPtr6++) += x6; + (*xPtr7++) += x7; + (*xPtr8++) += x8; + (*xPtr9++) += x9; + (*xPtr10++) += x10; + (*xPtr11++) += x11; + (*xPtr12++) += x12; + (*xPtr13++) += x13; + (*xPtr14++) += x14; + (*xPtr15++) += x15; + (*xPtr16++) += x16; + (*xPtr17++) += x17; + t_v++; + t_o++; + } + break; + case 19: + xPtr1 = xPtr0 + nE; + xPtr2 = xPtr1 + nE; + xPtr3 = xPtr2 + nE; + xPtr4 = xPtr3 + nE; + xPtr5 = xPtr4 + nE; + xPtr6 = xPtr5 + nE; + xPtr7 = xPtr6 + nE; + xPtr8 = xPtr7 + nE; + xPtr9 = xPtr8 + nE; + xPtr10 = xPtr9 + nE; + xPtr11 = xPtr10 + nE; + xPtr12 = xPtr11 + nE; + xPtr13 = xPtr12 + nE; + xPtr14 = xPtr13 + nE; + xPtr15 = xPtr14 + nE; + xPtr16 = xPtr15 + nE; + xPtr17 = xPtr16 + nE; + xPtr18 = xPtr17 + nE; + while (t_v != t_vEnd) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + offset = nS * (*t_o); + YTmp = *YPtr; + SFP0ptr = wmhSFP0 + offset; + SFP1ptr = wmhSFP1 + offset; + SFP2ptr = wmhSFP2 + offset; + SFP3ptr = wmhSFP3 + offset; + SFP4ptr = wmhSFP4 + offset; + SFP5ptr = wmhSFP5 + offset; + SFP6ptr = wmhSFP6 + offset; + SFP7ptr = wmhSFP7 + offset; + SFP8ptr = wmhSFP8 + offset; + SFP9ptr = wmhSFP9 + offset; + SFP10ptr = wmhSFP10 + offset; + SFP11ptr = wmhSFP11 + offset; + SFP12ptr = wmhSFP12 + offset; + SFP13ptr = wmhSFP13 + offset; + SFP14ptr = wmhSFP14 + offset; + SFP15ptr = wmhSFP15 + offset; + SFP16ptr = wmhSFP16 + offset; + SFP17ptr = wmhSFP17 + offset; + SFP18ptr = wmhSFP18 + offset; + x0 = (*SFP0ptr++) * YTmp; + x1 = (*SFP1ptr++) * YTmp; + x2 = (*SFP2ptr++) * YTmp; + x3 = (*SFP3ptr++) * YTmp; + x4 = (*SFP4ptr++) * YTmp; + x5 = (*SFP5ptr++) * YTmp; + x6 = (*SFP6ptr++) * YTmp; + x7 = (*SFP7ptr++) * YTmp; + x8 = (*SFP8ptr++) * YTmp; + x9 = (*SFP9ptr++) * YTmp; + x10 = (*SFP10ptr++) * YTmp; + x11 = (*SFP11ptr++) * YTmp; + x12 = (*SFP12ptr++) * YTmp; + x13 = (*SFP13ptr++) * YTmp; + x14 = (*SFP14ptr++) * YTmp; + x15 = (*SFP15ptr++) * YTmp; + x16 = (*SFP16ptr++) * YTmp; + x17 = (*SFP17ptr++) * YTmp; + x18 = (*SFP18ptr++) * YTmp; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + x6 += (*SFP6ptr++) * YTmp; + x7 += (*SFP7ptr++) * YTmp; + x8 += (*SFP8ptr++) * YTmp; + x9 += (*SFP9ptr++) * YTmp; + x10 += (*SFP10ptr++) * YTmp; + x11 += (*SFP11ptr++) * YTmp; + x12 += (*SFP12ptr++) * YTmp; + x13 += (*SFP13ptr++) * YTmp; + x14 += (*SFP14ptr++) * YTmp; + x15 += (*SFP15ptr++) * YTmp; + x16 += (*SFP16ptr++) * YTmp; + x17 += (*SFP17ptr++) * YTmp; + x18 += (*SFP18ptr++) * YTmp; + } + (*xPtr0++) += x0; + (*xPtr1++) += x1; + (*xPtr2++) += x2; + (*xPtr3++) += x3; + (*xPtr4++) += x4; + (*xPtr5++) += x5; + (*xPtr6++) += x6; + (*xPtr7++) += x7; + (*xPtr8++) += x8; + (*xPtr9++) += x9; + (*xPtr10++) += x10; + (*xPtr11++) += x11; + (*xPtr12++) += x12; + (*xPtr13++) += x13; + (*xPtr14++) += x14; + (*xPtr15++) += x15; + (*xPtr16++) += x16; + (*xPtr17++) += x17; + (*xPtr18++) += x18; + t_v++; + t_o++; + } + break; + case 20: + xPtr1 = xPtr0 + nE; + xPtr2 = xPtr1 + nE; + xPtr3 = xPtr2 + nE; + xPtr4 = xPtr3 + nE; + xPtr5 = xPtr4 + nE; + xPtr6 = xPtr5 + nE; + xPtr7 = xPtr6 + nE; + xPtr8 = xPtr7 + nE; + xPtr9 = xPtr8 + nE; + xPtr10 = xPtr9 + nE; + xPtr11 = xPtr10 + nE; + xPtr12 = xPtr11 + nE; + xPtr13 = xPtr12 + nE; + xPtr14 = xPtr13 + nE; + xPtr15 = xPtr14 + nE; + xPtr16 = xPtr15 + nE; + xPtr17 = xPtr16 + nE; + xPtr18 = xPtr17 + nE; + xPtr19 = xPtr18 + nE; + while (t_v != t_vEnd) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + offset = nS * (*t_o); + YTmp = *YPtr; + SFP0ptr = wmhSFP0 + offset; + SFP1ptr = wmhSFP1 + offset; + SFP2ptr = wmhSFP2 + offset; + SFP3ptr = wmhSFP3 + offset; + SFP4ptr = wmhSFP4 + offset; + SFP5ptr = wmhSFP5 + offset; + SFP6ptr = wmhSFP6 + offset; + SFP7ptr = wmhSFP7 + offset; + SFP8ptr = wmhSFP8 + offset; + SFP9ptr = wmhSFP9 + offset; + SFP10ptr = wmhSFP10 + offset; + SFP11ptr = wmhSFP11 + offset; + SFP12ptr = wmhSFP12 + offset; + SFP13ptr = wmhSFP13 + offset; + SFP14ptr = wmhSFP14 + offset; + SFP15ptr = wmhSFP15 + offset; + SFP16ptr = wmhSFP16 + offset; + SFP17ptr = wmhSFP17 + offset; + SFP18ptr = wmhSFP18 + offset; + SFP19ptr = wmhSFP19 + offset; + x0 = (*SFP0ptr++) * YTmp; + x1 = (*SFP1ptr++) * YTmp; + x2 = (*SFP2ptr++) * YTmp; + x3 = (*SFP3ptr++) * YTmp; + x4 = (*SFP4ptr++) * YTmp; + x5 = (*SFP5ptr++) * YTmp; + x6 = (*SFP6ptr++) * YTmp; + x7 = (*SFP7ptr++) * YTmp; + x8 = (*SFP8ptr++) * YTmp; + x9 = (*SFP9ptr++) * YTmp; + x10 = (*SFP10ptr++) * YTmp; + x11 = (*SFP11ptr++) * YTmp; + x12 = (*SFP12ptr++) * YTmp; + x13 = (*SFP13ptr++) * YTmp; + x14 = (*SFP14ptr++) * YTmp; + x15 = (*SFP15ptr++) * YTmp; + x16 = (*SFP16ptr++) * YTmp; + x17 = (*SFP17ptr++) * YTmp; + x18 = (*SFP18ptr++) * YTmp; + x19 = (*SFP19ptr++) * YTmp; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + x6 += (*SFP6ptr++) * YTmp; + x7 += (*SFP7ptr++) * YTmp; + x8 += (*SFP8ptr++) * YTmp; + x9 += (*SFP9ptr++) * YTmp; + x10 += (*SFP10ptr++) * YTmp; + x11 += (*SFP11ptr++) * YTmp; + x12 += (*SFP12ptr++) * YTmp; + x13 += (*SFP13ptr++) * YTmp; + x14 += (*SFP14ptr++) * YTmp; + x15 += (*SFP15ptr++) * YTmp; + x16 += (*SFP16ptr++) * YTmp; + x17 += (*SFP17ptr++) * YTmp; + x18 += (*SFP18ptr++) * YTmp; + x19 += (*SFP19ptr++) * YTmp; + } + (*xPtr0++) += x0; + (*xPtr1++) += x1; + (*xPtr2++) += x2; + (*xPtr3++) += x3; + (*xPtr4++) += x4; + (*xPtr5++) += x5; + (*xPtr6++) += x6; + (*xPtr7++) += x7; + (*xPtr8++) += x8; + (*xPtr9++) += x9; + (*xPtr10++) += x10; + (*xPtr11++) += x11; + (*xPtr12++) += x12; + (*xPtr13++) += x13; + (*xPtr14++) += x14; + (*xPtr15++) += x15; + (*xPtr16++) += x16; + (*xPtr17++) += x17; + (*xPtr18++) += x18; + (*xPtr19++) += x19; + t_v++; + t_o++; + } + break; + } + } + + // isotropic compartments + if (nISO > 0) + { + t_v = ISOv + ISOthreadsT[id]; + t_vEnd = ISOv + ISOthreadsT[id+1]; + xPtr0 = x + nIC*nF + nEC*nE + ISOthreadsT[id]; + switch (nISO) + { + case 1: + while (t_v != t_vEnd) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + YTmp = *YPtr; + SFP0ptr = isoSFP0; + x0 = (*SFP0ptr++) * YTmp; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + } + (*xPtr0++) += x0; + t_v++; + } + break; + case 2: + xPtr1 = xPtr0 + nV; + while (t_v != t_vEnd) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + YTmp = *YPtr; + SFP0ptr = isoSFP0; + SFP1ptr = isoSFP1; + x0 = (*SFP0ptr++) * YTmp; + x1 = (*SFP1ptr++) * YTmp; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + } + (*xPtr0++) += x0; + (*xPtr1++) += x1; + t_v++; + } + break; + case 3: + xPtr1 = xPtr0 + nV; + xPtr2 = xPtr1 + nV; + while (t_v != t_vEnd) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + YTmp = *YPtr; + SFP0ptr = isoSFP0; + SFP1ptr = isoSFP1; + SFP2ptr = isoSFP2; + x0 = (*SFP0ptr++) * YTmp; + x1 = (*SFP1ptr++) * YTmp; + x2 = (*SFP2ptr++) * YTmp; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + } + (*xPtr0++) += x0; + (*xPtr1++) += x1; + (*xPtr2++) += x2; + t_v++; + } + break; + case 4: + xPtr1 = xPtr0 + nV; + xPtr2 = xPtr1 + nV; + xPtr3 = xPtr2 + nV; + while (t_v != t_vEnd) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + YTmp = *YPtr; + SFP0ptr = isoSFP0; + SFP1ptr = isoSFP1; + SFP2ptr = isoSFP2; + SFP3ptr = isoSFP3; + x0 = (*SFP0ptr++) * YTmp; + x1 = (*SFP1ptr++) * YTmp; + x2 = (*SFP2ptr++) * YTmp; + x3 = (*SFP3ptr++) * YTmp; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + } + (*xPtr0++) += x0; + (*xPtr1++) += x1; + (*xPtr2++) += x2; + (*xPtr3++) += x3; + t_v++; + } + break; + case 5: + xPtr1 = xPtr0 + nV; + xPtr2 = xPtr1 + nV; + xPtr3 = xPtr2 + nV; + xPtr4 = xPtr3 + nV; + while (t_v != t_vEnd) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + YTmp = *YPtr; + SFP0ptr = isoSFP0; + SFP1ptr = isoSFP1; + SFP2ptr = isoSFP2; + SFP3ptr = isoSFP3; + SFP4ptr = isoSFP4; + x0 = (*SFP0ptr++) * YTmp; + x1 = (*SFP1ptr++) * YTmp; + x2 = (*SFP2ptr++) * YTmp; + x3 = (*SFP3ptr++) * YTmp; + x4 = (*SFP4ptr++) * YTmp; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + } + (*xPtr0++) += x0; + (*xPtr1++) += x1; + (*xPtr2++) += x2; + (*xPtr3++) += x3; + (*xPtr4++) += x4; + t_v++; + } + break; + case 6: + xPtr1 = xPtr0 + nV; + xPtr2 = xPtr1 + nV; + xPtr3 = xPtr2 + nV; + xPtr4 = xPtr3 + nV; + xPtr5 = xPtr4 + nV; + while (t_v != t_vEnd) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + YTmp = *YPtr; + SFP0ptr = isoSFP0; + SFP1ptr = isoSFP1; + SFP2ptr = isoSFP2; + SFP3ptr = isoSFP3; + SFP4ptr = isoSFP4; + SFP5ptr = isoSFP5; + x0 = (*SFP0ptr++) * YTmp; + x1 = (*SFP1ptr++) * YTmp; + x2 = (*SFP2ptr++) * YTmp; + x3 = (*SFP3ptr++) * YTmp; + x4 = (*SFP4ptr++) * YTmp; + x5 = (*SFP5ptr++) * YTmp; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + } + (*xPtr0++) += x0; + (*xPtr1++) += x1; + (*xPtr2++) += x2; + (*xPtr3++) += x3; + (*xPtr4++) += x4; + (*xPtr5++) += x5; + t_v++; + } + break; + case 7: + xPtr1 = xPtr0 + nV; + xPtr2 = xPtr1 + nV; + xPtr3 = xPtr2 + nV; + xPtr4 = xPtr3 + nV; + xPtr5 = xPtr4 + nV; + xPtr6 = xPtr5 + nV; + while (t_v != t_vEnd) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + YTmp = *YPtr; + SFP0ptr = isoSFP0; + SFP1ptr = isoSFP1; + SFP2ptr = isoSFP2; + SFP3ptr = isoSFP3; + SFP4ptr = isoSFP4; + SFP5ptr = isoSFP5; + SFP6ptr = isoSFP6; + x0 = (*SFP0ptr++) * YTmp; + x1 = (*SFP1ptr++) * YTmp; + x2 = (*SFP2ptr++) * YTmp; + x3 = (*SFP3ptr++) * YTmp; + x4 = (*SFP4ptr++) * YTmp; + x5 = (*SFP5ptr++) * YTmp; + x6 = (*SFP6ptr++) * YTmp; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + x6 += (*SFP6ptr++) * YTmp; + } + (*xPtr0++) += x0; + (*xPtr1++) += x1; + (*xPtr2++) += x2; + (*xPtr3++) += x3; + (*xPtr4++) += x4; + (*xPtr5++) += x5; + (*xPtr6++) += x6; + t_v++; + } + break; + case 8: + xPtr1 = xPtr0 + nV; + xPtr2 = xPtr1 + nV; + xPtr3 = xPtr2 + nV; + xPtr4 = xPtr3 + nV; + xPtr5 = xPtr4 + nV; + xPtr6 = xPtr5 + nV; + xPtr7 = xPtr6 + nV; + while (t_v != t_vEnd) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + YTmp = *YPtr; + SFP0ptr = isoSFP0; + SFP1ptr = isoSFP1; + SFP2ptr = isoSFP2; + SFP3ptr = isoSFP3; + SFP4ptr = isoSFP4; + SFP5ptr = isoSFP5; + SFP6ptr = isoSFP6; + SFP7ptr = isoSFP7; + x0 = (*SFP0ptr++) * YTmp; + x1 = (*SFP1ptr++) * YTmp; + x2 = (*SFP2ptr++) * YTmp; + x3 = (*SFP3ptr++) * YTmp; + x4 = (*SFP4ptr++) * YTmp; + x5 = (*SFP5ptr++) * YTmp; + x6 = (*SFP6ptr++) * YTmp; + x7 = (*SFP7ptr++) * YTmp; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + x6 += (*SFP6ptr++) * YTmp; + x7 += (*SFP7ptr++) * YTmp; + } + (*xPtr0++) += x0; + (*xPtr1++) += x1; + (*xPtr2++) += x2; + (*xPtr3++) += x3; + (*xPtr4++) += x4; + (*xPtr5++) += x5; + (*xPtr6++) += x6; + (*xPtr7++) += x7; + t_v++; + } + break; + case 9: + xPtr1 = xPtr0 + nV; + xPtr2 = xPtr1 + nV; + xPtr3 = xPtr2 + nV; + xPtr4 = xPtr3 + nV; + xPtr5 = xPtr4 + nV; + xPtr6 = xPtr5 + nV; + xPtr7 = xPtr6 + nV; + xPtr8 = xPtr7 + nV; + while (t_v != t_vEnd) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + YTmp = *YPtr; + SFP0ptr = isoSFP0; + SFP1ptr = isoSFP1; + SFP2ptr = isoSFP2; + SFP3ptr = isoSFP3; + SFP4ptr = isoSFP4; + SFP5ptr = isoSFP5; + SFP6ptr = isoSFP6; + SFP7ptr = isoSFP7; + SFP8ptr = isoSFP8; + x0 = (*SFP0ptr++) * YTmp; + x1 = (*SFP1ptr++) * YTmp; + x2 = (*SFP2ptr++) * YTmp; + x3 = (*SFP3ptr++) * YTmp; + x4 = (*SFP4ptr++) * YTmp; + x5 = (*SFP5ptr++) * YTmp; + x6 = (*SFP6ptr++) * YTmp; + x7 = (*SFP7ptr++) * YTmp; + x8 = (*SFP8ptr++) * YTmp; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + x6 += (*SFP6ptr++) * YTmp; + x7 += (*SFP7ptr++) * YTmp; + x8 += (*SFP8ptr++) * YTmp; + } + (*xPtr0++) += x0; + (*xPtr1++) += x1; + (*xPtr2++) += x2; + (*xPtr3++) += x3; + (*xPtr4++) += x4; + (*xPtr5++) += x5; + (*xPtr6++) += x6; + (*xPtr7++) += x7; + (*xPtr8++) += x8; + t_v++; + } + break; + case 10: + xPtr1 = xPtr0 + nV; + xPtr2 = xPtr1 + nV; + xPtr3 = xPtr2 + nV; + xPtr4 = xPtr3 + nV; + xPtr5 = xPtr4 + nV; + xPtr6 = xPtr5 + nV; + xPtr7 = xPtr6 + nV; + xPtr8 = xPtr7 + nV; + xPtr9 = xPtr8 + nV; + while (t_v != t_vEnd) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + YTmp = *YPtr; + SFP0ptr = isoSFP0; + SFP1ptr = isoSFP1; + SFP2ptr = isoSFP2; + SFP3ptr = isoSFP3; + SFP4ptr = isoSFP4; + SFP5ptr = isoSFP5; + SFP6ptr = isoSFP6; + SFP7ptr = isoSFP7; + SFP8ptr = isoSFP8; + SFP9ptr = isoSFP9; + x0 = (*SFP0ptr++) * YTmp; + x1 = (*SFP1ptr++) * YTmp; + x2 = (*SFP2ptr++) * YTmp; + x3 = (*SFP3ptr++) * YTmp; + x4 = (*SFP4ptr++) * YTmp; + x5 = (*SFP5ptr++) * YTmp; + x6 = (*SFP6ptr++) * YTmp; + x7 = (*SFP7ptr++) * YTmp; + x8 = (*SFP8ptr++) * YTmp; + x9 = (*SFP9ptr++) * YTmp; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + x6 += (*SFP6ptr++) * YTmp; + x7 += (*SFP7ptr++) * YTmp; + x8 += (*SFP8ptr++) * YTmp; + x9 += (*SFP9ptr++) * YTmp; + } + (*xPtr0++) += x0; + (*xPtr1++) += x1; + (*xPtr2++) += x2; + (*xPtr3++) += x3; + (*xPtr4++) += x4; + (*xPtr5++) += x5; + (*xPtr6++) += x6; + (*xPtr7++) += x7; + (*xPtr8++) += x8; + (*xPtr9++) += x9; + t_v++; + } + break; + case 11: + xPtr1 = xPtr0 + nV; + xPtr2 = xPtr1 + nV; + xPtr3 = xPtr2 + nV; + xPtr4 = xPtr3 + nV; + xPtr5 = xPtr4 + nV; + xPtr6 = xPtr5 + nV; + xPtr7 = xPtr6 + nV; + xPtr8 = xPtr7 + nV; + xPtr9 = xPtr8 + nV; + xPtr10 = xPtr9 + nV; + while (t_v != t_vEnd) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + YTmp = *YPtr; + SFP0ptr = isoSFP0; + SFP1ptr = isoSFP1; + SFP2ptr = isoSFP2; + SFP3ptr = isoSFP3; + SFP4ptr = isoSFP4; + SFP5ptr = isoSFP5; + SFP6ptr = isoSFP6; + SFP7ptr = isoSFP7; + SFP8ptr = isoSFP8; + SFP9ptr = isoSFP9; + SFP10ptr = isoSFP10; + x0 = (*SFP0ptr++) * YTmp; + x1 = (*SFP1ptr++) * YTmp; + x2 = (*SFP2ptr++) * YTmp; + x3 = (*SFP3ptr++) * YTmp; + x4 = (*SFP4ptr++) * YTmp; + x5 = (*SFP5ptr++) * YTmp; + x6 = (*SFP6ptr++) * YTmp; + x7 = (*SFP7ptr++) * YTmp; + x8 = (*SFP8ptr++) * YTmp; + x9 = (*SFP9ptr++) * YTmp; + x10 = (*SFP10ptr++) * YTmp; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + x6 += (*SFP6ptr++) * YTmp; + x7 += (*SFP7ptr++) * YTmp; + x8 += (*SFP8ptr++) * YTmp; + x9 += (*SFP9ptr++) * YTmp; + x10 += (*SFP10ptr++) * YTmp; + } + (*xPtr0++) += x0; + (*xPtr1++) += x1; + (*xPtr2++) += x2; + (*xPtr3++) += x3; + (*xPtr4++) += x4; + (*xPtr5++) += x5; + (*xPtr6++) += x6; + (*xPtr7++) += x7; + (*xPtr8++) += x8; + (*xPtr9++) += x9; + (*xPtr10++) += x10; + t_v++; + } + break; + case 12: + xPtr1 = xPtr0 + nV; + xPtr2 = xPtr1 + nV; + xPtr3 = xPtr2 + nV; + xPtr4 = xPtr3 + nV; + xPtr5 = xPtr4 + nV; + xPtr6 = xPtr5 + nV; + xPtr7 = xPtr6 + nV; + xPtr8 = xPtr7 + nV; + xPtr9 = xPtr8 + nV; + xPtr10 = xPtr9 + nV; + xPtr11 = xPtr10 + nV; + while (t_v != t_vEnd) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + YTmp = *YPtr; + SFP0ptr = isoSFP0; + SFP1ptr = isoSFP1; + SFP2ptr = isoSFP2; + SFP3ptr = isoSFP3; + SFP4ptr = isoSFP4; + SFP5ptr = isoSFP5; + SFP6ptr = isoSFP6; + SFP7ptr = isoSFP7; + SFP8ptr = isoSFP8; + SFP9ptr = isoSFP9; + SFP10ptr = isoSFP10; + SFP11ptr = isoSFP11; + x0 = (*SFP0ptr++) * YTmp; + x1 = (*SFP1ptr++) * YTmp; + x2 = (*SFP2ptr++) * YTmp; + x3 = (*SFP3ptr++) * YTmp; + x4 = (*SFP4ptr++) * YTmp; + x5 = (*SFP5ptr++) * YTmp; + x6 = (*SFP6ptr++) * YTmp; + x7 = (*SFP7ptr++) * YTmp; + x8 = (*SFP8ptr++) * YTmp; + x9 = (*SFP9ptr++) * YTmp; + x10 = (*SFP10ptr++) * YTmp; + x11 = (*SFP11ptr++) * YTmp; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + x6 += (*SFP6ptr++) * YTmp; + x7 += (*SFP7ptr++) * YTmp; + x8 += (*SFP8ptr++) * YTmp; + x9 += (*SFP9ptr++) * YTmp; + x10 += (*SFP10ptr++) * YTmp; + x11 += (*SFP11ptr++) * YTmp; + } + (*xPtr0++) += x0; + (*xPtr1++) += x1; + (*xPtr2++) += x2; + (*xPtr3++) += x3; + (*xPtr4++) += x4; + (*xPtr5++) += x5; + (*xPtr6++) += x6; + (*xPtr7++) += x7; + (*xPtr8++) += x8; + (*xPtr9++) += x9; + (*xPtr10++) += x10; + (*xPtr11++) += x11; + t_v++; + } + break; + case 13: + xPtr1 = xPtr0 + nV; + xPtr2 = xPtr1 + nV; + xPtr3 = xPtr2 + nV; + xPtr4 = xPtr3 + nV; + xPtr5 = xPtr4 + nV; + xPtr6 = xPtr5 + nV; + xPtr7 = xPtr6 + nV; + xPtr8 = xPtr7 + nV; + xPtr9 = xPtr8 + nV; + xPtr10 = xPtr9 + nV; + xPtr11 = xPtr10 + nV; + xPtr12 = xPtr11 + nV; + while (t_v != t_vEnd) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + YTmp = *YPtr; + SFP0ptr = isoSFP0; + SFP1ptr = isoSFP1; + SFP2ptr = isoSFP2; + SFP3ptr = isoSFP3; + SFP4ptr = isoSFP4; + SFP5ptr = isoSFP5; + SFP6ptr = isoSFP6; + SFP7ptr = isoSFP7; + SFP8ptr = isoSFP8; + SFP9ptr = isoSFP9; + SFP10ptr = isoSFP10; + SFP11ptr = isoSFP11; + SFP12ptr = isoSFP12; + x0 = (*SFP0ptr++) * YTmp; + x1 = (*SFP1ptr++) * YTmp; + x2 = (*SFP2ptr++) * YTmp; + x3 = (*SFP3ptr++) * YTmp; + x4 = (*SFP4ptr++) * YTmp; + x5 = (*SFP5ptr++) * YTmp; + x6 = (*SFP6ptr++) * YTmp; + x7 = (*SFP7ptr++) * YTmp; + x8 = (*SFP8ptr++) * YTmp; + x9 = (*SFP9ptr++) * YTmp; + x10 = (*SFP10ptr++) * YTmp; + x11 = (*SFP11ptr++) * YTmp; + x12 = (*SFP12ptr++) * YTmp; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + x6 += (*SFP6ptr++) * YTmp; + x7 += (*SFP7ptr++) * YTmp; + x8 += (*SFP8ptr++) * YTmp; + x9 += (*SFP9ptr++) * YTmp; + x10 += (*SFP10ptr++) * YTmp; + x11 += (*SFP11ptr++) * YTmp; + x12 += (*SFP12ptr++) * YTmp; + } + (*xPtr0++) += x0; + (*xPtr1++) += x1; + (*xPtr2++) += x2; + (*xPtr3++) += x3; + (*xPtr4++) += x4; + (*xPtr5++) += x5; + (*xPtr6++) += x6; + (*xPtr7++) += x7; + (*xPtr8++) += x8; + (*xPtr9++) += x9; + (*xPtr10++) += x10; + (*xPtr11++) += x11; + (*xPtr12++) += x12; + t_v++; + } + break; + case 14: + xPtr1 = xPtr0 + nV; + xPtr2 = xPtr1 + nV; + xPtr3 = xPtr2 + nV; + xPtr4 = xPtr3 + nV; + xPtr5 = xPtr4 + nV; + xPtr6 = xPtr5 + nV; + xPtr7 = xPtr6 + nV; + xPtr8 = xPtr7 + nV; + xPtr9 = xPtr8 + nV; + xPtr10 = xPtr9 + nV; + xPtr11 = xPtr10 + nV; + xPtr12 = xPtr11 + nV; + xPtr13 = xPtr12 + nV; + while (t_v != t_vEnd) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + YTmp = *YPtr; + SFP0ptr = isoSFP0; + SFP1ptr = isoSFP1; + SFP2ptr = isoSFP2; + SFP3ptr = isoSFP3; + SFP4ptr = isoSFP4; + SFP5ptr = isoSFP5; + SFP6ptr = isoSFP6; + SFP7ptr = isoSFP7; + SFP8ptr = isoSFP8; + SFP9ptr = isoSFP9; + SFP10ptr = isoSFP10; + SFP11ptr = isoSFP11; + SFP12ptr = isoSFP12; + SFP13ptr = isoSFP13; + x0 = (*SFP0ptr++) * YTmp; + x1 = (*SFP1ptr++) * YTmp; + x2 = (*SFP2ptr++) * YTmp; + x3 = (*SFP3ptr++) * YTmp; + x4 = (*SFP4ptr++) * YTmp; + x5 = (*SFP5ptr++) * YTmp; + x6 = (*SFP6ptr++) * YTmp; + x7 = (*SFP7ptr++) * YTmp; + x8 = (*SFP8ptr++) * YTmp; + x9 = (*SFP9ptr++) * YTmp; + x10 = (*SFP10ptr++) * YTmp; + x11 = (*SFP11ptr++) * YTmp; + x12 = (*SFP12ptr++) * YTmp; + x13 = (*SFP13ptr++) * YTmp; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + x6 += (*SFP6ptr++) * YTmp; + x7 += (*SFP7ptr++) * YTmp; + x8 += (*SFP8ptr++) * YTmp; + x9 += (*SFP9ptr++) * YTmp; + x10 += (*SFP10ptr++) * YTmp; + x11 += (*SFP11ptr++) * YTmp; + x12 += (*SFP12ptr++) * YTmp; + x13 += (*SFP13ptr++) * YTmp; + } + (*xPtr0++) += x0; + (*xPtr1++) += x1; + (*xPtr2++) += x2; + (*xPtr3++) += x3; + (*xPtr4++) += x4; + (*xPtr5++) += x5; + (*xPtr6++) += x6; + (*xPtr7++) += x7; + (*xPtr8++) += x8; + (*xPtr9++) += x9; + (*xPtr10++) += x10; + (*xPtr11++) += x11; + (*xPtr12++) += x12; + (*xPtr13++) += x13; + t_v++; + } + break; + case 15: + xPtr1 = xPtr0 + nV; + xPtr2 = xPtr1 + nV; + xPtr3 = xPtr2 + nV; + xPtr4 = xPtr3 + nV; + xPtr5 = xPtr4 + nV; + xPtr6 = xPtr5 + nV; + xPtr7 = xPtr6 + nV; + xPtr8 = xPtr7 + nV; + xPtr9 = xPtr8 + nV; + xPtr10 = xPtr9 + nV; + xPtr11 = xPtr10 + nV; + xPtr12 = xPtr11 + nV; + xPtr13 = xPtr12 + nV; + xPtr14 = xPtr13 + nV; + while (t_v != t_vEnd) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + YTmp = *YPtr; + SFP0ptr = isoSFP0; + SFP1ptr = isoSFP1; + SFP2ptr = isoSFP2; + SFP3ptr = isoSFP3; + SFP4ptr = isoSFP4; + SFP5ptr = isoSFP5; + SFP6ptr = isoSFP6; + SFP7ptr = isoSFP7; + SFP8ptr = isoSFP8; + SFP9ptr = isoSFP9; + SFP10ptr = isoSFP10; + SFP11ptr = isoSFP11; + SFP12ptr = isoSFP12; + SFP13ptr = isoSFP13; + SFP14ptr = isoSFP14; + x0 = (*SFP0ptr++) * YTmp; + x1 = (*SFP1ptr++) * YTmp; + x2 = (*SFP2ptr++) * YTmp; + x3 = (*SFP3ptr++) * YTmp; + x4 = (*SFP4ptr++) * YTmp; + x5 = (*SFP5ptr++) * YTmp; + x6 = (*SFP6ptr++) * YTmp; + x7 = (*SFP7ptr++) * YTmp; + x8 = (*SFP8ptr++) * YTmp; + x9 = (*SFP9ptr++) * YTmp; + x10 = (*SFP10ptr++) * YTmp; + x11 = (*SFP11ptr++) * YTmp; + x12 = (*SFP12ptr++) * YTmp; + x13 = (*SFP13ptr++) * YTmp; + x14 = (*SFP14ptr++) * YTmp; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + x6 += (*SFP6ptr++) * YTmp; + x7 += (*SFP7ptr++) * YTmp; + x8 += (*SFP8ptr++) * YTmp; + x9 += (*SFP9ptr++) * YTmp; + x10 += (*SFP10ptr++) * YTmp; + x11 += (*SFP11ptr++) * YTmp; + x12 += (*SFP12ptr++) * YTmp; + x13 += (*SFP13ptr++) * YTmp; + x14 += (*SFP14ptr++) * YTmp; + } + (*xPtr0++) += x0; + (*xPtr1++) += x1; + (*xPtr2++) += x2; + (*xPtr3++) += x3; + (*xPtr4++) += x4; + (*xPtr5++) += x5; + (*xPtr6++) += x6; + (*xPtr7++) += x7; + (*xPtr8++) += x8; + (*xPtr9++) += x9; + (*xPtr10++) += x10; + (*xPtr11++) += x11; + (*xPtr12++) += x12; + (*xPtr13++) += x13; + (*xPtr14++) += x14; + t_v++; + } + break; + case 16: + xPtr1 = xPtr0 + nV; + xPtr2 = xPtr1 + nV; + xPtr3 = xPtr2 + nV; + xPtr4 = xPtr3 + nV; + xPtr5 = xPtr4 + nV; + xPtr6 = xPtr5 + nV; + xPtr7 = xPtr6 + nV; + xPtr8 = xPtr7 + nV; + xPtr9 = xPtr8 + nV; + xPtr10 = xPtr9 + nV; + xPtr11 = xPtr10 + nV; + xPtr12 = xPtr11 + nV; + xPtr13 = xPtr12 + nV; + xPtr14 = xPtr13 + nV; + xPtr15 = xPtr14 + nV; + while (t_v != t_vEnd) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + YTmp = *YPtr; + SFP0ptr = isoSFP0; + SFP1ptr = isoSFP1; + SFP2ptr = isoSFP2; + SFP3ptr = isoSFP3; + SFP4ptr = isoSFP4; + SFP5ptr = isoSFP5; + SFP6ptr = isoSFP6; + SFP7ptr = isoSFP7; + SFP8ptr = isoSFP8; + SFP9ptr = isoSFP9; + SFP10ptr = isoSFP10; + SFP11ptr = isoSFP11; + SFP12ptr = isoSFP12; + SFP13ptr = isoSFP13; + SFP14ptr = isoSFP14; + SFP15ptr = isoSFP15; + x0 = (*SFP0ptr++) * YTmp; + x1 = (*SFP1ptr++) * YTmp; + x2 = (*SFP2ptr++) * YTmp; + x3 = (*SFP3ptr++) * YTmp; + x4 = (*SFP4ptr++) * YTmp; + x5 = (*SFP5ptr++) * YTmp; + x6 = (*SFP6ptr++) * YTmp; + x7 = (*SFP7ptr++) * YTmp; + x8 = (*SFP8ptr++) * YTmp; + x9 = (*SFP9ptr++) * YTmp; + x10 = (*SFP10ptr++) * YTmp; + x11 = (*SFP11ptr++) * YTmp; + x12 = (*SFP12ptr++) * YTmp; + x13 = (*SFP13ptr++) * YTmp; + x14 = (*SFP14ptr++) * YTmp; + x15 = (*SFP15ptr++) * YTmp; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + x6 += (*SFP6ptr++) * YTmp; + x7 += (*SFP7ptr++) * YTmp; + x8 += (*SFP8ptr++) * YTmp; + x9 += (*SFP9ptr++) * YTmp; + x10 += (*SFP10ptr++) * YTmp; + x11 += (*SFP11ptr++) * YTmp; + x12 += (*SFP12ptr++) * YTmp; + x13 += (*SFP13ptr++) * YTmp; + x14 += (*SFP14ptr++) * YTmp; + x15 += (*SFP15ptr++) * YTmp; + } + (*xPtr0++) += x0; + (*xPtr1++) += x1; + (*xPtr2++) += x2; + (*xPtr3++) += x3; + (*xPtr4++) += x4; + (*xPtr5++) += x5; + (*xPtr6++) += x6; + (*xPtr7++) += x7; + (*xPtr8++) += x8; + (*xPtr9++) += x9; + (*xPtr10++) += x10; + (*xPtr11++) += x11; + (*xPtr12++) += x12; + (*xPtr13++) += x13; + (*xPtr14++) += x14; + (*xPtr15++) += x15; + t_v++; + } + break; + case 17: + xPtr1 = xPtr0 + nV; + xPtr2 = xPtr1 + nV; + xPtr3 = xPtr2 + nV; + xPtr4 = xPtr3 + nV; + xPtr5 = xPtr4 + nV; + xPtr6 = xPtr5 + nV; + xPtr7 = xPtr6 + nV; + xPtr8 = xPtr7 + nV; + xPtr9 = xPtr8 + nV; + xPtr10 = xPtr9 + nV; + xPtr11 = xPtr10 + nV; + xPtr12 = xPtr11 + nV; + xPtr13 = xPtr12 + nV; + xPtr14 = xPtr13 + nV; + xPtr15 = xPtr14 + nV; + xPtr16 = xPtr15 + nV; + while (t_v != t_vEnd) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + YTmp = *YPtr; + SFP0ptr = isoSFP0; + SFP1ptr = isoSFP1; + SFP2ptr = isoSFP2; + SFP3ptr = isoSFP3; + SFP4ptr = isoSFP4; + SFP5ptr = isoSFP5; + SFP6ptr = isoSFP6; + SFP7ptr = isoSFP7; + SFP8ptr = isoSFP8; + SFP9ptr = isoSFP9; + SFP10ptr = isoSFP10; + SFP11ptr = isoSFP11; + SFP12ptr = isoSFP12; + SFP13ptr = isoSFP13; + SFP14ptr = isoSFP14; + SFP15ptr = isoSFP15; + SFP16ptr = isoSFP16; + x0 = (*SFP0ptr++) * YTmp; + x1 = (*SFP1ptr++) * YTmp; + x2 = (*SFP2ptr++) * YTmp; + x3 = (*SFP3ptr++) * YTmp; + x4 = (*SFP4ptr++) * YTmp; + x5 = (*SFP5ptr++) * YTmp; + x6 = (*SFP6ptr++) * YTmp; + x7 = (*SFP7ptr++) * YTmp; + x8 = (*SFP8ptr++) * YTmp; + x9 = (*SFP9ptr++) * YTmp; + x10 = (*SFP10ptr++) * YTmp; + x11 = (*SFP11ptr++) * YTmp; + x12 = (*SFP12ptr++) * YTmp; + x13 = (*SFP13ptr++) * YTmp; + x14 = (*SFP14ptr++) * YTmp; + x15 = (*SFP15ptr++) * YTmp; + x16 = (*SFP16ptr++) * YTmp; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + x6 += (*SFP6ptr++) * YTmp; + x7 += (*SFP7ptr++) * YTmp; + x8 += (*SFP8ptr++) * YTmp; + x9 += (*SFP9ptr++) * YTmp; + x10 += (*SFP10ptr++) * YTmp; + x11 += (*SFP11ptr++) * YTmp; + x12 += (*SFP12ptr++) * YTmp; + x13 += (*SFP13ptr++) * YTmp; + x14 += (*SFP14ptr++) * YTmp; + x15 += (*SFP15ptr++) * YTmp; + x16 += (*SFP16ptr++) * YTmp; + } + (*xPtr0++) += x0; + (*xPtr1++) += x1; + (*xPtr2++) += x2; + (*xPtr3++) += x3; + (*xPtr4++) += x4; + (*xPtr5++) += x5; + (*xPtr6++) += x6; + (*xPtr7++) += x7; + (*xPtr8++) += x8; + (*xPtr9++) += x9; + (*xPtr10++) += x10; + (*xPtr11++) += x11; + (*xPtr12++) += x12; + (*xPtr13++) += x13; + (*xPtr14++) += x14; + (*xPtr15++) += x15; + (*xPtr16++) += x16; + t_v++; + } + break; + case 18: + xPtr1 = xPtr0 + nV; + xPtr2 = xPtr1 + nV; + xPtr3 = xPtr2 + nV; + xPtr4 = xPtr3 + nV; + xPtr5 = xPtr4 + nV; + xPtr6 = xPtr5 + nV; + xPtr7 = xPtr6 + nV; + xPtr8 = xPtr7 + nV; + xPtr9 = xPtr8 + nV; + xPtr10 = xPtr9 + nV; + xPtr11 = xPtr10 + nV; + xPtr12 = xPtr11 + nV; + xPtr13 = xPtr12 + nV; + xPtr14 = xPtr13 + nV; + xPtr15 = xPtr14 + nV; + xPtr16 = xPtr15 + nV; + xPtr17 = xPtr16 + nV; + while (t_v != t_vEnd) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + YTmp = *YPtr; + SFP0ptr = isoSFP0; + SFP1ptr = isoSFP1; + SFP2ptr = isoSFP2; + SFP3ptr = isoSFP3; + SFP4ptr = isoSFP4; + SFP5ptr = isoSFP5; + SFP6ptr = isoSFP6; + SFP7ptr = isoSFP7; + SFP8ptr = isoSFP8; + SFP9ptr = isoSFP9; + SFP10ptr = isoSFP10; + SFP11ptr = isoSFP11; + SFP12ptr = isoSFP12; + SFP13ptr = isoSFP13; + SFP14ptr = isoSFP14; + SFP15ptr = isoSFP15; + SFP16ptr = isoSFP16; + SFP17ptr = isoSFP17; + x0 = (*SFP0ptr++) * YTmp; + x1 = (*SFP1ptr++) * YTmp; + x2 = (*SFP2ptr++) * YTmp; + x3 = (*SFP3ptr++) * YTmp; + x4 = (*SFP4ptr++) * YTmp; + x5 = (*SFP5ptr++) * YTmp; + x6 = (*SFP6ptr++) * YTmp; + x7 = (*SFP7ptr++) * YTmp; + x8 = (*SFP8ptr++) * YTmp; + x9 = (*SFP9ptr++) * YTmp; + x10 = (*SFP10ptr++) * YTmp; + x11 = (*SFP11ptr++) * YTmp; + x12 = (*SFP12ptr++) * YTmp; + x13 = (*SFP13ptr++) * YTmp; + x14 = (*SFP14ptr++) * YTmp; + x15 = (*SFP15ptr++) * YTmp; + x16 = (*SFP16ptr++) * YTmp; + x17 = (*SFP17ptr++) * YTmp; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + x6 += (*SFP6ptr++) * YTmp; + x7 += (*SFP7ptr++) * YTmp; + x8 += (*SFP8ptr++) * YTmp; + x9 += (*SFP9ptr++) * YTmp; + x10 += (*SFP10ptr++) * YTmp; + x11 += (*SFP11ptr++) * YTmp; + x12 += (*SFP12ptr++) * YTmp; + x13 += (*SFP13ptr++) * YTmp; + x14 += (*SFP14ptr++) * YTmp; + x15 += (*SFP15ptr++) * YTmp; + x16 += (*SFP16ptr++) * YTmp; + x17 += (*SFP17ptr++) * YTmp; + } + (*xPtr0++) += x0; + (*xPtr1++) += x1; + (*xPtr2++) += x2; + (*xPtr3++) += x3; + (*xPtr4++) += x4; + (*xPtr5++) += x5; + (*xPtr6++) += x6; + (*xPtr7++) += x7; + (*xPtr8++) += x8; + (*xPtr9++) += x9; + (*xPtr10++) += x10; + (*xPtr11++) += x11; + (*xPtr12++) += x12; + (*xPtr13++) += x13; + (*xPtr14++) += x14; + (*xPtr15++) += x15; + (*xPtr16++) += x16; + (*xPtr17++) += x17; + t_v++; + } + break; + case 19: + xPtr1 = xPtr0 + nV; + xPtr2 = xPtr1 + nV; + xPtr3 = xPtr2 + nV; + xPtr4 = xPtr3 + nV; + xPtr5 = xPtr4 + nV; + xPtr6 = xPtr5 + nV; + xPtr7 = xPtr6 + nV; + xPtr8 = xPtr7 + nV; + xPtr9 = xPtr8 + nV; + xPtr10 = xPtr9 + nV; + xPtr11 = xPtr10 + nV; + xPtr12 = xPtr11 + nV; + xPtr13 = xPtr12 + nV; + xPtr14 = xPtr13 + nV; + xPtr15 = xPtr14 + nV; + xPtr16 = xPtr15 + nV; + xPtr17 = xPtr16 + nV; + xPtr18 = xPtr17 + nV; + while (t_v != t_vEnd) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + YTmp = *YPtr; + SFP0ptr = isoSFP0; + SFP1ptr = isoSFP1; + SFP2ptr = isoSFP2; + SFP3ptr = isoSFP3; + SFP4ptr = isoSFP4; + SFP5ptr = isoSFP5; + SFP6ptr = isoSFP6; + SFP7ptr = isoSFP7; + SFP8ptr = isoSFP8; + SFP9ptr = isoSFP9; + SFP10ptr = isoSFP10; + SFP11ptr = isoSFP11; + SFP12ptr = isoSFP12; + SFP13ptr = isoSFP13; + SFP14ptr = isoSFP14; + SFP15ptr = isoSFP15; + SFP16ptr = isoSFP16; + SFP17ptr = isoSFP17; + SFP18ptr = isoSFP18; + x0 = (*SFP0ptr++) * YTmp; + x1 = (*SFP1ptr++) * YTmp; + x2 = (*SFP2ptr++) * YTmp; + x3 = (*SFP3ptr++) * YTmp; + x4 = (*SFP4ptr++) * YTmp; + x5 = (*SFP5ptr++) * YTmp; + x6 = (*SFP6ptr++) * YTmp; + x7 = (*SFP7ptr++) * YTmp; + x8 = (*SFP8ptr++) * YTmp; + x9 = (*SFP9ptr++) * YTmp; + x10 = (*SFP10ptr++) * YTmp; + x11 = (*SFP11ptr++) * YTmp; + x12 = (*SFP12ptr++) * YTmp; + x13 = (*SFP13ptr++) * YTmp; + x14 = (*SFP14ptr++) * YTmp; + x15 = (*SFP15ptr++) * YTmp; + x16 = (*SFP16ptr++) * YTmp; + x17 = (*SFP17ptr++) * YTmp; + x18 = (*SFP18ptr++) * YTmp; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + x6 += (*SFP6ptr++) * YTmp; + x7 += (*SFP7ptr++) * YTmp; + x8 += (*SFP8ptr++) * YTmp; + x9 += (*SFP9ptr++) * YTmp; + x10 += (*SFP10ptr++) * YTmp; + x11 += (*SFP11ptr++) * YTmp; + x12 += (*SFP12ptr++) * YTmp; + x13 += (*SFP13ptr++) * YTmp; + x14 += (*SFP14ptr++) * YTmp; + x15 += (*SFP15ptr++) * YTmp; + x16 += (*SFP16ptr++) * YTmp; + x17 += (*SFP17ptr++) * YTmp; + x18 += (*SFP18ptr++) * YTmp; + } + (*xPtr0++) += x0; + (*xPtr1++) += x1; + (*xPtr2++) += x2; + (*xPtr3++) += x3; + (*xPtr4++) += x4; + (*xPtr5++) += x5; + (*xPtr6++) += x6; + (*xPtr7++) += x7; + (*xPtr8++) += x8; + (*xPtr9++) += x9; + (*xPtr10++) += x10; + (*xPtr11++) += x11; + (*xPtr12++) += x12; + (*xPtr13++) += x13; + (*xPtr14++) += x14; + (*xPtr15++) += x15; + (*xPtr16++) += x16; + (*xPtr17++) += x17; + (*xPtr18++) += x18; + t_v++; + } + break; + case 20: + xPtr1 = xPtr0 + nV; + xPtr2 = xPtr1 + nV; + xPtr3 = xPtr2 + nV; + xPtr4 = xPtr3 + nV; + xPtr5 = xPtr4 + nV; + xPtr6 = xPtr5 + nV; + xPtr7 = xPtr6 + nV; + xPtr8 = xPtr7 + nV; + xPtr9 = xPtr8 + nV; + xPtr10 = xPtr9 + nV; + xPtr11 = xPtr10 + nV; + xPtr12 = xPtr11 + nV; + xPtr13 = xPtr12 + nV; + xPtr14 = xPtr13 + nV; + xPtr15 = xPtr14 + nV; + xPtr16 = xPtr15 + nV; + xPtr17 = xPtr16 + nV; + xPtr18 = xPtr17 + nV; + xPtr19 = xPtr18 + nV; + while (t_v != t_vEnd) + { + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + YTmp = *YPtr; + SFP0ptr = isoSFP0; + SFP1ptr = isoSFP1; + SFP2ptr = isoSFP2; + SFP3ptr = isoSFP3; + SFP4ptr = isoSFP4; + SFP5ptr = isoSFP5; + SFP6ptr = isoSFP6; + SFP7ptr = isoSFP7; + SFP8ptr = isoSFP8; + SFP9ptr = isoSFP9; + SFP10ptr = isoSFP10; + SFP11ptr = isoSFP11; + SFP12ptr = isoSFP12; + SFP13ptr = isoSFP13; + SFP14ptr = isoSFP14; + SFP15ptr = isoSFP15; + SFP16ptr = isoSFP16; + SFP17ptr = isoSFP17; + SFP18ptr = isoSFP18; + SFP19ptr = isoSFP19; + x0 = (*SFP0ptr++) * YTmp; + x1 = (*SFP1ptr++) * YTmp; + x2 = (*SFP2ptr++) * YTmp; + x3 = (*SFP3ptr++) * YTmp; + x4 = (*SFP4ptr++) * YTmp; + x5 = (*SFP5ptr++) * YTmp; + x6 = (*SFP6ptr++) * YTmp; + x7 = (*SFP7ptr++) * YTmp; + x8 = (*SFP8ptr++) * YTmp; + x9 = (*SFP9ptr++) * YTmp; + x10 = (*SFP10ptr++) * YTmp; + x11 = (*SFP11ptr++) * YTmp; + x12 = (*SFP12ptr++) * YTmp; + x13 = (*SFP13ptr++) * YTmp; + x14 = (*SFP14ptr++) * YTmp; + x15 = (*SFP15ptr++) * YTmp; + x16 = (*SFP16ptr++) * YTmp; + x17 = (*SFP17ptr++) * YTmp; + x18 = (*SFP18ptr++) * YTmp; + x19 = (*SFP19ptr++) * YTmp; + while (++YPtr != YPtrEnd) + { + YTmp = *YPtr; + x0 += (*SFP0ptr++) * YTmp; + x1 += (*SFP1ptr++) * YTmp; + x2 += (*SFP2ptr++) * YTmp; + x3 += (*SFP3ptr++) * YTmp; + x4 += (*SFP4ptr++) * YTmp; + x5 += (*SFP5ptr++) * YTmp; + x6 += (*SFP6ptr++) * YTmp; + x7 += (*SFP7ptr++) * YTmp; + x8 += (*SFP8ptr++) * YTmp; + x9 += (*SFP9ptr++) * YTmp; + x10 += (*SFP10ptr++) * YTmp; + x11 += (*SFP11ptr++) * YTmp; + x12 += (*SFP12ptr++) * YTmp; + x13 += (*SFP13ptr++) * YTmp; + x14 += (*SFP14ptr++) * YTmp; + x15 += (*SFP15ptr++) * YTmp; + x16 += (*SFP16ptr++) * YTmp; + x17 += (*SFP17ptr++) * YTmp; + x18 += (*SFP18ptr++) * YTmp; + x19 += (*SFP19ptr++) * YTmp; + } + (*xPtr0++) += x0; + (*xPtr1++) += x1; + (*xPtr2++) += x2; + (*xPtr3++) += x3; + (*xPtr4++) += x4; + (*xPtr5++) += x5; + (*xPtr6++) += x6; + (*xPtr7++) += x7; + (*xPtr8++) += x8; + (*xPtr9++) += x9; + (*xPtr10++) += x10; + (*xPtr11++) += x11; + (*xPtr12++) += x12; + (*xPtr13++) += x13; + (*xPtr14++) += x14; + (*xPtr15++) += x15; + (*xPtr16++) += x16; + (*xPtr17++) += x17; + (*xPtr18++) += x18; + (*xPtr19++) += x19; + t_v++; + } + break; + } + } + + pthread_exit( 0 ); +} + +// +// Function called by Cython +// +void COMMIT_At( + int _nF, int _n, int _nE, int _nV, int _nS, int _ndirs, + double *_vIN, double *_vOUT, + uint32_t *_ICf, uint32_t *_ICeval, uint32_t *_ICv, uint16_t *_ICo, float *_ICl, + uint32_t *_ECv, uint16_t *_ECo, + uint32_t *_ISOv, + float *_wmrSFP, float *_wmhSFP, float *_isoSFP, + uint8_t* _ICthreadsT, uint32_t* _ECthreadsT, uint32_t* _ISOthreadsT, + uint32_t _nIC, uint32_t _nEC, uint32_t _nISO, uint32_t _nThreads +) +{ + nF = _nF; + n = _n; + nE = _nE; + nV = _nV; + nS = _nS; + ndirs = _ndirs; + + x = _vOUT; + Y = _vIN; + + ICf = _ICf; + ICeval = _ICeval; + ICv = _ICv; + ICo = _ICo; + ICl = _ICl; + ECv = _ECv; + ECo = _ECo; + ISOv = _ISOv; + + nIC = _nIC; + nEC = _nEC; + nISO = _nISO; + + switch (nIC) + { + case 1: + wmrSFP0 = _wmrSFP; + break; + case 2: + wmrSFP0 = _wmrSFP; + wmrSFP1 = wmrSFP0 + _ndirs*_nS; + break; + case 3: + wmrSFP0 = _wmrSFP; + wmrSFP1 = wmrSFP0 + _ndirs*_nS; + wmrSFP2 = wmrSFP1 + _ndirs*_nS; + break; + case 4: + wmrSFP0 = _wmrSFP; + wmrSFP1 = wmrSFP0 + _ndirs*_nS; + wmrSFP2 = wmrSFP1 + _ndirs*_nS; + wmrSFP3 = wmrSFP2 + _ndirs*_nS; + break; + case 5: + wmrSFP0 = _wmrSFP; + wmrSFP1 = wmrSFP0 + _ndirs*_nS; + wmrSFP2 = wmrSFP1 + _ndirs*_nS; + wmrSFP3 = wmrSFP2 + _ndirs*_nS; + wmrSFP4 = wmrSFP3 + _ndirs*_nS; + break; + case 6: + wmrSFP0 = _wmrSFP; + wmrSFP1 = wmrSFP0 + _ndirs*_nS; + wmrSFP2 = wmrSFP1 + _ndirs*_nS; + wmrSFP3 = wmrSFP2 + _ndirs*_nS; + wmrSFP4 = wmrSFP3 + _ndirs*_nS; + wmrSFP5 = wmrSFP4 + _ndirs*_nS; + break; + case 7: + wmrSFP0 = _wmrSFP; + wmrSFP1 = wmrSFP0 + _ndirs*_nS; + wmrSFP2 = wmrSFP1 + _ndirs*_nS; + wmrSFP3 = wmrSFP2 + _ndirs*_nS; + wmrSFP4 = wmrSFP3 + _ndirs*_nS; + wmrSFP5 = wmrSFP4 + _ndirs*_nS; + wmrSFP6 = wmrSFP5 + _ndirs*_nS; + break; + case 8: + wmrSFP0 = _wmrSFP; + wmrSFP1 = wmrSFP0 + _ndirs*_nS; + wmrSFP2 = wmrSFP1 + _ndirs*_nS; + wmrSFP3 = wmrSFP2 + _ndirs*_nS; + wmrSFP4 = wmrSFP3 + _ndirs*_nS; + wmrSFP5 = wmrSFP4 + _ndirs*_nS; + wmrSFP6 = wmrSFP5 + _ndirs*_nS; + wmrSFP7 = wmrSFP6 + _ndirs*_nS; + break; + case 9: + wmrSFP0 = _wmrSFP; + wmrSFP1 = wmrSFP0 + _ndirs*_nS; + wmrSFP2 = wmrSFP1 + _ndirs*_nS; + wmrSFP3 = wmrSFP2 + _ndirs*_nS; + wmrSFP4 = wmrSFP3 + _ndirs*_nS; + wmrSFP5 = wmrSFP4 + _ndirs*_nS; + wmrSFP6 = wmrSFP5 + _ndirs*_nS; + wmrSFP7 = wmrSFP6 + _ndirs*_nS; + wmrSFP8 = wmrSFP7 + _ndirs*_nS; + break; + case 10: + wmrSFP0 = _wmrSFP; + wmrSFP1 = wmrSFP0 + _ndirs*_nS; + wmrSFP2 = wmrSFP1 + _ndirs*_nS; + wmrSFP3 = wmrSFP2 + _ndirs*_nS; + wmrSFP4 = wmrSFP3 + _ndirs*_nS; + wmrSFP5 = wmrSFP4 + _ndirs*_nS; + wmrSFP6 = wmrSFP5 + _ndirs*_nS; + wmrSFP7 = wmrSFP6 + _ndirs*_nS; + wmrSFP8 = wmrSFP7 + _ndirs*_nS; + wmrSFP9 = wmrSFP8 + _ndirs*_nS; + break; + case 11: + wmrSFP0 = _wmrSFP; + wmrSFP1 = wmrSFP0 + _ndirs*_nS; + wmrSFP2 = wmrSFP1 + _ndirs*_nS; + wmrSFP3 = wmrSFP2 + _ndirs*_nS; + wmrSFP4 = wmrSFP3 + _ndirs*_nS; + wmrSFP5 = wmrSFP4 + _ndirs*_nS; + wmrSFP6 = wmrSFP5 + _ndirs*_nS; + wmrSFP7 = wmrSFP6 + _ndirs*_nS; + wmrSFP8 = wmrSFP7 + _ndirs*_nS; + wmrSFP9 = wmrSFP8 + _ndirs*_nS; + wmrSFP10 = wmrSFP9 + _ndirs*_nS; + break; + case 12: + wmrSFP0 = _wmrSFP; + wmrSFP1 = wmrSFP0 + _ndirs*_nS; + wmrSFP2 = wmrSFP1 + _ndirs*_nS; + wmrSFP3 = wmrSFP2 + _ndirs*_nS; + wmrSFP4 = wmrSFP3 + _ndirs*_nS; + wmrSFP5 = wmrSFP4 + _ndirs*_nS; + wmrSFP6 = wmrSFP5 + _ndirs*_nS; + wmrSFP7 = wmrSFP6 + _ndirs*_nS; + wmrSFP8 = wmrSFP7 + _ndirs*_nS; + wmrSFP9 = wmrSFP8 + _ndirs*_nS; + wmrSFP10 = wmrSFP9 + _ndirs*_nS; + wmrSFP11 = wmrSFP10 + _ndirs*_nS; + break; + case 13: + wmrSFP0 = _wmrSFP; + wmrSFP1 = wmrSFP0 + _ndirs*_nS; + wmrSFP2 = wmrSFP1 + _ndirs*_nS; + wmrSFP3 = wmrSFP2 + _ndirs*_nS; + wmrSFP4 = wmrSFP3 + _ndirs*_nS; + wmrSFP5 = wmrSFP4 + _ndirs*_nS; + wmrSFP6 = wmrSFP5 + _ndirs*_nS; + wmrSFP7 = wmrSFP6 + _ndirs*_nS; + wmrSFP8 = wmrSFP7 + _ndirs*_nS; + wmrSFP9 = wmrSFP8 + _ndirs*_nS; + wmrSFP10 = wmrSFP9 + _ndirs*_nS; + wmrSFP11 = wmrSFP10 + _ndirs*_nS; + wmrSFP12 = wmrSFP11 + _ndirs*_nS; + break; + case 14: + wmrSFP0 = _wmrSFP; + wmrSFP1 = wmrSFP0 + _ndirs*_nS; + wmrSFP2 = wmrSFP1 + _ndirs*_nS; + wmrSFP3 = wmrSFP2 + _ndirs*_nS; + wmrSFP4 = wmrSFP3 + _ndirs*_nS; + wmrSFP5 = wmrSFP4 + _ndirs*_nS; + wmrSFP6 = wmrSFP5 + _ndirs*_nS; + wmrSFP7 = wmrSFP6 + _ndirs*_nS; + wmrSFP8 = wmrSFP7 + _ndirs*_nS; + wmrSFP9 = wmrSFP8 + _ndirs*_nS; + wmrSFP10 = wmrSFP9 + _ndirs*_nS; + wmrSFP11 = wmrSFP10 + _ndirs*_nS; + wmrSFP12 = wmrSFP11 + _ndirs*_nS; + wmrSFP13 = wmrSFP12 + _ndirs*_nS; + break; + case 15: + wmrSFP0 = _wmrSFP; + wmrSFP1 = wmrSFP0 + _ndirs*_nS; + wmrSFP2 = wmrSFP1 + _ndirs*_nS; + wmrSFP3 = wmrSFP2 + _ndirs*_nS; + wmrSFP4 = wmrSFP3 + _ndirs*_nS; + wmrSFP5 = wmrSFP4 + _ndirs*_nS; + wmrSFP6 = wmrSFP5 + _ndirs*_nS; + wmrSFP7 = wmrSFP6 + _ndirs*_nS; + wmrSFP8 = wmrSFP7 + _ndirs*_nS; + wmrSFP9 = wmrSFP8 + _ndirs*_nS; + wmrSFP10 = wmrSFP9 + _ndirs*_nS; + wmrSFP11 = wmrSFP10 + _ndirs*_nS; + wmrSFP12 = wmrSFP11 + _ndirs*_nS; + wmrSFP13 = wmrSFP12 + _ndirs*_nS; + wmrSFP14 = wmrSFP13 + _ndirs*_nS; + break; + case 16: + wmrSFP0 = _wmrSFP; + wmrSFP1 = wmrSFP0 + _ndirs*_nS; + wmrSFP2 = wmrSFP1 + _ndirs*_nS; + wmrSFP3 = wmrSFP2 + _ndirs*_nS; + wmrSFP4 = wmrSFP3 + _ndirs*_nS; + wmrSFP5 = wmrSFP4 + _ndirs*_nS; + wmrSFP6 = wmrSFP5 + _ndirs*_nS; + wmrSFP7 = wmrSFP6 + _ndirs*_nS; + wmrSFP8 = wmrSFP7 + _ndirs*_nS; + wmrSFP9 = wmrSFP8 + _ndirs*_nS; + wmrSFP10 = wmrSFP9 + _ndirs*_nS; + wmrSFP11 = wmrSFP10 + _ndirs*_nS; + wmrSFP12 = wmrSFP11 + _ndirs*_nS; + wmrSFP13 = wmrSFP12 + _ndirs*_nS; + wmrSFP14 = wmrSFP13 + _ndirs*_nS; + wmrSFP15 = wmrSFP14 + _ndirs*_nS; + break; + case 17: + wmrSFP0 = _wmrSFP; + wmrSFP1 = wmrSFP0 + _ndirs*_nS; + wmrSFP2 = wmrSFP1 + _ndirs*_nS; + wmrSFP3 = wmrSFP2 + _ndirs*_nS; + wmrSFP4 = wmrSFP3 + _ndirs*_nS; + wmrSFP5 = wmrSFP4 + _ndirs*_nS; + wmrSFP6 = wmrSFP5 + _ndirs*_nS; + wmrSFP7 = wmrSFP6 + _ndirs*_nS; + wmrSFP8 = wmrSFP7 + _ndirs*_nS; + wmrSFP9 = wmrSFP8 + _ndirs*_nS; + wmrSFP10 = wmrSFP9 + _ndirs*_nS; + wmrSFP11 = wmrSFP10 + _ndirs*_nS; + wmrSFP12 = wmrSFP11 + _ndirs*_nS; + wmrSFP13 = wmrSFP12 + _ndirs*_nS; + wmrSFP14 = wmrSFP13 + _ndirs*_nS; + wmrSFP15 = wmrSFP14 + _ndirs*_nS; + wmrSFP16 = wmrSFP15 + _ndirs*_nS; + break; + case 18: + wmrSFP0 = _wmrSFP; + wmrSFP1 = wmrSFP0 + _ndirs*_nS; + wmrSFP2 = wmrSFP1 + _ndirs*_nS; + wmrSFP3 = wmrSFP2 + _ndirs*_nS; + wmrSFP4 = wmrSFP3 + _ndirs*_nS; + wmrSFP5 = wmrSFP4 + _ndirs*_nS; + wmrSFP6 = wmrSFP5 + _ndirs*_nS; + wmrSFP7 = wmrSFP6 + _ndirs*_nS; + wmrSFP8 = wmrSFP7 + _ndirs*_nS; + wmrSFP9 = wmrSFP8 + _ndirs*_nS; + wmrSFP10 = wmrSFP9 + _ndirs*_nS; + wmrSFP11 = wmrSFP10 + _ndirs*_nS; + wmrSFP12 = wmrSFP11 + _ndirs*_nS; + wmrSFP13 = wmrSFP12 + _ndirs*_nS; + wmrSFP14 = wmrSFP13 + _ndirs*_nS; + wmrSFP15 = wmrSFP14 + _ndirs*_nS; + wmrSFP16 = wmrSFP15 + _ndirs*_nS; + wmrSFP17 = wmrSFP16 + _ndirs*_nS; + break; + case 19: + wmrSFP0 = _wmrSFP; + wmrSFP1 = wmrSFP0 + _ndirs*_nS; + wmrSFP2 = wmrSFP1 + _ndirs*_nS; + wmrSFP3 = wmrSFP2 + _ndirs*_nS; + wmrSFP4 = wmrSFP3 + _ndirs*_nS; + wmrSFP5 = wmrSFP4 + _ndirs*_nS; + wmrSFP6 = wmrSFP5 + _ndirs*_nS; + wmrSFP7 = wmrSFP6 + _ndirs*_nS; + wmrSFP8 = wmrSFP7 + _ndirs*_nS; + wmrSFP9 = wmrSFP8 + _ndirs*_nS; + wmrSFP10 = wmrSFP9 + _ndirs*_nS; + wmrSFP11 = wmrSFP10 + _ndirs*_nS; + wmrSFP12 = wmrSFP11 + _ndirs*_nS; + wmrSFP13 = wmrSFP12 + _ndirs*_nS; + wmrSFP14 = wmrSFP13 + _ndirs*_nS; + wmrSFP15 = wmrSFP14 + _ndirs*_nS; + wmrSFP16 = wmrSFP15 + _ndirs*_nS; + wmrSFP17 = wmrSFP16 + _ndirs*_nS; + wmrSFP18 = wmrSFP17 + _ndirs*_nS; + break; + case 20: + wmrSFP0 = _wmrSFP; + wmrSFP1 = wmrSFP0 + _ndirs*_nS; + wmrSFP2 = wmrSFP1 + _ndirs*_nS; + wmrSFP3 = wmrSFP2 + _ndirs*_nS; + wmrSFP4 = wmrSFP3 + _ndirs*_nS; + wmrSFP5 = wmrSFP4 + _ndirs*_nS; + wmrSFP6 = wmrSFP5 + _ndirs*_nS; + wmrSFP7 = wmrSFP6 + _ndirs*_nS; + wmrSFP8 = wmrSFP7 + _ndirs*_nS; + wmrSFP9 = wmrSFP8 + _ndirs*_nS; + wmrSFP10 = wmrSFP9 + _ndirs*_nS; + wmrSFP11 = wmrSFP10 + _ndirs*_nS; + wmrSFP12 = wmrSFP11 + _ndirs*_nS; + wmrSFP13 = wmrSFP12 + _ndirs*_nS; + wmrSFP14 = wmrSFP13 + _ndirs*_nS; + wmrSFP15 = wmrSFP14 + _ndirs*_nS; + wmrSFP16 = wmrSFP15 + _ndirs*_nS; + wmrSFP17 = wmrSFP16 + _ndirs*_nS; + wmrSFP18 = wmrSFP17 + _ndirs*_nS; + wmrSFP19 = wmrSFP18 + _ndirs*_nS; + break; + } + + switch (nEC) + { + case 1: + wmhSFP0 = _wmhSFP; + break; + case 2: + wmhSFP0 = _wmhSFP; + wmhSFP1 = wmhSFP0 + _ndirs*_nS; + break; + case 3: + wmhSFP0 = _wmhSFP; + wmhSFP1 = wmhSFP0 + _ndirs*_nS; + wmhSFP2 = wmhSFP1 + _ndirs*_nS; + break; + case 4: + wmhSFP0 = _wmhSFP; + wmhSFP1 = wmhSFP0 + _ndirs*_nS; + wmhSFP2 = wmhSFP1 + _ndirs*_nS; + wmhSFP3 = wmhSFP2 + _ndirs*_nS; + break; + case 5: + wmhSFP0 = _wmhSFP; + wmhSFP1 = wmhSFP0 + _ndirs*_nS; + wmhSFP2 = wmhSFP1 + _ndirs*_nS; + wmhSFP3 = wmhSFP2 + _ndirs*_nS; + wmhSFP4 = wmhSFP3 + _ndirs*_nS; + break; + case 6: + wmhSFP0 = _wmhSFP; + wmhSFP1 = wmhSFP0 + _ndirs*_nS; + wmhSFP2 = wmhSFP1 + _ndirs*_nS; + wmhSFP3 = wmhSFP2 + _ndirs*_nS; + wmhSFP4 = wmhSFP3 + _ndirs*_nS; + wmhSFP5 = wmhSFP4 + _ndirs*_nS; + break; + case 7: + wmhSFP0 = _wmhSFP; + wmhSFP1 = wmhSFP0 + _ndirs*_nS; + wmhSFP2 = wmhSFP1 + _ndirs*_nS; + wmhSFP3 = wmhSFP2 + _ndirs*_nS; + wmhSFP4 = wmhSFP3 + _ndirs*_nS; + wmhSFP5 = wmhSFP4 + _ndirs*_nS; + wmhSFP6 = wmhSFP5 + _ndirs*_nS; + break; + case 8: + wmhSFP0 = _wmhSFP; + wmhSFP1 = wmhSFP0 + _ndirs*_nS; + wmhSFP2 = wmhSFP1 + _ndirs*_nS; + wmhSFP3 = wmhSFP2 + _ndirs*_nS; + wmhSFP4 = wmhSFP3 + _ndirs*_nS; + wmhSFP5 = wmhSFP4 + _ndirs*_nS; + wmhSFP6 = wmhSFP5 + _ndirs*_nS; + wmhSFP7 = wmhSFP6 + _ndirs*_nS; + break; + case 9: + wmhSFP0 = _wmhSFP; + wmhSFP1 = wmhSFP0 + _ndirs*_nS; + wmhSFP2 = wmhSFP1 + _ndirs*_nS; + wmhSFP3 = wmhSFP2 + _ndirs*_nS; + wmhSFP4 = wmhSFP3 + _ndirs*_nS; + wmhSFP5 = wmhSFP4 + _ndirs*_nS; + wmhSFP6 = wmhSFP5 + _ndirs*_nS; + wmhSFP7 = wmhSFP6 + _ndirs*_nS; + wmhSFP8 = wmhSFP7 + _ndirs*_nS; + break; + case 10: + wmhSFP0 = _wmhSFP; + wmhSFP1 = wmhSFP0 + _ndirs*_nS; + wmhSFP2 = wmhSFP1 + _ndirs*_nS; + wmhSFP3 = wmhSFP2 + _ndirs*_nS; + wmhSFP4 = wmhSFP3 + _ndirs*_nS; + wmhSFP5 = wmhSFP4 + _ndirs*_nS; + wmhSFP6 = wmhSFP5 + _ndirs*_nS; + wmhSFP7 = wmhSFP6 + _ndirs*_nS; + wmhSFP8 = wmhSFP7 + _ndirs*_nS; + wmhSFP9 = wmhSFP8 + _ndirs*_nS; + break; + case 11: + wmhSFP0 = _wmhSFP; + wmhSFP1 = wmhSFP0 + _ndirs*_nS; + wmhSFP2 = wmhSFP1 + _ndirs*_nS; + wmhSFP3 = wmhSFP2 + _ndirs*_nS; + wmhSFP4 = wmhSFP3 + _ndirs*_nS; + wmhSFP5 = wmhSFP4 + _ndirs*_nS; + wmhSFP6 = wmhSFP5 + _ndirs*_nS; + wmhSFP7 = wmhSFP6 + _ndirs*_nS; + wmhSFP8 = wmhSFP7 + _ndirs*_nS; + wmhSFP9 = wmhSFP8 + _ndirs*_nS; + wmhSFP10 = wmhSFP9 + _ndirs*_nS; + break; + case 12: + wmhSFP0 = _wmhSFP; + wmhSFP1 = wmhSFP0 + _ndirs*_nS; + wmhSFP2 = wmhSFP1 + _ndirs*_nS; + wmhSFP3 = wmhSFP2 + _ndirs*_nS; + wmhSFP4 = wmhSFP3 + _ndirs*_nS; + wmhSFP5 = wmhSFP4 + _ndirs*_nS; + wmhSFP6 = wmhSFP5 + _ndirs*_nS; + wmhSFP7 = wmhSFP6 + _ndirs*_nS; + wmhSFP8 = wmhSFP7 + _ndirs*_nS; + wmhSFP9 = wmhSFP8 + _ndirs*_nS; + wmhSFP10 = wmhSFP9 + _ndirs*_nS; + wmhSFP11 = wmhSFP10 + _ndirs*_nS; + break; + case 13: + wmhSFP0 = _wmhSFP; + wmhSFP1 = wmhSFP0 + _ndirs*_nS; + wmhSFP2 = wmhSFP1 + _ndirs*_nS; + wmhSFP3 = wmhSFP2 + _ndirs*_nS; + wmhSFP4 = wmhSFP3 + _ndirs*_nS; + wmhSFP5 = wmhSFP4 + _ndirs*_nS; + wmhSFP6 = wmhSFP5 + _ndirs*_nS; + wmhSFP7 = wmhSFP6 + _ndirs*_nS; + wmhSFP8 = wmhSFP7 + _ndirs*_nS; + wmhSFP9 = wmhSFP8 + _ndirs*_nS; + wmhSFP10 = wmhSFP9 + _ndirs*_nS; + wmhSFP11 = wmhSFP10 + _ndirs*_nS; + wmhSFP12 = wmhSFP11 + _ndirs*_nS; + break; + case 14: + wmhSFP0 = _wmhSFP; + wmhSFP1 = wmhSFP0 + _ndirs*_nS; + wmhSFP2 = wmhSFP1 + _ndirs*_nS; + wmhSFP3 = wmhSFP2 + _ndirs*_nS; + wmhSFP4 = wmhSFP3 + _ndirs*_nS; + wmhSFP5 = wmhSFP4 + _ndirs*_nS; + wmhSFP6 = wmhSFP5 + _ndirs*_nS; + wmhSFP7 = wmhSFP6 + _ndirs*_nS; + wmhSFP8 = wmhSFP7 + _ndirs*_nS; + wmhSFP9 = wmhSFP8 + _ndirs*_nS; + wmhSFP10 = wmhSFP9 + _ndirs*_nS; + wmhSFP11 = wmhSFP10 + _ndirs*_nS; + wmhSFP12 = wmhSFP11 + _ndirs*_nS; + wmhSFP13 = wmhSFP12 + _ndirs*_nS; + break; + case 15: + wmhSFP0 = _wmhSFP; + wmhSFP1 = wmhSFP0 + _ndirs*_nS; + wmhSFP2 = wmhSFP1 + _ndirs*_nS; + wmhSFP3 = wmhSFP2 + _ndirs*_nS; + wmhSFP4 = wmhSFP3 + _ndirs*_nS; + wmhSFP5 = wmhSFP4 + _ndirs*_nS; + wmhSFP6 = wmhSFP5 + _ndirs*_nS; + wmhSFP7 = wmhSFP6 + _ndirs*_nS; + wmhSFP8 = wmhSFP7 + _ndirs*_nS; + wmhSFP9 = wmhSFP8 + _ndirs*_nS; + wmhSFP10 = wmhSFP9 + _ndirs*_nS; + wmhSFP11 = wmhSFP10 + _ndirs*_nS; + wmhSFP12 = wmhSFP11 + _ndirs*_nS; + wmhSFP13 = wmhSFP12 + _ndirs*_nS; + wmhSFP14 = wmhSFP13 + _ndirs*_nS; + break; + case 16: + wmhSFP0 = _wmhSFP; + wmhSFP1 = wmhSFP0 + _ndirs*_nS; + wmhSFP2 = wmhSFP1 + _ndirs*_nS; + wmhSFP3 = wmhSFP2 + _ndirs*_nS; + wmhSFP4 = wmhSFP3 + _ndirs*_nS; + wmhSFP5 = wmhSFP4 + _ndirs*_nS; + wmhSFP6 = wmhSFP5 + _ndirs*_nS; + wmhSFP7 = wmhSFP6 + _ndirs*_nS; + wmhSFP8 = wmhSFP7 + _ndirs*_nS; + wmhSFP9 = wmhSFP8 + _ndirs*_nS; + wmhSFP10 = wmhSFP9 + _ndirs*_nS; + wmhSFP11 = wmhSFP10 + _ndirs*_nS; + wmhSFP12 = wmhSFP11 + _ndirs*_nS; + wmhSFP13 = wmhSFP12 + _ndirs*_nS; + wmhSFP14 = wmhSFP13 + _ndirs*_nS; + wmhSFP15 = wmhSFP14 + _ndirs*_nS; + break; + case 17: + wmhSFP0 = _wmhSFP; + wmhSFP1 = wmhSFP0 + _ndirs*_nS; + wmhSFP2 = wmhSFP1 + _ndirs*_nS; + wmhSFP3 = wmhSFP2 + _ndirs*_nS; + wmhSFP4 = wmhSFP3 + _ndirs*_nS; + wmhSFP5 = wmhSFP4 + _ndirs*_nS; + wmhSFP6 = wmhSFP5 + _ndirs*_nS; + wmhSFP7 = wmhSFP6 + _ndirs*_nS; + wmhSFP8 = wmhSFP7 + _ndirs*_nS; + wmhSFP9 = wmhSFP8 + _ndirs*_nS; + wmhSFP10 = wmhSFP9 + _ndirs*_nS; + wmhSFP11 = wmhSFP10 + _ndirs*_nS; + wmhSFP12 = wmhSFP11 + _ndirs*_nS; + wmhSFP13 = wmhSFP12 + _ndirs*_nS; + wmhSFP14 = wmhSFP13 + _ndirs*_nS; + wmhSFP15 = wmhSFP14 + _ndirs*_nS; + wmhSFP16 = wmhSFP15 + _ndirs*_nS; + break; + case 18: + wmhSFP0 = _wmhSFP; + wmhSFP1 = wmhSFP0 + _ndirs*_nS; + wmhSFP2 = wmhSFP1 + _ndirs*_nS; + wmhSFP3 = wmhSFP2 + _ndirs*_nS; + wmhSFP4 = wmhSFP3 + _ndirs*_nS; + wmhSFP5 = wmhSFP4 + _ndirs*_nS; + wmhSFP6 = wmhSFP5 + _ndirs*_nS; + wmhSFP7 = wmhSFP6 + _ndirs*_nS; + wmhSFP8 = wmhSFP7 + _ndirs*_nS; + wmhSFP9 = wmhSFP8 + _ndirs*_nS; + wmhSFP10 = wmhSFP9 + _ndirs*_nS; + wmhSFP11 = wmhSFP10 + _ndirs*_nS; + wmhSFP12 = wmhSFP11 + _ndirs*_nS; + wmhSFP13 = wmhSFP12 + _ndirs*_nS; + wmhSFP14 = wmhSFP13 + _ndirs*_nS; + wmhSFP15 = wmhSFP14 + _ndirs*_nS; + wmhSFP16 = wmhSFP15 + _ndirs*_nS; + wmhSFP17 = wmhSFP16 + _ndirs*_nS; + break; + case 19: + wmhSFP0 = _wmhSFP; + wmhSFP1 = wmhSFP0 + _ndirs*_nS; + wmhSFP2 = wmhSFP1 + _ndirs*_nS; + wmhSFP3 = wmhSFP2 + _ndirs*_nS; + wmhSFP4 = wmhSFP3 + _ndirs*_nS; + wmhSFP5 = wmhSFP4 + _ndirs*_nS; + wmhSFP6 = wmhSFP5 + _ndirs*_nS; + wmhSFP7 = wmhSFP6 + _ndirs*_nS; + wmhSFP8 = wmhSFP7 + _ndirs*_nS; + wmhSFP9 = wmhSFP8 + _ndirs*_nS; + wmhSFP10 = wmhSFP9 + _ndirs*_nS; + wmhSFP11 = wmhSFP10 + _ndirs*_nS; + wmhSFP12 = wmhSFP11 + _ndirs*_nS; + wmhSFP13 = wmhSFP12 + _ndirs*_nS; + wmhSFP14 = wmhSFP13 + _ndirs*_nS; + wmhSFP15 = wmhSFP14 + _ndirs*_nS; + wmhSFP16 = wmhSFP15 + _ndirs*_nS; + wmhSFP17 = wmhSFP16 + _ndirs*_nS; + wmhSFP18 = wmhSFP17 + _ndirs*_nS; + break; + case 20: + wmhSFP0 = _wmhSFP; + wmhSFP1 = wmhSFP0 + _ndirs*_nS; + wmhSFP2 = wmhSFP1 + _ndirs*_nS; + wmhSFP3 = wmhSFP2 + _ndirs*_nS; + wmhSFP4 = wmhSFP3 + _ndirs*_nS; + wmhSFP5 = wmhSFP4 + _ndirs*_nS; + wmhSFP6 = wmhSFP5 + _ndirs*_nS; + wmhSFP7 = wmhSFP6 + _ndirs*_nS; + wmhSFP8 = wmhSFP7 + _ndirs*_nS; + wmhSFP9 = wmhSFP8 + _ndirs*_nS; + wmhSFP10 = wmhSFP9 + _ndirs*_nS; + wmhSFP11 = wmhSFP10 + _ndirs*_nS; + wmhSFP12 = wmhSFP11 + _ndirs*_nS; + wmhSFP13 = wmhSFP12 + _ndirs*_nS; + wmhSFP14 = wmhSFP13 + _ndirs*_nS; + wmhSFP15 = wmhSFP14 + _ndirs*_nS; + wmhSFP16 = wmhSFP15 + _ndirs*_nS; + wmhSFP17 = wmhSFP16 + _ndirs*_nS; + wmhSFP18 = wmhSFP17 + _ndirs*_nS; + wmhSFP19 = wmhSFP18 + _ndirs*_nS; + break; + } + + switch (nISO) + { + case 1: + isoSFP0 = _isoSFP; + break; + case 2: + isoSFP0 = _isoSFP; + isoSFP1 = isoSFP0 + _nS; + break; + case 3: + isoSFP0 = _isoSFP; + isoSFP1 = isoSFP0 + _nS; + isoSFP2 = isoSFP1 + _nS; + break; + case 4: + isoSFP0 = _isoSFP; + isoSFP1 = isoSFP0 + _nS; + isoSFP2 = isoSFP1 + _nS; + isoSFP3 = isoSFP2 + _nS; + break; + case 5: + isoSFP0 = _isoSFP; + isoSFP1 = isoSFP0 + _nS; + isoSFP2 = isoSFP1 + _nS; + isoSFP3 = isoSFP2 + _nS; + isoSFP4 = isoSFP3 + _nS; + break; + case 6: + isoSFP0 = _isoSFP; + isoSFP1 = isoSFP0 + _nS; + isoSFP2 = isoSFP1 + _nS; + isoSFP3 = isoSFP2 + _nS; + isoSFP4 = isoSFP3 + _nS; + isoSFP5 = isoSFP4 + _nS; + break; + case 7: + isoSFP0 = _isoSFP; + isoSFP1 = isoSFP0 + _nS; + isoSFP2 = isoSFP1 + _nS; + isoSFP3 = isoSFP2 + _nS; + isoSFP4 = isoSFP3 + _nS; + isoSFP5 = isoSFP4 + _nS; + isoSFP6 = isoSFP5 + _nS; + break; + case 8: + isoSFP0 = _isoSFP; + isoSFP1 = isoSFP0 + _nS; + isoSFP2 = isoSFP1 + _nS; + isoSFP3 = isoSFP2 + _nS; + isoSFP4 = isoSFP3 + _nS; + isoSFP5 = isoSFP4 + _nS; + isoSFP6 = isoSFP5 + _nS; + isoSFP7 = isoSFP6 + _nS; + break; + case 9: + isoSFP0 = _isoSFP; + isoSFP1 = isoSFP0 + _nS; + isoSFP2 = isoSFP1 + _nS; + isoSFP3 = isoSFP2 + _nS; + isoSFP4 = isoSFP3 + _nS; + isoSFP5 = isoSFP4 + _nS; + isoSFP6 = isoSFP5 + _nS; + isoSFP7 = isoSFP6 + _nS; + isoSFP8 = isoSFP7 + _nS; + break; + case 10: + isoSFP0 = _isoSFP; + isoSFP1 = isoSFP0 + _nS; + isoSFP2 = isoSFP1 + _nS; + isoSFP3 = isoSFP2 + _nS; + isoSFP4 = isoSFP3 + _nS; + isoSFP5 = isoSFP4 + _nS; + isoSFP6 = isoSFP5 + _nS; + isoSFP7 = isoSFP6 + _nS; + isoSFP8 = isoSFP7 + _nS; + isoSFP9 = isoSFP8 + _nS; + break; + case 11: + isoSFP0 = _isoSFP; + isoSFP1 = isoSFP0 + _nS; + isoSFP2 = isoSFP1 + _nS; + isoSFP3 = isoSFP2 + _nS; + isoSFP4 = isoSFP3 + _nS; + isoSFP5 = isoSFP4 + _nS; + isoSFP6 = isoSFP5 + _nS; + isoSFP7 = isoSFP6 + _nS; + isoSFP8 = isoSFP7 + _nS; + isoSFP9 = isoSFP8 + _nS; + isoSFP10 = isoSFP9 + _nS; + break; + case 12: + isoSFP0 = _isoSFP; + isoSFP1 = isoSFP0 + _nS; + isoSFP2 = isoSFP1 + _nS; + isoSFP3 = isoSFP2 + _nS; + isoSFP4 = isoSFP3 + _nS; + isoSFP5 = isoSFP4 + _nS; + isoSFP6 = isoSFP5 + _nS; + isoSFP7 = isoSFP6 + _nS; + isoSFP8 = isoSFP7 + _nS; + isoSFP9 = isoSFP8 + _nS; + isoSFP10 = isoSFP9 + _nS; + isoSFP11 = isoSFP10 + _nS; + break; + case 13: + isoSFP0 = _isoSFP; + isoSFP1 = isoSFP0 + _nS; + isoSFP2 = isoSFP1 + _nS; + isoSFP3 = isoSFP2 + _nS; + isoSFP4 = isoSFP3 + _nS; + isoSFP5 = isoSFP4 + _nS; + isoSFP6 = isoSFP5 + _nS; + isoSFP7 = isoSFP6 + _nS; + isoSFP8 = isoSFP7 + _nS; + isoSFP9 = isoSFP8 + _nS; + isoSFP10 = isoSFP9 + _nS; + isoSFP11 = isoSFP10 + _nS; + isoSFP12 = isoSFP11 + _nS; + break; + case 14: + isoSFP0 = _isoSFP; + isoSFP1 = isoSFP0 + _nS; + isoSFP2 = isoSFP1 + _nS; + isoSFP3 = isoSFP2 + _nS; + isoSFP4 = isoSFP3 + _nS; + isoSFP5 = isoSFP4 + _nS; + isoSFP6 = isoSFP5 + _nS; + isoSFP7 = isoSFP6 + _nS; + isoSFP8 = isoSFP7 + _nS; + isoSFP9 = isoSFP8 + _nS; + isoSFP10 = isoSFP9 + _nS; + isoSFP11 = isoSFP10 + _nS; + isoSFP12 = isoSFP11 + _nS; + isoSFP13 = isoSFP12 + _nS; + break; + case 15: + isoSFP0 = _isoSFP; + isoSFP1 = isoSFP0 + _nS; + isoSFP2 = isoSFP1 + _nS; + isoSFP3 = isoSFP2 + _nS; + isoSFP4 = isoSFP3 + _nS; + isoSFP5 = isoSFP4 + _nS; + isoSFP6 = isoSFP5 + _nS; + isoSFP7 = isoSFP6 + _nS; + isoSFP8 = isoSFP7 + _nS; + isoSFP9 = isoSFP8 + _nS; + isoSFP10 = isoSFP9 + _nS; + isoSFP11 = isoSFP10 + _nS; + isoSFP12 = isoSFP11 + _nS; + isoSFP13 = isoSFP12 + _nS; + isoSFP14 = isoSFP13 + _nS; + break; + case 16: + isoSFP0 = _isoSFP; + isoSFP1 = isoSFP0 + _nS; + isoSFP2 = isoSFP1 + _nS; + isoSFP3 = isoSFP2 + _nS; + isoSFP4 = isoSFP3 + _nS; + isoSFP5 = isoSFP4 + _nS; + isoSFP6 = isoSFP5 + _nS; + isoSFP7 = isoSFP6 + _nS; + isoSFP8 = isoSFP7 + _nS; + isoSFP9 = isoSFP8 + _nS; + isoSFP10 = isoSFP9 + _nS; + isoSFP11 = isoSFP10 + _nS; + isoSFP12 = isoSFP11 + _nS; + isoSFP13 = isoSFP12 + _nS; + isoSFP14 = isoSFP13 + _nS; + isoSFP15 = isoSFP14 + _nS; + break; + case 17: + isoSFP0 = _isoSFP; + isoSFP1 = isoSFP0 + _nS; + isoSFP2 = isoSFP1 + _nS; + isoSFP3 = isoSFP2 + _nS; + isoSFP4 = isoSFP3 + _nS; + isoSFP5 = isoSFP4 + _nS; + isoSFP6 = isoSFP5 + _nS; + isoSFP7 = isoSFP6 + _nS; + isoSFP8 = isoSFP7 + _nS; + isoSFP9 = isoSFP8 + _nS; + isoSFP10 = isoSFP9 + _nS; + isoSFP11 = isoSFP10 + _nS; + isoSFP12 = isoSFP11 + _nS; + isoSFP13 = isoSFP12 + _nS; + isoSFP14 = isoSFP13 + _nS; + isoSFP15 = isoSFP14 + _nS; + isoSFP16 = isoSFP15 + _nS; + break; + case 18: + isoSFP0 = _isoSFP; + isoSFP1 = isoSFP0 + _nS; + isoSFP2 = isoSFP1 + _nS; + isoSFP3 = isoSFP2 + _nS; + isoSFP4 = isoSFP3 + _nS; + isoSFP5 = isoSFP4 + _nS; + isoSFP6 = isoSFP5 + _nS; + isoSFP7 = isoSFP6 + _nS; + isoSFP8 = isoSFP7 + _nS; + isoSFP9 = isoSFP8 + _nS; + isoSFP10 = isoSFP9 + _nS; + isoSFP11 = isoSFP10 + _nS; + isoSFP12 = isoSFP11 + _nS; + isoSFP13 = isoSFP12 + _nS; + isoSFP14 = isoSFP13 + _nS; + isoSFP15 = isoSFP14 + _nS; + isoSFP16 = isoSFP15 + _nS; + isoSFP17 = isoSFP16 + _nS; + break; + case 19: + isoSFP0 = _isoSFP; + isoSFP1 = isoSFP0 + _nS; + isoSFP2 = isoSFP1 + _nS; + isoSFP3 = isoSFP2 + _nS; + isoSFP4 = isoSFP3 + _nS; + isoSFP5 = isoSFP4 + _nS; + isoSFP6 = isoSFP5 + _nS; + isoSFP7 = isoSFP6 + _nS; + isoSFP8 = isoSFP7 + _nS; + isoSFP9 = isoSFP8 + _nS; + isoSFP10 = isoSFP9 + _nS; + isoSFP11 = isoSFP10 + _nS; + isoSFP12 = isoSFP11 + _nS; + isoSFP13 = isoSFP12 + _nS; + isoSFP14 = isoSFP13 + _nS; + isoSFP15 = isoSFP14 + _nS; + isoSFP16 = isoSFP15 + _nS; + isoSFP17 = isoSFP16 + _nS; + isoSFP18 = isoSFP17 + _nS; + break; + case 20: + isoSFP0 = _isoSFP; + isoSFP1 = isoSFP0 + _nS; + isoSFP2 = isoSFP1 + _nS; + isoSFP3 = isoSFP2 + _nS; + isoSFP4 = isoSFP3 + _nS; + isoSFP5 = isoSFP4 + _nS; + isoSFP6 = isoSFP5 + _nS; + isoSFP7 = isoSFP6 + _nS; + isoSFP8 = isoSFP7 + _nS; + isoSFP9 = isoSFP8 + _nS; + isoSFP10 = isoSFP9 + _nS; + isoSFP11 = isoSFP10 + _nS; + isoSFP12 = isoSFP11 + _nS; + isoSFP13 = isoSFP12 + _nS; + isoSFP14 = isoSFP13 + _nS; + isoSFP15 = isoSFP14 + _nS; + isoSFP16 = isoSFP15 + _nS; + isoSFP17 = isoSFP16 + _nS; + isoSFP18 = isoSFP17 + _nS; + isoSFP19 = isoSFP18 + _nS; + break; + } + + ICthreadsT = _ICthreadsT; + ECthreadsT = _ECthreadsT; + ISOthreadsT = _ISOthreadsT; + + // Run SEPARATE THREADS to perform the multiplication + pthread_t threads[MAX_THREADS]; + int t; + for(t=0; t<_nThreads ; t++) + pthread_create( &threads[t], NULL, COMMIT_At__block, (void *) (long int)t ); + for(t=0; t<_nThreads ; t++) + pthread_join( threads[t], NULL ); + return; +} + +// +// Compute a sub-block of the A*x MATRIX-VECTOR product +// +void* COMMIT_A__block_nolut( void *ptr ) +{ + int id = (long)ptr; + uint32_t *eval0; + double x0; + double *xPtr; + uint32_t *t_v, *t_vEnd, *t_f; + float *t_l; + + // intra-cellular compartments + t_v = ICv + ICthreads[id]; + t_vEnd = ICv + ICthreads[id+1]; + t_l = ICl + ICthreads[id]; + t_f = ICf + ICthreads[id]; + + while( t_v != t_vEnd ) + { + x0 = x[*t_f] * (double)(ICeval[*t_f]); + if ( x0 != 0 ) + Y[*t_v] += (double)(*t_l) * x0; + t_f++; + t_v++; + t_l++; + } + + // isotropic compartments + if (nISO > 0) + { + t_v = ISOv + ISOthreads[id]; + t_vEnd = ISOv + ISOthreads[id+1]; + xPtr = x + nF + ISOthreads[id]; + + while( t_v != t_vEnd ) + { + x0 = *xPtr++; + if ( x0 != 0 ) + Y[*t_v] += x0; + t_v++; + } + } + + pthread_exit( 0 ); +} + +// +// Function called by Cython +// +void COMMIT_A_nolut( + int _nF, + double *_vIN, double *_vOUT, + uint32_t *_ICf, uint32_t *_ICeval, uint32_t *_ICv, float *_ICl, + uint32_t *_ISOv, + uint32_t* _ICthreads, uint32_t* _ISOthreads, + uint32_t _nISO, uint32_t _nThreads +) +{ + nF = _nF; + + x = _vIN; + Y = _vOUT; + + ICf = _ICf; + ICeval = _ICeval; + ICv = _ICv; + ICl = _ICl; + ISOv = _ISOv; + + nISO = _nISO; + + ICthreads = _ICthreads; + ISOthreads = _ISOthreads; + + // Run SEPARATE THREADS to perform the multiplication + pthread_t threads[MAX_THREADS]; + int t; + for(t=0; t<_nThreads ; t++) + pthread_create( &threads[t], NULL, COMMIT_A__block_nolut, (void *) (long int)t ); + for(t=0; t<_nThreads ; t++) + pthread_join( threads[t], NULL ); + return; +} + +// +// Compute a sub-block of the At*y MATRIX-VECTOR product +// +void* COMMIT_At__block_nolut( void *ptr ) +{ + int id = (long)ptr; + double *xPtr; + uint32_t *eval0; + uint32_t *t_v, *t_vEnd, *t_f; + float *t_l; + uint8_t *t_t; + + // intra-cellular compartments + t_v = ICv; + t_vEnd = ICv + n; + t_l = ICl; + t_f = ICf; + t_t = ICthreadsT; + + while( t_v != t_vEnd ) + { + // in this case, I need to walk throug because the segments are ordered in "voxel order" + if ( *t_t == id ) + x[*t_f] += (double)(*t_l) * Y[*t_v] * (double)(ICeval[*t_f]); + t_t++; + t_f++; + t_v++; + t_l++; + } + + // isotropic compartments + if (nISO > 0) + { + t_v = ISOv + ISOthreadsT[id]; + t_vEnd = ISOv + ISOthreadsT[id+1]; + xPtr = x + nF + ISOthreadsT[id]; + + while( t_v != t_vEnd ) + (*xPtr++) += Y[*t_v++]; + } + + + pthread_exit( 0 ); +} + +// +// Function called by Cython +// +void COMMIT_At_nolut( + int _nF, int _n, + double *_vIN, double *_vOUT, + uint32_t *_ICf, uint32_t *_ICeval, uint32_t *_ICv, float *_ICl, + uint32_t *_ISOv, + uint8_t* _ICthreadsT, uint32_t* _ISOthreadsT, + uint32_t _nISO, uint32_t _nThreads +) +{ + nF = _nF; + n = _n; + + x = _vOUT; + Y = _vIN; + + ICf = _ICf; + ICeval = _ICeval; + ICv = _ICv; + ICl = _ICl; + ISOv = _ISOv; + + nISO = _nISO; + + ICthreadsT = _ICthreadsT; + ISOthreadsT = _ISOthreadsT; + + // Run SEPARATE THREADS to perform the multiplication + pthread_t threads[MAX_THREADS]; + int t; + for(t=0; t<_nThreads ; t++) + pthread_create( &threads[t], NULL, COMMIT_At__block_nolut, (void *) (long int)t ); + for(t=0; t<_nThreads ; t++) + pthread_join( threads[t], NULL ); + return; +} diff --git a/setup.py b/setup.py index 38204cb7..2026a698 100644 --- a/setup.py +++ b/setup.py @@ -66,9 +66,9 @@ def run(self): build_ext.run(self) # # generate the operator_c.c file -# sys.path.insert(0, os.path.dirname(__file__)) -# from setup_operator import write_operator_c_file -# write_operator_c_file() +sys.path.insert(0, os.path.dirname(__file__)) +from setup_operator import write_operator_c_file +write_operator_c_file() # create the 'build' directory if not os.path.exists('build'):