diff --git a/src/CalculateE.c b/src/CalculateE.c index f8c8009e..4c80fb59 100644 --- a/src/CalculateE.c +++ b/src/CalculateE.c @@ -5,7 +5,7 @@ * Routines for most scattering quantities are in crosssec.c. Also saves internal fields to * file (optional). * - * Copyright (C) 2006-2012 ADDA contributors + * Copyright (C) 2006-2013 ADDA contributors * This file is part of ADDA. * * ADDA is free software: you can redistribute it and/or modify it under the terms of the GNU @@ -60,13 +60,6 @@ extern size_t TotalEFieldPlane; // used in iterative.c TIME_TYPE tstart_CE; -// EXTERNAL FUNCTIONS - -// GenerateB.c -void GenerateB(enum incpol which,doublecomplex *x); -// iterative.c -int IterativeSolver(enum iter method); - // LOCAL VARIABLES #define MUEL_HEADER "s11 s12 s13 s14 s21 s22 s23 s24 s31 s32 s33 s34 s41 s42 s43 s44" @@ -80,6 +73,13 @@ int IterativeSolver(enum iter method); #define ANGLE_FORMAT "%.2f" #define RMSE_FORMAT "%.3E" +// EXTERNAL FUNCTIONS + +// GenerateB.c +void GenerateB(enum incpol which,doublecomplex *x); +// iterative.c +int IterativeSolver(enum iter method); + //============================================================ static void ComputeMuellerMatrix(double matrix[4][4], const doublecomplex s1,const doublecomplex s2, @@ -111,37 +111,6 @@ static void ComputeMuellerMatrix(double matrix[4][4], const doublecomplex s1,con //============================================================ -static void ATT_UNUSED ComputeMuellerMatrixNorm(double matrix[4][4],const doublecomplex s1, - const doublecomplex s2,const doublecomplex s3,const doublecomplex s4) -/* computer mueller matrix from scattering matrix elements s1, s2, s3, s4, according to formula - * 3.16 from Bohren and Huffman; normalize all elements to S11 (except itself) - */ -{ - matrix[0][0] = 0.5*(cMultConRe(s1,s1)+cMultConRe(s2,s2)+cMultConRe(s3,s3)+cMultConRe(s4,s4)); - matrix[0][1] = 0.5*(cMultConRe(s2,s2)-cMultConRe(s1,s1)+cMultConRe(s4,s4)-cMultConRe(s3,s3)) - / matrix[0][0]; - matrix[0][2] = (cMultConRe(s2,s3)+cMultConRe(s1,s4))/matrix[0][0]; - matrix[0][3] = (cMultConIm(s2,s3)-cMultConIm(s1,s4))/matrix[0][0]; - - matrix[1][0] = 0.5*(cMultConRe(s2,s2)-cMultConRe(s1,s1)+cMultConRe(s3,s3)-cMultConRe(s4,s4)) - / matrix[0][0]; - matrix[1][1] = 0.5*(cMultConRe(s2,s2)+cMultConRe(s1,s1)-cMultConRe(s3,s3)-cMultConRe(s4,s4)) - / matrix[0][0]; - matrix[1][2] = (cMultConRe(s2,s3)-cMultConRe(s1,s4))/matrix[0][0]; - matrix[1][3] = (cMultConIm(s2,s3)+cMultConIm(s1,s4))/matrix[0][0]; - - matrix[2][0] = (cMultConRe(s2,s4)+cMultConRe(s1,s3))/matrix[0][0]; - matrix[2][1] = (cMultConRe(s2,s4)-cMultConRe(s1,s3))/matrix[0][0]; - matrix[2][2] = (cMultConRe(s1,s2)+cMultConRe(s3,s4))/matrix[0][0]; - matrix[2][3] = (cMultConIm(s2,s1)+cMultConIm(s4,s3))/matrix[0][0]; - - matrix[3][0] = (cMultConIm(s4,s2)+cMultConIm(s1,s3))/matrix[0][0]; - matrix[3][1] = (cMultConIm(s4,s2)-cMultConIm(s1,s3))/matrix[0][0]; - matrix[3][2] = (cMultConIm(s1,s2)-cMultConIm(s3,s4))/matrix[0][0]; - matrix[3][3] = (cMultConRe(s1,s2)-cMultConRe(s3,s4))/matrix[0][0]; -} - -//============================================================== INLINE void InitMuellerIntegrFile(const int type,const char * restrict fname,FILE * restrict * file, char * restrict buf,const size_t buf_size,double * restrict *mult) /* If 'phi_int_type' matches 'type', appropriate file (name given by 'fname') is created (with diff --git a/src/Makefile b/src/Makefile index eba25a5e..a11c5b13 100644 --- a/src/Makefile +++ b/src/Makefile @@ -79,7 +79,7 @@ ifneq ($(if $(MAKECMDGOALS),$(if $(filter $(NONTRIVIAL),$(MAKECMDGOALS)),1,),1), # below are appended to the list specified elsewhere. # Full list of possible options is the following: VALID_OPTS := DEBUG DEBUGFULL FFT_TEMPERTON PRECISE_TIMING NOT_USE_LOCK ONLY_LOCKFILE NO_FORTRAN \ - NO_CPP OVERRIDE_STDC_TEST OCL_READ_SOURCE_RUNTIME CLFFT_APPLE SPARSE + NO_CPP OVERRIDE_STDC_TEST OCL_READ_SOURCE_RUNTIME CLFFT_APPLE SPARSE USE_SSE3 # Debug mode. By default, release configuration is used (no debug, no warnings, maximum # optimization). DEBUG turns on producing debugging symbols (-g) and warnings and brings @@ -224,22 +224,42 @@ else $(info Release mode) DBGLVL := 0 endif -ifneq ($(filter FFT_TEMPERTON,$(OPTIONS)),) - $(info Temperton FFT) - CDEFS += -DFFT_TEMPERTON - ifeq ($(filter NO_FORTRAN,$(OPTIONS)),) - FSOURCE += cfft99D.f - else - $(error Temperton FFT (FFT_TEMPERTON) is implemented in Fortran, hence incompatible with NO_FORTRAN) +ifneq ($(filter SPARSE,$(OPTIONS)),) + $(info Sparse (non-FFT) mode) + ifneq ($(filter FFT_TEMPERTON,$(OPTIONS)),) + $(error SPARSE turns off all FFT-related code, so it is incompatible with FFT_TEMPERTON) + endif + ifneq ($(filter CLFFT_APPLE,$(OPTIONS)),) + $(error SPARSE turns off all FFT-related code, so it is incompatible with CLFFT_APPLE) + endif + ifneq ($(filter PRECISE_TIMING,$(OPTIONS)),) + $(error SPARSE is currently incompatible with PRECISE_TIMING) endif + + CDEFS += -DSPARSE else - $(info FFTW3) - LDLIBS += -lfftw3 - ifdef FFTW3_INC_PATH - CFLAGS += -I$(FFTW3_INC_PATH) + CSOURCE += fft.c + ifneq ($(filter FFT_TEMPERTON,$(OPTIONS)),) + $(info Temperton FFT) + CDEFS += -DFFT_TEMPERTON + ifeq ($(filter NO_FORTRAN,$(OPTIONS)),) + FSOURCE += cfft99D.f + else + $(error Temperton FFT (FFT_TEMPERTON) is implemented in Fortran, hence incompatible with NO_FORTRAN) + endif + else + $(info FFTW3) + LDLIBS += -lfftw3 + ifdef FFTW3_INC_PATH + CFLAGS += -I$(FFTW3_INC_PATH) + endif + ifdef FFTW3_LIB_PATH + LDFLAGS += -L$(FFTW3_LIB_PATH) + endif endif - ifdef FFTW3_LIB_PATH - LDFLAGS += -L$(FFTW3_LIB_PATH) + ifneq ($(filter CLFFT_APPLE,$(OPTIONS)),) + # Here only the info is printed, the main logic is in ocl/Makefile + $(info Apple clFFT routines) endif endif ifneq ($(filter PRECISE_TIMING,$(OPTIONS)),) @@ -270,24 +290,15 @@ ifneq ($(filter OVERRIDE_STDC_TEST,$(OPTIONS)),) $(info Overriding test for C99 conformance) CDEFS += -DOVERRIDE_STDC_TEST endif - -ifneq ($(filter SPARSE,$(OPTIONS)),) - CDEFS += -DADDA_SPARSE -else - CSOURCE += fft.c -endif ifneq ($(filter USE_SSE3,$(OPTIONS)),) CDEFS += -DUSE_SSE3 CFLAGS += -msse3 + $(info Using SSE3 optimizations) endif ifneq ($(filter OCL_READ_SOURCE_RUNTIME,$(OPTIONS)),) # Here only the info is printed, the main logic is in ocl/Makefile $(info Read CL sources at runtime) endif -ifneq ($(filter CLFFT_APPLE,$(OPTIONS)),) - # Here only the info is printed, the main logic is in ocl/Makefile - $(info Apple clFFT routines) -endif # Process EXTRA_FLAGS ifneq ($(strip $(EXTRA_FLAGS)),) $(info Extra compiler options: '$(EXTRA_FLAGS)') @@ -446,12 +457,12 @@ clean: cleanseq cleanmpi cleanocl # compilation and thus contain quite heavy processing. cleanseq: @echo "Removing sequential compiled files" - cd $(SEQ) && rm -f *.o *.d *.d.* $(OPTSFILES) $(PROGSEQ) $(PROGSEQ).exe + cd $(SEQ) && rm -f *.o *.d $(OPTSFILES) $(PROGSEQ) $(PROGSEQ).exe cleanmpi: @echo "Removing MPI compiled files" - cd $(MPI) && rm -f *.o *.d *.d.* $(OPTSFILES) $(PROGMPI) $(PROGMPI).exe + cd $(MPI) && rm -f *.o *.d $(OPTSFILES) $(PROGMPI) $(PROGMPI).exe cleanocl: @echo "Removing OpenCL compiled files" - cd $(OCL) && rm -f *.o *.d *.d.* $(OPTSFILES) $(PROGOCL) $(PROGOCL).exe *.clstr + cd $(OCL) && rm -f *.o *.d $(OPTSFILES) $(PROGOCL) $(PROGOCL).exe *.clstr diff --git a/src/calculator.c b/src/calculator.c index fa29529c..c3a08ea3 100644 --- a/src/calculator.c +++ b/src/calculator.c @@ -3,7 +3,7 @@ * Descr: all the initialization is done here before actually calculating internal fields; includes * calculation of couple constants * - * Copyright (C) 2006-2010,2012 ADDA contributors + * Copyright (C) 2006-2010,2013 ADDA contributors * This file is part of ADDA. * * ADDA is free software: you can redistribute it and/or modify it under the terms of the GNU @@ -27,6 +27,7 @@ #include "interaction.h" #include "io.h" #include "memory.h" +#include "oclcore.h" #include "Romberg.h" #include "timing.h" #include "vars.h" @@ -35,10 +36,6 @@ #include #include -#ifdef OPENCL -# include "oclcore.h" -#endif - // SEMI-GLOBAL VARIABLES // defined and initialized in crosssec.c @@ -64,26 +61,27 @@ double dtheta_deg,dtheta_rad; // delta theta in degrees and radians // amplitude matrix for different values of alpha doublecomplex * restrict ampl_alphaX,* restrict ampl_alphaY; double * restrict muel_alpha; // mueller matrix for different values of alpha -// used in fft.c - // tables of integrals -double * restrict tab1,* restrict tab2,* restrict tab3,* restrict tab4,* restrict tab5, - * restrict tab6,* restrict tab7,* restrict tab8,* restrict tab9,* restrict tab10; -/* it is preferable to declare the following as "* restrict * restrict", but it is hard to make it - * generally compatible with Free_iMatrix function syntax. However, it is defined so in fft.c. - */ -int ** restrict tab_index; // matrix for indexing of table arrays + // used in crosssec.c double * restrict E2_alldir; // square of E, calculated for alldir double * restrict E2_alldir_buffer; // buffer to accumulate E2_alldir doublecomplex cc[MAX_NMAT][3]; // couple constants // arrays of exponents along 3 axes (for calc_field) +#ifndef SPARSE doublecomplex * restrict expsX,* restrict expsY,* restrict expsZ; +#endif + // used in iterative.c doublecomplex *rvec; // current residual doublecomplex * restrict Avecbuffer; // used to hold the result of matrix-vector products // auxiliary vectors, used in some iterative solvers (with more meaningful names) doublecomplex * restrict vec1,* restrict vec2,* restrict vec3; +#ifdef SPARSE +// used in matvec.c +doublecomplex * restrict arg_full; // vector to hold argvec for all dipoles +#endif + // LOCAL VARIABLES static size_t block_theta; // size of one block of mueller matrix - 16*nTheta @@ -95,9 +93,9 @@ static double * restrict out; // EXTERNAL FUNCTIONS // CalculateE.c -extern int CalculateE(enum incpol which,enum Eftype type); -extern void MuellerMatrix(void); -extern void SaveMuellerAndCS(double * restrict in); +int CalculateE(enum incpol which,enum Eftype type); +void MuellerMatrix(void); +void SaveMuellerAndCS(double * restrict in); //============================================================ @@ -247,91 +245,6 @@ static void InitCC(const enum incpol which) //============================================================ -static double * ATT_MALLOC ReadTableFile(const char * restrict sh_fname,const int size_multiplier) -{ - FILE * restrict ftab; - double * restrict tab_n; - int size; - char fname[MAX_FNAME]; - int i; - - size=TAB_SIZE*size_multiplier; - memory+=size*sizeof(double); - if (!prognosis) { - // allocate memory for tab_n - MALLOC_VECTOR(tab_n,double,size,ALL); - // open file - SnprintfErr(ALL_POS,fname,MAX_FNAME,TAB_PATH"%s",sh_fname); - ftab=FOpenErr(fname,"r",ALL_POS); - // scan file - for (i=0; i #ifdef ADDA_MPI -MPI_Datatype mpi_dcomplex,mpi_double3,mpi_dcomplex3; // combined datatypes +MPI_Datatype mpi_dcomplex,mpi_int3,mpi_double3,mpi_dcomplex3; // combined datatypes int *recvcounts,*displs; // arrays of size ringid required for AllGather operations bool displs_init=false; // whether arrays above are initialized #endif @@ -46,31 +46,25 @@ bool displs_init=false; // whether arrays above are initialized */ #define SYNCHRONIZE_TIMING +#ifdef PARALLEL +#ifndef SPARSE + // SEMI-GLOBAL VARIABLES // defined and allocated in fft.c extern double * restrict BT_buffer, * restrict BT_rbuffer; + // defined and initialized in timing.c extern TIME_TYPE Timing_InitDmComm; // LOCAL VARIABLES -#ifdef PARALLEL - static int Ntrans; // number of transmissions; used in CalcPartner -#ifndef ADDA_SPARSE static int * restrict gr_comm_size; // sizes of transmissions for granule generator communications static int * restrict gr_comm_overl; // shows whether two sequential transmissions overlap static unsigned char * restrict gr_comm_ob; // buffer for overlaps static void * restrict gr_comm_buf; // buffer for MPI transfers -#else //ADDA_SPARSE -int * proc_mem_position; -int * proc_mem_material; -int * proc_mem_argvec; -int * proc_disp_position; -int * proc_disp_material; -int * proc_disp_argvec; -#endif //ADDA_SPARSE +#endif // !SPARSE // First several functions are defined only in parallel mode @@ -91,7 +85,8 @@ static void RecoverCommandLine(int *argc_p,char ***argv_p) } //============================================================ -#ifndef ADDA_SPARSE +#ifndef SPARSE + INLINE size_t IndexBlock(const size_t x,const size_t y,const size_t z,const size_t lengthY) // index block; used in BlockTranspose { @@ -119,7 +114,7 @@ INLINE int CalcPartner(const int tran) } return part; } -#endif //ADDA_SPARSE +#endif // !SPARSE //============================================================ @@ -181,6 +176,13 @@ static MPI_Datatype MPIVarType(var_type type,bool reduce,int *mult) } else res=mpi_dcomplex; } + else if (type==int3_type) { + if (reduce) { + res=MPI_INT; + *mult=3; + } + else res=mpi_int3; + } else if (type==double3_type) { if (reduce) { res=MPI_DOUBLE; @@ -224,7 +226,9 @@ void InitDispls(void) //============================================================ void AllGather(void * restrict x_from,void * restrict x_to,const var_type type,TIME_TYPE *timing) -// Gather distributed arrays; works for all types;increments 'timing' (if not NULL) by the time used +/* Gather distributed arrays; works for all types; x_from can be NULL then the data from x_to is used (in_place) + * increments 'timing' (if not NULL) by the time used + */ { #ifdef ADDA_MPI MPI_Datatype mes_type; @@ -240,7 +244,8 @@ void AllGather(void * restrict x_from,void * restrict x_to,const var_type type,T } InitDispls(); // actually initialization is done only once mes_type=MPIVarType(type,false,NULL); - MPI_Allgatherv(x_from,local_nvoid_Ndip,mes_type,x_to,recvcounts,displs,mes_type,MPI_COMM_WORLD); + if (x_from==NULL) MPI_Allgatherv(MPI_IN_PLACE,0,mes_type,x_to,recvcounts,displs,mes_type,MPI_COMM_WORLD); + else MPI_Allgatherv(x_from,local_nvoid_Ndip,mes_type,x_to,recvcounts,displs,mes_type,MPI_COMM_WORLD); if (timing!=NULL) (*timing)+=GET_TIME()-tstart; #endif } @@ -269,15 +274,19 @@ void InitComm(int *argc_p UOIP,char ***argv_p UOIP) // initialize ringid and nprocs MPI_Comm_rank(MPI_COMM_WORLD,&ringid); MPI_Comm_size(MPI_COMM_WORLD,&nprocs); +#ifndef SPARSE // initialize Ntrans if (IS_EVEN(nprocs)) Ntrans=nprocs-1; else Ntrans=nprocs; +#endif // define a few derived datatypes /* this is intimately tied to the type definition of doublecomplex; when switching to C99 * complex datatypes, this should be replaced just by MPI_DOUBLE_COMPLEX. */ MPI_Type_contiguous(2,MPI_DOUBLE,&mpi_dcomplex); MPI_Type_commit(&mpi_dcomplex); + MPI_Type_contiguous(3,MPI_INT,&mpi_int3); + MPI_Type_commit(&mpi_int3); MPI_Type_contiguous(3,MPI_DOUBLE,&mpi_double3); MPI_Type_commit(&mpi_double3); MPI_Type_contiguous(3,mpi_dcomplex,&mpi_dcomplex3); @@ -294,12 +303,12 @@ void InitComm(int *argc_p UOIP,char ***argv_p UOIP) ringid=ADDA_ROOT; #endif -#ifndef ADDA_SPARSE //CheckNprocs does not exist in sparse mode +#ifndef SPARSE // CheckNprocs does not exist in sparse mode /* check if weird number of processors is specified; called even in sequential mode to * initialize weird_nprocs */ CheckNprocs(); -#endif //ADDA_SPARSE +#endif // !SPARSE } //============================================================ @@ -467,12 +476,10 @@ void MyInnerProduct(void * restrict data UOIP,const var_type type UOIP,size_t n_ void ParSetup(void) // initialize common parameters; need to do in the beginning to enable call to MakeParticle { - -#ifndef ADDA_SPARSE //FFT mode initialization - -#ifdef PARALLEL +#ifndef SPARSE //FFT mode initialization +# ifdef PARALLEL int unitZ,unitX; -#endif +# endif // calculate size of 3D grid gridX=fftFit(2*boxX,nprocs); gridY=fftFit(2*boxY,1); @@ -484,7 +491,7 @@ void ParSetup(void) * except for XY values, used in granule generator */ gridYZ=MultOverflow(gridY,gridZ,ALL_POS,"gridYZ"); -#ifdef PARALLEL +# ifdef PARALLEL unitZ=smallZ/nprocs; // this should always be an exact division local_z0=ringid*unitZ; local_z1=(ringid+1)*unitZ; @@ -493,13 +500,13 @@ void ParSetup(void) unitX=gridX/nprocs; local_x0=ringid*unitX; local_x1=(ringid+1)*unitX; -#else +# else local_z0=0; local_z1=smallZ; local_z1_coer=boxZ; local_x0=0; local_x1=gridX; -#endif +# endif if (local_z1_coer<=local_z0) { LogWarning(EC_INFO,ALL_POS,"No real dipoles are assigned"); local_z1_coer=local_z0; @@ -509,29 +516,30 @@ void ParSetup(void) boxXY=boxX*(size_t)boxY; // overflow check is covered by gridYZ above local_Ndip=MultOverflow(boxXY,local_z1_coer-local_z0,ALL_POS,"local_Ndip"); D("%i : %i %i %i %zu %zu \n",ringid,local_z0,local_z1_coer,local_z1,local_Ndip,local_Nx); - -#else //sparse mode initialization (completely different from FFT mode) - -#ifdef PARALLEL +#else // SPARSE + /* For sparse mode, nvoid_Ndip is defined in InitDipFile(), and here we define local_nvoid_d0 and local_nvoid_d1, + * since they are required already in ReadDipFile() + */ + D("%zu",nvoid_Ndip); +# ifdef PARALLEL double unit_f=1.0/nprocs; //making sure that the first local_f0 and last local_f1 are 0.0 and 1.0 - local_f0 = (ringid == 0) ? 0.0 : ringid*unit_f; - local_f1 = (ringid == nprocs-1) ? 1.0 : (ringid+1)*unit_f; -#else - local_f0 = 0.0; - local_f1 = 1.0; -#endif //PARALLEL - -#endif //ADDA_SPARSE + double local_f0 = (ringid == 0) ? 0.0 : ringid*unit_f; + double local_f1 = (ringid == nprocs-1) ? 1.0 : (ringid+1)*unit_f; + local_nvoid_d0 = local_f0*nvoid_Ndip; + local_nvoid_d1 = local_f1*nvoid_Ndip; +# else + local_nvoid_d0=0; + local_nvoid_d1=nvoid_Ndip; +# endif // PARALLEL +#endif } //============================================================ void SetupLocalD(void) -// initialize local_nvoid_d0 and local_nvoid_d1 +// initialize local_nvoid_d0 and local_nvoid_d1 in FFT mode. In sparse mode those are set earlier in ParSetup() { -#ifndef ADDA_SPARSE //FFT mode - #ifdef ADDA_MPI /* use of exclusive scan (MPI_Exscan) is logically more suitable, but it has special behavior * for the ringid=0. The latter would require special additional arrangements. @@ -542,19 +550,11 @@ void SetupLocalD(void) local_nvoid_d0=0; local_nvoid_d1=local_nvoid_Ndip; #endif - -#else // sparse mode - - D("%d", (int)nvoid_Ndip); - local_nvoid_d0 = local_f0*nvoid_Ndip; - local_nvoid_d1 = local_f1*nvoid_Ndip; - -#endif //ADDA_SPARSE } //============================================================ -#ifndef ADDA_SPARSE +#ifndef SPARSE void BlockTranspose(doublecomplex * restrict X UOIP,TIME_TYPE *timing UOIP) /* do the data-transposition, i.e. exchange, between fftX and fftY&fftZ; specializes at Xmatrix; @@ -866,70 +866,4 @@ bool ExchangePhaseShifts(doublecomplex * restrict bottom, doublecomplex * restri #endif // PARALLEL -#endif //ADDA_SPARSE - -//============================================================ - -#ifdef ADDA_SPARSE //functions exclusive to the sparse mode - -void InitArraySync() { -#ifdef ADDA_MPI - MALLOC_VECTOR(proc_mem_position,int,nprocs,ALL); - MALLOC_VECTOR(proc_mem_material,int,nprocs,ALL); - MALLOC_VECTOR(proc_mem_argvec,int,nprocs,ALL); - MALLOC_VECTOR(proc_disp_position,int,nprocs,ALL); - MALLOC_VECTOR(proc_disp_material,int,nprocs,ALL); - MALLOC_VECTOR(proc_disp_argvec,int,nprocs,ALL); - - proc_mem_position[ringid] = 3*local_nvoid_Ndip; - proc_mem_material[ringid] = local_nvoid_Ndip; - proc_mem_argvec[ringid] = 3*local_nvoid_Ndip*2; //*2 due to 2 doubles in complex - proc_disp_position[ringid] = 3*local_nvoid_d0; - proc_disp_material[ringid] = local_nvoid_d0; - proc_disp_argvec[ringid] = 3*local_nvoid_d0*2; - - MPI_Allgather(MPI_IN_PLACE,0,MPI_INT,proc_mem_position,1,MPI_INT,MPI_COMM_WORLD); - MPI_Allgather(MPI_IN_PLACE,0,MPI_INT,proc_mem_material,1,MPI_INT,MPI_COMM_WORLD); - MPI_Allgather(MPI_IN_PLACE,0,MPI_INT,proc_mem_argvec,1,MPI_INT,MPI_COMM_WORLD); - MPI_Allgather(MPI_IN_PLACE,0,MPI_INT,proc_disp_position,1,MPI_INT,MPI_COMM_WORLD); - MPI_Allgather(MPI_IN_PLACE,0,MPI_INT,proc_disp_material,1,MPI_INT,MPI_COMM_WORLD); - MPI_Allgather(MPI_IN_PLACE,0,MPI_INT,proc_disp_argvec,1,MPI_INT,MPI_COMM_WORLD); -#endif -} - -//============================================================ - -void SyncPosition(int * restrict pos) -{ -#ifdef ADDA_MPI - static const MPI_Datatype mes_type = MPI_INT; - - MPI_Allgatherv(MPI_IN_PLACE, 0, mes_type,pos, proc_mem_position, - proc_disp_position, mes_type, MPI_COMM_WORLD); - -#endif -} - -//============================================================ - -void SyncMaterial(unsigned char * restrict mat) -{ -#ifdef ADDA_MPI - static const MPI_Datatype mes_type = MPI_CHAR; - MPI_Allgatherv(MPI_IN_PLACE, 0, mes_type, mat, proc_mem_material, - proc_disp_material, mes_type, MPI_COMM_WORLD); -#endif -} - -//============================================================ - -void SyncArgvec() -{ -#ifdef ADDA_MPI - static const MPI_Datatype mes_type = MPI_DOUBLE; - MPI_Allgatherv(MPI_IN_PLACE, 0, mes_type,(double *)arg_full, proc_mem_argvec, - proc_disp_argvec, mes_type, MPI_COMM_WORLD); -#endif -} - -#endif //ADDA_SPARSE +#endif // !SPARSE diff --git a/src/comm.h b/src/comm.h index c0ecb4dd..d0e0e8cd 100644 --- a/src/comm.h +++ b/src/comm.h @@ -2,7 +2,7 @@ * $Date:: $ * Descr: definitions of communication global variables and routines * - * Copyright (C) 2006-2012 ADDA contributors + * Copyright (C) 2006-2013 ADDA contributors * This file is part of ADDA. * * ADDA is free software: you can redistribute it and/or modify it under the terms of the GNU @@ -31,27 +31,11 @@ #define UOIP ATT_UNUSED #endif -typedef enum {uchar_type,int_type,sizet_type,double_type,double3_type,cmplx_type,cmplx3_type} var_type; - -#ifdef PARALLEL -#ifdef ADDA_SPARSE -extern int * proc_mem_position; -extern int * proc_mem_material; -extern int * proc_mem_argvec; -extern int * proc_disp_position; -extern int * proc_disp_material; -extern int * proc_disp_argvec; -#endif //ADDA_SPARSE -#endif //PARALLEL +typedef enum {uchar_type,int_type,int3_type,sizet_type,double_type,double3_type,cmplx_type,cmplx3_type} var_type; void Stop(int) ATT_NORETURN; void Synchronize(void); -#ifndef ADDA_SPARSE -void BlockTranspose(doublecomplex * restrict X,TIME_TYPE *timing); -void BlockTranspose_Dm(doublecomplex * restrict X,size_t lengthY,size_t lengthZ); -#endif //ADDA_SPARSE double AccumulateMax(double data,double *max); - void Accumulate(double * restrict data,size_t size,double * restrict buffer,TIME_TYPE *timing); void MyInnerProduct(void * restrict data,var_type type,size_t n_elem,TIME_TYPE *timing); void InitComm(int *argc_p,char ***argv_p); @@ -59,21 +43,17 @@ void ParSetup(void); void SetupLocalD(void); void MyBcast(void * restrict data,var_type type,const size_t n_elem,TIME_TYPE *timing); void BcastOrient(int *i,int *j,int *k); + +#ifndef SPARSE +void BlockTranspose(doublecomplex * restrict X,TIME_TYPE *timing); +void BlockTranspose_Dm(doublecomplex * restrict X,size_t lengthY,size_t lengthZ); // used by granule generator -#ifndef ADDA_SPARSE void SetGranulComm(double z0,double z1,double gdZ,int gZ,size_t gXY,size_t buf_size,int *lz0, int *lz1,int sm_gr); void CollectDomainGranul(unsigned char * restrict dom,size_t gXY,int lz0,int locgZ,TIME_TYPE *timing); void FreeGranulComm(int sm_gr); void ExchangeFits(char * restrict data,const size_t n,TIME_TYPE *timing); -#endif //ADDA_SPARSE - -#ifdef ADDA_SPARSE -void InitArraySync(void); -void SyncPosition(int * restrict pos); -void SyncMaterial(unsigned char * restrict mat); -void SyncArgvec(void); -#endif //ADDA_SPARSE +#endif // !SPARSE #ifdef PARALLEL // these functions are defined only in parallel mode diff --git a/src/const.h b/src/const.h index 06aad603..26e0b1e3 100644 --- a/src/const.h +++ b/src/const.h @@ -21,7 +21,7 @@ #define __const_h // version number (string) -#define ADDA_VERSION "1.2b1" +#define ADDA_VERSION "1.2b2" /* ADDA uses certain C99 extensions, which are widely supported by GNU and Intel compilers. However, * they may be not completely supported by e.g. Microsoft Visual Studio compiler. Therefore, we diff --git a/src/crosssec.c b/src/crosssec.c index 3ebc6da7..df0ec61a 100644 --- a/src/crosssec.c +++ b/src/crosssec.c @@ -3,7 +3,7 @@ * Descr: all the functions to calculate scattering quantities (except Mueller matrix); to read * different parameters from files; and initialize orientation of the particle * - * Copyright (C) 2006-2012 ADDA contributors + * Copyright (C) 2006-2013 ADDA contributors * This file is part of ADDA. * * ADDA is free software: you can redistribute it and/or modify it under the terms of the GNU @@ -41,7 +41,9 @@ // defined and initialized in calculator.c extern double * restrict E2_alldir,* restrict E2_alldir_buffer; extern const doublecomplex cc[][3]; +#ifndef SPARSE extern doublecomplex * restrict expsX,* restrict expsY,* restrict expsZ; +#endif // defined and initialized in GenerateB.c extern const double beam_center_0[3]; // defined and initialized in param.c @@ -117,26 +119,6 @@ void InitRotation (void) //===================================================================== -static int ATT_UNUSED ReadLine(FILE * restrict file, // opened file - const char * restrict fname, // ... its filename - char * restrict buf,const int buf_size) // buffer for line and its size -// reads the first uncommented line; returns 1 if EOF reached -{ - while (!feof(file)) { - fgets(buf,buf_size,file); - if (*buf!='#') { // if uncommented - if (strstr(buf,"\n")==NULL && !feof(file)) LogError(ONE_POS, - "Buffer overflow while reading '%s' (size of uncommented line > %d)", - fname,buf_size-1); - else return 0; // complete line is read - } // finish reading the commented line - else while (strstr(buf,"\n")==NULL && !feof(file)) fgets(buf,buf_size,file); - } - return 1; -} - -//===================================================================== - static void ReadLineStart(FILE * restrict file, // opened file const char * restrict fname, // ... its filename char * restrict buf,const int buf_size, // buffer for line and its size @@ -526,7 +508,7 @@ void CalcField (doublecomplex * restrict ebuff, // where to write calculated sca double temp, na; doublecomplex mult_mat[MAX_NMAT]; const bool scat_avg=true; // temporary fixed option for SO formulation -#ifdef ADDA_SPARSE +#ifdef SPARSE doublecomplex expX, expY, expZ; #endif @@ -546,11 +528,11 @@ void CalcField (doublecomplex * restrict ebuff, // where to write calculated sca } for(i=0;i<3;i++) sum[i][RE]=sum[i][IM]=0.0; // prepare values of exponents, along each of the coordinates -#ifndef ADDA_SPARSE +#ifndef SPARSE imExp_arr(-kd*n[0],boxX,expsX); imExp_arr(-kd*n[1],boxY,expsY); imExp_arr(-kd*n[2],local_Nz_unif,expsZ); -#endif //ADDA_SPARSE +#endif // !SPARSE /* not to double the code in the source we use two temporary defines,since the following 'if' * cases differ only by one line of code; (taking 'if' inside the cycle will affect performance) */ @@ -562,7 +544,7 @@ void CalcField (doublecomplex * restrict ebuff, // where to write calculated sca * the grid. */ -#ifndef ADDA_SPARSE //FFT mode +#ifndef SPARSE //FFT mode #define PART1\ iy1=iz1=UNDEF;\ for (j=0;j rdipT; b) pvec -> pT. * Actually this routine is usually called for two polarizations and rdipT does not change * between the calls. So one AllGather of rdipT can be removed. Number of memory allocations can * also be reduced. But this should be replaced by Fourier anyway. */ + /* The following is somewhat redundant in sparse mode, since "full" (containing information about all dipoles) + * vectors are already present in that mode. However, we do not optimize it now, since in standard mode radiation + * forces should be computed by FFT anyway. Moreover, there are certain ideas to optimize sparse mode, so it will + * not use full vectors - if done, this improvement can be also adjusted to the code below. + */ // allocates a lot of additional memory MALLOC_VECTOR(rdipT,double,nRows,ALL); MALLOC_VECTOR(pT,complex,nRows,ALL); diff --git a/src/fft.c b/src/fft.c index 501e3764..46f84760 100644 --- a/src/fft.c +++ b/src/fft.c @@ -1,7 +1,6 @@ /* File: fft.c * $Date:: $ - * Descr: initialization of all FFT for matrix-vector products; and FFT procedures themselves - + * Descr: initialization of all FFT for matrix-vector products; and FFT procedures themselves; not used in sparse mode * TODO: A lot of indirect indexing used - way to optimize. * * Copyright (C) 2006-2013 ADDA contributors @@ -26,7 +25,9 @@ #include "debug.h" #include "function.h" #include "io.h" +#include "interaction.h" #include "memory.h" +#include "oclcore.h" #include "prec_time.h" #include "vars.h" // system headers @@ -34,16 +35,13 @@ #include #include -#ifdef OPENCL -# ifdef CLFFT_AMD -# include //external library from AMD -# elif defined(CLFFT_APPLE) -# include "cpp/clFFT.h" //nearly unmodified APPLE FFT header file -# ifdef NO_CPP -# error "Apple clFFT relies on C++ sources, hence is incompatible with NO_CPP option" -# endif +#ifdef CLFFT_AMD +# include //external library from AMD +#elif defined(CLFFT_APPLE) +# include "cpp/clFFT.h" //nearly unmodified APPLE FFT header file +# ifdef NO_CPP +# error "Apple clFFT relies on C++ sources, hence is incompatible with NO_CPP option" # endif -# include "oclcore.h" #endif /* standard FFT routines (FFTW3 of FFT_TEMPERTON) are required even when OpenCL is used, since * they are used for Fourier transform of the D-matrix @@ -70,17 +68,6 @@ // SEMI-GLOBAL VARIABLES -// defined ant initialized in calculator.c -extern const double * restrict tab1,* restrict tab2,* restrict tab3,* restrict tab4,* restrict tab5, - * restrict tab6,* restrict tab7,* restrict tab8,* restrict tab9,* restrict tab10; -extern const int * restrict * restrict tab_index; - -// defined and initialized in make_particle.c -extern double gridspace; - -// defined and initialized in param.c -extern double igt_lim, igt_eps; - // defined and initialized in timing.c extern TIME_TYPE Timing_FFT_Init,Timing_Dm_Init; @@ -132,17 +119,6 @@ void cfft99_(double * restrict data,double * restrict _work,const double * restr const int *isign); #endif -// EXTERNAL FUNCTIONS - -#ifndef NO_FORTRAN -// fort/propaesplibreintadda.f -void propaespacelibreintadda_(const double *Rij,const double *ka,const double *arretecube, - const double *relreq, double *result); -#endif - -// sinint.c -void cisi(double x,double *ci,double *si); - //============================================================ INLINE size_t IndexDmatrix(const size_t x,size_t y,size_t z) @@ -373,7 +349,7 @@ void fftY(const int isign) if (isign==FFT_FORWARD) fftw_execute(planYf); else fftw_execute(planYb); #elif defined(FFT_TEMPERTON) - int nn=gridY,inc=1,jump=nn,lot=3*gridZ,j; + int nn=gridY,inc=1,jump=nn,lot=3*gridZ; cfft99_((double *)(slices_tr),work,trigsY,ifaxY,&inc,&jump,&nn,&lot,&isign); #endif @@ -740,532 +716,6 @@ static void fftInitAfterD(void) //============================================================ -INLINE bool TestTableSize(const double rn) -// tests if rn fits into the table; if not, returns false and produces info message -{ - static bool warned=false; - - if (rn>TAB_RMAX) { - if (!warned) { - warned=true; - LogWarning(EC_INFO,ONE_POS,"Not enough table size (available only up to R/d=%d), " - "so O(kd^2) accuracy of Green's function is not guaranteed",TAB_RMAX); - } - return false; - } - else return true; -} - -//============================================================ - -static void CalcInterTerm(const int i,const int j,const int k,doublecomplex * restrict result) -/* calculates interaction term between two dipoles; given integer distance vector {i,j,k} - * (in units of d). All six components of the symmetric matrix are computed at once. - */ -{ - double rr,qvec[3],q2[3],invr3,qavec[3],av[3]; - double kr,kr2,kr3,kd2,q4,rn,rn2; - double temp,qmunu[6],qa,qamunu[6],invrn,invrn2,invrn3,invrn4; - double dmunu[6]; // KroneckerDelta[mu,nu] - can serve both as multiplier, and as bool - double kfr,ci,si,ci1,si1,ci2,si2,brd,cov,siv,g0,g2; - doublecomplex expval,br,br1,m,m2,Gf1,Gm0,Gm1,Gc1,Gc2; - int ind0,ind1,ind2,ind2m,ind3,ind4,indmunu,comp,mu,nu,mu1,nu1; - int sigV[3],ic,sig,ivec[3],ord[3],invord[3]; - double t3q,t3a,t4q,t4a,t5tr,t5aa,t6tr,t6aa; - const bool inter_avg=true; // temporary fixed option for SO formulation - -// this is used for debugging, should be empty define, when not required -#define PRINT_GVAL /*printf("%d,%d,%d: %g%+gi, %g%+gi, %g%+gi,\n%g%+gi, %g%+gi, %g%+gi\n",\ - i,j,k,result[0][RE],result[0][IM],result[1][RE],result[1][IM],result[2][RE],result[2][IM],\ - result[3][RE],result[3][IM],result[4][RE],result[4][IM],result[5][RE],result[5][IM]);*/ - - // self interaction; self term is computed in different subroutine - if (i==0 && j==0 && k==0) for (comp=0;comp=j>=k>=0 - // building of ord; ord[x] is x-th largest coordinate (0-th - the largest) - if (ivec[0]>=ivec[1]) { // i>=j - if (ivec[0]>=ivec[2]) { // i>=k - ord[0]=0; - if (ivec[1]>=ivec[2]) { // j>=k - ord[1]=1; - ord[2]=2; - } - else { - ord[1]=2; - ord[2]=1; - } - } - else { - ord[0]=2; - ord[1]=0; - ord[2]=1; - } - } - else { - if (ivec[0]>=ivec[2]) { // i>=k - ord[0]=1; - ord[1]=0; - ord[2]=2; - } - else { - ord[2]=0; - if (ivec[1]>=ivec[2]) { // j>=k - ord[0]=1; - ord[1]=2; - } - else { - ord[0]=2; - ord[1]=1; - } - } - } - // change parameters according to coordinate transforms - Permutate(qvec,ord); - Permutate_i(ivec,ord); - // compute inverse permutation - memcpy(invord,ord,3*sizeof(int)); - Permutate_i(invord,ord); - if (invord[0]==0 && invord[1]==1 && invord[2]==2) memcpy(invord,ord,3*sizeof(int)); - // temp = kr/24; and set some indices - temp=kr/24; - ind0=tab_index[ivec[0]][ivec[1]]+ivec[2]; - ind1=3*ind0; - ind2m=6*ind0; - // cycle over tensor components - for (mu=0,comp=0;mu<3;mu++) for (nu=mu;nu<3;nu++,comp++) { - sig=sigV[mu]*sigV[nu]; // sign of some terms below - /* indexes for tables of different dimensions based on transformed indices mu and nu - * '...munu' variables are invariant to permutations because both constituent - * vectors and indices are permutated. So this variables can be used, as precomputed - * above. - */ - mu1=invord[mu]; - nu1=invord[nu]; - /* indmunu is a number of component[mu,nu] in a symmetric matrix, but counted - * differently than comp. This is {{0,1,3},{1,2,4},{3,4,5}} - */ - indmunu=mu1+nu1; - if (mu1==2 || nu1==2) indmunu++; - ind2=ind2m+indmunu; - ind3=3*ind2; - ind4=6*ind2; - // computing several quantities with table integrals - t3q=DotProd(qvec,tab3+ind1); - t4q=DotProd(qvec,tab4+ind3); - t5tr=TrSym(tab5+ind2m); - t6tr=TrSym(tab6+ind4); - //====== computing Gc0 ===== - /* br = delta[mu,nu]*(-I7-I9/2-kr*(i+kr)/24+2*t3q+t5tr) - * - (-3I8[mu,nu]-3I10[mu,nu]/2-qmunu*kr*(i+kr)/24+2*t4q+t6tr) - */ - br[RE]=sig*(3*(tab10[ind2]/2+tab8[ind2])-2*t4q-t6tr)+temp*qmunu[comp]*kr; - br[IM]=3*temp*qmunu[comp]; - if (dmunu[comp]) { - br[RE]+=2*t3q+t5tr-temp*kr-tab9[ind0]/2-tab7[ind0]; - br[IM]-=temp; - } - // br*=kd^2 - cMultReal(kd2,br,br); - // br+=I1*delta[mu,nu]*(-1+ikr+kr^2)-sig*I2[mu,nu]*(-3+3ikr+kr^2) - br[RE]+=sig*tab2[ind2]*(3-kr2); - br[IM]-=sig*tab2[ind2]*3*kr; - if (dmunu[comp]) { - br[RE]+=tab1[ind0]*(kr2-1); - br[IM]+=tab1[ind0]*kr; - } - // Gc0=expval*br - cMult(expval,br,result[comp]); - } - } - else { - //====== Gfar (and part of Gmedian) for IGT ======= - // Gf0 = Gp*(1-kd^2/24) - for (comp=0;comp=j>=k>=0 - // building of ord; ord[x] is x-th largest coordinate (0-th - the largest) - if (ivec[0]>=ivec[1]) { // i>=j - if (ivec[0]>=ivec[2]) { // i>=k - ord[0]=0; - if (ivec[1]>=ivec[2]) { // j>=k - ord[1]=1; - ord[2]=2; - } - else { - ord[1]=2; - ord[2]=1; - } - } - else { - ord[0]=2; - ord[1]=0; - ord[2]=1; - } - } - else { - if (ivec[0]>=ivec[2]) { // i>=k - ord[0]=1; - ord[1]=0; - ord[2]=2; - } - else { - ord[2]=0; - if (ivec[1]>=ivec[2]) { // j>=k - ord[0]=1; - ord[1]=2; - } - else { - ord[0]=2; - ord[1]=1; - } - } - } - // change parameters according to coordinate transforms - Permutate(qvec,ord); - if (!inter_avg) Permutate(av,ord); - Permutate_i(ivec,ord); - // compute inverse permutation - memcpy(invord,ord,3*sizeof(int)); - Permutate_i(invord,ord); - if (invord[0]==0 && invord[1]==1 && invord[2]==2) memcpy(invord,ord,3*sizeof(int)); - // temp = kr/24; and set some indices - temp=kr/24; - ind0=tab_index[ivec[0]][ivec[1]]+ivec[2]; - ind1=3*ind0; - ind2m=6*ind0; // cycle over tensor components - for (mu=0,comp=0;mu<3;mu++) for (nu=mu;nu<3;nu++,comp++) { - sig=sigV[mu]*sigV[nu]; // sign of some terms below - /* indexes for tables of different dimensions based on transformed indices mu and nu - * '...munu' variables are invariant to permutations because both constituent - * vectors and indices are permutated. So this variables can be used, as precomputed - * above. - */ - mu1=invord[mu]; - nu1=invord[nu]; - /* indmunu is a number of component[mu,nu] in a symmetric matrix, but counted - * differently than comp. This is {{0,1,3},{1,2,4},{3,4,5}} - */ - indmunu=mu1+nu1; - if (mu1==2 || nu1==2) indmunu++; - ind2=ind2m+indmunu; - ind3=3*ind2; - ind4=6*ind2; - // computing several quantities with table integrals - t3q=DotProd(qvec,tab3+ind1); - t4q=DotProd(qvec,tab4+ind3); - t5tr=TrSym(tab5+ind2m); - t6tr=TrSym(tab6+ind4); - if (inter_avg) { - // =1/3*delta[mu,nu] - t5aa=ONE_THIRD*t5tr; - t6aa=ONE_THIRD*t6tr; - } - else { - t3a=DotProd(av,tab3+ind1); - t4a=DotProd(av,tab4+ind3); - t5aa=QuadForm(tab5+ind2m,av); - t6aa=QuadForm(tab6+ind4,av); - } - //====== computing Gc0 ===== - /* br = delta[mu,nu]*(-I7-I9/2-kr*(i+kr)/24+2*t3q+t5tr) - * - (-3I8[mu,nu]-3I10[mu,nu]/2-qmunu*kr*(i+kr)/24+2*t4q+t6tr) - */ - br[RE]=sig*(3*(tab10[ind2]/2+tab8[ind2])-2*t4q-t6tr)+temp*qmunu[comp]*kr; - br[IM]=3*temp*qmunu[comp]; - if (dmunu[comp]) { - br[RE]+=2*t3q+t5tr-temp*kr-tab9[ind0]/2-tab7[ind0]; - br[IM]-=temp; - } - // br*=kd^2 - cMultReal(kd2,br,br); - // br+=I1*delta[mu,nu]*(-1+ikr+kr^2)-sig*I2[mu,nu]*(-3+3ikr+kr^2) - br[RE]+=sig*tab2[ind2]*(3-kr2); - br[IM]-=sig*tab2[ind2]*3*kr; - if (dmunu[comp]) { - br[RE]+=tab1[ind0]*(kr2-1); - br[IM]+=tab1[ind0]*kr; - } - // Gc0=expval*br - cMult(expval,br,result[comp]); - //==== computing Gc1 ====== - if (!inter_avg) { - // br=(kd*kr/24)*(qa*(delta[mu,nu]*(-2+ikr)-qmunu*(-6+ikr))-qamunu) - br[RE]=6*qmunu[comp]; - br[IM]=-kr*qmunu[comp]; - if (dmunu[comp]) { - br[RE]-=2; - br[IM]+=kr; - } - cMultReal(qa,br,br); - br[RE]-=qamunu[comp]; - cMultReal(2*temp*kd,br,br); - // br1=(d/r)*(delta[mu,nu]*t3h*(-1+ikr)-sig*t4h*(-3+3ikr)) - br1[RE]=3*sig*t4a; - br1[IM]=-kr*br1[RE]; - if (dmunu[comp]) { - br1[RE]-=t3a; - br1[IM]+=t3a*kr; - } - cMultReal(1/rn,br1,br1); - // Gc1=expval*i*m*kd*(br1+br) - cAdd(br,br1,Gc1); - cMultSelf(Gc1,m); - cMultReal(kd,Gc1,Gc1); - cMultSelf(Gc1,expval); - cMult_i(Gc1); - } - //==== computing Gc2 ====== - // br=delta[mu,nu]*t5aa-3*sig*t6aa-(kr/12)*(delta[mu,nu]*(i+kr)-qmunu*(3i+kr)) - br[RE]=-kr*qmunu[comp]; - br[IM]=-3*qmunu[comp]; - if (dmunu[comp]) { - br[RE]+=kr; - br[IM]+=1; - } - cMultReal(-2*temp,br,br); - br[RE]-=3*sig*t6aa; - if (dmunu[comp]) br[RE]+=t5aa; - // Gc2=expval*(kd^2/2)*m^2*br - cMult(m2,br,Gc2); - cMultReal(kd2/2,Gc2,Gc2); - cMultSelf(Gc2,expval); - // result = Gc0 + [ Gc1 ] + Gc2 - if (!inter_avg) cAdd(Gc2,Gc1,Gc2); - cAdd(Gc2,result[comp],result[comp]); - } - } - else { - //====== Gfar (and part of Gmedian) ======= - // temp=kd^2/24 - temp=kd2/24; - // br=1-(1+m^2)*kd^2/24 - br[RE]=1-(1+m2[RE])*temp; - br[IM]=-m2[IM]*temp; - // Gf0 + Gf2 = Gp*br - for (comp=0;comp. */ +#ifndef SPARSE + #ifndef __fft_h #define __fft_h -#ifndef ADDA_SPARSE //not needed in sparse mode - #ifndef FFT_TEMPERTON # define FFTW3 // FFTW3 is default #endif @@ -45,6 +45,6 @@ void Free_FFT_Dmat(void); int fftFit(int size, int _div); void CheckNprocs(void); -#endif // ADDA_SPARSE - #endif // __fft_h + +#endif // !SPARSE diff --git a/src/interaction.c b/src/interaction.c index 4a8fd15a..bce978b2 100644 --- a/src/interaction.c +++ b/src/interaction.c @@ -1,227 +1,193 @@ /* FILE : interaction.c * Descr: the functions used to calculate the interaction term * - * Copyright (C) 2006-2012 ADDA contributors + * Copyright (C) 2011-2013 ADDA contributors * This file is part of ADDA. * - * ADDA is free software: you can redistribute it and/or modify it under the terms of the GNU - * General Public License as published by the Free Software Foundation, either version 3 of the - * License, or (at your option) any later version. + * ADDA is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. * - * ADDA is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even - * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General - * Public License for more details. + * ADDA is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with ADDA. If not, see * . */ - +#include "const.h" // keep this first +#include "interaction.h" // corresponding headers +// project headers #include "cmplx.h" -#include "const.h" -#include "interaction.h" #include "io.h" -#include "types.h" +#include "memory.h" #include "vars.h" -#include "param.h" -// defined and initialized in calculator.c -extern const double * restrict tab1,* restrict tab2,* restrict tab3,* restrict tab4,* restrict tab5, - * restrict tab6,* restrict tab7,* restrict tab8,* restrict tab9,* restrict tab10; -extern const int * restrict * restrict tab_index; +// SEMI-GLOBAL VARIABLES + +// defined and initialized in make_particle.c extern double gridspace; -extern double igt_lim, igt_eps; -#ifndef NO_FORTRAN -// fort/propaesplibreintadda.f -void propaespacelibreintadda_(const double *Rij,const double *ka,const double *arretecube, - const double *relreq, double *result); -#endif +// defined and initialized in param.c +extern double igt_lim,igt_eps; + +// LOCAL VARIABLES -//This function pointer... -void (*CalcInterTerm)(const int i,const int j,const int k,doublecomplex * restrict result); -//...will point to one of the functions below -void CalcInterTerm_opt(const int i,const int j,const int k,doublecomplex * restrict result); -void CalcInterTerm_complete(const int i,const int j,const int k,doublecomplex * restrict result); +// tables of integrals +static double * restrict tab1,* restrict tab2,* restrict tab3,* restrict tab4,* restrict tab5,* restrict tab6, + * restrict tab7,* restrict tab8,* restrict tab9,* restrict tab10; +/* it is preferable to declare the following as "* restrict * restrict", but it is hard to make it +* generally compatible with Free_iMatrix function syntax. +*/ +static int ** restrict tab_index; // matrix for indexing of table arrays #ifdef USE_SSE3 -__m128d c1, c2, c3, zo, inv_2pi, p360, prad_to_deg; -__m128d exptbl[361]; +static __m128d c1, c2, c3, zo, inv_2pi, p360, prad_to_deg; +static __m128d exptbl[361]; #endif //USE_SSE3 -#define M_PI 3.14159265358979323846 +// EXTERNAL FUNCTIONS -void InitInteraction(void) -/* - Initialize the interaction calculations -*/ -{ - if (IntRelation==G_POINT_DIP) { - CalcInterTerm = CalcInterTerm_opt; - } else { - CalcInterTerm = CalcInterTerm_complete; - } - - #ifdef USE_SSE3 - c1 = _mm_set_pd(1.34959795251974073996e-11,3.92582397764340914444e-14); - c2 = _mm_set_pd(-8.86096155697856783296e-7,-3.86632385155548605680e-9); - c3 = _mm_set_pd(1.74532925199432957214e-2,1.52308709893354299569e-4); - zo = _mm_set_pd(0.0,1.0); - inv_2pi = _mm_set_sd(1.0/(2*M_PI)); - p360 = _mm_set_sd(360.0); - prad_to_deg = _mm_set_sd(180.0/M_PI); - - for (unsigned int i=0; i<=360; i++) { - double x = M_PI/180.0*(double)i; - exptbl[i] = _mm_set_pd(sin(x),cos(x)); - } - #endif -} +#ifndef NO_FORTRAN +// fort/propaesplibreintadda.f +void propaespacelibreintadda_(const double *Rij,const double *ka,const double *arretecube,const double *relreq, + double *result); +#endif +// sinint.c +void cisi(double x,double *ci,double *si); +// this is used for debugging, should be empty define, when not required +#define PRINT_GVAL /*printf("%d,%d,%d: %g%+gi, %g%+gi, %g%+gi,\n%g%+gi, %g%+gi, %g%+gi\n",\ + i,j,k,result[0][RE],result[0][IM],result[1][RE],result[1][IM],result[2][RE],result[2][IM],\ + result[3][RE],result[3][IM],result[4][RE],result[4][IM],result[5][RE],result[5][IM]);*/ #ifdef USE_SSE3 -static inline __m128d accImExp(double x, double c) -/* - Accelerated sin-cos (or imaginary exp) routine for use in the calculation of the - interaction tensor. Returns c*exp(ix) in the resultant vector. The code is adapted - from the CEPHES library. The idea is that we have precalculated exp(iy) for some - discrete values of y. Then we take the y that is nearest to our x and write - exp(ix)=exp(iy+(ix-iy))=exp(iy)exp(i(x-y)). We take exp(y) from the table and - exp(i(x-y)) from the Taylor series. This converges very fast since |x-y| is small. -*/ +//===================================================================================================================== + +static inline __m128d accImExp(double x,double c) +/* Accelerated sin-cos (or imaginary exp) routine for use in the calculation of the interaction tensor. Returns + * c*exp(ix) in the resultant vector. The code is adapted from the CEPHES library. The idea is that we have + * precalculated exp(iy) for some discrete values of y. Then we take the y that is nearest to our x and write + * exp(ix)=exp(iy+(ix-iy))=exp(iy)exp(i(x-y)). We take exp(y) from the table and exp(i(x-y)) from the Taylor series. + * This converges very fast since |x-y| is small. + */ { __m128d px = _mm_set_sd(x); __m128d ipx = _mm_mul_pd(px,inv_2pi); - int ix = _mm_cvttsd_si32(ipx); //truncation so the function only works for 0 <= x < 2*pi*2^-31 + int ix = _mm_cvttsd_si32(ipx); // truncation so the function only works for 0 <= x < 2*pi*2^-31 ipx = _mm_cvtsi32_sd(ipx,ix); ipx = _mm_mul_pd(p360,ipx); px = _mm_mul_pd(prad_to_deg,px); - px = _mm_sub_pd(px,ipx); //this is x (deg) mod 360 - - ix = _mm_cvtsd_si32(px); //the closest integer (rounded, not truncated) - __m128d scx = exptbl[ix]; //the tabulated value - + px = _mm_sub_pd(px,ipx); // this is x (deg) mod 360 + + ix = _mm_cvtsd_si32(px); // the closest integer (rounded, not truncated) + __m128d scx = exptbl[ix]; // the tabulated value + ipx = _mm_cvtsi32_sd(ipx,ix); - __m128d pz = _mm_sub_pd(ipx,px); //the residual -z=(ix-x) + __m128d pz = _mm_sub_pd(ipx,px); // the residual -z=(ix-x) __m128d py = _mm_mul_pd(pz,pz); - __m128d yy = _mm_shuffle_pd(py,py,0); //now (y,y) + __m128d yy = _mm_shuffle_pd(py,py,0); // now (y,y) __m128d zy = _mm_shuffle_pd(yy,pz,1); - - __m128d scz = _mm_mul_pd(c1,yy); //taylor series approximation + + __m128d scz = _mm_mul_pd(c1,yy); // Taylor series approximation scz = _mm_add_pd(c2,scz); scz = _mm_mul_pd(yy,scz); scz = _mm_add_pd(c3,scz); scz = _mm_mul_pd(zy,scz); scz = _mm_sub_pd(zo,scz); - scx = cmul(scz,scx); //multiply lookup and approximation - return _mm_mul_pd(_mm_set1_pd(c),scx); + scx = cmul(scz,scx); // multiply lookup and approximation + return _mm_mul_pd(_mm_set1_pd(c),scx); } +//===================================================================================================================== + void CalcInterTerm_opt(const int i,const int j,const int k,doublecomplex * restrict result) -/* calculates interaction term between two dipoles; given integer distance vector {i,j,k} - * (in units of d). All six components of the symmetric matrix are computed at once. - * The elements in result are: [G11, G12, G13, G22, G23, G33] +/* calculates interaction term between two dipoles; given integer distance vector {i,j,k} (in units of d). All six + * components of the symmetric matrix are computed at once. The elements in result are: [G11, G12, G13, G22, G23, G33] + * Only 'poi' interaction is implemented in this optimized routine */ { - -// this is used for debugging, should be empty define, when not required -#define PRINT_GVAL /*printf("%d,%d,%d: %g%+gi, %g%+gi, %g%+gi,\n%g%+gi, %g%+gi, %g%+gi\n",\ - i,j,k,result[0][RE],result[0][IM],result[1][RE],result[1][IM],result[2][RE],result[2][IM],\ - result[3][RE],result[3][IM],result[4][RE],result[4][IM],result[5][RE],result[5][IM]);*/ - double qx = i; double qy = j; double qz = k; - + const double rn = sqrt(qx*qx + qy*qy + qz*qz); - const double rr=rn*gridspace; const double invr3=1.0/(rr*rr*rr); const double kr=WaveNum*rr; const __m128d sc = accImExp(kr,invr3); - const double invrn = 1.0/rn; qx *= invrn; qy *= invrn; qz *= invrn; const double qxx=qx*qx, qxy=qx*qy, qxz=qx*qz, qyy=qy*qy, qyz=qy*qz, qzz=qz*qz; - const double kr2=kr*kr; const double t1=(3-kr2), t2=-3*kr, t3=(kr2-1); - + __m128d qff, im_re; #define INTERACT_MUL(I)\ - im_re = cmul(sc,im_re);\ - _mm_store_pd(result+I,im_re);\ - + im_re = cmul(sc,im_re);\ + _mm_store_pd(result[I],im_re); + const __m128d v1 = _mm_set_pd(kr,t3); const __m128d v2 = _mm_set_pd(t2,t1); - + qff = _mm_set1_pd(qxx); im_re = _mm_add_pd(v1,_mm_mul_pd(v2,qff)); INTERACT_MUL(0) - + qff = _mm_set1_pd(qxy); im_re = _mm_mul_pd(v2,qff); INTERACT_MUL(1) - + qff = _mm_set1_pd(qxz); im_re = _mm_mul_pd(v2,qff); INTERACT_MUL(2) - + qff = _mm_set1_pd(qyy); im_re = _mm_add_pd(v1,_mm_mul_pd(v2,qff)); INTERACT_MUL(3) - + qff = _mm_set1_pd(qyz); im_re = _mm_mul_pd(v2,qff); INTERACT_MUL(4) - + qff = _mm_set1_pd(qzz); im_re = _mm_add_pd(v1,_mm_mul_pd(v2,qff)); INTERACT_MUL(5) - #undef INTERACT_MUL - + PRINT_GVAL; } #else //not using SSE3 +//===================================================================================================================== + void CalcInterTerm_opt(const int i,const int j,const int k,doublecomplex * restrict result) -/* calculates interaction term between two dipoles; given integer distance vector {i,j,k} - * (in units of d). All six components of the symmetric matrix are computed at once. - * The elements in result are: [G11, G12, G13, G22, G23, G33] +/* calculates interaction term between two dipoles; given integer distance vector {i,j,k} (in units of d). All six + * components of the symmetric matrix are computed at once. The elements in result are: [G11, G12, G13, G22, G23, G33] */ { double re,im; - -// this is used for debugging, should be empty define, when not required -#define PRINT_GVAL /*printf("%d,%d,%d: %g%+gi, %g%+gi, %g%+gi,\n%g%+gi, %g%+gi, %g%+gi\n",\ - i,j,k,result[0][RE],result[0][IM],result[1][RE],result[1][IM],result[2][RE],result[2][IM],\ - result[3][RE],result[3][IM],result[4][RE],result[4][IM],result[5][RE],result[5][IM]);*/ - double qx = i; double qy = j; double qz = k; - + const double rn = sqrt(qx*qx + qy*qy + qz*qz); const double invrn = 1.0/rn; qx *= invrn; qy *= invrn; qz *= invrn; const double qxx=qx*qx, qxy=qx*qy, qxz=qx*qz, qyy = qy*qy, qyz=qy*qz, qzz=qz*qz; - const double rr=rn*gridspace; const double invr3=1.0/(rr*rr*rr); const double kr=WaveNum*rr; const double kr2=kr*kr; - const double cov=cos(kr)*invr3; const double siv=sin(kr)*invr3; - + const double t1=(3-kr2), t2=-3*kr, t3=(kr2-1); - + #define INTERACT_MUL(I)\ - result[I][RE] = re*cov - im*siv;\ + result[I][RE] = re*cov - im*siv;\ result[I][IM] = re*siv + im*cov; - + re = t1*qxx + t3; im = kr + t2*qxx; INTERACT_MUL(0) @@ -240,18 +206,15 @@ void CalcInterTerm_opt(const int i,const int j,const int k,doublecomplex * restr re = t1*qzz + t3; im = kr + t2*qzz; INTERACT_MUL(5) - + #undef INTERACT_MUL - + PRINT_GVAL; } #endif //USE_SSE3 -// sinint.c -void cisi(double x,double *ci,double *si); - -//============================================================ +//===================================================================================================================== INLINE bool TestTableSize(const double rn) // tests if rn fits into the table; if not, returns false and produces info message @@ -269,12 +232,11 @@ INLINE bool TestTableSize(const double rn) else return true; } -//============================================================ +//===================================================================================================================== void CalcInterTerm_complete(const int i,const int j,const int k,doublecomplex * restrict result) -/* calculates interaction term between two dipoles; given integer distance vector {i,j,k} - * (in units of d). All six components of the symmetric matrix are computed at once. - * The elements in result are: [G11, G12, G13, G22, G23, G33] +/* calculates interaction term between two dipoles; given integer distance vector {i,j,k} (in units of d). All six + * components of the symmetric matrix are computed at once. The elements in result are: [G11, G12, G13, G22, G23, G33] */ { static double rr,qvec[3],q2[3],invr3,qavec[3],av[3]; @@ -288,16 +250,6 @@ void CalcInterTerm_complete(const int i,const int j,const int k,doublecomplex * static double t3q,t3a,t4q,t4a,t5tr,t5aa,t6tr,t6aa; static const bool inter_avg=true; // temporary fixed option for SO formulation -// this is used for debugging, should be empty define, when not required -#define PRINT_GVAL /*printf("%d,%d,%d: %g%+gi, %g%+gi, %g%+gi,\n%g%+gi, %g%+gi, %g%+gi\n",\ - i,j,k,result[0][RE],result[0][IM],result[1][RE],result[1][IM],result[2][RE],result[2][IM],\ - result[3][RE],result[3][IM],result[4][RE],result[4][IM],result[5][RE],result[5][IM]);*/ - - // self interaction; self term is computed in different subroutine - if (!(i || j || k)) for (comp=0;comp. */ - #ifndef __interaction_h #define __interaction_h #include "cmplx.h" -extern double gridspace, WaveNum; - -#ifdef USE_SSE3 -extern __m128d c1, c2, c3, zo, inv_2pi, p360, prad_to_deg; -extern __m128d exptbl[361]; -#endif //USE_SSE3 - -extern void (*CalcInterTerm)(const int i,const int j,const int k,doublecomplex * restrict result); -extern void InitInteraction(void); - -//============================================================ +void (*CalcInterTerm)(const int i,const int j,const int k,doublecomplex * restrict result); +void InitInteraction(void); +void FreeInteraction(void); #endif //__interaction_h diff --git a/src/iterative.c b/src/iterative.c index b65a400c..2bf3cd67 100644 --- a/src/iterative.c +++ b/src/iterative.c @@ -10,7 +10,7 @@ * (e.g. -int so), however they do it much slowly than usually. It is recommended then to use * BiCGStab. * - * Copyright (C) 2006-2012 ADDA contributors + * Copyright (C) 2006-2013 ADDA contributors * This file is part of ADDA. * * ADDA is free software: you can redistribute it and/or modify it under the terms of the GNU @@ -49,7 +49,7 @@ extern const TIME_TYPE tstart_CE; extern doublecomplex *rvec; // can't be declared restrict due to SwapPointers extern doublecomplex * restrict vec1,* restrict vec2,* restrict vec3,* restrict Avecbuffer; // defined and initialized in fft.c -#ifndef OPENCL +#if !defined(OPENCL) && !defined(SPARSE) extern doublecomplex * restrict Xmatrix; // used as storage for arrays in WKB init field #endif // defined and initialized in param.c @@ -366,7 +366,7 @@ ITER_FUNC(BiCG_CS) scalars[0].ptr=&ro_old; scalars[0].size=sizeof(doublecomplex); } - else if (ph==PHASE_INIT); // no specific initialization required + else if (ph==PHASE_INIT) {} // no specific initialization required // main iteration cycle else if (ph==PHASE_ITER) { // ro_k-1=r_k-1(*).r_k-1; check for ro_k-1!=0 @@ -384,7 +384,7 @@ ITER_FUNC(BiCG_CS) nIncrem10_cmplx(pvec,rvec,beta,NULL,NULL); } // q_k=Avecbuffer=A.p_k - if (niter==1 && matvec_ready); // do nothing, Avecbuffer is ready to use + if (niter==1 && matvec_ready) {} // do nothing, Avecbuffer is ready to use else MatVec(pvec,Avecbuffer,NULL,false,&Timing_OneIterComm); // mu_k=p_k.q_k; check for mu_k!=0 nDotProd_conj(pvec,Avecbuffer,mu,&Timing_OneIterComm); @@ -514,7 +514,7 @@ ITER_FUNC(CGNR) scalars[0].ptr=&ro_old; scalars[0].size=sizeof(double); } - else if (ph==PHASE_INIT); // no specific initialization required + else if (ph==PHASE_INIT) {} // no specific initialization required else if (ph==PHASE_ITER) { // p_1=Ah.r_0 and ro_new=ro_0=|Ah.r_0|^2 // since first product is with Ah , matvec_ready can't be employed @@ -1061,7 +1061,7 @@ static const char *CalcInitField(double zero_resid) nSubtr(rvec,pvec,Avecbuffer,&inprodR,&Timing_InitIterComm); descr="x_0 = E_inc\n"; } -#ifndef ADDA_SPARSE //currently no support for WKB in sparse mode +#ifndef SPARSE //currently no support for WKB in sparse mode else if (InitField==IF_WKB) { if (prop[2]!=1) LogError(ONE_POS,"WKB initial field currently works only with default " "incident direction of the incoming wave (along z-axis)"); @@ -1160,7 +1160,7 @@ static const char *CalcInitField(double zero_resid) nSubtr(rvec,pvec,Avecbuffer,&inprodR,&Timing_InitIterComm); descr="x_0 = result of WKB\n"; } // redundant test -#endif //ADDA_SPARSE +#endif // !SPARSE else LogError(ONE_POS,"Unknown method to calculate initial field (%d)",(int)InitField); return descr; } diff --git a/src/make_particle.c b/src/make_particle.c index 252fbb3e..907a4480 100644 --- a/src/make_particle.c +++ b/src/make_particle.c @@ -3,7 +3,7 @@ * Descr: this module initializes the dipole set, either using predefined shapes or reading from a * file; includes granule generator * - * Copyright (C) 2006-2012 ADDA contributors + * Copyright (C) 2006-2013 ADDA contributors * This file is part of ADDA. * * ADDA is free software: you can redistribute it and/or modify it under the terms of the GNU @@ -43,28 +43,33 @@ // SEMI-GLOBAL VARIABLES // defined and initialized in param.c -extern const int sh_Npars; extern const enum sh shape; -extern const double sh_pars[]; extern const enum sym sym_type; extern const double lambda; extern double sizeX,dpl,a_eq; extern const int jagged; extern const char *shape_fname; extern const char *shapename; -extern const char *save_geom_fname; extern const bool volcor,save_geom; extern opt_index opt_sh; +#ifndef SPARSE +extern const int sh_Npars; +extern const double sh_pars[]; +extern const char *save_geom_fname; extern const double gr_vf; extern double gr_d; extern const int gr_mat; extern enum shform sg_format; extern bool store_grans; +#endif // defined and initialized in timing.c -extern TIME_TYPE Timing_Particle,Timing_Granul,Timing_GranulComm; +extern TIME_TYPE Timing_Particle; +#ifndef SPARSE +extern TIME_TYPE Timing_Granul,Timing_GranulComm; +#endif -// used in fft.c +// used in interaction.c double gridspace; // interdipole distance (dipole size) // used in param.c @@ -83,7 +88,9 @@ static const char geom_format_ext[]="%d %d %d %d\n"; // extended format of */ static const char ddscat_format_read1[]="%*s %d %d %d %d %d %d\n"; static const char ddscat_format_read2[]="%*s %d %d %d %d"; +#ifndef SPARSE static const char ddscat_format_write[]="%zu %d %d %d %d %d %d\n"; +#endif // ratio of scatterer volume to enclosing cube; used for dpl correction and initialization by a_eq static double volume_ratio; static double Ndip; // total number of dipoles (in a circumscribing cube) @@ -93,7 +100,7 @@ static FILE * restrict dipfile; // handle of dipole file static enum shform read_format; // format of dipole file, which is read static double cX,cY,cZ; // center for DipoleCoord, it is sometimes used in PlaceGranules -#ifndef ADDA_SPARSE +#ifndef SPARSE // shape parameters static double coat_x,coat_y,coat_z,coat_r2; @@ -133,11 +140,11 @@ struct segment * restrict contSeg; // temporary arrays before their real counterparts are allocated static unsigned char * restrict material_tmp; -static double * restrict DipoleCoord_tmp; static unsigned short * restrict position_tmp; -#endif //ADDA_SPARSE -#ifndef ADDA_SPARSE //much of the functionality here is disabled in sparse mode +#endif // !SPARSE + +#ifndef SPARSE //much of the functionality here is disabled in sparse mode // EXTERNAL FUNCTIONS @@ -231,8 +238,6 @@ static void SaveGeometry(void) Timing_FileIO+=GET_TIME()-tstart; } -//=========================================================== - //========================================================== #define ALLOCATE_SEGMENTS(N) (struct segment *)voidVector((N)*sizeof(struct segment),ALL_POS,\ "contour segment"); @@ -1037,7 +1042,7 @@ static size_t PlaceGranules(void) //=========================================================== -#endif //ADDA_SPARSE +#endif // !SPARSE static void InitDipFile(const char * restrict fname,int *bX,int *bY,int *bZ,int *Nm, const char **rft) @@ -1227,17 +1232,21 @@ static void InitDipFile(const char * restrict fname,int *bX,int *bY,int *bZ,int //=========================================================== static void ReadDipFile(const char * restrict fname) -/* read dipole file; no consistency checks are made since they are made in InitDipFile. - * the file is opened in InitDipFile; this function only closes the file. +/* read dipole file; no consistency checks are made since they are made in InitDipFile. The file is opened in + * InitDipFile; this function only closes the file. + * + * The operation is quite different in FFT and sparse modes. In FFT mode only material is set here, while position is + * set in the main loop over shapes in MakeParticle(). By contrast, in sparse mode position is set here as well (since + * the loop in MakeParticle() is skipped altogether. */ { int x,y,z,x0,y0,z0,mat,scanned; size_t index=0; char linebuf[BUF_LINE]; -#ifndef ADDA_SPARSE +#ifndef SPARSE // to remove possible overflows size_t boxX_l=(size_t)boxX; -#endif //ADDA_SPARSE +#endif // !SPARSE TIME_TYPE tstart=GET_TIME(); @@ -1262,26 +1271,25 @@ static void ReadDipFile(const char * restrict fname) y0-=minY; z0-=minZ; // initialize box jagged*jagged*jagged instead of one dipole -#ifndef ADDA_SPARSE +#ifndef SPARSE for (z=jagged*z0;z=local_z0 && z= local_nvoid_d0) && (index < local_nvoid_d1)) { - material_full[index]=(unsigned char)(mat-1); + material[index-local_nvoid_d0]=(unsigned char)(mat-1); position_full[3*index]=x0*jagged+x; position_full[3*index+1]=y0*jagged+y; position_full[3*index+2]=z0*jagged+z; } index++; } -#endif //ADDA_SPARSE +#endif // SPARSE } } @@ -1333,7 +1341,7 @@ void InitShape(void) double n_sizeX; // new value for size double yx_ratio,zx_ratio; double tmp1,tmp2; -#ifndef ADDA_SPARSE +#ifndef SPARSE double tmp3; #endif TIME_TYPE tstart; @@ -1371,7 +1379,7 @@ void InitShape(void) // initialization of global option index for error messages opt=opt_sh; // shape initialization -#ifndef ADDA_SPARSE //shapes other than "read" are disabled in sparse mode +#ifndef SPARSE //shapes other than "read" are disabled in sparse mode if (shape==SH_AXISYMMETRIC) { /* Axisymmetric homogeneous shape, defined by its contour in ro-z plane of the cylindrical * coordinate system. Its symmetry axis coincides with the z-axis, and the contour is read @@ -1782,7 +1790,7 @@ void InitShape(void) Nmat_need=2; } else -#endif //ADDA_SPARSE +#endif // !SPARSE if (shape==SH_READ) { symX=symY=symZ=symR=false; // input file is assumed fully asymmetric const char *rf_text; @@ -1826,11 +1834,11 @@ void InitShape(void) * define your own (with more informative names) in the beginning of this function. */ else -#ifndef ADDA_SPARSE +#ifndef SPARSE LogError(ONE_POS,"Unknown shape"); // this is mainly to remove 'uninitialized' warnings #else LogError(ONE_POS,"Only shapes read from a file are currently supported in sparse mode"); -#endif //ADDA_SPARSE +#endif // SPARSE // check for redundancy of input data if (dpl!=UNDEF) { @@ -1848,7 +1856,7 @@ void InitShape(void) "while shape '%s' sets both the particle size and the size of the grid",shapename); } } -#ifndef ADDA_SPARSE //granulation disabled in sparse mode +#ifndef SPARSE //granulation disabled in sparse mode // initialize domain granulation if (sh_granul) { symX=symY=symZ=symR=false; // no symmetry with granules @@ -1961,13 +1969,13 @@ void InitShape(void) PrintError("Particle (boxY,Z={%d,%d}) does not fit into specified boxY,Z={%d,%d}", n_boxY,n_boxZ,boxY,boxZ); } -#ifndef ADDA_SPARSE //this check is not needed in sparse mode +#ifndef SPARSE //this check is not needed in sparse mode // initialize number of dipoles; first check that it fits into size_t type double tmp=((double)boxX)*((double)boxY)*((double)boxZ); if (tmp > SIZE_MAX) LogError(ONE_POS,"Total number of dipoles in the circumscribing " "box (%.0f) is larger than supported by size_t type on this system (%zu). If possible, " "please recompile ADDA in 64-bit mode.",tmp,SIZE_MAX); -#endif //ADDA_SPARSE +#endif // !SPARSE Ndip=boxX*boxY*boxZ; // initialize maxiter; not very realistic if (maxiter==UNDEF) maxiter=MIN(INT_MAX,3*Ndip); @@ -1991,11 +1999,10 @@ void MakeParticle(void) // creates a particle; initializes all dipoles counts, dpl, gridspace { - size_t index; + size_t index,dip,i3; TIME_TYPE tstart; -#ifndef ADDA_SPARSE int i; - size_t dip; +#ifndef SPARSE double jcX,jcY,jcZ; // center for jagged size_t local_nRows_tmp; int j,k,ns; @@ -2007,7 +2014,7 @@ void MakeParticle(void) int mat; unsigned short us_tmp; TIME_TYPE tgran; -#endif //ADDA_SPARSE +#endif // !SPARSE /* TO ADD NEW SHAPE * Add here all intermediate variables, which are used only inside this function. You may as @@ -2020,7 +2027,7 @@ void MakeParticle(void) cY=(boxY-1)/2.0; cZ=(boxZ-1)/2.0; -#ifndef ADDA_SPARSE //shapes other than "read" are disabled in sparse mode +#ifndef SPARSE //shapes other than "read" are disabled in sparse mode index=0; // assumed that box's are even jcX=jcY=jcZ=jagged/2.0; @@ -2031,7 +2038,6 @@ void MakeParticle(void) * they will be reallocated afterwards (when local_nRows is known). */ MALLOC_VECTOR(material_tmp,uchar,local_Ndip,ALL); - MALLOC_VECTOR(DipoleCoord_tmp,double,local_nRows_tmp,ALL); MALLOC_VECTOR(position_tmp,ushort,local_nRows_tmp,ALL); for(k=local_z0;k #include -#ifdef OPENCL -# include "oclcore.h" -#endif - // SEMI-GLOBAL VARIABLES +#ifdef SPARSE +// defined and initialized in calculator.c +extern doublecomplex * restrict arg_full; +#elif !defined(OPENCL) // defined and initialized in fft.c -#ifndef OPENCL extern const doublecomplex * restrict Dmatrix; extern doublecomplex * restrict Xmatrix,* restrict slices,* restrict slices_tr; -#endif extern const size_t DsizeY,DsizeZ,DsizeYZ; +#endif // !SPARSE && !OPENCL // defined and initialized in timing.c extern size_t TotalMatVec; -#ifndef ADDA_SPARSE +#ifndef SPARSE #ifndef OPENCL // the following inline functions are not used in OpenCL or sparse mode //============================================================ @@ -102,11 +102,11 @@ INLINE size_t IndexDmatrix_mv(size_t x,size_t y,size_t z,const bool transposed) return(NDCOMP*(x*DsizeYZ+z*DsizeY+y)); } #endif // OPENCL -#endif // ADDA_SPARSE +#endif // SPARSE //============================================================ -#ifndef ADDA_SPARSE +#ifndef SPARSE void MatVec (doublecomplex * restrict argvec, // the argument vector doublecomplex * restrict resultvec, // the result vector double *inprod, // the resulting inner product @@ -474,11 +474,12 @@ void MatVec (doublecomplex * restrict argvec, // the argument vector TotalMatVec++; } -#else //ADDA_SPARSE is defined +#else // SPARSE is defined + +//============================================================ -/* The sparse MatVec is implemented completely separately from the non-sparse version. - * Although there is some code duplication, this probably makes the both versions easier - * to maintain. +/* The sparse MatVec is implemented completely separately from the non-sparse version. Although there is some code + * duplication, this probably makes the both versions easier to maintain. */ void MatVec (doublecomplex * restrict argvec, // the argument vector doublecomplex * restrict resultvec, // the result vector @@ -487,52 +488,33 @@ void MatVec (doublecomplex * restrict argvec, // the argument vector TIME_TYPE *comm_timing) // this variable is incremented by communication time { const bool ipr = (inprod != NULL); - - if (her) { - for (size_t j=0; j. */ +#ifdef OPENCL #ifndef __oclcore_h #define __oclcore_h @@ -73,3 +74,5 @@ INLINE void CheckCLErr(const cl_int err,ERR_LOC_DECL,const char * restrict msg) } #endif // __oclcore_h + +#endif // OPENCL diff --git a/src/param.c b/src/param.c index eaf59905..b0bd394f 100644 --- a/src/param.c +++ b/src/param.c @@ -26,6 +26,7 @@ #include "fft.h" #include "function.h" #include "io.h" +#include "oclcore.h" #include "os.h" #include "parbas.h" #include "vars.h" @@ -38,11 +39,8 @@ #include #include -#ifdef OPENCL -# include "oclcore.h" -# ifdef CLFFT_AMD -# include // for version information -# endif +#ifdef CLFFT_AMD +# include // for version information #endif // definitions for file locking @@ -80,10 +78,10 @@ extern const char beam_descr[]; // defined and initialized in make_particle.c extern const bool volcor_used; extern const char *sh_form_str1,*sh_form_str2; -#ifndef ADDA_SPARSE +#ifndef SPARSE extern const int gr_N; extern const double gr_vf_real; -#endif //ADDA_SPARSE +#endif //SPARSE extern const size_t mat_count[]; // used in CalculateE.c @@ -110,15 +108,16 @@ const char *scat_grid_parms; // name of file with parameters of scattering double prop_0[3]; // initial incident direction (in laboratory reference frame) double incPolX_0[3],incPolY_0[3]; // initial incident polarizations (in lab RF) enum scat ScatRelation; // type of formulae for scattering quantities -// used in fft.c -double igt_lim; // limit (threshold) for integration in IGT -double igt_eps; // relative error of integration in IGT + // used in GenerateB.c int beam_Npars; double beam_pars[MAX_N_BEAM_PARMS]; // beam parameters opt_index opt_beam; // option index of beam option used const char *beam_fnameY; // names of files, defining the beam (for two polarizations) const char *beam_fnameX; +// used in interaction.c +double igt_lim; // limit (threshold) for integration in IGT +double igt_eps; // relative error of integration in IGT // used in io.c char logfname[MAX_FNAME]=""; // name of logfile // used in iterative.c @@ -237,6 +236,7 @@ static const struct subopt_struct beam_opt[]={ {NULL,NULL,NULL,0,0} }; static const struct subopt_struct shape_opt[]={ +#ifndef SPARSE {"axisymmetric","","Axisymmetric homogeneous shape, defined by its contour in " "ro-z plane of the cylindrical coordinate system. Its symmetry axis coincides with the " "z-axis, and the contour is read from file.",FNAME_ARG,SH_AXISYMMETRIC}, @@ -279,10 +279,13 @@ static const struct subopt_struct shape_opt[]={ "b, and diameter at the position of the maximum width c. The surface is described by " "ro^4+2S*ro^2*z^2+z^4+P*ro^2+Q*z^2+R=0, ro^2=x^2+y^2, P,Q,R,S are determined by the " "described parameters.",3,SH_RBC}, +#endif // !SPARSE {"read","","Read a particle geometry from file ",FNAME_ARG,SH_READ}, +#ifndef SPARSE {"sphere","","Homogeneous sphere",0,SH_SPHERE}, {"spherebox","","Sphere (diameter d_sph) in a cube (size Dx, first domain)",1, SH_SPHEREBOX}, +#endif // !SPARSE /* TO ADD NEW SHAPE * add a row to this list in alphabetical order. It contains: * shape name (used in command line), usage string, help string, possible number of float @@ -337,7 +340,9 @@ PARSE_FUNC(eq_rad); #ifdef OPENCL PARSE_FUNC(gpu); #endif +#ifndef SPARSE PARSE_FUNC(granul); +#endif PARSE_FUNC(grid); PARSE_FUNC(h) ATT_NORETURN; PARSE_FUNC(init_field); @@ -357,17 +362,23 @@ PARSE_FUNC(pol); PARSE_FUNC(prognosis); PARSE_FUNC(prop); PARSE_FUNC(recalc_resid); +#ifndef SPARSE PARSE_FUNC(save_geom); +#endif PARSE_FUNC(scat); PARSE_FUNC(scat_grid_inp); PARSE_FUNC(scat_matr); +#ifndef SPARSE PARSE_FUNC(sg_format); +#endif PARSE_FUNC(shape); PARSE_FUNC(size); PARSE_FUNC(store_beam); PARSE_FUNC(store_dip_pol); PARSE_FUNC(store_force); +#ifndef SPARSE PARSE_FUNC(store_grans); +#endif PARSE_FUNC(store_int_field); PARSE_FUNC(store_scat_grid); PARSE_FUNC(sym); @@ -422,11 +433,13 @@ static struct opt_struct options[]={ "only for OpenCL version of ADDA, running on a system with several GPUs.\n" "Default: 0",1,NULL}, #endif +#ifndef SPARSE {PAR(granul)," []","Specifies that one particle domain should be " "randomly filled with spherical granules with specified diameter and volume " "fraction . Domain number to fill is given by the last optional argument. " "Algorithm may fail for volume fractions > 30-50%.\n" "Default : 1",UNDEF,NULL}, +#endif // !SPARSE {PAR(grid)," [ ]","Sets dimensions of the computation grid. Arguments should be " "even integers. In most cases and can be omitted (they are automatically " "determined by based on the proportions of the scatterer). This command line option " @@ -440,10 +453,15 @@ static struct opt_struct options[]={ "preceding dash). For some options (e.g. '-beam' or '-shape') specific help on a " "particular suboption may be shown.\n" "Example: shape coated",UNDEF,NULL}, - {PAR(init_field),"{auto|zero|inc|wkb}","Sets prescription to calculate initial (starting) " - "field for the iterative solver. 'zero' is a zero vector, 'inc' - equal to the incident " - "field, 'wkb' - from Wentzel-Kramers-Brillouin approximation, 'auto' - automatically " - "choose from 'zero' and 'inc' based on the lower residual value.\n" + {PAR(init_field),"{auto|inc|wkb|zero}", + "Sets prescription to calculate initial (starting) field for the iterative solver.\n" + "'auto' - automatically choose from 'zero' and 'inc' based on the lower residual value.\n" + "'inc' - equal to the incident field,\n" + "'wkb' - from Wentzel-Kramers-Brillouin approximation,\n" +#ifdef SPARSE + "!!! 'wkb' is not operational in sparse mode\n" +#endif + "'zero' is a zero vector,\n" "Default: auto",1,NULL}, {PAR(int),"{fcd|fcd_st|igt [ []]|igt_so|poi|so}", "Sets prescription to calculate the interaction term.\n" @@ -459,6 +477,9 @@ static struct opt_struct options[]={ "'igt_so' - approximate evaluation of IGT using second order of kd approximation.\n" "'poi' - (the simplest) interaction between point dipoles.\n" "'so' - under development and incompatible with '-anisotr'.\n" +#ifdef SPARSE + "!!! All options except 'poi' incur a significant slowing down in sparse mode.\n" +#endif "Default: poi",UNDEF,NULL}, {PAR(iter),"{bicg|bicgstab|cgnr|csym|qmr|qmr2}","Sets the iterative solver.\n" "Default: qmr",1,NULL}, @@ -529,12 +550,14 @@ static struct opt_struct options[]={ "Normalization (to the unity vector) is performed automatically.\n" "Default: 0 0 1",3,NULL}, {PAR(recalc_resid),"","Recalculate residual at the end of iterative solver.",0,NULL}, +#ifndef SPARSE {PAR(save_geom),"[]","Save dipole configuration to a file (a path " "relative to the output directory). Can be used with '-prognosis'.\n" "Default: .geom \n" "( is a first argument to the '-shape' option; '_gran' is added if '-granul' option " "is used; file extension can differ depending on argument of '-sg_format' option).", UNDEF,NULL}, +#endif // !SPARSE {PAR(scat),"{dr|fin|igt_so|so}","Sets prescription to calculate scattering quantities.\n" "'dr' - (by Draine) standard formulation for point dipoles\n" "'fin' - slightly different one, based on a radiative correction for a finite dipole.\n" @@ -548,12 +571,14 @@ static struct opt_struct options[]={ "amplitude) should be saved to file. Amplitude matrix is never integrated (in combination " "with '-orient avg' or '-phi_integr').\n" "Default: muel",1,NULL}, +#ifndef SPARSE {PAR(sg_format),"{text|text_ext|ddscat6|ddscat7}","Specifies format for saving geometry files. " "First two are ADDA default formats for single- and multi-domain particles respectively. " "'text' is automatically changed to 'text_ext' for multi-domain particles. Two DDSCAT " "formats correspond to its shape options 'FRMFIL' (version 6) and 'FROM_FILE' (version 7) " "and output of 'calltarget' utility.\n" "Default: text",1,NULL}, +#endif // !SPARSE /* TO ADD NEW FORMAT OF SHAPE FILE * Modify string constants after 'PAR(sg_format)': add new argument to list {...} and * add its description to the next string. @@ -572,7 +597,9 @@ static struct opt_struct options[]={ {PAR(store_dip_pol),"","Save dipole polarizations to a file",0,NULL}, {PAR(store_force),"","Calculate the radiation force on each dipole. Implies '-Cpr'", 0,NULL}, +#ifndef SPARSE {PAR(store_grans),"","Save granule coordinates (placed by '-granul' option) to a file",0,NULL}, +#endif {PAR(store_int_field),"","Save internal fields to a file",0,NULL}, {PAR(store_scat_grid),"","Calculate Mueller matrix for a grid of scattering angles and save it " "to a file.",0,NULL}, @@ -722,7 +749,7 @@ INLINE void TestNarg(const int Narg,const int need) NargError(Narg,buf); } } // otherwise special cases are considered, encoded by negative values - else if (need==UNDEF); // do nothing + else if (need==UNDEF) {} // do nothing else if (need==FNAME_ARG) { if (Narg!=1) NargError(Narg,"1"); } @@ -901,6 +928,7 @@ PARSE_FUNC(beam) int i,j,need; bool found; + if (Narg==0) PrintErrorHelp("Suboption (argument) is missing for '-beam' option"); Narg--; found=false; i=-1; @@ -997,6 +1025,7 @@ PARSE_FUNC(gpu) TestNonNegative_i(gpuInd,"GPU index"); } #endif +#ifndef SPARSE PARSE_FUNC(granul) { if (Narg!=2 && Narg!=3) NargError(Narg,"2 or 3"); @@ -1012,6 +1041,7 @@ PARSE_FUNC(granul) gr_mat--; // converted to usual indexing starting from 0 sh_granul=true; } +#endif PARSE_FUNC(grid) { if (Narg!=1 && Narg!=3) NargError(Narg,"1 or 3"); @@ -1059,6 +1089,10 @@ PARSE_FUNC(h) while (options[i].sub[++j].name!=NULL) printf(" %s %s\n",options[i].sub[j].name,options[i].sub[j].usage); printf("Type '%s -h %s ' for details\n",exename,options[i].name); +#ifdef SPARSE + if (strcmp(options[i].name,"shape")==0) + printf("!!! Most of the shape options are disabled in sparse mode\n"); +#endif } } found=true; @@ -1071,6 +1105,9 @@ PARSE_FUNC(h) "Available options:\n",exename,exeusage); for (i=0;i' for details\n",exename); +#ifdef SPARSE + printf("!!! A number of options are disabled in sparse mode\n"); +#endif } } // exit @@ -1081,19 +1118,18 @@ PARSE_FUNC(init_field) if (strcmp(argv[1],"auto")==0) InitField=IF_AUTO; else if (strcmp(argv[1],"zero")==0) InitField=IF_ZERO; else if (strcmp(argv[1],"inc")==0) InitField=IF_INC; - else if (strcmp(argv[1],"wkb")==0) InitField=IF_WKB; + else if (strcmp(argv[1],"wkb")==0) +#ifdef SPARSE + PrintErrorHelp("Initial field 'wkb' is not supported in sparse mode"); +#else + InitField=IF_WKB; +#endif else NotSupported("Initial field prescription",argv[1]); } PARSE_FUNC(int) { double tmp; -#ifdef ADDA_SPARSE //only point dipoles in sparse mode - if (strcmp(argv[1],"poi")!=0) { - NotSupported("Interaction term prescription (sparse mode)",argv[1]); - } -#endif - if (Narg<1 || Narg>3) NargError(Narg,"from 1 to 3"); if (strcmp(argv[1],"fcd")==0) IntRelation=G_FCD; else if (strcmp(argv[1],"fcd_st")==0) IntRelation=G_FCD_ST; @@ -1254,12 +1290,14 @@ PARSE_FUNC(recalc_resid) { recalc_resid=true; } +#ifndef SPARSE PARSE_FUNC(save_geom) { if (Narg>1) NargError(Narg,"0 or 1"); save_geom=true; if (Narg==1) save_geom_fname=ScanStrError(argv[1],MAX_FNAME); } +#endif PARSE_FUNC(scat) { if (strcmp(argv[1],"dr")==0) ScatRelation=SQ_DRAINE; @@ -1286,6 +1324,7 @@ PARSE_FUNC(scat_matr) else if (strcmp(argv[1],"none")==0) store_mueller=store_ampl=false; else NotSupported("Scattering matrix specifier",argv[1]); } +#ifndef SPARSE PARSE_FUNC(sg_format) { if (strcmp(argv[1],"text")==0) sg_format=SF_TEXT; @@ -1303,11 +1342,13 @@ PARSE_FUNC(sg_format) */ else NotSupported("Geometry format",argv[1]); } +#endif PARSE_FUNC(shape) { int i,j,need; bool found; + if (Narg==0) PrintErrorHelp("Suboption (argument) is missing for '-shape' option"); Narg--; found=false; i=-1; @@ -1360,10 +1401,12 @@ PARSE_FUNC(store_force) store_force = true; calc_mat_force = true; } +#ifndef SPARSE PARSE_FUNC(store_grans) { store_grans=true; } +#endif PARSE_FUNC(store_int_field) { store_int_field=true; @@ -1521,6 +1564,12 @@ PARSE_FUNC(V) #endif #ifdef CLFFT_APPLE "CLFFT_APPLE, " +#endif +#ifdef SPARSE + "SPARSE, " +#endif +#ifdef USE_SSE3 + "USE_SSE3, " #endif ""; printf("Extra build options: "); @@ -1857,6 +1906,9 @@ void VariablesInterconnect(void) if (sizeX!=UNDEF && a_eq!=UNDEF) PrintError("'-size' and '-eq_rad' can not be used together"); if (calc_mat_force && beamtype!=B_PLANE) PrintError("Currently radiation forces can not be calculated for non-plane incident wave"); +#ifdef SPARSE + if (shape==SH_SPHERE) PrintError("Sparse mode requires shape to be read from file (-shape read ...)"); +#endif // scale boxes by jagged; should be completely robust to overflows #define JAGGED_BOX(a) if (a!=UNDEF) { \ if ((BOX_MAX/(size_t)jagged)<(size_t)a) \ @@ -2030,12 +2082,12 @@ void PrintInfo(void) fprintf(logfile,"lambda: "GFORM"\n",lambda); fprintf(logfile,"shape: "); fprintf(logfile,"%s"GFORM"%s\n",sh_form_str1,sizeX,sh_form_str2); -#ifndef ADDA_SPARSE +#ifndef SPARSE if (sh_granul) fprintf(logfile, " domain %d is filled with %d granules of diameter "GFORMDEF"\n" " volume fraction: specified - "GFORMDEF", actual - "GFORMDEF"\n", gr_mat+1,gr_N,gr_d,gr_vf,gr_vf_real); -#endif //ADDA_SPARSE +#endif // SPARSE fprintf(logfile,"box dimensions: %ix%ix%i\n",boxX,boxY,boxZ); if (anisotropy) { fprintf(logfile,"refractive index (diagonal elements of the tensor):\n"); @@ -2151,8 +2203,10 @@ void PrintInfo(void) fprintf(logfile,"FFTW3\n"); #elif defined(FFT_TEMPERTON) fprintf(logfile,"by C.Temperton\n"); +#elif defined(SPARSE) + fprintf(logfile,"none (sparse mode)\n"); #endif -#ifdef OPENCL +#if defined(OPENCL) && !defined(SPARSE) fprintf(logfile,"OpenCL FFT algorithm: "); # ifdef CLFFT_AMD fprintf(logfile,"by AMD\n"); diff --git a/src/sparse_ops.h b/src/sparse_ops.h index 26de2469..f9624580 100644 --- a/src/sparse_ops.h +++ b/src/sparse_ops.h @@ -1,186 +1,176 @@ /* File: sparse_ops.h - * Descr: low-level routines for the sparse mode ADDA + * Descr: low-level routines for the sparse mode ADDA; void in non-sparse mode * - * Copyright (C) 2006-2012 ADDA contributors + * Copyright (C) 2011-2013 ADDA contributors * This file is part of ADDA. * - * ADDA is free software: you can redistribute it and/or modify it under the terms of the GNU - * General Public License as published by the Free Software Foundation, either version 3 of the - * License, or (at your option) any later version. + * ADDA is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. * - * ADDA is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even - * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General - * Public License for more details. + * ADDA is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with ADDA. If not, see * . */ +#ifdef SPARSE -#ifndef __aijprod_h -#define __aijprod_h +#ifndef __sparse_ops_h +#define __sparse_ops_h #include "cmplx.h" #include "interaction.h" #include "vars.h" - -#ifdef ADDA_SPARSE #ifdef USE_SSE3 -inline static void CcMul(doublecomplex * restrict argvec_src, - doublecomplex * restrict argvec_dest, const int j) -/* - Takes the j'th block in argvec_src, multiplies by cc_sqrt at that - position and stores the result in argvec_dest. -*/ +//===================================================================================================================== + +static inline void CcMul(doublecomplex * restrict argvec_src,doublecomplex * restrict argvec_dest, const size_t j) +// Takes the j'th block in argvec_src, multiplies by cc_sqrt at that position and stores the result in argvec_dest. { - const int j3 = j*3; - *(__m128d *)&(argvec_dest[j3]) = cmul(*(__m128d *)&(argvec_src[j3]), - *(__m128d *)&(cc_sqrt[material[j]][0])); - *(__m128d *)&(argvec_dest[j3+1]) = cmul(*(__m128d *)&(argvec_src[j3+1]), - *(__m128d *)&(cc_sqrt[material[j]][1])); - *(__m128d *)&(argvec_dest[j3+2]) = cmul(*(__m128d *)&(argvec_src[j3+2]), - *(__m128d *)&(cc_sqrt[material[j]][2])); + const size_t j3=j*3; + *(__m128d *)&(argvec_dest[j3]) = cmul(*(__m128d *)&(argvec_src[j3]), + *(__m128d *)&(cc_sqrt[material[j]][0])); + *(__m128d *)&(argvec_dest[j3+1]) = cmul(*(__m128d *)&(argvec_src[j3+1]), + *(__m128d *)&(cc_sqrt[material[j]][1])); + *(__m128d *)&(argvec_dest[j3+2]) = cmul(*(__m128d *)&(argvec_src[j3+2]), + *(__m128d *)&(cc_sqrt[material[j]][2])); } -inline static void AijProd(doublecomplex * restrict argvec, - doublecomplex * restrict resultvec, - const int i, const int j) -/* - Handles the multiplication of the i'th block of - argvec with the G_ij block of the G-matrix, and adds the result - to the j'th block of resultvec. -*/ +//===================================================================================================================== + +static inline void AijProd(doublecomplex * restrict argvec,doublecomplex * restrict resultvec,const size_t i, + const size_t j) +/* Handles the multiplication of the i'th block of argvec with the G_ij block of the G-matrix, and adds the result + * to the j'th block of resultvec. + */ { - doublecomplex iterm[6]; - const unsigned int i3 = 3*i, j3 = 3*j; - - CalcInterTerm(position[i3]-position_full[j3], position[i3+1]-position_full[j3+1], - position[i3+2]-position_full[j3+2], iterm); - - __m128d res, tmp; - - const __m128d argX = _mm_load_pd((double *)(argvec+j3)); - const __m128d argY = _mm_load_pd((double *)(argvec+j3+1)); - const __m128d argZ = _mm_load_pd((double *)(argvec+j3+2)); - - res = cmul(argX, *(__m128d *)&(iterm[0])); - tmp = cmul(argY, *(__m128d *)&(iterm[1])); - res = cadd(tmp,res); - tmp = cmul(argZ, *(__m128d *)&(iterm[2])); - res = cadd(tmp,res); - *(__m128d *)&(resultvec[i3]) = cadd(res, *(__m128d *)&(resultvec[i3])); - - res = cmul(argX, *(__m128d *)&(iterm[1])); - tmp = cmul(argY, *(__m128d *)&(iterm[3])); - res = cadd(tmp,res); - tmp = cmul(argZ, *(__m128d *)&(iterm[4])); - res = cadd(tmp,res); - *(__m128d *)&(resultvec[i3+1]) = cadd(res, *(__m128d *)&(resultvec[i3+1])); - - res = cmul(argX, *(__m128d *)&(iterm[2])); - tmp = cmul(argY, *(__m128d *)&(iterm[4])); - res = cadd(tmp,res); - tmp = cmul(argZ, *(__m128d *)&(iterm[5])); - res = cadd(tmp,res); - *(__m128d *)&(resultvec[i3+2]) = cadd(res, *(__m128d *)&(resultvec[i3+2])); + doublecomplex iterm[6]; + const size_t i3 = 3*i, j3 = 3*j; + + (*CalcInterTerm)(position[i3]-position_full[j3], position[i3+1]-position_full[j3+1], + position[i3+2]-position_full[j3+2], iterm); + + __m128d res, tmp; + + const __m128d argX = _mm_load_pd((double *)(argvec+j3)); + const __m128d argY = _mm_load_pd((double *)(argvec+j3+1)); + const __m128d argZ = _mm_load_pd((double *)(argvec+j3+2)); + + res = cmul(argX, *(__m128d *)&(iterm[0])); + tmp = cmul(argY, *(__m128d *)&(iterm[1])); + res = cadd(tmp,res); + tmp = cmul(argZ, *(__m128d *)&(iterm[2])); + res = cadd(tmp,res); + *(__m128d *)&(resultvec[i3]) = cadd(res, *(__m128d *)&(resultvec[i3])); + + res = cmul(argX, *(__m128d *)&(iterm[1])); + tmp = cmul(argY, *(__m128d *)&(iterm[3])); + res = cadd(tmp,res); + tmp = cmul(argZ, *(__m128d *)&(iterm[4])); + res = cadd(tmp,res); + *(__m128d *)&(resultvec[i3+1]) = cadd(res, *(__m128d *)&(resultvec[i3+1])); + + res = cmul(argX, *(__m128d *)&(iterm[2])); + tmp = cmul(argY, *(__m128d *)&(iterm[4])); + res = cadd(tmp,res); + tmp = cmul(argZ, *(__m128d *)&(iterm[5])); + res = cadd(tmp,res); + *(__m128d *)&(resultvec[i3+2]) = cadd(res, *(__m128d *)&(resultvec[i3+2])); } -inline static void DiagProd(doublecomplex * restrict argvec, doublecomplex * restrict resultvec, - const int i) -/* - Multiplies the result in the i'th block of resultvec by cc_sqrt at that block, - subtracts the result from the i'th block of argvec, and stores the result in - the i'th block if resultvec. -*/ +//===================================================================================================================== + +static inline void DiagProd(doublecomplex * restrict argvec,doublecomplex * restrict resultvec,const size_t i) +/* Multiplies the result in the i'th block of resultvec by cc_sqrt at that block, subtracts the result from the i'th + * block of argvec, and stores the result in the i'th block if resultvec. + */ { - const int i3 = i*3; - - const __m128d tmp1 = cmul(*(__m128d *)&(resultvec[i3]),*(__m128d *)&(cc_sqrt[material[i]][0])); - const __m128d tmp2 = cmul(*(__m128d *)&(resultvec[i3+1]),*(__m128d *)&(cc_sqrt[material[i]][1])); - const __m128d tmp3 = cmul(*(__m128d *)&(resultvec[i3+2]),*(__m128d *)&(cc_sqrt[material[i]][2])); - - *(__m128d *)&(resultvec[i3]) = _mm_sub_pd(*(__m128d *)&(argvec[i3]),tmp1); - *(__m128d *)&(resultvec[i3+1]) = _mm_sub_pd(*(__m128d *)&(argvec[i3+1]),tmp2); - *(__m128d *)&(resultvec[i3+2]) = _mm_sub_pd(*(__m128d *)&(argvec[i3+2]),tmp3); + const size_t i3 = i*3; + + const __m128d tmp1 = cmul(*(__m128d *)&(resultvec[i3]),*(__m128d *)&(cc_sqrt[material[i]][0])); + const __m128d tmp2 = cmul(*(__m128d *)&(resultvec[i3+1]),*(__m128d *)&(cc_sqrt[material[i]][1])); + const __m128d tmp3 = cmul(*(__m128d *)&(resultvec[i3+2]),*(__m128d *)&(cc_sqrt[material[i]][2])); + + *(__m128d *)&(resultvec[i3]) = _mm_sub_pd(*(__m128d *)&(argvec[i3]),tmp1); + *(__m128d *)&(resultvec[i3+1]) = _mm_sub_pd(*(__m128d *)&(argvec[i3+1]),tmp2); + *(__m128d *)&(resultvec[i3+2]) = _mm_sub_pd(*(__m128d *)&(argvec[i3+2]),tmp3); } #else //SSE3 not used -inline static void CcMul(doublecomplex * restrict argvec_src, - doublecomplex * restrict argvec_dest, const int j) -/* - Takes the j'th block in argvec_src, multiplies by cc_sqrt at that - position and stores the result in argvec_dest. -*/ +//===================================================================================================================== + +static inline void CcMul(doublecomplex * restrict argvec_src,doublecomplex * restrict argvec_dest, const size_t j) +// Takes the j'th block in argvec_src, multiplies by cc_sqrt at that position and stores the result in argvec_dest. { - const int j3 = j*3; - cMult(argvec_src[j3],cc_sqrt[material_full[j]][0],argvec_dest[j3]); - cMult(argvec_src[j3+1],cc_sqrt[material_full[j]][1],argvec_dest[j3+1]); - cMult(argvec_src[j3+2],cc_sqrt[material_full[j]][2],argvec_dest[j3+2]); + const size_t j3 = j*3; + cMult(argvec_src[j3],cc_sqrt[material[j]][0],argvec_dest[j3]); + cMult(argvec_src[j3+1],cc_sqrt[material[j]][1],argvec_dest[j3+1]); + cMult(argvec_src[j3+2],cc_sqrt[material[j]][2],argvec_dest[j3+2]); } -inline static void AijProd(doublecomplex * restrict argvec, - doublecomplex * restrict resultvec, - const int i, const int j) -/* - Handles the multiplication of the i'th block of - argvec with the G_ij block of the G-matrix, and adds the result - to the j'th block of resultvec. -*/ -{ - static doublecomplex tmp1, resX, resY, resZ; +//===================================================================================================================== + +static inline void AijProd(doublecomplex * restrict argvec,doublecomplex * restrict resultvec,const size_t i, + const size_t j) +/* Handles the multiplication of the i'th block of argvec with the G_ij block of the G-matrix, and adds the result + * to the j'th block of resultvec. + */ +{ + static doublecomplex tmp1,resX,resY,resZ; static doublecomplex iterm[6]; - const unsigned int i3 = 3*i, j3 = 3*j; - - //D("%d %d %d %d %d %d %d %d", i, j, position[3*i], position[3*i+1], position[3*i+2], position[3*j], position[3*j+1], position[3*j+2]); - CalcInterTerm(position[i3]-position_full[j3], position[i3+1]-position_full[j3+1], - position[i3+2]-position_full[j3+2], iterm); - + const size_t i3=3*i,j3=3*j; + + //D("%d %d %d %d %d %d %d %d",i,j,position[3*i],position[3*i+1],position[3*i+2], + // position[3*j],position[3*j+1],position[3*j+2]); + (*CalcInterTerm)(position[i3]-position_full[j3],position[i3+1]-position_full[j3+1], + position[i3+2]-position_full[j3+2],iterm); + cMult(argvec[j3],iterm[0],resX); cMult(argvec[j3+1],iterm[1],tmp1); cAdd(resX,tmp1,resX); cMult(argvec[j3+2],iterm[2],tmp1); cAdd(resX,tmp1,resX); - + cMult(argvec[j3],iterm[1],resY); cMult(argvec[j3+1],iterm[3],tmp1); cAdd(resY,tmp1,resY); cMult(argvec[j3+2],iterm[4],tmp1); cAdd(resY,tmp1,resY); - + cMult(argvec[j3],iterm[2],resZ); cMult(argvec[j3+1],iterm[4],tmp1); cAdd(resZ,tmp1,resZ); cMult(argvec[j3+2],iterm[5],tmp1); cAdd(resZ,tmp1,resZ); - + cAdd(resX,resultvec[i3],resultvec[i3]); cAdd(resY,resultvec[i3+1],resultvec[i3+1]); cAdd(resZ,resultvec[i3+2],resultvec[i3+2]); } -inline static void DiagProd(doublecomplex * restrict argvec, doublecomplex * restrict resultvec, - const int i) -/* - Multiplies the result in the i'th block of resultvec by cc_sqrt at that block, - subtracts the result from the i'th block of argvec, and stores the result in - the i'th block if resultvec. -*/ +//===================================================================================================================== + +static inline void DiagProd(doublecomplex * restrict argvec,doublecomplex * restrict resultvec,const size_t i) +/* Multiplies the result in the i'th block of resultvec by cc_sqrt at that block, subtracts the result from the i'th + * block of argvec, and stores the result in the i'th block if resultvec. + */ { - const int i3 = i*3; - static doublecomplex tmp1, tmp2, tmp3; - - cMult(resultvec[i3],cc_sqrt[material[i]][0],tmp1); - cMult(resultvec[i3+1],cc_sqrt[material[i]][1],tmp2); - cMult(resultvec[i3+2],cc_sqrt[material[i]][2],tmp3); - cSubtr(argvec[i3], tmp1, resultvec[i3]); - cSubtr(argvec[i3+1], tmp2, resultvec[i3+1]); - cSubtr(argvec[i3+2], tmp3, resultvec[i3+2]); + const size_t i3 = i*3; + static doublecomplex tmp1, tmp2, tmp3; + + cMult(resultvec[i3],cc_sqrt[material[i]][0],tmp1); + cMult(resultvec[i3+1],cc_sqrt[material[i]][1],tmp2); + cMult(resultvec[i3+2],cc_sqrt[material[i]][2],tmp3); + cSubtr(argvec[i3], tmp1, resultvec[i3]); + cSubtr(argvec[i3+1], tmp2, resultvec[i3+1]); + cSubtr(argvec[i3+2], tmp3, resultvec[i3+2]); } -#endif //USE_SSE3 +#endif // !USE_SSE3 -#endif //ADDA_SPARSE +#endif // __sparse_ops_h -#endif //__aijprod_h +#endif // SPARSE diff --git a/src/timing.c b/src/timing.c index 6ff82f71..63722540 100644 --- a/src/timing.c +++ b/src/timing.c @@ -2,7 +2,7 @@ * $Date:: $ * Descr: basic timing and statistics routines * - * Copyright (C) 2006,2008-2012 ADDA contributors + * Copyright (C) 2006,2008-2013 ADDA contributors * This file is part of ADDA. * * ADDA is free software: you can redistribute it and/or modify it under the terms of the GNU @@ -95,7 +95,7 @@ void InitTiming(void) TotalIter=TotalMatVec=TotalEval=TotalEFieldPlane=0; Timing_EField=Timing_FileIO=Timing_IntField=Timing_ScatQuan=Timing_Integration=0; Timing_ScatQuanComm=Timing_InitDmComm=0; -#ifdef ADDA_SPARSE +#ifdef SPARSE Timing_Dm_Init=Timing_Granul=Timing_FFT_Init=Timing_GranulComm=0; #endif } @@ -147,14 +147,16 @@ void FinalStatistics(void) " init OpenCL "FFORMT"\n",TO_SEC(Timing_OCL_Init)); #endif +#ifndef SPARSE fprintf(logfile, " init Dmatrix "FFORMT"\n",TO_SEC(Timing_Dm_Init)); -#ifdef PARALLEL +# ifdef PARALLEL fprintf(logfile, " communication: "FFORMT"\n",TO_SEC(Timing_InitDmComm)); -#endif +# endif fprintf(logfile, " FFT setup: "FFORMT"\n",TO_SEC(Timing_FFT_Init)); +#endif // !SPARSE } fprintf(logfile, " make particle: "FFORMT"\n",TO_SEC(Timing_Particle)); diff --git a/src/vars.c b/src/vars.c index e027cd14..0ce81ddb 100644 --- a/src/vars.c +++ b/src/vars.c @@ -6,7 +6,7 @@ * source files are called 'semi-global' and not listed here. They are defined in one file * and referenced with 'extern' in another one. * - * Copyright (C) 2006-2012 ADDA contributors + * Copyright (C) 2006-2013 ADDA contributors * This file is part of ADDA. * * ADDA is free software: you can redistribute it and/or modify it under the terms of the GNU @@ -135,7 +135,7 @@ TIME_TYPE Timing_EField, // time for calculating scattered fields Timing_Integration, // time for all integrations (with precomputed values) tstart_main; // starting time of the program (after MPI_Init in parallel) -#ifndef ADDA_SPARSE //These variables are exclusive to the FFT mode +#ifndef SPARSE //These variables are exclusive to the FFT mode unsigned short * restrict position; /* position of the dipoles; in the very end of make_particle() * z-components are adjusted to be relative to the local_z0 @@ -168,14 +168,9 @@ size_t local_x0,local_x1,local_Nx; /* starting, ending x for current processor a #else //These variables are exclusive to the sparse mode -int * restrict position; // no reason to restrict this to short in sparse mode +int *position; // no reason to restrict this to short in sparse mode; actually it points to a part of position_full //in sparse mode, all coordinates must be available to each process -int * restrict position_full; +int * restrict position_full; -unsigned char * restrict material_full; -doublecomplex * restrict arg_full; - -double local_f0, local_f1; - -#endif //ADDA_SPARSE +#endif //SPARSE diff --git a/src/vars.h b/src/vars.h index 5ee8a82e..d6733419 100644 --- a/src/vars.h +++ b/src/vars.h @@ -6,7 +6,7 @@ * source files are called 'semi-global' and not listed here. They are defined in one file * and referenced with 'extern' in another one. * - * Copyright (C) 2006-2012 ADDA contributors + * Copyright (C) 2006-2013 ADDA contributors * This file is part of ADDA. * * ADDA is free software: you can redistribute it and/or modify it under the terms of the GNU @@ -89,7 +89,7 @@ extern size_t local_Ndip,local_nvoid_Ndip,local_nRows,local_nvoid_d0,local_nvoid extern time_t wt_start,last_chp_wt; extern TIME_TYPE Timing_EField,Timing_FileIO,Timing_Integration,tstart_main; -#ifndef ADDA_SPARSE //These variables are exclusive to the FFT mode +#ifndef SPARSE //These variables are exclusive to the FFT mode extern unsigned short * restrict position; @@ -105,14 +105,9 @@ extern size_t local_Nz,local_x0,local_x1,local_Nx; #else //These variables are exclusive to the sparse mode -extern int * restrict position; -extern double * restrict DipoleCoord_full; +extern int *position; extern int * restrict position_full; -extern unsigned char * restrict material_full; -extern doublecomplex * restrict arg_full; -extern double local_f0, local_f1; - -#endif //ADDA_SPARSE +#endif //SPARSE #endif // __vars_h diff --git a/tests/2exec/README b/tests/2exec/README index 3525fc7d..5d002a5f 100644 --- a/tests/2exec/README +++ b/tests/2exec/README @@ -1,15 +1,13 @@ -Utils for automatic cross-testing different ADDA executables (by comparing the simulation results). -The main part is a bash script 'comp2exec'. Under Windows one may use MSYS to run it. This script -contains comments in the head and throughout the file to explain its usage. In particular, one may -specify the particular executables to cross test (a few typical pairs can be selected by command -line options to this script). +Utils for automatic cross-testing different ADDA executables (by comparing the simulation results). The main part is a +bash script 'comp2exec'. Under Windows one may use MSYS to run it. This script contains comments in the head and +throughout the file to explain its usage. In particular, one may specify the particular executables to cross test (a few +typical pairs can be selected by command line options to this script). -The particular tests to perform are specified by file 'suite', by a list of command lines with -certain additions and abbreviations (see comments in the file). Currently, this file provides an -extensive list of tests for testing future ADDA versions against a stable release. Some of this -tests probably need to be removed for more specific tests, since they will produce errors. -A different file (instead of 'suite') can also be used, if specified by command line option to +The particular tests to perform are specified by file 'suite', by a list of command lines with certain additions and +abbreviations (see comments in the file). Currently, this file provides an extensive list of tests for testing future +ADDA versions against a stable release. Some of this tests probably need to be removed for more specific tests, since +they will produce errors. A different file (instead of 'suite') can also be used, if specified by command line option to 'comp2exec'. -Another important part is script 'diff_numeric.awk', which compares two files with given numerical -tolerance. It may also need modification to fit a specific application. \ No newline at end of file +Another important part is script 'diff_numeric.awk', which compares two files with given numerical tolerance. It may +also need modification to fit a specific application. diff --git a/tests/2exec/comp2exec b/tests/2exec/comp2exec index 8edfebb2..2e4cd985 100755 --- a/tests/2exec/comp2exec +++ b/tests/2exec/comp2exec @@ -1,16 +1,15 @@ #!/bin/bash -# First parameter is seq (default), mpi, mpi_seq, ocl, or ocl_seq. -# One-word mode compares corresponding current version with previous stable version (e.g. 1.0) -# - some differences are always expected. -# Two-word mode compares two current versions corresponding to different modes (e.g. mpi vs. seq) -# - only minor differences are generally expected. +# First parameter is seq (default), mpi, mpi_seq, ocl, or ocl_seq. +# One-word mode compares corresponding current version with previous stable version (e.g. 1.0) - some differences are +# always expected. Two-word mode compares two current versions corresponding to different modes (e.g. mpi vs. seq) - +# only minor differences are generally expected. # Second (if given) specifies file with a test suite (default - "suite"). -# Third (if given) specifies bash pattern (best if quoted). Tests are started only from the command -# line that matches it. (convenient to restart the test suite from a break point) +# Third (if given) specifies bash pattern (best if quoted). Tests are started only from the command line that matches +# it. (convenient to restart the test suite from a break point) # # Look below for "!!!", which mark the places where adjustments are probably need to be done -#---------------- Set parameters and define internal functions --------------------------------- +#---------------- Set parameters and define internal functions --------------------------------------------------------- # Location of sample input files (not needed if all input files are already present) INPUTDIR="./../../input" @@ -19,25 +18,53 @@ ADDASEQ="./../../src/seq/adda" ADDAMPI="./../../src/mpi/adda_mpi" ADDAOCL="./../../src/ocl/adda_ocl" # MPI command prefix -MPIRUN="mpiexec -n 2" +MPIRUN="mpiexec -n 4" + +# Set the following flag to ignore differences related to different FFT methods, such as FFT grid sizes and memory. +# Also useful for comparison of sparse version with the standard one +#FFTCOMP=1 + +# SPARSE indicates that sparse mode is compared with sparse. SPARSE_STANDARD - that sparse is compared with standard +# (automatically implies FFTCOMP above). Use at most one of them +#SPARSE=1 +#SPARSE_STANDARD=1 + +if [ -n "$SPARSE_STANDARD" ]; then + FFTCOMP=1 +fi + +if [ -n "$SPARSE" ]; then + DEFSUITE=suite_sparse + SEQREF="./adda_sparse" # !!! This should be adjusted + MPIREF="./adda_mpi_sparse" # !!! This should be adjusted + OCLREF="./adda_ocl_sparse" # !!! This should be adjusted +else + if [ -n "$SPARSE_STANDARD" ]; then + DEFSUITE=suite_sparse + else + DEFSUITE=suite + fi + SEQREF="./adda_1.1" # !!! This should be adjusted + MPIREF="./adda_mpi_1.1.exe" # !!! This should be adjusted + OCLREF="./adda_ocl_1.1" # !!! This should be adjusted +fi MODE=${1:-seq} -DEFSUITE=suite SUITEFILE=${2:-$DEFSUITE} if [ $MODE == "seq" ]; then - EXECREF="./adda" # !!! This should be adjusted + EXECREF=$SEQREF EXECTEST=$ADDASEQ IGERRREF=1 elif [ $MODE == "mpi" ]; then - EXECREF="$MPIRUN ./adda_mpi" # !!! This should be adjusted + EXECREF="$MPIRUN $MPIREF" EXECTEST="$MPIRUN $ADDAMPI" IGERRREF=1 elif [ $MODE == "mpi_seq" ]; then EXECREF=$ADDASEQ EXECTEST="$MPIRUN $ADDAMPI" elif [ $MODE == "ocl" ]; then - EXECREF="./adda_ocl_amd" # !!! This should be adjusted + EXECREF=$OCLREF EXECTEST=$ADDAOCL IGERRREF=1 elif [ $MODE == "ocl_seq" ]; then @@ -48,12 +75,8 @@ else exit 1 fi -# Set the following flag to ignore differences related to different FFT methods, such as FFT grid sizes and memory -#FFTCOMP=1 - -# Whether errors in running reference version of ADDA should be ignored. Useful for comparing with -# older versions, which lack all the tested functionality. This variable is set above for some -# modes. +# Whether errors in running reference version of ADDA should be ignored. Useful for comparing with older versions, which +# lack all the tested functionality. This variable is set above for some modes. # !!! It can also be explicitly set here. #IGERRREF=1 @@ -67,8 +90,8 @@ TMPREF=ref.tmp # temporary files for text processing TMPTEST=test.tmp function cleanfile { - # Processes file $1 and stores the result in $2. Basic step is removing lines matching regexp $3. - # Additional optional step is cutting the end of the file starting from line matching regexp $4. + # Processes file $1 and stores the result in $2. Basic step is removing lines matching regexp $3. Additional optional + # step is cutting the end of the file starting from line matching regexp $4. if [ -n "$4" ]; then awk "BEGIN{p=1} /$4/{p=0} p" $1 | grep -v -E -e "$3" > $2 else @@ -76,21 +99,20 @@ function cleanfile { fi } function numdiff { - # Performs comparison of two files, ignoring differences in numerical values smaller than - # absolute and relative tolerances, specified by variables atol and rtol (-log of real value). - # The main logic is inside awk script 'diff_numeric'. + # Performs comparison of two files, ignoring differences in numerical values smaller than absolute and relative + # tolerances, specified by variables atol and rtol (-log of real value). The main logic is inside awk script + # 'diff_numeric'. diff $1 $2 | awk -f diff_numeric.awk -v abs_tol=1e-$atol -v rel_tol=1e-$rtol - >&2 } function numigndiff { - # Same as numdiff, but additionally ignores certain differences in input files - see description - # of function cleanfile. + # Same as numdiff, but additionally ignores certain differences in input files - see description of function cleanfile cleanfile $1 $TMPREF "$3" "$4" cleanfile $2 $TMPTEST "$3" "$4" diff $TMPREF $TMPTEST | awk -f diff_numeric.awk -v abs_tol=1e-$atol -v rel_tol=1e-$rtol - >&2 } function igndiff { - # Performs comparison of two files, ignoring differences in lines matching regexp $3 and all the - # differences after the line matching regexp $4. + # Performs comparison of two files, ignoring differences in lines matching regexp $3 and all the differences after the + # line matching regexp $4. cleanfile $1 $TMPREF "$3" "$4" cleanfile $2 $TMPTEST "$3" "$4" diff $TMPREF $TMPTEST >&2 @@ -102,10 +124,9 @@ function asmin { fi } function mycmp { - # Determines which differences are considered significant, depending on files. Use of $ at the - # end of ignore patterns seems to be not portable, since it does not work on Windows because of - # different EOL style. So we skip it for now. - + # Determines which differences are considered significant, depending on files. Use of $ at the end of ignore patterns + # seems to be not portable, since it does not work on Windows because of different EOL style. So we skip it for now. + # First, set default numerical accuracy or adjust it based on cmdline if [[ "$cmdline" == -granul\ * ]]; then asmin atol 1 @@ -124,10 +145,15 @@ function mycmp { if [ $MODE == "mpi_seq" ]; then IGNORE="$IGNORE|^(M|Total m|Maximum m|Additional m)emory usage" elif [ $MODE == "ocl_seq" ]; then - IGNORE="$IGNORE|^Using OpenCL device|^Device memory|^Searching for OpenCL devices|^Initializing (clFFT|FFTW3)|^(M|Total m|OpenCL m)emory usage" + # double definition to wrap line + IGNORE="$IGNORE|^Using OpenCL device|^Device memory|^Searching for OpenCL devices|^Initializing (clFFT|FFTW3)" + IGNORE="$IGNORE|^(M|Total m|OpenCL m)emory usage" fi if [ -n "$FFTCOMP" ]; then - IGNORE="$IGNORE|^(M|Total m|OpenCL m)emory usage" + IGNORE="$IGNORE|^(M|Total m|OpenCL m|Maximum m)emory usage" + fi + if [ -n "$SPARSE_STANDARD" ]; then + IGNORE="$IGNORE|^Calculating Green's function|^Fourier transform of Dmatrix|^Initializing (clFFT|FFTW3)" fi if [[ $MODE == "mpi" || $MODE == "mpi_seq" ]]; then CUT="^Error posting writev, " # due to typical random errors of MPICH under Windows @@ -148,10 +174,10 @@ function mycmp { if [ $MODE == "mpi_seq" ]; then IGNORE="$IGNORE|^The program was run on: |^(M|Total m|Maximum m|Additional m)emory usage" elif [ $MODE == "ocl_seq" ]; then - IGNORE="$IGNORE|^Using OpenCL device|^Device memory|^OpenCL FFT algorithm: by |^(M|Total m|OpenCL m)emory usage" + IGNORE="$IGNORE|^Using OpenCL device|^Device memory|^OpenCL FFT algorithm: |^(M|Total m|OpenCL m)emory usage" fi if [ -n "$FFTCOMP" ]; then - IGNORE="$IGNORE|^(|OpenCL )FFT algorithm: by |The FFT grid is: [0-9]+x[0-9]+x[0-9]+|^(M|Total m|OpenCL m)emory usage" + IGNORE="$IGNORE|^(|OpenCL )FFT algorithm: |^The FFT grid is: |^(M|Total m|OpenCL m|Maximum m)emory usage" fi CUT="^Total wall time: " asmin rtol 4 @@ -163,15 +189,15 @@ function mycmp { asmin rtol 5 numdiff $1 $2 elif [[ "$base" == log_int_* || "$base" == "log_orient_avg" ]]; then - asmin atol 13 + asmin atol 12 numdiff $1 $2 elif [[ "$base" == "granules" ]]; then #compare only some comments and total number of lines if [ `wc -l < $1` == `wc -l < $2` ]; then igndiff $1 $2 "^([^#]|#generated by ADDA v\.)" - else + else echo "Different number of granules" return 1 - fi + fi elif [[ "$base" == *.geom || "$base" == *.dat ]]; then igndiff $1 $2 "generated by ADDA v\." else @@ -183,17 +209,16 @@ function mydiff { if !(mycmp $1 $2); then echo "!!! Difference between files '$1' and '$2'" >&2 # !!! This should be adjusted - # It is recommended to put here a GUI diff program, which allows quick estimate of the - # importance of differences (e.g. when there are differences in minor digits of many numbers). - # However, most effort should be put in improving mycmp, so that calling the function below - # will indicate significant error by itself. The following line can be commented out at all, - # since function mycmp will produce (significant) diff of compared files to stderr. + # It is recommended to put here a GUI diff program, which allows quick estimate of the importance of differences + # (e.g. when there are differences in minor digits of many numbers). However, most effort should be put in improving + # mycmp, so that calling the function below will indicate significant error by itself. The following line can be + # also commented out, since function mycmp will produce (significant) diff of compared files to stderr. # Options include, for example: tortoisemerge, meld, vimdiff tortoisemerge $1 $2 fi } -#---------------- Prepare input files ----------------------------------------- +#---------------- Prepare input files ---------------------------------------------------------------------------------- NEEDEDFILES="scat_params.dat avg_params.dat alldir_params.dat" NEEDEDDIRS="tables" @@ -210,16 +235,16 @@ for dir in $NEEDEDDIRS; do fi done -#---------------- Run comparison ---------------------------------------------- +#---------------- Run comparison --------------------------------------------------------------------------------------- # This in combination with stdin redirection to ADDA calls below redirects current stdin directly # to ADDA call. While the stdin of the code block (while...done) is kept intact. This is especially -# relevant to mpi version, since then ADDA is executed with mpiexec, which creates forks and a big +# relevant to mpi version, since then ADDA is executed with mpiexec, which creates forks and a big # mess with stdin. exec 3<&0 imax=-1 # initialize skipping of lines up to pattern $3 -if [ -n "$3" ]; then +if [ -n "$3" ]; then skip=1 else skip=0 @@ -249,7 +274,7 @@ while read -r cmpfiles cmdline; do echo -e "\nERROR while running \n$runref\nsee $SOREF" >&2 exit 1 fi - else + else refok=1 fi # test run diff --git a/tests/2exec/diff_numeric.awk b/tests/2exec/diff_numeric.awk index 9416a104..3c9f9b87 100644 --- a/tests/2exec/diff_numeric.awk +++ b/tests/2exec/diff_numeric.awk @@ -1,8 +1,6 @@ -# Processes the diff of two files and ignores the small errors in numbers - -# either small absolute or relative errors - controlled by 'abs_tol' and 'rel_tol'. -# These values can be either specified outside by e.g. '-v abs_tol=1e-10' or assume -# default values specified below. -# Numbers should be separated from other text by spaces, '=', ',', or brackets - +# Processes the diff of two files and ignores the small errors in numbers - either small absolute or relative errors - +# controlled by 'abs_tol' and 'rel_tol'. These values can be either specified outside by e.g. '-v abs_tol=1e-10' or +# assume default values specified below. Numbers should be separated from other text by spaces, '=', ',', or brackets - # controlled by 'FS'. Output happens only if differences are found. BEGIN { @@ -23,15 +21,6 @@ function relerr(a,b, c) { return (c==0) ? 0 : abs(a-b)/c } -/^[0-9]/ { - if (i1>i2) { - printf "First file contains unmatched line:\n%s\n", ref[i2+1] - exit 3 - } - i1=0 - i2=0 -} - /^< / { i1++ ref[i1]=substr($0,3) @@ -55,9 +44,26 @@ function relerr(a,b, c) { exit 2 } else if (abs(p1[j]-p2[j])>abs_tol && relerr(p1[j],p2[j])>rel_tol) { + # wrapping the following line produces some errors on Windows, so keep it like this printf "Significant difference between numbers: '%s' and '%s' (abs_tol='%s', rel_tol='%s')\n", p1[j], p2[j], abs_tol, rel_tol exit 1 } } } +} + +/^[0-9]/ { + if (i1>i2) { + printf "First file contains unmatched line:\n%s\n", ref[i2+1] + exit 3 + } + i1=0 + i2=0 +} + +END { + if (i1>i2) { + printf "First file contains unmatched line:\n%s\n", ref[i2+1] + exit 3 + } } \ No newline at end of file diff --git a/tests/2exec/ellipsoid.geom b/tests/2exec/ellipsoid.geom index d8d91072..98c0807b 100644 --- a/tests/2exec/ellipsoid.geom +++ b/tests/2exec/ellipsoid.geom @@ -1,283 +1,203 @@ -#generated by ADDA v.1.0a3 +#generated by ADDA v.1.1 #shape: 'ellipsoid' -#box size: 16x8x4 -6 1 0 -7 1 0 -8 1 0 -9 1 0 +#box size: 8x4x12 +3 1 0 +4 1 0 +3 2 0 4 2 0 -5 2 0 -6 2 0 -7 2 0 -8 2 0 -9 2 0 -10 2 0 -11 2 0 -3 3 0 -4 3 0 -5 3 0 -6 3 0 -7 3 0 -8 3 0 -9 3 0 -10 3 0 -11 3 0 -12 3 0 -3 4 0 -4 4 0 -5 4 0 -6 4 0 -7 4 0 -8 4 0 -9 4 0 -10 4 0 -11 4 0 -12 4 0 -4 5 0 -5 5 0 -6 5 0 -7 5 0 -8 5 0 -9 5 0 -10 5 0 -11 5 0 -6 6 0 -7 6 0 -8 6 0 -9 6 0 -5 0 1 -6 0 1 -7 0 1 -8 0 1 -9 0 1 -10 0 1 2 1 1 3 1 1 4 1 1 5 1 1 -6 1 1 -7 1 1 -8 1 1 -9 1 1 -10 1 1 -11 1 1 -12 1 1 -13 1 1 -1 2 1 2 2 1 3 2 1 4 2 1 5 2 1 -6 2 1 -7 2 1 -8 2 1 -9 2 1 -10 2 1 -11 2 1 -12 2 1 -13 2 1 -14 2 1 -0 3 1 -1 3 1 -2 3 1 -3 3 1 -4 3 1 -5 3 1 -6 3 1 -7 3 1 -8 3 1 -9 3 1 -10 3 1 -11 3 1 -12 3 1 -13 3 1 -14 3 1 -15 3 1 -0 4 1 -1 4 1 -2 4 1 -3 4 1 -4 4 1 -5 4 1 -6 4 1 -7 4 1 -8 4 1 -9 4 1 -10 4 1 -11 4 1 -12 4 1 -13 4 1 -14 4 1 -15 4 1 -1 5 1 -2 5 1 -3 5 1 -4 5 1 -5 5 1 -6 5 1 -7 5 1 -8 5 1 -9 5 1 -10 5 1 -11 5 1 -12 5 1 -13 5 1 -14 5 1 -2 6 1 -3 6 1 -4 6 1 -5 6 1 -6 6 1 -7 6 1 -8 6 1 -9 6 1 -10 6 1 -11 6 1 -12 6 1 -13 6 1 -5 7 1 -6 7 1 -7 7 1 -8 7 1 -9 7 1 -10 7 1 -5 0 2 -6 0 2 -7 0 2 -8 0 2 -9 0 2 -10 0 2 +3 0 2 +4 0 2 +1 1 2 2 1 2 3 1 2 4 1 2 5 1 2 6 1 2 -7 1 2 -8 1 2 -9 1 2 -10 1 2 -11 1 2 -12 1 2 -13 1 2 1 2 2 2 2 2 3 2 2 4 2 2 5 2 2 6 2 2 -7 2 2 -8 2 2 -9 2 2 -10 2 2 -11 2 2 -12 2 2 -13 2 2 -14 2 2 -0 3 2 -1 3 2 -2 3 2 3 3 2 4 3 2 -5 3 2 -6 3 2 -7 3 2 -8 3 2 -9 3 2 -10 3 2 -11 3 2 -12 3 2 -13 3 2 -14 3 2 -15 3 2 -0 4 2 -1 4 2 -2 4 2 -3 4 2 -4 4 2 -5 4 2 -6 4 2 -7 4 2 -8 4 2 -9 4 2 -10 4 2 -11 4 2 -12 4 2 -13 4 2 -14 4 2 -15 4 2 -1 5 2 -2 5 2 -3 5 2 -4 5 2 -5 5 2 -6 5 2 -7 5 2 -8 5 2 -9 5 2 -10 5 2 -11 5 2 -12 5 2 -13 5 2 -14 5 2 -2 6 2 -3 6 2 -4 6 2 -5 6 2 -6 6 2 -7 6 2 -8 6 2 -9 6 2 -10 6 2 -11 6 2 -12 6 2 -13 6 2 -5 7 2 -6 7 2 -7 7 2 -8 7 2 -9 7 2 -10 7 2 +2 0 3 +3 0 3 +4 0 3 +5 0 3 +1 1 3 +2 1 3 +3 1 3 +4 1 3 +5 1 3 6 1 3 -7 1 3 -8 1 3 -9 1 3 +1 2 3 +2 2 3 +3 2 3 4 2 3 5 2 3 6 2 3 -7 2 3 -8 2 3 -9 2 3 -10 2 3 -11 2 3 +2 3 3 3 3 3 4 3 3 5 3 3 -6 3 3 -7 3 3 -8 3 3 -9 3 3 -10 3 3 -11 3 3 -12 3 3 -3 4 3 -4 4 3 -5 4 3 -6 4 3 -7 4 3 -8 4 3 -9 4 3 -10 4 3 -11 4 3 -12 4 3 -4 5 3 -5 5 3 -6 5 3 -7 5 3 -8 5 3 -9 5 3 -10 5 3 -11 5 3 -6 6 3 -7 6 3 -8 6 3 -9 6 3 +2 0 4 +3 0 4 +4 0 4 +5 0 4 +0 1 4 +1 1 4 +2 1 4 +3 1 4 +4 1 4 +5 1 4 +6 1 4 +7 1 4 +0 2 4 +1 2 4 +2 2 4 +3 2 4 +4 2 4 +5 2 4 +6 2 4 +7 2 4 +2 3 4 +3 3 4 +4 3 4 +5 3 4 +1 0 5 +2 0 5 +3 0 5 +4 0 5 +5 0 5 +6 0 5 +0 1 5 +1 1 5 +2 1 5 +3 1 5 +4 1 5 +5 1 5 +6 1 5 +7 1 5 +0 2 5 +1 2 5 +2 2 5 +3 2 5 +4 2 5 +5 2 5 +6 2 5 +7 2 5 +1 3 5 +2 3 5 +3 3 5 +4 3 5 +5 3 5 +6 3 5 +1 0 6 +2 0 6 +3 0 6 +4 0 6 +5 0 6 +6 0 6 +0 1 6 +1 1 6 +2 1 6 +3 1 6 +4 1 6 +5 1 6 +6 1 6 +7 1 6 +0 2 6 +1 2 6 +2 2 6 +3 2 6 +4 2 6 +5 2 6 +6 2 6 +7 2 6 +1 3 6 +2 3 6 +3 3 6 +4 3 6 +5 3 6 +6 3 6 +2 0 7 +3 0 7 +4 0 7 +5 0 7 +0 1 7 +1 1 7 +2 1 7 +3 1 7 +4 1 7 +5 1 7 +6 1 7 +7 1 7 +0 2 7 +1 2 7 +2 2 7 +3 2 7 +4 2 7 +5 2 7 +6 2 7 +7 2 7 +2 3 7 +3 3 7 +4 3 7 +5 3 7 +2 0 8 +3 0 8 +4 0 8 +5 0 8 +1 1 8 +2 1 8 +3 1 8 +4 1 8 +5 1 8 +6 1 8 +1 2 8 +2 2 8 +3 2 8 +4 2 8 +5 2 8 +6 2 8 +2 3 8 +3 3 8 +4 3 8 +5 3 8 +3 0 9 +4 0 9 +1 1 9 +2 1 9 +3 1 9 +4 1 9 +5 1 9 +6 1 9 +1 2 9 +2 2 9 +3 2 9 +4 2 9 +5 2 9 +6 2 9 +3 3 9 +4 3 9 +2 1 10 +3 1 10 +4 1 10 +5 1 10 +2 2 10 +3 2 10 +4 2 10 +5 2 10 +3 1 11 +4 1 11 +3 2 11 +4 2 11 diff --git a/tests/2exec/sphere.geom b/tests/2exec/sphere.geom new file mode 100644 index 00000000..76c32bfb --- /dev/null +++ b/tests/2exec/sphere.geom @@ -0,0 +1,283 @@ +#generated by ADDA v.1.0 +#shape: 'sphere' +#box size: 8x8x8 +3 2 0 +4 2 0 +2 3 0 +3 3 0 +4 3 0 +5 3 0 +2 4 0 +3 4 0 +4 4 0 +5 4 0 +3 5 0 +4 5 0 +2 1 1 +3 1 1 +4 1 1 +5 1 1 +1 2 1 +2 2 1 +3 2 1 +4 2 1 +5 2 1 +6 2 1 +1 3 1 +2 3 1 +3 3 1 +4 3 1 +5 3 1 +6 3 1 +1 4 1 +2 4 1 +3 4 1 +4 4 1 +5 4 1 +6 4 1 +1 5 1 +2 5 1 +3 5 1 +4 5 1 +5 5 1 +6 5 1 +2 6 1 +3 6 1 +4 6 1 +5 6 1 +3 0 2 +4 0 2 +1 1 2 +2 1 2 +3 1 2 +4 1 2 +5 1 2 +6 1 2 +1 2 2 +2 2 2 +3 2 2 +4 2 2 +5 2 2 +6 2 2 +0 3 2 +1 3 2 +2 3 2 +3 3 2 +4 3 2 +5 3 2 +6 3 2 +7 3 2 +0 4 2 +1 4 2 +2 4 2 +3 4 2 +4 4 2 +5 4 2 +6 4 2 +7 4 2 +1 5 2 +2 5 2 +3 5 2 +4 5 2 +5 5 2 +6 5 2 +1 6 2 +2 6 2 +3 6 2 +4 6 2 +5 6 2 +6 6 2 +3 7 2 +4 7 2 +2 0 3 +3 0 3 +4 0 3 +5 0 3 +1 1 3 +2 1 3 +3 1 3 +4 1 3 +5 1 3 +6 1 3 +0 2 3 +1 2 3 +2 2 3 +3 2 3 +4 2 3 +5 2 3 +6 2 3 +7 2 3 +0 3 3 +1 3 3 +2 3 3 +3 3 3 +4 3 3 +5 3 3 +6 3 3 +7 3 3 +0 4 3 +1 4 3 +2 4 3 +3 4 3 +4 4 3 +5 4 3 +6 4 3 +7 4 3 +0 5 3 +1 5 3 +2 5 3 +3 5 3 +4 5 3 +5 5 3 +6 5 3 +7 5 3 +1 6 3 +2 6 3 +3 6 3 +4 6 3 +5 6 3 +6 6 3 +2 7 3 +3 7 3 +4 7 3 +5 7 3 +2 0 4 +3 0 4 +4 0 4 +5 0 4 +1 1 4 +2 1 4 +3 1 4 +4 1 4 +5 1 4 +6 1 4 +0 2 4 +1 2 4 +2 2 4 +3 2 4 +4 2 4 +5 2 4 +6 2 4 +7 2 4 +0 3 4 +1 3 4 +2 3 4 +3 3 4 +4 3 4 +5 3 4 +6 3 4 +7 3 4 +0 4 4 +1 4 4 +2 4 4 +3 4 4 +4 4 4 +5 4 4 +6 4 4 +7 4 4 +0 5 4 +1 5 4 +2 5 4 +3 5 4 +4 5 4 +5 5 4 +6 5 4 +7 5 4 +1 6 4 +2 6 4 +3 6 4 +4 6 4 +5 6 4 +6 6 4 +2 7 4 +3 7 4 +4 7 4 +5 7 4 +3 0 5 +4 0 5 +1 1 5 +2 1 5 +3 1 5 +4 1 5 +5 1 5 +6 1 5 +1 2 5 +2 2 5 +3 2 5 +4 2 5 +5 2 5 +6 2 5 +0 3 5 +1 3 5 +2 3 5 +3 3 5 +4 3 5 +5 3 5 +6 3 5 +7 3 5 +0 4 5 +1 4 5 +2 4 5 +3 4 5 +4 4 5 +5 4 5 +6 4 5 +7 4 5 +1 5 5 +2 5 5 +3 5 5 +4 5 5 +5 5 5 +6 5 5 +1 6 5 +2 6 5 +3 6 5 +4 6 5 +5 6 5 +6 6 5 +3 7 5 +4 7 5 +2 1 6 +3 1 6 +4 1 6 +5 1 6 +1 2 6 +2 2 6 +3 2 6 +4 2 6 +5 2 6 +6 2 6 +1 3 6 +2 3 6 +3 3 6 +4 3 6 +5 3 6 +6 3 6 +1 4 6 +2 4 6 +3 4 6 +4 4 6 +5 4 6 +6 4 6 +1 5 6 +2 5 6 +3 5 6 +4 5 6 +5 5 6 +6 5 6 +2 6 6 +3 6 6 +4 6 6 +5 6 6 +3 2 7 +4 2 7 +2 3 7 +3 3 7 +4 3 7 +5 3 7 +2 4 7 +3 4 7 +4 4 7 +5 4 7 +3 5 7 +4 5 7 diff --git a/tests/2exec/sphere4.geom b/tests/2exec/sphere4.geom new file mode 100644 index 00000000..4aa65c6f --- /dev/null +++ b/tests/2exec/sphere4.geom @@ -0,0 +1,35 @@ +#generated by ADDA v.1.0 +#shape: 'sphere' +#box size: 4x4x4 +1 1 0 +2 1 0 +1 2 0 +2 2 0 +1 0 1 +2 0 1 +0 1 1 +1 1 1 +2 1 1 +3 1 1 +0 2 1 +1 2 1 +2 2 1 +3 2 1 +1 3 1 +2 3 1 +1 0 2 +2 0 2 +0 1 2 +1 1 2 +2 1 2 +3 1 2 +0 2 2 +1 2 2 +2 2 2 +3 2 2 +1 3 2 +2 3 2 +1 1 3 +2 1 3 +1 2 3 +2 2 3 diff --git a/tests/2exec/suite b/tests/2exec/suite index e6d6a002..40c52b4d 100644 --- a/tests/2exec/suite +++ b/tests/2exec/suite @@ -1,7 +1,6 @@ -#------------------------- Variable definitons --------------------------------- -# All variable names should be enclosed in ';' and start the line - its value -# is assigned to the rest of the line. It can be defined through other variables -# defined below. Each variable must be defined only once. +#------------------------- Variable definitons ------------------------------------------------------------------------- +# All variable names should be enclosed in ';' and start the line - its value is assigned to the rest of the line. It +# can be defined through other variables defined below. Each variable must be defined only once. # default for most tests ;mgn; ;m; ;g; ;n; @@ -22,13 +21,13 @@ ;n; -ntheta 5 ;p; -prop 1 2 3 ;se; -shape ellipsoid 0.5 1.5 -#----------------------------- List of tests ----------------------------------- -# The format is the following: ' ' -# the first one is coma-separated list of files to compare or 'all' (which compares -# all produced files. is everything after the first space and it is passed +#----------------------------- List of tests --------------------------------------------------------------------------- +# The format is the following: ' ' the first one is coma-separated list of files to +# compare or 'all' (which compares all produced files. is everything after the first space and it is passed # directly to ADDA. all + # testing of different grids - relevant for FFT methods # to remove redundant warnings for ocl_seq, only (2,3,5) numbers are used all -grid 2 ;smn; @@ -97,8 +96,8 @@ all -eps 10 ;mgn; all -h eq_rad all -eq_rad 1 ;mgn; -# it is hard to make meaningful comparison of stdout and log for random placement of granules -# however, optical properties are compared using rather large tolerances +# It is hard to make meaningful comparison of stdout and log for random placement of granules. However, optical +# properties are compared using rather large tolerances all -h granul CrossSec-Y,CrossSec-X,mueller -granul 0.2 0.5 2 -size 8 -shape coated 0.5 ;3m; ;n; CrossSec-Y,CrossSec-X,mueller -granul 0.2 2 2 -size 8 -shape coated 0.5 ;3m; ;n; @@ -208,7 +207,7 @@ all -h scat_matr all -scat_matr muel ;mgn; all -scat_matr ampl ;mgn; all -scat_matr both ;mgn; -all -scat_matr none ;m; ;g; +all -scat_matr none ;m; ;g; all -h shape all -h shape axisymmetric @@ -253,7 +252,7 @@ all -shape sphere ;mgn; all -h shape spherebox all -shape spherebox 0.5 ;2mgn; -all -h size +all -h size all -size 8 ;mgn; all -h store_beam @@ -276,7 +275,7 @@ all -store_int_field ;se; ;mgn; all -h store_scat_grid all -store_scat_grid ;sep; ;mgn; -all -h sym +all -h sym all -sym auto ;mgn; all -sym no ;mgn; all -sym enf ;mgn; diff --git a/tests/2exec/suite_sparse b/tests/2exec/suite_sparse new file mode 100644 index 00000000..4b67bed8 --- /dev/null +++ b/tests/2exec/suite_sparse @@ -0,0 +1,275 @@ +#------------------------- Variable definitons ------------------------------------------------------------------------- +# All variable names should be enclosed in ';' and start the line - its value is assigned to the rest of the line. It +# can be defined through other variables defined below. Each variable must be defined only once. + +# default for most tests +;mgn; ;m; ;g; ;n; +# default for two-domain particles +;2mgn; ;2m; ;g; ;n; +# for large computations (e.g. orientation averaging) +;mg4n; ;m; -shape read sphere4.geom -sym enf ;n; +# default addition to make particle completely non-symmetric +;sep; ;se; ;p; +# default addition for non-spherical shapes +;mn; ;m; ;n; + +# Elementary variables +;m; -m 1.1 0.1 +;2m; -m 1.1 0.1 1.2 0.2 +;3m; -m 1.05 0.1 1.1 0.2 1.2 0.3 +;g; -shape read sphere.geom -sym enf +;n; -ntheta 5 +;p; -prop 1 2 3 +;se; -shape read ellipsoid.geom + +#----------------------------- List of tests --------------------------------------------------------------------------- +# The format is the following: ' ' the first one is coma-separated list of files to +# compare or 'all' (which compares all produced files. is everything after the first space and it is passed +# directly to ADDA. + +all ;g; + +all -h alldir_inp +all -alldir_inp adp.dat -Csca ;mgn; + +all -h anisotr +all -anisotr ;3m; ;g; ;n; + +all -h asym +all -asym ;sep; ;mn; + +all -h beam +all -h beam plane +all -beam plane ;mgn; +all -h beam lminus +all -beam lminus 2 1 2 3 ;mgn; +all -h beam davis3 +all -beam davis3 2 1 2 3 ;mgn; +all -h beam barton5 +all -beam barton5 2 1 2 3 ;mgn; +all -h beam read +all -beam read IncBeam-Y IncBeam-X ;se; ;mn; + +all -h chpoint +all -chpoint 1s -eps 3 ;mgn; +all -h chp_type +all -chp_type normal -chpoint 1s -eps 3 ;mgn; +all -chp_dir chp_tmp -chp_type regular -chpoint 1s -eps 3 ;mgn; +all -h chp_dir +all -chp_dir chp_tmp -chp_type always -eps 3 ;mgn; +all -h chp_load +all -chp_dir chp_tmp -chp_load ;mgn; + +all -h Cpr +all -Cpr ;sep; ;mn; + +all -h Csca +all -Csca ;se; ;mn; + +all -h dir + +all -h dpl +all -dpl 20 ;mgn; + +all -h eps +all -eps 10 ;mgn; + +all -h eq_rad +all -eq_rad 1 ;mgn; + +# It is hard to make meaningful comparison of stdout and log for random placement of granules. However, optical +# properties are compared using rather large tolerances +#all -h granul +#CrossSec-Y,CrossSec-X,mueller -granul 0.2 0.5 2 -size 8 -shape coated 0.5 ;3m; ;n; +#CrossSec-Y,CrossSec-X,mueller -granul 0.2 2 2 -size 8 -shape coated 0.5 ;3m; ;n; + +all -h grid +#all -grid 4 6 8 ;m; ;n; + +all -h +all -h h + +all -h init_field +all -init_field auto ;mgn; +all -init_field zero ;mgn; +all -init_field inc ;mgn; +#all -init_field wkb ;mgn; + +all -h int +all -int fcd ;mgn; +all -int fcd_st ;mgn; +all -int igt ;mg4n; +all -int igt 3 ;mg4n; +all -int igt 3 0.01 ;mg4n; +all -int igt_so ;mgn; +all -int poi ;mgn; +all -int so ;mgn; + +all -h iter +all -iter bicg ;mgn; +all -iter bicgstab ;mgn; +all -iter cgnr ;mgn; +all -iter csym ;mgn; +all -iter qmr ;mgn; +all -iter qmr2 ;mgn; + +all -h jagged +all -jagged 2 ;mg4n; + +all -h lambda +all -lambda 1 ;mgn; + +all -h m +all -m 1.2 0.2 ;g; ;n; + +all -h maxiter +all -maxiter 5 ;mgn; + +all -h no_reduced_fft +all -no_reduced_fft ;mgn; + +all -h no_vol_cor +all -no_vol_cor -size 3 ;mgn; + +all -h ntheta +all -ntheta 10 ;m; ;g; + +all -h opt +all -opt speed ;mgn; +all -opt mem ;mgn; + +all -h orient +all -orient 10 20 30 ;se; ;mn; +all -orient avg ;se; ;mn; +all -orient avg ap.dat ;se; ;mn; + +all -h phi_integr +all -phi_integr 31 ;sep; ;mn; + +all -h pol +all -pol cldr ;p; ;mgn; +all -pol cm ;mgn; +all -pol dgf ;mgn; +all -pol fcd ;mgn; +all -pol igt_so ;mgn; +all -pol ldr ;p; ;mgn; +all -pol ldr avgpol ;p; ;mgn; +all -pol rrc ;mgn; +all -pol so ;p; ;mgn; + +all -h prognosis +all -prognosis ;g; + +all -h prop +all -prop 5 1 3 ;se; ;mn; + +all -h recalc_resid +all -recalc_resid ;mgn; + +#all -h save_geom +#all -save_geom -shape read coated.geom -prognosis +#all -h sg_format +#all -save_geom ;se; -prognosis -sg_format text +#all -save_geom ;se; -prognosis -sg_format text_ext +#all -save_geom ;se; -prognosis -sg_format ddscat6 +#all -save_geom ;se; -prognosis -sg_format ddscat7 + +all -h scat +all -scat dr ;mgn; +all -scat fin ;mgn; +all -scat igt_so ;mgn; +all -scat so ;mgn; + +all -h scat_grid_inp +all -scat_grid_inp sp.dat ;mgn; + +all -h scat_matr +all -scat_matr muel ;mgn; +all -scat_matr ampl ;mgn; +all -scat_matr both ;mgn; +all -scat_matr none ;m; ;g; + +all -h shape +#all -h shape axisymmetric +#all -shape axisymmetric 196.txt ;mgn; +#all -h shape bicoated +#all -shape bicoated 3 0.5 ;2mgn; +#all -h shape biellipsoid +#all -shape biellipsoid 0.5 1.5 0.75 0.5 1.5 ;2mgn; +#all -h shape bisphere +#all -shape bisphere 2 ;mgn; +#all -h shape box +#all -shape box ;mgn; +#all -shape box 0.5 1.5 ;mgn; +#all -h shape capsule +#all -shape capsule 1.5 ;mgn; +#all -h shape chebyshev +#all -shape chebyshev 0.3 5 ;mgn; +#all -h shape coated +#all -shape coated 0.5 ;2mgn; +#all -shape coated 0.4 0.1 0.15 0.2 ;2mgn; +#all -h shape cylinder +#all -shape cylinder 1 ;mgn; +#all -h shape egg +#all -shape egg 0.5 0.2 ;mgn; +#all -h shape ellipsoid +#all -shape ellipsoid 0.25 2 ;mgn; +#all -h shape line +#all -shape line -grid 16 ;m; ;n; +#all -h shape plate +#all -shape plate 0.5 ;mgn; +#all -h shape prism +#all -shape prism 5 1.5 ;mgn; +#all -h shape rbc +#all -shape rbc 0.3 0.1 0.3 ;mgn; +all -h shape read +all -shape read ellipsoid.geom ;m; ;n; +all -shape read coated.geom ;2m; ;n; +all -shape read ell_ddscat6.dat ;m; ;n; +all -shape read ell_ddscat7.dat ;m; ;n; +#all -h shape sphere +#all -shape sphere ;mgn; +#all -h shape spherebox +#all -shape spherebox 0.5 ;2mgn; + +all -h size +all -size 8 ;mgn; + +all -h store_beam +all -store_beam ;se; ;mn; + +all -h store_dip_pol +all -store_dip_pol ;se; ;mn; + +all -h store_force +all -store_force -Cpr ;sep; ;mn; +all -store_force ;mgn; + +#all -h store_grans +#granules -store_grans -granul 0.2 1 -size 4 ;2mgn; + +all -h store_int_field +all -store_int_field ;se; ;mn; + +all -h store_scat_grid +all -store_scat_grid ;sep; ;mn; + +all -h sym +all -sym auto ;mn; -shape read sphere.geom +all -sym no ;mn; -shape read sphere.geom +all -sym enf ;mn; -shape read sphere.geom +all -sym auto ;sep; ;mn; +all -sym no ;sep; ;mn; +all -sym enf ;sep; ;mn; + +all -h test +all -test ;mgn; + +all -h V +all -V + +all -h vec +all -vec ;sep; ;mn; + +all -h yz +all -yz -store_scat_grid ;mgn;