CSE 260 project: sharing the whole local particle set with neighboring ranks.
SGE job submission script:
#!/bin/bash
# This script is interpreted by the Bourne shell, sh
#
# Documentation for SGE is found in:
# http://docs.oracle.com/cd/E19279-01/820-3257-12/n1ge.html
#
# Tell SGE which shell to run the job script in rather than depending
# on SGE to try and figure it out.
#$ -S /bin/bash
#
# Export all my environment variables to the job
#$ -V
#
# Run the job in the same directory from which you submitted it
#$ -cwd
#
#
# --- Don't change anything above this line ---
#
# Give a name to the job
#$ -N MPI
#
# Specify a time limit for the job, not more than 30 minutes
#$ -l h_rt=00:10:00
#
# Specify the parallel environment and number of cores.
# If not a multiple of 8, you'll get the whole node anyway
#$ -pe orte 8
#
# Join stdout and stderr so they are reported in the job output file
#$ -j y
#
#
# Choose the queue to run the job
#
# Debug queue: only one node may be used at a time for up to 30 minutes
# Interactive or batch jobs, maximum of 1 job per user running at a time
#
# Normal queue: job may use all available compute nodes (256 cores)
# for up to 60 minutes
# Batch jobs, maximum of 2 jobs per user running at a time
# To use more than one node, specify the "normal" queue
#$ -q normal.q
# #$ -q debug.q
#
# Specifies the circumstances under which mail is to be sent to the job owner
# defined by the -M option. For example, the options "bea" cause mail to be sent
# at the beginning, end, and abort time (if it happens) of the job.
# Option "n" means no mail will be sent.
#$ -m aeb
#
# *** Change to the address you want the notification sent to, and
# *** REMOVE the blank between the # and the $
#$ -M jesper.jln@gmail.com
#
echo
echo " *** Current working directory"
pwd
echo
echo " *** Compiler"
# Report which compiler we are using, and the environment
mpicc -v
echo
echo " *** Environment"
printenv
echo
echo ">>> Job Starts"
date
mpirun -np $NSLOTS ./mpi -n 5000 -proc_width 4 -proc_height 2 -o mpi.txt -c checksum.txt
date
echo ">>> Job Ends"
common.cpp (shared simulation routines):
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <float.h>
#include <string.h>
#include <math.h>
#include <time.h>
#include <mpi.h>
#include <sys/time.h>
#include "common.h"

double size;

//
//  tuned constants
//
#define density 0.0005
#define mass    0.01
#define cutoff  0.01
#define min_r   (cutoff/100)
#define dt      0.0005

//
//  timer
//
double read_timer( )
{
    static bool initialized = false;
    static struct timeval start;
    struct timeval end;
    if( !initialized )
    {
        gettimeofday( &start, NULL );
        initialized = true;
    }
    gettimeofday( &end, NULL );
    return (end.tv_sec - start.tv_sec) + 1.0e-6 * (end.tv_usec - start.tv_usec);
}

//
//  keep density constant
//
double set_size( int n )
{
    size = sqrt( density * n );
    return size;
}

//
//  Initialize the particle positions and velocities
//
void init_particles( int n, particle_t *p )
{
    srand48( 1 );

    int sx = (int)ceil(sqrt((double)n));
    int sy = (n+sx-1)/sx;

    int *shuffle = (int*)malloc( n * sizeof(int) );
    for( int i = 0; i < n; i++ )
        shuffle[i] = i;

    for( int i = 0; i < n; i++ )
    {
        //
        //  make sure particles are not spatially sorted
        //
        int j = lrand48()%(n-i);
        int k = shuffle[j];
        shuffle[j] = shuffle[n-i-1];

        //
        //  distribute particles evenly to ensure proper spacing
        //
        p[i].x = size*(1.+(k%sx))/(1+sx);
        p[i].y = size*(1.+(k/sx))/(1+sy);
        p[i].index = i;

        //
        //  assign random velocities within a bound
        //
        p[i].vx = drand48()*2-1;
        p[i].vy = drand48()*2-1;
    }
    free( shuffle );
}

//
//  interact two particles
//
void apply_force( particle_t &particle, particle_t &neighbor )
{
    double dx = neighbor.x - particle.x;
    double dy = neighbor.y - particle.y;
    double r2 = dx * dx + dy * dy;
    if( r2 > cutoff*cutoff )
        return;
    r2 = fmax( r2, min_r*min_r );
    double r = sqrt( r2 );

    //
    //  very simple short-range repulsive force
    //
    double coef = ( 1 - cutoff / r ) / r2 / mass;
    particle.ax += coef * dx;
    particle.ay += coef * dy;
}

//
//  integrate the ODE
//
void move( particle_t &p )
{
    //
    //  slightly simplified Velocity Verlet integration
    //  conserves energy better than explicit Euler method
    //
    p.vx += p.ax * dt;
    p.vy += p.ay * dt;
    p.x  += p.vx * dt;
    p.y  += p.vy * dt;

    //
    //  bounce from walls
    //
    while( p.x < 0 || p.x > size )
    {
        p.x  = p.x < 0 ? -p.x : 2*size-p.x;
        p.vx = -p.vx;
    }
    while( p.y < 0 || p.y > size )
    {
        p.y  = p.y < 0 ? -p.y : 2*size-p.y;
        p.vy = -p.vy;
    }
}

//
//  I/O routines
//
void save( FILE *f, int n, particle_t *p )
{
    static bool first = true;
    if( first )
    {
        fprintf( f, "%d %g\n", n, size );
        first = false;
    }
    for( int i = 0; i < n; i++ )
        fprintf( f, "%g %g\n", p[i].x, p[i].y );
}

//
//  command line option processing
//
int find_option( int argc, char **argv, const char *option )
{
    for( int i = 1; i < argc; i++ )
        if( strcmp( argv[i], option ) == 0 )
            return i;
    return -1;
}

int read_int( int argc, char **argv, const char *option, int default_value )
{
    int iplace = find_option( argc, argv, option );
    if( iplace >= 0 && iplace < argc-1 )
        return atoi( argv[iplace+1] );
    return default_value;
}

char *read_string( int argc, char **argv, const char *option, char *default_value )
{
    int iplace = find_option( argc, argv, option );
    if( iplace >= 0 && iplace < argc-1 )
        return argv[iplace+1];
    return default_value;
}
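As an illustration of how these routines fit together (not part of the assignment code), a minimal serial driver over the common.h API would look roughly like this: pick the domain size, initialize the particles, then alternate the O(n^2) force evaluation with the integration step.

#include <stdlib.h>
#include "common.h"

// Hypothetical serial driver: quadratic force evaluation, no MPI, no file output.
int serial_demo( int n )
{
    particle_t *p = (particle_t*) malloc( n * sizeof(particle_t) );
    set_size( n );                          // choose the domain size for constant density
    init_particles( n, p );                 // random, well-spaced initial positions and velocities
    for( int step = 0; step < NSTEPS; step++ )
    {
        for( int i = 0; i < n; i++ )
        {
            p[i].ax = p[i].ay = 0;
            for( int j = 0; j < n; j++ )
                apply_force( p[i], p[j] );  // short-range repulsion, a no-op beyond the cutoff
        }
        for( int i = 0; i < n; i++ )
            move( p[i] );                   // Velocity Verlet step plus wall bounce
    }
    free( p );
    return 0;
}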
common.h (shared declarations):
#ifndef __CS267_COMMON_H__
#define __CS267_COMMON_H__

#include <mpi.h>   // neighbor_t below uses MPI_Request / MPI_Status

inline int min( int a, int b ) { return a < b ? a : b; }
inline int max( int a, int b ) { return a > b ? a : b; }

//
//  saving parameters
//
const int NSTEPS = 1000;
const int SAVEFREQ = 10;

//
//  particle data structure
//
//  index is stored as a double so that particle_t is seven contiguous doubles,
//  matching the PARTICLE datatype built with MPI_Type_contiguous( 7, MPI_DOUBLE, ... ) in mpi.cpp
//
typedef struct
{
    double index;
    double x;
    double y;
    double vx;
    double vy;
    double ax;
    double ay;
} particle_t;

//
//  one entry per surrounding rank (plus self): grid position, global rank,
//  the rectangular sub-domain it owns, and the handles for the asynchronous
//  size/particle exchange in mpi.cpp
//
typedef struct
{
    int offset;
    int size;
    int proc_x;
    int proc_y;
    int rank;
    double from_x;
    double from_y;
    double to_x;
    double to_y;
    MPI_Request recv_req;
    MPI_Request send_req;
    MPI_Status recv_stat;
    MPI_Status send_stat;
} neighbor_t;

//
//  timing routines
//
double read_timer( );

//
//  simulation routines
//
double set_size( int n );
void init_particles( int n, particle_t *p );
void apply_force( particle_t &particle, particle_t &neighbor );
void move( particle_t &p );

//
//  I/O routines
//
FILE *open_save( char *filename, int n );
void save( FILE *f, int n, particle_t *p );

//
//  argument processing routines
//
int find_option( int argc, char **argv, const char *option );
int read_int( int argc, char **argv, const char *option, int default_value );
char *read_string( int argc, char **argv, const char *option, char *default_value );

#endif
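The neighbor_t fields encode the convention mpi.cpp relies on: ranks are laid out row-major on a proc_width x proc_height grid, and each rank owns one axis-aligned rectangle of the square domain. A small sketch of that mapping, with hypothetical local names, just to make the convention explicit:

// Illustrative sketch only: fills in the geometric fields of a neighbor_t for a
// given rank, matching the row-major layout and sub-domain bounds used in mpi.cpp.
void rank_to_subdomain( int rank, int proc_width, int proc_height, double size, neighbor_t *nb )
{
    nb->rank   = rank;
    nb->proc_x = rank % proc_width;        // column in the processor grid
    nb->proc_y = rank / proc_width;        // row in the processor grid
    double size_x = size / proc_width;     // width of one sub-domain
    double size_y = size / proc_height;    // height of one sub-domain
    nb->from_x = size_x * nb->proc_x;  nb->to_x = nb->from_x + size_x;
    nb->from_y = size_y * nb->proc_y;  nb->to_y = nb->from_y + size_y;
}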
Makefile:
HOST = $(shell hostname)
BANG = $(shell expr match `hostname` ccom-bang)
BANG-COMPUTE = $(shell expr match `hostname` compute)
LILLIPUT = $(shell expr match `hostname` lilliput)

ifneq ($(BANG), 0)
PUB = /share/class/public/cse260-fa12
include $(PUB)/Arch/arch.gnu.generic
else
ifneq ($(BANG-COMPUTE), 0)
PUB = /share/class/public/cse260-fa12
include $(PUB)/Arch/arch.gnu.generic
else
ifneq ($(LILLIPUT), 0)
PUB = /class/public/cse260-fa12
include $(PUB)/Arch/arch.intel.generic
else
# PUB = /Users/baden/lib
include $(PUB)/Arch/arch.gnu
# include $(PUB)/Arch/arch.gnu-4.5
endif
endif
endif

#
# Add symbol table information for gdb/cachegrind
ifeq ($(debug), 1)
    CFLAGS += -g
    LDFLAGS += -g
    C++FLAGS += -g
endif

# Add symbol table information for gprof
ifeq ($(gprof), 1)
    CFLAGS += -g -pg
    C++FLAGS += -g -pg
    LDFLAGS += -g -pg
endif

# If you want to compile for single precision,
# specify single=1 on the "make" command line
ifeq ($(single), 1)
else
    C++FLAGS += -D_DOUBLE
    CFLAGS += -D_DOUBLE
endif

# If you want to compile so that you call the plotter for
# flattened 2D arrays (implemented as 1D arrays)
# specify flattened=1 on the "make" command line
ifeq ($(flattened), 1)
    C++FLAGS += -DPLOT1D
    CFLAGS += -DPLOT1D
endif

# If you want to use restrict pointers, make restrict=1
# This applies to the hand coded version
ifeq ($(restrict), 1)
    C++FLAGS += -D__RESTRICT
    CFLAGS += -D__RESTRICT
ifneq ($(CARVER), 0)
    C++FLAGS += -restrict
    CFLAGS += -restrict
endif
endif

#DEBUG += -DDEBUG

TARGETS = mpi
OBJECTS = common.o mpi.o

#ifeq ($(no-mpi),1)
#OBJECTS += Timer.o
#endif

app: $(TARGETS)

mpi: mpi.o common.o
	$(C++LINK) $(LDFLAGS) -o $@ mpi.o common.o $(LDLIBS)

clean:
	$(RM) *.o $(TARGETS);
	$(RM) core.*;
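Typical build commands on one of the recognized course machines (the optional knobs correspond to the conditionals above):

make mpi            # build the MPI binary from mpi.o and common.o
make mpi debug=1    # add -g for gdb/cachegrind
make mpi gprof=1    # add -g -pg for gprof
make clean          # remove objects, the mpi binary and core files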
mpi.cpp (MPI particle simulation driver):
#include <mpi.h>
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include "common.h"

//
//  indices into the neighbors array: self plus the 8 surrounding ranks
//
const int SELF            = 0,
          BOTTOM_STRAIGHT = 1,
          BOTTOM_LEFT     = 2,
          STRAIGHT_LEFT   = 3,
          TOP_LEFT        = 4,
          TOP_STRAIGHT    = 5,
          TOP_RIGHT       = 6,
          STRAIGHT_RIGHT  = 7,
          BOTTOM_RIGHT    = 8;

//
//  benchmarking program
//
int main( int argc, char **argv )
{
    //
    //  process command line parameters
    //
    if( find_option( argc, argv, "-h" ) >= 0 )
    {
        printf( "Options:\n" );
        printf( "-h to see this help\n" );
        printf( "-n <int> to set the number of particles\n" );
        printf( "-proc_width <int> to set the width of the processor grid\n" );
        printf( "-proc_height <int> to set the height of the processor grid\n" );
        printf( "-o <filename> to specify the output file name\n" );
        printf( "-c <checksum> to specify the output checksum file name\n" );
        return 0;
    }

    int n = read_int( argc, argv, "-n", 1000 );
    int proc_width = read_int( argc, argv, "-proc_width", 0 );   // required
    int proc_height = read_int( argc, argv, "-proc_height", 0 ); // required
    char *savename = read_string( argc, argv, "-o", NULL );
    char *checksum = read_string( argc, argv, "-c", NULL );

    //
    //  set up MPI
    //
    int n_proc, myrank;
    MPI_Init( &argc, &argv );
    MPI_Comm_size( MPI_COMM_WORLD, &n_proc );
    MPI_Comm_rank( MPI_COMM_WORLD, &myrank );

    // Ensure the requested processor grid matches the number of MPI ranks
    if( (proc_height * proc_width) != n_proc )
    {
        if( !myrank )
            printf( "\n *** The number of processors in the geometry (%d) is not the same as the number requested (%d)\n", proc_height*proc_width, n_proc );
        MPI_Abort( MPI_COMM_WORLD, -1 );
    }

    int my_x = myrank % proc_width;
    int my_y = myrank / proc_width;

    //
    //  set up the neighbor topology: grid coordinates of self and the 8 surrounding ranks
    //
    neighbor_t *neighbors = (neighbor_t*) malloc( 9 * sizeof(neighbor_t) );
    neighbors[ SELF            ].proc_x = my_x;     neighbors[ SELF            ].proc_y = my_y;
    neighbors[ BOTTOM_STRAIGHT ].proc_x = my_x;     neighbors[ BOTTOM_STRAIGHT ].proc_y = my_y + 1;
    neighbors[ BOTTOM_LEFT     ].proc_x = my_x - 1; neighbors[ BOTTOM_LEFT     ].proc_y = my_y + 1;
    neighbors[ STRAIGHT_LEFT   ].proc_x = my_x - 1; neighbors[ STRAIGHT_LEFT   ].proc_y = my_y;
    neighbors[ TOP_LEFT        ].proc_x = my_x - 1; neighbors[ TOP_LEFT        ].proc_y = my_y - 1;
    neighbors[ TOP_STRAIGHT    ].proc_x = my_x;     neighbors[ TOP_STRAIGHT    ].proc_y = my_y - 1;
    neighbors[ TOP_RIGHT       ].proc_x = my_x + 1; neighbors[ TOP_RIGHT       ].proc_y = my_y - 1;
    neighbors[ STRAIGHT_RIGHT  ].proc_x = my_x + 1; neighbors[ STRAIGHT_RIGHT  ].proc_y = my_y;
    neighbors[ BOTTOM_RIGHT    ].proc_x = my_x + 1; neighbors[ BOTTOM_RIGHT    ].proc_y = my_y + 1;

    //
    //  allocate generic resources
    //
    FILE *fsave = savename && myrank == 0 ? fopen( savename, "w" ) : NULL;
    FILE *fchecksum = checksum && myrank == 0 ? fopen( checksum, "w" ) : NULL;

    particle_t *particles = (particle_t*) malloc( n * sizeof(particle_t) );
    particle_t *particles_prev = (particle_t*) malloc( n * sizeof(particle_t) );

    MPI_Datatype PARTICLE;
    MPI_Type_contiguous( 7, MPI_DOUBLE, &PARTICLE );
    MPI_Type_commit( &PARTICLE );

    //
    //  initialize and distribute the particles (it's fine to leave this unoptimized)
    //
    double size = set_size( n );
    double size_x = size / proc_width;
    double size_y = size / proc_height;

    //
    //  set up the data partitioning across processors
    //
    if( !myrank ) // initialize all particles on rank 0
        init_particles( n, particles_prev );
    MPI_Bcast( particles_prev, n, PARTICLE, 0, MPI_COMM_WORLD );

    for( int i = 0; i < 9; i++ )
    {
        neighbors[ i ].from_x = size_x * neighbors[ i ].proc_x;
        neighbors[ i ].to_x   = size_x + neighbors[ i ].from_x;
        neighbors[ i ].from_y = size_y * neighbors[ i ].proc_y;
        neighbors[ i ].to_y   = size_y + neighbors[ i ].from_y;
        if( neighbors[ i ].proc_x > -1 && neighbors[ i ].proc_x < proc_width && neighbors[ i ].proc_y > -1 && neighbors[ i ].proc_y < proc_height )
        {
            neighbors[ i ].rank = neighbors[ i ].proc_x + neighbors[ i ].proc_y * proc_width;
            neighbors[ i ].offset = (i == 0 ? 0 : neighbors[ i - 1 ].size + neighbors[ i - 1 ].offset);
            neighbors[ i ].size = 0;
            // bucket the broadcast particles that fall inside this neighbor's sub-domain
            for( int j = 0; j < n; j++ )
            {
                if( particles_prev[j].x >= neighbors[i].from_x && particles_prev[j].x < neighbors[i].to_x &&
                    particles_prev[j].y >= neighbors[i].from_y && particles_prev[j].y < neighbors[i].to_y )
                {
                    particles[ neighbors[ i ].size + neighbors[ i ].offset ] = particles_prev[j];
                    neighbors[ i ].size += 1;
                }
            }
        }
        else
        {
            // this neighbor lies outside the processor grid
            neighbors[ i ].rank = -1;
            neighbors[ i ].offset = neighbors[ i - 1 ].size + neighbors[ i - 1 ].offset;
            neighbors[ i ].size = 0;
        }
    }
    particle_t *tmp = particles; particles = particles_prev; particles_prev = tmp;

    // Rank 0 gathers the per-rank particle counts and checks that no particle was lost
    int *n_proc_sizes = new int[n_proc];
    int *n_proc_offset = new int[n_proc];
    int count = 0;
    MPI_Gather( &neighbors[SELF].size, 1, MPI_INT, n_proc_sizes, 1, MPI_INT, 0, MPI_COMM_WORLD );
    if( !myrank )
    {
        int real_n = 0;
        for( int i = 0; i < n_proc; i++ )
            real_n += n_proc_sizes[i];
        printf( "Before correctness check: n: %d, real-n: %d\n", n, real_n );
    }

    //
    //  simulate a number of time steps
    //
    double simulation_time = read_timer( );
    for( int step = 0; step < NSTEPS; step++ )
    {
        //
        //  save current step if necessary (slightly different semantics than in other codes)
        //
        if( (step%SAVEFREQ) == 0 && savename )
        {
            // collect the particle counts from every rank
            MPI_Gather( &neighbors[SELF].size, 1, MPI_INT, n_proc_sizes, 1, MPI_INT, 0, MPI_COMM_WORLD );
            // calculate the displacement of each rank's block in the gathered array
            if( myrank == 0 )
            {
                for( int i = 0; i < n_proc; i++ )
                {
                    if( i == 0 )
                        n_proc_offset[i] = 0;
                    else
                        n_proc_offset[i] = n_proc_offset[i-1] + n_proc_sizes[i-1];
                }
            }
            MPI_Gatherv( particles_prev, neighbors[SELF].size, PARTICLE, particles, n_proc_sizes, n_proc_offset, PARTICLE, 0, MPI_COMM_WORLD );
            if( myrank == 0 && fsave )
                save( fsave, n, particles );
        }

        // total number of particles to consider: local particles plus ghosts from all neighbors
        int look_nodes = neighbors[ 8 ].offset + neighbors[ 8 ].size;

        // post receives for the new particle counts of the neighbors
        for( int i = 1; i < 9; i++ )
            if( neighbors[i].rank != -1 )
                MPI_Irecv( &neighbors[i].size, 1, MPI_INT, neighbors[i].rank, 0, MPI_COMM_WORLD, &neighbors[i].recv_req );

        //
        //  compute forces between the local particles and the neighbors' ghost particles
        //
        for( int i = 0; i < look_nodes; i++ )
        {
            particles_prev[i].ax = particles_prev[i].ay = 0;
            for( int j = 0; j < look_nodes; j++ )
                apply_force( particles_prev[i], particles_prev[j] );
        }

        //
        //  move particles
        //
        count = 0;
        for( int i = 0; i < look_nodes; i++ )
        {
            move( particles_prev[i] );
            // keep the particles that are still inside this rank's sub-domain
            if( particles_prev[i].x >= neighbors[SELF].from_x && particles_prev[i].x < neighbors[SELF].to_x &&
                particles_prev[i].y >= neighbors[SELF].from_y && particles_prev[i].y < neighbors[SELF].to_y )
            {
                particles[count] = particles_prev[i];
                count++;
            }
        }
        neighbors[SELF].size = count;

        // send the new local particle count to the neighbors
        for( int i = 1; i < 9; i++ )
            if( neighbors[i].rank != -1 )
                MPI_Isend( &neighbors[SELF].size, 1, MPI_INT, neighbors[i].rank, 0, MPI_COMM_WORLD, &neighbors[i].send_req );

        // once a neighbor's count arrives, compute its offset and post the receive for its particles
        for( int i = 1; i < 9; i++ )
        {
            if( neighbors[i].rank != -1 )
                MPI_Wait( &neighbors[i].recv_req, &neighbors[i].recv_stat );
            neighbors[ i ].offset = neighbors[ i - 1 ].size + neighbors[ i - 1 ].offset;
            if( neighbors[i].rank != -1 )
                MPI_Irecv( &particles[neighbors[i].offset], neighbors[i].size, PARTICLE, neighbors[i].rank, 0, MPI_COMM_WORLD, &neighbors[i].recv_req );
        }

        // send the whole local particle set to every neighbor
        for( int i = 1; i < 9; i++ )
        {
            if( neighbors[i].rank != -1 )
            {
                MPI_Wait( &neighbors[i].send_req, &neighbors[i].send_stat );
                MPI_Isend( &particles[0], neighbors[SELF].size, PARTICLE, neighbors[i].rank, 0, MPI_COMM_WORLD, &neighbors[i].send_req );
            }
        }

        particle_t *tmp = particles; particles = particles_prev; particles_prev = tmp;

        // wait for all outstanding particle sends and receives before the next step
        for( int i = 1; i < 9; i++ )
        {
            if( neighbors[i].rank != -1 )
            {
                MPI_Wait( &neighbors[i].recv_req, &neighbors[i].recv_stat );
                MPI_Wait( &neighbors[i].send_req, &neighbors[i].send_stat );
            }
        }
    }
    simulation_time = read_timer( ) - simulation_time;

    MPI_Gather( &neighbors[SELF].size, 1, MPI_INT, n_proc_sizes, 1, MPI_INT, 0, MPI_COMM_WORLD );
    if( !myrank )
    {
        int real_n = 0;
        for( int i = 0; i < n_proc; i++ )
        {
            if( i == 0 )
                n_proc_offset[i] = 0;
            else
                n_proc_offset[i] = n_proc_offset[i-1] + n_proc_sizes[i-1];
            real_n += n_proc_sizes[i];
        }
        printf( "After correctness check: n: %d, real-n: %d\n", n, real_n );
        printf( "n = %d, n_procs = %d, proc_width = %d, proc_height = %d, simulation time = %g s\n", n, n_proc, proc_width, proc_height, simulation_time );
    }

    MPI_Gatherv( particles_prev, neighbors[SELF].size, PARTICLE, particles, n_proc_sizes, n_proc_offset, PARTICLE, 0, MPI_COMM_WORLD );
    // reorder the gathered particles by their original index before writing the checksum
    if( !myrank && fchecksum )
    {
        for( int j = 0; j < n; j++ )
            particles_prev[(int)particles[j].index] = particles[j];
        save( fchecksum, n, particles_prev );
    }

    //
    //  release resources
    //
    free( neighbors );
    free( particles );
    free( particles_prev );
    delete [] n_proc_sizes;
    delete [] n_proc_offset;
    if( fsave )
        fclose( fsave );
    if( fchecksum )
        fclose( fchecksum );
    MPI_Finalize( );

    return 0;
}
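For a quick run outside the batch system, the only constraint the program enforces is that proc_width * proc_height equals the number of MPI ranks; for example (particle counts and file names are arbitrary):

# 8 ranks arranged as a 4x2 processor grid, as in the job script above
mpirun -np 8 ./mpi -n 5000 -proc_width 4 -proc_height 2 -o mpi.txt -c checksum.txt
# 4 ranks as a 2x2 grid; -np must equal proc_width * proc_height
mpirun -np 4 ./mpi -n 2000 -proc_width 2 -proc_height 2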