NESSi  v1.0.2
The NonEquilibrium Systems Simulation Library
cntr_distributed_array_impl.hpp
/*
 * Copyright: D. Golez (2018), The Simons Foundation (2019)
 * Authors: D. Golez, H. U. R. Strand
 */

#ifndef CNTR_DISTRIBUTED_ARRAY_IMPL_H
#define CNTR_DISTRIBUTED_ARRAY_IMPL_H

namespace cntr {
/* #######################################################################################
#
# CONSTRUCTION/DESTRUCTION
#
########################################################################################*/
template <typename T>
distributed_array<T>::distributed_array(){
  data_=0;
  maxlen_=0;
  blocksize_=0;
  n_=0;
  tid_=0;
  ntasks_=1;
  tid_map_=std::vector<int>(0);
}

template <typename T> distributed_array<T>::~distributed_array(){
  if(data_!=0) delete [] data_;
}
template <typename T> distributed_array<T>::distributed_array(const distributed_array &g){
  size_t len;
  blocksize_=g.blocksize_;
  n_=g.n();
  tid_=g.tid();
  ntasks_=g.ntasks();
  tid_map_=g.tid_map();
  maxlen_=g.maxlen();
  len=maxlen_*n_;
  if(len>0){
    data_ = new T [len];
    memcpy(data_, g.data(), sizeof(T)*len);
  }else{
    data_=0;
  }
}
template <typename T> distributed_array<T> & distributed_array<T>::operator=(const distributed_array &g){
  size_t len;
  if(this==&g) return *this;
  if(data_!=0) delete [] data_;
  blocksize_=g.blocksize();
  n_=g.n();
  tid_=g.tid();
  ntasks_=g.ntasks();
  tid_map_=g.tid_map();
  maxlen_=g.maxlen();
  len=maxlen_*n_;
  if(len>0){
    data_ = new T [len];
    memcpy(data_, g.data(), sizeof(T)*len);
  }else{
    data_=0;
  }
  return *this;
}

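/* Example (illustrative sketch, not part of the library): the copy constructor and
 * assignment operator above perform a deep copy, so the two objects own separate
 * buffers afterwards. The types and sizes below are hypothetical.
 *
 *   cntr::distributed_array<double> a(4, 16, false); // 4 blocks of up to 16 entries, no MPI
 *   cntr::distributed_array<double> b(a);            // deep copy: b.data() != a.data()
 *   b.clear();                                       // zeroes b only; a is unchanged
 */
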
template <typename T> distributed_array<T>::distributed_array(int n,int maxlen,bool mpi){
  assert(0<=maxlen && 0<=n);
  size_t len;
  maxlen_=maxlen;
  blocksize_ = maxlen_;
  n_=n;
  tid_map_.resize(n_);
  len=maxlen_*n_;
  if(len==0){
    data_=0;
  }else{
    data_ = new T [len];
    memset(data_, 0, sizeof(T)*len);
  }

  if(mpi){
    #if CNTR_USE_MPI==1
    MPI_Comm_size(MPI_COMM_WORLD, &ntasks_);
    MPI_Comm_rank(MPI_COMM_WORLD, &tid_);
    #else
    tid_=0;
    ntasks_=1;
    #endif
  }else{
    tid_=0;
    ntasks_=1;
  }
  {
    // Distribute the n blocks as evenly as possible over the MPI tasks: each task
    // receives either floor(n/ntasks_) or ceil(n/ntasks_) consecutive blocks.
    int nr_alloced = 0;
    int remainder,buckets;
    for(int i=0;i<ntasks_;i++){
      remainder = n - nr_alloced;
      buckets = (ntasks_ - i);
      int size=remainder/buckets;
      for(int k=0;k<size;k++){
        tid_map_[nr_alloced+k]=i;
      }
      nr_alloced += size;
    }
  }
}
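/* Example (illustrative sketch, not part of the library): with n = 10 blocks and
 * ntasks_ = 4, the loop above assigns
 *   rank 0: blocks 0-1,  rank 1: blocks 2-3,  rank 2: blocks 4-6,  rank 3: blocks 7-9,
 * i.e. the block counts differ by at most one. A typical construction, assuming MPI
 * has already been initialized, could look like
 *
 *   cntr::distributed_array<std::complex<double>> A(10, 100, true); // 10 blocks, <=100 entries each
 *   std::complex<double> *mine = A.block(A.firstblock_rank());      // first block owned by this rank
 */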

template <typename T> T* distributed_array<T>::block(int j){
  assert(0<=j && j<=n_-1);
  return data_+j*blocksize_;
}

template <typename T> void distributed_array<T>::clear(void){
  if(data_!=0) memset(data_, 0, sizeof(T)*maxlen_*n_);
}

template <typename T> void distributed_array<T>::reset_blocksize(int blocksize){
  assert(0<=blocksize && blocksize<=(int)maxlen_);
  clear();
  blocksize_=blocksize;
}
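/* Example (illustrative sketch, not part of the library), continuing the sketch above
 * (A was built with maxlen = 100): reset_blocksize() lets the same allocation be
 * reused with a shorter active block length, as long as it stays within maxlen_.
 * The value 64 is hypothetical.
 *
 *   A.reset_blocksize(64);  // clears the data; block(j) then points at data_ + j*64
 *                           // and the MPI routines transfer 64 elements per block
 */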

template <typename T> int distributed_array<T>::numblock_rank(void){
  int rank_totblock=0;
  for(int i=0;i<n_;i++){
    if(tid_map_[i]==tid_){
      rank_totblock+=1;
    }
  }
  return rank_totblock;
}

template <typename T> int distributed_array<T>::firstblock_rank(void){
  int rank_firstblock=0;
  for(int i=0;i<n_;i++){
    if(tid_map_[i]==tid_){
      rank_firstblock=i;
      break;
    }
  }
  return rank_firstblock;
}
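/* Example (illustrative sketch, not part of the library): because each rank owns a
 * contiguous range of blocks, numblock_rank() and firstblock_rank() can be combined
 * to loop over the locally owned blocks only. Note that firstblock_rank() returns 0
 * when the rank owns no block, so checking numblock_rank() first is advisable.
 *
 *   int nloc  = A.numblock_rank();
 *   int first = A.firstblock_rank();
 *   for(int j = first; j < first + nloc; j++){
 *       std::complex<double> *blk = A.block(j);  // fill the blocks this rank owns
 *   }
 */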

/* #######################################################################################
#
# MPI UTILS
#
########################################################################################*/
// If MPI is not used, all routines are given trivial no-MPI versions that assume
// ntasks=1 and do nothing.
#if CNTR_USE_MPI==0
template <typename T> void distributed_array<T>::mpi_bcast_block(int j){
  // do nothing
}
template <typename T> void distributed_array<T>::mpi_bcast_all(void){
  // do nothing
}
template <typename T> void distributed_array<T>::mpi_gather(void){
  // do nothing
}
#else // CNTR_USE_MPI==1

template <typename T> void distributed_array<T>::mpi_send_block(int j,int dest){
  assert(0<=dest && dest<=ntasks_-1);
  int root=tid_map_[j];
  if(dest==root){
    return;
  }else{
    int tag=100;
    size_t int_per_t=sizeof(T)/sizeof(int);
    size_t len=int_per_t*blocksize_;
    if(tid_map_[j]==tid_){
      MPI_Send((int*) block(j), len, MPI_INT, dest, tag, MPI_COMM_WORLD);
    }
    if(tid_==dest){
      MPI_Recv((int*) block(j), len, MPI_INT, root, tag, MPI_COMM_WORLD,
               MPI_STATUS_IGNORE);
    }
  }
}
template <typename T> void distributed_array<T>::mpi_gather(int dest){
  assert(0<=dest && dest<=ntasks_-1);
  for(int j=0;j<n_;j++) mpi_send_block(j,dest);
}

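/* Example (illustrative sketch, not part of the library): a typical pattern is to let
 * every rank fill its own blocks and then collect the complete array on one rank,
 * here rank 0, e.g. for writing output.
 *
 *   // ... each rank fills A.block(j) for its owned block indices j ...
 *   A.mpi_gather(0);          // internally calls mpi_send_block(j, 0) for every block
 *   if(A.tid() == 0){
 *       // rank 0 now holds all n() blocks in A.data()
 *   }
 */
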
template <typename T> void distributed_array<T>::mpi_bcast_block(int j){
  int root=tid_map_[j];
  size_t int_per_t=sizeof(T)/sizeof(int);
  assert(int_per_t*sizeof(int)==sizeof(T));
  size_t len=int_per_t*blocksize_;
  MPI_Bcast((int*)block(j), len, MPI_INT, root, MPI_COMM_WORLD);
}

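/* Example (illustrative sketch, not part of the library): after the owner of block j
 * has updated it, mpi_bcast_block(j) makes that block identical on all ranks. The
 * index j below is hypothetical.
 *
 *   if(A.tid_map()[j] == A.tid()){
 *       // ... the owning rank updates block j in place via A.block(j) ...
 *   }
 *   A.mpi_bcast_block(j);     // collective: every rank must call this
 */
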
// In a global allgather operation, each rank's locally owned blocks are sent to all
// other processes, so that afterwards every rank holds the complete array.

template <typename T> void distributed_array<T>::mpi_bcast_all(void){

  size_t int_per_t = sizeof(T) / sizeof(int);
  assert(int_per_t * sizeof(int) == sizeof(T));
  int element_size = blocksize_ * int_per_t;

  // Number of blocks owned by each rank.
  std::vector<int> recvcount(ntasks_, 0);
  for(int j=0;j<n_;j++) {
    int owner = tid_map_[j];
    recvcount[owner] += 1;
  }

  // Offset (in blocks) of each rank's first block.
  std::vector<int> displs(ntasks_, 0);
  for(int rank = 1; rank < ntasks_; rank++) {
    displs[rank] = displs[rank - 1] + recvcount[rank - 1];
  }

  // Convert counts and offsets from blocks to MPI_INT elements.
  for(int rank = 0; rank < ntasks_; rank++) {
    recvcount[rank] *= element_size;
    displs[rank] *= element_size;
  }

  MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, data_,
                 recvcount.data(), displs.data(), MPI_INT, MPI_COMM_WORLD);

}
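/* Worked example (illustrative, not part of the library): for the n = 10, ntasks_ = 4
 * distribution sketched above (2, 2, 3, 3 blocks per rank) and
 * element_size = blocksize_*int_per_t, the arrays passed to MPI_Allgatherv are
 *   recvcount = {2,2,3,3} * element_size,   displs = {0,2,4,7} * element_size.
 * Since every rank's owned blocks already sit at their final offsets in data_,
 * MPI_IN_PLACE can be used on the send side.
 */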
#endif // CNTR_USE_MPI


} // namespace cntr

#endif // CNTR_DISTRIBUTED_ARRAY_IMPL_H
Member summary (hover documentation from the class reference):
  distributed_array<T>: Auxiliary data structure for handling a set of data blocks and the usual MPI operations on them.
  distributed_array & operator=(const distributed_array &g)
  T * block(int j): Returns the pointer to block j.
  void clear(void): Clear all data.
  void reset_blocksize(int blocksize): Reset the block size for each block.
  int numblock_rank(void): Return the number of blocks on this rank.
  int firstblock_rank(void): Return the first block on this rank.
  std::vector<int> tid_map(void) const
  void mpi_send_block(int j, int dest): Send the j-th block to the MPI rank dest.
  void mpi_bcast_block(int j): MPI broadcast of the j-th block.
  void mpi_bcast_all(void): MPI Allgather equivalent for the distributed array.