Zoltan2
Zoltan2_MachineRCAForTest.hpp
Go to the documentation of this file.
1 #ifndef _ZOLTAN2_MACHINE_RCALIBTEST_HPP_
2 #define _ZOLTAN2_MACHINE_RCALIBTEST_HPP_
3 
4 #include <Teuchos_Comm.hpp>
5 #include <Teuchos_CommHelpers.hpp>
6 #include <Zoltan2_Machine.hpp>
7 #include <cstdlib> /* srand, rand */
8 #include <string>
9 
10 namespace Zoltan2{
11 
16 template <typename pcoord_t, typename part_t>
17 class MachineRCATest : public Machine <pcoord_t, part_t> {
18 
19 public:
24  MachineRCATest(const Teuchos::Comm<int> &comm):
25  Machine<pcoord_t,part_t>(comm),
26  networkDim(3), actual_networkDim(3),
27  procCoords(NULL), actual_procCoords(NULL),
28  machine_extent(NULL),actual_machine_extent(NULL),
29  is_transformed(false), pl(NULL)
30  {
31  actual_machine_extent = machine_extent = new int[networkDim];
32  this->getRealMachineExtent(this->machine_extent);
33  actual_machine_extent = machine_extent;
34 
35  //allocate memory for processor coordinates.
36  actual_procCoords = procCoords = new pcoord_t *[networkDim];
37  for (int i = 0; i < networkDim; ++i){
38  procCoords[i] = new pcoord_t[this->numRanks];
39  memset(procCoords[i], 0, sizeof(pcoord_t) * this->numRanks);
40  }
41 
42  //obtain the coordinate of the processor.
43  pcoord_t *xyz = new pcoord_t[networkDim];
45  for (int i = 0; i < networkDim; i++)
46  procCoords[i][this->myRank] = xyz[i];
47  delete [] xyz;
48 
49 
50  //reduceAll the coordinates of each processor.
51  gatherMachineCoordinates(comm);
52  }
53 
54  virtual bool getMachineExtentWrapArounds(bool *wrap_around) const {
55  int dim = 0;
56  int transformed_network_dim = networkDim;
57  if (dim < transformed_network_dim)
58  wrap_around[dim++] = true;
59  if (dim < transformed_network_dim)
60  wrap_around[dim++] = true;
61  if (dim < transformed_network_dim)
62  wrap_around[dim++] = true;
63  return true;
64  }
65 
66  MachineRCATest(const Teuchos::Comm<int> &comm, const Teuchos::ParameterList &pl_ ):
67  Machine<pcoord_t,part_t>(comm),
68  networkDim(3), actual_networkDim(3),
69  procCoords(NULL), actual_procCoords(NULL),
70  machine_extent(NULL),actual_machine_extent(NULL),
71  is_transformed(false), pl(&pl_)
72  {
73 
74  actual_machine_extent = machine_extent = new int[networkDim];
75  this->getRealMachineExtent(this->machine_extent);
76  actual_machine_extent = machine_extent;
77 
78  //allocate memory for processor coordinates.
79  actual_procCoords = procCoords = new pcoord_t *[networkDim];
80 
81 
82  const Teuchos::ParameterEntry *pe1 = this->pl->getEntryPtr("Input_RCA_Machine_Coords");
83  if (pe1){
84  std::string input_coord_file;
85  input_coord_file = pe1->getValue<std::string>(&input_coord_file);
86  if (input_coord_file != ""){
87 
88  if (this->myRank == 0){
89  std::vector < std::vector <pcoord_t> > proc_coords(networkDim);
90  std::fstream machine_coord_file(input_coord_file.c_str());
91 
92  part_t i = 0;
93  pcoord_t a,b, c;
94  machine_coord_file >> a >> b >> c;
95  while(!machine_coord_file.eof()){
96  proc_coords[0].push_back(a);
97  proc_coords[1].push_back(b);
98  proc_coords[2].push_back(c);
99  ++i;
100  machine_coord_file >> a >> b >> c;
101  }
102 
103  machine_coord_file.close();
104  std::cout << "Rewriting numprocs from:" << this->numRanks << " to:" << i << std::endl;
105  this->numRanks = i;
106 
107  for(int ii = 0; ii < networkDim; ++ii){
108  procCoords[ii] = new pcoord_t[this->numRanks];
109  for (int j = 0; j < this->numRanks; ++j){
110  procCoords[ii][j] = proc_coords[ii][j];
111  }
112  }
113  }
114  comm.broadcast(0, sizeof(int), (char *) &(this->numRanks));
115 
116  if (this->myRank != 0){
117  for (int i = 0; i < networkDim; ++i){
118  procCoords[i] = new pcoord_t[this->numRanks];
119  memset(procCoords[i], 0, sizeof(pcoord_t) * this->numRanks);
120  }
121  }
122  }
123  }
124  else {
125  for (int i = 0; i < networkDim; ++i){
126  procCoords[i] = new pcoord_t[this->numRanks];
127  memset(procCoords[i], 0, sizeof(pcoord_t) * this->numRanks);
128  }
129  //obtain the coordinate of the processor.
130  pcoord_t *xyz = new pcoord_t[networkDim];
132  for (int i = 0; i < networkDim; i++)
133  procCoords[i][this->myRank] = xyz[i];
134  delete [] xyz;
135  }
136 
137  //reduceAll the coordinates of each processor.
138  gatherMachineCoordinates(comm);
139 
140  const Teuchos::ParameterEntry *pe2 = this->pl->getEntryPtr("Machine_Optimization_Level");
141  //this->printAllocation();
142  if (pe2){
143  int optimization_level;
144  optimization_level = pe2->getValue<int>(&optimization_level);
145 
146  if (optimization_level == 1){
147  is_transformed = true;
148  this->networkDim = 3;
149  procCoords = new pcoord_t * [networkDim];
150  for(int i = 0; i < networkDim; ++i){
151  procCoords[i] = new pcoord_t[this->numRanks] ;//this->proc_coords[permutation[i]];
152  }
153  for (int i = 0; i < this->numRanks; ++i){
154  procCoords[0][i] = this->actual_procCoords[0][i] * 8;
155  int yordinal = this->actual_procCoords[1][i];
156  procCoords[1][i] = yordinal/2 * (16 + 8) + (yordinal %2) * 8;
157  int zordinal = this->actual_procCoords[2][i];
158  procCoords[2][i] = zordinal * 5 + (zordinal / 8) * 3;
159  }
160  int mx = this->machine_extent[0];
161  int my = this->machine_extent[1];
162  int mz = this->machine_extent[2];
163 
164 
165  this->machine_extent = new int[networkDim];
166  this->machine_extent[0] = mx * 8;
167  this->machine_extent[1] = my/2 * (16 + 8) + (my %2) * 8;
168  this->machine_extent[2] = mz * 5 + (mz / 8) * 3;
169  if(this->myRank == 0) std::cout << "Transforming the coordinates" << std::endl;
170  //this->printAllocation();
171  }
172  else if(optimization_level >= 3){
173  is_transformed = true;
174  this->networkDim = 6;
175  procCoords = new pcoord_t * [networkDim];
176  for(int i = 0; i < networkDim; ++i){
177  procCoords[i] = new pcoord_t[this->numRanks] ;//this->proc_coords[permutation[i]];
178  }
179 
180  //this->machine_extent[0] = this->actual_machine_extent
181  this->machine_extent = new int[networkDim];
182 
183  this->machine_extent[0] = ceil (int (this->actual_machine_extent[0]) / 2.0) * 64 ;
184  this->machine_extent[3] = 2 * 8 ;
185  this->machine_extent[1] = ceil(int (this->actual_machine_extent[1]) / 2.0) * 8 * 2400;
186  this->machine_extent[4] = 2 * 8;
187  this->machine_extent[2] = ceil((int (this->actual_machine_extent[2])) / 8.0) * 160;
188  this->machine_extent[5] = 8 * 5;
189 
190  for (int k = 0; k < this->numRanks ; k++){
191  //This part is for titan.
192  //But it holds for other 3D torus machines such as Bluewaters.
193 
194  //Bandwitdh along
195  // X = 75
196  // Y = 37.5 or 75 --- everyother has 37.5 --- Y[0-1] =75 but Y[1-2]=37.5
197  // Z = 75 or 120 ---- Y[0-1-2-3-4-5-6-7] = 120, Y[7-8] = 75
198 
199  //Along X we make groups of 2. Then scale the distance with 64.
200  //First dimension is represents x/2
201  procCoords[0][k] = (int (this->actual_procCoords[0][k]) / 2) * 64;
202  //Then the 3rd dimension is x%2. distance is scaled with 8, reversely proportional with bw=75
203  procCoords[3][k] = (int (this->actual_procCoords[0][k]) % 2) * 8 ;
204 
205  //Along Y. Every other one has the slowest link. So we want distances between Y/2 huge.
206  //We scale Y/2 with 2400 so that we make sure that it is the first one we divie.
207  procCoords[1][k] = (int (this->actual_procCoords[1][k]) / 2) * 8 * 2400;
208  //The other one is scaled with 8 as in X.
209  procCoords[4][k] = (int (this->actual_procCoords[1][k]) % 2) * 8;
210 
211  //We make groups of 8 along Z. Then distances between these groups are scaled with 160.
212  //So that it is more than 2x distance than the distance with X grouping.
213  //That is we scale the groups of Zs with 160. Groups of X with 64.
214  //Zs has 8 processors connecting them, while X has only one. We want to divide along
215  //Z twice before dividing along X.
216  procCoords[2][k] = ((int (this->actual_procCoords[2][k])) / 8) * 160;
217  //In the second group everything is scaled with 5, as bw=120
218  procCoords[5][k] = ((int (this->actual_procCoords[2][k])) % 8) * 5;
219  }
220  }
221  else if(optimization_level == 2){
222  //This is as above case. but we make groups of 3 along X instead.
223  is_transformed = true;
224  this->networkDim = 6;
225  procCoords = new pcoord_t * [networkDim];
226  for(int i = 0; i < networkDim; ++i){
227  procCoords[i] = new pcoord_t[this->numRanks] ;//this->proc_coords[permutation[i]];
228  }
229 
230  //this->machine_extent[0] = this->actual_machine_extent
231  this->machine_extent = new int[networkDim];
232 
233  this->machine_extent[0] = ceil(int (this->actual_machine_extent[0]) / 3.0) * 128 ;
234  this->machine_extent[3] = 3 * 8 ;
235  this->machine_extent[1] = ceil(int (this->actual_machine_extent[1]) / 2.0) * 8 * 2400;
236  this->machine_extent[4] = 2 * 8;
237  this->machine_extent[2] = ceil((int (this->actual_machine_extent[2])) / 8.0) * 160;
238  this->machine_extent[5] = 8 * 5;
239 
240 
241  for (int k = 0; k < this->numRanks ; k++){
242  //This part is for titan.
243  //But it holds for other 3D torus machines such as Bluewaters.
244 
245  //Bandwitdh along
246  // X = 75
247  // Y = 37.5 or 75 --- everyother has 37.5 --- Y[0-1] =75 but Y[1-2]=37.5
248  // Z = 75 or 120 ---- Y[0-1-2-3-4-5-6-7] = 120, Y[7-8] = 75
249 
250  //In this case we make groups of 3. along X.
251  procCoords[0][k] = (int (this->actual_procCoords[0][k]) / 3) * 128;
252  //Then the 3rd dimension is x%2. distance is scaled with 8, reversely proportional with bw=75
253  procCoords[3][k] = (int (this->actual_procCoords[0][k]) % 3) * 8 ;
254 
255  //Along Y. Every other one has the slowest link. So we want distances between Y/2 huge.
256  //We scale Y/2 with 2400 so that we make sure that it is the first one we divie.
257  procCoords[1][k] = (int (this->actual_procCoords[1][k]) / 2) * 8 * 2400;
258  //The other one is scaled with 8 as in X.
259  procCoords[4][k] = (int (this->actual_procCoords[1][k]) % 2) * 8;
260 
261 
262  procCoords[2][k] = ((int (this->actual_procCoords[2][k])) / 8) * 160;
263  //In the second group everything is scaled with 5, as bw=120
264  procCoords[5][k] = ((int (this->actual_procCoords[2][k])) % 8) * 5;
265  }
266  }
267  }
268  }
269 
270 
271 
272 
273  virtual ~MachineRCATest() {
274  if (is_transformed){
275  is_transformed = false;
276  for (int i = 0; i < actual_networkDim; i++){
277  delete [] actual_procCoords[i];
278  }
279  delete [] actual_procCoords;
280  delete [] actual_machine_extent;
281  }
282  for (int i = 0; i < networkDim; i++){
283  delete [] procCoords[i];
284  }
285  delete [] procCoords;
286  delete [] machine_extent;
287  }
288 
289  bool hasMachineCoordinates() const { return true; }
290 
291  int getMachineDim() const { return this->networkDim;/*transformed_network_dim;*/ }
292  int getRealMachineDim() const { return this->actual_networkDim;/*transformed_network_dim;*/ }
293 
294  bool getMachineExtent(int *nxyz) const {
295  if (is_transformed){
296  return false;
297  }
298  else {
299  int dim = 0;
300  nxyz[dim++] = this->machine_extent[0]; //x
301  nxyz[dim++] = this->machine_extent[1]; //y
302  nxyz[dim++] = this->machine_extent[2]; //z
303  return true;
304  }
305  }
306 
307  bool getRealMachineExtent(int *nxyz) const {
308  int dim = 0;
309  nxyz[dim++] = 25; //x
310  nxyz[dim++] = 16; //y
311  nxyz[dim++] = 24; //z
312  return true;
313  }
314 
315 
317  if(this->myRank == 0){
318  for (int i = 0; i < this->numRanks; ++i){
319  std::cout << "Rank:" << i << " " << procCoords[0][i] << " " << procCoords[1][i] << " " << procCoords[2][i] << std::endl;
320  }
321  std::cout << "Machine Extent:" << " " << this->machine_extent[0] << " " << this->machine_extent[1] << " " << this->machine_extent[2] << std::endl;
322  }
323  }
324 
325  bool getMyMachineCoordinate(pcoord_t *xyz) {
326  for (int i = 0; i < this->networkDim; ++i){
327  xyz[i] = procCoords[i][this->myRank];
328  }
329  return true;
330  }
331 
332  bool getMyActualMachineCoordinate(pcoord_t *xyz) {
333  xyz[0] = rand() % 25;
334  xyz[1] = rand() % 16;
335  xyz[2] = rand() % 24;
336  return true;
337  }
338 
339  inline bool getMachineCoordinate(const int rank,
340  pcoord_t *xyz) const {
341  for (int i = 0; i < this->networkDim; ++i){
342  xyz[i] = procCoords[i][rank];
343  }
344  return true;
345  }
346 
347 
348  bool getMachineCoordinate(const char *nodename, pcoord_t *xyz) {
349  return false; // cannot yet return from nodename
350  }
351 
352  bool getAllMachineCoordinatesView(pcoord_t **&allCoords) const {
353  allCoords = procCoords;
354  return true;
355  }
356 
357  virtual bool getHopCount(int rank1, int rank2, pcoord_t &hops){
358  hops = 0;
359  for (int i = 0; i < networkDim; ++i){
360  pcoord_t distance = procCoords[i][rank1] - procCoords[i][rank2];
361  if (distance < 0 ) distance = -distance;
362  if (machine_extent[i] - distance < distance) distance = machine_extent[i] - distance;
363  hops += distance;
364  }
365  return true;
366  }
367 
368 
369 private:
370 
371  int networkDim; // dimension of the working coordinates: 3, or 6 after an optimization level >= 2
372  int actual_networkDim; // dimension of the untransformed coordinates; set to 3 by both constructors
373 
374  pcoord_t **procCoords; // working coordinates, indexed [dimension][rank]
375  pcoord_t **actual_procCoords; // untransformed coordinates; same pointer as procCoords unless is_transformed
376 
377  part_t *machine_extent; // working extent per dimension
378  part_t *actual_machine_extent; // untransformed extent; same pointer as machine_extent unless is_transformed
379  bool is_transformed; // true once a constructor replaced the working tables with transformed copies
380 
381 
382  const Teuchos::ParameterList *pl; // address of the caller's parameter list (NULL for the comm-only constructor); never deleted here
383  //bool delete_tranformed_coords;
384 
385 /*
386  bool delete_transformed_coords;
387  int transformed_network_dim;
388  pcoord_t **transformed_coordinates;
389 */
390  void gatherMachineCoordinates(const Teuchos::Comm<int> &comm) {
391  // reduces and stores all machine coordinates.
392  pcoord_t *tmpVect = new pcoord_t [this->numRanks];
393 
394  for (int i = 0; i < networkDim; i++) {
395  Teuchos::reduceAll<int, pcoord_t>(comm, Teuchos::REDUCE_SUM,
396  this->numRanks, procCoords[i], tmpVect);
397  pcoord_t *tmp = tmpVect;
398  tmpVect = procCoords[i];
399  procCoords[i] = tmp;
400  }
401  delete [] tmpVect;
402  }
403 
404 };
405 }
406 #endif
bool getAllMachineCoordinatesView(pcoord_t **&allCoords) const
bool getRealMachineExtent(int *nxyz) const
bool getMyMachineCoordinate(pcoord_t *xyz)
bool getMachineCoordinate(const int rank, pcoord_t *xyz) const
A Machine class for testing only. A more realistic machine should be used for task mapping...
MachineClass Base class for representing machine coordinates, networks, etc.
bool getMyActualMachineCoordinate(pcoord_t *xyz)
MachineRCATest(const Teuchos::Comm< int > &comm)
Constructor: a BlueGeneQ network machine description.
SparseMatrixAdapter_t::part_t part_t
bool getMachineExtent(int *nxyz) const
bool getMachineCoordinate(const char *nodename, pcoord_t *xyz)
virtual bool getMachineExtentWrapArounds(bool *wrap_around) const
virtual bool getHopCount(int rank1, int rank2, pcoord_t &hops)
MachineRCATest(const Teuchos::Comm< int > &comm, const Teuchos::ParameterList &pl_)