Zoltan2
TaskMappingSimulate.cpp
Go to the documentation of this file.
1 
5 
6 #include <string>
7 
8 #include <Teuchos_RCP.hpp>
9 #include <Teuchos_Array.hpp>
10 #include <Teuchos_GlobalMPISession.hpp>
11 #include <Teuchos_ParameterList.hpp>
12 #include "Teuchos_XMLParameterListHelpers.hpp"
13 
14 #include "Tpetra_MultiVector.hpp"
15 #include <Tpetra_CrsGraph.hpp>
16 #include <Tpetra_Map.hpp>
17 
20 #include <Zoltan2_TimerManager.hpp>
25 
26 /*
27 typedef int test_lno_t;
28 typedef long long test_gno_t;
29 typedef double test_scalar_t;
30 */
34 
35 typedef Tpetra::CrsGraph<test_lno_t, test_gno_t, znode_t> mytest_tcrsGraph_t;
36 typedef Tpetra::MultiVector<test_scalar_t, test_lno_t, test_gno_t, znode_t> mytest_tMVector_t;
38 typedef Tpetra::Map<>::node_type mytest_znode_t;
39 typedef Tpetra::Map<test_lno_t, test_gno_t, mytest_znode_t> mytest_map_t;
41 
46 };
47 
51 };
52 
53 RCP<mytest_tcrsGraph_t> create_tpetra_input_matrix(
54  char *input_binary_graph,
55  Teuchos::RCP<const Teuchos::Comm<int> > tcomm,
56  test_gno_t & myTasks,
57  std::vector <int> &task_communication_xadj_,
58  std::vector <int> &task_communication_adj_,
59  std::vector <double> &task_communication_adjw_){
60 
61  int rank = tcomm->getRank();
62  using namespace Teuchos;
63 
64  myTasks = 0;
65  test_lno_t myEdges = 0;
66 
67 
68  if (rank == 0){
69  FILE *f2 = fopen(input_binary_graph, "rb");
70  int num_vertices = 0;
71  int num_edges = 0;
72  fread(&num_vertices,sizeof(int),1,f2); // write 10 bytes to our buffer
73  fread(&num_edges, sizeof(int),1,f2); // write 10 bytes to our buffer
74 
75  myTasks = num_vertices;
76  myEdges = num_edges;
77  std::cout << "numParts:" << num_vertices << " ne:" << num_edges << std::endl;
78 
79  task_communication_xadj_.resize(num_vertices+1);
80  task_communication_adj_.resize(num_edges);
81  task_communication_adjw_.resize(num_edges);
82 
83  fread((void *)&(task_communication_xadj_[0]),sizeof(int),num_vertices + 1,f2); // write 10 bytes to our buffer
84  fread((void *)&(task_communication_adj_[0]),sizeof(int),num_edges ,f2); // write 10 bytes to our buffer
85  fread((void *)&(task_communication_adjw_[0]),sizeof(double),num_edges,f2); // write 10 bytes to our buffer
86  fclose(f2);
87 
88  }
89 
90 
91  tcomm->broadcast(0, sizeof(test_lno_t), (char *) &myTasks);
92  tcomm->broadcast(0, sizeof(test_lno_t), (char *) &myEdges);
93 
94  if (rank != 0){
95  task_communication_xadj_.resize(myTasks+1);
96  task_communication_adj_.resize(myEdges);
97  task_communication_adjw_.resize(myEdges);
98  }
99 
100  tcomm->broadcast(0, sizeof(test_lno_t) * (myTasks+1), (char *) &(task_communication_xadj_[0]));
101  tcomm->broadcast(0, sizeof(test_lno_t)* (myEdges), (char *) &(task_communication_adj_[0]));
102  tcomm->broadcast(0, sizeof(test_scalar_t)* (myEdges), (char *) &(task_communication_adjw_[0]));
103 
104 
105  using namespace Teuchos;
106  Teuchos::RCP<const Teuchos::Comm<int> > serial_comm = Teuchos::createSerialComm<int>();
107  RCP<const mytest_map_t> map = rcp (new mytest_map_t (myTasks, myTasks, 0, serial_comm));
108 
109  RCP<mytest_tcrsGraph_t> TpetraCrsGraph(new mytest_tcrsGraph_t (map, 0));
110 
111 
112  std::vector<test_gno_t> tmp(myEdges);
113  for (test_lno_t lclRow = 0; lclRow < myTasks; ++lclRow) {
114  const test_gno_t gblRow = map->getGlobalElement (lclRow);
115  test_lno_t begin = task_communication_xadj_[gblRow];
116  test_lno_t end = task_communication_xadj_[gblRow + 1];
117  for (test_lno_t m = begin; m < end; ++m){
118  tmp[m - begin] = task_communication_adj_[m];
119  }
120  const ArrayView< const test_gno_t > indices(&(tmp[0]), end-begin);
121  TpetraCrsGraph->insertGlobalIndices(gblRow, indices);
122  }
123  TpetraCrsGraph->fillComplete ();
124 
125 
126  return TpetraCrsGraph;
127 }
128 
129 
130 RCP <Zoltan2::XpetraMultiVectorAdapter<mytest_tMVector_t> > create_multi_vector_adapter(
131  RCP<const mytest_map_t> map, int coord_dim,
132  test_scalar_t ** partCenters, test_gno_t myTasks){
133 
134 
135  Teuchos::Array<Teuchos::ArrayView<const test_scalar_t> > coordView(coord_dim);
136 
137  if(myTasks > 0){
138  for (int i = 0; i < coord_dim; ++i){
139  Teuchos::ArrayView<const test_scalar_t> a(partCenters[i], myTasks);
140  coordView[i] = a;
141  }
142  }
143  else {
144  for (int i = 0; i < coord_dim; ++i){
145  Teuchos::ArrayView<const test_scalar_t> a;
146  coordView[i] = a;
147  }
148  }
149  RCP<mytest_tMVector_t> coords(new mytest_tMVector_t(map, coordView.view(0, coord_dim), coord_dim));//= set multivector;
150  RCP<const mytest_tMVector_t> const_coords = rcp_const_cast<const mytest_tMVector_t>(coords);
151  RCP <Zoltan2::XpetraMultiVectorAdapter<mytest_tMVector_t> > adapter (new Zoltan2::XpetraMultiVectorAdapter<mytest_tMVector_t>(const_coords));
152  return adapter;
153 }
154 
155 
// Runs the "serial input" task-mapping scenario: every rank holds the entire
// task graph and part-center coordinates, each task starts out as its own
// unique part, and a one-phase mapping problem assigns tasks to machine ranks.
//
// \param global_tcomm                the global communicator
// \param input_binary_graph          binary CSR task graph file (mandatory)
// \param input_binary_coordinate     binary part-center coordinate file (mandatory)
// \param input_machine_file          RCA machine coordinate file (mandatory)
// \param machine_optimization_level  forwarded as "Machine_Optimization_Level";
//                                    when > 0 the mapping is also re-evaluated
//                                    on the untransformed machine
// \param divide_prime_first          forwarded as "divide_prime_first"
// \param rank_per_node               forwarded as "ranks_per_node"
// \param visualize_mapping           rank 0 dumps the mapping for visualization
// \param reduce_best_mapping         forwarded as "reduce_best_mapping"
// \throws const char* on missing file arguments or a part/task count mismatch
void test_serial_input_adapter(Teuchos::RCP<const Teuchos::Comm<int> > global_tcomm,
    char *input_binary_graph, char *input_binary_coordinate, char *input_machine_file,
    int machine_optimization_level, bool divide_prime_first, int rank_per_node, bool visualize_mapping, int reduce_best_mapping){

  if (input_binary_graph == NULL || input_binary_coordinate == NULL || input_machine_file == NULL){
    throw "Binary files is mandatory";
  }
  //all processors have the all input in this case.
  Teuchos::RCP<const Teuchos::Comm<int> > serial_comm = Teuchos::createSerialComm<int>();

  //for the input creation, let processor think that it is the only processor.

  Teuchos::ParameterList serial_problemParams;
  //create mapping problem parameters
  serial_problemParams.set("mapping_algorithm", "geometric");
  serial_problemParams.set("distributed_input_adapter", false);
  serial_problemParams.set("algorithm", "multijagged");
  serial_problemParams.set("Machine_Optimization_Level", machine_optimization_level);
  serial_problemParams.set("Input_RCA_Machine_Coords", input_machine_file);
  serial_problemParams.set("divide_prime_first", divide_prime_first);
  serial_problemParams.set("ranks_per_node", rank_per_node);
  if (reduce_best_mapping)
    serial_problemParams.set("reduce_best_mapping", true);
  else
    serial_problemParams.set("reduce_best_mapping", false);

  Zoltan2::MachineRepresentation <test_scalar_t, mytest_part_t> transformed_machine(*global_tcomm, serial_problemParams);
  int numProcs = transformed_machine.getNumRanks();
  //TODO MOVE THIS DOWN.
  serial_problemParams.set("num_global_parts", numProcs);
  RCP<Zoltan2::Environment> env (new Zoltan2::Environment(serial_problemParams, global_tcomm));
  RCP<Zoltan2::TimerManager> timer(new Zoltan2::TimerManager(global_tcomm, &std::cout, Zoltan2::MACRO_TIMERS));
  env->setTimer(timer);

  // CSR arrays of the task graph, filled on every rank by
  // create_tpetra_input_matrix (read on rank 0, broadcast to the rest).
  std::vector <double> task_communication_adjw_;

  std::vector <int> task_communication_xadj_;
  std::vector <int> task_communication_adj_;

  // partCenters[d][p] = coordinate d of part p.
  // NOTE(review): these arrays are never deleted — pre-existing leak in this
  // test driver.
  test_scalar_t **partCenters = NULL;
  test_gno_t myTasks ;
  //create tpetra input graph
  RCP<mytest_tcrsGraph_t> serial_tpetra_graph = create_tpetra_input_matrix(
      input_binary_graph,
      global_tcomm,
      myTasks,
      task_communication_xadj_, task_communication_adj_,
      task_communication_adjw_);
  RCP<const mytest_map_t> serial_map = serial_tpetra_graph->getMap();
  global_tcomm->barrier();

  //create input adapter from tpetra graph
  env->timerStart(Zoltan2::MACRO_TIMERS, "AdapterCreate");
  RCP<const mytest_tcrsGraph_t> const_tpetra_graph = rcp_const_cast<const mytest_tcrsGraph_t>(serial_tpetra_graph);
  RCP<mytest_adapter_t> ia (new mytest_adapter_t(const_tpetra_graph, 0, 1));

  int rank = global_tcomm->getRank();

  int numParts = 0; int coordDim = 0;

  if (rank == 0)
  {
    // Coordinate file layout: int numParts, int coordDim, then coordDim
    // arrays of numParts doubles (one array per coordinate dimension).
    // NOTE(review): fopen/fread results are unchecked here — a bad path or a
    // truncated file crashes or yields garbage.
    FILE *f2 = fopen(input_binary_coordinate, "rb");
    fread((void *)&numParts,sizeof(int),1,f2);
    fread((void *)&coordDim,sizeof(int),1,f2);

    partCenters = new test_scalar_t * [coordDim];
    for(int i = 0; i < coordDim; ++i){
      partCenters[i] = new test_scalar_t[numParts];
      fread((void *) partCenters[i],sizeof(double),numParts, f2);
    }
    fclose(f2);
  }

  // NOTE(review): numParts/coordDim are int but are broadcast with
  // sizeof(test_lno_t) byte counts — correct only while test_lno_t is int;
  // confirm against the typedefs.
  global_tcomm->broadcast(0, sizeof(test_lno_t), (char *) &numParts);
  global_tcomm->broadcast(0, sizeof(test_lno_t), (char *) &coordDim);

  if (numParts!= myTasks){
    throw "myTasks is different than numParts";
  }
  if (rank != 0){
    // Non-root ranks allocate matching buffers to receive the coordinates.
    partCenters = new test_scalar_t * [coordDim];
    for(int i = 0; i < coordDim; ++i){
      partCenters[i] = new test_scalar_t[numParts];
    }
  }

  for(int i = 0; i < coordDim; ++i){
    global_tcomm->broadcast(0, sizeof(test_scalar_t)* (numParts), (char *) partCenters[i]);
  }

  //create multivector for coordinates and
  RCP <Zoltan2::XpetraMultiVectorAdapter<mytest_tMVector_t> > serial_adapter = create_multi_vector_adapter(serial_map, coordDim, partCenters, myTasks);
  ia->setCoordinateInput(serial_adapter.getRawPtr());

  // One weight per edge, taken straight from the file's adjw array.
  ia->setEdgeWeights(&(task_communication_adjw_[0]), 1, 0);
/*
  for (int i = 0; i < task_communication_adjw_.size(); ++i){
    std::cout << task_communication_adjw_[i] << " ";
  }
  std::cout << std::endl;
  for (int i = 0; i < task_communication_adjw_.size(); ++i){
    std::cout << task_communication_adj_[i] << " ";
  }
  std::cout << std::endl;
*/
  env->timerStop(Zoltan2::MACRO_TIMERS, "AdapterCreate");
  global_tcomm->barrier();

  //NOW, it only makes sense to map them serially. This is a case for the applications,
  //where they already have the whole graph in all processes, and they want to do the mapping.
  //Basically it will be the same mapping algorithm, if that is the case.

  //First case from the distributed case does not really make sense and it is erroneous.
  //zoltan partitioning algorithms require distributed input. One still can do that in two phases,
  //but needs to make sure that distributed and serial input adapters matches correctly.

  //Second case does not make sense and is erroneous. All elements are within the same node and they should not be
  //assumed to be in the same part, since it will result only a single part.

  //If input adapter is not distributed, we are only interested in the third case.
  //Each element will be its own unique part at the beginning of the mapping.

  //FOR the third case we create our own solution and set unique parts to each element.
  //Basically each element has its global id as part number.
  //Global ids are the same as local ids here because each processor owns the whole thing.
  Zoltan2::PartitioningSolution<mytest_adapter_t> single_phase_mapping_solution(env, global_tcomm, 0);
  Teuchos::ArrayView< const test_gno_t> gids = serial_map->getNodeElementList();

  ArrayRCP<int> initial_part_ids(myTasks);
  for (test_gno_t i = 0; i < myTasks; ++i){
    initial_part_ids[i] = gids[i];
  }
  single_phase_mapping_solution.setParts(initial_part_ids);

  env->timerStart(Zoltan2::MACRO_TIMERS, "Problem Create");
  //create a mapping problem for the third case. We provide a solution in which all elements belong to unique part.
  //even the input is not distributed, we still provide the global_tcomm because processors will calculate different mappings
  //and the best one will be chosen.
  // NOTE(review): the first line of this statement — the declaration
  // "Zoltan2::MappingProblem<mytest_adapter_t> serial_map_problem(" — is
  // missing from this extraction; only its argument list survives below.
      ia.getRawPtr(), &serial_problemParams, global_tcomm, &single_phase_mapping_solution, &transformed_machine);

  env->timerStop(Zoltan2::MACRO_TIMERS, "Problem Create");
  //solve mapping problem.
  env->timerStart(Zoltan2::MACRO_TIMERS, "Problem Solve");
  serial_map_problem.solve(true);
  env->timerStop(Zoltan2::MACRO_TIMERS, "Problem Solve");

  //get the solution.
  Zoltan2::MappingSolution<mytest_adapter_t> *msoln3 = serial_map_problem.getSolution();

  timer->printAndResetToZero();

  //typedef Zoltan2::EvaluatePartition<my_adapter_t> quality_t;
  // NOTE(review): the live typedef of quality_t (an EvaluatePartition over
  // mytest_adapter_t, per the trailing doxygen index) is missing from this
  // extraction.

  //input is not distributed in this case.
  //metric object should be given the serial comm so that it can calculate the correct metrics without global communication.
  RCP<quality_t> metricObject_3 = rcp(
      new quality_t(ia.getRawPtr(),&serial_problemParams,serial_comm,msoln3, serial_map_problem.getMachine().getRawPtr()));

  if (global_tcomm->getRank() == 0){
    std::cout << "METRICS FOR THE SERIAL CASE - ONE PHASE MAPPING - EACH ELEMENT IS ASSUMED TO BE IN UNIQUE PART AT THE BEGINNING" << std::endl;
    metricObject_3->printMetrics(cout);
  }
  if (machine_optimization_level > 0){
    // Re-evaluate the same mapping against the *actual* (untransformed)
    // machine coordinates for comparison.
    Teuchos::ParameterList serial_problemParams_2;
    serial_problemParams_2.set("Input_RCA_Machine_Coords", input_machine_file);

    Zoltan2::MachineRepresentation <test_scalar_t, mytest_part_t> actual_machine(*global_tcomm, serial_problemParams_2);

    RCP<quality_t> metricObject_4 = rcp(
        new quality_t(ia.getRawPtr(),&serial_problemParams_2,serial_comm,msoln3, &actual_machine));

    if (global_tcomm->getRank() == 0){
      std::cout << "METRICS FOR THE SERIAL CASE - ONE PHASE MAPPING - EACH ELEMENT IS ASSUMED TO BE IN UNIQUE PART AT THE BEGINNING" << std::endl;
      metricObject_4->printMetrics(cout);
    }
  }

  if (visualize_mapping && global_tcomm->getRank() == 0){
    // Dump machine coordinates + task graph + task-to-rank assignment for
    // offline visualization.
    Teuchos::ParameterList serial_problemParams_2;
    serial_problemParams_2.set("Input_RCA_Machine_Coords", input_machine_file);
    Zoltan2::MachineRepresentation <test_scalar_t, mytest_part_t> actual_machine(*global_tcomm, serial_problemParams_2);
    test_scalar_t ** coords;
    actual_machine.getAllMachineCoordinatesView(coords);
    Zoltan2::visualize_mapping<zscalar_t, int> (0, actual_machine.getMachineDim(), actual_machine.getNumRanks(), coords,
        int (task_communication_xadj_.size())-1, &(task_communication_xadj_[0]), &(task_communication_adj_[0]), msoln3->getPartListView());

  }

}
356 
357 int main(int argc, char *argv[]){
358 
359 
360  Teuchos::GlobalMPISession session(&argc, &argv);
361  Teuchos::RCP<const Teuchos::Comm<int> > global_tcomm = Teuchos::DefaultComm<int>::getComm();
362 
363  char *input_binary_graph = NULL;
364  char *input_binary_coordinate = NULL;
365  char *input_machine_file = NULL;
366  int machine_optimization_level = 10;
367  bool divide_prime_first = false;
368  int rank_per_node = 1;
369  int reduce_best_mapping = 1;
370  bool visualize_mapping = false;
371  for ( int i = 1 ; i < argc ; ++i ) {
372  if ( 0 == strcasecmp( argv[i] , "BG" ) ) {
373 
374  input_binary_graph = argv[++i];
375  }
376  else if ( 0 == strcasecmp( argv[i] , "BC" ) ) {
377  input_binary_coordinate = argv[++i];
378  }
379  else if ( 0 == strcasecmp( argv[i] , "MF" ) ) {
380  //not binary.
381  input_machine_file = argv[++i];
382  }
383  else if ( 0 == strcasecmp( argv[i] , "OL" ) ) {
384  machine_optimization_level = atoi( argv[++i] );
385  }
386  else if ( 0 == strcasecmp( argv[i] , "DP" ) ) {
387  if (atoi( argv[++i] )){
388  divide_prime_first = true;
389  }
390  }
391  else if ( 0 == strcasecmp( argv[i] , "RPN" ) ) {
392  rank_per_node = atoi( argv[++i] );
393  }
394  else if ( 0 == strcasecmp( argv[i] , "VM" ) ) {
395  visualize_mapping = true;
396  }
397  else if ( 0 == strcasecmp( argv[i] , "RBM" ) ) {
398  reduce_best_mapping = atoi( argv[++i] );
399  }
400  else{
401  std::cerr << "Unrecognized command line argument #" << i << ": " << argv[i] << std::endl ;
402  return 1;
403  }
404  }
405 
406 
407  try{
408 
409  test_serial_input_adapter(global_tcomm, input_binary_graph, input_binary_coordinate, input_machine_file,
410  machine_optimization_level, divide_prime_first, rank_per_node, visualize_mapping, reduce_best_mapping);
411 
412 #if 0
413  {
414  part_t my_parts = 0, *my_result_parts;
415  //const part_t *local_element_to_rank = msoln1->getPartListView();
416 
417  std::cout << "me:" << global_tcomm->getRank() << " my_parts:" << my_parts << " myTasks:" << myTasks << std::endl;
418  if (global_tcomm->getRank() == 0) {
419 
420  //zscalar_t **dots = partCenters;
421  //int i = 0, j =0;
422  FILE *f2 = fopen("plot.gnuplot", "w");
423  for (int i = 0; i< global_tcomm->getSize(); ++i){
424  char str[20];
425  sprintf(str, "coords%d.txt", i);
426  if (i == 0){
427  fprintf(f2,"splot \"%s\"\n", str);
428  }
429  else {
430  fprintf(f2,"replot \"%s\"\n", str);
431  }
432  }
433  fprintf(f2,"pause-1\n");
434  fclose(f2);
435  }
436  char str[20];
437  int myrank = global_tcomm->getRank();
438  sprintf(str, "coords%d.txt", myrank);
439  FILE *coord_files = fopen(str, "w");
440 
441 
442  for (int j = 0; j < my_parts; ++j){
443  int findex = my_result_parts[j];
444  std::cout << "findex " << findex << std::endl;
445  fprintf(coord_files,"%lf %lf %lf\n", partCenters[0][findex], partCenters[1][findex], partCenters[2][findex]);
446  }
447  fclose(coord_files);
448  }
449 #endif
450 
451  if (global_tcomm->getRank() == 0){
452  cout << "PASS" << endl;
453  }
454  }
455  catch(std::string &s){
456  cerr << s << endl;
457  }
458 
459  catch(char * s){
460  cerr << s << endl;
461  }
462 }
463 
Time an algorithm (or other entity) as a whole.
MappingInputDistributution
Tpetra::CrsGraph< zlno_t, zgno_t, znode_t > mytest_tcrsGraph_t
void visualize_mapping(int myRank, const int machine_coord_dim, const int num_ranks, proc_coord_t **machine_coords, const v_lno_t num_tasks, const v_lno_t *task_communication_xadj, const v_lno_t *task_communication_adj, const int *task_to_rank)
PartitionMapping maps a solution or an input distribution to ranks.
mytest_adapter_t::part_t mytest_part_t
int main(int argc, char *argv[])
Tpetra::MultiVector< test_scalar_t, test_lno_t, test_gno_t, znode_t > mytest_tMVector_t
void solve(bool updateInputData=true)
Direct the problem to create a solution.
double zscalar_t
Provides access for Zoltan2 to Xpetra::CrsGraph data.
void test_serial_input_adapter(Teuchos::RCP< const Teuchos::Comm< int > > global_tcomm, char *input_binary_graph, char *input_binary_coordinate, char *input_machine_file, int machine_optimization_level, bool divide_prime_first, int rank_per_node, bool visualize_mapping, int reduce_best_mapping)
int zlno_t
common code used by tests
RCP< mytest_tcrsGraph_t > create_tpetra_input_matrix(char *input_binary_graph, Teuchos::RCP< const Teuchos::Comm< int > > tcomm, test_gno_t &myTasks, std::vector< int > &task_communication_xadj_, std::vector< int > &task_communication_adj_, std::vector< double > &task_communication_adjw_)
Tpetra::Map ::node_type mytest_znode_t
Tpetra::Map< zlno_t, zgno_t, mytest_znode_t > mytest_map_t
Defines the XpetraMultiVectorAdapter.
Defines XpetraCrsGraphAdapter class.
SparseMatrixAdapter_t::part_t part_t
zgno_t test_gno_t
A PartitioningSolution is a solution to a partitioning problem.
Defines the EvaluatePartition class.
zlno_t test_lno_t
Zoltan2::XpetraCrsGraphAdapter< mytest_tcrsGraph_t, mytest_tMVector_t > mytest_adapter_t
int getMachineDim() const
returns the dimension (number of coords per node) in the machine
InputTraits< User >::part_t part_t
The user parameters, debug, timing and memory profiling output objects, and error checking methods...
zscalar_t test_scalar_t
bool getAllMachineCoordinatesView(pcoord_t **&allCoords) const
getProcDim function sets the coordinates of all ranks. allCoords[i][j], i=0,...,getMachineDim(), j=0,...,getNumRanks(), is the i-th dimensional coordinate for rank j. Returns true if coordinates are available for all ranks.
An adapter for Xpetra::MultiVector.
MachineRepresentation Class Base class for representing machine coordinates, networks, etc.
void setParts(ArrayRCP< part_t > &partList)
The algorithm uses setParts to set the solution.
int zgno_t
Tpetra::MultiVector< zscalar_t, zlno_t, zgno_t, znode_t > mytest_tMVector_t
Defines the MappingSolution class.
RCP< Zoltan2::XpetraMultiVectorAdapter< mytest_tMVector_t > > create_multi_vector_adapter(RCP< const mytest_map_t > map, int coord_dim, test_scalar_t **partCenters, test_gno_t myTasks)
int getNumRanks() const
return the number of ranks.
Defines the PartitioningProblem class.
Tpetra::Map< test_lno_t, test_gno_t, mytest_znode_t > mytest_map_t
MappingProblem enables mapping of a partition (either computed or input) to MPI ranks.
A class that computes and returns quality metrics.
Declarations for TimerManager.
Defines the MappingProblem class.
Tpetra::CrsGraph< test_lno_t, test_gno_t, znode_t > mytest_tcrsGraph_t