// ---------------------------------------------------------------------------
// NOTE(review): extraction-garbled view. Original source line numbers ("1",
// "2", "11", ...) are embedded in the text and many original lines are
// missing. Below: header guard + includes, then a fragment of a Teuchos
// value-type reduction operator used to pick the best mapping across ranks.
// ---------------------------------------------------------------------------
1 #ifndef _ZOLTAN2_COORD_PARTITIONMAPPING_HPP_ 2 #define _ZOLTAN2_COORD_PARTITIONMAPPING_HPP_ 11 #include "Teuchos_ArrayViewDecl.hpp" 14 #include "Teuchos_ReductionOp.hpp" 19 #include <zoltan_dd.h> 22 #include "Teuchos_Comm.hpp" 23 #ifdef HAVE_ZOLTAN2_MPI 24 #include "Teuchos_DefaultMpiComm.hpp" 25 #endif // HAVE_ZOLTAN2_MPI 26 #include <Teuchos_DefaultSerialComm.hpp> 35 template <
typename Ordinal,
typename T>
// reduce(): element-wise combiner for Teuchos::reduceAll. The buffers appear
// to carry (cost, rank) pairs -- buffer[0] a mapping cost, buffer[1] an
// identifier -- TODO confirm against the (missing) class declaration.
48 void reduce(
const Ordinal count,
const T inBuffer[], T inoutBuffer[])
// Accept the incoming pair when its cost is smaller by more than _EPSILON...
const 51 for (Ordinal i=0; i < count; i++){
52 if (inBuffer[0] - inoutBuffer[0] < -_EPSILON){
53 inoutBuffer[0] = inBuffer[0];
54 inoutBuffer[1] = inBuffer[1];
55 }
// ...or on a near-equal cost where the second field is also (near-)smaller,
// giving a deterministic tie-break. NOTE(review): the loop runs `count`
// times but only ever touches elements 0 and 1 -- present as written in
// the original; looks intentional for a 2-element reduction, verify.
else if(inBuffer[0] - inoutBuffer[0] < _EPSILON &&
56 inBuffer[1] - inoutBuffer[1] < _EPSILON){
57 inoutBuffer[0] = inBuffer[0];
58 inoutBuffer[1] = inBuffer[1];
/*! \brief Computes x! (factorial), used to enumerate permutations of the
 *  coordinate dimensions, so x is a small dimension count and overflow is
 *  not a practical concern.
 *
 *  Fix: the original base case was `x == 1`, which recursed without end for
 *  x == 0; 0! == 1, so the guard is now `x <= 1`. Behavior for x >= 1 is
 *  unchanged.
 */
template <typename it>
inline it z2Fact(it x) {
  return (x <= 1 ? it(1) : x * z2Fact<it>(x - 1));
}
73 template <
typename gno_t,
typename part_t>
/*! \brief Writes the i-th permutation (lexicographic, factorial number
 *  system) of {0, 1, ..., n-1} into perm.
 *
 *  \param n    number of elements (perm must have room for n entries).
 *  \param i    permutation index; must lie in [0, n!).
 *  \param perm output buffer receiving the permutation.
 *
 *  Improvement over the original: the factorial scratch table now lives in a
 *  std::vector instead of raw new[]/delete[], so no leak is possible if an
 *  exception unwinds through this function.
 */
template <typename IT>
void ithPermutation(const IT n, IT i, IT *perm)
{
  // fact[k] = k!, built iteratively.
  std::vector<IT> fact(n);
  IT k = 0;
  fact[k] = 1;
  while (++k < n)
    fact[k] = fact[k - 1] * k;

  // Factorial-base digits of i.
  for (k = 0; k < n; ++k)
  {
    perm[k] = i / fact[n - 1 - k];
    i = i % fact[n - 1 - k];
  }

  // Turn the digit string into an actual permutation: from the end, bump
  // perm[k] once for every earlier entry that is <= it.
  for (k = n - 1; k > 0; --k)
    for (IT j = k - 1; j >= 0; --j)
      if (perm[j] <= perm[k])
        perm[k]++;
}
110 template <
typename part_t>
112 int dim = grid_dims.size();
113 int neighborCount = 2 * dim;
114 task_comm_xadj = allocMemory<part_t>(taskCount+1);
115 task_comm_adj = allocMemory<part_t>(taskCount * neighborCount);
118 task_comm_xadj[0] = 0;
119 for (
part_t i = 0; i < taskCount; ++i){
121 for (
int j = 0; j < dim; ++j){
122 part_t lNeighbor = i - prevDimMul;
123 part_t rNeighbor = i + prevDimMul;
124 prevDimMul *= grid_dims[j];
125 if (lNeighbor >= 0 && lNeighbor/ prevDimMul == i / prevDimMul && lNeighbor < taskCount){
126 task_comm_adj[neighBorIndex++] = lNeighbor;
128 if (rNeighbor >= 0 && rNeighbor/ prevDimMul == i / prevDimMul && rNeighbor < taskCount){
129 task_comm_adj[neighBorIndex++] = rNeighbor;
132 task_comm_xadj[i+1] = neighBorIndex;
137 template <
typename Adapter,
typename scalar_t,
typename part_t>
140 const Teuchos::Comm<int> *comm,
146 scalar_t **partCenters){
148 typedef typename Adapter::lno_t lno_t;
149 typedef typename Adapter::gno_t gno_t;
152 ArrayView<const gno_t> gnos;
153 ArrayView<input_t> xyz;
154 ArrayView<input_t> wgts;
164 gno_t *point_counts = allocMemory<gno_t>(ntasks);
165 memset(point_counts, 0,
sizeof(gno_t) * ntasks);
168 gno_t *global_point_counts = allocMemory<gno_t>(ntasks);
171 scalar_t **multiJagged_coordinates = allocMemory<scalar_t *>(coordDim);
173 for (
int dim=0; dim < coordDim; dim++){
174 ArrayRCP<const scalar_t> ar;
175 xyz[dim].getInputArray(ar);
177 multiJagged_coordinates[dim] = (scalar_t *)ar.getRawPtr();
178 memset(partCenters[dim], 0,
sizeof(scalar_t) * ntasks);
193 for (lno_t i=0; i < numLocalCoords; i++){
196 for(
int j = 0; j < coordDim; ++j){
197 scalar_t c = multiJagged_coordinates[j][i];
198 partCenters[j][p] += c;
204 reduceAll<int, gno_t>(*comm, Teuchos::REDUCE_SUM,
205 ntasks, point_counts, global_point_counts
208 for(
int j = 0; j < coordDim; ++j){
209 for (
part_t i=0; i < ntasks; ++i){
210 partCenters[j][i] /= global_point_counts[i];
214 scalar_t *tmpCoords = allocMemory<scalar_t>(ntasks);
215 for(
int j = 0; j < coordDim; ++j){
216 reduceAll<int, scalar_t>(*comm, Teuchos::REDUCE_SUM,
217 ntasks, partCenters[j], tmpCoords
220 scalar_t *tmp = partCenters[j];
221 partCenters[j] = tmpCoords;
226 freeArray<gno_t> (point_counts);
227 freeArray<gno_t> (global_point_counts);
229 freeArray<scalar_t> (tmpCoords);
230 freeArray<scalar_t *>(multiJagged_coordinates);
234 template <
typename Adapter,
typename scalar_t,
typename part_t>
237 const Teuchos::Comm<int> *comm,
242 ArrayRCP<part_t> &g_part_xadj,
243 ArrayRCP<part_t> &g_part_adj,
244 ArrayRCP<scalar_t> &g_part_ew
247 typedef typename Adapter::lno_t t_lno_t;
248 typedef typename Adapter::gno_t t_gno_t;
249 typedef typename Adapter::scalar_t t_scalar_t;
270 ArrayView<const t_gno_t> Ids;
271 ArrayView<t_input_t> v_wghts;
275 ArrayView<const t_gno_t> edgeIds;
276 ArrayView<const t_lno_t> offsets;
277 ArrayView<t_input_t> e_wgts;
281 std::vector <t_scalar_t> edge_weights;
284 if (numWeightPerEdge > 0){
285 edge_weights = std::vector <t_scalar_t> (localNumEdges);
286 for (t_lno_t i = 0; i < localNumEdges; ++i){
287 edge_weights[i] = e_wgts[0][i];
293 std::vector <part_t> e_parts (localNumEdges);
294 #ifdef HAVE_ZOLTAN2_MPI 295 if (comm->getSize() > 1)
297 Zoltan_DD_Struct *dd = NULL;
299 MPI_Comm mpicomm = Teuchos::getRawMpiComm(*comm);
303 Zoltan_DD_Create(&dd, mpicomm,
305 sizeof(
part_t), localNumVertices, debug_level);
307 ZOLTAN_ID_PTR ddnotneeded = NULL;
310 (ZOLTAN_ID_PTR) Ids.getRawPtr(),
314 int(localNumVertices));
318 (ZOLTAN_ID_PTR) edgeIds.getRawPtr(),
320 (
char *)&(e_parts[0]),
325 Zoltan_DD_Destroy(&dd);
337 for (t_lno_t i = 0; i < localNumEdges; ++i){
338 t_gno_t ei = edgeIds[i];
344 std::vector <t_lno_t> part_begins(np, -1);
345 std::vector <t_lno_t> part_nexts(localNumVertices, -1);
349 for (t_lno_t i = 0; i < localNumVertices; ++i){
351 part_nexts[i] = part_begins[ap];
356 g_part_xadj = ArrayRCP<part_t> (np + 1);
357 g_part_adj = ArrayRCP<part_t> (localNumEdges);
358 g_part_ew = ArrayRCP<t_scalar_t> (localNumEdges);
361 std::vector <part_t> part_neighbors (np);
362 std::vector <t_scalar_t> part_neighbor_weights(np, 0);
363 std::vector <t_scalar_t> part_neighbor_weights_ordered(np);
366 for (t_lno_t i = 0; i < np; ++i){
367 part_t num_neighbor_parts = 0;
368 t_lno_t v = part_begins[i];
372 for (t_lno_t j = offsets[v]; j < offsets[v+1]; ++j){
377 if (numWeightPerEdge > 0){
378 ew = edge_weights[j];
382 if (part_neighbor_weights[ep] < 0.00001){
383 part_neighbors[num_neighbor_parts++] = ep;
385 part_neighbor_weights[ep] += ew;
392 for (t_lno_t j = 0; j < num_neighbor_parts; ++j){
393 part_t neighbor_part = part_neighbors[j];
394 g_part_adj[nindex] = neighbor_part;
395 g_part_ew[nindex++] = part_neighbor_weights[neighbor_part];
396 part_neighbor_weights[neighbor_part] = 0;
398 g_part_xadj[i + 1] = nindex;
403 RCP<const Teuchos::Comm<int> > tcomm = rcpFromRef(*comm);
404 typedef Tpetra::Map<>::node_type t_node_t;
405 typedef Tpetra::Map<part_t, part_t, t_node_t> t_map_t;
406 Teuchos::RCP<const t_map_t> map = Teuchos::rcp (
new t_map_t (np, 0, tcomm));
407 typedef Tpetra::CrsMatrix<t_scalar_t, part_t, part_t, t_node_t>
tcrsMatrix_t;
408 Teuchos::RCP<tcrsMatrix_t> tMatrix(
new tcrsMatrix_t (map, 0));
415 std::vector <t_lno_t> part_begins(np, -1);
416 std::vector <t_lno_t> part_nexts(localNumVertices, -1);
420 for (t_lno_t i = 0; i < localNumVertices; ++i){
422 part_nexts[i] = part_begins[ap];
426 std::vector <part_t> part_neighbors (np);
427 std::vector <t_scalar_t> part_neighbor_weights(np, 0);
428 std::vector <t_scalar_t> part_neighbor_weights_ordered(np);
431 for (t_lno_t i = 0; i < np; ++i){
432 part_t num_neighbor_parts = 0;
433 t_lno_t v = part_begins[i];
437 for (t_lno_t j = offsets[v]; j < offsets[v+1]; ++j){
442 if (numWeightPerEdge > 0){
443 ew = edge_weights[j];
446 if (part_neighbor_weights[ep] < 0.00001){
447 part_neighbors[num_neighbor_parts++] = ep;
449 part_neighbor_weights[ep] += ew;
455 for (t_lno_t j = 0; j < num_neighbor_parts; ++j){
456 part_t neighbor_part = part_neighbors[j];
457 part_neighbor_weights_ordered[j] = part_neighbor_weights[neighbor_part];
458 part_neighbor_weights[neighbor_part] = 0;
462 if (num_neighbor_parts > 0){
463 Teuchos::ArrayView<const part_t> destinations(
464 &(part_neighbors[0]), num_neighbor_parts);
465 Teuchos::ArrayView<const t_scalar_t>
466 vals(&(part_neighbor_weights_ordered[0]), num_neighbor_parts);
467 tMatrix->insertGlobalValues (i,destinations, vals);
473 tMatrix->fillComplete ();
477 std::vector <part_t> part_indices(np);
479 for (
part_t i = 0; i < np; ++i) part_indices[i] = i;
481 Teuchos::ArrayView<const part_t>
482 global_ids( &(part_indices[0]), np);
486 Teuchos::RCP<const t_map_t> gatherRowMap(
new t_map_t (
487 Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid(), global_ids, 0, tcomm));
491 Teuchos::RCP<tcrsMatrix_t> A_gather =
492 Teuchos::rcp (
new tcrsMatrix_t (gatherRowMap, 0));
493 typedef Tpetra::Import<
typename t_map_t::local_ordinal_type,
494 typename t_map_t::global_ordinal_type,
495 typename t_map_t::node_type> import_type;
496 import_type
import (map, gatherRowMap);
497 A_gather->doImport (*tMatrix,
import, Tpetra::INSERT);
498 A_gather->fillComplete ();
503 g_part_xadj = ArrayRCP<part_t> (np + 1);
504 g_part_adj = ArrayRCP<part_t> (A_gather->getNodeNumEntries ());
505 g_part_ew = ArrayRCP<t_scalar_t> (A_gather->getNodeNumEntries ());
507 part_t *taskidx = g_part_xadj.getRawPtr();
508 part_t *taskadj = g_part_adj.getRawPtr();
509 t_scalar_t *taskadjwgt = g_part_ew.getRawPtr();
514 for (
part_t i = 0; i < np; i++) {
515 part_t length = A_gather->getNumEntriesInLocalRow(i);
517 taskidx[i+1] = taskidx[i] + length;
519 Teuchos::ArrayView<part_t> Indices(taskadj + taskidx[i], length);
520 Teuchos::ArrayView<t_scalar_t> Values(taskadjwgt + taskidx[i], length);
521 A_gather->getLocalRowCopy(i, Indices, Values, nentries);
529 template <
class IT,
class WT>
539 this->heapSize = heapsize_;
540 this->indices = allocMemory<IT>(heapsize_ );
541 this->values = allocMemory<WT>(heapsize_ );
546 freeArray<IT>(this->indices);
547 freeArray<WT>(this->values);
552 WT maxVal = this->values[0];
555 if (distance >= maxVal)
return;
557 this->values[0] = distance;
558 this->indices[0] = index;
565 IT child_index1 = 2 * index_on_heap + 1;
566 IT child_index2 = 2 * index_on_heap + 2;
569 if(child_index1 < this->heapSize && child_index2 < this->heapSize){
571 if (this->values[child_index1] < this->values[child_index2]){
572 biggerIndex = child_index2;
575 biggerIndex = child_index1;
578 else if(child_index1 < this->heapSize){
579 biggerIndex = child_index1;
582 else if(child_index2 < this->heapSize){
583 biggerIndex = child_index2;
585 if (biggerIndex >= 0 && this->values[biggerIndex] > this->values[index_on_heap]){
586 WT tmpVal = this->values[biggerIndex];
587 this->values[biggerIndex] = this->values[index_on_heap];
588 this->values[index_on_heap] = tmpVal;
590 IT tmpIndex = this->indices[biggerIndex];
591 this->indices[biggerIndex] = this->indices[index_on_heap];
592 this->indices[index_on_heap] = tmpIndex;
593 this->push_down(biggerIndex);
598 WT MAXVAL = std::numeric_limits<WT>::max();
599 for(IT i = 0; i < this->heapSize; ++i){
600 this->values[i] = MAXVAL;
601 this->indices[i] = -1;
609 for(IT j = 0; j < this->heapSize; ++j){
610 nc += this->values[j];
620 for(
int i = 0; i < dimension; ++i){
622 for(IT j = 0; j < this->heapSize; ++j){
623 IT k = this->indices[j];
627 nc /= this->heapSize;
628 moved = (
ZOLTAN2_ABS(center[i] - nc) > this->_EPSILON || moved );
636 for(IT i = 0; i < this->heapSize; ++i){
637 permutation[i] = this->indices[i];
644 template <
class IT,
class WT>
653 freeArray<WT>(center);
657 this->dimension = dimension_;
658 this->center = allocMemory<WT>(dimension_);
667 return this->closestPoints.
getNewCenters(center, coords, dimension);
674 for (
int i = 0; i < this->dimension; ++i){
675 WT d = (center[i] - elementCoords[i][index]);
678 distance = pow(distance, WT(1.0 / this->dimension));
679 closestPoints.
addPoint(index, distance);
695 template <
class IT,
class WT>
702 IT required_elements;
708 freeArray<KMeansCluster <IT,WT> >(clusters);
709 freeArray<WT>(maxCoordinates);
710 freeArray<WT>(minCoordinates);
719 IT required_elements_):
721 numElements(numElements_),
722 elementCoords(elementCoords_),
723 numClusters ((1 << dim_) + 1),
724 required_elements(required_elements_)
726 this->clusters = allocMemory<KMeansCluster <IT,WT> >(this->numClusters);
728 for (
int i = 0; i < numClusters; ++i){
729 this->clusters[i].
setParams(this->dim, this->required_elements);
732 this->maxCoordinates = allocMemory <WT> (this->dim);
733 this->minCoordinates = allocMemory <WT> (this->dim);
736 for (
int j = 0; j < dim; ++j){
737 this->minCoordinates[j] = this->maxCoordinates[j] = this->elementCoords[j][0];
738 for(IT i = 1; i < numElements; ++i){
739 WT t = this->elementCoords[j][i];
740 if(t > this->maxCoordinates[j]){
741 this->maxCoordinates[j] = t;
743 if (t < minCoordinates[j]){
744 this->minCoordinates[j] = t;
751 for (
int j = 0; j < dim; ++j){
752 int mod = (1 << (j+1));
753 for (
int i = 0; i < numClusters - 1; ++i){
755 if ( (i % mod) < mod / 2){
756 c = this->maxCoordinates[j];
760 c = this->minCoordinates[j];
762 this->clusters[i].
center[j] = c;
767 for (
int j = 0; j < dim; ++j){
768 this->clusters[numClusters - 1].
center[j] = (this->maxCoordinates[j] + this->minCoordinates[j]) / 2;
784 for(
int it = 0; it < 10; ++it){
786 for (IT j = 0; j < this->numClusters; ++j){
789 for (IT i = 0; i < this->numElements; ++i){
791 for (IT j = 0; j < this->numClusters; ++j){
793 this->clusters[j].
getDistance(i,this->elementCoords);
797 for (IT j = 0; j < this->numClusters; ++j){
798 moved =(this->clusters[j].
getNewCenters(this->elementCoords) || moved );
814 for (IT j = 1; j < this->numClusters; ++j){
817 if(minTmpDistance < minDistance){
818 minDistance = minTmpDistance;
// Minimum of two values; macro (not std::min) so it works across the mixed
// integer types used below without instantiation concerns.
#define MINOF(a,b) (((a)<(b))?(a):(b))

/*! \brief Fills arr[0..arrSize). When val == NULL the array gets the
 *  identity sequence 0,1,2,... (used to build initial permutations);
 *  otherwise every slot is set to *val (used to reset arrays to a marker
 *  value). Parallelized with OpenMP when HAVE_ZOLTAN2_OMP is defined.
 *  (The "Continous" spelling is part of the public name; callers depend
 *  on it.)
 */
template <typename T>
void fillContinousArray(T *arr, size_t arrSize, T *val) {
  if (val == NULL) {
#ifdef HAVE_ZOLTAN2_OMP
#pragma omp parallel for
#endif
    for (size_t i = 0; i < arrSize; ++i) {
      arr[i] = i;   // identity fill
    }
  }
  else {
#ifdef HAVE_ZOLTAN2_OMP
#pragma omp parallel for
#endif
    for (size_t i = 0; i < arrSize; ++i) {
      arr[i] = *val;  // broadcast fill
    }
  }
}
864 template <
typename part_t,
typename pcoord_t>
876 no_tasks(no_tasks_){}
879 return this->no_procs;
882 return this->no_tasks;
888 part_t *task_communication_xadj,
889 part_t *task_communication_adj,
890 pcoord_t *task_communication_edge_weight){
892 double totalCost = 0;
895 for (
part_t task = 0; task < this->no_tasks; ++task){
896 int assigned_proc = task_to_proc[task];
898 part_t task_adj_begin = task_communication_xadj[task];
899 part_t task_adj_end = task_communication_xadj[task+1];
901 commCount += task_adj_end - task_adj_begin;
903 for (
part_t task2 = task_adj_begin; task2 < task_adj_end; ++task2){
906 part_t neighborTask = task_communication_adj[task2];
908 int neighborProc = task_to_proc[neighborTask];
910 double distance = getProcDistance(assigned_proc, neighborProc);
912 if (task_communication_edge_weight == NULL){
913 totalCost += distance ;
916 totalCost += distance * task_communication_edge_weight[task2];
929 this->commCost = totalCost;
933 return this->commCost;
936 virtual double getProcDistance(
int procId1,
int procId2)
const = 0;
944 virtual void getMapping(
946 const RCP<const Environment> &env,
947 ArrayRCP <part_t> &proc_to_task_xadj,
948 ArrayRCP <part_t> &proc_to_task_adj,
949 ArrayRCP <part_t> &task_to_proc
950 ,
const Teuchos::RCP <
const Teuchos::Comm<int> > comm_
956 template <
typename pcoord_t,
typename tcoord_t,
typename part_t>
983 machine_extent(NULL),
984 machine_extent_wrap_around(NULL),
986 num_ranks_per_node(1),
987 divide_to_prime_first(false){}
1001 pcoord_t **pcoords_,
1003 tcoord_t **tcoords_,
1006 int *machine_extent_,
1007 bool *machine_extent_wrap_around_,
1011 proc_coord_dim(pcoord_dim_), proc_coords(pcoords_),
1012 task_coord_dim(tcoord_dim_), task_coords(tcoords_),
1015 machine_extent(machine_extent_),
1016 machine_extent_wrap_around(machine_extent_wrap_around_),
1018 num_ranks_per_node(1),
1019 divide_to_prime_first(false){
1024 this->partArraySize = psize;
1027 this->partNoArray = pNo;
1039 part_t minCoordDim =
MINOF(this->task_coord_dim, this->proc_coord_dim);
1041 minCoordDim, nprocs,
1042 this->proc_coords, ntasks);
1047 for(
int i = ntasks; i < nprocs; ++i){
1048 proc_permutation[i] = -1;
1072 lo = _u_umpa_seed % q;
1073 hi = _u_umpa_seed / q;
1074 test = (a*lo)-(r*hi);
1076 _u_umpa_seed = test;
1078 _u_umpa_seed = test + m;
1079 d = (double) ((
double) _u_umpa_seed / (double) m);
1080 return (
part_t) (d*(double)l);
1084 pcoord_t distance = 0;
1085 if (machine == NULL){
1086 for (
int i = 0 ; i < this->proc_coord_dim; ++i){
1087 double d =
ZOLTAN2_ABS(proc_coords[i][procId1] - proc_coords[i][procId2]);
1088 if (machine_extent_wrap_around && machine_extent_wrap_around[i]){
1089 if (machine_extent[i] - d < d){
1090 d = machine_extent[i] - d;
1097 this->machine->
getHopCount(procId1, procId2, distance);
1118 for (i=0; i<n; i+=16)
1120 u = umpa_uRandom(n-4, _u_umpa_seed);
1121 v = umpa_uRandom(n-4, _u_umpa_seed);
1135 for (i=1; i<end; i++)
1137 part_t j=umpa_uRandom(n-i, _u_umpa_seed);
1156 const RCP<const Environment> &env,
1157 ArrayRCP <part_t> &rcp_proc_to_task_xadj,
1158 ArrayRCP <part_t> &rcp_proc_to_task_adj,
1159 ArrayRCP <part_t> &rcp_task_to_proc
1160 ,
const Teuchos::RCP <
const Teuchos::Comm<int> > comm_
1163 rcp_proc_to_task_xadj = ArrayRCP <part_t> (this->no_procs+1);
1164 rcp_proc_to_task_adj = ArrayRCP <part_t> (this->no_tasks);
1165 rcp_task_to_proc = ArrayRCP <part_t> (this->no_tasks);
1167 part_t *proc_to_task_xadj = rcp_proc_to_task_xadj.getRawPtr();
1168 part_t *proc_to_task_adj = rcp_proc_to_task_adj.getRawPtr();
1169 part_t *task_to_proc = rcp_task_to_proc.getRawPtr();
1173 fillContinousArray<part_t> (proc_to_task_xadj, this->no_procs+1, &invalid);
1176 part_t num_parts =
MINOF(this->no_procs, this->no_tasks);
1182 int recursion_depth = partArraySize;
1184 if (partArraySize == -1){
1186 if (divide_to_prime_first){
1192 recursion_depth = log(
float(this->no_procs)) / log(2.0) * 2 + 1;
1195 recursion_depth = log(
float(this->no_procs)) / log(2.0) + 1;
1200 int taskPerm = z2Fact<int>(this->task_coord_dim);
1201 int procPerm = z2Fact<int>(this->proc_coord_dim);
1203 int permutations = taskPerm * procPerm;
1206 permutations += taskPerm;
1208 permutations += procPerm;
1214 part_t *proc_xadj = allocMemory<part_t> (num_parts+1);
1217 part_t *proc_adjList = allocMemory<part_t>(this->no_procs);
1220 part_t used_num_procs = this->no_procs;
1221 if(this->no_procs > this->no_tasks){
1223 this->getClosestSubset(proc_adjList, this->no_procs, this->no_tasks);
1224 used_num_procs = this->no_tasks;
1227 fillContinousArray<part_t>(proc_adjList,this->no_procs, NULL);
1230 int myPermutation = myRank % permutations;
1231 bool task_partition_along_longest_dim =
false;
1232 bool proc_partition_along_longest_dim =
false;
1238 if (myPermutation == 0){
1239 task_partition_along_longest_dim =
true;
1240 proc_partition_along_longest_dim =
true;
1244 if (myPermutation < taskPerm){
1245 proc_partition_along_longest_dim =
true;
1246 myTaskPerm = myPermutation;
1249 myPermutation -= taskPerm;
1250 if (myPermutation < procPerm){
1251 task_partition_along_longest_dim =
true;
1252 myProcPerm = myPermutation;
1255 myPermutation -= procPerm;
1256 myProcPerm = myPermutation % procPerm;
1257 myTaskPerm = myPermutation / procPerm;
1282 int *permutation = allocMemory<int> ((this->proc_coord_dim > this->task_coord_dim)
1283 ? this->proc_coord_dim : this->task_coord_dim);
1286 ithPermutation<int>(this->proc_coord_dim, myProcPerm, permutation);
1296 int procdim = this->proc_coord_dim;
1297 pcoord_t **pcoords = this->proc_coords;
1339 env->timerStart(
MACRO_TIMERS,
"Mapping - Proc Partitioning");
1354 proc_partition_along_longest_dim
1356 ,divide_to_prime_first
1358 env->timerStop(
MACRO_TIMERS,
"Mapping - Proc Partitioning");
1366 part_t *task_xadj = allocMemory<part_t> (num_parts+1);
1367 part_t *task_adjList = allocMemory<part_t>(this->no_tasks);
1369 fillContinousArray<part_t>(task_adjList,this->no_tasks, NULL);
1372 ithPermutation<int>(this->task_coord_dim, myTaskPerm, permutation);
1375 tcoord_t **tcoords = allocMemory<tcoord_t *> (this->task_coord_dim);
1376 for(
int i = 0; i < this->task_coord_dim; ++i){
1377 tcoords[i] = this->task_coords[permutation[i]];
1381 env->timerStart(
MACRO_TIMERS,
"Mapping - Task Partitioning");
1388 this->task_coord_dim,
1395 task_partition_along_longest_dim
1397 ,divide_to_prime_first
1401 env->timerStop(
MACRO_TIMERS,
"Mapping - Task Partitioning");
1407 freeArray<pcoord_t *> (tcoords);
1408 freeArray<int> (permutation);
1412 for(
part_t i = 0; i < num_parts; ++i){
1414 part_t proc_index_begin = proc_xadj[i];
1415 part_t task_begin_index = task_xadj[i];
1416 part_t proc_index_end = proc_xadj[i+1];
1417 part_t task_end_index = task_xadj[i+1];
1420 if(proc_index_end - proc_index_begin != 1){
1421 std::cerr <<
"Error at partitioning of processors" << std::endl;
1422 std::cerr <<
"PART:" << i <<
" is assigned to " << proc_index_end - proc_index_begin <<
" processors." << std::endl;
1425 part_t assigned_proc = proc_adjList[proc_index_begin];
1426 proc_to_task_xadj[assigned_proc] = task_end_index - task_begin_index;
1432 part_t *proc_to_task_xadj_work = allocMemory<part_t> (this->no_procs);
1434 for(
part_t i = 0; i < this->no_procs; ++i){
1435 part_t tmp = proc_to_task_xadj[i];
1436 proc_to_task_xadj[i] = sum;
1438 proc_to_task_xadj_work[i] = sum;
1440 proc_to_task_xadj[this->no_procs] = sum;
1442 for(
part_t i = 0; i < num_parts; ++i){
1444 part_t proc_index_begin = proc_xadj[i];
1445 part_t task_begin_index = task_xadj[i];
1446 part_t task_end_index = task_xadj[i+1];
1448 part_t assigned_proc = proc_adjList[proc_index_begin];
1450 for (
part_t j = task_begin_index; j < task_end_index; ++j){
1451 part_t taskId = task_adjList[j];
1453 task_to_proc[taskId] = assigned_proc;
1455 proc_to_task_adj [ --proc_to_task_xadj_work[assigned_proc] ] = taskId;
1508 freeArray<part_t>(proc_to_task_xadj_work);
1509 freeArray<part_t>(task_xadj);
1510 freeArray<part_t>(task_adjList);
1511 freeArray<part_t>(proc_xadj);
1512 freeArray<part_t>(proc_adjList);
1517 template <
typename Adapter,
typename part_t>
1521 #ifndef DOXYGEN_SHOULD_SKIP_THIS 1523 typedef typename Adapter::scalar_t pcoord_t;
1524 typedef typename Adapter::scalar_t tcoord_t;
1525 typedef typename Adapter::scalar_t scalar_t;
1526 typedef typename Adapter::lno_t
lno_t;
1547 void doMapping(
int myRank,
const Teuchos::RCP <
const Teuchos::Comm<int> > comm_){
1549 if(this->proc_task_comm){
1553 this->proc_to_task_xadj,
1554 this->proc_to_task_adj,
1560 std::cerr <<
"communicationModel is not specified in the Mapper" << std::endl;
1572 int taskPerm = z2Fact<int>(procDim);
1573 int procPerm = z2Fact<int>(taskDim);
1574 int idealGroupSize = taskPerm * procPerm;
1576 idealGroupSize += taskPerm + procPerm + 1;
1578 int myRank = this->comm->getRank();
1579 int commSize = this->comm->getSize();
1581 int myGroupIndex = myRank / idealGroupSize;
1583 int prevGroupBegin = (myGroupIndex - 1)* idealGroupSize;
1584 if (prevGroupBegin < 0) prevGroupBegin = 0;
1585 int myGroupBegin = myGroupIndex * idealGroupSize;
1586 int myGroupEnd = (myGroupIndex + 1) * idealGroupSize;
1587 int nextGroupEnd = (myGroupIndex + 2)* idealGroupSize;
1589 if (myGroupEnd > commSize){
1590 myGroupBegin = prevGroupBegin;
1591 myGroupEnd = commSize;
1593 if (nextGroupEnd > commSize){
1594 myGroupEnd = commSize;
1596 int myGroupSize = myGroupEnd - myGroupBegin;
1598 part_t *myGroup = allocMemory<part_t>(myGroupSize);
1599 for (
int i = 0; i < myGroupSize; ++i){
1600 myGroup[i] = myGroupBegin + i;
1604 ArrayView<const part_t> myGroupView(myGroup, myGroupSize);
1606 RCP<Comm<int> > subComm = this->comm->createSubcommunicator(myGroupView);
1607 freeArray<part_t>(myGroup);
1616 RCP<Comm<int> > subComm = this->create_subCommunicator();
1620 double localCost[2], globalCost[2];
1622 localCost[0] = myCost;
1623 localCost[1] = double(subComm->getRank());
1625 globalCost[1] = globalCost[0] = std::numeric_limits<double>::max();
1627 reduceAll<int, double>(*subComm, reduceBest,
1628 2, localCost, globalCost);
1630 int sender = int(globalCost[1]);
1644 broadcast (*subComm, sender, this->ntasks, this->task_to_proc.getRawPtr());
1645 broadcast (*subComm, sender, this->nprocs, this->proc_to_task_xadj.getRawPtr());
1646 broadcast (*subComm, sender, this->ntasks, this->proc_to_task_adj.getRawPtr());
1653 std::ofstream gnuPlotCode (
"gnuPlot.plot", std::ofstream::out);
1656 std::string ss =
"";
1657 for(
part_t i = 0; i < this->nprocs; ++i){
1659 std::string procFile = Teuchos::toString<int>(i) +
"_mapping.txt";
1661 gnuPlotCode <<
"plot \"" << procFile <<
"\"\n";
1664 gnuPlotCode <<
"replot \"" << procFile <<
"\"\n";
1667 std::ofstream inpFile (procFile.c_str(), std::ofstream::out);
1669 std::string gnuPlotArrow =
"set arrow from ";
1670 for(
int j = 0; j < mindim; ++j){
1671 if (j == mindim - 1){
1673 gnuPlotArrow += Teuchos::toString<float>(proc_task_comm->
proc_coords[j][i]);
1677 inpFile << proc_task_comm->
proc_coords[j][i] <<
" ";
1678 gnuPlotArrow += Teuchos::toString<float>(proc_task_comm->
proc_coords[j][i]) +
",";
1681 gnuPlotArrow +=
" to ";
1684 inpFile << std::endl;
1685 ArrayView<part_t> a = this->getAssignedTasksForProc(i);
1686 for(
int k = 0; k < a.size(); ++k){
1689 std::string gnuPlotArrow2 = gnuPlotArrow;
1690 for(
int z = 0; z < mindim; ++z){
1691 if(z == mindim - 1){
1695 gnuPlotArrow2 += Teuchos::toString<float>(proc_task_comm->
task_coords[z][j]);
1698 inpFile << proc_task_comm->
task_coords[z][j] <<
" ";
1699 gnuPlotArrow2 += Teuchos::toString<float>(proc_task_comm->
task_coords[z][j]) +
",";
1702 ss += gnuPlotArrow2 +
"\n";
1703 inpFile << std::endl;
1708 gnuPlotCode <<
"\nreplot\n pause -1 \n";
1709 gnuPlotCode.close();
1716 std::string rankStr = Teuchos::toString<int>(myRank);
1717 std::string gnuPlots =
"gnuPlot", extentionS =
".plot";
1718 std::string outF = gnuPlots + rankStr+ extentionS;
1719 std::ofstream gnuPlotCode ( outF.c_str(), std::ofstream::out);
1726 std::cerr <<
"Mapping Write is only good for 3 dim" << std::endl;
1729 std::string ss =
"";
1730 std::string procs =
"";
1732 std::set < std::tuple<int,int,int,int,int,int> > my_arrows;
1733 for(
part_t origin_rank = 0; origin_rank < this->nprocs; ++origin_rank){
1734 ArrayView<part_t> a = this->getAssignedTasksForProc(origin_rank);
1739 std::string gnuPlotArrow =
"set arrow from ";
1740 for(
int j = 0; j < mindim; ++j){
1741 if (j == mindim - 1){
1742 gnuPlotArrow += Teuchos::toString<float>(tmpproc_task_comm->proc_coords[j][origin_rank]);
1743 procs += Teuchos::toString<float>(tmpproc_task_comm->proc_coords[j][origin_rank]);
1747 gnuPlotArrow += Teuchos::toString<float>(tmpproc_task_comm->proc_coords[j][origin_rank]) +
",";
1748 procs += Teuchos::toString<float>(tmpproc_task_comm->proc_coords[j][origin_rank])+
" ";
1753 gnuPlotArrow +=
" to ";
1756 for(
int k = 0; k < a.size(); ++k){
1757 int origin_task = a[k];
1759 for (
int nind = task_communication_xadj[origin_task]; nind < task_communication_xadj[origin_task + 1]; ++nind){
1760 int neighbor_task = task_communication_adj[nind];
1762 bool differentnode =
false;
1764 int neighbor_rank = this->getAssignedProcForTask(neighbor_task);
1766 for(
int j = 0; j < mindim; ++j){
1767 if (
int (tmpproc_task_comm->proc_coords[j][origin_rank]) != int (tmpproc_task_comm->proc_coords[j][neighbor_rank])){
1768 differentnode =
true;
break;
1771 std::tuple<int,int,int, int, int, int> foo (
1772 int (tmpproc_task_comm->proc_coords[0][origin_rank]),
1773 int (tmpproc_task_comm->proc_coords[1][origin_rank]),
1774 int (tmpproc_task_comm->proc_coords[2][origin_rank]),
1775 int (tmpproc_task_comm->proc_coords[0][neighbor_rank]),
1776 int (tmpproc_task_comm->proc_coords[1][neighbor_rank]),
1777 int (tmpproc_task_comm->proc_coords[2][neighbor_rank]));
1780 if (differentnode && my_arrows.find(foo) == my_arrows.end()){
1781 my_arrows.insert(foo);
1783 std::string gnuPlotArrow2 =
"";
1784 for(
int j = 0; j < mindim; ++j){
1785 if(j == mindim - 1){
1786 gnuPlotArrow2 += Teuchos::toString<float>(tmpproc_task_comm->proc_coords[j][neighbor_rank]);
1789 gnuPlotArrow2 += Teuchos::toString<float>(tmpproc_task_comm->proc_coords[j][neighbor_rank]) +
",";
1792 ss += gnuPlotArrow + gnuPlotArrow2 +
" nohead\n";
1800 std::ofstream procFile (
"procPlot.plot", std::ofstream::out);
1801 procFile << procs <<
"\n";
1806 gnuPlotCode <<
"plot \"procPlot.plot\" with points pointsize 3\n";
1808 gnuPlotCode <<
"splot \"procPlot.plot\" with points pointsize 3\n";
1811 gnuPlotCode << ss <<
"\nreplot\n pause -1 \n";
1812 gnuPlotCode.close();
1819 const Teuchos::Comm<int> *comm_,
1822 tcoord_t **partCenters
1824 std::string file =
"gggnuPlot";
1825 std::string exten =
".plot";
1826 std::ofstream mm(
"2d.txt");
1827 file += Teuchos::toString<int>(comm_->getRank()) + exten;
1828 std::ofstream ff(file.c_str());
1832 for (
part_t i = 0; i < this->ntasks;++i){
1833 outPartBoxes[i].writeGnuPlot(ff, mm);
1836 ff <<
"plot \"2d.txt\"" << std::endl;
1840 ff <<
"splot \"2d.txt\"" << std::endl;
1845 ff <<
"set style arrow 5 nohead size screen 0.03,15,135 ls 1" << std::endl;
1846 for (
part_t i = 0; i < this->ntasks;++i){
1847 part_t pb = task_communication_xadj[i];
1848 part_t pe = task_communication_xadj[i+1];
1849 for (
part_t p = pb; p < pe; ++p){
1850 part_t n = task_communication_adj[p];
1853 std::string arrowline =
"set arrow from ";
1854 for (
int j = 0; j < coordDim - 1; ++j){
1855 arrowline += Teuchos::toString<tcoord_t>(partCenters[j][n]) +
",";
1857 arrowline += Teuchos::toString<tcoord_t>(partCenters[coordDim -1][n]) +
" to ";
1860 for (
int j = 0; j < coordDim - 1; ++j){
1861 arrowline += Teuchos::toString<tcoord_t>(partCenters[j][i]) +
",";
1863 arrowline += Teuchos::toString<tcoord_t>(partCenters[coordDim -1][i]) +
" as 5\n";
1870 ff <<
"replot\n pause -1" << std::endl;
1878 proc_to_task_xadj_ = this->proc_to_task_xadj.getRawPtr();
1879 proc_to_task_adj_ = this->proc_to_task_adj.getRawPtr();
1886 mappingsoln->setMap_RankForLocalElements(local_task_to_rank);
1900 if(this->isOwnerofModel){
1901 delete this->proc_task_comm;
1906 const lno_t num_local_coords,
1907 const part_t *local_coord_parts,
1908 const ArrayRCP<part_t> task_to_proc_){
1909 local_task_to_rank = ArrayRCP <part_t> (num_local_coords);
1911 for (lno_t i = 0; i < num_local_coords; ++i){
1912 part_t local_coord_part = local_coord_parts[i];
1913 part_t rank_index = task_to_proc_[local_coord_part];
1914 local_task_to_rank[i] = rank_index;
1931 const Teuchos::RCP <
const Teuchos::Comm<int> > comm_,
1933 const Teuchos::RCP <const Adapter> input_adapter_,
1935 const Teuchos::RCP <const Environment> envConst,
1936 bool is_input_adapter_distributed =
true,
1937 int num_ranks_per_node = 1,
1938 bool divide_to_prime_first =
false,
bool reduce_best_mapping =
true):
1939 PartitionMapping<Adapter> (comm_, machine_, input_adapter_, soln_, envConst),
1940 proc_to_task_xadj(0),
1941 proc_to_task_adj(0),
1943 isOwnerofModel(true),
1945 task_communication_xadj(0),
1946 task_communication_adj(0),
1947 task_communication_edge_weight(0){
1951 RCP<Zoltan2::GraphModel<ctm_base_adapter_t> > graph_model_;
1952 RCP<Zoltan2::CoordinateModel<ctm_base_adapter_t> > coordinateModel_ ;
1954 RCP<const Teuchos::Comm<int> > rcp_comm = comm_;
1955 RCP<const Teuchos::Comm<int> > ia_comm = rcp_comm;
1956 if (!is_input_adapter_distributed){
1957 ia_comm = Teuchos::createSerialComm<int>();
1960 RCP<const Environment> envConst_ = envConst;
1962 RCP<const ctm_base_adapter_t> baseInputAdapter_ (
1963 rcp(dynamic_cast<const ctm_base_adapter_t *>(input_adapter_.getRawPtr()),
false));
1971 baseInputAdapter_, envConst_, ia_comm, coordFlags_));
1981 baseInputAdapter_, envConst_, ia_comm,
1985 if (!machine_->hasMachineCoordinates()) {
1986 throw std::runtime_error(
"Existing machine does not provide coordinates " 1987 "for coordinate task mapping");
1991 int procDim = machine_->getMachineDim();
1992 this->nprocs = machine_->getNumRanks();
1995 pcoord_t **procCoordinates = NULL;
1996 if (!machine_->getAllMachineCoordinatesView(procCoordinates)) {
1997 throw std::runtime_error(
"Existing machine does not implement " 1998 "getAllMachineCoordinatesView");
2005 std::vector <int> machine_extent_vec (procDim);
2007 int *machine_extent = &(machine_extent_vec[0]);
2008 bool *machine_extent_wrap_around =
new bool[procDim];
2009 for (
int i = 0; i < procDim; ++i)machine_extent_wrap_around[i] =
false;
2010 machine_->getMachineExtentWrapArounds(machine_extent_wrap_around);
2018 if (machine_->getMachineExtent(machine_extent)) {
2020 this->shiftMachineCoordinates (
2023 machine_extent_wrap_around,
2031 int coordDim = coordinateModel_->getCoordinateDim();
2041 tcoord_t **partCenters = NULL;
2042 partCenters = allocMemory<tcoord_t *>(coordDim);
2043 for (
int i = 0; i < coordDim; ++i){
2044 partCenters[i] = allocMemory<tcoord_t>(this->ntasks);
2047 typedef typename Adapter::scalar_t t_scalar_t;
2050 envConst->timerStart(
MACRO_TIMERS,
"Mapping - Solution Center");
2053 getSolutionCenterCoordinates<Adapter, t_scalar_t,part_t>(
2054 envConst.getRawPtr(),
2055 ia_comm.getRawPtr(),
2056 coordinateModel_.getRawPtr(),
2057 this->solution_parts,
2064 envConst->timerStop(
MACRO_TIMERS,
"Mapping - Solution Center");
2068 if (graph_model_.getRawPtr() != NULL){
2069 getCoarsenedPartGraph<Adapter, t_scalar_t, part_t> (
2070 envConst.getRawPtr(),
2071 ia_comm.getRawPtr(),
2072 graph_model_.getRawPtr(),
2074 this->solution_parts,
2077 task_communication_xadj,
2078 task_communication_adj,
2079 task_communication_edge_weight
2084 this->proc_task_comm =
2093 machine_extent_wrap_around,
2094 machine_.getRawPtr());
2096 int myRank = comm_->getRank();
2098 this->proc_task_comm->divide_to_prime_first = divide_to_prime_first;
2101 envConst->timerStart(
MACRO_TIMERS,
"Mapping - Processor Task map");
2102 this->doMapping(myRank, comm_);
2103 envConst->timerStop(
MACRO_TIMERS,
"Mapping - Processor Task map");
2106 envConst->timerStart(
MACRO_TIMERS,
"Mapping - Communication Graph");
2112 envConst->timerStop(
MACRO_TIMERS,
"Mapping - Communication Graph");
2114 if (comm_->getRank() == 0){
2116 part_t taskCommCount = task_communication_xadj.size();
2117 std::cout <<
" TotalComm:" << task_communication_xadj[taskCommCount] << std::endl;
2118 part_t maxN = task_communication_xadj[0];
2119 for (
part_t i = 1; i <= taskCommCount; ++i){
2120 part_t nc = task_communication_xadj[i] - task_communication_xadj[i-1];
2121 if (maxN < nc) maxN = nc;
2123 std::cout <<
" maxNeighbor:" << maxN << std::endl;
2126 this->writeGnuPlot(comm_, soln_, coordDim, partCenters);
2129 envConst->timerStart(
MACRO_TIMERS,
"Mapping - Communication Cost");
2131 if (reduce_best_mapping && task_communication_xadj.getRawPtr() && task_communication_adj.getRawPtr()){
2132 this->proc_task_comm->calculateCommunicationCost(
2133 task_to_proc.getRawPtr(),
2134 task_communication_xadj.getRawPtr(),
2135 task_communication_adj.getRawPtr(),
2136 task_communication_edge_weight.getRawPtr()
2142 envConst->timerStop(
MACRO_TIMERS,
"Mapping - Communication Cost");
2147 this->getBestMapping();
2148 this->create_local_task_to_rank(
2149 coordinateModel_->getLocalNumCoordinates(),
2150 this->solution_parts,
2151 this->task_to_proc);
2167 this->writeMapping2(comm_->getRank());
2170 delete []machine_extent_wrap_around;
2171 if (machine_->getMachineExtent(machine_extent)){
2172 for (
int i = 0; i < procDim; ++i){
2173 delete [] procCoordinates[i];
2175 delete [] procCoordinates;
2178 for (
int i = 0; i < coordDim; ++i){
2179 freeArray<tcoord_t>(partCenters[i]);
2181 freeArray<tcoord_t *>(partCenters);
2197 const Teuchos::RCP <
const Teuchos::Comm<int> > comm_,
2199 const Teuchos::RCP <const Adapter> input_adapter_,
2201 const part_t *result_parts,
2202 const Teuchos::RCP <const Environment> envConst,
2203 bool is_input_adapter_distributed =
true,
2204 int num_ranks_per_node = 1,
2205 bool divide_to_prime_first =
false,
bool reduce_best_mapping =
true):
2206 PartitionMapping<Adapter> (comm_, machine_, input_adapter_, num_parts_, result_parts, envConst),
2207 proc_to_task_xadj(0),
2208 proc_to_task_adj(0),
2210 isOwnerofModel(true),
2212 task_communication_xadj(0),
2213 task_communication_adj(0),
2214 task_communication_edge_weight(0){
2218 RCP<Zoltan2::GraphModel<ctm_base_adapter_t> > graph_model_;
2219 RCP<Zoltan2::CoordinateModel<ctm_base_adapter_t> > coordinateModel_ ;
2221 RCP<const Teuchos::Comm<int> > rcp_comm = comm_;
2222 RCP<const Teuchos::Comm<int> > ia_comm = rcp_comm;
2223 if (!is_input_adapter_distributed){
2224 ia_comm = Teuchos::createSerialComm<int>();
2226 RCP<const Environment> envConst_ = envConst;
2228 RCP<const ctm_base_adapter_t> baseInputAdapter_ (
2229 rcp(dynamic_cast<const ctm_base_adapter_t *>(input_adapter_.getRawPtr()),
false));
2237 baseInputAdapter_, envConst_, ia_comm, coordFlags_));
2247 baseInputAdapter_, envConst_, ia_comm,
2251 if (!machine_->hasMachineCoordinates()) {
2252 throw std::runtime_error(
"Existing machine does not provide coordinates " 2253 "for coordinate task mapping");
2257 int procDim = machine_->getMachineDim();
2258 this->nprocs = machine_->getNumRanks();
2261 pcoord_t **procCoordinates = NULL;
2262 if (!machine_->getAllMachineCoordinatesView(procCoordinates)) {
2263 throw std::runtime_error(
"Existing machine does not implement " 2264 "getAllMachineCoordinatesView");
2271 std::vector <int> machine_extent_vec (procDim);
2273 int *machine_extent = &(machine_extent_vec[0]);
2274 bool *machine_extent_wrap_around =
new bool[procDim];
2275 machine_->getMachineExtentWrapArounds(machine_extent_wrap_around);
2283 if (machine_->getMachineExtent(machine_extent)) {
2285 this->shiftMachineCoordinates (
2288 machine_extent_wrap_around,
2296 int coordDim = coordinateModel_->getCoordinateDim();
2299 this->ntasks = num_parts_;
2300 this->solution_parts = result_parts;
2303 tcoord_t **partCenters = NULL;
2304 partCenters = allocMemory<tcoord_t *>(coordDim);
2305 for (
int i = 0; i < coordDim; ++i){
2306 partCenters[i] = allocMemory<tcoord_t>(this->ntasks);
2309 typedef typename Adapter::scalar_t t_scalar_t;
2312 envConst->timerStart(
MACRO_TIMERS,
"Mapping - Solution Center");
2315 getSolutionCenterCoordinates<Adapter, t_scalar_t,part_t>(
2316 envConst.getRawPtr(),
2317 ia_comm.getRawPtr(),
2318 coordinateModel_.getRawPtr(),
2319 this->solution_parts,
2326 envConst->timerStop(
MACRO_TIMERS,
"Mapping - Solution Center");
2330 if (graph_model_.getRawPtr() != NULL){
2331 getCoarsenedPartGraph<Adapter, t_scalar_t, part_t> (
2332 envConst.getRawPtr(),
2333 ia_comm.getRawPtr(),
2334 graph_model_.getRawPtr(),
2336 this->solution_parts,
2339 task_communication_xadj,
2340 task_communication_adj,
2341 task_communication_edge_weight
2347 envConst->timerStart(
MACRO_TIMERS,
"CoordinateCommunicationModel Create");
2349 this->proc_task_comm =
2358 machine_extent_wrap_around,
2359 machine_.getRawPtr());
2361 envConst->timerStop(
MACRO_TIMERS,
"CoordinateCommunicationModel Create");
2367 int myRank = comm_->getRank();
2370 envConst->timerStart(
MACRO_TIMERS,
"Mapping - Processor Task map");
2371 this->doMapping(myRank, comm_);
2372 envConst->timerStop(
MACRO_TIMERS,
"Mapping - Processor Task map");
2375 envConst->timerStart(
MACRO_TIMERS,
"Mapping - Communication Graph");
2381 envConst->timerStop(
MACRO_TIMERS,
"Mapping - Communication Graph");
2383 if (comm_->getRank() == 0){
2385 part_t taskCommCount = task_communication_xadj.size();
2386 std::cout <<
" TotalComm:" << task_communication_xadj[taskCommCount] << std::endl;
2387 part_t maxN = task_communication_xadj[0];
2388 for (
part_t i = 1; i <= taskCommCount; ++i){
2389 part_t nc = task_communication_xadj[i] - task_communication_xadj[i-1];
2390 if (maxN < nc) maxN = nc;
2392 std::cout <<
" maxNeighbor:" << maxN << std::endl;
2395 this->writeGnuPlot(comm_, soln_, coordDim, partCenters);
2398 envConst->timerStart(
MACRO_TIMERS,
"Mapping - Communication Cost");
2400 if (reduce_best_mapping && task_communication_xadj.getRawPtr() && task_communication_adj.getRawPtr()){
2402 task_to_proc.getRawPtr(),
2403 task_communication_xadj.getRawPtr(),
2404 task_communication_adj.getRawPtr(),
2405 task_communication_edge_weight.getRawPtr()
2411 envConst->timerStop(
MACRO_TIMERS,
"Mapping - Communication Cost");
2416 this->getBestMapping();
2418 this->create_local_task_to_rank(
2419 coordinateModel_->getLocalNumCoordinates(),
2420 this->solution_parts,
2421 this->task_to_proc);
2439 this->writeMapping2(comm_->getRank());
2442 delete []machine_extent_wrap_around;
2443 if (machine_->getMachineExtent(machine_extent)){
2444 for (
int i = 0; i < procDim; ++i){
2445 delete [] procCoordinates[i];
2447 delete [] procCoordinates;
2450 for (
int i = 0; i < coordDim; ++i){
2451 freeArray<tcoord_t>(partCenters[i]);
2453 freeArray<tcoord_t *>(partCenters);
2505 const Teuchos::Comm<int> *problemComm,
2508 pcoord_t **machine_coords,
2512 tcoord_t **task_coords,
2513 ArrayRCP<part_t>task_comm_xadj,
2514 ArrayRCP<part_t>task_comm_adj,
2515 pcoord_t *task_communication_edge_weight_,
2516 int recursion_depth,
2518 const part_t *machine_dimensions,
2519 int num_ranks_per_node = 1,
2520 bool divide_to_prime_first =
false,
bool reduce_best_mapping =
true 2524 proc_to_task_xadj(0),
2525 proc_to_task_adj(0),
2527 isOwnerofModel(true),
2529 task_communication_xadj(task_comm_xadj),
2530 task_communication_adj(task_comm_adj){
2533 pcoord_t ** virtual_machine_coordinates = machine_coords;
2534 bool *wrap_arounds =
new bool [proc_dim];
2535 for (
int i = 0; i < proc_dim; ++i) wrap_arounds[i] =
true;
2537 if (machine_dimensions){
2538 virtual_machine_coordinates =
2539 this->shiftMachineCoordinates (
2547 this->nprocs = num_processors;
2549 int coordDim = task_dim;
2550 this->ntasks = num_tasks;
2553 tcoord_t **partCenters = task_coords;
2556 this->proc_task_comm =
2559 virtual_machine_coordinates,
2563 this->ntasks, NULL, NULL
2572 int myRank = problemComm->getRank();
2574 this->doMapping(myRank, this->comm);
2576 this->writeMapping2(myRank);
2579 if (reduce_best_mapping && task_communication_xadj.getRawPtr() && task_communication_adj.getRawPtr()){
2581 task_to_proc.getRawPtr(),
2582 task_communication_xadj.getRawPtr(),
2583 task_communication_adj.getRawPtr(),
2584 task_communication_edge_weight_
2588 this->getBestMapping();
2603 delete [] wrap_arounds;
2605 if (machine_dimensions){
2606 for (
int i = 0; i < proc_dim; ++i){
2607 delete [] virtual_machine_coordinates[i];
2609 delete [] virtual_machine_coordinates;
2612 if(problemComm->getRank() == 0)
2613 this->writeMapping2(-1);
2640 const part_t *machine_dimensions,
2641 bool *machine_extent_wrap_around,
2643 pcoord_t **mCoords){
2644 pcoord_t **result_machine_coords = NULL;
2645 result_machine_coords =
new pcoord_t*[machine_dim];
2646 for (
int i = 0; i < machine_dim; ++i){
2647 result_machine_coords[i] =
new pcoord_t [numProcs];
2650 for (
int i = 0; i < machine_dim; ++i){
2651 part_t numMachinesAlongDim = machine_dimensions[i];
2652 part_t *machineCounts=
new part_t[numMachinesAlongDim];
2653 memset(machineCounts, 0,
sizeof(
part_t) *numMachinesAlongDim);
2655 int *filledCoordinates=
new int[numMachinesAlongDim];
2657 pcoord_t *coords = mCoords[i];
2658 for(
part_t j = 0; j < numProcs; ++j){
2660 ++machineCounts[mc];
2663 part_t filledCoordinateCount = 0;
2664 for(
part_t j = 0; j < numMachinesAlongDim; ++j){
2665 if (machineCounts[j] > 0){
2666 filledCoordinates[filledCoordinateCount++] = j;
2670 part_t firstProcCoord = filledCoordinates[0];
2671 part_t firstProcCount = machineCounts[firstProcCoord];
2673 part_t lastProcCoord = filledCoordinates[filledCoordinateCount - 1];
2674 part_t lastProcCount = machineCounts[lastProcCoord];
2676 part_t firstLastGap = numMachinesAlongDim - lastProcCoord + firstProcCoord;
2677 part_t firstLastGapProc = lastProcCount + firstProcCount;
2679 part_t leftSideProcCoord = firstProcCoord;
2680 part_t leftSideProcCount = firstProcCount;
2682 part_t biggestGapProc = numProcs;
2684 part_t shiftBorderCoordinate = -1;
2685 for(
part_t j = 1; j < filledCoordinateCount; ++j){
2686 part_t rightSideProcCoord= filledCoordinates[j];
2687 part_t rightSideProcCount = machineCounts[rightSideProcCoord];
2689 part_t gap = rightSideProcCoord - leftSideProcCoord;
2690 part_t gapProc = rightSideProcCount + leftSideProcCount;
2695 if (gap > biggestGap || (gap == biggestGap && biggestGapProc > gapProc)){
2696 shiftBorderCoordinate = rightSideProcCoord;
2697 biggestGapProc = gapProc;
2700 leftSideProcCoord = rightSideProcCoord;
2701 leftSideProcCount = rightSideProcCount;
2705 if (!(biggestGap > firstLastGap || (biggestGap == firstLastGap && biggestGapProc < firstLastGapProc))){
2706 shiftBorderCoordinate = -1;
2709 for(
part_t j = 0; j < numProcs; ++j){
2711 if (machine_extent_wrap_around[i] && coords[j] < shiftBorderCoordinate){
2712 result_machine_coords[i][j] = coords[j] + numMachinesAlongDim;
2716 result_machine_coords[i][j] = coords[j];
2720 delete [] machineCounts;
2721 delete [] filledCoordinates;
2724 return result_machine_coords;
2736 procs = this->task_to_proc.getRawPtr() + taskId;
2742 return this->task_to_proc[taskId];
2752 part_t task_begin = this->proc_to_task_xadj[procId];
2753 part_t taskend = this->proc_to_task_xadj[procId+1];
2754 parts = this->proc_to_task_adj.getRawPtr() + task_begin;
2755 numParts = taskend - task_begin;
2759 part_t task_begin = this->proc_to_task_xadj[procId];
2760 part_t taskend = this->proc_to_task_xadj[procId+1];
2768 if (taskend - task_begin > 0){
2769 ArrayView <part_t> assignedParts(this->proc_to_task_adj.getRawPtr() + task_begin, taskend - task_begin);
2770 return assignedParts;
2773 ArrayView <part_t> assignedParts;
2774 return assignedParts;
2850 template <
typename part_t,
typename pcoord_t,
typename tcoord_t>
2852 RCP<
const Teuchos::Comm<int> > problemComm,
2855 pcoord_t **machine_coords,
2858 tcoord_t **task_coords,
2861 pcoord_t *task_communication_edge_weight_,
2862 part_t *proc_to_task_xadj,
2863 part_t *proc_to_task_adj,
2864 int recursion_depth,
2866 const part_t *machine_dimensions,
2867 int num_ranks_per_node = 1,
2868 bool divide_to_prime_first =
false 2877 typedef Tpetra::MultiVector<tcoord_t, part_t, part_t>
tMVector_t;
2879 Teuchos::ArrayRCP<part_t> task_communication_xadj (task_comm_xadj, 0, num_tasks+1,
false);
2881 Teuchos::ArrayRCP<part_t> task_communication_adj;
2882 if (task_comm_xadj){
2883 Teuchos::ArrayRCP<part_t> tmp_task_communication_adj (task_comm_adj, 0, task_comm_xadj[num_tasks],
false);
2884 task_communication_adj = tmp_task_communication_adj;
2891 problemComm.getRawPtr(),
2900 task_communication_xadj,
2901 task_communication_adj,
2902 task_communication_edge_weight_,
2907 divide_to_prime_first
2911 part_t* proc_to_task_xadj_;
2912 part_t* proc_to_task_adj_;
2914 ctm->
getProcTask(proc_to_task_xadj_, proc_to_task_adj_);
2916 for (
part_t i = 0; i <= num_processors; ++i){
2917 proc_to_task_xadj[i] = proc_to_task_xadj_[i];
2919 for (
part_t i = 0; i < num_tasks; ++i){
2920 proc_to_task_adj[i] = proc_to_task_adj_[i];
2927 template <
typename proc_coord_t,
typename v_lno_t>
2929 const int machine_coord_dim,
const int num_ranks, proc_coord_t **machine_coords,
2930 const v_lno_t num_tasks,
const v_lno_t *task_communication_xadj,
const v_lno_t *task_communication_adj,
const int *task_to_rank){
2932 std::string rankStr = Teuchos::toString<int>(myRank);
2933 std::string gnuPlots =
"gnuPlot", extentionS =
".plot";
2934 std::string outF = gnuPlots + rankStr+ extentionS;
2935 std::ofstream gnuPlotCode ( outF.c_str(), std::ofstream::out);
2937 if (machine_coord_dim != 3) {
2938 std::cerr <<
"Mapping Write is only good for 3 dim" << std::endl;
2941 std::string ss =
"";
2942 std::string procs =
"";
2944 std::set < std::tuple<int,int,int,int,int,int> > my_arrows;
2945 for(v_lno_t origin_task = 0; origin_task < num_tasks; ++origin_task){
2946 int origin_rank = task_to_rank[origin_task];
2947 std::string gnuPlotArrow =
"set arrow from ";
2949 for(
int j = 0; j < machine_coord_dim; ++j){
2950 if (j == machine_coord_dim - 1){
2951 gnuPlotArrow += Teuchos::toString<proc_coord_t>(machine_coords[j][origin_rank]);
2952 procs += Teuchos::toString<proc_coord_t>(machine_coords[j][origin_rank]);
2956 gnuPlotArrow += Teuchos::toString<proc_coord_t>(machine_coords[j][origin_rank]) +
",";
2957 procs += Teuchos::toString<proc_coord_t>(machine_coords[j][origin_rank])+
" ";
2962 gnuPlotArrow +=
" to ";
2965 for (
int nind = task_communication_xadj[origin_task]; nind < task_communication_xadj[origin_task + 1]; ++nind){
2966 int neighbor_task = task_communication_adj[nind];
2968 bool differentnode =
false;
2969 int neighbor_rank = task_to_rank[neighbor_task];
2971 for(
int j = 0; j < machine_coord_dim; ++j){
2972 if (
int (machine_coords[j][origin_rank]) != int (machine_coords[j][neighbor_rank])){
2973 differentnode =
true;
break;
2976 std::tuple<int,int,int, int, int, int> foo (
2977 (
int) (machine_coords[0][origin_rank]),
2978 (
int) (machine_coords[1][origin_rank]),
2979 (
int) (machine_coords[2][origin_rank]),
2980 (
int) (machine_coords[0][neighbor_rank]),
2981 (
int) (machine_coords[1][neighbor_rank]),
2982 (
int) (machine_coords[2][neighbor_rank]));
2985 if (differentnode && my_arrows.find(foo) == my_arrows.end()){
2986 my_arrows.insert(foo);
2988 std::string gnuPlotArrow2 =
"";
2989 for(
int j = 0; j < machine_coord_dim; ++j){
2990 if(j == machine_coord_dim - 1){
2991 gnuPlotArrow2 += Teuchos::toString<float>(machine_coords[j][neighbor_rank]);
2994 gnuPlotArrow2 += Teuchos::toString<float>(machine_coords[j][neighbor_rank]) +
",";
2997 ss += gnuPlotArrow + gnuPlotArrow2 +
" nohead\n";
3002 std::ofstream procFile (
"procPlot.plot", std::ofstream::out);
3003 procFile << procs <<
"\n";
3007 if(machine_coord_dim == 2){
3008 gnuPlotCode <<
"plot \"procPlot.plot\" with points pointsize 3\n";
3010 gnuPlotCode <<
"splot \"procPlot.plot\" with points pointsize 3\n";
3013 gnuPlotCode << ss <<
"\nreplot\n pause -1\npause -1";
3014 gnuPlotCode.close();
void setParams(int dimension_, int heapsize)
CommunicationModel Base Class that performs mapping between the coordinate partitioning result...
Zoltan2::BaseAdapter< userTypes_t > base_adapter_t
void getBestMapping()
finds the lowest cost mapping and broadcasts solution to everyone.
Tpetra::CrsMatrix< zscalar_t, zlno_t, zgno_t, znode_t > tcrsMatrix_t
void ithPermutation(const IT n, IT i, IT *perm)
CommunicationModel(part_t no_procs_, part_t no_tasks_)
CoordinateTaskMapper(const Teuchos::RCP< const Teuchos::Comm< int > > comm_, const Teuchos::RCP< const MachineRepresentation< pcoord_t, part_t > > machine_, const Teuchos::RCP< const Adapter > input_adapter_, const part_t num_parts_, const part_t *result_parts, const Teuchos::RCP< const Environment > envConst, bool is_input_adapter_distributed=true, int num_ranks_per_node=1, bool divide_to_prime_first=false, bool reduce_best_mapping=true)
Constructor. Instead of Solution we have two parameters, numparts When this constructor is called...
virtual ~CommunicationModel()
size_t getLocalNumCoordinates() const
Returns the number of coordinates on this process.
Time an algorithm (or other entity) as a whole.
bool * machine_extent_wrap_around
void setPartArraySize(int psize)
WT getDistance(IT index, WT **elementCoords)
virtual size_t getLocalNumberOfParts() const
Returns the number of parts to be assigned to this process.
bool getHopCount(int rank1, int rank2, pcoord_t &hops) const
CoordinateCommunicationModel(int pcoord_dim_, pcoord_t **pcoords_, int tcoord_dim_, tcoord_t **tcoords_, part_t no_procs_, part_t no_tasks_, int *machine_extent_, bool *machine_extent_wrap_around_, const MachineRepresentation< pcoord_t, part_t > *machine_=NULL)
Class Constructor:
void getCoarsenedPartGraph(const Environment *envConst, const Teuchos::Comm< int > *comm, const Zoltan2::GraphModel< typename Adapter::base_adapter_t > *graph, part_t np, const part_t *parts, ArrayRCP< part_t > &g_part_xadj, ArrayRCP< part_t > &g_part_adj, ArrayRCP< scalar_t > &g_part_ew)
void getClosestSubset(part_t *proc_permutation, part_t nprocs, part_t ntasks) const
Function is called whenever nprocs > no_task. Function returns only the subset of processors that are...
void visualize_mapping(int myRank, const int machine_coord_dim, const int num_ranks, proc_coord_t **machine_coords, const v_lno_t num_tasks, const v_lno_t *task_communication_xadj, const v_lno_t *task_communication_adj, const int *task_to_rank)
PartitionMapping maps a solution or an input distribution to ranks.
ArrayRCP< part_t > local_task_to_rank
void getSolutionCenterCoordinates(const Environment *envConst, const Teuchos::Comm< int > *comm, const Zoltan2::CoordinateModel< typename Adapter::base_adapter_t > *coords, const part_t *parts, int coordDim, part_t ntasks, scalar_t **partCenters)
size_t getLocalNumVertices() const
Returns the number vertices on this process.
bool getNewCenters(WT **coords)
void timerStop(TimerType tt, const char *timerName) const
Stop a named timer.
std::bitset< NUM_MODEL_FLAGS > modelFlag_t
size_t getLocalNumEdges() const
Returns the number of edges on this process. In global or subset graphs, includes off-process edges...
virtual ~CoordinateTaskMapper()
void sequential_task_partitioning(const RCP< const Environment > &env, mj_lno_t num_total_coords, mj_lno_t num_selected_coords, size_t num_target_part, int coord_dim, mj_scalar_t **mj_coordinates, mj_lno_t *initial_selected_coords_output_permutation, mj_lno_t *output_xadj, int recursion_depth, const mj_part_t *part_no_array, bool partition_along_longest_dim, int num_ranks_per_node, bool divide_to_prime_first_)
Special function for partitioning for task mapping. Runs sequentially, and performs deterministic partitioning.
void calculateCommunicationCost(part_t *task_to_proc, part_t *task_communication_xadj, part_t *task_communication_adj, pcoord_t *task_communication_edge_weight)
ArrayView< part_t > getAssignedTasksForProc(part_t procId)
const MachineRepresentation< pcoord_t, part_t > * machine
size_t getEdgeList(ArrayView< const gno_t > &edgeIds, ArrayView< const lno_t > &offsets, ArrayView< input_t > &wgts) const
Sets pointers to this process' edge (neighbor) global Ids, including off-process edges.
part_t getAssignedProcForTask(part_t taskId)
getAssignedProcForTask function, returns the assigned processor id for the given task ...
void getMinDistanceCluster(IT *procPermutation)
void doMapping(int myRank, const Teuchos::RCP< const Teuchos::Comm< int > > comm_)
doMapping function, calls getMapping function of communicationModel object.
virtual void getPartsForProc(int procId, part_t &numParts, part_t *&parts) const
getAssignedProcForTask function, returns the assigned tasks with the number of tasks.
Defines the XpetraMultiVectorAdapter.
SparseMatrixAdapter_t::part_t part_t
Multi Jagged coordinate partitioning algorithm.
virtual void getProcsForPart(part_t taskId, part_t &numProcs, part_t *&procs) const
getAssignedProcForTask function, returns the assigned tasks with the number of tasks.
void create_local_task_to_rank(const lno_t num_local_coords, const part_t *local_coord_parts, const ArrayRCP< part_t > task_to_proc_)
This class provides geometric coordinates with optional weights to the Zoltan2 algorithm.
virtual void map(const RCP< MappingSolution< Adapter > > &mappingsoln)
Mapping method.
int getNumWeightsPerEdge() const
Returns the number (0 or greater) of weights per edge.
Contains the Multi-jagged algorithm.
PartitionMapping maps a solution or an input distribution to ranks.
void getProcTask(part_t *&proc_to_task_xadj_, part_t *&proc_to_task_adj_)
A PartitioningSolution is a solution to a partitioning problem.
size_t getCoordinates(ArrayView< const gno_t > &Ids, ArrayView< input_t > &xyz, ArrayView< input_t > &wgts) const
Returns the coordinate ids, values and optional weights.
void copyCoordinates(IT *permutation)
void getGridCommunicationGraph(part_t taskCount, part_t *&task_comm_xadj, part_t *&task_comm_adj, std::vector< int > grid_dims)
Zoltan2_ReduceBestMapping Class: reduces to the minimum-cost mapping, breaking ties with the minimum proc id...
ArrayRCP< part_t > task_communication_adj
BaseAdapterType
An enum to identify general types of adapters.
virtual void getMapping(int myRank, const RCP< const Environment > &env, ArrayRCP< part_t > &rcp_proc_to_task_xadj, ArrayRCP< part_t > &rcp_proc_to_task_adj, ArrayRCP< part_t > &rcp_task_to_proc, const Teuchos::RCP< const Teuchos::Comm< int > > comm_) const
Function is called whenever nprocs > no_task. Function returns only the subset of processors that are...
The StridedData class manages lists of weights or coordinates.
CoordinateTaskMapper(const Environment *env_const_, const Teuchos::Comm< int > *problemComm, int proc_dim, int num_processors, pcoord_t **machine_coords, int task_dim, part_t num_tasks, tcoord_t **task_coords, ArrayRCP< part_t >task_comm_xadj, ArrayRCP< part_t >task_comm_adj, pcoord_t *task_communication_edge_weight_, int recursion_depth, part_t *part_no_array, const part_t *machine_dimensions, int num_ranks_per_node=1, bool divide_to_prime_first=false, bool reduce_best_mapping=true)
Constructor The mapping constructor which will also perform the mapping operation. The result mapping can be obtained by –getAssignedProcForTask function: which returns the assigned processor id for the given task –getPartsForProc: which returns the assigned tasks with the number of tasks.
void setPartArray(part_t *pNo)
ArrayRCP< part_t > proc_to_task_adj
The user parameters, debug, timing and memory profiling output objects, and error checking methods...
static part_t umpa_uRandom(part_t l, int &_u_umpa_seed)
void reduce(const Ordinal count, const T inBuffer[], T inoutBuffer[]) const
Implement Teuchos::ValueTypeReductionOp interface.
void addPoint(IT index, WT distance)
bool getNewCenters(WT *center, WT **coords, int dimension)
MachineRepresentation Class Base class for representing machine coordinates, networks, etc.
size_t getVertexList(ArrayView< const gno_t > &Ids, ArrayView< input_t > &wgts) const
Sets pointers to this process' vertex Ids and their weights.
CoordinateTaskMapper(const Teuchos::RCP< const Teuchos::Comm< int > > comm_, const Teuchos::RCP< const MachineRepresentation< pcoord_t, part_t > > machine_, const Teuchos::RCP< const Adapter > input_adapter_, const Teuchos::RCP< const Zoltan2::PartitioningSolution< Adapter > > soln_, const Teuchos::RCP< const Environment > envConst, bool is_input_adapter_distributed=true, int num_ranks_per_node=1, bool divide_to_prime_first=false, bool reduce_best_mapping=true)
Constructor. When this constructor is called, in order to calculate the communication metric...
CoordinateModelInput Class that performs mapping between the coordinate partitioning result and MPI ranks...
Traits class to handle conversions between gno_t/lno_t and TPL data types (e.g., ParMETIS's idx_t...
double getCommunicationCostMetric()
size_t getActualGlobalNumberOfParts() const
Returns the actual global number of parts provided in setParts().
pcoord_t ** shiftMachineCoordinates(int machine_dim, const part_t *machine_dimensions, bool *machine_extent_wrap_around, part_t numProcs, pcoord_t **mCoords)
Using the machine dimensions provided, create virtual machine coordinates by assigning the largest ga...
Defines the MappingSolution class.
ArrayRCP< part_t > task_communication_xadj
const part_t * getPartListView() const
Returns the part list corresponding to the global ID list.
GraphModel defines the interface required for graph models.
void update_visit_order(part_t *visitOrder, part_t n, int &_u_umpa_seed, part_t rndm)
Zoltan2_ReduceBestMapping()
Default Constructor.
bool divide_to_prime_first
size_t getTargetGlobalNumberOfParts() const
Returns the global number of parts desired in the solution.
void timerStart(TimerType tt, const char *timerName) const
Start a named timer.
KmeansHeap Class, max heap, but holds the minimum values.
Defines the GraphModel interface.
void fillContinousArray(T *arr, size_t arrSize, T *val)
fillContinousArray function
#define ZOLTAN2_ALGMULTIJAGGED_SWAP(a, b, temp)
void push_down(IT index_on_heap)
ArrayRCP< scalar_t > task_communication_edge_weight
virtual ~CoordinateCommunicationModel()
RCP< Comm< int > > create_subCommunicator()
creates and returns the subcommunicator for the processor group.
KMeansAlgorithm Class that performs clustering of the coordinates, and returns the closest set of coo...
ArrayRCP< part_t > task_to_proc
KMeansAlgorithm(int dim_, IT numElements_, WT **elementCoords_, IT required_elements_)
KMeansAlgorithm Constructor.
virtual double getProcDistance(int procId1, int procId2) const
void writeMapping2(int myRank)
void copyCoordinates(IT *permutation)
void coordinateTaskMapperInterface(RCP< const Teuchos::Comm< int > > problemComm, int proc_dim, int num_processors, pcoord_t **machine_coords, int task_dim, part_t num_tasks, tcoord_t **task_coords, part_t *task_comm_xadj, part_t *task_comm_adj, pcoord_t *task_communication_edge_weight_, part_t *proc_to_task_xadj, part_t *proc_to_task_adj, int recursion_depth, part_t *part_no_array, const part_t *machine_dimensions, int num_ranks_per_node=1, bool divide_to_prime_first=false)
Constructor The interface function that calls CoordinateTaskMapper which will also perform the mappin...
CoordinateCommunicationModel()
ArrayRCP< part_t > proc_to_task_xadj
void setHeapsize(IT heapsize_)
CoordinateCommunicationModel< pcoord_t, tcoord_t, part_t > * proc_task_comm