Tpetra parallel linear algebra  Version of the Day
Tpetra_Details_Merge.hpp
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_DETAILS_MERGE_HPP
43 #define TPETRA_DETAILS_MERGE_HPP
44 
45 #include "TpetraCore_config.h"
46 #include "Teuchos_TestForException.hpp"
47 #include <algorithm> // std::sort
48 #include <utility> // std::pair, std::make_pair
49 #include <stdexcept>
50 
51 namespace Tpetra {
52 namespace Details {
53 
72 template<class OrdinalType, class IndexType>
73 IndexType
74 countMergeUnsortedIndices (const OrdinalType curInds[],
75  const IndexType numCurInds,
76  const OrdinalType inputInds[],
77  const IndexType numInputInds)
78 {
79  IndexType mergeCount = 0;
80 
81  if (numCurInds <= numInputInds) {
82  // More input than current entries, so iterate linearly over
83  // input and scan current entries repeatedly.
84  for (IndexType inPos = 0; inPos < numInputInds; ++inPos) {
85  const OrdinalType inVal = inputInds[inPos];
86  for (IndexType curPos = 0; curPos < numCurInds; ++curPos) {
87  if (curInds[curPos] == inVal) {
88  ++mergeCount;
89  }
90  }
91  }
92  }
93  else { // numCurInds > numInputInds
94  // More current entries than input, so iterate linearly over
95  // current entries and scan input repeatedly.
96  for (IndexType curPos = 0; curPos < numCurInds; ++curPos) {
97  const OrdinalType curVal = curInds[curPos];
98  for (IndexType inPos = 0; inPos < numInputInds; ++inPos) {
99  if (inputInds[inPos] == curVal) {
100  ++mergeCount;
101  }
102  }
103  }
104  }
105 
106 #ifdef HAVE_TPETRA_DEBUG
107  TEUCHOS_TEST_FOR_EXCEPTION
108  (mergeCount > numInputInds, std::logic_error, "mergeCount = " <<
109  mergeCount << " > numInputInds = " << numInputInds << ".");
110 #endif // HAVE_TPETRA_DEBUG
111  return mergeCount;
112 }
113 
131 template<class OrdinalType, class IndexType>
132 IndexType
133 countMergeSortedIndices (const OrdinalType curInds[],
134  const IndexType numCurInds,
135  const OrdinalType inputInds[],
136  const IndexType numInputInds)
137 {
138  // Only count possible merges; don't merge yet. If the row
139  // doesn't have enough space, we want to return without side
140  // effects.
141  IndexType curPos = 0;
142  IndexType inPos = 0;
143  IndexType mergeCount = 0;
144  while (inPos < numInputInds && curPos < numCurInds) {
145  const OrdinalType inVal = inputInds[inPos];
146  const OrdinalType curVal = curInds[curPos];
147 
148  if (curVal == inVal) { // can merge
149  ++mergeCount;
150  ++inPos; // go on to next input
151  } else if (curVal < inVal) {
152  ++curPos; // go on to next row entry
153  } else { // curVal > inVal
154  ++inPos; // can't merge it ever, since row entries sorted
155  }
156  }
157 
158 #ifdef HAVE_TPETRA_DEBUG
159  TEUCHOS_TEST_FOR_EXCEPTION
160  (inPos > numInputInds, std::logic_error, "inPos = " << inPos <<
161  " > numInputInds = " << numInputInds << ".");
162  TEUCHOS_TEST_FOR_EXCEPTION
163  (curPos > numCurInds, std::logic_error, "curPos = " << curPos <<
164  " > numCurInds = " << numCurInds << ".");
165  TEUCHOS_TEST_FOR_EXCEPTION
166  (mergeCount > numInputInds, std::logic_error, "mergeCount = " <<
167  mergeCount << " > numInputInds = " << numInputInds << ".");
168 #endif // HAVE_TPETRA_DEBUG
169 
170  // At this point, 2 situations are possible:
171  //
172  // 1. inPos == numInputInds: We looked at all inputs. Some
173  // (mergeCount of them) could have been merged.
174  // 2. inPos < numInputInds: We didn't get to look at all inputs.
175  // Since the inputs are sorted, we know that those inputs we
176  // didn't examine weren't mergeable.
177  //
178  // Either way, mergeCount gives the number of mergeable inputs.
179  return mergeCount;
180 }
181 
182 
203 template<class OrdinalType, class IndexType>
204 std::pair<bool, IndexType>
205 mergeSortedIndices (OrdinalType curInds[],
206  const IndexType midPos,
207  const IndexType endPos,
208  const OrdinalType inputInds[],
209  const IndexType numInputInds)
210 {
211  // Optimize for the following cases, in decreasing order of
212  // optimization concern:
213  //
214  // a. Current row has allocated space but no entries
215  // b. All input indices already in the graph
216  //
217  // If the row has insufficient space for a merge, don't do
218  // anything! Just return an upper bound on the number of extra
219  // entries required to fit everything. This imposes extra cost,
220  // but correctly supports the count, allocate, fill, compute
221  // pattern. (If some entries were merged in and others weren't,
222  // how would you know which got merged in? CrsGraph insert is
223  // idempotent, but CrsMatrix insert does a += on the value and
224  // is therefore not idempotent.)
225  if (midPos == 0) {
226  // Current row has no entries, but may have preallocated space.
227  if (endPos >= numInputInds) {
228  // Sufficient space for new entries; copy directly.
229  for (IndexType k = 0; k < numInputInds; ++k) {
230  curInds[k] = inputInds[k];
231  }
232  std::sort (curInds, curInds + numInputInds);
233  return std::make_pair (true, numInputInds);
234  }
235  else { // not enough space
236  return std::make_pair (false, numInputInds);
237  }
238  }
239  else { // current row contains indices, requiring merge
240  // Only count possible merges; don't merge yet. If the row
241  // doesn't have enough space, we want to return without side
242  // effects.
243  const IndexType mergeCount =
244  countMergeSortedIndices<OrdinalType, IndexType> (curInds, midPos,
245  inputInds,
246  numInputInds);
247  const IndexType extraSpaceNeeded = numInputInds - mergeCount;
248  const IndexType newRowLen = midPos + extraSpaceNeeded;
249  if (newRowLen > endPos) {
250  return std::make_pair (false, newRowLen);
251  }
252  else { // we have enough space; merge in
253  IndexType curPos = 0;
254  IndexType inPos = 0;
255  IndexType newPos = midPos;
256  while (inPos < numInputInds && curPos < midPos) {
257  const OrdinalType inVal = inputInds[inPos];
258  const OrdinalType curVal = curInds[curPos];
259 
260  if (curVal == inVal) { // can merge
261  ++inPos; // merge and go on to next input
262  } else if (curVal < inVal) {
263  ++curPos; // go on to next row entry
264  } else { // curVal > inVal
265  // The input doesn't exist in the row.
266  // Copy it to the end; we'll sort it in later.
267  curInds[newPos] = inVal;
268  ++newPos;
269  ++inPos; // move on to next input
270  }
271  }
272 
273  // If any inputs remain, and the current row has space for them,
274  // then copy them in. We'll sort them later.
275  for (; inPos < numInputInds && newPos < newRowLen; ++inPos, ++newPos) {
276  curInds[newPos] = inputInds[inPos];
277  }
278 
279 #ifdef HAVE_TPETRA_DEBUG
280  TEUCHOS_TEST_FOR_EXCEPTION
281  (newPos != newRowLen, std::logic_error, "mergeSortedIndices: newPos = "
282  << newPos << " != newRowLen = " << newRowLen << " = " << midPos <<
283  " + " << extraSpaceNeeded << ". Please report this bug to the Tpetra "
284  "developers.");
285 #endif // HAVE_TPETRA_DEBUG
286 
287  if (newPos != midPos) { // new entries at end; sort them in
288  // FIXME (mfh 03 Jan 2016) Rather than sorting, it would
289  // be faster (linear time) just to iterate backwards
290  // through the current entries, pushing them over to make
291  // room for unmerged input. However, I'm not so worried
292  // about the asymptotics here, because dense rows in a
293  // sparse matrix are ungood anyway.
294  std::sort (curInds, curInds + newPos);
295  }
296  return std::make_pair (true, newPos);
297  }
298  }
299 }
300 
301 
323 template<class OrdinalType, class IndexType>
324 std::pair<bool, IndexType>
325 mergeUnsortedIndices (OrdinalType curInds[],
326  const IndexType midPos,
327  const IndexType endPos,
328  const OrdinalType inputInds[],
329  const IndexType numInputInds)
330 {
331  // Optimize for the following cases, in decreasing order of
332  // optimization concern:
333  //
334  // a. Current row has allocated space but no entries
335  // b. All input indices already in the graph
336  //
337  // If the row has insufficient space for a merge, don't do
338  // anything! Just return an upper bound on the number of extra
339  // entries required to fit everything. This imposes extra cost,
340  // but correctly supports the count, allocate, fill, compute
341  // pattern. (If some entries were merged in and others weren't,
342  // how would you know which got merged in? CrsGraph insert is
343  // idempotent, but CrsMatrix insert does a += on the value and
344  // is therefore not idempotent.)
345  if (midPos == 0) {
346  // Current row has no entries, but may have preallocated space.
347  if (endPos >= numInputInds) {
348  // Sufficient space for new entries; copy directly.
349  for (IndexType k = 0; k < numInputInds; ++k) {
350  curInds[k] = inputInds[k];
351  }
352  return std::make_pair (true, numInputInds);
353  }
354  else { // not enough space
355  return std::make_pair (false, numInputInds);
356  }
357  }
358  else { // current row contains indices, requiring merge
359  // Only count possible merges; don't merge yet. If the row
360  // doesn't have enough space, we want to return without side
361  // effects.
362  const IndexType mergeCount =
363  countMergeUnsortedIndices<OrdinalType, IndexType> (curInds, midPos,
364  inputInds,
365  numInputInds);
366  const IndexType extraSpaceNeeded = numInputInds - mergeCount;
367  const IndexType newRowLen = midPos + extraSpaceNeeded;
368  if (newRowLen > endPos) {
369  return std::make_pair (false, newRowLen);
370  }
371  else { // we have enough space; merge in
372  // Iterate linearly over input. Scan current entries
373  // repeatedly. Add new entries at end.
374  IndexType newPos = midPos;
375  for (IndexType inPos = 0; inPos < numInputInds; ++inPos) {
376  const OrdinalType inVal = inputInds[inPos];
377  bool merged = false;
378  for (IndexType curPos = 0; curPos < midPos; ++curPos) {
379  if (curInds[curPos] == inVal) {
380  merged = true;
381  }
382  }
383  if (! merged) {
384  curInds[newPos] = inVal;
385  ++newPos;
386  }
387  }
388  return std::make_pair (true, newPos);
389  }
390  }
391 }
392 
417 template<class OrdinalType, class ValueType, class IndexType>
418 std::pair<bool, IndexType>
419 mergeUnsortedIndicesAndValues (OrdinalType curInds[],
420  ValueType curVals[],
421  const IndexType midPos,
422  const IndexType endPos,
423  const OrdinalType inputInds[],
424  const ValueType inputVals[],
425  const IndexType numInputInds)
426 {
427  // Optimize for the following cases, in decreasing order of
428  // optimization concern:
429  //
430  // a. Current row has allocated space but no entries
431  // b. All input indices already in the graph
432  //
433  // If the row has insufficient space for a merge, don't do
434  // anything! Just return an upper bound on the number of extra
435  // entries required to fit everything. This imposes extra cost,
436  // but correctly supports the count, allocate, fill, compute
437  // pattern. (If some entries were merged in and others weren't,
438  // how would you know which got merged in? CrsGraph insert is
439  // idempotent, but CrsMatrix insert does a += on the value and
440  // is therefore not idempotent.)
441  if (midPos == 0) {
442  // Current row has no entries, but may have preallocated space.
443  if (endPos >= numInputInds) {
444  // Sufficient space for new entries; copy directly.
445  for (IndexType k = 0; k < numInputInds; ++k) {
446  curInds[k] = inputInds[k];
447  curVals[k] = inputVals[k];
448  }
449  return std::make_pair (true, numInputInds);
450  }
451  else { // not enough space
452  return std::make_pair (false, numInputInds);
453  }
454  }
455  else { // current row contains indices, requiring merge
456  // Only count possible merges; don't merge yet. If the row
457  // doesn't have enough space, we want to return without side
458  // effects.
459  const IndexType mergeCount =
460  countMergeUnsortedIndices<OrdinalType, IndexType> (curInds, midPos,
461  inputInds,
462  numInputInds);
463  const IndexType extraSpaceNeeded = numInputInds - mergeCount;
464  const IndexType newRowLen = midPos + extraSpaceNeeded;
465  if (newRowLen > endPos) {
466  return std::make_pair (false, newRowLen);
467  }
468  else { // we have enough space; merge in
469  // Iterate linearly over input. Scan current entries
470  // repeatedly. Add new entries at end.
471  IndexType newPos = midPos;
472  for (IndexType inPos = 0; inPos < numInputInds; ++inPos) {
473  const OrdinalType inInd = inputInds[inPos];
474  bool merged = false;
475  for (IndexType curPos = 0; curPos < midPos; ++curPos) {
476  if (curInds[curPos] == inInd) {
477  merged = true;
478  curVals[curPos] += inputVals[inPos];
479  }
480  }
481  if (! merged) {
482  curInds[newPos] = inInd;
483  curVals[newPos] = inputVals[inPos];
484  ++newPos;
485  }
486  }
487  return std::make_pair (true, newPos);
488  }
489  }
490 }
491 
492 } // namespace Details
493 } // namespace Tpetra
494 
495 #endif // TPETRA_DETAILS_MERGE_HPP
Implementation details of Tpetra.
IndexType countMergeSortedIndices(const OrdinalType curInds[], const IndexType numCurInds, const OrdinalType inputInds[], const IndexType numInputInds)
Count the number of column indices that can be merged into the current row, assuming that both the cu...
IndexType countMergeUnsortedIndices(const OrdinalType curInds[], const IndexType numCurInds, const OrdinalType inputInds[], const IndexType numInputInds)
Count the number of column indices that can be merged into the current row, assuming that both the cu...
std::pair< bool, IndexType > mergeSortedIndices(OrdinalType curInds[], const IndexType midPos, const IndexType endPos, const OrdinalType inputInds[], const IndexType numInputInds)
Attempt to merge the input indices into the current row's column indices, assuming that both the curr...
std::pair< bool, IndexType > mergeUnsortedIndices(OrdinalType curInds[], const IndexType midPos, const IndexType endPos, const OrdinalType inputInds[], const IndexType numInputInds)
Attempt to merge the input indices into the current row's column indices, assuming that both the curr...
std::pair< bool, IndexType > mergeUnsortedIndicesAndValues(OrdinalType curInds[], ValueType curVals[], const IndexType midPos, const IndexType endPos, const OrdinalType inputInds[], const ValueType inputVals[], const IndexType numInputInds)
Attempt to merge the input indices and values into the current row's column indices and corresponding...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void sort(View &view, const size_t &size)
Convenience wrapper for std::sort for host-accessible views.