Akumuli
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
timsort.hpp
1 /*
2  * C++ implementation of timsort
3  *
4  * ported from Python's and OpenJDK's:
5  * - http://svn.python.org/projects/python/trunk/Objects/listobject.c
6  * - http://cr.openjdk.java.net/~martin/webrevs/openjdk7/timsort/raw_files/new/src/share/classes/java/util/TimSort.java
7  *
8  * Copyright (c) 2011 Fuji, Goro (gfx) <gfuji@cpan.org>.
9  *
10  * Permission is hereby granted, free of charge, to any person obtaining a copy
11  * of this software and associated documentation files (the "Software"), to
12  * deal in the Software without restriction, including without limitation the
13  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
14  * sell copies of the Software, and to permit persons to whom the Software is
15  * furnished to do so, subject to the following conditions:
16  *
17  * The above copyright notice and this permission notice shall be included in
18  * all copies or substantial portions of the Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
26  * IN THE SOFTWARE.
27  */
28 
29 #ifndef GFX_TIMSORT_HPP
30 #define GFX_TIMSORT_HPP
31 
32 #include <vector>
33 #include <cassert>
34 #include <iterator>
35 #include <algorithm>
36 #include <utility>
37 
38 #ifdef ENABLE_TIMSORT_LOG
39 #include <iostream>
40 #define GFX_TIMSORT_LOG(expr) (std::clog << "# " << __func__ << ": " << expr << std::endl)
41 #else
42 #define GFX_TIMSORT_LOG(expr) ((void)0)
43 #endif
44 
45 #if ENABLE_STD_MOVE && __cplusplus >= 201103L
46 #define GFX_TIMSORT_MOVE(x) std::move(x)
47 #else
48 #define GFX_TIMSORT_MOVE(x) (x)
49 #endif
50 
51 namespace gfx {
52 
53 // ---------------------------------------
54 // Declaration
55 // ---------------------------------------
56 
60 template<typename RandomAccessIterator>
61 inline void timsort(RandomAccessIterator const first, RandomAccessIterator const last);
62 
66 template<typename RandomAccessIterator, typename LessFunction>
67 inline void timsort(RandomAccessIterator const first, RandomAccessIterator const last, LessFunction compare);
68 
69 
70 // ---------------------------------------
71 // Implementation
72 // ---------------------------------------
73 
74 template <typename Value, typename LessFunction> class Compare {
75  public:
76  typedef Value value_type;
77  typedef LessFunction func_type;
78 
79  Compare(LessFunction f)
80  : less_(f) { }
82  : less_(other.less_) { }
83 
84  bool lt(value_type x, value_type y) {
85  return less_(x, y);
86  }
87  bool le(value_type x, value_type y) {
88  return less_(x, y) || !less_(y, x);
89  }
90  bool gt(value_type x, value_type y) {
91  return !less_(x, y) && less_(y, x);
92  }
93  bool ge(value_type x, value_type y) {
94  return !less_(x, y);
95  }
96 
97  func_type& less_function() {
98  return less_;
99  }
100  private:
101  func_type less_;
102 };
103 
104 template <typename RandomAccessIterator, typename LessFunction>
105 class TimSort {
106  typedef RandomAccessIterator iter_t;
107  typedef typename std::iterator_traits<iter_t>::value_type value_t;
108  typedef typename std::iterator_traits<iter_t>::reference ref_t;
109  typedef typename std::iterator_traits<iter_t>::difference_type diff_t;
111 
112  static const int MIN_MERGE = 32;
113 
114  compare_t comp_;
115 
116  static const int MIN_GALLOP = 7;
117 
118  int minGallop_; // default to MIN_GALLOP
119 
120  std::vector<value_t> tmp_; // temp storage for merges
121  typedef typename std::vector<value_t>::iterator tmp_iter_t;
122 
123  struct run {
124  iter_t base;
125  diff_t len;
126 
127  run(iter_t const b, diff_t const l) : base(b), len(l) { }
128  };
129  std::vector<run> pending_;
130 
131  static
132  void sort(iter_t const lo, iter_t const hi, compare_t c) {
133  assert( lo <= hi );
134 
135  diff_t nRemaining = (hi - lo);
136  if(nRemaining < 2) {
137  return; // nothing to do
138  }
139 
140  if(nRemaining < MIN_MERGE) {
141  diff_t const initRunLen = countRunAndMakeAscending(lo, hi, c);
142  GFX_TIMSORT_LOG("initRunLen: " << initRunLen);
143  binarySort(lo, hi, lo + initRunLen, c);
144  return;
145  }
146 
147  TimSort ts(c);
148  diff_t const minRun = minRunLength(nRemaining);
149  iter_t cur = lo;
150  do {
151  diff_t runLen = countRunAndMakeAscending(cur, hi, c);
152 
153  if(runLen < minRun) {
154  diff_t const force = std::min(nRemaining, minRun);
155  binarySort(cur, cur + force, cur + runLen, c);
156  runLen = force;
157  }
158 
159  ts.pushRun(cur, runLen);
160  ts.mergeCollapse();
161 
162  cur += runLen;
163  nRemaining -= runLen;
164  } while(nRemaining != 0);
165 
166  assert( cur == hi );
167  ts.mergeForceCollapse();
168  assert( ts.pending_.size() == 1 );
169 
170  GFX_TIMSORT_LOG("size: " << (hi - lo) << " tmp_.size(): " << ts.tmp_.size() << " pending_.size(): " << ts.pending_.size());
171  } // sort()
172 
173  static
174  void binarySort(iter_t const lo, iter_t const hi, iter_t start, compare_t compare) {
175  assert( lo <= start && start <= hi );
176  if(start == lo) {
177  ++start;
178  }
179  for( ; start < hi; ++start ) {
180  assert(lo <= start);
181  /*const*/ value_t pivot = GFX_TIMSORT_MOVE(*start);
182 
183  iter_t const pos = std::upper_bound(lo, start, pivot, compare.less_function());
184  for(iter_t p = start; p > pos; --p) {
185  *p = GFX_TIMSORT_MOVE(*(p - 1));
186  }
187  *pos = GFX_TIMSORT_MOVE(pivot);
188  }
189  }
190 
191  static
192  diff_t countRunAndMakeAscending(iter_t const lo, iter_t const hi, compare_t compare) {
193  assert( lo < hi );
194 
195  iter_t runHi = lo + 1;
196  if( runHi == hi ) {
197  return 1;
198  }
199 
200  if(compare.lt(*(runHi++), *lo)) { // descending
201  while(runHi < hi && compare.lt(*runHi, *(runHi - 1))) {
202  ++runHi;
203  }
204  std::reverse(lo, runHi);
205  }
206  else { // ascending
207  while(runHi < hi && compare.ge(*runHi, *(runHi - 1))) {
208  ++runHi;
209  }
210  }
211 
212  return runHi - lo;
213  }
214 
215  static
216  diff_t minRunLength(diff_t n) {
217  assert( n >= 0 );
218 
219  diff_t r = 0;
220  while(n >= MIN_MERGE) {
221  r |= (n & 1);
222  n >>= 1;
223  }
224  return n + r;
225  }
226 
227  TimSort(compare_t c)
228  : comp_(c), minGallop_(MIN_GALLOP) {
229  }
230 
231  void pushRun(iter_t const runBase, diff_t const runLen) {
232  pending_.push_back(run(runBase, runLen));
233  }
234 
235  void mergeCollapse() {
236  while( pending_.size() > 1 ) {
237  diff_t n = pending_.size() - 2;
238 
239  if(n > 0 && pending_[n - 1].len <= pending_[n].len + pending_[n + 1].len) {
240  if(pending_[n - 1].len < pending_[n + 1].len) {
241  --n;
242  }
243  mergeAt(n);
244  }
245  else if(pending_[n].len <= pending_[n + 1].len) {
246  mergeAt(n);
247  }
248  else {
249  break;
250  }
251  }
252  }
253 
254  void mergeForceCollapse() {
255  while( pending_.size() > 1 ) {
256  diff_t n = pending_.size() - 2;
257 
258  if(n > 0 && pending_[n - 1].len < pending_[n + 1].len) {
259  --n;
260  }
261  mergeAt(n);
262  }
263  }
264 
265  void mergeAt(diff_t const i) {
266  diff_t const stackSize = pending_.size();
267  assert( stackSize >= 2 );
268  assert( i >= 0 );
269  assert( i == stackSize - 2 || i == stackSize - 3 );
270 
271  iter_t base1 = pending_[i].base;
272  diff_t len1 = pending_[i].len;
273  iter_t base2 = pending_[i + 1].base;
274  diff_t len2 = pending_[i + 1].len;
275 
276  assert( len1 > 0 && len2 > 0 );
277  assert( base1 + len1 == base2 );
278 
279  pending_[i].len = len1 + len2;
280 
281  if(i == stackSize - 3) {
282  pending_[i + 1] = pending_[i + 2];
283  }
284 
285  pending_.pop_back();
286 
287  diff_t const k = gallopRight(*base2, base1, len1, 0);
288  assert( k >= 0 );
289 
290  base1 += k;
291  len1 -= k;
292 
293  if(len1 == 0) {
294  return;
295  }
296 
297  len2 = gallopLeft(*(base1 + (len1 - 1)), base2, len2, len2 - 1);
298  assert( len2 >= 0 );
299  if(len2 == 0) {
300  return;
301  }
302 
303  if(len1 <= len2) {
304  mergeLo(base1, len1, base2, len2);
305  }
306  else {
307  mergeHi(base1, len1, base2, len2);
308  }
309  }
310 
311  template <typename Iter>
312  diff_t gallopLeft(ref_t key, Iter const base, diff_t const len, diff_t const hint) {
313  assert( len > 0 && hint >= 0 && hint < len );
314 
315  diff_t lastOfs = 0;
316  diff_t ofs = 1;
317 
318  if(comp_.gt(key, *(base + hint))) {
319  diff_t const maxOfs = len - hint;
320  while(ofs < maxOfs && comp_.gt(key, *(base + (hint + ofs)))) {
321  lastOfs = ofs;
322  ofs = (ofs << 1) + 1;
323 
324  if(ofs <= 0) { // int overflow
325  ofs = maxOfs;
326  }
327  }
328  if(ofs > maxOfs) {
329  ofs = maxOfs;
330  }
331 
332  lastOfs += hint;
333  ofs += hint;
334  }
335  else {
336  diff_t const maxOfs = hint + 1;
337  while(ofs < maxOfs && comp_.le(key, *(base + (hint - ofs)))) {
338  lastOfs = ofs;
339  ofs = (ofs << 1) + 1;
340 
341  if(ofs <= 0) {
342  ofs = maxOfs;
343  }
344  }
345  if(ofs > maxOfs) {
346  ofs = maxOfs;
347  }
348 
349  diff_t const tmp = lastOfs;
350  lastOfs = hint - ofs;
351  ofs = hint - tmp;
352  }
353  assert( -1 <= lastOfs && lastOfs < ofs && ofs <= len );
354 
355  return std::lower_bound(base+(lastOfs+1), base+ofs, key, comp_.less_function()) - base;
356  }
357 
358  template <typename Iter>
359  diff_t gallopRight(ref_t key, Iter const base, diff_t const len, diff_t const hint) {
360  assert( len > 0 && hint >= 0 && hint < len );
361 
362  diff_t ofs = 1;
363  diff_t lastOfs = 0;
364 
365  if(comp_.lt(key, *(base + hint))) {
366  diff_t const maxOfs = hint + 1;
367  while(ofs < maxOfs && comp_.lt(key, *(base + (hint - ofs)))) {
368  lastOfs = ofs;
369  ofs = (ofs << 1) + 1;
370 
371  if(ofs <= 0) {
372  ofs = maxOfs;
373  }
374  }
375  if(ofs > maxOfs) {
376  ofs = maxOfs;
377  }
378 
379  diff_t const tmp = lastOfs;
380  lastOfs = hint - ofs;
381  ofs = hint - tmp;
382  }
383  else {
384  diff_t const maxOfs = len - hint;
385  while(ofs < maxOfs && comp_.ge(key, *(base + (hint + ofs)))) {
386  lastOfs = ofs;
387  ofs = (ofs << 1) + 1;
388 
389  if(ofs <= 0) { // int overflow
390  ofs = maxOfs;
391  }
392  }
393  if(ofs > maxOfs) {
394  ofs = maxOfs;
395  }
396 
397  lastOfs += hint;
398  ofs += hint;
399  }
400  assert( -1 <= lastOfs && lastOfs < ofs && ofs <= len );
401 
402  return std::upper_bound(base+(lastOfs+1), base+ofs, key, comp_.less_function()) - base;
403  }
404 
405  void mergeLo(iter_t const base1, diff_t len1, iter_t const base2, diff_t len2) {
406  assert( len1 > 0 && len2 > 0 && base1 + len1 == base2 );
407 
408  copy_to_tmp(base1, len1);
409 
410  tmp_iter_t cursor1 = tmp_.begin();
411  iter_t cursor2 = base2;
412  iter_t dest = base1;
413 
414  *(dest++) = *(cursor2++);
415  if(--len2 == 0) {
416  std::copy(cursor1, cursor1 + len1, dest);
417  return;
418  }
419  if(len1 == 1) {
420  std::copy(cursor2, cursor2 + len2, dest);
421  *(dest + len2) = *cursor1;
422  return;
423  }
424 
425  int minGallop(minGallop_);
426 
427  // outer:
428  while(true) {
429  int count1 = 0;
430  int count2 = 0;
431 
432  bool break_outer = false;
433  do {
434  assert( len1 > 1 && len2 > 0 );
435 
436  if(comp_.lt(*cursor2, *cursor1)) {
437  *(dest++) = *(cursor2++);
438  ++count2;
439  count1 = 0;
440  if(--len2 == 0) {
441  break_outer = true;
442  break;
443  }
444  }
445  else {
446  *(dest++) = *(cursor1++);
447  ++count1;
448  count2 = 0;
449  if(--len1 == 1) {
450  break_outer = true;
451  break;
452  }
453  }
454  } while( (count1 | count2) < minGallop );
455  if(break_outer) {
456  break;
457  }
458 
459  do {
460  assert( len1 > 1 && len2 > 0 );
461 
462  count1 = gallopRight(*cursor2, cursor1, len1, 0);
463  if(count1 != 0) {
464  std::copy_backward(cursor1, cursor1 + count1, dest + count1);
465  dest += count1;
466  cursor1 += count1;
467  len1 -= count1;
468 
469  if(len1 <= 1) {
470  break_outer = true;
471  break;
472  }
473  }
474  *(dest++) = *(cursor2++);
475  if(--len2 == 0) {
476  break_outer = true;
477  break;
478  }
479 
480  count2 = gallopLeft(*cursor1, cursor2, len2, 0);
481  if(count2 != 0) {
482  std::copy(cursor2, cursor2 + count2, dest);
483  dest += count2;
484  cursor2 += count2;
485  len2 -= count2;
486  if(len2 == 0) {
487  break_outer = true;
488  break;
489  }
490  }
491  *(dest++) = *(cursor1++);
492  if(--len1 == 1) {
493  break_outer = true;
494  break;
495  }
496 
497  --minGallop;
498  } while( (count1 >= MIN_GALLOP) | (count2 >= MIN_GALLOP) );
499  if(break_outer) {
500  break;
501  }
502 
503  if(minGallop < 0) {
504  minGallop = 0;
505  }
506  minGallop += 2;
507  } // end of "outer" loop
508 
509  minGallop_ = std::min(minGallop, 1);
510 
511  if(len1 == 1) {
512  assert( len2 > 0 );
513  std::copy(cursor2, cursor2 + len2, dest);
514  *(dest + len2) = *cursor1;
515  }
516  else {
517  assert( len1 != 0 && "Comparision function violates its general contract");
518  assert( len2 == 0 );
519  assert( len1 > 1 );
520  std::copy(cursor1, cursor1 + len1, dest);
521  }
522  }
523 
524  void mergeHi(iter_t const base1, diff_t len1, iter_t const base2, diff_t len2) {
525  assert( len1 > 0 && len2 > 0 && base1 + len1 == base2 );
526 
527  copy_to_tmp(base2, len2);
528 
529  iter_t cursor1 = base1 + (len1 - 1);
530  tmp_iter_t cursor2 = tmp_.begin() + (len2 - 1);
531  iter_t dest = base2 + (len2 - 1);
532 
533  *(dest--) = *(cursor1--);
534  if(--len1 == 0) {
535  std::copy(tmp_.begin(), tmp_.begin() + len2, dest - (len2 - 1));
536  return;
537  }
538  if(len2 == 1) {
539  dest -= len1;
540  cursor1 -= len1;
541  std::copy_backward(cursor1 + 1, cursor1 + (1 + len1), dest + (1 + len1));
542  *dest = *cursor2;
543  return;
544  }
545 
546  int minGallop( minGallop_ );
547 
548  // outer:
549  while(true) {
550  int count1 = 0;
551  int count2 = 0;
552 
553  bool break_outer = false;
554  do {
555  assert( len1 > 0 && len2 > 1 );
556 
557  if(comp_.lt(*cursor2, *cursor1)) {
558  *(dest--) = *(cursor1--);
559  ++count1;
560  count2 = 0;
561  if(--len1 == 0) {
562  break_outer = true;
563  break;
564  }
565  }
566  else {
567  *(dest--) = *(cursor2--);
568  ++count2;
569  count1 = 0;
570  if(--len2 == 1) {
571  break_outer = true;
572  break;
573  }
574  }
575  } while( (count1 | count2) < minGallop );
576  if(break_outer) {
577  break;
578  }
579 
580  do {
581  assert( len1 > 0 && len2 > 1 );
582 
583  count1 = len1 - gallopRight(*cursor2, base1, len1, len1 - 1);
584  if(count1 != 0) {
585  dest -= count1;
586  cursor1 -= count1;
587  len1 -= count1;
588  std::copy_backward(cursor1 + 1, cursor1 + (1 + count1), dest + (1 + count1));
589 
590  if(len1 == 0) {
591  break_outer = true;
592  break;
593  }
594  }
595  *(dest--) = *(cursor2--);
596  if(--len2 == 1) {
597  break_outer = true;
598  break;
599  }
600 
601  count2 = len2 - gallopLeft(*cursor1, tmp_.begin(), len2, len2 - 1);
602  if(count2 != 0) {
603  dest -= count2;
604  cursor2 -= count2;
605  len2 -= count2;
606  std::copy(cursor2 + 1, cursor2 + (1 + count2), dest + 1);
607  if(len2 <= 1) {
608  break_outer = true;
609  break;
610  }
611  }
612  *(dest--) = *(cursor1--);
613  if(--len1 == 0) {
614  break_outer = true;
615  break;
616  }
617 
618  minGallop--;
619  } while( (count1 >= MIN_GALLOP) | (count2 >= MIN_GALLOP) );
620  if(break_outer) {
621  break;
622  }
623 
624  if(minGallop < 0) {
625  minGallop = 0;
626  }
627  minGallop += 2;
628  } // end of "outer" loop
629 
630  minGallop_ = std::min(minGallop, 1);
631 
632  if(len2 == 1) {
633  assert( len1 > 0 );
634  dest -= len1;
635  cursor1 -= len1;
636  std::copy_backward(cursor1 + 1, cursor1 + (1 + len1), dest + (1 + len1));
637  *dest = *cursor2;
638  }
639  else {
640  assert( len2 != 0 && "Comparision function violates its general contract");
641  assert( len1 == 0 );
642  assert( len2 > 1 );
643  std::copy(tmp_.begin(), tmp_.begin() + len2, dest - (len2 - 1));
644  }
645  }
646 
647  void copy_to_tmp(iter_t const begin, diff_t const len) {
648  tmp_.clear();
649  tmp_.reserve(len);
650  std::copy(begin, begin + len, std::back_inserter(tmp_));
651  }
652 
653  // the only interface is the friend timsort() function
654  template <typename IterT, typename LessT>
655  friend void timsort(IterT first, IterT last, LessT c);
656 };
657 
658 template<typename RandomAccessIterator>
659 inline void timsort(RandomAccessIterator const first, RandomAccessIterator const last) {
660  typedef typename std::iterator_traits<RandomAccessIterator>::value_type value_type;
661  timsort(first, last, std::less<value_type>());
662 }
663 
664 template<typename RandomAccessIterator, typename LessFunction>
665 inline void timsort(RandomAccessIterator const first, RandomAccessIterator const last, LessFunction compare) {
666  TimSort<RandomAccessIterator, LessFunction>::sort(first, last, compare);
667 }
668 
669 } // namespace gfx
670 
671 #undef GFX_TIMSORT_LOG
672 #undef GFX_TIMSORT_MOVE
673 #endif // GFX_TIMSORT_HPP
674 
Definition: timsort.hpp:74
Definition: timsort.hpp:105