]> Cypherpunks.ru repositories - gostls13.git/commitdiff
sort: improve average quicksort performance
authorSokolov Yura <funny.falcon@gmail.com>
Mon, 12 Oct 2015 13:06:38 +0000 (16:06 +0300)
committerRuss Cox <rsc@golang.org>
Fri, 4 Dec 2015 15:17:47 +0000 (15:17 +0000)
- change way of protection from O(N^2) on duplicate values.
  Previous algorithm does additional comparisons and swaps
  on every split pass.
  Changed algorithm does one ordinal quicksort split pass,
  and if distribution is skewed, then additional pass to
  separate pivot's duplicates.
  Changed algorithm could be slower on very ununique slice,
  but it is still protected from O(N^2).

- increase small slice size and do simple shell sort pass
  to amortize worst case on small slices.
  Small slice has higher probability to have skewed
  distribution, so lets sort it with simpler algorithm.

benchmark                 old ns/op      new ns/op      delta
BenchmarkSortString1K     458374         388641         -15.21%
BenchmarkSortInt1K        217851         181796         -16.55%
BenchmarkSortInt64K       20539264       16730340       -18.54%
BenchmarkSort1e2          98668          95554          -3.16%
BenchmarkSort1e4          20278500       18316829       -9.67%
BenchmarkSort1e6          3215724392     2795999911     -13.05%

number of operations:
       Size:         Total:     Swap:     Less:
                          %         %         %
Sort     100  Avg    -5.98%   -18.43%    -1.90%
Sort     100  Max   -14.43%   -16.02%    -4.51%
Sort     300  Avg    -7.50%   -12.76%    -5.96%
Sort     300  Max   -11.29%    -9.60%    -4.30%
Sort    1000  Avg   -12.13%   -11.65%   -12.25%
Sort    1000  Max   -13.81%   -11.77%   -11.89%
Sort    3000  Avg   -14.61%    -9.30%   -15.86%
Sort    3000  Max   -15.81%    -8.66%   -15.19%
Sort   10000  Avg   -16.10%    -8.47%   -17.80%
Sort   10000  Max   -17.13%    -7.63%   -16.97%
Sort   30000  Avg   -17.46%    -7.56%   -19.57%
Sort   30000  Max   -18.24%    -7.62%   -17.68%
Sort  100000  Avg   -18.83%    -6.64%   -21.33%
Sort  100000  Max   -19.72%    -6.70%   -20.96%
Sort  300000  Avg   -19.61%    -6.16%   -22.30%
Sort  300000  Max   -20.69%    -6.15%   -21.81%
Sort 1000000  Avg   -20.42%    -5.58%   -23.31%
Sort 1000000  Max   -21.54%    -5.56%   -23.61%

Change-Id: I23868e8b52b5841b358cd5403967c9a97871e4d5
Reviewed-on: https://go-review.googlesource.com/15688
Reviewed-by: Russ Cox <rsc@golang.org>
src/sort/example_multi_test.go
src/sort/sort.go

index ac316540fd58aad5ddbf4f63e9f69a9baf7c9700..40d12152ce28c111af0d083a9a0b05320d6b8bd0 100644 (file)
@@ -122,10 +122,10 @@ func Example_sortMultiKeys() {
        fmt.Println("By language,<lines,user:", changes)
 
        // Output:
-       // By user: [{dmr C 100} {glenda Go 200} {gri Smalltalk 80} {gri Go 100} {ken Go 200} {ken C 150} {r Go 100} {r C 150} {rsc Go 200}]
+       // By user: [{dmr C 100} {glenda Go 200} {gri Go 100} {gri Smalltalk 80} {ken C 150} {ken Go 200} {r Go 100} {r C 150} {rsc Go 200}]
        // By user,<lines: [{dmr C 100} {glenda Go 200} {gri Smalltalk 80} {gri Go 100} {ken C 150} {ken Go 200} {r Go 100} {r C 150} {rsc Go 200}]
        // By user,>lines: [{dmr C 100} {glenda Go 200} {gri Go 100} {gri Smalltalk 80} {ken Go 200} {ken C 150} {r C 150} {r Go 100} {rsc Go 200}]
-       // By language,<lines: [{dmr C 100} {ken C 150} {r C 150} {gri Go 100} {r Go 100} {ken Go 200} {glenda Go 200} {rsc Go 200} {gri Smalltalk 80}]
+       // By language,<lines: [{dmr C 100} {ken C 150} {r C 150} {r Go 100} {gri Go 100} {ken Go 200} {glenda Go 200} {rsc Go 200} {gri Smalltalk 80}]
        // By language,<lines,user: [{dmr C 100} {ken C 150} {r C 150} {gri Go 100} {r Go 100} {glenda Go 200} {ken Go 200} {rsc Go 200} {gri Smalltalk 80}]
 
 }
index c7c30426aec138ec56f20348d51136d467b1e400..ac8f4a661fc7c345f118239bbf74bf53e5cab8b2 100644 (file)
@@ -72,7 +72,7 @@ func heapSort(data Interface, a, b int) {
        }
 }
 
-// Quicksort, following Bentley and McIlroy,
+// Quicksort, loosely following Bentley and McIlroy,
 // ``Engineering a Sort Function,'' SP&E November 1993.
 
 // medianOfThree moves the median of the three values data[m0], data[m1], data[m2] into data[m1].
@@ -111,59 +111,82 @@ func doPivot(data Interface, lo, hi int) (midlo, midhi int) {
 
        // Invariants are:
        //      data[lo] = pivot (set up by ChoosePivot)
-       //      data[lo <= i < a] = pivot
-       //      data[a <= i < b] < pivot
-       //      data[b <= i < c] is unexamined
-       //      data[c <= i < d] > pivot
-       //      data[d <= i < hi] = pivot
-       //
-       // Once b meets c, can swap the "= pivot" sections
-       // into the middle of the slice.
+       //      data[lo < i < a] < pivot
+       //      data[a <= i < b] <= pivot
+       //      data[b <= i < c] unexamined
+       //      data[c <= i < hi-1] > pivot
+       //      data[hi-1] >= pivot
        pivot := lo
-       a, b, c, d := lo+1, lo+1, hi, hi
+       a, c := lo+1, hi-1
+
+       for ; a != c && data.Less(a, pivot); a++ {
+       }
+       b := a
        for {
-               for b < c {
-                       if data.Less(b, pivot) { // data[b] < pivot
-                               b++
-                       } else if !data.Less(pivot, b) { // data[b] = pivot
-                               data.Swap(a, b)
-                               a++
-                               b++
-                       } else {
-                               break
-                       }
+               for ; b != c && !data.Less(pivot, b); b++ { // data[b] <= pivot
                }
-               for b < c {
-                       if data.Less(pivot, c-1) { // data[c-1] > pivot
-                               c--
-                       } else if !data.Less(c-1, pivot) { // data[c-1] = pivot
-                               data.Swap(c-1, d-1)
-                               c--
-                               d--
-                       } else {
-                               break
-                       }
+               for ; b != c && data.Less(pivot, c-1); c-- { // data[c-1] > pivot
                }
-               if b >= c {
+               if b == c {
                        break
                }
-               // data[b] > pivot; data[c-1] < pivot
+               // data[b] > pivot; data[c-1] <= pivot
                data.Swap(b, c-1)
                b++
                c--
        }
-
-       n := min(b-a, a-lo)
-       swapRange(data, lo, b-n, n)
-
-       n = min(hi-d, d-c)
-       swapRange(data, c, hi-n, n)
-
-       return lo + b - a, hi - (d - c)
+       // If hi-c<3 then there are duplicates (by property of median of nine).
+       // Let be a bit more conservative, and set border to 5.
+       protect := hi-c < 5
+       if !protect && hi-c < (hi-lo)/4 {
+               // Lets test some points for equality to pivot
+               dups := 0
+               if !data.Less(pivot, hi-1) { // data[hi-1] = pivot
+                       data.Swap(c, hi-1)
+                       c++
+                       dups++
+               }
+               if !data.Less(b-1, pivot) { // data[b-1] = pivot
+                       b--
+                       dups++
+               }
+               // m-lo = (hi-lo)/2 > 6
+               // b-lo > (hi-lo)*3/4-1 > 8
+               // ==> m < b ==> data[m] <= pivot
+               if !data.Less(m, pivot) { // data[m] = pivot
+                       data.Swap(m, b-1)
+                       b--
+                       dups++
+               }
+               // if at least 2 points are equal to pivot, assume skewed distribution
+               protect = dups > 1
+       }
+       if protect {
+               // Protect against a lot of duplicates
+               // Add invariant:
+               //      data[a <= i < b] unexamined
+               //      data[b <= i < c] = pivot
+               for {
+                       for ; a != b && !data.Less(b-1, pivot); b-- { // data[b] == pivot
+                       }
+                       for ; a != b && data.Less(a, pivot); a++ { // data[a] < pivot
+                       }
+                       if a == b {
+                               break
+                       }
+                       // data[a] == pivot; data[b-1] < pivot
+                       data.Swap(a, b-1)
+                       a++
+                       b--
+               }
+       }
+       // Swap pivot into middle
+       data.Swap(pivot, b-1)
+       return b - 1, c
 }
 
 func quickSort(data Interface, a, b, maxDepth int) {
-       for b-a > 7 {
+       for b-a > 12 { // Use ShellSort for slices <= 12 elements
                if maxDepth == 0 {
                        heapSort(data, a, b)
                        return
@@ -181,6 +204,13 @@ func quickSort(data Interface, a, b, maxDepth int) {
                }
        }
        if b-a > 1 {
+               // Do ShellSort pass with gap 6
+               // It could be written in this simplified form cause b-a <= 12
+               for i := a + 6; i < b; i++ {
+                       if data.Less(i, i-6) {
+                               data.Swap(i, i-6)
+                       }
+               }
                insertionSort(data, a, b)
        }
 }