]> Cypherpunks.ru repositories - gostls13.git/commitdiff
sort: improve average quicksort performance
authorRuss Cox <rsc@golang.org>
Fri, 4 Dec 2015 20:03:20 +0000 (20:03 +0000)
committerRuss Cox <rsc@golang.org>
Fri, 4 Dec 2015 20:41:47 +0000 (20:41 +0000)
Revert "Revert "sort: improve average quicksort performance""
This reverts commit 30b87bb9aa0c6658830f3d111920e2f366476644.

See https://golang.org/cl/15688 for the CL being replayed.

Change-Id: I2ba36334003f4971f95a10536adfdd86be9a50de
Reviewed-on: https://go-review.googlesource.com/17389
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Russ Cox <rsc@golang.org>
src/sort/example_multi_test.go
src/sort/sort.go

index ac316540fd58aad5ddbf4f63e9f69a9baf7c9700..40d12152ce28c111af0d083a9a0b05320d6b8bd0 100644 (file)
@@ -122,10 +122,10 @@ func Example_sortMultiKeys() {
        fmt.Println("By language,<lines,user:", changes)
 
        // Output:
-       // By user: [{dmr C 100} {glenda Go 200} {gri Smalltalk 80} {gri Go 100} {ken Go 200} {ken C 150} {r Go 100} {r C 150} {rsc Go 200}]
+       // By user: [{dmr C 100} {glenda Go 200} {gri Go 100} {gri Smalltalk 80} {ken C 150} {ken Go 200} {r Go 100} {r C 150} {rsc Go 200}]
        // By user,<lines: [{dmr C 100} {glenda Go 200} {gri Smalltalk 80} {gri Go 100} {ken C 150} {ken Go 200} {r Go 100} {r C 150} {rsc Go 200}]
        // By user,>lines: [{dmr C 100} {glenda Go 200} {gri Go 100} {gri Smalltalk 80} {ken Go 200} {ken C 150} {r C 150} {r Go 100} {rsc Go 200}]
-       // By language,<lines: [{dmr C 100} {ken C 150} {r C 150} {gri Go 100} {r Go 100} {ken Go 200} {glenda Go 200} {rsc Go 200} {gri Smalltalk 80}]
+       // By language,<lines: [{dmr C 100} {ken C 150} {r C 150} {r Go 100} {gri Go 100} {ken Go 200} {glenda Go 200} {rsc Go 200} {gri Smalltalk 80}]
        // By language,<lines,user: [{dmr C 100} {ken C 150} {r C 150} {gri Go 100} {r Go 100} {glenda Go 200} {ken Go 200} {rsc Go 200} {gri Smalltalk 80}]
 
 }
index c7c30426aec138ec56f20348d51136d467b1e400..ac8f4a661fc7c345f118239bbf74bf53e5cab8b2 100644 (file)
@@ -72,7 +72,7 @@ func heapSort(data Interface, a, b int) {
        }
 }
 
-// Quicksort, following Bentley and McIlroy,
+// Quicksort, loosely following Bentley and McIlroy,
 // ``Engineering a Sort Function,'' SP&E November 1993.
 
 // medianOfThree moves the median of the three values data[m0], data[m1], data[m2] into data[m1].
@@ -111,59 +111,82 @@ func doPivot(data Interface, lo, hi int) (midlo, midhi int) {
 
        // Invariants are:
        //      data[lo] = pivot (set up by ChoosePivot)
-       //      data[lo <= i < a] = pivot
-       //      data[a <= i < b] < pivot
-       //      data[b <= i < c] is unexamined
-       //      data[c <= i < d] > pivot
-       //      data[d <= i < hi] = pivot
-       //
-       // Once b meets c, can swap the "= pivot" sections
-       // into the middle of the slice.
+       //      data[lo < i < a] < pivot
+       //      data[a <= i < b] <= pivot
+       //      data[b <= i < c] unexamined
+       //      data[c <= i < hi-1] > pivot
+       //      data[hi-1] >= pivot
        pivot := lo
-       a, b, c, d := lo+1, lo+1, hi, hi
+       a, c := lo+1, hi-1
+
+       for ; a != c && data.Less(a, pivot); a++ {
+       }
+       b := a
        for {
-               for b < c {
-                       if data.Less(b, pivot) { // data[b] < pivot
-                               b++
-                       } else if !data.Less(pivot, b) { // data[b] = pivot
-                               data.Swap(a, b)
-                               a++
-                               b++
-                       } else {
-                               break
-                       }
+               for ; b != c && !data.Less(pivot, b); b++ { // data[b] <= pivot
                }
-               for b < c {
-                       if data.Less(pivot, c-1) { // data[c-1] > pivot
-                               c--
-                       } else if !data.Less(c-1, pivot) { // data[c-1] = pivot
-                               data.Swap(c-1, d-1)
-                               c--
-                               d--
-                       } else {
-                               break
-                       }
+               for ; b != c && data.Less(pivot, c-1); c-- { // data[c-1] > pivot
                }
-               if b >= c {
+               if b == c {
                        break
                }
-               // data[b] > pivot; data[c-1] < pivot
+               // data[b] > pivot; data[c-1] <= pivot
                data.Swap(b, c-1)
                b++
                c--
        }
-
-       n := min(b-a, a-lo)
-       swapRange(data, lo, b-n, n)
-
-       n = min(hi-d, d-c)
-       swapRange(data, c, hi-n, n)
-
-       return lo + b - a, hi - (d - c)
+       // If hi-c<3 then there are duplicates (by property of median of nine).
+       // Let be a bit more conservative, and set border to 5.
+       protect := hi-c < 5
+       if !protect && hi-c < (hi-lo)/4 {
+               // Lets test some points for equality to pivot
+               dups := 0
+               if !data.Less(pivot, hi-1) { // data[hi-1] = pivot
+                       data.Swap(c, hi-1)
+                       c++
+                       dups++
+               }
+               if !data.Less(b-1, pivot) { // data[b-1] = pivot
+                       b--
+                       dups++
+               }
+               // m-lo = (hi-lo)/2 > 6
+               // b-lo > (hi-lo)*3/4-1 > 8
+               // ==> m < b ==> data[m] <= pivot
+               if !data.Less(m, pivot) { // data[m] = pivot
+                       data.Swap(m, b-1)
+                       b--
+                       dups++
+               }
+               // if at least 2 points are equal to pivot, assume skewed distribution
+               protect = dups > 1
+       }
+       if protect {
+               // Protect against a lot of duplicates
+               // Add invariant:
+               //      data[a <= i < b] unexamined
+               //      data[b <= i < c] = pivot
+               for {
+                       for ; a != b && !data.Less(b-1, pivot); b-- { // data[b] == pivot
+                       }
+                       for ; a != b && data.Less(a, pivot); a++ { // data[a] < pivot
+                       }
+                       if a == b {
+                               break
+                       }
+                       // data[a] == pivot; data[b-1] < pivot
+                       data.Swap(a, b-1)
+                       a++
+                       b--
+               }
+       }
+       // Swap pivot into middle
+       data.Swap(pivot, b-1)
+       return b - 1, c
 }
 
 func quickSort(data Interface, a, b, maxDepth int) {
-       for b-a > 7 {
+       for b-a > 12 { // Use ShellSort for slices <= 12 elements
                if maxDepth == 0 {
                        heapSort(data, a, b)
                        return
@@ -181,6 +204,13 @@ func quickSort(data Interface, a, b, maxDepth int) {
                }
        }
        if b-a > 1 {
+               // Do ShellSort pass with gap 6
+               // It could be written in this simplified form cause b-a <= 12
+               for i := a + 6; i < b; i++ {
+                       if data.Less(i, i-6) {
+                               data.Swap(i, i-6)
+                       }
+               }
                insertionSort(data, a, b)
        }
 }