blob: 66a52c7125d61a017ddc5ebbdb63aea16630afc7 [file] [log] [blame]
Serge Bazanskicc25bdf2018-10-25 14:02:58 +02001package sortorder
2
3// Natural implements sort.Interface to sort strings in natural order. This
4// means that e.g. "abc2" < "abc12".
5//
6// Non-digit sequences and numbers are compared separately. The former are
7// compared bytewise, while the latter are compared numerically (except that
8// the number of leading zeros is used as a tie-breaker, so e.g. "2" < "02")
9//
10// Limitation: only ASCII digits (0-9) are considered.
11type Natural []string
12
13func (n Natural) Len() int { return len(n) }
14func (n Natural) Swap(i, j int) { n[i], n[j] = n[j], n[i] }
15func (n Natural) Less(i, j int) bool { return NaturalLess(n[i], n[j]) }
16
17func isdigit(b byte) bool { return '0' <= b && b <= '9' }
18
19// NaturalLess compares two strings using natural ordering. This means that e.g.
20// "abc2" < "abc12".
21//
22// Non-digit sequences and numbers are compared separately. The former are
23// compared bytewise, while the latter are compared numerically (except that
24// the number of leading zeros is used as a tie-breaker, so e.g. "2" < "02")
25//
26// Limitation: only ASCII digits (0-9) are considered.
27func NaturalLess(str1, str2 string) bool {
28 idx1, idx2 := 0, 0
29 for idx1 < len(str1) && idx2 < len(str2) {
30 c1, c2 := str1[idx1], str2[idx2]
31 dig1, dig2 := isdigit(c1), isdigit(c2)
32 switch {
33 case dig1 != dig2: // Digits before other characters.
34 return dig1 // True if LHS is a digit, false if the RHS is one.
35 case !dig1: // && !dig2, because dig1 == dig2
36 // UTF-8 compares bytewise-lexicographically, no need to decode
37 // codepoints.
38 if c1 != c2 {
39 return c1 < c2
40 }
41 idx1++
42 idx2++
43 default: // Digits
44 // Eat zeros.
45 for ; idx1 < len(str1) && str1[idx1] == '0'; idx1++ {
46 }
47 for ; idx2 < len(str2) && str2[idx2] == '0'; idx2++ {
48 }
49 // Eat all digits.
50 nonZero1, nonZero2 := idx1, idx2
51 for ; idx1 < len(str1) && isdigit(str1[idx1]); idx1++ {
52 }
53 for ; idx2 < len(str2) && isdigit(str2[idx2]); idx2++ {
54 }
55 // If lengths of numbers with non-zero prefix differ, the shorter
56 // one is less.
57 if len1, len2 := idx1-nonZero1, idx2-nonZero2; len1 != len2 {
58 return len1 < len2
59 }
60 // If they're equal, string comparison is correct.
61 if nr1, nr2 := str1[nonZero1:idx1], str2[nonZero2:idx2]; nr1 != nr2 {
62 return nr1 < nr2
63 }
64 // Otherwise, the one with less zeros is less.
65 // Because everything up to the number is equal, comparing the index
66 // after the zeros is sufficient.
67 if nonZero1 != nonZero2 {
68 return nonZero1 < nonZero2
69 }
70 }
71 // They're identical so far, so continue comparing.
72 }
73 // So far they are identical. At least one is ended. If the other continues,
74 // it sorts last.
75 return len(str1) < len(str2)
76}