blob: 0049f700a2f68bdfe742dca669ede816dbe4cf86 [file] [log] [blame]
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4
5package width
6
7import (
8 "unicode/utf8"
9
10 "golang.org/x/text/transform"
11)
12
// foldTransform is the receiver type for the Span and Transform methods that
// rewrite runes whose trie entry carries tagNeedsFold. It has no fields of its
// own; it only embeds transform.NopResetter.
type foldTransform struct {
	transform.NopResetter
}
16
17func (foldTransform) Span(src []byte, atEOF bool) (n int, err error) {
18 for n < len(src) {
19 if src[n] < utf8.RuneSelf {
20 // ASCII fast path.
21 for n++; n < len(src) && src[n] < utf8.RuneSelf; n++ {
22 }
23 continue
24 }
25 v, size := trie.lookup(src[n:])
26 if size == 0 { // incomplete UTF-8 encoding
27 if !atEOF {
28 err = transform.ErrShortSrc
29 } else {
30 n = len(src)
31 }
32 break
33 }
34 if elem(v)&tagNeedsFold != 0 {
35 err = transform.ErrEndOfSpan
36 break
37 }
38 n += size
39 }
40 return n, err
41}
42
43func (foldTransform) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
44 for nSrc < len(src) {
45 if src[nSrc] < utf8.RuneSelf {
46 // ASCII fast path.
47 start, end := nSrc, len(src)
48 if d := len(dst) - nDst; d < end-start {
49 end = nSrc + d
50 }
51 for nSrc++; nSrc < end && src[nSrc] < utf8.RuneSelf; nSrc++ {
52 }
53 n := copy(dst[nDst:], src[start:nSrc])
54 if nDst += n; nDst == len(dst) {
55 nSrc = start + n
56 if nSrc == len(src) {
57 return nDst, nSrc, nil
58 }
59 if src[nSrc] < utf8.RuneSelf {
60 return nDst, nSrc, transform.ErrShortDst
61 }
62 }
63 continue
64 }
65 v, size := trie.lookup(src[nSrc:])
66 if size == 0 { // incomplete UTF-8 encoding
67 if !atEOF {
68 return nDst, nSrc, transform.ErrShortSrc
69 }
70 size = 1 // gobble 1 byte
71 }
72 if elem(v)&tagNeedsFold == 0 {
73 if size != copy(dst[nDst:], src[nSrc:nSrc+size]) {
74 return nDst, nSrc, transform.ErrShortDst
75 }
76 nDst += size
77 } else {
78 data := inverseData[byte(v)]
79 if len(dst)-nDst < int(data[0]) {
80 return nDst, nSrc, transform.ErrShortDst
81 }
82 i := 1
83 for end := int(data[0]); i < end; i++ {
84 dst[nDst] = data[i]
85 nDst++
86 }
87 dst[nDst] = data[i] ^ src[nSrc+size-1]
88 nDst++
89 }
90 nSrc += size
91 }
92 return nDst, nSrc, nil
93}
94
// narrowTransform is the receiver type for the Span and Transform methods that
// rewrite runes of kind EastAsianFullwidth, EastAsianWide or
// EastAsianAmbiguous that have a non-zero inverseData index. It has no fields
// of its own; it only embeds transform.NopResetter.
type narrowTransform struct {
	transform.NopResetter
}
98
99func (narrowTransform) Span(src []byte, atEOF bool) (n int, err error) {
100 for n < len(src) {
101 if src[n] < utf8.RuneSelf {
102 // ASCII fast path.
103 for n++; n < len(src) && src[n] < utf8.RuneSelf; n++ {
104 }
105 continue
106 }
107 v, size := trie.lookup(src[n:])
108 if size == 0 { // incomplete UTF-8 encoding
109 if !atEOF {
110 err = transform.ErrShortSrc
111 } else {
112 n = len(src)
113 }
114 break
115 }
116 if k := elem(v).kind(); byte(v) == 0 || k != EastAsianFullwidth && k != EastAsianWide && k != EastAsianAmbiguous {
117 } else {
118 err = transform.ErrEndOfSpan
119 break
120 }
121 n += size
122 }
123 return n, err
124}
125
126func (narrowTransform) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
127 for nSrc < len(src) {
128 if src[nSrc] < utf8.RuneSelf {
129 // ASCII fast path.
130 start, end := nSrc, len(src)
131 if d := len(dst) - nDst; d < end-start {
132 end = nSrc + d
133 }
134 for nSrc++; nSrc < end && src[nSrc] < utf8.RuneSelf; nSrc++ {
135 }
136 n := copy(dst[nDst:], src[start:nSrc])
137 if nDst += n; nDst == len(dst) {
138 nSrc = start + n
139 if nSrc == len(src) {
140 return nDst, nSrc, nil
141 }
142 if src[nSrc] < utf8.RuneSelf {
143 return nDst, nSrc, transform.ErrShortDst
144 }
145 }
146 continue
147 }
148 v, size := trie.lookup(src[nSrc:])
149 if size == 0 { // incomplete UTF-8 encoding
150 if !atEOF {
151 return nDst, nSrc, transform.ErrShortSrc
152 }
153 size = 1 // gobble 1 byte
154 }
155 if k := elem(v).kind(); byte(v) == 0 || k != EastAsianFullwidth && k != EastAsianWide && k != EastAsianAmbiguous {
156 if size != copy(dst[nDst:], src[nSrc:nSrc+size]) {
157 return nDst, nSrc, transform.ErrShortDst
158 }
159 nDst += size
160 } else {
161 data := inverseData[byte(v)]
162 if len(dst)-nDst < int(data[0]) {
163 return nDst, nSrc, transform.ErrShortDst
164 }
165 i := 1
166 for end := int(data[0]); i < end; i++ {
167 dst[nDst] = data[i]
168 nDst++
169 }
170 dst[nDst] = data[i] ^ src[nSrc+size-1]
171 nDst++
172 }
173 nSrc += size
174 }
175 return nDst, nSrc, nil
176}
177
// wideTransform is the receiver type for the Span and Transform methods that
// rewrite runes of kind EastAsianHalfwidth or EastAsianNarrow that have a
// non-zero inverseData index. It has no fields of its own; it only embeds
// transform.NopResetter.
type wideTransform struct {
	transform.NopResetter
}
181
182func (wideTransform) Span(src []byte, atEOF bool) (n int, err error) {
183 for n < len(src) {
184 // TODO: Consider ASCII fast path. Special-casing ASCII handling can
185 // reduce the ns/op of BenchmarkWideASCII by about 30%. This is probably
186 // not enough to warrant the extra code and complexity.
187 v, size := trie.lookup(src[n:])
188 if size == 0 { // incomplete UTF-8 encoding
189 if !atEOF {
190 err = transform.ErrShortSrc
191 } else {
192 n = len(src)
193 }
194 break
195 }
196 if k := elem(v).kind(); byte(v) == 0 || k != EastAsianHalfwidth && k != EastAsianNarrow {
197 } else {
198 err = transform.ErrEndOfSpan
199 break
200 }
201 n += size
202 }
203 return n, err
204}
205
206func (wideTransform) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
207 for nSrc < len(src) {
208 // TODO: Consider ASCII fast path. Special-casing ASCII handling can
209 // reduce the ns/op of BenchmarkWideASCII by about 30%. This is probably
210 // not enough to warrant the extra code and complexity.
211 v, size := trie.lookup(src[nSrc:])
212 if size == 0 { // incomplete UTF-8 encoding
213 if !atEOF {
214 return nDst, nSrc, transform.ErrShortSrc
215 }
216 size = 1 // gobble 1 byte
217 }
218 if k := elem(v).kind(); byte(v) == 0 || k != EastAsianHalfwidth && k != EastAsianNarrow {
219 if size != copy(dst[nDst:], src[nSrc:nSrc+size]) {
220 return nDst, nSrc, transform.ErrShortDst
221 }
222 nDst += size
223 } else {
224 data := inverseData[byte(v)]
225 if len(dst)-nDst < int(data[0]) {
226 return nDst, nSrc, transform.ErrShortDst
227 }
228 i := 1
229 for end := int(data[0]); i < end; i++ {
230 dst[nDst] = data[i]
231 nDst++
232 }
233 dst[nDst] = data[i] ^ src[nSrc+size-1]
234 nDst++
235 }
236 nSrc += size
237 }
238 return nDst, nSrc, nil
239}