// Package jlexer contains a JSON lexer implementation.
//
// It is expected that it is mostly used with generated parser code, so the interface is tuned
// for a parser that knows what kind of data is expected.
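//
// A minimal hand-written usage sketch (hypothetical input; generated code
// normally drives the lexer):
//
//	l := jlexer.Lexer{Data: []byte(`{"name": "gopher", "age": 3}`)}
//	l.Delim('{')
//	for !l.IsDelim('}') {
//		key := l.UnsafeString()
//		l.WantColon()
//		switch key {
//		case "name":
//			_ = l.String()
//		default:
//			l.SkipRecursive()
//		}
//		l.WantComma()
//	}
//	l.Delim('}')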
package jlexer

import (
	"encoding/base64"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"strconv"
	"unicode"
	"unicode/utf16"
	"unicode/utf8"
)

// tokenKind determines the type of a token.
type tokenKind byte

const (
	tokenUndef  tokenKind = iota // No token.
	tokenDelim                   // Delimiter: one of '{', '}', '[' or ']'.
	tokenString                  // A string literal, e.g. "abc\u1234"
	tokenNumber                  // Number literal, e.g. 1.5e5
	tokenBool                    // Boolean literal: true or false.
	tokenNull                    // null keyword.
)

// token describes a single token: type, position in the input and value.
type token struct {
	kind tokenKind // Type of a token.

	boolValue  bool   // Value if a boolean literal token.
	byteValue  []byte // Raw value of a token.
	delimValue byte   // Value if a delimiter token.
}

// Lexer is a JSON lexer: it iterates over JSON tokens in a byte slice.
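//
// A minimal usage sketch (hypothetical input; the zero value plus Data is
// ready to use):
//
//	l := Lexer{Data: []byte(`"hello"`)}
//	s := l.String() // "hello"
//	l.Consumed()    // verify nothing but whitespace remains
//	if err := l.Error(); err != nil {
//		// handle the parse error
//	}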
type Lexer struct {
	Data []byte // Input data given to the lexer.

	start int   // Start of the current token.
	pos   int   // Current unscanned position in the input stream.
	token token // Last scanned token, if token.kind != tokenUndef.

	firstElement bool // Whether the current element is the first in an array or an object.
	wantSep      byte // A comma or a colon character, which needs to occur before a token.

	UseMultipleErrors bool          // If true, the lexer accumulates non-fatal errors instead of stopping at the first one.
	fatalError        error         // Fatal error encountered during lexing; usually a syntax error.
	multipleErrors    []*LexerError // Semantic errors encountered during lexing. Parsing continues after such errors are found.
}

// FetchToken scans the input for the next token.
func (r *Lexer) FetchToken() {
	r.token.kind = tokenUndef
	r.start = r.pos

	// Check that r.pos is within r.Data.
	// If it is not, the input data is corrupted.
	if len(r.Data) < r.pos {
		r.errParse("Unexpected end of data")
		return
	}
	// Determine the type of a token by skipping whitespace and reading the
	// first character.
	for _, c := range r.Data[r.pos:] {
		switch c {
		case ':', ',':
			if r.wantSep == c {
				r.pos++
				r.start++
				r.wantSep = 0
			} else {
				r.errSyntax()
			}

		case ' ', '\t', '\r', '\n':
			r.pos++
			r.start++

		case '"':
			if r.wantSep != 0 {
				r.errSyntax()
			}

			r.token.kind = tokenString
			r.fetchString()
			return

		case '{', '[':
			if r.wantSep != 0 {
				r.errSyntax()
			}
			r.firstElement = true
			r.token.kind = tokenDelim
			r.token.delimValue = r.Data[r.pos]
			r.pos++
			return

		case '}', ']':
			if !r.firstElement && (r.wantSep != ',') {
				r.errSyntax()
			}
			r.wantSep = 0
			r.token.kind = tokenDelim
			r.token.delimValue = r.Data[r.pos]
			r.pos++
			return

		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-':
			if r.wantSep != 0 {
				r.errSyntax()
			}
			r.token.kind = tokenNumber
			r.fetchNumber()
			return

		case 'n':
			if r.wantSep != 0 {
				r.errSyntax()
			}

			r.token.kind = tokenNull
			r.fetchNull()
			return

		case 't':
			if r.wantSep != 0 {
				r.errSyntax()
			}

			r.token.kind = tokenBool
			r.token.boolValue = true
			r.fetchTrue()
			return

		case 'f':
			if r.wantSep != 0 {
				r.errSyntax()
			}

			r.token.kind = tokenBool
			r.token.boolValue = false
			r.fetchFalse()
			return

		default:
			r.errSyntax()
			return
		}
	}
	r.fatalError = io.EOF
	return
}

// isTokenEnd returns true if the char can follow a non-delimiter token.
func isTokenEnd(c byte) bool {
	return c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '[' || c == ']' || c == '{' || c == '}' || c == ',' || c == ':'
}

// fetchNull fetches and checks the remaining bytes of the null keyword.
func (r *Lexer) fetchNull() {
	r.pos += 4
	if r.pos > len(r.Data) ||
		r.Data[r.pos-3] != 'u' ||
		r.Data[r.pos-2] != 'l' ||
		r.Data[r.pos-1] != 'l' ||
		(r.pos != len(r.Data) && !isTokenEnd(r.Data[r.pos])) {

		r.pos -= 4
		r.errSyntax()
	}
}

// fetchTrue fetches and checks the remaining bytes of the true keyword.
func (r *Lexer) fetchTrue() {
	r.pos += 4
	if r.pos > len(r.Data) ||
		r.Data[r.pos-3] != 'r' ||
		r.Data[r.pos-2] != 'u' ||
		r.Data[r.pos-1] != 'e' ||
		(r.pos != len(r.Data) && !isTokenEnd(r.Data[r.pos])) {

		r.pos -= 4
		r.errSyntax()
	}
}

// fetchFalse fetches and checks the remaining bytes of the false keyword.
func (r *Lexer) fetchFalse() {
	r.pos += 5
	if r.pos > len(r.Data) ||
		r.Data[r.pos-4] != 'a' ||
		r.Data[r.pos-3] != 'l' ||
		r.Data[r.pos-2] != 's' ||
		r.Data[r.pos-1] != 'e' ||
		(r.pos != len(r.Data) && !isTokenEnd(r.Data[r.pos])) {

		r.pos -= 5
		r.errSyntax()
	}
}

// fetchNumber scans a number literal token.
func (r *Lexer) fetchNumber() {
	hasE := false
	afterE := false
	hasDot := false

	r.pos++
	for i, c := range r.Data[r.pos:] {
		switch {
		case c >= '0' && c <= '9':
			afterE = false
		case c == '.' && !hasDot:
			hasDot = true
		case (c == 'e' || c == 'E') && !hasE:
			hasE = true
			hasDot = true
			afterE = true
		case (c == '+' || c == '-') && afterE:
			afterE = false
		default:
			r.pos += i
			if !isTokenEnd(c) {
				r.errSyntax()
			} else {
				r.token.byteValue = r.Data[r.start:r.pos]
			}
			return
		}
	}

	r.pos = len(r.Data)
	r.token.byteValue = r.Data[r.start:]
}

// findStringLen tries to scan the string literal for the terminating quote char to determine
// the required buffer size.
// The size will be exact if no escapes are present, and may be inexact if there are escaped chars.
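//
// A worked example (hypothetical input, starting after the opening quote):
// for data = `abc\"def"rest`, the escaped quote at index 3 bumps delta to 1,
// and the terminating quote at index 8 yields (true, true, 7), which is the
// length of the decoded string `abc"def`.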
func findStringLen(data []byte) (isValid, hasEscapes bool, length int) {
	delta := 0

	for i := 0; i < len(data); i++ {
		switch data[i] {
		case '\\':
			i++
			delta++
			if i < len(data) && data[i] == 'u' {
				delta++
			}
		case '"':
			return true, (delta > 0), (i - delta)
		}
	}

	return false, false, len(data)
}

// getu4 decodes \uXXXX from the beginning of s, returning the hex value,
// or -1 if the prefix is not a valid \uXXXX escape.
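//
// For example (hypothetical calls):
//
//	getu4([]byte(`\u0041`)) // 'A'
//	getu4([]byte(`\u00e9`)) // 'é'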
func getu4(s []byte) rune {
	if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
		return -1
	}
	var val rune
	for i := 2; i < len(s) && i < 6; i++ {
		var v byte
		c := s[i]
		switch c {
		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
			v = c - '0'
		case 'a', 'b', 'c', 'd', 'e', 'f':
			v = c - 'a' + 10
		case 'A', 'B', 'C', 'D', 'E', 'F':
			v = c - 'A' + 10
		default:
			return -1
		}

		val <<= 4
		val |= rune(v)
	}
	return val
}

// processEscape processes a single escape sequence and returns the number of input bytes consumed.
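//
// Simple escapes consume two bytes; \uXXXX escapes consume six, or twelve for
// a UTF-16 surrogate pair, e.g. (hypothetical input):
//
//	`\ud83d\ude00` // decodes to the single rune U+1F600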
func (r *Lexer) processEscape(data []byte) (int, error) {
	if len(data) < 2 {
		return 0, fmt.Errorf("syntax error at %v", string(data))
	}

	c := data[1]
	switch c {
	case '"', '/', '\\':
		r.token.byteValue = append(r.token.byteValue, c)
		return 2, nil
	case 'b':
		r.token.byteValue = append(r.token.byteValue, '\b')
		return 2, nil
	case 'f':
		r.token.byteValue = append(r.token.byteValue, '\f')
		return 2, nil
	case 'n':
		r.token.byteValue = append(r.token.byteValue, '\n')
		return 2, nil
	case 'r':
		r.token.byteValue = append(r.token.byteValue, '\r')
		return 2, nil
	case 't':
		r.token.byteValue = append(r.token.byteValue, '\t')
		return 2, nil
	case 'u':
		rr := getu4(data)
		if rr < 0 {
			return 0, errors.New("syntax error")
		}

		read := 6
		if utf16.IsSurrogate(rr) {
			rr1 := getu4(data[read:])
			if dec := utf16.DecodeRune(rr, rr1); dec != unicode.ReplacementChar {
				read += 6
				rr = dec
			} else {
				rr = unicode.ReplacementChar
			}
		}
		var d [4]byte
		s := utf8.EncodeRune(d[:], rr)
		r.token.byteValue = append(r.token.byteValue, d[:s]...)
		return read, nil
	}

	return 0, errors.New("syntax error")
}

// fetchString scans a string literal token.
func (r *Lexer) fetchString() {
	r.pos++
	data := r.Data[r.pos:]

	isValid, hasEscapes, length := findStringLen(data)
	if !isValid {
		r.pos += length
		r.errParse("unterminated string literal")
		return
	}
	if !hasEscapes {
		r.token.byteValue = data[:length]
		r.pos += length + 1
		return
	}

	r.token.byteValue = make([]byte, 0, length)
	p := 0
	for i := 0; i < len(data); {
		switch data[i] {
		case '"':
			r.pos += i + 1
			r.token.byteValue = append(r.token.byteValue, data[p:i]...)
			i++
			return

		case '\\':
			r.token.byteValue = append(r.token.byteValue, data[p:i]...)
			off, err := r.processEscape(data[i:])
			if err != nil {
				r.errParse(err.Error())
				return
			}
			i += off
			p = i

		default:
			i++
		}
	}
	r.errParse("unterminated string literal")
}

// scanToken scans the next token if no token is currently available in the lexer.
func (r *Lexer) scanToken() {
	if r.token.kind != tokenUndef || r.fatalError != nil {
		return
	}

	r.FetchToken()
}

// consume resets the current token to allow scanning the next one.
func (r *Lexer) consume() {
	r.token.kind = tokenUndef
	r.token.delimValue = 0
}

// Ok returns true if no error (including io.EOF) was encountered during scanning.
func (r *Lexer) Ok() bool {
	return r.fatalError == nil
}

const maxErrorContextLen = 13

func (r *Lexer) errParse(what string) {
	if r.fatalError == nil {
		var str string
		if len(r.Data)-r.pos <= maxErrorContextLen {
			str = string(r.Data)
		} else {
			str = string(r.Data[r.pos:r.pos+maxErrorContextLen-3]) + "..."
		}
		r.fatalError = &LexerError{
			Reason: what,
			Offset: r.pos,
			Data:   str,
		}
	}
}

func (r *Lexer) errSyntax() {
	r.errParse("syntax error")
}

func (r *Lexer) errInvalidToken(expected string) {
	if r.fatalError != nil {
		return
	}
	if r.UseMultipleErrors {
		r.pos = r.start
		r.consume()
		r.SkipRecursive()
		switch expected {
		case "[":
			r.token.delimValue = ']'
			r.token.kind = tokenDelim
		case "{":
			r.token.delimValue = '}'
			r.token.kind = tokenDelim
		}
		r.addNonfatalError(&LexerError{
			Reason: fmt.Sprintf("expected %s", expected),
			Offset: r.start,
			Data:   string(r.Data[r.start:r.pos]),
		})
		return
	}

	var str string
	if len(r.token.byteValue) <= maxErrorContextLen {
		str = string(r.token.byteValue)
	} else {
		str = string(r.token.byteValue[:maxErrorContextLen-3]) + "..."
	}
	r.fatalError = &LexerError{
		Reason: fmt.Sprintf("expected %s", expected),
		Offset: r.pos,
		Data:   str,
	}
}

// GetPos returns the current scanning position in the input buffer.
func (r *Lexer) GetPos() int {
	return r.pos
}

// Delim consumes a token and verifies that it is the given delimiter.
func (r *Lexer) Delim(c byte) {
	if r.token.kind == tokenUndef && r.Ok() {
		r.FetchToken()
	}

	if !r.Ok() || r.token.delimValue != c {
		r.consume() // errInvalidToken can change token if UseMultipleErrors is enabled.
		r.errInvalidToken(string([]byte{c}))
	} else {
		r.consume()
	}
}

// IsDelim returns true if the next token is the given delimiter.
// It also returns true after a scanning error, so that loops guarded by it terminate.
func (r *Lexer) IsDelim(c byte) bool {
	if r.token.kind == tokenUndef && r.Ok() {
		r.FetchToken()
	}
	return !r.Ok() || r.token.delimValue == c
}

// Null verifies that the next token is null and consumes it.
func (r *Lexer) Null() {
	if r.token.kind == tokenUndef && r.Ok() {
		r.FetchToken()
	}
	if !r.Ok() || r.token.kind != tokenNull {
		r.errInvalidToken("null")
	}
	r.consume()
}

// IsNull returns true if the next token is a null keyword.
func (r *Lexer) IsNull() bool {
	if r.token.kind == tokenUndef && r.Ok() {
		r.FetchToken()
	}
	return r.Ok() && r.token.kind == tokenNull
}

// Skip skips a single token.
func (r *Lexer) Skip() {
	if r.token.kind == tokenUndef && r.Ok() {
		r.FetchToken()
	}
	r.consume()
}

// SkipRecursive skips the next array or object completely, or just skips a single token if
// the next token is not an array/object.
//
// Note: no syntax validation is performed on the skipped data.
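//
// For example (hypothetical input), with the lexer positioned at the start of
//
//	{"a": [1, 2], "b": 3} 42
//
// calling SkipRecursive consumes the whole object and leaves the lexer
// positioned just before 42.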
func (r *Lexer) SkipRecursive() {
	r.scanToken()
	var start, end byte

	if r.token.delimValue == '{' {
		start, end = '{', '}'
	} else if r.token.delimValue == '[' {
		start, end = '[', ']'
	} else {
		r.consume()
		return
	}

	r.consume()

	level := 1
	inQuotes := false
	wasEscape := false

	for i, c := range r.Data[r.pos:] {
		switch {
		case c == start && !inQuotes:
			level++
		case c == end && !inQuotes:
			level--
			if level == 0 {
				r.pos += i + 1
				return
			}
		case c == '\\' && inQuotes:
			wasEscape = !wasEscape
			continue
		case c == '"' && inQuotes:
			inQuotes = wasEscape
		case c == '"':
			inQuotes = true
		}
		wasEscape = false
	}
	r.pos = len(r.Data)
	r.fatalError = &LexerError{
		Reason: "EOF reached while skipping array/object or token",
		Offset: r.pos,
		Data:   string(r.Data[r.pos:]),
	}
}

// Raw fetches the next item recursively as a raw byte slice.
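// This is useful for deferred decoding, e.g. (hypothetical usage):
//
//	raw := l.Raw() // raw JSON bytes of the next value, whatever its type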
func (r *Lexer) Raw() []byte {
	r.SkipRecursive()
	if !r.Ok() {
		return nil
	}
	return r.Data[r.start:r.pos]
}

// IsStart returns whether the lexer is positioned at the start
// of an input string.
func (r *Lexer) IsStart() bool {
	return r.pos == 0
}

// Consumed reads all remaining bytes from the input, publishing an error if
// there is anything but whitespace remaining.
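// It is typically called once after decoding a top-level value, e.g.
// (hypothetical usage):
//
//	_ = l.Interface()
//	l.Consumed() // reports trailing garbage such as `{"a":1} x`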
func (r *Lexer) Consumed() {
	if r.pos > len(r.Data) || !r.Ok() {
		return
	}

	for _, c := range r.Data[r.pos:] {
		if c != ' ' && c != '\t' && c != '\r' && c != '\n' {
			r.AddError(&LexerError{
				Reason: "invalid character '" + string(c) + "' after top-level value",
				Offset: r.pos,
				Data:   string(r.Data[r.pos:]),
			})
			return
		}

		r.pos++
		r.start++
	}
}

func (r *Lexer) unsafeString() (string, []byte) {
	if r.token.kind == tokenUndef && r.Ok() {
		r.FetchToken()
	}
	if !r.Ok() || r.token.kind != tokenString {
		r.errInvalidToken("string")
		return "", nil
	}
	bytes := r.token.byteValue
	ret := bytesToStr(r.token.byteValue)
	r.consume()
	return ret, bytes
}

// UnsafeString returns the string value if the token is a string literal.
//
// Warning: the returned string may point into the input buffer, so it must not outlive
// the input buffer. The intended pattern of usage is as an argument to a switch statement.
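// For example (hypothetical key dispatch):
//
//	switch l.UnsafeString() {
//	case "id":
//		// the string is only inspected here, never stored
//	}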
func (r *Lexer) UnsafeString() string {
	ret, _ := r.unsafeString()
	return ret
}

// UnsafeBytes returns the byte slice if the token is a string literal.
func (r *Lexer) UnsafeBytes() []byte {
	_, ret := r.unsafeString()
	return ret
}

// String reads a string literal.
func (r *Lexer) String() string {
	if r.token.kind == tokenUndef && r.Ok() {
		r.FetchToken()
	}
	if !r.Ok() || r.token.kind != tokenString {
		r.errInvalidToken("string")
		return ""
	}
	ret := string(r.token.byteValue)
	r.consume()
	return ret
}

// Bytes reads a string literal and base64 decodes it into a byte slice.
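// For example (hypothetical input):
//
//	l := Lexer{Data: []byte(`"aGVsbG8="`)}
//	b := l.Bytes() // []byte("hello")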
func (r *Lexer) Bytes() []byte {
	if r.token.kind == tokenUndef && r.Ok() {
		r.FetchToken()
	}
	if !r.Ok() || r.token.kind != tokenString {
		r.errInvalidToken("string")
		return nil
	}
	ret := make([]byte, base64.StdEncoding.DecodedLen(len(r.token.byteValue)))
	n, err := base64.StdEncoding.Decode(ret, r.token.byteValue)
	if err != nil {
		r.fatalError = &LexerError{
			Reason: err.Error(),
		}
		return nil
	}

	r.consume()
	return ret[:n]
}

// Bool reads a true or false boolean keyword.
func (r *Lexer) Bool() bool {
	if r.token.kind == tokenUndef && r.Ok() {
		r.FetchToken()
	}
	if !r.Ok() || r.token.kind != tokenBool {
		r.errInvalidToken("bool")
		return false
	}
	ret := r.token.boolValue
	r.consume()
	return ret
}

// number scans and returns a number literal as a string.
func (r *Lexer) number() string {
	if r.token.kind == tokenUndef && r.Ok() {
		r.FetchToken()
	}
	if !r.Ok() || r.token.kind != tokenNumber {
		r.errInvalidToken("number")
		return ""
	}
	ret := bytesToStr(r.token.byteValue)
	r.consume()
	return ret
}

func (r *Lexer) Uint8() uint8 {
	s := r.number()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseUint(s, 10, 8)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   s,
		})
	}
	return uint8(n)
}

func (r *Lexer) Uint16() uint16 {
	s := r.number()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseUint(s, 10, 16)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   s,
		})
	}
	return uint16(n)
}

func (r *Lexer) Uint32() uint32 {
	s := r.number()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseUint(s, 10, 32)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   s,
		})
	}
	return uint32(n)
}

func (r *Lexer) Uint64() uint64 {
	s := r.number()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseUint(s, 10, 64)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   s,
		})
	}
	return n
}

func (r *Lexer) Uint() uint {
	return uint(r.Uint64())
}

func (r *Lexer) Int8() int8 {
	s := r.number()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseInt(s, 10, 8)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   s,
		})
	}
	return int8(n)
}

func (r *Lexer) Int16() int16 {
	s := r.number()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseInt(s, 10, 16)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   s,
		})
	}
	return int16(n)
}

func (r *Lexer) Int32() int32 {
	s := r.number()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseInt(s, 10, 32)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   s,
		})
	}
	return int32(n)
}

func (r *Lexer) Int64() int64 {
	s := r.number()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseInt(s, 10, 64)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   s,
		})
	}
	return n
}

func (r *Lexer) Int() int {
	return int(r.Int64())
}

func (r *Lexer) Uint8Str() uint8 {
	s, b := r.unsafeString()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseUint(s, 10, 8)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   string(b),
		})
	}
	return uint8(n)
}

func (r *Lexer) Uint16Str() uint16 {
	s, b := r.unsafeString()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseUint(s, 10, 16)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   string(b),
		})
	}
	return uint16(n)
}

func (r *Lexer) Uint32Str() uint32 {
	s, b := r.unsafeString()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseUint(s, 10, 32)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   string(b),
		})
	}
	return uint32(n)
}

func (r *Lexer) Uint64Str() uint64 {
	s, b := r.unsafeString()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseUint(s, 10, 64)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   string(b),
		})
	}
	return n
}

func (r *Lexer) UintStr() uint {
	return uint(r.Uint64Str())
}

func (r *Lexer) UintptrStr() uintptr {
	return uintptr(r.Uint64Str())
}

func (r *Lexer) Int8Str() int8 {
	s, b := r.unsafeString()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseInt(s, 10, 8)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   string(b),
		})
	}
	return int8(n)
}

func (r *Lexer) Int16Str() int16 {
	s, b := r.unsafeString()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseInt(s, 10, 16)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   string(b),
		})
	}
	return int16(n)
}

func (r *Lexer) Int32Str() int32 {
	s, b := r.unsafeString()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseInt(s, 10, 32)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   string(b),
		})
	}
	return int32(n)
}

func (r *Lexer) Int64Str() int64 {
	s, b := r.unsafeString()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseInt(s, 10, 64)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   string(b),
		})
	}
	return n
}

func (r *Lexer) IntStr() int {
	return int(r.Int64Str())
}

func (r *Lexer) Float32() float32 {
	s := r.number()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseFloat(s, 32)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   s,
		})
	}
	return float32(n)
}

func (r *Lexer) Float32Str() float32 {
	s, b := r.unsafeString()
	if !r.Ok() {
		return 0
	}
	n, err := strconv.ParseFloat(s, 32)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   string(b),
		})
	}
	return float32(n)
}

func (r *Lexer) Float64() float64 {
	s := r.number()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseFloat(s, 64)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   s,
		})
	}
	return n
}

func (r *Lexer) Float64Str() float64 {
	s, b := r.unsafeString()
	if !r.Ok() {
		return 0
	}
	n, err := strconv.ParseFloat(s, 64)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   string(b),
		})
	}
	return n
}

// Error returns the first fatal error encountered, if any.
func (r *Lexer) Error() error {
	return r.fatalError
}

// AddError sets a fatal error unless one is already set.
func (r *Lexer) AddError(e error) {
	if r.fatalError == nil {
		r.fatalError = e
	}
}

// AddNonFatalError records e as an error at the current token; it is
// accumulated as non-fatal only when UseMultipleErrors is set.
func (r *Lexer) AddNonFatalError(e error) {
	r.addNonfatalError(&LexerError{
		Offset: r.start,
		Data:   string(r.Data[r.start:r.pos]),
		Reason: e.Error(),
	})
}

func (r *Lexer) addNonfatalError(err *LexerError) {
	if r.UseMultipleErrors {
		// We don't want to add errors with the same offset.
		if len(r.multipleErrors) != 0 && r.multipleErrors[len(r.multipleErrors)-1].Offset == err.Offset {
			return
		}
		r.multipleErrors = append(r.multipleErrors, err)
		return
	}
	r.fatalError = err
}

// GetNonFatalErrors returns all non-fatal errors accumulated so far.
func (r *Lexer) GetNonFatalErrors() []*LexerError {
	return r.multipleErrors
}

// JsonNumber fetches a json.Number, as in the 'encoding/json' package.
// Ints, floats, and strings containing them are all valid values; null yields
// an empty json.Number.
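// For example (hypothetical inputs): 42, 1.5e5 and "3.14" each yield a
// json.Number holding their textual value.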
func (r *Lexer) JsonNumber() json.Number {
	if r.token.kind == tokenUndef && r.Ok() {
		r.FetchToken()
	}
	if !r.Ok() {
		r.errInvalidToken("json.Number")
		return json.Number("")
	}

	switch r.token.kind {
	case tokenString:
		return json.Number(r.String())
	case tokenNumber:
		return json.Number(r.Raw())
	case tokenNull:
		r.Null()
		return json.Number("")
	default:
		r.errSyntax()
		return json.Number("")
	}
}

// Interface fetches a value as interface{}, analogous to unmarshalling into an
// interface{} with the 'encoding/json' package.
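// Objects decode as map[string]interface{}, arrays as []interface{}, and
// scalars as string, float64, bool or nil. For example (hypothetical input):
//
//	l := Lexer{Data: []byte(`{"a": [1, true]}`)}
//	v := l.Interface() // map[string]interface{}{"a": []interface{}{1.0, true}}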
func (r *Lexer) Interface() interface{} {
	if r.token.kind == tokenUndef && r.Ok() {
		r.FetchToken()
	}

	if !r.Ok() {
		return nil
	}
	switch r.token.kind {
	case tokenString:
		return r.String()
	case tokenNumber:
		return r.Float64()
	case tokenBool:
		return r.Bool()
	case tokenNull:
		r.Null()
		return nil
	}

	if r.token.delimValue == '{' {
		r.consume()

		ret := map[string]interface{}{}
		for !r.IsDelim('}') {
			key := r.String()
			r.WantColon()
			ret[key] = r.Interface()
			r.WantComma()
		}
		r.Delim('}')

		if r.Ok() {
			return ret
		} else {
			return nil
		}
	} else if r.token.delimValue == '[' {
		r.consume()

		var ret []interface{}
		for !r.IsDelim(']') {
			ret = append(ret, r.Interface())
			r.WantComma()
		}
		r.Delim(']')

		if r.Ok() {
			return ret
		} else {
			return nil
		}
	}
	r.errSyntax()
	return nil
}

// WantComma requires a comma to be present before fetching the next token.
func (r *Lexer) WantComma() {
	r.wantSep = ','
	r.firstElement = false
}

// WantColon requires a colon to be present before fetching the next token.
func (r *Lexer) WantColon() {
	r.wantSep = ':'
	r.firstElement = false
}