fiber/internal/encoding/json/token.go

package json

// Tokenizer is an iterator-style type which can be used to progressively parse
// through a json input.
//
// Tokenizing json is useful to build highly efficient parsing operations, for
// example when doing transformations on-the-fly as the program reads the
// input and produces the transformed json to an output buffer.
//
// Here is a common pattern to use a tokenizer:
//
//	for t := json.NewTokenizer(b); t.Next(); {
//		switch t.Delim {
//		case '{':
//			...
//		case '}':
//			...
//		case '[':
//			...
//		case ']':
//			...
//		case ':':
//			...
//		case ',':
//			...
//		}
//
//		switch {
//		case t.Value.String():
//			...
//		case t.Value.Null():
//			...
//		case t.Value.True():
//			...
//		case t.Value.False():
//			...
//		case t.Value.Number():
//			...
//		}
//	}
//
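// As a minimal, self-contained sketch (the input below is purely
// illustrative), the same pattern can be used to count the keys of an
// object via the IsKey field:
//
//	t := json.NewTokenizer([]byte(`{"a":1,"b":2}`))
//	keys := 0
//	for t.Next() {
//		if t.IsKey {
//			keys++
//		}
//	}
//	// keys == 2 and t.Err == nil when the input is well-formed
//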
type Tokenizer struct {
	// When the tokenizer is positioned on a json delimiter this field is not
	// zero. In this case the possible values are '{', '}', '[', ']', ':', and
	// ','.
	Delim Delim

	// This field contains the raw json token that the tokenizer is pointing at.
	// When Delim is not zero, this field is a single-element byte slice
	// containing the delimiter value. Otherwise, this field holds values like
	// null, true, false, numbers, or quoted strings.
	Value RawValue

	// When the tokenizer has encountered invalid content this field is not nil.
	Err error

	// When the value is in an array or an object, this field contains the depth
	// at which it was found.
	Depth int

	// When the value is in an array or an object, this field contains the
	// position at which it was found.
	Index int

	// This field is true when the value is the key of an object.
	IsKey bool

	// Tells whether the next value read from the tokenizer is a key.
	isKey bool

	// json input for the tokenizer, pointing at data right after the last token
	// that was parsed.
	json []byte

	// Stack used to track entering and leaving arrays, objects, and keys. The
	// buffer is used as pre-allocated space for the stack, so tokenizing inputs
	// of modest nesting depth does not require a heap allocation.
	stack  []state
	buffer [8]state
}

type state struct {
	typ scope
	len int
}

type scope int

const (
	inArray scope = iota
	inObject
)

// NewTokenizer constructs a new Tokenizer which reads its json input from b.
func NewTokenizer(b []byte) *Tokenizer { return &Tokenizer{json: b} }

// Reset erases the state of t and re-initializes it with the json input from b.
func (t *Tokenizer) Reset(b []byte) {
	// This code is similar to:
	//
	//	*t = Tokenizer{json: b}
	//
	// However, it does not compile down to an invocation of duff-copy, which
	// ends up being slower and prevents the code from being inlined.
	t.Delim = 0
	t.Value = nil
	t.Err = nil
	t.Depth = 0
	t.Index = 0
	t.IsKey = false
	t.isKey = false
	t.json = b
	t.stack = nil
}
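
// A minimal reuse sketch (inputs is a hypothetical [][]byte): resetting one
// Tokenizer across many documents recycles its pre-allocated stack buffer
// instead of constructing a new Tokenizer per input:
//
//	t := json.NewTokenizer(nil)
//	for _, b := range inputs {
//		t.Reset(b)
//		for t.Next() {
//			// consume t.Delim / t.Value here
//		}
//	}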

// Next advances the tokenizer to the next token. It returns true if a token
// was read, or false if the end of the json input has been reached.
//
// If the tokenizer encounters malformed json while reading the input the method
// sets t.Err to an error describing the issue, and returns false. Once an error
// has been encountered, the tokenizer will always fail until its input is
// cleared by a call to its Reset method.
func (t *Tokenizer) Next() bool {
	if t.Err != nil {
		return false
	}

	// Inlined code of the skipSpaces function, this gives a ~15% speed boost.
	i := 0
skipLoop:
	for _, c := range t.json {
		switch c {
		case sp, ht, nl, cr:
			i++
		default:
			break skipLoop
		}
	}

	if t.json = t.json[i:]; len(t.json) == 0 {
		t.Reset(nil)
		return false
	}

	var d Delim
	var v []byte
	var b []byte
	var err error

	switch t.json[0] {
	case '"':
		v, b, err = parseString(t.json)
	case 'n':
		v, b, err = parseNull(t.json)
	case 't':
		v, b, err = parseTrue(t.json)
	case 'f':
		v, b, err = parseFalse(t.json)
	case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
		v, b, err = parseNumber(t.json)
	case '{', '}', '[', ']', ':', ',':
		d, v, b = Delim(t.json[0]), t.json[:1], t.json[1:]
	default:
		v, b, err = t.json[:1], t.json[1:], syntaxError(t.json, "expected token but found '%c'", t.json[0])
	}

	t.Delim = d
	t.Value = RawValue(v)
	t.Err = err
	t.Depth = t.depth()
	t.Index = t.index()
	t.IsKey = d == 0 && t.isKey
	t.json = b

	if d != 0 {
		switch d {
		case '{':
			t.isKey = true
			t.push(inObject)
		case '[':
			t.push(inArray)
		case '}':
			err = t.pop(inObject)
			t.Depth--
			t.Index = t.index()
		case ']':
			err = t.pop(inArray)
			t.Depth--
			t.Index = t.index()
		case ':':
			t.isKey = false
		case ',':
			if len(t.stack) == 0 {
				// A comma at the top level has no enclosing array or object;
				// report it instead of indexing the empty stack.
				err = syntaxError(t.json, "found unexpected character while tokenizing json input")
			} else {
				if t.is(inObject) {
					t.isKey = true
				}
				t.stack[len(t.stack)-1].len++
			}
		}
	}

	// Record delimiter mismatches in t.Err so the error survives past this
	// call, as documented above.
	if err != nil {
		t.Err = err
	}

	return (d != 0 || len(v) != 0) && err == nil
}
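
// As an illustration of how Depth and Index evolve, the tokens produced for
// the input `[1,{"k":null}]` are:
//
//	token  Depth  Index  IsKey
//	  [      0      0    false
//	  1      1      0    false
//	  ,      1      0    false
//	  {      1      1    false
//	  "k"    2      0    true
//	  :      2      0    false
//	  null   2      0    false
//	  }      1      1    false
//	  ]      0      0    false
//
// For malformed input such as `[}`, Next returns false and records a syntax
// error in t.Err.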

// push records that the tokenizer entered an array or an object, lazily
// initializing the stack from the pre-allocated buffer on first use.
func (t *Tokenizer) push(typ scope) {
	if t.stack == nil {
		t.stack = t.buffer[:0]
	}
	t.stack = append(t.stack, state{typ: typ, len: 1})
}

// pop records that the tokenizer left an array or an object, returning a
// syntax error if the closing delimiter does not match the innermost open
// scope.
func (t *Tokenizer) pop(expect scope) error {
	i := len(t.stack) - 1

	if i < 0 {
		return syntaxError(t.json, "found unexpected character while tokenizing json input")
	}

	if found := t.stack[i]; expect != found.typ {
		return syntaxError(t.json, "found unexpected character while tokenizing json input")
	}

	t.stack = t.stack[:i]
	return nil
}

// is reports whether the innermost open scope is of the given type.
func (t *Tokenizer) is(typ scope) bool {
	return len(t.stack) != 0 && t.stack[len(t.stack)-1].typ == typ
}

// depth returns the number of arrays and objects the tokenizer is currently
// nested in.
func (t *Tokenizer) depth() int {
	return len(t.stack)
}

// index returns the zero-based position of the current value within the
// innermost array or object.
func (t *Tokenizer) index() int {
	if len(t.stack) == 0 {
		return 0
	}
	return t.stack[len(t.stack)-1].len - 1
}

// RawValue represents a raw json value; it is intended to carry null, true,
// false, number, and string values only.
type RawValue []byte

// String returns true if v contains a string value.
func (v RawValue) String() bool { return len(v) != 0 && v[0] == '"' }

// Null returns true if v contains a null value.
func (v RawValue) Null() bool { return len(v) != 0 && v[0] == 'n' }

// True returns true if v contains a true value.
func (v RawValue) True() bool { return len(v) != 0 && v[0] == 't' }

// False returns true if v contains a false value.
func (v RawValue) False() bool { return len(v) != 0 && v[0] == 'f' }

// Number returns true if v contains a number value.
func (v RawValue) Number() bool {
	if len(v) != 0 {
		switch v[0] {
		case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
			return true
		}
	}
	return false
}
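
// These predicates only inspect the first byte, which is sufficient for
// tokens produced by the Tokenizer since each token has already been
// validated. A short classification sketch (illustrative value):
//
//	v := RawValue(`3.14`)
//	switch {
//	case v.String():
//		// quoted string
//	case v.True(), v.False():
//		// boolean
//	case v.Null():
//		// null
//	case v.Number():
//		// number: this branch is taken for `3.14`
//	}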

// AppendUnquote writes the unquoted version of the string value in v into b.
// It panics if v does not contain a valid quoted string.
func (v RawValue) AppendUnquote(b []byte) []byte {
	s, r, isNew, err := parseStringUnquote([]byte(v), b)
	if err != nil {
		panic(err)
	}
	if len(r) != 0 {
		panic(syntaxError(r, "unexpected trailing tokens after json value"))
	}
	if isNew {
		b = s
	} else {
		b = append(b, s...)
	}
	return b
}

// Unquote returns the unquoted version of the string value in v.
func (v RawValue) Unquote() []byte {
	return v.AppendUnquote(nil)
}
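
// A short usage sketch (illustrative value, assuming v holds a valid json
// string): unquoting decodes the raw token into its string contents,
// resolving escape sequences along the way:
//
//	v := RawValue(`"hello\nworld"`)
//	s := v.Unquote() // []byte holding "hello", a real newline, then "world"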