8 // ReadString reads a string value from the iterator.
// NOTE(review): several lines of this function are not visible here, so the
// conditions guarding the statements below are hedged rather than stated.
9 func (iter *Iterator) ReadString() (ret string) {
// Scan the bytes currently held in iter.buf between head and tail.
12 for i := iter.head; i < iter.tail; i++ {
// Copy the bytes from head up to (not including) index i into the result.
15 ret = string(iter.buf[iter.head:i])
// The offending byte is reported by its numeric value (%d).
21 iter.ReportError("ReadString",
22 fmt.Sprintf(`invalid control character found: %d`, c))
// Hand over to readStringSlowPath, which decodes through readEscapedChar.
26 return iter.readStringSlowPath()
// Presumably consumes the rest of a `null` literal after its leading 'n' — confirm.
28 iter.skipThreeBytes('u', 'l', 'l')
// The unexpected byte c is echoed back in the error message.
31 iter.ReportError("ReadString", `expects " or n, but found `+string([]byte{c}))
// readStringSlowPath is the fallback that ReadString returns through when its
// scan over the buffered bytes does not produce a result.
// NOTE(review): most of the body is not visible here; the comments below
// describe only the statements that are.
35 func (iter *Iterator) readStringSlowPath() (ret string) {
// Keep going until an error has been recorded on the iterator.
38 for iter.Error == nil {
// readEscapedChar returns the slice with the decoded character appended.
45 str = iter.readEscapedChar(c, str)
// Presumably reached once the loop stops on iter.Error — confirm.
50 iter.ReportError("readStringSlowPath", "unexpected end of input")
// readEscapedChar appends the character selected by the escape byte c to str
// and returns the resulting slice.
// NOTE(review): the switch/case labels and the reads that produce r, r2 and
// the updated c are not visible here; branch conditions are therefore hedged.
54 func (iter *Iterator) readEscapedChar(c byte, str []byte) []byte {
// r falls in the UTF-16 surrogate range, so a second code unit may follow.
58 if utf16.IsSurrogate(r) {
// Check whether an error was recorded on the iterator.
60 if iter.Error != nil {
// Emit r by itself (presumably no second escape followed — confirm).
65 str = appendRune(str, r)
69 if iter.Error != nil {
// Emit r by itself, then treat c as the escape byte of a new sequence.
73 str = appendRune(str, r)
74 return iter.readEscapedChar(c, str)
77 if iter.Error != nil {
// utf16.DecodeRune returns U+FFFD when r and r2 do not form a valid pair.
80 combined := utf16.DecodeRune(r, r2)
81 if combined == '\uFFFD' {
// Invalid pair: pass both halves to appendRune separately.
82 str = appendRune(str, r)
83 str = appendRune(str, r2)
// Valid pair: emit the single combined code point.
85 str = appendRune(str, combined)
// Emit r directly (presumably the non-surrogate branch — confirm).
88 str = appendRune(str, r)
// Each append below adds a single literal byte; the case labels selecting
// them are not visible here.
91 str = append(str, '"')
93 str = append(str, '\\')
95 str = append(str, '/')
97 str = append(str, '\b')
99 str = append(str, '\f')
101 str = append(str, '\n')
103 str = append(str, '\r')
105 str = append(str, '\t')
// Presumably the default branch for an unrecognised escape byte — confirm.
107 iter.ReportError("readEscapedChar",
108 `invalid escape char after \`)
114 // ReadStringAsSlice reads a string from the iterator without copying it into string form.
115 // The returned []byte must not be kept, as it will change after the next iterator call.
// NOTE(review): some lines of this function are not visible here.
116 func (iter *Iterator) ReadStringAsSlice() (ret []byte) {
117 c := iter.nextToken()
119 for i := iter.head; i < iter.tail; i++ {
120 // requires an ASCII string with no escapes
121 // used for: field name, base64, number
122 if iter.buf[i] == '"' {
123 // fast path: reuse the underlying buffer (ret aliases iter.buf)
124 ret = iter.buf[iter.head:i]
// Copy the unread bytes [head, tail) into a fresh slice with twice that
// capacity, then mark them consumed. Presumably reached when the loop above
// found no closing quote — confirm.
129 readLen := iter.tail - iter.head
130 copied := make([]byte, readLen, readLen*2)
131 copy(copied, iter.buf[iter.head:iter.tail])
132 iter.head = iter.tail
// Keep going until an error has been recorded on the iterator.
133 for iter.Error == nil {
138 copied = append(copied, c)
// NOTE(review): the message mentions `n`, but no handling of 'n' is visible
// in this function — confirm the wording matches the accepted input.
142 iter.ReportError("ReadStringAsSlice", `expects " or n, but found `+string([]byte{c}))
// readU4 accumulates four hexadecimal digits into ret, most significant digit
// first (each step multiplies the running value by 16).
// NOTE(review): the statement that obtains c is not visible here.
146 func (iter *Iterator) readU4() (ret rune) {
147 for i := 0; i < 4; i++ {
// Check whether an error was recorded on the iterator.
149 if iter.Error != nil {
// Digits, lower-case and upper-case hex letters are all accepted.
152 if c >= '0' && c <= '9' {
153 ret = ret*16 + rune(c-'0')
154 } else if c >= 'a' && c <= 'f' {
155 ret = ret*16 + rune(c-'a'+10)
156 } else if c >= 'A' && c <= 'F' {
157 ret = ret*16 + rune(c-'A'+10)
// NOTE(review): the message lists only a~f although A~F is accepted above.
159 iter.ReportError("readU4", "expects 0~9 or a~f, but found "+string([]byte{c}))
// Tag bits: appendRune ORs t2/t3/t4 into the first byte of a two-, three- and
// four-byte sequence, and tx into every following byte.
// NOTE(review): t1 and t5 are not referenced in the code visible here.
167 t1 = 0x00 // 0000 0000
168 tx = 0x80 // 1000 0000
169 t2 = 0xC0 // 1100 0000
170 t3 = 0xE0 // 1110 0000
171 t4 = 0xF0 // 1111 0000
172 t5 = 0xF8 // 1111 1000
// Payload masks: maskx keeps the low six bits carried by each tx byte.
// NOTE(review): mask2, mask3 and mask4 are not referenced in the code visible here.
174 maskx = 0x3F // 0011 1111
175 mask2 = 0x1F // 0001 1111
176 mask3 = 0x0F // 0000 1111
177 mask4 = 0x07 // 0000 0111
// Inclusive bounds of the surrogate range tested by appendRune.
183 surrogateMin = 0xD800
184 surrogateMax = 0xDFFF
186 maxRune = '\U0010FFFF' // Maximum valid Unicode code point.
187 runeError = '\uFFFD' // the "error" Rune or "Unicode replacement character"
// appendRune appends an encoding of r to p, one to four bytes long depending
// on which switch case is taken. The byte layout (t2/t3/t4 lead bytes, tx
// continuation bytes carrying six bits each) matches UTF-8.
// NOTE(review): the case labels for the one-, two-, three- and four-byte
// branches and the return statements are not visible here.
190 func appendRune(p []byte, r rune) []byte {
191 // Negative values are erroneous. Making it unsigned addresses the problem.
192 switch i := uint32(r); {
// One-byte form: the value is stored as is.
194 p = append(p, byte(r))
// Two-byte form: bits above the low six go in the lead byte, low six bits follow.
197 p = append(p, t2|byte(r>>6))
198 p = append(p, tx|byte(r)&maskx)
// Matches values above maxRune and surrogate halves. What the case body does
// is not visible here — presumably it substitutes runeError; confirm.
200 case i > maxRune, surrogateMin <= i && i <= surrogateMax:
// Three-byte form: lead byte, then two six-bit continuation bytes.
204 p = append(p, t3|byte(r>>12))
205 p = append(p, tx|byte(r>>6)&maskx)
206 p = append(p, tx|byte(r)&maskx)
// Four-byte form: lead byte, then three six-bit continuation bytes.
209 p = append(p, t4|byte(r>>18))
210 p = append(p, tx|byte(r>>12)&maskx)
211 p = append(p, tx|byte(r>>6)&maskx)
212 p = append(p, tx|byte(r)&maskx)