1 // Copyright 2015 Unknwon
3 // Licensed under the Apache License, Version 2.0 (the "License"): you may
4 // not use this file except in compliance with the License. You may obtain
5 // a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 // License for the specific language governing permissions and limitations
// pythonMultiline matches a line that begins with one or more whitespace
// characters (submatch 1) followed by one or more non-newline characters
// (submatch 2).
var pythonMultiline = regexp.MustCompile("^(\\s+)([^\n]+)")
30 type parserOptions struct {
31 IgnoreContinuation bool
32 IgnoreInlineComment bool
33 AllowPythonMultilineValues bool
34 SpaceBeforeInlineComment bool
35 UnescapeValueDoubleQuotes bool
36 UnescapeValueCommentSymbols bool
37 PreserveSurroundedQuote bool
49 func newParser(r io.Reader, opts parserOptions) *parser {
51 buf: bufio.NewReader(r),
54 comment: &bytes.Buffer{},
// BOM handles the byte order mark (BOM) header of UTF-8, UTF-16 LE and UTF-16 BE encoded input.
59 // http://en.wikipedia.org/wiki/Byte_order_mark#Representations_of_byte_order_marks_by_encoding
60 func (p *parser) BOM() error {
61 mask, err := p.buf.Peek(2)
62 if err != nil && err != io.EOF {
64 } else if len(mask) < 2 {
69 case mask[0] == 254 && mask[1] == 255:
71 case mask[0] == 255 && mask[1] == 254:
73 case mask[0] == 239 && mask[1] == 187:
74 mask, err := p.buf.Peek(3)
75 if err != nil && err != io.EOF {
77 } else if len(mask) < 3 {
87 func (p *parser) readUntil(delim byte) ([]byte, error) {
88 data, err := p.buf.ReadBytes(delim)
99 func cleanComment(in []byte) ([]byte, bool) {
100 i := bytes.IndexAny(in, "#;")
107 func readKeyName(delimiters string, in []byte) (string, int, error) {
110 // Check if key name surrounded by quotes.
113 if len(line) > 6 && string(line[0:3]) == `"""` {
118 } else if line[0] == '`' {
124 if len(keyQuote) > 0 {
125 startIdx := len(keyQuote)
126 // FIXME: fail case -> """"""name"""=value
127 pos := strings.Index(line[startIdx:], keyQuote)
129 return "", -1, fmt.Errorf("missing closing key quote: %s", line)
133 // Find key-value delimiter
134 i := strings.IndexAny(line[pos+startIdx:], delimiters)
136 return "", -1, ErrDelimiterNotFound{line}
139 return strings.TrimSpace(line[startIdx:pos]), endIdx + startIdx + 1, nil
142 endIdx = strings.IndexAny(line, delimiters)
144 return "", -1, ErrDelimiterNotFound{line}
146 return strings.TrimSpace(line[0:endIdx]), endIdx + 1, nil
149 func (p *parser) readMultilines(line, val, valQuote string) (string, error) {
151 data, err := p.readUntil('\n')
157 pos := strings.LastIndex(next, valQuote)
161 comment, has := cleanComment([]byte(next[pos:]))
163 p.comment.Write(bytes.TrimSpace(comment))
169 return "", fmt.Errorf("missing closing key quote from '%s' to '%s'", line, next)
175 func (p *parser) readContinuationLines(val string) (string, error) {
177 data, err := p.readUntil('\n')
181 next := strings.TrimSpace(string(data))
187 if val[len(val)-1] != '\\' {
190 val = val[:len(val)-1]
// hasSurroundedQuote reports whether in is wrapped in the given quote
// character, i.e. its first and last bytes are both quote (typically '"'
// or '\'').
// It returns false if the same quote character also appears anywhere
// between the two ends, and for inputs shorter than two bytes.
func hasSurroundedQuote(in string, quote byte) bool {
	if len(in) < 2 || in[0] != quote || in[len(in)-1] != quote {
		return false
	}
	// The first occurrence after the opening quote must be the closing one;
	// an earlier hit means the quote also appears inside the value.
	return strings.IndexByte(in[1:], quote) == len(in)-2
}
203 func (p *parser) readValue(in []byte, bufferSize int) (string, error) {
205 line := strings.TrimLeftFunc(string(in), unicode.IsSpace)
207 if p.options.AllowPythonMultilineValues && len(in) > 0 && in[len(in)-1] == '\n' {
208 return p.readPythonMultilines(line, bufferSize)
214 if len(line) > 3 && string(line[0:3]) == `"""` {
216 } else if line[0] == '`' {
218 } else if p.options.UnescapeValueDoubleQuotes && line[0] == '"' {
222 if len(valQuote) > 0 {
223 startIdx := len(valQuote)
224 pos := strings.LastIndex(line[startIdx:], valQuote)
225 // Check for multi-line value
227 return p.readMultilines(line, line[startIdx:], valQuote)
230 if p.options.UnescapeValueDoubleQuotes && valQuote == `"` {
231 return strings.Replace(line[startIdx:pos+startIdx], `\"`, `"`, -1), nil
233 return line[startIdx : pos+startIdx], nil
236 lastChar := line[len(line)-1]
237 // Won't be able to reach here if value only contains whitespace
238 line = strings.TrimSpace(line)
239 trimmedLastChar := line[len(line)-1]
241 // Check continuation lines when desired
242 if !p.options.IgnoreContinuation && trimmedLastChar == '\\' {
243 return p.readContinuationLines(line[:len(line)-1])
246 // Check if ignore inline comment
247 if !p.options.IgnoreInlineComment {
249 if p.options.SpaceBeforeInlineComment {
250 i = strings.Index(line, " #")
252 i = strings.Index(line, " ;")
256 i = strings.IndexAny(line, "#;")
260 p.comment.WriteString(line[i:])
261 line = strings.TrimSpace(line[:i])
266 // Trim single and double quotes
267 if (hasSurroundedQuote(line, '\'') ||
268 hasSurroundedQuote(line, '"')) && !p.options.PreserveSurroundedQuote {
269 line = line[1 : len(line)-1]
270 } else if len(valQuote) == 0 && p.options.UnescapeValueCommentSymbols {
271 if strings.Contains(line, `\;`) {
272 line = strings.Replace(line, `\;`, ";", -1)
274 if strings.Contains(line, `\#`) {
275 line = strings.Replace(line, `\#`, "#", -1)
277 } else if p.options.AllowPythonMultilineValues && lastChar == '\n' {
278 return p.readPythonMultilines(line, bufferSize)
284 func (p *parser) readPythonMultilines(line string, bufferSize int) (string, error) {
285 parserBufferPeekResult, _ := p.buf.Peek(bufferSize)
286 peekBuffer := bytes.NewBuffer(parserBufferPeekResult)
289 peekData, peekErr := peekBuffer.ReadBytes('\n')
291 if peekErr == io.EOF {
297 peekMatches := pythonMultiline.FindStringSubmatch(string(peekData))
298 if len(peekMatches) != 3 {
302 // NOTE: Return if not a python-ini multi-line value.
303 currentIdentSize := len(peekMatches[1])
304 if currentIdentSize <= 0 {
308 // NOTE: Just advance the parser reader (buffer) in-sync with the peek buffer.
309 _, err := p.readUntil('\n')
314 line += fmt.Sprintf("\n%s", peekMatches[2])
318 // parse parses data through an io.Reader.
319 func (f *File) parse(reader io.Reader) (err error) {
320 p := newParser(reader, parserOptions{
321 IgnoreContinuation: f.options.IgnoreContinuation,
322 IgnoreInlineComment: f.options.IgnoreInlineComment,
323 AllowPythonMultilineValues: f.options.AllowPythonMultilineValues,
324 SpaceBeforeInlineComment: f.options.SpaceBeforeInlineComment,
325 UnescapeValueDoubleQuotes: f.options.UnescapeValueDoubleQuotes,
326 UnescapeValueCommentSymbols: f.options.UnescapeValueCommentSymbols,
327 PreserveSurroundedQuote: f.options.PreserveSurroundedQuote,
329 if err = p.BOM(); err != nil {
330 return fmt.Errorf("BOM: %v", err)
333 // Ignore error because default section name is never empty string.
334 name := DefaultSection
335 if f.options.Insensitive {
336 name = strings.ToLower(DefaultSection)
338 section, _ := f.NewSection(name)
340 // This "last" is not strictly equivalent to "previous one" if current key is not the first nested key
341 var isLastValueEmpty bool
342 var lastRegularKey *Key
345 var inUnparseableSection bool
347 // NOTE: Iterate and increase `currentPeekSize` until
348 // the size of the parser buffer is found.
349 // TODO(unknwon): When Golang 1.10 is the lowest version supported, replace with `parserBufferSize := p.buf.Size()`.
350 parserBufferSize := 0
351 // NOTE: Peek 1kb at a time.
352 currentPeekSize := 1024
354 if f.options.AllowPythonMultilineValues {
356 peekBytes, _ := p.buf.Peek(currentPeekSize)
357 peekBytesLength := len(peekBytes)
359 if parserBufferSize >= peekBytesLength {
364 parserBufferSize = peekBytesLength
369 line, err = p.readUntil('\n')
374 if f.options.AllowNestedValues &&
375 isLastValueEmpty && len(line) > 0 {
376 if line[0] == ' ' || line[0] == '\t' {
377 lastRegularKey.addNestedValue(string(bytes.TrimSpace(line)))
382 line = bytes.TrimLeftFunc(line, unicode.IsSpace)
388 if line[0] == '#' || line[0] == ';' {
// Note: the trailing line break is deliberately kept here because it
// is needed when a second comment line is appended; it is cleaned up
// once at the end, when the comment is set as a value.
392 p.comment.Write(line)
398 // Read to the next ']' (TODO: support quoted strings)
399 closeIdx := bytes.LastIndexByte(line, ']')
401 return fmt.Errorf("unclosed section: %s", line)
404 name := string(line[1:closeIdx])
405 section, err = f.NewSection(name)
410 comment, has := cleanComment(line[closeIdx+1:])
412 p.comment.Write(comment)
415 section.Comment = strings.TrimSpace(p.comment.String())
// Reset auto-counter and comments
421 inUnparseableSection = false
422 for i := range f.options.UnparseableSections {
423 if f.options.UnparseableSections[i] == name ||
424 (f.options.Insensitive && strings.ToLower(f.options.UnparseableSections[i]) == strings.ToLower(name)) {
425 inUnparseableSection = true
432 if inUnparseableSection {
433 section.isRawSection = true
434 section.rawBody += string(line)
438 kname, offset, err := readKeyName(f.options.KeyValueDelimiters, line)
440 // Treat as boolean key when desired, and whole line is key name.
441 if IsErrDelimiterNotFound(err) {
443 case f.options.AllowBooleanKeys:
444 kname, err := p.readValue(line, parserBufferSize)
448 key, err := section.NewBooleanKey(kname)
452 key.Comment = strings.TrimSpace(p.comment.String())
456 case f.options.SkipUnrecognizableLines:
467 kname = "#" + strconv.Itoa(p.count)
471 value, err := p.readValue(line[offset:], parserBufferSize)
475 isLastValueEmpty = len(value) == 0
477 key, err := section.NewKey(kname, value)
481 key.isAutoIncrement = isAutoIncr
482 key.Comment = strings.TrimSpace(p.comment.String())