1 // Copyright 2018 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
6 // This is a simplified copy of Google's buildifier parser.
20 // A Position describes the position between two bytes of input.
21 type Position struct {
22 Line int // line in input (starting at 1)
23 LineRune int // rune in line (starting at 1)
24 Byte int // byte in input (starting at 0)
27 // add returns the position at the end of s, assuming it starts at p.
28 func (p Position) add(s string) Position {
30 if n := strings.Count(s, "\n"); n > 0 {
32 s = s[strings.LastIndex(s, "\n")+1:]
35 p.LineRune += utf8.RuneCountInString(s)
39 // An Expr represents an input element.
41 // Span returns the start and end position of the expression,
42 // excluding leading or trailing comments.
43 Span() (start, end Position)
45 // Comment returns the comments attached to the expression.
46 // This method would normally be named 'Comments' but that
47 // would interfere with embedding a type of the same name.
51 // A Comment represents a single // comment.
54 Token string // without trailing newline
55 Suffix bool // an end of line (not whole line) comment
58 // Comments collects the comments associated with an expression.
59 type Comments struct {
60 Before []Comment // whole-line comments before this expression
61 Suffix []Comment // end-of-line comments after this expression
63 // For top-level expressions only, After lists whole-line
64 // comments following the expression.
68 // Comment returns the receiver. This isn't useful by itself, but
69 // a Comments struct is embedded into all the expression
70 // implementation types, and this gives each of those a Comment
71 // method to satisfy the Expr interface.
72 func (c *Comments) Comment() *Comments {
76 // A FileSyntax represents an entire go.mod file.
77 type FileSyntax struct {
78 Name string // file path
83 func (x *FileSyntax) Span() (start, end Position) {
87 start, _ = x.Stmt[0].Span()
88 _, end = x.Stmt[len(x.Stmt)-1].Span()
92 func (x *FileSyntax) addLine(hint Expr, tokens ...string) *Line {
94 // If no hint given, add to the last statement of the given type.
96 for i := len(x.Stmt) - 1; i >= 0; i-- {
98 switch stmt := stmt.(type) {
100 if stmt.Token != nil && stmt.Token[0] == tokens[0] {
105 if stmt.Token[0] == tokens[0] {
114 for i, stmt := range x.Stmt {
115 switch stmt := stmt.(type) {
118 // Convert line to line block.
120 block := &LineBlock{Token: stmt.Token[:1], Line: []*Line{stmt}}
121 stmt.Token = stmt.Token[1:]
123 new := &Line{Token: tokens[1:], InBlock: true}
124 block.Line = append(block.Line, new)
129 new := &Line{Token: tokens[1:], InBlock: true}
130 stmt.Line = append(stmt.Line, new)
133 for j, line := range stmt.Line {
135 // Add new line after hint.
136 stmt.Line = append(stmt.Line, nil)
137 copy(stmt.Line[j+2:], stmt.Line[j+1:])
138 new := &Line{Token: tokens[1:], InBlock: true}
147 new := &Line{Token: tokens}
148 x.Stmt = append(x.Stmt, new)
152 func (x *FileSyntax) updateLine(line *Line, tokens ...string) {
159 func (x *FileSyntax) removeLine(line *Line) {
163 // Cleanup cleans up the file syntax x after any edit operations.
164 // To avoid quadratic behavior, removeLine marks the line as dead
165 // by setting line.Token = nil but does not remove it from the slice
166 // in which it appears. After edits have all been indicated,
167 // calling Cleanup cleans out the dead lines.
168 func (x *FileSyntax) Cleanup() {
170 for _, stmt := range x.Stmt {
171 switch stmt := stmt.(type) {
173 if stmt.Token == nil {
178 for _, line := range stmt.Line {
179 if line.Token != nil {
188 // Collapse block into single line.
191 Before: commentsAdd(stmt.Before, stmt.Line[0].Before),
192 Suffix: commentsAdd(stmt.Line[0].Suffix, stmt.Suffix),
193 After: commentsAdd(stmt.Line[0].After, stmt.After),
195 Token: stringsAdd(stmt.Token, stmt.Line[0].Token),
201 stmt.Line = stmt.Line[:ww]
209 func commentsAdd(x, y []Comment) []Comment {
210 return append(x[:len(x):len(x)], y...)
213 func stringsAdd(x, y []string) []string {
214 return append(x[:len(x):len(x)], y...)
217 // A CommentBlock represents a top-level block of comments separate
219 type CommentBlock struct {
224 func (x *CommentBlock) Span() (start, end Position) {
225 return x.Start, x.Start
228 // A Line is a single line of tokens.
237 func (x *Line) Span() (start, end Position) {
238 return x.Start, x.End
241 // A LineBlock is a factored block of lines, like
248 type LineBlock struct {
257 func (x *LineBlock) Span() (start, end Position) {
258 return x.Start, x.RParen.Pos.add(")")
261 // An LParen represents the beginning of a parenthesized line block.
262 // It is a place to store suffix comments.
268 func (x *LParen) Span() (start, end Position) {
269 return x.Pos, x.Pos.add(")")
272 // An RParen represents the end of a parenthesized line block.
273 // It is a place to store whole-line (before) comments.
279 func (x *RParen) Span() (start, end Position) {
280 return x.Pos, x.Pos.add(")")
283 // An input represents a single input file being parsed.
286 filename string // name of input file, for errors
287 complete []byte // entire input
288 remaining []byte // remaining input
289 token []byte // token being scanned
290 lastToken string // most recently returned token, for error messages
291 pos Position // current input position
292 comments []Comment // accumulated comments
293 endRule int // position of end of current rule
296 file *FileSyntax // returned top-level syntax tree
297 parseError error // error encountered during parsing
299 // Comment assignment state.
300 pre []Expr // all expressions, in preorder traversal
301 post []Expr // all expressions, in postorder traversal
304 func newInput(filename string, data []byte) *input {
309 pos: Position{Line: 1, LineRune: 1, Byte: 0},
313 // parse parses the input file.
314 func parse(file string, data []byte) (f *FileSyntax, err error) {
315 in := newInput(file, data)
316 // The parser panics for both routine errors like syntax errors
317 // and for programmer bugs like array index errors.
318 // Turn both into error returns. Catching bug panics is
319 // especially important when processing many files.
321 if e := recover(); e != nil {
322 if e == in.parseError {
325 err = fmt.Errorf("%s:%d:%d: internal error: %v", in.filename, in.pos.Line, in.pos.LineRune, e)
330 // Invoke the parser.
332 if in.parseError != nil {
333 return nil, in.parseError
335 in.file.Name = in.filename
337 // Assign comments to nearby syntax.
343 // Error is called to report an error.
344 // The reason s is often "syntax error".
345 // Error does not return: it panics.
346 func (in *input) Error(s string) {
347 if s == "syntax error" && in.lastToken != "" {
348 s += " near " + in.lastToken
350 in.parseError = fmt.Errorf("%s:%d:%d: %v", in.filename, in.pos.Line, in.pos.LineRune, s)
354 // eof reports whether the input has reached end of file.
355 func (in *input) eof() bool {
356 return len(in.remaining) == 0
359 // peekRune returns the next rune in the input without consuming it.
360 func (in *input) peekRune() int {
361 if len(in.remaining) == 0 {
364 r, _ := utf8.DecodeRune(in.remaining)
368 // peekPrefix reports whether the remaining input begins with the given prefix.
369 func (in *input) peekPrefix(prefix string) bool {
370 // This is like bytes.HasPrefix(in.remaining, []byte(prefix))
371 // but without the allocation of the []byte copy of prefix.
372 for i := 0; i < len(prefix); i++ {
373 if i >= len(in.remaining) || in.remaining[i] != prefix[i] {
380 // readRune consumes and returns the next rune in the input.
381 func (in *input) readRune() int {
382 if len(in.remaining) == 0 {
383 in.Error("internal lexer error: readRune at EOF")
385 r, size := utf8.DecodeRune(in.remaining)
386 in.remaining = in.remaining[size:]
397 type symType struct {
403 // startToken marks the beginning of the next input token.
404 // It must be followed by a call to endToken, once the token has
405 // been consumed using readRune.
406 func (in *input) startToken(sym *symType) {
407 in.token = in.remaining
412 // endToken marks the end of an input token.
413 // It records the actual token string in sym.text if the caller
414 // has not done that already.
415 func (in *input) endToken(sym *symType) {
417 tok := string(in.token[:len(in.token)-len(in.remaining)])
419 in.lastToken = sym.text
424 // lex is called from the parser to obtain the next input token.
425 // It returns the token value (either a rune like '+' or a symbolic token _FOR)
426 // and sets val to the data associated with the token.
427 // For all our input tokens, the associated data is
428 // val.Pos (the position where the token begins)
429 // and val.Token (the input string corresponding to the token).
430 func (in *input) lex(sym *symType) int {
431 // Skip past spaces, stopping at non-space or EOF.
432 countNL := 0 // number of newlines we've skipped past
434 // Skip over spaces. Count newlines so we can give the parser
435 // information about where top-level blank lines are,
436 // for top-level comment assignment.
438 if c == ' ' || c == '\t' || c == '\r' {
443 // Comment runs to end of line.
444 if in.peekPrefix("//") {
447 // Is this comment the only thing on its line?
448 // Find the last \n before this // and see if it's all
449 // spaces from there to here.
450 i := bytes.LastIndex(in.complete[:in.pos.Byte], []byte("\n"))
451 suffix := len(bytes.TrimSpace(in.complete[i+1:in.pos.Byte])) > 0
456 for len(in.remaining) > 0 && in.readRune() != '\n' {
460 sym.text = strings.TrimRight(sym.text, "\n")
461 in.lastToken = "comment"
463 // If we are at top level (not in a statement), hand the comment to
464 // the parser as a _COMMENT token. The grammar is written
465 // to handle top-level comments itself.
467 // Not in a statement. Tell parser about top-level comment.
471 // Otherwise, save comment for later attachment to syntax tree.
473 in.comments = append(in.comments, Comment{sym.pos, "", false})
475 in.comments = append(in.comments, Comment{sym.pos, sym.text, suffix})
480 if in.peekPrefix("/*") {
481 in.Error(fmt.Sprintf("mod files must use // comments (not /* */ comments)"))
484 // Found non-space non-comment.
488 // Found the beginning of the next token.
490 defer in.endToken(sym)
498 // Punctuation tokens.
499 switch c := in.peekRune(); c {
512 case '"', '`': // quoted string
518 in.Error("unexpected EOF in string")
520 if in.peekRune() == '\n' {
521 in.Error("unexpected newline in string")
527 if c == '\\' && quote != '`' {
530 in.Error("unexpected EOF in string")
539 // Checked all punctuation. Must be identifier token.
540 if c := in.peekRune(); !isIdent(c) {
541 in.Error(fmt.Sprintf("unexpected input character %#q", c))
544 // Scan over identifier.
545 for isIdent(in.peekRune()) {
546 if in.peekPrefix("//") {
549 if in.peekPrefix("/*") {
550 in.Error(fmt.Sprintf("mod files must use // comments (not /* */ comments)"))
557 // isIdent reports whether c is an identifier rune.
558 // We treat nearly all runes as identifier runes.
559 func isIdent(c int) bool {
560 return c != 0 && !unicode.IsSpace(rune(c))
563 // Comment assignment.
564 // We build two lists of all subexpressions, preorder and postorder.
565 // The preorder list is ordered by start location, with outer expressions first.
566 // The postorder list is ordered by end location, with outer expressions last.
567 // We use the preorder list to assign each whole-line comment to the syntax
568 // immediately following it, and we use the postorder list to assign each
569 // end-of-line comment to the syntax immediately preceding it.
571 // order walks the expression adding it and its subexpressions to the
572 // preorder and postorder lists.
573 func (in *input) order(x Expr) {
575 in.pre = append(in.pre, x)
577 switch x := x.(type) {
579 panic(fmt.Errorf("order: unexpected type %T", x))
582 case *LParen, *RParen:
589 for _, stmt := range x.Stmt {
594 for _, l := range x.Line {
600 in.post = append(in.post, x)
604 // assignComments attaches comments to nearby syntax.
605 func (in *input) assignComments() {
608 // Generate preorder and postorder lists.
611 // Split into whole-line comments and suffix comments.
612 var line, suffix []Comment
613 for _, com := range in.comments {
615 suffix = append(suffix, com)
617 line = append(line, com)
622 for _, c := range line {
623 fmt.Fprintf(os.Stderr, "LINE %q :%d:%d #%d\n", c.Token, c.Start.Line, c.Start.LineRune, c.Start.Byte)
627 // Assign line comments to syntax immediately following.
628 for _, x := range in.pre {
631 fmt.Printf("pre %T :%d:%d #%d\n", x, start.Line, start.LineRune, start.Byte)
634 for len(line) > 0 && start.Byte >= line[0].Start.Byte {
636 fmt.Fprintf(os.Stderr, "ASSIGN LINE %q #%d\n", line[0].Token, line[0].Start.Byte)
638 xcom.Before = append(xcom.Before, line[0])
643 // Remaining line comments go at end of file.
644 in.file.After = append(in.file.After, line...)
647 for _, c := range suffix {
648 fmt.Fprintf(os.Stderr, "SUFFIX %q :%d:%d #%d\n", c.Token, c.Start.Line, c.Start.LineRune, c.Start.Byte)
652 // Assign suffix comments to syntax immediately before.
653 for i := len(in.post) - 1; i >= 0; i-- {
656 start, end := x.Span()
658 fmt.Printf("post %T :%d:%d #%d :%d:%d #%d\n", x, start.Line, start.LineRune, start.Byte, end.Line, end.LineRune, end.Byte)
661 // Do not assign suffix comments to end of line block or whole file.
662 // Instead assign them to the last element inside.
668 // Do not assign suffix comments to something that starts
669 // on an earlier line, so that in
674 // we assign the comment to z and not to x ( ... ).
675 if start.Line != end.Line {
679 for len(suffix) > 0 && end.Byte <= suffix[len(suffix)-1].Start.Byte {
681 fmt.Fprintf(os.Stderr, "ASSIGN SUFFIX %q #%d\n", suffix[len(suffix)-1].Token, suffix[len(suffix)-1].Start.Byte)
683 xcom.Suffix = append(xcom.Suffix, suffix[len(suffix)-1])
684 suffix = suffix[:len(suffix)-1]
688 // We assigned suffix comments in reverse.
689 // If multiple suffix comments were appended to the same
690 // expression node, they are now in reverse. Fix that.
691 for _, x := range in.post {
692 reverseComments(x.Comment().Suffix)
695 // Remaining suffix comments go at beginning of file.
696 in.file.Before = append(in.file.Before, suffix...)
699 // reverseComments reverses the []Comment list.
700 func reverseComments(list []Comment) {
701 for i, j := 0, len(list)-1; i < j; i, j = i+1, j-1 {
702 list[i], list[j] = list[j], list[i]
706 func (in *input) parseFile() {
707 in.file = new(FileSyntax)
715 in.file.Stmt = append(in.file.Stmt, cb)
720 cb = &CommentBlock{Start: sym.pos}
723 com.Before = append(com.Before, Comment{Start: sym.pos, Token: sym.text})
726 in.file.Stmt = append(in.file.Stmt, cb)
732 in.file.Stmt[len(in.file.Stmt)-1].Comment().Before = cb.Before
739 func (in *input) parseStmt(sym *symType) {
742 token := []string{sym.text}
746 case '\n', _EOF, _EOL:
747 in.file.Stmt = append(in.file.Stmt, &Line{
754 in.file.Stmt = append(in.file.Stmt, in.parseLineBlock(start, token, sym))
757 token = append(token, sym.text)
763 func (in *input) parseLineBlock(start Position, token []string, sym *symType) *LineBlock {
767 LParen: LParen{Pos: sym.pos},
769 var comments []Comment
776 if len(comments) == 0 && len(x.Line) > 0 || len(comments) > 0 && comments[len(comments)-1].Token != "" {
777 comments = append(comments, Comment{})
780 comments = append(comments, Comment{Start: sym.pos, Token: sym.text})
782 in.Error(fmt.Sprintf("syntax error (unterminated block started at %s:%d:%d)", in.filename, x.Start.Line, x.Start.LineRune))
784 x.RParen.Before = comments
785 x.RParen.Pos = sym.pos
787 if tok != '\n' && tok != _EOF && tok != _EOL {
788 in.Error("syntax error (expected newline after closing paren)")
792 l := in.parseLine(sym)
793 x.Line = append(x.Line, l)
794 l.Comment().Before = comments
800 func (in *input) parseLine(sym *symType) *Line {
803 token := []string{sym.text}
807 case '\n', _EOF, _EOL:
815 token = append(token, sym.text)
830 slashSlash = []byte("//")
831 moduleStr = []byte("module")
834 // ModulePath returns the module path from the gomod file text.
835 // If it cannot find a module path, it returns an empty string.
836 // It is tolerant of unrelated problems in the go.mod file.
837 func ModulePath(mod []byte) string {
841 if i := bytes.IndexByte(line, '\n'); i >= 0 {
842 line, mod = line[:i], line[i+1:]
844 if i := bytes.Index(line, slashSlash); i >= 0 {
847 line = bytes.TrimSpace(line)
848 if !bytes.HasPrefix(line, moduleStr) {
851 line = line[len(moduleStr):]
853 line = bytes.TrimSpace(line)
854 if len(line) == n || len(line) == 0 {
858 if line[0] == '"' || line[0] == '`' {
859 p, err := strconv.Unquote(string(line))
861 return "" // malformed quoted string or multiline module path
868 return "" // missing module path