parser.go 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207
  1. package dump
  2. import (
  3. "bufio"
  4. "fmt"
  5. "io"
  6. "regexp"
  7. "strconv"
  8. "strings"
  9. "github.com/go-mysql-org/go-mysql/mysql"
  10. "github.com/pingcap/errors"
  11. )
  // ErrSkip is a sentinel error a ParseHandler may return from BinLog or Data.
  // Parse treats it as "ignore this item and carry on" instead of aborting.
  // It is not special-cased for GtidSet.
  var ErrSkip = errors.New("Handler error, but skipped")
  // ParseHandler receives the items that Parse recognizes while reading a dump.
  // Each method returns an error to report a failure back to Parse.
  type ParseHandler interface {
  	// BinLog is called with the values captured from a line of the form
  	// CHANGE MASTER TO MASTER_LOG_FILE='name', MASTER_LOG_POS=pos;
  	BinLog(name string, pos uint64) error

  	// GtidSet is called with the text of a MySQL GTID set found in a line.
  	GtidSet(gtidsets string) error

  	// Data is called for a recognized INSERT statement with the schema from
  	// the most recent USE statement, the table name, and the split values.
  	Data(schema, table string, values []string) error
  }
  // Patterns used by Parse to recognize lines of a dump. They are compiled once,
  // when the package is initialized.
  var (
  	// binlogExp captures the file name and position of a CHANGE MASTER TO line.
  	binlogExp = regexp.MustCompile(`^CHANGE MASTER TO MASTER_LOG_FILE='(.+)', MASTER_LOG_POS=(\d+);`)

  	// useExp captures the schema name of a USE statement.
  	useExp = regexp.MustCompile("^USE `(.+)`;")

  	// valuesExp captures the table name and the text between the outer
  	// parentheses of an INSERT ... VALUES statement.
  	valuesExp = regexp.MustCompile("^INSERT INTO `(.+?)` VALUES \\((.+)\\);$")

  	// gtidExp only matches MySQL GTIDs; MariaDB uses a different form
  	// (SET GLOBAL gtid_slave_pos='0-1-4') which this pattern does not match.
  	// Example of a matching line:
  	// SET @@GLOBAL.GTID_PURGED='1638041a-0457-11e9-bb9f-00505690b730:1-429405150';
  	// https://dev.mysql.com/doc/refman/5.7/en/replication-gtids-concepts.html
  	gtidExp = regexp.MustCompile(`(\w{8}(-\w{4}){3}-\w{12}(:\d+(-\d+)?)+)`)
  )
  34. // Parse the dump data with Dumper generate.
  35. // It can not parse all the data formats with mysqldump outputs
  36. func Parse(r io.Reader, h ParseHandler, parseBinlogPos bool) error {
  37. rb := bufio.NewReaderSize(r, 1024*16)
  38. var db string
  39. var binlogParsed bool
  40. for {
  41. line, err := rb.ReadString('\n')
  42. if err != nil && err != io.EOF {
  43. return errors.Trace(err)
  44. } else if mysql.ErrorEqual(err, io.EOF) {
  45. break
  46. }
  47. // Ignore '\n' on Linux or '\r\n' on Windows
  48. line = strings.TrimRightFunc(line, func(c rune) bool {
  49. return c == '\r' || c == '\n'
  50. })
  51. if parseBinlogPos && !binlogParsed {
  52. // parsed gtid set from mysqldump
  53. // gtid comes before binlog file-position
  54. if m := gtidExp.FindAllStringSubmatch(line, -1); len(m) == 1 {
  55. gtidStr := m[0][1]
  56. if gtidStr != "" {
  57. if err := h.GtidSet(gtidStr); err != nil {
  58. return errors.Trace(err)
  59. }
  60. }
  61. }
  62. if m := binlogExp.FindAllStringSubmatch(line, -1); len(m) == 1 {
  63. name := m[0][1]
  64. pos, err := strconv.ParseUint(m[0][2], 10, 64)
  65. if err != nil {
  66. return errors.Errorf("parse binlog %v err, invalid number", line)
  67. }
  68. if err = h.BinLog(name, pos); err != nil && err != ErrSkip {
  69. return errors.Trace(err)
  70. }
  71. binlogParsed = true
  72. }
  73. }
  74. if m := useExp.FindAllStringSubmatch(line, -1); len(m) == 1 {
  75. db = m[0][1]
  76. }
  77. if m := valuesExp.FindAllStringSubmatch(line, -1); len(m) == 1 {
  78. table := m[0][1]
  79. values, err := parseValues(m[0][2])
  80. if err != nil {
  81. return errors.Errorf("parse values %v err", line)
  82. }
  83. if err = h.Data(db, table, values); err != nil && err != ErrSkip {
  84. return errors.Trace(err)
  85. }
  86. }
  87. }
  88. return nil
  89. }
  90. func parseValues(str string) ([]string, error) {
  91. // values are separated by comma, but we can not split using comma directly
  92. // string is enclosed by single quote
  93. // a simple implementation, may be more robust later.
  94. values := make([]string, 0, 8)
  95. i := 0
  96. for i < len(str) {
  97. if str[i] != '\'' {
  98. // no string, read until comma
  99. j := i + 1
  100. for ; j < len(str) && str[j] != ','; j++ {
  101. }
  102. values = append(values, str[i:j])
  103. // skip ,
  104. i = j + 1
  105. } else {
  106. // read string until another single quote
  107. j := i + 1
  108. escaped := false
  109. for j < len(str) {
  110. if str[j] == '\\' {
  111. // skip escaped character
  112. j += 2
  113. escaped = true
  114. continue
  115. } else if str[j] == '\'' {
  116. break
  117. } else {
  118. j++
  119. }
  120. }
  121. if j >= len(str) {
  122. return nil, fmt.Errorf("parse quote values error")
  123. }
  124. value := str[i : j+1]
  125. if escaped {
  126. value = unescapeString(value)
  127. }
  128. values = append(values, value)
  129. // skip ' and ,
  130. i = j + 2
  131. }
  132. // need skip blank???
  133. }
  134. return values, nil
  135. }
  136. // unescapeString un-escapes the string.
  137. // mysqldump will escape the string when dumps,
  138. // Refer http://dev.mysql.com/doc/refman/5.7/en/string-literals.html
  139. func unescapeString(s string) string {
  140. i := 0
  141. value := make([]byte, 0, len(s))
  142. for i < len(s) {
  143. if s[i] == '\\' {
  144. j := i + 1
  145. if j == len(s) {
  146. // The last char is \, remove
  147. break
  148. }
  149. value = append(value, unescapeChar(s[j]))
  150. i += 2
  151. } else {
  152. value = append(value, s[i])
  153. i++
  154. }
  155. }
  156. return string(value)
  157. }
  // unescapeChar returns the byte denoted by the escape sequence backslash+ch.
  //
  // \n \0 \b \Z \r \t map to newline, NUL, backspace, byte 26, carriage return
  // and tab. Any other byte, including the quote characters and the backslash
  // itself, is returned unchanged.
  func unescapeChar(ch byte) byte {
  	switch ch {
  	case 'n':
  		return '\n'
  	case '0':
  		return 0
  	case 'b':
  		return 8
  	case 'Z':
  		return 26
  	case 'r':
  		return '\r'
  	case 't':
  		return '\t'
  	default:
  		return ch
  	}
  }