pig.js 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174
  1. // CodeMirror, copyright (c) by Marijn Haverbeke and others
  2. // Distributed under an MIT license: https://codemirror.net/LICENSE
  3. /*
  4. * Pig Latin Mode for CodeMirror 2
  5. * @author Prasanth Jayachandran
  6. * @link https://github.com/prasanthj/pig-codemirror-2
  7. * This implementation is adapted from PL/SQL mode in CodeMirror 2.
  8. */
  9. ;(function (mod) {
  10. if (typeof exports == 'object' && typeof module == 'object')
  11. // CommonJS
  12. mod(require('../../lib/codemirror'))
  13. else if (typeof define == 'function' && define.amd)
  14. // AMD
  15. define(['../../lib/codemirror'], mod)
  16. // Plain browser env
  17. else mod(CodeMirror)
  18. })(function (CodeMirror) {
  19. 'use strict'
  20. CodeMirror.defineMode('pig', function (_config, parserConfig) {
  // Word sets and options supplied through the mode's parser configuration.
  // Any of them may be absent; the tokenizers test for that before use.
  var keywords = parserConfig.keywords
  var builtins = parserConfig.builtins
  var types = parserConfig.types
  var multiLineStrings = parserConfig.multiLineStrings

  // Matches one character that may appear inside an operator token
  var isOperatorChar = /[*+\-%<>=&?:\/!|]/
  // Switch the active tokenizer to `f` and let it produce the current token.
  //   stream - the character stream being tokenized
  //   state  - mode state; its `tokenize` property is overwritten with `f`
  //   f      - tokenizer function taking (stream, state)
  // Returns whatever style `f` returns.
  function chain(stream, state, f) {
    var nextTokenizer = f
    state.tokenize = nextTokenizer
    var style = nextTokenizer(stream, state)
    return style
  }
  // Tokenizer used while inside a /* ... */ comment.
  // Consumes characters until the closing "*/" or until the stream has
  // nothing more to give. Once the terminator is seen, control is handed
  // back to tokenBase; otherwise this tokenizer stays installed so the
  // comment continues on the next call.
  // Always returns the 'comment' style.
  function tokenComment(stream, state) {
    var sawStar = false
    for (var c = stream.next(); c; c = stream.next()) {
      if (sawStar && c == '/') {
        state.tokenize = tokenBase
        return 'comment'
      }
      // Only a '*' immediately before '/' closes the comment
      sawStar = c == '*'
    }
    return 'comment'
  }
  // Build a tokenizer for a string literal opened by `quote`.
  // The returned tokenizer consumes up to and including the first
  // unescaped closing quote, or to the end of the available input.
  // Control goes back to tokenBase when the string was closed, or when it
  // was left open without a trailing backslash and multiLineStrings is off.
  // The token style is 'error', exactly as in the original implementation.
  function tokenString(quote) {
    return function (stream, state) {
      var afterBackslash = false
      var closed = false
      var c
      while (!closed && (c = stream.next()) != null) {
        if (c == quote && !afterBackslash) closed = true
        // A backslash escapes only the character right after it
        else afterBackslash = !afterBackslash && c == '\\'
      }
      var continuesOnNextLine = afterBackslash || multiLineStrings
      if (closed || !continuesOnNextLine) state.tokenize = tokenBase
      return 'error'
    }
  }
  // Default tokenizer: reads one token from `stream` and returns its style.
  // Recognises, in this order: quoted strings, brackets/punctuation,
  // numbers, "/* */" comments, "--" line comments, operators, and finally
  // words, which are looked up (upper-cased) in the keyword, builtin and
  // type sets from the parser configuration.
  function tokenBase(stream, state) {
    var first = stream.next()

    // Opening quote: continue with a tokenizer bound to that quote character
    if (first == '"' || first == "'") return chain(stream, state, tokenString(first))

    // Brackets and punctuation get no style
    if (/[\[\]{}\(\),;\.]/.test(first)) return null

    // A digit starts a number; swallow trailing word characters and dots
    if (/\d/.test(first)) {
      stream.eatWhile(/[\w\.]/)
      return 'number'
    }

    // "/*" opens a multi-line comment
    if (first == '/' && stream.eat('*')) return chain(stream, state, tokenComment)

    // "--" comments out the rest of the line
    if (first == '-' && stream.eat('-')) {
      stream.skipToEnd()
      return 'comment'
    }

    // A lone '/' or '-', or any other operator character, starts an operator
    if (first == '/' || first == '-' || isOperatorChar.test(first)) {
      stream.eatWhile(isOperatorChar)
      return 'operator'
    }

    // Anything else: read the whole word
    stream.eatWhile(/[\w\$_]/)
    var word = stream.current().toUpperCase()

    if (keywords && keywords.propertyIsEnumerable(word)) {
      // Keywords can be used as variables, e.g. flatten(group) or group.$0.
      // A directly following ')' or '.' is consumed into the token, which
      // then falls through to the lookups below instead of being a keyword.
      var usedAsName = stream.eat(')') || stream.eat('.')
      if (!usedAsName) return 'keyword'
      word = stream.current().toUpperCase()
    }

    // Builtin function names
    if (builtins && builtins.propertyIsEnumerable(word)) return 'variable-2'

    // Data type names
    if (types && types.propertyIsEnumerable(word)) return 'variable-3'

    // Everything else is a plain variable
    return 'variable'
  }
  108. // Interface
  109. return {
  110. startState: function () {
  111. return {
  112. tokenize: tokenBase,
  113. startOfLine: true,
  114. }
  115. },
  116. token: function (stream, state) {
  117. if (stream.eatSpace()) return null
  118. var style = state.tokenize(stream, state)
  119. return style
  120. },
  121. }
  122. })
  123. ;(function () {
  // Turn a whitespace-separated word list into a lookup object.
  //   str - words separated by one or more whitespace characters; leading
  //         and trailing whitespace is allowed
  // Returns a plain object with one own enumerable property, set to true,
  // per word. Empty fragments are skipped so that a list ending in a space
  // does not register the empty string as a word.
  // The result is deliberately a plain object (not a prototype-less one):
  // the tokenizer queries these sets through propertyIsEnumerable().
  function keywords(str) {
    var obj = {},
      words = str.split(/\s+/)
    for (var i = 0; i < words.length; ++i) {
      if (words[i]) obj[words[i]] = true
    }
    return obj
  }
  // Word lists are space-separated. The builtin and type lists end with a
  // trailing space so that plain concatenation (used for the hint words
  // below) keeps neighbouring words apart.

  // Builtin functions, taken from trunk revision 1303237
  var builtinWords =
    'ABS ACOS ARITY ASIN ATAN AVG BAGSIZE BINSTORAGE BLOOM BUILDBLOOM CBRT CEIL ' +
    'CONCAT COR COS COSH COUNT COUNT_STAR COV CONSTANTSIZE CUBEDIMENSIONS DIFF DISTINCT DOUBLEABS ' +
    'DOUBLEAVG DOUBLEBASE DOUBLEMAX DOUBLEMIN DOUBLEROUND DOUBLESUM EXP FLOOR FLOATABS FLOATAVG ' +
    'FLOATMAX FLOATMIN FLOATROUND FLOATSUM GENERICINVOKER INDEXOF INTABS INTAVG INTMAX INTMIN ' +
    'INTSUM INVOKEFORDOUBLE INVOKEFORFLOAT INVOKEFORINT INVOKEFORLONG INVOKEFORSTRING INVOKER ' +
    'ISEMPTY JSONLOADER JSONMETADATA JSONSTORAGE LAST_INDEX_OF LCFIRST LOG LOG10 LOWER LONGABS ' +
    'LONGAVG LONGMAX LONGMIN LONGSUM MAX MIN MAPSIZE MONITOREDUDF NONDETERMINISTIC OUTPUTSCHEMA ' +
    'PIGSTORAGE PIGSTREAMING RANDOM REGEX_EXTRACT REGEX_EXTRACT_ALL REPLACE ROUND SIN SINH SIZE ' +
    'SQRT STRSPLIT SUBSTRING SUM STRINGCONCAT STRINGMAX STRINGMIN STRINGSIZE TAN TANH TOBAG ' +
    'TOKENIZE TOMAP TOP TOTUPLE TRIM TEXTLOADER TUPLESIZE UCFIRST UPPER UTF8STORAGECONVERTER '

  // Keywords, taken from QueryLexer.g
  var keywordWords =
    'VOID IMPORT RETURNS DEFINE LOAD FILTER FOREACH ORDER CUBE DISTINCT COGROUP ' +
    'JOIN CROSS UNION SPLIT INTO IF OTHERWISE ALL AS BY USING INNER OUTER ONSCHEMA PARALLEL ' +
    'PARTITION GROUP AND OR NOT GENERATE FLATTEN ASC DESC IS STREAM THROUGH STORE MAPREDUCE ' +
    'SHIP CACHE INPUT OUTPUT STDERROR STDIN STDOUT LIMIT SAMPLE LEFT RIGHT FULL EQ GT LT GTE LTE ' +
    'NEQ MATCHES TRUE FALSE DUMP'

  // Data types
  var typeWords = 'BOOLEAN INT LONG FLOAT DOUBLE CHARARRAY BYTEARRAY BAG TUPLE MAP '

  // Register the MIME type with the three lookup sets as parser configuration
  var pigMimeSpec = {
    name: 'pig',
    builtins: keywords(builtinWords),
    keywords: keywords(keywordWords),
    types: keywords(typeWords),
  }
  CodeMirror.defineMIME('text/x-pig', pigMimeSpec)

  // Expose every known word, in builtin/type/keyword order, as hint words
  var hintWords = (builtinWords + typeWords + keywordWords).split(' ')
  CodeMirror.registerHelper('hintWords', 'pig', hintWords)
  158. })()
  159. })