Class | CodeRay::Scanners::Ruby |
In: |
lib/coderay/scanners/ruby/string_state.rb
lib/coderay/scanners/ruby.rb |
Parent: | Object |
# File lib/coderay/scanners/ruby.rb, line 19
#
# Returns a freshly built StringState of type :string whose delimiter is the
# double quote character. The second constructor argument is passed as +true+;
# NOTE(review): presumably this is the "interpreted" flag (escapes and
# interpolation enabled) -- confirm against StringState#initialize.
def interpreted_string_state
  double_quote = '"'
  StringState.new(:string, true, double_quote)
end
# File lib/coderay/scanners/ruby.rb, line 29
#
# Tokenizes the remaining scanner input as Ruby source and reports every
# token to +encoder+ via +text_token+, +begin_group+ and +end_group+.
#
# The method is one large state machine. +state+ is either a Symbol
# (:initial, :def_expected, :dot_expected, :module_expected,
# :undef_expected, :undef_comma_expected, :alias_expected) or a StringState
# object while the scanner is inside a string-like literal (string, symbol,
# regexp, shell command, heredoc, ...).
#
# encoder:: receives the tokens; it is also the return value.
# options:: Hash; this method reads +options[:state]+ (state to resume
#           from, falling back to +@state+) and +options[:keep_state]+
#           (when truthy, the final state is stored in +@state+).
#
# Returns +encoder+.
def scan_tokens encoder, options
  # A saved state is a [state, heredocs] pair (see the :keep_state handling
  # near the end of this method), hence the destructuring assignment.
  state, heredocs = options[:state] || @state
  # Work on a copy: entries are shifted off this array while scanning.
  heredocs = heredocs.dup if heredocs.is_a?(Array)

  # Resuming inside a string-like literal: reopen its token group.
  if state && state.instance_of?(StringState)
    encoder.begin_group state.type
  end

  # State to switch back to after exactly one pass through the :initial
  # branch (set for "#$var" / "#@var" inside strings and after a failed
  # :def_expected match).
  last_state = nil

  # false, or the '.' / '::' text that was just scanned.
  method_call_expected = false
  # true, false, or :colon_expected (after 'when' and after '?').
  value_expected = true

  # Stack of [state, inline_block_curly_depth, heredocs] triples, one per
  # currently open "#{ ... }" interpolation; nil while none is open.
  inline_block_stack = nil
  inline_block_curly_depth = 0

  # Pending heredocs take precedence: continue with the first one.
  if heredocs
    state = heredocs.shift
    encoder.begin_group state.type
    heredocs = nil if heredocs.empty?
  end

  # def_object_stack = nil
  # def_object_paren_depth = 0

  patterns = Patterns  # avoid constant lookup

  # Selects the /u variants of the identifier patterns below.
  unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'

  until eos?

    if state.instance_of? ::Symbol

      # Whitespace, newlines, comments and line continuations are handled
      # the same way in every Symbol state, so they come first.
      if match = scan(/[ \t\f\v]+/)
        encoder.text_token match, :space

      elsif match = scan(/\n/)
        if heredocs
          unscan  # heredoc scanning needs \n at start
          state = heredocs.shift
          encoder.begin_group state.type
          heredocs = nil if heredocs.empty?
        else
          # A newline ends the argument list of an 'undef' statement.
          state = :initial if state == :undef_comma_expected
          encoder.text_token match, :space
          value_expected = true
        end

      # At the beginning of a line a comment may also be a "#!" line
      # (capture group 1, reported as :doctype) or whatever
      # RUBYDOC_OR_DATA matches; elsewhere only "# ..." is tried.
      elsif match = scan(bol? ? / \#(!)?.* | #{patterns::RUBYDOC_OR_DATA} /ox : /\#.*/)
        encoder.text_token match, self[1] ? :doctype : :comment

      elsif match = scan(/\\\n/)
        if heredocs
          unscan  # heredoc scanning needs \n at start
          # Emit only the backslash; the newline is left for the heredoc.
          encoder.text_token scan(/\\/), :space
          state = heredocs.shift
          encoder.begin_group state.type
          heredocs = nil if heredocs.empty?
        else
          encoder.text_token match, :space
        end

      elsif state == :initial

        # IDENTS #
        if !method_call_expected &&
           match = scan(unicode ? /#{patterns::METHOD_NAME}/uo :
                                  /#{patterns::METHOD_NAME}/o)

          kind = patterns::IDENT_KIND[match]
          # An identifier directly followed by a single ':' (not '::') is
          # reported as a :key plus a ':' operator, unless a colon is
          # already expected for another reason.
          if kind == :ident && value_expected != :colon_expected && scan(/:(?!:)/)
            value_expected = true
            encoder.text_token match, :key
            encoder.text_token ':', :operator
          else
            value_expected = false
            if kind == :ident
              # Capitalized names are constants unless they end in ! or ?
              # or are directly followed by an opening parenthesis.
              if match[/\A[A-Z]/] && !(match[/[!?]$/] || match?(/\(/))
                kind = :constant
              end
            elsif kind == :keyword
              state = patterns::KEYWORD_NEW_STATE[match]
              if patterns::KEYWORDS_EXPECTING_VALUE[match]
                value_expected = match == 'when' ? :colon_expected : true
              end
            end
            value_expected = true if !value_expected && check(/#{patterns::VALUE_FOLLOWS}/o)
            encoder.text_token match, kind
          end

        # Name after '.' or '::'.
        elsif method_call_expected &&
           match = scan(unicode ? /#{patterns::METHOD_AFTER_DOT}/uo :
                                  /#{patterns::METHOD_AFTER_DOT}/o)
          if method_call_expected == '::' && match[/\A[A-Z]/] && !match?(/\(/)
            encoder.text_token match, :constant
          else
            encoder.text_token match, :ident
          end
          method_call_expected = false
          value_expected = check(/#{patterns::VALUE_FOLLOWS}/o)

        # OPERATORS #
        # Group 1: '.' (not followed by another '.') or '::' -> a method
        #          name is expected next.
        # Group 2: '..', '...', '=', '==', '===', ',' and opening brackets
        #          -> a value is expected next.
        # Closing brackets match outside both groups.
        elsif !method_call_expected && match = scan(/ (\.(?!\.)|::) | ( \.\.\.? | ==?=? | [,\(\[\{] ) | [\)\]\}] /x)
          method_call_expected = self[1]
          value_expected = !method_call_expected && !!self[2]
          if inline_block_stack
            # Inside "#{ ... }": track curly braces to find the closing one.
            case match
            when '{'
              inline_block_curly_depth += 1
            when '}'
              inline_block_curly_depth -= 1
              if inline_block_curly_depth == 0  # closing brace of inline block reached
                # Restore the string state that was active before "#{".
                state, inline_block_curly_depth, heredocs = inline_block_stack.pop
                inline_block_stack = nil if inline_block_stack.empty?
                heredocs = nil if heredocs && heredocs.empty?
                encoder.text_token match, :inline_delimiter
                encoder.end_group :inline
                next
              end
            end
          end
          encoder.text_token match, :operator

        elsif match = scan(unicode ? /#{patterns::SYMBOL}/uo :
                                     /#{patterns::SYMBOL}/o)
          # match[1] is the character following the leading ':'.
          case delim = match[1]
          when ?', ?"
            # Quoted symbol: open a group and continue in string mode.
            encoder.begin_group :symbol
            encoder.text_token ':', :symbol
            match = delim.chr
            encoder.text_token match, :delimiter
            state = self.class::StringState.new :symbol, delim == ?", match
          else
            encoder.text_token match, :symbol
            value_expected = false
          end

        # Fast path for simple strings: a quote, optionally followed by
        # content without backslashes (and, for double quotes, without '#')
        # and the closing quote.
        elsif match = scan(/ ' (?:(?>[^'\\]*) ')? | " (?:(?>[^"\\\#]*) ")? /mx)
          encoder.begin_group :string
          if match.size == 1
            # Only the opening quote matched: continue in string mode.
            encoder.text_token match, :delimiter
            state = self.class::StringState.new :string, match == '"', match  # important for streaming
          else
            encoder.text_token match[0,1], :delimiter
            encoder.text_token match[1..-2], :content if match.size > 2
            encoder.text_token match[-1,1], :delimiter
            encoder.end_group :string
            value_expected = false
          end

        elsif match = scan(unicode ? /#{patterns::INSTANCE_VARIABLE}/uo :
                                     /#{patterns::INSTANCE_VARIABLE}/o)
          value_expected = false
          encoder.text_token match, :instance_variable

        # A '/' starts a regexp only where a value is expected; otherwise
        # it is picked up by the operator pattern further down.
        elsif value_expected && match = scan(/\//)
          encoder.begin_group :regexp
          encoder.text_token match, :delimiter
          state = self.class::StringState.new :regexp, true, '/'

        # A leading sign belongs to the number only where a value is expected.
        elsif match = scan(value_expected ? /[-+]?#{patterns::NUMERIC}/o : /#{patterns::NUMERIC}/o)
          if method_call_expected
            encoder.text_token match, :error
            method_call_expected = false
          else
            # Capture group 1 of the numeric pattern selects :float.
            encoder.text_token match, self[1] ? :float : :integer  # TODO: send :hex/:octal/:binary
          end
          value_expected = false

        elsif match = scan(/ [-+!~^\/]=? | [:;] | [*|&]{1,2}=? | >>? /x)
          value_expected = true
          encoder.text_token match, :operator

        elsif value_expected && match = scan(/#{patterns::HEREDOC_OPEN}/o)
          # Groups of HEREDOC_OPEN as used here: 1 is compared with '-'
          # (indented terminator), 3 is the quote character if any, and
          # 4 (quoted) or 2 (unquoted) is the terminator text.
          quote = self[3]
          delim = self[quote ? 4 : 2]
          kind = patterns::QUOTE_TO_TYPE[quote]
          # Only the opener is emitted now; the body is queued in +heredocs+
          # and scanned once the next newline is reached.
          encoder.begin_group kind
          encoder.text_token match, :delimiter
          encoder.end_group kind
          heredocs ||= []  # create heredocs if empty
          heredocs << self.class::StringState.new(kind, quote != "'", delim,
            self[1] == '-' ? :indented : :linestart)
          value_expected = false

        elsif value_expected && match = scan(/#{patterns::FANCY_STRING_START}/o)
          kind = patterns::FANCY_STRING_KIND[self[1]]
          encoder.begin_group kind
          state = self.class::StringState.new kind, patterns::FANCY_STRING_INTERPRETED[self[1]], self[2]
          encoder.text_token match, :delimiter

        elsif value_expected && match = scan(/#{patterns::CHARACTER}/o)
          value_expected = false
          encoder.text_token match, :integer

        elsif match = scan(/ %=? | <(?:<|=>?)? | \? /x)
          # After '?' the ':' of a ternary expression is expected.
          value_expected = match == '?' ? :colon_expected : true
          encoder.text_token match, :operator

        elsif match = scan(/`/)
          encoder.begin_group :shell
          encoder.text_token match, :delimiter
          state = self.class::StringState.new :shell, true, match

        elsif match = scan(unicode ? /#{patterns::GLOBAL_VARIABLE}/uo :
                                     /#{patterns::GLOBAL_VARIABLE}/o)
          encoder.text_token match, :global_variable
          value_expected = false

        elsif match = scan(unicode ? /#{patterns::CLASS_VARIABLE}/uo :
                                     /#{patterns::CLASS_VARIABLE}/o)
          encoder.text_token match, :class_variable
          value_expected = false

        # A backslash at the very end of the input.
        elsif match = scan(/\\\z/)
          encoder.text_token match, :space

        else
          # Nothing matched. If a method name was expected, drop that
          # expectation and retry the same position with the other patterns.
          if method_call_expected
            method_call_expected = false
            next
          end
          unless unicode
            # check for unicode
            # $DEBUG is switched off while probing and restored in +ensure+.
            $DEBUG_BEFORE, $DEBUG = $DEBUG, false
            begin
              if check(/./mu).size > 1
                # seems like we should try again with unicode
                unicode = true
              end
            rescue
              # bad unicode char; use getch
            ensure
              $DEBUG = $DEBUG_BEFORE
            end
            next if unicode
          end

          # Give up on this character and report it as an error token.
          encoder.text_token getch, :error

        end

        # Return to the state that was suspended for a single token.
        if last_state
          state = last_state
          last_state = nil
        end

      elsif state == :def_expected
        # A method name not followed by '.' or '::' is the defined method.
        if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
                                  /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
          encoder.text_token match, :method
          state = :initial
        else
          # Scan one token in :initial, then look for '.' or '::'.
          last_state = :dot_expected
          state = :initial
        end

      elsif state == :dot_expected
        if match = scan(/\.|::/)
          # invalid definition
          state = :def_expected
          encoder.text_token match, :operator
        else
          state = :initial
        end

      elsif state == :module_expected
        # '<<' keeps the state unchanged; anything else leaves it, after an
        # optional (possibly ::-qualified) name reported as :class.
        if match = scan(/<</)
          encoder.text_token match, :operator
        else
          state = :initial
          if match = scan(unicode ? / (?:#{patterns::IDENT}::)* #{patterns::IDENT} /oux :
                                    / (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox)
            encoder.text_token match, :class
          end
        end

      elsif state == :undef_expected
        state = :undef_comma_expected
        if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
                                  /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
          encoder.text_token match, :method
        elsif match = scan(/#{patterns::SYMBOL}/o)
          case delim = match[1]
          when ?', ?"
            encoder.begin_group :symbol
            encoder.text_token ':', :symbol
            match = delim.chr
            encoder.text_token match, :delimiter
            state = self.class::StringState.new :symbol, delim == ?", match
            # After the quoted symbol, continue looking for a comma.
            state.next_state = :undef_comma_expected
          else
            encoder.text_token match, :symbol
          end
        else
          state = :initial
        end

      elsif state == :undef_comma_expected
        if match = scan(/,/)
          encoder.text_token match, :operator
          state = :undef_expected
        else
          state = :initial
        end

      elsif state == :alias_expected
        # Both names of the alias statement are matched in one go:
        # group 1 = first name, group 2 = separating blanks, group 3 =
        # second name.
        match = scan(unicode ? /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/uo :
                               /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)

        if match
          # Names starting with ':' are symbols, all others method names.
          encoder.text_token self[1], (self[1][0] == ?: ? :symbol : :method)
          encoder.text_token self[2], :space
          encoder.text_token self[3], (self[3][0] == ?: ? :symbol : :method)
        end
        state = :initial

      else
        #:nocov:
        raise_inspect 'Unknown state: %p' % [state], encoder
        #:nocov:
      end

    else  # StringState

      # Consume what scan_until(state.pattern) returns, or the rest of the
      # input when the pattern does not match, and report it as content.
      match = scan_until(state.pattern) || scan_rest
      unless match.empty?
        encoder.text_token match, :content
        break if eos?
      end

      if state.heredoc && self[1]  # end of heredoc
        # Emit the next character plus the remainder of that line as the
        # closing delimiter.
        match = getch
        match << scan_until(/$/) unless eos?
        encoder.text_token match, :delimiter unless match.empty?
        encoder.end_group state.type
        state = state.next_state
        next
      end

      # The next character decides how to continue.
      case match = getch

      when state.delim
        if state.paren_depth
          # Nested delimiter pairs: only the outermost closer ends the
          # literal; inner ones are plain content.
          state.paren_depth -= 1
          if state.paren_depth > 0
            encoder.text_token match, :content
            next
          end
        end
        encoder.text_token match, :delimiter
        if state.type == :regexp && !eos?
          match = scan(/#{patterns::REGEXP_MODIFIERS}/o)
          encoder.text_token match, :modifier unless match.empty?
        end
        encoder.end_group state.type
        value_expected = false
        state = state.next_state

      when '\\'
        if state.interpreted
          if esc = scan(/#{patterns::ESCAPE}/o)
            encoder.text_token match + esc, :char
          else
            encoder.text_token match, :error
          end
        else
          # Non-interpreted literal: only an escaped delimiter or an escaped
          # backslash is reported as :char; everything else is content.
          case esc = getch
          when nil
            encoder.text_token match, :content
          when state.delim, '\\'
            encoder.text_token match + esc, :char
          else
            encoder.text_token match + esc, :content
          end
        end

      when '#'
        case peek(1)
        when '{'
          # Start of "#{ ... }": remember the current string state and scan
          # the embedded code in :initial until the matching '}'.
          inline_block_stack ||= []
          inline_block_stack << [state, inline_block_curly_depth, heredocs]
          value_expected = true
          state = :initial
          inline_block_curly_depth = 1
          encoder.begin_group :inline
          encoder.text_token match + getch, :inline_delimiter
        when '$', '@'
          # "#$var" / "#@var": scan a single token in :initial, then return
          # to this string state via last_state.
          encoder.text_token match, :escape
          last_state = state
          state = :initial
        else
          #:nocov:
          raise_inspect 'else-case # reached; #%p not handled' % [peek(1)], encoder
          #:nocov:
        end

      when state.opening_paren
        state.paren_depth += 1
        encoder.text_token match, :content

      else
        #:nocov:
        raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], encoder
        #:nocov:

      end

    end

  end

  # cleaning up
  # Input ended inside a string-like literal: close its group.
  if state.is_a? StringState
    encoder.end_group state.type
  end

  if options[:keep_state]
    # An unfinished heredoc is put back at the front of the heredoc queue
    # so that the stored state is a Symbol plus the pending heredocs.
    if state.is_a?(StringState) && state.heredoc
      (heredocs ||= []).unshift state
      state = :initial
    elsif heredocs && heredocs.empty?
      heredocs = nil
    end
    @state = state, heredocs
  end

  # Close every "#{ ... }" block that is still open, together with the
  # group of the string that contained it.
  if inline_block_stack
    until inline_block_stack.empty?
      state, = *inline_block_stack.pop
      encoder.end_group :inline
      encoder.end_group state.type
    end
  end

  encoder
end