Class CodeRay::Scanners::Ruby
In: lib/coderay/scanners/ruby/string_state.rb
lib/coderay/scanners/ruby.rb
Parent: Object

This scanner is really complex, since Ruby is a complex language!

It tries to highlight 100% of all common code and 90% of strange code.

It is optimized for HTML highlighting, and is not very useful for parsing or pretty printing.

Methods

Public Instance methods

[Source]

    # File lib/coderay/scanners/ruby.rb, line 19
    #
    # Builds and returns a fresh StringState of type :string whose delimiter
    # is a double quote. The second argument (true) is presumably the
    # "interpreted" flag of StringState -- confirm against string_state.rb.
19:     def interpreted_string_state
20:       StringState.new :string, true, '"'
21:     end

Protected Instance methods

[Source]

     # File lib/coderay/scanners/ruby.rb, line 29
     #
     # Scans the input until eos? and reports every token to +encoder+ via
     # text_token / begin_group / end_group. Returns +encoder+.
     #
     # Options read here:
     #   options[:state]      - state to start from; when absent, @state is used.
     #                          Either a single state or a [state, heredocs] pair.
     #   options[:keep_state] - when truthy, the final state and any pending
     #                          heredocs are stored back into @state. An
     #                          unfinished heredoc state is pushed to the front
     #                          of the heredoc list and the state becomes :initial.
     #
     # The main loop has two modes:
     #   * +state+ is a Symbol (:initial, :def_expected, :dot_expected,
     #     :module_expected, :undef_expected, :undef_comma_expected,
     #     :alias_expected): ordinary code is being scanned.
     #   * otherwise +state+ is treated as a StringState: the scanner is inside
     #     a string-like group (string, symbol, regexp, shell, heredoc).
     #
     # If scanning starts inside a StringState, or heredocs are pending, the
     # matching group is opened on the encoder before the loop. After the loop
     # a still-open StringState group and all open inline groups are closed.
 29:     def scan_tokens encoder, options
 30:       state, heredocs = options[:state] || @state  # bare state, or [state, heredocs] pair
 31:       heredocs = heredocs.dup if heredocs.is_a?(Array)
 32:       
 33:       if state && state.instance_of?(StringState)
 34:         encoder.begin_group state.type
 35:       end
 36:       
 37:       last_state = nil  # if set, state to switch to after the next pass through the :initial branch
 38:       
 39:       method_call_expected = false  # becomes '.' or '::' right after such an operator
 40:       value_expected = true
 41:       
 42:       inline_block_stack = nil  # saved [state, curly depth, heredocs] per open inline block
 43:       inline_block_curly_depth = 0
 44:       
 45:       if heredocs
 46:         state = heredocs.shift
 47:         encoder.begin_group state.type
 48:         heredocs = nil if heredocs.empty?
 49:       end
 50:       
 51:       # def_object_stack = nil
 52:       # def_object_paren_depth = 0
 53:       
 54:       patterns = Patterns  # avoid constant lookup
 55:       
 56:       unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
 57:       
 58:       until eos?
 59:         
 60:         if state.instance_of? ::Symbol
 61:           
 62:           if match = scan(/[ \t\f\v]+/)
 63:             encoder.text_token match, :space
 64:             
 65:           elsif match = scan(/\n/)
 66:             if heredocs
 67:               unscan  # heredoc scanning needs \n at start
 68:               state = heredocs.shift
 69:               encoder.begin_group state.type
 70:               heredocs = nil if heredocs.empty?
 71:             else
 72:               state = :initial if state == :undef_comma_expected
 73:               encoder.text_token match, :space
 74:               value_expected = true
 75:             end
 76:             
 77:           elsif match = scan(bol? ? / \#(!)?.* | #{patterns::RUBYDOC_OR_DATA} /ox : /\#.*/)
 78:             encoder.text_token match, self[1] ? :doctype : :comment
 79:             
 80:           elsif match = scan(/\\\n/)
 81:             if heredocs
 82:               unscan  # heredoc scanning needs \n at start
 83:               encoder.text_token scan(/\\/), :space
 84:               state = heredocs.shift
 85:               encoder.begin_group state.type
 86:               heredocs = nil if heredocs.empty?
 87:             else
 88:               encoder.text_token match, :space
 89:             end
 90:             
 91:           elsif state == :initial
 92:             
 93:             # IDENTS #
 94:             if !method_call_expected &&
 95:                match = scan(unicode ? /#{patterns::METHOD_NAME}/uo :
 96:                                       /#{patterns::METHOD_NAME}/o)
 97:               
 98:               kind = patterns::IDENT_KIND[match]
 99:               if kind == :ident && value_expected != :colon_expected && scan(/:(?!:)/)
100:                 value_expected = true
101:                 encoder.text_token match, :key
102:                 encoder.text_token ':',   :operator
103:               else
104:                 value_expected = false
105:                 if kind == :ident
106:                   if match[/\A[A-Z]/] && !(match[/[!?]$/] || match?(/\(/))
107:                     kind = :constant
108:                   end
109:                 elsif kind == :keyword
110:                   state = patterns::KEYWORD_NEW_STATE[match]
111:                   if patterns::KEYWORDS_EXPECTING_VALUE[match]
112:                     value_expected = match == 'when' ? :colon_expected : true
113:                   end
114:                 end
115:                 value_expected = true if !value_expected && check(/#{patterns::VALUE_FOLLOWS}/o)
116:                 encoder.text_token match, kind
117:               end
118:               
119:             elsif method_call_expected &&
120:                match = scan(unicode ? /#{patterns::METHOD_AFTER_DOT}/uo :
121:                                       /#{patterns::METHOD_AFTER_DOT}/o)
122:               if method_call_expected == '::' && match[/\A[A-Z]/] && !match?(/\(/)
123:                 encoder.text_token match, :constant
124:               else
125:                 encoder.text_token match, :ident
126:               end
127:               method_call_expected = false
128:               value_expected = check(/#{patterns::VALUE_FOLLOWS}/o)
129:               
130:             # OPERATORS #
131:             elsif !method_call_expected && match = scan(/ (\.(?!\.)|::) | ( \.\.\.? | ==?=? | [,\(\[\{] ) | [\)\]\}] /x)
132:               method_call_expected = self[1]
133:               value_expected = !method_call_expected && !!self[2]
134:               if inline_block_stack
135:                 case match
136:                 when '{'
137:                   inline_block_curly_depth += 1
138:                 when '}'
139:                   inline_block_curly_depth -= 1
140:                   if inline_block_curly_depth == 0  # closing brace of inline block reached
141:                     state, inline_block_curly_depth, heredocs = inline_block_stack.pop
142:                     inline_block_stack = nil if inline_block_stack.empty?
143:                     heredocs = nil if heredocs && heredocs.empty?
144:                     encoder.text_token match, :inline_delimiter
145:                     encoder.end_group :inline
146:                     next
147:                   end
148:                 end
149:               end
150:               encoder.text_token match, :operator
151:               
152:             elsif match = scan(unicode ? /#{patterns::SYMBOL}/uo :
153:                                          /#{patterns::SYMBOL}/o)
154:               case delim = match[1]
155:               when ?', ?"
156:                 encoder.begin_group :symbol
157:                 encoder.text_token ':', :symbol
158:                 match = delim.chr
159:                 encoder.text_token match, :delimiter
160:                 state = self.class::StringState.new :symbol, delim == ?", match
161:               else
162:                 encoder.text_token match, :symbol
163:                 value_expected = false
164:               end
165:               
166:             elsif match = scan(/ ' (?:(?>[^'\\]*) ')? | " (?:(?>[^"\\\#]*) ")? /mx)
167:               encoder.begin_group :string
168:               if match.size == 1
169:                 encoder.text_token match, :delimiter
170:                 state = self.class::StringState.new :string, match == '"', match  # important for streaming
171:               else
172:                 encoder.text_token match[0,1], :delimiter
173:                 encoder.text_token match[1..-2], :content if match.size > 2
174:                 encoder.text_token match[-1,1], :delimiter
175:                 encoder.end_group :string
176:                 value_expected = false
177:               end
178:               
179:             elsif match = scan(unicode ? /#{patterns::INSTANCE_VARIABLE}/uo :
180:                                          /#{patterns::INSTANCE_VARIABLE}/o)
181:               value_expected = false
182:               encoder.text_token match, :instance_variable
183:               
184:             elsif value_expected && match = scan(/\//)
185:               encoder.begin_group :regexp
186:               encoder.text_token match, :delimiter
187:               state = self.class::StringState.new :regexp, true, '/'
188:               
189:             elsif match = scan(value_expected ? /[-+]?#{patterns::NUMERIC}/o : /#{patterns::NUMERIC}/o)
190:               if method_call_expected
191:                 encoder.text_token match, :error
192:                 method_call_expected = false
193:               else
194:                 encoder.text_token match, self[1] ? :float : :integer  # TODO: send :hex/:octal/:binary
195:               end
196:               value_expected = false
197:               
198:             elsif match = scan(/ [-+!~^\/]=? | [:;] | [*|&]{1,2}=? | >>? /x)
199:               value_expected = true
200:               encoder.text_token match, :operator
201:               
202:             elsif value_expected && match = scan(/#{patterns::HEREDOC_OPEN}/o)
203:               quote = self[3]
204:               delim = self[quote ? 4 : 2]
205:               kind = patterns::QUOTE_TO_TYPE[quote]
206:               encoder.begin_group kind
207:               encoder.text_token match, :delimiter
208:               encoder.end_group kind
209:               heredocs ||= []  # create heredocs if empty
210:               heredocs << self.class::StringState.new(kind, quote != "'", delim,
211:                 self[1] == '-' ? :indented : :linestart)
212:               value_expected = false
213:               
214:             elsif value_expected && match = scan(/#{patterns::FANCY_STRING_START}/o)
215:               kind = patterns::FANCY_STRING_KIND[self[1]]
216:               encoder.begin_group kind
217:               state = self.class::StringState.new kind, patterns::FANCY_STRING_INTERPRETED[self[1]], self[2]
218:               encoder.text_token match, :delimiter
219:               
220:             elsif value_expected && match = scan(/#{patterns::CHARACTER}/o)
221:               value_expected = false
222:               encoder.text_token match, :integer
223:               
224:             elsif match = scan(/ %=? | <(?:<|=>?)? | \? /x)
225:               value_expected = match == '?' ? :colon_expected : true
226:               encoder.text_token match, :operator
227:               
228:             elsif match = scan(/`/)
229:               encoder.begin_group :shell
230:               encoder.text_token match, :delimiter
231:               state = self.class::StringState.new :shell, true, match
232:               
233:             elsif match = scan(unicode ? /#{patterns::GLOBAL_VARIABLE}/uo :
234:                                          /#{patterns::GLOBAL_VARIABLE}/o)
235:               encoder.text_token match, :global_variable
236:               value_expected = false
237:               
238:             elsif match = scan(unicode ? /#{patterns::CLASS_VARIABLE}/uo :
239:                                          /#{patterns::CLASS_VARIABLE}/o)
240:               encoder.text_token match, :class_variable
241:               value_expected = false
242:               
243:             elsif match = scan(/\\\z/)
244:               encoder.text_token match, :space
245:               
246:             else
247:               if method_call_expected
248:                 method_call_expected = false
249:                 next
250:               end
251:               unless unicode
252:                 # check for unicode
253:                 $DEBUG_BEFORE, $DEBUG = $DEBUG, false
254:                 begin
255:                   if check(/./mu).size > 1
256:                     # seems like we should try again with unicode
257:                     unicode = true
258:                   end
259:                 rescue
260:                   # bad unicode char; use getch
261:                 ensure
262:                   $DEBUG = $DEBUG_BEFORE
263:                 end
264:                 next if unicode
265:               end
266:               
267:               encoder.text_token getch, :error
268:               
269:             end
270:             
271:             if last_state
272:               state = last_state
273:               last_state = nil
274:             end
275:             
276:           elsif state == :def_expected
277:             if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
278:                                       /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
279:               encoder.text_token match, :method
280:               state = :initial
281:             else
282:               last_state = :dot_expected
283:               state = :initial
284:             end
285:             
286:           elsif state == :dot_expected
287:             if match = scan(/\.|::/)
288:               # invalid definition
289:               state = :def_expected
290:               encoder.text_token match, :operator
291:             else
292:               state = :initial
293:             end
294:             
295:           elsif state == :module_expected
296:             if match = scan(/<</)
297:               encoder.text_token match, :operator
298:             else
299:               state = :initial
300:               if match = scan(unicode ? / (?:#{patterns::IDENT}::)* #{patterns::IDENT} /oux :
301:                                         / (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox)
302:                 encoder.text_token match, :class
303:               end
304:             end
305:             
306:           elsif state == :undef_expected
307:             state = :undef_comma_expected
308:             if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
309:                                       /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
310:               encoder.text_token match, :method
311:             elsif match = scan(/#{patterns::SYMBOL}/o)
312:               case delim = match[1]
313:               when ?', ?"
314:                 encoder.begin_group :symbol
315:                 encoder.text_token ':', :symbol
316:                 match = delim.chr
317:                 encoder.text_token match, :delimiter
318:                 state = self.class::StringState.new :symbol, delim == ?", match
319:                 state.next_state = :undef_comma_expected
320:               else
321:                 encoder.text_token match, :symbol
322:               end
323:             else
324:               state = :initial
325:             end
326:             
327:           elsif state == :undef_comma_expected
328:             if match = scan(/,/)
329:               encoder.text_token match, :operator
330:               state = :undef_expected
331:             else
332:               state = :initial
333:             end
334:             
335:           elsif state == :alias_expected
336:             match = scan(unicode ? /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/uo :
337:                                    /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)
338:             
339:             if match
340:               encoder.text_token self[1], (self[1][0] == ?: ? :symbol : :method)
341:               encoder.text_token self[2], :space
342:               encoder.text_token self[3], (self[3][0] == ?: ? :symbol : :method)
343:             end
344:             state = :initial
345:             
346:           else
347:             #:nocov:
348:             raise_inspect 'Unknown state: %p' % [state], encoder
349:             #:nocov:
350:           end
351:           
352:         else  # StringState
353:           
354:           match = scan_until(state.pattern) || scan_rest
355:           unless match.empty?
356:             encoder.text_token match, :content
357:             break if eos?
358:           end
359:           
360:           if state.heredoc && self[1]  # end of heredoc
361:             match = getch
362:             match << scan_until(/$/) unless eos?
363:             encoder.text_token match, :delimiter unless match.empty?
364:             encoder.end_group state.type
365:             state = state.next_state
366:             next
367:           end
368:           
369:           case match = getch
370:           
371:           when state.delim
372:             if state.paren_depth
373:               state.paren_depth -= 1
374:               if state.paren_depth > 0
375:                 encoder.text_token match, :content
376:                 next
377:               end
378:             end
379:             encoder.text_token match, :delimiter
380:             if state.type == :regexp && !eos?
381:               match = scan(/#{patterns::REGEXP_MODIFIERS}/o)
382:               encoder.text_token match, :modifier unless match.empty?
383:             end
384:             encoder.end_group state.type
385:             value_expected = false
386:             state = state.next_state
387:             
388:           when '\\'
389:             if state.interpreted
390:               if esc = scan(/#{patterns::ESCAPE}/o)
391:                 encoder.text_token match + esc, :char
392:               else
393:                 encoder.text_token match, :error
394:               end
395:             else
396:               case esc = getch
397:               when nil
398:                 encoder.text_token match, :content
399:               when state.delim, '\\'
400:                 encoder.text_token match + esc, :char
401:               else
402:                 encoder.text_token match + esc, :content
403:               end
404:             end
405:             
406:           when '#'
407:             case peek(1)
408:             when '{'
409:               inline_block_stack ||= []
410:               inline_block_stack << [state, inline_block_curly_depth, heredocs]
411:               value_expected = true
412:               state = :initial
413:               inline_block_curly_depth = 1
414:               encoder.begin_group :inline
415:               encoder.text_token match + getch, :inline_delimiter
416:             when '$', '@'
417:               encoder.text_token match, :escape
418:               last_state = state  # come back to this string state after the variable is scanned
419:               state = :initial
420:             else
421:               #:nocov:
422:               raise_inspect 'else-case # reached; #%p not handled' % [peek(1)], encoder
423:               #:nocov:
424:             end
425:             
426:           when state.opening_paren
427:             state.paren_depth += 1
428:             encoder.text_token match, :content
429:             
430:           else
431:             #:nocov:
432:             raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], encoder
433:             #:nocov:
434:             
435:           end
436:           
437:         end
438:         
439:       end
440:       
441:       # cleaning up
442:       if state.is_a? StringState
443:         encoder.end_group state.type
444:       end
445:       
446:       if options[:keep_state]
447:         if state.is_a?(StringState) && state.heredoc
448:           (heredocs ||= []).unshift state
449:           state = :initial
450:         elsif heredocs && heredocs.empty?
451:           heredocs = nil
452:         end
453:         @state = state, heredocs
454:       end
455:       
456:       if inline_block_stack
457:         until inline_block_stack.empty?
458:           state, = *inline_block_stack.pop
459:           encoder.end_group :inline
460:           encoder.end_group state.type
461:         end
462:       end
463:       
464:       encoder
465:     end

[Source]

    # File lib/coderay/scanners/ruby.rb, line 25
    #
    # Resets the stored scanner state (@state) to :initial. scan_tokens falls
    # back to @state when no :state option is given.
25:     def setup
26:       @state = :initial
27:     end

[Validate]