Class CodeRay::Scanners::HTML
In: lib/coderay/scanners/html.rb
Parent: Scanner

HTML Scanner

Alias: xhtml

See also: Scanners::XML

Methods

Constants

# Names of the HTML event-handler attributes (onclick, onload, ...),
# listed in alphabetical order.
EVENT_ATTRIBUTES = %w[
  onabort onafterprint onbeforeprint onbeforeunload onblur
  oncanplay oncanplaythrough onchange onclick oncontextmenu oncuechange
  ondblclick ondrag ondragdrop ondragend ondragenter ondragleave ondragover
  ondragstart ondrop ondurationchange
  onemptied onended onerror
  onfocus onformchange onforminput
  onhashchange
  oninput oninvalid
  onkeydown onkeypress onkeyup
  onload onloadeddata onloadedmetadata onloadstart
  onmessage onmousedown onmousemove onmouseout onmouseover onmouseup
  onmousewheel onmove
  onoffline ononline
  onpagehide onpageshow onpause onplay onplaying onpopstate onprogress
  onratechange onreadystatechange onredo onreset onresize
  onscroll onseeked onseeking onselect onshow onstalled onstorage onsubmit
  onsuspend
  ontimeupdate
  onundo onunload
  onvolumechange
  onwaiting
]
# Lookup consulted by scan_tokens for every attribute name: the names in
# EVENT_ATTRIBUTES are registered with the value :script, which makes
# scan_tokens treat their quoted values as JavaScript.
# NOTE(review): presumably other names yield nil (the argument given to new)
# and lookups ignore case, as the class name suggests - confirm in WordList.
IN_ATTRIBUTE = WordList::CaseIgnoring.new(nil).add(EVENT_ATTRIBUTES, :script)

Public Instance methods

[Source]

    # File lib/coderay/scanners/html.rb, line 61
    #
    # Returns the scanner to its starting condition: runs the parent's reset,
    # then puts the state machine back to :initial and forgets the remembered
    # string-content pattern and open tag name.
    #
    # @in_tag is cleared as well, so that reset leaves the same instance state
    # that setup establishes; otherwise a tag name stored by an earlier
    # scan_tokens call with :keep_state would survive the reset.
    def reset
      super
      @state = :initial
      @plain_string_content = nil
      @in_tag = nil
    end

Protected Instance methods

[Source]

    # File lib/coderay/scanners/html.rb, line 75
    #
    # Tokenizes +code+ as JavaScript, sending the tokens to +encoder+.
    #
    # Does nothing and returns nil when +code+ is nil or empty. The nested
    # JavaScript scanner is created on first use and reused by later calls.
    def scan_java_script encoder, code
      return if !code || code.empty?

      @java_script_scanner ||= Scanners::JavaScript.new('', :keep_tokens => true)
      # No group is opened here; scan_tokens wraps the call in an :inline group.
      @java_script_scanner.tokenize(code, :tokens => encoder)
    end

[Source]

     # File lib/coderay/scanners/html.rb, line 84
     #
     # Tokenizes the rest of the input with a state machine and sends every
     # token to +encoder+.
     #
     # encoder - receives text_token, begin_group and end_group calls.
     # options - :state selects the state to start in (default: @state);
     #           :keep_state, when truthy, writes the final state, the current
     #           string-content pattern and the open tag name back to @state,
     #           @plain_string_content and @in_tag.
     #
     # States:
     #   :initial                - outside of tags
     #   :attribute              - inside an opening tag, before a name or the tag end
     #   :attribute_equal        - after an attribute name, expecting "="
     #   :attribute_value        - after "=", expecting a value
     #   :attribute_value_string - inside a quoted attribute value
     #   :in_special_tag         - content of a <script> element
     #
     # ENTITY, TAG_END, ATTR_NAME and PLAIN_STRING_CONTENT are referenced here
     # but defined outside this listing.
     #
     # Returns +encoder+. An unknown state, or input that no :initial rule
     # matches, is reported through raise_inspect; a special tag other than
     # 'script' raises.
     def scan_tokens encoder, options
       state = options[:state] || @state
       plain_string_content = @plain_string_content
       in_tag = @in_tag
       in_attribute = nil

       # When the scan starts inside a quoted attribute value, open the :string
       # group that the closing quote (or the end of this method) closes.
       encoder.begin_group :string if state == :attribute_value_string

       until eos?

         # Outside <script> content, whitespace at the scan position is always
         # emitted as its own :space token, whatever the state.
         if state != :in_special_tag && match = scan(/\s+/m)
           encoder.text_token match, :space

         else

           case state

           when :initial
             if match = scan(/<!--(?:.*?-->|.*)/m)
               encoder.text_token match, :comment
             elsif match = scan(/<!(\w+)(?:.*?>|.*)|\]>/m)
               encoder.text_token match, :doctype
             elsif match = scan(/<\?xml(?:.*?\?>|.*)/m)
               encoder.text_token match, :preprocessor
             elsif match = scan(/<\?(?:.*?\?>|.*)/m)
               # Processing instructions other than <?xml are shown as comments.
               encoder.text_token match, :comment
             elsif match = scan(/<\/[-\w.:]*>?/m)
               in_tag = nil
               encoder.text_token match, :tag
             # Group 1 is set only for a tag named exactly "script"; the negative
             # lookahead keeps names that merely start with "script" (such as
             # <scripts>) from being cut off after the prefix and treated as a
             # script tag. Group 2 is set when the tag is closed right away.
             elsif match = scan(/<(?:(script)(?![-\w.:])|[-\w.:]+)(>)?/m)
               encoder.text_token match, :tag
               in_tag = self[1]
               if self[2]
                 state = :in_special_tag if in_tag
               else
                 state = :attribute
               end
             elsif match = scan(/[^<>&]+/)
               encoder.text_token match, :plain
             elsif match = scan(/#{ENTITY}/ox)
               encoder.text_token match, :entity
             elsif match = scan(/[<>&]/)
               in_tag = nil
               encoder.text_token match, :error
             else
               raise_inspect '[BUG] else-case reached with state %p' % [state], encoder
             end

           when :attribute
             if match = scan(/#{TAG_END}/o)
               encoder.text_token match, :tag
               in_attribute = nil
               if in_tag
                 state = :in_special_tag
               else
                 state = :initial
               end
             elsif match = scan(/#{ATTR_NAME}/o)
               in_attribute = IN_ATTRIBUTE[match]
               encoder.text_token match, :attribute_name
               state = :attribute_equal
             else
               in_tag = nil
               encoder.text_token getch, :error
             end

           when :attribute_equal
             if match = scan(/=/)
               encoder.text_token match, :operator
               state = :attribute_value
             else
               # Attribute without a value: handle the same input again as the
               # start of the next attribute, without consuming anything.
               state = :attribute
               next
             end

           when :attribute_value
             if match = scan(/#{ATTR_NAME}/o)
               encoder.text_token match, :attribute_value
               state = :attribute
             elsif match = scan(/["']/)
               if in_attribute == :script
                 # Quoted value of an event attribute: scan it as JavaScript up
                 # to the matching quote or the end of the input.
                 encoder.begin_group :inline
                 encoder.text_token match, :inline_delimiter
                 if scan(/javascript:[ \t]*/)
                   encoder.text_token matched, :comment
                 end
                 code = scan_until(match == '"' ? /(?="|\z)/ : /(?='|\z)/)
                 scan_java_script encoder, code
                 match = scan(/["']/)
                 encoder.text_token match, :inline_delimiter if match
                 encoder.end_group :inline
                 state = :attribute
                 in_attribute = nil
               else
                 encoder.begin_group :string
                 state = :attribute_value_string
                 plain_string_content = PLAIN_STRING_CONTENT[match]
                 encoder.text_token match, :delimiter
               end
             elsif match = scan(/#{TAG_END}/o)
               encoder.text_token match, :tag
               in_attribute = nil
               # Same transition as in the :attribute state: an opening <script>
               # tag leads into its content even when its last attribute has no
               # value.
               if in_tag
                 state = :in_special_tag
               else
                 state = :initial
               end
             else
               encoder.text_token getch, :error
             end

           when :attribute_value_string
             if match = scan(plain_string_content)
               encoder.text_token match, :content
             elsif match = scan(/['"]/)
               encoder.text_token match, :delimiter
               encoder.end_group :string
               state = :attribute
             elsif match = scan(/#{ENTITY}/ox)
               encoder.text_token match, :entity
             elsif match = scan(/&/)
               encoder.text_token match, :content
             elsif match = scan(/[\n>]/)
               # Unterminated string: close the group and flag the character.
               encoder.end_group :string
               state = :initial
               encoder.text_token match, :error
             end

           when :in_special_tag
             case in_tag
             when 'script'
               if match = scan(/[ \t]*\n/)
                 encoder.text_token match, :space
               end
               if scan(/(\s*<!--)(?:(.*?)(-->)|(.*))/m)
                 # Script body wrapped in an HTML comment: the opener and, when
                 # present, the closer are emitted as :comment around the code.
                 code = self[2] || self[4]
                 closing = self[3]
                 encoder.text_token self[1], :comment
               else
                 # Everything up to the closing </script> (or end of input).
                 code = scan_until(/(?=(?:\n\s*)?<\/script>)|\z/)
                 closing = false
               end
               unless code.empty?
                 encoder.begin_group :inline
                 scan_java_script encoder, code
                 encoder.end_group :inline
               end
               encoder.text_token closing, :comment if closing
               state = :initial
             else
               raise 'unknown special tag: %p' % [in_tag]
             end

           else
             raise_inspect 'Unknown state: %p' % [state], encoder

           end

         end

       end

       if options[:keep_state]
         @state = state
         @plain_string_content = plain_string_content
         @in_tag = in_tag
       end

       encoder.end_group :string if state == :attribute_value_string

       encoder
     end

[Source]

    # File lib/coderay/scanners/html.rb, line 69
    #
    # Establishes the scanner's initial instance state: the state machine
    # starts in :initial, with no string-content pattern and no open tag name
    # remembered. Returns nil.
    def setup
      @state = :initial
      @plain_string_content = @in_tag = nil
    end

[Validate]