# Tokenizes the rest of the input as HTML and feeds the tokens to +encoder+.
#
# The scanner is a small state machine driven by the local +state+:
#
# :initial::                between tags (comments, doctypes, tags, text, entities)
# :attribute::              inside an open tag, expecting an attribute name or the tag end
# :attribute_equal::        after an attribute name, expecting an optional "="
# :attribute_value::        after "=", expecting a bare or quoted value
# :attribute_value_string:: inside a quoted attribute value
# :in_special_tag::         inside the body of a <script> element
#
# encoder:: object receiving +text_token+, +begin_group+ and +end_group+ calls.
# options:: Hash. <tt>:state</tt> overrides the starting state (default: @state).
#           When <tt>:keep_state</tt> is truthy, the final state, the current
#           string-content pattern and the open special tag are stored in
#           @state, @plain_string_content and @in_tag.
#
# Returns +encoder+.
#
# Unknown states and unmatched input in :initial are reported through
# +raise_inspect+; an unknown special tag raises a RuntimeError.
def scan_tokens encoder, options
  state = options[:state] || @state
  plain_string_content = @plain_string_content
  in_tag = @in_tag
  in_attribute = nil

  # When scanning starts inside a quoted attribute value, open the :string
  # group here; it is closed again at the end of this method.
  encoder.begin_group :string if state == :attribute_value_string

  until eos?

    # Whitespace is a token of its own everywhere except inside a special
    # tag body, where it belongs to the embedded code.
    if state != :in_special_tag && match = scan(/\s+/m)
      encoder.text_token match, :space

    else

      case state

      when :initial
        if match = scan(/<!--(?:.*?-->|.*)/m)
          encoder.text_token match, :comment
        elsif match = scan(/<!(\w+)(?:.*?>|.*)|\]>/m)
          encoder.text_token match, :doctype
        elsif match = scan(/<\?xml(?:.*?\?>|.*)/m)
          encoder.text_token match, :preprocessor
        elsif match = scan(/<\?(?:.*?\?>|.*)/m)
          encoder.text_token match, :comment
        elsif match = scan(/<\/[-\w.:]*>?/m)
          in_tag = nil
          encoder.text_token match, :tag
        # The lookahead keeps "script" from matching as a mere prefix of a
        # longer tag name such as <scripts> or <script-loader>.
        elsif match = scan(/<(?:(script)(?![-\w.:])|[-\w.:]+)(>)?/m)
          encoder.text_token match, :tag
          in_tag = self[1]
          if self[2]
            # Tag closed immediately: only a special tag changes the state.
            state = :in_special_tag if in_tag
          else
            state = :attribute
          end
        elsif match = scan(/[^<>&]+/)
          encoder.text_token match, :plain
        elsif match = scan(/#{ENTITY}/ox)
          encoder.text_token match, :entity
        elsif match = scan(/[<>&]/)
          in_tag = nil
          encoder.text_token match, :error
        else
          raise_inspect '[BUG] else-case reached with state %p' % [state], encoder
        end

      when :attribute
        if match = scan(/#{TAG_END}/o)
          encoder.text_token match, :tag
          in_attribute = nil
          state = in_tag ? :in_special_tag : :initial
        elsif match = scan(/#{ATTR_NAME}/o)
          in_attribute = IN_ATTRIBUTE[match]
          encoder.text_token match, :attribute_name
          state = :attribute_equal
        else
          in_tag = nil
          encoder.text_token getch, :error
        end

      when :attribute_equal
        if match = scan(/=/)
          encoder.text_token match, :operator
          state = :attribute_value
        else
          # Attribute without a value: re-dispatch the same input as :attribute.
          state = :attribute
          next
        end

      when :attribute_value
        if match = scan(/#{ATTR_NAME}/o)
          encoder.text_token match, :attribute_value
          state = :attribute
        elsif match = scan(/["']/)
          if in_attribute == :script
            # Quoted value of a script attribute: scan it as inline JavaScript.
            encoder.begin_group :inline
            encoder.text_token match, :inline_delimiter
            if scan(/javascript:[ \t]*/)
              encoder.text_token matched, :comment
            end
            # Take everything up to the matching quote (or the end of input).
            code = scan_until(match == '"' ? /(?="|\z)/ : /(?='|\z)/)
            scan_java_script encoder, code
            match = scan(/["']/)
            encoder.text_token match, :inline_delimiter if match
            encoder.end_group :inline
            state = :attribute
            in_attribute = nil
          else
            encoder.begin_group :string
            state = :attribute_value_string
            plain_string_content = PLAIN_STRING_CONTENT[match]
            encoder.text_token match, :delimiter
          end
        elsif match = scan(/#{TAG_END}/o)
          # Tag ended right after "=": handle it exactly like a tag end in
          # the :attribute state so a special tag's body is still recognized.
          encoder.text_token match, :tag
          in_attribute = nil
          state = in_tag ? :in_special_tag : :initial
        else
          encoder.text_token getch, :error
        end

      when :attribute_value_string
        if match = scan(plain_string_content)
          encoder.text_token match, :content
        elsif match = scan(/['"]/)
          encoder.text_token match, :delimiter
          encoder.end_group :string
          state = :attribute
        elsif match = scan(/#{ENTITY}/ox)
          encoder.text_token match, :entity
        elsif match = scan(/&/)
          encoder.text_token match, :content
        elsif match = scan(/[\n>]/)
          # Unterminated string: close the group and flag the character.
          encoder.end_group :string
          state = :initial
          encoder.text_token match, :error
        end

      when :in_special_tag
        case in_tag
        when 'script'
          encoder.text_token match, :space if match = scan(/[ \t]*\n/)
          if scan(/(\s*<!--)(?:(.*?)(-->)|(.*))/m)
            # Script body wrapped in an HTML comment: the comment markers are
            # emitted as :comment tokens, the text between them is the code.
            code = self[2] || self[4]
            closing = self[3]
            encoder.text_token self[1], :comment
          else
            code = scan_until(/(?=(?:\n\s*)?<\/script>)|\z/)
            closing = false
          end
          unless code.empty?
            encoder.begin_group :inline
            scan_java_script encoder, code
            encoder.end_group :inline
          end
          encoder.text_token closing, :comment if closing
          state = :initial
        else
          raise 'unknown special tag: %p' % [in_tag]
        end

      else
        raise_inspect 'Unknown state: %p' % [state], encoder

      end

    end

  end

  if options[:keep_state]
    @state = state
    @plain_string_content = plain_string_content
    @in_tag = in_tag
  end

  # Balance the :string group if the input ended inside a quoted value.
  encoder.end_group :string if state == :attribute_value_string

  encoder
end