1 /***
2 * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
3 * @authors: Zev Blut zb@ubit.com
4 */
5 package net.sourceforge.pmd.cpd;
6
7 import java.util.List;
8
9 public class RubyTokenizer implements Tokenizer {
10 private boolean downcaseString = true;
11
12 public void tokenize(SourceCode tokens, Tokens tokenEntries) {
13 List code = tokens.getCode();
14 for (int i = 0; i < code.size(); i++) {
15 String currentLine = (String) code.get(i);
16 int loc = 0;
17 while (loc < currentLine.length()) {
18 StringBuffer token = new StringBuffer();
19 loc = getTokenFromLine(currentLine, token, loc);
20 if (token.length() > 0 && !isIgnorableString(token.toString())) {
21 if (downcaseString) {
22 token = new StringBuffer(token.toString().toLowerCase());
23 }
24 tokenEntries.add(new TokenEntry(token.toString(),
25 tokens.getFileName(),
26 i + 1));
27 }
28 }
29 }
30 tokenEntries.add(TokenEntry.getEOF());
31 }
32
33 private int getTokenFromLine(String line, StringBuffer token, int loc) {
34 for (int j = loc; j < line.length(); j++) {
35 char tok = line.charAt(j);
36 if (!Character.isWhitespace(tok) && !ignoreCharacter(tok)) {
37 if (isComment(tok)) {
38 if (token.length() > 0) {
39 return j;
40 } else {
41 return getCommentToken(line, token, loc);
42 }
43 } else if (isString(tok)) {
44 if (token.length() > 0) {
45
46 return j;
47 } else {
48
49 return parseString(line, token, j, tok);
50 }
51 } else {
52 token.append(tok);
53 }
54 } else {
55 if (token.length() > 0) {
56 return j;
57 }
58 }
59 loc = j;
60 }
61 return loc + 1;
62 }
63
64 private int parseString(String line, StringBuffer token, int loc, char stringType) {
65 boolean escaped = false;
66 boolean done = false;
67
68
69
70 char tok = ' ';
71 while ((loc < line.length()) && !done) {
72 tok = line.charAt(loc);
73 if (escaped && tok == stringType) {
74
75 escaped = false;
76 } else if (tok == stringType && (token.length() > 0)) {
77
78
79 done = true;
80 } else if (tok == '//') {
81
82 escaped = true;
83 } else {
84
85 escaped = false;
86 }
87
88 token.append(tok);
89 loc++;
90 }
91 return loc + 1;
92 }
93
94 private boolean ignoreCharacter(char tok) {
95 boolean result = false;
96 switch (tok) {
97 case '{':
98 case '}':
99 case '(':
100 case ')':
101 case ';':
102 case ',':
103 result = true;
104 break;
105 default :
106 result = false;
107 }
108 return result;
109 }
110
111 private boolean isString(char tok) {
112 boolean result = false;
113 switch (tok) {
114 case '\'':
115 case '"':
116 result = true;
117 break;
118 default:
119 result = false;
120 }
121 return result;
122 }
123
124 private boolean isComment(char tok) {
125 return tok == '#';
126 }
127
128 private int getCommentToken(String line, StringBuffer token, int loc) {
129 while (loc < line.length()) {
130 token.append(line.charAt(loc));
131 loc++;
132 }
133 return loc;
134 }
135
136 private boolean isIgnorableString(String token) {
137 return token == "do" || token == "end";
138 }
139 }