1 |
| |
2 |
| |
3 |
| |
4 |
| |
5 |
| package net.sourceforge.pmd.cpd; |
6 |
| |
7 |
| import java.util.List; |
8 |
| |
9 |
| public class RubyTokenizer implements Tokenizer { |
10 |
| private boolean downcaseString = true; |
11 |
| |
12 |
0
| public void tokenize(SourceCode tokens, Tokens tokenEntries) {
|
13 |
0
| List code = tokens.getCode();
|
14 |
0
| for (int i = 0; i < code.size(); i++) {
|
15 |
0
| String currentLine = (String) code.get(i);
|
16 |
0
| int loc = 0;
|
17 |
0
| while (loc < currentLine.length()) {
|
18 |
0
| StringBuffer token = new StringBuffer();
|
19 |
0
| loc = getTokenFromLine(currentLine, token, loc);
|
20 |
0
| if (token.length() > 0 && !isIgnorableString(token.toString())) {
|
21 |
0
| if (downcaseString) {
|
22 |
0
| token = new StringBuffer(token.toString().toLowerCase());
|
23 |
| } |
24 |
0
| tokenEntries.add(new TokenEntry(token.toString(),
|
25 |
| tokens.getFileName(), |
26 |
| i + 1)); |
27 |
| } |
28 |
| } |
29 |
| } |
30 |
0
| tokenEntries.add(TokenEntry.getEOF());
|
31 |
| } |
32 |
| |
33 |
0
| private int getTokenFromLine(String line, StringBuffer token, int loc) {
|
34 |
0
| for (int j = loc; j < line.length(); j++) {
|
35 |
0
| char tok = line.charAt(j);
|
36 |
0
| if (!Character.isWhitespace(tok) && !ignoreCharacter(tok)) {
|
37 |
0
| if (isComment(tok)) {
|
38 |
0
| if (token.length() > 0) {
|
39 |
0
| return j;
|
40 |
| } else { |
41 |
0
| return getCommentToken(line, token, loc);
|
42 |
| } |
43 |
0
| } else if (isString(tok)) {
|
44 |
0
| if (token.length() > 0) {
|
45 |
| |
46 |
0
| return j;
|
47 |
| } else { |
48 |
| |
49 |
0
| return parseString(line, token, j, tok);
|
50 |
| } |
51 |
| } else { |
52 |
0
| token.append(tok);
|
53 |
| } |
54 |
| } else { |
55 |
0
| if (token.length() > 0) {
|
56 |
0
| return j;
|
57 |
| } |
58 |
| } |
59 |
0
| loc = j;
|
60 |
| } |
61 |
0
| return loc + 1;
|
62 |
| } |
63 |
| |
64 |
0
| private int parseString(String line, StringBuffer token, int loc, char stringType) {
|
65 |
0
| boolean escaped = false;
|
66 |
0
| boolean done = false;
|
67 |
| |
68 |
| |
69 |
| |
70 |
0
| char tok = ' ';
|
71 |
0
| while ((loc < line.length()) && !done) {
|
72 |
0
| tok = line.charAt(loc);
|
73 |
0
| if (escaped && tok == stringType) {
|
74 |
| |
75 |
0
| escaped = false;
|
76 |
0
| } else if (tok == stringType && (token.length() > 0)) {
|
77 |
| |
78 |
| |
79 |
0
| done = true;
|
80 |
0
| } else if (tok == '\\') {
|
81 |
| |
82 |
0
| escaped = true;
|
83 |
| } else { |
84 |
| |
85 |
0
| escaped = false;
|
86 |
| } |
87 |
| |
88 |
0
| token.append(tok);
|
89 |
0
| loc++;
|
90 |
| } |
91 |
0
| return loc + 1;
|
92 |
| } |
93 |
| |
94 |
0
| private boolean ignoreCharacter(char tok) {
|
95 |
0
| boolean result = false;
|
96 |
0
| switch (tok) {
|
97 |
0
| case '{':
|
98 |
0
| case '}':
|
99 |
0
| case '(':
|
100 |
0
| case ')':
|
101 |
0
| case ';':
|
102 |
0
| case ',':
|
103 |
0
| result = true;
|
104 |
0
| break;
|
105 |
0
| default :
|
106 |
0
| result = false;
|
107 |
| } |
108 |
0
| return result;
|
109 |
| } |
110 |
| |
111 |
0
| private boolean isString(char tok) {
|
112 |
0
| boolean result = false;
|
113 |
0
| switch (tok) {
|
114 |
0
| case '\'':
|
115 |
0
| case '"':
|
116 |
0
| result = true;
|
117 |
0
| break;
|
118 |
0
| default:
|
119 |
0
| result = false;
|
120 |
| } |
121 |
0
| return result;
|
122 |
| } |
123 |
| |
124 |
0
| private boolean isComment(char tok) {
|
125 |
0
| return tok == '#';
|
126 |
| } |
127 |
| |
128 |
0
| private int getCommentToken(String line, StringBuffer token, int loc) {
|
129 |
0
| while (loc < line.length()) {
|
130 |
0
| token.append(line.charAt(loc));
|
131 |
0
| loc++;
|
132 |
| } |
133 |
0
| return loc;
|
134 |
| } |
135 |
| |
136 |
0
| private boolean isIgnorableString(String token) {
|
137 |
0
| return token == "do" || token == "end";
|
138 |
| } |
139 |
| } |