View Javadoc

1   /***
2    * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
3    * @authors: Zev Blut zb@ubit.com
4    */
5   package net.sourceforge.pmd.cpd;
6   
7   import java.util.List;
8   
9   public class RubyTokenizer implements Tokenizer {
10      private boolean downcaseString = true;
11  
12      public void tokenize(SourceCode tokens, Tokens tokenEntries) {
13          List code = tokens.getCode();
14          for (int i = 0; i < code.size(); i++) {
15              String currentLine = (String) code.get(i);
16              int loc = 0;
17              while (loc < currentLine.length()) {
18                  StringBuffer token = new StringBuffer();
19                  loc = getTokenFromLine(currentLine, token, loc);
20                  if (token.length() > 0 && !isIgnorableString(token.toString())) {
21                      if (downcaseString) {
22                          token = new StringBuffer(token.toString().toLowerCase());
23                      }
24                      tokenEntries.add(new TokenEntry(token.toString(),
25                              tokens.getFileName(),
26                              i + 1));
27                  }
28              }
29          }
30          tokenEntries.add(TokenEntry.getEOF());
31      }
32  
33      private int getTokenFromLine(String line, StringBuffer token, int loc) {
34          for (int j = loc; j < line.length(); j++) {
35              char tok = line.charAt(j);
36              if (!Character.isWhitespace(tok) && !ignoreCharacter(tok)) {
37                  if (isComment(tok)) {
38                      if (token.length() > 0) {
39                          return j;
40                      } else {
41                          return getCommentToken(line, token, loc);
42                      }
43                  } else if (isString(tok)) {
44                      if (token.length() > 0) {
45                          //if (loc == lin
46                          return j; // we need to now parse the string as a seperate token.
47                      } else {
48                          // we are at the start of a string
49                          return parseString(line, token, j, tok);
50                      }
51                  } else {
52                      token.append(tok);
53                  }
54              } else {
55                  if (token.length() > 0) {
56                      return j;
57                  }
58              }
59              loc = j;
60          }
61          return loc + 1;
62      }
63  
64      private int parseString(String line, StringBuffer token, int loc, char stringType) {
65          boolean escaped = false;
66          boolean done = false;
67          //System.out.println("Parsing String:" + stringType);
68          //System.out.println("Starting loc:" + loc);
69          // problem of strings that span multiple lines :-(
70          char tok = ' '; // this will be replaced.
71          while ((loc < line.length()) && !done) {
72              tok = line.charAt(loc);
73              if (escaped && tok == stringType) {
74                  //     System.out.println("Found an escaped string");
75                  escaped = false;
76              } else if (tok == stringType && (token.length() > 0)) {
77                  // we are done
78                  //   System.out.println("Found an end string");
79                  done = true;
80              } else if (tok == '//') {
81                  // System.out.println("Found an escaped char");
82                  escaped = true;
83              } else {
84                  // System.out.println("Adding char:" + tok + ";loc:" + loc);
85                  escaped = false;
86              }
87              //System.out.println("Adding char to String:" + token.toString());
88              token.append(tok);
89              loc++;
90          }
91          return loc + 1;
92      }
93  
94      private boolean ignoreCharacter(char tok) {
95          boolean result = false;
96          switch (tok) {
97              case '{':
98              case '}':
99              case '(':
100             case ')':
101             case ';':
102             case ',':
103                 result = true;
104                 break;
105             default :
106                 result = false;
107         }
108         return result;
109     }
110 
111     private boolean isString(char tok) {
112         boolean result = false;
113         switch (tok) {
114             case '\'':
115             case '"':
116                 result = true;
117                 break;
118             default:
119                 result = false;
120         }
121         return result;
122     }
123 
124     private boolean isComment(char tok) {
125         return tok == '#';
126     }
127 
128     private int getCommentToken(String line, StringBuffer token, int loc) {
129         while (loc < line.length()) {
130             token.append(line.charAt(loc));
131             loc++;
132         }
133         return loc;
134     }
135 
136     private boolean isIgnorableString(String token) {
137         return token == "do" || token == "end";
138     }
139 }