1 /***
2 * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
3 */
4 package net.sourceforge.pmd.cpd;
5
6 import java.io.BufferedReader;
7 import java.io.CharArrayReader;
8 import java.util.NoSuchElementException;
9 import java.util.StringTokenizer;
10
11 /***
12 * This class does a best-guess try-anything tokenization.
13 *
14 * @author jheintz
15 */
16 public class AnyTokenizer implements Tokenizer {
17 public static final String TOKENS = " \t!#$%^&*(){}-=+<>///`~;:";
18
19 public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
20 StringBuffer sb = sourceCode.getCodeBuffer();
21 BufferedReader reader = new BufferedReader(new CharArrayReader(sb.toString().toCharArray()));
22 try {
23 int lineNumber = 1;
24 String line = reader.readLine();
25 while (line != null) {
26 StringTokenizer tokenizer = new StringTokenizer(line, TOKENS, true);
27 try {
28 String token = tokenizer.nextToken();
29 while (token != null) {
30 if (!token.equals(" ") && !token.equals("\t")) {
31 tokenEntries.add(new TokenEntry(token, sourceCode.getFileName(), lineNumber));
32 }
33 token = tokenizer.nextToken();
34 }
35 } catch (NoSuchElementException ex) {
36
37 }
38
39 line = reader.readLine();
40 lineNumber++;
41 }
42 } catch (Exception ex) {
43 ex.printStackTrace();
44 } finally {
45 try {
46 reader.close();
47 } catch (Exception ex) {
48 }
49 tokenEntries.add(TokenEntry.getEOF());
50 }
51 }
52 }