View Javadoc

1   /***
2    * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
3    */
4   package net.sourceforge.pmd.cpd;
5   
6   import java.util.ArrayList;
7   import java.util.Collections;
8   import java.util.HashMap;
9   import java.util.Iterator;
10  import java.util.List;
11  import java.util.Map;
12  
13  public class MatchAlgorithm {
14  
15      private final static int MOD = 37;
16      private int lastHash;
17      private int lastMod = 1;
18  
19      private List matches;
20      private Map source;
21      private Tokens tokens;
22      private List code;
23      private CPDListener cpdListener;
24      private int min;
25  
26      public MatchAlgorithm(Map sourceCode, Tokens tokens, int min) {
27          this(sourceCode, tokens, min, new CPDNullListener());
28      }
29  
30      public MatchAlgorithm(Map sourceCode, Tokens tokens, int min, CPDListener listener) {
31          this.source = sourceCode;
32          this.tokens = tokens;
33          this.code = tokens.getTokens();
34          this.min = min;
35          this.cpdListener = listener;
36          for (int i = 0; i < min; i++) {
37              lastMod *= MOD;
38          }
39      }
40  
41      public void setListener(CPDListener listener) {
42          this.cpdListener = listener;
43      }
44  
45      public Iterator matches() {
46          return matches.iterator();
47      }
48  
49      public TokenEntry tokenAt(int offset, TokenEntry m) {
50          return (TokenEntry) code.get(offset + m.getIndex());
51      }
52  
53      public int getMinimumTileSize() {
54          return this.min;
55      }
56  
57      public void findMatches() {
58          cpdListener.phaseUpdate(CPDListener.HASH);
59          Map markGroups = hash();
60  
61          cpdListener.phaseUpdate(CPDListener.MATCH);
62          MatchCollector matchCollector = new MatchCollector(this);
63          for (Iterator i = markGroups.values().iterator(); i.hasNext();) {
64              Object o = i.next();
65              if (o instanceof List) {
66                  Collections.reverse((List) o);
67                  matchCollector.collect((List) o);
68              }
69              i.remove();
70          }
71          cpdListener.phaseUpdate(CPDListener.GROUPING);
72          matches = matchCollector.getMatches();
73          matchCollector = null;
74          for (Iterator i = matches.iterator(); i.hasNext();) {
75              Match match = (Match) i.next();
76              for (Iterator occurrences = match.iterator(); occurrences.hasNext();) {
77                  TokenEntry mark = (TokenEntry) occurrences.next();
78                  match.setLineCount(tokens.getLineCount(mark, match));
79                  if (!occurrences.hasNext()) {
80                      int start = mark.getBeginLine();
81                      int end = start + match.getLineCount() - 1;
82                      SourceCode sourceCode = (SourceCode) source.get(mark.getTokenSrcID());
83                      match.setSourceCodeSlice(sourceCode.getSlice(start, end));
84                  }
85              }
86          }
87          cpdListener.phaseUpdate(CPDListener.DONE);
88      }
89  
90      private Map hash() {
91          Map markGroups = new HashMap(tokens.size());
92          for (int i = code.size() - 1; i >= 0; i--) {
93              TokenEntry token = (TokenEntry) code.get(i);
94              if (token != TokenEntry.EOF) {
95                  int last = tokenAt(min, token).getIdentifier();
96                  lastHash = MOD * lastHash + token.getIdentifier() - lastMod * last;
97                  token.setHashCode(lastHash);
98                  Object o = markGroups.get(token);
99  
100                 // Note that this insertion method is worthwhile since the vast majority
101                 // markGroup keys will have only one value.
102                 if (o == null) {
103                     markGroups.put(token, token);
104                 } else if (o instanceof TokenEntry) {
105                     List l = new ArrayList();
106                     l.add(o);
107                     l.add(token);
108                     markGroups.put(token, l);
109                 } else {
110                     List l = (List) o;
111                     l.add(token);
112                 }
113             } else {
114                 lastHash = 0;
115                 for (int end = Math.max(0, i - min + 1); i > end; i--) {
116                     token = (TokenEntry) code.get(i - 1);
117                     lastHash = MOD * lastHash + token.getIdentifier();
118                     if (token == TokenEntry.EOF) {
119                         break;
120                     }
121                 }
122             }
123         }
124         return markGroups;
125     }
126 }