1 |
| |
2 |
| |
3 |
| |
4 |
| package net.sourceforge.pmd.cpd; |
5 |
| |
6 |
| import java.util.ArrayList; |
7 |
| import java.util.Collections; |
8 |
| import java.util.HashMap; |
9 |
| import java.util.HashSet; |
10 |
| import java.util.Iterator; |
11 |
| import java.util.List; |
12 |
| import java.util.Map; |
13 |
| import java.util.Set; |
14 |
| |
15 |
| public class MatchCollector { |
16 |
| |
17 |
| private MatchAlgorithm ma; |
18 |
| private Map startMap = new HashMap(); |
19 |
| private Map fileMap = new HashMap(); |
20 |
| |
21 |
2
| public MatchCollector(MatchAlgorithm ma) {
|
22 |
2
| this.ma = ma;
|
23 |
| } |
24 |
| |
25 |
8
| public void collect(List marks) {
|
26 |
| |
27 |
8
| for (int i = 0; i < marks.size() - 1; i++) {
|
28 |
12
| TokenEntry mark1 = (TokenEntry) marks.get(i);
|
29 |
12
| for (int j = i + 1; j < marks.size(); j++) {
|
30 |
16
| TokenEntry mark2 = (TokenEntry) marks.get(j);
|
31 |
16
| int diff = mark1.getIndex() - mark2.getIndex();
|
32 |
16
| if (-diff < ma.getMinimumTileSize()) {
|
33 |
0
| continue;
|
34 |
| } |
35 |
16
| if (hasPreviousDupe(mark1, mark2)) {
|
36 |
12
| continue;
|
37 |
| } |
38 |
| |
39 |
| |
40 |
4
| int dupes = countDuplicateTokens(mark1, mark2);
|
41 |
4
| if (dupes < ma.getMinimumTileSize()) {
|
42 |
0
| continue;
|
43 |
| } |
44 |
| |
45 |
4
| if (diff + dupes >= 1) {
|
46 |
0
| continue;
|
47 |
| } |
48 |
4
| determineMatch(mark1, mark2, dupes);
|
49 |
| } |
50 |
| } |
51 |
| } |
52 |
| |
53 |
2
| public List getMatches() {
|
54 |
2
| List matchList = new ArrayList(startMap.values());
|
55 |
2
| Collections.sort(matchList);
|
56 |
2
| Set matchSet = new HashSet();
|
57 |
2
| Match.MatchCode matchCode = new Match.MatchCode();
|
58 |
2
| for (int i = matchList.size(); i > 1; i--) {
|
59 |
1
| Match match1 = (Match) matchList.get(i - 1);
|
60 |
1
| TokenEntry mark1 = (TokenEntry) match1.getMarkSet().iterator().next();
|
61 |
1
| matchSet.clear();
|
62 |
1
| matchSet.add(match1.getMatchCode());
|
63 |
1
| for (int j = i - 1; j > 0; j--) {
|
64 |
2
| Match match2 = (Match) matchList.get(j - 1);
|
65 |
2
| if (match1.getTokenCount() != match2.getTokenCount()) {
|
66 |
0
| break;
|
67 |
| } |
68 |
2
| TokenEntry mark2 = null;
|
69 |
3
| for (Iterator iter = match2.getMarkSet().iterator(); iter.hasNext();) {
|
70 |
3
| mark2 = (TokenEntry) iter.next();
|
71 |
3
| if (mark2 != mark1) {
|
72 |
2
| break;
|
73 |
| } |
74 |
| } |
75 |
2
| int dupes = countDuplicateTokens(mark1, mark2);
|
76 |
2
| if (dupes < match1.getTokenCount()) {
|
77 |
0
| break;
|
78 |
| } |
79 |
2
| matchSet.add(match2.getMatchCode());
|
80 |
2
| match1.getMarkSet().addAll(match2.getMarkSet());
|
81 |
2
| matchList.remove(i - 2);
|
82 |
2
| i--;
|
83 |
| } |
84 |
1
| if (matchSet.size() == 1) {
|
85 |
0
| continue;
|
86 |
| } |
87 |
| |
88 |
1
| Set pruned = match1.getMarkSet();
|
89 |
1
| boolean done = false;
|
90 |
1
| ArrayList a1 = new ArrayList(match1.getMarkSet());
|
91 |
1
| Collections.sort(a1);
|
92 |
1
| for (int outer = 0; outer < a1.size() - 1 && !done; outer++) {
|
93 |
2
| TokenEntry cmark1 = (TokenEntry) a1.get(outer);
|
94 |
2
| for (int inner = outer + 1; inner < a1.size() && !done; inner++) {
|
95 |
3
| TokenEntry cmark2 = (TokenEntry) a1.get(inner);
|
96 |
3
| matchCode.setFirst(cmark1.getIndex());
|
97 |
3
| matchCode.setSecond(cmark2.getIndex());
|
98 |
3
| if (!matchSet.contains(matchCode)) {
|
99 |
0
| if (pruned.size() > 2) {
|
100 |
0
| pruned.remove(cmark2);
|
101 |
| } |
102 |
0
| if (pruned.size() == 2) {
|
103 |
0
| done = true;
|
104 |
| } |
105 |
| } |
106 |
| } |
107 |
| } |
108 |
| } |
109 |
2
| return matchList;
|
110 |
| } |
111 |
| |
112 |
| |
113 |
| |
114 |
| |
115 |
4
| private void determineMatch(TokenEntry mark1, TokenEntry mark2, int dupes) {
|
116 |
4
| Match match = new Match(dupes, mark1, mark2);
|
117 |
4
| String fileKey = mark1.getTokenSrcID() + mark2.getTokenSrcID();
|
118 |
4
| List pairMatches = (ArrayList) fileMap.get(fileKey);
|
119 |
4
| if (pairMatches == null) {
|
120 |
2
| pairMatches = new ArrayList();
|
121 |
2
| fileMap.put(fileKey, pairMatches);
|
122 |
| } |
123 |
4
| boolean add = true;
|
124 |
4
| for (int i = 0; i < pairMatches.size(); i++) {
|
125 |
3
| Match other = (Match) pairMatches.get(i);
|
126 |
3
| if (other.getFirstMark().getIndex() + other.getTokenCount() - mark1.getIndex()
|
127 |
| > 0) { |
128 |
1
| boolean ordered = other.getSecondMark().getIndex() - mark2.getIndex() < 0;
|
129 |
1
| if ((ordered && (other.getEndIndex() - mark2.getIndex() > 0))
|
130 |
| || (!ordered && (match.getEndIndex() - other.getSecondMark().getIndex()) > 0)) { |
131 |
0
| if (other.getTokenCount() >= match.getTokenCount()) {
|
132 |
0
| add = false;
|
133 |
0
| break;
|
134 |
| } else { |
135 |
0
| pairMatches.remove(i);
|
136 |
0
| startMap.remove(other.getMatchCode());
|
137 |
| } |
138 |
| } |
139 |
| } |
140 |
| } |
141 |
4
| if (add) {
|
142 |
4
| pairMatches.add(match);
|
143 |
4
| startMap.put(match.getMatchCode(), match);
|
144 |
| } |
145 |
| } |
146 |
| |
147 |
16
| private boolean hasPreviousDupe(TokenEntry mark1, TokenEntry mark2) {
|
148 |
16
| if (mark1.getIndex() == 0) {
|
149 |
0
| return false;
|
150 |
| } |
151 |
16
| return !matchEnded(ma.tokenAt(-1, mark1), ma.tokenAt(-1, mark2));
|
152 |
| } |
153 |
| |
154 |
6
| private int countDuplicateTokens(TokenEntry mark1, TokenEntry mark2) {
|
155 |
6
| int index = 0;
|
156 |
6
| while (!matchEnded(ma.tokenAt(index, mark1), ma.tokenAt(index, mark2))) {
|
157 |
48
| index++;
|
158 |
| } |
159 |
6
| return index;
|
160 |
| } |
161 |
| |
162 |
70
| private boolean matchEnded(TokenEntry token1, TokenEntry token2) {
|
163 |
70
| return token1.getIdentifier() != token2.getIdentifier() || token1 == TokenEntry.EOF || token2 == TokenEntry.EOF;
|
164 |
| } |
165 |
| } |