this fixes two problems:
[rsync/rsync.git] / match.c
... / ...
CommitLineData
1/*
2 Copyright (C) Andrew Tridgell 1996
3 Copyright (C) Paul Mackerras 1996
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18*/
19
20#include "rsync.h"
21
22extern int csum_length;
23
24extern int verbose;
25extern int am_server;
26
27extern int remote_version;
28
29typedef unsigned short tag;
30
31#define TABLESIZE (1<<16)
32#define NULL_TAG ((tag)-1)
33
34static int false_alarms;
35static int tag_hits;
36static int matches;
37static int data_transfer;
38
39static int total_false_alarms;
40static int total_tag_hits;
41static int total_matches;
42static int64 total_data_transfer;
43
44
45struct target {
46 tag t;
47 int i;
48};
49
50static struct target *targets;
51
52static tag *tag_table;
53
54#define gettag2(s1,s2) (((s1) + (s2)) & 0xFFFF)
55#define gettag(sum) gettag2((sum)&0xFFFF,(sum)>>16)
56
57static int compare_targets(struct target *t1,struct target *t2)
58{
59 return((int)t1->t - (int)t2->t);
60}
61
62
63static void build_hash_table(struct sum_struct *s)
64{
65 int i;
66
67 if (!tag_table)
68 tag_table = (tag *)malloc(sizeof(tag)*TABLESIZE);
69
70 targets = (struct target *)malloc(sizeof(targets[0])*s->count);
71 if (!tag_table || !targets)
72 out_of_memory("build_hash_table");
73
74 for (i=0;i<s->count;i++) {
75 targets[i].i = i;
76 targets[i].t = gettag(s->sums[i].sum1);
77 }
78
79 qsort(targets,s->count,sizeof(targets[0]),(int (*)())compare_targets);
80
81 for (i=0;i<TABLESIZE;i++)
82 tag_table[i] = NULL_TAG;
83
84 for (i=s->count-1;i>=0;i--) {
85 tag_table[targets[i].t] = i;
86 }
87}
88
89
90static OFF_T last_match;
91
92
93static void matched(int f,struct sum_struct *s,struct map_struct *buf,
94 OFF_T offset,int i)
95{
96 OFF_T n = offset - last_match;
97 int j;
98
99 if (verbose > 2 && i >= 0)
100 rprintf(FINFO,"match at %d last_match=%d j=%d len=%d n=%d\n",
101 (int)offset,(int)last_match,i,(int)s->sums[i].len,(int)n);
102
103 send_token(f,i,buf,last_match,n,i<0?0:s->sums[i].len);
104 data_transfer += n;
105
106 if (n > 0)
107 write_flush(f);
108
109 if (i >= 0)
110 n += s->sums[i].len;
111
112 for (j=0;j<n;j+=CHUNK_SIZE) {
113 int n1 = MIN(CHUNK_SIZE,n-j);
114 sum_update(map_ptr(buf,last_match+j,n1),n1);
115 }
116
117
118 if (i >= 0)
119 last_match = offset + s->sums[i].len;
120 else
121 last_match = offset;
122}
123
124
125static void hash_search(int f,struct sum_struct *s,
126 struct map_struct *buf,OFF_T len)
127{
128 OFF_T offset;
129 int j,k;
130 int end;
131 char sum2[SUM_LENGTH];
132 uint32 s1, s2, sum;
133 schar *map;
134 extern int do_compression;
135
136 if (verbose > 2)
137 rprintf(FINFO,"hash search b=%d len=%d\n",s->n,(int)len);
138
139 k = MIN(len, s->n);
140
141 map = (schar *)map_ptr(buf,0,k);
142
143 sum = get_checksum1((char *)map, k);
144 s1 = sum & 0xFFFF;
145 s2 = sum >> 16;
146 if (verbose > 3)
147 rprintf(FINFO, "sum=%.8x k=%d\n", sum, k);
148
149 offset = 0;
150
151 end = len + 1 - s->sums[s->count-1].len;
152
153 if (verbose > 3)
154 rprintf(FINFO,"hash search s->n=%d len=%d count=%d\n",
155 s->n,(int)len,s->count);
156
157 do {
158 tag t = gettag2(s1,s2);
159 int done_csum2 = 0;
160
161 j = tag_table[t];
162 if (verbose > 4)
163 rprintf(FINFO,"offset=%d sum=%08x\n",(int)offset,sum);
164
165 if (j == NULL_TAG) {
166 goto null_tag;
167 }
168
169 sum = (s1 & 0xffff) | (s2 << 16);
170 tag_hits++;
171 for (; j<s->count && targets[j].t == t; j++) {
172 int i = targets[j].i;
173
174 if (sum != s->sums[i].sum1) continue;
175
176 if (verbose > 3)
177 rprintf(FINFO,"potential match at %d target=%d %d sum=%08x\n",
178 (int)offset,j,i,sum);
179
180 if (!done_csum2) {
181 int l = MIN(s->n,len-offset);
182 map = (schar *)map_ptr(buf,offset,l);
183 get_checksum2((char *)map,l,sum2);
184 done_csum2 = 1;
185 }
186
187 if (memcmp(sum2,s->sums[i].sum2,csum_length) != 0) {
188 false_alarms++;
189 continue;
190 }
191
192 matched(f,s,buf,offset,i);
193 offset += s->sums[i].len - 1;
194 k = MIN((len-offset), s->n);
195 map = (schar *)map_ptr(buf,offset,k);
196 sum = get_checksum1((char *)map, k);
197 s1 = sum & 0xFFFF;
198 s2 = sum >> 16;
199 matches++;
200 break;
201 }
202
203 null_tag:
204 /* Trim off the first byte from the checksum */
205 map = (schar *)map_ptr(buf,offset,k+1);
206 s1 -= map[0] + CHAR_OFFSET;
207 s2 -= k * (map[0]+CHAR_OFFSET);
208
209 /* Add on the next byte (if there is one) to the checksum */
210 if (k < (len-offset)) {
211 s1 += (map[k]+CHAR_OFFSET);
212 s2 += s1;
213 } else {
214 --k;
215 }
216
217 if (!do_compression) {
218 /* By matching early we avoid re-reading the
219 data 3 times in the case where a token
220 match comes a long way after last
221 match. The 3 reads are caused by the
222 running match, the checksum update and the
223 literal send.
224
225 we don't enable this for the compressed
226 case yet as the deflated token code can't
227 handle it. Paul is working on it */
228 if (offset-last_match >= CHUNK_SIZE+s->n &&
229 (end-offset > CHUNK_SIZE)) {
230 matched(f,s,buf,offset - s->n, -2);
231 }
232 }
233 } while (++offset < end);
234
235 matched(f,s,buf,len,-1);
236 map_ptr(buf,len-1,1);
237}
238
239
240void match_sums(int f,struct sum_struct *s,struct map_struct *buf,OFF_T len)
241{
242 char file_sum[MD4_SUM_LENGTH];
243
244 last_match = 0;
245 false_alarms = 0;
246 tag_hits = 0;
247 matches=0;
248 data_transfer=0;
249
250 sum_init();
251
252 if (len > 0 && s->count>0) {
253 build_hash_table(s);
254
255 if (verbose > 2)
256 rprintf(FINFO,"built hash table\n");
257
258 hash_search(f,s,buf,len);
259
260 if (verbose > 2)
261 rprintf(FINFO,"done hash search\n");
262 } else {
263 matched(f,s,buf,len,-1);
264 }
265
266 sum_end(file_sum);
267
268 if (remote_version >= 14) {
269 if (verbose > 2)
270 rprintf(FINFO,"sending file_sum\n");
271 write_buf(f,file_sum,MD4_SUM_LENGTH);
272 }
273
274 if (targets) {
275 free(targets);
276 targets=NULL;
277 }
278
279 if (verbose > 2)
280 rprintf(FINFO, "false_alarms=%d tag_hits=%d matches=%d\n",
281 false_alarms, tag_hits, matches);
282
283 total_tag_hits += tag_hits;
284 total_false_alarms += false_alarms;
285 total_matches += matches;
286 total_data_transfer += data_transfer;
287}
288
289void match_report(void)
290{
291 if (verbose <= 1)
292 return;
293
294 rprintf(FINFO,
295 "total: matches=%d tag_hits=%d false_alarms=%d data=%ld\n",
296 total_matches,total_tag_hits,
297 total_false_alarms,(long)total_data_transfer);
298}