This is now in CVS.
[rsync/rsync-patches.git] / fuzzy.diff
CommitLineData
241013b4
MP
1From rusty@rustcorp.com.au Wed Apr 3 17:18:42 2002
2Return-Path: <rusty@rustcorp.com.au>
3Delivered-To: mbp@samba.org
4Received: from wagner.rustcorp.com.au (sydney1.au.ibm.com [202.135.142.193])
5 by lists.samba.org (Postfix) with ESMTP id EA7B849DC
6 for <mbp@samba.org>; Tue, 2 Apr 2002 23:06:29 -0800 (PST)
7Received: from wagner.rustcorp.com.au ([127.0.0.1] helo=rustcorp.com.au)
8 by wagner.rustcorp.com.au with esmtp (Exim 3.35 #1 (Debian))
9 id 16set7-0000pL-00
10 for <mbp@samba.org>; Wed, 03 Apr 2002 17:08:57 +1000
11From: Rusty Russell <rusty@rustcorp.com.au>
12To: Martin Pool <mbp@samba.org>
13Subject: Re: gzip patch
14In-reply-to: Your message of "Wed, 03 Apr 2002 12:04:59 +1000."
15 <20020403020455.GC18851@samba.org>
16Date: Wed, 03 Apr 2002 17:08:57 +1000
17Sender: rusty@rustcorp.com.au
18Message-Id: <E16set7-0000pL-00@wagner.rustcorp.com.au>
19Status: RO
20X-Status: A
21Content-Length: 12810
22Lines: 461
23
24In message <20020403020455.GC18851@samba.org> you write:
25> Hi,
26>
27> I think you said the other day that you had a working --rsyncable
28> patch for gzip. Could I have it please?
29
30Hi Martin,
31
32 Just got your mail, sorry for the delay. Found old patch on
33google, and updated it for 2.5.4 (I know, but that's what apt-get
34source gave me).
35
36Compiles, otherwise untested.
37Rusty.
38--
39 Anyone who quotes me in their sig is an idiot. -- Rusty Russell.
40
41diff -urN rsync-2.5.4/Makefile.in rsync-2.5.4-fuzzy/Makefile.in
42--- rsync-2.5.4/Makefile.in Tue Feb 26 05:48:25 2002
43+++ rsync-2.5.4-fuzzy/Makefile.in Wed Apr 3 16:35:55 2002
44@@ -28,7 +28,7 @@
45 ZLIBOBJ=zlib/deflate.o zlib/infblock.o zlib/infcodes.o zlib/inffast.o \
46 zlib/inflate.o zlib/inftrees.o zlib/infutil.o zlib/trees.o \
47 zlib/zutil.o zlib/adler32.o
48-OBJS1=rsync.o generator.o receiver.o cleanup.o sender.o exclude.o util.o main.o checksum.o match.o syscall.o log.o backup.o
49+OBJS1=rsync.o generator.o receiver.o cleanup.o sender.o exclude.o util.o main.o checksum.o match.o syscall.o log.o backup.o alternate.o
50 OBJS2=options.o flist.o io.o compat.o hlink.o token.o uidlist.o socket.o fileio.o batch.o \
51 clientname.o
52 DAEMON_OBJ = params.o loadparm.o clientserver.o access.o connection.o authenticate.o
53diff -urN rsync-2.5.4/alternate.c rsync-2.5.4-fuzzy/alternate.c
54--- rsync-2.5.4/alternate.c Thu Jan 1 10:00:00 1970
55+++ rsync-2.5.4-fuzzy/alternate.c Wed Apr 3 17:04:15 2002
56@@ -0,0 +1,117 @@
57+#include "rsync.h"
58+
59+extern char *compare_dest;
60+extern int verbose;
61+
62+/* Alternate methods for opening files, if local doesn't exist */
63+/* Sanity check that we are about to open a regular file */
64+int do_open_regular(char *fname)
65+{
66+ STRUCT_STAT st;
67+
68+ if (do_stat(fname, &st) == 0 && S_ISREG(st.st_mode))
69+ return do_open(fname, O_RDONLY, 0);
70+
71+ return -1;
72+}
73+
74+static void split_names(char *fname, char **dirname, char **basename)
75+{
76+ char *slash;
77+
78+ slash = strrchr(fname, '/');
79+ if (slash) {
80+ *dirname = fname;
81+ *slash = '\0';
82+ *basename = slash+1;
83+ } else {
84+ *basename = fname;
85+ *dirname = ".";
86+ }
87+}
88+
89+static unsigned int measure_name(const char *name,
90+ const char *basename,
91+ const char *ext)
92+{
93+ int namelen = strlen(name);
94+ int extlen = strlen(ext);
95+ unsigned int score = 0;
96+
97+ /* Extensions must match */
98+ if (namelen <= extlen || strcmp(name+namelen-extlen, ext) != 0)
99+ return 0;
100+
101+ /* Now score depends on similarity of prefix */
102+ for (; *name==*basename && *name; name++, basename++)
103+ score++;
104+ return score;
105+}
106+
107+int open_alternate_base_fuzzy(const char *fname)
108+{
109+ DIR *d;
110+ struct dirent *di;
111+ char *basename, *dirname;
112+ char mangled_name[MAXPATHLEN];
113+ char bestname[MAXPATHLEN];
114+ unsigned int bestscore = 0;
115+ const char *ext;
116+
117+ /* FIXME: can we assume fname fits here? */
118+ strcpy(mangled_name, fname);
119+
120+ split_names(mangled_name, &dirname, &basename);
121+ d = opendir(dirname);
122+ if (!d) {
123+ rprintf(FERROR,"recv_generator opendir(%s): %s\n",
124+ dirname,strerror(errno));
125+ return -1;
126+ }
127+
128+ /* Get final extension, eg. .gz; never full basename though. */
129+ ext = strrchr(basename + 1, '.');
130+ if (!ext)
131+ ext = basename + strlen(basename); /* ext = "" */
132+
133+ while ((di = readdir(d)) != NULL) {
134+ const char *dname = d_name(di);
135+ unsigned int score;
136+
137+ if (strcmp(dname,".")==0 ||
138+ strcmp(dname,"..")==0)
139+ continue;
140+
141+ score = measure_name(dname, basename, ext);
142+ if (verbose > 4)
143+ rprintf(FINFO,"fuzzy score for %s = %u\n",
144+ dname, score);
145+ if (score > bestscore) {
146+ strcpy(bestname, dname);
147+ bestscore = score;
148+ }
149+ }
150+ closedir(d);
151+
152+ /* Found a candidate. */
153+ if (bestscore != 0) {
154+ char fuzzyname[MAXPATHLEN];
155+
156+ snprintf(fuzzyname,MAXPATHLEN,"%s/%s", dirname, bestname);
157+ if (verbose > 2)
158+ rprintf(FINFO,"fuzzy match %s->%s\n",
159+ fname, fuzzyname);
160+ return do_open_regular(fuzzyname);
161+ }
162+ return -1;
163+}
164+
165+int open_alternate_base_comparedir(const char *fname)
166+{
167+ char fnamebuf[MAXPATHLEN];
168+ /* try the file at compare_dest instead */
169+ snprintf(fnamebuf,MAXPATHLEN,"%s/%s",compare_dest,fname);
170+
171+ /* FIXME: now follows symlinks... */
172+ return do_open_regular(fnamebuf);
173+}
174diff -urN rsync-2.5.4/generator.c rsync-2.5.4-fuzzy/generator.c
175--- rsync-2.5.4/generator.c Fri Feb 8 03:36:12 2002
176+++ rsync-2.5.4-fuzzy/generator.c Wed Apr 3 17:00:06 2002
177@@ -42,11 +42,12 @@
178 extern int always_checksum;
179 extern int modify_window;
180 extern char *compare_dest;
181+extern int fuzzy;
182
183
184 /* choose whether to skip a particular file */
185 static int skip_file(char *fname,
186- struct file_struct *file, STRUCT_STAT *st)
187+ struct file_struct *file, const STRUCT_STAT *st)
188 {
189 if (st->st_size != file->length) {
190 return 0;
191@@ -185,7 +186,61 @@
192 return s;
193 }
194
195+/* Returns -1 for can't open (null file), -2 for skip */
196+static int open_base_file(struct file_struct *file,
197+ char *fname,
198+ int statret,
199+ STRUCT_STAT *st)
200+{
201+ int fd = -1;
202+
203+ if (statret == 0) {
204+ if (S_ISREG(st->st_mode)) {
205+ if (update_only
206+ && cmp_modtime(st->st_mtime, file->modtime) > 0) {
207+ if (verbose > 1)
208+ rprintf(FINFO,"%s is newer\n",fname);
209+ return -2;
210+ }
211+ if (skip_file(fname, file, st)) {
212+ set_perms(fname, file, st, 1);
213+ return -2;
214+ }
215+ fd = do_open(fname, O_RDONLY, 0);
216+ if (fd == -1) {
217+ rprintf(FERROR,"failed to open %s, continuing : %s\n",fname,strerror(errno));
218+ return -1;
219+ } else
220+ return fd;
221+ } else {
222+ /* Try to use symlink contents */
223+ if (S_ISLNK(st->st_mode)) {
224+ fd = do_open_regular(fname);
225+ /* Don't delete yet; receiver will need it */
226+ } else {
227+ if (delete_file(fname) != 0) {
228+ if (fd != -1)
229+ close(fd);
230+ return -2;
231+ }
232+ }
233+ }
234+ }
235+
236+ if (fd == -1 && compare_dest != NULL)
237+ fd = open_alternate_base_comparedir(fname);
238
239+ if (fd == -1 && fuzzy)
240+ fd = open_alternate_base_fuzzy(fname);
241+
242+ /* Update stat to understand size */
243+ if (fd != -1) {
244+ if (do_fstat(fd, st) != 0)
245+ rprintf(FERROR,"fstat %s : %s\n",fname,strerror(errno));
246+ }
247+
248+ return fd;
249+}
250
251 /*
252 * Acts on file number I from FLIST, whose name is fname.
253@@ -203,9 +258,6 @@
254 struct sum_struct *s;
255 int statret;
256 struct file_struct *file = flist->files[i];
257- char *fnamecmp;
258- char fnamecmpbuf[MAXPATHLEN];
259- extern char *compare_dest;
260 extern int list_only;
261 extern int preserve_perms;
262 extern int only_existing;
263@@ -341,82 +393,29 @@
264 return;
265 }
266
267- fnamecmp = fname;
268-
269- if ((statret == -1) && (compare_dest != NULL)) {
270- /* try the file at compare_dest instead */
271- int saveerrno = errno;
272- snprintf(fnamecmpbuf,MAXPATHLEN,"%s/%s",compare_dest,fname);
273- statret = link_stat(fnamecmpbuf,&st);
274- if (!S_ISREG(st.st_mode))
275- statret = -1;
276- if (statret == -1)
277- errno = saveerrno;
278- else
279- fnamecmp = fnamecmpbuf;
280- }
281-
282- if (statret == -1) {
283- if (errno == ENOENT) {
284- write_int(f_out,i);
285- if (!dry_run) send_sums(NULL,f_out);
286- } else {
287- if (verbose > 1)
288- rprintf(FERROR, RSYNC_NAME
289- ": recv_generator failed to open \"%s\": %s\n",
290- fname, strerror(errno));
291- }
292- return;
293- }
294-
295- if (!S_ISREG(st.st_mode)) {
296- if (delete_file(fname) != 0) {
297- return;
298- }
299-
300- /* now pretend the file didn't exist */
301- write_int(f_out,i);
302- if (!dry_run) send_sums(NULL,f_out);
303- return;
304- }
305-
306- if (opt_ignore_existing && fnamecmp == fname) {
307- if (verbose > 1)
308- rprintf(FINFO,"%s exists\n",fname);
309- return;
310- }
311-
312- if (update_only && cmp_modtime(st.st_mtime,file->modtime)>0 && fnamecmp == fname) {
313+ /* Failed to stat for some other reason. */
314+ if (statret == -1 && errno != ENOENT) {
315 if (verbose > 1)
316- rprintf(FINFO,"%s is newer\n",fname);
317+ rprintf(FERROR, RSYNC_NAME
318+ ": recv_generator failed to open \"%s\": %s\n",
319+ fname, strerror(errno));
320 return;
321 }
322
323- if (skip_file(fname, file, &st)) {
324- if (fnamecmp == fname)
325- set_perms(fname,file,&st,1);
326- return;
327- }
328-
329- if (dry_run) {
330- write_int(f_out,i);
331+ fd = open_base_file(file, fname, statret, &st);
332+ if (fd == -2)
333 return;
334- }
335-
336- if (whole_file) {
337- write_int(f_out,i);
338- send_sums(NULL,f_out);
339- return;
340- }
341-
342- /* open the file */
343- fd = do_open(fnamecmp, O_RDONLY, 0);
344
345- if (fd == -1) {
346- rprintf(FERROR,RSYNC_NAME": failed to open \"%s\", continuing : %s\n",fnamecmp,strerror(errno));
347- /* pretend the file didn't exist */
348+ if ((whole_file || dry_run) && fd != -1) {
349+ close(fd);
350+ fd = -1;
351+ }
352+
353+ if (fd == -1) {
354+ /* the file didn't exist, or we can pretend it doesn't */
355 write_int(f_out,i);
356- send_sums(NULL,f_out);
357+ if (!dry_run)
358+ send_sums(NULL,f_out);
359 return;
360 }
361
362@@ -427,7 +426,7 @@
363 }
364
365 if (verbose > 3)
366- rprintf(FINFO,"gen mapped %s of size %.0f\n",fnamecmp,(double)st.st_size);
367+ rprintf(FINFO,"gen mapped %s of size %.0f\n",fname,(double)st.st_size);
368
369 s = generate_sums(buf,st.st_size,adapt_block_size(file, block_size));
370
371diff -urN rsync-2.5.4/options.c rsync-2.5.4-fuzzy/options.c
372--- rsync-2.5.4/options.c Thu Feb 28 09:49:57 2002
373+++ rsync-2.5.4-fuzzy/options.c Wed Apr 3 16:43:54 2002
374@@ -73,6 +73,7 @@
375 #else
376 int modify_window=0;
377 #endif
378+int fuzzy=0;
379 int blocking_io=-1;
380
381 /** Network address family. **/
382@@ -245,6 +246,7 @@
383 rprintf(F," --bwlimit=KBPS limit I/O bandwidth, KBytes per second\n");
384 rprintf(F," --write-batch=PREFIX write batch fileset starting with PREFIX\n");
385 rprintf(F," --read-batch=PREFIX read batch fileset starting with PREFIX\n");
386+ rprintf(F," --fuzzy use similar file as basis if it doesn't exist\n");
387 rprintf(F," -h, --help show this help screen\n");
388 #ifdef INET6
389 rprintf(F," -4 prefer IPv4\n");
390@@ -340,6 +342,7 @@
391 {"hard-links", 'H', POPT_ARG_NONE, &preserve_hard_links},
392 {"read-batch", 0, POPT_ARG_STRING, &batch_prefix, OPT_READ_BATCH},
393 {"write-batch", 0, POPT_ARG_STRING, &batch_prefix, OPT_WRITE_BATCH},
394+ {"fuzzy", 0, POPT_ARG_NONE, &fuzzy},
395 #ifdef INET6
396 {0, '4', POPT_ARG_VAL, &default_af_hint, AF_INET },
397 {0, '6', POPT_ARG_VAL, &default_af_hint, AF_INET6 },
398@@ -757,7 +760,9 @@
399 args[ac++] = "--compare-dest";
400 args[ac++] = compare_dest;
401 }
402-
403+
404+ if (fuzzy && am_sender)
405+ args[ac++] = "--fuzzy";
406
407 *argc = ac;
408 }
409diff -urN rsync-2.5.4/proto.h rsync-2.5.4-fuzzy/proto.h
410--- rsync-2.5.4/proto.h Sat Feb 23 11:05:06 2002
411+++ rsync-2.5.4-fuzzy/proto.h Wed Apr 3 16:35:25 2002
412@@ -256,3 +256,6 @@
413 int cmp_modtime(time_t file1, time_t file2);
414 int _Insure_trap_error(int a1, int a2, int a3, int a4, int a5, int a6);
415 int sys_gettimeofday(struct timeval *tv);
416+int do_open_regular(char *fname);
417+int open_alternate_base_fuzzy(const char *fname);
418+int open_alternate_base_comparedir(const char *fname);
419diff -urN rsync-2.5.4/receiver.c rsync-2.5.4-fuzzy/receiver.c
420--- rsync-2.5.4/receiver.c Thu Feb 14 05:42:20 2002
421+++ rsync-2.5.4-fuzzy/receiver.c Wed Apr 3 16:46:46 2002
422@@ -36,6 +36,7 @@
423 extern char *compare_dest;
424 extern int make_backups;
425 extern char *backup_suffix;
426+extern int fuzzy;
427
428 static struct delete_list {
429 DEV64_T dev;
430@@ -307,8 +308,6 @@
431 char *fname;
432 char template[MAXPATHLEN];
433 char fnametmp[MAXPATHLEN];
434- char *fnamecmp;
435- char fnamecmpbuf[MAXPATHLEN];
436 struct map_struct *buf;
437 int i;
438 struct file_struct *file;
439@@ -366,28 +365,24 @@
440 if (verbose > 2)
441 rprintf(FINFO,"recv_files(%s)\n",fname);
442
443- fnamecmp = fname;
444-
445 /* open the file */
446- fd1 = do_open(fnamecmp, O_RDONLY, 0);
447+ fd1 = do_open(fname, O_RDONLY, 0);
448
449- if ((fd1 == -1) && (compare_dest != NULL)) {
450- /* try the file at compare_dest instead */
451- snprintf(fnamecmpbuf,MAXPATHLEN,"%s/%s",
452- compare_dest,fname);
453- fnamecmp = fnamecmpbuf;
454- fd1 = do_open(fnamecmp, O_RDONLY, 0);
455- }
456+ if (fd1 == -1 && compare_dest != NULL)
457+ fd1 = open_alternate_base_comparedir(fname);
458+
459+ if (fd1 == -1 && fuzzy)
460+ fd1 = open_alternate_base_fuzzy(fname);
461
462 if (fd1 != -1 && do_fstat(fd1,&st) != 0) {
463- rprintf(FERROR,"fstat %s : %s\n",fnamecmp,strerror(errno));
464+ rprintf(FERROR,"fstat %s : %s\n",fname,strerror(errno));
465 receive_data(f_in,NULL,-1,NULL,file->length);
466 close(fd1);
467 continue;
468 }
469
470 if (fd1 != -1 && !S_ISREG(st.st_mode)) {
471- rprintf(FERROR,"%s : not a regular file (recv_files)\n",fnamecmp);
472+ rprintf(FERROR,"%s : not a regular file (recv_files)\n",fname);
473 receive_data(f_in,NULL,-1,NULL,file->length);
474 close(fd1);
475 continue;
476@@ -403,7 +398,7 @@
477 if (fd1 != -1 && st.st_size > 0) {
478 buf = map_file(fd1,st.st_size);
479 if (verbose > 2)
480- rprintf(FINFO,"recv mapped %s of size %.0f\n",fnamecmp,(double)st.st_size);
481+ rprintf(FINFO,"recv mapped %s of size %.0f\n",fname,(double)st.st_size);
482 } else {
483 buf = NULL;
484 }
485