1 From rusty@rustcorp.com.au Wed Apr 3 17:18:42 2002
2 Return-Path: <rusty@rustcorp.com.au>
3 Delivered-To: mbp@samba.org
4 Received: from wagner.rustcorp.com.au (sydney1.au.ibm.com [202.135.142.193])
5 by lists.samba.org (Postfix) with ESMTP id EA7B849DC
6 for <mbp@samba.org>; Tue, 2 Apr 2002 23:06:29 -0800 (PST)
7 Received: from wagner.rustcorp.com.au ([127.0.0.1] helo=rustcorp.com.au)
8 by wagner.rustcorp.com.au with esmtp (Exim 3.35 #1 (Debian))
10 for <mbp@samba.org>; Wed, 03 Apr 2002 17:08:57 +1000
11 From: Rusty Russell <rusty@rustcorp.com.au>
12 To: Martin Pool <mbp@samba.org>
13 Subject: Re: gzip patch
14 In-reply-to: Your message of "Wed, 03 Apr 2002 12:04:59 +1000."
15 <20020403020455.GC18851@samba.org>
16 Date: Wed, 03 Apr 2002 17:08:57 +1000
17 Sender: rusty@rustcorp.com.au
18 Message-Id: <E16set7-0000pL-00@wagner.rustcorp.com.au>
24 In message <20020403020455.GC18851@samba.org> you write:
27 > I think you said the other day that you had a working --rsyncable
28 > patch for gzip. Could I have it please?
32 Just got your mail, sorry for the delay. Found old patch on
33 google, and updated it for 2.5.4 (I know, but that's what apt-get source gave me).
36 Compiles, otherwise untested.
39 Anyone who quotes me in their sig is an idiot. -- Rusty Russell.
41 diff -urN rsync-2.5.4/Makefile.in rsync-2.5.4-fuzzy/Makefile.in
42 --- rsync-2.5.4/Makefile.in Tue Feb 26 05:48:25 2002
43 +++ rsync-2.5.4-fuzzy/Makefile.in Wed Apr 3 16:35:55 2002
45 ZLIBOBJ=zlib/deflate.o zlib/infblock.o zlib/infcodes.o zlib/inffast.o \
46 zlib/inflate.o zlib/inftrees.o zlib/infutil.o zlib/trees.o \
47 zlib/zutil.o zlib/adler32.o
48 -OBJS1=rsync.o generator.o receiver.o cleanup.o sender.o exclude.o util.o main.o checksum.o match.o syscall.o log.o backup.o
49 +OBJS1=rsync.o generator.o receiver.o cleanup.o sender.o exclude.o util.o main.o checksum.o match.o syscall.o log.o backup.o alternate.o
50 OBJS2=options.o flist.o io.o compat.o hlink.o token.o uidlist.o socket.o fileio.o batch.o \
52 DAEMON_OBJ = params.o loadparm.o clientserver.o access.o connection.o authenticate.o
53 diff -urN rsync-2.5.4/alternate.c rsync-2.5.4-fuzzy/alternate.c
54 --- rsync-2.5.4/alternate.c Thu Jan 1 10:00:00 1970
55 +++ rsync-2.5.4-fuzzy/alternate.c Wed Apr 3 17:04:15 2002
59 +extern char *compare_dest;
62 +/* Alternate methods for opening files, if local doesn't exist */
63 +/* Sanity check that we are about to open regular file */
64 +int do_open_regular(char *fname)
68 + if (do_stat(fname, &st) == 0 && S_ISREG(st.st_mode))
69 + return do_open(fname, O_RDONLY, 0);
74 +static void split_names(char *fname, char **dirname, char **basename)
78 + slash = strrchr(fname, '/');
82 + *basename = slash+1;
89 +static unsigned int measure_name(const char *name,
90 + const char *basename,
93 + int namelen = strlen(name);
94 + int extlen = strlen(ext);
95 + unsigned int score = 0;
97 + /* Extensions must match */
98 + if (namelen <= extlen || strcmp(name+namelen-extlen, ext) != 0)
101 + /* Now score depends on similarity of prefix */
102 + for (; *name==*basename && *name; name++, basename++)
107 +int open_alternate_base_fuzzy(const char *fname)
111 + char *basename, *dirname;
112 + char mangled_name[MAXPATHLEN];
113 + char bestname[MAXPATHLEN];
114 + unsigned int bestscore = 0;
117 + /* FIXME: can we assume fname fits here? */
118 + strcpy(mangled_name, fname);
120 + split_names(mangled_name, &dirname, &basename);
121 + d = opendir(dirname);
123 + rprintf(FERROR,"recv_generator opendir(%s): %s\n",
124 + dirname,strerror(errno));
128 + /* Get final extension, eg. .gz; never full basename though. */
129 + ext = strrchr(basename + 1, '.');
131 + ext = basename + strlen(basename); /* ext = "" */
133 + while ((di = readdir(d)) != NULL) {
134 + const char *dname = d_name(di);
135 + unsigned int score;
137 + if (strcmp(dname,".")==0 ||
138 + strcmp(dname,"..")==0)
141 + score = measure_name(dname, basename, ext);
143 + rprintf(FINFO,"fuzzy score for %s = %u\n",
145 + if (score > bestscore) {
146 + strcpy(bestname, dname);
152 + /* Found a candidate. */
153 + if (bestscore != 0) {
154 + char fuzzyname[MAXPATHLEN];
156 + snprintf(fuzzyname,MAXPATHLEN,"%s/%s", dirname, bestname);
158 + rprintf(FINFO,"fuzzy match %s->%s\n",
160 + return do_open_regular(fuzzyname);
165 +int open_alternate_base_comparedir(const char *fname)
167 + char fnamebuf[MAXPATHLEN];
168 + /* try the file at compare_dest instead */
169 + snprintf(fnamebuf,MAXPATHLEN,"%s/%s",compare_dest,fname);
171 + /* FIXME: now follows symlinks... */
172 + return do_open_regular(fnamebuf);
174 diff -urN rsync-2.5.4/generator.c rsync-2.5.4-fuzzy/generator.c
175 --- rsync-2.5.4/generator.c Fri Feb 8 03:36:12 2002
176 +++ rsync-2.5.4-fuzzy/generator.c Wed Apr 3 17:00:06 2002
178 extern int always_checksum;
179 extern int modify_window;
180 extern char *compare_dest;
184 /* choose whether to skip a particular file */
185 static int skip_file(char *fname,
186 - struct file_struct *file, STRUCT_STAT *st)
187 + struct file_struct *file, const STRUCT_STAT *st)
189 if (st->st_size != file->length) {
195 +/* Returns -1 for can't open (null file), -2 for skip */
196 +static int open_base_file(struct file_struct *file,
203 + if (statret == 0) {
204 + if (S_ISREG(st->st_mode)) {
206 + && cmp_modtime(st->st_mtime, file->modtime) > 0) {
208 + rprintf(FINFO,"%s is newer\n",fname);
211 + if (skip_file(fname, file, st)) {
212 + set_perms(fname, file, st, 1);
215 + fd = do_open(fname, O_RDONLY, 0);
217 + rprintf(FERROR,"failed to open %s, continuing : %s\n",fname,strerror(errno));
222 + /* Try to use symlink contents */
223 + if (S_ISLNK(st->st_mode)) {
224 + fd = do_open_regular(fname);
225 + /* Don't delete yet; receiver will need it */
227 + if (delete_file(fname) != 0) {
236 + if (fd == -1 && compare_dest != NULL)
237 + fd = open_alternate_base_comparedir(fname);
239 + if (fd == -1 && fuzzy)
240 + fd = open_alternate_base_fuzzy(fname);
242 + /* Update stat to understand size */
244 + if (do_fstat(fd, st) != 0)
245 + rprintf(FERROR,"fstat %s : %s\n",fname,strerror(errno));
252 * Acts on file number I from FLIST, whose name is fname.
254 struct sum_struct *s;
256 struct file_struct *file = flist->files[i];
258 - char fnamecmpbuf[MAXPATHLEN];
259 - extern char *compare_dest;
260 extern int list_only;
261 extern int preserve_perms;
262 extern int only_existing;
263 @@ -341,82 +393,29 @@
269 - if ((statret == -1) && (compare_dest != NULL)) {
270 - /* try the file at compare_dest instead */
271 - int saveerrno = errno;
272 - snprintf(fnamecmpbuf,MAXPATHLEN,"%s/%s",compare_dest,fname);
273 - statret = link_stat(fnamecmpbuf,&st);
274 - if (!S_ISREG(st.st_mode))
279 - fnamecmp = fnamecmpbuf;
282 - if (statret == -1) {
283 - if (errno == ENOENT) {
284 - write_int(f_out,i);
285 - if (!dry_run) send_sums(NULL,f_out);
288 - rprintf(FERROR, RSYNC_NAME
289 - ": recv_generator failed to open \"%s\": %s\n",
290 - fname, strerror(errno));
295 - if (!S_ISREG(st.st_mode)) {
296 - if (delete_file(fname) != 0) {
300 - /* now pretend the file didn't exist */
301 - write_int(f_out,i);
302 - if (!dry_run) send_sums(NULL,f_out);
306 - if (opt_ignore_existing && fnamecmp == fname) {
308 - rprintf(FINFO,"%s exists\n",fname);
312 - if (update_only && cmp_modtime(st.st_mtime,file->modtime)>0 && fnamecmp == fname) {
313 + /* Failed to stat for some other reason. */
314 + if (statret == -1 && errno != ENOENT) {
316 - rprintf(FINFO,"%s is newer\n",fname);
317 + rprintf(FERROR, RSYNC_NAME
318 + ": recv_generator failed to open \"%s\": %s\n",
319 + fname, strerror(errno));
323 - if (skip_file(fname, file, &st)) {
324 - if (fnamecmp == fname)
325 - set_perms(fname,file,&st,1);
330 - write_int(f_out,i);
331 + fd = open_base_file(file, fname, statret, &st);
337 - write_int(f_out,i);
338 - send_sums(NULL,f_out);
342 - /* open the file */
343 - fd = do_open(fnamecmp, O_RDONLY, 0);
346 - rprintf(FERROR,RSYNC_NAME": failed to open \"%s\", continuing : %s\n",fnamecmp,strerror(errno));
347 - /* pretend the file didn't exist */
348 + if ((whole_file || dry_run) && fd != -1) {
354 + /* the file didn't exist, or we can pretend it doesn't */
356 - send_sums(NULL,f_out);
358 + send_sums(NULL,f_out);
366 - rprintf(FINFO,"gen mapped %s of size %.0f\n",fnamecmp,(double)st.st_size);
367 + rprintf(FINFO,"gen mapped %s of size %.0f\n",fname,(double)st.st_size);
369 s = generate_sums(buf,st.st_size,adapt_block_size(file, block_size));
371 diff -urN rsync-2.5.4/options.c rsync-2.5.4-fuzzy/options.c
372 --- rsync-2.5.4/options.c Thu Feb 28 09:49:57 2002
373 +++ rsync-2.5.4-fuzzy/options.c Wed Apr 3 16:43:54 2002
381 /** Network address family. **/
383 rprintf(F," --bwlimit=KBPS limit I/O bandwidth, KBytes per second\n");
384 rprintf(F," --write-batch=PREFIX write batch fileset starting with PREFIX\n");
385 rprintf(F," --read-batch=PREFIX read batch fileset starting with PREFIX\n");
386 + rprintf(F," --fuzzy use similar file as basis if it doesn't exist\n");
387 rprintf(F," -h, --help show this help screen\n");
389 rprintf(F," -4 prefer IPv4\n");
391 {"hard-links", 'H', POPT_ARG_NONE, &preserve_hard_links},
392 {"read-batch", 0, POPT_ARG_STRING, &batch_prefix, OPT_READ_BATCH},
393 {"write-batch", 0, POPT_ARG_STRING, &batch_prefix, OPT_WRITE_BATCH},
394 + {"fuzzy", 0, POPT_ARG_NONE, &fuzzy},
396 {0, '4', POPT_ARG_VAL, &default_af_hint, AF_INET },
397 {0, '6', POPT_ARG_VAL, &default_af_hint, AF_INET6 },
399 args[ac++] = "--compare-dest";
400 args[ac++] = compare_dest;
404 + if (fuzzy && am_sender)
405 + args[ac++] = "--fuzzy";
409 diff -urN rsync-2.5.4/proto.h rsync-2.5.4-fuzzy/proto.h
410 --- rsync-2.5.4/proto.h Sat Feb 23 11:05:06 2002
411 +++ rsync-2.5.4-fuzzy/proto.h Wed Apr 3 16:35:25 2002
413 int cmp_modtime(time_t file1, time_t file2);
414 int _Insure_trap_error(int a1, int a2, int a3, int a4, int a5, int a6);
415 int sys_gettimeofday(struct timeval *tv);
416 +int do_open_regular(char *fname);
417 +int open_alternate_base_fuzzy(const char *fname);
418 +int open_alternate_base_comparedir(const char *fname);
419 diff -urN rsync-2.5.4/receiver.c rsync-2.5.4-fuzzy/receiver.c
420 --- rsync-2.5.4/receiver.c Thu Feb 14 05:42:20 2002
421 +++ rsync-2.5.4-fuzzy/receiver.c Wed Apr 3 16:46:46 2002
423 extern char *compare_dest;
424 extern int make_backups;
425 extern char *backup_suffix;
428 static struct delete_list {
432 char template[MAXPATHLEN];
433 char fnametmp[MAXPATHLEN];
435 - char fnamecmpbuf[MAXPATHLEN];
436 struct map_struct *buf;
438 struct file_struct *file;
439 @@ -366,28 +365,24 @@
441 rprintf(FINFO,"recv_files(%s)\n",fname);
446 - fd1 = do_open(fnamecmp, O_RDONLY, 0);
447 + fd1 = do_open(fname, O_RDONLY, 0);
449 - if ((fd1 == -1) && (compare_dest != NULL)) {
450 - /* try the file at compare_dest instead */
451 - snprintf(fnamecmpbuf,MAXPATHLEN,"%s/%s",
452 - compare_dest,fname);
453 - fnamecmp = fnamecmpbuf;
454 - fd1 = do_open(fnamecmp, O_RDONLY, 0);
456 + if (fd1 == -1 && compare_dest != NULL)
457 + fd1 = open_alternate_base_comparedir(fname);
459 + if (fd1 == -1 && fuzzy)
460 + fd1 = open_alternate_base_fuzzy(fname);
462 if (fd1 != -1 && do_fstat(fd1,&st) != 0) {
463 - rprintf(FERROR,"fstat %s : %s\n",fnamecmp,strerror(errno));
464 + rprintf(FERROR,"fstat %s : %s\n",fname,strerror(errno));
465 receive_data(f_in,NULL,-1,NULL,file->length);
470 if (fd1 != -1 && !S_ISREG(st.st_mode)) {
471 - rprintf(FERROR,"%s : not a regular file (recv_files)\n",fnamecmp);
472 + rprintf(FERROR,"%s : not a regular file (recv_files)\n",fname);
473 receive_data(f_in,NULL,-1,NULL,file->length);
477 if (fd1 != -1 && st.st_size > 0) {
478 buf = map_file(fd1,st.st_size);
480 - rprintf(FINFO,"recv mapped %s of size %.0f\n",fnamecmp,(double)st.st_size);
481 + rprintf(FINFO,"recv mapped %s of size %.0f\n",fname,(double)st.st_size);