| 1 | To: rsync@lists.samba.org |
| 2 | From: "Jason M. Felice" <jfelice@cronosys.com> |
| 3 | Subject: [patch] Add `--link-by-hash' option (rev 5). |
| 4 | Date: Mon, 23 Feb 2004 13:29:08 -0500 |
| 5 | |
| 6 | This patch adds the --link-by-hash=DIR option, which hard links received |
| 7 | files in a link farm arranged by MD4 file hash. The result is that the system |
| 8 | will only store one copy of the unique contents of each file, regardless of |
| 9 | the file's name. |
| 10 | |
| 11 | (rev 5) |
| 12 | * Fixed silly logic error. |
| 13 | |
| 14 | (rev 4) |
| 15 | * Updated for committed robust_rename() patch, other changes in CVS. |
| 16 | |
| 17 | (rev 3) |
| 18 | * Don't link empty files. |
| 19 | * Roll over to new file when filesystem maximum link count is reached. |
| 20 | * If link fails for another reason, leave non-linked file there. |
| 21 | * Depends on rsync-rename.diff |
| 22 | |
| 23 | (rev 2) |
| 24 | * This revision is actually against CVS HEAD (I didn't realize I was working |
| 25 | from a stale rsync'd CVS). |
| 26 | * Apply permissions after linking (permissions were lost if we already had |
| 27 | a copy of the file in the link farm). |
| 28 | |
| 29 | Patch Summary: |
| 30 | |
| 31 | -1 +1 Makefile.in |
| 32 | -0 +351 hashlink.c (new) |
| 33 | -1 +22 options.c |
| 34 | -0 +6 proto.h |
| 35 | -6 +21 receiver.c |
| 36 | -2 +8 rsync.c |
| 37 | -0 +8 rsync.h |
| 38 | |
| 39 | --- hashlink.c 1969-12-31 19:00:00.000000000 -0500 |
| 40 | +++ hashlink.c 2004-02-23 10:30:45.000000000 -0500 |
| 41 | @@ -0,0 +1,351 @@ |
| 42 | +/* |
| 43 | + Copyright (C) Cronosys, LLC 2004 |
| 44 | + |
| 45 | + This program is free software; you can redistribute it and/or modify |
| 46 | + it under the terms of the GNU General Public License as published by |
| 47 | + the Free Software Foundation; either version 2 of the License, or |
| 48 | + (at your option) any later version. |
| 49 | + |
| 50 | + This program is distributed in the hope that it will be useful, |
| 51 | + but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 52 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 53 | + GNU General Public License for more details. |
| 54 | + |
| 55 | + You should have received a copy of the GNU General Public License |
| 56 | + along with this program; if not, write to the Free Software |
| 57 | + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
| 58 | +*/ |
| 59 | + |
| 60 | +/* This file contains code used by the --link-by-hash option. */ |
| 61 | + |
| 62 | +#include "rsync.h" |
| 63 | + |
| 64 | +extern char *link_by_hash_dir; |
| 65 | + |
| 66 | +#ifdef HAVE_LINK |
| 67 | + |
| 68 | +char* make_hash_name(struct file_struct *file) |
| 69 | +{ |
| 70 | + char hash[34], *dst; /* 8 hex + '/' + 24 hex + NUL; 33 overflowed by one */ |
| 71 | + unsigned char *src; |
| 72 | + unsigned char c; |
| 73 | + int i; |
| 74 | + /* Build "<link_by_hash_dir>/<8 hex>/<24 hex>" from 16 bytes of file->u.sum. */ |
| 75 | + src = (unsigned char*)file->u.sum; |
| 76 | + for (dst = hash, i = 0; i < 4; i++, src++) { |
| 77 | + c = *src >> 4; |
| 78 | + *(dst++) = (c >= 10) ? (c - 10 + 'a') : (c + '0'); |
| 79 | + c = *src & 0x0f; |
| 80 | + *(dst++) = (c >= 10) ? (c - 10 + 'a') : (c + '0'); |
| 81 | + } |
| 82 | + *dst++ = '/'; |
| 83 | + for (i = 0; i < 12; i++, src++) { |
| 84 | + c = *src >> 4; |
| 85 | + *(dst++) = (c >= 10) ? (c - 10 + 'a') : (c + '0'); |
| 86 | + c = *src & 0x0f; |
| 87 | + *(dst++) = (c >= 10) ? (c - 10 + 'a') : (c + '0'); |
| 88 | + } |
| 89 | + *dst = 0; |
| 90 | + /* asprintf() allocates the result; the caller must free() it. */ |
| 91 | + asprintf(&dst,"%s/%s",link_by_hash_dir,hash); |
| 92 | + return dst; |
| 93 | +} |
| 94 | + |
| 95 | + |
| 96 | +void kill_hashfile(struct hashfile_struct *hashfile) |
| 97 | +{ /* Release one candidate node: its name, its descriptor, then the node. */ |
| 98 | + if (hashfile != NULL) { /* a NULL node is accepted and ignored */ |
| 99 | + free(hashfile->name); |
| 100 | + close(hashfile->fd); |
| 101 | + free(hashfile); |
| 102 | + } |
| 103 | +} |
| 104 | + |
| 105 | + |
| 106 | +void kill_hashfiles(struct hashfile_struct *hashfiles) |
| 107 | +{ /* Destroy every node of the circular list that starts at hashfiles. */ |
| 108 | + struct hashfile_struct *node = hashfiles, *following; |
| 109 | + if (node == NULL) |
| 110 | + return; |
| 111 | + do { |
| 112 | + following = node->next; /* fetch before the node is freed */ |
| 113 | + kill_hashfile(node); |
| 114 | + node = following; |
| 115 | + } while (node != hashfiles); |
| 116 | +} |
| 117 | + |
| 118 | + |
| 119 | +struct hashfile_struct *find_hashfiles(char *hashname, int64 size, long *fnbr) |
| 120 | +{ |
| 121 | + /* Scan directory hashname and return a circular list of its entries whose |
| 122 | + * size equals size, each opened read-only. *fnbr receives the largest |
| 123 | + * numeric entry name seen. Returns NULL when nothing qualifies. */ |
| 124 | + DIR *d; |
| 125 | + struct dirent *di; |
| 126 | + struct hashfile_struct *hashfiles = NULL, *hashfile; |
| 127 | + STRUCT_STAT st; |
| 128 | + long this_fnbr; |
| 129 | + |
| 130 | + *fnbr = 0; |
| 131 | + |
| 132 | + if ((d = opendir(hashname)) == NULL) { |
| 133 | + rprintf(FERROR,"opendir \"%s\": %s\n", hashname, strerror(errno)); |
| 134 | + /* hashname is NOT freed here: the caller still uses and frees it. */ |
| 135 | + return NULL; |
| 136 | + } |
| 137 | + while ((di = readdir(d)) != NULL) { |
| 138 | + if (!strcmp(di->d_name,".") || !strcmp(di->d_name,"..")) |
| 139 | + continue; |
| 140 | + /* Track the largest fnbr in case a new file has to be stored. */ |
| 141 | + this_fnbr = atol(di->d_name); |
| 142 | + if (this_fnbr > *fnbr) |
| 143 | + *fnbr = this_fnbr; |
| 144 | + |
| 145 | + hashfile = (struct hashfile_struct*)malloc(sizeof *hashfile); |
| 146 | + if (hashfile == NULL |
| 147 | + || asprintf(&hashfile->name,"%s/%s",hashname,di->d_name) < 0) { |
| 148 | + rprintf(FERROR,"find_hashfiles: out of memory\n"); |
| 149 | + free(hashfile); /* name is undefined here, so free only the node */ |
| 150 | + break; |
| 151 | + } |
| 152 | + /* Mark as unopened so kill_hashfile() never closes a garbage fd. */ |
| 153 | + hashfile->fd = -1; |
| 154 | + if (do_stat(hashfile->name,&st) == -1) { |
| 155 | + rprintf(FERROR,"%s: %s\n", hashfile->name, strerror(errno)); |
| 156 | + kill_hashfile(hashfile); |
| 157 | + continue; |
| 158 | + } |
| 159 | + if (st.st_size != size) { |
| 160 | + kill_hashfile(hashfile); |
| 161 | + continue; |
| 162 | + } |
| 163 | + hashfile->nlink = st.st_nlink; |
| 164 | + hashfile->fd = open(hashfile->name,O_RDONLY|O_BINARY); |
| 165 | + if (hashfile->fd == -1) { |
| 166 | + rprintf(FERROR,"%s: %s\n", hashfile->name, strerror(errno)); |
| 167 | + kill_hashfile(hashfile); |
| 168 | + continue; |
| 169 | + } |
| 170 | + if (hashfiles == NULL) |
| 171 | + hashfiles = hashfile->next = hashfile->prev = hashfile; |
| 172 | + else { |
| 173 | + hashfile->next = hashfiles; |
| 174 | + hashfile->prev = hashfiles->prev; |
| 175 | + hashfile->next->prev = hashfile; |
| 176 | + hashfile->prev->next = hashfile; |
| 177 | + } |
| 178 | + } |
| 179 | + closedir(d); |
| 180 | + return hashfiles; |
| 181 | +} |
| 182 | + |
| 183 | + |
| 184 | +struct hashfile_struct *compare_hashfiles(int fd,struct hashfile_struct *files) |
| 185 | +{ |
| 186 | + /* Compare the contents of fd, chunk by chunk, with every candidate in |
| 187 | + * the circular list files. Candidates that differ are unlinked from |
| 188 | + * the list and destroyed. Returns the surviving candidate with the |
| 189 | + * lowest link count, detached from the list (the caller destroys it |
| 190 | + * with kill_hashfile()), or NULL when nothing matches or reading fd |
| 191 | + * fails. All other nodes are destroyed; fd itself is not closed. */ |
| 192 | + int amt, hamt, last; |
| 193 | + char buffer[BUFSIZ], cmpbuffer[BUFSIZ]; |
| 194 | + struct hashfile_struct *iter, *next, *best; |
| 195 | + uint32 nlink; |
| 196 | + |
| 197 | + if (!files) |
| 198 | + return NULL; |
| 199 | + |
| 200 | + /* Each pass compares one BUFSIZ chunk of fd with the same chunk of |
| 201 | + * every remaining candidate. */ |
| 202 | + while ((amt = read(fd, buffer, BUFSIZ)) > 0) { |
| 203 | + iter = files; |
| 204 | + do { |
| 205 | + next = iter->next; |
| 206 | + /* Decide now whether iter is the tail: the head may change |
| 207 | + * below, and every node must read each chunk exactly once so |
| 208 | + * that all descriptors stay at the same offset. (Removing |
| 209 | + * the head used to make the loop skip the tail.) */ |
| 210 | + last = (next == files); |
| 211 | + |
| 212 | + hamt = read(iter->fd, cmpbuffer, BUFSIZ); |
| 213 | + if (amt != hamt || memcmp(buffer, cmpbuffer, amt)) { |
| 214 | + if (next == iter) { |
| 215 | + /* The only remaining candidate differs: no match. */ |
| 216 | + kill_hashfile(iter); |
| 217 | + return NULL; |
| 218 | + } |
| 219 | + /* Unlink iter, moving the head forward if iter was it. */ |
| 220 | + iter->next->prev = iter->prev; |
| 221 | + iter->prev->next = iter->next; |
| 222 | + if (iter == files) |
| 223 | + files = next; |
| 224 | + kill_hashfile(iter); |
| 225 | + } |
| 226 | + |
| 227 | + iter = next; |
| 228 | + } while (!last); |
| 229 | + } |
| 230 | + |
| 231 | + if (amt == -1) { |
| 232 | + /* Reading fd failed: give up and release all candidates. */ |
| 233 | + rprintf(FERROR,"%s\n",strerror(errno)); |
| 234 | + kill_hashfiles(files); |
| 235 | + return NULL; |
| 236 | + } |
| 237 | + |
| 238 | + /* If we only have one file left, use it. */ |
| 239 | + if (files == files->next) { |
| 240 | + return files; |
| 241 | + } |
| 242 | + |
| 243 | + /* All files which remain in the list are identical and should have |
| 244 | + * the same size. We pick the one with the lowest link count (we |
| 245 | + * may have rolled over because we hit the maximum link count for |
| 246 | + * the filesystem). */ |
| 247 | + best = iter = files; |
| 248 | + nlink = iter->nlink; |
| 249 | + do { |
| 250 | + if (iter->nlink < nlink) { |
| 251 | + nlink = iter->nlink; |
| 252 | + best = iter; |
| 253 | + } |
| 254 | + iter = iter->next; |
| 255 | + } while (iter != files); |
| 256 | + |
| 257 | + /* Detach best, then destroy whatever is left of the ring. */ |
| 258 | + best->next->prev = best->prev; |
| 259 | + best->prev->next = best->next; |
| 260 | + if (files == best) |
| 261 | + files = files->next; |
| 262 | + kill_hashfiles(files); |
| 263 | + return best; |
| 264 | +} |
| 265 | + |
| 266 | + |
| 267 | +int link_by_hash(char *fnametmp,char *fname,struct file_struct *file) |
| 268 | +{ |
| 269 | + /* Move the received temp file fnametmp into place as fname and tie it |
| 270 | + * into the link farm: hard-link fname to an identical farm entry when |
| 271 | + * one exists, otherwise rename the temp file and add it to the farm as |
| 272 | + * a new entry. Empty files are only renamed. Returns the result of |
| 273 | + * do_link()/robust_rename(), or -1 on an early failure. */ |
| 274 | + STRUCT_STAT st; |
| 275 | + char *hashname; |
| 276 | + int first = 0, rc; |
| 277 | + char *linkname; |
| 278 | + long last_fnbr = 0; |
| 279 | + |
| 280 | + /* Test for an empty file before allocating hashname (it used to leak). */ |
| 281 | + if (file->length == 0) |
| 282 | + return robust_rename(fnametmp,fname,0644); |
| 283 | + hashname = make_hash_name(file); |
| 284 | + |
| 285 | + if (do_stat(hashname, &st) == -1) { |
| 286 | + char *dirname; |
| 287 | + |
| 288 | + /* Directory does not exist. */ |
| 289 | + dirname = strdup(hashname); |
| 290 | + *strrchr(dirname,'/') = 0; |
| 291 | + if (do_mkdir(dirname, 0755) == -1 && errno != EEXIST) { |
| 292 | + rprintf(FERROR, "mkdir %s: %s\n", dirname, strerror(errno)); |
| 293 | + free(hashname); |
| 294 | + free(dirname); |
| 295 | + return robust_rename(fnametmp,fname,0644); |
| 296 | + } |
| 297 | + free(dirname); |
| 298 | + |
| 299 | + if (do_mkdir(hashname, 0755) == -1 && errno != EEXIST) { |
| 300 | + rprintf(FERROR, "mkdir %s: %s\n", hashname, strerror(errno)); |
| 301 | + free(hashname); |
| 302 | + return robust_rename(fnametmp,fname,0644); |
| 303 | + } |
| 304 | + |
| 305 | + first = 1; |
| 306 | + asprintf(&linkname,"%s/0",hashname); |
| 307 | + rprintf(FINFO, "(1) linkname = %s\n", linkname); |
| 308 | + } else { |
| 309 | + struct hashfile_struct *hashfiles, *hashfile; |
| 310 | + int fd; |
| 311 | + |
| 312 | + if (do_stat(fnametmp,&st) == -1) { |
| 313 | + rprintf(FERROR,"%s: %s\n",fnametmp,strerror(errno)); |
| 314 | + free(hashname); |
| 315 | + return -1; |
| 316 | + } |
| 317 | + hashfiles = find_hashfiles(hashname, st.st_size, &last_fnbr); |
| 318 | + |
| 319 | + if (hashfiles == NULL) { |
| 320 | + first = 1; |
| 321 | + asprintf(&linkname,"%s/0",hashname); |
| 322 | + rprintf(FINFO, "(2) linkname = %s\n", linkname); |
| 323 | + } else { |
| 324 | + /* Search for one identical to us. */ |
| 325 | + if ((fd = open(fnametmp,O_RDONLY|O_BINARY)) == -1) { |
| 326 | + rprintf(FERROR,"%s: %s\n",fnametmp, strerror(errno)); |
| 327 | + kill_hashfiles(hashfiles); |
| 328 | + free(hashname); |
| 329 | + return -1; |
| 330 | + } |
| 331 | + hashfile = compare_hashfiles(fd, hashfiles); |
| 332 | + hashfiles = NULL; |
| 333 | + close(fd); /* compare_hashfiles() does not close it */ |
| 334 | + |
| 335 | + if (hashfile) { |
| 336 | + first = 0; |
| 337 | + linkname = strdup(hashfile->name); |
| 338 | + rprintf(FINFO, "(3) linkname = %s\n", linkname); |
| 339 | + kill_hashfile(hashfile); |
| 340 | + } else { |
| 341 | + first = 1; |
| 342 | + asprintf(&linkname, "%s/%ld", hashname, last_fnbr + 1); |
| 343 | + rprintf(FINFO, "(4) linkname = %s\n", linkname); |
| 344 | + } |
| 345 | + } |
| 346 | + } |
| 347 | + |
| 348 | + if (!first) { |
| 349 | + rprintf(FINFO, "link-by-hash (existing): \"%s\" -> %s\n", |
| 350 | + linkname, full_fname(fname)); |
| 351 | + /* NOTE(review): link() fails with EEXIST when fname already exists, |
| 352 | + * which sends updates of existing files to the fallback below. */ |
| 353 | + rc = do_link(linkname, fname); |
| 354 | + if (rc == -1) { |
| 355 | + if (errno == EMLINK) { |
| 356 | + first = 1; |
| 357 | + free(linkname); |
| 358 | + asprintf(&linkname,"%s/%ld",hashname, last_fnbr + 1); |
| 359 | + rprintf(FINFO, "(5) linkname = %s\n", linkname); |
| 360 | + rprintf(FINFO,"link-by-hash: max link count exceeded, starting new file \"%s\".\n", linkname); |
| 361 | + } else { |
| 362 | + rprintf(FERROR,"link \"%s\" -> %s: %s\n", |
| 363 | + linkname,full_fname(fname), strerror(errno)); |
| 364 | + robust_unlink(fname); |
| 365 | + rc = robust_rename(fnametmp,fname,0644); |
| 366 | + } |
| 367 | + } else { |
| 368 | + do_unlink(fnametmp); |
| 369 | + } |
| 370 | + } |
| 371 | + |
| 372 | + if (first) { |
| 373 | + rprintf(FINFO, "link-by-hash (new): %s -> \"%s\"\n", |
| 374 | + full_fname(fname),linkname); |
| 375 | + /* Only add fname to the farm once the new data is really in place; |
| 376 | + * a farm-link failure is logged but keeps the rename's result. */ |
| 377 | + rc = robust_rename(fnametmp,fname,0644); |
| 378 | + if (rc < 0) { |
| 379 | + rprintf(FERROR,"rename \"%s\" -> \"%s\": %s\n", |
| 380 | + full_fname(fnametmp),full_fname(fname), strerror(errno)); |
| 381 | + } else if (do_link(fname,linkname) != 0) { |
| 382 | + rprintf(FERROR,"link \"%s\" -> \"%s\": %s\n", |
| 383 | + full_fname(fname),linkname, strerror(errno)); |
| 384 | + } |
| 385 | + } |
| 386 | + |
| 387 | + free(linkname); |
| 388 | + free(hashname); |
| 389 | + return rc; |
| 390 | +} |
| 391 | + |
| 392 | +#endif |
| 393 | --- Makefile.in 10 Feb 2004 17:06:11 -0000 1.98 |
| 394 | +++ Makefile.in 15 Apr 2004 19:18:59 -0000 |
| 395 | @@ -35,7 +35,7 @@ OBJS1=rsync.o generator.o receiver.o cle |
| 396 | main.o checksum.o match.o syscall.o log.o backup.o |
| 397 | OBJS2=options.o flist.o io.o compat.o hlink.o token.o uidlist.o socket.o \ |
| 398 | fileio.o batch.o clientname.o |
| 399 | -OBJS3=progress.o pipe.o |
| 400 | +OBJS3=progress.o pipe.o hashlink.o |
| 401 | DAEMON_OBJ = params.o loadparm.o clientserver.o access.o connection.o authenticate.o |
| 402 | popt_OBJS=popt/findme.o popt/popt.o popt/poptconfig.o \ |
| 403 | popt/popthelp.o popt/poptparse.o |
| 404 | --- options.c 14 Apr 2004 23:33:34 -0000 1.146 |
| 405 | +++ options.c 15 Apr 2004 19:19:00 -0000 |
| 406 | @@ -121,6 +121,7 @@ char *log_format = NULL; |
| 407 | char *password_file = NULL; |
| 408 | char *rsync_path = RSYNC_PATH; |
| 409 | char *backup_dir = NULL; |
| 410 | +char *link_by_hash_dir = NULL; |
| 411 | char backup_dir_buf[MAXPATHLEN]; |
| 412 | int rsync_port = RSYNC_PORT; |
| 413 | int link_dest = 0; |
| 414 | @@ -266,6 +267,7 @@ void usage(enum logcode F) |
| 415 | rprintf(F," -T --temp-dir=DIR create temporary files in directory DIR\n"); |
| 416 | rprintf(F," --compare-dest=DIR also compare destination files relative to DIR\n"); |
| 417 | rprintf(F," --link-dest=DIR create hardlinks to DIR for unchanged files\n"); |
| 418 | + rprintf(F," --link-by-hash=DIR create hardlinks by hash to DIR for regular files\n"); |
| 419 | rprintf(F," -P equivalent to --partial --progress\n"); |
| 420 | rprintf(F," -z, --compress compress file data\n"); |
| 421 | rprintf(F," -C, --cvs-exclude auto ignore files in the same way CVS does\n"); |
| 422 | @@ -305,7 +307,7 @@ void usage(enum logcode F) |
| 423 | enum {OPT_VERSION = 1000, OPT_SENDER, OPT_EXCLUDE, OPT_EXCLUDE_FROM, |
| 424 | OPT_DELETE_AFTER, OPT_DELETE_EXCLUDED, OPT_LINK_DEST, |
| 425 | OPT_INCLUDE, OPT_INCLUDE_FROM, OPT_MODIFY_WINDOW, |
| 426 | - OPT_READ_BATCH, OPT_WRITE_BATCH, |
| 427 | + OPT_READ_BATCH, OPT_WRITE_BATCH, OPT_LINK_BY_HASH, |
| 428 | OPT_REFUSED_BASE = 9000}; |
| 429 | |
| 430 | static struct poptOption long_options[] = { |
| 431 | @@ -362,6 +364,7 @@ static struct poptOption long_options[] |
| 432 | {"temp-dir", 'T', POPT_ARG_STRING, &tmpdir, 0, 0, 0 }, |
| 433 | {"compare-dest", 0, POPT_ARG_STRING, &compare_dest, 0, 0, 0 }, |
| 434 | {"link-dest", 0, POPT_ARG_STRING, &compare_dest, OPT_LINK_DEST, 0, 0 }, |
| 435 | + {"link-by-hash", 0, POPT_ARG_STRING, 0, OPT_LINK_BY_HASH, 0, 0}, |
| 436 | /* TODO: Should this take an optional int giving the compression level? */ |
| 437 | {"compress", 'z', POPT_ARG_NONE, &do_compression, 0, 0, 0 }, |
| 438 | {"daemon", 0, POPT_ARG_NONE, &daemon_opt, 0, 0, 0 }, |
| 439 | @@ -584,6 +587,19 @@ int parse_arguments(int *argc, const cha |
| 440 | return 0; |
| 441 | #endif |
| 442 | |
| 443 | + case OPT_LINK_BY_HASH: |
| 444 | +#if HAVE_LINK |
| 445 | + link_by_hash_dir = (char *)poptGetOptArg(pc); |
| 446 | + checksum_seed = FIXED_CHECKSUM_SEED; |
| 447 | + break; |
| 448 | +#else |
| 449 | + snprintf(err_buf, sizeof err_buf, |
| 450 | + "hard links are not supported on this %s\n", |
| 451 | + am_server ? "server" : "client"); |
| 452 | + rprintf(FERROR, "ERROR: %s", err_buf); |
| 453 | + return 0; |
| 454 | +#endif |
| 455 | + |
| 456 | default: |
| 457 | /* A large opt value means that set_refuse_options() |
| 458 | * turned this option off (opt-BASE is its index). */ |
| 459 | @@ -951,6 +967,11 @@ void server_options(char **args,int *arg |
| 460 | */ |
| 461 | args[ac++] = link_dest ? "--link-dest" : "--compare-dest"; |
| 462 | args[ac++] = compare_dest; |
| 463 | + } |
| 464 | + |
| 465 | + if (link_by_hash_dir && am_sender) { |
| 466 | + args[ac++] = "--link-by-hash"; |
| 467 | + args[ac++] = link_by_hash_dir; |
| 468 | } |
| 469 | |
| 470 | if (files_from && (!am_sender || remote_filesfrom_file)) { |
| 471 | --- proto.h 14 Apr 2004 23:33:30 -0000 1.188 |
| 472 | +++ proto.h 15 Apr 2004 19:19:00 -0000 |
| 473 | @@ -92,6 +92,12 @@ char *f_name(struct file_struct *f); |
| 474 | void write_sum_head(int f, struct sum_struct *sum); |
| 475 | void recv_generator(char *fname, struct file_struct *file, int i, int f_out); |
| 476 | void generate_files(int f, struct file_list *flist, char *local_name); |
| 477 | +char* make_hash_name(struct file_struct *file); |
| 478 | +void kill_hashfile(struct hashfile_struct *hashfile); |
| 479 | +void kill_hashfiles(struct hashfile_struct *hashfiles); |
| 480 | +struct hashfile_struct *find_hashfiles(char *hashname, int64 size, long *fnbr); |
| 481 | +struct hashfile_struct *compare_hashfiles(int fd,struct hashfile_struct *files); |
| 482 | +int link_by_hash(char *fnametmp,char *fname,struct file_struct *file); |
| 483 | void init_hard_links(struct file_list *flist); |
| 484 | int hard_link_check(struct file_struct *file, int skip); |
| 485 | void do_hard_links(void); |
| 486 | --- receiver.c 23 Mar 2004 16:50:40 -0000 1.75 |
| 487 | +++ receiver.c 15 Apr 2004 19:19:00 -0000 |
| 488 | @@ -45,6 +45,7 @@ extern int cleanup_got_literal; |
| 489 | extern int module_id; |
| 490 | extern int ignore_errors; |
| 491 | extern int orig_umask; |
| 492 | +extern char *link_by_hash_dir; |
| 493 | |
| 494 | static void delete_one(char *fn, int is_dir) |
| 495 | { |
| 496 | @@ -190,10 +191,11 @@ static int get_tmpname(char *fnametmp, c |
| 497 | |
| 498 | |
| 499 | static int receive_data(int f_in,struct map_struct *mapbuf,int fd,char *fname, |
| 500 | - OFF_T total_size) |
| 501 | + OFF_T total_size,char *md4) |
| 502 | { |
| 503 | int i; |
| 504 | struct sum_struct sum; |
| 505 | + struct mdfour mdfour_data; |
| 506 | unsigned int len; |
| 507 | OFF_T offset = 0; |
| 508 | OFF_T offset2; |
| 509 | @@ -203,7 +205,9 @@ static int receive_data(int f_in,struct |
| 510 | char *map=NULL; |
| 511 | |
| 512 | read_sum_head(f_in, &sum); |
| 513 | - |
| 514 | + if (md4) |
| 515 | + mdfour_begin(&mdfour_data); |
| 516 | + |
| 517 | sum_init(); |
| 518 | |
| 519 | while ((i = recv_token(f_in, &data)) != 0) { |
| 520 | @@ -220,6 +224,8 @@ static int receive_data(int f_in,struct |
| 521 | cleanup_got_literal = 1; |
| 522 | |
| 523 | sum_update(data,i); |
| 524 | + if (md4) |
| 525 | + mdfour_update(&mdfour_data,data,i); |
| 526 | |
| 527 | if (fd != -1 && write_file(fd,data,i) != i) { |
| 528 | rprintf(FERROR, "write failed on %s: %s\n", |
| 529 | @@ -247,6 +253,8 @@ static int receive_data(int f_in,struct |
| 530 | |
| 531 | see_token(map, len); |
| 532 | sum_update(map,len); |
| 533 | + if (md4) |
| 534 | + mdfour_update(&mdfour_data,map,len); |
| 535 | } |
| 536 | |
| 537 | if (fd != -1 && write_file(fd,map,len) != (int) len) { |
| 538 | @@ -269,6 +277,8 @@ static int receive_data(int f_in,struct |
| 539 | } |
| 540 | |
| 541 | sum_end(file_sum1); |
| 542 | + if (md4) |
| 543 | + mdfour_result(&mdfour_data, (unsigned char*)md4); |
| 544 | |
| 545 | read_buf(f_in,file_sum2,MD4_SUM_LENGTH); |
| 546 | if (verbose > 2) { |
| 547 | @@ -372,7 +382,7 @@ int recv_files(int f_in,struct file_list |
| 548 | if (fd1 != -1 && do_fstat(fd1,&st) != 0) { |
| 549 | rprintf(FERROR, "fstat %s failed: %s\n", |
| 550 | full_fname(fnamecmp), strerror(errno)); |
| 551 | - receive_data(f_in,NULL,-1,NULL,file->length); |
| 552 | + receive_data(f_in,NULL,-1,NULL,file->length,NULL); |
| 553 | close(fd1); |
| 554 | continue; |
| 555 | } |
| 556 | @@ -385,7 +395,7 @@ int recv_files(int f_in,struct file_list |
| 557 | */ |
| 558 | rprintf(FERROR,"recv_files: %s is a directory\n", |
| 559 | full_fname(fnamecmp)); |
| 560 | - receive_data(f_in, NULL, -1, NULL, file->length); |
| 561 | + receive_data(f_in,NULL,-1,NULL,file->length,NULL); |
| 562 | close(fd1); |
| 563 | continue; |
| 564 | } |
| 565 | @@ -437,7 +447,7 @@ int recv_files(int f_in,struct file_list |
| 566 | if (fd2 == -1) { |
| 567 | rprintf(FERROR, "mkstemp %s failed: %s\n", |
| 568 | full_fname(fnametmp), strerror(errno)); |
| 569 | - receive_data(f_in,mapbuf,-1,NULL,file->length); |
| 570 | + receive_data(f_in,mapbuf,-1,NULL,file->length,NULL); |
| 571 | if (mapbuf) unmap_file(mapbuf); |
| 572 | if (fd1 != -1) close(fd1); |
| 573 | continue; |
| 574 | @@ -450,7 +460,12 @@ int recv_files(int f_in,struct file_list |
| 575 | } |
| 576 | |
| 577 | /* recv file data */ |
| 578 | - recv_ok = receive_data(f_in,mapbuf,fd2,fname,file->length); |
| 579 | +#ifdef HAVE_LINK |
| 580 | + if (link_by_hash_dir) { |
| 581 | + file->u.sum = (char*)malloc (MD4_SUM_LENGTH); |
| 582 | + } |
| 583 | +#endif |
| 584 | + recv_ok = receive_data(f_in,mapbuf,fd2,fname,file->length,file->u.sum); |
| 585 | |
| 586 | log_recv(file, &initial_stats); |
| 587 | |
| 588 | --- rsync.c 23 Mar 2004 16:16:15 -0000 1.135 |
| 589 | +++ rsync.c 15 Apr 2004 19:19:00 -0000 |
| 590 | @@ -33,6 +33,7 @@ extern int preserve_uid; |
| 591 | extern int preserve_gid; |
| 592 | extern int preserve_perms; |
| 593 | extern int make_backups; |
| 594 | +extern char *link_by_hash_dir; |
| 595 | |
| 596 | |
| 597 | /* |
| 598 | @@ -235,8 +236,12 @@ void finish_transfer(char *fname, char * |
| 599 | if (make_backups && !make_backup(fname)) |
| 600 | return; |
| 601 | |
| 602 | - /* move tmp file over real file */ |
| 603 | - ret = robust_rename(fnametmp, fname, file->mode & INITACCESSPERMS); |
| 604 | +#ifdef HAVE_LINK |
| 605 | + if (link_by_hash_dir) |
| 606 | + ret = link_by_hash(fnametmp,fname,file); |
| 607 | + else |
| 608 | +#endif |
| 609 | + ret = robust_rename(fnametmp, fname, file->mode & INITACCESSPERMS); |
| 610 | if (ret < 0) { |
| 611 | rprintf(FERROR, "%s %s -> \"%s\": %s\n", |
| 612 | ret == -2 ? "copy" : "rename", |
| 613 | --- rsync.h 14 Apr 2004 23:33:37 -0000 1.196 |
| 614 | +++ rsync.h 15 Apr 2004 19:19:00 -0000 |
| 615 | @@ -519,6 +519,14 @@ struct stats { |
| 616 | int current_file_index; |
| 617 | }; |
| 618 | |
| 619 | +struct hashfile_struct { /* one link-farm candidate; nodes form a circular doubly-linked list */ |
| 620 | + struct hashfile_struct *next; /* following node (the node itself when alone) */ |
| 621 | + struct hashfile_struct *prev; /* preceding node (the node itself when alone) */ |
| 622 | + char *name; /* allocated path of the candidate; freed by kill_hashfile() */ |
| 623 | + int fd; /* descriptor opened read-only on name by find_hashfiles() */ |
| 624 | + uint32 nlink; /* st_nlink of the candidate when find_hashfiles() stat'ed it */ |
| 625 | +}; |
| 626 | + |
| 627 | |
| 628 | /* we need this function because of the silly way in which duplicate |
| 629 | entries are handled in the file lists - we can't change this |