Added some DB-access routines to help rsync keep extra filesystem info about the files it is dealing with. This adds both the --db=CONFIG_FILE option and the "db config" daemon parameter. For the moment this only adds checksum caching when the --checksum option is used. Future improvements may include: - Updating of MD5 checksums when transferring any file, even w/o -c. We should be able to extend this to work for MD4 checksums too if we make the sender force checksum_seed to 0 when using a DB and having the receiving side check to see if it got a 0 checksum_seed. (We probably don't want to compute 2 MD4 checksums for the case where the checksum_seed is non-zero.) - Caching of path info that allows for the finding of files to use for moving/linking/copying/alternate-basis-use. - Extend DB support beyond MySQL and SQLite (PostgreSQL?). To use this patch, run these commands for a successful build: patch -p1 st_size; md_context m; int32 remainder; int fd; @@ -112,7 +113,7 @@ void file_checksum(char *fname, char *sum, OFF_T size) if (fd == -1) return; - buf = map_file(fd, size, MAX_MAP_SIZE, CSUM_CHUNK); + buf = map_file(fd, len, MAX_MAP_SIZE, CSUM_CHUNK); if (protocol_version >= 30) { md5_begin(&m); @@ -146,6 +147,9 @@ void file_checksum(char *fname, char *sum, OFF_T size) mdfour_result(&m, (uchar *)sum); } + if (use_db) + db_set_checksum(fname, st_p, sum); + close(fd); unmap_file(buf); } diff --git a/cleanup.c b/cleanup.c --- a/cleanup.c +++ b/cleanup.c @@ -26,6 +26,7 @@ extern int am_server; extern int am_daemon; extern int am_receiver; extern int io_error; +extern int use_db; extern int keep_partial; extern int got_xfer_error; extern int protocol_version; @@ -140,6 +141,12 @@ NORETURN void _exit_cleanup(int code, const char *file, int line) /* FALLTHROUGH */ #include "case_N.h" + if (use_db) + db_disconnect(); + + /* FALLTHROUGH */ +#include "case_N.h" + if (cleanup_child_pid != -1) { int status; int pid = wait_process(cleanup_child_pid, &status, WNOHANG); diff --git a/clientserver.c b/clientserver.c --- a/clientserver.c +++ b/clientserver.c @@ -42,13 +42,16 @@ extern int numeric_ids; extern int filesfrom_fd; extern int remote_protocol; extern int protocol_version; +extern int always_checksum; extern int io_timeout; extern int no_detach; +extern int use_db; extern int write_batch; extern int default_af_hint; extern int logfile_format_has_i; extern int logfile_format_has_o_or_i; extern mode_t orig_umask; +extern char *db_config; extern char *bind_address; extern char *config_file; extern char *logfile_format; @@ -667,6 +670,9 @@ static int rsync_module(int f_in, int f_out, int i, const char *addr, const char log_init(1); + if (*lp_db_config(i)) + db_read_config(FLOG, lp_db_config(i)); + #ifdef HAVE_PUTENV if (*lp_prexfer_exec(i) || *lp_postxfer_exec(i)) { int status; @@ -862,6 +868,10 @@ static int rsync_module(int f_in, int f_out, int i, const char *addr, const char am_server = 1; /* Don't let someone try to be tricky. */ quiet = 0; + db_config = NULL; + if (!always_checksum) + use_db = 0; + if (lp_ignore_errors(module_id)) ignore_errors = 1; if (write_batch < 0) diff --git a/configure.in b/configure.in --- a/configure.in +++ b/configure.in @@ -337,7 +337,7 @@ AC_CHECK_HEADERS(sys/fcntl.h sys/select.h fcntl.h sys/time.h sys/unistd.h \ sys/un.h sys/attr.h mcheck.h arpa/inet.h arpa/nameser.h locale.h \ netdb.h malloc.h float.h limits.h iconv.h libcharset.h langinfo.h \ sys/acl.h acl/libacl.h attr/xattr.h sys/xattr.h sys/extattr.h \ - popt.h popt/popt.h) + popt.h popt/popt.h mysql/mysql.h sqlite3.h) AC_HEADER_MAJOR AC_CACHE_CHECK([if makedev takes 3 args],rsync_cv_MAKEDEV_TAKES_3_ARGS,[ @@ -1019,6 +1019,29 @@ if test x"$enable_acl_support" = x"no" -o x"$enable_xattr_support" = x"no" -o x" fi fi +AC_CHECK_PROG(MYSQL_CONFIG, mysql_config, 1, 0) +if test x$MYSQL_CONFIG = x1; then + AC_MSG_CHECKING(for mysql version >= 4) + mysql_version=`mysql_config --version` + mysql_major_version=`echo $mysql_version | sed 's/\..*//'` + if test $mysql_major_version -lt 4; then + AC_MSG_RESULT(no.. skipping MySQL) + else + AC_MSG_RESULT(yes) + + MYSQL_CFLAGS=`mysql_config --cflags` + MYSQL_LIBS=`mysql_config --libs` + + CPPFLAGS="$CPPFLAGS $MYSQL_CFLAGS" + LIBS="$MYSQL_LIBS $LIBS" + + AC_CHECK_LIB(mysqlclient, mysql_init) + fi +fi + +AC_CHECK_LIB(sqlite3, sqlite3_open) +AC_CHECK_FUNCS(sqlite3_open_v2 sqlite3_prepare_v2) + case "$CC" in ' checker'*|checker*) AC_DEFINE(FORCE_FD_ZERO_MEMSET, 1, [Used to make "checker" understand that FD_ZERO() clears memory.]) diff --git a/db.c b/db.c new file mode 100644 --- /dev/null +++ b/db.c @@ -0,0 +1,567 @@ +/* + * Routines to access extended file info via DB. + * + * Copyright (C) 2008 Wayne Davison + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, visit the http://fsf.org website. + */ + +#include "rsync.h" +#include "ifuncs.h" +#include "itypes.h" + +#if defined HAVE_MYSQL_MYSQL_H && defined HAVE_LIBMYSQLCLIENT +#define USE_MYSQL +#include +#include +#endif + +#if defined HAVE_SQLITE3_H && defined HAVE_LIBSQLITE3 +#define USE_SQLITE +#include +#ifndef HAVE_SQLITE3_OPEN_V2 +#define sqlite3_open_v2(dbname, dbhptr, flags, vfs) \ + sqlite3_open(dbname, dbhptr) +#endif +#ifndef HAVE_SQLITE3_PREPARE_V2 +#define sqlite3_prepare_v2 sqlite3_prepare +#endif +#endif + +extern int protocol_version; +extern int checksum_len; + +#define DB_TYPE_NONE 0 +#define DB_TYPE_MYSQL 1 +#define DB_TYPE_SQLITE 2 + +int use_db = DB_TYPE_NONE; + +static const char *dbhost = NULL, *dbuser = NULL, *dbpass = NULL, *dbname = NULL; +static unsigned int dbport = 0; + +static union { +#ifdef USE_MYSQL + MYSQL *mysql; +#endif +#ifdef USE_SQLITE + sqlite3 *sqlite; +#endif + void *all; +} dbh; + +#define SEL_DEV 0 +#define SEL_SUM 1 +#define REP_SUM 2 +#define MAX_PREP_CNT 3 + +static union { +#ifdef USE_MYSQL + MYSQL_STMT *mysql; +#endif +#ifdef USE_SQLITE + sqlite3_stmt *sqlite; +#endif + void *all; +} statements[MAX_PREP_CNT]; + +static int md_num; +static enum logcode log_code; + +#ifdef USE_MYSQL +static unsigned int bind_disk_id; +static unsigned long long bind_devno, bind_ino, bind_size, bind_mtime, bind_ctime; +static char bind_sum[MAX_DIGEST_LEN]; +#endif +static char bind_thishost[256]; +static int bind_thishost_len; + +static unsigned int prior_disk_id = 0; +static unsigned long long prior_devno = 0; + +int db_read_config(enum logcode code, const char *config_file) +{ + char buf[2048], *cp; + FILE *fp; + int lineno = 0; + + log_code = code; + + bind_thishost_len = strlcpy(bind_thishost, "localhost", sizeof bind_thishost); + + if (!(fp = fopen(config_file, "r"))) { + rsyserr(log_code, errno, "unable to open %s", config_file); + return 0; + } + while (fgets(buf, sizeof buf, fp)) { + lineno++; + if ((cp = strchr(buf, '#')) == NULL + && (cp = strchr(buf, '\r')) == NULL + && (cp = strchr(buf, '\n')) == NULL) + cp = buf + strlen(buf); + while (cp != buf && isSpace(cp-1)) cp--; + *cp = '\0'; + + if (!*buf) + continue; + + if (!(cp = strchr(buf, ':'))) + goto invalid_line; + *cp++ = '\0'; + + while (isSpace(cp)) cp++; + if (strcasecmp(buf, "dbhost") == 0) + dbhost = strdup(cp); + else if (strcasecmp(buf, "dbuser") == 0) + dbuser = strdup(cp); + else if (strcasecmp(buf, "dbpass") == 0) + dbpass = strdup(cp); + else if (strcasecmp(buf, "dbname") == 0) + dbname = strdup(cp); + else if (strcasecmp(buf, "dbport") == 0) + dbport = atoi(cp); + else if (strcasecmp(buf, "thishost") == 0) + bind_thishost_len = strlcpy(bind_thishost, cp, sizeof bind_thishost); + else if (strcasecmp(buf, "dbtype") == 0) { +#ifdef USE_MYSQL + if (strcasecmp(cp, "mysql") == 0) { + use_db = DB_TYPE_MYSQL; + continue; + } +#endif +#ifdef USE_SQLITE + if (strcasecmp(cp, "sqlite") == 0) { + use_db = DB_TYPE_SQLITE; + continue; + } +#endif + rprintf(log_code, + "Unsupported dbtype on line #%d in %s.\n", + lineno, config_file); + use_db = DB_TYPE_NONE; + return 0; + } else { + invalid_line: + rprintf(log_code, "Invalid line #%d in %s\n", + lineno, config_file); + use_db = DB_TYPE_NONE; + return 0; + } + } + fclose(fp); + + if (bind_thishost_len >= (int)sizeof bind_thishost) + bind_thishost_len = sizeof bind_thishost - 1; + + if (!use_db || !dbname) { + rprintf(log_code, "Please specify at least dbtype and dbname in %s.\n", config_file); + use_db = DB_TYPE_NONE; + return 0; + } + + md_num = protocol_version >= 30 ? 5 : 4; + + return 1; +} + +#ifdef USE_MYSQL +static MYSQL_STMT *prepare_mysql(MYSQL_BIND *binds, int bind_cnt, const char *fmt, ...) +{ + va_list ap; + char *query; + int qlen, param_cnt; + MYSQL_STMT *stmt = mysql_stmt_init(dbh.mysql); + + if (stmt == NULL) + out_of_memory("prepare_mysql"); + + va_start(ap, fmt); + qlen = vasprintf(&query, fmt, ap); + va_end(ap); + if (qlen < 0) + out_of_memory("prepare_mysql"); + + if (mysql_stmt_prepare(stmt, query, qlen) != 0) { + rprintf(log_code, "Prepare failed: %s\n", mysql_stmt_error(stmt)); + return NULL; + } + free(query); + + if ((param_cnt = mysql_stmt_param_count(stmt)) != bind_cnt) { + rprintf(log_code, "Parameters in statement = %d, bind vars = %d\n", + param_cnt, bind_cnt); + return NULL; + } + if (bind_cnt) + mysql_stmt_bind_param(stmt, binds); + + return stmt; +} +#endif + +#ifdef USE_MYSQL +static int db_connect_mysql(void) +{ + MYSQL_BIND binds[10]; + + if (!(dbh.mysql = mysql_init(NULL))) + out_of_memory("db_read_config"); + + if (!mysql_real_connect(dbh.mysql, dbhost, dbuser, dbpass, dbname, dbport, NULL, 0)) + return 0; + + memset(binds, 0, sizeof binds); + binds[0].buffer_type = MYSQL_TYPE_LONGLONG; + binds[0].buffer = &bind_devno; + binds[1].buffer_type = MYSQL_TYPE_STRING; + binds[1].buffer = &bind_thishost; + binds[1].buffer_length = bind_thishost_len; + statements[SEL_DEV].mysql = prepare_mysql(binds, 2, + "SELECT disk_id" + " FROM disk" + " WHERE devno = ? AND host = ? AND mounted = 1"); + if (!statements[SEL_DEV].mysql) + return 0; + + memset(binds, 0, sizeof binds); + binds[0].buffer_type = MYSQL_TYPE_LONG; + binds[0].buffer = &bind_disk_id; + binds[1].buffer_type = MYSQL_TYPE_LONGLONG; + binds[1].buffer = &bind_ino; + binds[2].buffer_type = MYSQL_TYPE_LONGLONG; + binds[2].buffer = &bind_size; + binds[3].buffer_type = MYSQL_TYPE_LONGLONG; + binds[3].buffer = &bind_mtime; + binds[4].buffer_type = MYSQL_TYPE_LONGLONG; + binds[4].buffer = &bind_ctime; + statements[SEL_SUM].mysql = prepare_mysql(binds, 5, + "SELECT checksum" + " FROM inode_map" + " WHERE disk_id = ? AND ino = ? AND sum_type = %d" + " AND size = ? AND mtime = ? AND ctime = ?", + md_num); + if (!statements[SEL_SUM].mysql) + return 0; + + memset(binds, 0, sizeof binds); + binds[0].buffer_type = MYSQL_TYPE_LONG; + binds[0].buffer = &bind_disk_id; + binds[1].buffer_type = MYSQL_TYPE_LONGLONG; + binds[1].buffer = &bind_ino; + binds[2].buffer_type = binds[6].buffer_type = MYSQL_TYPE_LONGLONG; + binds[2].buffer = binds[6].buffer = &bind_size; + binds[3].buffer_type = binds[7].buffer_type = MYSQL_TYPE_LONGLONG; + binds[3].buffer = binds[7].buffer = &bind_mtime; + binds[4].buffer_type = binds[8].buffer_type = MYSQL_TYPE_LONGLONG; + binds[4].buffer = binds[8].buffer = &bind_ctime; + binds[5].buffer_type = binds[9].buffer_type = MYSQL_TYPE_BLOB; + binds[5].buffer = binds[9].buffer = &bind_sum; + binds[5].buffer_length = binds[9].buffer_length = checksum_len; + statements[REP_SUM].mysql = prepare_mysql(binds, 10, + "INSERT INTO inode_map" + " SET disk_id = ?, ino = ?, sum_type = %d," + " size = ?, mtime = ?, ctime = ?, checksum = ?" + " ON DUPLICATE KEY" + " UPDATE size = ?, mtime = ?, ctime = ?, checksum = ?", + md_num, md_num); + if (!statements[REP_SUM].mysql) + return 0; + + return 1; +} +#endif + +#ifdef USE_SQLITE +static int db_connect_sqlite(void) +{ + char *sql; + + if (sqlite3_open_v2(dbname, &dbh.sqlite, SQLITE_OPEN_READWRITE, NULL) != 0) + return 0; + + sql = "SELECT disk_id" + " FROM disk" + " WHERE devno = ? AND host = ? AND mounted = 1"; + if (sqlite3_prepare_v2(dbh.sqlite, sql, -1, &statements[SEL_DEV].sqlite, NULL) != 0) + return 0; + + if (asprintf(&sql, + "SELECT checksum" + " FROM inode_map" + " WHERE disk_id = ? AND ino = ? AND sum_type = %d" + " AND size = ? AND mtime = ? AND ctime = ?", + md_num) < 0 + || sqlite3_prepare_v2(dbh.sqlite, sql, -1, &statements[SEL_SUM].sqlite, NULL) != 0) + return 0; + free(sql); + + if (asprintf(&sql, + "INSERT OR REPLACE INTO inode_map" + " (disk_id, ino, sum_type, size, mtime, ctime, checksum)" + " VALUES(?, ?, %d, ?, ?, ?, ?)", + md_num) < 0 + || sqlite3_prepare_v2(dbh.sqlite, sql, -1, &statements[REP_SUM].sqlite, NULL) != 0) + return 0; + free(sql); + + return 1; +} +#endif + +int db_connect(void) +{ + switch (use_db) { +#ifdef USE_MYSQL + case DB_TYPE_MYSQL: + if (db_connect_mysql()) + return 1; + break; +#endif +#ifdef USE_SQLITE + case DB_TYPE_SQLITE: + if (db_connect_sqlite()) + return 1; + break; +#endif + } + + rprintf(log_code, "Unable to connect to DB\n"); + db_disconnect(); + use_db = DB_TYPE_NONE; + + return 0; +} + +void db_disconnect(void) +{ + int ndx; + + if (!dbh.all) + return; + + for (ndx = 0; ndx < MAX_PREP_CNT; ndx++) { + if (statements[ndx].all) { + switch (use_db) { +#ifdef USE_MYSQL + case DB_TYPE_MYSQL: + mysql_stmt_close(statements[ndx].mysql); + break; +#endif +#ifdef USE_SQLITE + case DB_TYPE_SQLITE: + sqlite3_finalize(statements[ndx].sqlite); + break; +#endif + } + statements[ndx].all = NULL; + } + } + + switch (use_db) { +#ifdef USE_MYSQL + case DB_TYPE_MYSQL: + mysql_close(dbh.mysql); + break; +#endif +#ifdef USE_SQLITE + case DB_TYPE_SQLITE: + sqlite3_close(dbh.sqlite); + break; +#endif + } + + dbh.all = NULL; +} + +#ifdef USE_MYSQL +static MYSQL_STMT *exec_mysql(int ndx) +{ + MYSQL_STMT *stmt = statements[ndx].mysql; + int rc; + + if ((rc = mysql_stmt_execute(stmt)) == CR_SERVER_LOST) { + db_disconnect(); + if (db_connect()) { + stmt = statements[ndx].mysql; + rc = mysql_stmt_execute(stmt); + } + } + if (rc != 0) { + rprintf(log_code, "SQL execute failed: %s\n", mysql_stmt_error(stmt)); + return NULL; + } + + return stmt; +} +#endif + +#ifdef USE_MYSQL +static int fetch_mysql(MYSQL_BIND *binds, int bind_cnt, int ndx) +{ + unsigned long length[32]; + my_bool is_null[32], error[32]; + MYSQL_STMT *stmt; + int i, rc; + + if (bind_cnt > 32) + exit_cleanup(RERR_UNSUPPORTED); + + if ((stmt = exec_mysql(ndx)) == NULL) + return 0; + + for (i = 0; i < bind_cnt; i++) { + binds[i].is_null = &is_null[i]; + binds[i].length = &length[i]; + binds[i].error = &error[i]; + } + mysql_stmt_bind_result(stmt, binds); + + if ((rc = mysql_stmt_fetch(stmt)) != 0) { + if (rc != MYSQL_NO_DATA) { + rprintf(log_code, "SELECT fetch failed: %s\n", + mysql_stmt_error(stmt)); + } + mysql_stmt_free_result(stmt); + return 0; + } + + mysql_stmt_free_result(stmt); + + return is_null[0] ? 0 : 1; +} +#endif + +static void get_disk_id(unsigned long long devno) +{ + switch (use_db) { +#ifdef USE_MYSQL + case DB_TYPE_MYSQL: { + MYSQL_BIND binds[1]; + + bind_devno = devno; /* The one variable SEL_DEV input value. */ + + /* Bind where to put the output. */ + binds[0].buffer_type = MYSQL_TYPE_LONG; + binds[0].buffer = &prior_disk_id; + if (!fetch_mysql(binds, 1, SEL_DEV)) + prior_disk_id = 0; + break; + } +#endif +#ifdef USE_SQLITE + case DB_TYPE_SQLITE: { + sqlite3_stmt *stmt = statements[SEL_DEV].sqlite; + sqlite3_bind_int64(stmt, 1, devno); + sqlite3_bind_text(stmt, 2, bind_thishost, bind_thishost_len, SQLITE_STATIC); + if (sqlite3_step(stmt) == SQLITE_ROW) + prior_disk_id = sqlite3_column_int(stmt, 0); + else + prior_disk_id = 0; + sqlite3_reset(stmt); + break; + } +#endif + } + + prior_devno = devno; +} + +int db_get_checksum(UNUSED(const char *fname), const STRUCT_STAT *st_p, char *sum) +{ + if (prior_devno != st_p->st_dev) + get_disk_id(st_p->st_dev); + if (prior_disk_id == 0) + return 0; + + switch (use_db) { +#ifdef USE_MYSQL + case DB_TYPE_MYSQL: { + MYSQL_BIND binds[1]; + + bind_disk_id = prior_disk_id; + bind_ino = st_p->st_ino; + bind_size = st_p->st_size; + bind_mtime = st_p->st_mtime; + bind_ctime = st_p->st_ctime; + + binds[0].buffer_type = MYSQL_TYPE_BLOB; + binds[0].buffer = sum; + binds[0].buffer_length = checksum_len; + return fetch_mysql(binds, 1, SEL_SUM); + } +#endif +#ifdef USE_SQLITE + case DB_TYPE_SQLITE: { + sqlite3_stmt *stmt = statements[SEL_SUM].sqlite; + sqlite3_bind_int(stmt, 1, prior_disk_id); + sqlite3_bind_int64(stmt, 2, st_p->st_ino); + sqlite3_bind_int64(stmt, 3, st_p->st_size); + sqlite3_bind_int64(stmt, 4, st_p->st_mtime); + sqlite3_bind_int64(stmt, 5, st_p->st_ctime); + if (sqlite3_step(stmt) == SQLITE_ROW) { + int len = sqlite3_column_bytes(stmt, 0); + if (len > MAX_DIGEST_LEN) + len = MAX_DIGEST_LEN; + memcpy(sum, sqlite3_column_blob(stmt, 0), len); + sqlite3_reset(stmt); + return 1; + } + sqlite3_reset(stmt); + return 0; + } +#endif + } + + return 0; +} + +int db_set_checksum(UNUSED(const char *fname), const STRUCT_STAT *st_p, const char *sum) +{ + if (prior_devno != st_p->st_dev) + get_disk_id(st_p->st_dev); + if (prior_disk_id == 0) + return 0; + + switch (use_db) { +#ifdef USE_MYSQL + case DB_TYPE_MYSQL: { + bind_disk_id = prior_disk_id; + bind_ino = st_p->st_ino; + bind_size = st_p->st_size; + bind_mtime = st_p->st_mtime; + bind_ctime = st_p->st_ctime; + memcpy(bind_sum, sum, checksum_len); + + return exec_mysql(REP_SUM) != NULL; + } +#endif +#ifdef USE_SQLITE + case DB_TYPE_SQLITE: { + int rc; + sqlite3_stmt *stmt = statements[REP_SUM].sqlite; + sqlite3_bind_int(stmt, 1, prior_disk_id); + sqlite3_bind_int64(stmt, 2, st_p->st_ino); + sqlite3_bind_int64(stmt, 3, st_p->st_size); + sqlite3_bind_int64(stmt, 4, st_p->st_mtime); + sqlite3_bind_int64(stmt, 5, st_p->st_ctime); + sqlite3_bind_blob(stmt, 6, sum, checksum_len, SQLITE_TRANSIENT); + rc = sqlite3_step(stmt); + sqlite3_reset(stmt); + return rc == SQLITE_DONE; + } +#endif + } + + return 0; +} diff --git a/flist.c b/flist.c --- a/flist.c +++ b/flist.c @@ -54,6 +54,7 @@ extern int delete_during; extern int missing_args; extern int uid_ndx; extern int gid_ndx; +extern int use_db; extern int eol_nulls; extern int relative_paths; extern int implied_dirs; @@ -1309,11 +1310,8 @@ struct file_struct *make_file(const char *fname, struct file_list *flist, extra_len += EXTRA_LEN; #endif - if (always_checksum && am_sender && S_ISREG(st.st_mode)) { - file_checksum(thisname, tmp_sum, st.st_size); - if (sender_keeps_checksum) - extra_len += SUM_EXTRA_CNT * EXTRA_LEN; - } + if (sender_keeps_checksum && S_ISREG(st.st_mode)) + extra_len += SUM_EXTRA_CNT * EXTRA_LEN; #if EXTRA_ROUNDING > 0 if (extra_len & (EXTRA_ROUNDING * EXTRA_LEN)) @@ -1396,8 +1394,12 @@ struct file_struct *make_file(const char *fname, struct file_list *flist, return NULL; } - if (sender_keeps_checksum && S_ISREG(st.st_mode)) - memcpy(F_SUM(file), tmp_sum, checksum_len); + if (always_checksum && am_sender && S_ISREG(st.st_mode)) { + if (!use_db || !db_get_checksum(thisname, &st, tmp_sum)) + file_checksum(thisname, &st, tmp_sum); + if (sender_keeps_checksum) + memcpy(F_SUM(file), tmp_sum, checksum_len); + } if (unsort_ndx) F_NDX(file) = stats.num_dirs; @@ -2075,6 +2077,9 @@ struct file_list *send_file_list(int f, int argc, char *argv[]) | (eol_nulls || reading_remotely ? RL_EOL_NULLS : 0); int implied_dot_dir = 0; + if (use_db) + db_connect(); + rprintf(FLOG, "building file list\n"); if (show_filelist_p()) start_filelist_progress("building file list"); diff --git a/generator.c b/generator.c --- a/generator.c +++ b/generator.c @@ -60,6 +60,7 @@ extern int human_readable; extern int ignore_existing; extern int ignore_non_existing; extern int inplace; +extern int use_db; extern int append_mode; extern int make_backups; extern int csum_length; @@ -530,7 +531,8 @@ int unchanged_file(char *fn, struct file_struct *file, STRUCT_STAT *st) of the file time to determine whether to sync */ if (always_checksum > 0 && S_ISREG(st->st_mode)) { char sum[MAX_DIGEST_LEN]; - file_checksum(fn, sum, st->st_size); + if (!use_db || !db_get_checksum(fn, st, sum)) + file_checksum(fn, st, sum); return memcmp(sum, F_SUM(file), checksum_len) == 0; } @@ -2075,6 +2077,9 @@ void generate_files(int f_out, const char *local_name) : "enabled"); } + if (use_db && always_checksum) + db_connect(); + dflt_perms = (ACCESSPERMS & ~orig_umask); do { diff --git a/loadparm.c b/loadparm.c --- a/loadparm.c +++ b/loadparm.c @@ -108,6 +108,7 @@ typedef struct { char *auth_users; char *charset; char *comment; + char *db_config; char *dont_compress; char *exclude; char *exclude_from; @@ -182,6 +183,7 @@ static const all_vars Defaults = { /* auth_users; */ NULL, /* charset; */ NULL, /* comment; */ NULL, + /* db_config; */ NULL, /* dont_compress; */ DEFAULT_DONT_COMPRESS, /* exclude; */ NULL, /* exclude_from; */ NULL, @@ -317,6 +319,7 @@ static struct parm_struct parm_table[] = {"auth users", P_STRING, P_LOCAL, &Vars.l.auth_users, NULL,0}, {"charset", P_STRING, P_LOCAL, &Vars.l.charset, NULL,0}, {"comment", P_STRING, P_LOCAL, &Vars.l.comment, NULL,0}, + {"db config", P_STRING, P_LOCAL, &Vars.l.db_config, NULL,0}, {"dont compress", P_STRING, P_LOCAL, &Vars.l.dont_compress, NULL,0}, {"exclude from", P_STRING, P_LOCAL, &Vars.l.exclude_from, NULL,0}, {"exclude", P_STRING, P_LOCAL, &Vars.l.exclude, NULL,0}, @@ -447,6 +450,7 @@ FN_GLOBAL_INTEGER(lp_rsync_port, &Vars.g.rsync_port) FN_LOCAL_STRING(lp_auth_users, auth_users) FN_LOCAL_STRING(lp_charset, charset) FN_LOCAL_STRING(lp_comment, comment) +FN_LOCAL_STRING(lp_db_config, db_config) FN_LOCAL_STRING(lp_dont_compress, dont_compress) FN_LOCAL_STRING(lp_exclude, exclude) FN_LOCAL_STRING(lp_exclude_from, exclude_from) diff --git a/main.c b/main.c --- a/main.c +++ b/main.c @@ -51,6 +51,7 @@ extern int copy_unsafe_links; extern int keep_dirlinks; extern int preserve_hard_links; extern int protocol_version; +extern int always_checksum; extern int file_total; extern int recurse; extern int xfer_dirs; @@ -83,6 +84,7 @@ extern char *filesfrom_host; extern char *partial_dir; extern char *dest_option; extern char *rsync_path; +extern char *db_config; extern char *shell_cmd; extern char *batch_name; extern char *password_file; @@ -1609,6 +1611,9 @@ int main(int argc,char *argv[]) exit_cleanup(RERR_SYNTAX); } + if (db_config && always_checksum) + db_read_config(FERROR, db_config); + if (am_server) { set_nonblocking(STDIN_FILENO); set_nonblocking(STDOUT_FILENO); diff --git a/options.c b/options.c --- a/options.c +++ b/options.c @@ -92,6 +92,7 @@ int use_qsort = 0; char *files_from = NULL; int filesfrom_fd = -1; char *filesfrom_host = NULL; +char *db_config = NULL; int eol_nulls = 0; int protect_args = -1; int human_readable = 1; @@ -567,6 +568,7 @@ static void print_rsync_version(enum logcode f) char const *links = "no "; char const *iconv = "no "; char const *ipv6 = "no "; + char const *db = "no "; STRUCT_STAT *dumstat; #if SUBPROTOCOL_VERSION != 0 @@ -600,6 +602,11 @@ static void print_rsync_version(enum logcode f) #ifdef CAN_SET_SYMLINK_TIMES symtimes = ""; #endif +#if defined HAVE_MYSQL_MYSQL_H && defined HAVE_LIBMYSQLCLIENT + db = ""; +#elif defined HAVE_SQLITE3_H && defined HAVE_LIBSQLITE3 + db = ""; +#endif rprintf(f, "%s version %s protocol version %d%s\n", RSYNC_NAME, RSYNC_VERSION, PROTOCOL_VERSION, subprotocol); @@ -613,8 +620,8 @@ static void print_rsync_version(enum logcode f) (int)(sizeof (int64) * 8)); rprintf(f, " %ssocketpairs, %shardlinks, %ssymlinks, %sIPv6, batchfiles, %sinplace,\n", got_socketpair, hardlinks, links, ipv6, have_inplace); - rprintf(f, " %sappend, %sACLs, %sxattrs, %siconv, %ssymtimes\n", - have_inplace, acls, xattrs, iconv, symtimes); + rprintf(f, " %sappend, %sACLs, %sxattrs, %siconv, %ssymtimes, %sdb\n", + have_inplace, acls, xattrs, iconv, symtimes, db); #ifdef MAINTAINER_MODE rprintf(f, "Panic Action: \"%s\"\n", get_panic_action()); @@ -662,6 +669,7 @@ void usage(enum logcode F) rprintf(F," -q, --quiet suppress non-error messages\n"); rprintf(F," --no-motd suppress daemon-mode MOTD (see manpage caveat)\n"); rprintf(F," -c, --checksum skip based on checksum, not mod-time & size\n"); + rprintf(F," --db=CONFIG_FILE specify a CONFIG_FILE for DB checksums\n"); rprintf(F," -a, --archive archive mode; equals -rlptgoD (no -H,-A,-X)\n"); rprintf(F," --no-OPTION turn off an implied OPTION (e.g. --no-D)\n"); rprintf(F," -r, --recursive recurse into directories\n"); @@ -934,6 +942,7 @@ static struct poptOption long_options[] = { {"checksum", 'c', POPT_ARG_VAL, &always_checksum, 1, 0, 0 }, {"no-checksum", 0, POPT_ARG_VAL, &always_checksum, 0, 0, 0 }, {"no-c", 0, POPT_ARG_VAL, &always_checksum, 0, 0, 0 }, + {"db", 0, POPT_ARG_STRING, &db_config, 0, 0, 0 }, {"block-size", 'B', POPT_ARG_LONG, &block_size, 0, 0, 0 }, {"compare-dest", 0, POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 }, {"copy-dest", 0, POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 }, diff --git a/pipe.c b/pipe.c --- a/pipe.c +++ b/pipe.c @@ -27,6 +27,9 @@ extern int am_server; extern int blocking_io; extern int filesfrom_fd; extern int munge_symlinks; +extern int always_checksum; +extern int use_db; +extern char *db_config; extern mode_t orig_umask; extern char *logfile_name; extern int remote_option_cnt; @@ -143,6 +146,9 @@ pid_t local_child(int argc, char **argv, int *f_in, int *f_out, logfile_close(); } + use_db = 0; + db_config = NULL; + if (remote_option_cnt) { int rc = remote_option_cnt + 1; const char **rv = remote_options; @@ -150,6 +156,8 @@ pid_t local_child(int argc, char **argv, int *f_in, int *f_out, option_error(); exit_cleanup(RERR_SYNTAX); } + if (db_config && always_checksum) + db_read_config(FERROR, db_config); } if (dup2(to_child_pipe[0], STDIN_FILENO) < 0 || diff --git a/rsync.yo b/rsync.yo --- a/rsync.yo +++ b/rsync.yo @@ -323,6 +323,7 @@ to the detailed description below for a complete description. verb( -q, --quiet suppress non-error messages --no-motd suppress daemon-mode MOTD (see caveat) -c, --checksum skip based on checksum, not mod-time & size + --db=CONFIG_FILE specify a CONFIG_FILE for DB checksums -a, --archive archive mode; equals -rlptgoD (no -H,-A,-X) --no-OPTION turn off an implied OPTION (e.g. --no-D) -r, --recursive recurse into directories @@ -587,6 +588,47 @@ option's before-the-transfer "Does this file need to be updated?" check. For protocol 30 and beyond (first supported in 3.0.0), the checksum used is MD5. For older protocols, the checksum used is MD4. +dit(bf(--db=CONFIG_FILE)) This option specifies a CONFIG_FILE to read +that holds connection details for a database of checksum information. +When combined with the bf(--checksum) (bf(-c)) option, rsync will try to +use cached checksum information from the DB, and will update it if it is +missing. + +The currently supported DB choices are MySQL and SQLite. For example, a +MySQL configuration might look like this: + +verb( dbtype: mysql + dbhost: 127.0.0.1 + dbname: rsyncdb + dbuser: rsyncuser + dbpass: somepass + port: 3306 + thishost: hostname ) + +And a SQLite configuration might look like this: + +verb( dbtype: SQLite + dbname: /var/cache/rsync/sum.db ) + +This option only affects one side of a transfer. See the +bf(--remote-option) option for a way to specify the option for both +sides of the transfer (with each side reading the config file from +their local filesystem). For example: + +verb( rsync -avc {-M,}--db=/etc/rsyncdb.conf src/ host:dest/ ) + +See the perl script "rsyncdb" in the support directory of the source code +(which may also be installed in /usr/bin) for a way to create the tables, +populate the mounted-disk information, check files against their checksums, +and update both the MD4 and MD5 checksums for files at the same time (since +an rsync copy will only update one or the other). + +You can use a single MySQL DB for all your hosts if you give each one +their own "thishost" name and setup their device-mapping data. Or feel +free to use separate databases, separate servers, etc. See the rsync +daemon's "db config" parameter for how to configure a daemon to use a DB +(since a client cannot control this parameter on a daemon). + dit(bf(-a, --archive)) This is equivalent to bf(-rlptgoD). It is a quick way of saying you want recursion and want to preserve almost everything (with -H being a notable omission). diff --git a/rsyncd.conf.yo b/rsyncd.conf.yo --- a/rsyncd.conf.yo +++ b/rsyncd.conf.yo @@ -301,6 +301,18 @@ is daemon. This setting has no effect if the "log file" setting is a non-empty string (either set in the per-modules settings, or inherited from the global settings). +dit(bf(db config)) This parameter specifies a config file to read that +holds connection details for a database of checksum information. + +The config file will be read-in prior to any chroot restrictions, but +the connection occurs from inside the chroot. This means that you +should use a socket connection (e.g. 127.0.0.1 rather than localhost) +for a MySQL config from inside a chroot. For SQLite, the DB file must +be placed inside the chroot (though it can be placed outside the +transfer dir if you configured an inside-chroot path). + +See the bf(--db=CONFIG_FILE) option for full details. + dit(bf(max verbosity)) This parameter allows you to control the maximum amount of verbose information that you'll allow the daemon to generate (since the information goes into the log file). The default is 1, diff --git a/support/rsyncdb b/support/rsyncdb new file mode 100755 --- /dev/null +++ b/support/rsyncdb @@ -0,0 +1,331 @@ +#!/usr/bin/perl -w +use strict; + +use DBI; +use Getopt::Long; +use Cwd qw(abs_path cwd); +use Digest::MD4; +use Digest::MD5; + +my $MOUNT_FILE = '/etc/mtab'; + +&Getopt::Long::Configure('bundling'); +&usage if !&GetOptions( + 'db=s' => \( my $db_config ), + 'init' => \( my $init_db ), + 'mounts|m' => \( my $update_mounts ), + 'recurse|r' => \( my $recurse_opt ), + 'check|c' => \( my $check_opt ), + 'verbose|v+' => \( my $verbosity = 0 ), + 'help|h' => \( my $help_opt ), +); +&usage if $help_opt || !defined $db_config; + +my %config; +open(IN, '<', $db_config) or die "Unable to open $db_config: $!\n"; +while () { + s/[#\r\n].*//s; + next if /^$/; + my($key, $val) = /^(\S+):\s*(.*)/ or die "Unable to parse line $. of $db_config\n"; + $config{$key} = $val; +} +close IN; + +die "You must define at least dbtype and dbname in $db_config\n" + unless defined $config{'dbtype'} && defined $config{'dbname'}; + +my $sqlite = $config{'dbtype'} =~ /^sqlite$/i; + +my $thishost = $config{'thishost'} || 'localhost'; + +my $connect = 'DBI:' . $config{'dbtype'} . ':'; +$connect .= 'dbname=' . $config{'dbname'} if $sqlite; +$connect .= 'database=' . $config{'dbname'} if !$sqlite && !$init_db; +$connect .= ';host=' . $config{'dbhost'} if defined $config{'dbhost'}; +$connect .= ';port=' . $config{'dbport'} if defined $config{'dbport'}; + +my $dbh = DBI->connect($connect, $config{'dbuser'}, $config{'dbpass'}) + or die "DB connection failed\n"; + +END { + $dbh->disconnect if defined $dbh; +} + +if ($init_db) { + my $unsigned = $sqlite ? '' : 'unsigned'; + my $auto_increment = $sqlite ? 'AUTOINCREMENT' : 'AUTO_INCREMENT'; + my $dbname = $config{'dbname'}; + + if (!$sqlite) { + $dbh->do("CREATE DATABASE IF NOT EXISTS `$dbname`"); + $dbh->do("USE `$dbname`"); + } + + print "Dropping old tables (if they exist) ...\n" if $verbosity; + $dbh->do("DROP TABLE IF EXISTS disk") or die $dbh->errstr; + $dbh->do("DROP TABLE IF EXISTS inode_map") or die $dbh->errstr; + + print "Creating empty tables ...\n" if $verbosity; + $dbh->do(" + CREATE TABLE disk ( + disk_id integer $unsigned NOT NULL PRIMARY KEY $auto_increment, + devno bigint $unsigned NOT NULL, + host varchar(256) NOT NULL default 'localhost', + mounted tinyint NOT NULL default '1', + comment varchar(256) default NULL + )") or die $dbh->errstr; + + $dbh->do(" + CREATE TABLE inode_map ( + disk_id integer $unsigned NOT NULL, + ino bigint $unsigned NOT NULL, + size bigint $unsigned NOT NULL, + mtime bigint NOT NULL, + ctime bigint NOT NULL, + sum_type tinyint NOT NULL default '0', + checksum binary(16) NOT NULL, + PRIMARY KEY (disk_id,ino,sum_type) + )") or die $dbh->errstr; + + exit unless $update_mounts; +} + +my $sel_disk_H = $dbh->prepare(" + SELECT disk_id, devno, mounted, comment + FROM disk + WHERE host = ? + ") or die $dbh->errstr; + +my $ins_disk_H = $dbh->prepare(" + INSERT INTO disk + (devno, host, mounted, comment) + VALUES(?, ?, ?, ?) + ") or die $dbh->errstr; + +my $up_disk_H = $dbh->prepare(" + UPDATE disk + SET mounted = ? + WHERE disk_id = ? + ") or die $dbh->errstr; + +my $row_id = $sqlite ? 'ROWID' : 'ID'; +my $sel_lastid_H = $dbh->prepare(" + SELECT LAST_INSERT_$row_id() + ") or die $dbh->errstr; + +my $sel_sum_H = $dbh->prepare(" + SELECT sum_type, checksum + FROM inode_map + WHERE disk_id = ? AND ino = ? AND size = ? AND mtime = ? AND ctime = ? + ") or die $dbh->errstr; + +my $rep_sum_H = $dbh->prepare(" + REPLACE INTO inode_map + (disk_id, ino, size, mtime, ctime, sum_type, checksum) + VALUES(?, ?, ?, ?, ?, ?, ?) + ") or die $dbh->errstr; + +my %mounts; +if ($update_mounts) { + open(IN, $MOUNT_FILE) or die "Unable to open $MOUNT_FILE: $!\n"; + while () { + my($devname, $mnt) = (split)[0,1]; + next unless $devname =~ m#^/dev#; + my($devno) = (stat($mnt))[0]; + if (!defined $devno) { + warn "Unable to stat $mnt: $!\n"; + next; + } + $mounts{$devno} = "$devname on $mnt"; + } + close IN; +} + +my %disk_id; +$sel_disk_H->execute($thishost); +while (my($disk_id, $devno, $mounted, $comment) = $sel_disk_H->fetchrow_array) { + if ($update_mounts) { + if (defined $mounts{$devno}) { + if ($comment ne $mounts{$devno}) { + if ($mounted) { + print "Umounting $comment ($thishost:$devno)\n" if $verbosity; + $up_disk_H->execute(0, $disk_id); + } + next; + } + if (!$mounted) { + print "Mounting $comment ($thishost:$devno)\n" if $verbosity; + $up_disk_H->execute(1, $disk_id); + } + } else { + if ($mounted) { + print "Umounting $comment ($thishost:$devno)\n" if $verbosity; + $up_disk_H->execute(0, $disk_id); + } + next; + } + } else { + next unless $mounted; + } + $disk_id{$devno} = $disk_id; +} +$sel_disk_H->finish; + +if ($update_mounts) { + while (my($devno, $comment) = each %mounts) { + next if $disk_id{$devno}; + print "Adding $comment ($thishost:$devno)\n" if $verbosity; + $ins_disk_H->execute($devno, $thishost, 1, $comment); + $sel_lastid_H->execute; + ($disk_id{$devno}) = $sel_lastid_H->fetchrow_array; + $sel_lastid_H->finish; + } + exit; +} + +my $start_dir = cwd(); + +my @dirs = @ARGV; +@dirs = '.' unless @dirs; +foreach (@dirs) { + $_ = abs_path($_); +} + +$| = 1; + +my $exit_code = 0; + +my $md4 = Digest::MD4->new; +my $md5 = Digest::MD5->new; + +while (@dirs) { + my $dir = shift @dirs; + + if (!chdir($dir)) { + warn "Unable to chdir to $dir: $!\n"; + next; + } + if (!opendir(DP, '.')) { + warn "Unable to opendir $dir: $!\n"; + next; + } + + my $reldir = $dir; + $reldir =~ s#^$start_dir(/|$)# $1 ? '' : '.' #eo; + print "$reldir ... \n" if $verbosity; + + my @subdirs; + while (defined(my $fn = readdir(DP))) { + next if $fn =~ /^\.\.?$/ || -l $fn; + if (-d _) { + push(@subdirs, "$dir/$fn") unless $fn =~ /^(CVS|\.svn|\.git|\.bzr)$/; + next; + } + next unless -f _; + + my($dev,$ino,$size,$mtime,$ctime) = (stat(_))[0,1,7,9,10]; + my $disk_id = $disk_id{$dev} or next; + $sel_sum_H->execute($disk_id,$ino,$size,$mtime,$ctime) or die $!; + my($sum4, $dbsum4, $sum5, $dbsum5); + my $dbsumcnt = 0; + while (my($sum_type, $checksum) = $sel_sum_H->fetchrow_array) { + if ($sum_type == 4) { + $dbsum4 = $checksum; + $dbsumcnt++; + } elsif ($sum_type == 5) { + $dbsum5 = $checksum; + $dbsumcnt++; + } + } + $sel_sum_H->finish; + + next if !$check_opt && $dbsumcnt == 2; + + if (!$check_opt || $dbsumcnt || $verbosity > 2) { + if (!open(IN, $fn)) { + print STDERR "Unable to read $fn: $!\n"; + next; + } + + while (1) { + while (sysread(IN, $_, 64*1024)) { + $md4->add($_); + $md5->add($_); + } + $sum4 = $md4->digest; + $sum5 = $md5->digest; + print ' ', unpack('H*', $sum4), ' ', unpack('H*', $sum5) if $verbosity > 2; + print " $fn" if $verbosity > 1; + my($ino2,$size2,$mtime2,$ctime2) = (stat(IN))[1,7,9,10]; + last if $ino == $ino2 && $size == $size2 && $mtime == $mtime2 && $ctime == $ctime2; + $ino = $ino2; + $size = $size2; + $mtime = $mtime2; + $ctime = $ctime2; + sysseek(IN, 0, 0); + print " REREADING\n" if $verbosity > 1; + } + + close IN; + } elsif ($verbosity > 1) { + print "_$fn"; + } + + if ($check_opt) { + my $dif; + if ($dbsumcnt == 0) { + $dif = ' --MISSING--'; + } else { + $dif = ''; + if (!defined $dbsum4) { + $dif .= ' -NO-MD4-'; + } elsif ($sum4 ne $dbsum4) { + $dif .= ' -MD4-CHANGED-'; + } + if (!defined $dbsum5) { + $dif .= ' ---NO-MD5---'; + } elsif ($sum5 ne $dbsum5) { + $dif .= ' -MD5-CHANGED-'; + } + if ($dif eq '') { + print " ====OK====\n" if $verbosity > 1; + next; + } + $dif =~ s/MD4-CHANGED MD5-//; + } + if ($verbosity < 2) { + print $verbosity ? ' ' : "$reldir/"; + print $fn; + } + print $dif, "\n"; + $exit_code = 1; + } else { + print "\n" if $verbosity > 1; + $rep_sum_H->execute($disk_id, $ino, $size, $mtime, $ctime, 4, $sum4); + $rep_sum_H->execute($disk_id, $ino, $size, $mtime, $ctime, 5, $sum5); + } + } + + closedir DP; + + unshift(@dirs, sort @subdirs) if $recurse_opt; +} + +exit $exit_code; + +sub usage +{ + die <