From: Matt McCutchen <matt@mattmccutchen.net>
Date: Wed, 2 Sep 2020 20:42:51 +0000 (-0400)
Subject: Import patchsync version 2
X-Git-Url: https://mattmccutchen.net/utils/utils.git/commitdiff_plain/37ecca1d756945c3fed580d3a991bcfbf41f2314?ds=sidebyside

Import patchsync version 2

Modification time: 2006-12-14 17:13:11 -0500
---

diff --git a/patchsync b/patchsync
index 8c20a1d..07e2f0f 100755
--- a/patchsync
+++ b/patchsync
@@ -1,93 +1,445 @@
 #!/bin/bash
-# patchsync: Synchronize a branch with a patch (e.g., acls.diff and rsync-acl).
-# usage: patchsync <staging> {'' | --dry-run} [branch | patch]
-# <staging>: the staging directory, containing a shell script "settings" and an optional rsync filter file "filter"
-#     "settings" must contain:
-#         - $trunk, $patch, $branch paths interpreted relative to the staging dir
-#         - shell function do_diff: trunk, branch => patch
-#             e.g., for rsync: diff -uprN trunk branch | sed -re 's/^(\+\+\+|---) ([^\t]+).*$/\1 \2/' | grep -v '^diff' >patch
-#         - shell function do_patch: branch copied from trunk, patch => branch
-#             e.g., for rsync: patch --no-backup-if-mismatch -d branch/ -p1 <patch
-# --dry-run: don't note anything
-# branch | patch: overwrite the specified thing in case of a conflict (e.g., on the first run)
+# patchsync: Synchronizes a trunk, a branch, and a patch containing the
+# differences between them.
+# Version 2
+# -- Matt McCutchen
+#
+# usage: patchsync [--dry-run] <staging> [branch | patch]
+#
+# Patchsync is invoked on a "staging directory", which holds some configuration
+# (including the locations of the trunk, patch, and branch it is to synchronize)
+# and some synchronization state.  It determines whether each of the trunk,
+# patch, and branch has changed since the last successful synchronization and
+# updates the patch or branch as appropriate:
+# 
+# Changed since last sync   Patchsync's behavior
+# -------------------------------------------------
+# Nothing                   Do nothing
+# Trunk only                Update branch
+# Patch but not branch      Update branch
+# Branch but not patch      Update patch
+# Branch and patch          Complain about conflict
+#
+# <staging>: path to the staging directory
+#
+# --dry-run: show what would happen without actually modifying the trunk, patch,
+#   branch, or synchronization state
+#
+# [branch | patch]: force patchsync to update the specified thing from the
+#   others instead of deciding automatically; you can use this argument to
+#   revert or to resolve a conflict
+#
+# CAVEAT: Patchsync might make a mess if the trunk, patch, or branch is
+# modified in a way not hidden by the filters while patchsync is running!
+#
+# CAVEAT: Patchsync only notices creations, deletions, and modifications of
+# regular files in the trunk and branch, not other changes like empty directory
+# creations.  If you make a change like that to the trunk, you can force
+# patchsync to update the branch.
+#
+# Staging directory format: A staging directory contains the following items:
+#   "trunk", trunk directory or symlink to it
+#   "patch", patch regular file or symlink to it
+#   "branch", branch directory or symlink to it
+#     [Why symlinks?  Expose as much as possible to tools like symlinks(8).]
+#   "settings", shell script defining the following shell functions:
+#     - do_diff <trunk> <branch> <write-patch>: diff the specified trunk and
+#         branch and write the patch to the specified file; define it to use
+#         your favorite diff format
+#       - example: exitoneok diff -urN $1 $2 \
+#                    | sed -re 's/^(\+\+\+|---) ([^\t]+).*$/\1 \2/' \
+#                    | exitoneok grep -v '^diff' >$3
+#     - do_patch <patch> <convert-trunk-to-branch>: apply the patch to the
+#         specified trunk; define it to understand your favorite diff format
+#       - example: patch --no-backup-if-mismatch -d $2/ -p1 <$1
+#     - Note: patchsync runs these functions under "pipefail", but the
+#         "set -e" it uses does not propagate into the functions.  Patchsync
+#         provides an "exitoneok" function you can use to treat an exit code of
+#         1 as 0.  You might want to && successive commands together.
+#     - There are several possible ways to handle failed hunks.  The simplest
+#         and safest is to make do_patch fail, but that's inconvenient for the
+#         user, who must investigate the *.rej files in the staging directory
+#         and either fix the patch or fix the branch and force updating the
+#         patch.  One could make do_patch succeed, but if the user then modifies
+#         the branch, the failed hunks will merely be dropped from the patch,
+#         which is probably unacceptable.  The clever way is to let do_patch
+#         succeed but make do_diff fail if any *.rej files exist in the branch.
+#   "filters" (optional): rsync filters to use when accessing the trunk and
+#     branch; hide filters apply to reading, protect filters to writing;
+#     hint: you probably want to hide and protect build outputs
+#
+# Other usage: patchsync --new <trunk> <patch> <branch> <staging>
+# Mostly sets up a new staging directory for the given trunk, branch, and patch
+# at the given location.  You still have to provide settings, and filters if
+# you want them.
+# - If only one of the patch and the branch exists, the other will be calculated
+#   when you first synchronize.
+# - If both exist, you will get a conflict when you first synchronize and you
+#   will need to specify which to update.
+# - If neither exists, you get an empty patch and a branch identical to the trunk.
+
+# Disable branch/.patchsync support because it's a bad idea in general, and the
+# cyclic symlink confuses Eclipse in particular. -- Matt 2006.11.30
 
 set -e
+trap "echo 'Patchsync encountered an unexpected error!  ABORTING!' 1>&2; exit 2;" ERR
+set -o errtrace
+set -o pipefail
+
+# Make sure we have rsync.
+type rsync >/dev/null 2>&1 || \
+	{ echo "Patchsync requires rsync, but there's no rsync on your path!" 1>&2; exit 1; }
+# If a cp2 is available, use it; otherwise define our own.
+type cp2 >/dev/null 2>&1 || function cp2 { rsync -rltE --chmod=ugo=rwx "$@"; }
+
+function exitoneok {	# Run the given command; succeed if it exits with status 0 or 1.
+	"$@" || [ $? == 1 ]	# $? is the command's exit status; only 1 is forgiven
+}
+
+# wdpp_from <B> ==> the shortest relative prefix-path from directory B to the current directory
+# (a prefix-path ends in a slash, except that the empty string '' stands for '.')
+# "patchsync" uses this to link-dest when copying the branch out.
+# "patchsync --new" uses it to reverse the staging dir path when creating symlinks.
+function wdpp_from {	# NOTE: AtoB, pA, pB and ans are not local; the callers visible here run this inside $(...)
+	AtoB="$1"	# path leading from the current directory (A) to directory B
+	# Start with symlink-followed absolute prefix-paths without the initial slash.
+	# NOT bash builtin pwd; it tells us how we got here, not where we are
+	pA="$(/bin/pwd)/"	# A: the current directory
+	pA="${pA#/}"
+	pB="$( (cd "$AtoB" && /bin/pwd) )/"	# B: the directory named by $1
+	pB="${pB#/}"
+	# Lop off the longest common prefix of components that we can.
+	# While first components are equal...
+	# (Empty correctly doesn't equal remaining)
+	while { [ -n "$pA" ] || [ -n "$pB" ]; } && [ "${pA%%/*}" == "${pB%%/*}" ]; do	# the -n guard ends the loop once both paths are used up
+		# Remove them.
+		pA="${pA#*/}"
+		pB="${pB#*/}"
+	done
+	ans="$pA"	# what is left of A is the way down from the common ancestor
+	# Translate remaining components of $pB to ../s
+	while [ -n "$pB" ]; do
+		ans="$ans../"
+		pB="${pB#*/}"
+	done
+	# Double check; add dot to the end to enforce ending in a slash and handle empty ans
+	(cd "$AtoB" && [ "$ans." -ef /proc/self/fd/3 ]) 3<.	# fd 3 is opened on the original directory; from inside B, $ans must lead back to it (relies on /proc/self/fd)
+	# Yay
+	echo "$ans"	# the result is printed on stdout
+}
+
+function hash_file {	# Print the SHA-1 hash of file $1 on stdout (first field of sha1sum's output).
+	# Lop off the filename and binary indicator
+	sha1sum -b "$1" | sed -re 's/^([^ ]*).*$/\1/'	# keep only the text before the first space
+}
+
+function patchsync_sync {
+
+if [ "$1" == --dry-run ]; then
+	echo "Dry run mode."
+	dryrun=1
+	shift
+fi
 
 staging="$1"
-if ! [ -r "$staging/settings" ]; then
+if [ -r "$staging/settings" ]; then
+	echo "Using staging dir $staging"
+else
 	echo "Specify a staging directory containing a settings file!" 1>&2
 	exit 1
 fi
-cd "$staging"
-
-dryrun="$2"
-whichtochange="$3"
-CP2t=(cp2 --del)
-CP2in=("${CP2t[@]}" --filter='. filter' --delete-excluded)
-CP2out=("${CP2t[@]}" --filter='. filter' --no-t --checksum) # be nice to mtimes
-export LC_COLLATE="C"
+cd "$staging" || { echo "Failed to enter staging dir!" 1>&2; exit 1; }
+shift
 
 . settings
+type do_diff >/dev/null 2>&1 || { echo "do_diff is not defined!" 1>&2; exit 1; }
+type do_patch >/dev/null 2>&1 || { echo "do_patch is not defined!" 1>&2; exit 1; }
 
-echo "Copying in, checking for changes..."
-"${CP2in[@]}" --link-dest=../trunk-save -i "$trunk/" trunk/
-diff -r trunk{-save,} >/dev/null || trunkch=ch
-"${CP2in[@]}" --link-dest=../branch-save -i "$branch/" branch/
-diff -r branch{-save,} >/dev/null || branchch=ch
-cp2 --link-dest=../patch-save -i "$patch" patch
-diff -r patch{-save,} >/dev/null || patchch=ch
-echo "Done."
-
-if [ $trunkch ] || [ $branchch ] || [ $patchch ]; then
-	# Something changed.
-	# Update either branch or patch, whichever didn't change.
-	# If trunk changes, update branch, not patch.
-	if [ "$whichtochange" == 'branch' ] || ! [ $branchch ]; then
-		echo "Updating branch..."
-		"${CP2t[@]}" trunk/ branch/
-		do_patch
-		echo "Done."
-		copyout=1
-	elif [ "$whichtochange" == 'patch' ] || ! [ $patchch ]; then
-		echo "Updating patch..."
-		do_diff
-		echo "Done."
-		copyout=1
+whichtoupdate="$1"
+# patchsync --new doesn't need this any more except for identical-branch
+#if [ -z "$whichtoupdate" ] && [ -s whichtoupdate ]; then
+#	# Hook for patchsync --new
+#	whichtoupdate="$(< whichtoupdate)"
+#	echo "Updating $whichtoupdate according to staging dir."
+#el
+if [ -n "$whichtoupdate" ]; then
+	echo "Updating $whichtoupdate according to command line argument."
+else
+	echo "Synchronizing."
+fi
+
+filteropts=()
+! [ -e filters ] || filteropts=("${filteropts[@]}" --filter='. filters')
+# 'R *' or 'S *' disables filtering on the staging dir side.
+
+COPYIN=(cp2 --del --filter='R *' "${filteropts[@]}")
+COPYOUT=(cp2 --del --filter='S *' "${filteropts[@]}" --no-t --checksum) # be nice to mtimes
+
+# hash_dir foo/ ==> a hash code covering all of the shown files in foo/
+function hash_dir {	# $1: directory path with trailing slash; prints a single hash on stdout
+	# Itemize the dir (dry run against a nonexistent destination), extract the names of
+	# the ">f" (regular file) items, hash those files from inside $1, and hash the list of hashes.
+	"${COPYIN[@]}" -i -n "$1" nonexistent/ \
+		| sed -n -e '/^>f/{ s/^[^ ]* //; p }' \
+		| (cd "$1" && xargs --no-run-if-empty --delimiter='\n' sha1sum -b) \
+		| hash_file /dev/stdin
+}
+
+echo "Checking for changes..."
+hash_dir trunk/ >trunk-new-hash
+cmp trunk-{save,new}-hash &>/dev/null || { trunkch=1; echo "Trunk has changed"; }
+hash_file patch >patch-new-hash
+cmp patch-{save,new}-hash &>/dev/null || { patchch=1; echo "Patch has changed"; }
+hash_dir branch/ >branch-new-hash
+cmp branch-{save,new}-hash &>/dev/null || { branchch=1; echo "Branch has changed"; }
+
+# If we're in synchronization mode, decide what to update.
+if [ -z "$whichtoupdate" ] && [[ -n $trunkch || -n $branchch || -n $patchch ]]; then
+	if [ -e identical-branch-flag ] && ! [ $patchch ] && ! [ $branchch ]; then
+		# We still want to create an identical branch.
+		whichtoupdate=identical-branch
+	elif ! [ $branchch ]; then
+		# Trunk, patch, or both changed.  Update branch.
+		whichtoupdate=branch
+	elif ! [ $patchch ]; then
+		# Branch changed, and trunk may have also changed.  Update patch.
+		whichtoupdate=patch
 	else
-		echo "Conflict: both branch and patch changed!"
-		echo "Run patchsync <staging> <dry-run> [branch | patch] to"
-		echo "update the specified area from the others."
-		exit 1
+		# Branch and patch both changed.  A message appears later.
+		whichtoupdate=conflict
 	fi
-else
-	# Easy case
-	echo "Nothing changed."
+	#echo "Synchronization will update $whichtoupdate."
 fi
 
-if [ $copyout ]; then
-if ! [ $dryrun ]; then
-	! [ -e lock ] || { echo "Locked!  Please fix!"; exit 1; }
-	echo "patchsync lock file pid $$ date $(date)" >lock
+if [ -n "$whichtoupdate" ]; then
+
+# Always show what would happen if patch-new and branch-new were copied out.
+# (If there was a problem creating one of them, patchsync would have just
+# deleted it.)  But only actually copy them out and update synchronization
+# state if no error.
+error=
+
+# Don't let stuff from an old run confuse us.
+rm -rf patch-new branch-new
+
+function prepare_branch {	# Build branch-new/ from trunk/ plus the patch; on failure set error=1 and remove branch-new.
+	echo "Preparing updated branch..."
+	# No link-dest because we will modify and then link-dest when copying out
+	"${COPYIN[@]}" trunk/ branch-new/
+	do_patch patch branch-new || \
+		{ error=1; echo "Failed to prepare updated branch!" 1>&2; rm -rf branch-new; }	# later steps test whether branch-new exists
+}
+
+function prepare_patch {	# Build patch-new by diffing filtered copies of trunk/ and branch/; on failure set error=1 and remove patch-new.
+	echo "Preparing updated patch..."
+	# Link-dest is fine because these are temporary read-only copies
+	"${COPYIN[@]}" --link-dest=../trunk/ trunk/ trunk-tmp/
+	"${COPYIN[@]}" --link-dest=../branch/ branch/ branch-tmp/
+	do_diff trunk-tmp branch-tmp patch-new || \
+		{ error=1; echo "Failed to prepare updated patch!" 1>&2; rm -rf patch-new; }	# later steps test whether patch-new exists
+	rm -rf trunk-tmp branch-tmp	# the temporary copies are removed whether or not do_diff succeeded
+}
+
+case $whichtoupdate in
+(identical-branch)
+	echo "Creating identical branch..."
+	# No link-dest because we will link-dest when copying out
+	"${COPYIN[@]}" trunk/ branch-new/
+	echo "Creating empty patch..."
+	do_diff branch-new branch-new patch-new || \
+		{ error=1; echo "Failed to create empty patch!" 1>&2; rm -rf patch-new; }
+	;;
+(branch)
+	prepare_branch
+	;;
+(patch)
+	prepare_patch
+	;;
+(conflict)
+	error=1
+	cat <<EOF 1>&2
+CONFLICT: both branch and patch changed!
+Run patchsync <staging> {branch | patch} to
+update the specified thing from the others.
+I'll leave updated copies of both branch
+and patch in the staging directory to help
+you decide which way you want to update.
+EOF
+	prepare_branch
+	prepare_patch
+	;;
+(*)
+	echo "Internal error, whichtoupdate should not be $whichtoupdate!" 1>&2
+	exit 1
+	;;
+esac
+
+if ! [ $error ] && ! [ $dryrun ]; then
+	# Disable locking for now...
+	# ! [ -e lock ] || { echo "Staging dir is locked!  Delete the file \`lock' if the other instance of patchsync is gone." 1>&2; exit 1; }
+	# echo "patchsync lock file pid $$ date $(date)" >lock
 	
 	echo "Copying out..."
-	"${CP2out[@]}" -i branch/ "$branch/"
-	cp2 -i --checksum patch "$patch"
-	echo "Done."
-	echo "Noting..."
-	for i in trunk branch patch; do
-		rm -rf $i-save
-		mv $i{,-save}
+	! [ -e branch-new ] || {
+		hash_dir branch-new/ >branch-new-hash
+		"${COPYOUT[@]}" -i --link-dest="$(wdpp_from branch/)branch-new/" branch-new/ branch/
+		rm -rf branch-new
+	}
+	! [ -e patch-new ] || {	# nothing to do when no new patch was prepared
+		hash_file patch-new >patch-new-hash
+		# Don't use rsync because we might have to write through a symlink.
+		# Compare against patch-new (not the never-created "patch-work") so an identical patch is left untouched.
+		cmp -s patch-new patch || { echo "> patch"; cp --preserve=timestamps patch-new patch; }
+		rm -f patch-new
+	}
+	
+	echo "Remembering synchronized state for next time..."
+	for i in trunk patch branch; do
+		mv $i-new-hash $i-save-hash
 	done
-	echo "Done."
 	
-	rm lock
+	# rm lock
 else
-	echo "Dry run; no action.  You can inspect the results if you want."
-	echo "Fake copying out..."
-	"${CP2out[@]}" -in branch/ "$branch/"
-	cp2 -in --checksum patch "$patch"
-	echo "Done."
+	echo "Would copy out as follows:"
+	! [ -e branch-new ] || {
+		hash_dir branch-new/ >branch-new-hash
+		"${COPYOUT[@]}" -n -i --link-dest="$(wdpp_from branch/)branch-new/" branch-new/ branch/
+		#rm -rf branch-new
+	}
+	! [ -e patch-new ] || cmp -s patch-new patch || {	# announce the patch only if a new one exists and differs from the current one
+		hash_file patch-new >patch-new-hash
+		# Don't use rsync because we might have to write through a symlink.
+		echo "> patch"
+		#cp --preserve=timestamps patch-new patch
+		#rm -f patch-new
+	}
+	echo "Would remember synchronized state for next time."
+	echo "I'm leaving \"new\" files in the staging dir so you can inspect them."
+fi
+
+else # whichtoupdate
+	# Easy case
+	echo "Nothing changed."
+	rm -f {trunk,patch,branch}-new-hash
 fi
+
+if [ $error ]; then
+	echo "Synchronization failed." 1>&2
+	exit 1
+else
+	echo "Synchronization finished."
+	if [ -e identical-branch-flag ]; then
+		if ! [ $dryrun ]; then
+			rm identical-branch-flag
+			echo "Removed identical-branch-flag."
+		else
+			echo "Would remove identical-branch-flag."
+		fi
+	fi
+	# Yay!  Done patchsync_sync!
 fi
+}
+
+function patchsync_new {	# Set up a new staging dir; arguments: <trunk> <patch> <branch> <staging>; exits 1 on bad usage
+	if [ $# != 4 ]; then
+		echo "Expected 4 arguments after --new, got $#." 1>&2
+		echo "usage: patchsync --new <trunk> <patch> <branch> <staging>" 1>&2
+		exit 1
+	fi
+	
+	# Set up arguments.  The paths are as the caller gave them; we change directories below.
+	trunk="$1"
+	patch="$2"
+	branch="$3"
+	staging="$4"
+	
+	# Sanity checks before anything is created: no existing staging dir, and the trunk must be a directory.
+	! [ -e "$staging" ] || { echo "Staging dir already exists!" 1>&2; exit 1; }
+	[ -d "$trunk" ] || { echo "Trunk does not exist!" 1>&2; exit 1; }
+	
+	# Create staging dir.
+	mkdir "$staging"
+	wdpp="$(wdpp_from "$staging")"
+	cd "$staging"
+	echo "Created staging dir at $staging."
+	
+	# Re-express relative paths from inside the staging dir; absolute paths must stay untouched.
+	[[ "$trunk" == /* ]] || trunk="$wdpp$trunk"
+	[[ "$patch" == /* ]] || patch="$wdpp$patch"
+	[[ "$branch" == /* ]] || branch="$wdpp$branch"
+	
+	# Create links to areas
+	ln -s "$trunk" trunk
+	ln -s "$patch" patch
+	ln -s "$branch" branch
+	echo "Created links to areas."
+	
+	# This approach is better than setting whichtochange because we'll notice
+	# if the user puts something into one of the areas we created before first
+	# sync.
+	function create_patch {
+		touch "$patch"
+		hash_file patch >patch-save-hash
+		echo "Created empty patch."
+	}
+	function create_branch {
+		mkdir "$branch"
+		# Can't do hash_dir because ${COPYIN[@]} hasn't been set <== no filters
+		hash_file /dev/null >branch-save-hash
+		echo "Created empty branch."
+	}
+	
+	if [ -e "$patch" ] && ! [ -e "$branch" ]; then
+		create_branch
+		echo "Patch exists; branch will be calculated when you first synchronize."
+	elif [ -e "$branch" ] && ! [ -e "$patch" ]; then
+		create_patch
+		echo "Branch exists; patch will be calculated when you first synchronize."
+	elif ! [ -e "$patch" ] && ! [ -e "$branch" ]; then
+		create_patch
+		create_branch
+		echo "Neither branch nor patch exists;"
+		echo "a branch identical to the trunk will be created when you first synchronize."
+		echo flag >identical-branch-flag
+		echo "Created identical-branch-flag to tell first run of patchsync about this."
+	else
+		echo "Both patch and branch exist."
+		echo "You will need to specify whether to overwrite the"
+		echo "patch or the branch when you first synchronize!"
+	fi
+	
+	# Write settings file.
+	cat >settings <<END
+# Define do_diff and do_patch here!
+END
+	echo "Wrote settings file placeholder."
+	
+	echo ""
+	echo "Patchsync initialized."
+	echo "Now add your definitions of do_diff and do_patch to the settings file,"
+	echo "add a \"filters\" file if you wish, and perform the first sync."
+}
+
+function patchsync_help {	# Print the version banner and a usage summary on stdout.
+	cat <<EOF
+Patchsync version 2 by Matt McCutchen
+usage: patchsync [--dry-run] <staging> [branch | patch]
+       patchsync --new <trunk> <patch> <branch> <staging>
+Please read the top of the script for complete documentation.
+EOF
+}
 
-exit 0
+case "$1" in	# dispatch on the first command-line argument
+(--help|--version)
+	patchsync_help ;;
+(--dry-run)
+	patchsync_sync "$@" ;;	# --dry-run is passed along; patchsync_sync handles it itself
+(--new)
+	shift
+	patchsync_new "$@" ;;
+(''|--*)	# no argument, or any other argument starting with --: usage on stderr, exit status 1
+	patchsync_help 1>&2
+	exit 1 ;;
+(*)	# anything else is passed to patchsync_sync, which treats it as the staging directory
+	patchsync_sync "$@" ;;
+esac