#!/bin/bash
#
# git-subtree-lite: import the content of an upstream repository, as of a given
# commit, into a subtree of the current repository, recording the imported
# commit ID in "<path>/.upstream".  See LONG_USAGE below for the user-facing
# description.
#
# Requires GNU coreutils ("mktemp --tmpdir", "readlink --canonicalize").

# "set -e" isn't active inside command substitutions unless we use POSIX mode
# (which I don't want to do right now) or inherit_errexit (which would introduce
# a dependency on bash >= 4.4). So implement our own version for now.
# ~ Matt 2016-10-28
set -o errtrace
trap 'exit $?' ERR

# git-sh-setup prefixes USAGE with "usage: git subtree-lite ", which is why the
# first alternative starts directly with the subcommand name.
USAGE="add <path> <upstream-commit>
   or: git subtree-lite update <path> <upstream-commit>
   or: git subtree-lite diff [<diff-options>...] <path>
   or: git subtree-lite import <path> [<upstream-commit>]"

# "read -d ''" returns non-zero when it hits end of input, hence "|| true" so
# the ERR trap does not fire.
read -r -d '' LONG_USAGE <<'LONG_USAGE' || true
"git subtree-lite" imports the content of another repository (called
"upstream") as of a particular commit into a subtree of your repository. It
remembers the commit ID that was imported in an ".upstream" file in the
subtree, so if you modify the subtree and then import a different upstream
commit, the changes will be merged.

You can use "git subtree-lite" to bundle libraries, as an alternative to "git
submodule" or "git subtree". Semantically, a subtree managed with this tool is
equivalent to a submodule pointer with a layer of project-specific
modifications. Like "git subtree" and unlike a git submodule, the main
project's version of the the content is in the main tree, so no special
handling is needed to read or write it. But like a git submodule and unlike
"git subtree", the upstream commit pointer is just data that can be merged and
reverted, and this tool doesn't clutter the main project history with extra
merge commits. The upstream commit pointer doesn't enjoy any of the special
tool support of submodules, but most of it isn't relevant with the content in
the main tree (an exception might be {fetch,push}.recurseSubmodules for
developers who update or diff the subtree).

Operations that access the upstream repository require that you have a local
copy of it and set the "subtree-lite.<path>.repo" configuration option to its
path. The path may be absolute or relative to the .git directory of the
current repository (the common directory if you use "git worktree"). This
mechanism is subject to change and may be made more sophisticated and
automated (like submodules) in the future.

Further background: http://marc.info/?l=git&m=147752326122139&w=2

git subtree-lite add <path> <upstream-commit>

    Add a subtree from the given upstream commit. Follow the directions to
    configure the path to the upstream repository.

git subtree-lite update <path> <upstream-commit>

    Update the subtree to be based on the given upstream commit.

(To remove a subtree, just use "git rm -r".)

git subtree-lite diff [<diff-options>...] <path>

    Diff the subtree against the original upstream content. Diff options are
    accepted, but paths to limit the diff currently are not supported.

git subtree-lite import <path> [<upstream-commit>]

    Low-level command: generate the "imported commit" corresponding to the
    given upstream commit (with the content moved to the subtree and the
    .upstream file added) and print its ID. Only the tree of this commit is
    meaningful.
LONG_USAGE
OPTIONS_SPEC=

# Let's simplify matters for now and not allow running in a subdirectory.
. "$(git --exec-path)/git-sh-setup"
require_work_tree

# Remove the scratch directory created below.
function cleanup {
  rm -rf -- "$tmpdir"
}

tmpdir="$(mktemp --tmpdir -d git-subtree-lite.XXXXXXXXXX)"
trap "cleanup" EXIT

# Path of the temporary bare repository; empty until ensure_init_tmp_repo has
# run.  Set explicitly so a same-named variable inherited from the environment
# cannot make us skip the initialization.
tmp_repo=""

# Create the temporary bare repository (once per shell) and point its
# alternates file at the object store of the upstream repository, so upstream
# objects are readable from it without being copied.
# Globals: tmp_repo (written), tmpdir and subtree_repo (read).
function ensure_init_tmp_repo {
  if [[ -z "$tmp_repo" ]]; then
    tmp_repo="$tmpdir/repo"
    git init --quiet --bare "$tmp_repo"
    (
      cd "$subtree_repo" &&
        readlink --canonicalize "$(git rev-parse --git-path objects)"
    ) >"$tmp_repo/objects/info/alternates"
  fi
}

# Resolve the configuration of the subtree at $subtree_path.
# Globals read:    subtree_path
# Globals written: subtree_repo (canonical path of the upstream repository),
#                  upstream_file ("$subtree_path/.upstream"),
#                  cur_upstream_commit (content of that file, or "" when the
#                  file does not exist).
# Dies with setup instructions when "subtree-lite.$subtree_path.repo" is unset.
function setup_subtree {
  # XXX Introduce a name like submodules have? Either abuse .gitmodules and
  # call "git submodule--helper name", or reimplement the lookup?
  local opt_name="subtree-lite.$subtree_path.repo"
  # Declared separately from the assignment so that a failing "git rev-parse"
  # is not masked by the exit status of "local".
  local common_dir
  common_dir="$(git rev-parse --git-common-dir)"

  subtree_repo="$(git config "$opt_name")" ||
    die "Please get a local copy of the upstream repository and run:

git config $opt_name <path>

before using this tool. The path may be absolute or relative to
$(readlink --canonicalize "$common_dir") ."

  # A relative path is interpreted relative to the common git directory.
  subtree_repo="$(cd "$common_dir" && readlink --canonicalize "$subtree_repo")"

  upstream_file="$subtree_path/.upstream"
  if [[ -f "$upstream_file" ]]; then
    cur_upstream_commit="$(< "$upstream_file")"
  else
    cur_upstream_commit=""
  fi
}

# Run "git commit-tree" with fixed author/committer identity and dates, so the
# same tree and message always produce the same commit ID.
# Arguments: passed through to "git commit-tree".
function reproducible_commit_tree {
  GIT_AUTHOR_NAME='git-subtree-lite' \
  GIT_AUTHOR_EMAIL='git-subtree-lite@invalid' \
  GIT_AUTHOR_DATE='@0 +0000' \
  GIT_COMMITTER_NAME='git-subtree-lite' \
  GIT_COMMITTER_EMAIL='git-subtree-lite@invalid' \
  GIT_COMMITTER_DATE='@0 +0000' \
    git commit-tree "$@"
}

# Print the full ID of the commit that expression $1 names in the upstream
# repository; fails if it does not resolve to a commit there.
function canonicalize_upstream_commit {
  (cd "$subtree_repo" && git rev-parse --verify "$1^{commit}")
}

# Build the "imported commit" for upstream commit $1: the upstream tree moved
# under $subtree_path/ plus a .upstream file holding the commit ID.  The commit
# is created in the temporary repository, fetched into the main repository,
# and its ID is printed on stdout.
#
# TODO: We should cache this, but even if we don't stop git from GC-ing the
# underlying commits, how to stop the mapping from growing indefinitely?
function import_commit {
  local commit="$1"
  local fetch_status

  ensure_init_tmp_repo
  (
    cd "$tmp_repo" || exit
    # Start from an empty index on every import.
    rm -f -- index
    git read-tree -i --prefix="$subtree_path/" "$commit"
    # Hm, I suppose this would be a great place to add a way to exclude files
    # the superproject doesn't care about. But not now.

    # Create the .upstream file. If we do it here, then the right thing ends up
    # happening during both add/update and diff without any more code.
    uf_blob="$(printf '%s\n' "$commit" | git hash-object -t blob -w --stdin)"
    git update-index --add --cacheinfo "100644,$uf_blob,$subtree_path/.upstream"
    t="$(git write-tree)"
    # Reuse the same object for the same upstream commit until the repository is
    # GC-ed.
    c="$(reproducible_commit_tree -m "git-subtree-lite temporary commit" "$t")"
    git update-ref HEAD "$c"
  )

  # XXX: The incremental fetch protocol is only based on detection of common
  # commits, so unless we already have the exact same imported commit, this
  # fetch will send the entire tree. If we cache previous imported commits in
  # the main repository, then we can add it as an alternate of the temporary
  # repository (!) and the previous imported commits will be detected as common.
  #
  # The grep only filters one expected warning out of the fetch output; "|| true"
  # covers the case where nothing is left to print.
  git fetch --quiet "$tmp_repo" HEAD 2>&1 |
    { grep -v --line-regexp 'warning: no common commits' >&2 || true; }
  # The pipeline's status is that of the filter, so check the fetch itself;
  # otherwise a stale FETCH_HEAD could be reported below.
  fetch_status="${PIPESTATUS[0]}"
  [[ "$fetch_status" -eq 0 ]] ||
    die "Error: failed to fetch the imported commit from the temporary repository."

  git rev-parse FETCH_HEAD  # to stdout
}

# add <path> <upstream-commit>: create a new subtree at <path> from the given
# upstream commit.  Refuses to run if <path> already exists on disk or in the
# index.
function cmd_add {
  [[ $# -eq 2 ]] || usage
  subtree_path="$1"
  new_upstream_commit_expr="$2"
  setup_subtree

  ! [[ -e "$subtree_path" ]] ||
    die "Error: $subtree_path already exists on the filesystem."
  ! [[ -n "$(git ls-files -- "$subtree_path")" ]] ||
    die "Error: $subtree_path already exists in the git index."

  new_upstream_commit="$(canonicalize_upstream_commit "$new_upstream_commit_expr")"
  new_imported_commit="$(import_commit "$new_upstream_commit")"
  git read-tree --prefix= -u "$new_imported_commit"
}

# update <path> <upstream-commit>: three-way merge the current index (ours)
# with the new imported commit (theirs), using the imported commit of the
# currently recorded upstream commit as the base.
function cmd_update {
  [[ $# -eq 2 ]] || usage
  subtree_path="$1"
  new_upstream_commit_expr="$2"
  setup_subtree

  [[ -n "$cur_upstream_commit" ]] ||
    die "Error: $subtree_path is not a subtree set up with this tool."

  new_upstream_commit="$(canonicalize_upstream_commit "$new_upstream_commit_expr")"
  new_imported_commit="$(import_commit "$new_upstream_commit")"
  old_imported_commit="$(import_commit "$cur_upstream_commit")"
  tree2="$(git write-tree)"
  # Note: the .upstream file is already in the imported commits.
  git read-tree -mu "$old_imported_commit" "$tree2" "$new_imported_commit"
  git merge-index -o git-merge-one-file -a
}

# import <path> [<upstream-commit>]: print the ID of the imported commit for
# the given upstream commit, defaulting to the one recorded in .upstream.
function cmd_import {
  if (( $# < 1 || $# > 2 )); then
    usage
  fi
  subtree_path="$1"
  setup_subtree

  if [[ -n "$2" ]]; then
    new_upstream_commit="$(canonicalize_upstream_commit "$2")"
  elif [[ -n "$cur_upstream_commit" ]]; then
    new_upstream_commit="$cur_upstream_commit"
  else
    die "Error: $subtree_path is not added yet and no upstream commit was given."
  fi
  import_commit "$new_upstream_commit"
}

# diff [<diff-options>...] <path>: diff the subtree against the imported commit
# of its recorded upstream commit.  The last argument is the subtree path; all
# preceding arguments are passed to "git diff".
function cmd_diff {
  [[ $# -ge 1 ]] || usage
  subtree_path="${@: -1:1}"
  setup_subtree

  # Same precondition as "update": without a recorded upstream commit there is
  # nothing to diff against, so fail with a clear message.
  [[ -n "$cur_upstream_commit" ]] ||
    die "Error: $subtree_path is not a subtree set up with this tool."

  # This could be expensive, but it's the only way to honor uncommitted changes
  # (and --cached). Even if we added the subtree repo to
  # GIT_ALTERNATE_OBJECT_DIRECTORIES, git has no way to diff a subtree of the
  # worktree against the root of a given commit.
  cur_imported_commit="$(import_commit "$cur_upstream_commit")"
  git diff "${@:1:$#-1}" "$cur_imported_commit" -- "$subtree_path"
}

# Dispatch on the subcommand given as the first argument.
[[ $# -ge 1 ]] || usage
cmd="$1"
shift
case "$cmd" in
  (add) cmd_add "$@" ;;
  (update) cmd_update "$@" ;;
  (import) cmd_import "$@" ;;
  (diff) cmd_diff "$@" ;;
  (*) die "Unknown command $cmd." ;;
esac