#!/usr/bin/env ruby
##
## Author: Steve Purcell, http://www.sanityinc.com/
## Obtain the latest version of this software here: http://git.sanityinc.com/
##

# XXX: make backwards compatible
# TODO: import parallel darcs repos as git branches, identifying branch points
# TODO: use default repo if none was supplied
# TODO: handle *-darcs-backupN files?

require 'ostruct'
require 'rexml/document'
require 'optparse'
require 'yaml'

# Explicitly setting a time zone would cause darcs to only output in
# that timezone hence we couldn't get the actual patch TZ
# ENV['TZ'] = 'GMT0'

# YAML file (inside the git dir) mapping darcs patch hashes to git commit ids.
GIT_PATCHES = ".git/darcs_patches"
# Author map that is picked up automatically when --author-map is not given.
DEFAULT_AUTHOR_MAP_FILE = ".git/darcs_author_substitutions"

# -------------------------------------------------------------------------------
# Usage info and argument parsing
# -------------------------------------------------------------------------------

# Global option store, filled in by the option parser below.
OPTIONS = { :default_email => nil,
            :list_authors => false,
            :author_map => nil,
            :clean_commit_messages => false,
            :num_patches => nil }

opts = OptionParser.new do |parser|
  parser.banner = <<-end_usage
    Creates git repositories from darcs repositories

    usage: darcs-to-git DARCSREPODIR [options]

    1. Create an *empty* directory that will become the new git repository
    2. From inside that directory, run this program, passing the location
       of the local source darcs repo as a parameter

    The program will git-init the empty directory, and migrate all patches
    in the source darcs repo into commits in that repository.

    Thereafter, incremental patch conversion from the same source repo is
    possible by repeating step 2.

    NOTE: In case of multiple tags, only the first one will be applied.
          If you really need to, you can manually identify the patch and use
          \"git tag -f <tag> <commit-id>\".

    OPTIONS
  end_usage

  parser.on('--default-email ADDRESS',
            "Set the email address used when no explicit address is given") do |m|
    OPTIONS[:default_email] = m
  end
  parser.on('--list-authors', "List all unique authors in source repo and quit.") do |m|
    OPTIONS[:list_authors] = m
  end
  parser.on('--author-map FILE',
            "Supply a YAML file that maps committer names to canonical author names") do |f|
    OPTIONS[:author_map] = f
  end
  parser.on('--patches [N]', OptionParser::DecimalInteger,
            "Only pull N patches.") do |n|
    # N is declared optional, so a bare `--patches` yields nil; treat that
    # like any other invalid count instead of crashing on `nil >= 0`.
    abort parser.to_s unless n && n >= 0
    OPTIONS[:num_patches] = n
  end
  parser.on('--clean-commit-messages',
            "Don't note darcs hashes in git commit messages (not recommended)") do |_n|
    OPTIONS[:clean_commit_messages] = true
  end
  parser.on('--version', "Output version information and exit") do
    puts <<-EOF
darcs-to-git 0.1

Copyright (c) 2009 Steve Purcell, http://www.sanityinc.com/
License MIT: This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.
EOF
    exit
  end
  parser.on('-h', '--help', "Show this message") do
    puts parser.to_s
    exit
  end
end
opts.parse!

# Path of the source darcs repository (first positional argument).
SRCREPO = ARGV[0]
if SRCREPO.nil?
  abort opts.to_s
elsif !File.exist?(File.join(SRCREPO, '_darcs'))
  abort "#{SRCREPO} is not a valid local darcs repository"
end

# -------------------------------------------------------------------------------
# Utilities
# -------------------------------------------------------------------------------

# Echoes and runs a command via Kernel#system.
# Raises RuntimeError when the command fails or cannot be started.
def run(*args)
  puts "Running: #{args.inspect}"
  system(*args) || raise("Failed to run: #{args.inspect}")
end

# Echoes and runs a command, returning everything it wrote to stdout.
# The arguments are handed to the process as an argv array, so no shell is
# involved and arguments containing quotes or spaces are passed through intact.
# Raises RuntimeError when the command cannot be started or exits non-zero.
def output_of(*args)
  puts "Running: #{args.inspect}"
  output = begin
    IO.popen(args.map { |a| a.to_s }, 'r') { |io| io.read }
  rescue SystemCallError => e
    raise "Failed to run: #{args.inspect} (#{e.message})"
  end
  raise "Failed to run: #{args.inspect}" unless $?.success?
  output
end

# Only supply Symbol#to_proc on rubies that lack it; never override the
# built-in implementation.
unless Symbol.method_defined?(:to_proc)
  class Symbol
    def to_proc
      lambda { |o| o.send(self) }
    end
  end
end

class String
  # darcs uses '[_\hh_]' to quote non-ascii characters where 'h' is a
  # hexadecimal digit. Each such escape is replaced by the byte it names.
  # Returns a new binary (ASCII-8BIT) string; all other text, including
  # literal '=' characters, is left untouched.
  def darcs_unescape
    dup.force_encoding(Encoding::BINARY).gsub(/\[_\\(\h\h)_\]/) { $1.hex.chr }
  end
end

# -------------------------------------------------------------------------------
# Map darcs authors to git authors
# -------------------------------------------------------------------------------

# Hash from darcs author strings to canonical author strings. Lookups always
# return a two-element [name, email] array.
class AuthorMap < Hash
  # Email used when an author string carries no address of its own.
  attr_accessor :default_email

  # Builds an AuthorMap from the YAML mapping stored in +filename+.
  # An empty file yields an empty map.
  def self.load(filename)
    map = new
    map.update(YAML.load_file(filename) || {})
    map
  end

  # Gives the name and email for +author+, applying the substitution stored
  # in the map if there is one.
  def [](author)
    name_and_email(super || author)
  end

  private

  # Splits an author string into [name, email]:
  #   "Name <user@host>"        => ["Name", "user@host"]
  #   "user@host"/"<user@host>" => ["user", "user@host"]
  #   anything else             => [author, default_email]
  def name_and_email(author)
    case author
    when /^\s*(\S.*?)\s*\<(\S+@\S+?)\>\s*$/
      [$1, $2]
    when /^\s*\<?(\S+@\S+?)\>?\s*$/
      email = $1
      [email.split('@').first, email]
    else
      [author, default_email]
    end
  end
end

# -------------------------------------------------------------------------------
# Storing a history of related darcs and git commits
# -------------------------------------------------------------------------------

# Persistent mapping from darcs patch identifiers to git commit ids.
class CommitHistory
  # Loads the mapping from +patch_file_name+ if that file exists; otherwise
  # tries to rebuild it from "darcs-hash:" lines in the git log.
  # Raises RuntimeError if the file exists but does not contain a YAML hash.
  def initialize(patch_file_name)
    @patch_file_name = patch_file_name
    @darcs_patches_in_git = {}
    if File.exist?(patch_file_name)
      @darcs_patches_in_git = YAML.load_file(patch_file_name)
      unless @darcs_patches_in_git.is_a?(Hash)
        raise "yaml hash not found in #{patch_file_name}"
      end
    else
      # TODO: consider doing this unconditionally, since that
      # might allow merging between repositories created with darcs-to-git
      fill_from_darcs_hash_comments
    end
  end

  # Remembers that darcs patch +identifier+ became git commit +commit_id+
  # and writes the whole mapping back to disk.
  def record_git_commit(commit_id, identifier)
    # using one file per darcs patch would be an incredible waste of space
    # on my system one file takes up 4K even if only a few bytes are in it
    # hence we just use a simple YAML hash
    @darcs_patches_in_git[identifier] = commit_id
    save
  end

  # Returns the git commit id for a patch, or nil/false if it is not in git.
  # Tags are looked up by tag name in git itself; ordinary patches are looked
  # up by darcs identifier in the stored mapping.
  def find_git_commit(is_tag, git_tag_name, identifier)
    return nil if empty_repo?
    return @darcs_patches_in_git[identifier] unless is_tag

    tag_listing = begin
      output_of("git", "tag", "-l")
    rescue StandardError
      "" # best effort: a failing tag listing counts as "no tags"
    end
    tag_listing.split(/\r?\n/).include?(git_tag_name) &&
      output_of("git", "rev-list", "--max-count=1", "tags/#{git_tag_name}").strip
  end

  private

  # Writes the current mapping to the patch file as YAML.
  def save
    File.open(@patch_file_name, 'w') do |f|
      YAML.dump(@darcs_patches_in_git, f)
    end
  end

  # True when the git repository in the current directory has no HEAD commit.
  def empty_repo?
    !system("git rev-parse --verify HEAD >/dev/null 2>&1")
  end

  # Rebuilds the mapping from "darcs-hash:" lines found in git log output.
  # Entries that cannot be parsed are skipped; the file is written once, and
  # only if at least one entry was recovered.
  def fill_from_darcs_hash_comments
    return if empty_repo?
    log = output_of("git", "log", "--grep=darcs-hash:")
    recovered = false
    Array(log.split(/^commit /m)[1..-1]).each do |entry|
      commit_id, identifier = entry.scan(/^([a-z0-9]+$).*darcs-hash:(.*?)$/m).flatten
      next unless commit_id && identifier
      @darcs_patches_in_git[identifier] = commit_id
      recovered = true
    end
    save if recovered
  end
end

# -------------------------------------------------------------------------------
# Reading darcs patches and applying them to a git repo
# -------------------------------------------------------------------------------

# One patch (or tag) of the source darcs repository.
class DarcsPatch
  attr_accessor :source_repo, :author, :date, :inverted, :identifier,
                :name, :is_tag, :git_tag_name, :comment
  attr_reader :git_author_name, :git_author_email

  # Builds a patch from one <patch> element of `darcs changes --xml` output.
  # The git author name/email are resolved through AUTHOR_MAP.
  def initialize(source_repo, patch_xml)
    self.source_repo = source_repo
    self.author = patch_xml.attribute('author').value.darcs_unescape
    self.date = darcs_date_to_git_date(patch_xml.attribute('date').value,
                                       patch_xml.attribute('local_date').value)
    self.inverted = (patch_xml.attribute('inverted').to_s == 'True')
    self.identifier = patch_xml.attribute('hash').to_s
    self.name = element_text(patch_xml, 'name') || 'Unnamed patch'
    self.comment = element_text(patch_xml, 'comment')
    # A patch named "TAG foo" is a darcs tag; whitespace and colons in the
    # tag name are replaced by underscores for git.
    if (self.is_tag = (self.name =~ /^TAG (.*)/))
      self.git_tag_name = $1.gsub(/[\s:]+/, '_')
    end
    @git_author_name, @git_author_email = AUTHOR_MAP[author]
  end

  # Patches are ordered by their darcs identifier.
  def <=>(other)
    self.identifier <=> other.identifier
  end

  # Commit message for git: the patch name (prefixed with "UNDO: " for
  # inverted patches, omitted when it looks like "[word @ digits]"), the
  # comment, and the darcs hash unless --clean-commit-messages was given.
  def git_commit_message
    title = (inverted ? "UNDO: #{name}" : name) unless name =~ /^\[\w+ @ \d+\]/
    hash_note = "darcs-hash:#{identifier}" unless OPTIONS[:clean_commit_messages]
    [title, comment, hash_note].compact.join("\n\n")
  end

  # Returns all patches of +repo+ in the order `darcs changes --reverse`
  # lists them.
  def self.read_from_repo(repo)
    xml = output_of("darcs", "changes", "--reverse", "--repodir=#{repo}", "--xml", "--summary")
    REXML::Document.new(xml).get_elements('changelog/patch').map do |p|
      DarcsPatch.new(repo, p)
    end
  end

  # Return committish for corresponding patch in current git repo, or false/nil
  def id_in_git_repo
    @git_commit ||= COMMIT_HISTORY.find_git_commit(is_tag, git_tag_name, identifier)
  end

  # Pulls this patch into the working darcs repo and commits the result to
  # git, unless the patch is already present in git.
  def pull_and_apply
    puts "\n" + ("=" * 80)
    puts "PATCH : #{name}"
    puts "DATE : #{date}"
    puts "AUTHOR: #{author} => #{git_author_name} <#{git_author_email}>"
    puts "=" * 80
    if id_in_git_repo
      puts "Already imported to git as #{id_in_git_repo}"
      return
    end
    pull
    system("git", "status")
    commit_to_git_repo
  end

  private

  # Unescaped text of the first +tag+ child of +patch_xml+, or nil when the
  # element or its text is missing.
  def element_text(patch_xml, tag)
    patch_xml.get_elements(tag).first.get_text.value.darcs_unescape
  rescue StandardError
    nil
  end

  # Pulls exactly this patch from the source repo into the current directory.
  # Raises RuntimeError if darcs reports a dirty working directory before the
  # pull, or still does after a revert.
  def pull
    unless darcs_reports_clean_repo?
      raise "Darcs reports dirty repo before pulling #{identifier}; confused, so aborting"
    end
    run("darcs", "pull", "--all", "--quiet",
        "--match", "hash #{identifier}",
        "--set-scripts-executable", source_repo)
    unless darcs_reports_clean_repo?
      puts "Darcs reports dirty directory: assuming conflict that is fixed by a later patch... reverting"
      run("darcs revert --all")
      # darcs2 creates these; -exec avoids mangling names that contain whitespace
      run("find . -name '*-darcs-backup0' -exec rm -f {} +")
    end
    unless darcs_reports_clean_repo?
      system("darcs whatsnew -sl")
      raise "Failed to clean repo, see above"
    end
  end

  # Truthy when `darcs whatsnew -sl` reports nothing besides the .git directory.
  # Backslashes are doubled so that grep really receives '\.' (a literal dot);
  # inside backticks a single '\.' would reach the shell as a bare '.'.
  def darcs_reports_clean_repo?
    `darcs whatsnew -sl | grep -E -v '^a (\\./)?\\.git(/|$)'` =~ /^(No changes!)?$/
  end

  # Records the current working tree in git: an annotated tag for darcs tags,
  # otherwise a commit (only if something changed) whose id is then stored in
  # COMMIT_HISTORY. Author/committer identity and date come from the patch.
  def commit_to_git_repo
    ENV['GIT_AUTHOR_NAME'] = ENV['GIT_COMMITTER_NAME'] = git_author_name
    ENV['GIT_AUTHOR_EMAIL'] = ENV['GIT_COMMITTER_EMAIL'] = git_author_email
    ENV['GIT_AUTHOR_DATE'] = ENV['GIT_COMMITTER_DATE'] = date
    if is_tag
      run("git", "tag", "-a", "-m", git_commit_message, git_tag_name)
    else
      new_files = git_new_files
      run(*(["git", "add"] + new_files)) if new_files.any?
      if git_changed_files.any? || new_files.any?
        run("git", "commit", "-a", "-m", git_commit_message)
      end
      # get full id of last commit and associate it with the patch id
      commit_id = output_of("git", "log", "-n1").scan(/^commit ([a-z0-9]+$)/).flatten.first
      COMMIT_HISTORY.record_git_commit(commit_id, identifier)
    end
  end

  # Calculates a git-friendly date (e.g., timezone CET described as
  # +0100) by using the two date fields that darcs gives us: a list
  # of numbers describing the UTC time and a local time formatted in
  # a human-readable format. We could parse the local time and
  # derive the timezone offset from the timezone name. but timezones
  # aren't well-defined, so we ignore the timezone name and instead
  # calculate the timezone offset ourselves by calculating the
  # difference between local time and UTC time.
  #
  # Raises RuntimeError if +utc+ is not a 14-digit timestamp.
  def darcs_date_to_git_date(utc, local)
    unless utc =~ /^(\d{4})(\d\d)(\d\d)(\d\d)(\d\d)(\d\d)$/
      raise "Wrong darcs date format"
    end
    utc_time = Time.utc($1, $2, $3, $4, $5, $6)
    # example: Mon Oct  2 14:23:28 CEST 2006
    # everything except timezone name is fixed-length, if parsing
    # fails we just use UTC
    pat = /^\w\w\w (\w\w\w) ([ 1-9]\d) ([ 0-9]\d)\:(\d\d)\:(\d\d) \w* (\d\d\d\d)/
    local_time = pat =~ local ? Time.utc($6, $1, $2, $3, $4, $5) : utc_time
    offs = (local_time - utc_time).to_i # time offset in whole seconds
    hours, remainder = offs.abs.divmod(3600)
    t = local_time
    # formats the above example as: 2006-10-02 14:23:28 +0200
    sprintf("%4d-%02d-%02d %02d:%02d:%02d %s%02d%02d",
            t.year, t.month, t.day,
            t.hour, t.min, t.sec,
            offs < 0 ? "-" : "+", hours, remainder / 60)
  end

  # Names of files whose `git ls-files -t` status code is listed in +wanted+.
  def git_ls_files(wanted)
    listing = output_of("git", "ls-files", "-t", "-o", "-m", "-d", "-z",
                        "-X", ".git/info/exclude")
    listing.scan(/(.?) (.*?)\0/m).map do |code, name|
      name if wanted.include?(code)
    end.compact
  end

  # Files reported with status code "?".
  def git_new_files
    git_ls_files(["?"])
  end

  # Files reported with status code "?", "R" or "C".
  def git_changed_files
    git_ls_files(%w(? R C))
  end
end

# Prints a YAML mapping from each distinct darcs author to the git author
# string it currently resolves to, for use as an author map template.
def extract_authors(patches)
  unique_authors = {}
  patches.each do |p|
    unique_authors[p.author] =
      "#{p.git_author_name}" + (p.git_author_email.nil? ? "" : " <#{p.git_author_email}>")
  end
  puts "# You can use the following output as a starting point for an author_map"
  puts "# Just fill in the proper text after the colon; put email addresses in"
  puts "# angle brackets. You can remove any lines that look OK to you."
  # TODO: Can we make the output sorted?
  puts YAML::dump( unique_authors )
end

# -------------------------------------------------------------------------------
# Pre-flight checks
# -------------------------------------------------------------------------------

# Numeric components of every x.y.z string printed by `darcs -v`.
DARCS_VERSION = output_of(*%w(darcs -v)).scan(/(\d+)\.(\d+)\.(\d+)/).flatten.map {|v| v.to_i}

# True when +repo+ is reported by `darcs show repo` as a darcs-2 format repo.
def darcs2_repo?(repo)
  begin
    output_of("darcs", "show", "repo", "--repodir=#{repo}") =~ /Format:.*darcs-2/
  rescue
    # darcs1 does not have a "show" command, so we get an exception
    false
  end
end

# Version arrays are compared with Array#<=> directly rather than by mixing
# Comparable into Array.
unless (DARCS_VERSION <=> [1, 0, 7]) > 0
  STDERR.write("WARNING: your darcs appears to be old, and may not work with this script\n")
end

# -------------------------------------------------------------------------------
# Initialise the working area
# -------------------------------------------------------------------------------

ENV['GIT_PAGER'] = ENV['PAGER'] = "cat" # so that pager of git-log doesn't halt conversion

unless File.directory?("_darcs")
  puts "Initialising the working area."
  darcs_init = %w(darcs init)
  if darcs2_repo?(SRCREPO)
    darcs_init << "--darcs-2"
  elsif (DARCS_VERSION <=> [2, 0, 0]) >= 0
    darcs_init << "--old-fashioned-inventory"
  end
  run(*darcs_init)
  run("git", "init")
  File.open(".git/info/exclude", "a") { |f| f.write("_darcs\n.DS_Store\n") }
  File.open("_darcs/prefs/boring", "a") { |f| f.write("\\.git$\n\\.DS_Store$\n") }
  # TODO: migrate darcs borings into git excludes?
end

COMMIT_HISTORY = CommitHistory.new(GIT_PATCHES)

AUTHOR_MAP = if OPTIONS[:author_map]
               AuthorMap.load(OPTIONS[:author_map])
             elsif File.exist?(DEFAULT_AUTHOR_MAP_FILE)
               AuthorMap.load(DEFAULT_AUTHOR_MAP_FILE)
             else
               AuthorMap.new
             end
AUTHOR_MAP.default_email = OPTIONS[:default_email]

patches = DarcsPatch.read_from_repo(SRCREPO)
if OPTIONS[:list_authors]
  extract_authors(patches)
  exit(0)
end

# Patches not yet present in git, in source order.
patches_available = patches.reject { |patch| patch.id_in_git_repo }

patches_to_pull = if OPTIONS[:num_patches]
                    patches_available.first(OPTIONS[:num_patches])
                  else
                    patches_available
                  end

patches_to_pull.each(&:pull_and_apply)

pulled = patches_to_pull.size
if pulled == 0
  puts "\nNothing to pull."
else
  puts "\nPulled #{pulled} patch#{"es" unless pulled == 1}."
  puts "\nDarcs import successful! You may now want to run `git gc' to improve space usage the git repo"
end

# -------------------------------------------------------------------------------
# Post-flight checks
# -------------------------------------------------------------------------------

# if we didn't pull all patches, then the consistency check would
# fail, so we simply skip it
if patches_to_pull.size == patches_available.size
  puts "Comparing final state with source repo..."
  unless system("diff", "-ur", "-x", "_darcs", "-x", ".git", ".", SRCREPO)
    abort <<-end_msg
!!! There were differences! See diff above for details.
!!! It may be that the source repository was dirty.
!!! Run "cd #{SRCREPO} && darcs whatsnew -sl" to check.
end_msg
  end
end