git/contrib/contacts/git-contacts

#!/usr/bin/perl

# List people who might be interested in a patch.  Useful as the argument to
# git-send-email --cc-cmd option, and in other situations.
#
# Usage: git contacts <file | rev-list option> ...

use strict;
use warnings;
use IPC::Open2;

my $since = '5-years-ago';
my $min_percent = 10;
my $labels_rx = qr/Signed-off-by|Reviewed-by|Acked-by|Cc|Reported-by/i;
my %seen;

sub format_contact {
	my ($name, $email) = @_;
	return "$name <$email>";
}

sub parse_commit {
	my ($commit, $data) = @_;
	my $contacts = $commit->{contacts};
	my $inbody = 0;
	for (split(/^/m, $data)) {
		if (not $inbody) {
			if (/^author ([^<>]+) <(\S+)> .+$/) {
				$contacts->{format_contact($1, $2)} = 1;
			} elsif (/^$/) {
				$inbody = 1;
			}
		} elsif (/^$labels_rx:\s+([^<>]+)\s+<(\S+?)>$/o) {
			$contacts->{format_contact($1, $2)} = 1;
		}
	}
}

sub import_commits {
	my ($commits) = @_;
	return unless %$commits;
	my $pid = open2 my $reader, my $writer, qw(git cat-file --batch);
	for my $id (keys(%$commits)) {
		print $writer "$id\n";
		my $line = <$reader>;
		if ($line =~ /^([0-9a-f]{40}) commit (\d+)/) {
			my ($cid, $len) = ($1, $2);
			die "expected $id but got $cid\n" unless $id eq $cid;
			my $data;
			# cat-file emits newline after data, so read len+1
			read $reader, $data, $len + 1;
			parse_commit($commits->{$id}, $data);
		}
	}
	close $reader;
	close $writer;
	waitpid($pid, 0);
	die "git-cat-file error: $?\n" if $?;
}

sub get_blame {
	my ($commits, $source, $from, $ranges) = @_;
	return unless @$ranges;
	open my $f, '-|',
		qw(git blame --porcelain -C),
		map({"-L$_->[0],+$_->[1]"} @$ranges),
		'--since', $since, "$from^", '--', $source or die;
	while (<$f>) {
		if (/^([0-9a-f]{40}) \d+ \d+ \d+$/) {
			my $id = $1;
			$commits->{$id} = { id => $id, contacts => {} }
				unless $seen{$id};
			$seen{$id} = 1;
		}
	}
	close $f;
}

sub blame_sources {
	my ($sources, $commits) = @_;
	for my $s (keys %$sources) {
		for my $id (keys %{$sources->{$s}}) {
			get_blame($commits, $s, $id, $sources->{$s}{$id});
		}
	}
}

sub scan_patches {
	my ($sources, $id, $f) = @_;
	my $source;
	while (<$f>) {
		if (/^From ([0-9a-f]{40}) Mon Sep 17 00:00:00 2001$/) {
			$id = $1;
			$seen{$id} = 1;
		}
		next unless $id;
		if (m{^--- (?:a/(.+)|/dev/null)$}) {
			$source = $1;
		} elsif (/^@@ -(\d+)(?:,(\d+))?/ && $source) {
			my $len = defined($2) ? $2 : 1;
			push @{$sources->{$source}{$id}}, [$1, $len] if $len;
		}
	}
}

sub scan_patch_file {
	my ($commits, $file) = @_;
	open my $f, '<', $file or die "read failure: $file: $!\n";
	scan_patches($commits, undef, $f);
	close $f;
}

sub parse_rev_args {
	my @args = @_;
	open my $f, '-|',
		qw(git rev-parse --revs-only --default HEAD --symbolic), @args
		or die;
	my @revs;
	while (<$f>) {
		chomp;
		push @revs, $_;
	}
	close $f;
	return @revs if scalar(@revs) != 1;
	return "^$revs[0]", 'HEAD' unless $revs[0] =~ /^-/;
	return $revs[0], 'HEAD';
}

sub scan_rev_args {
	my ($commits, $args) = @_;
	my @revs = parse_rev_args(@$args);
	open my $f, '-|', qw(git rev-list --reverse), @revs or die;
	while (<$f>) {
		chomp;
		my $id = $_;
		$seen{$id} = 1;
		open my $g, '-|', qw(git show -C --oneline), $id or die;
		scan_patches($commits, $id, $g);
		close $g;
	}
	close $f;
}

sub mailmap_contacts {
	my ($contacts) = @_;
	my %mapped;
	my $pid = open2 my $reader, my $writer, qw(git check-mailmap --stdin);
	for my $contact (keys(%$contacts)) {
		print $writer "$contact\n";
		my $canonical = <$reader>;
		chomp $canonical;
		$mapped{$canonical} += $contacts->{$contact};
	}
	close $reader;
	close $writer;
	waitpid($pid, 0);
	die "git-check-mailmap error: $?\n" if $?;
	return \%mapped;
}

if (!@ARGV) {
	die "No input revisions or patch files\n";
}

my (@files, @rev_args);
for (@ARGV) {
	if (-e) {
		push @files, $_;
	} else {
		push @rev_args, $_;
	}
}

my %sources;
for (@files) {
	scan_patch_file(\%sources, $_);
}
if (@rev_args) {
	scan_rev_args(\%sources, \@rev_args)
}

my $toplevel = `git rev-parse --show-toplevel`;
chomp $toplevel;
chdir($toplevel) or die "chdir failure: $toplevel: $!\n";

my %commits;
blame_sources(\%sources, \%commits);
import_commits(\%commits);

my $contacts = {};
for my $commit (values %commits) {
	for my $contact (keys %{$commit->{contacts}}) {
		$contacts->{$contact}++;
	}
}
$contacts = mailmap_contacts($contacts);

my $ncommits = scalar(keys %commits);
for my $contact (keys %$contacts) {
	my $percent = $contacts->{$contact} * 100 / $ncommits;
	next if $percent < $min_percent;
	print "$contact\n";
}