#!/usr/bin/env perl
# group-requests: Helper for Matt McCutchen's web log analysis process
# https://mattmccutchen.net/utils/#web-logs
#
# Upstream: https://mattmccutchen.net/utils/utils.git (web-logs/group-requests)

# Input: generated/irsr format, one record per line, read from the files
# named on the command line or from standard input:
#
#     <field> "<request>" <status> "<referrer>"
#
# <field> contains no spaces and is ignored here, <status> is all digits,
# and the quoted fields may contain backslash escapes (e.g. \" and \\).
#
# Output: for each status code in ascending order, a "=== STATUS CODE n ==="
# header followed by the distinct request lines with that status, most
# frequent first, each with a breakdown of the referrers that led to it.

use strict;
use warnings;

#sub unescape_field($) {
#	return ($_[0] =~ s,\\(.),$1,g);
#}

#sub escape_field($) {
#	return ($_[0] =~ s,([\\"]),\\$1,g);
#}

# Split one irsr line into its interesting fields.
#
# Takes the raw line (a single trailing newline is tolerated).  Returns the
# list (request, status, referrer) with backslash escapes left untouched, or
# the empty list if the line is malformed.  Call this in list context: in
# scalar context a successful parse would yield only the referrer, which may
# legitimately be a false value.
sub parse_irsr_line {
    my ($line) = @_;

    # irsr_line_regex from Makefile
    # Groups used: $2 = request, $4 = status, $5 = referrer.
    $line =~ m,^([^ ]+) "(([^"\\]|\\.)*)" ([0-9]+) "(([^"\\]|\\.)*)"$,
        or return;
    return ($2, $4, $5);
}

# Read irsr lines from the filehandle $fh and count them.
#
# Returns a hash reference shaped like
#
#     { $status => { status   => $status,
#                    req_objs => { $request => { request  => $request,
#                                                count    => N,
#                                                ref_objs => { $referrer =>
#                                                    { referrer => $referrer,
#                                                      count    => N } } } } } }
#
# Dies with a message starting "Malformed line" on the first line that does
# not match the irsr format.
sub tally_requests {
    my ($fh) = @_;
    my %status_objs;

    while (my $line = <$fh>) {
        my @fields = parse_irsr_line($line);
        if (!@fields) {
            chomp $line;
            # No trailing newline, so Perl appends the input line number.
            die "Malformed line: $line";
        }
        my ($request, $status, $referrer) = @fields;

        #my $rs = "\"$request\" $response_status_code";
        # Duplicating the key inside the object makes iteration easier later.
        my $status_obj = ($status_objs{$status}
            //= { status => $status, req_objs => {} });
        my $req_obj = ($status_obj->{req_objs}{$request}
            //= { request => $request, count => 0, ref_objs => {} });
        $req_obj->{count}++;
        my $ref_obj = ($req_obj->{ref_objs}{$referrer}
            //= { referrer => $referrer, count => 0 });
        $ref_obj->{count}++;
    }

    return \%status_objs;
}

# Render the structure built by tally_requests() as the report text.
#
# Takes the hash reference returned by tally_requests() and returns the whole
# report as one string (empty if there were no records).
sub format_report {
    my ($status_objs) = @_;
    my $report = '';

    # Group by status code to make it easy to separate both my-fault and
    # abuse 404s from the bulk of 200s.  The string comparison only matters
    # for keys that are numerically equal (e.g. "404" and "0404"); it keeps
    # the output order deterministic.
    my @by_status = sort {
        $a->{status} <=> $b->{status} || $a->{status} cmp $b->{status}
    } values %{$status_objs};

    foreach my $status_obj (@by_status) {
        $report .= "=== STATUS CODE $status_obj->{status} ===\n";

        # Break ties alphabetically to make it a little easier to read.
        my @req_objs = sort {
            $b->{count} <=> $a->{count} || $a->{request} cmp $b->{request}
        } values %{ $status_obj->{req_objs} };

        foreach my $req_obj (@req_objs) {
            my @ref_objs = sort {
                $b->{count} <=> $a->{count}
                    || $a->{referrer} cmp $b->{referrer}
            } values %{ $req_obj->{ref_objs} };

            if (scalar(@ref_objs) == 1) {
                # Special case to make output a little easier to read.
                $report .= sprintf("%4d \"%s\" \"%s\"\n",
                    $req_obj->{count}, $req_obj->{request},
                    $ref_objs[0]->{referrer});
            }
            else {
                # That will be the day when we get more than 9999 identical
                # request lines in less than 2 days...
                $report .= sprintf("%4d \"%s\"\n",
                    $req_obj->{count}, $req_obj->{request});
                foreach my $ref_obj (@ref_objs) {
                    # NOTE(review): the indent before the count is a single
                    # space in the copy this was taken from; whitespace may
                    # have been collapsed there -- confirm against upstream.
                    $report .= sprintf(" %4d \"%s\"\n",
                        $ref_obj->{count}, $ref_obj->{referrer});
                }
            }
        }
    }

    return $report;
}

# Entry point: read the files named in @ARGV (or standard input when there
# are none), exactly as a bare <> would, and print the report to STDOUT.
sub main {
    print format_report(tally_requests(\*ARGV));
    return;
}

# Run only when executed as a script, so the subs above can be loaded and
# tested without consuming standard input.
main() unless caller;

# True value so the file can also be loaded with require.
1;