#!/usr/bin/perl -w

# Copyright (c) 2010, Edd Edmondson, 2008 Gian Merlino
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
# EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=head1 NAME

merge - WoW combat log merging tool

=head1 DESCRIPTION

This program will read in two WoW combat logs and attempt to identify
identical events in each, and output a union of the two containing only one
of each event. This log may then be parsed by any other combat log parser.

For more information visit I<http://code.google.com/p/apostasis/>.

=head1 SYNOPSIS

Merge logs a.txt and b.txt to merged.txt

    merge -file1 a.txt -file2 b.txt -output merged.txt

=head1 USAGE

merge [options]

=head2 Options

The following options are required:

B<-file1> Sets the first input file

B<-file2> Sets the second input file

B<-output> Sets the output file

Some parameters of the merge are adjustable as follows.
If not set, a default value is used.

B<-offset1> Sets the time offset in seconds of the first log, default 0

B<-offset2> Sets the time offset in seconds of the second log, default 0

B<-flexibility> Sets the match distance in seconds, default 0.3

=head1 BUGS

http://code.google.com/p/apostasis/

Use the "Issues" tab.

=cut

use strict;
use warnings;
use POSIX;
use Getopt::Long;

# Optionally offset the timestamps of one file by this amount (seconds),
# e.g. for timezone differences. The offset is subtracted from every
# timestamp read from the corresponding log.
my $offset1 = 0 * 60 * 60;
my $offset2 = 0 * 60 * 60;

# Two events with the same reduced data count as the same event when their
# timestamps differ by less than this many seconds.
my $flexibility = 0.3;

my $file1;
my $file2;
my $outputfile;

my $usage = "Usage: merge -file1 <log> -file2 <log> -output <file> "
          . "[-offset1 <seconds>] [-offset2 <seconds>] [-flexibility <seconds>]\n";

GetOptions(
    "flexibility=f" => \$flexibility,
    "offset1=f"     => \$offset1,
    "offset2=f"     => \$offset2,
    "file1=s"       => \$file1,
    "file2=s"       => \$file2,
    "output=s"      => \$outputfile,
) or die $usage;

# Fail early with a readable message instead of an "uninitialized value"
# warning followed by a failed open.
die $usage
    unless defined $file1 && defined $file2 && defined $outputfile;

# The histogram below divides by $flexibility, and a non-positive match
# window could never match anything anyway.
die "-flexibility must be greater than zero.\n"
    unless $flexibility > 0;

# Captures: month, day, hour, minute, second, milliseconds, event data.
our $stamp_regex = qr/^(\d+)\/(\d+) (\d+):(\d+):(\d+)\.(\d+) (.*?)[\r\n]*$/s;

# Splits on commas that are not inside a double-quoted field.
our $csv_regex = qr/,(?=(?:[^\"]*\"[^\"]*\")*(?![^\"]*\"))/;

# Combat log timestamps carry no year, so assume the current one.
our $year = strftime "%Y", localtime;

# parseline($line)
#
# Splits one raw log line into its timestamp and event data.
#
# Returns the list ($timestamp, $data):
#   $timestamp - local epoch seconds including the millisecond fraction,
#                with NO offset applied (callers apply their own offset);
#   $data      - everything after the timestamp, line ending stripped.
# For a line that does not look like a log line, or whose date cannot be
# converted, returns (0, ' '). Callers use the 0 timestamp as the
# "skip this line" marker.
sub parseline {
    my ($line) = @_;

    my ( $month, $mday, $hour, $min, $sec, $millis, $data )
        = $line =~ $stamp_regex;
    return ( 0, ' ' ) unless defined $data;

    my $epoch = POSIX::mktime(
        $sec,
        $min,
        $hour,
        $mday,
        $month - 1,       # mktime months are 0-based
        $year - 1900,     # mktime years count from 1900
        0,                # wday (ignored by mktime)
        0,                # yday (ignored by mktime)
        -1                # is_dst: let the C library decide
    );
    return ( 0, ' ' ) unless defined $epoch;

    return ( $epoch + $millis / 1000, $data );
}

# reducedata($eventstring)
#
# Takes an event string (without timestamp) and builds the key used to
# recognise the same event in both logs. Values that may vary across logs
# are left out: fields 3 and 6, which hold stuff like wowpedia.org/UnitFlag
# (what apostasis calls target_relationship). Double quotes are stripped and
# "nil" fields are treated as empty before the remaining fields are
# concatenated.
#
# Returns the concatenated string, or '' when there is nothing to join.
sub reducedata {
    my ($eventstring) = @_;

    my @fields = split $csv_regex, $eventstring;
    for my $field (@fields) {
        $field =~ s/"//g;
        $field = '' if $field eq 'nil';
    }

    # NOTE(review): the upper bound stops one short of the last field, so the
    # final field never takes part in matching. This is kept exactly as the
    # original did it; confirm whether that is intended before changing it,
    # because it alters which events are considered identical.
    my $reducedevent = '';
    for my $i ( 0 .. $#fields - 1 ) {
        next if $i == 3 || $i == 6;
        $reducedevent .= $fields[$i];
    }

    return $reducedevent;
}

# Reduced event data => array of timestamps at which that event was seen.
my %eventhash;

# Timestamp => array of reduced event data seen at that time (built late,
# only used for sorting the output).
my %timehash;

# Reduced event data concatenated with timestamp => full original event data.
my %completeeventhash;

# Histogram of (file1 time - file2 time) for every pair of events with equal
# reduced data, matched or not, in units of $flexibility. Used to warn when
# the chosen offsets do not look like the best values.
my %histogram;

# All three files are opened before any work is done so that a bad output
# path is reported before the (slow) parse. Three-argument open keeps a
# filename such as ">foo" or "cmd |" from being interpreted as a mode.
open my $input1, '<', $file1
    or die "Couldn't open $file1: $!";
open my $input2, '<', $file2
    or die "Couldn't open $file2: $!";
open my $output, '>', $outputfile
    or die "Couldn't open $outputfile for writing: $!";

print "Beginning parse of $file1.\n";
while ( my $line = <$input1> ) {
    my ( $timestamp, $datastring ) = parseline($line);

    # Test the "unparseable" marker BEFORE applying the offset; afterwards a
    # non-zero offset would make the marker indistinguishable from real data.
    next if $timestamp == 0;
    $timestamp -= $offset1;

    my $reduceddata = reducedata($datastring);
    push @{ $eventhash{$reduceddata} }, $timestamp;
    $completeeventhash{ $reduceddata . $timestamp } = $datastring;
}
close $input1;
print "Parsed $file1.\n";

print "Beginning parse of $file2.\n";
while ( my $line = <$input2> ) {
    my ( $timestamp, $datastring ) = parseline($line);
    next if $timestamp == 0;
    $timestamp -= $offset2;

    my $reduceddata = reducedata($datastring);

    # Compare against every timestamp already recorded for this event. The
    # loop deliberately does not stop at the first match: every pairing has
    # to be counted in the histogram.
    my $known_times = $eventhash{$reduceddata} || [];
    my $matched     = 0;
    for my $known (@$known_times) {
        $histogram{ int( ( $known - $timestamp ) / $flexibility ) } += 1;
        $matched = 1 if abs( $known - $timestamp ) < $flexibility;
    }

    # A match means this event is already in the merged set.
    next if $matched;

    push @{ $eventhash{$reduceddata} }, $timestamp;
    $completeeventhash{ $reduceddata . $timestamp } = $datastring;
}
close $input2;
print "Parsed $file2.\n";

print "Checking histogram.\n";
my $hpeakmax = 0;    # biggest value in histogram
my $hpeakkey;        # key for that value
foreach my $key ( keys %histogram ) {
    if ( $histogram{$key} > $hpeakmax ) {
        $hpeakmax = $histogram{$key};
        $hpeakkey = $key;
    }
}

# $hpeakkey stays undefined when no event of the second log had any
# counterpart in the first; there is nothing to suggest in that case.
if ( defined $hpeakkey && abs( $hpeakkey * $flexibility ) > 1 ) {
    print "Better offset detected at ", $hpeakkey * $flexibility,
        " seconds (time of $file1 - $file2). It may be advisable to rerun with offsets adjusted.\n";
}

print "Constructing merged log.\n";

# We need to sort all events by timestamp. We do this by basically swapping
# keys and values (allowing for the fact values are in arrays), then sorting
# on the new keys (timestamps). Several different events may share one
# timestamp, hence the arrays.
foreach my $event ( keys %eventhash ) {
    foreach my $stamp ( @{ $eventhash{$event} } ) {
        push @{ $timehash{$stamp} }, $event;
    }
}

foreach my $stamp ( sort { $a <=> $b } keys %timehash ) {

    # Reconstruct the "M/D HH:MM:SS.mmm" time string.
    my @when  = localtime($stamp);
    my $month = $when[4] + 1;
    my $mday  = $when[3];

    # Take the fractional seconds from the stringified timestamp and force
    # them to exactly three digits, the width the log format uses.
    my ($fraction) = $stamp =~ /\.(\d+)/;
    $fraction = '' unless defined $fraction;
    my $millisec = substr( $fraction . '000', 0, 3 );

    my $time = sprintf( "%02d:%02d:%02d.%s",
        $when[2], $when[1], $when[0], $millisec );

    foreach my $event ( @{ $timehash{$stamp} } ) {

        # Reclaim the full event string stored under event data + timestamp.
        my $fullevent = $completeeventhash{ $event . $stamp };
        print {$output} "$month/$mday $time $fullevent\n";
    }
}

# Buffered write errors (e.g. disk full) only surface at close.
close $output
    or die "Couldn't close $outputfile: $!";