Skip to content

Instantly share code, notes, and snippets.

@Buttonwood
Created April 8, 2016 08:12
Show Gist options
  • Save Buttonwood/3cbcb11e5a810c428fb2059d9bdfa7cc to your computer and use it in GitHub Desktop.
Save Buttonwood/3cbcb11e5a810c428fb2059d9bdfa7cc to your computer and use it in GitHub Desktop.
use List::Util qw/max min sum/;
my %data;
while(<>){
my @arr = split("%########%");
my @sql = split(";",$arr[-1]);
=pod
my $a = &phaser_sql(&trans_sql($sql[0]));
my $b = &phaser_keys($a);
print "$sql[0]\n$a\n$b\n";
=cut
$arr[2] = &phaser_keys(&phaser_sql(&trans_sql($sql[0])));
push(@{$data{$arr[2]}},$arr[0]);
print join(";",@arr);
}
sub trans_sql{
my $tmp = shift;
$tmp =~ s/\n//g;
$tmp =~ s/\s+\,\s+/\,/g;
$tmp =~ s/\s+\=\s+/\=/g;
$tmp =~ s/\s+\>\=\s+/\>\=/g;
$tmp =~ s/\s+\<\=\s+/\<\=/g;
$tmp =~ s/\s+\=\<\s+/\=\</g;
$tmp =~ s/^\s+|\s+$//g;
$tmp =~ s/\-\-/ /g;
$tmp =~ s/\;$//g;
$tmp =~ s/\s+/\#/g;
$tmp =~ s/\,\#/\,/g;
$tmp =~ s/\`//g;
$tmp =~ s/\#\)\#/\)\#/g;
$tmp =~ s/\#\)/\)/g;
$tmp =~ s/\#\(\#/\#\(/g;
$tmp =~ s/\(\#/\(/g;
$tmp =~ s/\,\s+//g;
$tmp =~ s/\s+\,/\,/g;
$tmp =~ s/\#\=\#/\=/g;
$tmp =~ s/\=\#/\=/g;
$tmp =~ s/\#\=/\=/g;
$tmp =~ s/\#\-\#/\-/g;
$tmp =~ s/\-\#/\-/g;
$tmp =~ s/\#\-/\-/g;
$tmp =~ s/\#\>\#/\>/g;
$tmp =~ s/\>\#/\>/g;
$tmp =~ s/\#\>/\>/g;
$tmp =~ s/\#\<\#/\</g;
$tmp =~ s/\<\#/\</g;
$tmp =~ s/\#\</\</g;
$tmp =~ s/\#\>\=\#/\>\=/g;
$tmp =~ s/\>\=\#/\>\=/g;
$tmp =~ s/\#\>\=/\>\=/g;
$tmp =~ s/\#\=\<\#/\=\</g;
$tmp =~ s/\=\<\#/\=\</g;
$tmp =~ s/\#\=\</\=\</g;
$tmp =~ s/\#\!\=\#/\!\=/g;
$tmp =~ s/\!\=\#/\!\=/g;
$tmp =~ s/\#\!\=/\!\=/g;
$tmp =~ s/\#\,/\,/g;
$tmp =~ s/\,\#/\,/g;
$tmp =~ s/\#\,\#/\,/g;
$tmp =~ s/\#\;/\;/g;
$tmp =~ s/\;\#/\;/g;
$tmp =~ s/\#\;\#/\;/g;
#print $tmp."\n";
return $tmp;
}
sub phaser_sql{
my $tmp = shift;
#print $tmp."\n";
my @a_a = split(/\;|\#/,$tmp);
if ($a_a[0] eq "select"){
my @a_b = split("where",$tmp);
#my $pos = index()
$a_b[-1] =~ s/'\d+[\-\/]\d+[\-\/]\d+'/'\*'/g;
$a_b[-1] =~ s/'\d+[\-\/]\d+[\-\/]\d+\#\d+'/'\*'/g;
$a_b[-1] =~ s/'\d+[\-\/]\d+[\-\/]\d+\#\d+\:\d+([\.\,\;])*'/'\*'/g;
$a_b[-1] =~ s/'\d+[\-\/]\d+[\-\/]\d+(\#)*\d+\:\d+([\.\,\;])*'/'\*'/g;
$a_b[-1] =~ s/'\d+[\-\/]\d+[\-\/]\d+(\#)*\d+\:\d+\:\d+'/'\*'/g;
$a_b[-1] =~ s/'\d+\-\d+\-\d+\#\d+\:\d+\:\d+'/'\*'/g;
$a_b[-1] =~ s/'\d+\-\d+\-\d+\#\d+\:\d+\:\d+[\.\,\:]\d+'/'\*'/g;
$a_b[-1] =~ s/'\d+\/\d+\/\d+\#\d+\:\d+\:\d+'/'\*'/g;
$a_b[-1] =~ s/'\d+\/\d+\/\d+\#\d+\:\d+\:\d+[\.\,\:]\d+'/'\*'/g;
$a_b[-1] =~ s/'\w+'/'\*'/g;
$a_b[-1] =~ s/\#between\#\d+\#and\#\d+/\#between\#'\*'\#and\#'\*'/g;
#$a_b[-1] =~ s/\(('\*',)+'\*')/\('\*'\)/;
$a_b[-1] =~ s/'i\-\d+'/'\*'/g;
#$a_b[-1] =~ s/website\=''#and#websiteplate='\'//g;
$a_b[-1] =~ s/'http:\S+/'\*'/g;
$a_b[-1] =~ s/'https:\S+/'\*'/g;
#$a_b[-1] =~ s/'\w+|\S+'/'\*'/g;
return $a_b[0]."where".$a_b[-1];
#pod
}elsif($a_a[0] eq "insert"){
my @a_b = split("values",$tmp);
return $a_b[0];
}elsif($a_a[0] eq "update"){
my @a_b = split(/where|,summary=/,$tmp);
return $a_b[0];
}else{
return $a_a[0]."#";
#cut
}
}
use Data::Dumper;
#print Dumper(\%data);
#=pod
open(OUT,">stat.keys.csv");
foreach my $key (sort(keys (%data))){
my $CNT = $data{$key};
#print $key ."=>".Dumper($CNT);
if(scalar(@$CNT) > 0){
printf OUT "%d;%0.4f;%0.4f;%0.4f;%s\n",scalar(@$CNT),sum(@$CNT)/scalar(@$CNT),max(@$CNT),min(@$CNT),$key;
}
}
close OUT;
#=cut
sub phaser_keys{
my $tmp = shift;
=pod
if($tmp =~ /\#website\=/){
my @a = split(/website\=/,$tmp);
$tmp = $a[0]."website=".&sub_first($a[-1]);
}
if($tmp =~/websiteplate\=/){
my @a = split(/websiteplate\=/,$tmp);
$tmp = $a[0]."websiteplate=".&sub_first($a[-1]);
}
if($tmp =~ /\#title\=/){
my @a = split(/title\=/,$tmp);
$tmp = $a[0]."title=".&sub_first($a[-1]);
}
if($tmp =~ /\#in\#\(/){
my @a = split(/\#in\#\(/,$tmp);
$tmp = $a[0]."#in#".&sub_first($a[-1]);
}
if($tmp =~ /\#like\#/){
my @a = split(/\#like\#/,$tmp);
$tmp = $a[0]."#like#".&sub_first($a[-1]);
}
if($tmp =~ /\#and\#\(\id\>\=/){
my @a = split(/\#and\#\(\id\>\=/,$tmp);
$tmp = $a[0]."#and#(id>=";
}
=cut
if($tmp =~ /\'/){
my @a = split(/\'/,$tmp);
$tmp = &check_all_start(\@a);
}
if($tmp =~ /\(\'/){
my @a = split(/\(\'|\'\)/,$tmp);
$tmp = &check_all_start(\@a);
}
return $tmp;
}
sub sub_first{
my $t = shift;
my @a = split(/\'\#/,$t);
$a[0] = "\'\*\'";
return join("#",@a);
}
sub check_all_start{
my $t = shift;
for(my $i=1;$i<scalar(@$t);$i++){
#print "$i=>".$t->[$i]."\n";
if($i%2){
$t->[$i] = "\'\*\'";
}
}
return join("",@$t);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment