Skip to content

Instantly share code, notes, and snippets.

@userid
Last active April 20, 2020 16:30
Show Gist options
  • Save userid/fecee8535dcebf3197b7347c123a653c to your computer and use it in GitHub Desktop.
Save userid/fecee8535dcebf3197b7347c123a653c to your computer and use it in GitHub Desktop.
# 10.10.10.41 15 07 * * * /home/odps/src/download.pl >>/home/log/odps/odps_to_clickhouse.log 2>&1 # 10.10.14.33 24 9 * * * /home/odps/ods_ac_serverconfig/src/crontab_download_generate.pl >> /home/log/odps/odps_download.log 2>>/home/log/odps/odps_download_err.log
#!/usr/bin/perl -w
##
##
##
use 5.012;
$| = 1;
use POSIX qw(strftime);
chdir '/home/odps/ods_ac_serverconfig/';
my $date = strftime("%Y%m%d", localtime(time()-86400));
my $sql = "tunnel download cbods.ods_ac_serverconfig/requestdate='$date' -fd '|' data/serverconfig/serverconfig.$date.txt;";
say $sql;
say qx{ /opt/odps/bin/odpscmd -e "$sql"};
my $downloadfile = "data/serverconfig/serverconfig.$date.txt";
die("file download error") unless -s $downloadfile;
###_---- 开始解析
open my $f_user, "+>", "huge/user.csv";
open my $f_device, "+>", "huge/device.csv";
open my $f_udevice, "+>", "huge/udevice.csv";
open my $f_used, "+>", "huge/used.csv";
open my $f_contains, "+>", "huge/contains.csv";
sub get_uuid_time($){
use POSIX qw(strftime);
my $uuid= shift || 'ef802820-46b3-11e2-bf3a-47ef6b3e28e2';
$uuid =~ s/-//g;
my $timelow = hex substr( $uuid, 2 * 0, 2 * 4 );
my $timemid = hex substr( $uuid, 2 * 4, 2 * 2 );
my $version = hex substr( $uuid, 2 * 6, 1 );
my $timehi = hex substr( $uuid, 2 * 6 + 1, 2 * 2 - 1 );
my $time = ( $timehi * ( 2**16 ) + $timemid ) * ( 2**32 ) + $timelow;
my $epoc = int( $time / 10000000 ) - 12219292800;
#return strftime( "%Y%m%d", localtime($epoc) ) ;
return strftime( "%Y-%m-%d %H:%M:%S", localtime($epoc) );
}
sub process($){
my $l = shift;
chomp $l;
my @vals = split '\\|', $l ;
return if scalar @vals <5;
my $userid = $vals[0];
my $type = $vals[1];
my $model = $vals[2];
$model=~s/,/_/;
my $mac = $vals[3];
my $uuid = $vals[-1] || '';
return if $userid eq '';
return if $userid == 0;
my $date = get_uuid_time($uuid);
#say "$userid $model $mac $uuid $date";
print $f_user "$userid\n";
print $f_device "$mac,$model,$type\n";
print $f_udevice "$uuid,$date,$type\n";
print $f_used "$userid,$uuid\n";
print $f_contains "$uuid,$mac\n";
}
my $n = 0;
open my $fd,"<", $downloadfile;
while(<$fd>){
process($_) if ( $_ ne '');
say "processed $n lines." if $n++ % 1000 == 0;
}
close($fd);
close($f_user);
close $f_device;
close $f_udevice;
close $f_user;
close $f_contains;
die("解析文件出错" ) unless -s "huge/user.csv";
#__----- 开始导入hugegraph
say qx {/home/dev/hugegraph-loader-0.7.0/bin/hugegraph-loader -g hugegraph -f huge/s.json -s huge/schema.groovy -h 10.10.14.33 --max-parse-errors 10000 --max-insert-errors 10000};
#!/usr/bin/perl -w
##
##
##
use 5.012;
$| = 1;
use POSIX qw(strftime);
chdir '/home/odps/';
my $date = strftime("%Y%m%d", localtime(time()-86400*1));
my $partition = strftime("%Y-%m-%d", localtime(time()-86400*1));
my $sql = "tunnel download cbmds.mds_user_attribute_original_big_data_table/requestdate=${date} -fd '\x01' ./data/user_attribute_${date}.txt ";
say $sql;
say "=== 开始下载odps ===";
say qx{ /opt/odps/bin/odpscmd -e "$sql"};
my $downloadfile = "./data/user_attribute_${date}.txt";
die("file download error") unless -s $downloadfile;
#__----- 开始导入
#
say "=== 开始导入clickhouse ===";
say qx{ cat $downloadfile |sed -e "s/['\\"{}\\r\\t]//g" -e 's/\$/\\x01${partition}/' | clickhouse-client --format_csv_delimiter='\x01' --query="INSERT INTO user_attribute FORMAT CSV" --input_format_allow_errors_num=10000000};
#__----- 开始清理旧数据
my $old_date = strftime("%Y%m%d", localtime(time()-86400*9));
my $old_partition = strftime("%Y-%m-%d", localtime(time()-86400*9));
say "=== drop table partition '$old_partition' ===";
qx{rm ./data/user_attribute_${old_date}.txt };
say qx{ clickhouse-client --query=" alter table user_attribute drop partition '$old_partition' "};
say "=== Done ===\n,", `date`;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment