Last active
February 27, 2019 03:53
-
-
Save jgamblin/184590e2ba64371730e435ab2977e4cf to your computer and use it in GitHub Desktop.
Download Umbrella's Top 1 Million Sites List For Last 2 Days And List New Sites
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Download Cisco Umbrella's "Top 1 Million" domain list for two consecutive
# days and report the domains that appear in the newer list but not in the
# older one. The result is written to ~/ut1m/newdomains.txt and uploaded to
# sprunge.us.
#
# Requires GNU date, wget, unzip, cut, sed, sort, comm and curl.
# Safe to re-run: existing directories and downloads are reused/overwritten.

set -euo pipefail

readonly WORK_DIR="${HOME}/ut1m"
readonly BASE_URL="http://s3-us-west-1.amazonaws.com/umbrella-static"

# Print a message to stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Download one day's list and reduce it to a sorted, one-domain-per-line file.
# Globals:   WORK_DIR, BASE_URL (read)
# Arguments: $1 - date of the list, formatted YYYY-MM-DD
# Outputs:   progress message on stdout;
#            writes ${WORK_DIR}/<date>/<date>-a.txt
# Returns:   exits non-zero if the download or extraction fails
#######################################
fetch_day() {
  local day=$1
  local day_dir="${WORK_DIR}/${day}"
  local archive="${day_dir}/top-1m-${day}.csv.zip"
  local sorted="${day_dir}/${day}-a.txt"

  printf 'Getting %s Data\n' "$day"
  mkdir -p -- "$day_dir"

  # -O pins the output name so a re-run overwrites instead of creating ".1".
  wget -q -O "$archive" "${BASE_URL}/top-1m-${day}.csv.zip" \
    || die "download failed for ${day}"

  # Stream the CSV straight out of the archive (no overwrite prompt on
  # re-runs), drop the rank column, strip stray whitespace (including any
  # carriage returns), and sort in the C locale so the order matches what
  # comm expects later.
  unzip -p "$archive" top-1m.csv \
    | cut -d, -f2- \
    | sed -E 's/[[:space:]]+//g' \
    | LC_ALL=C sort > "$sorted" \
    || die "could not extract domain list for ${day}"
}

main() {
  local newer older
  # NOTE(review): the script deliberately looks 2 and 3 days back rather than
  # at yesterday; presumably the lists are published with a delay - confirm.
  newer=$(date --date="2 days ago" +%Y-%m-%d)
  older=$(date --date="3 days ago" +%Y-%m-%d)

  mkdir -p -- "$WORK_DIR"

  # Get the newer day's data.
  fetch_day "$newer"
  # Get the older day's data.
  fetch_day "$older"

  # Find the differences: -23 keeps only lines unique to the first (newer)
  # file, i.e. domains that newly entered the list. The previous -3 also
  # emitted domains that had dropped out of the list.
  printf 'Finding The Differences\n'
  LC_ALL=C comm -23 \
    "${WORK_DIR}/${newer}/${newer}-a.txt" \
    "${WORK_DIR}/${older}/${older}-a.txt" \
    > "${WORK_DIR}/newdomains.txt"

  # Upload to Sprunge; the service replies with the URL of the paste.
  printf 'Uploaded to Sprunge Here:\n'
  curl -F 'sprunge=<-' http://sprunge.us < "${WORK_DIR}/newdomains.txt"

  # Clean up (intentionally left disabled; a bare "rm -rf *" is dangerous if
  # the working directory is not what you expect).
  # rm -rf -- "${WORK_DIR:?}"/*
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@daveio Thanks for the amazing fix!! I commented out the clean-up line. As I said on Twitter, I built and tested this quickly; my code is usually not elegant, but I'm glad you caught it.