Created
February 23, 2015 05:45
-
-
Save chankeypathak/c51804180a592b24fb14 to your computer and use it in GitHub Desktop.
Scrape the top 10 YouTube search results
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl
# Author: Chandra Prakash Pathak
# Date: 23/02/2015 10:15 AM
#
# Searches YouTube for a term read from config.ini, collects the title and
# link of the top 10 results through Selenium WebDriver, and writes them to
# ChandraPrakash-<MMDDYYYY-HHMMSS>.html as one "Title: ...<TAB>Link: ..."
# line per result.

# Always use strict and warnings in each Perl program
use strict;
use warnings;
use Selenium::Remote::Driver;
use Selenium::Remote::WDKeys qw(KEYS);
use Config::Simple;
use POSIX qw/strftime/;

# Maximum number of search results written to the output file.
my $MAX_RESULTS = 10;

# Read the browser name and what to search from the configuration file
# named config.ini. Config::Simple->new returns undef on failure, so fail
# here with the library's own error text instead of on the next method call.
my $cfg = Config::Simple->new('config.ini')
    or die "Cannot read config.ini: " . Config::Simple->error() . "\n";

my $browser     = $cfg->param('browser');
my $search_term = $cfg->param('searchterm');
defined $browser
    or die "config.ini: missing 'browser' setting\n";
defined $search_term
    or die "config.ini: missing 'searchterm' setting\n";
print "Browser is $browser and search term is $search_term\n";

# auto_close => 0 keeps the browser session open after the script ends.
my $driver = Selenium::Remote::Driver->new(
    browser_name => $browser,
    auto_close   => 0,
);
######################## config part done ########################

$driver->get('http://www.youtube.com/');

# Find the search input box
my $search_box = $driver->find_element('masthead-search-term', 'id');
# Type in search box, the value is taken from the config file
$search_box->send_keys($search_term);
# Press enter to get results of the search term
$search_box->send_keys(KEYS->{'enter'});

# Collect the title links ("a" tags) of the results. The same list positions
# as before are scanned (1 .. $MAX_RESULTS + 1), but collection stops as soon
# as $MAX_RESULTS links have been found so that exactly the top 10 are kept.
my @elements;
POSITION:
foreach my $position (1 .. $MAX_RESULTS + 1) {
    my $xpath = qq{//*[\@id='section-list']/li/ol/li[$position]/div/div/div[2]/h3/a};
    # List context: find_elements returns the (possibly empty) list of matches.
    push @elements, $driver->find_elements($xpath);
    last POSITION if @elements >= $MAX_RESULTS;
}
# A single position may match more than one link; trim any overshoot.
splice @elements, $MAX_RESULTS if @elements > $MAX_RESULTS;

# Output filename should be <your name>-<MMDDYYYY-HHMMSS>.html
my $date            = strftime('%m%d%Y-%H%M%S', localtime);
my $output_filename = "ChandraPrakash-$date.html";

# Titles can contain non-ASCII characters, so write the file as UTF-8.
open(my $fh, '>:encoding(UTF-8)', $output_filename)
    or die "Cannot open $output_filename for writing: $!";

# Loop through each element (which is an "a" tag) and get the title and link
foreach my $item (@elements) {
    print {$fh} "Title: " . $item->get_text() . "\t";
    print {$fh} "Link: " . $item->get_attribute('href') . "\n";
}

# Buffered write errors only surface at close, so check it.
close($fh)
    or die "Cannot close $output_filename: $!";

# Either set auto_close to 1 while making the object or call the below to quit
#$driver->quit();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment