Skip to content

Instantly share code, notes, and snippets.

@chankeypathak
Created February 23, 2015 05:45
Show Gist options
  • Save chankeypathak/c51804180a592b24fb14 to your computer and use it in GitHub Desktop.
Save chankeypathak/c51804180a592b24fb14 to your computer and use it in GitHub Desktop.
Scrape the top 10 YouTube search results
#!/usr/bin/perl
#Author: Chandra Prakash Pathak
#Date: 23/02/2015 10:15 AM
#Always use strict and warnings in each Perl program
use strict;
use warnings;
use Selenium::Remote::Driver;
use Selenium::Remote::WDKeys qw(KEYS);
use Config::Simple;
use POSIX qw/strftime/;
# Load runtime settings from config.ini (looked up relative to the current
# working directory) and start the Selenium session.
#
# Settings read from the file:
#   browser    - browser name handed to Selenium::Remote::Driver
#   searchterm - text typed into the YouTube search box
#
# Config::Simple->new returns undef when the file cannot be read or parsed,
# so fail right here with the module's own error text instead of dying later
# on a method call against undef.
my $cfg = Config::Simple->new('config.ini')
    or die "Cannot load config.ini: " . Config::Simple->error() . "\n";
#Read the browser name and what to search from configuration file named config.ini
my $browser = $cfg->param('browser');
my $search_term = $cfg->param('searchterm');
# Both settings are required by the rest of the script; report a missing one
# by name rather than letting it surface as an "uninitialized value" warning.
die "config.ini: missing 'browser' setting\n"
    unless defined $browser && length $browser;
die "config.ini: missing 'searchterm' setting\n"
    unless defined $search_term && length $search_term;
print "Browser is $browser and search term is $search_term\n";
# auto_close => 0 leaves the browser session open after the script finishes;
# the session must then be ended explicitly with $driver->quit().
my $driver = Selenium::Remote::Driver->new(
    browser_name => $browser,
    auto_close => 0
);
######################## config part done ########################
# Open YouTube, submit the configured search term and collect the title
# links (<a> elements) of the top results into @elements.
$driver->get('http://www.youtube.com/');
#Find the search input box
my $search_box = $driver->find_element('masthead-search-term', 'id');
#Type in search box, the value will be taken from config file
$search_box->send_keys($search_term);
#Press enter to get results of search term
$search_box->send_keys(KEYS->{'enter'});
# NOTE(review): nothing waits for the results page to finish loading before
# the lookups below run - confirm whether an explicit or implicit wait is
# needed for the target browser.
my @elements;
# Only the top 10 results are wanted.
my $max_results = 10;
# Probe list positions 1..11 as the original script did (presumably one slot
# in the list may not be a video result - TODO confirm against the live page),
# but stop as soon as enough links have been gathered.
foreach my $position (1..11) {
    my $xpath = qq{//*[\@id='section-list']/li/ol/li[$position]/div/div/div[2]/h3/a};
    # find_elements is called in list context here, so the matched elements
    # themselves (possibly none) are appended.
    push @elements, $driver->find_elements($xpath);
    last if @elements >= $max_results;
}
# A single XPath can match more than one node; never keep more than 10.
splice @elements, $max_results if @elements > $max_results;
# Write one "Title: <text><TAB>Link: <href>" line per collected result to a
# timestamped output file in the current directory.
#output filename should be <your name>-<MMDDYYYY-HHMMSS>.html
# The time part keeps runs on the same day from overwriting each other.
my $date = strftime('%m%d%Y-%H%M%S', localtime);
my $output_filename = "ChandraPrakash-$date.html";
# Encode explicitly as UTF-8: video titles frequently contain non-ASCII
# characters, which would otherwise trigger "Wide character" warnings.
open(my $fh, '>:encoding(UTF-8)', $output_filename)
    or die "Cannot open $output_filename for writing: $!";
#Loop through each element (which is "a" tag) and get the title and link
foreach my $item (@elements) {
    my $title = $item->get_text();
    my $link = $item->get_attribute('href');
    # Guard against missing values so a single odd element cannot produce
    # "uninitialized value" warnings in the output line.
    $title = '' unless defined $title;
    $link = '' unless defined $link;
    print {$fh} "Title: $title\tLink: $link\n";
}
# Buffered write errors only surface at close, so check it.
close($fh) or die "Cannot close $output_filename: $!";
#either set auto_close to 1 while making object or call the below to quit
#$driver->quit();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment