Skip to content

Instantly share code, notes, and snippets.

@Jawn78
Created February 13, 2018 17:39
Show Gist options
  • Save Jawn78/b86e2bb91240c70b87071f10a2b8941c to your computer and use it in GitHub Desktop.
Save Jawn78/b86e2bb91240c70b87071f10a2b8941c to your computer and use it in GitHub Desktop.
Selenium - Jsoup - Tika - Automated Web Posting
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package rexautomation;
import java.util.concurrent.TimeUnit;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.openqa.selenium.By;
import org.openqa.selenium.JavascriptExecutor;
import org.openqa.selenium.Point;
//import org.openqa.selenium.Keys;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.interactions.Actions;
/**
 * Copies a job requisition from the Taleo careers site into the "post a job"
 * form of a WordPress site by driving Chrome through Selenium.
 *
 * <p>The requisition page is loaded in the browser, its rendered source is
 * parsed with Jsoup to discover the ids of the "Description" and
 * "Qualifications" sections, and the text of those sections (plus title and
 * location) is then typed into the target form.
 *
 * @author RexPC
 */
public class RexAutomationv12 {

    /** Requisition used when no id is passed on the command line. */
    private static final String DEFAULT_REQUISITION_ID = "17156841";

    /**
     * Runs the scrape-and-post flow once.
     *
     * @param args the command line arguments; {@code args[0]}, when present,
     *             is used as the requisition id instead of the built-in default
     * @throws java.lang.InterruptedException declared for compatibility; nothing
     *             in this method currently blocks on an interruptible call
     */
    public static void main(String[] args) throws InterruptedException {
        System.setProperty("webdriver.chrome.driver", "C:\\Users\\RexPC\\Documents\\WebDrivers\\chromedriver.exe");

        String requisitionId = (args != null && args.length > 0) ? args[0] : DEFAULT_REQUISITION_ID;
        String requisitionPage = "https://hhccareers.taleo.net/careersection/jobdetail.ftl?job="
                + requisitionId + "&lang=en#.WW663PYW-j0.link";

        // NOTE(review): the driver is never quit, so the browser stays open
        // after main returns - confirm whether that is intended.
        WebDriver driver = new ChromeDriver();
        driver.get(requisitionPage);

        // Parse the rendered page (after dynamic content has loaded) with Jsoup
        // to find the ids of the containers holding the two text sections.
        Document doc = Jsoup.parse(driver.getPageSource());
        String descriptionId = sectionContainerId(doc, "Description", 2);
        String qualificationsId = sectionContainerId(doc, "Qualifications", 0);

        // implicitlyWait sets how long element lookups keep retrying; it does
        // not pause execution, so setting it once covers every lookup below.
        driver.manage().timeouts().implicitlyWait(1, TimeUnit.SECONDS);

        // Scrape title, location, description and qualifications.
        String titleText = elementWithId(driver, "requisitionDescriptionInterface.ID1667.row1").getText();
        String locationText = elementWithId(driver, "requisitionDescriptionInterface.ID1496.row1").getText();
        String descriptionText = driver.findElement(By.id(descriptionId)).getText();
        String qualificationsText = driver.findElement(By.id(qualificationsId)).getText();

        // Log in; the login page redirects to the job form afterwards.
        driver.get("http://www.jonathonpalmieri.com/wp-login.php?redirect_to=http%3A%2F%2Fwww.jonathonpalmieri.com%2Fjobs%2Fpost-a-job%2F");
        typeInto(driver, "user_login", "Username"); //Enter Username
        typeInto(driver, "user_pass", "Password"); //Enter Password
        elementWithId(driver, "wp-submit").click();

        // Open the job form and fill in the plain text fields.
        driver.get("http://www.jonathonpalmieri.com/jobs/post-a-job/");
        typeInto(driver, "job_title", titleText);
        typeInto(driver, "job_location", locationText);

        // Open the job category drop-down and pick its first option.
        elementWithId(driver, "job_category_chosen").click();
        driver.findElement(By.xpath("//*[@id=\"job_category_chosen\"]/div/ul/li[1]")).click();

        typeInto(driver, "application", requisitionPage);
        typeInto(driver, "company_name", "Hartford HealthCare");
        typeInto(driver, "company_website", "www.JoinHartfordHealthCare.org");
        typeInto(driver, "company_tagline", "Connect to Healthier");
        typeInto(driver, "company_video", "https://www.youtube.com/user/HartfordHospitalCT");
        typeInto(driver, "company_twitter", "@HartfordHealthC");

        // Focus the rich-text editor: click the first iframe, then the editor element.
        driver.findElement(By.tagName("iframe")).click();
        WebElement editor = elementWithId(driver, "mceu_15");
        editor.click();

        // Type the scraped description followed by the qualifications.
        // (Previously the variable names were quoted, so the literal words
        // "SelectionTextqualificationSelectionText" were typed instead.)
        new Actions(driver)
                .sendKeys(editor, descriptionText + qualificationsText)
                .build()
                .perform();

        // A file input takes the path via sendKeys; it is not clicked or cleared.
        elementWithId(driver, "company_logo").sendKeys("C:\\Users\\RexPC\\Pictures\\HHC_logo.jpg");

        // Allow lookups up to 10 seconds from here on.
        driver.manage().timeouts().implicitlyWait(10, TimeUnit.SECONDS);

        // The result is unused, but the lookup throws if no element named
        // "step" appears within the timeout, so it is kept as a presence check.
        driver.findElement(By.name("step"));

        driver.findElement(By.xpath("//*[@id='submit-job-form']//*[@name='submit_job']")).click();
    }

    /**
     * Returns the {@code id} attribute of the great-grandparent of the
     * {@code matchIndex}-th {@code <span>} whose text matches {@code label}.
     *
     * @throws IllegalStateException if the document has too few matching spans
     */
    private static String sectionContainerId(Document doc, String label, int matchIndex) {
        Elements matches = doc.select("span:matches(" + label + ")");
        if (matches.size() <= matchIndex) {
            throw new IllegalStateException("Expected at least " + (matchIndex + 1)
                    + " <span> element(s) matching \"" + label + "\" but found " + matches.size());
        }
        Element span = matches.get(matchIndex);
        return span.parent().parent().parent().attr("id");
    }

    /** Finds the element with the given id using the XPath form {@code //*[@id="..."]}. */
    private static WebElement elementWithId(WebDriver driver, String id) {
        return driver.findElement(By.xpath("//*[@id=\"" + id + "\"]"));
    }

    /** Clicks the field with the given id, clears it, and types {@code text} into it. */
    private static void typeInto(WebDriver driver, String id, String text) {
        WebElement field = elementWithId(driver, id);
        field.click();
        field.clear();
        field.sendKeys(text);
    }
}
@Jawn78
Copy link
Author

Jawn78 commented Feb 13, 2018

This is a Java project using Selenium, Jsoup, and Apache Tika to extract information through the DOM structure and enter it into a web form (including iframes), mimicking human input through the sendKeys method. Selenium was used to get around the dynamic content loading and to extract the page source, which Jsoup then iterates through to extract the necessary content. The Jsoup loop that validates the information is incomplete. Apache Tika, which is not shown in this code snippet, was used to extract information from a document, which was then used in the same way as the extracted web data.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment