Last active
March 25, 2019 13:47
-
-
Save aino-prashant/f37755187477c7cbd13a367fba26cb7d to your computer and use it in GitHub Desktop.
Save article entity
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package org.appops.scrapper.path.processor; | |
import org.appops.altshared.shared.altcore.altoperations.AltOperation; | |
import org.appops.core.service.ServiceStore; | |
import org.appops.invoker.call.OperationInvoker; | |
import org.appops.scrapper.exception.BrowserException; | |
import org.appops.scrapper.exception.ScriptException; | |
import org.appops.scrapper.path.context.ScraperContextStore; | |
import org.appops.scrapper.path.context.entity.ArticleEntityStore; | |
import org.appops.scrapper.path.context.entity.PageEntityStore; | |
import org.appops.scrapper.path.element.DataFetchElement; | |
import org.appops.scrapper.path.element.ElementProperty; | |
import org.appops.scrapper.path.element.EntityElement; | |
import org.appops.scrapper.path.element.locator.Locator; | |
import org.appops.scrapper.path.element.property.EntityProperty; | |
import org.appops.scrapper.util.XPathLogger; | |
import org.openqa.selenium.StaleElementReferenceException; | |
import org.openqa.selenium.TimeoutException; | |
import org.openqa.selenium.WebElement; | |
import com.google.inject.Inject; | |
/** | |
* @author prashant@ainosoft.com | |
* @CreatedOn 28-Feb-2019 | |
* @Responsibility A class which is used to traverse through the each entity property and process and act according to | |
* each element behavior like save into db or context. | |
*/ | |
public class EntityElementProcessor extends ElementProcessor<EntityElement> { | |
private WebElementPropertyProcessor propertyProcessor; | |
private ScraperContextStore contextStore; | |
private ElementProcessorProvider elementProcessorProvider; | |
private ArticleEntityStore articleEntityStore; | |
private PageEntityStore pageEntityStore; | |
private ServiceStore serviceStore; | |
private OperationInvoker oprationInvoker; | |
/** | |
* Locates an element by locator (e.g. x-path) and fetches the value against the property specified. | |
* | |
* @param dataFetchElement | |
* contains information of element from which data is to be fetched. | |
* @throws ScriptException | |
*/ | |
@Override | |
public void processElement(EntityElement entityElement) throws ScriptException { | |
if (entityElement.getEntityName().equals("Page")) { | |
savePageClient(entityElement); | |
} else if (entityElement.getEntityName().equals("Article")) { | |
for (EntityProperty entityProperty : entityElement.getEntityProperties()) { | |
articleProcessor(entityProperty); | |
} | |
getArticleEntityStore().setArticlePageId(getPageEntity().getPageId()); | |
if (entityElement.getOperation() != null) { | |
String friendlyName = entityElement.getOperation().getName(); | |
String serviceName = entityElement.getOperation().getService(); | |
try { | |
AltOperation operation = getServiceStore().getOperationByFriendlyName(serviceName, friendlyName); | |
operation.getParameters().get("article").setValue(getArticleEntityStore().getArticleClient()); | |
getOprationInvoker().executeOperation(operation); | |
} catch (Exception e) { | |
try { | |
e.printStackTrace(); | |
throw e; | |
} catch (Exception e1) { | |
e1.printStackTrace(); | |
} | |
} | |
} | |
} else | |
return;// not implemented yet | |
} | |
private Object savePageClient(EntityElement entityElement) { | |
Object obj = null; | |
if (entityElement.getOperation() != null) { | |
String friendlyName = entityElement.getOperation().getName(); | |
String serviceName = entityElement.getOperation().getService(); | |
try { | |
AltOperation operation = getServiceStore().getOperationByFriendlyName(serviceName, friendlyName); | |
operation.getParameters().get("page").setValue(getPageEntity().getPage()); | |
obj = getOprationInvoker().executeOperation(operation); | |
getPageEntity().setPageId((Integer) obj); | |
} catch (Exception e) { | |
try { | |
e.printStackTrace(); | |
throw e; | |
} catch (Exception e1) { | |
e1.printStackTrace(); | |
} | |
} | |
} | |
return obj; | |
} | |
/** | |
* populate article object. | |
*/ | |
private void articleProcessor(EntityProperty entityProperty) { | |
String propertyName = entityProperty.getName(); | |
DataFetchElement dataFetchElement = entityProperty.getDataFetchElement(); | |
Locator locator = dataFetchElement.getLocator(); | |
ElementProperty propertyToBeFetched = dataFetchElement.getProperty(); | |
WebElement webElement = null; | |
try { | |
webElement = (WebElement) getLocatorProcessor().processorLocator(getWebBrowser(), locator); | |
if (webElement != null) { | |
Object propertyValue = getPropertyProcessor().getPropertyValue(webElement, propertyToBeFetched); | |
if (propertyValue != null) { | |
if (dataFetchElement.getProperty().equals(ElementProperty.TEXT)) { | |
String value = String.valueOf(propertyValue).replaceAll(System.getProperty("line.separator"), | |
" "); | |
getArticleEntityStore().setArticleProperty(propertyName, value); | |
} | |
} | |
} | |
} catch (Exception e) { | |
XPathLogger.log(e); | |
String cause = e.getCause().getMessage(); | |
if (e instanceof TimeoutException) { | |
throw new ScriptException("Timeout occured because of " + cause); | |
} | |
if (e instanceof BrowserException) { | |
throw new ScriptException(cause); | |
} | |
if (e instanceof StaleElementReferenceException) | |
throw new ScriptException("Unable to fetch data from : " + locator.getValue() + " because of " + cause); | |
else | |
throw new ScriptException(e); | |
} | |
} | |
public ElementProcessorProvider getElementProcessorProvider() { | |
return elementProcessorProvider; | |
} | |
@Inject | |
public void setElementProcessorProvider(ElementProcessorProvider elementProcessorProvider) { | |
this.elementProcessorProvider = elementProcessorProvider; | |
} | |
public WebElementPropertyProcessor getPropertyProcessor() { | |
return propertyProcessor; | |
} | |
@Inject | |
public void setPropertyProcessor(WebElementPropertyProcessor propertyExtactor) { | |
this.propertyProcessor = propertyExtactor; | |
} | |
public ScraperContextStore getContextStore() { | |
return contextStore; | |
} | |
@Inject | |
public void setContextStore(ScraperContextStore contextStore) { | |
this.contextStore = contextStore; | |
} | |
public ArticleEntityStore getArticleEntityStore() { | |
return articleEntityStore; | |
} | |
@Inject | |
public void setArtiicleEntityStore(ArticleEntityStore entityStore) { | |
this.articleEntityStore = entityStore; | |
} | |
public ServiceStore getServiceStore() { | |
return serviceStore; | |
} | |
@Inject | |
public void setServiceStore(ServiceStore serviceStore) { | |
this.serviceStore = serviceStore; | |
} | |
public OperationInvoker getOprationInvoker() { | |
return oprationInvoker; | |
} | |
@Inject | |
public void setOprationInvoker(OperationInvoker oprationInvoker) { | |
this.oprationInvoker = oprationInvoker; | |
} | |
public PageEntityStore getPageEntity() { | |
return pageEntityStore; | |
} | |
@Inject | |
public void setPageEntity(PageEntityStore pageEntityStore) { | |
this.pageEntityStore = pageEntityStore; | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<target-source url="http://digital.ilcentro.it/ilcentro/books/latinaoggi/"> | |
<execution-element type="flow"> | |
<!--here is the execution element to navigate --> | |
<execution-element type="data-put"> | |
<locator type="ID" value="email" /> | |
<attribute-value>dummy username</attribute-value> | |
<property>TEXT</property> | |
</execution-element> | |
<!--this is a Article entity which is used to traverse through the entity-property | |
and process and act according to each element added in to it. --> | |
<execution-element type="entity" entity-name="Article"> | |
<operation signature="_ContentService_saveArticle" /> | |
<entity-property name="title"> | |
<execution-element type="data-fetch"> | |
<locator type="XPATH" | |
value="//*[@id=&quot;blueBarDOMInspector&quot;]/div/div/div/div[1]/h1/a/i" />
<property>TEXT</property> | |
</execution-element> | |
</entity-property> | |
<entity-property name="subTitle"> | |
<execution-element type="data-fetch"> | |
<locator type="XPATH" | |
value="//*[@id=&quot;login_form&quot;]/table/tbody/tr[1]/td[1]" />
<property>TEXT</property> | |
</execution-element> | |
</entity-property> | |
<entity-property name="content"> | |
<execution-element type="data-fetch"> | |
<locator type="ID" value="loginbutton" /> | |
<property>TEXT</property> | |
</execution-element> | |
</entity-property> | |
</execution-element> | |
</execution-element> | |
</target-source> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8"?> | |
<target-source url="http://digital.ilcentro.it/ilcentro/books/latinaoggi/"> | |
<execution-element type="flow"> | |
<execution-element type="act"> | |
<locator type="IFRAME_NAME" value="iframe_login" /> | |
<action-type>SWITCHFRAME</action-type> | |
<key /> | |
</execution-element> | |
<execution-element type="data-put"> | |
<locator type="XPATH" value="//input[@id='input_username']" /> | |
<attribute-value>amministrazione@p-review.it</attribute-value> | |
<property>TEXT</property> | |
</execution-element> | |
<execution-element type="data-put"> | |
<locator type="XPATH" value="//input[@id='input_password']" /> | |
<attribute-value>12681870155</attribute-value> | |
<property>TEXT</property> | |
</execution-element> | |
<execution-element type="act"> | |
<locator type="XPATH" value="//*[@id=&quot;login_form&quot;]/p[6]" />
<action-type>CLICK</action-type> | |
<key /> | |
</execution-element> | |
<execution-element type="If"> | |
<condition expression-type="XPATH" expression="boolean(//*[@id=&quot;activate&quot;]/a)" />
<then-execution type="flow"> | |
<execution-element type="act"> | |
<locator type="XPATH" value="//*[@id=&quot;activate&quot;]/a" />
<action-type>CLICK</action-type> | |
<key /> | |
</execution-element> | |
</then-execution> | |
<else-execution /> | |
</execution-element> | |
<execution-element type="If"> | |
<condition expression-type="CLASSNAME" expression="vc-tooltip" /> | |
<then-execution type="flow"> | |
<execution-element type="act"> | |
<locator type="ID" value="pages" /> | |
<action-type>CLICK</action-type> | |
<key /> | |
</execution-element> | |
</then-execution> | |
<else-execution /> | |
</execution-element> | |
<execution-element type="for-each"> | |
<iterator-locator type="XPATH" value="childrenOf('//*[@id=&quot;thumbcont&quot;]/ul',li)" />
<repeat-execution type="flow"> | |
<execution-element type="act"> | |
<locator type="ID" value="articles" /> | |
<action-type>CLICK</action-type> | |
<key /> | |
</execution-element> | |
<execution-element type="entity" entity-name="Page"> | |
<operation name="savePage" service="eXtrapola" /> | |
</execution-element> | |
<execution-element type="for-each"> | |
<iterator-locator type="ID" value="childrenOf('articles_list')" /> | |
<repeat-execution type="flow"> | |
<execution-element type="act"> | |
<locator type="IFRAME_NAME" value="shadowbox_content" /> | |
<action-type>SWITCHFRAME</action-type> | |
<key /> | |
</execution-element> | |
<execution-element type="entity" entity-name="Article"> | |
<operation name="saveArticle" service="eXtrapola" /> | |
<entity-property name="title"> | |
<execution-element type="data-fetch"> | |
<locator type="CSS_SELECTOR" value="h1.titolo_articolo.titolo" /> | |
<property>TEXT</property> | |
</execution-element> | |
</entity-property> | |
<entity-property name="subtitle"> | |
<execution-element type="data-fetch"> | |
<locator type="CSS_SELECTOR" value="h2.sottotitolo_articolo.sottotitolo" /> | |
<property>TEXT</property> | |
</execution-element> | |
</entity-property> | |
<entity-property name="content"> | |
<execution-element type="data-fetch"> | |
<locator type="CLASSNAME" value="testo_articolo" /> | |
<property>TEXT</property> | |
</execution-element> | |
</entity-property> | |
</execution-element> | |
<execution-element type="act"> | |
<locator type="IFRAME_NAME" value="iframe_login" /> | |
<action-type>SWITCHFRAMEDEFAULT</action-type> | |
<key /> | |
</execution-element> | |
<execution-element type="act"> | |
<locator type="XPATH" value="//*[@id=&quot;shadowbox_nav_close&quot;]" />
<action-type>CLICK</action-type> | |
<key /> | |
</execution-element> | |
<execution-element type="If"> | |
<condition expression-type="ID" expression="textual_articles" /> | |
<then-execution /> | |
<else-execution type="flow"> | |
<execution-element type="act"> | |
<locator type="ID" value="articles" /> | |
<action-type>CLICK</action-type> | |
<key /> | |
</execution-element> | |
</else-execution> | |
</execution-element> | |
</repeat-execution> | |
</execution-element> | |
<execution-element type="If"> | |
<condition expression-type="CLASSNAME" expression="vc-tooltip" /> | |
<then-execution type="flow"> | |
<execution-element type="act"> | |
<locator type="ID" value="pages" /> | |
<action-type>CLICK</action-type> | |
<key /> | |
</execution-element> | |
</then-execution> | |
<else-execution /> | |
</execution-element> | |
</repeat-execution> | |
</execution-element> | |
</execution-element> | |
</target-source> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<target-source | |
url="http://digital.ilcentro.it/ilcentro/books/latinaoggi/" | |
edition="pescara" publication-id="22" publication-name="IlCentro"> | |
<execution-element type="flow"> | |
<execution-element type="act"> | |
<locator type="IFRAME_NAME" value="iframe_login" /> | |
<action-type>SWITCHFRAME</action-type> | |
<key /> | |
</execution-element> | |
<execution-element type="data-put"> | |
<locator type="XPATH" value="//input[@id='input_username']" /> | |
<attribute-value>amministrazione@p-review.it</attribute-value> | |
<property>TEXT</property> | |
</execution-element> | |
<execution-element type="data-put"> | |
<locator type="XPATH" value="//input[@id='input_password']" /> | |
<attribute-value>12681870155</attribute-value> | |
<property>TEXT</property> | |
</execution-element> | |
<execution-element type="act"> | |
<locator type="XPATH" | |
value="//*[@id=&quot;login_form&quot;]/p[6]" />
<action-type>CLICK</action-type> | |
<key /> | |
</execution-element> | |
<execution-element type="If"> | |
<condition expression-type="XPATH" | |
expression="boolean(//*[@id=&quot;activate&quot;]/a)" />
<then-execution type="flow"> | |
<execution-element type="act"> | |
<locator type="XPATH" | |
value="//*[@id=&quot;activate&quot;]/a" />
<action-type>CLICK</action-type> | |
<key /> | |
</execution-element> | |
</then-execution> | |
<else-execution /> | |
</execution-element> | |
<execution-element type="If"> | |
<condition expression-type="CLASSNAME" | |
expression="vc-tooltip" /> | |
<then-execution type="flow"> | |
<execution-element type="act"> | |
<locator type="ID" value="pages" /> | |
<action-type>CLICK</action-type> | |
<key /> | |
</execution-element> | |
</then-execution> | |
<else-execution /> | |
</execution-element> | |
<execution-element type="for-each"> | |
<iterator-locator type="XPATH" | |
value="//*[@id=&quot;thumbcont&quot;]/ul/li[@data-page]" />
<repeat-execution type="flow"> | |
<execution-element type="If"> | |
<condition expression-type="XPATH" | |
expression="//li[not(contains(@class, 'rightpage'))]" /> | |
<then-execution type="flow"> | |
<execution-element type="act"> | |
<locator type="ID" value="articles" /> | |
<action-type>CLICK</action-type> | |
<key /> | |
</execution-element> | |
<execution-element type="data-fetch"> | |
<locator type="LOCAL_CONTEXT" | |
value="currentIteratorValue" /> | |
<property>data-page</property> | |
<result-handler type="context" | |
store-as="currentPageNumber" store-in="LOCAL_CONTEXT" /> | |
</execution-element> | |
<execution-element type="server-op" | |
operation-signature="Entities_saveEntity"> | |
<param name="entity" order="1"> | |
<value type="entity" entity-type="content" | |
service-name="eXtrapola"> | |
<entity-property name="date" type-as="DATE"> | |
<execution-element type="data-fetch"> | |
<locator type="CLASSNAME" value="dataedizione" /> | |
<property>TEXT</property> | |
</execution-element> | |
</entity-property> | |
<entity-property name="convertedOn" | |
type-as="DATE"> | |
<execution-element type="data-fetch"> | |
<locator type="LOCAL_CONTEXT" | |
value="{{currentDateTime}}" /> | |
<property /> | |
</execution-element> | |
</entity-property> | |
<entity-property name="downloadedOn"> | |
<execution-element type="data-fetch"> | |
<locator type="LOCAL_CONTEXT" | |
value="{{currentDateTime}}" /> | |
<property /> | |
</execution-element> | |
</entity-property> | |
<entity-property name="sourceId" | |
type-as="INTEGER"> | |
<execution-element type="data-fetch"> | |
<locator type="LOCAL_CONTEXT" | |
value="IlCentroPublicationId" /> | |
<property /> | |
</execution-element> | |
</entity-property> | |
<entity-property name="editionCode" | |
type-as="STRING"> | |
<execution-element type="data-fetch"> | |
<locator type="LOCAL_CONTEXT" value="currentEdition" /> | |
<property /> | |
</execution-element> | |
</entity-property> | |
<entity-property name="pageNumber" | |
type-as="STRING"> | |
<execution-element type="data-fetch"> | |
<locator type="LOCAL_CONTEXT" | |
value="currentIteratorValue" /> | |
<property>data-page</property> | |
</execution-element> | |
</entity-property> | |
<entity-property name="sourceBlobId" | |
type-as="BLOB"> | |
<execution-element type="url" | |
value="http://digital.ilcentro.it/ilcentro/books/pescara/{{currentYear}}/{{currentdate}}pescara/images/thumbnails/Page-{{@context.currentPageNumber}}.jpg" /> | |
</entity-property> | |
</value> | |
</param> | |
<result-handler type="context" | |
store-as="currentPageId" store-in="LOCAL_CONTEXT" /> | |
</execution-element> | |
<execution-element type="for-each"> | |
<iterator-locator type="CLASSNAME" | |
value="article_title" /> | |
<repeat-execution type="flow"> | |
<execution-element type="act"> | |
<locator type="IFRAME_NAME" value="shadowbox_content" /> | |
<action-type>SWITCHFRAME</action-type> | |
<key /> | |
</execution-element> | |
<execution-element type="server-op" | |
operation-signature="Entities_saveEntity"> | |
<param name="entity" order="1"> | |
<value type="entity" entity-type="article" | |
service-name="eXtrapola"> | |
<entity-property name="title" | |
type-as="STRING"> | |
<execution-element type="data-fetch"> | |
<locator type="CSS_SELECTOR" | |
value="h1.titolo_articolo.titolo" /> | |
<property>TEXT</property> | |
</execution-element> | |
</entity-property> | |
<entity-property name="subTitle" | |
type-as="STRING"> | |
<execution-element type="data-fetch"> | |
<locator type="CSS_SELECTOR" | |
value="h2.sottotitolo_articolo.sottotitolo" /> | |
<property>TEXT</property> | |
</execution-element> | |
</entity-property> | |
<entity-property name="contentBlobId" | |
type-as="BLOB"> | |
<execution-element type="data-fetch"> | |
<locator type="CLASSNAME" value="testo_articolo" /> | |
<property>TEXT</property> | |
</execution-element> | |
</entity-property> | |
<entity-property name="guid" type-as="STRING"> | |
<execution-element type="data-fetch"> | |
<locator type="GUID_GENERATOR" /> | |
<property /> | |
</execution-element> | |
</entity-property> | |
<entity-property name="pageId" | |
type-as="INTEGER"> | |
<execution-element type="data-fetch"> | |
<locator type="LOCAL_CONTEXT" value="currentPageId" /> | |
<property /> | |
</execution-element> | |
</entity-property> | |
</value> | |
</param> | |
<result-handler /> | |
</execution-element> | |
<execution-element type="act"> | |
<locator type="IFRAME_NAME" value="iframe_login" /> | |
<action-type>SWITCHFRAMEDEFAULT</action-type> | |
<key /> | |
</execution-element> | |
<execution-element type="act"> | |
<locator type="XPATH" | |
value="//*[@id=&quot;shadowbox_nav_close&quot;]" />
<action-type>CLICK</action-type> | |
<key /> | |
</execution-element> | |
<execution-element type="If"> | |
<condition expression-type="ID" | |
expression="textual_articles" /> | |
<then-execution /> | |
<else-execution type="flow"> | |
<execution-element type="act"> | |
<locator type="ID" value="articles" /> | |
<action-type>CLICK</action-type> | |
<key /> | |
</execution-element> | |
</else-execution> | |
</execution-element> | |
</repeat-execution> | |
</execution-element> | |
<execution-element type="If"> | |
<condition expression-type="CLASSNAME" | |
expression="vc-tooltip" /> | |
<then-execution type="flow"> | |
<execution-element type="act"> | |
<locator type="ID" value="pages" /> | |
<action-type>CLICK</action-type> | |
<key /> | |
</execution-element> | |
</then-execution> | |
<else-execution /> | |
</execution-element> | |
</then-execution> | |
<else-execution> | |
<execution-element type="server-op" | |
operation-signature="combineAndSavePages"> | |
<param name="firstPage" order="1"> | |
<value type="entity" entity-type="content" | |
service-name="eXtrapola"> | |
<entity-property name="pageNumber" | |
type-as="STRING"> | |
<execution-element type="data-fetch"> | |
<locator type="LOCAL_CONTEXT" | |
value="currentIteratorValue" /> | |
<property>data-page</property> | |
</execution-element> | |
</entity-property> | |
<entity-property name="sourceBlobId" | |
type-as="BLOB"> | |
<execution-element type="url" | |
value="http://digital.ilcentro.it/ilcentro/books/pescara/{{currentYear}}/{{currentdate}}pescara/images/thumbnails/Page-{{@context.currentPageNumber}}.jpg" /> | |
</entity-property> | |
</value> | |
</param> | |
</execution-element> | |
</else-execution> | |
</execution-element> | |
</repeat-execution> | |
</execution-element> | |
</execution-element> | |
</target-source> |
<execution-element type="pushVar" name="sourceId" value=""></execution-element>
<execution-element type="serverOp" service="extrapola" result-var="savedArticle">
<operation friendly="_ContentService_saveArticle" />
<param type="entity" entity-type="Article" name="article" >
<entity-property name="title">
<execution-element type="data-fetch">
<locator type="CSS_SELECTOR" value="h1.titolo_articolo.titolo" />
<property>TEXT</property>
</execution-element>
</entity-property>
<entity-property name="subtitle">
<execution-element type="data-fetch">
<locator type="CSS_SELECTOR" value="h2.sottotitolo_articolo.sottotitolo" />
<property>TEXT</property>
</execution-element>
</entity-property>
<entity-property name="content">
<execution-element type="data-fetch">
<locator type="CLASSNAME" value="testo_articolo" />
<property>TEXT</property>
</execution-element>
</entity-property>
<entity-property name="pageId" populate="@currentPage.id"></entity-property>
</param>
</execution-element>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@aino-prashant @aino-vedang
Although the script does the job of saving an article for now, we need to look at the problem from a broader perspective: (1) entity instantiation, population, and pushing to a variable; (2) invoking an operation from a service/space and preparing all the parameters it needs.
Let's take this up for review in an in-person meeting soon.