Skip to content

Instantly share code, notes, and snippets.

@aino-prashant
Last active March 25, 2019 13:47
Show Gist options
  • Save aino-prashant/f37755187477c7cbd13a367fba26cb7d to your computer and use it in GitHub Desktop.
Save aino-prashant/f37755187477c7cbd13a367fba26cb7d to your computer and use it in GitHub Desktop.
Save article entity
package org.appops.scrapper.path.processor;
import org.appops.altshared.shared.altcore.altoperations.AltOperation;
import org.appops.core.service.ServiceStore;
import org.appops.invoker.call.OperationInvoker;
import org.appops.scrapper.exception.BrowserException;
import org.appops.scrapper.exception.ScriptException;
import org.appops.scrapper.path.context.ScraperContextStore;
import org.appops.scrapper.path.context.entity.ArticleEntityStore;
import org.appops.scrapper.path.context.entity.PageEntityStore;
import org.appops.scrapper.path.element.DataFetchElement;
import org.appops.scrapper.path.element.ElementProperty;
import org.appops.scrapper.path.element.EntityElement;
import org.appops.scrapper.path.element.locator.Locator;
import org.appops.scrapper.path.element.property.EntityProperty;
import org.appops.scrapper.util.XPathLogger;
import org.openqa.selenium.StaleElementReferenceException;
import org.openqa.selenium.TimeoutException;
import org.openqa.selenium.WebElement;
import com.google.inject.Inject;
/**
 * Processes {@code entity} execution elements of a scraper script.
 *
 * <p>For an entity named {@code "Page"} the configured operation is invoked with the current page
 * object and the returned id is remembered in the {@link PageEntityStore}. For an entity named
 * {@code "Article"} every entity property is fetched from the browser into the
 * {@link ArticleEntityStore}, the article is linked to the current page id and the configured
 * operation is invoked with the populated article. Any other entity name is ignored.
 *
 * @author prashant@ainosoft.com
 * @CreatedOn 28-Feb-2019
 */
public class EntityElementProcessor extends ElementProcessor<EntityElement> {

    /** Entity name handled by {@link #savePageClient(EntityElement)}. */
    private static final String PAGE_ENTITY = "Page";
    /** Entity name whose properties are scraped and saved as an article. */
    private static final String ARTICLE_ENTITY = "Article";
    /** Name of the operation parameter that receives the page object. */
    private static final String PAGE_PARAMETER = "page";
    /** Name of the operation parameter that receives the article object. */
    private static final String ARTICLE_PARAMETER = "article";

    private WebElementPropertyProcessor propertyProcessor;
    private ScraperContextStore contextStore;
    private ElementProcessorProvider elementProcessorProvider;
    private ArticleEntityStore articleEntityStore;
    private PageEntityStore pageEntityStore;
    private ServiceStore serviceStore;
    private OperationInvoker oprationInvoker;

    /**
     * Dispatches on the entity name: saves the page, or populates and saves the article.
     *
     * @param entityElement
     *            the entity element read from the script; its entity name selects the branch and
     *            its (optional) operation names the service call used for saving.
     * @throws ScriptException
     *             if fetching one of the article properties fails.
     */
    @Override
    public void processElement(EntityElement entityElement) throws ScriptException {
        // Constant-first comparison so that a missing entity name is ignored instead of raising a
        // NullPointerException.
        String entityName = entityElement.getEntityName();
        if (PAGE_ENTITY.equals(entityName)) {
            savePageClient(entityElement);
        } else if (ARTICLE_ENTITY.equals(entityName)) {
            for (EntityProperty entityProperty : entityElement.getEntityProperties()) {
                articleProcessor(entityProperty);
            }
            getArticleEntityStore().setArticlePageId(getPageEntity().getPageId());
            saveArticleClient(entityElement);
        }
        // Other entity names are not implemented yet.
    }

    /**
     * Invokes the operation configured on the element with the populated article as its
     * {@code "article"} parameter. Does nothing when the element has no operation.
     *
     * <p>Failures are logged and otherwise ignored, so one article that cannot be saved does not
     * abort the rest of the script.
     * NOTE(review): confirm that best-effort is the intended policy here.
     */
    private void saveArticleClient(EntityElement entityElement) {
        if (entityElement.getOperation() == null) {
            return;
        }
        String friendlyName = entityElement.getOperation().getName();
        String serviceName = entityElement.getOperation().getService();
        try {
            AltOperation operation = getServiceStore().getOperationByFriendlyName(serviceName, friendlyName);
            operation.getParameters().get(ARTICLE_PARAMETER).setValue(getArticleEntityStore().getArticleClient());
            getOprationInvoker().executeOperation(operation);
        } catch (Exception e) {
            // Logged once; the previous print / rethrow / catch / print sequence only duplicated
            // the stack trace.
            XPathLogger.log(e);
        }
    }

    /**
     * Invokes the operation configured on the element with the current page as its {@code "page"}
     * parameter and stores the result as the current page id.
     *
     * <p>Failures are logged and otherwise ignored.
     * NOTE(review): after a failed save the page id in the store is left unchanged, so later
     * articles are linked to whatever id was stored before; confirm this is acceptable.
     *
     * @return the value returned by the operation, or {@code null} when the element has no
     *         operation or the invocation failed before returning.
     */
    private Object savePageClient(EntityElement entityElement) {
        Object obj = null;
        if (entityElement.getOperation() != null) {
            String friendlyName = entityElement.getOperation().getName();
            String serviceName = entityElement.getOperation().getService();
            try {
                AltOperation operation = getServiceStore().getOperationByFriendlyName(serviceName, friendlyName);
                operation.getParameters().get(PAGE_PARAMETER).setValue(getPageEntity().getPage());
                obj = getOprationInvoker().executeOperation(operation);
                getPageEntity().setPageId((Integer) obj);
            } catch (Exception e) {
                XPathLogger.log(e);
            }
        }
        return obj;
    }

    /**
     * Fetches a single article property from the browser and stores it in the article store.
     *
     * <p>The element is located with the property's locator and the requested element property is
     * read from it. Only {@code TEXT} values are stored; occurrences of the platform line
     * separator in the text are replaced by a single space.
     *
     * @param entityProperty
     *            the property definition holding the target property name and the data-fetch
     *            element (locator plus element property).
     * @throws ScriptException
     *             if locating the element or reading its value fails.
     */
    private void articleProcessor(EntityProperty entityProperty) {
        String propertyName = entityProperty.getName();
        DataFetchElement dataFetchElement = entityProperty.getDataFetchElement();
        Locator locator = dataFetchElement.getLocator();
        ElementProperty propertyToBeFetched = dataFetchElement.getProperty();
        try {
            WebElement webElement = (WebElement) getLocatorProcessor().processorLocator(getWebBrowser(), locator);
            if (webElement == null) {
                return;
            }
            Object propertyValue = getPropertyProcessor().getPropertyValue(webElement, propertyToBeFetched);
            if (propertyValue != null && propertyToBeFetched.equals(ElementProperty.TEXT)) {
                // NOTE(review): only the platform separator is replaced; on a platform whose
                // separator is "\r\n" a bare "\n" in the text is left untouched.
                String value = String.valueOf(propertyValue).replaceAll(System.getProperty("line.separator"),
                        " ");
                getArticleEntityStore().setArticleProperty(propertyName, value);
            }
        } catch (Exception e) {
            XPathLogger.log(e);
            // Throwable.getCause() returns null when no cause was recorded; fall back to the
            // exception itself so the handler cannot fail with a NullPointerException and hide
            // the original error.
            Throwable root = e.getCause() != null ? e.getCause() : e;
            String cause = root.getMessage();
            if (e instanceof TimeoutException) {
                throw new ScriptException("Timeout occured because of " + cause);
            }
            if (e instanceof BrowserException) {
                throw new ScriptException(cause);
            }
            if (e instanceof StaleElementReferenceException) {
                throw new ScriptException("Unable to fetch data from : " + locator.getValue() + " because of " + cause);
            }
            throw new ScriptException(e);
        }
    }

    public ElementProcessorProvider getElementProcessorProvider() {
        return elementProcessorProvider;
    }

    @Inject
    public void setElementProcessorProvider(ElementProcessorProvider elementProcessorProvider) {
        this.elementProcessorProvider = elementProcessorProvider;
    }

    public WebElementPropertyProcessor getPropertyProcessor() {
        return propertyProcessor;
    }

    @Inject
    public void setPropertyProcessor(WebElementPropertyProcessor propertyExtactor) {
        this.propertyProcessor = propertyExtactor;
    }

    public ScraperContextStore getContextStore() {
        return contextStore;
    }

    @Inject
    public void setContextStore(ScraperContextStore contextStore) {
        this.contextStore = contextStore;
    }

    public ArticleEntityStore getArticleEntityStore() {
        return articleEntityStore;
    }

    // The misspelled setter name is kept as-is: it is part of the public interface.
    @Inject
    public void setArtiicleEntityStore(ArticleEntityStore entityStore) {
        this.articleEntityStore = entityStore;
    }

    public ServiceStore getServiceStore() {
        return serviceStore;
    }

    @Inject
    public void setServiceStore(ServiceStore serviceStore) {
        this.serviceStore = serviceStore;
    }

    public OperationInvoker getOprationInvoker() {
        return oprationInvoker;
    }

    @Inject
    public void setOprationInvoker(OperationInvoker oprationInvoker) {
        this.oprationInvoker = oprationInvoker;
    }

    public PageEntityStore getPageEntity() {
        return pageEntityStore;
    }

    @Inject
    public void setPageEntity(PageEntityStore pageEntityStore) {
        this.pageEntityStore = pageEntityStore;
    }
}
<!-- Sample scraper script: one data-put step followed by an Article entity whose
three properties (title, subTitle, content) are each filled by a data-fetch element. -->
<target-source url="http://digital.ilcentro.it/ilcentro/books/latinaoggi/">
<execution-element type="flow">
<!-- Data-put step: writes the attribute-value text into the element with ID "email". -->
<execution-element type="data-put">
<locator type="ID" value="email" />
<attribute-value>dummy username</attribute-value>
<property>TEXT</property>
</execution-element>
<!-- Article entity: each entity-property below is traversed in turn and its
data-fetch element supplies the value for the property of that name. -->
<execution-element type="entity" entity-name="Article">
<!-- NOTE(review): EntityElementProcessor reads the operation's name and service,
while this element only carries a signature attribute; confirm how it is mapped. -->
<operation signature="_ContentService_saveArticle" />
<entity-property name="title">
<execution-element type="data-fetch">
<locator type="XPATH"
value="//*[@id=&quot;blueBarDOMInspector&quot;]/div/div/div/div[1]/h1/a/i" />
<property>TEXT</property>
</execution-element>
</entity-property>
<entity-property name="subTitle">
<execution-element type="data-fetch">
<locator type="XPATH"
value="//*[@id=&quot;login_form&quot;]/table/tbody/tr[1]/td[1]" />
<property>TEXT</property>
</execution-element>
</entity-property>
<entity-property name="content">
<execution-element type="data-fetch">
<locator type="ID" value="loginbutton" />
<property>TEXT</property>
</execution-element>
</entity-property>
</execution-element>
</execution-element>
</target-source>
<?xml version="1.0" encoding="UTF-8"?>
<!-- Scraper script: logs in through the login iframe, then loops over the page
thumbnails; for each page it saves a Page entity and loops over the article list,
saving one Article entity (title, subtitle, content) per article. -->
<target-source url="http://digital.ilcentro.it/ilcentro/books/latinaoggi/">
<execution-element type="flow">
<!-- Switch into the login iframe before filling the form. -->
<execution-element type="act">
<locator type="IFRAME_NAME" value="iframe_login" />
<action-type>SWITCHFRAME</action-type>
<key />
</execution-element>
<!-- NOTE(review): the two data-put steps below embed what look like real account
credentials in plain text; consider moving them out of the script and rotating them. -->
<execution-element type="data-put">
<locator type="XPATH" value="//input[@id='input_username']" />
<attribute-value>amministrazione@p-review.it</attribute-value>
<property>TEXT</property>
</execution-element>
<execution-element type="data-put">
<locator type="XPATH" value="//input[@id='input_password']" />
<attribute-value>12681870155</attribute-value>
<property>TEXT</property>
</execution-element>
<!-- Click the sixth paragraph of the login form (presumably the submit control; confirm). -->
<execution-element type="act">
<locator type="XPATH" value="//*[@id=&quot;login_form&quot;]/p[6]" />
<action-type>CLICK</action-type>
<key />
</execution-element>
<!-- If a link exists under the element with id "activate", click it. -->
<execution-element type="If">
<condition expression-type="XPATH" expression="boolean(//*[@id=&quot;activate&quot;]/a)" />
<then-execution type="flow">
<execution-element type="act">
<locator type="XPATH" value="//*[@id=&quot;activate&quot;]/a" />
<action-type>CLICK</action-type>
<key />
</execution-element>
</then-execution>
<else-execution />
</execution-element>
<!-- If an element with class "vc-tooltip" is found, click the element with ID "pages". -->
<execution-element type="If">
<condition expression-type="CLASSNAME" expression="vc-tooltip" />
<then-execution type="flow">
<execution-element type="act">
<locator type="ID" value="pages" />
<action-type>CLICK</action-type>
<key />
</execution-element>
</then-execution>
<else-execution />
</execution-element>
<!-- Outer loop over the page thumbnails. childrenOf(...) is not standard XPath;
presumably a locator helper provided by the scraper engine (confirm). -->
<execution-element type="for-each">
<iterator-locator type="XPATH" value="childrenOf('//*[@id=&quot;thumbcont&quot;]/ul',li)" />
<repeat-execution type="flow">
<execution-element type="act">
<locator type="ID" value="articles" />
<action-type>CLICK</action-type>
<key />
</execution-element>
<!-- Save the current page through the savePage operation of service eXtrapola. -->
<execution-element type="entity" entity-name="Page">
<operation name="savePage" service="eXtrapola" />
</execution-element>
<!-- Inner loop over the children of the element with ID "articles_list". -->
<execution-element type="for-each">
<iterator-locator type="ID" value="childrenOf('articles_list')" />
<repeat-execution type="flow">
<!-- The article is read inside the "shadowbox_content" iframe. -->
<execution-element type="act">
<locator type="IFRAME_NAME" value="shadowbox_content" />
<action-type>SWITCHFRAME</action-type>
<key />
</execution-element>
<!-- Fetch title, subtitle and content, then save through saveArticle. -->
<execution-element type="entity" entity-name="Article">
<operation name="saveArticle" service="eXtrapola" />
<entity-property name="title">
<execution-element type="data-fetch">
<locator type="CSS_SELECTOR" value="h1.titolo_articolo.titolo" />
<property>TEXT</property>
</execution-element>
</entity-property>
<entity-property name="subtitle">
<execution-element type="data-fetch">
<locator type="CSS_SELECTOR" value="h2.sottotitolo_articolo.sottotitolo" />
<property>TEXT</property>
</execution-element>
</entity-property>
<entity-property name="content">
<execution-element type="data-fetch">
<locator type="CLASSNAME" value="testo_articolo" />
<property>TEXT</property>
</execution-element>
</entity-property>
</execution-element>
<!-- Leave the iframe and close the article overlay. -->
<execution-element type="act">
<locator type="IFRAME_NAME" value="iframe_login" />
<action-type>SWITCHFRAMEDEFAULT</action-type>
<key />
</execution-element>
<execution-element type="act">
<locator type="XPATH" value="//*[@id=&quot;shadowbox_nav_close&quot;]" />
<action-type>CLICK</action-type>
<key />
</execution-element>
<!-- When "textual_articles" is not found, click "articles" again. -->
<execution-element type="If">
<condition expression-type="ID" expression="textual_articles" />
<then-execution />
<else-execution type="flow">
<execution-element type="act">
<locator type="ID" value="articles" />
<action-type>CLICK</action-type>
<key />
</execution-element>
</else-execution>
</execution-element>
</repeat-execution>
</execution-element>
<!-- After the articles of a page: same tooltip check and "pages" click as before the loop. -->
<execution-element type="If">
<condition expression-type="CLASSNAME" expression="vc-tooltip" />
<then-execution type="flow">
<execution-element type="act">
<locator type="ID" value="pages" />
<action-type>CLICK</action-type>
<key />
</execution-element>
</then-execution>
<else-execution />
</execution-element>
</repeat-execution>
</execution-element>
</execution-element>
</target-source>
<!-- Scraper script using server-op elements: logs in, loops over the page
thumbnails that carry a data-page attribute, saves a "content" entity per page and
an "article" entity per article title through the Entities_saveEntity operation. -->
<target-source
url="http://digital.ilcentro.it/ilcentro/books/latinaoggi/"
edition="pescara" publication-id="22" publication-name="IlCentro">
<execution-element type="flow">
<!-- Switch into the login iframe before filling the form. -->
<execution-element type="act">
<locator type="IFRAME_NAME" value="iframe_login" />
<action-type>SWITCHFRAME</action-type>
<key />
</execution-element>
<!-- NOTE(review): the two data-put steps below embed what look like real account
credentials in plain text; consider moving them out of the script and rotating them. -->
<execution-element type="data-put">
<locator type="XPATH" value="//input[@id='input_username']" />
<attribute-value>amministrazione@p-review.it</attribute-value>
<property>TEXT</property>
</execution-element>
<execution-element type="data-put">
<locator type="XPATH" value="//input[@id='input_password']" />
<attribute-value>12681870155</attribute-value>
<property>TEXT</property>
</execution-element>
<execution-element type="act">
<locator type="XPATH"
value="//*[@id=&quot;login_form&quot;]/p[6]" />
<action-type>CLICK</action-type>
<key />
</execution-element>
<!-- If a link exists under the element with id "activate", click it. -->
<execution-element type="If">
<condition expression-type="XPATH"
expression="boolean(//*[@id=&quot;activate&quot;]/a)" />
<then-execution type="flow">
<execution-element type="act">
<locator type="XPATH"
value="//*[@id=&quot;activate&quot;]/a" />
<action-type>CLICK</action-type>
<key />
</execution-element>
</then-execution>
<else-execution />
</execution-element>
<!-- If an element with class "vc-tooltip" is found, click the element with ID "pages". -->
<execution-element type="If">
<condition expression-type="CLASSNAME"
expression="vc-tooltip" />
<then-execution type="flow">
<execution-element type="act">
<locator type="ID" value="pages" />
<action-type>CLICK</action-type>
<key />
</execution-element>
</then-execution>
<else-execution />
</execution-element>
<!-- Outer loop over the thumbnail list items that have a data-page attribute. -->
<execution-element type="for-each">
<iterator-locator type="XPATH"
value="//*[@id=&quot;thumbcont&quot;]/ul/li[@data-page]" />
<repeat-execution type="flow">
<!-- NOTE(review): in standard XPath an expression starting with // is evaluated from
the document root, so this condition holds whenever any li without class "rightpage"
exists; confirm that the engine scopes it to the current iterator item. -->
<execution-element type="If">
<condition expression-type="XPATH"
expression="//li[not(contains(@class, 'rightpage'))]" />
<then-execution type="flow">
<execution-element type="act">
<locator type="ID" value="articles" />
<action-type>CLICK</action-type>
<key />
</execution-element>
<!-- Read the data-page attribute of the current iterator value and keep it in the
local context as currentPageNumber (used by the thumbnail URL below). -->
<execution-element type="data-fetch">
<locator type="LOCAL_CONTEXT"
value="currentIteratorValue" />
<property>data-page</property>
<result-handler type="context"
store-as="currentPageNumber" store-in="LOCAL_CONTEXT" />
</execution-element>
<!-- Save the page as a "content" entity; the result is stored as currentPageId. -->
<execution-element type="server-op"
operation-signature="Entities_saveEntity">
<param name="entity" order="1">
<value type="entity" entity-type="content"
service-name="eXtrapola">
<entity-property name="date" type-as="DATE">
<execution-element type="data-fetch">
<locator type="CLASSNAME" value="dataedizione" />
<property>TEXT</property>
</execution-element>
</entity-property>
<entity-property name="convertedOn"
type-as="DATE">
<execution-element type="data-fetch">
<locator type="LOCAL_CONTEXT"
value="{{currentDateTime}}" />
<property />
</execution-element>
</entity-property>
<!-- NOTE(review): downloadedOn has no type-as attribute, unlike convertedOn; confirm. -->
<entity-property name="downloadedOn">
<execution-element type="data-fetch">
<locator type="LOCAL_CONTEXT"
value="{{currentDateTime}}" />
<property />
</execution-element>
</entity-property>
<entity-property name="sourceId"
type-as="INTEGER">
<execution-element type="data-fetch">
<locator type="LOCAL_CONTEXT"
value="IlCentroPublicationId" />
<property />
</execution-element>
</entity-property>
<entity-property name="editionCode"
type-as="STRING">
<execution-element type="data-fetch">
<locator type="LOCAL_CONTEXT" value="currentEdition" />
<property />
</execution-element>
</entity-property>
<entity-property name="pageNumber"
type-as="STRING">
<execution-element type="data-fetch">
<locator type="LOCAL_CONTEXT"
value="currentIteratorValue" />
<property>data-page</property>
</execution-element>
</entity-property>
<!-- NOTE(review): the placeholder is spelled {{currentdate}} here while the others
use camel case ({{currentYear}}, {{currentDateTime}}); confirm the expected name. -->
<entity-property name="sourceBlobId"
type-as="BLOB">
<execution-element type="url"
value="http://digital.ilcentro.it/ilcentro/books/pescara/{{currentYear}}/{{currentdate}}pescara/images/thumbnails/Page-{{@context.currentPageNumber}}.jpg" />
</entity-property>
</value>
</param>
<result-handler type="context"
store-as="currentPageId" store-in="LOCAL_CONTEXT" />
</execution-element>
<!-- Inner loop over the elements with class "article_title". -->
<execution-element type="for-each">
<iterator-locator type="CLASSNAME"
value="article_title" />
<repeat-execution type="flow">
<!-- The article is read inside the "shadowbox_content" iframe. -->
<execution-element type="act">
<locator type="IFRAME_NAME" value="shadowbox_content" />
<action-type>SWITCHFRAME</action-type>
<key />
</execution-element>
<!-- Save the article as an "article" entity linked to currentPageId. -->
<execution-element type="server-op"
operation-signature="Entities_saveEntity">
<param name="entity" order="1">
<value type="entity" entity-type="article"
service-name="eXtrapola">
<entity-property name="title"
type-as="STRING">
<execution-element type="data-fetch">
<locator type="CSS_SELECTOR"
value="h1.titolo_articolo.titolo" />
<property>TEXT</property>
</execution-element>
</entity-property>
<entity-property name="subTitle"
type-as="STRING">
<execution-element type="data-fetch">
<locator type="CSS_SELECTOR"
value="h2.sottotitolo_articolo.sottotitolo" />
<property>TEXT</property>
</execution-element>
</entity-property>
<entity-property name="contentBlobId"
type-as="BLOB">
<execution-element type="data-fetch">
<locator type="CLASSNAME" value="testo_articolo" />
<property>TEXT</property>
</execution-element>
</entity-property>
<entity-property name="guid" type-as="STRING">
<execution-element type="data-fetch">
<locator type="GUID_GENERATOR" />
<property />
</execution-element>
</entity-property>
<entity-property name="pageId"
type-as="INTEGER">
<execution-element type="data-fetch">
<locator type="LOCAL_CONTEXT" value="currentPageId" />
<property />
</execution-element>
</entity-property>
</value>
</param>
<!-- Empty result handler: the result of this save is not stored. -->
<result-handler />
</execution-element>
<!-- Leave the iframe and close the article overlay. -->
<execution-element type="act">
<locator type="IFRAME_NAME" value="iframe_login" />
<action-type>SWITCHFRAMEDEFAULT</action-type>
<key />
</execution-element>
<execution-element type="act">
<locator type="XPATH"
value="//*[@id=&quot;shadowbox_nav_close&quot;]" />
<action-type>CLICK</action-type>
<key />
</execution-element>
<!-- When "textual_articles" is not found, click "articles" again. -->
<execution-element type="If">
<condition expression-type="ID"
expression="textual_articles" />
<then-execution />
<else-execution type="flow">
<execution-element type="act">
<locator type="ID" value="articles" />
<action-type>CLICK</action-type>
<key />
</execution-element>
</else-execution>
</execution-element>
</repeat-execution>
</execution-element>
<!-- After the articles of a page: same tooltip check and "pages" click as before the loop. -->
<execution-element type="If">
<condition expression-type="CLASSNAME"
expression="vc-tooltip" />
<then-execution type="flow">
<execution-element type="act">
<locator type="ID" value="pages" />
<action-type>CLICK</action-type>
<key />
</execution-element>
</then-execution>
<else-execution />
</execution-element>
</then-execution>
<!-- Else branch: hands the page to the combineAndSavePages operation.
NOTE(review): this else-execution has no type="flow" attribute, unlike the other
non-empty branches, and only a firstPage parameter is supplied; confirm both. -->
<else-execution>
<execution-element type="server-op"
operation-signature="combineAndSavePages">
<param name="firstPage" order="1">
<value type="entity" entity-type="content"
service-name="eXtrapola">
<entity-property name="pageNumber"
type-as="STRING">
<execution-element type="data-fetch">
<locator type="LOCAL_CONTEXT"
value="currentIteratorValue" />
<property>data-page</property>
</execution-element>
</entity-property>
<entity-property name="sourceBlobId"
type-as="BLOB">
<execution-element type="url"
value="http://digital.ilcentro.it/ilcentro/books/pescara/{{currentYear}}/{{currentdate}}pescara/images/thumbnails/Page-{{@context.currentPageNumber}}.jpg" />
</entity-property>
</value>
</param>
</execution-element>
</else-execution>
</execution-element>
</repeat-execution>
</execution-element>
</execution-element>
</target-source>
@aino-gautam
Copy link

aino-gautam commented Mar 13, 2019

                     <execution-element type="pushVar" name="sourceId" value=""></execution-element>

                     <execution-element type="serverOp" service="extrapola" result-var="savedArticle">
                     <operation friendly="_ContentService_saveArticle" />
                     <param type="entity" entity-type="Article" name="article" >
                     <entity-property name="title">
                        <execution-element type="data-fetch">
                           <locator type="CSS_SELECTOR" value="h1.titolo_articolo.titolo" />
                           <property>TEXT</property>
                        </execution-element>
                     </entity-property>
                     <entity-property name="subtitle">
                        <execution-element type="data-fetch">
                           <locator type="CSS_SELECTOR" value="h2.sottotitolo_articolo.sottotitolo" />
                           <property>TEXT</property>
                        </execution-element>
                     </entity-property>
                     <entity-property name="content">
                        <execution-element type="data-fetch">
                           <locator type="CLASSNAME" value="testo_articolo" />
                           <property>TEXT</property>
                        </execution-element>
                     </entity-property>
                      <entity-property name="pageId" populate="@currentPage.id"></entity-property>
                    </param>
                  </execution-element> 

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment