Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save ninetwentyfour/202735664a5639e2f2fe3336e9161047 to your computer and use it in GitHub Desktop.
Save ninetwentyfour/202735664a5639e2f2fe3336e9161047 to your computer and use it in GitHub Desktop.
# Collect every feed item that contains a "sponsored" label span.
# `data` is the parsed page and must be built before this snippet runs.
# NOTE: 'feed__item' is shorthand for the real feed-item selector; replace it
# with the full XPath for the page being scraped.
sponsoredItems = []
for i in data.xpath('feed__item'):
    # Assuming lxml, xpath() returns a (possibly empty) list and never None,
    # so check for emptiness instead of comparing against None.
    sponsored = i.xpath("div/span[@class='label--sponsored']")
    if sponsored:
        # list.append() mutates in place and returns None; do not reassign.
        sponsoredItems.append(i)
print(sponsoredItems)
@ninetwentyfour
Copy link
Author

# Build one [title, label, isSponsored] record per feed item.
# NOTE: `whatever`, `data`, 'feed__item' and the two quoted XPath strings are
# placeholders from the discussion; substitute the real page iterable, parsed
# document and selectors before running.
hr_data = []
for page in whatever:
    for i in data.xpath('feed__item'):
        title = i.xpath("the same shit that should be here")
        label = i.xpath("same label shit as before")
        # Assuming lxml, xpath() returns a (possibly empty) list and never
        # None, so the flag is derived from whether anything matched.
        sponsoredHtmlElements = i.xpath("div/span[@class='label--sponsored']")
        isSponsored = bool(sponsoredHtmlElements)

        # list.append() accepts exactly one argument, so append one record.
        hr_data.append([title, label, isSponsored])

# Bare expression: shows the record count when run in a notebook/REPL.
len(hr_data)

@joshstaab
Copy link

# Scrape the HR Dive "hr-technology-analytics" topic listing and record, for
# every feed item, its title text nodes and whether it has a sponsored label.
# Assumes `requests` and an lxml-style `html` module are imported earlier.
hr_data = []

web = 'https://hrdive.com/topic/hr-technology-analytics/?page=%s'
for page in range(1, 25):  # pages 1 through 24
    url = web % page
    data = html.fromstring(requests.get(url).text)

    # XPath attribute names are case-sensitive, so match lowercase `class`;
    # `@Class` would select nothing.
    for i in data.xpath("//li[@class='row feed__item']"):
        # text() yields a list of strings (empty when no title link exists).
        title = i.xpath("div/h3/a/text()")
        # xpath() returns a list, never None, so `is not None` flagged every
        # item as sponsored; use the emptiness of the match list instead.
        label = bool(i.xpath("div/span[@class='label--sponsored']"))

        hr_data.append([title, label])

# Bare expression: shows the record count when run in a notebook/REPL.
len(hr_data)

@ninetwentyfour
Copy link
Author

# Build one [title, label_text] record per feed item, where label_text holds
# the matched label elements ("loud" takes precedence over "sponsored") or ""
# when the item has neither label.
# NOTE: `whatever`, `data`, 'feed__item' and the quoted title XPath are
# placeholders from the discussion; substitute the real page iterable, parsed
# document and selectors before running.
hr_data = []
for page in whatever:
    for i in data.xpath('feed__item'):
        title = i.xpath("the same shit that should be here")
        label_text = ""

        # Assuming lxml, xpath() returns a (possibly empty) list and never
        # None, so only overwrite label_text when something actually matched.
        sponsoredHtmlElements = i.xpath("div/span[@class='label--sponsored']")
        if sponsoredHtmlElements:
            label_text = sponsoredHtmlElements

        loudHtmlElements = i.xpath("div/span[@class='label--loud']")
        if loudHtmlElements:
            label_text = loudHtmlElements

        hr_data.append([title, label_text])

# Bare expression: shows the record count when run in a notebook/REPL.
len(hr_data)

@ninetwentyfour
Copy link
Author

# Debugging aid: print the raw result of the sponsored-label query for every
# feed item so its shape can be inspected.
# NOTE: `whatever`, `data`, 'feed__item' and the two quoted XPath strings are
# placeholders from the discussion; substitute real values before running.
hr_data = []
for page in whatever:
    for i in data.xpath('feed__item'):
        title = i.xpath("the same shit that should be here")
        label = i.xpath("same label shit as before")
        label_text = ""
        sponsoredHtmlElements = i.xpath("div/span[@class='label--sponsored']")
        # Assuming lxml, this prints a list: [] when the item has no label.
        print(sponsoredHtmlElements)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment