Skip to content

Instantly share code, notes, and snippets.

@Zalasyu
Created December 19, 2021 23:12
Show Gist options
  • Save Zalasyu/694df422ecf50bf55c3eeeb0c5d195a8 to your computer and use it in GitHub Desktop.
Save Zalasyu/694df422ecf50bf55c3eeeb0c5d195a8 to your computer and use it in GitHub Desktop.
Your instagram scraper methods in here.
# Author: Alec Moldovan
# Description: This module contains the logic for an instagram data scraper bot.
# Import Standard Libraries
import time, os, random
import json
# Import third-party modules
from selenium.webdriver.remote.webdriver import WebDriver
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.wait import WebDriverWait
# Logger Module
from loguru import logger
# logger.add("bot_{level}.log", level="INFO", rotation="00:00")
# Import Local Modules
import config
class Bot:
    """
    Interacts with instagram's pages through the webdriver.

    The bot does not create or close the browser itself; it drives the
    WebDriver instance handed to it at construction time.
    """

    # JavaScript that scrolls to the bottom of the document and returns the
    # document height measured right after the scroll.
    _SCROLL_SCRIPT = (
        "window.scrollTo(0,document.body.scrollHeight);"
        "let lenPage=document.body.scrollHeight;return lenPage;"
    )

    # Locators used by login(). They are absolute/structural XPaths, so they
    # are tied to the page markup they were written against.
    _LOGIN_SUBMIT_XPATH = '//*[@id="loginForm"]/div/div[3]/button/div'
    _SAVE_BROWSER_XPATH = '//*[@id="react-root"]/section/main/div/div/div/section/div/button'
    _NOTIFICATIONS_XPATH = "/html/body/div[4]/div/div/div/div[3]/button[2]"

    def __init__(self, driver: WebDriver) -> None:
        """Store the webdriver that every other method operates on.
        Parameters:
        driver:WebDriver -> An already initialized webdriver.
        Return:
        None
        """
        self.driver = driver

    def search_for_website(self, url: str) -> None:
        """Search and navigate to target webpage.
        Parameters:
        url:str -> URL for target webpage.
        Return:
        None
        """
        self.driver.get(url)

    def login(self) -> None:
        """
        Login to an instagram account.

        Opens the instagram home page, fills the login form with
        config.USERNAME / config.PASSWORD, submits it, and then tries to
        dismiss the two follow-up prompts. Every step is best-effort: a
        missing element or a timeout is logged and the method carries on.
        Parameters: None
        Return: None
        """
        # Go to instagram login page
        logger.debug("Opening a google chrome browser and going to instagram login page")
        self.search_for_website("https://www.instagram.com/")

        # Fill out login form and click submit button
        try:
            self.wait_until(EC.presence_of_element_located((By.NAME, "username")))
            self._submit_credentials()
        except TimeoutException:
            # Kept non-fatal (the original ignored it silently); now it is at
            # least visible in the log.
            logger.warning("Timed out while waiting for or submitting the login form; continuing.")

        # Checkpoint # 1
        # NOTE(review): this is logged even when the form was never submitted.
        logger.info("Passed the login page.")

        # Go through popup messages
        self._dismiss_prompt(self._SAVE_BROWSER_XPATH, "Skipping save browser prompt.")
        self._dismiss_prompt(self._NOTIFICATIONS_XPATH, "Skipping turn on notifications prompt.")

    def _submit_credentials(self) -> None:
        """Fill the username/password fields from config and click submit."""
        try:
            # Select username and password input text fields
            user_field = self.driver.find_element(By.NAME, "username")
            pass_field = self.driver.find_element(By.NAME, "password")
            # Clear fields so pre-filled text does not get mixed into the input
            user_field.clear()
            pass_field.clear()
            # Fill username and password text fields with the credentials
            user_field.send_keys(config.USERNAME)
            pass_field.send_keys(config.PASSWORD)
            # Find and click the submit button to login.
            logger.debug("Submitting login info!")
            self.driver.find_element(By.XPATH, self._LOGIN_SUBMIT_XPATH).click()
        except NoSuchElementException:
            logger.exception("Could not find element!")

    def _dismiss_prompt(self, xpath: str, message: str) -> None:
        """Click the button at xpath if it exists; do nothing otherwise."""
        logger.debug(message)
        try:
            self.driver.find_element(By.XPATH, xpath).click()
        except NoSuchElementException:
            # The prompt is optional, so its absence is not an error.
            logger.debug("Prompt not found; nothing to dismiss.")

    def search(self, keyword: str = "emmalilywinery") -> None:
        """Search content by keyword with the instagram search box
        Parameters:
        keyword:str -> Text typed into the search box.
        Return: None
        """
        try:
            logger.debug("Locating Searchbox...")
            time.sleep(3)
            search_box = self.wait_until(
                EC.element_to_be_clickable((By.XPATH, "//input[@placeholder='Search']"))
            )
            search_box.clear()
            # Fill Search box and submit search.
            search_box.send_keys(keyword)
            time.sleep(3)
            # Enter is sent twice with a pause in between.
            # NOTE(review): presumably the first Enter picks the top suggestion
            # and the second one opens it -- confirm against the live page.
            search_box.send_keys(Keys.ENTER)
            time.sleep(3)
            search_box.send_keys(Keys.ENTER)
            time.sleep(5)
        except TimeoutException:
            logger.exception("Search box not found!")

    def scroll(self, max_scrolls: int = 10, pause: float = 2) -> None:
        """Scroll and load all posts from profile webpage.

        Scrolls to the bottom once, then keeps scrolling until the document
        height stops changing between two consecutive scrolls or until
        max_scrolls further scrolls have been made.
        Parameters:
        max_scrolls:int -> Upper bound on the follow-up scrolls (default: 10)
        pause:float -> Seconds to wait before each follow-up scroll (default: 2)
        Return:
        None
        """
        logger.debug("Begin scrolling...")
        # Initial scroll; its height is the baseline for the first comparison
        page_height = self.driver.execute_script(self._SCROLL_SCRIPT)
        logger.debug(f'Length of page is {page_height}')

        # Keep scrolling until no more content
        for _ in range(max_scrolls):
            previous_height = page_height
            time.sleep(pause)
            page_height = self.driver.execute_script(self._SCROLL_SCRIPT)
            logger.debug(f'Length of last count is {previous_height}')
            logger.debug(f'Length of page is {page_height}')
            if previous_height == page_height:
                logger.info("Scrolled to bottom!")
                break
        else:
            # The cap was reached while the page was still growing, so the
            # bottom has not actually been confirmed.
            logger.info(f"Stopped scrolling after {max_scrolls} scrolls; page may not be fully loaded.")

    def open_and_switch(self, url: str) -> None:
        """Open link in a new tab and switch to that tab.

        NOTE: not implemented yet; calling this currently does nothing.
        Parameters:
        url:str --> hyperlink
        Return:
        None
        """
        pass

    def close_and_switch_back(self) -> None:
        """Close current tab and switch back to previous tab

        NOTE: not implemented yet; calling this currently does nothing.
        Parameters: None
        Return:
        None
        """
        pass

    def wait_until(self, condition, timeout=5):
        """
        Tell Webdriver to wait until a condition is met or timeout.
        Parameters:
        condition -> Callable taking the driver, e.g. one of selenium's
        expected_conditions; it is polled until it returns a truthy value.
        timeout:int -> Maximum seconds to wait (default: 5 secs)
        Return: The value returned by the condition (search() uses it as the
        located element).
        Raises: TimeoutException if the condition is not met within timeout.
        """
        return WebDriverWait(self.driver, timeout).until(condition)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment