Skip to content

Instantly share code, notes, and snippets.

View PandaWhoCodes's full-sized avatar

Thomas Ashish Cherian PandaWhoCodes

View GitHub Profile
@PandaWhoCodes
PandaWhoCodes / SFTP.py
Created April 12, 2021 10:35 — forked from billcrook/SFTP.py
An SFTP util class I created for use in my airflow pipelines. Not beautiful, but it works.
import logging
import pexpect
from airflow.hooks.base_hook import BaseHook
class SFTP(object):
"""
Requires openssh_client. Spawns process to execute sftp command.
"""
@PandaWhoCodes
PandaWhoCodes / get_old_rss.py
Last active September 7, 2020 12:27
Get all items in an RSS feed. You can set the MAX ID higher to get more items from the feeds
import pandas as pd
import requests
import feedparser
import time
import requests
def parse_rss_feed(url):
# Read feed xml data
# Try 3 times requesting the url if error
for i in range(0, 4):
@PandaWhoCodes
PandaWhoCodes / get_feed_links.py
Created September 5, 2020 13:52
Extract feed URLs from a given URL
@PandaWhoCodes
PandaWhoCodes / url_to_txt.py
Created July 16, 2020 07:37
Extracts the text from a webpage and saves it to a text file
"""
Gets the webpage
Converts the HTML to a readable HTML using readability
Extracts the text and saves it to a text file.
usage - python url_to_txt.py http://example.com
"""
from readability import Document
from bs4 import BeautifulSoup
from urllib.parse import urlparse
import sqlite3
db_name = "database.db"
def create_connection(db_file):
""" create a database connection to the SQLite database
specified by db_file
:param db_file: database file
:return: Connection object or None
@PandaWhoCodes
PandaWhoCodes / get_gaps.py
Created April 15, 2020 14:53
Gets keyword gaps from Ubersuggest keywords
import glob
import pandas as pd
import os
def get_file_names(path=None):
    """Return the paths of the Ubersuggest CSV files found in a directory.

    Args:
        path: Directory to search. When omitted (``None``), the current
            working directory at call time is used.

    Returns:
        A list of paths matching ``<path>/ubersuggest_*.csv``, in the
        order produced by ``glob.glob``.
    """
    # Resolve the default here rather than in the signature: a default of
    # ``os.getcwd()`` is evaluated once, when the function is defined, and
    # would keep pointing at the old directory after a later ``os.chdir``.
    if path is None:
        path = os.getcwd()
    return glob.glob(path + "/ubersuggest_*.csv")
def get_file(filename):
// original gist: https://gist.github.com/willpatera/ee41ae374d3c9839c2d6
/**
 * Entry point named doGet: forwards the event argument `e` to
 * handleResponse and returns whatever handleResponse returns.
 */
function doGet(e) {
  var response = handleResponse(e);
  return response;
}
// Name of the sheet the data is written to; change this value to target a different sheet.
var SHEET_NAME = "Sheet1";
var SCRIPT_PROP = PropertiesService.getScriptProperties(); // script properties store (new property service)
@PandaWhoCodes
PandaWhoCodes / verify_mails.py
Created March 7, 2020 08:26
Code to verify email addresses from a given CSV file
# pip install verify-email (before running this)
import pandas as pd
from verify_email import verify_email
import pickle
final_row = ["S.No", "Name of Student", "Email", "valid"]
def get_file(filename):
    """Read the CSV at ``filename`` with pandas and return the DataFrame."""
    frame = pd.read_csv(filename)
    return frame
@PandaWhoCodes
PandaWhoCodes / Keyword_gap_analysis.py
Created March 6, 2020 07:58
Convert Ubersuggest keywords into a gap analysis
import glob
import pandas as pd
import os
def get_file_names(path=None):
    """Return the paths of the Ubersuggest CSV files found in a directory.

    Args:
        path: Directory to search. When omitted (``None``), the current
            working directory at call time is used.

    Returns:
        A list of paths matching ``<path>/ubersuggest_*.csv``, in the
        order produced by ``glob.glob``.
    """
    # Resolve the default here rather than in the signature: a default of
    # ``os.getcwd()`` is evaluated once, when the function is defined, and
    # would keep pointing at the old directory after a later ``os.chdir``.
    if path is None:
        path = os.getcwd()
    return glob.glob(path + "/ubersuggest_*.csv")
def get_file(filename):
@PandaWhoCodes
PandaWhoCodes / extract_entity.py
Last active March 3, 2020 09:50
To install the spaCy en_core_web_md model, run: python -m spacy download en_core_web_md
import spacy
import pandas as pd
import sys
import re
nlp = spacy.load("en_core_web_md")
class Error(Exception):
"""Base class for other exceptions"""