Oliver Batey oliver-batey

## using_common_interface.py
import parse_file as dp

# Sample input documents, one per file format.
txt_path = 'test_txt.txt'
docx_path = 'test_docx.docx'
pdf_path = 'test_pdf.pdf'
html_path = 'test_html.html'
pptx_path = 'test_pptx.pptx'

# Every sample path gathered into one list, in the order defined above.
file_paths = [
    txt_path,
    docx_path,
    pdf_path,
    html_path,
    pptx_path,
]

## common_interface.py
import os
import io
from docx import Document

from pdfminer3.layout import LAParams, LTTextBox
from pdfminer3.pdfpage import PDFPage
from pdfminer3.pdfinterp import PDFResourceManager
from pdfminer3.pdfinterp import PDFPageInterpreter
from pdfminer3.converter import PDFPageAggregator
from pdfminer3.converter import TextConverter

## factory_pattern_part1.py
import os
from docx import Document

class DocParser:
    """Entry point that hands a document to the parser chosen by ``get_format``."""

    def parse(self, document):
        """Look up a parser for ``document`` and return what it produces.

        ``get_format(document)`` must return a callable; that callable is
        invoked with ``document`` and its result is returned unchanged.
        """
        return get_format(document)(document)

def get_format(document):
    # os.path.splitext returns (root, ext); taking [-1] keeps the extension,
    # which includes its leading dot (e.g. '.txt') or is '' when there is none.
    format = os.path.splitext(document)[-1]
    # NOTE(review): the excerpt ends here, so no return value is visible.
    # DocParser.parse calls the result of get_format(document) — confirm the
    # full function returns a parser callable for the detected extension.

## bad_file_parsing.py
def serialise_file(document, format):
    """Read ``document`` and return its contents as a single string.

    Args:
        document: Path of the file to read.
        format: Format key that selects the parsing branch ('txt' or 'docx').

    Returns:
        For 'txt', the file text with every newline replaced by a space.
        For a format key handled by no branch, None (the function falls through).

    Raises:
        NotImplementedError: For 'docx', whose parsing code has not been written.
    """
    if format == 'txt':
        with open(document, 'r') as file:
            return file.read().replace('\n', ' ')

    elif format == 'docx':
        # docx parsing code here; fail loudly until it exists rather than
        # leaving an empty (syntactically invalid) branch.
        raise NotImplementedError('docx parsing is not implemented yet')


## sentence_sentiment.py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from textblob import TextBlob


def sentiment_polarity(string: str) -> float:
    """Return the first element of TextBlob's sentiment result for ``string``.

    The first element of ``TextBlob(...).sentiment`` is the polarity score.
    """
    sentiment = TextBlob(string).sentiment
    return sentiment[0]

## filter_language_structures.py
import numpy as np
import pandas as pd
from textacy import extract, make_spacy_doc

# Read the whole article, strip the line breaks, then swap non-breaking
# spaces (U+00A0) for ordinary spaces.
with open("news_article.txt", "r") as file:
    data = file.read()
data = data.replace("\n", "")
article = data.replace(u"\xa0", u" ")

# Create doc object

## subject_dependencies.py
from itertools import count

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import textacy

# Read the whole article and strip the line breaks.
with open("news_article.txt", "r") as file:
    data = file.read()
data = data.replace("\n", "")

## plot_keyterms.py
from typing import List, Tuple

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.axes import Axes
from textacy import extract, make_spacy_doc


def decompose_keyterms(keyterm_list: List[str]) -> Tuple:

## download_images.py
# Profile is used below but was never imported in this snippet.
from instascrape import Profile

# Scrape the 'natgeo' profile and fetch its recent posts
natgeo = Profile('natgeo')
natgeo.scrape()
recents = natgeo.get_recent_posts()

# Keep only the posts that are not videos
recent_photos = [post for post in recents if not post.is_video]

#Save photos in a loop
for i, post in enumerate(recent_photos):

## hashtag_posts.py
from instascrape import Hashtag
# The string passed to Hashtag is the tag to look up; substitute 'ad'
# with the word you want to search for.
hashtag = Hashtag('ad')

# Scrape the hashtag (this object is a Hashtag, not a profile)
hashtag.scrape()

# Get the list of recent posts for the hashtag
recents = hashtag.get_recent_posts()
	import parse_file as dp

	# Sample input documents, one per file format.
	txt_path = 'test_txt.txt'
	docx_path = 'test_docx.docx'
	pdf_path = 'test_pdf.pdf'
	html_path = 'test_html.html'
	pptx_path = 'test_pptx.pptx'

	# Every sample path gathered into one list, in the order defined above.
	file_paths = [
		txt_path,
		docx_path,
		pdf_path,
		html_path,
		pptx_path,
	]
	import os
	import io
	from docx import Document

	from pdfminer3.layout import LAParams, LTTextBox
	from pdfminer3.pdfpage import PDFPage
	from pdfminer3.pdfinterp import PDFResourceManager
	from pdfminer3.pdfinterp import PDFPageInterpreter
	from pdfminer3.converter import PDFPageAggregator
	from pdfminer3.converter import TextConverter
	import os
	from docx import Document

	class DocParser:
		"""Entry point that hands a document to the parser chosen by ``get_format``."""

		def parse(self, document):
			"""Look up a parser for ``document`` and return what it produces.

			``get_format(document)`` must return a callable; that callable is
			invoked with ``document`` and its result is returned unchanged.
			"""
			parser = get_format(document)
			return parser(document)

	def get_format(document):
		# os.path.splitext returns (root, ext); taking [-1] keeps the extension,
		# which includes its leading dot (e.g. '.txt') or is '' when there is none.
		format = os.path.splitext(document)[-1]
		# NOTE(review): the excerpt ends here, so no return value is visible.
		# DocParser.parse calls the result of get_format(document) — confirm the
		# full function returns a parser callable for the detected extension.
	def serialise_file(document, format):
		"""Read ``document`` and return its contents as a single string.

		Args:
			document: Path of the file to read.
			format: Format key that selects the parsing branch ('txt' or 'docx').

		Returns:
			For 'txt', the file text with every newline replaced by a space.
			For a format key handled by no branch, None (the function falls through).

		Raises:
			NotImplementedError: For 'docx', whose parsing code has not been written.
		"""
		if format == 'txt':
			with open(document, 'r') as file:
				return file.read().replace('\n', ' ')

		elif format == 'docx':
			# docx parsing code here; fail loudly until it exists rather than
			# leaving an empty (syntactically invalid) branch.
			raise NotImplementedError('docx parsing is not implemented yet')
	import matplotlib.pyplot as plt
	import numpy as np
	import pandas as pd
	from textblob import TextBlob


	def sentiment_polarity(string: str) -> float:
		"""Return the first element of TextBlob's sentiment result for ``string``.

		The first element of ``TextBlob(...).sentiment`` is the polarity score.
		"""
		polarity = TextBlob(string).sentiment[0]
		return polarity
	import numpy as np
	import pandas as pd
	from textacy import extract, make_spacy_doc

	# Read the whole article, strip the line breaks, then swap non-breaking
	# spaces (U+00A0) for ordinary spaces.
	with open("news_article.txt", "r") as file:
		data = file.read().replace("\n", "")
	article = data.replace(u"\xa0", u" ")

	# Create doc object
	from itertools import count

	import matplotlib.pyplot as plt
	import networkx as nx
	import numpy as np
	import pandas as pd
	import textacy

	# Read the whole article and strip the line breaks.
	with open("news_article.txt", "r") as file:
		data = file.read().replace("\n", "")
	from typing import List, Tuple

	import matplotlib.pyplot as plt
	import numpy as np
	import pandas as pd
	from matplotlib.axes import Axes
	from textacy import extract, make_spacy_doc


	def decompose_keyterms(keyterm_list: List[str]) -> Tuple:
	# Profile is used below but was never imported in this snippet.
	from instascrape import Profile

	# Scrape the 'natgeo' profile and fetch its recent posts
	natgeo = Profile('natgeo')
	natgeo.scrape()
	recents = natgeo.get_recent_posts()

	# Keep only the posts that are not videos
	recent_photos = [post for post in recents if not post.is_video]

	#Save photos in a loop
	for i, post in enumerate(recent_photos):
	from instascrape import Hashtag
	# The string passed to Hashtag is the tag to look up; substitute 'ad'
	# with the word you want to search for.
	hashtag = Hashtag('ad')

	# Scrape the hashtag (this object is a Hashtag, not a profile)
	hashtag.scrape()

	# Get the list of recent posts for the hashtag
	recents = hashtag.get_recent_posts()