This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
pip install tweepy |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tweepy | |
import pandas as pd | |
import nltk | |
from tweepy import OAuthHandler | |
from tweepy import Cursor |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Content of preprocess_data.py | |
""" | |
from sklearn.model_selection import train_test_split | |
import pandas as pd | |
def prepare_data(path_to_data): | |
# Read data from path | |
data = pd.read_csv(path_to_data) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Content of train_model.py | |
""" | |
from sklearn.linear_model import LogisticRegression | |
from sklearn.metrics import classification_report | |
import pickle | |
def run_model_training(X_train, X_test, y_train, y_test): | |
clf = LogisticRegression() | |
clf.fit(X_train,y_train) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Content of run_training.py | |
""" | |
import joblib | |
# Get customized functions from library | |
from packages.preprocess_data import * | |
from packages.train_model import run_model_training |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import slate3k as slate | |
# Extract the text of the PDF with slate3k and print it.
# The file is opened inside a context manager so the handle is released
# as soon as slate has built its PDF object, instead of staying open for
# the rest of the script.
with open('./data/obama-worlds-matter.pdf', 'rb') as pdf_file:
    text = slate.PDF(pdf_file).text()
print(text)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pdfminer.high_level import extract_text | |
# Extract the text of the PDF with pdfminer and print it.
# extract_text takes the path and opens the file itself; its second
# positional parameter is the PDF password, so a file mode such as 'rb'
# must not be passed there.
text = extract_text('./data/obama-worlds-matter.pdf')
print(text)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from PyPDF2 import PdfFileReader | |
# creating a pdf file object | |
pdfObject = open('./data/obama-worlds-matter.pdf', 'rb') | |
# creating a pdf reader object | |
pdfReader = PdfFileReader(pdfObject) | |
# Extract and concatenate each page's content | |
text='' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from tabula import read_pdf | |
from tabulate import tabulate | |
import pandas as pd | |
import io | |
# Read only page n°6 of the file
food_calories = read_pdf('./data/food_calories.pdf',pages = 6, | |
multiple_tables = True, stream = True) | |
# Transform the result into a string table format |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tabula | |
# Convert page 6 of the source PDF and write the result as a CSV file.
tabula.convert_into(
    './data/food_calories.pdf',
    "./data/food_calories2.csv",
    output_format="csv",
    pages=6,
)
OlderNewer