This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import boto3, configparser | |
s3resource = None | |
def setup(): | |
"""Creates S3 resource & sets configs to enable download.""" | |
print('Connecting to Amazon S3...') | |
# Securely import configs from private config file |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import boto3, configparser, os, botocore | |
def download_file(key): | |
""" | |
Downloads given filename from source bucket to destination directory. | |
Parameters | |
---------- | |
key : str | |
Name of file to download |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bs4 import BeautifulSoup | |
def explore_metadata(): | |
"""Explores arxiv bucket metadata.""" | |
print('\narxiv bucket metadata:') | |
with open('src/arXiv_src_manifest.xml', 'r') as manifest: | |
soup = BeautifulSoup(manifest, 'xml') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
from datetime import datetime | |
def begin_download(): | |
"""Sets up download of tars from arxiv bucket.""" | |
print('Beginning tar download & extraction...') | |
# Create a reusable Paginator | |
paginator = s3resource.meta.client.get_paginator('list_objects_v2') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import boto3, configparser, os, botocore, json | |
from bs4 import BeautifulSoup | |
from datetime import datetime | |
s3resource = None | |
def setup(): | |
"""Creates S3 resource & sets configs to enable download.""" | |
print('Connecting to Amazon S3...') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib.pyplot as plt, numpy as np | |
from matplotlib.font_manager import FontProperties | |
# Load Apple Color Emoji font | |
prop = FontProperties(fname='/System/Library/Fonts/Apple Color Emoji.ttc') | |
# Set up plot | |
freqs = [301, 96, 53, 81, 42] | |
labels = ['😊', '😱', '😂', '😄', '😛'] | |
plt.figure(figsize=(12,8)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Set the backend to use mplcairo | |
import matplotlib, mplcairo | |
print('Default backend: ' + matplotlib.get_backend()) | |
matplotlib.use("module://mplcairo.macosx") | |
print('Backend is now ' + matplotlib.get_backend()) | |
# IMPORTANT: Import these libraries only AFTER setting the backend | |
import matplotlib.pyplot as plt, numpy as np | |
from matplotlib.font_manager import FontProperties |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"type": "Feature", | |
"properties": { | |
"name": "Alabama", | |
"density": 94.65 | |
}, | |
"geometry": ... | |
... | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
latitude | longitude | |
---|---|---|
44.52634219999999 | -109.05653079999999 | |
44.2633149 | -104.9502538 | |
41.311366899999996 | -105.5911007 | |
42.8500769 | -106.32517490000001 | |
41.311366899999996 | -105.5911007 | |
41.1399814 | -104.8202462 | |
42.833014 | -108.73067250000001 | |
44.7538408 | -108.7573525 | |
43.381669099999996 | -87.9406453 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
data = {'headline': [], | |
'date': [], | |
'doc_type': [], | |
'material_type': [], | |
'section': [], | |
'keywords': []} | |
for response in responses: # For each response, get all the articles | |
articles = response['response']['docs'] | |
for article in articles: # For each article, make sure it falls within our date range |
OlderNewer