Skip to content

Instantly share code, notes, and snippets.

@sjwaight
Created July 23, 2021 05:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sjwaight/f7ce5a24ea6e59978186e8d07e9f4853 to your computer and use it in GitHub Desktop.
Save sjwaight/f7ce5a24ea6e59978186e8d07e9f4853 to your computer and use it in GitHub Desktop.
Python Azure Function that can read an RSS feed and generate a PowerPoint presentation
import logging
import azure.functions as func
import os
from azure.storage.blob import BlobClient, BlobSasPermissions, generate_blob_sas
from datetime import datetime, timedelta, timezone
from pptx import Presentation
from pptx.util import Pt
import requests # pulling data
from bs4 import BeautifulSoup # xml parsing
# RSS scraping function
# Based mostly on: https://github.com/mattdood/web_scraping_example/blob/master/scraping.py
def get_updates_rss(startDate, endDate):
    """Return the RSS items published between two dates.

    The feed is downloaded from the URL held in the ``UpdatesURL`` environment
    variable and parsed as XML.

    Args:
        startDate: Inclusive lower bound; compared against each item's parsed
            ``pubDate``, which is a naive ``datetime``.
        endDate: Inclusive upper bound, compared the same way.

    Returns:
        A list of dicts, one per matching item, with the keys ``'title'``,
        ``'link'``, ``'published'`` (the raw ``pubDate`` text) and
        ``'antype'`` (``"preview"`` when the title contains "preview" in any
        letter case, otherwise ``"GA"``). An empty list is returned when the
        feed cannot be fetched or parsed; the failure is logged.
    """
    article_list = []
    try:
        # A timeout keeps a stalled feed server from hanging the invocation.
        response = requests.get(os.environ["UpdatesURL"], timeout=30)
        # Surface HTTP errors instead of parsing an error page as a feed.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, features='xml')

        for item in soup.find_all('item'):
            published = item.find('pubDate').text
            pubDate = datetime.strptime(published, "%a, %d %b %Y %H:%M:%S Z")

            # Only include items falling within the requested date range.
            if not (startDate <= pubDate <= endDate):
                continue

            title = item.find('title').text
            link = item.find('link').text

            # Basic parse to flag announcement types.
            announcement_type = "preview" if "preview" in title.lower() else "GA"

            article_list.append({
                'title': title,
                'link': link,
                'published': published,
                'antype': announcement_type,
            })
        return article_list
    except Exception:
        logging.exception("Couldn't scrape the Azure Updates RSS feed")
        # Always hand back a list so callers can safely take len() of the result.
        return []
###
# Generate a section of the final PowerPoint
###
def generate_presentation_section(presentation, layout, articles, item_type, items_per_slide=5):
    """Append slides listing ``articles`` to ``presentation``.

    Articles are split into consecutive batches of ``items_per_slide``. Each
    batch gets its own slide titled ``"<item_type> (<n>)"`` where ``n`` counts
    slides within this section starting at 1. Every article title becomes a
    24pt paragraph in the placeholder at index 1 of the slide (presumably the
    body placeholder of a bullet layout - confirm against the layout passed
    in), and a matching ``"- <link> (<published>)"`` line is written to the
    slide notes.

    Args:
        presentation: Object exposing ``slides.add_slide(layout)``.
        layout: Slide layout passed to ``add_slide`` for every new slide.
        articles: List of dicts with ``"title"``, ``"link"`` and
            ``"published"`` keys.
        item_type: Label used in each slide title.
        items_per_slide: Maximum number of articles per slide (default 5).

    Raises:
        ValueError: If ``items_per_slide`` is less than 1.
    """
    if items_per_slide < 1:
        raise ValueError("items_per_slide must be at least 1")

    # Nothing to list: do not add a blank, untitled slide to the deck.
    if not articles:
        return

    font_size = Pt(24)
    batch_starts = range(0, len(articles), items_per_slide)

    for slide_number, first in enumerate(batch_starts, start=1):
        batch = articles[first:first + items_per_slide]

        slide = presentation.slides.add_slide(layout)
        notes_frame = slide.notes_slide.notes_text_frame
        shapes = slide.shapes

        shapes.title.text = f"{item_type} ({slide_number})"
        body_frame = shapes.placeholders[1].text_frame

        # The first item on a slide reuses the paragraph the text frame
        # already holds; later items are appended as new paragraphs.
        head, *rest = batch
        body_frame.text = head["title"]
        body_frame.paragraphs[0].font.size = font_size
        notes_frame.text = "- " + head["link"] + " (" + head["published"] + ")"

        for article in rest:
            bullet = body_frame.add_paragraph()
            bullet.font.size = font_size
            bullet.text = article["title"]

            note = notes_frame.add_paragraph()
            note.text = "- " + article["link"] + " (" + article["published"] + ")"
###
# Upload generated file to Azure Storage and generate a SAS URL for it
###
def upload_file_to_storage(presenation_file):
    """Upload a local file to Azure Blob Storage and return a read-only SAS URL.

    The storage target is taken from the environment variables
    ``PowerPointAccountConnection``, ``PowerPointContainer``,
    ``PowerPointStorageAccount`` and ``PowerPointStorageKey``.

    Args:
        presenation_file: Path of the local file to upload. Only its base name
            is used as the blob name, so the local directory does not leak
            into the blob path or the download URL.

    Returns:
        An ``https://`` URL for the uploaded blob carrying a SAS token that
        grants read access until one hour after the upload.
    """
    account_name = os.environ["PowerPointStorageAccount"]
    container_name = os.environ["PowerPointContainer"]
    blob_name = os.path.basename(presenation_file)

    blob_client = BlobClient.from_connection_string(
        conn_str=os.environ["PowerPointAccountConnection"],
        container_name=container_name,
        blob_name=blob_name,
    )
    with open(presenation_file, "rb") as data:
        blob_client.upload_blob(data)

    # Generate a SAS token which will allow the caller to download the file for 1 hour.
    # The start time is backdated so small clock differences between this host
    # and the storage service do not make the token "not yet valid".
    now = datetime.now(tz=timezone.utc)
    sas_token = generate_blob_sas(
        account_name,
        container_name,
        blob_name=blob_name,
        account_key=os.environ["PowerPointStorageKey"],
        permission=BlobSasPermissions(read=True),
        start=now - timedelta(minutes=15),
        expiry=now + timedelta(hours=1),
    )
    return f"https://{account_name}.blob.core.windows.net/{container_name}/{blob_name}?{sas_token}"
#####
# Azure Function main entry point
#####
def _html_response(message: str, status_code: int = 200) -> func.HttpResponse:
    """Build the HTML response returned by ``main``."""
    return func.HttpResponse(
        mimetype="text/html",
        body=message,
        status_code=status_code
    )


def main(req: func.HttpRequest) -> func.HttpResponse:
    """Azure Function entry point: build a PowerPoint of feed updates for a date range.

    Query parameters:
        start: Required, ``YYYY-MM-DD``. First day to include.
        end: Optional, ``YYYY-MM-DD``. Last day to include; defaults to today.

    Returns:
        An HTML response. 200 with a download link when a presentation was
        generated and uploaded, or 200 with an explanatory message when no
        updates fall in the range. 400 when ``start`` is missing or a date is
        malformed. 502 when the feed helper reports failure by returning
        ``None``. 500 when generating the presentation raises ``ValueError``.
    """
    # start date is required
    startParam = req.params.get('start')
    if not startParam:
        return _html_response(
            "Bad request: 'start' query parameter is required in format YYYY-MM-DD.",
            400,
        )

    try:
        starting = datetime.strptime(startParam, "%Y-%m-%d")
        # end date is optional, so if not provided use today (at midnight)
        endParam = req.params.get('end')
        if endParam:
            ending = datetime.strptime(endParam, "%Y-%m-%d")
        else:
            ending = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
    except ValueError:
        logging.exception("Invalid date parameter")
        return _html_response(
            "Bad request: 'start' and 'end' query parameters must be dates in the format YYYY-MM-DD.",
            400,
        )

    # add 1 day to end date so we include all of the day
    ending = ending + timedelta(days=1)

    try:
        updatelist = get_updates_rss(startDate=starting, endDate=ending)
        if updatelist is None:
            # The feed helper signals failure with None; that is an upstream
            # problem, not a malformed request.
            return _html_response(
                "The updates feed could not be retrieved. Please try again later.",
                502,
            )

        if not updatelist:
            return _html_response(
                "There are no updates for the specified period, so no PowerPoint has been generated."
            )

        prs = Presentation()

        # Initialise default slide layout (bullets)
        bullet_slide_layout = prs.slide_layouts[1]

        preview_items = [item for item in updatelist if item["antype"] == "preview"]
        ga_items = [item for item in updatelist if item["antype"] == "GA"]

        generate_presentation_section(prs, bullet_slide_layout, preview_items, "Preview")
        generate_presentation_section(prs, bullet_slide_layout, ga_items, "GA")

        filename = os.environ["LocalTempFilePath"] + "AzureUpdate-" + datetime.strftime(datetime.now(),"%Y-%m-%d-%H-%M-%S") + ".pptx"
        prs.save(filename)

        blob_sas_url = upload_file_to_storage(filename)
        return _html_response(
            "File created and uploaded to storage. You can <a href='" + blob_sas_url + "'>download it</a> for the next 1 hour."
        )
    except TypeError:
        logging.exception("Type error")
        return _html_response(
            "Check the format of your request and ensure you provide the 'start' query parameter in the format YYYY-MM-DD",
            400,
        )
    except ValueError:
        # Dates were already validated above, so this is an internal failure.
        logging.exception("Failed to generate the presentation")
        return _html_response(
            "The presentation could not be generated due to an internal error.",
            500,
        )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment