Skip to content

Instantly share code, notes, and snippets.

@addozhang
Created November 21, 2023 01:59
Show Gist options
  • Save addozhang/88473b9495ea66519d50d37ee1a4568e to your computer and use it in GitHub Desktop.
Save addozhang/88473b9495ea66519d50d37ee1a4568e to your computer and use it in GitHub Desktop.
A Python script for adding MVP blog activities from a blog's sitemap
from xml.etree import ElementTree as ET
import datetime
import requests
import time
# --- Settings: fill in the bracketed placeholders before running ---
file_path = '[PATH]'  # location of the XML file to read
userProfileId = '[USER_PROFILE_ID]'  # sent as "userProfileId" in each activity
token = 'Bearer [TOKEN]'  # sent verbatim as the Authorization header
lastRenewalDate = "2022-10-01"  # YYYY-MM-DD; only newer articles are kept
technologyFocusArea = "Cloud Native"  # sent as "technologyFocusArea"

# Read the XML document once and keep its root element for the item search.
tree = ET.parse(file_path)
root = tree.getroot()

# Cut-off instant: midnight of lastRenewalDate, pinned to UTC+08:00 so that it
# is timezone-aware and can be compared with the parsed publication dates.
_utc_plus_8 = datetime.timezone(datetime.timedelta(hours=8))
_renewal_midnight = datetime.datetime.strptime(lastRenewalDate, '%Y-%m-%d')
target_date = _renewal_midnight.replace(tzinfo=_utc_plus_8)
def _child_text(element, tag, default):
    """Return the text of ``element``'s first ``tag`` child, or ``default``.

    ``default`` is returned both when the child is missing and when it is
    present but empty (e.g. ``<description/>``); in the latter case
    ``Element.text`` is ``None``, which would otherwise leak into the
    date parsing and string slicing done later.
    """
    return element.findtext(tag) or default


# Collect every <item> whose pubDate is strictly after target_date.
extracted_articles = []
for item in root.findall('.//item'):
    pub_date_str = _child_text(item, 'pubDate', '')
    try:
        pub_date = datetime.datetime.strptime(
            pub_date_str, '%a, %d %b %Y %H:%M:%S %z'
        )
    except ValueError:
        # Missing, empty or malformed date: the article cannot be compared
        # with the cut-off, so it is skipped.
        continue

    if pub_date <= target_date:
        continue

    extracted_articles.append({
        'title': _child_text(item, 'title', 'No Title'),
        'link': _child_text(item, 'link', 'No Link'),
        'date': pub_date_str,
        'description': _child_text(item, 'description', 'No Description'),
    })
# Endpoint that receives one activity per POST request.
url = 'https://mavenapi-prod.azurewebsites.net/api/Activities/'

# Template payload for a single activity. The per-article fields ("date",
# "description", "privateDescription", "title" and "url") are not set here;
# they are filled in for each article right before the request is sent.
# Example values noted by the original author for those fields:
#   date:  "2023-11-15T16:00:00.000Z"
#   title / description: "Quick exploration of Tetragon: an eBPF-based
#       security observability and enforcement tool" (translated)
#   url:   "https://atbug.com/explore-tetragon-security-observability-and-enforcement-tool-based-on-ebpf/"
data = {
    "activity": {
        "id": 0,
        "activityTypeName": "Blog",
        "typeName": "Blog",
        "isPrivate": False,
        "targetAudience": ["Developer"],
        "tenant": "MVP",
        "userProfileId": userProfileId,
        "reach": 200,
        "quantity": 1,
        "role": "Author",
        "technologyFocusArea": technologyFocusArea,
        "additionalTechnologyAreas": [],
        "imageUrl": "",
    }
}

# Request headers modelled on a Safari browser session on mvp.microsoft.com.
headers = {
    'Content-Type': 'application/json',
    'Pragma': 'no-cache',
    'Accept': '*/*',
    'Authorization': token,
    'Sec-Fetch-Site': 'cross-site',
    'Accept-Language': 'en-US,en;q=0.9',
    'Cache-Control': 'no-cache',
    'Sec-Fetch-Mode': 'cors',
    # 'br' is deliberately not advertised: requests/urllib3 can only decode
    # Brotli when an optional brotli package is installed, and without it a
    # Brotli-encoded reply would surface as undecoded bytes in response.text.
    'Accept-Encoding': 'gzip, deflate',
    'Origin': 'https://mvp.microsoft.com',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Safari/605.1.15',
    'Referer': 'https://mvp.microsoft.com/',
    'Connection': 'keep-alive',
    'Host': 'mavenapi-prod.azurewebsites.net',
    'Sec-Fetch-Dest': 'empty',
}
print(f'Number of articles published after {target_date}: {len(extracted_articles)}')

# Submit the selected articles one by one, reusing the shared payload template.
# NOTE(review): the [:1] slice submits only the first matching article; it
# looks like a trial-run guard. Remove the slice to submit every article.
for article in extracted_articles[:1]:
    activity = data['activity']
    # NOTE(review): the raw pubDate string is sent as-is, while the author's
    # example value is an ISO-8601 UTC timestamp ("2023-11-15T16:00:00.000Z").
    # Confirm that the API accepts this format.
    activity['date'] = article['date']
    # Limit the description to 1000 characters; tolerate a missing (None) one.
    activity['description'] = (article['description'] or '')[:1000]
    activity['privateDescription'] = 'a blog post'
    activity['title'] = article['title']
    activity['url'] = article['link']

    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.post(url, headers=headers, json=data, timeout=30)
    if not response.ok:
        print(f'Failed to add {article["title"]!r}: HTTP {response.status_code}')
    print(response.text)

    # Pause between requests to avoid hammering the API.
    time.sleep(2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment