Created
January 5, 2021 12:04
-
-
Save elisong/6964a75e466282fc1164cba1a950b478 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="utf-8" standalone="yes" ?> | |
<rss version="2.0" | |
xmlns:atom="http://www.w3.org/2005/Atom"> | |
<channel> | |
<title>RStudio Blog</title> | |
<link>https://blog.rstudio.com/</link> | |
<description>Recent content on RStudio Blog</description> | |
<generator>Hugo -- gohugo.io</generator> | |
<language>en-us</language> | |
<managingEditor>info@rstudio.com (RStudio, Inc.)</managingEditor> | |
<webMaster>info@rstudio.com (RStudio, Inc.)</webMaster> | |
<lastBuildDate>Wed, 23 Dec 2020 00:00:00 +0000</lastBuildDate> | |
<atom:link href="https://blog.rstudio.com/" rel="self" type="application/rss+xml" /> | |
<item> | |
<title>Exploring US COVID-19 Cases and Deaths</title> | |
<link>https://blog.rstudio.com/2020/12/23/exploring-us-covid-19-cases/</link> | |
<pubDate>Wed, 23 Dec 2020 00:00:00 +0000</pubDate> | |
<author>Art Steinmetz</author> | |
<guid>https://blog.rstudio.com/2020/12/23/exploring-us-covid-19-cases/</guid> | |
<description> | |
<img src="pic12.png" /> | |
<a src="anchor12.png" /> | |
<script src="script12.js" /> | |
</description> | |
</item> | |
<item> | |
<title>Winners of the 2020 RStudio Table Contest</title> | |
<link>https://blog.rstudio.com/2020/12/23/winners-of-the-2020-rstudio-table-contest/</link> | |
<pubDate>Wed, 23 Dec 2020 00:00:00 +0000</pubDate> | |
<author>Rich Iannone and Curtis Kephart</author> | |
<guid>https://blog.rstudio.com/2020/12/23/winners-of-the-2020-rstudio-table-contest/</guid> | |
<description> | |
<img src="pic22.png" /> | |
<a src="anchor22.png" /> | |
<script src="script22.js" /> | |
</description> | |
</item> | |
</channel> | |
</rss> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version='1.0' encoding='utf-8'?> | |
<rss xmlns:ns0="http://www.w3.org/2005/Atom" version="2.0"> | |
<channel> | |
<title>RStudio Blog</title> | |
<link>https://blog.rstudio.com/</link> | |
<description>Recent content on RStudio Blog</description> | |
<generator>Hugo -- gohugo.io</generator> | |
<language>en-us</language> | |
<managingEditor>info@rstudio.com (RStudio, Inc.)</managingEditor> | |
<webMaster>info@rstudio.com (RStudio, Inc.)</webMaster> | |
<lastBuildDate>Wed, 23 Dec 2020 00:00:00 +0000</lastBuildDate> | |
<ns0:link href="https://blog.rstudio.com/" rel="self" type="application/rss+xml" /> | |
<item> | |
<title>Exploring US COVID-19 Cases and Deaths</title> | |
<link>https://blog.rstudio.com/2020/12/23/exploring-us-covid-19-cases/</link> | |
<pubDate>Wed, 23 Dec 2020 00:00:00 +0000</pubDate> | |
<author>Art Steinmetz</author> | |
<guid>https://blog.rstudio.com/2020/12/23/exploring-us-covid-19-cases/</guid> | |
<description> | |
<img src="https://blog.rstudio.com/2020/12/23/exploring-us-covid-19-cases/pic12.png" /> | |
<a src="https://blog.rstudio.com/2020/12/23/exploring-us-covid-19-cases/anchor12.png" /> | |
<script src="script12.js" /> | |
</description> | |
</item> | |
<item> | |
<title>Winners of the 2020 RStudio Table Contest</title> | |
<link>https://blog.rstudio.com/2020/12/23/winners-of-the-2020-rstudio-table-contest/</link> | |
<pubDate>Wed, 23 Dec 2020 00:00:00 +0000</pubDate> | |
<author>Rich Iannone and Curtis Kephart</author> | |
<guid>https://blog.rstudio.com/2020/12/23/winners-of-the-2020-rstudio-table-contest/</guid> | |
<description> | |
<img src="https://blog.rstudio.com/2020/12/23/winners-of-the-2020-rstudio-table-contest/pic22.png" /> | |
<a src="https://blog.rstudio.com/2020/12/23/winners-of-the-2020-rstudio-table-contest/anchor22.png" /> | |
<script src="script22.js" /> | |
</description> | |
</item> | |
</channel> | |
</rss> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Platform : Darwin-20.2.0-x86_64-i386-64bit | |
# Version : 3.7.4 | |
import platform | |
import subprocess | |
import re | |
import defusedxml.cElementTree as ET | |
# Matches a relative ``src``/``href`` value inside an <img> or <a> tag.
#   group 1: tag prefix up to and including ``src=`` / ``href=``
#   group 2: the opening quote character (the value must end with the same one)
#   group 3: the attribute value
# Values starting with ``http://``, ``https://`` or ``/`` are deliberately
# skipped, as are empty values (the ``\2`` alternative in the lookahead).
_RELATIVE_URL_RE = re.compile(
    r"""(<(?:img|a)\s[^>]*(?:src|href)=)(["'])(?!https?://|/|\2)(.+?)\2"""
)


def _absolutize_urls(html, base_url):
    """Return *html* with relative <img>/<a> URLs resolved against *base_url*."""
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urljoin

    def _rewrite(match):
        prefix, quote, value = match.groups()
        # A callable replacement avoids template parsing, so a base URL that
        # starts with a digit or contains a backslash cannot corrupt the
        # group references. urljoin() leaves values that already carry a
        # scheme (mailto:, data:, ...) untouched.
        return prefix + quote + urljoin(base_url, value) + quote

    return _RELATIVE_URL_RE.sub(_rewrite, html)


def _first_img_line(path):
    """Return grep's raw output (bytes) for the first line of *path* containing "img"."""
    # Argument list instead of a shell string: the file name is never
    # interpreted by a shell, and ``--`` stops it being parsed as an option.
    return subprocess.check_output(["grep", "-m", "1", "-e", "img", "--", path])


def main(input_file="rss-xml-url-sub-input.xml",
         output_file="rss-xml-url-sub-output.xml"):
    """Rewrite relative <img>/<a> URLs in an RSS feed's item descriptions.

    For every ``<item>`` in *input_file*, relative ``src``/``href`` values
    found in the text of its ``<description>`` are resolved against the
    item's ``<link>`` URL. The resulting tree is written to *output_file*.
    The first line containing "img" is printed before and after as a quick
    visual check.

    Args:
        input_file: Path of the RSS XML file to read.
        output_file: Path the rewritten XML is written to.

    Raises:
        subprocess.CalledProcessError: If grep finds no line containing
            "img" in the input or output file (grep exits non-zero).
    """
    print("Before :", _first_img_line(input_file))

    tree = ET.parse(input_file)
    for item in tree.getroot().iter("item"):
        link = item.find("link")
        description = item.find("description")
        # Skip items that lack a usable link or have no description text;
        # there is nothing to resolve against, or nothing to rewrite.
        if link is None or description is None:
            continue
        base_url = (link.text or "").strip()
        if not base_url or not description.text:
            continue
        description.text = _absolutize_urls(description.text, base_url)

    tree.write(output_file, encoding="utf-8", xml_declaration=True)
    print("After :", _first_img_line(output_file))
if __name__ == "__main__":
    # Report the runtime environment first so the printed results can be
    # tied to a specific platform and interpreter version.
    environment = (
        ("Platform : ", platform.platform()),
        ("Version : ", platform.python_version()),
    )
    for label, value in environment:
        print(label, value)
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment