jsanz/README.md

## README.md

      
    Raw
  

              README.md
            
          
    Notebook available at:
https://nbviewer.jupyter.org/gist/jsanz/22de75a321475485219d67a3740193f5
# coding: utf-8

# # Processing the submitted FOSS4G 2019 abstracts

# Third-party libraries (not part of the Python 3 standard library) can be installed with this command:
# 
# ```bash
# $ pip install requests beautifulsoup4 wordcloud
# ```

# imports
import os
import requests
from bs4 import BeautifulSoup
import re
from collections import namedtuple
from wordcloud import WordCloud

# getting my param session to retrieve abstracts using env variable
# The token is interpolated into the rank-page URL below.
SESSION = os.getenv('FOSS4G_SESSION')
if SESSION:
    print('all good!')
else:
    # Without the variable the URL below is built with the literal text "None".
    print('FOSS4G_SESSION is not set')

# retrieve the community review page
# An explicit timeout keeps the call from blocking forever when the server
# never answers (requests applies no timeout by default).
r = requests.get(
    url=f'https://community-review.foss4g.org//rank.php?ranksession={SESSION}',
    timeout=30,
)
if r.status_code == 200:
    print('request OK')
else:
    print(f'request failed with HTTP status {r.status_code}')

# cook your soup and check the title
# Naming the parser makes the parse tree independent of which optional
# parsers happen to be installed and avoids bs4's "no parser specified" warning.
soup = BeautifulSoup(r.text, 'html.parser')
soup.title

# retrieve the abstract divs, and check how many
# (elements with a CSS class matching "abstract-div-0" or "abstract-div-1")
divs = soup.find_all(class_=re.compile("abstract-div-[01]"))
len(divs)

# process the divs to get a list of named tuples with title and abstract
Submission = namedtuple('Submission', ['title', 'abstract'])


def _join_fragments(fragments):
    """Strip every text fragment, drop the empty ones and join with one space."""
    stripped = (fragment.strip() for fragment in fragments)
    return ' '.join(piece for piece in stripped if piece)


def process_div(div):
    """Build a ``Submission`` from one abstract element.

    Args:
        div: An element exposing ``find_all(name)`` whose results expose a
            ``strings`` iterable (a BeautifulSoup tag in this script).

    Returns:
        A ``Submission`` whose ``title`` is the text of the first ``<h3>``
        found in ``div`` (``''`` when there is none) and whose ``abstract``
        is the text of every ``<div>`` found in ``div``. Fragments are
        stripped, whitespace-only fragments are discarded, and the rest are
        joined with single spaces.
    """
    # A div without a heading yields an empty title instead of raising
    # IndexError and aborting the processing of every other submission.
    headings = div.find_all('h3')
    title = _join_fragments(headings[0].strings) if headings else ''

    # NOTE(review): BeautifulSoup's find_all searches all descendants and
    # .strings yields every descendant string, so if the markup nests divs
    # the inner text is collected once per enclosing div -- confirm whether
    # that repetition is wanted for the word cloud.
    abstract = _join_fragments(
        text for block in div.find_all('div') for text in block.strings
    )

    return Submission(title, abstract)

# turn every abstract div into a Submission and peek at the first one
subs = list(map(process_div, divs))
print(subs[0])

# word cloud renderer: 1024x768 canvas, white background, RGBA image mode
cloud_generator = WordCloud(
    width=1024,
    height=768,
    scale=1,
    background_color="white",
    mode="RGBA",
)

# split on single spaces: every title first, then every abstract
texts = [
    getattr(sub, field).split(' ')
    for field in ('title', 'abstract')
    for sub in subs
]
# flatten the token lists into one lower-cased list of words
words = [token.lower() for tokens in texts for token in tokens]

# feed the re-joined words to the generator and render the image
cloud_generator.generate(' '.join(words)).to_image()

  
## foss4g_abstracts_wordcloud.ipynb

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              foss4g_abstracts_wordcloud.ipynb
            
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.