Skip to content

Instantly share code, notes, and snippets.

@prithwi
Created June 1, 2018 12:30
Show Gist options
  • Save prithwi/6356b561aeb7e7dbd4a298885de90c8c to your computer and use it in GitHub Desktop.
Save prithwi/6356b561aeb7e7dbd4a298885de90c8c to your computer and use it in GitHub Desktop.
Notebook to parse list of accepted KDD papers
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from lxml import html\n",
"import numpy as np\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"tree = html.parse('./KDD 2018 _ Accepted Papers.htm')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Research"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Research - Oral"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ID</th>\n",
" <th>Title</th>\n",
" <th>Authors</th>\n",
" <th>Track</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>133</td>\n",
" <td>R2SDH: Robust Rotated Supervised Discrete Hashing</td>\n",
" <td>Jie Gui (Rutgers University); Ping Li (Baidu R...</td>\n",
" <td>Research - Oral</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>22</td>\n",
" <td>Smoothed Dilated Convolutions for Improved Den...</td>\n",
" <td>Zhengyang Wang (Washington State University); ...</td>\n",
" <td>Research - Oral</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>27</td>\n",
" <td>Discovering Non-Redundant K-means Clusterings ...</td>\n",
" <td>Dominik Mautz (Ludwig Maximilian University of...</td>\n",
" <td>Research - Oral</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>44</td>\n",
" <td>Trajectory-driven Influential Billboard Placement</td>\n",
" <td>Ping Zhang (Wuhan University); Zhifeng Bao (RM...</td>\n",
" <td>Research - Oral</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>83</td>\n",
" <td>Multi-Type Itemset Embedding for Learning Beha...</td>\n",
" <td>Daheng Wang (University of Notre Dame); Meng J...</td>\n",
" <td>Research - Oral</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" ID Title \\\n",
"0 133 R2SDH: Robust Rotated Supervised Discrete Hashing \n",
"1 22 Smoothed Dilated Convolutions for Improved Den... \n",
"2 27 Discovering Non-Redundant K-means Clusterings ... \n",
"3 44 Trajectory-driven Influential Billboard Placement \n",
"4 83 Multi-Type Itemset Embedding for Learning Beha... \n",
"\n",
" Authors Track \n",
"0 Jie Gui (Rutgers University); Ping Li (Baidu R... Research - Oral \n",
"1 Zhengyang Wang (Washington State University); ... Research - Oral \n",
"2 Dominik Mautz (Ludwig Maximilian University of... Research - Oral \n",
"3 Ping Zhang (Wuhan University); Zhifeng Bao (RM... Research - Oral \n",
"4 Daheng Wang (University of Notre Dame); Meng J... Research - Oral "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Absolute XPath of the <ul> that lists this track's papers in the saved page.\n",
"res_oral_xpath = '/html/body/main/div[1]/section/div[1]/div/div[1]/div/ul'\n",
"\n",
"# Flat list of every <span> text node found under the list items.\n",
"res_oral_list = tree.xpath(res_oral_xpath)[0].xpath('li/div/span/text()')\n",
"\n",
"# The reshape assumes three text nodes per paper: column 0 is split into ID and\n",
"# Title below, column 2 becomes Authors, and column 1 is not used.\n",
"res_oral_raw = pd.DataFrame(np.array(res_oral_list).reshape(-1, 3))\n",
"\n",
"# Raw string so the regex escape reaches the re module untouched. Because the\n",
"# pattern has a capture group, the split yields ['', ID, Title] per row, which\n",
"# is why columns 1 and 2 are selected.\n",
"res_oral_id_title = res_oral_raw[0].str.split(r'^(\\d*):', expand=True)[[1, 2]]\n",
"\n",
"res_oral_df = pd.concat((res_oral_id_title, res_oral_raw[2]), axis=1)\n",
"res_oral_df.columns = ['ID', 'Title', 'Authors']\n",
"\n",
"# Trim surrounding whitespace from every text column.\n",
"for text_col in ['ID', 'Title', 'Authors']:\n",
"    res_oral_df[text_col] = res_oral_df[text_col].str.strip()\n",
"\n",
"res_oral_df['Track'] = \"Research - Oral\"\n",
"res_oral_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ID</th>\n",
" <th>Title</th>\n",
" <th>Authors</th>\n",
" <th>Track</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>102</th>\n",
" <td>1418</td>\n",
" <td>Investor-Imitator: A Framework for Trading Kno...</td>\n",
" <td>Yi Ding (Nanjing University of Aeronautics and...</td>\n",
" <td>Research - Oral</td>\n",
" </tr>\n",
" <tr>\n",
" <th>103</th>\n",
" <td>1437</td>\n",
" <td>Discrete Ranking-based Matrix Factorization wi...</td>\n",
" <td>Yan Zhang (University of Science and Technolog...</td>\n",
" <td>Research - Oral</td>\n",
" </tr>\n",
" <tr>\n",
" <th>104</th>\n",
" <td>1494</td>\n",
" <td>LARC: Learning Activity-Regularized overlappin...</td>\n",
" <td>Alexander Gorovits (University at Albany-SUNY)...</td>\n",
" <td>Research - Oral</td>\n",
" </tr>\n",
" <tr>\n",
" <th>105</th>\n",
" <td>1540</td>\n",
" <td>EvoGraph: An Effective and Efficient Graph Ups...</td>\n",
" <td>Himchan Park (DGIST); Min-Soo Kim (DGIST)</td>\n",
" <td>Research - Oral</td>\n",
" </tr>\n",
" <tr>\n",
" <th>106</th>\n",
" <td>1596</td>\n",
" <td>Training Big Random Forests with Little Resources</td>\n",
" <td>Fabian Gieseke (University of Copenhagen); Chr...</td>\n",
" <td>Research - Oral</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" ID Title \\\n",
"102 1418 Investor-Imitator: A Framework for Trading Kno... \n",
"103 1437 Discrete Ranking-based Matrix Factorization wi... \n",
"104 1494 LARC: Learning Activity-Regularized overlappin... \n",
"105 1540 EvoGraph: An Effective and Efficient Graph Ups... \n",
"106 1596 Training Big Random Forests with Little Resources \n",
"\n",
" Authors Track \n",
"102 Yi Ding (Nanjing University of Aeronautics and... Research - Oral \n",
"103 Yan Zhang (University of Science and Technolog... Research - Oral \n",
"104 Alexander Gorovits (University at Albany-SUNY)... Research - Oral \n",
"105 Himchan Park (DGIST); Min-Soo Kim (DGIST) Research - Oral \n",
"106 Fabian Gieseke (University of Copenhagen); Chr... Research - Oral "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"res_oral_df.tail()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Research - poster"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ID</th>\n",
" <th>Title</th>\n",
" <th>Authors</th>\n",
" <th>Track</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>62</td>\n",
" <td>Large-Scale Learnable Graph Convolutional Netw...</td>\n",
" <td>Hongyang Gao (Washington State University); Zh...</td>\n",
" <td>Research - poster</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>64</td>\n",
" <td>Online Adaptive Asymmetric Active Learning for...</td>\n",
" <td>Yifan Zhang (South China University of Technol...</td>\n",
" <td>Research - poster</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>148</td>\n",
" <td>Multi-Label Inference for Crowdsourcing</td>\n",
" <td>Jing Zhang (Nanjing University of Science and ...</td>\n",
" <td>Research - poster</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>182</td>\n",
" <td>Deep Adversarial Learning for Multi-Modality M...</td>\n",
" <td>Lei Cai (Washington State University); Zhengya...</td>\n",
" <td>Research - poster</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>227</td>\n",
" <td>SPARC: Self-Paced Network Representation for F...</td>\n",
" <td>Dawei Zhou (Arizona State University); Jingrui...</td>\n",
" <td>Research - poster</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" ID Title \\\n",
"0 62 Large-Scale Learnable Graph Convolutional Netw... \n",
"1 64 Online Adaptive Asymmetric Active Learning for... \n",
"2 148 Multi-Label Inference for Crowdsourcing \n",
"3 182 Deep Adversarial Learning for Multi-Modality M... \n",
"4 227 SPARC: Self-Paced Network Representation for F... \n",
"\n",
" Authors Track \n",
"0 Hongyang Gao (Washington State University); Zh... Research - poster \n",
"1 Yifan Zhang (South China University of Technol... Research - poster \n",
"2 Jing Zhang (Nanjing University of Science and ... Research - poster \n",
"3 Lei Cai (Washington State University); Zhengya... Research - poster \n",
"4 Dawei Zhou (Arizona State University); Jingrui... Research - poster "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Absolute XPath of the <ul> that lists this track's papers in the saved page.\n",
"res_poster_xpath = '/html/body/main/div[1]/section/div[1]/div/div[2]/div/ul'\n",
"\n",
"# Flat list of every <span> text node found under the list items.\n",
"res_poster_list = tree.xpath(res_poster_xpath)[0].xpath('li/div/span/text()')\n",
"\n",
"# The reshape assumes three text nodes per paper: column 0 is split into ID and\n",
"# Title below, column 2 becomes Authors, and column 1 is not used.\n",
"res_poster_raw = pd.DataFrame(np.array(res_poster_list).reshape(-1, 3))\n",
"\n",
"# Raw string so the regex escape reaches the re module untouched. Because the\n",
"# pattern has a capture group, the split yields ['', ID, Title] per row, which\n",
"# is why columns 1 and 2 are selected.\n",
"res_poster_id_title = res_poster_raw[0].str.split(r'^(\\d*):', expand=True)[[1, 2]]\n",
"\n",
"res_poster_df = pd.concat((res_poster_id_title, res_poster_raw[2]), axis=1)\n",
"res_poster_df.columns = ['ID', 'Title', 'Authors']\n",
"\n",
"# Trim surrounding whitespace from every text column.\n",
"for text_col in ['ID', 'Title', 'Authors']:\n",
"    res_poster_df[text_col] = res_poster_df[text_col].str.strip()\n",
"\n",
"res_poster_df['Track'] = \"Research - poster\"\n",
"res_poster_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ID</th>\n",
" <th>Title</th>\n",
" <th>Authors</th>\n",
" <th>Track</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>69</th>\n",
" <td>1450</td>\n",
" <td>Prediction-time Efficient Classification Using...</td>\n",
" <td>Liang Zhao (George Mason University); Amir Ali...</td>\n",
" <td>Research - poster</td>\n",
" </tr>\n",
" <tr>\n",
" <th>70</th>\n",
" <td>1495</td>\n",
" <td>Approximating the Spectrum of a Graph</td>\n",
" <td>David Cohen-Steiner (INRIA); Weihao Kong (Stan...</td>\n",
" <td>Research - poster</td>\n",
" </tr>\n",
" <tr>\n",
" <th>71</th>\n",
" <td>1500</td>\n",
" <td>When Sentiment Analysis Meets Social Network: ...</td>\n",
" <td>Lin Gong (University of Virginia); Hongning Wa...</td>\n",
" <td>Research - poster</td>\n",
" </tr>\n",
" <tr>\n",
" <th>72</th>\n",
" <td>1501</td>\n",
" <td>Latent variable time-varying network inference</td>\n",
" <td>Federico Tomasi (DIBRIS - Universita degli stu...</td>\n",
" <td>Research - poster</td>\n",
" </tr>\n",
" <tr>\n",
" <th>73</th>\n",
" <td>1523</td>\n",
" <td>Stablizing Reinforcement Learning in Dynamic E...</td>\n",
" <td>Shi-Yong Chen (Nanjing University); Yang Yu (N...</td>\n",
" <td>Research - poster</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" ID Title \\\n",
"69 1450 Prediction-time Efficient Classification Using... \n",
"70 1495 Approximating the Spectrum of a Graph \n",
"71 1500 When Sentiment Analysis Meets Social Network: ... \n",
"72 1501 Latent variable time-varying network inference \n",
"73 1523 Stablizing Reinforcement Learning in Dynamic E... \n",
"\n",
" Authors Track \n",
"69 Liang Zhao (George Mason University); Amir Ali... Research - poster \n",
"70 David Cohen-Steiner (INRIA); Weihao Kong (Stan... Research - poster \n",
"71 Lin Gong (University of Virginia); Hongning Wa... Research - poster \n",
"72 Federico Tomasi (DIBRIS - Universita degli stu... Research - poster \n",
"73 Shi-Yong Chen (Nanjing University); Yang Yu (N... Research - poster "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"res_poster_df.tail()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Application"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Application - oral"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ID</th>\n",
" <th>Title</th>\n",
" <th>Authors</th>\n",
" <th>Track</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>A-51</td>\n",
" <td>I Know You’ll Be Back: Interpretable New User ...</td>\n",
" <td>Carl Yang (University Of Illinois, Urbana Cham...</td>\n",
" <td>Application - oral</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>A-594</td>\n",
" <td>PrePeP – A Tool for the Identification and Cha...</td>\n",
" <td>Maksim Koptelov (University of Caen Normandy);...</td>\n",
" <td>Application - oral</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>A-88</td>\n",
" <td>Large-Scale Order Dispatch in On-Demand Ride-S...</td>\n",
" <td>Zhe Xu (AI Labs, Didi Chuxing); Zhixin Li (AI ...</td>\n",
" <td>Application - oral</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>A-186</td>\n",
" <td>WattHome: Identifying Energy-Inefficient Homes...</td>\n",
" <td>Srinivasan Iyengar (University of Massachusett...</td>\n",
" <td>Application - oral</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>A-293</td>\n",
" <td>Perceive Your Users in Depth: Learning Univers...</td>\n",
" <td>Yabo Ni (Alibaba Group); Dan Ou (Alibaba Group...</td>\n",
" <td>Application - oral</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" ID Title \\\n",
"0 A-51 I Know You’ll Be Back: Interpretable New User ... \n",
"1 A-594 PrePeP – A Tool for the Identification and Cha... \n",
"2 A-88 Large-Scale Order Dispatch in On-Demand Ride-S... \n",
"3 A-186 WattHome: Identifying Energy-Inefficient Homes... \n",
"4 A-293 Perceive Your Users in Depth: Learning Univers... \n",
"\n",
" Authors Track \n",
"0 Carl Yang (University Of Illinois, Urbana Cham... Application - oral \n",
"1 Maksim Koptelov (University of Caen Normandy);... Application - oral \n",
"2 Zhe Xu (AI Labs, Didi Chuxing); Zhixin Li (AI ... Application - oral \n",
"3 Srinivasan Iyengar (University of Massachusett... Application - oral \n",
"4 Yabo Ni (Alibaba Group); Dan Ou (Alibaba Group... Application - oral "
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Absolute XPath of the <ul> that lists this track's papers in the saved page.\n",
"app_oral_xpath = '/html/body/main/div[1]/section/div[1]/div/div[3]/div/ul'\n",
"\n",
"# Flat list of every <span> text node found under the list items.\n",
"app_oral_list = tree.xpath(app_oral_xpath)[0].xpath('li/div/span/text()')\n",
"\n",
"# The reshape assumes three text nodes per paper: column 0 is split into ID and\n",
"# Title below, column 2 becomes Authors, and column 1 is not used.\n",
"app_oral_raw = pd.DataFrame(np.array(app_oral_list).reshape(-1, 3))\n",
"\n",
"# IDs in this track carry an 'A-' prefix. Raw string so the regex escape reaches\n",
"# the re module untouched; the capture group makes the split yield\n",
"# ['', ID, Title] per row, which is why columns 1 and 2 are selected.\n",
"app_oral_id_title = app_oral_raw[0].str.split(r'^(A-\\d*):', expand=True)[[1, 2]]\n",
"\n",
"app_oral_df = pd.concat((app_oral_id_title, app_oral_raw[2]), axis=1)\n",
"app_oral_df.columns = ['ID', 'Title', 'Authors']\n",
"\n",
"# Trim surrounding whitespace from every text column.\n",
"for text_col in ['ID', 'Title', 'Authors']:\n",
"    app_oral_df[text_col] = app_oral_df[text_col].str.strip()\n",
"\n",
"app_oral_df['Track'] = \"Application - oral\"\n",
"app_oral_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ID</th>\n",
" <th>Title</th>\n",
" <th>Authors</th>\n",
" <th>Track</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>35</th>\n",
" <td>A-1158</td>\n",
" <td>ActiveRemediation: The Search for Lead Pipes i...</td>\n",
" <td>Jacob Abernethy (Georgia Institute of Technolo...</td>\n",
" <td>Application - oral</td>\n",
" </tr>\n",
" <tr>\n",
" <th>36</th>\n",
" <td>A-1253</td>\n",
" <td>Winner’s Curse: Bias Estimation for Total Effe...</td>\n",
" <td>Minyong Lee (Airbnb); Milan Shen (Airbnb)</td>\n",
" <td>Application - oral</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37</th>\n",
" <td>A-1326</td>\n",
" <td>FireGuru: A real-time pipeline for spatio-temp...</td>\n",
" <td>Bhavkaran Swalia (Carnegie Mellon University);...</td>\n",
" <td>Application - oral</td>\n",
" </tr>\n",
" <tr>\n",
" <th>38</th>\n",
" <td>A-1411</td>\n",
" <td>Applying the Delta method in metric analytics:...</td>\n",
" <td>Alex Deng (Microsoft); Ulf Knoblich (Microsoft...</td>\n",
" <td>Application - oral</td>\n",
" </tr>\n",
" <tr>\n",
" <th>39</th>\n",
" <td>A-1415</td>\n",
" <td>Where Will Dockless Shared Bikes be Stacked?—-...</td>\n",
" <td>Zhaoyang Liu (Shanghai Jiao Tong University); ...</td>\n",
" <td>Application - oral</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" ID Title \\\n",
"35 A-1158 ActiveRemediation: The Search for Lead Pipes i... \n",
"36 A-1253 Winner’s Curse: Bias Estimation for Total Effe... \n",
"37 A-1326 FireGuru: A real-time pipeline for spatio-temp... \n",
"38 A-1411 Applying the Delta method in metric analytics:... \n",
"39 A-1415 Where Will Dockless Shared Bikes be Stacked?—-... \n",
"\n",
" Authors Track \n",
"35 Jacob Abernethy (Georgia Institute of Technolo... Application - oral \n",
"36 Minyong Lee (Airbnb); Milan Shen (Airbnb) Application - oral \n",
"37 Bhavkaran Swalia (Carnegie Mellon University);... Application - oral \n",
"38 Alex Deng (Microsoft); Ulf Knoblich (Microsoft... Application - oral \n",
"39 Zhaoyang Liu (Shanghai Jiao Tong University); ... Application - oral "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"app_oral_df.tail()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Application - poster"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ID</th>\n",
" <th>Title</th>\n",
" <th>Authors</th>\n",
" <th>Track</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>A-1001</td>\n",
" <td>Towards Knowledge Discovery from the Vatican S...</td>\n",
" <td>Donatella Firmani (Roma Tre University); Marco...</td>\n",
" <td>Application - poster</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>A-1001</td>\n",
" <td>Towards Knowledge Discovery from the Vatican S...</td>\n",
" <td>Donatella Firmani (Roma Tre University); Marco...</td>\n",
" <td>Application - poster</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>A-14</td>\n",
" <td>Visual Search at Alibaba</td>\n",
" <td>Yanhao Zhang (iDST, Alibaba Group); Pan Pan (i...</td>\n",
" <td>Application - poster</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>A-53</td>\n",
" <td>Deep Distributed Fusion Network for Air Qualit...</td>\n",
" <td>Xiuwen Yi (Southwest Jiaotong University); Jun...</td>\n",
" <td>Application - poster</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>A-65</td>\n",
" <td>Deep Interest Network for Click-Through Rate P...</td>\n",
" <td>Guorui Zhou (Alibaba-inc); Xiaoqiang Zhu (Alib...</td>\n",
" <td>Application - poster</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" ID Title \\\n",
"0 A-1001 Towards Knowledge Discovery from the Vatican S... \n",
"1 A-1001 Towards Knowledge Discovery from the Vatican S... \n",
"2 A-14 Visual Search at Alibaba \n",
"3 A-53 Deep Distributed Fusion Network for Air Qualit... \n",
"4 A-65 Deep Interest Network for Click-Through Rate P... \n",
"\n",
" Authors Track \n",
"0 Donatella Firmani (Roma Tre University); Marco... Application - poster \n",
"1 Donatella Firmani (Roma Tre University); Marco... Application - poster \n",
"2 Yanhao Zhang (iDST, Alibaba Group); Pan Pan (i... Application - poster \n",
"3 Xiuwen Yi (Southwest Jiaotong University); Jun... Application - poster \n",
"4 Guorui Zhou (Alibaba-inc); Xiaoqiang Zhu (Alib... Application - poster "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Absolute XPath of the <ul> that lists this track's papers in the saved page.\n",
"app_poster_xpath = '/html/body/main/div[1]/section/div[1]/div/div[4]/div/ul'\n",
"\n",
"# Flat list of every <span> text node found under the list items.\n",
"app_poster_list = tree.xpath(app_poster_xpath)[0].xpath('li/div/span/text()')\n",
"\n",
"# The reshape assumes three text nodes per paper: column 0 is split into ID and\n",
"# Title below, column 2 becomes Authors, and column 1 is not used.\n",
"app_poster_raw = pd.DataFrame(np.array(app_poster_list).reshape(-1, 3))\n",
"\n",
"# IDs in this track carry an 'A-' prefix. Raw string so the regex escape reaches\n",
"# the re module untouched; the capture group makes the split yield\n",
"# ['', ID, Title] per row, which is why columns 1 and 2 are selected.\n",
"app_poster_id_title = app_poster_raw[0].str.split(r'^(A-\\d*):', expand=True)[[1, 2]]\n",
"\n",
"app_poster_df = pd.concat((app_poster_id_title, app_poster_raw[2]), axis=1)\n",
"app_poster_df.columns = ['ID', 'Title', 'Authors']\n",
"\n",
"# Trim surrounding whitespace from every text column.\n",
"for text_col in ['ID', 'Title', 'Authors']:\n",
"    app_poster_df[text_col] = app_poster_df[text_col].str.strip()\n",
"\n",
"app_poster_df['Track'] = \"Application - poster\"\n",
"app_poster_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ID</th>\n",
" <th>Title</th>\n",
" <th>Authors</th>\n",
" <th>Track</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>70</th>\n",
" <td>A-1459</td>\n",
" <td>Alchemist: Accelerating Large-Scale Data Analy...</td>\n",
" <td>Alex Gittens (Rensselaer Polytechnic Institute...</td>\n",
" <td>Application - poster</td>\n",
" </tr>\n",
" <tr>\n",
" <th>71</th>\n",
" <td>A-1513</td>\n",
" <td>MIX: Multi-Channel Information Crossing for Te...</td>\n",
" <td>Haolan Chen (Mobile Internet Group, Tencent); ...</td>\n",
" <td>Application - poster</td>\n",
" </tr>\n",
" <tr>\n",
" <th>72</th>\n",
" <td>A-1535</td>\n",
" <td>Discovering latent patterns of urban cultural ...</td>\n",
" <td>Xiao Zhou (University of Cambridge); Anastasio...</td>\n",
" <td>Application - poster</td>\n",
" </tr>\n",
" <tr>\n",
" <th>73</th>\n",
" <td>A-1573</td>\n",
" <td>Learning Tasks for Multitask Learning: Heterog...</td>\n",
" <td>Harini Suresh (Massachusetts Institute of Tech...</td>\n",
" <td>Application - poster</td>\n",
" </tr>\n",
" <tr>\n",
" <th>74</th>\n",
" <td>A-1592</td>\n",
" <td>StepDeep: A Novel Spatial-temporal Mobility Ev...</td>\n",
" <td>Bilong Shen (Tsinghua University); Xiaodan Lia...</td>\n",
" <td>Application - poster</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" ID Title \\\n",
"70 A-1459 Alchemist: Accelerating Large-Scale Data Analy... \n",
"71 A-1513 MIX: Multi-Channel Information Crossing for Te... \n",
"72 A-1535 Discovering latent patterns of urban cultural ... \n",
"73 A-1573 Learning Tasks for Multitask Learning: Heterog... \n",
"74 A-1592 StepDeep: A Novel Spatial-temporal Mobility Ev... \n",
"\n",
" Authors Track \n",
"70 Alex Gittens (Rensselaer Polytechnic Institute... Application - poster \n",
"71 Haolan Chen (Mobile Internet Group, Tencent); ... Application - poster \n",
"72 Xiao Zhou (University of Cambridge); Anastasio... Application - poster \n",
"73 Harini Suresh (Massachusetts Institute of Tech... Application - poster \n",
"74 Bilong Shen (Tsinghua University); Xiaodan Lia... Application - poster "
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"app_poster_df.tail()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Merging"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ID</th>\n",
" <th>Title</th>\n",
" <th>Authors</th>\n",
" <th>Track</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>133</td>\n",
" <td>R2SDH: Robust Rotated Supervised Discrete Hashing</td>\n",
" <td>Jie Gui (Rutgers University); Ping Li (Baidu R...</td>\n",
" <td>Research - Oral</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>22</td>\n",
" <td>Smoothed Dilated Convolutions for Improved Den...</td>\n",
" <td>Zhengyang Wang (Washington State University); ...</td>\n",
" <td>Research - Oral</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>27</td>\n",
" <td>Discovering Non-Redundant K-means Clusterings ...</td>\n",
" <td>Dominik Mautz (Ludwig Maximilian University of...</td>\n",
" <td>Research - Oral</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>44</td>\n",
" <td>Trajectory-driven Influential Billboard Placement</td>\n",
" <td>Ping Zhang (Wuhan University); Zhifeng Bao (RM...</td>\n",
" <td>Research - Oral</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>83</td>\n",
" <td>Multi-Type Itemset Embedding for Learning Beha...</td>\n",
" <td>Daheng Wang (University of Notre Dame); Meng J...</td>\n",
" <td>Research - Oral</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" ID Title \\\n",
"0 133 R2SDH: Robust Rotated Supervised Discrete Hashing \n",
"1 22 Smoothed Dilated Convolutions for Improved Den... \n",
"2 27 Discovering Non-Redundant K-means Clusterings ... \n",
"3 44 Trajectory-driven Influential Billboard Placement \n",
"4 83 Multi-Type Itemset Embedding for Learning Beha... \n",
"\n",
" Authors Track \n",
"0 Jie Gui (Rutgers University); Ping Li (Baidu R... Research - Oral \n",
"1 Zhengyang Wang (Washington State University); ... Research - Oral \n",
"2 Dominik Mautz (Ludwig Maximilian University of... Research - Oral \n",
"3 Ping Zhang (Wuhan University); Zhifeng Bao (RM... Research - Oral \n",
"4 Daheng Wang (University of Notre Dame); Meng J... Research - Oral "
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Stack the four per-track tables into one. The Application - poster table shows\n",
"# A-1001 in two consecutive rows, so rows that are exact duplicates across all\n",
"# columns are dropped and the index is renumbered afterwards.\n",
"df = (pd.concat((res_oral_df,\n",
"                 res_poster_df,\n",
"                 app_oral_df,\n",
"                 app_poster_df),\n",
"                ignore_index=True)\n",
"        .drop_duplicates()\n",
"        .reset_index(drop=True))\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# Fail if any row of the merged table has a missing value in any column.\n",
"rows_with_missing = df.isnull().any(axis=1)\n",
"assert not rows_with_missing.any()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"df.to_csv('./KDD_Papers.csv', index=False, encoding='utf-8')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.14"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment