Output of the schema extraction process.
Last active
December 20, 2015 07:09
-
-
Save madelfio/6091794 to your computer and use it in GitHub Desktop.
Schema Extraction
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"use strict"; | |
// Class label for each row of the source table, in document order.
// Bound to the `.orig tr` rows as their data further down in this file.
var classes = [
  'title',
  'non-relational',
  'non-relational',
  'header',
  'group-header',
  'data',
  'data',
  'data',
  'aggregate',
  'group-header',
  'data',
  'data',
  'data',
  'aggregate',
  'blank',
  'non-relational'
];

// Row classes that extract() moves into the schema, in processing order.
var targets = [
  'title',
  'header',
  'data',
  'group-header',
  'aggregate'
];

// <div> appended to <body> that holds the floating copies of the rows.
var container = d3.select('body').append('div');
/**
 * Create an invisible, absolutely positioned duplicate of one table row.
 *
 * Intended as a d3 `selection.each` callback: `this` is the row element being
 * copied. The duplicate is wrapped in its own <table> inside `container`,
 * placed at the coordinates jQuery's `.position()` reports for the original,
 * and starts fully transparent.
 *
 * @param {string} tr - Class name to give the copy (the datum bound to the row).
 * @param {number} i - Index of the row within the selection (unused).
 */
function create_copy(tr, i) {
  var original = d3.select(this);
  var offset = $(original.node()).position();

  // Each copy gets a private <table> wrapper so a bare <tr> can be positioned.
  var clone = container.append('table').append('tr');

  clone.attr('class', tr + ' copy');
  clone.style('position', 'absolute');
  clone.style('left', offset.left + 'px');
  clone.style('top', offset.top + 'px');
  clone.style('opacity', 0);
  clone.html(original.html());
}
// Attach a class label from `classes` to every source-table row (joined by
// index) and build a transparent floating copy of each one.
d3.selectAll('.orig tr').data(classes).each(create_copy);
/**
 * Animate the copied rows of one class into the matching schema table, then
 * continue with the next class listed in `targets`.
 *
 * For the class `targets[tgt_num]`, every copy `tr.<class>` inside `container`
 * is faded in (staggered by 200 ms per row after a 1 s delay), moved to the
 * position of the `#schema .<class>` table, and finally re-parented into that
 * table. Once ALL rows of the class have finished, the next class is started
 * exactly once.
 *
 * @param {number} tgt_num - Index into `targets` of the class to process.
 *     Indexes at or past the end of `targets` are a no-op, which ends the chain.
 */
function extract(tgt_num) {
  // End of the chain: never look up `targets[targets.length]`.
  if (tgt_num >= targets.length) {return;}

  var t = targets[tgt_num];
  var tgt = d3.select('#schema .' + t).node();
  var rows = container.selectAll('tr.' + t);

  // Number of rows whose transition has not finished yet. The 'end' handler
  // below runs once per row, so the next class may only start at zero.
  var remaining = rows.size();

  // No destination table or no rows for this class: nothing to animate, so
  // move straight on instead of stalling the chain.
  if (!tgt || remaining === 0) {
    extract(tgt_num + 1);
    return;
  }

  var pos = $(tgt).position();

  rows
    .transition()
      .delay(function(d, i) {return 1000 + i * 200;})
      .style('opacity', 1)
    .transition()
      .style('top', function(d, i) {return (pos.top + i * 20) + 'px';})
      .style('left', function(d, i) {return pos.left + 'px';})
      .each('end', function(d, i) {
        // Drop the temporary wrapper <table>, then move the row into the
        // schema table and let it take part in normal table layout again.
        d3.select(this.parentNode).remove();
        tgt.appendChild(this);
        d3.select(this).style('position', 'inherit');

        remaining -= 1;
        if (remaining === 0) {extract(tgt_num + 1);}
      });
}
// Start the extraction animation with the first entry of `targets`.
extract(0); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!doctype html> | |
<meta charset="utf-8"> | |
<title>Schema Extraction from Web Tables</title> | |
<style>
/* Base table layout shared by every table on the page. */
table {
  border-collapse: collapse;
  width: inherit;
  margin: 5px 10px;
}

/* Source table: drawn as a raised card. */
table.orig {
  display: inline-block;
  border-radius: 2px;
  box-shadow: 0 0 10px rgba(100, 100, 100, 0.8);
  background-color: #f6f6f6;
}

/* Cell borders: all source cells, but only the outermost schema cells. */
.orig th,
.orig td,
.tgt > tbody > tr > th,
.tgt > tbody > tr > td {
  border: 1px solid #ccc;
}

th,
td {
  padding: 1px 5px;
}

.orig th,
.tgt th {
  background-color: #c33;
  color: white;
}

/* Per-row-class styling. */
.title {
  font-weight: bold;
  color: darkblue;
}

.non-relational {
  color: #888;
  font-size: 0.9em;
}

.aggregate {
  font-style: italic;
}

.group-header td {
  background-color: #ddd;
  border-bottom: 2px solid #ccc;
  font-weight: bold;
  font-style: italic;
}

/* Cell alignment helpers. */
.m {
  text-align: center;
}

.r {
  text-align: right;
}
</style>
<body> | |
<!-- Two-column layout table: the source table on the left, the extracted schema on the right. -->
<table> | |
<tr><th>Source Table</th><th>Extracted Schema</th></tr> | |
<tr><td> | |
<!-- Source table. The script selects its rows with '.orig tr'; each row's class names its role. -->
<table class="example orig"> | |
<tr class="title"><td colspan="3">Patent Applications by Residents</td></tr> | |
<tr class="non-relational"><td colspan="3">Data Source: worldbank.org</td></tr> | |
<tr class="non-relational"><td colspan="3">(showing top countries in each continent)</td></tr> | |
<tr class="header"><th>Country</th><th>Residents</th><th>Applications</th></tr> | |
<tr class="group-header"><td>North America</td><td></td><td></td></tr> | |
<tr class="data"><td class="m">United States</td><td class="r">307,007,000</td><td class="r">224,912</td></tr> | |
<tr class="data"><td class="m">Canada</td><td class="r">33,739,900</td><td class="r">5,067</td></tr> | |
<tr class="data"><td class="m">Mexico</td><td class="r">112,033,369</td><td class="r">822</td></tr> | |
<tr class="aggregate"><td></td><td>N.A. Total</td><td class="r">230,801</td></tr> | |
<tr class="group-header"><td>Asia</td><td></td><td></td></tr> | |
<tr class="data"><td class="m">Japan</td><td class="r">127,557,958</td><td class="r">295,315</td></tr> | |
<tr class="data"><td class="m">China</td><td class="r">1,331,380,000</td><td class="r">229,096</td></tr> | |
<tr class="data"><td class="m">South Korea</td><td class="r">48,747,000</td><td class="r">127,316</td></tr> | |
<tr class="aggregate"><td></td><td>Asia Total</td><td class="r">651,727</td></tr> | |
<tr class="blank"><td> </td><td></td><td></td></tr> | |
<tr class="non-relational"><td colspan="2">Note: data from 2009</td><td></td></tr> | |
</table> | |
</td><td style="vertical-align:top;"> | |
<!-- Schema table. Each nested, initially empty table is looked up by the script as
     '#schema .<class>' and receives the animated rows of that class. -->
<table id="schema" class="tgt" style="display:inline-block;"> | |
<tr><td>Title:</td><td><table class="title"></table></td></tr> | |
<tr><td>Columns:</td><td><table class="header"></table></td></tr> | |
<tr><td>Data:</td><td><table class="data"></table></td></tr> | |
<tr><td>Groups:</td><td><table class="group-header"></table></td></tr> | |
<tr><td>Totals/Subtotals:</td><td><table class="aggregate"></table></td></tr> | |
</table> | |
</td></tr></table> | |
</body> | |
<!-- Libraries are loaded over HTTPS: when this page is served from an HTTPS
     origin, browsers block http:// scripts as mixed content and the
     animation would never run. -->
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.10.2/jquery.min.js"></script>
<script src="https://d3js.org/d3.v3.min.js"></script>
<script src="extract.js"></script>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment