Skip to content

Instantly share code, notes, and snippets.

@supernovel
Created April 7, 2019 05:16
Show Gist options
  • Save supernovel/0558caac5078e78e13505cb7d9e93d7d to your computer and use it in GitHub Desktop.
Save supernovel/0558caac5078e78e13505cb7d9e93d7d to your computer and use it in GitHub Desktop.
Converts an HTML table to JSON.
var $ = require('cheerio');
// Public API of this module: the table converter, exported under its own name.
module.exports = { tableToJson: tableToJson };
/**
 * Converts an HTML table into an array of plain objects, one per data row.
 *
 * Each `<tr>` is classified by the kinds of cells it contains:
 *   - only `<th>`: header labels of a table that grows downward; the labels
 *     become the keys used for the following data rows (a `colspan` of N
 *     produces the keys `text`, `text1`, ..., `text(N-1)`).
 *   - only `<td>`: one data row of a table that grows downward; each cell is
 *     stored under the key of its column.
 *   - mixed `<th>`/`<td>`: a table that grows sideways; every `<td>` is a value
 *     of the most recent `<th>` in the row. Several values under one key are
 *     collected into an array, and a `<td>` containing `<table>` elements is
 *     converted recursively.
 *
 * `rowspan` on header and data cells is carried over to the following rows.
 * Missing, zero or non-numeric `rowspan`/`colspan` values are treated as 1.
 *
 * NOTE(review): `find("tr")` is a descendant search, so rows of nested tables
 * are also visited as if they belonged to this table. That long-standing
 * behaviour is kept as is here - confirm whether callers rely on it.
 *
 * @param {*} table - Anything accepted by the cheerio `$()` call
 *   (it is passed to `$()` unchanged).
 * @returns {Array<Object>} One object per row that produced data; rows that
 *   produced nothing (e.g. header-only rows) are omitted.
 */
function tableToJson(table) {
  var colGroup = []; // column titles that the data cells are keyed by
  var tdRowSpans = []; // per column: rows still covered by a stretched data cell
  var thRowSpans = []; // per column: rows still covered by a stretched header cell
  var tdRowSpanDatas = []; // per column: text of the stretched data cell
  var thRowSpanDatas = []; // per column: text of the stretched header cell
  var result = [];

  $(table).find("tr").each(function (rowIdx, row) {
    var thCount = 0;
    var tdCount = 0;
    var cellCount = 0;
    // Counting happens as a side effect of the filter pass over the children.
    var cells = $(row).children().filter(function (idx, element) {
      if (element.name === "th") {
        thCount++;
        cellCount++;
      }
      if (element.name === "td") {
        tdCount++;
        cellCount++;
      }
      return element.type === "tag";
    });

    // Any row that is not purely data introduces a new set of titles.
    if (tdCount !== cellCount) colGroup = [];

    var cellIdx = 0;

    // Re-apply header cells stretched from previous rows first, because the
    // stretched data cells below are keyed by the restored titles.
    var thFreeCols = carryRowSpans(thRowSpans, function (col) {
      colGroup[col] = thRowSpanDatas[col];
    });
    var tdFreeCols = carryRowSpans(tdRowSpans, function (col) {
      if (!result[rowIdx]) result[rowIdx] = {};
      result[rowIdx][colGroup[col]] = tdRowSpanDatas[col];
    });

    cells.each(function (notUsed, cell) {
      var text = $(cell).text().trim();

      // Skip over columns that are occupied by a cell stretched from above.
      if (cell.name === "th" && thFreeCols.length > 0) {
        cellIdx = thFreeCols.shift();
      }
      if (cell.name === "td" && tdFreeCols.length > 0) {
        cellIdx = tdFreeCols.shift();
      }

      if (tdCount === cellCount && colGroup.length === 0) {
        // A data row without any known titles cannot be keyed.
        console.log("잘못된 테이블 입니다.");
        return;
      }

      if (tdCount === cellCount && colGroup.length >= tdCount) {
        // Data row of a downward-growing table.
        rememberRowSpan(cell, cellIdx, text, tdRowSpans, tdRowSpanDatas);
        if (!result[rowIdx]) result[rowIdx] = {};
        result[rowIdx][colGroup[cellIdx]] = text;
      } else if (thCount === cellCount) {
        // Header row of a downward-growing table.
        var colspan = readSpan(cell, "colspan");
        rememberRowSpan(cell, cellIdx, text, thRowSpans, thRowSpanDatas);
        for (var offset = 0; offset < colspan; offset++) {
          // Extra columns of a spanning header get a numeric suffix.
          colGroup[cellIdx] = offset === 0 ? text : text + offset;
          cellIdx++;
        }
        // Undo the last step; the shared increment below re-applies it.
        cellIdx = cellIdx - 1;
      } else if (cell.name === "th") {
        // Sideways-growing table: a new title for the values that follow.
        rememberRowSpan(cell, cellIdx, text, thRowSpans, thRowSpanDatas);
        colGroup.push(text);
      } else if (cell.name === "td") {
        // Sideways-growing table: a value of the most recent title.
        var nestedTables = $(cell).find("table");
        var key = colGroup[colGroup.length - 1];
        if (!result[rowIdx]) result[rowIdx] = {};
        if (nestedTables.length > 0) {
          nestedTables.each(function (subIdx, subTable) {
            appendCellValue(result[rowIdx], key, tableToJson(subTable));
          });
        } else {
          appendCellValue(result[rowIdx], key, text);
        }
      } else {
        console.log("알 수 없는 태그 : ", cell.name);
      }

      cellIdx++;
    });
  });

  // Drop rows that produced no data (holes left by header rows) in one pass.
  return result.filter(function (entry) {
    return entry && !(entry instanceof Array && entry.length === 0);
  });
}

/**
 * Advances the row-span bookkeeping by one row.
 * Calls `onCarried(col)` for every column still covered by a stretched cell
 * and decrements its counter; returns the columns that are free in this row,
 * followed by the first column index past the tracked ones.
 */
function carryRowSpans(spans, onCarried) {
  var freeCols = [];
  for (var col = 0; col < spans.length; col++) {
    if (spans[col] > 0) {
      onCarried(col);
      spans[col]--;
    } else {
      freeCols.push(col);
    }
  }
  if (spans.length > 0) freeCols.push(spans.length);
  return freeCols;
}

/**
 * Reads a `rowspan`/`colspan` attribute as an integer of at least 1.
 * Missing, zero, negative and non-numeric values all yield 1.
 */
function readSpan(cell, attrName) {
  var parsed = parseInt(cell.attribs[attrName], 10);
  return parsed >= 1 ? parsed : 1; // NaN fails the comparison as well
}

/** Records a cell that stretches over following rows in the given trackers. */
function rememberRowSpan(cell, col, text, spans, datas) {
  var rowspan = readSpan(cell, "rowspan");
  if (rowspan > 1) {
    spans[col] = rowspan - 1;
    datas[col] = text;
  }
}

/**
 * Stores `value` under `key`; when a truthy value is already present the
 * entry is turned into an array and `value` is appended to it.
 *
 * NOTE(review): an existing value that is already an array (e.g. a converted
 * nested table) is appended to rather than wrapped - kept as in the original.
 */
function appendCellValue(record, key, value) {
  var existing = record[key];
  if (!existing) {
    record[key] = value;
    return;
  }
  if (!(existing instanceof Array)) {
    existing = [existing];
    record[key] = existing;
  }
  existing.push(value);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment