Created
April 7, 2019 05:16
-
-
Save supernovel/0558caac5078e78e13505cb7d9e93d7d to your computer and use it in GitHub Desktop.
Converts an HTML table to JSON.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var $ = require('cheerio'); | |
// Public API of this module: a single converter function.
module.exports = { tableToJson: tableToJson };
/**
 * Converts an HTML table into an array of plain objects (one per data row).
 *
 * Each row is classified by the cells it contains:
 *   - only <th> cells : a header row of a table that grows downwards; the
 *                       cell texts become the column titles.
 *   - only <td> cells : a data row; each cell is stored under the title of
 *                       its column.
 *   - <th> and <td>   : a table that grows sideways; every <td> belongs to
 *                       the most recent <th> of the same row. Repeated values
 *                       for one title are collected into an array, and nested
 *                       tables inside a <td> are converted recursively.
 *
 * `rowspan` is honoured for both header and data cells, `colspan` for header
 * rows (extra columns are titled "<text>1", "<text>2", ...).
 *
 * Only rows that belong to the given table itself are processed; rows of
 * nested tables are handled by the recursive call for that nested table.
 *
 * @param {*} table - anything `$()` accepts that resolves to the table element.
 * @returns {Array<Object>} one object per row that produced data; rows that
 *     produced nothing (e.g. pure header rows) are omitted.
 */
function tableToJson(table) {
    var colGroup = [];        // column titles currently in effect
    var tdRowSpans = [];      // per column: rows a spanning data cell still covers
    var thRowSpans = [];      // per column: rows a spanning header cell still covers
    var tdRowSpanDatas = [];  // per column: text of the spanning data cell
    var thRowSpanDatas = [];  // per column: text of the spanning header cell
    var result = [];

    // Records a rowspan > 1 so the following rows can re-use the cell's text.
    function rememberRowSpan(cell, spans, datas, index, text) {
        var rowspan = parseSpan(cell.attribs.rowspan);
        if (rowspan > 1) {
            spans[index] = rowspan - 1;
            datas[index] = text;
        }
    }

    keepOutermost($(table).find("tr")).each(function (rowIdx, row) {
        var thCount = 0;
        var tdCount = 0;
        var cellCount = 0;
        var cells = $(row).children().filter(function (idx, element) {
            if (element.name === "th") {
                thCount++;
                cellCount++;
            }
            if (element.name === "td") {
                tdCount++;
                cellCount++;
            }
            return element.type === "tag";
        });

        // Any row that is not purely data introduces a new set of titles.
        if (tdCount !== cellCount) colGroup = [];

        var cellIdx = 0;
        var thRemainCells = [];  // column indexes still free for <th> cells
        var tdRemainCells = [];  // column indexes still free for <td> cells
        var idx;

        // Columns covered by a header cell spanning down from an earlier row
        // keep that title; all other indexes (plus one past the end) are free.
        for (idx = 0; idx < thRowSpans.length; idx++) {
            if (thRowSpans[idx] > 0) {
                colGroup[idx] = thRowSpanDatas[idx];
                thRowSpans[idx]--;
            } else {
                thRemainCells.push(idx);
            }
            if (idx === thRowSpans.length - 1) {
                thRemainCells.push(thRowSpans.length);
            }
        }

        // Same for data cells: copy the spanning value into this row.
        for (idx = 0; idx < tdRowSpans.length; idx++) {
            if (tdRowSpans[idx] > 0) {
                if (!result[rowIdx]) result[rowIdx] = {};
                result[rowIdx][colGroup[idx]] = tdRowSpanDatas[idx];
                tdRowSpans[idx]--;
            } else {
                tdRemainCells.push(idx);
            }
            if (idx === tdRowSpans.length - 1) {
                tdRemainCells.push(tdRowSpans.length);
            }
        }

        cells.each(function (notUsed, cell) {
            var text = $(cell).text().trim();

            // Jump over columns occupied by cells spanning from above.
            if (cell.name === "th" && thRemainCells.length > 0) {
                cellIdx = thRemainCells.shift();
            }
            if (cell.name === "td" && tdRemainCells.length > 0) {
                cellIdx = tdRemainCells.shift();
            }

            if (tdCount === cellCount && colGroup.length === 0) {
                // Data row without any preceding titles.
                console.log("잘못된 테이블 입니다.");
                return;
            }

            if (tdCount === cellCount && colGroup.length >= tdCount) {
                // Pure data row.
                rememberRowSpan(cell, tdRowSpans, tdRowSpanDatas, cellIdx, text);
                if (!result[rowIdx]) result[rowIdx] = {};
                result[rowIdx][colGroup[cellIdx]] = text;
            } else if (thCount === cellCount) {
                // Pure header row; a colspan of N yields N titles.
                var colspan = parseSpan(cell.attribs.colspan);
                rememberRowSpan(cell, thRowSpans, thRowSpanDatas, cellIdx, text);
                for (var offset = 0; offset < colspan; offset++) {
                    colGroup[cellIdx] = offset === 0 ? text : text + offset;
                    cellIdx++;
                }
                // Compensate for the unconditional increment below.
                cellIdx = cellIdx - 1;
            } else if (cell.name === "th") {
                // Mixed row: a new title starts here.
                rememberRowSpan(cell, thRowSpans, thRowSpanDatas, cellIdx, text);
                colGroup.push(text);
            } else if (cell.name === "td") {
                // Mixed row: the value belongs to the most recent title.
                if (!result[rowIdx]) result[rowIdx] = {};
                var record = result[rowIdx];
                var title = colGroup[colGroup.length - 1];
                // Only tables directly owned by this cell; deeper ones are
                // picked up by the recursive call.
                var subTables = keepOutermost($(cell).find("table"));
                if (subTables.length > 0) {
                    subTables.each(function (subIdx, subTable) {
                        appendValue(record, title, tableToJson(subTable));
                    });
                } else {
                    appendValue(record, title, text);
                }
            } else {
                console.log("알 수 없는 태그 : ", cell.name);
            }
            cellIdx++;
        });
    });

    // Drop the holes left by rows that produced no record (single pass).
    return result.filter(function (entry) {
        return entry && !(Array.isArray(entry) && entry.length === 0);
    });
}

// Keeps only the elements of a selection with the fewest <table> ancestors,
// i.e. drops elements that live inside a more deeply nested table.
function keepOutermost(selection) {
    var depths = [];
    var shallowest = Infinity;
    selection.each(function (i, node) {
        var depth = $(node).parents("table").length;
        depths.push(depth);
        if (depth < shallowest) shallowest = depth;
    });
    return selection.filter(function (i) {
        return depths[i] === shallowest;
    });
}

// Parses a colspan/rowspan attribute; missing, invalid or < 1 counts as 1.
function parseSpan(raw) {
    var span = parseInt(raw, 10);
    return span > 1 ? span : 1;
}

// Stores value under key; if the key already holds a truthy value, the
// values are collected in an array instead of being overwritten.
function appendValue(record, key, value) {
    if (record[key]) {
        if (!(record[key] instanceof Array)) {
            record[key] = [record[key]];
        }
        record[key].push(value);
    } else {
        record[key] = value;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment