Created
June 18, 2018 23:14
-
-
Save Dviejopomata/d6902c5602b8795f84873a37b46e219f to your computer and use it in GitHub Desktop.
xlsx to csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Data; | |
using System.Diagnostics; | |
using System.IO; | |
using System.Linq; | |
using DocumentFormat.OpenXml.Packaging; | |
using DocumentFormat.OpenXml.Spreadsheet; | |
namespace ExcelToCsv | |
{ | |
class Program | |
{ | |
static void Main(string[] args) | |
{ | |
const string dirName = "/home/dviejo/go/src/github.com/nextagilesoft/callejero/callejero-api/tmp"; | |
var docName = $"{dirName}/provincias.xlsx"; | |
var outDocName = $"{dirName}/provincias1.csv"; | |
var excelService = new ExcelService(); | |
var stream = excelService.ExcelToCsv(new FileStream(docName, FileMode.Open)); | |
stream.CopyToAsync(new FileStream(outDocName, FileMode.Create)); | |
} | |
} | |
public class ExcelService | |
{ | |
public Stream ExcelToCsv(Stream stream, string delimiter = ";", bool firstRowIsHeader = true) | |
{ | |
Stopwatch stopwatch = new Stopwatch(); | |
stopwatch.Start(); | |
var dt = ReadExcelSheet(stream, firstRowIsHeader); | |
Console.WriteLine($"ReadExcelSheet Elapsed {stopwatch.Elapsed.ToString()}"); | |
return DatatableToCsv(dt, delimiter); | |
} | |
private static Stream DatatableToCsv(DataTable dt, string delimiter = ";") | |
{ | |
MemoryStream mstream = new MemoryStream(); | |
StreamWriter writer = new StreamWriter(mstream); | |
foreach (DataRow row in dt.Rows) | |
{ | |
writer.WriteLine(string.Join(delimiter, row.ItemArray.Select(x => x.ToString())) + delimiter); | |
} | |
writer.Flush(); | |
mstream.Seek(0, SeekOrigin.Begin); | |
return mstream; | |
} | |
private DataTable ReadExcelSheet(Stream stream, bool firstRowIsHeader) | |
{ | |
Stopwatch stopwatch = new Stopwatch(); | |
stopwatch.Start(); | |
DataTable dt = new DataTable(); | |
using (SpreadsheetDocument doc = SpreadsheetDocument.Open(stream, false)) | |
{ | |
var dictionary = LoadDictionary(doc); | |
Console.WriteLine($"Open SpreadsheetDocument {stopwatch.Elapsed.ToString()}"); | |
//Read the first Sheets | |
Sheet sheet = doc.WorkbookPart.Workbook.Sheets.GetFirstChild<Sheet>(); | |
Worksheet worksheet = (doc.WorkbookPart.GetPartById(sheet.Id.Value) as WorksheetPart)?.Worksheet; | |
if (worksheet == null) | |
{ | |
throw new Exception($"Worksheet is null for file {stream}"); | |
} | |
var rows = worksheet.GetFirstChild<SheetData>().Descendants<Row>(); | |
foreach (Row row in rows) | |
{ | |
//Read the first row as header | |
if (row.RowIndex.Value == 1) | |
{ | |
var j = 1; | |
foreach (Cell cell in row.Descendants<Cell>()) | |
{ | |
var colunmName = firstRowIsHeader ? GetCellValue(dictionary, cell) : "Field" + j++; | |
dt.Columns.Add(colunmName); | |
} | |
} | |
else | |
{ | |
dt.Rows.Add(); | |
int i = 0; | |
foreach (Cell cell in row.Descendants<Cell>()) | |
{ | |
dt.Rows[dt.Rows.Count - 1][i] = GetCellValue(dictionary, cell); | |
i++; | |
} | |
} | |
} | |
} | |
return dt; | |
} | |
private static Dictionary<int, string> LoadDictionary(SpreadsheetDocument doc) | |
{ | |
int i = 0; | |
Dictionary<int, string> dictionary = new Dictionary<int, string>(); | |
foreach (var ss in doc.WorkbookPart.SharedStringTablePart.SharedStringTable.ChildElements) | |
{ | |
dictionary.Add(i++, ss.InnerText); | |
} | |
return dictionary; | |
} | |
private string GetCellValue(Dictionary<int, string> dictionary, Cell cell) | |
{ | |
string value = cell.CellValue?.InnerText; | |
if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString) | |
{ | |
return dictionary[int.Parse(value)]; | |
} | |
return value; | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment