Skip to content

Instantly share code, notes, and snippets.

@Dviejopomata
Created June 18, 2018 23:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Dviejopomata/d6902c5602b8795f84873a37b46e219f to your computer and use it in GitHub Desktop.
Save Dviejopomata/d6902c5602b8795f84873a37b46e219f to your computer and use it in GitHub Desktop.
xlsx to csv
using System;
using System.Collections.Generic;
using System.Data;
using System.Diagnostics;
using System.IO;
using System.Linq;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;
namespace ExcelToCsv
{
class Program
{
static void Main(string[] args)
{
const string dirName = "/home/dviejo/go/src/github.com/nextagilesoft/callejero/callejero-api/tmp";
var docName = $"{dirName}/provincias.xlsx";
var outDocName = $"{dirName}/provincias1.csv";
var excelService = new ExcelService();
var stream = excelService.ExcelToCsv(new FileStream(docName, FileMode.Open));
stream.CopyToAsync(new FileStream(outDocName, FileMode.Create));
}
}
public class ExcelService
{
public Stream ExcelToCsv(Stream stream, string delimiter = ";", bool firstRowIsHeader = true)
{
Stopwatch stopwatch = new Stopwatch();
stopwatch.Start();
var dt = ReadExcelSheet(stream, firstRowIsHeader);
Console.WriteLine($"ReadExcelSheet Elapsed {stopwatch.Elapsed.ToString()}");
return DatatableToCsv(dt, delimiter);
}
private static Stream DatatableToCsv(DataTable dt, string delimiter = ";")
{
MemoryStream mstream = new MemoryStream();
StreamWriter writer = new StreamWriter(mstream);
foreach (DataRow row in dt.Rows)
{
writer.WriteLine(string.Join(delimiter, row.ItemArray.Select(x => x.ToString())) + delimiter);
}
writer.Flush();
mstream.Seek(0, SeekOrigin.Begin);
return mstream;
}
private DataTable ReadExcelSheet(Stream stream, bool firstRowIsHeader)
{
Stopwatch stopwatch = new Stopwatch();
stopwatch.Start();
DataTable dt = new DataTable();
using (SpreadsheetDocument doc = SpreadsheetDocument.Open(stream, false))
{
var dictionary = LoadDictionary(doc);
Console.WriteLine($"Open SpreadsheetDocument {stopwatch.Elapsed.ToString()}");
//Read the first Sheets
Sheet sheet = doc.WorkbookPart.Workbook.Sheets.GetFirstChild<Sheet>();
Worksheet worksheet = (doc.WorkbookPart.GetPartById(sheet.Id.Value) as WorksheetPart)?.Worksheet;
if (worksheet == null)
{
throw new Exception($"Worksheet is null for file {stream}");
}
var rows = worksheet.GetFirstChild<SheetData>().Descendants<Row>();
foreach (Row row in rows)
{
//Read the first row as header
if (row.RowIndex.Value == 1)
{
var j = 1;
foreach (Cell cell in row.Descendants<Cell>())
{
var colunmName = firstRowIsHeader ? GetCellValue(dictionary, cell) : "Field" + j++;
dt.Columns.Add(colunmName);
}
}
else
{
dt.Rows.Add();
int i = 0;
foreach (Cell cell in row.Descendants<Cell>())
{
dt.Rows[dt.Rows.Count - 1][i] = GetCellValue(dictionary, cell);
i++;
}
}
}
}
return dt;
}
private static Dictionary<int, string> LoadDictionary(SpreadsheetDocument doc)
{
int i = 0;
Dictionary<int, string> dictionary = new Dictionary<int, string>();
foreach (var ss in doc.WorkbookPart.SharedStringTablePart.SharedStringTable.ChildElements)
{
dictionary.Add(i++, ss.InnerText);
}
return dictionary;
}
private string GetCellValue(Dictionary<int, string> dictionary, Cell cell)
{
string value = cell.CellValue?.InnerText;
if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString)
{
return dictionary[int.Parse(value)];
}
return value;
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment