Skip to content

Instantly share code, notes, and snippets.

@alfeg
Created October 15, 2013 12:41
Show Gist options
  • Save alfeg/6990995 to your computer and use it in GitHub Desktop.
Save alfeg/6990995 to your computer and use it in GitHub Desktop.
This tool was used to convert files from tabs to spaces. This file can be compiled with Mono.
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
namespace expand
{
public class Program
{
// ---- Options, set by Main from the command line ----
// When true, only tabs in the leading indentation of a line are expanded; the "alltabs" option clears it.
private static bool identOnly = true;
// Set by the "crlf" option; enables the CRLF => LF conversion in ExpandAndFix.
private static bool doCRLFfix;
// Set by "q" / "-q"; suppresses Log output and the progress line.
private static bool quiet;
// Set by "-v"; lets Log print messages flagged as verbose.
private static bool isVerbose;
// Paths of the files to process; ProcessFile combines each one with the current directory.
public static List<string> toInvoke = new List<string>();
// One past the position of the last recognised option (maintained by Has); Main treats the arguments after it as file names.
private static int idxLastArg;
// ---- Progress counters ----
// Number of worker threads shown in the progress line.
private static int _threadsRunning;
// Number of files read so far (incremented by UpdateReadCounter).
private static int readCounter;
// Number of files rewritten so far (incremented by UpdateWriteCounter).
private static int writeCounter;
// Number of files in the current run (set by ProcessFiles).
private static int totalFiles;
// Running totals for the summary line; despite the names they accumulate char counts returned by StreamReader.Read.
private static long totalReadBytes;
private static long totalWriteBytes;
// ---- Encodings and per-thread scratch buffers ----
// UTF-8 encodings used for the output file, with and without a byte order mark.
private static readonly UTF8Encoding utfWithBom = new UTF8Encoding(true);
private static readonly UTF8Encoding utfWithNoBom = new UTF8Encoding(false);
// Per-thread 4096-char buffer for reading chunks of text.
private static readonly ThreadLocal<char[]> inputBuffer = new ThreadLocal<char[]>(() => new char[0x1000]);
// Per-thread 65536-char buffer that collects the converted text of one input chunk.
private static readonly ThreadLocal<char[]> outputBuffer = new ThreadLocal<char[]>(() => new char[0x10000]);
// Per-thread 3-byte buffer HasBom reads the start of a file into.
private static readonly ThreadLocal<byte[]> preambulaBuffer = new ThreadLocal<byte[]>(() => new byte[3]);
// The UTF-8 byte order mark (0xEF, 0xBB, 0xBF).
private static readonly byte[] utf8 = new UTF8Encoding(true).GetPreamble();
/// <summary>
/// Reports whether any of <paramref name="values"/> occurs (case-insensitively) in
/// <paramref name="args"/>, and advances <c>idxLastArg</c> to one past the position
/// of the furthest matching argument.
/// </summary>
/// <param name="args">The leading command-line arguments to inspect.</param>
/// <param name="values">Lower-case spellings of the option being looked for.</param>
/// <returns><c>true</c> when at least one argument matched.</returns>
private static bool Has(List<string> args, params string[] values)
{
    bool found = false;
    foreach (string candidate in args)
    {
        if (!values.Contains(candidate.ToLower()))
        {
            continue;
        }

        found = true;

        // IndexOf yields the first occurrence of this exact spelling.
        int position = args.IndexOf(candidate) + 1;
        if (position > idxLastArg)
        {
            idxLastArg = position;
        }
    }

    return found;
}
/// <summary>
/// Writes <paramref name="message"/> to the console unless output is suppressed.
/// </summary>
/// <param name="message">Text to print.</param>
/// <param name="verbose">When <c>true</c>, the message is printed only in verbose mode.</param>
private static void Log(string message, bool verbose = false)
{
    // Quiet mode silences everything; verbose messages additionally need the verbose switch.
    bool suppressed = quiet || (verbose && !isVerbose);
    if (!suppressed)
    {
        Console.WriteLine(message);
    }
}
/// <summary>
/// Entry point. Parses the options found among the first four arguments, then converts
/// the files named by the remaining arguments, or, when none are given, the files whose
/// names are read line by line from standard input.
/// </summary>
/// <param name="args">
/// Options (<c>q</c>/<c>-q</c>, <c>-v</c>, <c>crlf</c>, <c>alltabs</c>, <c>help</c>, <c>demo</c>)
/// followed by file names. With <c>demo</c>, the next argument is a space separated list
/// of file name suffixes to look for under the current directory.
/// </param>
/// <returns>0 on success; 1 when <c>demo</c> is given without a suffix list.</returns>
public static int Main(string[] args)
{
    Stopwatch sw = Stopwatch.StartNew();

    // Options are only recognised among the first four arguments.
    List<string> arguments = args.Take(4).ToList();
    if (arguments.Any())
    {
        if (Has(arguments, "q", "-q"))
        {
            quiet = true;
        }
        if (Has(arguments, "-v"))
        {
            Console.WriteLine("Verbose mode");
            isVerbose = true;
        }
        if (Has(arguments, "crlf"))
        {
            Log("Will do CRLF => LF conversion");
            doCRLFfix = true;
        }
        if (Has(arguments, "alltabs"))
        {
            Log("Will process all tabs, not only ident");
            identOnly = false;
        }
        if (Has(arguments, "help", "-h", "-?", "--help"))
        {
            Log("Use 'q' for quiet");
            // The option really is spelled 'crlf'; the help text must match what Has() looks for.
            Log("Use 'crlf' argument for CRLF => LF conversion");
            Log("Use 'alltabs' argument for all tabs conversion, not only ident one");
            return 0;
        }
        if (Has(arguments, "demo"))
        {
            string changeIn = args.Skip(idxLastArg).FirstOrDefault();
            if (string.IsNullOrWhiteSpace(changeIn))
            {
                Console.Error.WriteLine("'demo' requires a space separated list of file extensions");
                return 1;
            }

            // Drop empty entries: an empty suffix would match every file.
            string[] ext = changeIn.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            toInvoke =
                Directory.EnumerateFiles(Directory.GetCurrentDirectory(), "*.*", SearchOption.AllDirectories)
                    // ".git" also covers ".gitrewrite" (and any other path containing ".git").
                    .Where(file => !file.Contains(".git")
                                   && ext.Any(suffix => file.EndsWith(suffix, StringComparison.Ordinal)))
                    .ToList();
            ProcessFiles();
            return 0;
        }
    }

    // Everything after the last recognised option is a file name.
    arguments = args.Skip(idxLastArg).ToList();
    Log("Args parsed in " + sw.ElapsedMilliseconds);
    Log(string.Join(" ", arguments), verbose: true);
    if (arguments.Any())
    {
        Log("Processing files from arguments");
        toInvoke = arguments;
        ProcessFiles();
        return 0;
    }

    Log("Waiting for console input");
    // Read until end of input. Peek() can report -1 on a pipe that merely has no data
    // buffered yet, which would silently cut the file list short.
    string input;
    while ((input = Console.In.ReadLine()) != null)
    {
        if (string.IsNullOrWhiteSpace(input))
        {
            continue; // a blank line does not name a file
        }
        toInvoke.Add(input);
    }
    ProcessFiles();
    return 0;
}
/// <summary>
/// Converts every file listed in <c>toInvoke</c>. The file indices are dealt round-robin
/// into at most twelve slices, which are processed in parallel; a summary line is logged
/// at the end.
/// </summary>
public static void ProcessFiles()
{
    Log("Reading/Writing files. Total: " + toInvoke.Count);
    totalFiles = toInvoke.Count;
    if (toInvoke.Count == 0)
    {
        return;
    }

    const int numOfSlices = 12;
    List<int> idSlicesList = Enumerable.Range(0, toInvoke.Count).ToList();
    IEnumerable<IEnumerable<int>> idSlices = idSlicesList.Split(numOfSlices);

    Log("Starting threads");
    Parallel.ForEach(idSlices, ids =>
    {
        // Track how many slices are being worked on so the progress line can report it.
        Interlocked.Increment(ref _threadsRunning);
        try
        {
            foreach (int id in ids)
            {
                ProcessFile(id);
            }
        }
        finally
        {
            Interlocked.Decrement(ref _threadsRunning);
        }
    });

    // The progress line is written with "\r" and no line terminator; finish it so the
    // summary starts on a line of its own. It is only ever printed once a file was read.
    if (!quiet && readCounter > 0)
    {
        Console.WriteLine();
    }
    Log(string.Format("Total files: {0}, Read/WriteBytes: {1}/{2}", toInvoke.Count, totalReadBytes, totalWriteBytes));
}
/// <summary>
/// Records that one more file has been read and refreshes the progress line.
/// </summary>
private static void UpdateReadCounter()
{
    // Atomic: called concurrently from the parallel workers.
    Interlocked.Increment(ref readCounter);

    UpdateText();
}
/// <summary>
/// Redraws the single-line progress indicator. Nothing is printed in quiet mode, or
/// unless one of the two counters is zero or a multiple of 100.
/// </summary>
private static void UpdateText()
{
    if (quiet)
    {
        return;
    }

    bool readAtMilestone = readCounter == 0 || readCounter % 100 == 0;
    bool writeAtMilestone = writeCounter == 0 || writeCounter % 100 == 0;
    if (!readAtMilestone && !writeAtMilestone)
    {
        return;
    }

    // "\r" returns to the start of the line so the previous progress text is overwritten.
    Console.Write(
        "\rRead: {0} of {1}, Write: {2} of {3}, using {4} threads",
        readCounter,
        totalFiles,
        writeCounter,
        totalFiles,
        _threadsRunning);
}
/// <summary>
/// Records that one more file has been rewritten and refreshes the progress line.
/// </summary>
private static void UpdateWriteCounter()
{
    // Atomic: called concurrently from the parallel workers.
    Interlocked.Increment(ref writeCounter);

    UpdateText();
}
/// <summary>
/// Checks whether <paramref name="stream"/> starts, at its current position, with the
/// UTF-8 byte order mark (0xEF, 0xBB, 0xBF). The stream position is restored before returning.
/// </summary>
/// <param name="stream">A seekable file stream positioned at the start of the content.</param>
/// <returns><c>true</c> only when all three BOM bytes are present.</returns>
private static bool HasBom(FileStream stream)
{
    byte[] bits = preambulaBuffer.Value;

    // Read may legally return fewer bytes than requested; keep going until the
    // BOM length is reached or the file ends.
    int read = 0;
    while (read < utf8.Length)
    {
        int chunk = stream.Read(bits, read, utf8.Length - read);
        if (chunk == 0)
        {
            break;
        }
        read += chunk;
    }
    stream.Seek(-read, SeekOrigin.Current);

    // The buffer is reused per thread: for a file shorter than the BOM the remaining
    // slots still hold bytes from a previous file and must not be compared.
    if (read < utf8.Length)
    {
        return false;
    }

    for (int i = 0; i < utf8.Length; i++)
    {
        if (bits[i] != utf8[i])
        {
            return false;
        }
    }
    return true;
}
/// <summary>
/// Converts the file at index <paramref name="id"/> of <c>toInvoke</c> in place. The
/// converted text is first collected in memory; the file is rewritten only when
/// <c>ExpandAndFix</c> reports that it changed something.
/// </summary>
/// <remarks>
/// Tab stops are 2 columns wide for <c>.xml</c> and <c>.vm</c> files and 4 otherwise.
/// A UTF-8 file keeps (or keeps lacking) its byte order mark; a file detected as another
/// Unicode encoding is written back in that same encoding.
/// </remarks>
/// <param name="id">Index into <c>toInvoke</c>.</param>
private static void ProcessFile(int id)
{
    if (id < 0 || id >= toInvoke.Count)
    {
        Log("File index out of range " + id, true);
        return; // indexing toInvoke below would throw
    }

    string filePath = Path.Combine(Directory.GetCurrentDirectory(), toInvoke[id]);
    if (!File.Exists(filePath))
    {
        Log("File not found: " + toInvoke[id]);
        return;
    }

    string extension = (Path.GetExtension(filePath) ?? "").ToLowerInvariant();
    int tabStops = (extension == ".xml" || extension == ".vm") ? 2 : 4;

    using (var outputMemoryBuffer = new MemoryStream())
    using (var outputBufferWriter = new StreamWriter(outputMemoryBuffer))
    {
        bool isDirty;
        Encoding enc;
        bool hasBom;

        // Read access is enough here; the file is reopened for writing only if it changed.
        using (var inputFile = new FileStream(filePath, FileMode.Open, FileAccess.Read))
        {
            hasBom = HasBom(inputFile);
            using (var inputReader = new StreamReader(inputFile, true))
            {
                isDirty = ExpandAndFix(inputReader, outputBufferWriter, tabStops);
                // Only meaningful after the first read, which is when the BOM is inspected.
                enc = inputReader.CurrentEncoding;
                UpdateReadCounter();
            }
        }

        if (!isDirty)
        {
            return;
        }

        outputBufferWriter.Flush();
        outputMemoryBuffer.Seek(0, SeekOrigin.Begin);
        if (outputMemoryBuffer.Length == 0)
        {
            Log("Converted content is empty: " + toInvoke[id], true);
        }

        // Compare code pages rather than EncodingName, which is a localized display string.
        Encoding outEncoding;
        if (enc.CodePage == Encoding.UTF8.CodePage)
        {
            outEncoding = hasBom ? utfWithBom : utfWithNoBom;
        }
        else
        {
            // The reader detected a non-UTF-8 BOM (UTF-16/UTF-32); keep that encoding
            // instead of silently converting the file to UTF-8.
            outEncoding = enc;
        }

        using (var outFile = new FileStream(filePath, FileMode.Truncate))
        using (var outputWriter = new StreamWriter(outFile, outEncoding))
        {
            // Not disposed on purpose: disposing the reader would close the memory
            // stream, which is owned by the enclosing using block.
            var outputBufferReader = new StreamReader(outputMemoryBuffer);
            char[] buffer = inputBuffer.Value;
            int read;
            while ((read = outputBufferReader.Read(buffer, 0, buffer.Length)) > 0)
            {
                outputWriter.Write(buffer, 0, read);
                Interlocked.Add(ref totalWriteBytes, read);
            }
        }

        UpdateWriteCounter();
    }
}
/// <summary>
/// Copies text from <paramref name="fileContent"/> to <paramref name="resultFileContent"/>,
/// replacing tabs with spaces up to the next tab stop and, when the CRLF fix is enabled,
/// turning each "\r\n" into "\n".
/// </summary>
/// <remarks>
/// With <c>identOnly</c> set, only tabs that precede the first non-space character of a
/// line are expanded; later tabs are copied unchanged. A '\r' that is not followed by
/// '\n' is always copied unchanged.
/// </remarks>
/// <param name="fileContent">Source text.</param>
/// <param name="resultFileContent">Destination; flushed before returning.</param>
/// <param name="tabLength">Distance between tab stops, in columns.</param>
/// <returns><c>true</c> when at least one tab was expanded or one '\r' was removed.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="tabLength"/> is not positive, or does not fit the output buffer.
/// </exception>
private static bool ExpandAndFix(StreamReader fileContent, StreamWriter resultFileContent, int tabLength)
{
    char[] inBuffer = inputBuffer.Value;
    char[] outBuffer = outputBuffer.Value;
    if (tabLength <= 0 || tabLength >= outBuffer.Length)
    {
        throw new ArgumentOutOfRangeException(
            "tabLength", tabLength, "Tab width must be positive and smaller than the output buffer.");
    }

    bool isIdent = true;      // still inside the leading whitespace of the current line
    bool isDirty = false;
    bool pendingCr = false;   // a '\r' is held back until the next character is known
    int currentPositionInLine = 0;
    int outIdx = 0;
    int read;
    while ((read = fileContent.Read(inBuffer, 0, inBuffer.Length)) > 0)
    {
        Interlocked.Add(ref totalReadBytes, read);
        for (int i = 0; i < read; i++)
        {
            // One input character emits at most a held-back '\r' plus a full tab
            // expansion; make room for that before touching the buffer.
            if (outBuffer.Length - outIdx < tabLength + 1)
            {
                resultFileContent.Write(outBuffer, 0, outIdx);
                outIdx = 0;
            }

            char t = inBuffer[i];
            if (pendingCr)
            {
                pendingCr = false;
                if (t == '\n')
                {
                    isDirty = true; // "\r\n": the '\r' is dropped
                }
                else
                {
                    outBuffer[outIdx++] = '\r'; // lone '\r': not a Windows EOL, keep it
                }
            }

            switch (t)
            {
                case '\t':
                    if (identOnly && !isIdent)
                    {
                        // Past the indentation and only indentation is converted: keep the tab.
                        outBuffer[outIdx++] = t;
                    }
                    else
                    {
                        int n = tabLength - (currentPositionInLine % tabLength);
                        for (int j = 0; j < n; j++)
                        {
                            outBuffer[outIdx++] = ' ';
                        }
                        currentPositionInLine += n;
                        isDirty = true;
                    }
                    break;
                case '\r':
                    if (doCRLFfix)
                    {
                        // Decide once the following character is seen (possibly in the next chunk).
                        pendingCr = true;
                    }
                    else
                    {
                        outBuffer[outIdx++] = t;
                    }
                    break;
                case '\n':
                    outBuffer[outIdx++] = t;
                    currentPositionInLine = 0;
                    isIdent = true;
                    break;
                default:
                    if (t != ' ')
                    {
                        isIdent = false;
                    }
                    outBuffer[outIdx++] = t;
                    currentPositionInLine++;
                    break;
            }
        }
        resultFileContent.Write(outBuffer, 0, outIdx);
        outIdx = 0;
    }

    if (pendingCr)
    {
        // The input ended on a '\r' with no '\n' after it.
        resultFileContent.Write('\r');
    }
    resultFileContent.Flush();
    return isDirty;
}
}
/// <summary>
/// LINQ helpers used by the file processing code.
/// </summary>
internal static class LinqExtensions
{
    /// <summary>
    /// Deals the items of <paramref name="list"/> round-robin into at most
    /// <paramref name="parts"/> groups: item <c>k</c> goes to group <c>k % parts</c>.
    /// Groups are produced in order of their first item, and empty groups are omitted,
    /// so fewer than <paramref name="parts"/> groups are returned for short inputs.
    /// Evaluation is deferred until the result is enumerated.
    /// </summary>
    /// <param name="list">Items to distribute.</param>
    /// <param name="parts">Maximum number of groups; must be positive.</param>
    /// <returns>The groups, each preserving the original item order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="list"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="parts"/> is zero or negative.</exception>
    public static IEnumerable<IEnumerable<T>> Split<T>(this IEnumerable<T> list, int parts)
    {
        if (list == null)
        {
            throw new ArgumentNullException("list");
        }
        if (parts <= 0)
        {
            // Fail here rather than with a DivideByZeroException during enumeration.
            throw new ArgumentOutOfRangeException("parts", parts, "The number of parts must be positive.");
        }

        // Use the positional index instead of a captured counter so the query has no side effects.
        return list
            .Select((item, index) => new { Item = item, Slot = index % parts })
            .GroupBy(entry => entry.Slot, entry => entry.Item)
            .Select(group => group.AsEnumerable());
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment