Skip to content

Instantly share code, notes, and snippets.

@Junzki
Created May 25, 2015 18:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Junzki/b12822085c011019c4b6 to your computer and use it in GitHub Desktop.
Save Junzki/b12822085c011019c4b6 to your computer and use it in GitHub Desktop.
using System;
using System.Collections.Generic;
using System.IO;
using System.Text.RegularExpressions;
namespace zh_clean_csharp
{
/// <summary>
/// Command-line tool that copies every file with a given extension in a directory to a
/// sibling "&lt;name&gt;_Filtered" file, cutting each line off at its first character in the
/// U+4E00..U+9FA5 (CJK ideograph) range.
/// </summary>
class Program
{
    // Marker appended to the base name of every output file.
    private const string OutputSuffix = "_Filtered";

    // Matches from the first character in U+4E00..U+9FA5 through the end of the line.
    // ".*" (not ".+") so that a line ending in a single such character is trimmed as well.
    // Built once instead of once per processed line.
    private static readonly Regex ChineseTail = new Regex("[\\u4e00-\\u9fa5].*$");

    /// <summary>
    /// Entry point. Expects two arguments: the directory to scan and the file extension
    /// to process (with or without a leading dot).
    /// </summary>
    /// <param name="args">args[0] = directory path, args[1] = file extension.</param>
    static void Main(string[] args)
    {
        // Guard against missing arguments instead of failing with IndexOutOfRangeException.
        if (args == null || args.Length < 2)
        {
            Console.Error.WriteLine("Usage: zh_clean_csharp <directory> <file-extension>");
            Environment.ExitCode = 1;
            return;
        }

        string path = args[0];
        string fileType = args[1];

        if (!Directory.Exists(path))
        {
            Console.Error.WriteLine("Directory not found: " + path);
            Environment.ExitCode = 1;
            return;
        }

        // The file list is captured before any output is written, so files created by
        // this run are not picked up by this run.
        string[] candidates = FileFilter(Directory.GetFiles(path), fileType);
        foreach (string file in candidates)
        {
            FilterAndOutput(file, fileType);
            Console.WriteLine(GetFileName(file, fileType) + "......OK");
        }

        Console.WriteLine("Press any key to continue...");
        Console.ReadKey(true);
    }

    /// <summary>
    /// Reads <paramref name="file"/> line by line, passes each line through
    /// <see cref="TextFilter"/> and writes the result to a file next to the input whose
    /// base name carries the "_Filtered" suffix (e.g. "a.txt" -> "a_Filtered.txt").
    /// </summary>
    /// <param name="file">Path of the input file.</param>
    /// <param name="FILETYPE">Extension of the input file, with or without a leading dot.</param>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    public static void FilterAndOutput(string file, string FILETYPE)
    {
        if (file == null)
        {
            throw new ArgumentNullException("file");
        }

        string outputPath = BuildOutputPath(file, FILETYPE);

        using (StreamReader reader = new StreamReader(file))
        using (StreamWriter writer = new StreamWriter(outputPath))
        {
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                writer.WriteLine(TextFilter(line));
            }
        }
    }

    /// <summary>
    /// Returns the entries of <paramref name="files"/> that end with
    /// "." + <paramref name="FILETYPE"/>, compared case-insensitively. The extension is
    /// treated as literal text, so characters such as '+' have no special meaning.
    /// </summary>
    /// <param name="files">Candidate file paths; null entries are skipped.</param>
    /// <param name="FILETYPE">Extension to keep, with or without a leading dot.</param>
    /// <returns>The matching paths, in their original order and original casing.</returns>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    public static string[] FileFilter(string[] files, string FILETYPE)
    {
        if (files == null)
        {
            throw new ArgumentNullException("files");
        }

        string suffix = ExtensionSuffix(FILETYPE);
        List<string> matches = new List<string>();
        foreach (string file in files)
        {
            // Require at least one character before the extension.
            if (file != null
                && file.Length > suffix.Length
                && file.EndsWith(suffix, StringComparison.OrdinalIgnoreCase))
            {
                matches.Add(file);
            }
        }

        return matches.ToArray();
    }

    /// <summary>
    /// Returns the file name of <paramref name="file"/> without its directory and without
    /// the trailing "." + <paramref name="FILETYPE"/> (matched case-insensitively). When the
    /// name does not end with that extension, the full file name is returned.
    /// </summary>
    /// <param name="file">Path of the file.</param>
    /// <param name="FILETYPE">Extension to strip, with or without a leading dot.</param>
    private static string GetFileName(string file, string FILETYPE)
    {
        // Path.GetFileName uses the platform's directory separators and needs no LINQ.
        string fileName = Path.GetFileName(file);
        string suffix = ExtensionSuffix(FILETYPE);

        if (fileName.EndsWith(suffix, StringComparison.OrdinalIgnoreCase))
        {
            // Strip only the final occurrence; "a.txt.bak.txt" keeps its inner ".txt".
            return fileName.Substring(0, fileName.Length - suffix.Length);
        }

        return fileName;
    }

    /// <summary>
    /// Removes everything from the first character in the U+4E00..U+9FA5 range to the end
    /// of the line. Input without such a character is returned unchanged.
    /// </summary>
    /// <param name="origin_data">A single line of text.</param>
    /// <returns>The part of the line that precedes the first matched character.</returns>
    public static string TextFilter(string origin_data)
    {
        Match match = ChineseTail.Match(origin_data);
        if (!match.Success)
        {
            return origin_data;
        }

        // Use the match position directly rather than searching for the matched text again.
        return origin_data.Remove(match.Index, match.Length);
    }

    // Builds the output path by inserting "_Filtered" between the base name and the
    // extension of the final path component only; directory names are left untouched.
    private static string BuildOutputPath(string file, string FILETYPE)
    {
        string fileName = Path.GetFileName(file);
        string baseName = GetFileName(file, FILETYPE);

        // Path.GetFileName returns the tail of the path, so the rest is the directory prefix.
        string directoryPart = file.Substring(0, file.Length - fileName.Length);
        string extensionPart = fileName.Substring(baseName.Length);

        return directoryPart + baseName + OutputSuffix + extensionPart;
    }

    // Normalizes an extension argument ("txt" or ".txt") to the literal suffix ".txt".
    private static string ExtensionSuffix(string FILETYPE)
    {
        if (FILETYPE == null)
        {
            throw new ArgumentNullException("FILETYPE");
        }

        return "." + FILETYPE.TrimStart('.');
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment