Skip to content

Instantly share code, notes, and snippets.

@qianlifeng
Last active August 29, 2015 14:21
Show Gist options
  • Save qianlifeng/cd43f829958289adbcea to your computer and use it in GitHub Desktop.
Save qianlifeng/cd43f829958289adbcea to your computer and use it in GitHub Desktop.
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Bayes
{
/// <summary>
/// Naive Bayes gender guesser for names, driven by a table of per-character
/// male/female occurrence counts loaded from <c>charfreq.csv</c>.
/// Call <see cref="Initialize"/> once before calling <see cref="Guess"/>.
/// </summary>
public class Bayes
{
    // Frequency table. Initialize() replaces it wholesale, so calling Initialize()
    // more than once does not accumulate duplicate rows.
    private static List<NameFrequency> nameFrequencies = new List<NameFrequency>();

    /// <summary>
    /// Loads the frequency table from the file <c>charfreq.csv</c> (relative path).
    /// The first line is skipped; every other non-empty line must hold at least three
    /// comma-separated columns: name, male count, female count.
    /// </summary>
    /// <exception cref="FormatException">
    /// A line has fewer than three columns, or a count is not a valid integer.
    /// </exception>
    public static void Initialize()
    {
        // get charfreq.csv from https://gist.github.com/qianlifeng/c4470544c8f953043ac9
        var loaded = new List<NameFrequency>();

        // The using block guarantees the file handle is released even if parsing throws.
        using (var reader = new StreamReader(File.OpenRead(@"charfreq.csv")))
        {
            // skip first line
            reader.ReadLine();
            int lineNumber = 1;

            string line;
            while ((line = reader.ReadLine()) != null)
            {
                lineNumber++;
                if (string.IsNullOrEmpty(line))
                {
                    continue;
                }

                var values = line.Split(',');
                if (values.Length < 3)
                {
                    throw new FormatException(string.Format(
                        CultureInfo.InvariantCulture,
                        "charfreq.csv line {0}: expected 3 comma-separated columns but found {1}.",
                        lineNumber,
                        values.Length));
                }

                // The file is machine-readable data, so parse independently of the user's culture.
                loaded.Add(new NameFrequency(
                    values[0],
                    int.Parse(values[1], CultureInfo.InvariantCulture),
                    int.Parse(values[2], CultureInfo.InvariantCulture)));
            }
        }

        // Publish only after the whole file parsed successfully.
        nameFrequencies = loaded;
    }

    /// <summary>
    /// Guesses the gender for <paramref name="name"/>. The first character is dropped
    /// (treated as the surname) and the remaining characters are scored against the table.
    /// Characters that are not in the table are ignored.
    /// </summary>
    /// <param name="name">Full name, surname first. Must not be null.</param>
    /// <returns>
    /// The original name, the more likely gender, and that gender's normalized score.
    /// When neither gender has a positive score the result is reported as a tie
    /// (<c>IsMale = false</c>, <c>Probability = 0.5</c>).
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="name"/> is null.</exception>
    /// <exception cref="InvalidOperationException">
    /// The frequency table holds no observations (for example, Initialize was never called).
    /// </exception>
    public static GuessResult Guess(string name)
    {
        if (name == null)
        {
            throw new ArgumentNullException("name");
        }

        // P(gender=male | name=本山)
        //   = P(name=本山 | gender=male) * P(gender=male) / P(name=本山)
        //   = P(name has 本 | gender=male) * P(name has 山 | gender=male) * P(gender=male) / P(name=本山)

        // Class totals do not depend on the name, so compute them once instead of once
        // per character. long avoids the OverflowException that an int Sum can raise.
        long totalMale = nameFrequencies.Sum(o => (long)o.MaleCount);
        long totalFemale = nameFrequencies.Sum(o => (long)o.FeMaleCount);
        long totalAll = totalMale + totalFemale;
        if (totalAll == 0)
        {
            throw new InvalidOperationException(
                "The frequency table is empty; call Bayes.Initialize() before Bayes.Guess().");
        }

        // Remove the surname so it does not influence the result. Names with zero or one
        // character have no given-name part and are scored on the class priors alone.
        string givenName = name.Length > 1 ? name.Substring(1) : string.Empty;

        double pMale = 1;
        double pFeMale = 1;
        foreach (char c in givenName)
        {
            string key = c.ToString();
            NameFrequency frequency = nameFrequencies.FirstOrDefault(o => o.Name == key);
            if (frequency == null)
            {
                continue;
            }

            pMale *= Likelihood(frequency.MaleCount, totalMale);
            pFeMale *= Likelihood(frequency.FeMaleCount, totalFemale);
        }

        // Apply the class priors last.
        pMale *= totalMale / (double)totalAll;
        pFeMale *= totalFemale / (double)totalAll;

        bool isMale = pMale > pFeMale;
        double evidence = pMale + pFeMale;

        // Both scores can be zero (e.g. one character seen only for men and another only
        // for women); report that as an even tie instead of dividing 0 by 0.
        double probability = evidence > 0
            ? (isMale ? pMale : pFeMale) / evidence
            : 0.5;

        return new GuessResult()
        {
            Name = name,
            IsMale = isMale,
            Probability = probability
        };
    }

    // Fraction of a class's total count contributed by one character;
    // 0 when the class has no observations at all.
    private static double Likelihood(int count, long classTotal)
    {
        return classTotal == 0 ? 0d : (double)count / classTotal;
    }
}
/// <summary>
/// One entry of the frequency table: a name key together with the male and
/// female counts recorded for it.
/// </summary>
public class NameFrequency
{
    /// <summary>Text key of the entry.</summary>
    public string Name { get; set; }

    /// <summary>Count recorded for males.</summary>
    public int MaleCount { get; set; }

    /// <summary>Count recorded for females.</summary>
    public int FeMaleCount { get; set; }

    /// <summary>Creates an entry with all members at their default values.</summary>
    public NameFrequency()
    {
    }

    /// <summary>Creates an entry with every member set from the arguments.</summary>
    /// <param name="name">Value for <see cref="Name"/>.</param>
    /// <param name="maleCount">Value for <see cref="MaleCount"/>.</param>
    /// <param name="femaleCount">Value for <see cref="FeMaleCount"/>.</param>
    public NameFrequency(string name, int maleCount, int femaleCount)
    {
        this.FeMaleCount = femaleCount;
        this.MaleCount = maleCount;
        this.Name = name;
    }
}
/// <summary>
/// Result of a gender guess for one name.
/// </summary>
public class GuessResult
{
    /// <summary>The name the guess was made for.</summary>
    public string Name { get; set; }

    /// <summary>True when the guess is male, false when it is female.</summary>
    public bool IsMale { get; set; }

    /// <summary>
    /// Score of the guessed gender as a fraction; <see cref="ToString"/> multiplies it
    /// by 100 to display a percentage.
    /// </summary>
    public double Probability { get; set; }

    /// <summary>
    /// Formats the result as a single line with name, gender and whole-number percentage.
    /// </summary>
    public override string ToString()
    {
        // Scale to percent first and round afterwards. Rounding to two decimals and then
        // multiplying by 100 re-introduces binary floating-point noise
        // (0.57 * 100 == 56.99999999999999), which would leak into the displayed text.
        double percent = Math.Round(Probability * 100);
        return "姓名:" + Name + ",性别:" + (IsMale ? "男性" : "女性") + ",概率:" + percent + "%";
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment