Skip to content

Instantly share code, notes, and snippets.

@zHaytam
Created December 19, 2019 14:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zHaytam/75c8de95fba4e8158cf7945bd316bbb1 to your computer and use it in GitHub Desktop.
Save zHaytam/75c8de95fba4e8158cf7945bd316bbb1 to your computer and use it in GitHub Desktop.
Trying out DataFrame
using System;
using Microsoft.Data.Analysis;
namespace TryingOutDataFrame
{
    /// <summary>
    /// Console walkthrough of common <c>Microsoft.Data.Analysis</c> DataFrame operations:
    /// building columns, filling nulls, deriving columns, merging, sorting,
    /// grouping, counting values and sampling rows.
    /// </summary>
    /// <remarks>
    /// <c>PrettyPrint</c> is not defined in this file; it is presumably an extension
    /// method supplied elsewhere in the project that writes a table to the console.
    /// </remarks>
    class Program
    {
        /// <summary>
        /// Builds two small data frames (employees and departments) and runs each
        /// demonstrated operation in turn, printing the intermediate results.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // Data
            string[] names = { "John", "Ahmed", "Chris", "Albert" };
            int[] salaries = { 20000, 30000, 40000, 10000 };

            // Birthdays are built from explicit year/month/day components (the original
            // literals were day/month/year strings). Parsing those strings with
            // DateTime.Parse depends on the current culture and throws on month-first
            // cultures such as en-US, where "23/4/1990" has no valid month.
            DateTime[] birthdays =
            {
                new DateTime(1990, 4, 23),
                new DateTime(1982, 5, 4),
                new DateTime(1980, 1, 2),
                new DateTime(1994, 10, 9)
            };

            // The last employee deliberately has no department, to demonstrate FillNulls.
            string[] departments = { "Development", "Development", "HR", null };

            var idColumn1 = new PrimitiveDataFrameColumn<int>("Id", new int[] { 1, 2, 3, 4 });
            var nameColumn = new StringDataFrameColumn("Name", names);
            var birthdayColumn = new PrimitiveDataFrameColumn<DateTime>("Birthday", birthdays);
            var salaryColumn = new PrimitiveDataFrameColumn<int>("Salary", salaries);

            // EmployeeId is intentionally in a different order than Id so the merge
            // below has to match rows by key rather than by position.
            var idColumn2 = new PrimitiveDataFrameColumn<int>("EmployeeId", new int[] { 3, 1, 2, 4 });
            var departmentColumn = new StringDataFrameColumn("Department", departments);

            var employeesDf = new DataFrame(idColumn1, nameColumn, birthdayColumn, salaryColumn);
            var departmentsDf = new DataFrame(idColumn2, departmentColumn);

            employeesDf.PrettyPrint();
            departmentsDf.PrettyPrint();
            employeesDf.Info().PrettyPrint();
            departmentsDf.Info().PrettyPrint();
            employeesDf.Description().PrettyPrint();

            // Fill nulls
            departmentColumn.FillNulls("Other", inPlace: true);

            // Create a new column by applying a function
            var today = DateTime.Today;
            employeesDf["Age"] = birthdayColumn.Apply(d => CalculateAge(d, today));
            employeesDf.PrettyPrint();

            // Normalize salary between 0 and 1
            // minSalary is a float so that the column arithmetic below is carried out
            // in floating point instead of integer division.
            var minSalary = (float)(int)salaryColumn.Min();
            var maxSalary = (int)salaryColumn.Max();
            employeesDf["NormalizedSalary"] = (salaryColumn - minSalary) / (maxSalary - minSalary);
            employeesDf.PrettyPrint();

            // Join 2 dataframes
            var df = employeesDf.Merge<int>(departmentsDf, "Id", "EmployeeId", joinAlgorithm: JoinAlgorithm.Inner);
            df.PrettyPrint();

            // Drop column and sort by id
            df.Columns.Remove("EmployeeId");
            df = df.Sort("Id");
            df.PrettyPrint();

            // Mean salary by department
            var employeesByDepartment = df.GroupBy("Department");
            employeesByDepartment.Mean("Salary").PrettyPrint();

            // Value counts of department
            departmentColumn.ValueCounts().PrettyPrint();

            // Sample rows - sometimes gives duplicate rows
            df.Sample(2).PrettyPrint();
        }

        /// <summary>
        /// Computes the age in whole years reached on <paramref name="today"/>.
        /// </summary>
        /// <param name="birthday">Date of birth.</param>
        /// <param name="today">Date on which the age is evaluated.</param>
        /// <returns>
        /// The difference in calendar years, reduced by one when this year's
        /// birthday has not yet occurred on <paramref name="today"/>.
        /// </returns>
        private static int CalculateAge(DateTime birthday, DateTime today)
        {
            var age = today.Year - birthday.Year;

            // A plain year subtraction overstates the age until the birthday has passed.
            if (today < birthday.AddYears(age))
            {
                age--;
            }

            return age;
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment