Skip to content

Instantly share code, notes, and snippets.

@gorkemgoknar
Last active March 16, 2017 06:10
Show Gist options
  • Save gorkemgoknar/3af6b91318647f75c32ddab7dfd67ecb to your computer and use it in GitHub Desktop.
Save gorkemgoknar/3af6b91318647f75c32ddab7dfd67ecb to your computer and use it in GitHub Desktop.
Calculates the distinct song play count per client (in C#). Uses input.csv as the input file and 10/08/2016 (10 Aug 2016) as the date by default if they are not supplied.
using System.IO;
using System;
using System.Collections.Generic;
class Program {
    // Client database: one entry per client seen on the requested date,
    // each holding the distinct songs that client played on that date.
    public static List<Client> clientAccessList = new List<Client>();

    // Histogram of distinct play counts:
    //   key   = number of distinct songs played by a client (stored as a string)
    //   value = number of clients that played exactly that many distinct songs
    public static Dictionary<string, int> distinctCount = new Dictionary<string, int>();

    // Format used both for parsing dates and for printing them
    public static string dateFormat = "dd/MM/yyyy";

    // Information about a client and the songs it accessed on a given day
    public class Client {
        public int clientID;
        public DateTime dateAccessed;
        public List<string> songAccessList;
    }

    // convertDate
    // Converts a date string to a DateTime according to dateFormat.
    // Throws FormatException when dateToConvert does not match dateFormat exactly.
    public static DateTime convertDate(string dateToConvert) {
        return DateTime.ParseExact(dateToConvert, dateFormat, System.Globalization.CultureInfo.InvariantCulture);
    }

    // readAndParseCSV
    // Reads a whitespace delimited file with the columns
    //   PLAY_ID SONG_ID CLIENT_ID PLAY_TS
    // and populates clientAccessList with one entry per client, holding the
    // distinct songs that client played on accessDate (format: dateFormat).
    // PLAY_TS is "dd/MM/yyyy HH:mm:ss"; because the line is split on whitespace
    // the date part ends up in columns[3] and the time part is ignored.
    // Blank lines are skipped silently; lines with fewer than four columns are
    // reported and skipped. Terminates the process with exit code 1 when
    // fileInput does not exist.
    public static void readAndParseCSV(string fileInput, string accessDate) {
        DateTime dateRequired = convertDate(accessDate);
        if (!File.Exists(fileInput)) {
            Console.Write("ERROR: Input file does not exist...");
            // Non-zero exit code so that calling scripts can detect the failure
            Environment.Exit(1);
        }

        // Index the clients by id so each line costs one hash lookup instead of
        // a scan over every known client. Seeded from clientAccessList in case
        // it already holds entries; the first entry per id wins, matching FindIndex.
        Dictionary<int, Client> clientsById = new Dictionary<int, Client>();
        foreach (Client existing in clientAccessList) {
            if (!clientsById.ContainsKey(existing.clientID)) {
                clientsById.Add(existing.clientID, existing);
            }
        }

        int lineCounter = 0;
        Console.Write("--------------\nBegin Processing.\n");
        using (StreamReader reader = new StreamReader(fileInput)) //using will close reader
        {
            string line;
            while ((line = reader.ReadLine()) != null) {
                lineCounter++;
                Console.Write("Processing Line {0}\n", lineCounter);

                // A null separator means "split on any whitespace". Empty entries
                // are dropped so that repeated spaces/tabs do not shift the columns.
                string[] columns = line.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);

                // Blank line (typically the trailing newline at end of file)
                if (columns.Length == 0) {
                    continue;
                }
                // Header line
                if (string.Equals(columns[0], "PLAY_ID", StringComparison.Ordinal)) {
                    continue;
                }
                /*
                columns[0] PLAY_ID
                columns[1] SONG_ID
                columns[2] CLIENT_ID
                columns[3] PLAY_TS (date part only)
                */
                if (columns.Length < 4) {
                    Console.Write("WARNING: Skipping malformed line {0}\n", lineCounter);
                    continue;
                }

                // Only the date is compared, never the time of day
                DateTime dateOfLine = convertDate(columns[3]);
                if (dateOfLine.Date != dateRequired.Date) {
                    continue;
                }

                string songId = columns[1];
                // Parse once per line rather than once per already-known client
                int clientId = Int32.Parse(columns[2]);

                Client thisClient;
                if (clientsById.TryGetValue(clientId, out thisClient)) {
                    // Known client: record the song only if it is new for this client
                    if (!thisClient.songAccessList.Contains(songId)) {
                        thisClient.songAccessList.Add(songId);
                    }
                } else {
                    // First play of this client on the requested date
                    Client newClient = new Client();
                    newClient.dateAccessed = dateOfLine.Date;
                    newClient.clientID = clientId;
                    newClient.songAccessList = new List<string>();
                    newClient.songAccessList.Add(songId);
                    clientAccessList.Add(newClient);
                    clientsById.Add(clientId, newClient);
                }
            } //end while
        }
        Console.Write("End of processing.\n----------------\n");
    }

    // getSongAccessCount
    // Builds the distinctCount histogram from clientAccessList.
    // For every client the number of distinct songs is taken and the number of
    // clients having that same count is incremented:
    /*
    DISTINCT_PLAY_COUNT, CLIENT_COUNT
    1 , x
    2 , y
    ..
    */
    public static void getSongAccessCount() {
        foreach (Client client in clientAccessList) {
            // songAccessList only ever holds distinct songs, so its size is the count
            string key = client.songAccessList.Count.ToString();
            int currentCount;
            // currentCount is left at zero when the key does not exist yet
            distinctCount.TryGetValue(key, out currentCount);
            distinctCount[key] = currentCount + 1;
        }
    }

    // printDistinctPlays
    // Debug output: prints the distinctCount table to the console
    public static void printDistinctPlays() {
        foreach (KeyValuePair<string, int> pair in distinctCount) {
            Console.WriteLine("DISTINCT_PLAY_COUNT: {0} CLIENT_COUNT: {1}",
                pair.Key,
                pair.Value);
        }
    }

    // writeCSVOutput
    // Writes the distinctCount table to outputFile as CSV, overwriting the file
    public static void writeCSVOutput(string outputFile) {
        using (StreamWriter sw = new StreamWriter(outputFile)) {
            sw.WriteLine("DISTINCT_PLAY_COUNT, CLIENT_COUNT");
            foreach (KeyValuePair<string, int> pair in distinctCount) {
                sw.WriteLine(pair.Key + "," + pair.Value);
            }
        }
    }

    // outputClientsToConsole
    // Debug output: prints every client with its date and distinct songs
    public static void outputClientsToConsole() {
        foreach (Client client in clientAccessList) {
            Console.Write("Client ID: " + client.clientID + "\n");
            Console.Write("Date Access: " + client.dateAccessed.ToString(dateFormat) + "\n");
            foreach (string song in client.songAccessList) {
                Console.Write("Song ID: " + song + "\n");
            }
        }
    }

    // getClientCountForDistinctSong
    // Prints how many clients played exactly distinctSong distinct songs
    // (0 when no client did) and the largest distinct play count observed.
    public static void getClientCountForDistinctSong(int distinctSong) {
        // Direct lookup; userCount stays zero when the key is absent
        int userCount;
        distinctCount.TryGetValue(distinctSong.ToString(), out userCount);

        int maxDistinct = 0;
        foreach (KeyValuePair<string, int> pair in distinctCount) {
            int playCount = Int32.Parse(pair.Key);
            if (playCount > maxDistinct) {
                maxDistinct = playCount;
            }
        }
        Console.Write("Users playing " + distinctSong + " distinct songs: " + userCount + "\n");
        Console.Write("Max distinct : " + maxDistinct + "\n");
    }

    // MAIN function
    // Usage: main.exe [inputcsv] ["DATE"] [ReqDistincts]
    // Missing arguments fall back to input.csv, 10/08/2016 and 346.
    static void Main(string[] args) {
        string inputFileName = "input.csv";
        string inputDate = "10/08/2016";
        int distCount = 346;

        // Short-circuit || so that args.Length is never evaluated on a null array
        if (args == null || args.Length == 0) {
            Console.WriteLine("Using input: input.csv, date:10/08/2016");
        } else {
            inputFileName = args[0];
            if (args.Length >= 2) {
                inputDate = args[1];
            }
            if (args.Length >= 3) {
                distCount = Int32.Parse(args[2]);
            }
        }

        //read input file and fill client song access list for accessdate
        readAndParseCSV(inputFileName, inputDate);

        //put clients and accessed songs to console
        Console.Write("--------------\nClient Output:\n");
        outputClientsToConsole();
        Console.Write("End of Client Output.\n--------------\n");

        getSongAccessCount();
        Console.Write("--------------\nDistinct Songs:\n");
        printDistinctPlays();
        Console.Write("End of Distinct Songs.\n--------------\n");

        getClientCountForDistinctSong(distCount);

        writeCSVOutput("output.csv");
    }
}
@gorkemgoknar
Copy link
Author

Calculates distinct song access counts, and the number of clients for each count, from a given input.csv, where the input columns are "PLAY_ID SONG_ID CLIENT_ID PLAY_TS" and the default date is 10/08/2016.

Sample input:
PLAY_ID SONG_ID CLIENT_ID PLAY_TS
44BB190BC2493964E053CF0A000AB546 6164 249 09/08/2016 09:16:34
44BB190BC24A3964E053CF0A000AB546 544 86 10/08/2016 13:54:52
44BB190BC24B3964E053CF0A000AB546 9648 589 08/08/2016 06:08:53

Sample output
Distinct Songs:
DISTINCT_PLAY_COUNT: 1 CLIENT_COUNT: 3

Max distinct : 1

Execution:
main.exe [inputcsv] ["DATE"] [ReqDistincts]

Running with no arguments uses input.csv as the input file, 10/08/2016 as the date, and 346 as ReqDistincts.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment