Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
Dirty Money: Code Challenge https://jorin.me/dirtymoney

Follow the dirty money

A shady Internet business has been discovered.

The website has been made public by a whistleblower. We have enough evidence about the dirty deals they did. But to charge them we need to get our hands on precise numbers about the transactions that happened on their platform.

Unfortunately no record of the transactions could be seized so far. The only hint we have is this one transaction:

fd0d929f-966f-4d1a-89cd-feee5a1c5347.json

What we need is the total of all transactions in dollars. Can you trace down all other transactions and get the total?

Be careful to count each transaction only once, even if it is linked multiple times. You can use whatever tool works best.

Please share the total and your solution below!

Cheers, Jorin

@jorinvo

(For 2015 challenge see https://jorin.me/csv-challenge)

const axios = require('axios')
const queue = require('async/queue')

// Running total of all transaction amounts, in dollars.
let sum = 0
// URLs that have already been queued for download; a Set gives O(1) lookups.
const visited = new Set()

// Work queue with concurrency 1: each task downloads one transaction,
// adds its amount to `sum` and enqueues the transactions it links to.
const q = queue((url, done) => {
  if (visited.has(url)) return done()
  visited.add(url)
  axios.get(url)
    .then(({data}) => {
      // Amounts use either "." or "," as the decimal separator.
      const match = data.content.match(/\$(\d*(\.|,)\d*)/)
      if (!match) throw new Error('no dollar amount found in ' + url)
      sum += Number(match[1].replace(',', '.'))
      q.push(data.links)
    })
    // Always complete the task, even on failure; otherwise the queue
    // would stall and the drain handler would never run.
    .then(() => done(), (err) => {
      console.error('failed to process ' + url + ': ' + err.message)
      done(err)
    })
}, 1)

q.push('https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json')

// Called once the queue has run out of work.
q.drain = () => { console.log('Total: $' + sum.toFixed(2)) }

Which results in

>>> Total: $9064.79
Owner

jorinvo commented Nov 13, 2016

This Go version downloads links in parallel:

package main

import (
	"encoding/json"
	"fmt"
	"log"
	"net/http"
	"regexp"
	"strconv"
	"strings"
	"sync"
)

// workers is the number of goroutines that download transactions concurrently.
const workers = 10

// dollarMatch finds a "$" followed by a run of digits, dots and commas.
var dollarMatch = regexp.MustCompile(`\$[0-9.,]+`)

// transaction is the JSON document served for a single transaction.
type transaction struct {
	ID      string
	Content string
	Links   []string
}

// main crawls all transactions reachable from the start URL, sums the dollar
// amount of each distinct transaction ID and prints the count and the total.
func main() {
	start := "https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json"
	var total float64
	visited := map[string]bool{}
	// urls is buffered so the collector goroutine below can enqueue links
	// without waiting for a free worker (as long as the buffer is not full).
	urls := make(chan string, 1000)
	transactions := make(chan transaction)
	// wg counts URLs that have been queued but not yet fully processed.
	var wg sync.WaitGroup

	// download in parallel
	for w := 0; w < workers; w++ {
		go func() {
			for u := range urls {
				t := transaction{}
				err := getJSON(u, &t)
				if err != nil {
					log.Printf("failed to get JSON from '%s': %v", u, err)
				}
				// t is forwarded even after an error so that the collector
				// still calls wg.Done for this URL.
				transactions <- t
			}
		}()
	}

	// Single collector goroutine: the only writer of visited and total.
	go func() {
		for t := range transactions {
			if t.ID != "" && !visited[t.ID] {
				visited[t.ID] = true
				s := dollarMatch.FindString(t.Content)
				// Strip the "$" and any leading/trailing punctuation, then
				// turn the first remaining comma into a decimal point.
				s = strings.Trim(s, "$,.")
				s = strings.Replace(s, ",", ".", 1)
				dollar, err := strconv.ParseFloat(s, 64)
				if err != nil {
					log.Fatal(err)
				}
				total += dollar
				// Register the links before queueing them so the counter
				// cannot reach zero while work is still outstanding.
				wg.Add(len(t.Links))
				for _, link := range t.Links {
					urls <- link
				}
			}
			// One queued URL has now been fully handled.
			wg.Done()
		}
	}()

	wg.Add(1)
	urls <- start
	wg.Wait()
	close(urls)
	close(transactions)
	fmt.Printf("transactions: %d, total: $%.2f", len(visited), total)
}

// getJSON downloads url and decodes the JSON response body into target,
// which must be a non-nil pointer. It returns an error if the request fails,
// the server does not answer with 200 OK, or the body is not valid JSON.
func getJSON(url string, target interface{}) error {
	r, err := http.Get(url)
	if err != nil {
		return err
	}
	defer r.Body.Close()
	// Report HTTP errors explicitly instead of trying to parse an error page.
	if r.StatusCode != http.StatusOK {
		return fmt.Errorf("unexpected HTTP status %s", r.Status)
	}
	// target already is the pointer to decode into; pass it through as-is.
	return json.NewDecoder(r.Body).Decode(target)
}
$ go run follow-the-dirty-money.go 
transactions: 50, total: $9064.79

jimfoltz commented Feb 5, 2017 edited

# Ruby
require "open-uri"
require 'json'

# Recursively downloads the transaction at +url+ and every transaction it
# links to, storing each transaction's content in +txns+ keyed by its id.
#
# url     - URL of a transaction JSON document.
# txns    - Hash that receives id => content entries.
# visited - Array of URLs already downloaded; shared across the recursion.
def get_json(url, txns, visited = [])
  return if visited.include?(url)
  visited << url
  # URI.open is the supported open-uri entry point (Kernel#open no longer
  # accepts URLs since Ruby 3.0); the block form closes the handle.
  txn = JSON.parse(URI.open(url, &:read))
  txns[txn['id']] = txn['content']
  txn['links'].each { |link| get_json(link, txns, visited) }
end

url = "https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json"
# id => content of every transaction reachable from url.
txns = {}

get_json(url, txns)

# Add up the dollar amount found in each transaction's content.
sum = txns.each_value.sum do |content|
  # The decimal separator is either "." or ","; match it explicitly rather
  # than with a bare "." that would accept any character.
  amount = content[/\$(\d+[.,]\d+)/, 1]
  raise "no dollar amount found in: #{content}" if amount.nil?
  amount.tr(',', '.').to_f
end

puts "Transactions: #{txns.keys.size}"
puts "total: $%.2f\n" % sum
Transactions: 50
total: $9064.79

Corrected.

import 'isomorphic-fetch';

const start = 'https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json'

/**
 * Builds a function that sums the dollar amounts of a transaction and of all
 * transactions reachable from it that have not been seen yet.
 *
 * @param {string[]} visited - URLs already claimed; mutated as new links are
 *   discovered. It must already contain the URL the crawl starts from.
 * @returns {(url: string) => Promise<number>} resolves to the amount of the
 *   transaction at `url` plus the amounts of its newly discovered descendants.
 */
const getSum = visited => async url => {
  const response = await fetch(url);
  const node = await response.json();

  // Claim each unseen link immediately so that a link listed twice in the
  // same transaction (or reached via a sibling) is only followed once.
  const urls = [];
  for (const link of node.links) {
    if (!visited.includes(link)) {
      visited.push(link);
      urls.push(link);
    }
  }

  // Amounts use either "." or "," as the decimal separator.
  const match = node.content.match(/\$(\d+[,.]\d\d?)/);
  if (!match) {
    throw new Error(`No dollar amount found in transaction ${url}`);
  }
  const thisAmount = parseFloat(match[1].replace(',', '.'));

  const childSums = await Promise.all(urls.map(getSum(visited)));
  return childSums.reduce((a, b) => a + b, thisAmount);
}

// Crawl from the start URL (pre-marked as visited) and print the total with
// two decimals; any failure is reported on stderr.
getSum([start])(start)
  .then(total => console.log(total.toFixed(2)))
  .catch(err => console.error(err));

mrjbq7 commented Feb 5, 2017 edited

In Factor:

USING: assocs http.client json.reader kernel locals math
math.parser regexp sequences sets splitting ;

! Extract the first dollar amount from str and convert it to a number.
! The leading "$" is dropped and commas are replaced with dots before parsing.
: dollars ( str -- $ )
    R/ \$\d*[,.]\d+/ first-match rest
    "," "." replace string>number ;

! Sum the amount of the transaction at url and of every transaction
! reachable through its links. ?adjoin adds url to the visited set; the
! first branch runs only when url was not in the set yet, otherwise 0 is used.
:: transaction ( url visited -- dollars )
    url visited ?adjoin [
        url http-get nip json> :> data
        data "content" of dollars
        data "links" of [ visited transaction ] map-sum +
    ] [ 0 ] if ;

! Crawl from url with a fresh hash set of visited URLs; leaves the dollar
! total and the number of visited URLs (the set's cardinality) on the stack.
: transactions ( url -- dollars #transactions )
    HS{ } clone [ transaction ] [ cardinality ] bi ;

Using it:

IN: scratchpad "https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json"
               transactions "$%.2f in %d transactions.\n" printf
$9064.79 in 50 transactions.

st0le commented Feb 5, 2017 edited

from requests import get
from re import search

src = "https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json"
# Maps each downloaded URL to the dollar amount of its transaction.
visited = {}
# URLs still to be examined (depth-first).
stack = [src]
while stack:
    url = stack.pop()
    if url not in visited:
        transaction = get(url).json()
        desc = transaction["content"]
        # Raw string: "\$" and "\d" are regex escapes, not string escapes.
        # The decimal separator may be either "," or ".".
        money = search(r"\$(\d+[,.]\d+)", desc).group(1)
        money = money.replace(',', '.')
        visited[url] = float(money)
        stack.extend(transaction["links"])

print(sum(visited.values()))

Output : 9064.789999999997

import requests
import json
import re

# "$" followed by digits, a "," or "." separator, and more digits.
regex = re.compile(r"\$[0-9]*[,.][0-9]*")
# URLs that have already been downloaded.
visited_links = {}
# Work list; it grows while it is being iterated, giving a breadth-first crawl.
queue = ["https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json"]
total = 0
transactions = 0

for link in queue:
    # Check before downloading so a link is never fetched more than once.
    if link in visited_links:
        continue
    visited_links[link] = 1
    obj = json.loads(requests.get(link).text)
    transactions += 1
    queue.extend(obj["links"])
    # Drop the leading "$" and normalise a decimal comma to a dot.
    total += float(regex.findall(obj["content"])[0][1:].replace(",", "."))

print("Total: " + str(total))
print("Transactions: " + str(transactions))

Output:
Total: 9064.789999999999
Transactions: 50

XANi commented Feb 6, 2017

Ugly Perl version:

use JSON;
use LWP::Simple;
use List::Util qw(sum);

# Hash reference of collected transactions: id => content (filled in by p()).
my $tr_list;
# Crawl every transaction reachable from the starting URL.
p('https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json');
# For each stored content: turn the first comma into a dot, then capture the
# decimal number that follows "$". Note the substitution applies to the first
# comma anywhere in the content, not only inside the amount.
my @tr_val = map { s{,}{.}; m/\$(\d+\.\d+)/; $1 } values %$tr_list;
# Report the total and the number of amounts summed.
print "Sum: " . sum(@tr_val) . " count: " . scalar @tr_val . "\n";

# Download the transaction at the given URL and, unless its id is already
# known, store its content in $tr_list and follow all of its links.
sub p {
    my $url  = shift;
    my $body = get($url);
    die "failed to fetch $url\n" unless defined $body;
    my $tr = decode_json($body);
    return if defined $tr_list->{ $tr->{'id'} };
    # Record the transaction before following its links so that cyclic
    # links terminate instead of recursing forever.
    $tr_list->{ $tr->{'id'} } = $tr->{'content'};
    for my $link ( @{ $tr->{'links'} } ) {
        p($link);
    }
    return;
}

Sum: 9064.79 count: 50

Kotlin:

import com.google.gson.Gson
import java.math.BigDecimal
import java.net.URL
import java.util.*

/** One transaction document as served by the site: its id, description and outgoing links. */
data class Transaction(val id: String, val content: String, val links: List<String>)
/**
 * Crawls all transactions reachable from the start URL and prints the sum of
 * their dollar amounts as a BigDecimal.
 */
fun main(args: Array<String>) {
  // Work queue of URLs still to visit, seeded with the start URL.
  val q = LinkedList<String>(listOf("https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json"))
  // The sequence ends when q.poll() returns null. It is evaluated lazily, so
  // links added inside fold are picked up by later poll() calls.
  generateSequence { q.poll() }
    // Skip URLs that were already produced once.
    .distinct()
    .map { Gson().fromJson(URL(it).readText(), Transaction::class.java) }
    .fold(BigDecimal.ZERO) { sum, t ->
      q.addAll(t.links)
      // Commas in the content are replaced with dots before the amount after "$" is extracted.
      sum + Regex("\\$(\\d+\\.\\d+)").find(t.content.replace(',', '.'))!!.groupValues[1].let { BigDecimal(it) }
    }.let { println(it) }
}

9064.79

thinkholic commented Feb 6, 2017 edited

a simple PHP version;


<?php
/**
 * Recursively downloads the transaction at $reqUrl and everything it links
 * to, collecting each transaction's content keyed by its id.
 *
 * @param string $reqUrl URL of a transaction JSON document.
 * @param array  $queue  Collected id => content entries (modified in place).
 * @return array The same collection, for convenience.
 */
function processTransactions($reqUrl, &$queue) {
  $response = json_decode(file_get_contents($reqUrl));

  $id = $response->id;
  // A transaction that is already recorded has had its links followed too;
  // skipping it avoids repeated downloads and endless recursion on cycles.
  if (array_key_exists($id, $queue)) {
    return $queue;
  }
  $queue[$id] = $response->content;

  foreach ($response->links as $link) {
    processTransactions($link, $queue);
  }

  return $queue;
}

// Crawl every transaction reachable from the initial URL (id => content).
$initialReqUrl = "https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json";
$queue = array();
$queueFinal = processTransactions($initialReqUrl, $queue);

// Transactions with identical content are treated as duplicates.
$queueUnique = array_unique($queueFinal);

// Summary: number of records, number of distinct contents, summed amount.
$totalTransactions = array(
  'records' => count($queueFinal),
  'records_unique' => count($queueUnique),
  'amount' => 0
);

foreach ($queueUnique as $content) {
  preg_match_all('/\$[0-9]+[,.][0-9]*/', $content, $matchedArr);
  // "$" and "," are stripped before the cast, so "$149,97" is read as 14997
  // (the comma is handled as a thousands separator, not a decimal mark).
  $totalTransactions['amount'] += (float) str_replace(array('$',','), '', $matchedArr[0][0]);
}

print "Result: `".$totalTransactions['records']." total records`, `".$totalTransactions['records_unique']." unique transaction` and `$".$totalTransactions['amount']."`";

// eof.

Results: 50 total records, 35 unique transaction and $519238.86

Note:
As per my understanding, $9064.79 is incorrect.
Because;

  1. $149,97 != $149.97
  2. To remove duplicate transactions, we must check for transaction contents instead of links. It may include same transaction details in multiple pages with different links.

evowise commented Feb 6, 2017

import json
import requests
import re
# "$" followed by digits, a "," or "." separator, and more digits.
p = re.compile(r'\$\d+[,.]\d+')

def get_sum(t):
    """Return the first dollar amount found in the text ``t`` as a float.

    The amount may use either "," or "." as its decimal separator.
    """
    # Search the argument itself rather than a module-level variable.
    amount = p.search(t).group(0)
    # Drop the leading "$" and normalise a decimal comma to a dot.
    return float(amount[1:].replace(',', '.'))

def get_node_data(url):
    """Download ``url`` and return its body parsed as JSON."""
    response = requests.get(url)
    return json.loads(response.text)

# `j` always holds the transaction that is currently being processed.
j = get_node_data("https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json")
# Ids of the transactions that have already been counted.
s = {j["id"]}
# Named `total` so the builtin sum() is not shadowed.
total = get_sum(j["content"])

# Breadth-first crawl: handle one level of links per iteration of the loop.
links = j.get("links")
while links:
    next_links = []
    for link in links:
        j = get_node_data(link)
        if j["id"] not in s:
            s.add(j["id"])
            # print() with a single argument behaves the same on Python 2 and 3.
            print(j["content"])
            total += get_sum(j["content"])
            next_links.extend(j["links"])
    links = next_links

print(total)

It works! :-) 9064.79

Owner

jorinvo commented Feb 6, 2017

Thanks @chocolateboy! Next time :)
But it looks like they discontinued their service? http://www.git.to/

chocolateboy commented Feb 6, 2017 edited

@jorinvo No, it's still up (it's GitHub's own link shortener). It's git.io rather than git.to, though :-)

matthew-hallsworth commented Feb 7, 2017 edited

Working PHP solution

<?php

// Crawl from the first known transaction and collect id => amount pairs.
$start = 'https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json';
$keyed_array = process_links(fetch_and_decode($start), []);

// Report how many transactions were found and the sum of their amounts.
echo "transactions: " . count($keyed_array) . ", total: " . array_sum($keyed_array) . "\n";

//--

/**
 * Records the amount of $payload and of every transaction reachable through
 * its links, keyed by transaction id.
 *
 * @param object $payload     Decoded transaction (id, content, links).
 * @param array  $keyed_array Amounts collected so far, keyed by id.
 * @return array The collection extended with any newly found transactions.
 */
function process_links($payload, $keyed_array) {

  // A transaction that is already recorded has had its links followed too;
  // stopping here avoids repeated downloads and endless recursion on cycles.
  if (array_key_exists($payload->id, $keyed_array)) {
    return $keyed_array;
  }

  $keyed_array[$payload->id] = find_transaction($payload);

  foreach ($payload->links as $link) {
    $keyed_array = process_links(fetch_and_decode($link), $keyed_array);
  }

  return $keyed_array;
}


/**
 * Downloads $url and returns its body decoded from JSON.
 */
function fetch_and_decode($url) {
  $body = file_get_contents($url);
  return json_decode($body);
}

/**
 * Extracts the dollar amount from a decoded transaction's content and
 * returns it as a string with "." as the decimal separator.
 */
function find_transaction($decoded_transaction) {
  $content = $decoded_transaction->content;
  preg_match('/\$(\d*(\.|,)\d*)/', $content, $matches);
  $amount = $matches[1];
  return str_replace(',', '.', $amount);
}

tested on php 5.6

transactions: 50, total: 9064.79

Owner

jorinvo commented Feb 7, 2017

@chocolateboy, I see, cool :) But I won't change the links for this post anymore. Next time!

aptinio commented Feb 14, 2017

In Elixir (not concurrent yet):

defmodule DirtyMoney do
  @moduledoc """
  Follows linked transaction documents and totals their dollar amounts.

  Amounts are kept as integer cents in an Agent-held map keyed by
  transaction id, so each transaction is counted once.
  """

  @doc """
  Returns the total, in dollars, of all transactions reachable from `link`.
  """
  def total(link) do
    {:ok, agent} = Agent.start_link(fn -> %{} end)

    try do
      follow(link, agent)

      Agent.get(agent, fn transactions ->
        transactions
        |> Map.values()
        |> Enum.sum()
        |> Kernel./(100)
      end)
    after
      # Do not leave the Agent process running once the total is known.
      Agent.stop(agent)
    end
  end

  @doc """
  Downloads the transaction at `link`, records it in `agent` and follows its
  links. A transaction whose id is already recorded is not followed again.
  """
  def follow(link, agent) do
    %{"id" => id, "content" => content, "links" => links} =
      link
      |> HTTPoison.get!([], follow_redirect: true)
      |> Map.fetch!(:body)
      |> Poison.decode!()

    already_counted? = Agent.get(agent, &Map.has_key?(&1, id))

    # Skipping known transactions avoids repeated downloads of shared
    # subgraphs and endless recursion when links form a cycle.
    unless already_counted? do
      count(agent, id, content)

      Enum.each(links, fn link ->
        follow(link, agent)
      end)
    end

    :ok
  end

  @doc """
  Stores the amount parsed from `content` under `id`, unless `id` is already
  present in the Agent's map.
  """
  def count(agent, id, content) do
    Agent.update(agent, fn transactions ->
      Map.put_new_lazy(transactions, id, fn ->
        parse(content)
      end)
    end)
  end

  @doc """
  Parses the first dollar amount in `content` and returns it in cents.

  The decimal separator may be "," or "."; a single-digit fraction is padded
  with a trailing zero (e.g. "$1.5" is 150 cents).
  """
  def parse(content) do
    [_, whole, part] = Regex.run(~r/\$(\d+)[,.](\d+)/, content)

    cents =
      part
      |> String.pad_trailing(2, "0")
      |> String.to_integer()

    cents + String.to_integer(whole) * 100
  end
end

# Compute the total starting from the shortened link to the first transaction.
# The value is returned by the expression, not printed.
DirtyMoney.total("https://git.io/vDCxb")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment