Skip to content

Instantly share code, notes, and snippets.

@jorinvo
Last active January 1, 2018 14:41
Show Gist options
  • Save jorinvo/3d7f6a60fcede1863fa9f0788b8cc1b4 to your computer and use it in GitHub Desktop.
Save jorinvo/3d7f6a60fcede1863fa9f0788b8cc1b4 to your computer and use it in GitHub Desktop.
Dirty Money: Code Challenge https://jorin.me/dirtymoney

Follow the dirty money

A shady Internet business has been discovered.

The website has been made public by a whistleblower. We have enough evidence about the dirty deals they did. But to charge them we need to get our hands on precise numbers about the transactions that happened on their platform.

Unfortunately no record of the transactions could be seized so far. The only hint we have is this one transaction:

fd0d929f-966f-4d1a-89cd-feee5a1c5347.json

What we need is the total of all transactions in dollars. Can you track down all other transactions and get the total?

Be careful to count each transaction only once, even if it is linked multiple times. You can use whatever tool works best.

Please share the total and your solution below!

Cheers, Jorin

@jorinvo

(For 2015 challenge see https://jorin.me/csv-challenge)

@danieldiekmeier
Copy link

const axios = require('axios')
const queue = require('async/queue')

// Running total of all transaction amounts, in dollars.
let sum = 0
// URLs that have already been requested, so each one is fetched only once.
const visited = new Set()

// First "$<digits><. or ,><digits>" amount in a transaction's content.
const AMOUNT_PATTERN = /\$(\d*(\.|,)\d*)/

// Queue worker: fetch one transaction, add its amount to `sum` and enqueue the
// URLs it links to. Concurrency is 1, so transactions are processed one by one.
const q = queue((url, done) => {
  if (visited.has(url)) return done()
  visited.add(url)
  axios.get(url).then(({data}) => {
    const match = data.content.match(AMOUNT_PATTERN)
    if (match === null) throw new Error(`no dollar amount found in ${url}`)
    // Amounts may use a decimal comma; normalise it before converting.
    sum += Number(match[1].replace(',', '.'))
    q.push(data.links)
    done()
  }).catch((err) => {
    // Without this handler a failed request or an unparsable transaction
    // would never call `done`, leaving the queue stuck and `drain` unfired.
    console.error(`failed to process ${url}: ${err.message}`)
    done(err)
  })
}, 1)

q.push('https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json')

// Runs once the queue is empty and the last worker has finished.
q.drain = () => { console.log('Total: $' + sum.toFixed(2)) }

Which results in

>>> Total: $9064.79

@jorinvo
Copy link
Author

jorinvo commented Nov 13, 2016

This Go version downloads links in parallel:

package main

import (
	"encoding/json"
	"fmt"
	"log"
	"net/http"
	"regexp"
	"strconv"
	"strings"
	"sync"
)

// workers is the number of goroutines that download transactions concurrently.
const workers = 10

// dollarMatch finds a "$" followed by a run of digits, dots and commas.
var dollarMatch = regexp.MustCompile(`\$[0-9.,]+`)

// transaction is the shape of one downloaded JSON document: its ID, the
// free-text Content containing the dollar amount, and the URLs it Links to.
type transaction struct {
	ID      string
	Content string
	Links   []string
}

// main crawls the transaction graph starting at a fixed URL, adds up the
// dollar amount of every transaction with a distinct ID, and prints the
// number of transactions and the total.
func main() {
	start := "https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json"
	var total float64
	// visited records the IDs of transactions that were already counted.
	visited := map[string]bool{}
	// urls carries work to the download workers; transactions carries their
	// results to the single goroutine that does the accounting.
	urls := make(chan string, 1000)
	transactions := make(chan transaction)
	// wg counts URLs that were queued but whose result has not been handled yet.
	var wg sync.WaitGroup

	// download in parallel
	for w := 0; w < workers; w++ {
		go func() {
			for u := range urls {
				t := transaction{}
				err := getJSON(u, &t)
				if err != nil {
					log.Printf("failed to get JSON from '%s': %v", u, err)
				}
				// The result is sent even after a failure so that the consumer
				// below still calls wg.Done for this URL.
				transactions <- t
			}
		}()
	}

	// Single consumer: the only goroutine that touches visited and total
	// while the crawl is running.
	go func() {
		for t := range transactions {
			// An empty ID is skipped; an ID seen before is not counted again.
			if t.ID != "" && !visited[t.ID] {
				visited[t.ID] = true
				s := dollarMatch.FindString(t.Content)
				// Strip the "$" and any leading/trailing punctuation, then turn
				// the first remaining comma into a decimal point.
				s = strings.Trim(s, "$,.")
				s = strings.Replace(s, ",", ".", 1)
				dollar, err := strconv.ParseFloat(s, 64)
				if err != nil {
					log.Fatal(err)
				}
				total += dollar
				// Register the new URLs before this one is marked done, so the
				// counter cannot reach zero while work is still being queued.
				wg.Add(len(t.Links))
				// NOTE(review): this send blocks once the 1000-slot buffer is full
				// while workers may be blocked sending to transactions; looks like
				// a possible deadlock on much larger graphs — confirm.
				for _, link := range t.Links {
					urls <- link
				}
			}
			wg.Done()
		}
	}()

	// Seed the crawl and wait until every queued URL has been handled.
	wg.Add(1)
	urls <- start
	wg.Wait()
	close(urls)
	close(transactions)
	fmt.Printf("transactions: %d, total: $%.2f", len(visited), total)
}

// getJSON fetches url and decodes the JSON response body into target, which
// must be a non-nil pointer.
//
// It returns an error when the request fails, when the server answers with a
// non-2xx status, or when the body cannot be decoded into target.
func getJSON(url string, target interface{}) error {
	r, err := http.Get(url)
	if err != nil {
		return err
	}
	defer r.Body.Close()
	// An error page is not a transaction; report it instead of decoding it.
	if r.StatusCode < 200 || r.StatusCode > 299 {
		return fmt.Errorf("unexpected HTTP status %s", r.Status)
	}
	// target already holds the caller's pointer, so it is passed through as is.
	return json.NewDecoder(r.Body).Decode(target)
}
$ go run follow-the-dirty-money.go 
transactions: 50, total: $9064.79

@jimfoltz
Copy link

jimfoltz commented Feb 5, 2017

# Ruby
require "open-uri"
require 'json'

# Downloads the transaction at +url+ and, recursively, every transaction it
# links to, storing each one's content in +txns+ under its id.
#
# url     - String URL of a transaction JSON document.
# txns    - Hash that is filled in place with id => content.
# visited - Array of URLs already fetched; shared across the recursion so that
#           a URL linked more than once is downloaded only once.
#
# Returns nil when +url+ was already visited.
def get_json(url, txns, visited = [])
   return if visited.include?(url)
   visited << url
   # Kernel#open no longer opens URLs on Ruby 3.0+; URI.open is the open-uri
   # entry point, and the block form closes the handle after reading.
   txn = JSON.parse(URI.open(url, &:read))
   txns[txn['id']] = txn['content']
   txn['links'].each {|link| get_json(link, txns, visited)}
end

url = "https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json"
# id => content of every transaction reachable from url.
txns = {}

get_json(url, txns)

sum = 0
txns.each { |id, txn|
   # The separator is matched explicitly as "." or ","; an unescaped "." would
   # accept any character between the two digit groups.
   money = txn.match(/\$(\d+[.,]\d+)/)
   raise "no dollar amount found in transaction #{id}" if money.nil?
   # Some amounts use a decimal comma; normalise it before converting.
   amt = money[1].tr(',', '.').to_f
   sum += amt
}

puts "Transactions: #{txns.keys.size}"
puts "total: $%.2f\n" % sum
Transactions: 50
total: $9064.79

Corrected.

@andrewgreenh
Copy link

import 'isomorphic-fetch';

const start = 'https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json'

/**
 * Builds a crawler that returns the dollar amount of the transaction at a URL
 * plus the amounts of every not-yet-visited transaction reachable from it.
 *
 * @param {string[]} visited - URLs already claimed by the crawl. Mutated in
 *   place; the caller must include the start URL so back-links to it are ignored.
 * @returns {(url: string) => Promise<number>} Resolves to the summed amount.
 *   Rejects when a request fails or a transaction contains no dollar amount.
 */
const getSum = visited => async url => {
  const response = await fetch(url);
  if (!response.ok) throw new Error(`Request for ${url} failed with status ${response.status}`);
  const node = await response.json();
  // De-duplicate within this node's own link list first: a URL listed twice
  // here is not in `visited` yet, so it would otherwise be crawled and
  // counted twice.
  const urls = [...new Set(node.links)].filter(link => !visited.includes(link));
  visited.push(...urls)
  const match = node.content.match(/\$(\d+[,.]\d\d?)/);
  if (match === null) throw new Error(`No dollar amount found in ${url}`);
  // Amounts may use a decimal comma; normalise it before parsing.
  const thisAmount = parseFloat(match[1].replace(',', '.'));
  const childSums = await Promise.all(urls.map(getSum(visited)));
  return childSums.reduce((a, b) => a + b, thisAmount);
}

// Seed `visited` with the start URL so links back to it are not followed
// again, then print the total with two decimals (or log the error).
getSum([start])(start).then(result => console.log(result.toFixed(2))).catch(e => console.error(e));

@mrjbq7
Copy link

mrjbq7 commented Feb 5, 2017

In Factor:

USING: assocs http.client json.reader kernel locals math
math.parser regexp sequences sets splitting ;

! Pull the first dollar amount out of str and return it as a number.
! The regexp accepts "." or "," before the fractional digits; `rest` drops the
! leading "$" and a "," is rewritten to "." before string>number parses it.
: dollars ( str -- $ )
    R/ \$\d*[,.]\d+/ first-match rest
    "," "." replace string>number ;

! Sum the dollar amount of the transaction at url and of everything it links to.
! visited is a mutable set of URLs. ?adjoin presumably adds url and reports
! whether it was newly added (confirm against the sets vocabulary); a url that
! was already present takes the [ 0 ] branch and is not fetched again.
:: transaction ( url visited -- dollars )
    url visited ?adjoin [
        url http-get nip json> :> data
        data "content" of dollars
        data "links" of [ visited transaction ] map-sum +
    ] [ 0 ] if ;

! Crawl from url with a fresh, empty hash set and leave both the total and the
! number of distinct URLs visited (the set's cardinality) on the stack.
: transactions ( url -- dollars #transactions )
    HS{ } clone [ transaction ] [ cardinality ] bi ;

Using it:

IN: scratchpad "https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json"
               transactions "$%.2f in %d transactions.\n" printf
$9064.79 in 50 transactions.

@st0le
Copy link

st0le commented Feb 5, 2017

from requests import get
from re import search

src = "https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json"
# Maps every fetched URL to the dollar amount found in that transaction.
visited = {}
# Work list of URLs still to look at; pop() takes the most recently added one.
stack = [src]
while stack:
    url = stack.pop()
    if url in visited:
        continue
    transaction = get(url).json()
    desc = transaction["content"]
    # Raw string: "\$" and "\d" are invalid escape sequences in a plain
    # string literal and trigger warnings on current Python versions.
    money = search(r"\$(\d+[,.]\d+)", desc).group(1)
    # Some amounts use a decimal comma; normalise it before converting.
    money = money.replace(',', '.')
    visited[url] = float(money)
    stack.extend(transaction["links"])

print(sum(visited.values()))

Output : 9064.789999999997

@adamantmc
Copy link

import requests
import json
import re

# "$", optional digits, a "." or "," separator, optional digits.
regex = re.compile(r"\$[0-9]*[,.][0-9]*")
# URLs that have already been downloaded and counted.
visited_links = set()
queue = ["https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json"]
total = 0
transactions = 0

# The list doubles as a FIFO queue: links appended during the loop are picked
# up by the same iteration.
for link in queue:
    # Check before downloading, so a link that appears several times costs
    # only one request.
    if link in visited_links:
        continue
    visited_links.add(link)
    obj = json.loads(requests.get(link).text)
    transactions += 1
    queue.extend(obj["links"])
    # Drop the leading "$" and normalise a decimal comma before converting.
    total += float(regex.findall(obj["content"])[0][1:].replace(",", "."))

print("Total: " + str(total))
print("Transactions: " + str(transactions))

Output:
Total: 9064.789999999999
Transactions: 50

@XANi
Copy link

XANi commented Feb 6, 2017

Ugly Perl version:

use JSON;
use LWP::Simple;
use List::Util qw(sum);

# Hash ref mapping transaction id => content text; filled in by p().
my $tr_list;
p('https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json');
# Match the "$" amount first and only then normalise its separator. Replacing
# the first comma of the whole text could hit a comma that is not part of the
# amount, and would also modify the stored content in place.
my @tr_val = map {
    m/\$(\d+)[.,](\d+)/ or die "no dollar amount found in: $_\n";
    "$1.$2";
} values %$tr_list;
print "Sum: " . sum(@tr_val) . " count: " . scalar @tr_val . "\n";

# Fetch the transaction at the given URL and, recursively, every transaction
# it links to, storing each one's content in $tr_list under its id.
# Dies when the URL cannot be fetched.
sub p {
    my $url  = shift;
    my $body = get($url);
    die "failed to fetch $url\n" unless defined $body;
    my $tr = decode_json($body);
    if ( defined( $tr_list->{ $tr->{'id'} } ) ) { return }
    # Record the transaction before following its links, so that a link
    # leading back to it ends the recursion instead of repeating it.
    $tr_list->{ $tr->{'id'} } = $tr->{'content'};
    for my $link ( @{ $tr->{'links'} } ) {
        p($link);
    }
    return;
}

Sum: 9064.79 count: 50

@fGeorjje
Copy link

fGeorjje commented Feb 6, 2017

Kotlin:

import com.google.gson.Gson
import java.math.BigDecimal
import java.net.URL
import java.util.*

/** One transaction document: its id, the text holding the amount, and linked URLs. */
data class Transaction(val id: String, val content: String, val links: List<String>)

/**
 * Walks the transaction graph from the start URL, visiting each distinct URL
 * once, and prints the sum of the dollar amounts found.
 */
fun main(args: Array<String>) {
  val pending = LinkedList<String>(listOf("https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json"))
  val seenUrls = HashSet<String>()
  var total = BigDecimal.ZERO
  while (true) {
    // poll() yields null once the queue is empty, which ends the crawl.
    val url = pending.poll() ?: break
    if (!seenUrls.add(url)) continue
    val t = Gson().fromJson(URL(url).readText(), Transaction::class.java)
    pending.addAll(t.links)
    // Commas become dots first so that decimal-comma amounts match the pattern.
    val normalized = t.content.replace(',', '.')
    val amount = Regex("\\$(\\d+\\.\\d+)").find(normalized)!!.groupValues[1]
    total += BigDecimal(amount)
  }
  println(total)
}

9064.79

@heimdallrj
Copy link

heimdallrj commented Feb 6, 2017

a simple PHP version;


<?php
/**
 * Downloads the transaction at $reqUrl and, recursively, every transaction it
 * links to, storing each one's content in $queue under its id.
 *
 * @param string $reqUrl URL of a transaction JSON document.
 * @param array  $queue  Filled in place with id => content.
 * @return array The same id => content map.
 */
function processTransactions($reqUrl, &$queue) {
  $response = json_decode(file_get_contents($reqUrl));

  $id = $response->id;
  // Already recorded: its links have been (or are being) followed, so stop
  // here instead of crawling the same subgraph again or looping on a cycle.
  if (array_key_exists($id, $queue)) {
    return $queue;
  }
  $queue[$id] = $response->content;

  foreach ($response->links as $link) {
    processTransactions($link, $queue);
  }

  return $queue;
}

$initialReqUrl = "https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json";
$queue = array();
$queueFinal = processTransactions($initialReqUrl, $queue);
// array_unique compares the content strings, so two transactions with
// identical text collapse into one entry.
$queueUnique = array_unique($queueFinal);

// Summary: number of records, number of distinct contents, summed amount.
$totalTransactions = array(
  'records' => count($queueFinal),
  'records_unique' => count($queueUnique),
  'amount' => 0
);

foreach ($queueUnique as $content) {
  preg_match_all('/\$[0-9]+[,.][0-9]*/', $content, $found);
  $firstAmount = $found[0][0];
  // NOTE(review): the comma is removed rather than treated as a decimal
  // separator, so "$149,97" is counted as 14997 — confirm this is intended.
  $digits = str_replace(array('$', ','), '', $firstAmount);
  $totalTransactions['amount'] += (float) $digits;
}

print "Result: `".$totalTransactions['records']." total records`, `".$totalTransactions['records_unique']." unique transaction` and `$".$totalTransactions['amount']."`";

// eof.

Results: 50 total records, 35 unique transaction and $519238.86

Note:
As I understand it, $9064.79 is incorrect,
because:

  1. $149,97 != $149.97
  2. To remove duplicate transactions, we must check for transaction contents instead of links. It may include same transaction details in multiple pages with different links.

@chocolateboy
Copy link

chocolateboy commented Feb 6, 2017

@evowise
Copy link

evowise commented Feb 6, 2017

import json
import requests
import re
# Dollar amount: "$", digits, a "." or "," separator, more digits.
p = re.compile(r'\$\d+[\,,\.]\d+')

def get_sum(t):
 """Return the first dollar amount found in the text ``t`` as a float.

 A decimal comma ("$149,97") is treated like a decimal point.
 Raises ValueError when ``t`` contains no dollar amount.
 """
 # Search the argument itself rather than a module-level variable, so the
 # result depends only on what the caller passes in.
 found = p.search(t)
 if found is None:
  raise ValueError("no dollar amount found in: %r" % (t,))
 # Drop the leading "$" and normalise the separator before converting.
 return float(found.group(0)[1:].replace(',', '.'))

def get_node_data(url):
 """Download ``url`` and return its body parsed as JSON."""
 response = requests.get(url)
 return json.loads(response.text)

# Fetch the starting transaction.
# NOTE(review): keep the name `j` — get_sum may read it as a global instead of
# using its argument; verify before renaming.
j = get_node_data("https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json")
# Ids of the transactions that have already been counted.
s = {j["id"]}
# Running total; note that this name shadows the built-in sum().
sum = get_sum(j["content"])

# Level-by-level walk: `links` holds the URLs of the current level and `x`
# collects the URLs for the next one.
links = j.get("links")
while links:
 x = []
 for l in links:
  # Every link is downloaded; the id in the response decides whether the
  # transaction is new.
  j = get_node_data(l)
  if j["id"] not in s:
   s.add(j["id"])
   print j["content"]
   sum += get_sum(j["content"])
   x.extend(j["links"])
 links = x

print sum

It works! :-) 9064.79

@jorinvo
Copy link
Author

jorinvo commented Feb 6, 2017

Thanks @chocolateboy! Next time :)
But it looks like they discontinued their service? http://www.git.to/

@chocolateboy
Copy link

chocolateboy commented Feb 6, 2017

@jorinvo No, it's still up (it's GitHub's own link shortener). It's git.io rather than git.to, though :-)

@matthew-hallsworth
Copy link

matthew-hallsworth commented Feb 7, 2017

Working PHP solution

<?php

$start = 'https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json';

// Crawl from the start transaction; the result maps transaction id => amount.
$keyed_array = process_links(fetch_and_decode($start), []);

$transaction_count = count($keyed_array);
$total = array_sum($keyed_array);
echo "transactions: " . $transaction_count . ", total: " . $total . "\n";

//--

/**
 * Records the amount of $payload and of every transaction reachable from it.
 *
 * @param object $payload     Decoded transaction with id, content and links.
 * @param array  $keyed_array Amounts collected so far, keyed by transaction id.
 * @return array The collected amounts including this transaction's subgraph.
 */
function process_links($payload, $keyed_array) {

  // Already recorded: its links have been (or are being) followed, so stop
  // here instead of crawling the same subgraph again or looping on a cycle.
  if (array_key_exists($payload->id, $keyed_array)) {
    return $keyed_array;
  }
  $keyed_array[$payload->id] = find_transaction($payload);

  foreach ($payload->links as $link) {
    $keyed_array = process_links(fetch_and_decode($link), $keyed_array);
  }

  return $keyed_array;
}


/**
 * Downloads $url and returns its body decoded from JSON.
 */
function fetch_and_decode($url) {
  $body = file_get_contents($url);
  return json_decode($body);
}

/**
 * Extracts the first dollar amount from the transaction's content and returns
 * it as a string with "." as the decimal separator.
 */
function find_transaction($decoded_transaction) {
  $found = [];
  preg_match('/\$(\d*(\.|,)\d*)/', $decoded_transaction->content, $found);
  $amount = $found[1];
  return str_replace(',', '.', $amount);
}

tested on php 5.6

transactions: 50, total: 9064.79

@jorinvo
Copy link
Author

jorinvo commented Feb 7, 2017

@chocolateboy, I see, cool :) But I won't change the links for this post anymore. Next time!

@aptinio
Copy link

aptinio commented Feb 14, 2017

In Elixir (not concurrent yet):

defmodule DirtyMoney do
  @moduledoc """
  Follows linked transaction documents and totals their dollar amounts.

  Amounts are kept as integer cents in an Agent-held map keyed by
  transaction id, so every transaction is counted once.
  """

  @doc """
  Returns the total, in dollars, of every transaction reachable from `link`.
  """
  def total(link) do
    {:ok, agent} = Agent.start_link(fn -> %{} end)

    try do
      follow(link, agent)

      Agent.get agent, fn transactions ->
        transactions
        |> Map.values
        |> Enum.sum
        |> Kernel./(100)
      end
    after
      # The agent is only needed for this crawl; stop it so it does not
      # outlive the call.
      Agent.stop(agent)
    end
  end

  @doc """
  Fetches the transaction at `link`, records it, and follows its links.

  A transaction whose id is already recorded is not followed again, which
  keeps the crawl finite when transactions link back to each other.
  Returns `:ok`.
  """
  def follow(link, agent) do
    %{"id" => id, "content" => content, "links" => links} =
      link
      |> HTTPoison.get!([], follow_redirect: true)
      |> Map.fetch!(:body)
      |> Poison.decode!

    if Agent.get(agent, &Map.has_key?(&1, id)) do
      :ok
    else
      count(agent, id, content)

      Enum.each(links, fn next ->
        follow(next, agent)
      end)
    end
  end

  @doc """
  Stores the amount parsed from `content` under `id`, unless `id` is already
  present (the content is only parsed when the id is new).
  """
  def count(agent, id, content) do
    Agent.update agent, fn transactions ->
      Map.put_new_lazy transactions, id, fn ->
        parse(content)
      end
    end
  end

  @doc """
  Returns the first dollar amount in `content` as integer cents.

  Both "." and "," are accepted as the separator. A one-digit fraction is
  padded on the right ("$5.5" is 550 cents); longer fractions are used as is.
  """
  def parse(content) do
    [_, whole, part] = Regex.run(~r/\$(\d+)[,.](\d+)/, content)

    cents =
      part
      |> String.pad_trailing(2, "0")
      |> String.to_integer

    cents + String.to_integer(whole) * 100
  end
end

DirtyMoney.total("https://git.io/vDCxb")

@jorinvo
Copy link
Author

jorinvo commented Nov 9, 2017

There are more amazing answers over at dev.to!

@assafmo
Copy link

assafmo commented Jan 1, 2018

bash, jq, wget, grep, sed, awk:

#!/bin/bash
# Downloads the start transaction and everything it links to, then prints the
# sum of all dollar amounts found. Requires jq, wget, grep, sed and awk.
set -u

start_url='https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json'

# Work in a private scratch directory that is removed on exit. A fixed name in
# the current directory could already exist, and carrying on after a failed
# mkdir/cd would download into, and later delete from, the wrong place.
workdir=$(mktemp -d) || exit 1
trap 'rm -rf "$workdir"' EXIT
cd "$workdir" || exit 1

wget -q "$start_url" || exit 1

# Keep fetching linked files until a pass adds no new file. wget -nc skips
# files that already exist, so every transaction is downloaded once.
previous_count=0
current_count=1
while [[ $current_count -ne $previous_count ]]; do
  jq -r '.links[]' ./*.json | xargs wget -nc -q
  previous_count=$current_count
  current_count=$(find . -maxdepth 1 -type f -name '*.json' | wc -l)
done

# Extract each amount, drop the "$", turn decimal commas into points and sum.
jq .content ./*.json | grep -Eo '\$[0-9,.]+[0-9]' | tr -d '$' | sed 's/,/./g' | awk '{sum = sum + $1} END{print sum}'

9064.79 😄

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment