Instantly share code, notes, and snippets.

Embed
What would you like to do?
Dirty Money: Code Challenge https://jorin.me/dirtymoney

Follow the dirty money

A shady Internet business has been discovered.

The website has been made public by a whistleblower. We have enough evidence about the dirty deals they did. But to charge them, we need to get our hands on precise numbers about the transactions that happened on their platform.

Unfortunately no record of the transactions could be seized so far. The only hint we have is this one transaction:

fd0d929f-966f-4d1a-89cd-feee5a1c5347.json

What we need is the total of all transactions in dollars. Can you track down all the other transactions and get the total?

Be careful to count each transaction only once, even if it is linked multiple times. You can use whatever tool works best.

Please share the total and your solution below!

Cheers, Jorin

@jorinvo

(For 2015 challenge see https://jorin.me/csv-challenge)

@danieldiekmeier

This comment has been minimized.

danieldiekmeier commented Nov 13, 2016

const axios = require('axios')
const queue = require('async/queue')

// Running total of all transaction amounts, in dollars.
let sum = 0
// URLs already handed to a worker; a Set keeps the membership check O(1).
const visited = new Set()

// Single-worker queue: each task downloads one transaction, adds its amount
// to `sum` and enqueues the links it points to.
const q = queue((url, done) => {
  if (visited.has(url)) return done()
  visited.add(url)
  axios.get(url)
    .then(({data}) => {
      // Amounts are written as "$12.34" or "$12,34"; normalise the comma.
      const match = /\$(\d*(\.|,)\d*)/.exec(data.content)
      if (!match) throw new Error('no dollar amount found in ' + url)
      sum += Number(match[1].replace(',', '.'))
      return data.links
    })
    // Always finish the task: without the rejection handler a failed
    // download would leave the worker busy forever and `drain` would never fire.
    .then(links => {
      q.push(links)
      done()
    }, done)
}, 1)

// Surface failed tasks instead of silently printing an incomplete total.
q.error = (err, url) => { console.error('Failed to process ' + url + ': ' + err.message) }

q.push('https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json')

q.drain = () => { console.log('Total: $' + sum.toFixed(2)) }

Which results in

>>> Total: $9064.79
@jorinvo

This comment has been minimized.

Owner

jorinvo commented Nov 13, 2016

This Go version downloads links in parallel:

package main

import (
	"encoding/json"
	"fmt"
	"log"
	"net/http"
	"regexp"
	"strconv"
	"strings"
	"sync"
)

// workers is the number of goroutines that download transactions in parallel.
const workers = 10

// dollarMatch finds a dollar sign followed by a run of digits, dots and commas.
var dollarMatch = regexp.MustCompile(`\$[0-9.,]+`)

// transaction is the JSON document each URL is decoded into.
type transaction struct {
	// ID is used by main to make sure a transaction is counted only once.
	ID      string
	// Content is free text that main searches for the dollar amount.
	Content string
	// Links are the URLs of further transactions to follow.
	Links   []string
}

// main crawls the transaction graph starting at a fixed URL, sums the dollar
// amount of every distinct transaction ID and prints the count and the total.
func main() {
	start := "https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json"
	var total float64
	// visited is keyed by transaction ID, not by URL.
	visited := map[string]bool{}
	// urls is buffered so the collector goroutine below can enqueue links
	// without waiting for a free worker (up to 1000 pending URLs).
	urls := make(chan string, 1000)
	transactions := make(chan transaction)
	// wg counts URLs that were queued but whose result has not been processed yet.
	var wg sync.WaitGroup

	// download in parallel
	for w := 0; w < workers; w++ {
		go func() {
			for u := range urls {
				t := transaction{}
				err := getJSON(u, &t)
				if err != nil {
					log.Printf("failed to get JSON from '%s': %v", u, err)
				}
				// The (possibly empty) transaction is sent even after an error so
				// that the collector still calls wg.Done for this URL.
				transactions <- t
			}
		}()
	}

	// Collector: the only goroutine that touches visited and total.
	go func() {
		for t := range transactions {
			// An empty ID (e.g. after a failed download) is skipped.
			if t.ID != "" && !visited[t.ID] {
				visited[t.ID] = true
				s := dollarMatch.FindString(t.Content)
				// Drop the dollar sign and any leading/trailing punctuation the
				// pattern picked up, then turn a decimal comma into a dot.
				s = strings.Trim(s, "$,.")
				s = strings.Replace(s, ",", ".", 1)
				dollar, err := strconv.ParseFloat(s, 64)
				if err != nil {
					log.Fatal(err)
				}
				total += dollar
				// Register the new links before queueing them so wg cannot
				// reach zero while work is still outstanding.
				wg.Add(len(t.Links))
				for _, link := range t.Links {
					urls <- link
				}
			}
			// One queued URL has now been fully handled.
			wg.Done()
		}
	}()

	// Seed the crawl with the start URL and wait until every queued URL is handled.
	wg.Add(1)
	urls <- start
	wg.Wait()
	close(urls)
	close(transactions)
	fmt.Printf("transactions: %d, total: $%.2f", len(visited), total)
}

// getJSON downloads url and decodes the JSON response body into target,
// which must be a non-nil pointer.
//
// It returns an error when the request fails, when the server answers with
// a status other than 200 OK, or when the body is not valid JSON for target.
func getJSON(url string, target interface{}) error {
	r, err := http.Get(url)
	if err != nil {
		return err
	}
	defer r.Body.Close()
	// An error page would otherwise be reported as a confusing JSON syntax
	// error, or decode silently into an empty value.
	if r.StatusCode != http.StatusOK {
		return fmt.Errorf("unexpected HTTP status %s", r.Status)
	}
	// target already holds the caller's pointer; pass it through directly so
	// a non-pointer argument is rejected by the decoder instead of being
	// decoded into a throwaway interface value.
	return json.NewDecoder(r.Body).Decode(target)
}
$ go run follow-the-dirty-money.go 
transactions: 50, total: $9064.79
@jimfoltz

This comment has been minimized.

jimfoltz commented Feb 5, 2017

# Ruby
require "open-uri"
require 'json'

# Recursively downloads the transaction at +url+ and everything it links to.
#
# url     - String URL of a transaction JSON document.
# txns    - Hash that is filled with transaction id => content text.
# visited - Array of URLs that were already downloaded (shared across the
#           recursion so each URL is fetched only once).
#
# Returns nil when the URL was visited before.
def get_json(url, txns, visited = [])
   return if visited.include?(url)
   visited << url
   # URI.open is the explicit open-uri entry point; Kernel#open no longer
   # accepts URLs since Ruby 3.0 and would also run "|command" strings.
   txn = JSON.parse(URI.open(url).read)
   txns[txn['id']] = txn['content']
   txn['links'].each {|link| get_json(link, txns, visited)}
end

url = "https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json"
# Transaction id => content text, filled by get_json.
txns = {}

get_json(url, txns)

sum = 0
txns.each { |id, txn|
   # Accept only "." or "," as the decimal separator; a bare "." in the
   # pattern would match any character (e.g. the space in "$12 34").
   money = txn.match(/\$(\d+[.,]\d+)/)
   amt = money[1].gsub(',', '.').to_f
   sum += amt
}

puts "Transactions: #{txns.keys.size}"
puts "total: $%.2f\n" % sum
Transactions: 50
total: $9064.79

Corrected.

@andreasgruenh

This comment has been minimized.

andreasgruenh commented Feb 5, 2017

import 'isomorphic-fetch';

const start = 'https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json'

// getSum(visited)(url) resolves to the amount of the transaction at `url`
// plus the amounts of all transactions reachable from it that have not been
// seen before. `visited` is a shared array of URLs already claimed by some
// branch of the traversal.
const getSum = visited => async url => {
  const node = await fetch(url).then(resp => resp.json());
  // De-duplicate the links of this node first: a URL listed twice in the same
  // node is not in `visited` yet for either occurrence and would otherwise be
  // fetched and counted twice.
  const urls = [...new Set(node.links)].filter(link => !visited.includes(link));
  // Claim the URLs synchronously, before any await, so parallel branches skip them.
  visited.push(...urls)
  // Amounts are written as "$12.34" or "$12,34".
  const thisAmount = parseFloat(node.content.match(/\$(\d+[,.]\d\d?)/)[1].replace(',', '.'));
  return (await Promise.all(urls.map(getSum(visited)))).reduce((a, b) => a + b, thisAmount);
}

getSum([start])(start).then(result => console.log(result.toFixed(2))).catch(e => console.error(e));
@mrjbq7

This comment has been minimized.

mrjbq7 commented Feb 5, 2017

In Factor:

USING: assocs http.client json.reader kernel locals math
math.parser regexp sequences sets splitting ;

! Extracts the first dollar amount from str and converts it to a number.
! The match includes the leading "$", which `rest` drops; a decimal comma
! is replaced by a dot before parsing.
: dollars ( str -- $ )
    R/ \$\d*[,.]\d+/ first-match rest
    "," "." replace string>number ;

! Sums the amount of the transaction at url and of every transaction
! reachable through its links. url is first added to the visited set; when
! it was already present the word yields 0 without downloading anything.
:: transaction ( url visited -- dollars )
    url visited ?adjoin [
        url http-get nip json> :> data
        data "content" of dollars
        data "links" of [ visited transaction ] map-sum +
    ] [ 0 ] if ;

! Entry point: crawls from url with a fresh, empty visited set and leaves the
! total and the number of entries in that set on the stack.
: transactions ( url -- dollars #transactions )
    HS{ } clone [ transaction ] [ cardinality ] bi ;

Using it:

IN: scratchpad "https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json"
               transactions "$%.2f in %d transactions.\n" printf
$9064.79 in 50 transactions.
@st0le

This comment has been minimized.

st0le commented Feb 5, 2017

from requests import get
from re import search

src = "https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json"
# Maps every visited URL to the amount of its transaction.
visited = {}
# Depth-first traversal with an explicit stack of URLs still to fetch.
stack = [src]
while stack:
    url = stack.pop()
    if url not in visited:
        transaction = get(url).json()
        desc = transaction["content"]
        # Raw string: "\$" and "\d" are invalid escapes in a normal string
        # literal and trigger a SyntaxWarning on current Python versions.
        money = search(r"\$(\d+[,\.]\d+)", desc).group(1)
        # Some amounts use a decimal comma.
        money = money.replace(',','.')
        visited[url] = float(money)
        for link in transaction["links"]:
            stack.append(link)

print(sum(visited.values()))

Output : 9064.789999999997

@adamantmc

This comment has been minimized.

adamantmc commented Feb 6, 2017

import requests
import json
import re

# Dollar sign followed by an amount written with "." or "," as separator.
regex = re.compile(r"\$[0-9]*[,.][0-9]*")
visited_links = set()
queue = ["https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json"]
total = 0
transactions = 0

# The list is extended while it is being iterated, so the loop keeps running
# until no unvisited link is left (breadth-first traversal).
for link in queue:
    # Check before downloading: a link that was already handled must not
    # cost another HTTP request.
    if link in visited_links:
        continue
    visited_links.add(link)
    obj = json.loads(requests.get(link).text)
    transactions += 1
    queue.extend(obj["links"])
    # Strip the leading "$" and normalise a decimal comma.
    total += float(regex.findall(obj["content"])[0][1:].replace(",","."))

print("Total: " + str(total))
print("Transactions: " + str(transactions))

Output:
Total: 9064.789999999999
Transactions: 50

@XANi

This comment has been minimized.

XANi commented Feb 6, 2017

Ugly Perl version:

use JSON;
use LWP::Simple;
use List::Util qw(sum);

# Hash reference filled by p(): transaction id => content text.
my $tr_list;
p('https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json');
# Match the amount itself with either separator instead of first rewriting
# the first comma anywhere in the text, and fail loudly when no amount is
# found rather than reusing a stale $1 from an earlier match.
my @tr_val = map {
    m/\$(\d+)[.,](\d+)/ or die "no dollar amount in: $_\n";
    "$1.$2"
} values %$tr_list;
print "Sum: " . sum(@tr_val) . " count: " . scalar @tr_val . "\n";

# Downloads the transaction at the given URL, records its content in
# $tr_list under the transaction id and follows all of its links.
# Transactions whose id is already recorded are not followed again.
sub p {
    my $url  = shift;
    my $body = get($url);
    die "failed to download $url\n" unless defined $body;
    my $tr = decode_json($body);
    return if defined $tr_list->{ $tr->{'id'} };
    # Record the transaction BEFORE recursing: when links form a cycle the
    # second visit then stops here instead of recursing forever.
    $tr_list->{ $tr->{'id'} } = $tr->{'content'};
    for my $link ( @{ $tr->{'links'} } ) {
        p($link);
    }
}

Sum: 9064.79 count: 50

@fGeorjje

This comment has been minimized.

fGeorjje commented Feb 6, 2017

Kotlin:

import com.google.gson.Gson
import java.math.BigDecimal
import java.net.URL
import java.util.*

/** One transaction document as decoded from JSON: its id, its free-text content and the URLs it links to. */
data class Transaction(val id: String, val content: String, val links: List<String>)
/**
 * Walks the transaction graph breadth-first from the start URL, downloading
 * each distinct URL once, and prints the exact decimal sum of all amounts.
 */
fun main(args: Array<String>) {
  val pending = LinkedList<String>(listOf("https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json"))
  val seen = HashSet<String>()
  var total = BigDecimal.ZERO
  while (true) {
    // poll() yields null once the queue is drained, which ends the walk.
    val url = pending.poll() ?: break
    // add() returns false for a URL that was already processed.
    if (!seen.add(url)) continue
    val t = Gson().fromJson(URL(url).readText(), Transaction::class.java)
    pending.addAll(t.links)
    // Decimal commas are normalised to dots before the amount is extracted.
    val amount = Regex("\\$(\\d+\\.\\d+)").find(t.content.replace(',', '.'))!!.groupValues[1]
    total += BigDecimal(amount)
  }
  println(total)
}

9064.79

@thinkholic

This comment has been minimized.

thinkholic commented Feb 6, 2017

a simple PHP version;


<?php
/**
 * Recursively collects transactions starting at $reqUrl.
 *
 * @param string $reqUrl URL (or path) of a transaction JSON document.
 * @param array  $queue  Filled by reference with transaction id => content.
 * @return array The same id => content map that was accumulated in $queue.
 */
function processTransactions($reqUrl, &$queue) {
  $response = json_decode(file_get_contents($reqUrl));

  $id = $response->id;
  // A transaction that is already recorded has had its links followed too;
  // stopping here avoids re-walking shared sub-graphs and endless recursion
  // when links form a cycle.
  if (array_key_exists($id, $queue)) {
    return $queue;
  }
  $queue[$id] = $response->content;

  foreach ($response->links as $link) {
    processTransactions($link, $queue);
  }

  return $queue;
}

$initialReqUrl = "https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json";
$queue = array();
$queueFinal = processTransactions($initialReqUrl, $queue);
// Transactions with identical content text are collapsed into one entry.
$queueUnique = array_unique($queueFinal);

// Extract one amount per unique content; the "$" and "," characters are
// stripped before the float cast (so "$149,97" is read as 14997).
$amounts = array();
foreach ($queueUnique as $content) {
  preg_match_all('/\$[0-9]+[,.][0-9]*/', $content, $found);
  $amounts[] = (float) str_replace(array('$',','), '', $found[0][0]);
}

// Summary figures for the report line below.
$totalTransactions = array(
  'records' => count($queueFinal),
  'records_unique' => count($queueUnique),
  'amount' => array_sum($amounts)
);

print "Result: `".$totalTransactions['records']." total records`, `".$totalTransactions['records_unique']." unique transaction` and `$".$totalTransactions['amount']."`";

// eof.

Results: 50 total records, 35 unique transaction and $519238.86

Note:
As I understand it, $9064.79 is incorrect.
Because;

  1. $149,97 != $149.97
  2. To remove duplicate transactions, we must compare the transaction contents instead of the links. The same transaction details may appear on multiple pages with different links.
@chocolateboy

This comment has been minimized.

@evowise

This comment has been minimized.

evowise commented Feb 6, 2017

import json
import requests
import re
# Dollar sign followed by an amount that uses "," or "." as decimal separator.
p = re.compile(r'\$\d+[,.]\d+')

def get_sum(t):
 """Return the first dollar amount found in the content text ``t`` as a float.

 A decimal comma is treated like a decimal point. Raises AttributeError
 when ``t`` contains no amount.
 """
 # Search the argument itself; the previous version ignored ``t`` and read
 # the module-level ``j`` instead, so it only worked by coincidence.
 amount = p.search(t).group(0)
 # Drop the leading "$" and normalise the separator.
 return float(amount[1:].replace(',', '.'))

def get_node_data(url):
 """Download ``url`` and return its body parsed as JSON."""
 response = requests.get(url)
 body = response.text
 return json.loads(body)
start = "https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json"
j = get_node_data(start)
# Ids of transactions that were already counted.
s = {j["id"]}
# URLs that were already downloaded, so repeated links cost no extra request.
fetched = {start}
# Named ``total`` so the builtin ``sum`` is not shadowed.
total = get_sum(j["content"])

# Breadth-first walk: ``links`` holds the current level, ``x`` collects the next.
links = j.get("links")
while links:
 x = []
 for l in links:
  if l in fetched:
   continue
  fetched.add(l)
  j = get_node_data(l)
  if j["id"] not in s:
   s.add(j["id"])
   # Parenthesised single-argument print works on Python 2 and Python 3.
   print(j["content"])
   total += get_sum(j["content"])
   x.extend(j["links"])
 links = x

print(total)

It works! :-) 9064.79

@jorinvo

This comment has been minimized.

Owner

jorinvo commented Feb 6, 2017

Thanks @chocolateboy! Next time :)
But it looks like they discontinued their service? http://www.git.to/

@chocolateboy

This comment has been minimized.

chocolateboy commented Feb 6, 2017

@jorinvo No, it's still up (it's GitHub's own link shortener). It's git.io rather than git.to, though :-)

@matthew-hallsworth

This comment has been minimized.

matthew-hallsworth commented Feb 7, 2017

Working PHP solution

<?php

// Crawl from the known transaction and report how many were found and their sum.
$start = 'https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json';
$original = fetch_and_decode($start);

// Transaction id => amount, accumulated over the whole graph.
$keyed_array = process_links($original, []);

$transaction_count = count($keyed_array);
$total = array_sum($keyed_array);
echo "transactions: " . $transaction_count . ", total: " . $total . "\n";

//--

/**
 * Records the amount of $payload and of every transaction reachable from it.
 *
 * @param object $payload     Decoded transaction with id, content and links.
 * @param array  $keyed_array Transaction id => amount collected so far.
 * @return array The map extended with all newly discovered transactions.
 */
function process_links($payload, $keyed_array) {

  // A transaction that is already recorded has had its links followed as
  // well. Returning here avoids downloading shared sub-graphs again and
  // keeps the recursion finite when links form a cycle.
  if (array_key_exists($payload->id, $keyed_array)) {
    return $keyed_array;
  }

  $keyed_array[$payload->id] = find_transaction($payload);

  foreach($payload->links as $link) {
    $link_fetched = fetch_and_decode($link);
    $keyed_array = process_links($link_fetched, $keyed_array);
  }

  return $keyed_array;
}


/**
 * Reads the document at $url and returns it decoded from JSON.
 */
function fetch_and_decode($url) {
  $raw_json = file_get_contents($url);
  $decoded = json_decode($raw_json);
  return $decoded;
}

/**
 * Extracts the dollar amount from a transaction's content text and returns
 * it as a numeric string with "." as decimal separator.
 */
function find_transaction($decoded_transaction) {
  $text = $decoded_transaction->content;
  preg_match('/\$(\d*(\.|,)\d*)/', $text, $found);
  // Group 1 is the amount without the leading "$".
  $amount = $found[1];
  $normalised = str_replace(',', '.', $amount);
  return $normalised;
}

tested on php 5.6

transactions: 50, total: 9064.79

@jorinvo

This comment has been minimized.

Owner

jorinvo commented Feb 7, 2017

@chocolateboy, I see, cool :) But I won't change the links for this post anymore. Next time!

@aptinio

This comment has been minimized.

aptinio commented Feb 14, 2017

In Elixir (not concurrent yet):

defmodule DirtyMoney do
  @moduledoc """
  Follows linked transaction documents and sums their dollar amounts.

  Every transaction is a JSON document with an `"id"`, a free-text
  `"content"` containing an amount such as `$12.34` or `$12,34`, and a list
  of `"links"` to further transactions. Amounts are kept as integer cents in
  an `Agent`, keyed by transaction id, so each transaction counts only once.
  """

  @doc """
  Returns the total in dollars of all transactions reachable from `link`.
  """
  @spec total(String.t()) :: float()
  def total(link) do
    {:ok, agent} = Agent.start_link(fn -> %{} end)

    try do
      follow(link, agent)

      agent
      |> Agent.get(&Map.values/1)
      |> Enum.sum()
      |> Kernel./(100)
    after
      # The agent only exists for this one traversal; do not leave it running.
      Agent.stop(agent)
    end
  end

  @doc """
  Downloads the transaction at `link`, records it in `agent` and follows its
  links.

  A transaction whose id is already recorded is not followed again, so shared
  links are not re-downloaded over and over and link cycles terminate.
  """
  @spec follow(String.t(), Agent.agent()) :: :ok
  def follow(link, agent) do
    %{"id" => id, "content" => content, "links" => links} =
      link
      |> HTTPoison.get!([], follow_redirect: true)
      |> Map.fetch!(:body)
      |> Poison.decode!()

    if counted?(agent, id) do
      :ok
    else
      count(agent, id, content)
      Enum.each(links, &follow(&1, agent))
    end
  end

  @doc """
  Stores the amount parsed from `content` under `id`, unless `id` is already
  present in the agent's state.
  """
  @spec count(Agent.agent(), String.t(), String.t()) :: :ok
  def count(agent, id, content) do
    Agent.update(agent, fn transactions ->
      # Lazy so the content is only parsed for transactions not seen before.
      Map.put_new_lazy(transactions, id, fn -> parse(content) end)
    end)
  end

  @doc """
  Extracts the first dollar amount from `content` and returns it in cents.

  Both `.` and `,` are accepted as decimal separator. Raises `MatchError`
  when `content` contains no amount.
  """
  @spec parse(String.t()) :: integer()
  def parse(content) do
    [_, whole, part] = Regex.run(~r/\$(\d+)[,.](\d+)/, content)

    # "5" after the separator means 50 cents, so pad to two digits.
    cents =
      part
      |> String.pad_trailing(2, "0")
      |> String.to_integer()

    cents + String.to_integer(whole) * 100
  end

  # True when a transaction with this id has already been recorded.
  defp counted?(agent, id), do: Agent.get(agent, &Map.has_key?(&1, id))
end

DirtyMoney.total("https://git.io/vDCxb")
@jorinvo

This comment has been minimized.

Owner

jorinvo commented Nov 9, 2017

There are more amazing answers over at dev.to!

@assafmo

This comment has been minimized.

assafmo commented Jan 1, 2018

bash, jq, wget, grep, sed, awk:

#!/bin/bash
# Downloads the start transaction and, round by round, every transaction it
# links to, until a round adds no new file. Then sums all dollar amounts.
set -euo pipefail

start_url='https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json'

# Work in a private temporary directory: a fixed "xyz" directory may already
# exist, and if entering it failed the script would download into (and run
# jq over) the current directory. The trap cleans up on failure as well.
workdir=$(mktemp -d)
trap 'rm -rf "$workdir"' EXIT
cd "$workdir"

wget -q "$start_url"

# Number of files before and after the latest download round.
previous=0
current=1
while [[ $current -ne $previous ]]; do
  previous=$current
  # -nc skips files that already exist, so each transaction is fetched once.
  # Failures of individual downloads are tolerated, as before.
  jq -r '.links[]' ./*.json | xargs -r wget -q -nc || true
  current=$(find . -maxdepth 1 -name '*.json' | wc -l)
done

# Pull "$12.34" / "$12,34" out of every content, normalise the separator, add up.
jq .content ./*.json | grep -Eo '\$[0-9,.]+[0-9]' | tr -d '$' | sed 's/,/./g' | awk '{sum = sum + $1} END{print sum}'

9064.79 😄

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment