Sh4kE/.gitignore

## .gitignore
# Created by .ignore support plugin (hsz.mobi)
### VirtualEnv template
# Virtualenv
# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
.Python
[Bb]in
[Ii]nclude
[Ll]ib
[Ll]ib64
[Ll]ocal
[Ss]cripts
pyvenv.cfg
.venv
pip-selfcheck.json
### Linux template
*~

# temporary files which can be created if a process still has a handle open of a deleted file
.fuse_hidden*

# KDE directory preferences
.directory

# Linux trash folder which might appear on any partition or disk
.Trash-*

# .nfs files are created when an open file is removed but is still being accessed
.nfs*
### Example user template template
### Example user template

# IntelliJ project files
.idea
*.iml
out
gen### Python template
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# dotenv
.env

# virtualenv
.venv/
venv/
ENV/

# Spyder project settings
.spyderproject

# Rope project settings
.ropeproject
### JetBrains template
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839

# User-specific stuff:
.idea/workspace.xml
.idea/tasks.xml

# Sensitive or high-churn files:
.idea/dataSources/
.idea/dataSources.ids
.idea/dataSources.xml
.idea/dataSources.local.xml
.idea/sqlDataSources.xml
.idea/dynamic.xml
.idea/uiDesigner.xml

# Gradle:
.idea/gradle.xml
.idea/libraries

# Mongo Explorer plugin:
.idea/mongoSettings.xml

## File-based project format:
*.iws

## Plugin-specific files:

# IntelliJ
/out/

# mpeltonen/sbt-idea plugin
.idea_modules/

# JIRA plugin
atlassian-ide-plugin.xml

# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties

# ofm transfers
transfers/

## convert_xls_to_csv.sh
#!/bin/bash
for FILE in *.xls; do
    FILENAME=$(echo $FILE | awk '{split($0,a,"."); print a[1]}')
    if [ -e "$FILENAME.csv" ]
    then
        echo "File already exists: $FILENAME.csv"
    else
        # convert to csv
        libreoffice --headless --convert-to csv $FILE

        START=$(grep -n Nr. $FILENAME.csv | awk '{split($0,a,":"); print a[1]}')

        # drop first comment line
        tail -n +$START "$FILENAME.csv" > "$FILENAME.tmp"

        # remove csv in order to append to it later
        rm -f "$FILENAME.csv"

        # convert from windows encoding to utf-8
        iconv -f cp1252 -t utf-8  "$FILENAME.tmp" > "$FILENAME.csv"

        # delete temporary file
        rm -f "$FILENAME.tmp"
    fi
done

## player_prices.py
import os

import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import numpy as np
import pandas as pd
from matplotlib import style


style.use('ggplot')

TRANSFERS_DIR = 'transfers'


def load_data():
    main_df = pd.DataFrame()
    for file in os.listdir(TRANSFERS_DIR):
        if file.endswith('csv'):
            df = pd.read_csv('{}/{}'.format(TRANSFERS_DIR, file),
                             index_col=0,
                             dtype={7: np.int32, 8: np.int32, 9: np.int32},
                             skip_blank_lines=True,
                             )
            df.drop(df.columns[[2, 3, 4]], axis=1, inplace=True)

            df = df.rename(columns={df.columns[0]: "Matchday",
                                    df.columns[1]: "Season",
                                    df.columns[2]: "Position",
                                    df.columns[3]: "Age",
                                    df.columns[4]: "Strength",
                                    df.columns[5]: "Price",
                                    })

            if main_df.empty:
                main_df = df
            else:
                main_df = main_df.append(df)

    return main_df


def filter_transfers(df, positions=None, ages=None, strengths=None, seasons=None, matchdays=None):
    filtered_df = df.copy()
    if positions:
        filtered_df = df[df.Position.isin(positions)]
    if ages:
        filtered_df = filtered_df[filtered_df.Age.isin(ages)]
    if strengths:
        filtered_df = filtered_df[filtered_df.Strength.isin(strengths)]
    if seasons:
        filtered_df = filtered_df[filtered_df.Season.isin(seasons)]
    if matchdays:
        filtered_df = filtered_df[filtered_df.Matchday.isin(matchdays)]
    return filtered_df


def get_prices_grouped_by_strength(position='MS', age=33):
    df = filter_transfers(load_data(),
                          positions=[position],
                          ages=[age],
                          )

    return df.groupby('Strength').Price


def get_prices_grouped_by_age(position='MS', strength=16):
    df = filter_transfers(load_data(),
                          positions=[position],
                          strengths=[strength],
                          )

    return df.groupby('Age').Price


def draw_prices(prices, title, xlabel, ylabel='Preis'):
    x = np.array(prices.mean().index)
    y = prices.mean()
    yerr = prices.std()

    fig = plt.figure()
    ax = fig.add_subplot(111)

    plt.errorbar(x, y, yerr=yerr, fmt='o')

    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(title)

    ax.yaxis.set_major_formatter(mtick.StrMethodFormatter('{x:,.0f}'))

    plt.show()

if __name__ == '__main__':
    #draw_prices(get_prices_grouped_by_age(),
    #            title="Preise: MS/16er",
    #            xlabel='Alter')

    draw_prices(get_prices_grouped_by_strength(),
                title="Preise: MS, 33 Jahre",
                xlabel='Stärke')

## requirements.txt
pandas
pandas-datareader
matplotlib
numpy
BeautifulSoup4
sklearn
xlrd
	# Created by .ignore support plugin (hsz.mobi)
	### VirtualEnv template
	# Virtualenv
	# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
	.Python
	[Bb]in
	[Ii]nclude
	[Ll]ib
	[Ll]ib64
	[Ll]ocal
	[Ss]cripts
	pyvenv.cfg
	.venv
	pip-selfcheck.json
	### Linux template
	*~

	# temporary files which can be created if a process still has a handle open of a deleted file
	.fuse_hidden*

	# KDE directory preferences
	.directory

	# Linux trash folder which might appear on any partition or disk
	.Trash-*

	# .nfs files are created when an open file is removed but is still being accessed
	.nfs*
	### Example user template template
	### Example user template

	# IntelliJ project files
	.idea
	*.iml
	out
	gen### Python template
	# Byte-compiled / optimized / DLL files
	__pycache__/
	*.py[cod]
	*$py.class

	# C extensions
	*.so

	# Distribution / packaging
	env/
	build/
	develop-eggs/
	dist/
	downloads/
	eggs/
	.eggs/
	lib/
	lib64/
	parts/
	sdist/
	var/
	*.egg-info/
	.installed.cfg
	*.egg

	# PyInstaller
	# Usually these files are written by a python script from a template
	# before PyInstaller builds the exe, so as to inject date/other infos into it.
	*.manifest
	*.spec

	# Installer logs
	pip-log.txt
	pip-delete-this-directory.txt

	# Unit test / coverage reports
	htmlcov/
	.tox/
	.coverage
	.coverage.*
	.cache
	nosetests.xml
	coverage.xml
	*,cover
	.hypothesis/

	# Translations
	*.mo
	*.pot

	# Django stuff:
	*.log
	local_settings.py

	# Flask stuff:
	instance/
	.webassets-cache

	# Scrapy stuff:
	.scrapy

	# Sphinx documentation
	docs/_build/

	# PyBuilder
	target/

	# Jupyter Notebook
	.ipynb_checkpoints

	# pyenv
	.python-version

	# celery beat schedule file
	celerybeat-schedule

	# dotenv
	.env

	# virtualenv
	.venv/
	venv/
	ENV/

	# Spyder project settings
	.spyderproject

	# Rope project settings
	.ropeproject
	### JetBrains template
	# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm
	# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839

	# User-specific stuff:
	.idea/workspace.xml
	.idea/tasks.xml

	# Sensitive or high-churn files:
	.idea/dataSources/
	.idea/dataSources.ids
	.idea/dataSources.xml
	.idea/dataSources.local.xml
	.idea/sqlDataSources.xml
	.idea/dynamic.xml
	.idea/uiDesigner.xml

	# Gradle:
	.idea/gradle.xml
	.idea/libraries

	# Mongo Explorer plugin:
	.idea/mongoSettings.xml

	## File-based project format:
	*.iws

	## Plugin-specific files:

	# IntelliJ
	/out/

	# mpeltonen/sbt-idea plugin
	.idea_modules/

	# JIRA plugin
	atlassian-ide-plugin.xml

	# Crashlytics plugin (for Android Studio and IntelliJ)
	com_crashlytics_export_strings.xml
	crashlytics.properties
	crashlytics-build.properties
	fabric.properties

	# ofm transfers
	transfers/
	#!/bin/bash
	for FILE in *.xls; do
	FILENAME=$(echo $FILE \| awk '{split($0,a,"."); print a[1]}')
	if [ -e "$FILENAME.csv" ]
	then
	echo "File already exists: $FILENAME.csv"
	else
	# convert to csv
	libreoffice --headless --convert-to csv $FILE

	START=$(grep -n Nr. $FILENAME.csv \| awk '{split($0,a,":"); print a[1]}')

	# drop first comment line
	tail -n +$START "$FILENAME.csv" > "$FILENAME.tmp"

	# remove csv in order to append to it later
	rm -f "$FILENAME.csv"

	# convert from windows encoding to utf-8
	iconv -f cp1252 -t utf-8 "$FILENAME.tmp" > "$FILENAME.csv"

	# delete temporary file
	rm -f "$FILENAME.tmp"
	fi
	done
	import os

	import matplotlib.pyplot as plt
	import matplotlib.ticker as mtick
	import numpy as np
	import pandas as pd
	from matplotlib import style


	style.use('ggplot')

	TRANSFERS_DIR = 'transfers'


	def load_data():
	main_df = pd.DataFrame()
	for file in os.listdir(TRANSFERS_DIR):
	if file.endswith('csv'):
	df = pd.read_csv('{}/{}'.format(TRANSFERS_DIR, file),
	index_col=0,
	dtype={7: np.int32, 8: np.int32, 9: np.int32},
	skip_blank_lines=True,
	)
	df.drop(df.columns[[2, 3, 4]], axis=1, inplace=True)

	df = df.rename(columns={df.columns[0]: "Matchday",
	df.columns[1]: "Season",
	df.columns[2]: "Position",
	df.columns[3]: "Age",
	df.columns[4]: "Strength",
	df.columns[5]: "Price",
	})

	if main_df.empty:
	main_df = df
	else:
	main_df = main_df.append(df)

	return main_df


	def filter_transfers(df, positions=None, ages=None, strengths=None, seasons=None, matchdays=None):
	filtered_df = df.copy()
	if positions:
	filtered_df = df[df.Position.isin(positions)]
	if ages:
	filtered_df = filtered_df[filtered_df.Age.isin(ages)]
	if strengths:
	filtered_df = filtered_df[filtered_df.Strength.isin(strengths)]
	if seasons:
	filtered_df = filtered_df[filtered_df.Season.isin(seasons)]
	if matchdays:
	filtered_df = filtered_df[filtered_df.Matchday.isin(matchdays)]
	return filtered_df


	def get_prices_grouped_by_strength(position='MS', age=33):
	df = filter_transfers(load_data(),
	positions=[position],
	ages=[age],
	)

	return df.groupby('Strength').Price


	def get_prices_grouped_by_age(position='MS', strength=16):
	df = filter_transfers(load_data(),
	positions=[position],
	strengths=[strength],
	)

	return df.groupby('Age').Price


	def draw_prices(prices, title, xlabel, ylabel='Preis'):
	x = np.array(prices.mean().index)
	y = prices.mean()
	yerr = prices.std()

	fig = plt.figure()
	ax = fig.add_subplot(111)

	plt.errorbar(x, y, yerr=yerr, fmt='o')

	plt.xlabel(xlabel)
	plt.ylabel(ylabel)
	plt.title(title)

	ax.yaxis.set_major_formatter(mtick.StrMethodFormatter('{x:,.0f}'))

	plt.show()

	if __name__ == '__main__':
	#draw_prices(get_prices_grouped_by_age(),
	# title="Preise: MS/16er",
	# xlabel='Alter')

	draw_prices(get_prices_grouped_by_strength(),
	title="Preise: MS, 33 Jahre",
	xlabel='Stärke')
	pandas
	pandas-datareader
	matplotlib
	numpy
	BeautifulSoup4
	sklearn
	xlrd