I hereby claim:
- I am jp-um on github.
- I am drjpe (https://keybase.io/drjpe) on keybase.
- I have a public key ASCzx7aB-lKTJNkyBnqaU3YYoguZxPo4EuoolQ0XcPRWrQo
To claim this, I am signing this object:
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "0394f140", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", |
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "2fa36238", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd" |
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "76c3eddf", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from rdkit import Chem\n", |
I hereby claim:
To claim this, I am signing this object:
I hereby claim:
To claim this, I am signing this object:
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from rdkit import Chem\n", | |
"from rdkit.Chem import AllChem\n", |
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"collapsed": false | |
}, | |
"source": [ | |
"# A very short tutorial for ipython \n" | |
] |
{"cells":[{"cell_type":"code","source":["from pyspark.sql.functions import udf, col, lit\nfrom pyspark.sql.types import IntegerType, StringType\nfrom datetime import datetime, date\nimport calendar\nfrom re import match\n\ncrimes = sqlContext.read.format('com.databricks.spark.csv').options(header='true', delimiter=',').load('/tmp/data/uk_crimes.csv')"],"metadata":{},"outputs":[],"execution_count":1},{"cell_type":"code","source":["def clean_month_col(value):\n if value is not None:\n if match(r'\\d{4}-\\d{2}', value):\n return value\n return None\n\nudf_clean_month_col = udf(clean_month_col, StringType())\ncrimes = crimes.withColumn('Month', udf_clean_month_col('Month'))"],"metadata":{},"outputs":[],"execution_count":2},{"cell_type":"code","source":["def month_from_date(value):\n value = datetime.strptime(value, '%Y-%m')\n return value.month\n\nudf_month_from_date = udf(month_from_date, IntegerType())\nmonth_df = crimes.where(col('Month').isNotNull()).withColumn('Month', udf_month_from_date('Month' |