This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import dash
# NOTE(review): dash_deck exposes DeckGL as a component class, not a
# submodule, so the original `import dash_deck.DeckGL as dgl` raises
# ModuleNotFoundError at runtime -- confirm against the dash-deck docs.
from dash_deck import DeckGL as dgl
import pydeck

app = dash.Dash()

# Read Data: GeoJSON of Vancouver city blocks from the deck.gl sample data.
DATA_URL = "https://raw.githubusercontent.com/visgl/deck.gl-data/master/examples/geojson/vancouver-blocks.json"

# Generate polygon layer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import plotly.express as px
import geopandas as gpd
import shapely.geometry
import numpy as np
import wget

# Download a zipped shapefile (Natural Earth 50m rivers/lake centerlines)
# into the current working directory; wget.download keeps the remote name.
wget.download(
    "https://plotly.github.io/datasets/ne_50m_rivers_lake_centerlines.zip"
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np
import pandas as pd
import dask.dataframe as dd
from sklearn.ensemble import GradientBoostingClassifier as GBC
from sklearn.model_selection import train_test_split as tts

# Read the encoded feature set from S3. The "csv" path is a directory of
# part-files, so glob every part into a single lazy dask dataframe; nothing
# is pulled into memory until .compute() is called.
df = dd.read_csv('s3://folder/subfolder/trade_encoded.csv/*.csv')
# reads df to memory
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from iso3166 import countries

# Load trade data.
# BUG FIX: the original chained `.parquet(...)` on a new line without a
# continuation backslash (the df_arms read below had one), which is a
# SyntaxError. Parenthesizing the chain is the robust fix -- no fragile
# trailing backslashes.
df_trade = (
    spark.read.options(header=True, inferSchema=True)
    .parquet('s3://folder/subfolder/trade_data_clean.parquet')
)

# Load known arms traders.
df_arms = (
    spark.read.options(header=True, inferSchema=True)
    .parquet('s3://folder/subfolder/arms_traders_clean.parquet')
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Distinct CONSIGNEE_COUNTRY values seen for known arms dealers. A set gives
# O(1) membership tests inside the per-row UDF below; the original built a
# list via range(len(...)) indexing of collected Rows and tested `in`
# against it, an O(n) scan per row.
arms_CONSIGNEE_COUNTRY = {
    row['CONSIGNEE_COUNTRY']
    for row in df_arms.select('CONSIGNEE_COUNTRY').distinct().collect()
}

# Dataframe that will receive the encoded columns. Spark dataframes are
# immutable, so this is just a second reference, not a copy.
df_encoded = df_trade

# UDF: 1 if the value is a known arms-dealer consignee country, else 0.
# NOTE(review): F.udf defaults to a StringType return, so downstream columns
# hold "1"/"0" strings, not ints -- pass returnType=IntegerType() if numeric
# encoding is required; confirm what the model-prep step expects.
func_CONSIGNEE_COUNTRY = F.udf(lambda x: 1 if x in arms_CONSIGNEE_COUNTRY else 0)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Regexes used to filter the raw trade data.
# All patterns are now raw strings: in the original plain strings `\b` was
# interpreted by Python as the ASCII backspace character (0x08), never as a
# regex word boundary, so the address pattern could not match as intended.

# INN (taxpayer id): 8-12 digits, or a null-ish placeholder.
regexINN = r'(\d{8,12}|None|null|0|00)'
# Any single character that is not a decimal digit.
regexNOTDIGIT = r'[^0-9]'
# Null-ish placeholders, very short words (1-3 letters), or 2+ digit runs.
# BUG FIX: [a-zA-z] -> [a-zA-Z]; the A-z range also matched the punctuation
# characters between 'Z' and 'a' ([ \ ] ^ _ `).
regexADDRESS = r'(null|None)|(\b[a-zA-Z]{1,3}\b)|(\d{2,})'
# Exactly two letters or two digits, or the None placeholder.
regex2CHAR = r'(None|[a-zA-Z]{2}|\d{2})'
# ISO date yyyy-mm-dd, or a null-ish placeholder.
regexDATE = r'(None|null|\d{4}-\d{2}-\d{2})'
# applying regex to the trade data | |
df_trade = df_trade.filter(df_trade['CONSIGNOR_NAME'].rlike(regexNOTDIGIT))\ | |
.filter(df_trade['DECLARATION_NUMBER'].rlike(regexDECLARATION_NUMBER))\ |