@bbzzzz
bbzzzz / Working with World Bank API
Last active August 29, 2015 14:11
This work extracts open data from the World Bank API to study the relationship between fertility rate, public expenditure on education, and GDP. Data is stored in a MySQL database for convenient interoperation between Python and R; visualization is done with the ggplot2 package in R.
# -*- coding: utf-8 -*-
#### Author: Bohan Zhang | The Business Analytics Program of the George Washington University
#### Python Part
import wbdata
import pandas as pd
import datetime
import MySQLdb as myDB
#### test if data for certain indicator, country and year is available
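The preview cuts off before the availability check itself. A minimal sketch of such a test, assuming the response shape wbdata returns (a list of dicts with 'date' and 'value' keys; the `sample` data below is hypothetical), might look like:

```python
def has_data(records):
    """Return True if any observation in a wbdata-style response has a non-null value.

    `records` is assumed to be a list of dicts like those returned by
    wbdata.get_data, each with a 'date' and 'value' key.
    """
    return any(r.get("value") is not None for r in records)

# Hypothetical response for one indicator/country pair:
sample = [
    {"date": "2010", "value": 1.9},
    {"date": "2011", "value": None},
]
print(has_data(sample))  # True: at least one year has a value
```

Checking availability on the already-fetched records avoids a second round trip to the API for each indicator.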
@bbzzzz
bbzzzz / Job Aggregator
Last active February 22, 2022 14:17
This work collects job listings from four major job-search websites via web scraping and APIs, and aggregates the search results into a single output. Python modules used include BeautifulSoup, urllib2, and xmltodict.
# -*- coding: utf-8 -*-
# Contributors: Lucas Laviolet, Nisha Iyer, Mikhail Flom, and Bohan Zhang
# Part 0 Preparation
#-------------------------------------------------------------------------------------------------
import urllib2
from bs4 import BeautifulSoup
import pandas as pd
# Set up server on user's computer for OAuth 2.0 based authentication and authorization
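The aggregation step itself is not shown in the preview. A sketch of merging per-site results into one output, assuming each site's scraper returns a list of job dicts (the 'title'/'url' schema and the site names below are hypothetical), could be:

```python
def aggregate_listings(results_by_site):
    """Flatten per-site job results into one combined list,
    tagging each row with the site it came from.

    `results_by_site` maps a site name to a list of job dicts
    (hypothetical schema: 'title' and 'url' keys).
    """
    combined = []
    for site, jobs in results_by_site.items():
        for job in jobs:
            row = dict(job)          # copy so the source lists are untouched
            row["source"] = site
            combined.append(row)
    return combined

listings = aggregate_listings({
    "site_a": [{"title": "Data Analyst", "url": "http://example.com/1"}],
    "site_b": [{"title": "Data Scientist", "url": "http://example.com/2"}],
})
print(len(listings))  # 2
```

Tagging each row with its source keeps the combined output traceable back to the originating site, which the gist's single aggregated result would need.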
@bbzzzz
bbzzzz / WordNet Interface
Created March 2, 2015 16:21
Natural Language Processing - Word Meaning and Word Similarity
{
"metadata": {
"name": "Wordnet Interface"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
@bbzzzz
bbzzzz / Word Similarity
Last active August 29, 2015 14:16
Cosine Similarity, for NLP class presentation - ipython notebook version: http://nbviewer.ipython.org/gist/bozhang0504/5f67575d1397416b0f3d
import nltk
from nltk.corpus import wordnet as wn
### Synsets and lemmas
# An arbitrary word, e.g. 'dog', may have several senses; wn.synsets lists them.
wn.synsets('dog')
# Once you have a synset, there are functions to retrieve information about it;
# we will start with lemma_names, lemmas, definition and examples.
# For the first synset, 'dog.n.01' (the first noun sense of 'dog'), we can list all of its words/lemma names.
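The gist's cosine-similarity step can be illustrated without NLTK: a minimal sketch over bag-of-words vectors (the example sentences are made up; NLTK's own path- or Wu-Palmer-based WordNet similarities work differently):

```python
import math
from collections import Counter

def cosine_similarity(a, b):
    """Cosine similarity between two bags of words (mappings of term -> count)."""
    dot = sum(a[t] * b.get(t, 0) for t in a)
    norm_a = math.sqrt(sum(v * v for v in a.values()))
    norm_b = math.sqrt(sum(v * v for v in b.values()))
    if norm_a == 0 or norm_b == 0:
        return 0.0
    return dot / (norm_a * norm_b)

v1 = Counter("the dog barks at the dog".split())
v2 = Counter("the dog sleeps".split())
print(round(cosine_similarity(v1, v2), 3))  # 0.73
```

Because only shared terms contribute to the dot product, two texts with no words in common score 0, and identical texts score 1.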
@bbzzzz
bbzzzz / README
Last active August 29, 2015 14:18 — forked from larsmans/README
Sentiment analysis experiment using scikit-learn
================================================
The script sentiment.py reproduces the sentiment analysis approach from Pang,
Lee and Vaithyanathan (2002), who tried to classify movie reviews as positive
or negative, with three differences:
* tf-idf weighting is applied to terms
* the three-fold cross validation split is different
* regularization is tuned by cross validation
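The tf-idf weighting mentioned in the first bullet can be sketched in plain Python (a bare-bones variant with tf = raw count and idf = log(N / df); scikit-learn's TfidfVectorizer uses a smoothed formula, so the numbers differ):

```python
import math

def tfidf(docs):
    """Assign tf-idf weights to each term of each document.

    `docs` is a list of token lists; returns one dict of term -> weight
    per document, with idf = log(N / document frequency).
    """
    n = len(docs)
    df = {}
    for doc in docs:
        for term in set(doc):
            df[term] = df.get(term, 0) + 1
    weighted = []
    for doc in docs:
        counts = {}
        for term in doc:
            counts[term] = counts.get(term, 0) + 1
        weighted.append({t: c * math.log(n / df[t]) for t, c in counts.items()})
    return weighted

docs = [["good", "movie"], ["bad", "movie"]]
weights = tfidf(docs)
print(weights[0]["movie"])  # 0.0: 'movie' appears in every document
```

Terms that occur in every review get weight zero, which is exactly why tf-idf helps here: uninformative words stop dominating the classifier.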
@bbzzzz
bbzzzz / README
Last active August 29, 2015 14:19
IMDB review Sentiment Analysis based on Support Vector Machine
Sentiment Analysis using sklearn
=================================
* sklearn LinearSVC
* 10-fold cross validation
* accuracy 88.45%
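The 10-fold cross validation in the second bullet amounts to partitioning the data into ten test folds. A sketch of the index split (a plain sequential split; sklearn's KFold additionally supports shuffling):

```python
def k_fold_indices(n, k=10):
    """Yield (train, test) index lists for k-fold cross validation
    over n examples, distributing any remainder across the first folds."""
    fold_sizes = [n // k + (1 if i < n % k else 0) for i in range(k)]
    start = 0
    for size in fold_sizes:
        test = list(range(start, start + size))
        train = list(range(0, start)) + list(range(start + size, n))
        yield train, test
        start += size

folds = list(k_fold_indices(100, k=10))
print(len(folds))  # 10 folds, each holding out 10 test examples
```

Averaging the per-fold accuracies of a classifier such as LinearSVC over these ten splits gives the reported cross-validated accuracy.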
@bbzzzz
bbzzzz / 1.0 README
Last active May 4, 2021 21:00
IMDB Sentiment Analysis using Naive Bayes
Sentiment Analysis using Naive Bayes
====================================
* Naive Bayes
* Add-1 smoothing
* 10-fold cross validation
* regular expression detecting negation words
Besides the regular method, the code also implements:
* Boolean Naive Bayes
* Naive Bayes with stop words
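The listed ingredients can be combined in a toy sketch (not the gist's code; the tiny training set and the NOT_-prefix negation trick are illustrative assumptions):

```python
import math
import re

NEGATION = re.compile(r"\b(?:not|no|never|n't)\b")

def mark_negation(text):
    """Prefix every token after a negation word with NOT_,
    so 'not great' and 'great' become distinct features."""
    out, negated = [], False
    for tok in text.split():
        out.append("NOT_" + tok if negated else tok)
        if NEGATION.search(tok):
            negated = True
    return out

def train_nb(docs_by_label):
    """Multinomial Naive Bayes with add-1 (Laplace) smoothing.
    `docs_by_label` maps a label to a list of token lists."""
    vocab, counts, totals, priors = set(), {}, {}, {}
    n_docs = sum(len(d) for d in docs_by_label.values())
    for label, docs in docs_by_label.items():
        priors[label] = math.log(len(docs) / n_docs)
        counts[label] = {}
        for doc in docs:
            for tok in doc:
                vocab.add(tok)
                counts[label][tok] = counts[label].get(tok, 0) + 1
        totals[label] = sum(counts[label].values())
    return priors, counts, vocab, totals

def classify(tokens, model):
    priors, counts, vocab, totals = model
    best, best_score = None, float("-inf")
    for label in priors:
        # add-1 smoothing: every count is incremented by 1, the
        # denominator grows by the vocabulary size
        score = priors[label] + sum(
            math.log((counts[label].get(t, 0) + 1) / (totals[label] + len(vocab)))
            for t in tokens)
        if score > best_score:
            best, best_score = label, score
    return best

model = train_nb({
    "pos": [mark_negation("a great movie"), mark_negation("really great")],
    "neg": [mark_negation("not great at all"), mark_negation("terrible movie")],
})
print(classify(mark_negation("not great"), model))  # neg
```

The negation marking is what lets the model separate "great" from "not great": after preprocessing they share no features, so the smoothed counts push them toward opposite labels.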
@bbzzzz
bbzzzz / download_report
Created April 16, 2015 23:59
Webscrape all XBRL files given stock ticker
import urllib2
from bs4 import BeautifulSoup
def get_list(ticker):
base_url_part1 = "http://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK="
base_url_part2 = "&type=&dateb=&owner=&start="
base_url_part3 = "&count=100&output=xml"
href = []
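The preview ends before the URL pieces are joined. A sketch of the assembly step, reusing the three parts defined above (the function name and the `start` paging parameter default are assumptions):

```python
def edgar_filing_list_url(ticker, start=0):
    """Assemble the EDGAR company-browse URL from the pieces in the gist:
    CIK/ticker, a paging offset, and count=100 with XML output."""
    base_url_part1 = "http://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK="
    base_url_part2 = "&type=&dateb=&owner=&start="
    base_url_part3 = "&count=100&output=xml"
    return base_url_part1 + ticker + base_url_part2 + str(start) + base_url_part3

url = edgar_filing_list_url("AAPL")
print("CIK=AAPL" in url)  # True
```

Requesting XML output (`output=xml`) makes the listing straightforward to walk with BeautifulSoup before downloading each XBRL file.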
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This code is for ZestFinance modeling team interview homework assisgnment. ML algorithms including Regularized Logistic Regression, Elastic Net, Random Fores and Gradient Boosting (xgboost) are applied."
]
},
{