Skip to content

Instantly share code, notes, and snippets.

View kangjin2014's full-sized avatar
:electron:
Focusing

Ryan J Kang kangjin2014

:electron:
Focusing
  • Toronto, Canada
View GitHub Profile
import math.abs

/** Tiny demo of a relative-tolerance closeness test (fixed-point style). */
object Playground {
  def main(args: Array[String]): Unit = {
    val tolerance = 0.001

    // True when the relative difference |x - y| / |x| is below `tolerance`.
    // Fix: the original computed abs((x - y) / x) / x — dividing by x a
    // second time, which skews the test for |x| != 1 and flips the
    // comparison's sense for negative x. The standard relative check
    // drops the stray "/ x".
    def isCloseEnough(x: Double, y: Double): Boolean =
      abs((x - y) / x) < tolerance
  }
}
class LoadFiles(object):
    """Holds default data-file paths and loads the skills dictionary.

    Fix vs. original: the constructor was named ``__int__`` (typo), so it
    was never called and the path attributes were never set; renamed to
    ``__init__``.
    """

    def __init__(self):
        # Default locations of the input artifacts.
        self.path_to_dict = 'data/key_skill.csv'
        self.path_to_jobs = 'data/ds.csv'
        self.path_to_resume = 'data/resume_ryan_kang.docx'

    def load_skills_dict(self, path_to_dict):
        """Read the skills-dictionary CSV (no header row) into a DataFrame."""
        df_skills = pd.read_csv(path_to_dict, encoding='latin1', header=None)
        return df_skills
## __init__.py
# NOTE(review): illustrative template, not runnable Python —
# "<folder>" and "<file>" are placeholders for real module names.
def init():
    # Absolute import (placeholder syntax) vs. explicit relative import.
    from <folder> import <file>
    from . import <file> # "." means current folder/module
    a = func_0()
    b = funk_1()  # NOTE(review): probably a typo for func_1 — confirm
## __main__.py
# Script entry-point guard; the body that should follow is truncated here.
if __name__ == '__main__':
@kangjin2014
kangjin2014 / fulltext_html_parser.py
Created December 30, 2017 18:56
fulltext cleaning
from bs4 import BeautifulSoup
import urllib.request
import urllib.error


def parser_job_link(job_link):
    """Download a job posting and return its visible text.

    Fetches *job_link* with a browser-like User-Agent (some job boards
    reject urllib's default UA), strips ``<script>``/``<style>`` elements,
    and returns the remaining text. Returns ``None`` when the fetch fails.

    Fixes vs. original: ``import urllib`` alone does not expose
    ``urllib.request`` in Python 3, and the ``try`` had no handler
    (snippet was truncated).
    """
    try:
        req = urllib.request.Request(job_link,
                                     headers={'User-Agent': 'Mozilla/5.0'})
        html = urllib.request.urlopen(req).read()
    except (urllib.error.URLError, ValueError):
        # Best-effort: bad URL or network failure yields no text.
        return None
    soup = BeautifulSoup(html, "html.parser")
    # Drop non-visible markup before extracting text.
    for script in soup(["script", "style"]):
        script.extract()
    text = soup.get_text()
    return text
from sklearn.cluster import KMeans
from sklearn import metrics
from scipy.spatial.distance import cdist
import numpy as np
import matplotlib.pyplot as plt

# Elbow method to choose k for k-means.
# NOTE(review): assumes `X` is the (n_samples, n_features) data array,
# defined elsewhere — confirm before running.
distortions = []
K = range(1, 10)
for k in K:
    # Original snippet was truncated inside this loop; standard elbow body:
    # fit k clusters, then record the mean distance of each sample to its
    # nearest centroid. The "elbow" in distortions-vs-k suggests a good k.
    model = KMeans(n_clusters=k).fit(X)
    distortions.append(
        sum(np.min(cdist(X, model.cluster_centers_, 'euclidean'), axis=1))
        / X.shape[0])
@kangjin2014
kangjin2014 / pull.py
Created December 19, 2017 02:20
in case normal request doesn't work.
from urllib.request import Request, urlopen

# Fetch a page while spoofing a browser User-Agent — useful when the
# plain urllib request is rejected ("in case normal request doesn't work").
# Fix: urlopen requires a scheme; the bare 'www.google.com' raised
# ValueError: unknown url type.
link = 'https://www.google.com'  # define the link
req = Request(link, headers={'User-Agent': 'Mozilla/5.0'})
webpage = urlopen(req).read()
@kangjin2014
kangjin2014 / pull.py
Created December 19, 2017 02:20
in case normal request doesn't work.
from urllib.request import Request, urlopen

# Fetch a page while spoofing a browser User-Agent — useful when the
# plain urllib request is rejected ("in case normal request doesn't work").
# Fix: urlopen requires a scheme; the bare 'www.google.com' raised
# ValueError: unknown url type.
link = 'https://www.google.com'  # define the link
req = Request(link, headers={'User-Agent': 'Mozilla/5.0'})
webpage = urlopen(req).read()
@kangjin2014
kangjin2014 / web2pdf.py
Created December 15, 2017 05:43
I created this in response to an enquiry about automating webpage pulling and saving each page as a PDF with a defined name. Download the .py file and run it directly. Configuration instructions are in the script.
import pandas as pd
import numpy as np
import pdfkit
# Configure the PDF layout: wkhtmltopdf options passed through pdfkit.
# Fix: the original dict was left unclosed (snippet truncated); closed it
# and added the matching left margin for symmetric 0.75in margins.
options = {
    'page-size': 'A4',
    'margin-top': '0.75in',
    'margin-right': '0.75in',
    'margin-bottom': '0.75in',
    'margin-left': '0.75in',
}
@kangjin2014
kangjin2014 / web2page.py
Last active December 15, 2017 05:51
web2page.py
from selenium import webdriver

# Drive a headless PhantomJS browser to force a full page render.
driver = webdriver.PhantomJS()
driver.maximize_window()
# NOTE(review): `link` is not defined anywhere in this snippet — it must
# be assigned the target URL before this call runs.
driver.get(link)
# Scroll in many small steps so lazily-loaded content gets triggered.
# The target is document.body.scrollHeight / scheight: early iterations
# (scheight ~ 0.1) request ~10x the page height (clamped to the bottom),
# and as scheight grows toward 9.9 the viewport moves back up the page.
scheight = .1
while scheight < 9.9:
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight/%s);" % scheight)
    scheight += .01
@kangjin2014
kangjin2014 / scrapy_ryerson.py
Created December 15, 2017 00:53
scrapy_ryerson.py
# Fix: the scrapy.contrib.* modules were deprecated and then removed
# (Scrapy >= 1.0); the modern module paths are scrapy.linkextractors and
# scrapy.spiders.
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule


class DomainSpider(CrawlSpider):
    """Crawl ryerson.ca, handling pages whose URL matches "graduate/"."""

    name = 'prof'
    allowed_domains = ['ryerson.ca']
    start_urls = ['http://www.ryerson.ca/']

    rules = (
        # follow=True continues crawling from matched pages as well.
        # NOTE(review): the 'parse_item' callback is not defined in this
        # snippet — presumably truncated; confirm it exists in the spider.
        Rule(LinkExtractor(allow=r"graduate/"), callback='parse_item', follow=True),
    )