Skip to content

Instantly share code, notes, and snippets.

@onurdegerli
Created August 28, 2013 06:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save onurdegerli/6362839 to your computer and use it in GitHub Desktop.
Save onurdegerli/6362839 to your computer and use it in GitHub Desktop.
This code gets the job titles from http://jobsearch.about.com.
#!/usr/bin/python
import MySQLdb
import urllib2
from urllib2 import Request, urlopen, URLError, HTTPError
import re
# Scrapes child job titles for every top-level row of the `job_title` table.
# Written for Python 2 (urllib2, MySQLdb); the syntax below is kept valid
# under Python 3 as well so the pure helpers can be imported and tested.

# Matches the text of a simple <li>...</li> item. Only word characters,
# spaces and the characters & ; / < > may appear inside the item; the match
# is lazy, so it stops at the first closing tag.
JOB_TITLE_PATTERN = re.compile(r"<li>([\w &;/<>]*?)</li>")

# Columns are named explicitly instead of `SELECT *` so that the result does
# not depend on the physical column order of the table.
SELECT_PARENTS_SQL = (
    "SELECT job_title_id, title_en, url FROM job_title "
    "WHERE parent_job_title_id=0"
)

INSERT_CHILD_SQL = '''INSERT INTO job_title (parent_job_title_id, title_en, title_tr, url)
values (%s, %s, %s, %s)'''


def extract_job_titles(html):
    """Return the job titles found in *html*, in document order.

    A title is the content of any ``<li>`` element accepted by
    ``JOB_TITLE_PATTERN``; items containing other characters are skipped.
    """
    return JOB_TITLE_PATTERN.findall(html)


def fetch_html(url):
    """Download *url* and return its body, or ``None`` if the request fails.

    Failures are reported on stdout rather than raised, so one unreachable
    page does not abort the whole run.
    """
    try:
        response = urlopen(urllib2.Request(url))
    # HTTPError is a subclass of URLError, so it has to be handled first.
    except HTTPError as e:
        print('The server couldn\'t fulfill the request.')
        print('Error code:  %s' % e.code)
        return None
    except URLError as e:
        print('We failed to reach a server.')
        print('Reason:  %s' % e.reason)
        return None
    try:
        return response.read()
    finally:
        # Python 2 responses are not context managers; close explicitly.
        response.close()


def main():
    """Insert one child row per scraped title under each top-level job title.

    Reads every row with ``parent_job_title_id=0``, fetches its URL, and
    stores each extracted title with that row's id as parent. Inserts are
    committed once per parent page, so a page is stored completely or not at
    all. The cursor and the connection are always released, even on error.
    """
    # Fill in the connection settings before running.
    db = MySQLdb.connect(host="",
                         user="",
                         passwd="",
                         db="")
    try:
        cur = db.cursor()
        try:
            cur.execute(SELECT_PARENTS_SQL)
            for parent_id, parent_title, url in cur.fetchall():
                print('%s - %s - %s' % (parent_id, parent_title, url))
                html = fetch_html(url)
                if html is None:
                    continue
                for title in extract_job_titles(html):
                    print(title)
                    cur.execute(INSERT_CHILD_SQL, (parent_id, title, '', ''))
                db.commit()
        finally:
            cur.close()
    finally:
        db.close()


if __name__ == "__main__":
    main()
-- Table of job titles arranged as a two-level tree.
--   job_title_id        : surrogate primary key.
--   parent_job_title_id : 0 for a top-level category; the scraper stores the
--                         parent row's job_title_id here for scraped titles.
--   title_en            : title text (English, going by the column name).
--   title_tr            : empty in the seed rows and in scraped rows;
--                         presumably a Turkish translation -- TODO confirm.
--   url                 : page fetched by the scraper for a top-level row;
--                         stored as '' for scraped titles.
-- AUTO_INCREMENT=3788 sets the first auto-generated id on a freshly created
-- table; it looks like a leftover from the dump this was exported from.
CREATE TABLE IF NOT EXISTS `job_title` (
`job_title_id` int(10) unsigned NOT NULL AUTO_INCREMENT,
`parent_job_title_id` int(10) unsigned NOT NULL DEFAULT '0',
`title_en` varchar(255) NOT NULL,
`title_tr` varchar(255) NOT NULL,
`url` varchar(255) NOT NULL,
PRIMARY KEY (`job_title_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 AUTO_INCREMENT=3788 ;
--
-- Seed data for table `job_title`: the 32 top-level categories
-- (parent_job_title_id = 0). Each row's `url` is the page that
-- get_job_titles.py fetches to collect that category's job titles.
--
INSERT INTO `job_title` (`job_title_id`, `parent_job_title_id`, `title_en`, `title_tr`, `url`) VALUES
(1, 0, 'Advertising', '', 'http://jobsearch.about.com/od/job-titles/a/advertising-job-titles.htm'),
(2, 0, 'Accounting', '', 'http://jobsearch.about.com/od/job-title-samples/a/accounting-job-titles.htm'),
(3, 0, 'Construction', '', 'http://jobsearch.about.com/od/job-title-samples/a/construction-job-titles.htm'),
(4, 0, 'Administrative', '', 'http://jobsearch.about.com/od/job-title-samples/a/admin-job-titles.htm'),
(5, 0, 'Business', '', 'http://jobsearch.about.com/od/job-title-samples/a/business-job-titles.htm'),
(6, 0, 'Engineering', '', 'http://jobsearch.about.com/od/job-title-samples/a/engineering-job-titles.htm'),
(7, 0, 'Corporate', '', 'http://jobsearch.about.com/od/job-title-samples/a/c-level-job-titles.htm'),
(8, 0, 'Entry Level', '', 'http://jobsearch.about.com/od/best-jobs/a/best-entry-level-jobs.htm'),
(9, 0, 'Hospitality', '', 'http://jobsearch.about.com/od/job-title-samples/a/hospitality-job-titles.htm'),
(10, 0, 'Health/Safety', '', 'http://jobsearch.about.com/od/job-title-samples/a/health-safety-job-titles.htm'),
(11, 0, 'First', '', 'http://jobsearch.about.com/od/justforstudents/a/first-job-list.htm'),
(12, 0, 'Social Media', '', 'http://jobsearch.about.com/od/job-title-samples/a/social-media-job-titles.htm'),
(13, 0, 'Real Estate', '', 'http://jobsearch.about.com/od/job-title-samples/a/real-estate-job-titles.htm'),
(14, 0, 'Health Care / Medical', '', 'http://jobsearch.about.com/od/job-title-samples/a/health-care-job-titles.htm'),
(15, 0, 'Insurance', '', 'http://jobsearch.about.com/od/job-title-samples/a/insurance-job-titles.htm'),
(16, 0, 'IT', '', 'http://jobsearch.about.com/od/job-title-samples/a/it-job-titles.htm'),
(17, 0, 'Legal', '', 'http://jobsearch.about.com/od/job-title-samples/a/legal-job-titles.htm'),
(18, 0, 'Maintenance', '', 'http://jobsearch.about.com/od/job-title-samples/a/maintenance-job-titles.htm'),
(19, 0, 'Chef', '', 'http://culinaryarts.about.com/od/culinaryfundamentals/a/whatisachef.htm'),
(20, 0, 'Manufacturing', '', 'http://jobsearch.about.com/od/job-title-samples/a/manufacturing-job-titles.htm'),
(21, 0, 'Event Planning', '', 'http://eventplanning.about.com/od/eventcareers/tp/corporateevents.htm'),
(22, 0, 'Media', '', 'http://jobsearch.about.com/od/job-title-samples/a/media-job-titles.htm'),
(23, 0, 'Non Profit', '', 'http://jobsearch.about.com/od/job-title-samples/a/nonprofit-job-titles.htm'),
(24, 0, 'Finance', '', 'http://financecareers.about.com/od/jobtitles/a/jobtitles.htm'),
(25, 0, 'Public Relations', '', 'http://jobsearch.about.com/od/job-title-samples/a/public-relations-job-titles.htm'),
(26, 0, 'Science', '', 'http://jobsearch.about.com/od/job-title-samples/a/science-job-titles.htm'),
(27, 0, 'Human Resources', '', 'http://humanresources.about.com/od/jobdescriptions/f/hr_job_mgr.htm'),
(28, 0, 'Geography', '', 'http://geography.about.com/od/careersingeography/a/jobsgeography.htm'),
(29, 0, 'Second', '', 'http://jobsearch.about.com/od/parttimejobs/a/best-second-jobs.htm'),
(30, 0, 'Social Work', '', 'http://jobsearch.about.com/od/job-title-samples/a/social-work-job-titles.htm'),
(31, 0, 'Transportation', '', 'http://jobsearch.about.com/od/job-title-samples/a/transportation-job-titles.htm'),
(32, 0, 'Psychology Related', '', 'http://psychology.about.com/od/careersinpsychology/a/career-list.htm');
@onurdegerli
Copy link
Author

1- Create the MySQL table and insert the parent job titles.
2- Run "get_job_titles.py" on command line.

Note: Do not forget to configure your MySQL connection settings.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment