Created
February 3, 2010 00:52
-
-
Save prehensile/293221 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0; | |
SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0; | |
SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='TRADITIONAL'; | |
DROP SCHEMA IF EXISTS `verbivoredb` ; | |
CREATE SCHEMA IF NOT EXISTS `verbivoredb` DEFAULT CHARACTER SET latin1 COLLATE latin1_swedish_ci ; | |
USE `verbivoredb`; | |
-- ----------------------------------------------------- | |
-- Table `verbivoredb`.`words` | |
-- ----------------------------------------------------- | |
DROP TABLE IF EXISTS `verbivoredb`.`words` ; | |
CREATE TABLE IF NOT EXISTS `verbivoredb`.`words` ( | |
`id` INT NOT NULL AUTO_INCREMENT , | |
`word` VARCHAR(45) NULL , | |
`first_seen` TIMESTAMP NULL , | |
`last_seen` TIMESTAMP NULL , | |
`frequency` INT NOT NULL DEFAULT 0 , | |
PRIMARY KEY (`id`) ) | |
ENGINE = InnoDB; | |
-- ----------------------------------------------------- | |
-- Table `verbivoredb`.`following_words` | |
-- ----------------------------------------------------- | |
DROP TABLE IF EXISTS `verbivoredb`.`following_words` ; | |
CREATE TABLE IF NOT EXISTS `verbivoredb`.`following_words` ( | |
`id` INT NOT NULL AUTO_INCREMENT , | |
`frequency` INT NOT NULL DEFAULT 0 , | |
`root_word_id` INT NOT NULL , | |
`following_word_id` INT NOT NULL , | |
PRIMARY KEY (`id`) , | |
INDEX `fk_following_words_root` (`root_word_id` ASC) , | |
INDEX `fk_following_words_end` (`following_word_id` ASC) , | |
CONSTRAINT `fk_following_words_root` | |
FOREIGN KEY (`root_word_id` ) | |
REFERENCES `verbivoredb`.`words` (`id` ) | |
ON DELETE NO ACTION | |
ON UPDATE NO ACTION, | |
CONSTRAINT `fk_following_words_end` | |
FOREIGN KEY (`following_word_id` ) | |
REFERENCES `verbivoredb`.`words` (`id` ) | |
ON DELETE NO ACTION | |
ON UPDATE NO ACTION) | |
ENGINE = InnoDB; | |
-- ----------------------------------------------------- | |
-- Table `verbivoredb`.`pos` | |
-- ----------------------------------------------------- | |
DROP TABLE IF EXISTS `verbivoredb`.`pos` ; | |
CREATE TABLE IF NOT EXISTS `verbivoredb`.`pos` ( | |
`id` INT NOT NULL AUTO_INCREMENT , | |
`tag_name` VARCHAR(16) NULL , | |
PRIMARY KEY (`id`) ) | |
ENGINE = InnoDB; | |
-- ----------------------------------------------------- | |
-- Table `verbivoredb`.`following_pos` | |
-- ----------------------------------------------------- | |
DROP TABLE IF EXISTS `verbivoredb`.`following_pos` ; | |
CREATE TABLE IF NOT EXISTS `verbivoredb`.`following_pos` ( | |
`id` INT NOT NULL AUTO_INCREMENT , | |
`frequency` INT NOT NULL DEFAULT 0 , | |
`following_pos_id` INT NOT NULL , | |
`root_word_id` INT NOT NULL , | |
PRIMARY KEY (`id`) , | |
INDEX `fk_linkTableFollowingPos_pos` (`following_pos_id` ASC) , | |
INDEX `fk_linkTableFollowingPos_words1` (`root_word_id` ASC) , | |
CONSTRAINT `fk_linkTableFollowingPos_pos` | |
FOREIGN KEY (`following_pos_id` ) | |
REFERENCES `verbivoredb`.`pos` (`id` ) | |
ON DELETE NO ACTION | |
ON UPDATE NO ACTION, | |
CONSTRAINT `fk_linkTableFollowingPos_words1` | |
FOREIGN KEY (`root_word_id` ) | |
REFERENCES `verbivoredb`.`words` (`id` ) | |
ON DELETE NO ACTION | |
ON UPDATE NO ACTION) | |
ENGINE = InnoDB; | |
-- ----------------------------------------------------- | |
-- Table `verbivoredb`.`preceding_pos` | |
-- ----------------------------------------------------- | |
DROP TABLE IF EXISTS `verbivoredb`.`preceding_pos` ; | |
CREATE TABLE IF NOT EXISTS `verbivoredb`.`preceding_pos` ( | |
`id` INT NOT NULL AUTO_INCREMENT , | |
`frequency` INT NOT NULL DEFAULT 0 , | |
`root_word_id` INT NOT NULL , | |
`preceding_pos_id` INT NOT NULL , | |
PRIMARY KEY (`id`) , | |
INDEX `fk_linkTablePrecedingPos_words1` (`root_word_id` ASC) , | |
INDEX `fk_linkTablePrecedingPos_pos1` (`preceding_pos_id` ASC) , | |
CONSTRAINT `fk_linkTablePrecedingPos_words1` | |
FOREIGN KEY (`root_word_id` ) | |
REFERENCES `verbivoredb`.`words` (`id` ) | |
ON DELETE NO ACTION | |
ON UPDATE NO ACTION, | |
CONSTRAINT `fk_linkTablePrecedingPos_pos1` | |
FOREIGN KEY (`preceding_pos_id` ) | |
REFERENCES `verbivoredb`.`pos` (`id` ) | |
ON DELETE NO ACTION | |
ON UPDATE NO ACTION) | |
ENGINE = InnoDB; | |
-- ----------------------------------------------------- | |
-- Table `verbivoredb`.`preceding_words` | |
-- ----------------------------------------------------- | |
DROP TABLE IF EXISTS `verbivoredb`.`preceding_words` ; | |
CREATE TABLE IF NOT EXISTS `verbivoredb`.`preceding_words` ( | |
`id` INT NOT NULL AUTO_INCREMENT , | |
`frequency` INT NOT NULL DEFAULT 0 , | |
`root_word_id` INT NOT NULL , | |
`preceding_word_id` INT NOT NULL , | |
PRIMARY KEY (`id`) , | |
INDEX `fk_preceding_word_root` (`root_word_id` ASC) , | |
INDEX `fk_preceding_word_end` (`preceding_word_id` ASC) , | |
CONSTRAINT `fk_preceding_word_root` | |
FOREIGN KEY (`root_word_id` ) | |
REFERENCES `verbivoredb`.`words` (`id` ) | |
ON DELETE NO ACTION | |
ON UPDATE NO ACTION, | |
CONSTRAINT `fk_preceding_word_end` | |
FOREIGN KEY (`preceding_word_id` ) | |
REFERENCES `verbivoredb`.`words` (`id` ) | |
ON DELETE NO ACTION | |
ON UPDATE NO ACTION) | |
ENGINE = InnoDB; | |
-- ----------------------------------------------------- | |
-- Table `verbivoredb`.`verbivore_internals` | |
-- ----------------------------------------------------- | |
DROP TABLE IF EXISTS `verbivoredb`.`verbivore_internals` ; | |
CREATE TABLE IF NOT EXISTS `verbivoredb`.`verbivore_internals` ( | |
`id` INT NOT NULL AUTO_INCREMENT , | |
`record_name` VARCHAR(16) NULL , | |
`record_value` VARCHAR(45) NULL , | |
PRIMARY KEY (`id`) ) | |
ENGINE = InnoDB; | |
-- ----------------------------------------------------- | |
-- Table `verbivoredb`.`pos_frequency` | |
-- ----------------------------------------------------- | |
DROP TABLE IF EXISTS `verbivoredb`.`pos_frequency` ; | |
CREATE TABLE IF NOT EXISTS `verbivoredb`.`pos_frequency` ( | |
`id` INT NOT NULL AUTO_INCREMENT , | |
`word_id` INT NOT NULL , | |
`pos_id` INT NOT NULL , | |
`frequency` INT NULL DEFAULT 0 , | |
PRIMARY KEY (`id`) , | |
INDEX `fk_pos_frequencies_words1` (`word_id` ASC) , | |
INDEX `fk_pos_frequencies_pos1` (`pos_id` ASC) , | |
CONSTRAINT `fk_pos_frequencies_words1` | |
FOREIGN KEY (`word_id` ) | |
REFERENCES `verbivoredb`.`words` (`id` ) | |
ON DELETE NO ACTION | |
ON UPDATE NO ACTION, | |
CONSTRAINT `fk_pos_frequencies_pos1` | |
FOREIGN KEY (`pos_id` ) | |
REFERENCES `verbivoredb`.`pos` (`id` ) | |
ON DELETE NO ACTION | |
ON UPDATE NO ACTION) | |
ENGINE = InnoDB; | |
SET SQL_MODE=@OLD_SQL_MODE; | |
SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS; | |
SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import twitter | |
import nltk | |
import MySQLdb | |
import re | |
from datetime import datetime | |
twitter_username = 'some_user' | |
twitter_password = 'some_pass' | |
mysql_host = '127.0.0.1' | |
mysql_user = 'some_user' | |
mysql_pass = 'some_pass' | |
mysql_db = 'verbivoredb' | |
url_pattern = re.compile('''["']http://[^+]*?['"]''') | |
def get_tokens(): | |
api = twitter.Api(username = twitter_username, password = twitter_password) | |
statuses = api.GetFriendsTimeline( count=20 ) | |
tokens = list() | |
for s in statuses: | |
text = s.text | |
text = url_pattern.sub( "", text ) | |
tokens.extend( nltk.word_tokenize( text ) ) | |
return tokens | |
def main(): | |
tokens = get_tokens() | |
fd = nltk.FreqDist() | |
for token in tokens: | |
fd.inc( token ) | |
db = MySQLdb.connect( host=mysql_host, port=3306, user=mysql_user, passwd=mysql_pass, db=mysql_db ) | |
curs = db.cursor() | |
ss = fd.keys() | |
for word in ss: | |
# word = re.escape( word ) | |
print word | |
curs.execute( "SELECT id FROM words WHERE word=%s", word ) | |
row = curs.fetchone() | |
if( row is None ): | |
curs.execute( "INSERT INTO words ( word, first_seen ) VALUES ( %s, CURRENT_TIMESTAMP )", word ) | |
frequency = fd[ word ] | |
curs.execute( """UPDATE words SET last_seen=CURRENT_TIMESTAMP, frequency=frequency+%s WHERE word=%s""", ( frequency, word ) ) | |
curs.execute( """UPDATE verbivore_internals SET record_value=CURRENT_TIMESTAMP WHERE record_name=%s""", "time_last_run" ); | |
db.commit() | |
curs.close() | |
db.close() | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment