Skip to content

Instantly share code, notes, and snippets.

@prehensile
Created February 3, 2010 00:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save prehensile/293221 to your computer and use it in GitHub Desktop.
Save prehensile/293221 to your computer and use it in GitHub Desktop.
SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0;
SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0;
SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='TRADITIONAL';
DROP SCHEMA IF EXISTS `verbivoredb` ;
CREATE SCHEMA IF NOT EXISTS `verbivoredb` DEFAULT CHARACTER SET latin1 COLLATE latin1_swedish_ci ;
USE `verbivoredb`;
-- -----------------------------------------------------
-- Table `verbivoredb`.`words`
-- -----------------------------------------------------
DROP TABLE IF EXISTS `verbivoredb`.`words` ;
CREATE TABLE IF NOT EXISTS `verbivoredb`.`words` (
`id` INT NOT NULL AUTO_INCREMENT ,
`word` VARCHAR(45) NULL ,
`first_seen` TIMESTAMP NULL ,
`last_seen` TIMESTAMP NULL ,
`frequency` INT NOT NULL DEFAULT 0 ,
PRIMARY KEY (`id`) )
ENGINE = InnoDB;
-- -----------------------------------------------------
-- Table `verbivoredb`.`following_words`
-- -----------------------------------------------------
DROP TABLE IF EXISTS `verbivoredb`.`following_words` ;
CREATE TABLE IF NOT EXISTS `verbivoredb`.`following_words` (
`id` INT NOT NULL AUTO_INCREMENT ,
`frequency` INT NOT NULL DEFAULT 0 ,
`root_word_id` INT NOT NULL ,
`following_word_id` INT NOT NULL ,
PRIMARY KEY (`id`) ,
INDEX `fk_following_words_root` (`root_word_id` ASC) ,
INDEX `fk_following_words_end` (`following_word_id` ASC) ,
CONSTRAINT `fk_following_words_root`
FOREIGN KEY (`root_word_id` )
REFERENCES `verbivoredb`.`words` (`id` )
ON DELETE NO ACTION
ON UPDATE NO ACTION,
CONSTRAINT `fk_following_words_end`
FOREIGN KEY (`following_word_id` )
REFERENCES `verbivoredb`.`words` (`id` )
ON DELETE NO ACTION
ON UPDATE NO ACTION)
ENGINE = InnoDB;
-- -----------------------------------------------------
-- Table `verbivoredb`.`pos`
-- -----------------------------------------------------
DROP TABLE IF EXISTS `verbivoredb`.`pos` ;
CREATE TABLE IF NOT EXISTS `verbivoredb`.`pos` (
`id` INT NOT NULL AUTO_INCREMENT ,
`tag_name` VARCHAR(16) NULL ,
PRIMARY KEY (`id`) )
ENGINE = InnoDB;
-- -----------------------------------------------------
-- Table `verbivoredb`.`following_pos`
-- -----------------------------------------------------
DROP TABLE IF EXISTS `verbivoredb`.`following_pos` ;
CREATE TABLE IF NOT EXISTS `verbivoredb`.`following_pos` (
`id` INT NOT NULL AUTO_INCREMENT ,
`frequency` INT NOT NULL DEFAULT 0 ,
`following_pos_id` INT NOT NULL ,
`root_word_id` INT NOT NULL ,
PRIMARY KEY (`id`) ,
INDEX `fk_linkTableFollowingPos_pos` (`following_pos_id` ASC) ,
INDEX `fk_linkTableFollowingPos_words1` (`root_word_id` ASC) ,
CONSTRAINT `fk_linkTableFollowingPos_pos`
FOREIGN KEY (`following_pos_id` )
REFERENCES `verbivoredb`.`pos` (`id` )
ON DELETE NO ACTION
ON UPDATE NO ACTION,
CONSTRAINT `fk_linkTableFollowingPos_words1`
FOREIGN KEY (`root_word_id` )
REFERENCES `verbivoredb`.`words` (`id` )
ON DELETE NO ACTION
ON UPDATE NO ACTION)
ENGINE = InnoDB;
-- -----------------------------------------------------
-- Table `verbivoredb`.`preceding_pos`
-- -----------------------------------------------------
DROP TABLE IF EXISTS `verbivoredb`.`preceding_pos` ;
CREATE TABLE IF NOT EXISTS `verbivoredb`.`preceding_pos` (
`id` INT NOT NULL AUTO_INCREMENT ,
`frequency` INT NOT NULL DEFAULT 0 ,
`root_word_id` INT NOT NULL ,
`preceding_pos_id` INT NOT NULL ,
PRIMARY KEY (`id`) ,
INDEX `fk_linkTablePrecedingPos_words1` (`root_word_id` ASC) ,
INDEX `fk_linkTablePrecedingPos_pos1` (`preceding_pos_id` ASC) ,
CONSTRAINT `fk_linkTablePrecedingPos_words1`
FOREIGN KEY (`root_word_id` )
REFERENCES `verbivoredb`.`words` (`id` )
ON DELETE NO ACTION
ON UPDATE NO ACTION,
CONSTRAINT `fk_linkTablePrecedingPos_pos1`
FOREIGN KEY (`preceding_pos_id` )
REFERENCES `verbivoredb`.`pos` (`id` )
ON DELETE NO ACTION
ON UPDATE NO ACTION)
ENGINE = InnoDB;
-- -----------------------------------------------------
-- Table `verbivoredb`.`preceding_words`
-- -----------------------------------------------------
DROP TABLE IF EXISTS `verbivoredb`.`preceding_words` ;
CREATE TABLE IF NOT EXISTS `verbivoredb`.`preceding_words` (
`id` INT NOT NULL AUTO_INCREMENT ,
`frequency` INT NOT NULL DEFAULT 0 ,
`root_word_id` INT NOT NULL ,
`preceding_word_id` INT NOT NULL ,
PRIMARY KEY (`id`) ,
INDEX `fk_preceding_word_root` (`root_word_id` ASC) ,
INDEX `fk_preceding_word_end` (`preceding_word_id` ASC) ,
CONSTRAINT `fk_preceding_word_root`
FOREIGN KEY (`root_word_id` )
REFERENCES `verbivoredb`.`words` (`id` )
ON DELETE NO ACTION
ON UPDATE NO ACTION,
CONSTRAINT `fk_preceding_word_end`
FOREIGN KEY (`preceding_word_id` )
REFERENCES `verbivoredb`.`words` (`id` )
ON DELETE NO ACTION
ON UPDATE NO ACTION)
ENGINE = InnoDB;
-- -----------------------------------------------------
-- Table `verbivoredb`.`verbivore_internals`
-- -----------------------------------------------------
DROP TABLE IF EXISTS `verbivoredb`.`verbivore_internals` ;
CREATE TABLE IF NOT EXISTS `verbivoredb`.`verbivore_internals` (
`id` INT NOT NULL AUTO_INCREMENT ,
`record_name` VARCHAR(16) NULL ,
`record_value` VARCHAR(45) NULL ,
PRIMARY KEY (`id`) )
ENGINE = InnoDB;
-- -----------------------------------------------------
-- Table `verbivoredb`.`pos_frequency`
-- -----------------------------------------------------
DROP TABLE IF EXISTS `verbivoredb`.`pos_frequency` ;
CREATE TABLE IF NOT EXISTS `verbivoredb`.`pos_frequency` (
`id` INT NOT NULL AUTO_INCREMENT ,
`word_id` INT NOT NULL ,
`pos_id` INT NOT NULL ,
`frequency` INT NULL DEFAULT 0 ,
PRIMARY KEY (`id`) ,
INDEX `fk_pos_frequencies_words1` (`word_id` ASC) ,
INDEX `fk_pos_frequencies_pos1` (`pos_id` ASC) ,
CONSTRAINT `fk_pos_frequencies_words1`
FOREIGN KEY (`word_id` )
REFERENCES `verbivoredb`.`words` (`id` )
ON DELETE NO ACTION
ON UPDATE NO ACTION,
CONSTRAINT `fk_pos_frequencies_pos1`
FOREIGN KEY (`pos_id` )
REFERENCES `verbivoredb`.`pos` (`id` )
ON DELETE NO ACTION
ON UPDATE NO ACTION)
ENGINE = InnoDB;
SET SQL_MODE=@OLD_SQL_MODE;
SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS;
SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS;
import twitter
import nltk
import MySQLdb
import re
from datetime import datetime
twitter_username = 'some_user'
twitter_password = 'some_pass'
mysql_host = '127.0.0.1'
mysql_user = 'some_user'
mysql_pass = 'some_pass'
mysql_db = 'verbivoredb'
url_pattern = re.compile('''["']http://[^+]*?['"]''')
def get_tokens():
api = twitter.Api(username = twitter_username, password = twitter_password)
statuses = api.GetFriendsTimeline( count=20 )
tokens = list()
for s in statuses:
text = s.text
text = url_pattern.sub( "", text )
tokens.extend( nltk.word_tokenize( text ) )
return tokens
def main():
tokens = get_tokens()
fd = nltk.FreqDist()
for token in tokens:
fd.inc( token )
db = MySQLdb.connect( host=mysql_host, port=3306, user=mysql_user, passwd=mysql_pass, db=mysql_db )
curs = db.cursor()
ss = fd.keys()
for word in ss:
# word = re.escape( word )
print word
curs.execute( "SELECT id FROM words WHERE word=%s", word )
row = curs.fetchone()
if( row is None ):
curs.execute( "INSERT INTO words ( word, first_seen ) VALUES ( %s, CURRENT_TIMESTAMP )", word )
frequency = fd[ word ]
curs.execute( """UPDATE words SET last_seen=CURRENT_TIMESTAMP, frequency=frequency+%s WHERE word=%s""", ( frequency, word ) )
curs.execute( """UPDATE verbivore_internals SET record_value=CURRENT_TIMESTAMP WHERE record_name=%s""", "time_last_run" );
db.commit()
curs.close()
db.close()
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment