Skip to content

Instantly share code, notes, and snippets.

@haganbt
Created February 28, 2012 15:56
Show Gist options
  • Save haganbt/1933308 to your computer and use it in GitHub Desktop.
Save haganbt/1933308 to your computer and use it in GitHub Desktop.
Create MySQL table from a DataSift CSV export.
<?php
/* Generate MySQL - CREATE TABLE and LOAD DATA LOCAL INFILE statements from a DataSift CSV file */
// Edit with file location and name.
$myFile = "test.csv";
$lines = file($myFile);
$fields = preg_split('/,/', $lines[0]);
$create = "CREATE TABLE IF NOT EXISTS `Interactions` (`intId` int(11) NOT NULL auto_increment,";
$load = "LOAD DATA LOCAL INFILE '" . $myFile . "' INTO TABLE Interactions CHARACTER SET UTF8 FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '\"' ESCAPED BY '\\\' LINES TERMINATED BY '\\n' IGNORE 1 LINES (";
foreach ($fields as $key => $value) {
$value = preg_replace("/\./", "_", $value);
$create .= "<br />`" . trim($value) . "` TEXT NULL ,";
$load .= $value . ", ";
}
$create .= " PRIMARY KEY (`intId`)
) ENGINE=MyISAM DEFAULT CHARSET=UTF8;";
$load=substr($load,0,-2); // Last comma
$load .= ");\n";
echo "<p>" . $create . "</p>";
echo "<p>" . $load . "</p>";
echo "SHOW WARNINGS;";
/*
* The above will create a statement similar to that below:
*
CREATE TABLE IF NOT EXISTS `Interactions` (`intId` int(11) NOT NULL auto_increment,
`twitter_retweet_count` TEXT NULL ,
`twitter_retweet_user_screen_name` TEXT NULL ,
`twitter_retweet_user_id_str` TEXT NULL ,
`twitter_retweet_user_geo_enabled` TEXT NULL ,
`twitter_retweet_user_statuses_count` TEXT NULL ,
`twitter_retweet_user_lang` TEXT NULL ,
`twitter_retweet_user_id` TEXT NULL ,
`twitter_retweet_user_followers_count` TEXT NULL ,
`twitter_retweet_user_listed_count` TEXT NULL ,
`twitter_retweet_user_location` TEXT NULL ,
`twitter_retweet_user_description` TEXT NULL ,
`twitter_retweet_user_url` TEXT NULL ,
`twitter_retweet_user_friends_count` TEXT NULL ,
`twitter_retweet_user_time_zone` TEXT NULL ,
`twitter_retweet_user_name` TEXT NULL ,
`twitter_retweet_domains` TEXT NULL ,
`twitter_retweet_text` TEXT NULL ,
`twitter_retweet_links` TEXT NULL ,
`twitter_retweet_created_at` TEXT NULL ,
`twitter_retweet_source` TEXT NULL ,
`twitter_retweet_id` TEXT NULL ,
`twitter_retweet_mentions` TEXT NULL ,
`twitter_retweeted_user_name` TEXT NULL ,
`twitter_retweeted_user_listed_count` TEXT NULL ,
`twitter_retweeted_user_id` TEXT NULL ,
`twitter_retweeted_user_followers_count` TEXT NULL ,
`twitter_retweeted_user_friends_count` TEXT NULL ,
`twitter_retweeted_user_location` TEXT NULL ,
`twitter_retweeted_user_url` TEXT NULL ,
`twitter_retweeted_user_time_zone` TEXT NULL ,
`twitter_retweeted_user_statuses_count` TEXT NULL ,
`twitter_retweeted_user_description` TEXT NULL ,
`twitter_retweeted_user_screen_name` TEXT NULL ,
`twitter_retweeted_user_geo_enabled` TEXT NULL ,
`twitter_retweeted_user_lang` TEXT NULL ,
`twitter_retweeted_user_id_str` TEXT NULL ,
`twitter_retweeted_place_place_type` TEXT NULL ,
`twitter_retweeted_place_country_code` TEXT NULL ,
`twitter_retweeted_place_url` TEXT NULL ,
`twitter_retweeted_place_name` TEXT NULL ,
`twitter_retweeted_place_country` TEXT NULL ,
`twitter_retweeted_place_id` TEXT NULL ,
`twitter_retweeted_place_full_name` TEXT NULL ,
`twitter_retweeted_created_at` TEXT NULL ,
`twitter_retweeted_geo_latitude` TEXT NULL ,
`twitter_retweeted_geo_longitude` TEXT NULL ,
`twitter_retweeted_source` TEXT NULL ,
`twitter_retweeted_id` TEXT NULL ,
`twitter_user_description` TEXT NULL ,
`twitter_user_id` TEXT NULL ,
`twitter_user_listed_count` TEXT NULL ,
`twitter_user_url` TEXT NULL ,
`twitter_user_name` TEXT NULL ,
`twitter_user_id_str` TEXT NULL ,
`twitter_user_statuses_count` TEXT NULL ,
`twitter_user_followers_count` TEXT NULL ,
`twitter_user_time_zone` TEXT NULL ,
`twitter_user_friends_count` TEXT NULL ,
`twitter_user_screen_name` TEXT NULL ,
`twitter_user_location` TEXT NULL ,
`twitter_user_geo_enabled` TEXT NULL ,
`twitter_user_lang` TEXT NULL ,
`twitter_geo_latitude` TEXT NULL ,
`twitter_geo_longitude` TEXT NULL ,
`twitter_place_full_name` TEXT NULL ,
`twitter_place_name` TEXT NULL ,
`twitter_place_id` TEXT NULL ,
`twitter_place_url` TEXT NULL ,
`twitter_place_country` TEXT NULL ,
`twitter_place_place_type` TEXT NULL ,
`twitter_place_country_code` TEXT NULL ,
`twitter_text` TEXT NULL ,
`twitter_mentions` TEXT NULL ,
`twitter_id` TEXT NULL ,
`twitter_source` TEXT NULL ,
`twitter_links` TEXT NULL ,
`twitter_domains` TEXT NULL ,
`twitter_in_reply_to_screen_name` TEXT NULL ,
`twitter_in_reply_to_status_id` TEXT NULL ,
`twitter_in_reply_to_user_id` TEXT NULL ,
`twitter_created_at` TEXT NULL ,
`myspace_verb` TEXT NULL ,
`myspace_contenttype` TEXT NULL ,
`myspace_content` TEXT NULL ,
`myspace_author_avatar` TEXT NULL ,
`myspace_author_id` TEXT NULL ,
`myspace_author_link` TEXT NULL ,
`myspace_author_name` TEXT NULL ,
`myspace_author_friends_count` TEXT NULL ,
`myspace_geo_longitude` TEXT NULL ,
`myspace_geo_latitude` TEXT NULL ,
`myspace_link` TEXT NULL ,
`myspace_category` TEXT NULL ,
`myspace_id` TEXT NULL ,
`links_url` TEXT NULL ,
`links_retweet_count` TEXT NULL ,
`links_created_at` TEXT NULL ,
`links_title` TEXT NULL ,
`interaction_source` TEXT NULL ,
`interaction_geo_latitude` TEXT NULL ,
`interaction_geo_longitude` TEXT NULL ,
`interaction_author_friends_count` TEXT NULL ,
`interaction_author_username` TEXT NULL ,
`interaction_author_link` TEXT NULL ,
`interaction_author_id` TEXT NULL ,
`interaction_author_avatar` TEXT NULL ,
`interaction_author_name` TEXT NULL ,
`interaction_created_at` TEXT NULL ,
`interaction_type` TEXT NULL ,
`interaction_id` TEXT NULL ,
`interaction_digg_id` TEXT NULL ,
`interaction_link` TEXT NULL ,
`interaction_title` TEXT NULL ,
`interaction_tags` TEXT NULL ,
`interaction_content` TEXT NULL ,
`facebook_message` TEXT NULL ,
`facebook_type` TEXT NULL ,
`facebook_to_names` TEXT NULL ,
`facebook_to_ids` TEXT NULL ,
`facebook_author_avatar` TEXT NULL ,
`facebook_author_name` TEXT NULL ,
`facebook_author_link` TEXT NULL ,
`facebook_author_id` TEXT NULL ,
`facebook_og_by` TEXT NULL ,
`facebook_application` TEXT NULL ,
`facebook_name` TEXT NULL ,
`facebook_source` TEXT NULL ,
`facebook_link` TEXT NULL ,
`facebook_caption` TEXT NULL ,
`facebook_id` TEXT NULL ,
`facebook_description` TEXT NULL ,
`digg_item_description` TEXT NULL ,
`digg_item_media` TEXT NULL ,
`digg_item_thumbnail_originalheight` TEXT NULL ,
`digg_item_thumbnail_src` TEXT NULL ,
`digg_item_thumbnail_height` TEXT NULL ,
`digg_item_thumbnail_contentType` TEXT NULL ,
`digg_item_thumbnail_width` TEXT NULL ,
`digg_item_thumbnail_originalwidth` TEXT NULL ,
`digg_item_shorturl_view_count` TEXT NULL ,
`digg_item_shorturl_short_url` TEXT NULL ,
`digg_item_container_short_name` TEXT NULL ,
`digg_item_container_name` TEXT NULL ,
`digg_item_title` TEXT NULL ,
`digg_item_comments` TEXT NULL ,
`digg_item_topic_name` TEXT NULL ,
`digg_item_topic_short_name` TEXT NULL ,
`digg_item_submit_date` TEXT NULL ,
`digg_item_href` TEXT NULL ,
`digg_item_id` TEXT NULL ,
`digg_item_status` TEXT NULL ,
`digg_item_link` TEXT NULL ,
`digg_item_diggs` TEXT NULL ,
`digg_id` TEXT NULL ,
`digg_user_name` TEXT NULL ,
`digg_user_links` TEXT NULL ,
`digg_user_icon` TEXT NULL ,
`digg_user_profileviews` TEXT NULL ,
`digg_user_registered` TEXT NULL ,
`digg_user_fullname` TEXT NULL ,
`digg_type` TEXT NULL ,
`klout_amplification` TEXT NULL ,
`klout_score` TEXT NULL ,
`klout_true_reach` TEXT NULL ,
`klout_slope` TEXT NULL ,
`klout_network` TEXT NULL ,
`klout_class` TEXT NULL ,
`demographic_gender` TEXT NULL ,
`salience_content_entities` TEXT NULL ,
`salience_content_topics` TEXT NULL ,
`salience_content_sentiment` TEXT NULL ,
`salience_title_sentiment` TEXT NULL ,
`salience_title_entities` TEXT NULL ,
`salience_title_topics` TEXT NULL ,
`trends_type` TEXT NULL ,
`trends_source` TEXT NULL ,
`trends_content` TEXT NULL ,
`language_tag` TEXT NULL , PRIMARY KEY (`intId`) ) ENGINE=MyISAM DEFAULT CHARSET=UTF8;
LOAD DATA LOCAL INFILE 'test.csv' INTO TABLE Interactions CHARACTER SET UTF8 FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' ESCAPED BY '\\' LINES TERMINATED BY '\n' IGNORE 1 LINES (twitter_retweet_count, twitter_retweet_user_screen_name, twitter_retweet_user_id_str, twitter_retweet_user_geo_enabled, twitter_retweet_user_statuses_count, twitter_retweet_user_lang, twitter_retweet_user_id, twitter_retweet_user_followers_count, twitter_retweet_user_listed_count, twitter_retweet_user_location, twitter_retweet_user_description, twitter_retweet_user_url, twitter_retweet_user_friends_count, twitter_retweet_user_time_zone, twitter_retweet_user_name, twitter_retweet_domains, twitter_retweet_text, twitter_retweet_links, twitter_retweet_created_at, twitter_retweet_source, twitter_retweet_id, twitter_retweet_mentions, twitter_retweeted_user_name, twitter_retweeted_user_listed_count, twitter_retweeted_user_id, twitter_retweeted_user_followers_count, twitter_retweeted_user_friends_count, twitter_retweeted_user_location, twitter_retweeted_user_url, twitter_retweeted_user_time_zone, twitter_retweeted_user_statuses_count, twitter_retweeted_user_description, twitter_retweeted_user_screen_name, twitter_retweeted_user_geo_enabled, twitter_retweeted_user_lang, twitter_retweeted_user_id_str, twitter_retweeted_place_place_type, twitter_retweeted_place_country_code, twitter_retweeted_place_url, twitter_retweeted_place_name, twitter_retweeted_place_country, twitter_retweeted_place_id, twitter_retweeted_place_full_name, twitter_retweeted_created_at, twitter_retweeted_geo_latitude, twitter_retweeted_geo_longitude, twitter_retweeted_source, twitter_retweeted_id, twitter_user_description, twitter_user_id, twitter_user_listed_count, twitter_user_url, twitter_user_name, twitter_user_id_str, twitter_user_statuses_count, twitter_user_followers_count, twitter_user_time_zone, twitter_user_friends_count, twitter_user_screen_name, twitter_user_location, twitter_user_geo_enabled, twitter_user_lang, twitter_geo_latitude, twitter_geo_longitude, twitter_place_full_name, twitter_place_name, twitter_place_id, twitter_place_url, twitter_place_country, twitter_place_place_type, twitter_place_country_code, twitter_text, twitter_mentions, twitter_id, twitter_source, twitter_links, twitter_domains, twitter_in_reply_to_screen_name, twitter_in_reply_to_status_id, twitter_in_reply_to_user_id, twitter_created_at, myspace_verb, myspace_contenttype, myspace_content, myspace_author_avatar, myspace_author_id, myspace_author_link, myspace_author_name, myspace_author_friends_count, myspace_geo_longitude, myspace_geo_latitude, myspace_link, myspace_category, myspace_id, links_url, links_retweet_count, links_created_at, links_title, interaction_source, interaction_geo_latitude, interaction_geo_longitude, interaction_author_friends_count, interaction_author_username, interaction_author_link, interaction_author_id, interaction_author_avatar, interaction_author_name, interaction_created_at, interaction_type, interaction_id, interaction_digg_id, interaction_link, interaction_title, interaction_tags, interaction_content, facebook_message, facebook_type, facebook_to_names, facebook_to_ids, facebook_author_avatar, facebook_author_name, facebook_author_link, facebook_author_id, facebook_og_by, facebook_application, facebook_name, facebook_source, facebook_link, facebook_caption, facebook_id, facebook_description, digg_item_description, digg_item_media, digg_item_thumbnail_originalheight, digg_item_thumbnail_src, digg_item_thumbnail_height, digg_item_thumbnail_contentType, digg_item_thumbnail_width, digg_item_thumbnail_originalwidth, digg_item_shorturl_view_count, digg_item_shorturl_short_url, digg_item_container_short_name, digg_item_container_name, digg_item_title, digg_item_comments, digg_item_topic_name, digg_item_topic_short_name, digg_item_submit_date, digg_item_href, digg_item_id, digg_item_status, digg_item_link, digg_item_diggs, digg_id, digg_user_name, digg_user_links, digg_user_icon, digg_user_profileviews, digg_user_registered, digg_user_fullname, digg_type, klout_amplification, klout_score, klout_true_reach, klout_slope, klout_network, klout_class, demographic_gender, salience_content_entities, salience_content_topics, salience_content_sentiment, salience_title_sentiment, salience_title_entities, salience_title_topics, trends_type, trends_source, trends_content, language_tag );
SHOW WARNINGS;
*/
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment