Skip to content

Instantly share code, notes, and snippets.

# Remove retweets and tweets containing URLs.
# The terms are matched case-sensitively as plain substrings, so every tweet
# whose text contains "RT" or "http" anywhere is dropped.
# NOTE(review): assumes dplyr is attached so that filter() is dplyr::filter.
CaseSensitive_FilterTerms <- c("RT", "http")
# Collapse the terms into a single alternation pattern ("RT|http").
filter_regex <- paste(CaseSensitive_FilterTerms, collapse = "|")
# Reference the column through the data mask (.data$text) instead of df$text,
# so the condition is evaluated on the rows being filtered (also correct when
# df is grouped).
df <- filter(df, !grepl(filter_regex, .data$text))
# Remove mentions: drop only the tweets whose text starts with "@".
# Anchoring the pattern with "^" tests the first character directly, so no
# temporary helper column has to be created and deleted afterwards.
# grepl() returns FALSE for NA input, so rows with missing text are kept,
# exactly as with the former first-character comparison.
# NOTE(review): assumes dplyr is attached so that filter() is dplyr::filter.
df <- filter(df, !grepl("^@", .data$text))
# Remove retweets and tweets containing URLs.
# The terms are matched case-sensitively as plain substrings, so every tweet
# whose text contains "RT" or "http" anywhere is dropped.
# NOTE(review): assumes dplyr is attached so that filter() is dplyr::filter.
CaseSensitive_FilterTerms <- c("RT", "http")
# Collapse the terms into a single alternation pattern ("RT|http").
filter_regex <- paste(CaseSensitive_FilterTerms, collapse = "|")
# Reference the column through the data mask (.data$text) instead of df$text,
# so the condition is evaluated on the rows being filtered (also correct when
# df is grouped).
df <- filter(df, !grepl(filter_regex, .data$text))
#create new dataframe and initialize with first record
df.noMentions <- df[1,]
#append rows that do not start with @mention
for(j in 1:nrow(df)){
text <- c(df$text[j])
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
10.240.0.3 cluster-dn1.c.symmetric-rune-115401.internal cluster-dn1
10.240.0.4 cluster-dn2.c.symmetric-rune-115401.internal cluster-dn2
10.240.0.5 cluster-dn3.c.symmetric-rune-115401.internal cluster-dn3
10.240.0.2 cluster-cm.c.symmetric-rune-115401.internal cluster-cm # Added by Google
~
$ sudo service cloudera-scm-server start
$ sudo tail -f /var/log/cloudera-scm-server/cloudera-scm-server.log
# Copyright (c) 2013 Cloudera, Inc. All rights reserved.
#
# This file describes the database connection.
#
# The database type
# Currently 'mysql', 'postgresql' and 'oracle' are valid databases.
com.cloudera.cmf.db.type=mysql
# The database host
# If a non standard port is needed, use 'hostname:port'
com.cloudera.cmf.db.host=localhost
$ mysql -u root -p
mysql> create database testdb;
mysql> create database rman;
mysql> create database hive;
mysql> create database oozie;
mysql> grant all on testdb.* to 'root' identified by 'password';
mysql> grant all on rman.* to 'rman' identified by 'rman';
mysql> grant all on hive.* to 'hive' identified by 'hive';
mysql> grant all on oozie.* to 'oozie' identified by 'oozie';
mysql> exit;
$ cd /etc/yum.repos.d
$ sudo wget https://archive.cloudera.com/cm5/redhat/6/x86_64/cm/cloudera-manager.repo
$ sudo yum install cloudera-manager-server cloudera-manager-daemons cloudera-manager-agent jdk java
$ sudo wget http://dev.mysql.com/get/Downloads/Connector-J/mysql-connector-java-5.1.37.tar.gz
$ sudo tar zxvf mysql-connector-java-5.1.37.tar.gz
$ sudo mkdir /usr/share/java
$ sudo mv mysql-connector-java-5.1.37/mysql-connector-java-5.1.37-bin.jar /usr/share/java/mysql-connector-java.jar
$ cd /opt/
$ sudo wget --no-cookies --no-check-certificate --header "Cookie: gpw_e24=http%3A%2F%2Fwww.oracle.com%2F; oraclelicense=accept-securebackup-cookie" http://download.oracle.com/otn-pub/java/jdk/7u79-b15/jdk-7u79-linux-x64.tar.gz
$ sudo tar xzf jdk-7u79-linux-x64.tar.gz
$ cd /opt/jdk1.7.0_79/
$ sudo alternatives --install /usr/bin/java java /opt/jdk1.7.0_79/bin/java 2
$ sudo alternatives --config java
//[select /opt/jdk1.7.0_79/bin/java]
$ sudo yum install mysql-server
// I ran into InnoDB-related issues with the installed version of MySQL, so I upgraded it to 5.5.x
$ sudo rpm -Uvh https://mirror.webtatic.com/yum/el6/latest.rpm
$ sudo yum install mysql.`uname -i` yum-plugin-replace
$ sudo yum replace mysql --replace-with mysql55w
$ sudo /sbin/chkconfig --levels 235 mysqld on
$ sudo service mysqld start
$ sudo mysql_secure_installation