Skip to content

Instantly share code, notes, and snippets.

View sAbakumoff's full-sized avatar

Sergey Abakumoff sAbakumoff

View GitHub Profile
-- BigQuery legacy SQL (bracketed [project:dataset.table] reference).
-- Collapses the package_json_duplicates table to one row per id, keeping one
-- name and one copies value per id via FIRST(), and lists the ids with the
-- highest copy count first.
-- NOTE(review): FIRST() picks whichever row comes first within each group;
-- this presumably assumes name/copies are identical for every row sharing an
-- id -- confirm against how the table was built.
SELECT
id,
FIRST(name) AS repo_name,
FIRST(copies) AS num_of_copies
FROM
[githubdataqueries:NpmStat.package_json_duplicates]
GROUP BY
id
ORDER BY
num_of_copies DESC
{
"name": "angular2",
"version": "<%= packageJson.version %>",
"description": "Angular 2 - a web framework for modern web apps",
"homepage": "<%= packageJson.homepage %>",
"bugs": "<%= packageJson.bugs %>",
"contributors": <%= JSON.stringify(packageJson.contributors) %>,
"license": "<%= packageJson.license %>",
"repository": <%= JSON.stringify(packageJson.repository) %>,
"devDependencies": <%= JSON.stringify(packageJson.defaultDevDependencies) %>,
var toString = {}.toString;
module.exports = Array.isArray || function (arr) {
return toString.call(arr) == '[object Array]';
};
@sAbakumoff
sAbakumoff / msg_len.R
Last active September 11, 2016 12:43
# Compare commit-message lengths between the repositories in
# react_angular_commits.csv.
library(dplyr)
library(stringr)

# Load the commits and derive:
#   message     - the message with newlines replaced by spaces
#   message_len - its length in characters
#   name        - the part of repo_name after the first "/"
react_angular_commits <- read.csv("react_angular_commits.csv") %>%
  mutate(
    message = str_replace_all(message, "\n", " "),
    message_len = str_length(message),
    name = str_sub(repo_name, start = regexpr("/", repo_name) + 1)
  )

# Set the plot margins explicitly before drawing. The margins were previously
# set by passing par(...) as an extra positional argument to boxplot(), which
# only worked because evaluating that (otherwise ignored) argument has a side
# effect. The previous settings are restored once the plot is drawn.
old_par <- par(mar = c(5, 6, 4, 6))
boxplot(
  message_len ~ name,
  data = react_angular_commits,
  outline = FALSE,    # hide outlier points
  horizontal = TRUE,
  las = 2,            # axis labels perpendicular to the axis
  col = c("#d20013", "#53d2fa"),
  main = "Length of the commits messages"
)
par(old_par)
# Split commit messages into one word per row and remove stop words.
library(tidytext)
data("stop_words")

tidy_commits <- react_angular_commits %>%
  select(name, message) %>%
  unnest_tokens(word, message) %>%
  # Join key spelled out so the match cannot silently change if either table
  # gains another shared column.
  anti_join(stop_words, by = "word")
# Horizontal bar chart of the 15 most frequent words in angular.js commit
# messages.
tidy_commits %>%
  filter(name == "angular.js") %>%
  count(word, sort = TRUE) %>%
  head(n = 15) %>%
  # Order the levels by count so the bars are drawn in frequency order.
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(x = word, y = n)) +
  geom_bar(stat = "identity", fill = I("#d20013")) +
  coord_flip() +
  labs(
    title = "Most Common Words in Angular Commit Messages",
    y = "Occurrences"
  )
# Extract the "type(scope)" prefix from angular.js commit messages.
# Only messages starting with one of the listed types followed by "(" are kept.
#   type  - trimmed text before the first "("
#   scope - trimmed text between the first "(" and the first ")"
# The parenthesis positions are located once per message. When a message has
# no ")", str_locate() returns NA and scope becomes NA instead of an arbitrary
# slice of the message.
angular_commits_type_scope <- react_angular_commits %>%
  filter(
    name == "angular.js",
    str_detect(message, "^(feat|fix|docs|style|refactor|test|chore)\\s*\\(")
  ) %>%
  mutate(
    open_paren = str_locate(message, fixed("("))[, "start"],
    close_paren = str_locate(message, fixed(")"))[, "start"],
    type = trimws(str_sub(message, start = 1L, end = open_paren - 1L)),
    scope = trimws(
      str_sub(message, start = open_paren + 1L, end = close_paren - 1L)
    )
  ) %>%
  # The helper position columns are dropped here.
  select(type, scope)
top_scope<-angular_commits_type_scope %>%
tidy_commits %>%
  # Horizontal bar chart of the 15 most frequent words in react commit
  # messages.
  filter(name == "react") %>%
  count(word, sort = TRUE) %>%
  head(n = 15) %>%
  # Order the levels by count so the bars are drawn in frequency order.
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(x = word, y = n)) +
  geom_bar(stat = "identity", fill = I("#53d2fa")) +
  coord_flip() +
  labs(
    title = "Most Common Words in React Commit Messages",
    y = "Occurrences"
  )
# Bing sentiment lexicon: one row per word with its sentiment label.
# Fetched through get_sentiments() rather than by filtering the bundled
# `sentiments` table on `lexicon` and dropping `score`; current tidytext
# releases no longer ship those columns, so the filter would fail there.
bing <- tidytext::get_sentiments("bing")
# Count how often each sentiment-bearing word occurs in angular.js commit
# messages, most frequent first.
angular_word_counts <- tidy_commits %>%
  filter(name == "angular.js") %>%
  # Keep only words present in the lexicon; the join key is explicit so it
  # cannot silently change if either table gains another shared column.
  inner_join(bing, by = "word") %>%
  count(word, sentiment, sort = TRUE) %>%
  ungroup()
# Step 1: remove rows for the words "merge", "pull" and "request", as they
# are useless for the analysis.
# stringsAsFactors = FALSE keeps `word` a character column on R < 4.0 as well,
# so the join compares character with character.
custom_stop_words <- data.frame(
  word = c("merge", "pull", "request"),
  stringsAsFactors = FALSE
)
tidy_commits <- tidy_commits %>%
  anti_join(custom_stop_words, by = "word")
#step 2: calculate tf-idf and sort the rows by its value in descending order
commit_words<-tidy_commits %>%
count(name, word, sort=TRUE) %>%
ungroup() %>%
bind_tf_idf(word, name, n) %>%
arrange(desc(tf_idf)) %>%