Skip to content

Instantly share code, notes, and snippets.

@carsonmcdonald
Created January 1, 2013 16:49
Show Gist options
  • Save carsonmcdonald/4428542 to your computer and use it in GitHub Desktop.
Save carsonmcdonald/4428542 to your computer and use it in GitHub Desktop.
CouchDB map function used to count #code2012 entries.
{
map: function (doc)
{
if(doc.text.indexOf("RT ") != -1 || doc.text.indexOf("RT:") != -1 ||
doc.text.indexOf("rt ") != -1 || doc.text.indexOf("rt:") != -1)
{
return;
}
/**
 * Normalises raw tweet text before it is split into tokens.
 *
 * The text is lower-cased and every known alias (including multi-word ones
 * such as "objective c" that would not survive tokenising) is rewritten to a
 * single canonical spelling. Rules run in the listed order and each rule
 * rewrites every occurrence, not just the first one.
 *
 * @param {string} obj - Raw tweet text.
 * @returns {string} Lower-cased text with aliases replaced.
 */
function pre_replacements(obj)
{
  // [alias, canonical] pairs, applied top to bottom.
  var rules = [
    ['objective c', 'objective-c'], ['objc', 'objective-c'],
    ['obj-c', 'objective-c'], ['objectivec', 'objective-c'],
    // Keep the leading space in the replacement, otherwise the token is
    // fused with the preceding word ("ruby js" -> "rubyjavascript").
    [' js', ' javascript'],
    // Specific assembler spellings must run before the generic 'asm' and
    // 'assembly' rules, which would otherwise rewrite part of them first.
    ['x86asm', 'assembler'], ['x86 asm', 'assembler'],
    ['arm asm', 'assembler'], ['x86 assembly', 'assembler'],
    ['arm assembly', 'assembler'], ['x86 assembler', 'assembler'],
    ['asm', 'assembler'], ['assembly', 'assembler'],
    ['common lisp', 'lisp'],
    [' script', ' sh'], ['nodejs', 'node.js'],
    ['csharp', 'c#'], ['mysql', 'sql'],
    ['visual basic', 'vb'], ['as3', 'actionscript'],
    ['css3', 'css'], ['html5', 'html'],
    ['emacs', 'elisp'], ['turbo pascal', 'pascal'],
    ['small talk', 'smalltalk'],
    // ' json' is turned into ' javascripton' by the ' js' rule above; this
    // rule folds it back to 'javascript'.
    ['javascripton', 'javascript'],
    ['c++11', 'c++']
  ];
  var text = obj.toLowerCase();
  for (var i = 0; i < rules.length; i++)
  {
    // split/join replaces every literal occurrence without having to escape
    // regex metacharacters such as the '+' in 'c++11'.
    text = text.split(rules[i][0]).join(rules[i][1]);
  }
  return text;
}
/**
 * Cleans up a single token produced by splitting the tweet text.
 *
 * The token is lower-cased, stray punctuation is stripped, and short aliases
 * that are only unambiguous as a whole token ("js", "go", "sh", ...) are
 * expanded. Rules run in the listed order; literal rules remove or rewrite
 * every occurrence, so a token such as '"ruby"' loses both quotes.
 *
 * @param {string} obj - One token from the tweet.
 * @returns {string} The cleaned token; may be the empty string.
 */
function post_replacements(obj)
{
  // [pattern, replacement] pairs. String patterns are literal and global;
  // RegExp patterns are anchored and therefore match at most once.
  var rules = [
    ['"', ''], ['&', ''], ['<', ''],
    ['>', ''], ['(', ''], [')', ''],
    [/^-$/, ''], ['..', ''], [':', ''],
    [/^--$/, ''], [/\.$/, ''], [/^#/, ''],
    [/^js$/, 'javascript'], ['perlease', 'perl'],
    [/^go$/, 'golang'], [/^node$/, 'node.js'],
    [/^bat$/, 'batch'], [/^x86$/, 'assembler'],
    [/^arm$/, 'assembler'], [/^pl$/, 'perl'],
    [/^sh$/, 'shell'], [/^coffescript$/, 'coffeescript']
  ];
  var token = obj.toLowerCase();
  for (var i = 0; i < rules.length; i++)
  {
    var pattern = rules[i][0];
    var replacement = rules[i][1];
    // String.prototype.replace with a string pattern only touches the first
    // match, so literals go through split/join instead.
    token = (typeof pattern === 'string')
      ? token.split(pattern).join(replacement)
      : token.replace(pattern, replacement);
  }
  return token;
}
/**
 * Reports whether the lower-cased form of `obj` is an element of `a`.
 *
 * Only `obj` is lower-cased; the elements of `a` are compared as they are,
 * using strict equality.
 *
 * @param {Array} a - Values to search.
 * @param {string} obj - Value to look for.
 * @returns {boolean} true when a match exists, false otherwise.
 */
function contains(a, obj)
{
  // Scan from the last element back to the first.
  for (var pos = a.length - 1; pos >= 0; pos--)
  {
    var needle = obj.toLowerCase();
    if (needle !== a[pos])
    {
      continue;
    }
    return true;
  }
  return false;
}
// Tokens that are not treated as language names: common words, hashtags,
// user handles, URL fragments and punctuation seen in the tweets. The empty
// string is listed so that empty tokens produced by the split are dropped.
// Declared with `var` so the list stays local to the map function; a bare
// assignment creates an implicit global and throws in strict mode.
var skip_terms = [
'#code2012', 'code2012', 'data', 'pure', 'and', 'the', 'year', 'you', 'program', 'programming',
'it', '2012', '2013', '#code2013', 'code2013', 'this', 'that', "that's", 'tools', "i'm", "it's",
'i', 'of', 'older', 'twitter.com', 'treat', 'toyed', 'top', 'old', 'not', "i've", 'languages',
'language', 'need', 'have', 'hope', 'hey', 'my', 'in', 'for', 'src', 'must', 'a', 'an',
'around', 'as', 'at', 'be', 'been', 'bit', 'bit.ly', 'but', 'can', 'coding', 'developers', 'did',
'doubts', 'focused', 'gist.git', 'http', 'just', 'know', 'last', 'learning', 'look', 'lot', 'lots',
'mention', 'mentioned', 'mostly', 'nothing', 'order', 'really', 'rt', 'sometimes', 'statuses',
'terribly', 'used', 'weird', 'what', 'whoever', 'with', 'wow', 'wrong', 'to', 'code', 'more',
'www.ioncannon.net', 'little', 'is', '&', 'was', 'processing', 'projects', 'project', 'wrote',
'forgot', 'count', 'on', 'oh', 'time', 'some', 'me', 'too', 'all', 'also', 'think', 'past', '',
'my', 'satisfy', 'mining', 'gods:', 'the', '@deadprogram', 'using', 't.co', 'about', 'next', 'so',
'or', 'list', 'much', 'year?', 'tweet', 'do', 'no', 'by', 'work', 'tiny', 'course', 'very', 'from',
'his', 'then', 'mainly', 'now', 'will', 'others', 'here', '2', '3', 'various', 'amount', 'one',
'should', 'write', '+', 'few', 'stats', 'y', 'de', 'good', 'yes', 'are', 'how', 'fun', 'than',
'other', 'cc', 'add', 'should', 'only', 'use', 'first', 'new', 'touch', 'if', 'any', 'like', "you've",
'most', 'maybe', 'production', 'maybe', 'learned', '6502', 'out', 'even', 'trying', 'into', 'does',
'stuff', 'same', 'started', 'que', 'el', 'probably', 'played', 'play', 'please', 'coded', 'remember',
'would', 'still', "don't", 'feel', 'had', 'many', 'has', 'hoping', 'something', 'up', 'ok', ';',
'“@deadprogram', 'enough', 'read', 'hopefully', 'home', 'quite', 'en', 'there', 'people', 'lines',
'written', 'count?', 'bits', '=', '?', "didn't", 'over', 'thanks', 'want', 'plus', 'pretty', '-',
'well', 'et', 'langs', 'were', '@humancoders', 'through', 'tweets', 'end', 'learn', 'common',
'guess', '5', 'real', 'see', 'writing', 'descending', 'looking', 'standard', '@marcocantu',
'interesting', 'friends', 'encouraging', 'things', 'programmed', 'code2012"', 'ones', 'those',
'results', 'least'
];
// Distinct tokens kept for this tweet, in order of first appearance.
var langs = [];
// Rewrite known aliases first, then split on commas, whitespace and slashes.
var st = pre_replacements(doc.text).split(/,|\s|\//g);
for(var i=0; i<st.length; i++)
{
  var cs = post_replacements(st[i]);
  // Keep a token only if it is not a skip term and was not already recorded.
  if(!contains(skip_terms, cs) && !contains(langs, cs))
  {
    langs.push(cs);
  }
}
// One row per tweet, keyed by document id, carrying the extracted tokens.
emit(doc._id, { created: doc.created_at, text: doc.text, languages: langs });
}
},
function (err, res)
{
if (err) console.log(err);
var langs = {};
res.forEach(function(val, index, arr)
{
index.languages.forEach(function(valb, indexb, arrb)
{
if(!langs[valb])
{
langs[valb] = 0;
}
langs[valb]++;
});
});
var out = [];
for(var lang in langs)
{
if(langs[lang] > 8)
out.push({language: lang, value: langs[lang]});
}
console.log(JSON.stringify(out));
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment