Skip to content

Instantly share code, notes, and snippets.

@kirbysayshi
Created August 10, 2015 22:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kirbysayshi/ce3ea113e8df43136aa9 to your computer and use it in GitHub Desktop.
Save kirbysayshi/ce3ea113e8df43136aa9 to your computer and use it in GitHub Desktop.
requirebin sketch
// Load the sentence tokenizer module from the nlp_compromise package.
var nlp = require('nlp_compromise/src/methods/tokenization/sentence');

// Sample input: an artist biography containing inline <a> tags, an initial
// ("M."), escaped apostrophes, and a quoted song title.
var sampleBiography = 'Working with and without his Mercury Music Prize-winning group <a href="spotify:artist:3iOvXCl6edW5Um0fXEBRXy">The M. Ward xx</a>, Jamie xx established himself as a forward-thinking beatmaker and producer. Born Jamie Smith in Putney, London, he began his musical career playing the drums, but after realizing he couldn\'t use his innovation skills on the instrument, he instead turned to the \'80s Media Production Centre, which programmed his computer-based sounds onto electronic percussion pads, allowing him to play both live and in the studio. In 2008, he teamed up with former school friends Oliver Sim, Romy Madley Croft, and Baria Qureshi to form <a href="spotify:artist:3iOvXCl6edW5Um0fXEBRXy">the xx</a>, who signed to XL subsidiary Young Turks a year later and went on to record one of the most well-received albums of the decade with their self-titled debut. As well as reworking his own band\'s tracks, he also became a sought-after remixer for a number of pop artists including <a href="spotify:artist:4dpARuHxo51G3z768sgnrY">Adele</a>, <a href="spotify:artist:1VyVjE6tvQiM8T8a3WcYQd">Jack Peñate</a>, and <a href="spotify:artist:1moxjboGR7GNWYIMWsRjgG">Florence + the Machine</a>. His atmospheric version of the latter\'s "You\'ve Got the Love" sparked the interest of XL boss Richard Russell, who asked him to remix rap pioneer <a href="spotify:artist:0kEfub5RzlZOB2zGomqVSU">Gil Scott-Heron</a>\'s I\'m New Here, which was retitled We\'re New Here upon its release in 2011.';

// Tokenize the sample and print whatever the tokenizer returns.
console.log(nlp(sampleBiography));
require=function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s}({1:[function(require,module,exports){var honourifics=require("./honourifics");var main=["arc","al","ave","blvd","cl","ct","cres","exp","rd","st","dist","mt","fy","hwy","pd","pl","plz","tce","llb","md","bl","ma","ba","lit","ala","ariz","ark","cal","calif","col","colo","conn","del","fed","fla","fl","ga","ida","ind","ia","la","kan","kans","ken","ky","la","md","mich","minn","mont","neb","nebr","nev","okla","penna","penn","pa","dak","tenn","tex","ut","vt","va","wash","wis","wisc","wy","wyo","usafa","alta","ont","que","sask","yuk","dept","univ","assn","bros","inc","ltd","co","corp","yahoo","joomla","jeopardy"];main=main.concat(honourifics);module.exports=main},{"./honourifics":2}],2:[function(require,module,exports){var main=["jr","mr","mrs","ms","dr","prof","sr","sen","corp","rep","gov","atty","supt","det","rev","col","gen","lt","cmdr","adm","capt","sgt","cpl","maj","miss","misses","mister","sir","esq","mstr","phd","adj","adv","asst","bldg","brig","comdr","hon","messrs","mlle","mme","op","ord","pvt","reps","res","sens","sfc","surg"];module.exports=main},{}],"nlp_compromise/src/methods/tokenization/sentence":[function(require,module,exports){module.exports=function(text){var abbreviations=require("../../data/lexicon/abbreviations");var sentences=[];var chunks=text.split(/(\S.+?[.\?!])(?=\s+|$|")/g);abbreviations=abbreviations.concat(["jan","feb","mar","apr","jun","jul","aug","sep","oct","nov","dec","sept","sep"]);abbreviations=abbreviations.concat(["ex","eg","ie","circa","ca","cca","vs","etc","esp","ft","bc","ad"]);var 
abbrev_reg=new RegExp("\\b("+abbreviations.join("|")+")[.!?] ?$","i");var acronym_reg=new RegExp("[ |.][A-Z].?$","i");var elipses_reg=new RegExp("\\.\\.\\.*$");var chunks_length=chunks.length;for(i=0;i<chunks_length;i++){if(chunks[i]){chunks[i]=chunks[i].replace(/^\s+|\s+$/g,"");if(chunks[i+1]&&chunks[i].match(abbrev_reg)||chunks[i].match(acronym_reg)||chunks[i].match(elipses_reg)){chunks[i+1]=((chunks[i]||"")+" "+(chunks[i+1]||"")).replace(/ +/g," ")}else if(chunks[i]&&chunks[i].length>0){sentences.push(chunks[i]);chunks[i]=""}}}if(sentences.length===0){return[text]}return sentences}},{"../../data/lexicon/abbreviations":1}]},{},[]);var nlp=require("nlp_compromise/src/methods/tokenization/sentence");console.log(nlp('Working with and without his Mercury Music Prize-winning group <a href="spotify:artist:3iOvXCl6edW5Um0fXEBRXy">The M. Ward xx</a>, Jamie xx established himself as a forward-thinking beatmaker and producer. Born Jamie Smith in Putney, London, he began his musical career playing the drums, but after realizing he couldn\'t use his innovation skills on the instrument, he instead turned to the \'80s Media Production Centre, which programmed his computer-based sounds onto electronic percussion pads, allowing him to play both live and in the studio. In 2008, he teamed up with former school friends Oliver Sim, Romy Madley Croft, and Baria Qureshi to form <a href="spotify:artist:3iOvXCl6edW5Um0fXEBRXy">the xx</a>, who signed to XL subsidiary Young Turks a year later and went on to record one of the most well-received albums of the decade with their self-titled debut. As well as reworking his own band\'s tracks, he also became a sought-after remixer for a number of pop artists including <a href="spotify:artist:4dpARuHxo51G3z768sgnrY">Adele</a>, <a href="spotify:artist:1VyVjE6tvQiM8T8a3WcYQd">Jack Peñate</a>, and <a href="spotify:artist:1moxjboGR7GNWYIMWsRjgG">Florence + the Machine</a>. 
His atmospheric version of the latter\'s "You\'ve Got the Love" sparked the interest of XL boss Richard Russell, who asked him to remix rap pioneer <a href="spotify:artist:0kEfub5RzlZOB2zGomqVSU">Gil Scott-Heron</a>\'s I\'m New Here, which was retitled We\'re New Here upon its release in 2011.'));
{
"name": "requirebin-sketch",
"version": "1.0.0",
"dependencies": {
"nlp_compromise": "1.1.3"
}
}
<!-- contents of this file will be placed inside the <body> -->
<!-- contents of this file will be placed inside the <head> -->
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment