Skip to content

Instantly share code, notes, and snippets.

@TechNinjaWeb
Last active October 24, 2017 15:20
Show Gist options
  • Save TechNinjaWeb/38f93cf9d96366c6672680695f5242df to your computer and use it in GitHub Desktop.
Save TechNinjaWeb/38f93cf9d96366c6672680695f5242df to your computer and use it in GitHub Desktop.
Keyword and text summarization from url
const fs = require('fs')
const port = process.env.PORT || 5000
const express = require('express')
const app = express()
const http = require('http')
const bodyParser = require('body-parser')
const server = http.createServer(app)
const _ = require('lodash')
const textract = require('textract')
const pdf_extract = require('pdf-text-extract')
const countWords = require("count-words")
const summarizer = require('nodejs-text-summarizer')
// Express middleware: serve files from this script's directory, then parse
// JSON request bodies.
// NOTE(review): serving __dirname makes every file next to this script
// downloadable (including the script itself) — confirm that is intended.
app
  .use(express.static(__dirname))
  .use(bodyParser.json());
/**
 * Debug route: loads ./test_keywords.json, runs it through the stop-word
 * filter when one is available, and replies with the plain text 'ok'.
 *
 * The filtered list is not part of the response; the route only confirms
 * that the fixture loads and the filter runs without throwing.
 */
app.all(['/test'], function(req, res) {
  const json = require('./test_keywords.json');

  // `remove_stop_words` is not defined in the visible part of this file.
  // Calling it unguarded raised a ReferenceError on every request, so the
  // 'ok' response below was never reached. Only apply it when it exists.
  if (typeof remove_stop_words === 'function') {
    json.filter(remove_stop_words);
  }

  res.send('ok');
});
/**
 * /get?url=<document url>
 *
 * Extracts the text found at `url` via textract and responds with JSON:
 *   - summary:          summarizer() output for the first 100 words of the
 *                       extracted text, itself capped at 100 words
 *   - summary_keywords: countWords() result for the uncapped summary
 *   - keywords:         countWords() result for the full extracted text
 *
 * Responds with status 404 and an `error` message when the `url` query
 * parameter is missing/unusable or when the document cannot be read.
 *
 * NOTE(review): `url` is caller-supplied and fetched server-side as-is, so
 * this route can be pointed at internal hosts (SSRF). Consider restricting
 * the allowed hosts before exposing it publicly.
 */
app.all(['/get'], function(req, res) {
  const url = req.query.url;

  // Repeated or nested query parameters (?url=a&url=b, ?url[x]=y) are parsed
  // into arrays/objects; only a non-empty string is a usable URL.
  if (typeof url !== 'string' || url === '') {
    return res.status(404).json({error: "could not determine url"});
  }

  textract.fromUrl(url, function(error, text_body) {
    // Stop here on failure: falling through would run countWords on a missing
    // body and then attempt to send a second response.
    if (error) {
      return res.status(404).json({error: "could not read file at url", params: req.query});
    }

    // Keyword density over the full extracted text
    const keywords = countWords(text_body);

    // Only the first 100 words of the text are fed to the summarizer
    const excerpt = text_body.split(" ").slice(0, 100).join(' ');
    const full_summary = summarizer(excerpt);
    const summary_keywords = countWords(full_summary);

    // Limit text summary to 100 words
    const summary = full_summary.split(" ").slice(0, 100).join(' ');

    // Respond to client
    return res.json({
      summary,
      summary_keywords,
      keywords
    });
  });
});
// Start accepting connections; log the address once the server is listening
server.listen(port, function onListening() {
  console.log(`App running on localhost:${port}`);
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment