This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | |
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> | |
<modelVersion>4.0.0</modelVersion> | |
<groupId>jonathanmv</groupId> | |
<artifactId>json-to-parquet</artifactId> | |
<version>0.0.1-SNAPSHOT</version> | |
<packaging>jar</packaging> | |
<name>json-to-parquet</name> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package jonathanmv.storage; | |
import org.json.simple.JSONObject; | |
import org.json.simple.parser.JSONParser; | |
import org.json.simple.parser.ParseException; | |
public class JsonToThriftConverter { | |
private JSONObject relationship; | |
private JSONObject personOne; | |
private JSONObject personTwo; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package jonathanmv.storage; | |
import java.io.IOException; | |
import org.apache.hadoop.io.Text; | |
import org.apache.hadoop.mapreduce.Mapper; | |
import org.json.simple.parser.ParseException; | |
public class JsonToThriftMapper extends Mapper<Object, Text, Void, FriendsEdge> { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package jonathanmv.storage; | |
import org.apache.hadoop.conf.Configuration; | |
import org.apache.hadoop.conf.Configured; | |
import org.apache.hadoop.fs.FileSystem; | |
import org.apache.hadoop.fs.Path; | |
import org.apache.hadoop.mapreduce.Job; | |
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; | |
import org.apache.hadoop.util.Tool; | |
import org.apache.hadoop.util.ToolRunner; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Identifies a person.
 *
 * Declared as a union with a single string variant, `person_id`.
 */
union PersonID {
  1: string person_id;
}
/**
 * An edge connecting two people, stamped with a 64-bit timestamp.
 *
 * All three fields are required.
 */
struct FriendsEdge {
  /** First person on the edge. */
  1: required PersonID one;
  /** Second person on the edge. */
  2: required PersonID two;
  /** When the edge was recorded. NOTE(review): unit/epoch not visible here -- confirm. */
  3: required i64 timestamp;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Fetches a post's JSON and extracts its text.
 *
 * Builds the URL with `userPostJsonUrl`, downloads it with `getJsonData`,
 * and hands the response to `textInPostFromResponse`.
 *
 * @param {string} username - Passed through to `userPostJsonUrl`.
 * @param {string} postId - Passed through to `userPostJsonUrl`.
 * @returns {Promise<*>} Resolves to whatever `textInPostFromResponse` returns
 *   for the downloaded data; rejects if `getJsonData` rejects.
 */
const textInPostId = async (username, postId) => {
  const url = userPostJsonUrl(username, postId)
  const data = await getJsonData(url)
  return textInPostFromResponse(data)
}
const textInPostFromResponse = (response, types = DEFAULT_TYPES) => { | |
const paragraphs = _.get(response, 'payload.value.content.bodyModel.paragraphs', []) | |
const filtered = paragraphs.filter(({ type }) => types.includes(type)) | |
return filtered.map(({text}) => text) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// awsHelper.js | |
/**
 * Requests entity detection for a piece of text.
 *
 * The text is first passed through `cleanText`; the language code sent with
 * the request is always 'en'.
 *
 * @param {string} text - Raw text to analyse.
 * @returns {*} The result of `comprehend.detectEntitiesAsync`
 *   (presumably a Promise, given the `Async` suffix -- confirm).
 */
const getEntities = text => {
  const Text = cleanText(text)
  const LanguageCode = 'en'
  return comprehend.detectEntitiesAsync({ Text, LanguageCode })
}
// mediumHelper.js | |
const getStatsFromComprehendResponse = ({ Entities }) => { | |
const entityTypeCounts = _.countBy(_.uniqBy(Entities, 'Text'), 'Type') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Builds a short spoken-style description of a post from its texts.
 *
 * The first element is used as the title; elements 1 through 3 (at most
 * three) are joined with ".\n" and appended after an introductory sentence.
 *
 * @param {string[]} texts - Post texts, title first.
 * @returns {string} The introduction sentence followed by the joined excerpt.
 */
const describePostTexts = texts => {
  const postTitle = texts[0]
  // Only the three entries that follow the title are included.
  const postIntro = texts.slice(1, 4).join('.\n')
  const intro = `The post is titled "${postTitle}" and it reads as it follows:\n`
  return intro + postIntro
}
const describeEntityCounts = counts => { | |
const top = counts[0] | |
const intro = `We find a total of ${counts.length} entities mentioned. ` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Synthesizes speech for `text` via `getSpeech` and passes the result to
 * `saveSpeech`.
 *
 * @param {string} text - Text to turn into speech.
 * @returns {*} The chained result of `getSpeech(text).then(saveSpeech)`.
 */
const saveSpeechLocally = text => getSpeech(text).then(saveSpeech)
/**
 * Requests speech synthesis for a piece of text.
 *
 * The text is first passed through `cleanText`; the request always asks for
 * 'mp3' output with the 'Kimberly' voice.
 *
 * @param {string} text - Raw text to synthesize.
 * @returns {*} The result of `polly.synthesizeSpeechAsync`
 *   (presumably a Promise, given the `Async` suffix -- confirm).
 */
const getSpeech = text => {
  const Text = cleanText(text)
  const OutputFormat = 'mp3'
  const VoiceId = 'Kimberly'
  const params = { Text, OutputFormat, VoiceId }
  return polly.synthesizeSpeechAsync(params)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const webshot = require('webshot') | |
const util = require('util') | |
const exec = util.promisify(require('child_process').exec) | |
const screenshot = (url, file) => new Promise((resolve, reject) => { | |
const screenSize = { width: 1920, height: 1080 } | |
webshot(url, file, { screenSize }, error => { | |
if (error) { | |
return reject(error) | |
} |
Older · Newer