Skip to content

Instantly share code, notes, and snippets.

View sandboxws's full-sized avatar
🏠
Debug → Analyze → Refactor → Performance Gains → Repeat

Ahmed Elhossaini sandboxws

🏠
Debug → Analyze → Refactor → Performance Gains → Repeat
View GitHub Profile
// Turn each (key, count) pair into one text line of the form "<key> <count>"
// (single space between the two values) and write the lines out with TextIO
// under the "./ratings_results" prefix using a ".csv" suffix.
// NOTE(review): the separator is a space, not a comma, even though the files
// are given a .csv suffix — confirm this is what downstream readers expect.
ratingsCounts
.apply("FormatResults", MapElements.into(TypeDescriptors.strings())
.via((KV<String, Long> ratingsCount) -> ratingsCount.getKey() + " " + ratingsCount.getValue()))
.apply(TextIO.write().to("./ratings_results").withSuffix(".csv"));
// Pull the second comma-separated field (index 1) out of every CSV row, then
// count how many times each distinct value occurs. The result pairs each
// value with its occurrence count.
// NOTE(review): presumably index 1 is the rating column — verify against the
// input file layout. A row with no comma yields a one-element array, so
// split(",")[1] would throw ArrayIndexOutOfBoundsException; a header row, if
// present, would be counted like any other value.
PCollection<KV<String, Long>> ratingsCounts = csvRows
.apply("Extract Ratings",
FlatMapElements.into(TypeDescriptors.strings())
// Each row contributes exactly one element: its field at index 1.
.via(csvRow -> Arrays.asList(csvRow.split(",")[1])))
.apply("Count Ratings", Count.<String>perElement());
// Read "./reviews.csv" through TextIO into a PCollection of strings; the path
// is relative, so it resolves against the process working directory.
PCollection<String> csvRows = pipeline.apply("Read from CSV", TextIO.read().from("./reviews.csv"));
// Create the Beam pipeline object using the no-argument factory method.
Pipeline pipeline = Pipeline.create();
@sandboxws
sandboxws / BeamBatchPipeline.java
Last active December 15, 2018 04:19
Beam Batch Pipeline
package com.sandboxws;
import java.util.Arrays;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.TextIO;
import org.apache.beam.sdk.transforms.Count;
import org.apache.beam.sdk.transforms.FlatMapElements;
import org.apache.beam.sdk.transforms.MapElements;
import org.apache.beam.sdk.values.KV;
+--------------------+-----------------------------+
| Winning Plan | FETCH (7 / 7) -> IXSCAN (7) |
+--------------------+-----------------------------+
| Used Indexes | author_id_1 (forward) |
+--------------------+-----------------------------+
| Rejected Plans | 0 |
+--------------------+-----------------------------+
| Documents Returned | 7 |
+--------------------+-----------------------------+
| Documents Examined | 7 |
ae Article.where(author_id: '5bb13233d5cc2ec29c28649b')
# Mongoid document for an author.
#
# Mixes in Mongoid::Document and Mongoid::Timestamps, declares three String
# fields (first_name, last_name, email) and a has_many association to posts.
class Author
  include Mongoid::Document
  include Mongoid::Timestamps

  # All scalar attributes share the same type, so declare them in one pass
  # (same order as they would be declared individually).
  %i[first_name last_name email].each do |attribute|
    field attribute, type: String
  end

  has_many :posts
end
@sandboxws
sandboxws / mongoio_sink.py
Created March 9, 2018 01:45
WIP MongoDB Apache Beam Sink for Python
__all__ = ['WriteToMongo']
import json
from pymongo import MongoClient
from apache_beam.transforms import PTransform
from apache_beam.io import iobase
class _MongoSink(iobase.Sink):
"""A :class:`~apache_beam.io.iobase.Sink`."""
@sandboxws
sandboxws / index.html
Created August 15, 2012 19:25
A web page created at CodePen.io.
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<!-- IF PEN IS PRIVATE -->
<!-- <meta name="robots" content="noindex"> -->
<!-- END -->