Mike Atlas mikeatlas
@mikeatlas
mikeatlas / sync_ftp_to_s3.md
Last active January 12, 2018 15:54
syncing ftp to s3 one-time really fast.

Original idea from "Transfer files from an FTP server to S3" by Hack N Cheese.

I moved roughly a terabyte in less than an hour. Granted, I couldn't take advantage of lftp's --parallel=30 switch because my FTP source limited me to one connection at a time, but --use-pget-n=N did seem to help out.

  • Get a fast Ubuntu 14.04 EC2 box on Amazon for temporary usage (I went with m1.xlarge) so data transfers aren't limited by your local bandwidth, at least. I also attached a fat 2TB EBS volume, symlinked it to /bigdisk, and made sure the EBS volume was deleted after I terminated this EC2 box. I hope lftp 2.6.4 is available as a stable package by the next time I attempt this.
  • Build lftp 2.6.4+ (Not easy to compile, so read the INSTALL file and plow through all your missing dependencies - you'll also need to re-run `sudo ./configure && su
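For reference, here's a rough sketch of the staged transfer (not taken verbatim from the gist; the host, credentials, remote path, and bucket below are placeholders, and it assumes lftp 2.6.4+ and the AWS CLI are installed with /bigdisk as the scratch EBS mount):

# Hedged sketch: mirror the FTP source onto the local EBS scratch disk first.
# --parallel only pays off if the FTP server allows multiple connections;
# --use-pget-n splits individual files into segments.
lftp -u ftpuser,ftppass ftp.example.com -e "
  mirror --verbose --use-pget-n=10 --parallel=30 /remote/path /bigdisk/staging;
  quit
"

# Then push the staged copy up to S3.
aws s3 sync /bigdisk/staging s3://my-bucket/path/

How much --parallel actually buys you depends on how many simultaneous connections the source FTP server allows, as noted above.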
@mikeatlas
mikeatlas / index.html
Last active August 28, 2015 17:54 — forked from tafsiri/index.html
2D Picking with canvas
<html lang="en">
<head>
<meta charset="utf-8">
<title>2D Picking with canvas</title>
<meta name="description" content="">
<meta name="author" content="Yannick Assogba">
<script src="//rawgit.com/mrdoob/stats.js/master/build/stats.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/dat-gui/0.5/dat.gui.js"></script>
This is a test block.
@mikeatlas
mikeatlas / fix csv
Created June 23, 2015 20:57
some simple in/out CSV parsing/fixing
// Fix up our CSV data
var fs = require('fs');
var csv = require('csv');
var allWPIsInputFs = fs.createReadStream('./input-data/all-wpi-lat-long.csv');
var mappedWPIsInputFs = fs.createReadStream('./input-data/mapped-wpi-lat-lons.csv');
var output = fs.createWriteStream('./input-data/all-wpi-lat-long_trim.csv');
var parser = csv.parse({delimiter: ','});
@mikeatlas
mikeatlas / pom.xml
Created February 3, 2015 15:53
GeoMesa GDELT Ingest CDH 5.3 Accumulo 1.6 pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>geomesa-gdelt</groupId>
<artifactId>geomesa-gdelt-accumulo1.5</artifactId>
<name>GeoMesa GDELT</name>
<version>1.0-SNAPSHOT</version>
@mikeatlas
mikeatlas / pom.xml
Created February 2, 2015 20:57
GeoMesa Quickstart CDH 5.3 Accumulo 1.6 pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<repositories>
<repository>
<id>maven2-repository.dev.java.net</id>
<name>Java.net repository</name>
@mikeatlas
mikeatlas / pom.xml
Created February 2, 2015 20:31
GeoMesa CDH 5.3 Accumulo 1.6 pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Copyright 2014 Commonwealth Computer Research, Inc.
~
~ Licensed under the Apache License, Version 2.0 (the "License");
~ you may not use this file except in compliance with the License.
~ You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
@mikeatlas
mikeatlas / GeoMesa-CDH-5.3.md
Last active August 29, 2015 14:14
Getting GeoMesa 1.0 to work on Cloudera CDH 5.3 with Accumulo 1.6

Getting GeoMesa 1.0 to work on Cloudera CDH 5.3 with Accumulo 1.6

by @mikeatlas

Thanks go out to @manasdebashiskar for helping me work through all these steps!

Getting GeoMesa to work on Accumulo 1.6 using Cloudera's CDH 5.3 is not any easier than getting it to work on the officially supported Accumulo 1.5.x, but here are the steps you can take to make it happen.

First, you will need to set up an Accumulo 1.6 cluster in CDH. This requires you to create a Zookeeper cluster, an HDFS cluster, and a Hadoop MapReduce cluster. For my purposes, I have the following setup (yours may differ as you see fit/need):

  • 3-host Zookeeper cluster, each running Ubuntu 12.04 (ami-018dd631 EC2 image) on t2.medium instances
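As general orientation only (a hedged sketch, not a step quoted from this gist): the part of a GeoMesa-on-Accumulo install that usually needs manual attention once the cluster is up is getting the GeoMesa distributed runtime jar onto every Accumulo tablet server so the GeoMesa iterators can load.

# Hedged sketch; the jar name/version, tablet server host names, and the
# Accumulo lib/ext path are illustrative assumptions, not values from the gist.
for host in tserver1 tserver2 tserver3; do
  scp geomesa-distributed-runtime-1.0.0.jar "${host}:/opt/accumulo/lib/ext/"
done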
@mikeatlas
mikeatlas / gist:0b69b354a8d713989147
Last active August 30, 2017 17:53
Splitting polyline paths across the International Date Line in Leaflet (sorry that it uses a mix of jQuery and Lo-Dash for helpers and is in CoffeeScript)
Array.prototype.splitAtEvery = (callback) ->
  sections = []
  arrayClone = this.slice(0)
  $.each(arrayClone, (idx, item) =>
    sectionsLength = 0
    _.each(sections, (section) =>
      sectionsLength += section.length
    )
    if callback(this, idx, item) == true
      sections.push(arrayClone.slice(0, idx+1-sectionsLength))
-# put this in your layout
-# what this does is load the split_asset application.css file using the correct sprockets name
/[if lt IE 10]
  = stylesheet_link_tag stylesheet_path('application').gsub(/\/assets\//, '/split_assets/')