Sergey Semichev (ssemichev), GitHub gists
==IAM Roles:
1) AWSGlueServiceRoleDefault, with attached policies:
   - AWSGlueServiceRole
   - SecretsManagerReadWrite
   - AWSGlueKMS
   - AWSGlueS3
2) AWSGlueServiceNotebookRoleDefault, with attached policies:
   - AWSGlueServiceNotebookRole
   - SecretsManagerReadWrite
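For reference, a sketch of attaching these policies programmatically with the AWS Java SDK from Scala. AWSGlueServiceRole and SecretsManagerReadWrite are AWS-managed policies; AWSGlueKMS and AWSGlueS3 look customer-managed, so their ARNs below are placeholders, not real values.

import com.amazonaws.services.identitymanagement.AmazonIdentityManagementClientBuilder
import com.amazonaws.services.identitymanagement.model.AttachRolePolicyRequest

// Sketch: attach the policies listed above to the Glue default role.
// The AWSGlueKMS / AWSGlueS3 ARNs are placeholders for customer-managed policies.
val iam = AmazonIdentityManagementClientBuilder.defaultClient()
Seq(
  "arn:aws:iam::aws:policy/service-role/AWSGlueServiceRole",
  "arn:aws:iam::aws:policy/SecretsManagerReadWrite",
  "arn:aws:iam::123456789012:policy/AWSGlueKMS", // placeholder account id
  "arn:aws:iam::123456789012:policy/AWSGlueS3"   // placeholder account id
).foreach { arn =>
  iam.attachRolePolicy(new AttachRolePolicyRequest()
    .withRoleName("AWSGlueServiceRoleDefault")
    .withPolicyArn(arn))
}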
ssemichev / go-setup
Last active November 10, 2022 19:49
==Installing Go 1.4 with Homebrew on OS X:
https://stackoverflow.com/questions/12843063/install-go-with-brew-and-running-the-gotour

1) Create the workspace directories
mkdir $HOME/Go
mkdir -p $HOME/Go/src/github.com/user

2) Set up your paths (add these to your shell profile, e.g. ~/.bash_profile)
export GOPATH=$HOME/Go
export GOROOT=/usr/local/opt/go/libexec
export PATH=$PATH:$GOPATH/bin
export PATH=$PATH:$GOROOT/bin
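3) Verify the install (an optional sanity check; both are standard Go commands)
go version
go env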
// (separate gist preview: KafkaClient trait for falcon's Kafka streaming layer)
package falcon.streaming.kafka

import com.typesafe.scalalogging.slf4j.LazyLogging
import falcon.common._
import falcon.common.exceptions.FalconException
import falcon.common.traits.TClosable
import kafka.common.TopicAndPartition
import org.apache.spark.streaming.kafka.OffsetRange

trait KafkaClient extends TClosable with LazyLogging {
  // Body truncated in the preview; the imports point at per-partition offset
  // handling (TopicAndPartition -> OffsetRange).
}
ssemichev / KafkaCluster.scala
Created April 7, 2017 20:46
Convenience methods for interacting with a Kafka cluster
package falcon.streaming.kafka
import java.util.Properties
import kafka.api._
import kafka.common.{ ErrorMapping, OffsetAndMetadata, OffsetMetadataAndError, TopicAndPartition }
import kafka.consumer.{ ConsumerConfig, SimpleConsumer }
import scala.collection.mutable.ArrayBuffer
import scala.util.Random
import scala.util.control.NonFatal
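Only the imports survive in this preview. As a hedged sketch of the kind of convenience method the description refers to, against the old SimpleConsumer API; the method name, timeouts, and client id below are assumptions, not the gist's actual code:

// Sketch only: look up the latest available offset for one partition.
// Connection values (timeout, buffer size, client id) are illustrative.
def latestOffset(host: String, port: Int, tp: TopicAndPartition): Long = {
  val consumer = new SimpleConsumer(host, port, 10000, 64 * 1024, "offset-lookup")
  try {
    val request = OffsetRequest(Map(tp -> PartitionOffsetRequestInfo(OffsetRequest.LatestTime, 1)))
    consumer.getOffsetsBefore(request).partitionErrorAndOffsets(tp).offsets.head
  } finally {
    consumer.close()
  }
}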
// (separate gist preview: serializers for falcon workflow value types)
package falcon.workflows.common.serializers

import java.lang.reflect.Type
import java.util.Date
import falcon.workflows.datatypes.Value.ValueType
import falcon.workflows.datatypes.BaseValue
import falcon.workflows.datatypes.Value
import org.joda.time.DateTime
import org.joda.time.DateTimeZone
// (separate gist preview: falcon workflow data types with address-parsing helpers)
package falcon.workflows.datatypes

import java.util.Date
import falcon.common._
import falcon.workflows.common.parsers.AddressParser
import falcon.workflows.common.parsers.NormalizedAddress
import falcon.workflows.common.DataRecord
import falcon.workflows.common.DataTypeBuilder
import falcon.workflows.common.DataTypeBuilderHelper
// Spark notebook: load a mapping schema from JSON on DBFS
import falcon.workflows.common.metadata.MappingSchema
import falcon.workflows.factories._
import org.apache.hadoop.io.compress.GzipCodec

def createSchema = () => {
  val json = dbutils.fs.head("/mnt/spark/workflows_tests/schemas/voter_record_datatrust_v1.json", maxBytes = 500000)
  MappingSchema.fromJson(json)
}
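To round out the fragment, a hedged usage sketch; recordsRdd and the output path are assumptions, while the GzipCodec import suggests the notebook writes gzip-compressed text output:

val schema = createSchema()
// recordsRdd: an RDD[String] assumed to be built elsewhere in the notebook.
recordsRdd.saveAsTextFile("/mnt/spark/workflows_tests/output", classOf[GzipCodec])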
ssemichev / gist:e81ea9c4b998ffe54b96
Created March 4, 2015 14:51
Databricks S3Exception
com.databricks.rpc.UnknownRemoteException: Remote exception occurred:
com.databricks.s3.S3Exception: org.jets3t.service.S3ServiceException: S3 PUT failed for '/es_backup_12shards_150203%2Fpart-00003' XML Error Message: <?xml version="1.0" encoding="UTF-8"?><Error><Code>EntityTooLarge</Code><Message>Your proposed upload exceeds the maximum allowed size</Message><ProposedSize>61679275922</ProposedSize><MaxSizeAllowed>5368709120</MaxSizeAllowed><RequestId>F43535FA775BC82F</RequestId><HostId>bvGUmmM8g5u4vmZ6kUXSff8DNrFBp4Pk1xDDUFK/b0E5zwJtULfQP7v8KScfftD+nXhcWwCujmg=</HostId></Error>
at com.databricks.s3.Jets3tNativeFileSystemStore.handleServiceException(Jets3tNativeFileSystemStore.java:280)
at com.databricks.s3.Jets3tNativeFileSystemStore.storeFile(Jets3tNativeFileSystemStore.java:78)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
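The root cause is in the XML body: the job attempted a single non-multipart S3 PUT of roughly 57 GiB (ProposedSize 61679275922), while S3 caps a single PUT at 5 GiB (MaxSizeAllowed 5368709120). Assuming the backup is written from Spark, one workaround sketch is to repartition so each part file stays under the limit; the RDD name and bucket below are illustrative, not from the gist:

// Sketch: split the output so every part file lands well under S3's 5 GiB
// single-PUT ceiling (~1 GiB per part here). Names are illustrative.
val totalBytes = 61679275922L                 // ProposedSize from the error
val numParts   = (totalBytes / (1L << 30) + 1).toInt
backupRdd.repartition(numParts).saveAsTextFile("s3n://my-bucket/es_backup_12shards_150203")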
ssemichev / Failed Maps
Created March 3, 2015 20:32
Elasticsearch-hadoop export in JSON
attempt_1425044331998_0038_m_000003_1000 FAILED on /default-rack/ip-10-0-xx-93.ec2.internal:8042
Started: Sat, 28 Feb 2015 04:00:59 GMT   Finished: Sat, 28 Feb 2015 07:06:11 GMT   Elapsed: 3hrs, 5mins, 12sec
Error: java.lang.IllegalArgumentException: Invalid position given=374535 -1
  at org.elasticsearch.hadoop.serialization.ScrollReader.read(ScrollReader.java:234)
  at org.elasticsearch.hadoop.serialization.ScrollReader.read(ScrollReader.java:165)
  at org.elasticsearch.hadoop.rest.RestRepository.scroll(RestRepository.java:339)
  at org.elasticsearch.hadoop.rest.ScrollQuery.hasNext(ScrollQuery.java:76)
  at org.elasticsearch.hadoop.mr.EsInputFormat$ShardRecordReader.next(EsInputFormat.java:293)
  at org.apache.hadoop.mapred.MapTask$TrackedRecordReader.moveToNext(MapTask.java:199)
  at org.apache.hadoop.mapred.MapTask$TrackedRecordReader.next(MapTask.java:185)
  at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:52)
  at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:450)
  at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343)
  at org.