@javrasya
javrasya / S3InputStream.java
Created March 18, 2024 22:08
A custom S3InputStream, used by the Iceberg Glue catalog, that retries when it encounters a socket-closed exception; most of the time it resumes from where it left off instead of failing the entire job.
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.io.ByteStreams;
import org.apache.iceberg.aws.s3.S3FileIOProperties;
import org.apache.iceberg.exceptions.NotFoundException;
import org.apache.iceberg.io.FileIOMetricsContext;
import org.apache.iceberg.io.IOUtil;
import org.apache.iceberg.io.RangeReadable;
import org.apache.iceberg.io.SeekableInputStream;
import org.apache.iceberg.metrics.Counter;
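The preview above shows only the imports, so here is the core retry-and-resume idea as a minimal, self-contained Python sketch. The real gist is a Java subclass of Iceberg's SeekableInputStream; names like `RetryingStream` and `open_fn` here are illustrative, not the gist's API:

```python
import io

class RetryingStream:
    """Wraps a factory that opens a byte stream; on a read error it
    reopens the stream and resumes from the last good position.
    A sketch of the resume-on-failure idea, not the Iceberg class."""

    def __init__(self, open_fn, max_retries=3):
        self.open_fn = open_fn          # returns a fresh file-like object
        self.max_retries = max_retries
        self.pos = 0                    # bytes successfully delivered so far
        self.stream = self.open_fn()

    def read(self, n=-1):
        for attempt in range(self.max_retries + 1):
            try:
                data = self.stream.read(n)
                self.pos += len(data)
                return data
            except OSError:             # e.g. "socket closed"
                if attempt == self.max_retries:
                    raise
                # reopen and skip the bytes we already consumed
                self.stream = self.open_fn()
                self.stream.seek(self.pos)
```

On a read failure the wrapper reopens the underlying stream and seeks past the bytes already delivered, which is what lets a job resume instead of dying.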
@javrasya
javrasya / CustomS3FileIO.java
Last active January 8, 2024 12:58
A custom S3FileIO that opens a new S3 client and uses it whenever the original one has been closed. The bug it works around is caused by a client-lifecycle issue. This works with Iceberg version 1.4, not lower.
import org.apache.iceberg.aws.AwsClientFactories;
import org.apache.iceberg.aws.s3.S3FileIO;
import org.apache.iceberg.aws.s3.S3FileIOProperties;
import org.apache.iceberg.util.SerializableSupplier;
import software.amazon.awssdk.services.s3.S3Client;
import java.util.concurrent.atomic.AtomicBoolean;
public class CustomS3FileIO extends S3FileIO {
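The class body is truncated in this preview. The essential trick, sketched in Python with illustrative names (`ReopeningClientHolder` and `factory` are mine, not the gist's), is to route every access through a supplier that rebuilds the client once it reports itself closed:

```python
class ReopeningClientHolder:
    """Holds a client created by `factory`; if the client is found to be
    closed, transparently builds a fresh one before handing it out.
    A sketch of the recreate-on-closed-client idea, not S3FileIO itself."""

    def __init__(self, factory):
        self.factory = factory
        self._client = factory()

    def client(self):
        if self._client.closed:        # original client was closed elsewhere
            self._client = self.factory()
        return self._client
```

Callers always go through `client()` instead of caching the client, so a closed client costs one extra construction rather than a failed job.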
@javrasya
javrasya / LimitingTaskWithSplitsAssigner.java
Created January 5, 2024 19:01
A custom Iceberg split assigner that evenly distributes the workload over the discovered splits.
import com.google.common.collect.Lists;
import org.apache.iceberg.BaseCombinedScanTask;
import org.apache.iceberg.FileScanTask;
import org.apache.iceberg.flink.source.assigner.DefaultSplitAssigner;
import org.apache.iceberg.flink.source.split.IcebergSourceSplit;
import org.apache.iceberg.flink.source.split.IcebergSourceSplitState;
import java.util.Collection;
import java.util.List;
import java.util.stream.Collectors;
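Only the imports survive in this preview; the even-distribution idea itself is round-robin binning, sketched here in Python with illustrative names (the real assigner works on Iceberg's `IcebergSourceSplit` objects):

```python
def distribute_evenly(splits, num_tasks):
    """Round-robin the splits across tasks so every task ends up with
    an almost equal share (sizes differ by at most one)."""
    buckets = [[] for _ in range(num_tasks)]
    for i, split in enumerate(splits):
        buckets[i % num_tasks].append(split)
    return buckets
```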
@javrasya
javrasya / wikipedia_page_view_to_s3.py
Last active March 4, 2023 14:33
Downloads the Wikipedia page-views data set and uploads it to S3 concurrently as it downloads. The concurrency level can be configured with semaphores.
import asyncio
import zlib
from typing import List, Tuple
from aiobotocore.session import AioSession
from aiohttp_retry import ExponentialRetry, RetryClient
from tqdm import tqdm
# ##### PARAMETERIZED PART #######
YEAR = 2015
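The download loop is truncated here, but the configurable concurrency the description mentions is the classic semaphore pattern. A minimal Python sketch (`fetch_all` and its arguments are illustrative, not the gist's code):

```python
import asyncio

async def fetch_all(urls, fetch, concurrency=4):
    """Run `fetch(url)` for every url, but allow at most `concurrency`
    of them in flight at once, bounded by a semaphore."""
    sem = asyncio.Semaphore(concurrency)

    async def guarded(url):
        async with sem:                 # blocks once `concurrency` are running
            return await fetch(url)

    # gather preserves input order in its results
    return await asyncio.gather(*(guarded(u) for u in urls))
```

Raising `concurrency` trades memory and bandwidth for wall-clock time, which is why the gist exposes it as a parameter.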
print('''WELCOME TO OUR CALCULATOR...
FOR ADDITION : +
FOR SUBTRACTION : -
FOR MULTIPLICATION : *
FOR DIVISION : /
USE THESE KEYS....''')
çalıştır = 1  # "run" flag that keeps the calculator loop alive
package dal.ahmet.hive.unittest;
import com.klarna.hiverunner.HiveShell;
import com.klarna.hiverunner.StandaloneHiveRunner;
import com.klarna.hiverunner.annotations.HiveSQL;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
-- execute_student_count_report.hql
use mydatabase;
INSERT INTO TABLE student_count_report
SELECT
school.school_name,
count(student.student_id) as cnt
FROM school
LEFT JOIN student ON student.school_id = school.school_id
GROUP BY school.school_name;
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.klarna</groupId>
<artifactId>hiverunner</artifactId>
<version>3.0.0</version>
<scope>test</scope>
</dependency>

Pip is Python's package manager. It can install libraries from package indexes such as PyPI, but it can also install them straight from a git repository. That is the topic of this article.

I don't like memorizing things, so I suppose I couldn't work without the internet :). Whenever I need to install a Python library from a git repository, I find many different ways to do it, and it is genuinely confusing; that is probably why I can never memorize it. A very simple requirement is handled in too many confusing ways, and some of them don't even work. So I finally decided to blog about it.

As you may know, git repositories can be accessed over two protocols: HTTP and SSH. Using SSH instead of HTTP can be more convenient: because SSH authenticates with your private/public key pair, you don't have to enter your credentials every time. But I'll be
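For reference, the pip syntax for both protocols looks like this (the repository URL is hypothetical; substitute your own):

```shell
# over HTTPS (pip will prompt for credentials on private repos)
pip install git+https://github.com/user/project.git

# over SSH (authenticates with your SSH key, no password prompt)
pip install git+ssh://git@github.com/user/project.git

# pin to a branch, tag, or commit by appending @<ref>
pip install git+https://github.com/user/project.git@v1.2.0
```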

select count_letters('name');