# Install the client library first (shell command):
#   pip install hdfs
from hdfs import InsecureClient

# Connect to the NameNode's WebHDFS endpoint (port 50070) as user 'hive'.
hdfs_client = InsecureClient('http://192.168.0.251:50070', user='hive')

# List the contents of the user's home directory.
hdfs_client.list('.')

# Create a (nested) directory tree.
hdfs_client.makedirs('/a/b/c')

# Append to an existing file by setting the append flag.
# (Original used an undefined name `client`; the connection is `hdfs_client`.)
with hdfs_client.write('test_liulin.txt', append=True) as writer:
    writer.write('You succeed!')

# Read line-by-line using a delimiter; delimiter requires encoding to be set.
with hdfs_client.read('test.txt', encoding='utf-8', delimiter='\n') as reader:
    for line in reader:
        print(line)
# Reference links:
#   https://my.oschina.net/wolfoxliu/blog/862015
#   https://hdfscli.readthedocs.io/en/latest/quickstart.html
from pyhive import hive

# Open a HiveServer2 connection (default port 10000) and obtain a cursor.
connection = hive.connect('192.168.0.251')
cursor = connection.cursor()

# Run a simple statement and dump every result row.
cursor.execute('show databases')
print(cursor.fetchall())
# Reference link:
#   https://github.com/dropbox/PyHive
>>> from pyspark import SparkFiles
>>> path = os.path.join(tempdir, "test.txt")
>>> with open(path, "w") as testFile:
... _ = testFile.write("100")
>>> sc.addFile(path)
>>> def func(iterator):
... with open(SparkFiles.get("test.txt")) as testFile:
... fileVal = int(testFile.readline())
... return [x * fileVal for x in iterator]
>>> sc.parallelize([1, 2, 3, 4]).mapPartitions(func).collect()
[100, 200, 300, 400
# Reference link:
#   http://spark.apache.org/docs/2.1.0/api/python/pyspark.html
# Start the HBase Thrift server first (shell command):
#   nohup hbase thrift start &
import happybase

connection = happybase.Connection('192.168.0.251')
table = connection.table('table-name')

# Write one row with two columns in the same column family.
table.put(b'row-key', {b'family:qual1': b'value1',
                       b'family:qual2': b'value2'})

# Read a single row back as a {column: value} dict.
row = table.row(b'row-key')
print(row[b'family:qual1'])  # prints 'value1'

# Batch-fetch several rows by key.
for key, data in table.rows([b'row-key-1', b'row-key-2']):
    print(key, data)  # prints row key and data for each row

# Scan all rows whose key starts with the given prefix.
for key, data in table.scan(row_prefix=b'row'):
    print(key, data)  # prints row key and data for each matching row

# Delete a whole row. delete() returns None, so don't assign its result
# (the original clobbered `row` with None here).
table.delete(b'row-key')
# Reference link:
#   https://happybase.readthedocs.io/en/latest/index.html
from impala.dbapi import connect

# Connect to Impala; 21050 is the default impalad HiveServer2 port.
conn = connect(host='my.host.com', port=21050)
cursor = conn.cursor()
cursor.execute('SELECT * FROM mytable LIMIT 100')
print(cursor.description)  # prints the result set's schema
results = cursor.fetchall()
# Reference link:
#   https://github.com/cloudera/impyla