Skip to content

Instantly share code, notes, and snippets.

@airglow923
Created October 25, 2020 08:31
Show Gist options
  • Save airglow923/1b9da401faa442758b77fdc638d7704e to your computer and use it in GitHub Desktop.
Save airglow923/1b9da401faa442758b77fdc638d7704e to your computer and use it in GitHub Desktop.
Read random lines from a file
#!/usr/bin/python3
import sys
import getopt
import codecs
import secrets
import hashlib
from typing import List
def usage():
    """Print the one-line command-line synopsis to standard output.

    The program name shown is taken from ``sys.argv[0]``.
    """
    template = 'Usage: {} -i [INPUT] -o [OUTPUT] -e [ENCODING] -n [NUMBER]'
    program = sys.argv[0]
    print(template.format(program))
def read_lines_from_file(filename: str,
                         lines: List[int],
                         encoding: str = 'utf-8') -> List[str]:
    """Return the requested lines of a text file, stripped of whitespace.

    Args:
        filename: Path of the file to read.
        lines: Zero-based line numbers to fetch, in any order. A number
            listed k times yields its line k times; numbers that are
            negative or beyond the end of the file are ignored.
        encoding: Text encoding used to decode the file.

    Returns:
        The selected lines in ascending line-number order, each with
        leading and trailing whitespace removed. Always a list (possibly
        empty), never ``None``.
    """
    wanted = sorted(lines)  # sorted copy; the caller's list is not mutated
    cursor = 0  # index of the next unmatched entry in `wanted`

    # Negative numbers can never match a line index; skip them up front so
    # they do not block every request that sorts after them.
    while cursor < len(wanted) and wanted[cursor] < 0:
        cursor += 1

    contents = []
    # Built-in open() is used (instead of codecs.open) so lines are split
    # the same way as in get_no_of_lines_in_file; codecs readers also break
    # on extra Unicode separators, which would shift the line numbering.
    with open(filename, mode='r', encoding=encoding) as f:
        for i, line in enumerate(f):
            if cursor == len(wanted):
                break  # everything requested has been collected
            # Consume every request for this line so that duplicated
            # numbers are honoured instead of stalling the scan.
            while cursor < len(wanted) and wanted[cursor] == i:
                contents.append(line.strip())
                cursor += 1
    return contents
def gen_rand_nums(n: int, upper: int):
    """Draw ``n`` independent random integers from ``range(upper)``.

    Each value comes from ``secrets.randbelow(upper)``; draws are
    independent, so the same value may appear more than once.

    Returns:
        A list of ``n`` integers (empty when ``n`` is not positive).
    """
    return [secrets.randbelow(upper) for _ in range(n)]
def get_no_of_lines_in_file(filename: str, encoding: str):
    """Count the lines of a text file.

    Args:
        filename: Path of the file to read.
        encoding: Text encoding used to decode the file.

    Returns:
        The number of lines as an int; 0 for an empty file.
    """
    with open(filename, mode='r', encoding=encoding) as f:
        # Counting via sum() also covers the empty file, where a loop
        # variable would never be bound.
        return sum(1 for _ in f)
def get_hash(s: str, algo: str):
    """Return the hex digest of ``s`` under the named hash algorithm.

    Args:
        s: Text to hash; it is encoded with ``str.encode()`` defaults
            before hashing.
        algo: Algorithm name passed to ``hashlib.new``.

    Returns:
        The digest as a hexadecimal string.
    """
    payload = s.encode()
    return hashlib.new(algo, payload).hexdigest()
def main(argv):
    """Copy randomly chosen lines from an input file to an output file.

    Recognised options: ``-i INPUT``, ``-o OUTPUT``, ``-e ENCODING``,
    ``-n NUMBER`` and ``-h`` / ``--help``.

    Args:
        argv: Command-line arguments without the program name.

    Exits:
        0 after printing usage for ``-h`` / ``--help``;
        2 when getopt rejects the arguments;
        3 when a required option is missing or ``-n`` is not a positive
        integer.
    """
    input_file = ''
    output_file = ''
    encoding = ''
    num_of_lines = 0

    try:
        # 'help' has to be declared as a long option; otherwise getopt
        # rejects --help before the loop below can ever see it.
        opts, _ = getopt.getopt(argv, 'hi:o:e:n:', ['help'])
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage()
            sys.exit(0)
        elif opt == '-i':
            input_file = arg
        elif opt == '-o':
            output_file = arg
        elif opt == '-e':
            encoding = arg
        elif opt == '-n':
            try:
                num_of_lines = int(arg)
            except ValueError:
                # A non-numeric count is a usage error, not a traceback.
                usage()
                sys.exit(3)

    # A negative count is as unusable as a missing one.
    if (not input_file or
            not output_file or
            not encoding or
            num_of_lines <= 0):
        usage()
        sys.exit(3)

    total_lines = get_no_of_lines_in_file(input_file, encoding)
    # secrets.randbelow() refuses an upper bound of 0, so an empty input
    # file simply yields no line numbers (and therefore an empty output).
    if total_lines > 0:
        line_nos = gen_rand_nums(num_of_lines, total_lines)
    else:
        line_nos = []

    # Defensive: tolerate a None result so the write below never fails on
    # a missing list.
    contents = read_lines_from_file(input_file, line_nos,
                                    encoding=encoding) or []

    # NOTE(review): the output is written with the same encoding the input
    # was decoded with, rather than the platform default, so every
    # character that was read can also be written back.
    with open(output_file, mode='w', encoding=encoding) as f:
        f.writelines(content + '\n' for content in contents)
# Entry point: the module refuses to be imported. When it is not run as a
# script it prints a notice plus the usage line and exits with status 1.
if __name__ != '__main__':
    print('This script must be executed directly.')
    usage()
    sys.exit(1)

main(sys.argv[1:])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment