Created
October 25, 2020 08:31
-
-
Save airglow923/1b9da401faa442758b77fdc638d7704e to your computer and use it in GitHub Desktop.
Read random lines from a file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
import sys | |
import getopt | |
import codecs | |
import secrets | |
import hashlib | |
from typing import List | |
def usage():
    """Print the one-line command-line synopsis to standard output."""
    # sys.argv[0] is the name the script was invoked under.
    script_name = sys.argv[0]
    print(f'Usage: {script_name} '
          '-i [INPUT] -o [OUTPUT] -e [ENCODING] -n [NUMBER]')
def read_lines_from_file(filename: str,
                         lines: List[int],
                         encoding: str = 'utf-8') -> List[str]:
    """Return the stripped text of the requested lines of a text file.

    Args:
        filename: Path of the file to read.
        lines: Zero-based line numbers to fetch, in any order. A number
            listed k times yields k copies of that line. Numbers that are
            negative or past the end of the file are ignored.
        encoding: Text encoding used to decode the file.

    Returns:
        The selected lines in ascending line-number order, each with
        leading and trailing whitespace removed. Always a list, even when
        the file ends before every request has been satisfied.
    """
    wanted = sorted(lines)
    total = len(wanted)
    contents = []

    # Walk the sorted requests with a cursor instead of list.pop(0), which
    # shifts the whole list on every call.
    cursor = 0

    # Negative numbers can never match an enumerate() index; skip them so
    # they do not block the requests sorted after them.
    while cursor < total and wanted[cursor] < 0:
        cursor += 1

    # Built-in open() splits lines on '\n', '\r' and '\r\n' only, the same
    # way get_no_of_lines_in_file counts them. codecs.open() additionally
    # splits on other Unicode line boundaries, which would make the line
    # numbers used here disagree with that count.
    with open(filename, mode='r', encoding=encoding) as f:
        for i, line in enumerate(f):
            if cursor == total:
                break  # every request satisfied; no need to read further
            # A line number may be requested several times: emit one copy
            # per request so later requests are not left stranded.
            while cursor < total and wanted[cursor] == i:
                contents.append(line.strip())
                cursor += 1

    return contents
def gen_rand_nums(n: int, upper: int):
    """Return a list of n random integers drawn from [0, upper).

    Each value comes from its own secrets.randbelow(upper) call, so the
    same number can appear more than once in the result.
    """
    return [secrets.randbelow(upper) for _ in range(n)]
def get_no_of_lines_in_file(filename: str, encoding: str):
    """Count the lines in a text file.

    Args:
        filename: Path of the file to read.
        encoding: Text encoding used to decode the file.

    Returns:
        The number of lines in the file; 0 for an empty file.
    """
    with open(filename, mode='r', encoding=encoding) as f:
        # sum() over the line iterator yields 0 for an empty file, whereas
        # reading a loop index after the loop fails when the loop never ran.
        return sum(1 for _ in f)
def get_hash(s: str, algo: str):
    """Return the hexadecimal digest of s under the named hash algorithm.

    s is encoded with str.encode()'s default encoding before hashing;
    algo is passed to hashlib.new() as the algorithm name.
    """
    payload = s.encode()
    return hashlib.new(algo, payload).hexdigest()
def main(argv):
    """Copy randomly chosen lines of the input file to the output file.

    Args:
        argv: Command-line arguments without the program name. Recognised
            options are -i INPUT, -o OUTPUT, -e ENCODING, -n NUMBER and
            -h/--help.

    Exit statuses:
        0 after printing help,
        2 for an unknown option or a -n value that is not an integer,
        3 when a required option is missing or -n is not positive,
        4 when the input file contains no lines to pick from.
    """
    input_file = ''
    output_file = ''
    encoding = ''
    num_of_lines = 0

    try:
        # '--help' must be registered as a long option, otherwise getopt
        # rejects it and the '--help' test below can never succeed.
        opts, _ = getopt.getopt(argv, 'hi:o:e:n:', ['help'])
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage()
            sys.exit(0)
        elif opt == '-i':
            input_file = arg
        elif opt == '-o':
            output_file = arg
        elif opt == '-e':
            encoding = arg
        elif opt == '-n':
            try:
                num_of_lines = int(arg)
            except ValueError:
                # Treat a non-numeric count like any other malformed
                # option instead of ending in a traceback.
                usage()
                sys.exit(2)

    # All four options are required, and a zero or negative count would
    # only ever produce an empty output file.
    if not (input_file and output_file and encoding) or num_of_lines <= 0:
        usage()
        sys.exit(3)

    total_lines = get_no_of_lines_in_file(input_file, encoding)
    if total_lines <= 0:
        # There is no valid line number to draw from an empty file.
        print('{}: input file has no lines'.format(input_file),
              file=sys.stderr)
        sys.exit(4)

    line_nos = gen_rand_nums(num_of_lines, total_lines)
    contents = read_lines_from_file(input_file, line_nos, encoding=encoding)

    # Write with the encoding the input was decoded with; the platform
    # default encoding may be unable to represent the selected text.
    with open(output_file, mode='w', encoding=encoding) as f:
        f.writelines(content + '\n' for content in contents)
# Refuse to be imported: when loaded as a module, print a notice and the
# usage line, then terminate the interpreter with status 1.
if __name__ != '__main__':
    print('This script must be executed directly.')
    usage()
    sys.exit(1)

# Reached only when run as a script; pass the arguments without the
# program name.
main(sys.argv[1:])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment