Skip to content

Instantly share code, notes, and snippets.

@upepo
Created January 13, 2015 05:10
Show Gist options
  • Save upepo/abd3f94df3f744ca0b3f to your computer and use it in GitHub Desktop.
Save upepo/abd3f94df3f744ca0b3f to your computer and use it in GitHub Desktop.
random sampling
#!/usr/bin/env python
import sys
import random
import argparse
# Command-line interface: the sampling ratio is mandatory; the number of
# leading lines to pass through untouched is optional and defaults to 0.
parser = argparse.ArgumentParser(description='')
# nargs='?' is deliberately not used: with it, a bare "--ratio" (no value)
# would parse to None and only fail later when compared against a float.
parser.add_argument('--ratio', '-r', type=float, required=True,
                    help='ratio for random sampling')
# Same reasoning as above: a bare "--start_line" must be rejected by argparse
# rather than silently becoming None.
parser.add_argument('--start_line', type=int, default=0,
                    help='start line')
args = parser.parse_args()
# Echo the parsed options to stderr, keeping them out of the stdout stream.
sys.stderr.write(str(args) + "\n")
def do(ratio=None, start_line=None, lines=None, out=None):
    """Pass a header through verbatim, then randomly sample the other lines.

    The first ``start_line`` input lines are always written to the output.
    Every later line is written with probability ``ratio``.

    Args:
        ratio: Probability of keeping each line after the header. When
            omitted, the parsed ``--ratio`` command-line value is used.
        start_line: Number of leading lines copied without sampling. When
            omitted, the parsed ``--start_line`` value is used.
        lines: Iterable yielding input lines. Defaults to ``sys.stdin``.
        out: Object with a ``write`` method. Defaults to ``sys.stdout``.
    """
    if ratio is None:
        ratio = args.ratio
    if start_line is None:
        start_line = args.start_line
    if lines is None:
        lines = sys.stdin
    if out is None:
        out = sys.stdout

    # Iterate the stream lazily instead of calling readlines(), so the whole
    # input never has to be held in memory at once.
    for index, line in enumerate(lines):
        # random.random() lies in [0.0, 1.0); the strict "<" therefore keeps
        # nothing at ratio 0 and everything at ratio 1. The random draw is
        # skipped entirely for header lines thanks to short-circuiting.
        if index < start_line or random.random() < ratio:
            out.write(line)
if __name__ == "__main__":
    # Only run the sampler when executed as a script, not when imported.
    do()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment