Skip to content

Instantly share code, notes, and snippets.

@msharp
Last active August 29, 2015 14:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save msharp/72586a8d94be108b8b7c to your computer and use it in GitHub Desktop.
Save msharp/72586a8d94be108b8b7c to your computer and use it in GitHub Desktop.
File Sampler - sample every n rows from a file
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
import os
class FileSampler:
    """Print every n-th line of a text file to standard output.

    The file name and the optional sample rate come from the command line
    (see ``usage``).

    Attributes set by ``parse_args``:
        sample_rate: positive int; every ``sample_rate``-th line is printed.
        file_name: path of the input file as given on the command line.
        in_file: the input file, opened for reading in text mode.
    """

    def __init__(self):
        self.parse_args(sys.argv)

    @staticmethod
    def run():
        """Build a sampler from ``sys.argv``, print the sample, close the file."""
        sampler = FileSampler()
        try:
            sampler.split()
        finally:
            # The sampler owns the handle opened in parse_args; release it
            # even if printing fails part-way (e.g. a closed pipe).
            sampler.in_file.close()

    def split(self):
        """Print each line whose 1-based line number is a multiple of the rate.

        Leading and trailing whitespace (including the newline) is stripped
        from every printed line.
        """
        for line_number, line in enumerate(self.in_file, 1):
            if line_number % self.sample_rate == 0:
                print(line.strip())

    def parse_args(self, argv):
        """Parse ``argv`` and set ``sample_rate``, ``file_name`` and ``in_file``.

        ``argv[1]`` is the input file name; ``argv[2]``, when present, is the
        sample rate (default is 10).

        On a missing file name, a non-integer or non-positive sample rate, or
        a file that cannot be opened, the usage text is printed and the
        process exits with status 1.
        """
        try:
            sample_rate = 10
            if len(argv) > 2:
                sample_rate = int(argv[2])
            # A rate of 0 would raise ZeroDivisionError later in split();
            # reject it (and meaningless negative rates) up front.
            if sample_rate < 1:
                raise ValueError("sample_rate must be a positive integer")
            self.sample_rate = sample_rate
            self.file_name = argv[1]
            self.in_file = open(self.file_name, "r")
        except (IndexError, ValueError, IOError, OSError):
            # Only argument/file problems land here; KeyboardInterrupt and
            # genuine programming errors are no longer swallowed.
            print(self.usage())
            sys.exit(1)

    def usage(self):
        """Return the command-line help text."""
        return (
            "\n"
            "Sample every n rows from a file.\n"
            "Usage:\n"
            "$ python file_sampler.py <file_name> [sample_rate]\n"
            "sample_rate is optional (default is 10)\n"
        )
# Script entry point: sample the file named on the command line only when
# this module is executed directly, not when it is imported.
if __name__ == "__main__":
    FileSampler.run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment