Created
February 20, 2013 04:51
-
-
Save miketheman/4992985 to your computer and use it in GitHub Desktop.
extract mailman member details to CSV
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
require 'csv' | |
require 'mechanize' | |
# Tested with Ruby 1.9.3, mechanize 2.5.1 against Mailman version 2.1.14-1 | |
# NOTE: Many lists have a long membership list, and this is not currently
# written to accommodate that use case.
# A Mailman setting, `admin_member_chunksize`, should allow you to display a
# longer list - set it to a value higher than the total number of list members.
# Configure your list details here.
# Replace the host and <list_name> with your Mailman server and list name.
# The placeholder uses the reserved example.com domain so an unedited run
# cannot hit a real third-party host.
URL = "http://example.com/mailman/admin/<list_name>/members"
# Password submitted in the `adminpw` field of the admin login form.
PASSWORD = 'SomeSuperSecretPassword'
# Requests the Mailman admin members page, fills the admin password into the
# form it finds there, submits it, and returns the resulting page.
#
# @param [String] url URL to the membership page
# @param [String] password value placed in the form's `adminpw` field
# @return [Mechanize::Page] the page returned by submitting the login form
# @raise [RuntimeError] if the fetched page contains no form
def get_authenticated_page(url, password)
  agent = Mechanize.new
  # The login form is unnamed, so ask for the page's form without criteria.
  login_form = agent.get(url).form
  # Fail with a clear message instead of a NoMethodError on nil.
  raise "No login form found at #{url}" if login_form.nil?

  login_form['adminpw'] = password
  agent.submit(login_form)
end
# Takes the output of the page, parses each matched table cell for the value
# of its first input and the cell's text, and writes the sorted pairs to a
# CSV file.
#
# @param [Mechanize::Page] page output of `get_authenticated_page`
# @param [String] output_path file to write; defaults to `output.csv`
# @return [nil] after writing the CSV file and printing a summary
def extract_details(page, output_path = 'output.csv')
  cells = page.parser.xpath('//center[1]/table/tr/td[2]')

  # The first matched cell is skipped (presumably the table's header row).
  list = cells.drop(1).each_with_object([]) do |cell, members|
    first_input = cell.at_xpath('input[1]')
    # A cell without an input cannot supply the first column; skip it rather
    # than crash on nil.
    next if first_input.nil?

    members << [first_input['value'], cell.content]
  end

  # Write the list to a Comma Separated Value file. Sorting on stringified
  # fields orders all-String rows exactly like a plain sort, but does not
  # raise when a field is nil.
  CSV.open(output_path, 'wb') do |csv|
    list.sort_by { |row| row.map(&:to_s) }.each { |row| csv << row }
  end

  puts "Extractor run against #{URL}, and has written an output file."
  puts "There were #{list.count} records written."
end
# Script entry point: logs in with the configured URL and PASSWORD, then
# passes the resulting page to `extract_details`.
def main
  extract_details(get_authenticated_page(URL, PASSWORD))
end
# Run only when executed directly as a script, so the file can be required
# without triggering a network request.
main if __FILE__ == $PROGRAM_NAME
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment