Skip to content

Instantly share code, notes, and snippets.

@eregon
Created February 10, 2012 10:12
Show Gist options
  • Save eregon/2eaac57a912ba784c82b to your computer and use it in GitHub Desktop.
Save eregon/2eaac57a912ba784c82b to your computer and use it in GitHub Desktop.
iCampus documents synchronization script
#!/usr/bin/env ruby
# encoding: utf-8
# Dependencies
# apt-get:
# ruby-1.9.1-full
# libxml2-dev
# libxslt1-dev
# gem(-1.9.1) install path mechanize term-ansicolor activesupport
# Setup:
# * The default path is ~/notes; you can change it below, or use the -p option.
# * In that folder, create one subfolder for each course you wish to track.
#   Each subfolder must be named either with the course name alone,
#   or with an arbitrary name followed by _ and the course name (e.g.: OS_INGI1113).
# * user, pass and pass_foditic (optional) are three files (relative to this script) that store your login info.
#   You could just as well put the values directly in the respective constants.
# * If you want to search Foditic as well, just create the pass_foditic file, or assign the constant.
# * You could also create a git repository in your path to easily track modifications.
# Run with `ruby(-1.9.1) update_icampus.rb`
# If you find a bug, please send me the output with the -d option.
gem 'mechanize' #, '1.0.0' # 2.0.1 has cookies problems to authenticate :(
require 'mechanize'
require 'time'
require 'path'
require 'optparse'
require 'term/ansicolor' # term-ansicolor
require 'active_support/inflector' # activesupport
I18n.enforce_available_locales = true # avoid warning
# Command-line settings; the defaults below are overridden by the option parser.
OPTIONS = {
  :debug => false,
  :path => File.expand_path('~/notes')
}
OptionParser.new do |opts|
  opts.banner = "Usage: [ruby] #{$0}"
  opts.on('-d', '--debug', "Debug mode") { OPTIONS[:debug] = true }
  opts.on('-p', '--path=PATH', "Specify the path to save the documents") do |path|
    OPTIONS[:path] = File.expand_path(path)
  end
end.parse!

# Login information, read from files located relative to this script.
LOGIN = Path.relative('user').read.chomp
PASS = Path.relative('pass').read.chomp
# pass_foditic is optional: only a missing file yields nil.
# Any other failure (e.g. an unreadable file) is reported instead of being silently ignored.
PASS_FODITIC = begin
  Path.relative('pass_foditic').read.chomp
rescue Errno::ENOENT
  nil
end

# Directory holding one subfolder per tracked course.
NOTES = Path(OPTIONS[:path])
$DEBUG = true if OPTIONS[:debug]
p NOTES if $DEBUG
raise "Missing path, give -p" unless NOTES.dir?

# Link texts of the document listing that are neither folders nor files (French and English labels).
LINKS_TO_IGNORE = %w[Nom Name Taille Size Date Date Remonter Up]
LINKS_TO_IGNORE << "Date de dernière modification"
LINKS_TO_IGNORE << "Last modification date"
DOWNLOAD_URL = '/claroline/backends/download.php?url='

# A course name is the whole folder name, or the part after the last _ (capitals and digits only).
CoursesDirs = NOTES.children.select(&:dir?)
Courses = CoursesDirs.map { |f|
  f.basename.to_s[/(?:\A|_)([A-Z0-9]+)\Z/,1]
}.compact
puts "Courses: #{Courses.join(', ')}"
p Courses if $DEBUG
# Make the Term::ANSIColor helpers callable on every String.
# On mswin/mingw/cygwin platforms they are defined as no-ops returning the string unchanged;
# elsewhere the real Term::ANSIColor module is mixed into String.
if RUBY_PLATFORM =~ /mswin|mingw|cygwin/
  String.class_exec do
    color_names = Term::ANSIColor.public_methods - Object.public_methods
    color_names.each { |color| define_method(color) { |*_ignored| self } }
  end
else
  String.send(:include, Term::ANSIColor)
end
# try { ... } runs the given block and returns its value.
# The variant is chosen once, when this file is loaded:
# * in debug mode, errors propagate to the caller;
# * otherwise, a StandardError is printed in red and nil (the result of puts) is returned.
if $DEBUG
  def try
    yield
  end
else
  def try
    yield
  rescue => error
    puts error.to_s.red
  end
end
# Reads the modification date displayed next to +link+.
#
# The grandparent of the link's node is searched for the first child whose text contains
# a date written as dd.mm.yyyy or yyyy.mm.dd (with . or / as separator); that text is parsed.
#
# Returns a Time.
# Raises ArgumentError when no child contains such a date (instead of failing on nil),
# or when Time.parse cannot parse the text.
def mtime_from_row_link(link)
  row = link.node.parent.parent
  date_cell = row.children.find { |child|
    child.text =~ %r{\d{2}[./]\d{2}[./]\d{4}|\d{4}[./]\d{2}[./]\d{2}}
  }
  raise ArgumentError, "No modification date found next to #{link.text.inspect}" unless date_cell
  Time.parse date_cell.text
end
# Converts +filename+ to an ASCII approximation with ActiveSupport's transliterate.
# A warning is printed when the result still contains non-ASCII characters;
# the transliterated name is returned in every case.
def filter_filename filename
  filename = ActiveSupport::Inflector.transliterate filename
  # Ask the string directly instead of attempting a conversion and rescuing every error.
  puts "Warning: not clean ASCII filename: #{filename}".red unless filename.ascii_only?
  filename
end
# Returns the href of +link+ with every %2B replaced by a literal +.
# The result is used to recognise folders that were already visited.
def link_address(link)
  address = link.href
  address.gsub('%2B', '+')
end
# Mirrors one remote documents folder into +local_dir+, then recurses into its subfolders.
#
# dir_link   - link to the remote folder listing (clicked to get the page)
# local_dir  - Path of the local directory to fill; created when missing
# remote_dir - remote path printed while progressing ('' for the root, printed as '.')
# visited    - addresses of the folders already analysed, shared by the whole recursion
#
# A file is downloaded when it does not exist locally, or when the remote date is newer
# than the local ctime or mtime.
def analyse dir_link, local_dir, remote_dir = '', visited = []
  p [:analyse, dir_link, local_dir, remote_dir, visited] if $DEBUG
  visited << link_address(dir_link)
  page = dir_link.click
  puts(remote_dir.empty? ? '.' : remote_dir)
  try { local_dir.mkdir } unless local_dir.exist?

  # href is converted with to_s so that a link without href is simply never selected.
  links = page.links

  # /claroline/document/document.php?cmd=exChDir&amp;file=L0Vub25jZXM%3D&amp;cidReset=true&amp;cidReq=SINF2125
  folders = links.select { |link|
    link.href.to_s.start_with? '/claroline/document/document.php?cmd=exChDir'
  }.each { |link|
    # The link texts contain non-breaking spaces (U+00A0); the text is cleaned in place
    # because it is read again below for the ignore list and for the folder name.
    link.text.tr!("\u00A0", '')
    link.text.strip!
  }.reject { |link| LINKS_TO_IGNORE.include? link.text }
  puts "Folders: #{folders.join(', ')}" if $DEBUG

  files = [] # [name, mtime, link]
  # /claroline/backends/download.php?url=L2ludHJvUHJvamV0czAxLnBkZg%3D%3D&amp;cidReset=true&amp;cidReq=SINF2125
  # goto/?url=%2FLangage_C%2Fcours_c_exercices.pdf
  links.select { |link|
    link.href.to_s.start_with? DOWNLOAD_URL, 'goto/?url=' # new, foditic
  }.each { |link|
    files << [link.text.strip, mtime_from_row_link(link), link]
  }

  # Images are listed through a viewer page, which holds the actual download link.
  links.select { |link|
    link.href.to_s.start_with? '/claroline/document/document.php?docView=image&file='
  }.each { |link|
    name = link.text.strip # set before the lookup so that the error message can show it
    image_page = link.click
    dl_link = image_page.links.find { |l| l.href.to_s.start_with? DOWNLOAD_URL }
    if dl_link
      files << [name, mtime_from_row_link(link), dl_link]
    else
      puts "Could not find download link for image #{name}".red
    end
  }
  puts "Files: #{files.join(', ')}" if $DEBUG

  files.each { |name, mtime, link|
    filename = filter_filename File.basename(name)
    file = local_dir / filename
    if file.exist?
      if mtime > file.ctime || mtime > file.mtime
        puts "Remote #{filename} is newer, downloading ...".green
        # TODO: link.click.save does not overwrite!!! see file.rb and parser.rb, watch @filename
        p [file, file.ctime, file.mtime] if $DEBUG
        try do
          # Fetch first: if the request fails, the existing local copy is kept.
          download = link.click
          file.unlink
          download.save file.to_s
        end
      end
    else
      puts "Downloading #{filename} ...".green
      try { link.click.save file.to_s }
    end
  }

  folders.reject { |link| visited.include? link_address(link) }.each { |link|
    remote_folder = File.basename(link.text)
    local_folder = filter_filename remote_folder
    analyse link, (local_dir / local_folder), "#{remote_dir}#{remote_folder}/", visited
  }
end
# Tracked courses for which a link was found on a searched site (filled by search_site).
SUBSCRIBED_COURSES = []

# Logs in on +site+ and synchronizes the documents of every tracked course linked from the home page.
#
# site - base URL of the site (index.php is fetched below it)
# user - login name
# pass - password
#
# Aborts the script when the login form is still shown after submitting the credentials.
def search_site(site, user, pass)
  agent = Mechanize.new { |a|
    a.user_agent_alias = 'Mac Safari'
  }
  puts "\nSearching #{site}".cyan.bold
  agent.get("#{site}/index.php") do |login_page|
    home_page = login_page.form_with(action: /login\.php$/) { |form|
      form.login, form.password = user, pass
    }.submit
    if home_page.form_with(action: /login\.php$/)
      puts "Login failed!".red.bold
      p home_page.uri
      p home_page
      abort
    end

    courses_links = Courses.each_with_object({}) { |course, h|
      link = home_page.link_with(text: /#{Regexp.escape course}/)
      if link
        h[course] = link
        SUBSCRIBED_COURSES << course
      end
    }

    courses_links.each_pair { |course, link|
      course_page = link.click
      documents = course_page.link_with(text: /Document/) # match "Documents et liens" and "Document"
      puts "\n#{course.blue.bold}"
      if documents
        # Match on the course name extracted from the folder name (same rule as for Courses),
        # not on a substring of the full path, which could select another course's folder.
        dir = Path(CoursesDirs.find { |cd|
          cd.basename.to_s[/(?:\A|_)([A-Z0-9]+)\Z/, 1] == course
        })
        analyse documents, dir
      else
        puts 'No Documents'.red.bold
      end
    }
  end
end
# Synchronize iCampus, then Foditic when a Foditic password is available.
search_site('http://icampus.uclouvain.be', LOGIN, PASS)
if PASS_FODITIC
  search_site('http://foditic.org', LOGIN, PASS_FODITIC)
end

# Report the tracked courses for which no link was found on the searched sites.
not_subscribed = Courses - SUBSCRIBED_COURSES
not_subscribed.each { |course| puts "Warning: You are not subscribed to #{course}".red }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment