-
-
Save eregon/2eaac57a912ba784c82b to your computer and use it in GitHub Desktop.
iCampus documents synchronization script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env ruby | |
| # encoding: utf-8 | |
| # Dependencies | |
| # apt-get: | |
| # ruby-1.9.1-full | |
| # libxml2-dev | |
| # libxslt1-dev | |
| # gem(-1.9.1) install path mechanize term-ansicolor activesupport | |
| # Setup: | |
| # * Default path is ~/notes, you can change it below, or use the -p option. | |
| # * In that folder, you need to create subfolders for all the courses you wish to track, | |
| # whose names must be either the course name alone or an arbitrary name followed by _ and the course name (e.g.: OS_INGI1113). | |
| # * user, pass and pass_foditic (optional) are three files (relative to this script) to store your login info. | |
| # Alternatively, you can put the values directly in the respective constants. | |
| # * If you want to search Foditic as well, just create the pass_foditic file, or assign the constant. | |
| # * You could also create a git repository in your path to easily track modifications. | |
| # Run with `ruby(-1.9.1) update_icampus.rb` | |
| # If you find a bug, please send me the output with the -d option. | |
| gem 'mechanize' #, '1.0.0' # 2.0.1 has cookies problems to authenticate :( | |
| require 'mechanize' | |
| require 'time' | |
| require 'path' | |
| require 'optparse' | |
| require 'term/ansicolor' # term-ansicolor | |
| require 'active_support/inflector' # activesupport | |
I18n.enforce_available_locales = true # avoid warning

# Runtime settings; the command-line switches parsed below overwrite these defaults.
OPTIONS = {
  :debug => false,
  :path => File.expand_path('~/notes')
}

OptionParser.new { |parser|
  parser.banner = "Usage: [ruby] #{$0}"
  parser.on('-d', '--debug', "Debug mode") { OPTIONS[:debug] = true }
  parser.on('-p', '--path=PATH', "Specify the path to save the documents") { |dir|
    OPTIONS[:path] = File.expand_path(dir)
  }
}.parse!

# Credentials are read from the files named 'user', 'pass' and 'pass_foditic'.
LOGIN = Path.relative('user').read.chomp
PASS = Path.relative('pass').read.chomp
# The Foditic password is optional: any failure to read it yields nil.
PASS_FODITIC = begin
  Path.relative('pass_foditic').read.chomp
rescue
  nil
end

# Root directory holding one sub-directory per tracked course.
NOTES = Path(OPTIONS[:path])

$DEBUG = true if OPTIONS[:debug]
if $DEBUG
  p NOTES
end
unless NOTES.dir?
  raise "Missing path, give -p"
end

# Link captions that are table headers or navigation, not documents.
LINKS_TO_IGNORE = %w[Nom Name Taille Size Date Date Remonter Up] +
  ["Date de dernière modification", "Last modification date"]

DOWNLOAD_URL = '/claroline/backends/download.php?url='

# Course directories, and the course code taken from the end of each directory name.
CoursesDirs = NOTES.children.select { |child| child.dir? }
Courses = CoursesDirs.map { |dir|
  dir.basename.to_s.slice(/(?:\A|_)([A-Z0-9]+)\Z/, 1)
}.compact

puts "Courses: #{Courses.join(', ')}"
if $DEBUG
  p Courses
end
# Give String the Term::ANSIColor helpers (red, green, bold, ...).
# On the Windows-like platforms matched below they are replaced by no-ops
# that return the string unchanged.
case RUBY_PLATFORM
when /mswin|mingw|cygwin/
  colour_helpers = Term::ANSIColor.public_methods - Object.public_methods
  String.class_exec do
    colour_helpers.each do |helper|
      define_method(helper) { |*args| self }
    end
  end
else
  String.send(:include, Term::ANSIColor)
end
# try { ... } runs the block and returns its value.
# In debug mode errors propagate untouched; otherwise a StandardError is
# reported (message only, in red) and nil is returned.
if $DEBUG
  def try
    yield
  end
else
  def try
    yield
  rescue => error
    puts error.to_s.red
  end
end
# Returns the modification Time shown in the table row that contains +link+.
#
# The row is taken to be the grandparent of the link's node; its first child
# whose text looks like a date (dd.mm.yyyy, dd/mm/yyyy, yyyy.mm.dd or
# yyyy/mm/dd) is parsed with Time.parse.
#
# Raises ArgumentError when the row has no date-like cell (previously this
# surfaced as a NoMethodError on nil).
def mtime_from_row_link(link)
  date_pattern = %r{\d{2}[./]\d{2}[./]\d{4}|\d{4}[./]\d{2}[./]\d{2}}
  row = link.node.parent.parent
  date_cell = row.children.find { |cell| cell.text =~ date_pattern }
  unless date_cell
    raise ArgumentError, "No modification date found in the row of #{link.text.inspect}"
  end
  Time.parse date_cell.text
end
# Returns +filename+ transliterated by ActiveSupport::Inflector.
# If the result still cannot be encoded as US-ASCII a warning is printed,
# but the transliterated name is returned regardless.
def filter_filename filename
  transliterated = ActiveSupport::Inflector.transliterate(filename)
  ascii_only = begin
    transliterated.encode('US-ASCII') # will raise if anything not in ASCII
    true
  rescue
    false
  end
  puts "Warning: not clean ASCII filename: #{transliterated}".red unless ascii_only
  transliterated
end
# Returns the href of +link+ with every '%2B' replaced by a literal '+'.
def link_address(link)
  href = link.href
  href.gsub(/%2B/) { '+' }
end
# Mirrors one remote documents folder into +local_dir+, then recurses into
# its sub-folders.
#
# dir_link   - link to the remote folder page (must respond to #click and #href).
# local_dir  - Path of the local directory to fill; created when missing.
# remote_dir - remote path printed for the user ('' for the root folder).
# visited    - addresses of folders already explored; shared by the whole
#              recursion so that a folder is never analysed twice.
#
# A file is downloaded when it is missing locally or when the remote date is
# newer than the local ctime or mtime.
def analyse dir_link, local_dir, remote_dir = '', visited = []
  p [:analyse, dir_link, local_dir, remote_dir, visited] if $DEBUG
  visited << link_address(dir_link)
  page = dir_link.click
  puts(remote_dir.empty? ? '.' : remote_dir)
  try { local_dir.mkdir } unless local_dir.exist?

  # /claroline/document/document.php?cmd=exChDir&file=L0Vub25jZXM%3D&cidReset=true&cidReq=SINF2125
  # href.to_s: anchors without an href attribute must not crash the scan.
  folders = page.links.select { |link|
    link.href.to_s.start_with? '/claroline/document/document.php?cmd=exChDir'
  }.each { |link|
    link.text.tr!("\u00A0", '') # folder captions contain non-breaking spaces
    link.text.strip!
  }.reject { |link| LINKS_TO_IGNORE.include? link.text }
  puts "Folders: #{folders.join(', ')}" if $DEBUG

  files = [] # [name, mtime, link]

  # /claroline/backends/download.php?url=L2ludHJvUHJvamV0czAxLnBkZg%3D%3D&cidReset=true&cidReq=SINF2125
  # goto/?url=%2FLangage_C%2Fcours_c_exercices.pdf (foditic)
  page.links.select { |link|
    link.href.to_s.start_with?(DOWNLOAD_URL, 'goto/?url=')
  }.each { |link|
    link.text.strip!
    files << [link.text.strip, mtime_from_row_link(link), link]
  }

  # Images are listed through a viewer page; the real download link is on that page.
  page.links.select { |link|
    link.href.to_s.start_with? '/claroline/document/document.php?docView=image&file='
  }.each { |link|
    name = link.text.strip
    viewer_page = link.click
    dl_link = viewer_page.links.find { |l| l.href.to_s.start_with? DOWNLOAD_URL }
    if dl_link
      files << [name, mtime_from_row_link(link), dl_link]
    else
      puts "Could not find download link for image #{name}".red
    end
  }
  puts "Files: #{files.join(', ')}" if $DEBUG

  files.each { |name, mtime, link|
    filename = filter_filename File.basename(name)
    file = local_dir / filename
    if !file.exist?
      puts "Downloading #{filename} ...".green
      try { link.click.save file.to_s }
    elsif mtime > file.ctime || mtime > file.mtime
      puts "Remote #{filename} is newer, downloading ...".green
      p [file, file.ctime, file.mtime] if $DEBUG
      try do
        # Fetch first: if the download fails the local copy is still there.
        download = link.click
        file.unlink # save does not overwrite an existing file
        download.save file.to_s
      end
    end
  }

  folders.each { |link|
    # Checked at recursion time, since deeper calls keep adding to +visited+.
    next if visited.include? link_address(link)
    remote_folder = File.basename(link.text)
    local_folder = filter_filename remote_folder
    analyse link, (local_dir / local_folder), "#{remote_dir}#{remote_folder}/", visited
  }
end
# Courses for which a link was found on at least one searched site.
SUBSCRIBED_COURSES = []

# Logs in to +site+ and synchronises the documents of every tracked course
# that has a link on the home page.
#
# site - base URL of the platform (no trailing slash).
# user - login name.
# pass - password.
#
# Aborts the program when the login form is missing or the login is refused.
# Courses found on the site are recorded in SUBSCRIBED_COURSES.
def search_site(site, user, pass)
  agent = Mechanize.new { |a|
    a.user_agent_alias = 'Mac Safari'
  }
  puts "\nSearching #{site}".cyan.bold
  agent.get("#{site}/index.php") do |login_page|
    login_form = login_page.form_with(action: /login\.php$/)
    abort "No login form found on #{site}" unless login_form
    login_form.login, login_form.password = user, pass
    home_page = login_form.submit

    # Still seeing the login form means the credentials were refused.
    if home_page.form_with(action: /login\.php$/)
      puts "Login failed!".red.bold
      p home_page.uri
      p home_page
      abort
    end

    courses_links = Courses.each_with_object({}) { |course, h|
      link = home_page.link_with(text: /#{Regexp.escape course}/)
      if link
        h[course] = link
        SUBSCRIBED_COURSES << course unless SUBSCRIBED_COURSES.include? course
      end
    }

    courses_links.each_pair { |course, link|
      course_page = link.click
      documents = course_page.link_with(text: /Document/) # match "Documents et liens" and "Document"
      puts "\n#{course.blue.bold}"
      if documents
        # Match on the directory's own name (same pattern that builds Courses),
        # not on a substring of the whole path, which could pick another folder.
        dir = Path(CoursesDirs.find { |cd|
          cd.basename.to_s[/(?:\A|_)([A-Z0-9]+)\Z/, 1] == course
        })
        analyse documents, dir
      else
        puts 'No Documents'.red.bold
      end
    }
  end
end
# Synchronise iCampus, then Foditic when a Foditic password is available,
# and finally warn about tracked courses that were found on neither site.
search_site 'http://icampus.uclouvain.be', LOGIN, PASS
if PASS_FODITIC
  search_site 'http://foditic.org', LOGIN, PASS_FODITIC
end

not_subscribed = Courses - SUBSCRIBED_COURSES
not_subscribed.each { |course|
  puts "Warning: You are not subscribed to #{course}".red
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment