Last active
August 29, 2015 14:15
-
-
Save pchaigno/3905c8cecd5951f982c5 to your computer and use it in GitHub Desktop.
Python script to retrieve code samples from the Rosetta Code project
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import os | |
import yaml | |
import shutil | |
import re | |
import glob | |
import sys | |
from subprocess import call | |
# Extensions to replace, keyed by the language name used in Rosetta Code.
# Format: Language_name_in_Rosetta: [to_replace, new_extension].
extension_replacements = {
    'OCaml': ['.ocaml', '.ml'],
    'Oxygene': ['.oxy', '.oxygene'],
    'Perl': ['.pl', '.perl'],
    'Verilog': ['.v', '.veo'],
    'Forth': ['.fth', '.forth'],
    'Clean': ['.clean', '.icl'],
    'CLIPS': ['.clips', '.clp'],
    'Frege': ['.frege', '.fr'],
    'J': ['.j', '.ijs'],
    'Ioke': ['.ioke', '.ik'],
    'Julia': ['.julia', '.jl'],
    'BlitzMax': ['.blitz', '.bmx'],
    'PureBasic': ['.purebasic', '.pb'],
    'Component-Pascal': ['.component', '.cp'],
    'Fancy': ['.fancy', '.fy'],
    'Chapel': ['.chapel', '.chpl'],
    'SuperCollider': ['.supercollider', '.scd'],
    'Gosu': ['.gosu', '.gs'],
    'E': ['.e', '.E'],
    'NSIS': ['.nsis', '.nsi'],
    'Scilab': ['.scilab', '.sci'],
    'Mathematica': ['.math', '.m'],
    'Befunge': ['.bf', '.befunge'],
    'Rust': ['.rust', '.rs'],
    'Kotlin': ['.kotlin', '.kt'],
    'Haxe': ['.haxe', '.hx'],
    'NetLogo': ['.netlogo', '.nlogo'],
    'Nemerle': ['.nemerle', '.n'],
    'Elixir': ['.elixir', '.ex'],
    'ANTLR': ['.antlr', '.g4'],
    'Turing': ['.turing', '.tu'],
    'SystemVerilog': ['.v', '.sv'],
    'LabVIEW': ['.labview', '.lvproj'],
    'Mercury': ['.mercury', '.moo'],
    'LLVM': ['.llvm', '.ll'],
    'Fantom': ['.fantom', '.fan'],
    'REALbasic': ['.realbasic', '.rbbas'],
    'AutoIt': ['.autoit', '.au3'],
}

# Associations of names between Rosetta Code and Linguist.
# Format: Language_name_in_Rosetta: Language_name_in_Linguist. Only names that
# cannot be matched automatically need to be listed here; the script adds the
# others to this dictionary as it discovers them.
associations = {
    'Component-Pascal': 'Component Pascal',
    'Brainf---': 'Brainfuck',
    'GML': 'Game Maker Language',
}
# Loads the names of the languages known to Linguist (the top-level keys of
# languages.yml).
# The file is opened in a `with` block so the handle is closed once parsed.
# safe_load is used because the file is read as plain data: it never builds
# arbitrary Python objects, and recent PyYAML releases refuse yaml.load()
# when no Loader is given.
with open('linguist/lib/linguist/languages.yml', 'r') as languages_yml:
    linguist_languages = list(yaml.safe_load(languages_yml).keys())
# Walks every Rosetta Code task directory and copies its samples into the
# matching linguist/samples/[Language] directory.
for example_dir in os.listdir('RosettaCodeData/Task/'):
    # Progress output; the parenthesised form runs on both Python 2 and 3.
    print(example_dir)
    task_dir = os.path.join('RosettaCodeData/Task/', example_dir)
    for rosetta_language in os.listdir(task_dir):
        if rosetta_language not in associations:
            # Tries to match the language name in Rosetta with a language in
            # Linguist, ignoring case and treating '-' as a space.
            candidates = (
                rosetta_language.lower(),
                rosetta_language.replace('-', ' ').lower(),
            )
            for linguist_language in linguist_languages:
                if linguist_language.lower() not in candidates:
                    continue
                # Never maps two Rosetta languages onto one Linguist language.
                if linguist_language not in associations.values():
                    # associations is keyed by the Rosetta name (see how it is
                    # read below), so the Rosetta name must be the key here.
                    associations[rosetta_language] = linguist_language
                    break
        if rosetta_language not in associations:
            # No Linguist counterpart: nothing to copy for this language.
            continue

        src = os.path.join(task_dir, rosetta_language)
        dest = os.path.join('linguist/samples', associations[rosetta_language])
        if not os.path.isdir(dest):
            # Creates the samples/[Language] directory if it doesn't already
            # exist.
            os.mkdir(dest)

        for filepath in glob.glob(os.path.join(src, '*.*')):
            if not os.path.isfile(filepath):
                continue
            filename = os.path.basename(filepath)
            # Replaces extensions for certain languages. Only a trailing
            # extension is rewritten, so the same characters appearing earlier
            # in the file name are left untouched.
            if rosetta_language in extension_replacements:
                old_ext, new_ext = extension_replacements[rosetta_language]
                if filename.endswith(old_ext):
                    filename = filename[:-len(old_ext)] + new_ext
            target = os.path.join(dest, filename)
            # Checks that we won't overwrite a file.
            if os.path.isfile(target):
                print("File %s already exists." % (filename))
            elif not re.search(r"-\d+\.\w+$", filename):
                # Files whose name ends in "-<number>.<ext>" are skipped.
                shutil.copy(filepath, target)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment