Skip to content

Instantly share code, notes, and snippets.

@pchaigno
Last active August 29, 2015 14:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pchaigno/3905c8cecd5951f982c5 to your computer and use it in GitHub Desktop.
Save pchaigno/3905c8cecd5951f982c5 to your computer and use it in GitHub Desktop.
Python script that retrieves sample code from the Rosetta Code project
#!/usr/bin/env python
import os
import yaml
import shutil
import re
import glob
import sys
from subprocess import call
# File-extension rewrites applied to copied samples.
# Key: language name as used by Rosetta Code.
# Value: [extension to replace, extension to use instead].
extension_replacements = {
    'OCaml': ['.ocaml', '.ml'],
    'Oxygene': ['.oxy', '.oxygene'],
    'Perl': ['.pl', '.perl'],
    'Verilog': ['.v', '.veo'],
    'Forth': ['.fth', '.forth'],
    'Clean': ['.clean', '.icl'],
    'CLIPS': ['.clips', '.clp'],
    'Frege': ['.frege', '.fr'],
    'J': ['.j', '.ijs'],
    'Ioke': ['.ioke', '.ik'],
    'Julia': ['.julia', '.jl'],
    'BlitzMax': ['.blitz', '.bmx'],
    'PureBasic': ['.purebasic', '.pb'],
    'Component-Pascal': ['.component', '.cp'],
    'Fancy': ['.fancy', '.fy'],
    'Chapel': ['.chapel', '.chpl'],
    'SuperCollider': ['.supercollider', '.scd'],
    'Gosu': ['.gosu', '.gs'],
    'E': ['.e', '.E'],
    'NSIS': ['.nsis', '.nsi'],
    'Scilab': ['.scilab', '.sci'],
    'Mathematica': ['.math', '.m'],
    'Befunge': ['.bf', '.befunge'],
    'Rust': ['.rust', '.rs'],
    'Kotlin': ['.kotlin', '.kt'],
    'Haxe': ['.haxe', '.hx'],
    'NetLogo': ['.netlogo', '.nlogo'],
    'Nemerle': ['.nemerle', '.n'],
    'Elixir': ['.elixir', '.ex'],
    'ANTLR': ['.antlr', '.g4'],
    'Turing': ['.turing', '.tu'],
    'SystemVerilog': ['.v', '.sv'],
    'LabVIEW': ['.labview', '.lvproj'],
    'Mercury': ['.mercury', '.moo'],
    'LLVM': ['.llvm', '.ll'],
    'Fantom': ['.fantom', '.fan'],
    'REALbasic': ['.realbasic', '.rbbas'],
    'AutoIt': ['.autoit', '.au3'],
}
# Hand-written name associations between the two projects.
# Key: language name as used by Rosetta Code.
# Value: name of the same language in Linguist.
# The script adds further entries to this table while it runs.
associations = {
    'Component-Pascal': 'Component Pascal',
    'Brainf---': 'Brainfuck',
    'GML': 'Game Maker Language',
}
# Locations of the two checkouts, relative to the directory the script is
# run from.
TASK_ROOT = 'RosettaCodeData/Task/'
SAMPLES_ROOT = 'linguist/samples/'
LANGUAGES_YML = 'linguist/lib/linguist/languages.yml'

# File names ending in "-<digits>.<extension>" are never copied
# (presumably the numbered alternative solutions of a task -- TODO confirm).
NUMBERED_SAMPLE = re.compile(r"-\d+\.\w+$")


def load_linguist_languages(path):
    """Return the top-level keys (language names) of Linguist's languages.yml.

    The file is closed as soon as it has been parsed.
    """
    with open(path, 'r') as languages_yml:
        # NOTE(review): this used to be yaml.load() without a Loader, which
        # PyYAML 6+ rejects. safe_load only builds plain Python objects;
        # languages.yml is expected to need nothing more -- confirm.
        return list(yaml.safe_load(languages_yml).keys())


def resolve_linguist_name(rosetta_language, linguist_index, known_associations):
    """Return the Linguist name for a Rosetta Code language, or None.

    rosetta_language   -- language directory name from Rosetta Code.
    linguist_index     -- dict mapping lower-cased Linguist names to their
                          real spelling.
    known_associations -- dict of Rosetta name -> Linguist name; a newly
                          found match is recorded in it.

    A Rosetta name matches a Linguist name when both are equal ignoring
    case, either as-is or after replacing '-' with ' ' in the Rosetta name.
    A Linguist language that is already associated with another Rosetta name
    is not matched a second time.
    """
    if rosetta_language in known_associations:
        return known_associations[rosetta_language]
    candidates = (
        rosetta_language.lower(),
        rosetta_language.replace('-', ' ').lower(),
    )
    for candidate in candidates:
        linguist_language = linguist_index.get(candidate)
        if linguist_language is None:
            continue
        if linguist_language in known_associations.values():
            continue
        # The table is keyed by the Rosetta name (the original code stored
        # the pair the other way round, so the lookup never found it).
        known_associations[rosetta_language] = linguist_language
        return linguist_language
    return None


def rename_extension(filename, replacement):
    """Return filename with its trailing extension swapped.

    replacement is a [to_replace, new_extension] pair. Only a match at the
    very end of the name is rewritten; any other name is returned unchanged.
    """
    old_extension, new_extension = replacement
    if filename.endswith(old_extension):
        return filename[:-len(old_extension)] + new_extension
    return filename


def copy_samples(task_root, samples_root, linguist_languages,
                 name_associations, replacements):
    """Copy Rosetta Code samples into per-language Linguist sample folders.

    task_root          -- directory holding one sub-directory per task, each
                          holding one sub-directory per language.
    samples_root       -- existing directory that receives one folder per
                          Linguist language.
    linguist_languages -- iterable of Linguist language names.
    name_associations  -- dict of Rosetta name -> Linguist name; updated in
                          place with every match found.
    replacements       -- dict of Rosetta name -> [to_replace, new_extension].

    Existing destination files are never overwritten, and files matching
    NUMBERED_SAMPLE are skipped.
    """
    linguist_index = dict((name.lower(), name) for name in linguist_languages)
    for example_dir in os.listdir(task_root):
        task_dir = os.path.join(task_root, example_dir)
        if not os.path.isdir(task_dir):
            # Stray files next to the task directories cannot be listed.
            continue
        print(example_dir)
        for rosetta_language in os.listdir(task_dir):
            src = os.path.join(task_dir, rosetta_language)
            if not os.path.isdir(src):
                continue
            linguist_language = resolve_linguist_name(
                rosetta_language, linguist_index, name_associations)
            if linguist_language is None:
                continue
            dest = os.path.join(samples_root, linguist_language)
            if not os.path.isdir(dest):
                # Creates samples/[Language] if it doesn't already exist.
                os.mkdir(dest)
            for filepath in glob.glob(os.path.join(src, '*.*')):
                if not os.path.isfile(filepath):
                    continue
                filename = os.path.basename(filepath)
                # Replaces extensions for certain languages.
                if rosetta_language in replacements:
                    filename = rename_extension(
                        filename, replacements[rosetta_language])
                target = os.path.join(dest, filename)
                # Checks that we won't overwrite a file.
                if os.path.isfile(target):
                    print("File %s already exists." % (filename))
                elif not NUMBERED_SAMPLE.search(filename):
                    shutil.copy(filepath, target)


def main():
    """Run the copy with the default checkout locations and lookup tables."""
    linguist_languages = load_linguist_languages(LANGUAGES_YML)
    copy_samples(TASK_ROOT, SAMPLES_ROOT, linguist_languages,
                 associations, extension_replacements)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment