Skip to content

Instantly share code, notes, and snippets.

@jdkato
Created September 3, 2016 22:26
Show Gist options
  • Save jdkato/fcb9298b9555159ce2cdd63b9bd686a5 to your computer and use it in GitHub Desktop.
Save jdkato/fcb9298b9555159ce2cdd63b9bd686a5 to your computer and use it in GitHub Desktop.
import time
import subprocess
import os
# Benchmark driver: walks `folder`, runs the `bin/lang-detect` PHP classifier
# on every file whose extension maps to a supported language, and reports how
# often the classifier's answer matches the language implied by the extension.

# Lower-case language names that take part in the benchmark; files whose
# expected language is not listed here are skipped.
supported = [
    'haskell', 'python', 'ruby', 'java', 'c#', 'c++', 'javascript', 'scala',
    'pascal', 'go', 'c', 'php', 'perl', 'matlab', 'clojure', 'visual basic',
]
folder = ''  # path to benchmark data (https://github.com/nbraud/benchmarksgame/tree/master/bench)
# File extensions that differ from the name of the language they stand for.
name2ext = {
    'csharp': 'C#', 'gcc': 'C', 'gpp': 'C++', 'ghc': 'Haskell',
    'jruby': 'Ruby', 'python3': 'Python', 'hack': 'PHP', 'yarv': 'Ruby',
    'C-sharp': 'C#',
}
# Classifier output labels that differ from the names used in `supported`.
result2Lang = {
    'cplusplus': 'C++', 'objectivec': 'objective-c', 'csharp': 'C#',
}


def expected_language(path):
    """Return the language implied by the extension of *path*.

    The extension is the text after the last dot of the file name; it is
    translated through ``name2ext`` when an alias exists. Returns ``None``
    when the file name contains no dot at all.
    """
    # Look at the base name only so that dots in directory names are ignored.
    _, dot, ext = os.path.basename(path).rpartition('.')
    if not dot:
        return None
    return name2ext.get(ext, ext)


def normalize_label(raw):
    """Return the classifier output *raw* as a stripped, canonical text label.

    ``subprocess.check_output`` yields ``bytes`` on Python 3; decoding here
    keeps the ``result2Lang`` lookup and the later string comparison
    meaningful instead of silently never matching.
    """
    if isinstance(raw, bytes):
        raw = raw.decode('utf-8', 'replace')
    label = raw.strip()
    return result2Lang.get(label, label)


count = 0    # number of files handed to the classifier
correct = 0  # number of files whose detected language matched the expected one
before = time.time()
for subdir, _, files in os.walk(folder):
    for f in files:
        in_file = os.path.join(subdir, f)
        lang = expected_language(in_file)
        if lang is None or lang.lower() not in supported:
            continue
        if not os.path.isfile(in_file):
            continue
        count += 1
        out = normalize_label(subprocess.check_output(
            ['php', 'bin/lang-detect', 'classify', in_file]
        ))
        if out.lower() == lang.lower():
            correct += 1
        else:
            # Report each misclassification: detected, expected, file.
            print(out, lang, in_file)

if count:
    # float() keeps this a true division on Python 2 as well.
    print("{} ({} / {})".format(
        round(correct / float(count), 3), correct, count))
else:
    # Nothing was classified (e.g. `folder` is unset); avoid dividing by zero.
    print("No supported source files found under {!r}".format(folder))
print("Time: {}".format(time.time() - before))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment