Last active
June 12, 2019 11:22
-
-
Save flodolo/5051d8063c00b4d5d11ae373dbe7a8d1 to your computer and use it in GitHub Desktop.
Check for markup
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import argparse | |
import codecs | |
import json | |
import logging | |
import os | |
import re | |
import six | |
import sys | |
# Set up the root logger with the logging module's default configuration.
logging.basicConfig()

# compare-locales is an external dependency: when it cannot be imported,
# report the problem together with the import error and exit with status 1.
try:
    from compare_locales import parser
except ImportError as import_error:
    print('FATAL: make sure that dependencies are installed\n{}'.format(
        import_error))
    sys.exit(1)
class StringExtraction(object):
    '''
    Extract strings from the supported files of a repository and report
    translations that contain '<' while the reference string does not.

    Expected call order: setLocale('en-US'), setRepositoryPath() and
    extractStrings() to record the reference data; then, for every other
    locale, the same three calls followed by checkIssues().
    '''

    def __init__(self):
        '''Initialize object.'''

        # Set defaults
        self.supported_formats = [
            '.dtd',
        ]

        # State filled in by the setters and by extractStrings(). It is
        # initialized here so that every method can be called safely even
        # when no reference data has been extracted yet.
        self.locale = None
        self.reference_locale = False
        self.repository_path = ''
        self.file_list = []
        self.translations = {}
        # IDs of reference strings containing '<', and all reference IDs.
        # Both are sets, since they are only used for membership tests.
        self.minor_strings = set()
        self.reference_ids = set()

    def setLocale(self, locale):
        '''Set current locale ('en-US' is treated as the reference).'''

        self.reference_locale = locale == 'en-US'
        self.locale = locale

    def setRepositoryPath(self, path):
        '''Set path to repository.'''

        # Strip trailing '/' from repository path
        self.repository_path = path.rstrip(os.path.sep)

    def extractFileList(self):
        '''
        Store in self.file_list the sorted list of supported files found
        in the repository, following symbolic links.
        '''

        # str.endswith() accepts a tuple, so each file is tested (and
        # stored) only once even if several suffixes match.
        suffixes = tuple(self.supported_formats)
        self.file_list = sorted(
            os.path.join(root, name)
            for root, _dirs, names in os.walk(
                self.repository_path, followlinks=True)
            for name in names
            if name.endswith(suffixes)
        )

    def getRelativePath(self, file_name):
        '''
        Get the path of a file relative to the repository path.

        file_name is expected to start with the repository path followed
        by a path separator; that prefix is sliced away.
        '''

        return file_name[len(self.repository_path) + 1:]

    def extractStrings(self):
        '''
        Extract strings from all supported files into self.translations,
        a dictionary mapping 'relative/path:entity' to the raw value.

        Errors on a single file are printed and the file is skipped. For
        the reference locale, the reference data used by checkIssues() is
        refreshed once all files have been read.
        '''

        # Create a list of files to analyze
        self.extractFileList()

        self.translations = {}
        for file_name in self.file_list:
            file_extension = os.path.splitext(file_name)[1]
            relative_path = self.getRelativePath(file_name)
            try:
                # Creating the parser and reading the file are kept inside
                # the try block: a single unreadable file must not stop
                # the analysis of all the remaining files and locales.
                file_parser = parser.getParser(file_extension)
                file_parser.readFile(file_name)
                for entity in file_parser.parse():
                    # Ignore Junk
                    if isinstance(entity, parser.Junk):
                        continue
                    string_id = u'{0}:{1}'.format(
                        relative_path, six.text_type(entity))
                    self.translations[string_id] = entity.raw_val
            except Exception as e:
                print('Error parsing file: {0}'.format(file_name))
                print(e)

        # Store reference strings. This runs once, after the loop, so the
        # data is not rebuilt for every file and it is defined even when
        # no file could be parsed.
        if self.reference_locale:
            self.getReferenceStringsMinor()

    def getReferenceStringsMinor(self):
        '''
        Store the IDs of the current strings as reference data:
        self.reference_ids gets all IDs, self.minor_strings only the IDs
        of strings containing '<'.
        '''

        # Snapshot the IDs in a new set, independent from the dictionary.
        self.reference_ids = set(self.translations)
        self.minor_strings = {
            string_id
            for string_id, translation in self.translations.items()
            if '<' in translation
        }

    def checkIssues(self):
        '''
        Print the strings of the current locale that contain '<' while
        the reference string with the same ID does not.

        Strings missing from the reference (obsolete) are ignored.
        Nothing is printed if there are no issues.
        '''

        issues = [
            '{}: {}'.format(string_id, translation)
            for string_id, translation in self.translations.items()
            if '<' in translation
            # Ignore obsolete strings
            and string_id in self.reference_ids
            and string_id not in self.minor_strings
        ]

        if issues:
            print('\n\nLocale: {}'.format(self.locale))
            print('\n'.join(issues))
def main():
    '''
    Extract the en-US reference strings, then check every locale found in
    the locales folder and print the strings that contain '<' while the
    reference does not.

    Both folders can be set from the command line (--locales-path and
    --reference-path); without arguments the original hard-coded paths
    are used.
    '''

    # Not called "parser", since that name is the compare_locales module
    # imported at the top of the file.
    cl_parser = argparse.ArgumentParser(
        description='Check localized DTD strings for unexpected markup')
    cl_parser.add_argument(
        '--locales-path',
        default='/Users/flodolo/mozilla/mercurial/l10n_clones/locales',
        help='Folder with one subfolder per locale (default: %(default)s)')
    cl_parser.add_argument(
        '--reference-path',
        default='/Users/flodolo/mozilla/mercurial/gecko-strings-quarantine',
        help='Folder with the en-US reference strings '
             '(default: %(default)s)')
    args = cl_parser.parse_args()

    repos_path = args.locales_path
    ignored_locales = []
    # Each locale is a subfolder: skip hidden entries, ignored locales and
    # stray files, which would otherwise be counted as locales.
    locales = sorted(
        x for x in os.listdir(repos_path)
        if not x.startswith('.')
        and x not in ignored_locales
        and os.path.isdir(os.path.join(repos_path, x))
    )

    # Initialize class
    extracted_strings = StringExtraction()

    # Extract strings for en-US, and keep them stored for comparison later
    print('Extracting reference en-US strings')
    gecko_string_path = args.reference_path
    extracted_strings.setLocale('en-US')
    extracted_strings.setRepositoryPath(gecko_string_path)
    extracted_strings.extractStrings()

    # Check other locales
    print('Checking other locales ({})'.format(len(locales)))
    for locale in locales:
        locale_path = os.path.join(repos_path, locale)
        extracted_strings.setRepositoryPath(locale_path)
        extracted_strings.setLocale(locale)
        extracted_strings.extractStrings()
        extracted_strings.checkIssues()
# Run the check only when the file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash

# Run the DTD check from the folder that holds the virtualenv and the script.

# Stop if the folder is missing, instead of running the remaining commands
# from whatever the current directory happens to be.
cd /Users/flodolo/mozilla/mercurial/l10n_clones/ || exit 1

# Stop if the virtualenv cannot be activated, so the check never runs
# against a different Python environment.
source venv/bin/activate || exit 1

# Print the version of compare-locales in use
compare-locales --version

python check_dtd_bug1539759.py
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment