Skip to content

Instantly share code, notes, and snippets.

@flodolo
Last active June 12, 2019 11:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save flodolo/5051d8063c00b4d5d11ae373dbe7a8d1 to your computer and use it in GitHub Desktop.
Save flodolo/5051d8063c00b4d5d11ae373dbe7a8d1 to your computer and use it in GitHub Desktop.
Check for markup
#!/usr/bin/env python3
import argparse
import codecs
import json
import logging
import os
import re
import six
import sys
logging.basicConfig()
try:
from compare_locales import parser
except ImportError as e:
print('FATAL: make sure that dependencies are installed')
print(e)
sys.exit(1)
class StringExtraction():
    '''
    Collect strings from localization repositories and report markup that
    was introduced by a locale.

    Typical usage: load the reference locale ('en-US') first, so that its
    string IDs are remembered, then load each other locale and call
    checkIssues() to print the strings containing '<' whose reference
    string does not.
    '''

    def __init__(self):
        '''Initialize object.'''

        # Set defaults
        self.supported_formats = [
            '.dtd',
        ]

        # State filled in by the setters and by extractStrings(). Having
        # defaults here lets checkIssues() run (and report nothing) even
        # if no reference locale has been loaded yet.
        self.locale = None
        self.reference_locale = False
        self.file_list = []
        self.translations = {}
        self.reference_ids = set()
        self.minor_strings = []

    def setLocale(self, locale):
        '''Set current locale. 'en-US' is treated as the reference locale.'''

        self.reference_locale = locale == 'en-US'
        self.locale = locale

    def setRepositoryPath(self, path):
        '''Set path to repository.'''

        # Strip trailing '/' from repository path
        self.repository_path = path.rstrip(os.path.sep)

    def extractFileList(self):
        '''
        Store in self.file_list the sorted paths of all files, found
        walking the repository (following symlinks), with a supported
        extension.
        '''

        # str.endswith() accepts a tuple: one check per file, and a file
        # can never be listed twice even if several suffixes match.
        suffixes = tuple(self.supported_formats)
        found = []
        for root, _dirs, files in os.walk(
                self.repository_path, followlinks=True):
            for name in files:
                if name.endswith(suffixes):
                    found.append(os.path.join(root, name))
        found.sort()
        self.file_list = found

    def getRelativePath(self, file_name):
        '''
        Get the path of a file relative to the repository, by dropping the
        repository path and the separator following it.
        '''

        return file_name[len(self.repository_path) + 1:]

    def extractStrings(self):
        '''
        Extract strings from all supported files in the repository.

        self.translations is rebuilt from scratch, mapping
        '<relative path>:<entity as text>' to the entity's raw value.
        A file that fails to parse is reported on stdout and skipped.
        For the reference locale, the reference data is refreshed once
        all files have been processed.
        '''

        # Create a list of files to analyze
        self.extractFileList()

        self.translations = {}
        for file_name in self.file_list:
            file_extension = os.path.splitext(file_name)[1]

            file_parser = parser.getParser(file_extension)
            file_parser.readFile(file_name)
            try:
                entities = file_parser.parse()
                for entity in entities:
                    # Ignore Junk
                    if isinstance(entity, parser.Junk):
                        continue
                    string_id = u'{0}:{1}'.format(
                        self.getRelativePath(file_name),
                        six.text_type(entity))
                    self.translations[string_id] = entity.raw_val
            except Exception as e:
                # Best effort: keep going with the remaining files
                print('Error parsing file: {0}'.format(file_name))
                print(e)

        # Store reference strings. Done once, after the loop: the result
        # only depends on the complete set of translations, so rescanning
        # it after every file is wasted work, and it must also run when a
        # file failed to parse or when no file was found.
        if self.reference_locale:
            self.getReferenceStringsMinor()

    def getReferenceStringsMinor(self):
        '''
        Remember the IDs of the currently loaded strings as reference IDs,
        and store in self.minor_strings the IDs of those containing '<'.
        '''

        # Snapshot the IDs: self.translations is replaced every time a
        # new locale is loaded, the reference IDs must not follow it.
        self.reference_ids = set(self.translations)
        self.minor_strings = [
            string_id
            for string_id, translation in self.translations.items()
            if '<' in translation
        ]

    def checkIssues(self):
        '''
        Print the strings of the current locale that contain '<' while
        the reference string with the same ID does not.

        Strings whose ID is not available in the reference (obsolete
        strings) are ignored. Nothing is printed if there are no issues.
        '''

        # Set for constant-time lookups in the loop below
        minor_ids = set(self.minor_strings)

        issues = []
        for string_id, translation in self.translations.items():
            if '<' not in translation:
                continue

            if string_id not in self.reference_ids:
                # Obsolete string
                continue

            if string_id not in minor_ids:
                issues.append('{}: {}'.format(string_id, translation))

        if issues:
            print('\n\nLocale: {}'.format(self.locale))
            print('\n'.join(issues))
def main(
        repos_path='/Users/flodolo/mozilla/mercurial/l10n_clones/locales',
        gecko_string_path=(
            '/Users/flodolo/mozilla/mercurial/gecko-strings-quarantine'),
        ignored_locales=()):
    '''
    Compare every locale against en-US and print the strings that contain
    '<' while the reference string does not.

    repos_path: folder containing one sub-folder per locale; entries
        starting with '.' are skipped.
    gecko_string_path: folder containing the en-US reference strings.
    ignored_locales: locale codes to exclude from the check.

    All parameters default to the values originally hard-coded in the
    script, so calling main() without arguments behaves as before.
    '''

    locales = sorted(
        set(x for x in os.listdir(repos_path) if not x.startswith('.'))
        - set(ignored_locales)
    )

    # Initialize class
    extracted_strings = StringExtraction()

    # Extract strings for en-US, and keep them stored for comparison later
    print('Extracting reference en-US strings')
    extracted_strings.setLocale('en-US')
    extracted_strings.setRepositoryPath(gecko_string_path)
    extracted_strings.extractStrings()

    # Check other locales
    print('Checking other locales ({})'.format(len(locales)))
    for locale in locales:
        extracted_strings.setRepositoryPath(os.path.join(repos_path, locale))
        extracted_strings.setLocale(locale)
        extracted_strings.extractStrings()
        extracted_strings.checkIssues()


if __name__ == '__main__':
    main()
#!/usr/bin/env bash

# Run the DTD markup check from the l10n clones folder, using the virtualenv
# stored in it.

# Stop if the folder is missing: the relative paths below would otherwise be
# resolved against whatever the current directory happens to be.
cd /Users/flodolo/mozilla/mercurial/l10n_clones/ || exit 1

# Stop if the virtualenv can't be activated, instead of silently running the
# check with a different Python environment.
source venv/bin/activate || exit 1

# Print the version of compare-locales in use, then run the check
compare-locales --version
python check_dtd_bug1539759.py
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment