Skip to content

Instantly share code, notes, and snippets.

Created March 1, 2017 05:01
Show Gist options
  • Save 1995YogeshSharma/b46414aa18f0449e69dfd7e82f33a2df to your computer and use it in GitHub Desktop.
Save 1995YogeshSharma/b46414aa18f0449e69dfd7e82f33a2df to your computer and use it in GitHub Desktop.
This is a script written to extract all CSS classes which include "oppia" in their name and belong to a single directive file rather than being shared among several HTML files.
# coding: utf-8
# Copyright 2014 The Oppia Authors. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Collect the oppia CSS classes that are unique to each directive file.

This script is used to get a dictionary with directive filenames as keys and
the oppia classes (belonging to them uniquely) as values.
This can be used to move all the classes which are used only in a single
file from oppia.css to the respective files.
The script asks for the path to the directory in which you want to run it.
It searches all the files that lie below that directory in the file
structure, i.e. it also searches subdirectories recursively.
"""
import os
import re
import bs4
def return_all_files(dir_to_scan):
    """Return the paths of all files that lie below a directory.

    The directory tree is walked recursively with os.walk, so files in
    nested subdirectories are included as well.

    Args:
        dir_to_scan: path of the directory to scan.

    Returns:
        A list with one path per file found, each built by joining the
        directory reported by os.walk with the file name. The list is empty
        when os.walk yields nothing.
    """
    file_result = []
    # Subdirectory names are not collected: os.walk descends into them on
    # its own and the result only ever contained files.
    for root, _, files in os.walk(dir_to_scan):
        file_result.extend(os.path.join(root, name) for name in files)
    return file_result
def return_matched_files(files, pattern):
    """Return all the files matching the given pattern.

    Args:
        files: list of files to search from.
        pattern: compiled regular expression. It is applied with match(),
            so it is anchored at the start of the path only.

    Returns:
        A list of the entries of `files` that match, in their original
        order.
    """
    return [f for f in files if pattern.match(str(f)) is not None]
def match_classes_pattern(classes, pattern):
    """Return the classes which match the given pattern.

    Args:
        classes: list of class names to search from, or None when there
            are no classes to look at.
        pattern: compiled regular expression, applied with match().

    Returns:
        A list of the class names that match, in their original order.
        The list is empty when `classes` is None.
    """
    if classes is None:
        return []
    return [name for name in classes if pattern.match(name)]
def file_find_matching_classes(file_name, pattern):
    """Return all the classes matching the pattern in a file.

    The file is parsed as HTML and the `class` attribute of every tag is
    inspected. The text inside <script type="text/ng-template"> tags is
    parsed again as HTML so that classes used in inline templates are
    found too.

    Args:
        file_name: path of the file to read.
        pattern: compiled regular expression the class names must match.

    Returns:
        A set of the matching class names.
    """
    # The context manager closes the file even if parsing raises.
    with open(file_name) as f:
        file_soup = bs4.BeautifulSoup(f.read(), 'html.parser')

    oppia_classes = set()
    for tag in file_soup.findAll():
        if (tag.name == 'script' and
                tag.attrs.get('type') == 'text/ng-template'):
            # Re-parse the template text itself. Joining the string children
            # avoids str(tag.contents), which is the repr of a list and
            # would inject list brackets and escape sequences into the
            # markup handed to the parser.
            template_text = ''.join(
                child for child in tag.contents
                if isinstance(child, bs4.NavigableString))
            template_soup = bs4.BeautifulSoup(template_text, 'html.parser')
            for inner_tag in template_soup.findAll():
                oppia_classes.update(match_classes_pattern(
                    inner_tag.attrs.get('class'), pattern))
        oppia_classes.update(
            match_classes_pattern(tag.attrs.get('class'), pattern))
    return oppia_classes
def print_output(file_dict):
    """Print the file-class dictionary on the screen.

    For every file a line with the file name is printed, followed by a line
    with its classes and two blank lines. Files that have no classes are
    flagged with a leading '$'.

    Only single-argument print calls are used, so the function runs on both
    Python 2 and Python 3 and writes the same bytes as the Python 2 print
    statements it replaces.

    Args:
        file_dict: dictionary with files as keys and their classes as
            values.
    """
    for file_name in file_dict:
        classes = file_dict[file_name]
        # '$' marks a file with an empty class list.
        marker = '' if classes else '$  '
        print('%s%s' % (marker, file_name))
        # Each class is followed by one space; the extra newlines leave
        # blank lines between consecutive entries.
        print(''.join('%s ' % name for name in classes) + '\n\n')
def give_classes_unique_per_file(file_dict, file_store):
    """Drop the classes that are reused in some other file.

    A class of a file in `file_dict` is kept only when no *other* file in
    `file_store` contains it; the entry of the same file in `file_store`
    is ignored for the comparison.

    Args:
        file_dict: dictionary of files and their classes from which the
            reused classes are to be removed.
        file_store: dictionary of files and their classes against which
            the classes are checked.

    Returns:
        A dictionary with every key of `file_dict`, each mapped to a list
        of the classes that appear in no other file. The list is empty
        when the file has no such class.
    """
    # Index every class by the files that use it, so that each class is
    # checked with one lookup instead of a scan over all files.
    owners = {}
    for name, classes in file_store.items():
        for cls in classes:
            owners.setdefault(cls, set()).add(name)

    no_owner = frozenset()
    val = {}
    for name, classes in file_dict.items():
        val[name] = [
            cls for cls in classes
            if not owners.get(cls, no_owner) - {name}]
    return val
def main():
    """Prompt for a directory and print each directive's unique classes.

    All html files below the directory are scanned for CSS classes whose
    name contains 'oppia'. For every directive html file, the classes that
    occur in no other html file are then printed.
    """
    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input
    dir_to_scan = read_line('Enter path to directory ')

    # pattern for all html files; the trailing '$' keeps names that merely
    # contain '.html' (for example 'page.html.bak') out of the scan
    html_pattern = re.compile(r'.*\.html$')
    # pattern for matching directive html files
    directive_pattern = re.compile(r'.*directive\.html$')
    # pattern for matching the classes
    class_pattern = re.compile(r'.*oppia.*')

    all_files = return_all_files(dir_to_scan)
    html_files = return_matched_files(all_files, html_pattern)
    directive_files = return_matched_files(html_files, directive_pattern)

    all_html_file_css_classes = {}
    for fl in html_files:
        all_html_file_css_classes[fl] = file_find_matching_classes(
            fl, class_pattern)

    # Directive files are selected from the html files parsed above, so
    # their classes are reused instead of parsing each file a second time.
    directive_file_css_classes = dict(
        (fl, all_html_file_css_classes[fl]) for fl in directive_files)

    print_output(give_classes_unique_per_file(
        directive_file_css_classes, all_html_file_css_classes))
# Run the scan only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment