-
-
Save tvpeter/d3fd0b11cc8ca9e94e26337fb4b190a2 to your computer and use it in GitHub Desktop.
Get statistics of various files on the bitcointranscripts repo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from fileinput import filename | |
from math import fabs | |
import os | |
import glob | |
import re | |
""" ALL THE FILES IN THE DIRECTORY""" | |
# Total number of files below the current directory.
count = 0
for root_dir, cur_dir, files in os.walk('./'):
    # Prune in place so os.walk never descends into dot-directories.
    cur_dir[:] = [sub for sub in cur_dir if not sub.startswith('.')]
    count += len(files)
print('Total files:', count)
# Number of files whose name does not end in "index.md".
counter = 0
for root_dir, cur_dir, files in os.walk('./'):
    # Hidden directories are removed in place so they are not traversed.
    cur_dir[:] = [sub for sub in cur_dir if not sub.startswith('.')]
    counter += sum(1 for file in files if not file.endswith("index.md"))
print("Files that are not index.md: ", counter)
# Number of files that are not Spanish translations (".es.md" or ".es").
not_es_or_pt_files = 0
for root_dir, cur_dir, files in os.walk('./'):
    # Hidden directories are removed in place so they are not traversed.
    cur_dir[:] = [sub for sub in cur_dir if not sub.startswith('.')]
    for file in files:
        if file.endswith((".es.md", ".es")):
            continue
        not_es_or_pt_files += 1
print("Files that does not end with es: ", not_es_or_pt_files)
# Number of files that are neither an index page nor a Spanish/Portuguese
# translation.
not_es_pt_or_index = 0
for root_dir, cur_dir, files in os.walk(r'./'):
    # Prune in place so os.walk never descends into dot-directories.
    cur_dir[:] = [d for d in cur_dir if not d.startswith('.')]
    for file in files:
        # ".pt.md" is listed next to ".pt" so that Portuguese Markdown files
        # are excluded the same way ".es.md" files are; a bare ".pt" check
        # never matches a name such as "talk.pt.md".
        if not file.endswith((".es.md", ".es", ".pt.md", ".pt", "index.md")):
            not_es_pt_or_index += 1
print("Files that are not index, pt or es: ", not_es_pt_or_index)
def search_str(file_path, word, max_lines=17):
    """Report whether *word* occurs in the leading lines of a text file.

    The match is a case-insensitive substring test performed line by line.

    Args:
        file_path: Path of the file to scan. It is decoded as UTF-8 and
            undecodable bytes are silently dropped.
        word: Substring to look for.
        max_lines: How many lines from the top of the file are inspected.
            Defaults to 17, the window this helper has always used; pass
            ``None`` to scan the whole file.

    Returns:
        True if one of the inspected lines contains *word*, otherwise False.
    """
    # Local import keeps this helper usable without touching the file's
    # import block.
    from itertools import islice

    needle = word.lower()  # lower-cased once instead of once per line
    with open(file_path, encoding="utf8", errors='ignore') as file:
        return any(needle in line.lower() for line in islice(file, max_lines))
# Files in which none of the keys "name:", "speaker:" or "speakers:" is found
# by search_str. Index pages, Spanish translations, Portuguese index pages and
# metadata files are left out. Every hit is printed and appended to
# files_without_author.txt.
no_author_files = 0
for root_dir, cur_dir, files in os.walk('./'):
    # Hidden directories are removed in place so they are not traversed.
    cur_dir[:] = [sub for sub in cur_dir if not sub.startswith('.')]
    for name in files:
        if name.endswith(("index.md", ".es.md", "_index.pt.md", "metadata.md")):
            continue
        file_path = os.path.abspath(os.path.join(root_dir, name))
        if any(search_str(file_path, key) for key in ("name:", "speaker:", "speakers:")):
            continue
        no_author_files += 1
        print(file_path)
        with open('files_without_author.txt', 'a') as fd:
            fd.write(f'\n{file_path}')
print("Files without author name: ", no_author_files)
# Files in which search_str finds the text "name" (index pages and Spanish
# translations are skipped).
with_author_name = 0
for root_dir, cur_dir, files in os.walk(r'./'):
    # Prune in place so os.walk never descends into dot-directories.
    cur_dir[:] = [d for d in cur_dir if not d.startswith('.')]
    for name in files:
        if name.endswith(("index.md", ".es.md")):
            continue
        file_path = os.path.abspath(os.path.join(root_dir, name))
        # search_str compares case-insensitively, so this single call also
        # covers "Name" and "NAME"; repeating the search with other casings
        # only re-read the file without ever changing the outcome.
        if search_str(file_path, "name"):
            with_author_name += 1
print("Files with author name in the body: ", with_author_name)
# Files for which search_str finds no YouTube watch URL (index pages and
# Spanish translations are skipped).
without_video_link = 0
for root_dir, cur_dir, files in os.walk('./'):
    # Hidden directories are removed in place so they are not traversed.
    cur_dir[:] = [sub for sub in cur_dir if not sub.startswith('.')]
    for name in files:
        if name.endswith(("index.md", ".es.md")):
            continue
        file_name = os.path.abspath(os.path.join(root_dir, name))
        if search_str(file_name, "https://www.youtube.com/watch"):
            continue
        without_video_link += 1
print("Files without video links: ", without_video_link)
# Files for which search_str finds a YouTube watch URL (index pages and
# Spanish translations are skipped).
video_links = 0
for root_dir, cur_dir, files in os.walk('./'):
    # Hidden directories are removed in place so they are not traversed.
    cur_dir[:] = [sub for sub in cur_dir if not sub.startswith('.')]
    for name in files:
        is_candidate = not name.endswith(("index.md", ".es.md"))
        if is_candidate:
            file_name = os.path.abspath(os.path.join(root_dir, name))
            if search_str(file_name, "https://www.youtube.com/watch"):
                video_links += 1
print("Files with video links: ", video_links)
def find_append_to_file(filename, find, insert):
    """Insert a new line containing *insert* at the place where *find* occurs.

    The file is read in full, *find* is located in ``repr()`` of its text, and
    the file is rewritten as: text before the match, a newline, *insert*, then
    the text starting ``len(find) - 1`` characters after the match position.
    When nothing is found the file is left untouched.

    Args:
        filename: Path of the file to edit in place (read and written as UTF-8).
        find: Text to locate. Because matching runs against ``repr()`` of the
            content, an escaped form such as ``"\\\\n"`` matches a real newline.
        insert: Text written on the new line.

    Returns:
        None.
    """
    with open(filename, 'r+', encoding="utf8") as file:
        lines = file.read()
        # repr() adds a leading quote, hence the "- 1".
        # NOTE(review): escapes that precede the match (e.g. earlier newlines)
        # shift this index relative to the real text - confirm intended use.
        index = repr(lines).find(find) - 1
        # NOTE(review): a match at the very start yields index == 0 and is
        # skipped; "not found" yields -2. Confirm whether ">= 0" was meant.
        if index > 0:
            len_found = len(find) - 1
            old_lines = lines[index + len_found:]
            # Rewrite from the start instead of seeking to a character index:
            # text-mode seek() only accepts offsets returned by tell(), and a
            # character index is not a byte offset for multi-byte content.
            file.seek(0)
            # "\n" is used rather than os.linesep because a text-mode handle
            # already translates it to the platform line ending.
            file.write(lines[:index] + "\n" + insert + old_lines)
            # Drop stale bytes when the new content is shorter than the old.
            file.truncate()
# For every non-index file under ./adopting-bitcoin, copy the first line that
# contains a YouTube watch URL to line index 6 of the same file.
# NOTE(review): index 6 is taken from the original insert() call; presumably it
# targets the header area of the transcript - confirm. Running the script again
# inserts another copy.
youtube_link = "https://www.youtube.com/watch"
for root_dir, cur_dir, files in os.walk(r'./adopting-bitcoin'):
    # Prune in place so os.walk never descends into dot-directories.
    cur_dir[:] = [d for d in cur_dir if not d.startswith('.')]
    for name in files:
        if name.endswith("index.md"):
            continue
        file_name = os.path.abspath(os.path.join(root_dir, name))
        with open(file_name, "r+") as myFile:
            # Read the whole file first: calling readlines() while iterating
            # only returns the lines after the current one, which made the
            # later index lookup hit the wrong line (or raise IndexError).
            content = myFile.readlines()
            link_line = next((row for row in content if youtube_link in row), None)
            if link_line is None:
                continue
            if not link_line.endswith("\n"):
                # A link on an unterminated last line must not be glued to
                # the line that follows the insertion point.
                link_line += "\n"
            content.insert(6, link_line)
            # Rewrite from the top; writing without rewinding appended the
            # lines to the end of the file.
            myFile.seek(0)
            myFile.write("".join(content))
            myFile.truncate()
def find_date(date_file, pattern):
    """Report whether the regular expression *pattern* matches anywhere in a file.

    Args:
        date_file: Path of the file to read. It is decoded as UTF-8 and
            undecodable bytes are silently dropped.
        pattern: Regular expression searched for in the whole file content.

    Returns:
        True if the content contains at least one match, otherwise False.
    """
    # The context manager closes the handle; it used to be left open.
    with open(date_file, encoding="utf8", errors='ignore') as f:
        content = f.read()
    # One match is enough, so there is no need to collect them all.
    return re.search(pattern, content) is not None
# Files whose content contains a date.
files_with_date = no_with_year = 0
# Raw strings keep "\d" from being read as a string escape sequence.
pattern = r"\d{4}[/-]\d{2}[/-]\d{2}"            # yyyy-mm-dd or yyyy/mm/dd
year_last_pattern = r"\d{2}[/-]\d{2}[/-]\d{4}"  # dd-mm-yyyy or dd/mm/yyyy
year_pattern = r"\d{4}"                         # any run of four digits
# The walk is restored here: with its "for" line commented out, the body ran
# once on whatever an earlier loop had left in root_dir/cur_dir/files.
# NOTE(review): the commented-out line walked './adopting-bitcoin'; './' is
# used to match the other statistics - confirm the intended root.
for root_dir, cur_dir, files in os.walk(r'./'):
    # Prune in place so os.walk never descends into dot-directories.
    cur_dir[:] = [d for d in cur_dir if not d.startswith('.')]
    for name in files:
        # Both suffixes must be excluded; "not A or not B" was true for every
        # file name and therefore filtered nothing.
        if name.endswith(("index.md", ".es.md")):
            continue
        file_name = os.path.abspath(os.path.join(root_dir, name))
        if find_date(file_name, year_pattern):
            no_with_year += 1
        if find_date(file_name, pattern):
            files_with_date += 1
print("Files with year date: yyyy ", no_with_year)
print("Files with a date. Format yyyy-mm-dd: ", files_with_date)
# Files whose content has neither a yyyy-mm-dd nor a dd-mm-yyyy date (index
# pages and Spanish translations are skipped). Every hit is printed and
# appended to files_without_date.txt; the path of each hit is then checked
# for a date or a four-digit year.
without_date = filename_with_year = only_year = 0
for root_dir, cur_dir, files in os.walk(r'./'):
    # Prune in place so os.walk never descends into dot-directories.
    cur_dir[:] = [d for d in cur_dir if not d.startswith('.')]
    for name in files:
        if name.endswith(("index.md", ".es.md")):
            continue
        file_name = os.path.abspath(os.path.join(root_dir, name))
        if find_date(file_name, pattern) or find_date(file_name, year_last_pattern):
            continue
        without_date += 1
        print(file_name)
        with open('files_without_date.txt', 'a') as fd:
            fd.write(f'\n{file_name}')
        # The regexes run against the absolute path, so a dated directory
        # name counts as well as a dated file name.
        if re.search(pattern, file_name):
            filename_with_year += 1
        if re.search(year_pattern, file_name):
            only_year += 1
print("Files without a date. Format yyyy-mm-dd or dd-mm-yyyy: ", without_date)
print("Files without a date in body but has date [yyyy-mm-dd] in filename ", filename_with_year)
print("Files without a date in body but has year [yyyy] in filename ", only_year)
# Files in which search_str finds "categories:" (index pages and Spanish
# translations are skipped).
category_tag = 0
for root_dir, cur_dir, files in os.walk('./'):
    # Hidden directories are removed in place so they are not traversed.
    cur_dir[:] = [sub for sub in cur_dir if not sub.startswith('.')]
    for name in files:
        if name.endswith(("index.md", ".es.md")):
            continue
        file_name = os.path.abspath(os.path.join(root_dir, name))
        if not search_str(file_name, "categories:"):
            continue
        category_tag += 1
print("Files that have a category tag: ", category_tag)
# Files in which search_str finds the key "tags:" (index pages and Spanish
# translations are skipped).
tags = 0
for root_dir, cur_dir, files in os.walk(r'./'):
    # Prune in place so os.walk never descends into dot-directories.
    cur_dir[:] = [d for d in cur_dir if not d.startswith('.')]
    for name in files:
        if name.endswith(("index.md", ".es.md")):
            continue
        file_name = os.path.abspath(os.path.join(root_dir, name))
        # Search for the key including its colon, like the "categories:"
        # lookup does; a bare "tags" also matched any word containing it.
        if search_str(file_name, "tags:"):
            tags += 1
print("Files that have a tags: ", tags)
""" files with both category and tags """ | |
# Files in which search_str finds both "tags:" and "categories:" (index pages
# and Spanish translations are skipped).
both_cat_and_tags = 0
for root_dir, cur_dir, files in os.walk('./'):
    # Hidden directories are removed in place so they are not traversed.
    cur_dir[:] = [sub for sub in cur_dir if not sub.startswith('.')]
    for name in files:
        if name.endswith(("index.md", ".es.md")):
            continue
        file_name = os.path.abspath(os.path.join(root_dir, name))
        if all(search_str(file_name, key) for key in ("tags:", "categories:")):
            both_cat_and_tags += 1
print('files with both category and tags: ', both_cat_and_tags)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment