Skip to content

Instantly share code, notes, and snippets.

@tvpeter
Created January 20, 2023 02:25
Show Gist options
  • Save tvpeter/d3fd0b11cc8ca9e94e26337fb4b190a2 to your computer and use it in GitHub Desktop.
Save tvpeter/d3fd0b11cc8ca9e94e26337fb4b190a2 to your computer and use it in GitHub Desktop.
Get statistics of various files on the bitcointranscripts repo
from fileinput import filename
from math import fabs
import os
import glob
import re
# File-count statistics for the tree rooted at the current directory.
#
# All four tallies share one traversal: they apply different suffix filters
# to the same file names, so walking the tree once is enough.
count = 0               # every file
counter = 0             # files not named *index.md
not_es_or_pt_files = 0  # files that are not Spanish translations
not_es_pt_or_index = 0  # files that are neither translations nor index pages

es_suffixes = (".es.md", ".es")
# ".pt" alone never matches a Markdown translation such as "talk.pt.md", so
# ".pt.md" is listed as well; otherwise Portuguese files would still be
# counted by the "not index, pt or es" tally.
translation_or_index_suffixes = es_suffixes + (".pt.md", ".pt", "index.md")

for root_dir, cur_dir, files in os.walk(r'./'):
    # Prune hidden directories in place so os.walk does not descend into them.
    cur_dir[:] = [d for d in cur_dir if not d.startswith('.')]
    for file in files:
        count += 1
        if not file.endswith("index.md"):
            counter += 1
        if not file.endswith(es_suffixes):
            not_es_or_pt_files += 1
        if not file.endswith(translation_or_index_suffixes):
            not_es_pt_or_index += 1

print('Total files:', count)
print("Files that are not index.md: ", counter)
print("Files that does not end with es: ", not_es_or_pt_files)
print("Files that are not index, pt or es: ", not_es_pt_or_index)
def search_str(file_path, word, max_lines=17):
    """Report whether ``word`` occurs near the top of a text file.

    The comparison is case-insensitive and is done line by line, so a match
    cannot span a line break.

    Args:
        file_path: Path of the file to inspect. It is decoded as UTF-8 and
            undecodable bytes are ignored.
        word: Substring to look for.
        max_lines: Number of leading lines to examine. Defaults to 17, the
            window this function has always used. Pass ``None`` to scan the
            whole file.

    Returns:
        True if any examined line contains ``word``, otherwise False.
    """
    needle = word.lower()  # lowercase once instead of once per line
    with open(file_path, encoding="utf8", errors='ignore') as file:
        for line_number, line in enumerate(file, start=1):
            if max_lines is not None and line_number > max_lines:
                break
            if needle in line.lower():
                return True
    return False
# Transcripts with no author information near the top of the file.
# Index pages, Spanish translations, Portuguese index pages and metadata
# files are skipped. A file counts as "without author" when search_str finds
# none of the keys below; each such path is printed and appended to
# files_without_author.txt in the current directory.
no_author_files = 0
for root_dir, cur_dir, files in os.walk(r'./'):
    # Prune hidden directories in place so os.walk does not descend into them.
    cur_dir[:] = [sub for sub in cur_dir if not sub.startswith('.')]
    for name in files:
        if name.endswith(("index.md", ".es.md", "_index.pt.md", "metadata.md")):
            continue
        file_path = os.path.abspath(os.path.join(root_dir, name))
        # any() stops at the first key that is found, like the chained
        # "not ... and not ..." test it replaces.
        if any(search_str(file_path, key) for key in ("name:", "speaker:", "speakers:")):
            continue
        no_author_files += 1
        print(file_path)
        with open('files_without_author.txt', 'a') as fd:
            fd.write(f'\n{file_path}')
print("Files without author name: ", no_author_files)
# Author-name and video-link statistics.
#
# The three tallies apply the same file filter, so they are gathered in one
# traversal. Index pages and Spanish translations are skipped.
with_author_name = 0    # "name" appears in the lines search_str inspects
without_video_link = 0  # no YouTube watch URL in those lines
video_links = 0         # a YouTube watch URL in those lines
for root_dir, cur_dir, files in os.walk(r'./'):
    # Prune hidden directories in place so os.walk does not descend into them.
    cur_dir[:] = [d for d in cur_dir if not d.startswith('.')]
    for name in files:
        if name.endswith(("index.md", ".es.md")):
            continue
        file_name = os.path.abspath(os.path.join(root_dir, name))
        # search_str compares case-insensitively, so one lookup covers the
        # "name" / "Name" / "NAME" variants.
        if search_str(file_name, "name"):
            with_author_name += 1
        # Each file lands in exactly one of the two video tallies.
        if search_str(file_name, "https://www.youtube.com/watch"):
            video_links += 1
        else:
            without_video_link += 1
print("Files with author name in the body: ", with_author_name)
print("Files without video links: ", without_video_link)
print("Files with video links: ", video_links)
def find_append_to_file(filename, find, insert):
    """Splice ``insert`` into a file at the first occurrence of ``find``.

    The whole file is read and ``find`` is looked up in ``repr()`` of the
    content. When that yields an offset ``index`` greater than zero, the file
    is rewritten as::

        content[:index] + os.linesep + insert + content[index + len(find) - 1:]

    In every other case (no match, or a match at the very start of the
    content) the file is left untouched.

    NOTE(review): because the search runs on ``repr(content)``, the offset
    only lines up with the real text when nothing before the match needs
    escaping (newlines, tabs, backslashes, ...). This looks unintended, but
    callers may be passing escaped patterns -- confirm before switching to a
    plain ``str.find``.

    Args:
        filename: Path of the file to modify in place.
        find: Text to search for (matched against the repr of the content).
        insert: Text written, after a line separator, at the match offset.

    Returns:
        None.
    """
    with open(filename, 'r+') as file:
        lines = file.read()
        # repr() adds one leading quote character, hence the "- 1".
        index = repr(lines).find(find) - 1
        if index <= 0:
            return
        len_found = len(find) - 1
        updated = lines[:index] + os.linesep + insert + lines[index + len_found:]
        # Rewrite from the start instead of seeking to a character offset,
        # which is not a valid text-mode position once the content has
        # multi-byte characters. Then truncate, so that a result shorter than
        # the original leaves no stale bytes at the end of the file.
        file.seek(0)
        file.write(updated)
        file.truncate()
# For every non-index file under ./adopting-bitcoin, scan for a line holding a
# YouTube watch URL and, on a hit, write lines back through the same handle.
# NOTE(review): this modifies files in place and looks unfinished -- verify
# before running it against a working tree:
#   * readlines() is called while the file is being iterated, so `rest`
#     presumably holds only the lines after the current one, not the whole
#     file;
#   * `line_no` is the 1-based line number within the whole file, yet it is
#     used as an index into `rest`, which can select an unrelated line or
#     raise IndexError;
#   * the write happens wherever the handle is positioned after readlines(),
#     which appears to be the end of the file.
for root_dir, cur_dir, files in os.walk(r'./adopting-bitcoin'):
    # Prune hidden directories in place so os.walk does not descend into them.
    cur_dir[:] = [sub for sub in cur_dir if not sub.startswith('.')]
    for name in files:
        if name.endswith("index.md"):
            continue
        file_name = os.path.abspath(os.path.join(root_dir, name))
        youtube_link = "https://www.youtube.com/watch"
        with open(file_name, "r+") as transcript:
            for line_no, line in enumerate(transcript, 1):
                if youtube_link not in line:
                    continue
                rest = transcript.readlines()
                rest.insert(6, rest[line_no])
                transcript.write("".join(rest))
# old_lines = lines[index + len_found:]
# file.seek(index)
# file.write(os.linesep + insert)
# file.write(old_lines)
def find_date(date_file, pattern):
    """Report whether a file's text contains a match for ``pattern``.

    Args:
        date_file: Path of the file to read. It is decoded as UTF-8 and
            undecodable bytes are ignored.
        pattern: Regular expression searched for anywhere in the content.

    Returns:
        True if the pattern matches at least once, otherwise False.
    """
    # The context manager closes the handle; it used to be left open.
    with open(date_file, encoding="utf8", errors='ignore') as f:
        content = f.read()
    # One match is enough, so there is no need to collect all of them.
    return re.search(pattern, content) is not None
# Files with a date
files_with_date = no_with_year = 0
# Raw strings keep the regex escapes intact without invalid-escape warnings;
# the pattern text itself is unchanged.
pattern = r"\d{4}[/-]\d{2}[/-]\d{2}"            # year first
year_last_pattern = r"\d{2}[/-]\d{2}[/-]\d{4}"  # year last
year_pattern = r"\d{4}"                          # any four consecutive digits
# The directory walk had been commented out, so the loop body ran once over
# whatever values an earlier loop left in root_dir / cur_dir / files. It is
# restored here over the whole tree, matching the "without a date" statistics.
# NOTE(review): the commented-out line walked './adopting-bitcoin' only --
# narrow the root again if that scope was intended.
for root_dir, cur_dir, files in os.walk(r'./'):
    # Prune hidden directories in place so os.walk does not descend into them.
    cur_dir[:] = [d for d in cur_dir if not d.startswith('.')]
    for name in files:
        # Skip index pages and Spanish translations. The previous
        # "not A or not B" test was true for every name and skipped nothing.
        if name.endswith(("index.md", ".es.md")):
            continue
        file_name = os.path.abspath(os.path.join(root_dir, name))
        if find_date(file_name, year_pattern):
            no_with_year += 1
        if find_date(file_name, pattern):
            files_with_date += 1
print("Files with year date: yyyy ", no_with_year)
print("Files with a date. Format yyyy-mm-mm: ", files_with_date)
# Files whose content has no year-first or year-last date.
# Index pages and Spanish translations are skipped. Each dateless file is
# printed and appended to files_without_date.txt in the current directory,
# then its absolute path is checked for a full date and for a bare
# four-digit year.
without_date = filename_with_year = only_year = 0
for root_dir, cur_dir, files in os.walk(r'./'):
    # Prune hidden directories in place so os.walk does not descend into them.
    cur_dir[:] = [sub for sub in cur_dir if not sub.startswith('.')]
    for name in files:
        if name.endswith(("index.md", ".es.md")):
            continue
        file_name = os.path.abspath(os.path.join(root_dir, name))
        if find_date(file_name, pattern) or find_date(file_name, year_last_pattern):
            continue
        without_date += 1
        print(file_name)
        with open('files_without_date.txt', 'a') as fd:
            fd.write(f'\n{file_name}')
        # These searches run on the absolute path, so digits in directory
        # names count as well as digits in the file name itself.
        if re.search(pattern, file_name) is not None:
            filename_with_year += 1
        if re.search(year_pattern, file_name) is not None:
            only_year += 1
print("Files without a date. Format yyyy-mm-mm or dd-mm-yyyy: ", without_date)
print("Files without a date in body but has date [yyyy-mm-dd] in filename ", filename_with_year)
print("Files without a date in body but has year [yyyy] in filename ", only_year)
# Tagged files: how many transcripts carry a "categories:" key, a "tags:"
# key, or both, in the lines search_str inspects.
#
# The three tallies apply the same file filter, so they are gathered in one
# traversal. Index pages and Spanish translations are skipped.
category_tag = 0
tags = 0
both_cat_and_tags = 0
for root_dir, cur_dir, files in os.walk(r'./'):
    # Prune hidden directories in place so os.walk does not descend into them.
    cur_dir[:] = [d for d in cur_dir if not d.startswith('.')]
    for name in files:
        if name.endswith(("index.md", ".es.md")):
            continue
        file_name = os.path.abspath(os.path.join(root_dir, name))
        has_categories = search_str(file_name, "categories:")
        # Look for the key "tags:", not the bare word "tags", so this tally
        # uses the same test as the combined one below.
        has_tags = search_str(file_name, "tags:")
        if has_categories:
            category_tag += 1
        if has_tags:
            tags += 1
        if has_tags and has_categories:
            both_cat_and_tags += 1
print("Files that have a category tag: ", category_tag)
print("Files that have a tags: ", tags)
print('files with both category and tags: ', both_cat_and_tags)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment