Skip to content

Instantly share code, notes, and snippets.

@olitreadwell
Created August 29, 2023 19:31
Show Gist options
  • Save olitreadwell/5b852c84b81d1824ec50ef41a1659877 to your computer and use it in GitHub Desktop.
Save olitreadwell/5b852c84b81d1824ec50ef41a1659877 to your computer and use it in GitHub Desktop.
Download Flatiron phase submissions: projects, videos, and blogs
#!/bin/bash
# Append one timestamped line to the run log.
# Globals:   processed_dir (read) - directory containing process_log.txt
# Arguments: $1 - text to record
# Outputs:   appends "YYYY-MM-DD HH:MM:SS - <text>" to the log file
log_message() {
  local text="$1"
  local stamp
  stamp="$(date +"%Y-%m-%d %H:%M:%S")"
  printf '%s - %s\n' "$stamp" "$text" >> "${processed_dir}/process_log.txt"
}
# Extract the submitter's name from the <title> line of an HTML file.
# The title line is split on ": " and the third piece is taken as the name.
# It is then normalised: "</title>" and commas are removed, spaces become
# underscores, and letters are lower-cased.
# Arguments: $1 - path to the HTML file
# Outputs:   the normalised name on stdout (always a single line)
# Returns:   0 on success, 1 if no name could be extracted (also logged)
extract_name() {
  local file="$1"
  local name
  # Only the first <title> line is used so the result can never span several
  # lines. Carriage returns are dropped so CRLF files do not leave a "\r"
  # inside directory names and CSV rows.
  name=$(awk -F': ' '/<title>/{print $3; exit}' "$file" \
    | sed 's/<\/title>//g' \
    | tr -d ',\r' \
    | tr ' ' '_' \
    | tr '[:upper:]' '[:lower:]')
  if [ -z "$name" ]; then
    log_message "ERROR: Unable to extract name from $file."
    return 1
  fi
  echo "$name"
}
# Extract the submitted URL from an HTML file.
# The first line containing "<a href" is split on double quotes and the
# second piece (the first quoted string on that line) is taken as the URL.
# Arguments: $1 - path to the HTML file
# Outputs:   the URL on stdout (always a single line)
# Returns:   0 on success, 1 if no URL could be extracted (also logged)
extract_url() {
  local file="$1"
  local url
  # Stop at the first matching line: a multi-line result would corrupt the
  # CSV row and the clone URL built from it.
  url=$(awk -F'"' '/<a href/{print $2; exit}' "$file")
  if [ -z "$url" ]; then
    log_message "ERROR: Unable to extract URL from $file."
    return 1
  fi
  echo "$url"
}
# Make sure a directory exists.
# Arguments: $1 - directory path
# Logs an INFO line when the directory is already present and an ERROR line
# when mkdir fails; nothing is logged on successful creation.
create_directory() {
  local target="$1"
  if [ -d "$target" ]; then
    log_message "INFO: Directory $target already exists."
    return
  fi
  if ! mkdir "$target"; then
    log_message "ERROR: Failed to create directory $target."
  fi
}
# Create a file if it is missing (or refresh its timestamp if it exists).
# Arguments: $1 - file path
# Logs an ERROR line when touch fails.
create_file() {
  local path="$1"
  if ! touch "$path"; then
    log_message "ERROR: Failed to create file $path."
  fi
}
# Convert an HTTPS repository URL into its SSH clone form.
#   https://github.com/owner/repo  ->  git@github.com:owner/repo.git
# Only the host, owner and repository segments are used; deeper path
# segments (e.g. /tree/main) are ignored.
# Arguments: $1 - repository URL
# Outputs:   the SSH URL on stdout
convert_to_ssh() {
  local url="$1"
  local rest host owner repo _extra
  # Drop the "scheme://" prefix, then split the remainder on "/".
  rest="${url#*://}"
  IFS='/' read -r host owner repo _extra <<< "$rest"
  # Strip any ?query or #fragment glued to the repository segment.
  repo="${repo%%[?#]*}"
  # Avoid producing "repo.git.git" when the URL already ends in ".git".
  repo="${repo%.git}"
  printf 'git@%s:%s/%s.git\n' "$host" "$owner" "$repo"
}
# Write the CSV header row, but only when the file does not exist yet.
# Arguments: $1 - path to the CSV file
initialize_csv() {
  local target="$1"
  # An existing file is left untouched.
  [ -f "$target" ] && return 0
  printf '%s\n' "name,project url,video url,blog url" > "$target"
}
# Check whether a row for the given user is already present in the CSV.
# Arguments: $1 - user name (compared against the first column)
#            $2 - path to the CSV file
# Returns:   0 if a row whose first field equals the name exists,
#            non-zero otherwise
user_exists_in_csv() {
  local name="$1"
  local csv_file="$2"
  # Compare the first column as an exact string instead of interpolating the
  # name into a regex: characters such as "." or "*" in a name must not act
  # as wildcards. Appending "" forces a string (not numeric) comparison.
  # Note: awk -v processes backslash escapes in the assigned value.
  awk -F, -v name="$name" \
    '($1 "") == (name "") { found = 1; exit } END { exit !found }' \
    "$csv_file"
}
# Add a new row for a user, placing the URL in the column that matches the
# content type (columns: name, project url, video url, blog url).
# Arguments: $1 - user name
#            $2 - content type: "GitHub Repo", "Video" or "Blog"
#            $3 - URL to store
#            $4 - path to the CSV file
# An unknown content type is logged and nothing is written.
append_entry_to_csv() {
  local name="$1"
  local type="$2"
  local url="$3"
  local csv_file="$4"
  local row
  case "$type" in
    "GitHub Repo") row="$name,$url,," ;;
    "Video")       row="$name,,$url," ;;
    "Blog")        row="$name,,,$url" ;;
    *)
      log_message "Error: Unknown content type '$type' for user '$name'."
      return
      ;;
  esac
  printf '%s\n' "$row" >> "$csv_file"
}
# Update the URL column matching the content type on every row whose first
# field equals the user name (columns: name, project url, video url, blog url).
# The CSV is rewritten through "<csv>.tmp" and only replaced when the rewrite
# succeeded.
# Arguments: $1 - user name
#            $2 - content type: "GitHub Repo", "Video" or "Blog"
#            $3 - URL to store
#            $4 - path to the CSV file
# Returns:   0 on success, 1 on unknown type or when the rewrite fails
update_entry_in_csv() {
  local name="$1"
  local type="$2"
  local url="$3"
  local csv_file="$4"
  local tmp_file="${csv_file}.tmp"
  local column
  case "$type" in
    "GitHub Repo") column=2 ;;
    "Video")       column=3 ;;
    "Blog")        column=4 ;;
    *)
      # Return before touching any file: a stale .tmp left over from an
      # earlier run must never be moved over the real CSV.
      log_message "Error: Unknown content type '$type' for user '$name'."
      return 1
      ;;
  esac
  # Replace the CSV only if awk finished cleanly, so a failed rewrite cannot
  # truncate or corrupt the existing data.
  if awk -F, -v name="$name" -v url="$url" -v col="$column" \
      'BEGIN {OFS=","} $1 == name {$col = url} 1' "$csv_file" > "$tmp_file" \
      && mv -- "$tmp_file" "$csv_file"; then
    return 0
  fi
  rm -f -- "$tmp_file"
  log_message "Error updating CSV for user '$name' and content type '$type'."
  return 1
}
# Record a URL for a user in the CSV: update the user's existing row if
# there is one, otherwise add a new row. The header is written first when the
# CSV does not exist yet.
# Arguments: $1 - user name
#            $2 - content type
#            $3 - URL
#            $4 - path to the CSV file
append_to_csv() {
  local name="$1"
  local type="$2"
  local url="$3"
  local csv_file="$4"
  initialize_csv "$csv_file"
  if ! user_exists_in_csv "$name" "$csv_file"; then
    append_entry_to_csv "$name" "$type" "$url" "$csv_file"
    return
  fi
  update_entry_in_csv "$name" "$type" "$url" "$csv_file"
}
# Append a "<type>: <url>" line to the user's notes file, located at
# <processed_dir>/<name>/<name>_notes.md.
# Globals:   processed_dir (read)
# Arguments: $1 - user name
#            $2 - content type label
#            $3 - URL
# Logs an ERROR line when the write fails.
append_url_to_notes() {
  local name="$1"
  local type="$2"
  local url="$3"
  local notes_path="${processed_dir}/${name}/${name}_notes.md"
  if ! printf '%s: %s\n' "$type" "$url" >> "$notes_path"; then
    log_message "ERROR: Failed to append URL to notes for user $name."
  fi
}
# Clone a user's GitHub repository over SSH into
# <processed_dir>/<name>/<repo>.
# Globals:   processed_dir (read)
# Arguments: $1 - user name (used as the parent directory name)
#            $2 - HTTPS URL of the repository
# Returns:   0 on success, 1 if git is unavailable or the clone fails
#            (both cases are logged)
process_github_repo() {
  local name="$1"
  local url="$2"
  local ssh_url dir_for_repo
  if ! command -v git > /dev/null 2>&1; then
    log_message "ERROR: Git is not installed."
    return 1
  fi
  ssh_url=$(convert_to_ssh "$url")
  # The target directory is named after the repository (SSH URL minus ".git").
  dir_for_repo="${processed_dir}/${name}/$(basename "${ssh_url%.git}")"
  # Quote both arguments so spaces or glob characters in the URL or path are
  # not word-split; "--" stops git from parsing either one as an option.
  if ! git clone -- "$ssh_url" "$dir_for_repo"; then
    log_message "ERROR: Failed to clone GitHub repo from $url for user $name."
    return 1
  fi
}
# Process every *.html submission file in a directory: extract the user's
# name and URL, make sure the user's directory and notes file exist, record
# the URL in the notes and in the shared CSV, and clone the repository for
# GitHub submissions.
# Globals:   processed_dir (read)
# Arguments: $1 - directory containing the downloaded .html files
#            $2 - content type: "GitHub Repo", "Video" or "Blog"
process_content() {
  local dir="$1"
  local type="$2"
  local csv_file="${processed_dir}/all_links.csv"
  local file name url dir_name file_name
  for file in "$dir"/*.html; do
    # When nothing matches, the glob stays as the literal "*.html" pattern;
    # skip it instead of trying to parse a file that does not exist.
    [[ -e "$file" ]] || continue
    name=$(extract_name "$file")
    url=$(extract_url "$file")
    if [[ -z "$name" || -z "$url" ]]; then
      log_message "WARNING: Skipping file due to error: $file"
      continue
    fi
    dir_name="${processed_dir}/${name}"
    file_name="${dir_name}/${name}_notes.md"
    create_directory "$dir_name"
    create_file "$file_name"
    append_url_to_notes "$name" "$type" "$url"
    append_to_csv "$name" "$type" "$url" "$csv_file"
    # Require the "/" after the host so look-alike hosts such as
    # "github.community" are not treated as GitHub repositories.
    if [[ "$type" == "GitHub Repo" && "$url" == https://github.com/* ]]; then
      process_github_repo "$name" "$url"
    fi
  done
}
# Entry point.
# Usage: <script> <repos_dir> <videos_dir> <blogs_dir>
# Each argument is a directory of downloaded .html submission files.
# Results are written under ./processed (per-user notes, all_links.csv,
# process_log.txt, and cloned repositories).

# Fail early with a usage hint when arguments are missing, instead of the
# less helpful "directories do not exist" message.
if [ "$#" -lt 3 ]; then
  echo "Usage: $0 <repos_dir> <videos_dir> <blogs_dir>" >&2
  exit 1
fi
# Directories for repos, videos, and blogs
repos_dir="$1"
videos_dir="$2"
blogs_dir="$3"
# Check if directories exist (diagnostics go to stderr)
if [ ! -d "$repos_dir" ] || [ ! -d "$videos_dir" ] || [ ! -d "$blogs_dir" ]; then
  echo "One or more directories do not exist" >&2
  exit 1
fi
# Processed directory; read as a global by the logging and processing helpers
processed_dir="./processed"
# Ensure processed directory exists
create_directory "$processed_dir"
# Initialize CSV with headers
initialize_csv "${processed_dir}/all_links.csv"
# Process HTML files in the blogs, videos, and repos directories
process_content "$blogs_dir" "Blog"
process_content "$videos_dir" "Video"
process_content "$repos_dir" "GitHub Repo"
@olitreadwell
Copy link
Author

Credit to @thompsonplyler for originally writing this.

Created with the help of ChatGPT-4.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment