Skip to content

Instantly share code, notes, and snippets.

@hbeni
Created March 8, 2025 18:34

Revisions

  1. hbeni created this gist Mar 8, 2025.
    144 changes: 144 additions & 0 deletions therion-statistics-scrapauthors.sh
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,144 @@
    #!/bin/bash
    #
    # Generate statistics for scraps.
    #
    # It walks the directory tree recursively, searches
    # for th2 files and evaluates the -author option of each scrap.
    #
    # Usage: just call the bash script from the directory you want to generate
    # statistics from.
    # If you give arg1, this directory will be the root instead.
    #
    # The program will not touch source data and writes to STDOUT.
    #
    # @author beni@hallinger.org 08.03.2025
    #

    # Remember the starting directory so it can be restored at the end.
    oldcwd=$(pwd)

    # If arg1 was given, use that as directory root.
    # The argument is quoted so paths containing whitespace or glob characters
    # work, and "--" keeps a path starting with "-" from being read as an option.
    if [[ -n "$1" ]]; then
      cd -- "$1" || exit 1
    fi

    # Pseudo author under which the overall totals are accumulated.
    total_author_name="ZZ. INSGESAMT"

    declare -a all_authors       # Array to store unique authors
    declare -A all_results       # Associative array for counts, keyed "<author>_<list>"
    declare -A curScrap_authors  # Associative array for the current scrap's authors

    # Add an author to the global list unless the name is already present.
    #
    # Globals:   all_authors (read, appended to)
    # Arguments: $1 - author name
    add_author_to_list() {
      local author="$1"
      local existing_author  # keep the loop variable out of the global scope

      for existing_author in "${all_authors[@]}"; do
        # Quoted right-hand side: compare literally, not as a glob pattern.
        [[ "$existing_author" == "$author" ]] && return 0
      done

      all_authors+=("$author")
    }

    # Increment one counter of an author and make sure the author is listed.
    #
    # Globals:   all_results (written); all_authors (via add_author_to_list)
    # Arguments: $1 - author name; an empty name is counted as "????????"
    #            $2 - counter name, e.g. "scraps" or "objects"
    increment_author_count() {
      local author="$1"
      local list="$2"
      [[ -z "$author" ]] && author="????????"

      local key="${author}_$list"

      # The author text comes from the parsed files. It is therefore kept out
      # of arithmetic context: in ((all_results[$key]++)) bash can expand the
      # subscript text a second time, so a name containing "$(...)" would be
      # evaluated. Reading and assigning through ordinary expansions uses the
      # key literally.
      local count="${all_results["$key"]:-0}"
      all_results["$key"]=$(( count + 1 ))

      add_author_to_list "$author" # Ensure author is in global list
    }


    # Hook for scrap start lines.
    #
    # Counts the scrap for the total pseudo author and once for every
    # `-author <date> "<name>"` option found in $line, and records those names
    # as the authors of the current scrap.
    #
    # Globals: line (read only), total_author_name (read),
    #          curScrap_authors (reset, then filled; author names are the keys)
    onScrapStart() {
      local rest="$line"  # work on a copy; the caller still needs $line
      local name matched

      # add to total statistics
      increment_author_count "$total_author_name" "scraps"

      # get authors of the scrap command
      curScrap_authors=()
      while [[ $rest =~ -author[[:space:]]+[0-9.\-]+[[:space:]]+\"([^\"]+)\" ]]; do
        matched="${BASH_REMATCH[0]}"
        name="${BASH_REMATCH[1]}"

        # curScrap_authors is an associative array, so store the name as an
        # explicit key; appending a bare value needs a recent bash.
        curScrap_authors["$name"]=1
        increment_author_count "$name" "scraps"

        # Continue behind the option that was just consumed. Cutting at the
        # matched text (not at the first literal "-author") keeps a malformed
        # earlier -author from making a later author match twice.
        rest=${rest#*"$matched"}
      done
    }

    # Hook for lines inside the scrap.
    #
    # If $line starts with "line", "point" or "area", one object is counted
    # for the total pseudo author, and for every author of the current scrap
    # both the per-kind counter and the author's overall "objects" counter are
    # incremented. Any other line is ignored.
    #
    # Globals: line (read), total_author_name (read),
    #          curScrap_authors (read; author names are the keys)
    onScrapLine() {
      local kind author

      # Classify the line once instead of re-testing it for every counter.
      case "$line" in
        line*)  kind="object_line" ;;
        point*) kind="object_point" ;;
        area*)  kind="object_area" ;;
        *)      return 0 ;;
      esac

      # add to total statistics
      increment_author_count "$total_author_name" "objects"

      for author in "${!curScrap_authors[@]}"; do
        increment_author_count "$author" "$kind"    # author's individual objects
        increment_author_count "$author" "objects"  # author's total count
      done
    }

    # Hook for the end of a scrap.
    #
    # Globals: curScrap_authors (emptied)
    onScrapEnd() {
      # Forget the authors of the scrap that just ended.
      curScrap_authors=()
    }



    ###########
    # RUNTIME #
    ###########


    # Read all th2 files found below the current directory as one stream and
    # call the evaluators line by line. The stream comes from a process
    # substitution, so no temporary file has to be created or cleaned up, and
    # "-exec ... +" lets a single cat handle many files.
    #
    # "read" is used without -r and without IFS= on purpose:
    #  - leading/trailing whitespace is stripped, which the ^scrap/^line/...
    #    prefix tests in the hooks rely on for indented lines;
    #  - a trailing backslash joins the next line. NOTE(review): presumably
    #    wanted, as Therion uses a trailing backslash for line continuation --
    #    confirm before adding -r.
    # The "|| [[ -n ... ]]" part keeps a final line without a newline.
    while read line || [[ -n "$line" ]]; do
      [[ $line =~ ^scrap ]] && onScrapStart
      # NOTE(review): this tests the global author list, which stays non-empty
      # once the first scrap was seen, so onScrapLine is also called for lines
      # outside of scraps. Kept as is to leave the resulting numbers unchanged.
      [[ ${#all_authors[@]} -gt 0 ]] && onScrapLine
      [[ $line =~ ^endscrap ]] && onScrapEnd
    done < <(find . -name '*.th2' -exec cat -- {} +)


    # Print the final result as semicolon-separated values: a header row,
    # then one row per author in the order the authors were first seen.
    # Counters that were never incremented are printed as empty fields.
    printf '%s\n' '"Autor";"Anzahl Scraps";"Anzahl Scrapobjekte";"Anzahl ScrapLines";"Anzahl ScrapPoints";"Anzahl ScrapAreas"'
    for author in "${all_authors[@]}"; do
      scraps="${all_results[${author}_scraps]}"
      objects="${all_results[${author}_objects]}"
      objects_line="${all_results[${author}_object_line]}"
      objects_point="${all_results[${author}_object_point]}"
      objects_area="${all_results[${author}_object_area]}"

      # printf with a fixed format prints the author text verbatim.
      printf '"%s";%s;%s;%s;%s;%s\n' \
        "$author" "$scraps" "$objects" \
        "$objects_line" "$objects_point" "$objects_area"
    done


    # Return to the directory the script was started from.
    cd -- "$oldcwd"