#!/usr/bin/env bash
# llamasoft/destiny_list.sh

## destiny_list.sh

# Server-relative path of a recent aggregated JSON asset database.
# Used whenever the live manifest cannot be consulted: no API key was given,
# the request failed, or the response did not contain the expected key.
fallback_aggregate_path="/common/destiny2_content/json/en/aggregate-2fbe1829-dfcd-44ec-84d3-bb04a3777dc1.json"

#######################################
# Print the server-relative path of the English aggregated JSON asset database.
# Globals:   fallback_aggregate_path (read)
# Arguments: $1 - Bungie API key; may be empty
# Outputs:   the path on stdout; a warning on stderr when the manifest
#            lookup fails and the fall-back path is used instead
# Side effects: writes "manifest.json" in the current directory when a
#            non-empty key is given
# Returns:   0 always
#######################################
resolve_aggregate_path() {
    local key="$1"
    local manifest_path

    if [[ -z "${key}" ]]; then
        printf '%s\n' "${fallback_aggregate_path}"
        return 0
    fi

    # --fail makes curl return non-zero on an HTTP error instead of silently
    # saving the error body. jq --exit-status returns non-zero when the
    # looked-up key is missing (null), so an error response that still parses
    # as JSON is caught too.
    if curl --fail --compressed "https://www.bungie.net/Platform/Destiny2/Manifest/" -H "x-api-key: ${key}" -o "manifest.json" \
        && manifest_path=$(jq --exit-status --raw-output '.Response.jsonWorldContentPaths.en' "manifest.json"); then
        printf '%s\n' "${manifest_path}"
    else
        echo "warning: could not read the Destiny 2 manifest, using the fall-back aggregate path" >&2
        printf '%s\n' "${fallback_aggregate_path}"
    fi
}

# Fetching the Destiny 2 manifest definition
api_key=""

# Extracting the name of the aggregated JSON asset database.
aggregate_path=$(resolve_aggregate_path "${api_key}")

# The local copy is stored under the remote file's own name.
aggregate_name=$(basename "${aggregate_path}")

if [[ ! -f "${aggregate_name}" ]]; then
    # The aggregate JSON file is rather large, only download it if required.
    # Download to a ".part" file and rename it only on success: otherwise an
    # HTTP error page or an interrupted transfer would be left under the final
    # name and the existence check above would skip the download on every
    # later run.
    # The leading slash of the path is stripped so the URL has no "//".
    if curl --fail -L --compressed -o "${aggregate_name}.part" "https://www.bungie.net/${aggregate_path#/}"; then
        mv -- "${aggregate_name}.part" "${aggregate_name}"
    else
        rm -f -- "${aggregate_name}.part"
        echo "error: failed to download https://www.bungie.net/${aggregate_path#/}" >&2
        exit 1
    fi
fi

# Visit every JSON object at any depth of the aggregate file and print, in the
# object's own key order, the values stored under the keys "description",
# "subtitle" and "name". Objects carrying none of these keys print nothing.
jq -r '
    ..
    | objects
    | to_entries[]
    | select(.key == "description" or .key == "subtitle" or .key == "name")
    | .value
' "${aggregate_name}" > "destiny.txt"

# Descriptions repeat a lot, so the raw dump is full of identical lines.
# Collapse it to one copy of each line and drop lines that are empty or
# consist only of spaces, so later frequency counts are not skewed.
{
    sort -u "destiny.txt" | grep -v '^[ ]*$'
} > "destiny.uniq"

#######################################
# Tally identical input lines and print them as "<count> <word>".
# If a word appears in multiple cases, only the all-lowercase version is kept
# when present: proper nouns always appear capitalized, but regular words may
# be capitalized just because they start a sentence.
# Inputs:  one word per line on stdin
# Outputs: one "%5d %s" line per kept word on stdout, in unspecified order
#######################################
count_words() {
    # length($0) is used instead of a bare $0 pattern: a bare $0 is evaluated
    # numerically for numeric-looking input, so a token such as "0e5" would be
    # treated as false and silently dropped.
    awk 'length($0) > 0 { freq[$0]++; }
END {
    for (word in freq) {
        if ( tolower(word) == word || !(tolower(word) in freq) ) {
            printf("%5d %s\n", freq[word], word);
        }
    }
}'
}

# Ok, this is a big one.
# The input file contains a lot of Unicode punctuation that needs to be dumbed down to ASCII if possible.
#   uconv applies the transliteration rules given with -x (Any-Publishing, then
#   Latin-ASCII restricted to punctuation and symbol characters).
# tr then turns every byte that is not an ASCII letter, digit, dash or single
#   quote into a newline, which both discards whatever could not be
#   transliterated and splits the text into one word per line.
# grep removes lines that are empty, entirely numeric, or begin/end with a dash or single quote.
# count_words produces the per-word frequencies, and sort puts the most frequent words first.
uconv --fallback -f UTF8 -t UTF8 -x ':: Any-Publishing; :: [[:Punctuation:][:Symbol:]] Latin-ASCII;' --callback skip < "destiny.uniq" \
| tr -c "A-Za-z0-9-'" '\n' \
| grep -v -e '^[0-9]*$' -e "^[-']" -e "[-']$" \
| count_words \
| sort -rn > "destiny.freq"

	# Second copy of the word-list pipeline.
	# The pipes in this copy had been escaped as "\|", which made the jq filter
	# invalid and turned every shell pipeline into a single command with literal
	# "|" arguments; they are real pipes again here.

	# Fetching the Destiny 2 manifest definition
	api_key=""

	# Use a recent aggregated JSON file as a fall-back.
	aggregate_path="/common/destiny2_content/json/en/aggregate-2fbe1829-dfcd-44ec-84d3-bb04a3777dc1.json"

	if [[ -n "${api_key}" ]]; then
	    # Extracting the name of the aggregated JSON asset database.
	    # --fail / --exit-status make a failed request or a response without the
	    # expected key detectable, so the fall-back path above stays in effect.
	    if curl --fail --compressed "https://www.bungie.net/Platform/Destiny2/Manifest/" -H "x-api-key: ${api_key}" -o "manifest.json" \
	        && manifest_path=$(jq --exit-status --raw-output '.Response.jsonWorldContentPaths.en' "manifest.json"); then
	        aggregate_path="${manifest_path}"
	    else
	        echo "warning: could not read the Destiny 2 manifest, using the fall-back aggregate path" >&2
	    fi
	fi

	aggregate_name=$(basename "${aggregate_path}")

	if [[ ! -f "${aggregate_name}" ]]; then
	    # The aggregate JSON file is rather large, only download it if required.
	    # Download to a ".part" file and rename on success so that an error page
	    # or a truncated transfer is never mistaken for a complete download.
	    if curl --fail -L --compressed -o "${aggregate_name}.part" "https://www.bungie.net/${aggregate_path#/}"; then
	        mv -- "${aggregate_name}.part" "${aggregate_name}"
	    else
	        rm -f -- "${aggregate_name}.part"
	        echo "error: failed to download https://www.bungie.net/${aggregate_path#/}" >&2
	        exit 1
	    fi
	fi

	# Recursively extracting all values from keys with the names "description", "subtitle", or "name"
	jq --raw-output '
	    ..
	    | objects
	    | with_entries(select(.key == ("description","subtitle","name")))
	    | select(. != {})
	    | to_entries
	    | .[]
	    | .value
	' "${aggregate_name}" > "destiny.txt"

	# There are a lot of fully duplicated lines because of descriptions.
	# Before we take a frequency, make sure each line is unique and non-empty.
	sort -u "destiny.txt" | grep -v '^[ ]*$' > "destiny.uniq"

	# Ok, this is a big one.
	# The input file contains a lot of Unicode punctuation that needs to be dumbed down to ASCII if possible.
	#   Whatever is still not an ASCII letter, digit, dash or single quote after
	#   transliteration is turned into a newline by tr.
	# Split the transliterated output into words (alpha-numeric, dash, or single-quote) characters.
	# Remove lines that are entirely numeric or begin/end with punctuation.
	# If a word appears in multiple cases, only keep the all-lowercase version if present.
	#   Proper nouns will always appear capitalized, but regular words may be capitalized if they start a sentence.
	#   This prevents duplicates of regular words that just happen to show up at the start of a sentence.
	# The awk guard uses length($0) because a bare $0 is evaluated numerically
	# for numeric-looking tokens and would drop ones equal to zero (e.g. "0e5").
	uconv --fallback -f UTF8 -t UTF8 -x ':: Any-Publishing; :: [[:Punctuation:][:Symbol:]] Latin-ASCII;' --callback skip < "destiny.uniq" \
	| tr -c "A-Za-z0-9-'" '\n' \
	| grep -v -e '^[0-9]*$' -e "^[-']" -e "[-']$" \
	| awk 'length($0) > 0 { freq[$0]++; }
	END {
	    for (word in freq) {
	        if ( tolower(word) == word || !(tolower(word) in freq) ) {
	            printf("%5d %s\n", freq[word], word);
	        }
	    }
	}' | sort -rn > "destiny.freq"