Skip to content

Instantly share code, notes, and snippets.

@petermueller
Created May 6, 2023 05:56
Show Gist options
  • Save petermueller/a6678e1e620fa009b7bf4ec9b1455009 to your computer and use it in GitHub Desktop.
Save petermueller/a6678e1e620fa009b7bf4ec9b1455009 to your computer and use it in GitHub Desktop.
Content-Disposition Library ideas?
defmodule HttpUtils.Download do
@moduledoc """
A module for interacting with the `MyApp.Downloadable` protocol in a web context.
This module contains a collection of functions for common use cases,
such as sending chunked streams on a `t:Plug.Conn.t/0`.
"""
alias MyApp.Downloadable
# RFC 2616 Section 2.2
# Clarified by RFC 6266
# ====
# OCTET = <any 8-bit sequence of data>
# CHAR = <any US-ASCII character (octets 0 - 127)>
# UPALPHA = <any US-ASCII uppercase letter "A".."Z">
# LOALPHA = <any US-ASCII lowercase letter "a".."z">
# ALPHA = UPALPHA | LOALPHA
# DIGIT = <any US-ASCII digit "0".."9">
# CTL = <any US-ASCII control character
# (octets 0 - 31) and DEL (127)>
# CR = <US-ASCII CR, carriage return (13)>
# LF = <US-ASCII LF, linefeed (10)>
# SP = <US-ASCII SP, space (32)>
# HT = <US-ASCII HT, horizontal-tab (9)>
# <"> = <US-ASCII double-quote mark (34)>
# LWS = [CRLF] 1*( SP | HT )
# TEXT = <any OCTET except CTLs, but including LWS>
# quoted-string = ( <"> *(qdtext | quoted-pair ) <"> )
# qdtext = <any TEXT except <">>
# quoted-pair = "\" CHAR
# ====
# Intentionally not including CRLF-prefixed LWS, or quoted-pair, as they require multi-char matching, which
# would be better served by using an actual parser, a la NimbleParsec
# The spec also states TEXT as based off OCTET, but RFC 6266 explicitly suggests substituting
# letters like "ä" (Latin Small Letter A With Diaeresis) with US-ASCII "ae", even though it is a
# valid extended-ASCII (ISO-8859-1) character, octet 228 (but not US-ASCII). Based on this we're
# ignoring the part of the spec that says "TEXT = <any OCTET ..." and assuming they meant
# "<any CHAR ..."
# Printable US-ASCII only: SP (32) through "~" (126).
@text_chars Enum.to_list(32..126)
# Characters that may be emitted verbatim inside the quoted `filename="..."` value.
# `"` would terminate the quoted-string and `\` would start a quoted-pair (escaping whatever
# follows it, including the closing quote), so both are excluded. RFC 6266 Appendix D also
# recommends avoiding `\` in the `filename` parameter because some user agents do not
# implement escaping.
@qdtext_chars @text_chars -- [?", ?\\]
@quoted_string_chars @qdtext_chars
# Stored as single-codepoint strings so membership can be tested directly against the
# elements returned by `String.codepoints/1`.
@mapset_quoted_string_strings MapSet.new(@quoted_string_chars, &List.to_string([&1]))
# RFCs 5987 & 8187, Sections 3.2.1
# ====
# ext-value = charset "'" [ language ] "'" value-chars
# ; like RFC 2231's <extended-initial-value>
# ; (see [RFC2231], Section 7)
# Parameter extension value charset
# charset = "UTF-8" / mime-charset
# mime-charset = 1*mime-charsetc
# mime-charsetc = ALPHA / DIGIT
# / "!" / "#" / "$" / "%" / "&"
# / "+" / "-" / "^" / "_" / "`"
# / "{" / "}" / "~"
# ; as <mime-charset> in Section 2.3 of [RFC2978]
# ; except that the single quote is not included
# ; SHOULD be registered in the IANA charset registry
# @rfc_5987_parameter_extension_custom_charset_chars '!#$%&+-^_`{}~'
# value-chars = *( pct-encoded / attr-char )
# pct-encoded = "%" HEXDIG HEXDIG
# ; see [RFC3986], Section 2.1
# attr-char = ALPHA / DIGIT
# / "!" / "#" / "$" / "&" / "+" / "-" / "."
# / "^" / "_" / "`" / "|" / "~"
# ; token except ( "*" / "'" / "%" )
# ====
# The attr-char set from the grammar above, flattened into a list of byte values.
# The punctuation is written with the `~c` sigil because single-quoted charlist literals are
# deprecated; both forms produce the same list of integers.
@rfc_5987_parameter_extension_value_chars Enum.flat_map(
                                            [?A..?Z, ?a..?z, ?0..?9, ~c"!#$&+-.^_`|~"],
                                            &Enum.to_list/1
                                          )
# MapSet of integers (bytes) for fast membership checks when percent-encoding.
@mapset_param_ext_value_chars MapSet.new(@rfc_5987_parameter_extension_value_chars)
# Types
# The disposition types this module produces, as atoms.
@typep disposition_atom() :: :inline | :attachment
# A disposition as accepted from callers: one of the atoms above, or a string that
# `disposition_type/1` normalizes (case-insensitively) to one of them.
@type disposition() :: disposition_atom() | String.t()
# Public Functions
@doc false
@spec content_disposition(disposition()) :: String.t()
def content_disposition(disposition) do
  # Normalize first so that unsupported dispositions raise instead of being echoed back.
  normalized = disposition_type(disposition)
  to_string(normalized)
end
@doc ~S"""
Builds a standards-compliant `Content-Disposition` header value.

With only a disposition, the result is the bare disposition type. With a downloadable as well,
the result also carries a legacy `filename=` parameter and an extended `filename*=` parameter.

Raises `ArgumentError` when the disposition is a string that does not match (case-insensitively)
a supported type.

See the implementation comments for more context on the RFCs and on which characters are left
unescaped.

## Arguments

* `downloadable` - A `t:MyApp.Downloadable.t/1` used to obtain the base filename and the
  extension, which are joined with a `.`. Omit it to get only the disposition type. The full
  name is percent-encoded for the `filename*=` parameter. For the legacy `filename=`
  parameter, every codepoint that may not appear verbatim in the quoted value is replaced
  with `_` to support older browsers.
* `disposition` - The disposition type to use, as `:inline`, `:attachment`, or an equivalent
  string.

## Examples

    iex> Download.content_disposition(:inline)
    "inline"
    iex> Download.content_disposition(:attachment)
    "attachment"
    iex> Download.content_disposition(%MyApp.ZipPdfDownload{filename: "kittens"}, "inline")
    "inline; filename=\"kittens.zip\"; filename*=UTF-8''kittens.zip"
    iex> Download.content_disposition(%MyApp.ZipPdfDownload{filename: "kïttéñs"}, :attachment)
    "attachment; filename=\"k_tt__s.zip\"; filename*=UTF-8''k%C3%AFtt%C3%A9%C3%B1s.zip"
    iex> Download.content_disposition("form-data")
    ** (ArgumentError) form-data unsupported, use `Plug.Parsers.MULTIPART`
    iex> Download.content_disposition("filename=\"myfile.txt\"")
    ** (ArgumentError) invalid disposition type: `"filename=\"myfile.txt\""`, use `:inline` or `:attachment`
"""
@spec content_disposition(Downloadable.t(), disposition()) :: String.t()
def content_disposition(downloadable, disposition) do
  # Validate the disposition before touching the downloadable, so bad input fails fast.
  type = disposition_type(disposition)

  base_name = Downloadable.filename(downloadable)
  extension = Downloadable.extension(downloadable)
  full_name = base_name <> "." <> extension

  # Legacy parameter first, extended parameter second.
  parts = [type, ascii_filename(full_name), utf8_filename(full_name)]
  Enum.join(parts, "; ")
end
@doc false
@spec disposition_type(disposition()) :: disposition_atom() | no_return()
# Supported atoms pass through untouched.
def disposition_type(disposition) when disposition in [:inline, :attachment], do: disposition

# Strings are matched case-insensitively; anything unsupported raises `ArgumentError`.
def disposition_type(disposition) when is_binary(disposition) do
  lowered = String.downcase(disposition)

  cond do
    lowered == "attachment" ->
      :attachment

    lowered == "inline" ->
      :inline

    lowered == "form-data" ->
      raise ArgumentError, "form-data unsupported, use `Plug.Parsers.MULTIPART`"

    true ->
      # Report the caller's original value, not the downcased copy.
      raise ArgumentError,
            "invalid disposition type: `#{inspect(disposition)}`, use `:inline` or `:attachment`"
  end
end
@doc false
@spec utf8_filename(String.t()) :: String.t()
def utf8_filename(filename) do
  # Percent-encode every byte that is not in the allowed attr-char set.
  keep_byte? = fn byte -> MapSet.member?(@mapset_param_ext_value_chars, byte) end
  encoded = URI.encode(filename, keep_byte?)

  "filename*=UTF-8''" <> encoded
end
@doc false
@spec ascii_filename(String.t()) :: String.t()
def ascii_filename(filename) do
  # Wrap the sanitized name in double quotes to form the legacy `filename=` parameter.
  sanitized = to_ascii(filename)
  ~s(filename=") <> sanitized <> ~s(")
end
@doc false
@spec to_ascii(String.t()) :: String.t()
def to_ascii(utf8) do
  # Work codepoint by codepoint: anything not allowed verbatim becomes a single "_".
  utf8
  |> String.codepoints()
  |> Enum.map_join(fn codepoint ->
    if MapSet.member?(@mapset_quoted_string_strings, codepoint) do
      codepoint
    else
      "_"
    end
  end)
end
@typedoc """
Result of a parsed `Content-Disposition` header.

Only `:disposition` is required. `:filename_utf8` and `:legacy_filename` are optional keys.
"""
@type parsed_content_disposition() :: %{
optional(:filename_utf8) => String.t(),
optional(:legacy_filename) => String.t(),
required(:disposition) => disposition_atom()
}
@doc ~S"""
Parses a [Content-Disposition](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Disposition)
header value based upon the IETF RFCs, with some room for incorrect encoding from the sender.

Returns a `t:parsed_content_disposition/0` map.

Raises `ArgumentError` if given an unsupported disposition type. Whitespace surrounding the
disposition type (for example `"attachment ; filename=..."`) is ignored.

## Important Notes on the `t:parsed_content_disposition/0` result

- only the `:disposition` key will always be present.
- when consuming, it is HIGHLY suggested to use the `:filename_utf8` field if present
- `:filename_utf8` is only set when the `filename*` parameter starts with `UTF-8`
  (case-insensitive), can be extracted, and decodes to a valid UTF-8 string.
- `:legacy_filename` is intentionally NOT decoded.
- No path cleanup is done. Treat the values as unsafe, like any other external input

See the implementation comments for more context into the RFCs, and specific characters.

## Examples

    iex> Download.parse_content_disposition("inline")
    %{disposition: :inline}
    iex> Download.parse_content_disposition("attachment")
    %{disposition: :attachment}
    iex> Download.parse_content_disposition("attachment; filename*=UTF-8''k%C3%AFtt%C3%A9%C3%B1.jpg")
    %{filename_utf8: "kïttéñ.jpg", disposition: :attachment}
    iex> Download.parse_content_disposition("inline; filename=\"kitten.jpg\"; filename*=UTF-8''kitten.jpg")
    %{filename_utf8: "kitten.jpg", legacy_filename: "kitten.jpg", disposition: :inline}
    # Doesn't try to decode legacy filenames
    iex> Download.parse_content_disposition("attachment; filename=\"k%3Ftt%3F%3F.jpg\"")
    %{legacy_filename: "k%3Ftt%3F%3F.jpg", disposition: :attachment}
    # Tolerates whitespace around the disposition type
    iex> Download.parse_content_disposition(" attachment ; filename=\"kitten.jpg\"")
    %{legacy_filename: "kitten.jpg", disposition: :attachment}
"""
@spec parse_content_disposition(String.t()) :: parsed_content_disposition()
def parse_content_disposition(header_value) do
  # Split on the first ";" only: the head is the disposition type and the remainder (if any)
  # is the raw parameter string, kept intact for the parameter parser.
  [raw_disposition | rest] = :binary.split(header_value, ";")

  # Optional whitespace around the type is not significant, so trim it before validating;
  # otherwise "attachment ; ..." would be rejected as an invalid disposition type.
  disposition =
    raw_disposition
    |> String.trim()
    |> disposition_type()

  params =
    rest
    |> List.first("")
    |> Plug.Conn.Utils.params()

  raw_filename_star = params["filename*"] || ""

  # Only UTF-8 encoded extended values are supported.
  filename_utf8? =
    raw_filename_star
    |> String.trim_leading()
    |> String.downcase(:ascii)
    |> String.starts_with?("utf-8")

  # Any failure along the way simply drops the `:filename_utf8` key from the result.
  filename_utf8 =
    with true <- filename_utf8?,
         {:ok, encoded} <-
           strip_utf8_str_and_language_tag_from_parameter_extension(raw_filename_star),
         decoded = URI.decode(encoded),
         true <- String.valid?(decoded) do
      decoded
    else
      _ -> nil
    end

  # Build the result and omit every key whose value is nil.
  [
    filename_utf8: filename_utf8,
    legacy_filename: params["filename"],
    disposition: disposition
  ]
  |> Enum.reject(fn {_key, value} -> is_nil(value) end)
  |> Map.new()
end
# Private functions
# This could also be a split on single-quote, "'" based on the spec, but this is a bit more forgiving of bad encoding.
# The language tag is matched as "anything but a single quote" so that a sender's unencoded
# "'" inside the filename stays in the filename instead of being swallowed by a greedy
# language-tag match (e.g. "UTF-8''it's.txt" must yield "it's.txt", not "s.txt").
# The filename group may be empty so that the dedicated "parameter empty" error is reachable.
@filename_utf8_maybe_language_tag_regex_capture ~r/^utf-8'(?<lang_tag>[^']*)'(?<filename>.*)/i
# Extracts the still percent-encoded filename from a `filename*` extended value of the form
# `UTF-8'<optional language tag>'<value>`.
#
# Returns `{:ok, filename}` on success, or `{:error, reason}` when the value does not have the
# expected shape or the filename part is empty.
defp strip_utf8_str_and_language_tag_from_parameter_extension(raw_string) do
  trimmed = String.trim(raw_string)

  case Regex.named_captures(@filename_utf8_maybe_language_tag_regex_capture, trimmed) do
    nil -> {:error, "failed to extract the filename* parameter"}
    %{"filename" => ""} -> {:error, "filename* parameter empty"}
    %{"filename" => filename} -> {:ok, filename}
  end
end
end
@petermueller
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment