Model: gemini-2.5-pro-exp-03-25
Python CLI app to test if a URL has gzip and does ETags or conditional GET last modified, no dependencies beyond standard library
Okay, here's a Python CLI script that uses only the standard library (`urllib`, `argparse`, `sys`) to test a URL for gzip support and conditional GET capabilities (ETag and Last-Modified).
#!/usr/bin/env python3
import argparse
import sys
import urllib.request
import urllib.parse
import urllib.error
from http.client import HTTPMessage # For type hinting response.info()
# --- Configuration ---
# Sent as the User-Agent header on every request made by check_url_features().
USER_AGENT = "Python-StdLib-HeaderCheck/1.0"
def check_url_features(url: str):
    """
    Probe a URL for gzip, ETag, and Last-Modified support.

    A plain GET advertising ``Accept-Encoding: gzip`` is sent first and its
    ``Content-Encoding``, ``ETag`` and ``Last-Modified`` response headers are
    recorded. Every validator the server returned is then replayed in its own
    conditional GET (``If-None-Match`` / ``If-Modified-Since``). A 304 reply
    reaches this code as an ``HTTPError``, so success is recognised in the
    ``except`` branch rather than in the ``with`` body.

    Args:
        url: The URL string to test.

    Returns:
        None. Progress and a closing summary go to stdout, failures to
        stderr. Exits with code 1 if the initial request fails.
    """
    print(f"[*] Testing URL: {url}")

    # --- Step 1: plain GET to discover gzip and the validator headers ---
    print("[*] Performing initial GET request...")
    first_request = urllib.request.Request(
        url,
        headers={'Accept-Encoding': 'gzip', 'User-Agent': USER_AGENT},
        method='GET',
    )
    supports_gzip = False
    etag = last_modified = None
    try:
        with urllib.request.urlopen(first_request, timeout=10) as reply:
            reply_headers = reply.info()
            print(f"[+] Initial request successful (Status: {reply.getcode()})")

            # Gzip counts as supported only if the server labels the body as such.
            encoding = reply_headers.get('Content-Encoding', '').lower()
            supports_gzip = 'gzip' in encoding
            if supports_gzip:
                print("[+] Gzip: Supported (Server sent 'Content-Encoding: gzip')")
            else:
                print("[-] Gzip: Not detected in response headers.")

            # Remember the validators; they drive the conditional requests below.
            etag = reply_headers.get('ETag')
            last_modified = reply_headers.get('Last-Modified')
            if not etag:
                print("[-] ETag: Header not found.")
            else:
                print(f"[+] ETag: Found ('{etag}')")
            if not last_modified:
                print("[-] Last-Modified: Header not found.")
            else:
                print(f"[+] Last-Modified: Found ('{last_modified}')")

            # Drain the body before the connection is closed.
            reply.read()
    except urllib.error.HTTPError as err:
        print(f"[!] Initial Request Failed: HTTP Error {err.code} - {err.reason}", file=sys.stderr)
        # The error response may still carry useful headers; show them if present.
        if err.headers:
            print("[!] Server response headers (on error):")
            print(err.headers)
        sys.exit(1)
    except urllib.error.URLError as err:
        print(f"[!] Initial Request Failed: URL Error - {err.reason}", file=sys.stderr)
        sys.exit(1)
    except Exception as err:
        print(f"[!] Initial Request Failed: An unexpected error occurred - {err}", file=sys.stderr)
        sys.exit(1)

    # --- Step 2: replay the ETag via If-None-Match ---
    if not etag:
        print("\n[*] Skipping ETag conditional GET (No ETag found in initial response).")
    else:
        print("\n[*] Performing conditional GET request using ETag...")
        etag_request = urllib.request.Request(
            url,
            headers={
                'User-Agent': USER_AGENT,
                'If-None-Match': etag,
                'Accept-Encoding': 'gzip',  # same negotiation as the first request
            },
            method='GET',
        )
        try:
            with urllib.request.urlopen(etag_request, timeout=10) as reply:
                # Reaching this point means the server did not answer 304.
                print(f"[-] ETag Conditional GET: Did not work as expected. Received Status {reply.getcode()} (expected 304).")
                reply.read()
        except urllib.error.HTTPError as err:
            if err.code != 304:
                print(f"[!] ETag Conditional GET: Failed. HTTP Error {err.code} - {err.reason}", file=sys.stderr)
            else:
                print("[+] ETag Conditional GET: Worked! Received Status 304 (Not Modified).")
        except urllib.error.URLError as err:
            print(f"[!] ETag Conditional GET: Failed. URL Error - {err.reason}", file=sys.stderr)
        except Exception as err:
            print(f"[!] ETag Conditional GET: Failed. An unexpected error occurred - {err}", file=sys.stderr)

    # --- Step 3: replay Last-Modified via If-Modified-Since ---
    if not last_modified:
        print("\n[*] Skipping Last-Modified conditional GET (No Last-Modified header found in initial response).")
    else:
        print("\n[*] Performing conditional GET request using Last-Modified...")
        lm_request = urllib.request.Request(
            url,
            headers={
                'User-Agent': USER_AGENT,
                'If-Modified-Since': last_modified,
                'Accept-Encoding': 'gzip',  # same negotiation as the first request
            },
            method='GET',
        )
        try:
            with urllib.request.urlopen(lm_request, timeout=10) as reply:
                # Reaching this point means the server did not answer 304.
                print(f"[-] Last-Modified Conditional GET: Did not work as expected. Received Status {reply.getcode()} (expected 304).")
                reply.read()
        except urllib.error.HTTPError as err:
            if err.code != 304:
                print(f"[!] Last-Modified Conditional GET: Failed. HTTP Error {err.code} - {err.reason}", file=sys.stderr)
            else:
                print("[+] Last-Modified Conditional GET: Worked! Received Status 304 (Not Modified).")
        except urllib.error.URLError as err:
            print(f"[!] Last-Modified Conditional GET: Failed. URL Error - {err.reason}", file=sys.stderr)
        except Exception as err:
            print(f"[!] Last-Modified Conditional GET: Failed. An unexpected error occurred - {err}", file=sys.stderr)

    # --- Step 4: summary ---
    gzip_answer = 'Yes' if supports_gzip else 'No'
    etag_answer = 'Yes' if etag else 'No'
    last_modified_answer = 'Yes' if last_modified else 'No'
    print("\n--- Summary ---")
    print(f"URL: {url}")
    print(f"Gzip Supported by Server: {gzip_answer}")
    print(f"ETag Header Present: {etag_answer}")
    if etag:
        # The outcome of the conditional request is only reported in the log lines above.
        print("ETag Conditional GET (If-None-Match): Check log above for 304 status.")
    print(f"Last-Modified Header Present: {last_modified_answer}")
    if last_modified:
        # The outcome of the conditional request is only reported in the log lines above.
        print("Last-Modified Conditional GET (If-Modified-Since): Check log above for 304 status.")
    print("-------------\n")
def main():
    """Parse the command line, normalise the URL, and run the checks.

    A URL given without a ``scheme://`` prefix is assumed to be HTTP and a
    warning is written to stderr. The scheme test looks for the literal
    ``://`` separator instead of trusting ``urlparse().scheme``, because
    ``urlparse`` reads the host part of ``example.com:8080/path`` as a
    scheme, which would get such input rejected as invalid.

    Exits with status 1 (message on stderr) when the resulting URL has no
    host or a malformed port.
    """
    parser = argparse.ArgumentParser(
        description="Check a URL for Gzip, ETag, and Last-Modified support using only Python's standard library.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument("url", help="The URL to test (e.g., https://www.example.com)")
    args = parser.parse_args()

    # Basic URL validation and scheme addition
    if "://" in args.url:
        url_to_test = args.url
        invalid_message = f"[!] Invalid URL structure: '{url_to_test}'"
    else:
        print(f"[!] URL '{args.url}' is missing a scheme (e.g., http:// or https://). Assuming 'http://'.", file=sys.stderr)
        url_to_test = f"http://{args.url}"
        invalid_message = f"[!] Invalid URL structure even after adding http://: '{url_to_test}'"

    try:
        parsed_url = urllib.parse.urlparse(url_to_test)
        # Reading .port validates it; a non-numeric or out-of-range port raises ValueError.
        _ = parsed_url.port
        has_host = bool(parsed_url.hostname)
    except ValueError:
        # urlparse itself also raises ValueError, e.g. for an unbalanced IPv6 bracket.
        has_host = False
    if not has_host:
        print(invalid_message, file=sys.stderr)
        sys.exit(1)

    # NOTE: a schemeless URL is tried over plain HTTP only; HTTPS is not attempted first.
    check_url_features(url_to_test)


if __name__ == "__main__":
    main()
How to Use:

1. Save: Save the code above as a Python file (e.g., `check_headers.py`).
2. Make Executable (Optional): On Linux/macOS, you can make it directly executable: `chmod +x check_headers.py`.
3. Run: Execute it from your terminal, passing the URL as an argument:

       # Using python explicitly
       python check_headers.py https://www.google.com
       # Or, if made executable:
       ./check_headers.py http://example.com
       # Example without scheme (will default to http://)
       ./check_headers.py www.github.com
Explanation:
- Standard Libraries: Imports only `argparse`, `sys`, `urllib.request`, `urllib.parse`, `urllib.error`, and `http.client` (for type hinting, not strictly required for functionality).
- `check_url_features(url)` function:
  - Takes the URL string.
- Initial Request:
- Creates a
urllib.request.Request
object. - Sets the
Accept-Encoding: gzip
header to tell the server we can handle gzip. - Sets a basic
User-Agent
. - Uses
urllib.request.urlopen()
inside awith
statement (ensures the connection is closed). Atimeout
is added for robustness. - Error Handling: Uses
try...except
blocks to catchHTTPError
(like 404, 500) andURLError
(like DNS lookup failure, connection refused). Exits on critical initial errors. - Gzip Check: Examines the
Content-Encoding
header from the response (response.info().get('Content-Encoding')
). - Header Extraction: Retrieves
ETag
andLast-Modified
headers if present. response.read()
: Reads the response body. This is important to ensure the connection state is managed correctly, even though we don't use the body content here.
- Creates a
- Conditional GET (ETag):
- Only runs if an
ETag
was found previously. - Creates a new
Request
object. - Adds the
If-None-Match
header with the ETag value. - Sends the request.
- Checks for 304: The expected outcome for a successful conditional GET is an `HTTPError` with a status code of `304 Not Modified` (`urllib` raises `HTTPError` for any response it does not treat as a success, and that includes 304). If the request succeeds with a 200 status, it means the server sent the full content again, ignoring the ETag.
- Conditional GET (Last-Modified):
- Similar logic to the ETag check, but uses the
If-Modified-Since
header with theLast-Modified
value.
- Similar logic to the ETag check, but uses the
- Output: Prints informative messages during the process and a summary at the end.
main()
function:- Uses
argparse
to define and parse the command-lineurl
argument. - Performs basic URL validation using
urllib.parse.urlparse
and attempts to addhttp://
if no scheme is provided. - Calls
check_url_features
.
- Uses
if __name__ == "__main__":
: Standard Python practice to ensuremain()
is called only when the script is executed directly.
This script fulfills the requirements of using only the standard library while providing checks for the specified HTTP features.
28 input, 3,038 output, {"promptTokensDetails": [{"modality": "TEXT", "tokenCount": 28}]}
I do not like it telling me to check log above, it should check that for me
Okay, you're right, that's much better user experience. The script should determine the success or failure of the conditional GETs itself.
Here's the updated version that explicitly checks the status code of the conditional requests and reports the outcome clearly in the summary:
#!/usr/bin/env python3
import argparse
import sys
import urllib.request
import urllib.parse
import urllib.error
from http.client import HTTPMessage # For type hinting response.info()
# --- Configuration ---
# Sent as the User-Agent header on every request made by check_url_features().
USER_AGENT = "Python-StdLib-HeaderCheck/1.1" # Version bump
def check_url_features(url: str):
    """
    Performs checks on a given URL for Gzip, ETag, and Last-Modified support.

    An initial GET advertising ``Accept-Encoding: gzip`` records the
    ``Content-Encoding``, ``ETag`` and ``Last-Modified`` response headers.
    Each validator that was returned is then replayed in its own conditional
    GET, and the summary reports whether the server answered 304.

    Args:
        url: The URL string to test.

    Returns:
        None. Prints results to stdout and failures to stderr. Exits with
        code 1 if the initial request fails.
    """
    print(f"[*] Testing URL: {url}")

    # --- State Variables ---
    supports_gzip = False
    etag = None
    last_modified = None
    etag_conditional_worked = None  # None = N/A, False = Failed, True = Worked (304)
    last_modified_conditional_worked = None  # None = N/A, False = Failed, True = Worked (304)

    # --- Initial Request (Check for Gzip, ETag, Last-Modified) ---
    print("[*] Performing initial GET request...")
    initial_headers = {
        'Accept-Encoding': 'gzip',
        'User-Agent': USER_AGENT,
    }
    req_initial = urllib.request.Request(url, headers=initial_headers, method='GET')
    try:
        with urllib.request.urlopen(req_initial, timeout=15) as response:
            initial_info = response.info()  # Headers object
            status_code = response.getcode()
            print(f"[+] Initial request successful (Status: {status_code})")

            # Gzip counts as supported only if the server labels the body as such.
            content_encoding = initial_info.get('Content-Encoding', '').lower()
            if 'gzip' in content_encoding:
                supports_gzip = True
                print("[+] Gzip: Supported (Server sent 'Content-Encoding: gzip')")
            else:
                print("[-] Gzip: Not detected in response headers.")

            # Extract ETag and Last-Modified for conditional requests
            etag = initial_info.get('ETag')
            last_modified = initial_info.get('Last-Modified')
            if etag:
                print(f"[+] ETag: Found ('{etag}')")
            else:
                print("[-] ETag: Header not found.")
            if last_modified:
                print(f"[+] Last-Modified: Found ('{last_modified}')")
            else:
                print("[-] Last-Modified: Header not found.")

            # Drain the body before the connection is closed.
            response.read()
    except urllib.error.HTTPError as e:
        print(f"[!] Initial Request Failed: HTTP Error {e.code} - {e.reason}", file=sys.stderr)
        if e.headers:
            # Diagnostics belong on stderr next to the error line, not mixed into stdout.
            print("[!] Server response headers (on error):", file=sys.stderr)
            print(e.headers, file=sys.stderr)
        sys.exit(1)
    except urllib.error.URLError as e:
        print(f"[!] Initial Request Failed: URL Error - {e.reason}", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        print(f"[!] Initial Request Failed: An unexpected error occurred - {e}", file=sys.stderr)
        sys.exit(1)

    # --- Conditional GET Request (ETag) ---
    if etag:
        etag_conditional_worked = _conditional_get_returns_304(url, "ETag", 'If-None-Match', etag)
    else:
        print("\n[*] Skipping ETag conditional GET (No ETag found in initial response).")

    # --- Conditional GET Request (Last-Modified) ---
    if last_modified:
        last_modified_conditional_worked = _conditional_get_returns_304(
            url, "Last-Modified", 'If-Modified-Since', last_modified
        )
    else:
        print("\n[*] Skipping Last-Modified conditional GET (No Last-Modified header found).")

    # --- Final Summary ---
    # A state variable is None exactly when its header was absent, so one lookup covers all cases.
    answers = {True: 'Yes', False: 'No', None: 'N/A'}
    print("\n--- Summary ---")
    print(f"URL: {url}")
    print(f"Gzip Supported by Server: {'Yes' if supports_gzip else 'No'}")
    print(f"ETag Header Present: {'Yes' if etag else 'No'}")
    print(f"ETag Conditional GET Works (Returns 304): {answers[etag_conditional_worked]}")
    print(f"Last-Modified Header Present: {'Yes' if last_modified else 'No'}")
    print(f"Last-Modified Conditional GET Works (Returns 304): {answers[last_modified_conditional_worked]}")
    print("-------------\n")


def _conditional_get_returns_304(url: str, label: str, header_name: str,
                                 header_value: str, timeout: float = 10) -> bool:
    """Replay one validator in a conditional GET and report whether the reply was 304.

    Args:
        url: The URL to request.
        label: Name of the validator, used in log messages ("ETag" or "Last-Modified").
        header_name: Conditional request header to send (e.g. ``If-None-Match``).
        header_value: Validator value taken from the initial response.
        timeout: Socket timeout in seconds for this request.

    Returns:
        True only when the server answered 304. False when it sent a full
        response, answered with another HTTP error, or the request failed.
    """
    print(f"\n[*] Performing conditional GET request using {label}...")
    request = urllib.request.Request(
        url,
        headers={
            'User-Agent': USER_AGENT,
            header_name: header_value,
            'Accept-Encoding': 'gzip',
        },
        method='GET',
    )
    try:
        with urllib.request.urlopen(request, timeout=timeout) as response:
            # Reaching this point means the server did not answer 304.
            status_code = response.getcode()
            print(f"[-] {label} Conditional GET: Did not work as expected. Received Status {status_code} (expected 304). Server sent full response.")
            response.read()  # Consume body
    except urllib.error.HTTPError as e:
        # A 304 reply arrives here as an HTTPError, so this branch is the success path.
        if e.code == 304:
            print(f"[+] {label} Conditional GET: Worked! Received Status 304 (Not Modified).")
            return True
        print(f"[!] {label} Conditional GET: Failed. Server responded with HTTP Error {e.code} - {e.reason}", file=sys.stderr)
    except urllib.error.URLError as e:
        print(f"[!] {label} Conditional GET: Request failed. URL Error - {e.reason}", file=sys.stderr)
    except Exception as e:
        print(f"[!] {label} Conditional GET: Request failed. An unexpected error occurred - {e}", file=sys.stderr)
    return False
def main():
    """Parse the command line, normalise the URL, and run the checks.

    A URL given without a ``scheme://`` prefix is assumed to be HTTP and a
    warning is written to stderr. The scheme test looks for the literal
    ``://`` separator instead of trusting ``urlparse().scheme``, because
    ``urlparse`` reads the host part of ``example.com:8080/path`` as a
    scheme, which would get such input rejected as invalid.

    Exits with status 1 (message on stderr) when the resulting URL has no
    host or a malformed port.
    """
    parser = argparse.ArgumentParser(
        description="Check a URL for Gzip, ETag, and Last-Modified support using only Python's standard library.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument("url", help="The URL to test (e.g., https://www.example.com)")
    args = parser.parse_args()

    # Basic URL validation and scheme addition
    if "://" in args.url:
        url_to_test = args.url
        invalid_message = f"[!] Invalid URL structure: '{url_to_test}'"
    else:
        # For simplicity a schemeless URL defaults to http with a warning; https is not tried first.
        print(f"[!] URL '{args.url}' is missing a scheme (e.g., http:// or https://). Assuming 'http://'.", file=sys.stderr)
        url_to_test = f"http://{args.url}"
        invalid_message = f"[!] Invalid URL structure even after adding http://: '{url_to_test}'"

    try:
        parsed_url = urllib.parse.urlparse(url_to_test)
        # Reading .port validates it; a non-numeric or out-of-range port raises ValueError.
        _ = parsed_url.port
        has_host = bool(parsed_url.hostname)
    except ValueError:
        # urlparse itself also raises ValueError, e.g. for an unbalanced IPv6 bracket.
        has_host = False
    if not has_host:
        print(invalid_message, file=sys.stderr)
        sys.exit(1)

    check_url_features(url_to_test)


if __name__ == "__main__":
    main()
Changes:
- State Variables: Introduced `etag_conditional_worked` and `last_modified_conditional_worked`. They are initialized to `None`.
- Conditional Logic Update:
  - Before making a conditional request (if the relevant header exists), the corresponding state variable is set to `False` (assuming failure).
  - If the request results in an `HTTPError` with `e.code == 304`, the state variable is set to `True`.
  - If the request succeeds with a 200 status (meaning the server ignored the condition and sent the full response), the state variable remains `False`.
  - If any other error occurs during the conditional request, the state variable remains `False`.
- Summary Update:
  - The summary now checks the value of `etag_conditional_worked` and `last_modified_conditional_worked`.
  - It prints "Yes" if the value is `True`.
  - It prints "No" if the value is `False`.
  - It prints "N/A" if the corresponding header (`ETag` or `Last-Modified`) wasn't present in the initial request (meaning the conditional check wasn't performed, and the state variable is still `None`).
Now the summary provides a direct answer about whether the conditional GET mechanism worked for each header type found.
3,094 input, 2,666 output, {"promptTokensDetails": [{"modality": "TEXT", "tokenCount": 3094}]}
I wrote this for this comment: https://news.ycombinator.com/item?id=43451579#43477521
I saved that final Python script to another Gist here: https://gist.github.com/simonw/c790f96e9b93c498932ace1f5bb2602c
Which means I can run it like this:
Output: