Skip to content

Instantly share code, notes, and snippets.

@monperrus
Created February 8, 2025 11:40
Show Gist options
  • Save monperrus/92d0b527355a59cecf7a066cbd05c74c to your computer and use it in GitHub Desktop.
Save monperrus/92d0b527355a59cecf7a066cbd05c74c to your computer and use it in GitHub Desktop.
Recollq with JSON output
#!/usr/bin/python3
"""
Recoll Query Tool with Base64 Decoder
This script provides a command-line interface to search using Recoll and decode base64-encoded fields
in the results. It returns the results as structured JSON where alternating words are treated as
key-value pairs, with values being base64-decoded.
Features:
- Command-line interface for Recoll searches
- Automatic base64 decoding of alternating fields
- JSON output format
- Built-in help system
- Error handling for malformed base64
Usage:
$ recollq.py "search query"
$ recollq.py "ext:pdf"
$ recollq.py "author:smith"
Returns:
JSON array of objects where each object contains decoded key-value pairs from the Recoll output.
Dependencies:
- recollq: The Recoll command-line query tool, which must be available on PATH
- subprocess: For running the recollq command
- base64: For decoding encoded values
- json: For structured output
Author: Martin Monperrus
"""
import subprocess
import shlex
import base64
import json
import sys
def print_help():
    """Write the usage text to standard output, then exit with status 1.

    This function never returns: it always ends by raising SystemExit.
    """
    usage = """
Usage: recollq.py <search_query>
A tool to search and decode recoll results.
Returns results as JSON with base64-decoded fields.
Examples:
recollq.py "ext:py" # Search for Python files
recollq.py "author:martin" # Search by author
recollq.py "modified:2024" # Search by modification date
The output is a JSON array of objects where alternate words are decoded from base64.
"""
    print(usage)
    # Same effect as sys.exit(1): terminate the process with a non-zero status.
    raise SystemExit(1)
def decode_alternate_words(line):
    """Turn one line of "key value" word pairs into a dict with decoded values.

    The line is split on whitespace and consumed two words at a time: the
    first word of each pair is used verbatim as the key, and the second is
    base64-decoded and then read as UTF-8 (undecodable bytes are dropped).
    A trailing word without a partner is ignored. If a key occurs more than
    once, the last occurrence wins.

    Args:
        line: One line of text made of whitespace-separated words.

    Returns:
        A dict mapping each key word to its decoded value. When a value
        cannot be decoded as base64, the word is stored exactly as it
        appeared in the line.

    Example:
        Input:  "key1 dmFsdWUx key2 dmFsdWUy"
        Output: {"key1": "value1", "key2": "value2"}
    """
    words = line.split()
    result = {}
    # Pair even-indexed words (keys) with odd-indexed words (values); zip()
    # stops at the shorter slice, so an unpaired last word is skipped.
    for key, encoded_value in zip(words[::2], words[1::2]):
        # Restore any stripped '=' padding so the length is a multiple of 4.
        # The padded copy is kept separate so the fallback below can still
        # return the untouched word.
        padded = encoded_value + "=" * (-len(encoded_value) % 4)
        try:
            decoded = base64.b64decode(padded).decode("utf-8", errors="ignore")
        except ValueError:
            # binascii.Error (invalid base64) is a ValueError subclass, and
            # non-ASCII input raises ValueError as well. Fall back to the
            # original word rather than the padded one.
            decoded = encoded_value
        result[key] = decoded
    return result
def call_recollq(query):
    """Run ``recollq`` for *query* and return the decoded results as JSON.

    The command executed is ``recollq -N -F '' <query>``. Each line of its
    standard output is converted to a dict by ``decode_alternate_words``.

    Args:
        query: The search string. It is handed to recollq as one argument,
            exactly as given; it is not re-tokenized, so apostrophes and
            double quotes inside the query are preserved.

    Returns:
        A JSON string. On success, an array with one object per output
        line. If recollq cannot be started or exits with a non-zero status,
        an object with an "error" message, plus a "stderr" entry when
        recollq wrote anything to standard error.
    """
    # Build argv directly instead of formatting a command string and
    # splitting it again: splitting would choke on an unbalanced quote in the
    # query and would strip quotes the user meant recollq to see.
    args = ["recollq", "-N", "-F", "", query]
    try:
        completed = subprocess.run(args, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        failure = {"error": str(e)}
        stderr_text = (e.stderr or "").strip()
        if stderr_text:
            # Without this the diagnostic printed by recollq would be lost.
            failure["stderr"] = stderr_text
        return json.dumps(failure)
    except OSError as e:
        # Raised when the recollq executable is missing or not runnable.
        return json.dumps({"error": str(e)})

    # NOTE(review): every stdout line is decoded as key/value pairs; if
    # recollq also prints summary lines before the results they end up in the
    # array too - confirm against real recollq output.
    decoded_results = [
        decode_alternate_words(line) for line in completed.stdout.splitlines()
    ]
    return json.dumps(decoded_results)
if __name__ == "__main__":
    # Exactly one argument is expected: the search query. Show the usage text
    # for any other argument count, and also when help is requested
    # explicitly, instead of forwarding the flag to recollq as a query.
    # print_help() terminates the process, so execution only continues past
    # this check with a usable query in sys.argv[1].
    if len(sys.argv) != 2 or sys.argv[1] in ("-h", "--help"):
        print_help()
    result = call_recollq(sys.argv[1])
    print(result)  # Print JSON string
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment