Skip to content

Instantly share code, notes, and snippets.

@sudoker0
Last active August 20, 2023 09:39
Show Gist options
  • Save sudoker0/4ed01f9a8bcb303e0ef9671cab30166f to your computer and use it in GitHub Desktop.
Save sudoker0/4ed01f9a8bcb303e0ef9671cab30166f to your computer and use it in GitHub Desktop.
A Python script which converts images to audio using SSTV (Slow-Scan Television) encoding.
from PIL import Image
from enum import Enum
import math
import struct
# specification: http://www.barberdsp.com/downloads/Dayton%20Paper.pdf
# RGBEncodedImage: 2D array, each element in the 2D array contains an array with 3 elements:
# [red value, green value, blue value] (each value ranging from 1500-2300)
RGBEncodedImage = list[list[list[float]]]
# OutputSSTVDataFormat: an array, each element in the array contains a tuple with 2 elements:
# (total duration in ms shared by all the frequencies, list of frequencies in Hz)
OutputSSTVDataFormat = list[tuple[float, list[float]]]
class RGBScanComponents(Enum):
    """Position of each colour channel inside a [red, green, blue] triple."""

    Red, Green, Blue = range(3)
class YRYBYScanComponents(Enum):
    """Position of each component inside a [Y, R-Y, B-Y] triple."""

    Y, RY, BY = range(3)
#! -- Helper function
def num_to_bits(num: int, min_length=7):
    """Return the binary digits of *num*, least-significant bit first.

    The result is right-padded with zeros until it holds at least
    *min_length* digits. Values that are zero or negative yield only padding.
    """
    digits: list[int] = []
    remaining = num
    while remaining > 0:
        remaining, lowest = divmod(remaining, 2)
        digits.append(lowest)
    missing = min_length - len(digits)
    if missing > 0:
        digits += [0] * missing
    return digits
def rgb_to_yryby(r: int, g: int, b: int):
    """Convert an RGB triple into a [Y, R-Y, B-Y] list.

    Each output component is ``offset + 0.003906 * (cr*r + cg*g + cb*b)``
    using the per-component coefficients in the table below.
    """
    scale = 0.003906
    # (offset, red coefficient, green coefficient, blue coefficient)
    coefficients = (
        (16.0, 65.738, 129.057, 25.064),      # Y
        (128.0, 112.439, -94.154, -18.285),   # R-Y
        (128.0, -37.945, -74.494, 112.439),   # B-Y
    )
    return [
        offset + (scale * ((cr * r) + (cg * g) + (cb * b)))
        for offset, cr, cg, cb in coefficients
    ]
def color_to_freq(color: list[float]):
    """Map every component level in *color* to a tone frequency.

    A level of 0 maps to 1500 and each additional unit adds 3.1372549,
    so a level of 255 lands at (approximately) 2300.
    """
    base_hz = 1500
    hz_per_level = 3.1372549
    tones = []
    for level in color:
        tones.append(base_hz + level * hz_per_level)
    return tones
def image_handler(image_path: str, size: tuple[int, int], start_pos = (0, 0), end_pos = (-1, -1)):
    """Load an image, force it to RGB, resize it to *size* and crop it.

    Args:
        image_path: Path of the image file to open.
        size: (width, height) the image is resized to before cropping.
        start_pos: (left, upper) corner of the crop box.
        end_pos: (right, lower) corner of the crop box; a coordinate of -1
            means "use the matching dimension of *size*".

    Returns:
        The resized and cropped RGB image.
    """
    right = size[0] if end_pos[0] == -1 else end_pos[0]
    lower = size[1] if end_pos[1] == -1 else end_pos[1]
    # Open inside a context manager so the underlying file is closed even if
    # decoding fails; convert()/resize() return new in-memory images, so the
    # result stays usable after the source file is closed.
    with Image.open(image_path) as image:
        resized_image = image.convert("RGB").resize(size)
    return resized_image.crop((start_pos[0], start_pos[1], right, lower))
def get_rgb_scan(image_data: Image.Image, y: int, color_components: RGBScanComponents):
    """Return the tone frequencies of one colour channel along image row *y*.

    Every pixel of the row is converted with ``color_to_freq`` and only the
    channel selected by *color_components* is kept.
    """
    channel = color_components.value
    return [
        color_to_freq(image_data.getpixel((x, y))[:3])[channel]
        for x in range(image_data.width)
    ]
def get_yryby_scan(image_data: Image.Image, y: int, yryby_components: YRYBYScanComponents):
    """Return the tone frequencies of one Y / R-Y / B-Y component along row *y*.

    Each pixel is converted with ``rgb_to_yryby`` and then ``color_to_freq``;
    only the component selected by *yryby_components* is kept.
    """
    component = yryby_components.value
    # Lazily walk the row so each pixel is read right before it is converted.
    row_pixels = (image_data.getpixel((x, y))[:3] for x in range(image_data.width))
    return [
        color_to_freq(rgb_to_yryby(r, g, b))[component]
        for r, g, b in row_pixels
    ]
#! -- Encode function
#! Scottie
def scottie(scan_time: float, image_data: Image.Image):
    """Build the tone sequence for a Scottie-style image.

    Every entry is ``(duration in ms, [frequencies in Hz])``. Each image row
    is emitted as green scan, blue scan, sync, red scan; the very first row
    is additionally preceded by one extra sync pulse.
    """
    segments: OutputSSTVDataFormat = []
    line_count = image_data.size[1]
    for row in range(line_count):
        if row == 0:
            segments.append((9, [1200]))  # starting sync pulse, first row only
        segments.extend([
            (1.5, [1500]),  # separator pulse
            (scan_time, get_rgb_scan(image_data, row, RGBScanComponents.Green)),
            (1.5, [1500]),  # separator pulse
            (scan_time, get_rgb_scan(image_data, row, RGBScanComponents.Blue)),
            (9, [1200]),    # sync pulse
            (1.5, [1500]),  # sync porch
            (scan_time, get_rgb_scan(image_data, row, RGBScanComponents.Red)),
        ])
    return segments
def scottie_1(data: Image.Image):
    """Encode *data* with the Scottie layout using a 138.24 ms colour scan."""
    return scottie(scan_time=138.24, image_data=data)
def scottie_2(data: Image.Image):
    """Encode *data* with the Scottie layout using an 88.064 ms colour scan."""
    return scottie(scan_time=88.064, image_data=data)
def scottie_dx(data: Image.Image):
    """Encode *data* with the Scottie layout using a 345.6 ms colour scan."""
    return scottie(scan_time=345.6, image_data=data)
#! Martin
def martin(scan_time: float, image_data: Image.Image):
    """Build the tone sequence for a Martin-style image.

    Every entry is ``(duration in ms, [frequencies in Hz])``. Each image row
    is emitted as sync, porch, then green, blue and red scans, each scan
    followed by a 0.572 ms separator at 1500 Hz.
    """
    scan_order = (
        RGBScanComponents.Green,
        RGBScanComponents.Blue,
        RGBScanComponents.Red,
    )
    segments: OutputSSTVDataFormat = []
    line_count = image_data.size[1]
    for row in range(line_count):
        segments.append((4.862, [1200]))  # sync pulse
        segments.append((0.572, [1500]))  # sync porch
        for channel in scan_order:
            segments.append((scan_time, get_rgb_scan(image_data, row, channel)))
            segments.append((0.572, [1500]))  # separator pulse
    return segments
def martin_1(data: Image.Image):
    """Encode *data* with the Martin layout using a 146.432 ms colour scan."""
    return martin(scan_time=146.432, image_data=data)
def martin_2(data: Image.Image):
    """Encode *data* with the Martin layout using a 73.216 ms colour scan."""
    return martin(scan_time=73.216, image_data=data)
#! Pasokon "P"
def pasokon(scan_time: float, sync_period: float, porch_period: float, image_data: Image.Image):
    """Build the tone sequence for a Pasokon "P" style image.

    Every entry is ``(duration in ms, [frequencies in Hz])``. Each image row
    is emitted as a sync pulse and a porch, then red, green and blue scans,
    each scan followed by another porch.
    """
    scan_order = (
        RGBScanComponents.Red,
        RGBScanComponents.Green,
        RGBScanComponents.Blue,
    )
    segments: OutputSSTVDataFormat = []
    line_count = image_data.size[1]
    for row in range(line_count):
        segments.append((sync_period, [1200]))   # sync pulse
        segments.append((porch_period, [1500]))  # sync porch
        for channel in scan_order:
            segments.append((scan_time, get_rgb_scan(image_data, row, channel)))
            segments.append((porch_period, [1500]))  # porch after each scan
    return segments
def p3(data: Image.Image):
    """Encode *data* with the Pasokon layout: 133.333 ms scan, 5.208 ms sync, 1.042 ms porch."""
    return pasokon(scan_time=133.333, sync_period=5.208, porch_period=1.042, image_data=data)
def p5(data: Image.Image):
    """Encode *data* with the Pasokon layout: 200 ms scan, 7.813 ms sync, 1.563 ms porch."""
    return pasokon(scan_time=200, sync_period=7.813, porch_period=1.563, image_data=data)
def p7(data: Image.Image):
    """Encode *data* with the Pasokon layout: 266.666 ms scan, 10.417 ms sync, 2.083 ms porch."""
    return pasokon(scan_time=266.666, sync_period=10.417, porch_period=2.083, image_data=data)
#! Wrasse SC2-180
def sc2_180(image_data: Image.Image):
    """Build the tone sequence for a Wraase SC2-180 image.

    Every entry is ``(duration in ms, [frequencies in Hz])``. Each image row
    is emitted as sync, porch, then back-to-back red, green and blue scans of
    235 ms each.
    """
    scan_order = (
        RGBScanComponents.Red,
        RGBScanComponents.Green,
        RGBScanComponents.Blue,
    )
    segments: OutputSSTVDataFormat = []
    line_count = image_data.size[1]
    for row in range(line_count):
        segments.append((5.5225, [1200]))  # sync pulse
        segments.append((0.5, [1500]))     # sync porch
        segments.extend(
            (235, get_rgb_scan(image_data, row, channel)) for channel in scan_order
        )
    return segments
#! Robot 36 color
def robot36(image_data: Image.Image):
    """Build the tone sequence for a Robot 36 colour image.

    Every entry is ``(duration in ms, [frequencies in Hz])``. Each image row
    carries a full-rate Y scan followed by ONE half-rate chroma scan; the
    chroma component alternates from row to row and is announced by the
    separator frequency that precedes it:

    * even rows (0, 2, 4, ...): 1500 Hz separator, then R-Y
    * odd rows  (1, 3, 5, ...): 2300 Hz separator, then B-Y

    The first transmitted row (index 0) is an even row, so it must carry
    R-Y. The previous implementation tested ``y % 2 != 0`` for the "even"
    branch, which swapped the chroma components of every row pair.
    """
    data: OutputSSTVDataFormat = []
    _, height = image_data.size
    for y in range(height):
        data.append((9, [1200]))  # sync pulse
        data.append((3, [1500]))  # sync porch
        data.append((88, get_yryby_scan(image_data, y, YRYBYScanComponents.Y)))  # Y scan
        if y % 2 == 0:
            data.append((4.5, [1500]))  # even separator pulse
            data.append((1.5, [1900]))  # porch
            data.append((44, get_yryby_scan(image_data, y, YRYBYScanComponents.RY)))  # R-Y scan
        else:
            data.append((4.5, [2300]))  # odd separator pulse
            data.append((1.5, [1900]))  # porch
            data.append((44, get_yryby_scan(image_data, y, YRYBYScanComponents.BY)))  # B-Y scan
    return data
#! Robot 72 color
def robot72(image_data: Image.Image):
    """Build the tone sequence for a Robot 72 colour image.

    Every entry is ``(duration in ms, [frequencies in Hz])``. Each image row
    is emitted as sync, porch, a 138 ms Y scan, then a 69 ms R-Y scan and a
    69 ms B-Y scan, each chroma scan preceded by its own separator and porch.
    """
    segments: OutputSSTVDataFormat = []
    line_count = image_data.size[1]
    for row in range(line_count):
        segments.extend([
            (9, [1200]),    # sync pulse
            (3, [1500]),    # sync porch
            (138, get_yryby_scan(image_data, row, YRYBYScanComponents.Y)),
            (4.5, [1500]),  # separator pulse before R-Y
            (1.5, [1900]),  # porch
            (69, get_yryby_scan(image_data, row, YRYBYScanComponents.RY)),
            (4.5, [2300]),  # separator pulse before B-Y
            (1.5, [1500]),  # porch
            (69, get_yryby_scan(image_data, row, YRYBYScanComponents.BY)),
        ])
    return segments
#! PD
def pd(scan_time: float, image_data: Image.Image):
    """Build the tone sequence for a PD-style image.

    Every entry is ``(duration in ms, [frequencies in Hz])``. Rows are sent
    in pairs: sync, porch, Y of the first row, R-Y averaged over both rows,
    B-Y averaged over both rows, then Y of the second row.

    If the image has an odd number of rows, the final row has no partner; it
    is paired with itself instead of indexing past the bottom of the image.
    """
    data: OutputSSTVDataFormat = []
    _, height = image_data.size
    for first in range(0, height, 2):
        # Clamp so a trailing unpaired row is reused rather than read out of range.
        second = min(first + 1, height - 1)

        data.append((20, [1200]))    # sync pulse
        data.append((2.08, [1500]))  # porch
        data.append((scan_time, get_yryby_scan(image_data, first, YRYBYScanComponents.Y)))  # Y scan (first row)

        ry_first = get_yryby_scan(image_data, first, YRYBYScanComponents.RY)
        ry_second = get_yryby_scan(image_data, second, YRYBYScanComponents.RY)
        data.append((scan_time, [(a + b) / 2 for a, b in zip(ry_first, ry_second)]))  # R-Y scan (averaged over both rows)

        by_first = get_yryby_scan(image_data, first, YRYBYScanComponents.BY)
        by_second = get_yryby_scan(image_data, second, YRYBYScanComponents.BY)
        data.append((scan_time, [(a + b) / 2 for a, b in zip(by_first, by_second)]))  # B-Y scan (averaged over both rows)

        data.append((scan_time, get_yryby_scan(image_data, second, YRYBYScanComponents.Y)))  # Y scan (second row)
    return data
def pd50(data: Image.Image):
    """Encode *data* with the PD layout using a 91.520 ms component scan."""
    return pd(scan_time=91.520, image_data=data)
def pd90(data: Image.Image):
    """Encode *data* with the PD layout using a 170.240 ms component scan."""
    return pd(scan_time=170.240, image_data=data)
def pd120(data: Image.Image):
    """Encode *data* with the PD layout using a 121.600 ms component scan."""
    return pd(scan_time=121.600, image_data=data)
def pd160(data: Image.Image):
    """Encode *data* with the PD layout using a 195.584 ms component scan."""
    return pd(scan_time=195.584, image_data=data)
def pd180(data: Image.Image):
    """Encode *data* with the PD layout using a 183.040 ms component scan."""
    return pd(scan_time=183.040, image_data=data)
def pd240(data: Image.Image):
    """Encode *data* with the PD layout using a 244.480 ms component scan."""
    return pd(scan_time=244.480, image_data=data)
def pd290(data: Image.Image):
    """Encode *data* with the PD layout using a 228.800 ms component scan."""
    return pd(scan_time=228.800, image_data=data)
class Modes(Enum):
    """Supported SSTV modes.

    Each member's value is a dict with three keys:
    ``"function"`` (the encoder called with the prepared image),
    ``"code"`` (the number sent as the VIS code in the header) and
    ``"image_size"`` (the (width, height) the input image is resized to).
    """

    # Scottie
    Scottie1 = dict(function=scottie_1, code=60, image_size=(320, 256))
    Scottie2 = dict(function=scottie_2, code=56, image_size=(320, 256))
    ScottieDX = dict(function=scottie_dx, code=76, image_size=(320, 256))

    # Martin
    Martin1 = dict(function=martin_1, code=44, image_size=(320, 256))
    Martin2 = dict(function=martin_2, code=40, image_size=(320, 256))

    # Pasokon "P"
    P3 = dict(function=p3, code=113, image_size=(640, 496))
    P5 = dict(function=p5, code=114, image_size=(640, 496))
    P7 = dict(function=p7, code=115, image_size=(640, 496))

    # PD
    PD50 = dict(function=pd50, code=93, image_size=(320, 256))
    PD90 = dict(function=pd90, code=99, image_size=(320, 256))
    PD120 = dict(function=pd120, code=95, image_size=(640, 496))
    PD160 = dict(function=pd160, code=98, image_size=(512, 400))
    PD180 = dict(function=pd180, code=96, image_size=(640, 496))
    PD240 = dict(function=pd240, code=97, image_size=(640, 496))
    PD290 = dict(function=pd290, code=94, image_size=(800, 616))

    # Wraase SC2-180
    SC2_180 = dict(function=sc2_180, code=55, image_size=(320, 256))

    # Robot colour modes
    Robot36 = dict(function=robot36, code=8, image_size=(320, 240))
    Robot72 = dict(function=robot72, code=12, image_size=(320, 240))
def sstv_header(mode: Modes):
    """Build the calibration header and VIS code that precede the image data.

    Every entry is ``(duration in ms, [frequencies in Hz])``. The VIS bits
    come from ``num_to_bits`` (least-significant bit first); a 1 bit is sent
    as 1100 Hz and a 0 bit as 1300 Hz, followed by an even-parity bit and a
    1200 Hz stop tone.
    """
    vis_bits = num_to_bits(mode.value["code"])

    header: OutputSSTVDataFormat = [
        (5, [0]),       # short break
        (300, [1900]),  # leader tone
        (10, [1200]),   # break
        (300, [1900]),  # leader tone, again
        (30, [1200]),   # VIS start
    ]

    # VIS code (1100hz = 1, 1300hz = 0)
    header.extend((30, [1100] if bit == 1 else [1300]) for bit in vis_bits)

    # Even parity: the parity bit is 1 (1100hz) only when the count of ones is odd.
    ones_are_odd = sum(1 for bit in vis_bits if bit == 1) % 2 == 1
    header.append((30, [1100] if ones_are_odd else [1300]))

    header.append((30, [1200]))  # VIS end
    return header
#! -- Generator function
def generate_tone_bytes(sampling_rate: int, amplitude: float, duration: float, frequencies: list[float], running_integral: float):
    """Synthesize one segment of 16-bit little-endian PCM sine samples.

    The segment lasts *duration* milliseconds and is split evenly between the
    entries of *frequencies*, played in order. The oscillator phase is carried
    in and out through *running_integral* so consecutive segments join without
    a phase jump.

    Args:
        sampling_rate: Samples per second.
        amplitude: Scale factor applied to the full 16-bit range (32767).
        duration: Segment length in milliseconds.
        frequencies: Frequencies in Hz; must be non-empty whenever the segment
            contains at least one sample.
        running_integral: Phase (radians) at the start of the segment.

    Returns:
        ``[tone_bytes, running_integral]``: the samples as a ``bytearray`` and
        the phase at the end of the segment.
    """
    num_samples = round(sampling_rate * duration / 1000)
    num_frequencies = len(frequencies)
    tone_bytes = bytearray()
    for i in range(num_samples):
        # Sample i belongs to slot floor(i * slots / samples), which runs from
        # 0 to slots - 1. The previous formula, int((i + 1) / n * slots) - 1,
        # produced -1 for the leading samples, so every segment began with
        # its LAST frequency and all other slots were shifted by one.
        pos = i * num_frequencies // num_samples
        running_integral += 2 * math.pi * frequencies[pos] / sampling_rate
        running_integral %= 2 * math.pi  # keep the phase bounded
        sample = round(amplitude * 32767 * math.sin(running_integral))
        tone_bytes.extend(struct.pack('<h', sample))
    return [tone_bytes, running_integral]
def generate_wav(data: OutputSSTVDataFormat, output_file: str):
    """Render *data* to a mono, 16-bit, 44100 Hz PCM WAV file at *output_file*.

    Each ``(duration, frequencies)`` entry is synthesized with
    ``generate_tone_bytes``; the phase returned by one entry is fed into the
    next so the tone stays continuous across entries.
    """
    sample_rate = 44100
    volume = 1

    # Synthesize every segment, threading the oscillator phase through.
    pcm = bytearray()
    phase = 0
    for length_ms, tones in data:
        segment, phase = generate_tone_bytes(sample_rate, volume, length_ms, tones, phase)
        pcm += segment

    payload_size = len(pcm)
    riff_and_fmt = struct.pack(
        '<4sI4s4sIHHIIHH',
        b'RIFF',
        36 + payload_size,  # file size - 8 bytes
        b'WAVE',
        b'fmt ',
        16,                 # fmt subchunk size (16 bytes for PCM)
        1,                  # audio format (PCM)
        1,                  # number of channels (mono)
        sample_rate,
        sample_rate * 2,    # byte rate (sample rate * bytes per sample)
        2,                  # block align (2 bytes per sample)
        16,                 # bits per sample
    )
    data_header = struct.pack('<4sI', b'data', payload_size)

    with open(output_file, 'wb') as wav_file:
        for part in (riff_and_fmt, data_header, pcm):
            wav_file.write(part)
def sstv_generator(mode: Modes, image_file: str, output_file: str = "default.wav", image_start_pos = (0, 0), image_end_pos = (-1, -1)):
    """Encode *image_file* in the given SSTV *mode* and write it to *output_file*.

    The image is prepared with ``image_handler`` at the mode's image size
    (optionally cropped to the given start/end positions), prefixed with the
    header from ``sstv_header`` and rendered by ``generate_wav``.
    """
    settings = mode.value
    picture = image_handler(image_file, settings["image_size"], image_start_pos, image_end_pos)
    tones: OutputSSTVDataFormat = [
        *sstv_header(mode),
        *settings["function"](picture),
    ]
    generate_wav(tones, output_file)
if __name__ == "__main__":
    # Example usage: encode test_img.png in Martin 1 mode and write the audio to martin1.wav
    sstv_generator(Modes.Martin1, "test_img.png", "martin1.wav")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment