Dani Mermelstein mermelstein

## spotify_json_to_df.R
library(jsonlite)
library(lubridate)

# Local directory that holds the downloaded Spotify JSON export.
path <- "~/Downloads/Spotify Extended Streaming History/"

# `pattern` in list.files() is a regular expression, not a shell glob.
# A glob-style "*.json" can also match names such as "notes_json.txt" or
# "data.json.bak", so anchor on a literal ".json" extension instead.
json_pattern <- "\\.json$"

# Full paths of every JSON file found directly inside `path`.
json_files <- list.files(path, pattern = json_pattern, full.names = TRUE)

# initialize an empty list to store the data

## twitter_data_to_df.R
library(jsonlite)
library(brio)

# Name of the file (inside the archive's data directory) that holds the tweets.
filename <- "tweets.js"

# Directory of the Twitter archive that contains the data files.
dir_path <- "~/Downloads/twitter-archive/data"

# get full file path

## Dockerfile
# Build on the python:3.10 base image.
FROM python:3.10

WORKDIR /app

# Install uv first: this step does not depend on the project sources, so its
# layer stays cached across source edits.
RUN pip install --no-cache-dir uv

# Copy only the dependency manifest before installing, so the dependency
# layer is rebuilt only when requirements.txt changes rather than on every
# source change.
COPY requirements.txt /app/
RUN uv pip install --system --no-cache-dir -r requirements.txt

# Copy the rest of the build context into the image.
COPY . /app

# Exec form: python is started directly (not via `/bin/sh -c`), so it
# receives stop signals from the container runtime itself.
CMD ["python", "-u", "main.py"]

## text_to_pdf.py
from PIL import Image
import pytesseract
from pdf2image import convert_from_path

# Convert the PDF to a list of images.
# NOTE(review): 'path_to_pdf.pdf' looks like a placeholder path — replace it
# with the real input file before running.
images = convert_from_path('path_to_pdf.pdf')

# Process each image with Tesseract, requesting the English ('eng') language.
# NOTE(review): neither `i` nor `text` is used in the lines visible here; the
# loop body presumably continues beyond this excerpt — confirm.
for i, img in enumerate(images):
    text = pytesseract.image_to_string(img, lang='eng')
	library(jsonlite)
	library(lubridate)

	# Local directory that holds the downloaded Spotify JSON export.
	path <- "~/Downloads/Spotify Extended Streaming History/"

	# `pattern` in list.files() is a regular expression, not a shell glob.
	# A glob-style "*.json" can also match names such as "notes_json.txt" or
	# "data.json.bak", so anchor on a literal ".json" extension instead.
	json_pattern <- "\\.json$"

	# Full paths of every JSON file found directly inside `path`.
	json_files <- list.files(path, pattern = json_pattern, full.names = TRUE)

	# initialize an empty list to store the data
	library(jsonlite)
	library(brio)

	# Name of the file (inside the archive's data directory) that holds the tweets.
	filename <- "tweets.js"

	# Directory of the Twitter archive that contains the data files.
	dir_path <- "~/Downloads/twitter-archive/data"

	# get full file path
	# Build on the python:3.10 base image.
	FROM python:3.10

	WORKDIR /app

	# Install uv first: this step does not depend on the project sources, so its
	# layer stays cached across source edits.
	RUN pip install --no-cache-dir uv

	# Copy only the dependency manifest before installing, so the dependency
	# layer is rebuilt only when requirements.txt changes rather than on every
	# source change.
	COPY requirements.txt /app/
	RUN uv pip install --system --no-cache-dir -r requirements.txt

	# Copy the rest of the build context into the image.
	COPY . /app

	# Exec form: python is started directly (not via `/bin/sh -c`), so it
	# receives stop signals from the container runtime itself.
	CMD ["python", "-u", "main.py"]
	from PIL import Image
	import pytesseract
	from pdf2image import convert_from_path

	# Convert the PDF to a list of images.
	# NOTE(review): 'path_to_pdf.pdf' looks like a placeholder path — replace it
	# with the real input file before running.
	images = convert_from_path('path_to_pdf.pdf')

	# Process each image with Tesseract, requesting the English ('eng') language.
	# The loop body must be indented beneath the `for` header; at the same
	# column as the header it is an IndentationError.
	for i, img in enumerate(images):
	    text = pytesseract.image_to_string(img, lang='eng')