Christopher Erick Moody cemoody

## parallel_write_bq_storage_api.py
"""Wrapper around BigQuery call."""
from __future__ import annotations
from typing import Any, Iterable
import logging
from google.cloud import bigquery_storage
from google.cloud.bigquery_storage_v1 import exceptions as bqstorage_exceptions

from google.cloud.bigquery_storage_v1 import types, writer
from google.protobuf import descriptor_pb2
from google.protobuf.descriptor import Descriptor

## parallel_read_from_bq_table.py
from google.cloud.bigquery_storage import BigQueryReadClient
from google.cloud.bigquery_storage import types
from google.cloud import bigquery_storage
from tqdm import tqdm
import pandas
import os
import dill


project_id = (

## sqliteackqueue.py
import os
import io
import json
import math
import time
import random
import numpy as np
import cachetools.func
import sqlite3
from loguru import logger

## logs.txt
✓ Initialized. View app at https://modal.com/apps/ap-lHATR9JHJ7S5eGXYHigc75
✓ Created objects.
├── 🔨 Created sample_fn.
├── 🔨 Mounted /Users/chris/code/search/gumbase/jobs/partition.py at /root
├── 🔨 Mounted /Users/chris/code/gumhouse/gumhouse at /root/gumhouse
├── 🔨 Created sample_job.
├── 🔨 Created fit_top_level_kmeans.
├── 🔨 Created scatter_by_centroids_single.
├── 🔨 Created scatter_by_centroids.
├── 🔨 Created gather_single.

## statefulset.yml
apiVersion: v1
kind: Service
metadata:
  name: vespa
  labels:
    app: vespa
spec:
  selector:
    app: vespa
  type: NodePort

## file_data_loader.py
import multiprocessing
import queue
from loguru import logger

import pandas as pd


def chunks(df, chunk_size=1000):
    """Yield successive slices of ``df``, each at most ``chunk_size`` long.

    Slices are taken as ``df[start : start + chunk_size]`` for every
    ``start`` in ``range(0, len(df), chunk_size)``, so the final slice may
    be shorter than the others.
    """
    offsets = range(0, len(df), chunk_size)
    yield from (df[start : start + chunk_size] for start in offsets)

## persist_to_file.py
import os
import pickle
import time
import glob
import joblib
import inspect
from loguru import logger


def persist_to_file():

## switch_modal.py

def switch_modal_function(stub=None, use_modal=True, **kwargs):
    """Build a decorator that optionally registers a function with ``stub``.

    Args:
        stub: Object exposing ``function(**kwargs)``, which must return a
            decorator. Required when ``use_modal`` is true.
        use_modal: When true, the decorated function is passed through
            ``stub.function(**kwargs)``; when false it is returned unchanged.
        **kwargs: Forwarded verbatim to ``stub.function``.

    Returns:
        A decorator that takes the function to wrap.

    Raises:
        AssertionError: When the decorator is applied with ``use_modal`` true
            and ``stub`` is ``None``.
    """

    def wrapper(inner_func):
        if not use_modal:
            return inner_func
        if stub is None:
            # Raised explicitly instead of using an ``assert`` statement so
            # the check is not stripped under ``python -O``. AssertionError
            # is the type an ``assert`` would raise, so callers see the same
            # exception and message either way.
            raise AssertionError("If using modal, please provide `stub`.")
        return stub.function(**kwargs)(inner_func)

    return wrapper

## check_recsys.py

def check_line_by_line(fn_result, fn_truth):
    """Walk a result file and a truth file in lockstep, one line at a time.

    Both files are read fully into memory with ``readlines()`` before the
    comparison loop starts.

    NOTE(review): only the first statement of the loop body is visible in
    this excerpt; the actual per-line check is not shown here.

    Args:
        fn_result: Path of the result file, opened for reading as text.
        fn_truth: Path of the truth file, opened for reading as text.
    """
    logger.info(f"Checking line by line in {fn_result} and {fn_truth}")
    with open(fn_result, 'r') as results:
        res = results.readlines()
    with open(fn_truth, 'r') as truth:
        tru = truth.readlines()

    # zip() stops at the shorter list, so surplus lines in either file are
    # never visited by this loop.
    for i, (line_t, line_r) in enumerate(zip(tru, res)):
        # Each truth line is split into fields on the \x01 character.
        features = line_t.split('\x01')

## typecast.py
from typing import get_type_hints


def cast(argname, value, hints):
    """ Only cast arguments if type hints are available for them.
    """
    if argname in hints:
        expected_type = hints[argname]
        if not issubclass(type(value), expected_type):
            # Will throw type error if argument cannot be cast
	"""Wrapper around BigQuery call."""
	from __future__ import annotations
	from typing import Any, Iterable
	import logging
	from google.cloud import bigquery_storage
	from google.cloud.bigquery_storage_v1 import exceptions as bqstorage_exceptions

	from google.cloud.bigquery_storage_v1 import types, writer
	from google.protobuf import descriptor_pb2
	from google.protobuf.descriptor import Descriptor
	from google.cloud.bigquery_storage import BigQueryReadClient
	from google.cloud.bigquery_storage import types
	from google.cloud import bigquery_storage
	from tqdm import tqdm
	import pandas
	import os
	import dill


	project_id = (
	import os
	import io
	import json
	import math
	import time
	import random
	import numpy as np
	import cachetools.func
	import sqlite3
	from loguru import logger
	✓ Initialized. View app at https://modal.com/apps/ap-lHATR9JHJ7S5eGXYHigc75
	✓ Created objects.
	├── 🔨 Created sample_fn.
	├── 🔨 Mounted /Users/chris/code/search/gumbase/jobs/partition.py at /root
	├── 🔨 Mounted /Users/chris/code/gumhouse/gumhouse at /root/gumhouse
	├── 🔨 Created sample_job.
	├── 🔨 Created fit_top_level_kmeans.
	├── 🔨 Created scatter_by_centroids_single.
	├── 🔨 Created scatter_by_centroids.
	├── 🔨 Created gather_single.
	apiVersion: v1
	kind: Service
	metadata:
	name: vespa
	labels:
	app: vespa
	spec:
	selector:
	app: vespa
	type: NodePort
	import multiprocessing
	import queue
	from loguru import logger

	import pandas as pd


	def chunks(df, chunk_size=1000):
	    """Yield successive slices of ``df``, each at most ``chunk_size`` long.

	    Slices are taken as ``df[start : start + chunk_size]`` for every
	    ``start`` in ``range(0, len(df), chunk_size)``, so the final slice
	    may be shorter than the others.
	    """
	    # The nested indentation is restored here; with the loop and the
	    # ``yield`` flattened to the ``def`` column the block does not parse.
	    for start in range(0, len(df), chunk_size):
	        yield df[start : start + chunk_size]
	import os
	import pickle
	import time
	import glob
	import joblib
	import inspect
	from loguru import logger


	def persist_to_file():

	def switch_modal_function(stub=None, use_modal=True, **kwargs):
	    """Build a decorator that optionally registers a function with ``stub``.

	    Args:
	        stub: Object exposing ``function(**kwargs)``, which must return a
	            decorator. Required when ``use_modal`` is true.
	        use_modal: When true, the decorated function is passed through
	            ``stub.function(**kwargs)``; when false it is returned
	            unchanged.
	        **kwargs: Forwarded verbatim to ``stub.function``.

	    Returns:
	        A decorator that takes the function to wrap.

	    Raises:
	        AssertionError: When the decorator is applied with ``use_modal``
	            true and ``stub`` is ``None``.
	    """

	    def wrapper(inner_func):
	        if not use_modal:
	            return inner_func
	        if stub is None:
	            # Raised explicitly instead of using an ``assert`` statement
	            # so the check is not stripped under ``python -O``.
	            # AssertionError is the type an ``assert`` would raise.
	            raise AssertionError("If using modal, please provide `stub`.")
	        return stub.function(**kwargs)(inner_func)

	    return wrapper

	def check_line_by_line(fn_result, fn_truth):
	    """Walk a result file and a truth file in lockstep, one line at a time.

	    Both files are read fully into memory with ``readlines()`` before the
	    comparison loop starts.

	    NOTE(review): only the first statement of the loop body is visible in
	    this excerpt; the actual per-line check is not shown here.

	    Args:
	        fn_result: Path of the result file, opened for reading as text.
	        fn_truth: Path of the truth file, opened for reading as text.
	    """
	    logger.info(f"Checking line by line in {fn_result} and {fn_truth}")
	    with open(fn_result, 'r') as results:
	        res = results.readlines()
	    with open(fn_truth, 'r') as truth:
	        tru = truth.readlines()

	    # zip() stops at the shorter list, so surplus lines in either file
	    # are never visited by this loop.
	    for i, (line_t, line_r) in enumerate(zip(tru, res)):
	        # Each truth line is split into fields on the \x01 character.
	        features = line_t.split('\x01')
	from typing import get_type_hints


	def cast(argname, value, hints):
	""" Only cast arguments if type hints are available for them.
	"""
	if argname in hints:
	expected_type = hints[argname]
	if not issubclass(type(value), expected_type):
	# Will throw type error if argument cannot be cast