Hamza Tahir htahir1

## simple_zenml_pipeline.py
# {% include 'template/license_header' %}

from typing import Tuple
from typing_extensions import Annotated

import pandas as pd
from sklearn.model_selection import train_test_split
from zenml import step

import random

## sagemaker_pydantic_network_config.py
"""
To be used with the ZenML SagemakerOrchestrator
https://github.com/zenml-io/zenml/blob/main/src/zenml/integrations/aws/orchestrators/sagemaker_orchestrator.py
as a small duck-typed stand-in for the sagemaker network config object:
https://github.com/aws/sagemaker-python-sdk/blob/master/src/sagemaker/network.py#L24
"""
from __future__ import absolute_import

from typing import Union, Optional, List

## zenml_pandas_csv_materializer.py
"""Materializer for Pandas CSV."""

import os
import tempfile
from typing import Any, Type, Union

import pandas as pd
import numpy as np

from zenml.artifacts import DataArtifact, SchemaArtifact, StatisticsArtifact

## cloud_instances_cost_comparison.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                htahir1
                / cloud_instances_cost_comparison.md
            
            
              Created
              January 29, 2021 11:24
            
              
                Cost of a standard ML configuration across the three major cloud providers.
              
          
| Provider | Configuration          | Cost ($/h) |
|----------|------------------------|------------|
| GCP      | n1-standard-16 with P4 | 1.36       |
| Azure    | NV6 with M60           | 1.14       |
| AWS      | g3.4xlarge with M60    | 1.14       |


## spot_instances_cost_comparison_with_savings.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                htahir1
                / spot_instances_cost_comparison_with_savings.md
            
            
              Last active
              January 29, 2021 11:22
            
              
                Comparison of costs for normal vs spot/preemptible instances for all three major cloud providers.
              
          
| Provider | Configuration          | Cost ($/h) | Spot Cost ($/h) | Cost Savings |
|----------|------------------------|------------|-----------------|--------------|
| GCP      | n1-standard-16 with P4 | 1.36       | 0.38            | 72%          |
| Azure    | NV6 with M60           | 1.14       | 0.20            | 82%          |
| AWS      | g3.4xlarge with M60    | 1.14       | 0.34            | 70%          |


## zenml_gcp_gpu_orchestration.py
# Run the training pipeline on a GCP orchestrator backend configured with a
# single NVIDIA Tesla K80 GPU attached to an n1-standard-4 machine.
# NOTE(review): `training_pipeline` and `OrchestratorGCPBackend` are not
# defined in this snippet — presumably created/imported earlier in the
# original script; confirm before running.
training_pipeline.run(
    backend=OrchestratorGCPBackend(
        preemptible=True,  # reduce costs by using preemptible instance
        machine_type='n1-standard-4',
        gpu='nvidia-tesla-k80',
        gpu_count=1,
    )
)

## json_pd_test.py
import pandas as pd
from time import time
from pprint import pprint
import simplejson
import json
import ujson


def _loads(data, serializer):
    t_start = time()
	# {% include 'template/license_header' %}

	from typing import Tuple
	from typing_extensions import Annotated

	import pandas as pd
	from sklearn.model_selection import train_test_split
	from zenml import step

	import random
	"""
	To be used with the ZenML SagemakerOrchestrator
	https://github.com/zenml-io/zenml/blob/main/src/zenml/integrations/aws/orchestrators/sagemaker_orchestrator.py
	as a small duck typed juke for the sagemaker network config object:
	https://github.com/aws/sagemaker-python-sdk/blob/master/src/sagemaker/network.py#L24
	"""
	from __future__ import absolute_import

	from typing import Union, Optional, List
	"""Materializer for Pandas CSV."""

	import os
	import tempfile
	from typing import Any, Type, Union

	import pandas as pd
	import numpy as np

	from zenml.artifacts import DataArtifact, SchemaArtifact, StatisticsArtifact
	training_pipeline.run(
	backend=OrchestratorGCPBackend(
	preemptible=True, # reduce costs by using preemptible instance
	machine_type='n1-standard-4',
	gpu='nvidia-tesla-k80',
	gpu_count=1,
	)
	)
	import pandas as pd
	from time import time
	from pprint import pprint
	import simplejson
	import json
	import ujson


	def _loads(data, serializer):
	t_start = time()