Skip to content

Instantly share code, notes, and snippets.

@raprasad
Last active June 24, 2019 20:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save raprasad/27b74d65caf0e7d7ad127023f3628071 to your computer and use it in GitHub Desktop.
Save raprasad/27b74d65caf0e7d7ad127023f3628071 to your computer and use it in GitHub Desktop.
nyu augment

Augment via REST and python requests

def run_augment_test():
    """Augment via REST"""

    # Query result
    #
    task_data = """{"id": "datamart.socrata.data-cityofnewyork-us.k2tc-bipg", "score": 8.788908, "metadata": {"name": "Medallion Taxi Initial Inspection Schedule", "description": "This is a schedule of Medallion Taxicab initial inspections at the Taxi and Limousine Commission\\u2019...", "size": 686334, "nb_rows": 13537, "columns": [{"name": "Medallion_Number", "structural_type": "http://schema.org/Text", "semantic_types": []}, {"name": "Schedule_Date", "structural_type": "http://schema.org/Text", "semantic_types": ["http://schema.org/DateTime"], "mean": 1566600510.95516, "stddev": 3029873.8282078225, "coverage": [{"range": {"gte": 1561334400.0, "lte": 1571788800.0}}]}, {"name": "Schedule_Time", "structural_type": "http://schema.org/Text", "semantic_types": ["http://schema.org/DateTime"], "mean": 1561359725.6562016, "stddev": 4632.38259222503, "coverage": [{"range": {"gte": 1561352400.0, "lte": 1561374000.0}}]}, {"name": "Fleet_Agent_Code", "structural_type": "http://schema.org/Integer", "semantic_types": [], "mean": 148.76346310113024, "stddev": 140.04502936441713, "coverage": [{"range": {"gte": 0.0, "lte": 999.0}}]}, {"name": "Last_Updated_Date", "structural_type": "http://schema.org/Text", "semantic_types": ["https://schema.org/Enumeration", "http://schema.org/DateTime"], "mean": 1561248000.0, "stddev": 0.0, "coverage": [{"range": {"gte": 1561248000.0, "lte": 1561248000.0}}]}, {"name": "Last_Updated_Time", "structural_type": "http://schema.org/Text", "semantic_types": ["https://schema.org/Enumeration", "http://schema.org/DateTime"], "mean": 1561399296.0, "stddev": 0.0, "coverage": [{"range": {"gte": 1561399200.766, "lte": 1561399200.766}}]}], "materialize": {"socrata_id": "k2tc-bipg", "socrata_domain": "data.cityofnewyork.us", "socrata_updated": "2019-06-23T22:10:10.000Z", "direct_url": "https://data.cityofnewyork.us/api/views/k2tc-bipg/rows.csv?accessType=DOWNLOAD", "identifier": "datamart.socrata", "date": "2019-06-24T02:01:08.225169Z"}, "date": "2019-06-24T03:19:48.375479Z"}, "augmentation": {"type": "none", "left_columns": [], "right_columns": []}}"""

    # syntax check
    #
    assert json.loads(task_data), 'task_data not valid json'

    # Base file to augment.  Has two columns: Medallion_Number, Earnings
    #       2 columns x 20 rows
    #
    input_fpath = join(INPUT_DIR, 'medallion-test-file.csv')
    assert isfile(input_fpath), f'File not found: {input_fpath}'

    # Set file for request
    #
    files = {'data': open(input_fpath, 'rb')}

    # Set headers
    #
    headers = {"Content-Type": "multipart/form-data"}

    # Augment url
    #
    augment_url = 'https://datamart.d3m.vida-nyu.org/augment'

    print('task_data', task_data)
    print('-' * 40)
    print('augment_url', augment_url)
    print('-' * 40)

    # Make request
    #
    try:
        response = requests.post(augment_url,
                                 headers=headers,
                                 files=files,
                                 data=task_data,
                                 verify=False,
                                 stream=True)

    except requests.exceptions.Timeout as err_obj:
        user_msg = ('Request timed out. responded with: %s' % err_obj)
        print(user_msg)
        return
    except ValueError as err_obj:
        user_msg = ('ValueError: %s' % err_obj)
        print(user_msg)
        return



    if response.status_code != 200:
        user_msg = (f'Augment failed.  Status code:'
                    f' {response.status_code}.  response: {response.text}')
        print(user_msg)
        return

    print('augment success!')
    return

    data_foldername = join(OUTPUT_DIR, 'augment-results',)
    if not isdir(data_foldername):
        os.makedirs(data_foldername)

    try:
        with zipfile.ZipFile(BytesIO(response.content), 'r') as data_zip:
            data_zip.extractall(data_foldername)
    except RuntimeError as err_obj:
        user_msg = (f'Failed to extract zip to "{data_foldername}".'
                    f' Error: %s') % (err_obj,)
        print(user_msg)

    msgt('files downloaded to %s' % data_foldername)

Result

(nyu_datamart) dev_scripts $ python search_test3.py 

task_data {"id": "datamart.socrata.data-cityofnewyork-us.k2tc-bipg", "score": 8.788908, "metadata": {"name": "Medallion Taxi Initial Inspection Schedule", "description": "This is a schedule of Medallion Taxicab initial inspections at the Taxi and Limousine Commission\u2019...", "size": 686334, "nb_rows": 13537, "columns": [{"name": "Medallion_Number", "structural_type": "http://schema.org/Text", "semantic_types": []}, {"name": "Schedule_Date", "structural_type": "http://schema.org/Text", "semantic_types": ["http://schema.org/DateTime"], "mean": 1566600510.95516, "stddev": 3029873.8282078225, "coverage": [{"range": {"gte": 1561334400.0, "lte": 1571788800.0}}]}, {"name": "Schedule_Time", "structural_type": "http://schema.org/Text", "semantic_types": ["http://schema.org/DateTime"], "mean": 1561359725.6562016, "stddev": 4632.38259222503, "coverage": [{"range": {"gte": 1561352400.0, "lte": 1561374000.0}}]}, {"name": "Fleet_Agent_Code", "structural_type": "http://schema.org/Integer", "semantic_types": [], "mean": 148.76346310113024, "stddev": 140.04502936441713, "coverage": [{"range": {"gte": 0.0, "lte": 999.0}}]}, {"name": "Last_Updated_Date", "structural_type": "http://schema.org/Text", "semantic_types": ["https://schema.org/Enumeration", "http://schema.org/DateTime"], "mean": 1561248000.0, "stddev": 0.0, "coverage": [{"range": {"gte": 1561248000.0, "lte": 1561248000.0}}]}, {"name": "Last_Updated_Time", "structural_type": "http://schema.org/Text", "semantic_types": ["https://schema.org/Enumeration", "http://schema.org/DateTime"], "mean": 1561399296.0, "stddev": 0.0, "coverage": [{"range": {"gte": 1561399200.766, "lte": 1561399200.766}}]}], "materialize": {"socrata_id": "k2tc-bipg", "socrata_domain": "data.cityofnewyork.us", "socrata_updated": "2019-06-23T22:10:10.000Z", "direct_url": "https://data.cityofnewyork.us/api/views/k2tc-bipg/rows.csv?accessType=DOWNLOAD", "identifier": "datamart.socrata", "date": "2019-06-24T02:01:08.225169Z"}, "date": "2019-06-24T03:19:48.375479Z"}, "augmentation": {"type": "none", "left_columns": [], "right_columns": []}}
----------------------------------------
augment_url https://datamart.d3m.vida-nyu.org/augment
----------------------------------------
Traceback (most recent call last):
  File "search_test3.py", line 137, in <module>
    run_augment_test()
  File "search_test3.py", line 103, in run_augment_test
    stream=True)
  File "/Users/ramanprasad/.virtualenvs/nyu_datamart/lib/python3.6/site-packages/requests/api.py", line 112, in post
    return request('post', url, data=data, json=json, **kwargs)
  File "/Users/ramanprasad/.virtualenvs/nyu_datamart/lib/python3.6/site-packages/requests/api.py", line 58, in request
    return session.request(method=method, url=url, **kwargs)
  File "/Users/ramanprasad/.virtualenvs/nyu_datamart/lib/python3.6/site-packages/requests/sessions.py", line 498, in request
    prep = self.prepare_request(req)
  File "/Users/ramanprasad/.virtualenvs/nyu_datamart/lib/python3.6/site-packages/requests/sessions.py", line 441, in prepare_request
    hooks=merge_hooks(request.hooks, self.hooks),
  File "/Users/ramanprasad/.virtualenvs/nyu_datamart/lib/python3.6/site-packages/requests/models.py", line 312, in prepare
    self.prepare_body(data, files, json)
  File "/Users/ramanprasad/.virtualenvs/nyu_datamart/lib/python3.6/site-packages/requests/models.py", line 500, in prepare_body
    (body, content_type) = self._encode_files(files, data)
  File "/Users/ramanprasad/.virtualenvs/nyu_datamart/lib/python3.6/site-packages/requests/models.py", line 122, in _encode_files
    raise ValueError("Data must not be a string.")
ValueError: Data must not be a string.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment