Skip to content

Instantly share code, notes, and snippets.

@JasperHG90
Created October 25, 2016 07:46
Show Gist options
  • Save JasperHG90/d0d632cf492aee0abdf6f0f76cb8f084 to your computer and use it in GitHub Desktop.
Save JasperHG90/d0d632cf492aee0abdf6f0f76cb8f084 to your computer and use it in GitHub Desktop.
Use Cortical's SF API to create a semantic fingerprint for news articles in bulk
# Use Cortical's SF API to create a semantic fingerprint for news articles in bulk
# Copyright (C) 2016 Jasper Ginn
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Fingerprint each article
# Jasper Ginn
# 04/09/2016
#
import os
import retinasdk
import json
import sys
'''
Fingerprint function
'''
def fingerprint(record):
    """Attach a semantic fingerprint and keywords to a single record.

    The text is read from ``record["fields"][0]``, UTF-8 encoded, and passed
    to the module-level ``fp`` client (created in the ``__main__`` section
    of this script). Both lookups are best-effort: if either one fails, the
    corresponding key is set to an empty list so that a single bad record
    does not abort a bulk run.

    Args:
        record: Mutable mapping with a ``"fields"`` sequence whose first
            element is the text to fingerprint (presumably the article
            text -- confirm against the input data).

    Returns:
        The same ``record`` object, updated in place with the keys ``"FP"``
        (result of ``fp.getFingerprint``) and ``"KW"`` (result of
        ``fp.getKeywords``), each falling back to ``[]`` on failure.
    """
    # The catch is deliberately broad (the original author notes that the
    # exceptions raised here are "a bit buggy"), but it is limited to
    # ``Exception`` so that KeyboardInterrupt / SystemExit can still stop
    # a long-running bulk job instead of being silently swallowed.
    try:
        # Get fingerprint
        record["FP"] = fp.getFingerprint(record["fields"][0].encode("utf8"))
    except Exception:
        record["FP"] = []
    try:
        # Get keywords
        record["KW"] = fp.getKeywords(record["fields"][0].encode("utf8"))
    except Exception:
        record["KW"] = []
    return record
'''
Call main
'''
if __name__ == "__main__":
    # Command line: <infile.json> <outfile.json> <auth_key>
    # Fail with a readable usage message instead of a bare IndexError
    # when arguments are missing.
    if len(sys.argv) < 4:
        sys.stderr.write(
            "Usage: %s <infile.json> <outfile.json> <auth_key>\n" % sys.argv[0]
        )
        sys.exit(1)
    # Get arguments
    data_infile_file = sys.argv[1]
    data_outfile_file = sys.argv[2]
    auth_key = sys.argv[3]
    # Fingerprint authentication. ``fp`` must remain a module-level name:
    # fingerprint() looks it up as a global.
    fp = retinasdk.LiteClient(auth_key)
    # Load data; the file is iterated below as a sequence of sections,
    # each of which is a sequence of records.
    with open(data_infile_file) as inFile:
        data = json.load(inFile)
    # For each record, get fingerprint and keywords, flattening all
    # sections into a single list.
    master_list = list()
    for w, section in enumerate(data, 1):
        # Parenthesised single-argument form prints the same text under
        # both the Python 2 print statement and the Python 3 function.
        print("Now working on section " + str(w))
        for record in section:
            master_list.append(fingerprint(record))
    # Dump data
    with open(data_outfile_file, 'w') as outFile:
        json.dump(master_list, outFile, sort_keys=True, indent=4)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment