Skip to content

Instantly share code, notes, and snippets.

View vanatteveldt's full-sized avatar

Wouter van Atteveldt vanatteveldt

  • VU University
  • Amsterdam
View GitHub Profile
from amcat.tools import api
# Build an API client for the AmCAT server at localhost:8000, passing
# 'amcat' as both user name and password.
conn = api.AmcatAPI('http://localhost:8000', 'amcat', 'amcat')
# Create an article set named 'test' in project 1 and show the returned value.
# print(...) with a single argument behaves the same on Python 2 and Python 3,
# whereas the bare print statement is a SyntaxError on Python 3.
print(conn.create_set(project=1, name='test', provenance='bla'))
[
{"headline" : "test_hl", "medium" : "test", "date" : "2001-01-01T13:30", "text" : "bla"},
{"headline" : "test_hl2", "medium" : 3, "date" : "2001-01-01", "text" : "bla2",
"children" : [
{"headline" : "child_hl1", "medium" : "test", "date" : "2001-01-01T13:30", "text" : "childtext"},
{"headline" : "child_hl2", "medium" : "test", "date" : "2001-01-01T13:30", "text" : "childtext"}
]}
]
@vanatteveldt
vanatteveldt / test.py
Created November 11, 2013 11:08
amcat - xtas integration test session
wva@yup:~/amcat$ python ~/test.py 16502
2013-11-11 11:56:43,305 [INFO amcat.tools.xtas:63] Task e9796fa87328483c359f76e85d3b9eb9: PENDING
2013-11-11 11:56:44,315 [INFO amcat.tools.xtas:63] Task e9796fa87328483c359f76e85d3b9eb9: SENT
2013-11-11 11:56:45,320 [INFO amcat.tools.xtas:63] Task e9796fa87328483c359f76e85d3b9eb9: SENT
2013-11-11 11:56:46,325 [INFO amcat.tools.xtas:63] Task e9796fa87328483c359f76e85d3b9eb9: SENT
2013-11-11 11:56:47,330 [INFO amcat.tools.xtas:63] Task e9796fa87328483c359f76e85d3b9eb9: SENT
2013-11-11 11:56:48,335 [INFO amcat.tools.xtas:63] Task e9796fa87328483c359f76e85d3b9eb9: SENT
2013-11-11 11:56:49,342 [INFO amcat.tools.xtas:63] Task e9796fa87328483c359f76e85d3b9eb9: SUCCESS
DONE 555 [[u'PCM', 0, 3, 128L...
wva@yup:~/amcat$ python ~/test.py 16502
diff --git a/xtas/storage/elasticSearchCache.py b/xtas/storage/elasticSearchCache.py
index b8ae7cc..740dba3 100644
--- a/xtas/storage/elasticSearchCache.py
+++ b/xtas/storage/elasticSearchCache.py
@@ -180,10 +180,12 @@ class elasticSearchCache:
if ('fields' in d) and ('xtasResults' in d['fields']):
for sKey in d['fields']['xtasResults']:
aResult.append({"docid" : d[self.oES.sIdField],
- "parameters" : d['fields']['xtasResults'][sKey]['parameters'],
- "result": d['fields']['xtasResults'][sKey]['result']
wva@study:~$ python test.py 531422
Processing AmCAT article 531422 : The marines attacked the compound again. They seem to like it.
2013-11-13 15:27:17,826 [INFO amcat.tools.xtas:105] Task ae169caed33d030debfd3357702ba45a: SUCCESS
[{u'frames': [{u'target': {u'name': u'Attack', u'spans': [{u'text': u'attacked', u'end': 3, u'start': 2}]}, u'annotationSets': [{u'frameElements': [{u'name': u'Victim', u'spans': [{u'text': u'the compound again', u'end': 6, u'start': 3}]}, {u'name': u'Assailant', u'spans': [{u'text': u'The marines', u'end': 2, u'start': 0}]}], u'score': 95.51245482430869, u'rank': 0}]}], u'tokens': [u'The', u'marines', u'attacked', u'the', u'compound', u'again', u'.']}, {u'frames': [{u'target': {u'name': u'Appearance', u'spans': [{u'text': u'seem', u'end': 2, u'start': 1}]}, u'annotationSets': [{u'frameElements': [{u'name': u'Phenomenon', u'spans': [{u'text': u'They', u'end': 1, u'start': 0}]}], u'score': 56.66050591850465, u'rank': 0}]}, {u'target': {u'name': u'Experiencer_focus', u'spans': [{u'te
{
"frames": [
{
"target": {
"name": "Attack",
"spans": [
{
"text": "attacked",
"end": 3,
"start": 2
4
15/38
Word forms
After the tokenization step, all word forms are annotated within the <text> element, and each
form is enclosed in a <wf> element.
The <wf> element has the following attributes:
• wid (required): the unique id for the word form, starting with the prefix “w”.
• sent (required): sentence id of the token.
• para (optional): paragraph id.
• page (optional): page id.
# Clean a tab-separated export (presumably tweets, judging by the column
# names) and write the result to out.csv.

# Read the raw file; quote and comment handling are switched off so that quote
# characters and '#' inside fields are kept as literal data.
x = read.csv("ALL_COMBINED.csv", sep="\t", comment.char="", quote="")
# Keep only the first row for each id, and only the six listed columns.
x = x[!duplicated(x$id), c("text", "created_at", "id", "user_screen_name", "user_id", "in_reply_to_user_id")]
# Re-format created_at from "<weekday> <month> <day> <time> <utc offset> <year>"
# to "YYYY-MM-DDTHH:MM:SS".
# NOTE(review): the output carries no offset; strptime presumably yields local
# time here, so confirm which time zone the consumer of out.csv expects.
x$created_at = strftime(strptime(x$created_at, format="%a %b %d %H:%M:%S %z %Y"), format=("%Y-%m-%dT%H:%M:%S"))
# The literal string "None" stands for a missing value in this column.
x$in_reply_to_user_id[x$in_reply_to_user_id=="None"] = NA
# Write missing values as empty fields and omit row names. FALSE is spelled out
# because F is an ordinary variable that can be reassigned, unlike FALSE.
write.csv(x, file="out.csv", row.names=FALSE, na="")
from amcat.models import ArticleSet
import re
# Count how often each normalised section name occurs among the articles of
# article set 5954. Result: sects maps section name -> number of articles.
sects = {}
# Matches every run of characters that are not ASCII letters; compiled once
# so the pattern is prepared outside the loop.
non_letters = re.compile(r"[^A-Za-z]+")
for a in ArticleSet.objects.get(pk=5954).articles.all():
    s = a.section
    if not s:
        # Articles with no (or an empty) section are not counted.
        continue
    # Replace each non-letter run with one space and trim the ends, so that
    # variants differing only in digits or punctuation share one key.
    s = non_letters.sub(" ", s).strip()
    sects[s] = sects.get(s, 0) + 1
from amcat.tools.api import AmcatAPI
import os
# Read the AmCAT credentials from the environment instead of hard-coding them;
# a missing variable raises KeyError.
username = os.environ['AMCAT_USERNAME']
password = os.environ['AMCAT_PASSWORD']
# Build the API client for the amcat-dev server with those credentials.
api = AmcatAPI("http://amcat-dev.labs.vu.nl", username, password)
# Alternative target kept for reference: a local server with "amcat"/"amcat".
#api = AmcatAPI("http://localhost:8000", "amcat","amcat")
articles_json = [