Wouter van Atteveldt vanatteveldt

## create_articleset.py
from amcat.tools import api
# Connect to the AmCAT server on localhost and call create_set for project 1.
# NOTE(review): the 'amcat'/'amcat' credentials are hard-coded -- presumably
# only meant for a local test instance; confirm before reusing elsewhere.
conn = api.AmcatAPI('http://localhost:8000', 'amcat', 'amcat')
# print() with a single argument gives the same output on Python 2 and 3,
# whereas the bare print statement is a syntax error on Python 3.
print(conn.create_set(project=1, name='test', provenance='bla'))

## articles.json
[
{"headline" : "test_hl", "medium" : "test", "date" : "2001-01-01T13:30", "text" : "bla"},
{"headline" : "test_hl2", "medium" : 3, "date" : "2001-01-01", "text" : "bla2",
 "children" : [
     {"headline" : "child_hl1", "medium" : "test", "date" : "2001-01-01T13:30", "text" : "childtext"},
     {"headline" : "child_hl2", "medium" : "test", "date" : "2001-01-01T13:30", "text" : "childtext"}
  ]}
]

## test.py
wva@yup:~/amcat$ python ~/test.py 16502
2013-11-11 11:56:43,305 [INFO amcat.tools.xtas:63] Task e9796fa87328483c359f76e85d3b9eb9: PENDING
2013-11-11 11:56:44,315 [INFO amcat.tools.xtas:63] Task e9796fa87328483c359f76e85d3b9eb9: SENT
2013-11-11 11:56:45,320 [INFO amcat.tools.xtas:63] Task e9796fa87328483c359f76e85d3b9eb9: SENT
2013-11-11 11:56:46,325 [INFO amcat.tools.xtas:63] Task e9796fa87328483c359f76e85d3b9eb9: SENT
2013-11-11 11:56:47,330 [INFO amcat.tools.xtas:63] Task e9796fa87328483c359f76e85d3b9eb9: SENT
2013-11-11 11:56:48,335 [INFO amcat.tools.xtas:63] Task e9796fa87328483c359f76e85d3b9eb9: SENT
2013-11-11 11:56:49,342 [INFO amcat.tools.xtas:63] Task e9796fa87328483c359f76e85d3b9eb9: SUCCESS
DONE 555 [[u'PCM', 0, 3, 128L...
wva@yup:~/amcat$ python ~/test.py 16502

## elasticSearchCache.diff
diff --git a/xtas/storage/elasticSearchCache.py b/xtas/storage/elasticSearchCache.py
index b8ae7cc..740dba3 100644
--- a/xtas/storage/elasticSearchCache.py
+++ b/xtas/storage/elasticSearchCache.py
@@ -180,10 +180,12 @@ class elasticSearchCache:
             if ('fields' in d) and ('xtasResults' in d['fields']):
                 for sKey in d['fields']['xtasResults']:
                     aResult.append({"docid" : d[self.oES.sIdField],
-                 "parameters" : d['fields']['xtasResults'][sKey]['parameters'],
-                      "result": d['fields']['xtasResults'][sKey]['result']

## test.py
wva@study:~$ python test.py 531422
Processing AmCAT article 531422 : The marines attacked the compound again. They seem to like it.

2013-11-13 15:27:17,826 [INFO amcat.tools.xtas:105] Task ae169caed33d030debfd3357702ba45a: SUCCESS
[{u'frames': [{u'target': {u'name': u'Attack', u'spans': [{u'text': u'attacked', u'end': 3, u'start': 2}]}, u'annotationSets': [{u'frameElements': [{u'name': u'Victim', u'spans': [{u'text': u'the compound again', u'end': 6, u'start': 3}]}, {u'name': u'Assailant', u'spans': [{u'text': u'The marines', u'end': 2, u'start': 0}]}], u'score': 95.51245482430869, u'rank': 0}]}], u'tokens': [u'The', u'marines', u'attacked', u'the', u'compound', u'again', u'.']}, {u'frames': [{u'target': {u'name': u'Appearance', u'spans': [{u'text': u'seem', u'end': 2, u'start': 1}]}, u'annotationSets': [{u'frameElements': [{u'name': u'Phenomenon', u'spans': [{u'text': u'They', u'end': 1, u'start': 0}]}], u'score': 56.66050591850465, u'rank': 0}]}, {u'target': {u'name': u'Experiencer_focus', u'spans': [{u'te

## gist:7483538
  {
    "frames": [
      {
        "target": {
          "name": "Attack",
          "spans": [
            {
              "text": "attacked",
              "end": 3,
              "start": 2

## gist:7518075
4
15/38
Word forms
After the tokenization step, all word forms are annotated within the <text> element, and each
form is enclosed by a <wf> element.
The <wf> element has the following attributes:
• wid (required): the unique id for the word form, starting with the prefix “w”.
• sent (required): sentence id of the token.
• para (optional): paragraph id.
• page (optional): page id.

## gist:7789958
# Read the tab-separated input with quote and comment interpretation switched off.
x <- read.csv("ALL_COMBINED.csv", sep = "\t", comment.char = "", quote = "")

# Keep only the first row seen for each id, restricted to the six listed columns.
keep_cols <- c("text", "created_at", "id", "user_screen_name", "user_id", "in_reply_to_user_id")
x <- x[!duplicated(x$id), keep_cols]

# Parse created_at in its "%a %b %d %H:%M:%S %z %Y" layout and re-emit it
# as "%Y-%m-%dT%H:%M:%S".
parsed_created <- strptime(x$created_at, format = "%a %b %d %H:%M:%S %z %Y")
x$created_at <- strftime(parsed_created, format = ("%Y-%m-%dT%H:%M:%S"))

# Replace the literal string "None" in the reply column by NA.
x$in_reply_to_user_id[x$in_reply_to_user_id == "None"] <- NA

# Write the result without row names; NA values become empty fields.
write.csv(x, file = "out.csv", row.names = FALSE, na = "")

## sections.py
from amcat.models import ArticleSet
import re

# Count how often each cleaned-up section string occurs among the articles of
# article set 5954. Cleaning collapses every run of non-letters into a single
# space and strips leading/trailing whitespace.
sects = {}
non_letters = re.compile("[^A-Za-z]+")

for article in ArticleSet.objects.get(pk=5954).articles.all():
    section = article.section
    if section:  # articles with an empty/missing section are not counted
        key = non_letters.sub(" ", section).strip()
        sects[key] = sects.get(key, 0) + 1

## api_test.py
from amcat.tools.api import AmcatAPI
import os

# Credentials are read from the environment; a missing variable raises KeyError.
username, password = os.environ['AMCAT_USERNAME'], os.environ['AMCAT_PASSWORD']

# Connect to amcat-dev.labs.vu.nl. To target a local server instead, use:
# api = AmcatAPI("http://localhost:8000", "amcat","amcat")
api = AmcatAPI("http://amcat-dev.labs.vu.nl", username, password)

articles_json = [
	from amcat.tools import api
	conn = api.AmcatAPI('http://localhost:8000', 'amcat', 'amcat')
	print conn.create_set(project=1, name='test', provenance='bla')
	[
	{"headline" : "test_hl", "medium" : "test", "date" : "2001-01-01T13:30", "text" : "bla"},
	{"headline" : "test_hl2", "medium" : 3, "date" : "2001-01-01", "text" : "bla2",
	"children" : [
	{"headline" : "child_hl1", "medium" : "test", "date" : "2001-01-01T13:30", "text" : "childtext"},
	{"headline" : "child_hl2", "medium" : "test", "date" : "2001-01-01T13:30", "text" : "childtext"}
	]}
	]
	wva@yup:~/amcat$ python ~/test.py 16502
	2013-11-11 11:56:43,305 [INFO amcat.tools.xtas:63] Task e9796fa87328483c359f76e85d3b9eb9: PENDING
	2013-11-11 11:56:44,315 [INFO amcat.tools.xtas:63] Task e9796fa87328483c359f76e85d3b9eb9: SENT
	2013-11-11 11:56:45,320 [INFO amcat.tools.xtas:63] Task e9796fa87328483c359f76e85d3b9eb9: SENT
	2013-11-11 11:56:46,325 [INFO amcat.tools.xtas:63] Task e9796fa87328483c359f76e85d3b9eb9: SENT
	2013-11-11 11:56:47,330 [INFO amcat.tools.xtas:63] Task e9796fa87328483c359f76e85d3b9eb9: SENT
	2013-11-11 11:56:48,335 [INFO amcat.tools.xtas:63] Task e9796fa87328483c359f76e85d3b9eb9: SENT
	2013-11-11 11:56:49,342 [INFO amcat.tools.xtas:63] Task e9796fa87328483c359f76e85d3b9eb9: SUCCESS
	DONE 555 [[u'PCM', 0, 3, 128L...
	wva@yup:~/amcat$ python ~/test.py 16502
	diff --git a/xtas/storage/elasticSearchCache.py b/xtas/storage/elasticSearchCache.py
	index b8ae7cc..740dba3 100644
	--- a/xtas/storage/elasticSearchCache.py
	+++ b/xtas/storage/elasticSearchCache.py
	@@ -180,10 +180,12 @@ class elasticSearchCache:
	if ('fields' in d) and ('xtasResults' in d['fields']):
	for sKey in d['fields']['xtasResults']:
	aResult.append({"docid" : d[self.oES.sIdField],
	- "parameters" : d['fields']['xtasResults'][sKey]['parameters'],
	- "result": d['fields']['xtasResults'][sKey]['result']
	wva@study:~$ python test.py 531422
	Processing AmCAT article 531422 : The marines attacked the compound again. They seem to like it.

	2013-11-13 15:27:17,826 [INFO amcat.tools.xtas:105] Task ae169caed33d030debfd3357702ba45a: SUCCESS
	[{u'frames': [{u'target': {u'name': u'Attack', u'spans': [{u'text': u'attacked', u'end': 3, u'start': 2}]}, u'annotationSets': [{u'frameElements': [{u'name': u'Victim', u'spans': [{u'text': u'the compound again', u'end': 6, u'start': 3}]}, {u'name': u'Assailant', u'spans': [{u'text': u'The marines', u'end': 2, u'start': 0}]}], u'score': 95.51245482430869, u'rank': 0}]}], u'tokens': [u'The', u'marines', u'attacked', u'the', u'compound', u'again', u'.']}, {u'frames': [{u'target': {u'name': u'Appearance', u'spans': [{u'text': u'seem', u'end': 2, u'start': 1}]}, u'annotationSets': [{u'frameElements': [{u'name': u'Phenomenon', u'spans': [{u'text': u'They', u'end': 1, u'start': 0}]}], u'score': 56.66050591850465, u'rank': 0}]}, {u'target': {u'name': u'Experiencer_focus', u'spans': [{u'te
	{
	"frames": [
	{
	"target": {
	"name": "Attack",
	"spans": [
	{
	"text": "attacked",
	"end": 3,
	"start": 2
	4
	15/38
	Word forms
	After tokenization step, all word forms are annotated within the <text> element, and each
	form is enclosed by a <wf> element.
	The <wf> element has the following attributes:
	• wid (required): the unique id for the word form, starting with the prefix “w”.
	• sent (required): sentence id of the token.
	• para (optional): paragraph id.
	• page (optional): page id.
	x = read.csv("ALL_COMBINED.csv", sep="\t", comment.char="", quote="")
	x = x[!duplicated(x$id), c("text", "created_at", "id", "user_screen_name", "user_id", "in_reply_to_user_id")]
	x$created_at = strftime(strptime(x$created_at, format="%a %b %d %H:%M:%S %z %Y"), format=("%Y-%m-%dT%H:%M:%S"))
	x$in_reply_to_user_id[x$in_reply_to_user_id=="None"] = NA
	write.csv(x, file="out.csv", row.names=F, na="")
	from amcat.models import ArticleSet
	import re

	sects = {}

	for a in ArticleSet.objects.get(pk=5954).articles.all():
	s = a.section
	if not s: continue
	s = re.sub("[^A-Za-z]+", " ", s).strip()
	sects[s] = sects.get(s, 0) + 1
	from amcat.tools.api import AmcatAPI
	import os

	username = os.environ['AMCAT_USERNAME']
	password = os.environ['AMCAT_PASSWORD']

	api = AmcatAPI("http://amcat-dev.labs.vu.nl", username, password)
	#api = AmcatAPI("http://localhost:8000", "amcat","amcat")

	articles_json = [