Skip to content

Instantly share code, notes, and snippets.

@albertmeronyo
Last active April 12, 2016 18:23
Show Gist options
  • Save albertmeronyo/d091fc082c97803f49e9 to your computer and use it in GitHub Desktop.
Save albertmeronyo/d091fc082c97803f49e9 to your computer and use it in GitHub Desktop.
SCRY statistical queries
/* Pearson's correlation coefficient */
#include <stdio.h>
#include <math.h>
/*
 * Reads n integer (x, y) pairs from standard input and prints their
 * Pearson correlation coefficient:
 *
 *   r = (n*Sxy - Sx*Sy) / sqrt((n*Sxx - Sx^2) * (n*Syy - Sy^2))
 *
 * The pairs are consumed as they are read, so no fixed-size buffers are
 * needed and n is not capped. Sums are accumulated in double so that
 * products of large inputs do not overflow an int.
 *
 * Returns 0 on success, 1 on invalid input or when the coefficient is
 * undefined (one of the series has zero variance).
 */
int main() {
    int i, n;
    int xi, yi;
    double xsum = 0.0, ysum = 0.0, xysum = 0.0, xsqr_sum = 0.0, ysqr_sum = 0.0;
    double num, deno;

    /* get the number of entries from the user */
    printf("Enter the value for n:");
    if (scanf("%d", &n) != 1 || n < 2) {
        /* fewer than two points cannot define a correlation */
        fprintf(stderr, "n must be an integer >= 2\n");
        return 1;
    }

    /* read each pair and fold it straight into the running sums */
    printf("Enter the value for x and y:\n");
    for (i = 0; i < n; i++) {
        printf("x[%d] and y[%d]: ", i, i);
        if (scanf("%d%d", &xi, &yi) != 2) {
            fprintf(stderr, "invalid input for pair %d\n", i);
            return 1;
        }
        xsum += xi;
        ysum += yi;
        xysum += (double)xi * yi;
        xsqr_sum += (double)xi * xi;
        ysqr_sum += (double)yi * yi;
    }

    num = n * xysum - xsum * ysum;
    deno = (n * xsqr_sum - xsum * xsum) * (n * ysqr_sum - ysum * ysum);

    /* a constant series makes the denominator zero: r is undefined */
    if (deno <= 0.0) {
        fprintf(stderr, "Correlation Coefficient is undefined (zero variance)\n");
        return 1;
    }

    /* print the result */
    printf("Correlation Coefficient : %.4f\n", num / sqrt(deno));
    return 0;
}
/* Standard deviation implementation in C for Virtuoso extension option B */
float standard_deviation(float data[], int n)
{
float mean=0.0, sum_deviation=0.0;
int i;
for(i=0; i<n;++i)
{
mean+=data[i];
}
mean=mean/n;
for(i=0; i<n;++i)
sum_deviation+=(data[i]-mean)*(data[i]-mean);
return sqrt(sum_deviation/n);
}
// Custom Jena function taking one argument: returns the namespace part of a URI node.
public class namespace extends FunctionBase1
{
    public namespace()
    {
        super();
    }

    /**
     * Returns the namespace of the URI held by {@code v} as a string value.
     *
     * @param v the single function argument
     * @return a string NodeValue built from the node's namespace
     * @throws ExprEvalException when the argument's node is not a URI
     */
    public NodeValue exec(NodeValue v)
    {
        Node node = v.asNode();
        if (node.isURI())
        {
            String ns = node.getNameSpace();
            return NodeValue.makeString(ns);
        }
        throw new ExprEvalException("Not a URI: " + FmtUtils.stringForNode(node));
    }
}
// Registering the new function
// Register with the global registry.
// Associates the function IRI below with a MyFunctionFactory instance in the
// registry returned by FunctionRegistry.get(). MyFunctionFactory is not
// defined in this snippet.
FunctionRegistry.get().put("http://example.org/function#myFunction", new MyFunctionFactory()) ;
# Pearson correlation of observations from two federated datasets
#
# ?x values come from dataset SP.DYN.IMRT.IN on worldbank.270a.info and
# ?y values from dataset CPI2009 on transparency.270a.info, both for
# reference year 2009. The values are handed to the SCRY math:pearsonr
# procedure, which returns ?r and its description.
#
# NOTE(review): no pattern here joins the X and Y rows (e.g. on
# ?refAreaXExactMatch / ?refAreaYExactMatch or ?identityX / ?identityY), so
# the two sub-selects combine as a cross product. Confirm whether a
# matching FILTER is intended.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX qb: <http://purl.org/linked-data/cube#>
PREFIX sdmx-dimension: <http://purl.org/linked-data/sdmx/2009/dimension#>
PREFIX sdmx-measure: <http://purl.org/linked-data/sdmx/2009/measure#>
PREFIX stats: <http://stats.270a.info/vocab#>
PREFIX cedar: <http://bit.ly/cedar#>
PREFIX scry: <http://www.scry.com/>
PREFIX math: <http://www.scry.com/math/>
PREFIX input: <http://www.scry.com/input?>
SELECT *
WHERE {
  SERVICE <http://worldbank.270a.info/sparql> {
    SELECT DISTINCT ?identityX ?refAreaX ?refAreaXExactMatch ?x
    WHERE {
      ?observationX qb:dataSet <http://worldbank.270a.info/dataset/SP.DYN.IMRT.IN> .
      ?observationX ?propertyRefPeriodX <http://reference.data.gov.uk/id/year/2009> .
      ?propertyRefAreaX rdfs:subPropertyOf* sdmx-dimension:refArea .
      ?observationX ?propertyRefAreaX ?refAreaX .
      ?propertyMeasureX rdfs:subPropertyOf* sdmx-measure:obsValue .
      ?observationX ?propertyMeasureX ?x .
      <http://worldbank.270a.info/dataset/SP.DYN.IMRT.IN> qb:structure/stats:identityDimension ?propertyIdentityX .
      ?observationX ?propertyIdentityX ?identityX .
      OPTIONAL {
        ?refAreaX skos:exactMatch ?refAreaXExactMatch .
        FILTER (REGEX(STR(?refAreaXExactMatch), "^http://transparency.270a.info/"))
      }
      ?refAreaX skos:notation ?refAreaCodeX .
      # drop reference areas whose notation starts with a digit
      FILTER (!REGEX(?refAreaCodeX, "^[0-9]"))
    }
  }
  SERVICE <http://transparency.270a.info/sparql> {
    SELECT DISTINCT ?identityY ?refAreaY ?refAreaYExactMatch ?y
    WHERE {
      ?observationY qb:dataSet <http://transparency.270a.info/dataset/CPI2009> .
      ?observationY ?propertyRefPeriodY <http://reference.data.gov.uk/id/year/2009> .
      ?propertyRefAreaY rdfs:subPropertyOf* sdmx-dimension:refArea .
      ?observationY ?propertyRefAreaY ?refAreaY .
      ?propertyMeasureY rdfs:subPropertyOf* sdmx-measure:obsValue .
      ?observationY ?propertyMeasureY ?y .
      <http://transparency.270a.info/dataset/CPI2009> qb:structure/stats:identityDimension ?propertyIdentityY .
      ?observationY ?propertyIdentityY ?identityY .
      OPTIONAL {
        ?refAreaY skos:exactMatch ?refAreaYExactMatch .
        FILTER (REGEX(STR(?refAreaYExactMatch), "^http://worldbank.270a.info/"))
      }
      ?refAreaY skos:notation ?refAreaCodeY .
      # drop reference areas whose notation starts with a digit
      FILTER (!REGEX(?refAreaCodeY, "^[0-9]"))
    }
  }
  SERVICE <http://145.108.172.225:5000/scry/> {
    math:pearsonr scry:input ?x .
    math:pearsonr scry:input ?y .
    math:pearsonr scry:output ?r .
    # describe the procedure that is actually invoked above
    math:pearsonr scry:description ?desc .
  }
}
# Define:
import services.classes
import rdflib.term
import numpy

PearsonR = services.classes.Procedure(rdflib.term.URIRef('http://www.scry.com/math/PearsonR'))


def correlation(list_of_lists):
    """Return the Pearson correlation between the first two rows.

    ``list_of_lists`` holds one sequence per variable. Items may be numbers
    or numeric strings; they are converted to float so that
    ``numpy.corrcoef`` receives a numeric array rather than an array of
    strings.
    """
    return numpy.corrcoef(numpy.array(list_of_lists, dtype=float))[0][1]


def _pearsonr_function(inputs, outputs, handler):
    """SCRY callback: parse ``inputs['in']`` and return the correlation.

    ``inputs['in']`` is text with rows separated by ';' and values by ','
    (e.g. ``'1,2,3;6,5,4'``). ``outputs`` and ``handler`` are accepted for
    the callback signature but not used.
    """
    # str() keeps this working on both Python 2 and 3; .encode() would yield
    # bytes on Python 3, and bytes cannot be split on a str separator.
    rows = [row.split(',') for row in str(inputs['in']).split(';')]
    return rdflib.term.Literal(correlation(rows))


PearsonR.function = _pearsonr_function
# Test:
PearsonR.execute({'in':'1,2,3;6,5,4'},None,None)
# Describe the math:sd service
# Lists every property/value of math:sd in the scry:orb_description graph
# and, where a value is a scry:argument, its identifier and description.
PREFIX scry: <http://www.scry.com/>
PREFIX math: <http://www.scry.com/math/>

SELECT *
WHERE {
  SERVICE <http://145.108.172.225:5000/scry/> {
    GRAPH scry:orb_description {
      math:sd a scry:procedure .
      math:sd ?p ?o .
      OPTIONAL {
        ?o a scry:argument .
        ?o scry:identifier ?arg_id .
        ?o scry:description ?arg_desc .
      }
    }
  }
}
ORDER BY ?arg_id
# Standard deviation of CEDAR population observations
#
# Takes up to 10 cedar:population values from graph
# <urn:graph:cedar-mini:release>, joins them into one comma-separated
# string and passes it to the SCRY math:stdev procedure.
PREFIX qb: <http://purl.org/linked-data/cube#>
PREFIX cedar: <http://bit.ly/cedar#>
PREFIX scry: <http://www.scry.com/>
PREFIX math: <http://www.scry.com/math/>
PREFIX input: <http://www.scry.com/input?>
SELECT *
WHERE {
  {
    SELECT (GROUP_CONCAT(?pop; separator=",") AS ?pops)
    WHERE {
      # LIMIT must apply to the rows BEFORE aggregation: on the aggregate
      # itself it only limits the single result row and caps nothing.
      # No ORDER BY is given, so which 10 values are picked is up to the store.
      {
        SELECT ?pop
        WHERE {
          # GRAPH replaces FROM, which SPARQL 1.1 does not allow in a sub-select
          GRAPH <urn:graph:cedar-mini:release> {
            ?obs a qb:Observation .
            ?obs cedar:population ?pop .
          }
        }
        LIMIT 10
      }
    }
  }
  SERVICE <http://145.108.172.225:5000/scry/> {
    math:stdev scry:input ?pops .
    math:stdev scry:output ?sd .
    math:stdev scry:description ?desc .
  }
}
-- Pearson correlation coefficient (r) between the ratings of every pair of
-- users in testdata, computed over the movies both users have rated.
--   user1, user2 : the pair (each pair appears once, with user1 > user2)
--   r            : correlation; NULL when it is undefined because one
--                  user's ratings are constant over the shared movies
--   n            : number of shared movies
-- NOTE: "user" is a reserved word in several engines; quote it there.
SELECT
    user1,
    user2,
    (psum - (sum1 * sum2 / n))
        / NULLIF(SQRT((sum1sq - (sum1 * sum1) / n) * (sum2sq - (sum2 * sum2) / n)), 0) AS r,
    n
FROM (
    SELECT
        lhs.user AS user1,
        rhs.user AS user2,
        -- 1.0 * forces non-integer arithmetic on engines where int / int truncates
        SUM(1.0 * lhs.rating) AS sum1,
        SUM(1.0 * rhs.rating) AS sum2,
        SUM(1.0 * lhs.rating * lhs.rating) AS sum1sq,
        SUM(1.0 * rhs.rating * rhs.rating) AS sum2sq,
        SUM(1.0 * lhs.rating * rhs.rating) AS psum,
        COUNT(*) AS n
    FROM testdata AS lhs
    -- The user filter below discards unmatched rows, so this was never
    -- an outer join in effect; say INNER JOIN explicitly.
    INNER JOIN testdata AS rhs
        ON lhs.movie = rhs.movie
    WHERE lhs.user > rhs.user
    GROUP BY
        lhs.user,
        rhs.user
) AS step1
ORDER BY
    r DESC,
    n DESC
-- Standard deviation implementation in SQL for Virtuoso extension option A
--
-- Population standard deviation of #Numbers.Number: the square root of the
-- mean squared deviation from the mean. Returns one row with the mean and
-- the standard deviation.
WITH MeanValue AS (
    -- CAST avoids integer division truncating the mean when Number is an integer type
    SELECT AVG(CAST(Number AS FLOAT)) AS Mean
    FROM #Numbers
),
Deviation AS (
    -- squared error of every value against the overall mean
    SELECT
        m.Mean,
        POWER(n.Number - m.Mean, 2) AS Error
    FROM #Numbers AS n
    CROSS JOIN MeanValue AS m
)
SELECT
    Mean,
    SQRT(AVG(Error)) AS [Standard Deviation]
FROM Deviation
GROUP BY Mean;
# Chains several SCRY procedures inside one SERVICE call:
#   1. math:sqrt on the literal input list
#   2. math:power on the same list, once per ?par value
#   3. the two outputs joined with ';' and passed to math:sumarrays
#   4. the same joined value passed to math:pearsonr and math:covariance
PREFIX scry: <http://www.scry.com/>
PREFIX math: <http://www.scry.com/math/>
PREFIX input: <http://www.scry.com/input?>
PREFIX output: <http://www.scry.com/output?>

SELECT ?input ?sqrt ?par ?pwr ?sum ?last_fnc ?desc ?ans4
WHERE {
  SERVICE <http://bas.eculture.labs.vu.nl/scry/> {
    BIND("1,2,9,16" AS ?input)

    GRAPH ?g1 {
      math:sqrt input:_ ?input .
      math:sqrt output:_ ?sqrt .
    }

    VALUES (?par) { ("0") ("1") ("2") }

    GRAPH ?g2 {
      math:power input:_ ?input .
      math:power input:param ?par .
      math:power output:_ ?pwr .
    }

    BIND(CONCAT(?sqrt, ";", ?pwr) AS ?multi)

    GRAPH ?g3 {
      math:sumarrays input:_ ?multi .
      math:sumarrays output:_ ?sum .
    }

    VALUES (?last_fnc) { (math:pearsonr) (math:covariance) }

    GRAPH ?g4 {
      ?last_fnc input:_ ?multi .
      ?last_fnc output:_ ?ans4 .
      ?last_fnc scry:description ?desc .
    }
  }
}
ORDER BY ?par
/*
create procedure DB.DBA.ComposeInfo (
in pname varchar,
in pnick varchar := '',
in pbox varchar := '')
{
declare ss varchar;
ss := concat(pname, ' ', pnick, ' ', pbox);
ss := rtrim (ss, ' ');
return ss;
};
*/
-- Adds X to a local accumulator and returns the result.
-- The accumulator is initialised to 0 before use; previously it was read
-- before any value had been assigned to it.
-- NOTE(review): despite the name, this does not average anything: it sees
-- a single value per call. Confirm whether a user-defined aggregate was
-- intended.
CREATE PROCEDURE DB.DBA.MyAvg (IN X NUMERIC) {
  declare m numeric;
  m := 0;
  m := m + X;
  return m;
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment