albertmeronyo/c-pearson.c

## c-pearson.c
/* Pearson's correlation coefficient */

 #include <stdio.h>
  #include <math.h>

  /*
   * Prompt for n (1..100) integer pairs on standard input and print their
   * Pearson correlation coefficient:
   *
   *     r = (n*Sxy - Sx*Sy) / sqrt((n*Sxx - Sx*Sx) * (n*Syy - Sy*Sy))
   *
   * Returns 0 on success and 1 when the input cannot be parsed, n is out of
   * range, or r is undefined because x or y has zero variance.
   */
  int main() {
        enum { MAX_ENTRIES = 100 };
        int x[MAX_ENTRIES], y[MAX_ENTRIES];
        int i, n;
        /* Sums are kept in double: int sums of products overflow quickly. */
        double xsum = 0.0, ysum = 0.0, xysum = 0.0, xsqr_sum = 0.0, ysqr_sum = 0.0;
        double num, deno;

        /* get the number of entries from the user */
        printf("Enter the value for n:");
        if (scanf("%d", &n) != 1 || n < 1 || n > MAX_ENTRIES) {
                /* Reject counts that would write past the end of x[] and y[]. */
                fprintf(stderr, "n must be an integer between 1 and %d\n", MAX_ENTRIES);
                return 1;
        }

        /* get the values for x and y from the user */
        printf("Enter the value for x and y:\n");
        for (i = 0; i < n; i++) {
                printf("x[%d] and y[%d]: ", i, i);
                if (scanf("%d%d", &x[i], &y[i]) != 2) {
                        fprintf(stderr, "expected two integers for entry %d\n", i);
                        return 1;
                }
        }

        /* accumulate the sums used by the correlation formula */
        for (i = 0; i < n; i++) {
                double xi = x[i];
                double yi = y[i];

                xsum += xi;
                ysum += yi;
                xysum += xi * yi;
                xsqr_sum += xi * xi;
                ysqr_sum += yi * yi;
        }

        num = n * xysum - xsum * ysum;
        deno = (n * xsqr_sum - xsum * xsum) * (n * ysqr_sum - ysum * ysum);

        /* A constant series makes the denominator zero; r is undefined then. */
        if (deno <= 0.0) {
                fprintf(stderr, "Correlation coefficient is undefined: x or y is constant\n");
                return 1;
        }

        /* calculate and print the correlation coefficient */
        printf("Correlation Coefficient : %.4f\n", num / sqrt(deno));
        return 0;
  }

## c-sd.c
/* Standard deviation implementation in C for Virtuoso extension option B */

float standard_deviation(float data[], int n)
{
    float mean=0.0, sum_deviation=0.0;
    int i;
    for(i=0; i<n;++i)
    {
        mean+=data[i];
    }
    mean=mean/n;
    for(i=0; i<n;++i)
    sum_deviation+=(data[i]-mean)*(data[i]-mean);
    return sqrt(sum_deviation/n);
}

## jena-sd.java
// Extending Jena with custom functions

/**
 * One-argument function that returns the namespace part of a URI node as a
 * string value.
 */
public class namespace extends FunctionBase1
{
    public namespace() { super() ; }

    /**
     * @param v the argument value; its node must be a URI
     * @return a string NodeValue holding the node's namespace
     * @throws ExprEvalException when the node is not a URI
     */
    public NodeValue exec(NodeValue v)
    {
        final Node node = v.asNode() ;
        if ( node.isURI() )
            return NodeValue.makeString(node.getNameSpace()) ;
        throw new ExprEvalException("Not a URI: "+FmtUtils.stringForNode(node)) ;
    }
}

// Registering the new function
// Register with the global registry: the factory instance is stored under
// the given function URI.
// NOTE(review): MyFunctionFactory is not defined in this file — confirm
// which factory class is meant to be registered here.
FunctionRegistry.get().put("http://example.org/function#myFunction", new MyFunctionFactory()) ;

## scry-cor-usage.sparql
# Pearson correlation of 2009 observations: values ?x from the World Bank
# dataset SP.DYN.IMRT.IN and values ?y from the Transparency dataset CPI2009,
# passed to the SCRY math:pearsonr procedure.

PREFIX qb: <http://purl.org/linked-data/cube#>
PREFIX cedar: <http://bit.ly/cedar#>
PREFIX scry:  <http://www.scry.com/>
PREFIX math:  <http://www.scry.com/math/>
PREFIX input: <http://www.scry.com/input?>
# The prefixes below are used in the query and must be declared for it to parse.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX sdmx-dimension: <http://purl.org/linked-data/sdmx/2009/dimension#>
PREFIX sdmx-measure: <http://purl.org/linked-data/sdmx/2009/measure#>
# NOTE(review): stats: namespace assumed to be the 270a.info vocabulary — confirm.
PREFIX stats: <http://stats.270a.info/vocab#>

SELECT *
WHERE {
    # Observations of the World Bank dataset for reference period 2009.
    SERVICE <http://worldbank.270a.info/sparql> {
        SELECT DISTINCT ?identityX ?refAreaX ?refAreaXExactMatch ?x
        WHERE {
            ?observationX qb:dataSet <http://worldbank.270a.info/dataset/SP.DYN.IMRT.IN> .
            ?observationX ?propertyRefPeriodX <http://reference.data.gov.uk/id/year/2009> .
            ?propertyRefAreaX rdfs:subPropertyOf* sdmx-dimension:refArea .
            ?observationX ?propertyRefAreaX ?refAreaX .
            ?propertyMeasureX rdfs:subPropertyOf* sdmx-measure:obsValue .
            ?observationX ?propertyMeasureX ?x .

            <http://worldbank.270a.info/dataset/SP.DYN.IMRT.IN> qb:structure/stats:identityDimension ?propertyIdentityX .
            ?observationX ?propertyIdentityX ?identityX .

            OPTIONAL {
                ?refAreaX skos:exactMatch ?refAreaXExactMatch .
                FILTER (REGEX(STR(?refAreaXExactMatch), "^http://transparency.270a.info/"))
            }

            # Keep only areas whose notation does not start with a digit.
            ?refAreaX skos:notation ?refAreaCodeX .
            FILTER (!REGEX(?refAreaCodeX, "^[0-9]"))
        }
    }
    # Observations of the Transparency dataset for reference period 2009.
    SERVICE <http://transparency.270a.info/sparql> {
        SELECT DISTINCT ?identityY ?refAreaY ?refAreaYExactMatch ?y
        WHERE {
            ?observationY qb:dataSet <http://transparency.270a.info/dataset/CPI2009> .
            ?observationY ?propertyRefPeriodY <http://reference.data.gov.uk/id/year/2009> .
            ?propertyRefAreaY rdfs:subPropertyOf* sdmx-dimension:refArea .
            ?observationY ?propertyRefAreaY ?refAreaY .
            ?propertyMeasureY rdfs:subPropertyOf* sdmx-measure:obsValue .
            ?observationY ?propertyMeasureY ?y .

            <http://transparency.270a.info/dataset/CPI2009> qb:structure/stats:identityDimension ?propertyIdentityY .
            ?observationY ?propertyIdentityY ?identityY .

            OPTIONAL {
                ?refAreaY skos:exactMatch ?refAreaYExactMatch .
                FILTER (REGEX(STR(?refAreaYExactMatch), "^http://worldbank.270a.info/"))
            }

            # Keep only areas whose notation does not start with a digit.
            ?refAreaY skos:notation ?refAreaCodeY .
            FILTER (!REGEX(?refAreaCodeY, "^[0-9]"))
        }
    }

    SERVICE <http://145.108.172.225:5000/scry/> {
        math:pearsonr scry:input ?x .
        math:pearsonr scry:input ?y .
        math:pearsonr scry:output ?r .
        # NOTE(review): the description is requested for math:sd, not
        # math:pearsonr — confirm this is intended.
        math:sd scry:description ?desc .
    }
}

## scry-definition.py
# Define:
import services.classes, rdflib.term, numpy
PearsonR = services.classes.Procedure(rdflib.term.URIRef('http://www.scry.com/math/PearsonR'))


def correlation(list_of_lists):
    """Return the Pearson correlation between the first two rows.

    Cells may be numbers or numeric strings; they are converted to float
    explicitly so the result does not depend on numpy's implicit handling
    of string arrays.
    """
    return numpy.corrcoef(numpy.array(list_of_lists, dtype=float))[0][1]


def _pearsonr(inputs, outputs, handler):
    """Parse inputs['in'] ("a,b,c;d,e,f": rows split on ';', cells on ',')
    and return the correlation wrapped in an rdflib Literal.

    outputs and handler are accepted but not used.
    """
    # str() instead of .encode(): on Python 3 .encode() returns bytes, and
    # bytes.split(';') raises TypeError.
    rows = [row.split(',') for row in str(inputs['in']).split(';')]
    return rdflib.term.Literal(correlation(rows))


PearsonR.function = _pearsonr

# Test:
PearsonR.execute({'in':'1,2,3;6,5,4'},None,None)

## scry-sd-description.sparql
# Describe the math:sd service: list every property/value pair of the
# math:sd procedure and, for values typed scry:argument, their identifier
# and description.

PREFIX math:  <http://www.scry.com/math/>
PREFIX scry:  <http://www.scry.com/>

SELECT *
WHERE {
  SERVICE <http://145.108.172.225:5000/scry/> {
    GRAPH scry:orb_description {
      math:sd a scry:procedure .
      math:sd ?p ?o .

      OPTIONAL {
        ?o a scry:argument .
        ?o scry:identifier ?arg_id .
        ?o scry:description ?arg_desc .
      }
    }
  }
}
ORDER BY ?arg_id

## scry-sd-usage.sparql
# Standard deviation of matched observations

PREFIX qb: <http://purl.org/linked-data/cube#>
PREFIX cedar: <http://bit.ly/cedar#>
PREFIX scry:  <http://www.scry.com/>
PREFIX math:  <http://www.scry.com/math/>
PREFIX input: <http://www.scry.com/input?>

SELECT *
WHERE {
  # Concatenate the cedar:population values into one comma-separated string.
  # The graph is selected with GRAPH because the SPARQL 1.1 grammar does not
  # allow a FROM clause inside a subquery.
  # NOTE(review): LIMIT applies after aggregation, and GROUP_CONCAT without
  # GROUP BY yields a single row, so LIMIT 10 does not cap the number of
  # observations — confirm whether an inner LIMIT was intended.
  {
    SELECT (GROUP_CONCAT(?pop;separator=",") AS ?pops)
    WHERE {
      GRAPH <urn:graph:cedar-mini:release> {
        ?obs a qb:Observation .
        ?obs cedar:population ?pop .
      }
    }
    LIMIT 10
  }
  # Pass the concatenated values to the SCRY math:stdev procedure.
  SERVICE <http://145.108.172.225:5000/scry/> {
    math:stdev scry:input ?pops .
    math:stdev scry:output ?sd .
    math:stdev scry:description ?desc .
  }
}


## sql-pearson.sql
-- Pearson correlation r between every pair of users, computed over the
-- movies both users rated; n is the number of shared movies.
-- r is NULL when either user's shared ratings have zero variance.
SELECT
        user1, user2,
        -- 1.0 * forces non-integer division on engines where int / int truncates;
        -- NULLIF turns a zero denominator into NULL instead of a division by zero.
        ((psum - (1.0 * sum1 * sum2 / n)) / NULLIF(sqrt((sum1sq - pow(sum1, 2.0) / n) * (sum2sq - pow(sum2, 2.0) / n)), 0)) AS r,
        n
FROM
        (SELECT
                n1.user AS user1,
                n2.user AS user2,
                SUM(n1.rating) AS sum1,
                SUM(n2.rating) AS sum2,
                SUM(n1.rating * n1.rating) AS sum1sq,
                SUM(n2.rating * n2.rating) AS sum2sq,
                SUM(n1.rating * n2.rating) AS psum,
                COUNT(*) AS n
        FROM
                testdata AS n1
        -- INNER JOIN: the WHERE clause below already discards unmatched rows.
        INNER JOIN
                testdata AS n2
        ON
                n1.movie = n2.movie
        WHERE
                -- each unordered pair of distinct users is produced once
                n1.user > n2.user
        GROUP BY
                n1.user, n2.user) AS step1
ORDER BY
        r DESC,
        n DESC

## sql-sd.sql
# Standard deviation implementation in SQL for Virtuoso extension option A

-- Population standard deviation of #Numbers.Number, returned with the mean.
WITH Mean AS (
    -- CAST to FLOAT so an integer column is not averaged with integer division.
    SELECT SUM(CAST(Number AS FLOAT)) / COUNT(Number) AS Mean
        FROM #Numbers
), Deviation AS (
    -- Squared distance of every row from the mean.
    SELECT Mean, POWER(Number - Mean, 2) AS Error
        FROM #Numbers CROSS JOIN Mean
)
SELECT Mean, SQRT(SUM(Error) / COUNT(Error)) AS [Standard Deviation]
    FROM Deviation
    GROUP BY Mean;

## triplestore-compatibility.sparql
# Chains several SCRY procedures inside one SERVICE call: math:sqrt and
# math:power on the same input, math:sumarrays on their combined output,
# then math:pearsonr and math:covariance on that same combined string.
PREFIX scry:   <http://www.scry.com/>
PREFIX math:   <http://www.scry.com/math/>
PREFIX input:  <http://www.scry.com/input?>
PREFIX output: <http://www.scry.com/output?>

SELECT ?input ?sqrt ?par ?pwr ?sum ?last_fnc ?desc ?ans4 {

  SERVICE <http://bas.eculture.labs.vu.nl/scry/> {

    # Step 1: pass the literal "1,2,9,16" to math:sqrt.
    BIND("1,2,9,16" as ?input)
    GRAPH ?g1 {math:sqrt input:_  ?input ;
                         output:_ ?sqrt  .}

    # Step 2: call math:power once per ?par value ("0", "1", "2").
    VALUES (?par) {("0") ("1") ("2")}
    GRAPH ?g2 {math:power input:_     ?input ;
                          input:param ?par   ;
                          output:_    ?pwr   .}

    # Step 3: join both results with ";" and pass them to math:sumarrays.
    BIND(CONCAT(?sqrt,";",?pwr) AS ?multi)
    GRAPH ?g3 {math:sumarrays input:_  ?multi ;
                              output:_ ?sum   .}

    # Step 4: run math:pearsonr and math:covariance on the same combined
    # input, also fetching each procedure's description.
    VALUES(?last_fnc) {(math:pearsonr) (math:covariance)}
    GRAPH ?g4 {?last_fnc input:_          ?multi ;
                         output:_         ?ans4  ;
                         scry:description ?desc  .}
  }
} ORDER BY ?par

## virtuoso-sd.sql
/*
create procedure DB.DBA.ComposeInfo (
  in pname varchar,
  in pnick varchar := '',
  in pbox  varchar := '')
{
   declare ss varchar;
   ss := concat(pname, ' ', pnick, ' ', pbox);
   ss := rtrim (ss, ' ');
   return ss;

};
*/

/*
 * Adds X to a local accumulator and returns the result.
 * The accumulator is now set to 0 before use; it was previously read
 * without ever being assigned.
 * NOTE(review): m is local to one call, so despite the name this does not
 * average across calls — confirm the intended behaviour.
 */
CREATE PROCEDURE DB.DBA.MyAvg (IN X NUMERIC) {
  declare m numeric;
  m := 0;
  m := m + X;
  return m;
};
	/* Pearson's correlation coefficient */

	#include <stdio.h>
	#include <math.h>

	/*
	 * Prompt for n (1..100) integer pairs on standard input and print their
	 * Pearson correlation coefficient:
	 *
	 *     r = (n*Sxy - Sx*Sy) / sqrt((n*Sxx - Sx*Sx) * (n*Syy - Sy*Sy))
	 *
	 * Returns 0 on success and 1 when the input cannot be parsed, n is out of
	 * range, or r is undefined because x or y has zero variance.
	 */
	int main() {
		enum { MAX_ENTRIES = 100 };
		int x[MAX_ENTRIES], y[MAX_ENTRIES];
		int i, n;
		/* Sums are kept in double: int sums of products overflow quickly. */
		double xsum = 0.0, ysum = 0.0, xysum = 0.0, xsqr_sum = 0.0, ysqr_sum = 0.0;
		double num, deno;

		/* get the number of entries from the user */
		printf("Enter the value for n:");
		if (scanf("%d", &n) != 1 || n < 1 || n > MAX_ENTRIES) {
			/* Reject counts that would write past the end of x[] and y[]. */
			fprintf(stderr, "n must be an integer between 1 and %d\n", MAX_ENTRIES);
			return 1;
		}

		/* get the values for x and y from the user */
		printf("Enter the value for x and y:\n");
		for (i = 0; i < n; i++) {
			printf("x[%d] and y[%d]: ", i, i);
			if (scanf("%d%d", &x[i], &y[i]) != 2) {
				fprintf(stderr, "expected two integers for entry %d\n", i);
				return 1;
			}
		}

		/* accumulate the sums used by the correlation formula */
		for (i = 0; i < n; i++) {
			double xi = x[i];
			double yi = y[i];

			xsum += xi;
			ysum += yi;
			xysum += xi * yi;
			xsqr_sum += xi * xi;
			ysqr_sum += yi * yi;
		}

		num = n * xysum - xsum * ysum;
		deno = (n * xsqr_sum - xsum * xsum) * (n * ysqr_sum - ysum * ysum);

		/* A constant series makes the denominator zero; r is undefined then. */
		if (deno <= 0.0) {
			fprintf(stderr, "Correlation coefficient is undefined: x or y is constant\n");
			return 1;
		}

		/* calculate and print the correlation coefficient */
		printf("Correlation Coefficient : %.4f\n", num / sqrt(deno));
		return 0;
	}
	/* Standard deviation implementation in C for Virtuoso extension option B */

	float standard_deviation(float data[], int n)
	{
	float mean=0.0, sum_deviation=0.0;
	int i;
	for(i=0; i<n;++i)
	{
	mean+=data[i];
	}
	mean=mean/n;
	for(i=0; i<n;++i)
	sum_deviation+=(data[i]-mean)*(data[i]-mean);
	return sqrt(sum_deviation/n);
	}
	// Extending Jena with custom functions

	/**
	 * One-argument function that returns the namespace part of a URI node as a
	 * string value.
	 */
	public class namespace extends FunctionBase1
	{
		public namespace() { super() ; }

		/**
		 * @param v the argument value; its node must be a URI
		 * @return a string NodeValue holding the node's namespace
		 * @throws ExprEvalException when the node is not a URI
		 */
		public NodeValue exec(NodeValue v)
		{
			final Node node = v.asNode() ;
			if ( node.isURI() )
				return NodeValue.makeString(node.getNameSpace()) ;
			throw new ExprEvalException("Not a URI: "+FmtUtils.stringForNode(node)) ;
		}
	}

	// Registering the new function
	// Register with the global registry: the factory instance is stored under
	// the given function URI.
	// NOTE(review): MyFunctionFactory is not defined in this file — confirm
	// which factory class is meant to be registered here.
	FunctionRegistry.get().put("http://example.org/function#myFunction", new MyFunctionFactory()) ;
	# Pearson correlation of 2009 observations: values ?x from the World Bank
	# dataset SP.DYN.IMRT.IN and values ?y from the Transparency dataset CPI2009,
	# passed to the SCRY math:pearsonr procedure.

	PREFIX qb: <http://purl.org/linked-data/cube#>
	PREFIX cedar: <http://bit.ly/cedar#>
	PREFIX scry: <http://www.scry.com/>
	PREFIX math: <http://www.scry.com/math/>
	PREFIX input: <http://www.scry.com/input?>
	# The prefixes below are used in the query and must be declared for it to parse.
	PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
	PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
	PREFIX sdmx-dimension: <http://purl.org/linked-data/sdmx/2009/dimension#>
	PREFIX sdmx-measure: <http://purl.org/linked-data/sdmx/2009/measure#>
	# NOTE(review): stats: namespace assumed to be the 270a.info vocabulary — confirm.
	PREFIX stats: <http://stats.270a.info/vocab#>

	SELECT *
	WHERE {
		# Observations of the World Bank dataset for reference period 2009.
		SERVICE <http://worldbank.270a.info/sparql> {
			SELECT DISTINCT ?identityX ?refAreaX ?refAreaXExactMatch ?x
			WHERE {
				?observationX qb:dataSet <http://worldbank.270a.info/dataset/SP.DYN.IMRT.IN> .
				?observationX ?propertyRefPeriodX <http://reference.data.gov.uk/id/year/2009> .
				?propertyRefAreaX rdfs:subPropertyOf* sdmx-dimension:refArea .
				?observationX ?propertyRefAreaX ?refAreaX .
				?propertyMeasureX rdfs:subPropertyOf* sdmx-measure:obsValue .
				?observationX ?propertyMeasureX ?x .

				<http://worldbank.270a.info/dataset/SP.DYN.IMRT.IN> qb:structure/stats:identityDimension ?propertyIdentityX .
				?observationX ?propertyIdentityX ?identityX .

				OPTIONAL {
					?refAreaX skos:exactMatch ?refAreaXExactMatch .
					FILTER (REGEX(STR(?refAreaXExactMatch), "^http://transparency.270a.info/"))
				}

				# Keep only areas whose notation does not start with a digit.
				?refAreaX skos:notation ?refAreaCodeX .
				FILTER (!REGEX(?refAreaCodeX, "^[0-9]"))
			}
		}
		# Observations of the Transparency dataset for reference period 2009.
		SERVICE <http://transparency.270a.info/sparql> {
			SELECT DISTINCT ?identityY ?refAreaY ?refAreaYExactMatch ?y
			WHERE {
				?observationY qb:dataSet <http://transparency.270a.info/dataset/CPI2009> .
				?observationY ?propertyRefPeriodY <http://reference.data.gov.uk/id/year/2009> .
				?propertyRefAreaY rdfs:subPropertyOf* sdmx-dimension:refArea .
				?observationY ?propertyRefAreaY ?refAreaY .
				?propertyMeasureY rdfs:subPropertyOf* sdmx-measure:obsValue .
				?observationY ?propertyMeasureY ?y .

				<http://transparency.270a.info/dataset/CPI2009> qb:structure/stats:identityDimension ?propertyIdentityY .
				?observationY ?propertyIdentityY ?identityY .

				OPTIONAL {
					?refAreaY skos:exactMatch ?refAreaYExactMatch .
					FILTER (REGEX(STR(?refAreaYExactMatch), "^http://worldbank.270a.info/"))
				}

				# Keep only areas whose notation does not start with a digit.
				?refAreaY skos:notation ?refAreaCodeY .
				FILTER (!REGEX(?refAreaCodeY, "^[0-9]"))
			}
		}

		SERVICE <http://145.108.172.225:5000/scry/> {
			math:pearsonr scry:input ?x .
			math:pearsonr scry:input ?y .
			math:pearsonr scry:output ?r .
			# NOTE(review): the description is requested for math:sd, not
			# math:pearsonr — confirm this is intended.
			math:sd scry:description ?desc .
		}
	}
	# Define:
	import services.classes, rdflib.term, numpy
	PearsonR = services.classes.Procedure(rdflib.term.URIRef('http://www.scry.com/math/PearsonR'))


	def correlation(list_of_lists):
	    """Return the Pearson correlation between the first two rows.

	    Cells may be numbers or numeric strings; they are converted to float
	    explicitly so the result does not depend on numpy's implicit handling
	    of string arrays.
	    """
	    return numpy.corrcoef(numpy.array(list_of_lists, dtype=float))[0][1]


	def _pearsonr(inputs, outputs, handler):
	    """Parse inputs['in'] ("a,b,c;d,e,f": rows split on ';', cells on ',')
	    and return the correlation wrapped in an rdflib Literal.

	    outputs and handler are accepted but not used.
	    """
	    # str() instead of .encode(): on Python 3 .encode() returns bytes, and
	    # bytes.split(';') raises TypeError.
	    rows = [row.split(',') for row in str(inputs['in']).split(';')]
	    return rdflib.term.Literal(correlation(rows))


	PearsonR.function = _pearsonr

	# Test:
	PearsonR.execute({'in':'1,2,3;6,5,4'},None,None)
	# Describe the math:sd service: list every property/value pair of the
	# math:sd procedure and, for values typed scry:argument, their identifier
	# and description.

	PREFIX math: <http://www.scry.com/math/>
	PREFIX scry: <http://www.scry.com/>

	SELECT *
	WHERE {
		SERVICE <http://145.108.172.225:5000/scry/> {
			GRAPH scry:orb_description {
				math:sd a scry:procedure .
				math:sd ?p ?o .

				OPTIONAL {
					?o a scry:argument .
					?o scry:identifier ?arg_id .
					?o scry:description ?arg_desc .
				}
			}
		}
	}
	ORDER BY ?arg_id
	-- Pearson correlation r between every pair of users, computed over the
	-- movies both users rated; n is the number of shared movies.
	-- r is NULL when either user's shared ratings have zero variance.
	SELECT
		user1, user2,
		-- 1.0 * forces non-integer division on engines where int / int truncates;
		-- NULLIF turns a zero denominator into NULL instead of a division by zero.
		((psum - (1.0 * sum1 * sum2 / n)) / NULLIF(sqrt((sum1sq - pow(sum1, 2.0) / n) * (sum2sq - pow(sum2, 2.0) / n)), 0)) AS r,
		n
	FROM
		(SELECT
			n1.user AS user1,
			n2.user AS user2,
			SUM(n1.rating) AS sum1,
			SUM(n2.rating) AS sum2,
			SUM(n1.rating * n1.rating) AS sum1sq,
			SUM(n2.rating * n2.rating) AS sum2sq,
			SUM(n1.rating * n2.rating) AS psum,
			COUNT(*) AS n
		FROM
			testdata AS n1
		-- INNER JOIN: the WHERE clause below already discards unmatched rows.
		INNER JOIN
			testdata AS n2
		ON
			n1.movie = n2.movie
		WHERE
			-- each unordered pair of distinct users is produced once
			n1.user > n2.user
		GROUP BY
			n1.user, n2.user) AS step1
	ORDER BY
		r DESC,
		n DESC
	# Standard deviation implementation in SQL for Virtuoso extension option A

	-- Population standard deviation of #Numbers.Number, returned with the mean.
	WITH Mean AS (
		-- CAST to FLOAT so an integer column is not averaged with integer division.
		SELECT SUM(CAST(Number AS FLOAT)) / COUNT(Number) AS Mean
		FROM #Numbers
	), Deviation AS (
		-- Squared distance of every row from the mean.
		SELECT Mean, POWER(Number - Mean, 2) AS Error
		FROM #Numbers CROSS JOIN Mean
	)
	SELECT Mean, SQRT(SUM(Error) / COUNT(Error)) AS [Standard Deviation]
	FROM Deviation
	GROUP BY Mean;
	/*
	create procedure DB.DBA.ComposeInfo (
	in pname varchar,
	in pnick varchar := '',
	in pbox varchar := '')
	{
	declare ss varchar;
	ss := concat(pname, ' ', pnick, ' ', pbox);
	ss := rtrim (ss, ' ');
	return ss;

	};
	*/

	/*
	 * Adds X to a local accumulator and returns the result.
	 * The accumulator is now set to 0 before use; it was previously read
	 * without ever being assigned.
	 * NOTE(review): m is local to one call, so despite the name this does not
	 * average across calls — confirm the intended behaviour.
	 */
	CREATE PROCEDURE DB.DBA.MyAvg (IN X NUMERIC) {
		declare m numeric;
		m := 0;
		m := m + X;
		return m;
	};