Skip to content

Instantly share code, notes, and snippets.

@mcancellieri
Last active May 1, 2018 16:04
Show Gist options
  • Save mcancellieri/4762b5d87b81333daecc65aad5d370d2 to your computer and use it in GitHub Desktop.
Save mcancellieri/4762b5d87b81333daecc65aad5d370d2 to your computer and use it in GitHub Desktop.
A sample CORE article
{
"id": "1511033",
"authors": ["Bizer, Christian", "Heath, Tom", "Berners-Lee, Tim"],
"citations": [{
"id": 36190556,
"title": "A Declarative Framework for Semantic Link Discovery over Relational Data. Poster at 18th World Wide Web Conference",
"authors": [],
"date": "2009",
"doi": "10.1145/1526709.1526876",
"raw": "Hassanzadeh, O., et al. (2009). A Declarative Framework for Semantic Link Discovery over Relational Data. Poster at 18th World Wide Web Conference (WWW2009).",
"cites": null
}, {
"id": 36190544,
"title": "A Resource List Management Tool for Undergraduate Students based on Linked Open Data Principles.",
"authors": [],
"date": "2009",
"doi": "10.1007/978-3-642-02121-3_51",
"raw": "Clarke, C. (2009). A Resource List Management Tool for Undergraduate Students based on Linked Open Data Principles. Proceedings of the 6th European Semantic Web Conference (ESWC2009).",
"cites": null
}, {
"id": 36190555,
"title": "A Web-based Mapping Technique for Establishing Metadata Interoperability.",
"authors": [],
"date": "2008",
"doi": null,
"raw": "Haslhofer, B. (2008): A Web-based Mapping Technique for Establishing Metadata Interoperability. PhD thesis, Universität Wien.",
"cites": null
}, {
"id": 36190564,
"title": "Architecture of the World Wide Web, Volume One - W3C Recommendation. Retrieved",
"authors": [],
"date": "2004",
"doi": null,
"raw": "Jacobs, I., Walsh, N. (2004): Architecture of the World Wide Web, Volume One - W3C Recommendation. Retrieved June 14, 2009, http://www.w3.org/TR/webarch/ Jentzsch, A., Hassanzadeh, O., Bizer, C., Andersson, B., Stephens, S. (2009). Enabling Tailored Therapeutics with Linked Data. Proceedings of the 2nd Workshop on Linked Data on the Web (LDOW2009).",
"cites": null
}, {
"id": 36190576,
"title": "Automatic Interlinking of Music Datasets on the Semantic Web.",
"authors": [],
"date": "2008",
"doi": "10.1109/mmul.2009.29",
"raw": "Raimond, Y., Sutton, C., Sandler, M. (2008). Automatic Interlinking of Music Datasets on the Semantic Web. Proceedings of the 1st Workshop about Linked Data on the Web (LDOW2008).",
"cites": null
}, {
"id": 36190536,
"title": "Best Practice Recipes for Publishing RDF Vocabularies - W3C Working Group Note. Retrieved",
"authors": [],
"date": "2008",
"doi": null,
"raw": "Berrueta, D., Phipps, J. (2008). Best Practice Recipes for Publishing RDF Vocabularies - W3C Working Group Note. Retrieved June 14, 2009, http://www.w3.org/TR/swbp-vocab-pub/ Bizer, C., Cyganiak, R. (2006). D2R Server - Publishing Relational Databases on the Semantic Web. Poster at the 5th International Semantic Web Conference (ISWC2006).",
"cites": null
}, {
"id": 36190546,
"title": "Bootstrapping pay-as-you-go data integration systems.",
"authors": [],
"date": "2008",
"doi": "10.1145/1376616.1376702",
"raw": "Das Sarma, A., Dong, X., Halevy, A. (2008). Bootstrapping pay-as-you-go data integration systems. Proceedings of the Conference on Management of Data (SIGMOD2008).",
"cites": null
}, {
"id": 36190558,
"title": "Browsing Linked Data with Fenfire.",
"authors": [],
"date": "2008",
"doi": null,
"raw": "Hastrup, T., Cyganiak, R., Bojars, U. (2008). Browsing Linked Data with Fenfire. Proceedings of the 1st Workshop about Linked Data on the Web (LDOW2008).",
"cites": null
}, {
"id": 36190531,
"title": "Cleaning up the User Interface, Section - The “Oh, yeah?”-Button.",
"authors": [],
"date": "1997",
"doi": null,
"raw": "Berners-Lee, T. (1997). Cleaning up the User Interface, Section - The “Oh, yeah?”-Button.",
"cites": null
}, {
"id": 36190577,
"title": "Cool URIs for the Semantic Web.",
"authors": [],
"date": "2008",
"doi": null,
"raw": "Sauermann, L., Cyganiak, R. (2008): Cool URIs for the Semantic Web. W3C Interest Group",
"cites": null
}, {
"id": 36190540,
"title": "Data Fusion.",
"authors": [],
"date": "2008",
"doi": "10.1145/1456650.1456651",
"raw": "Bleiholder, J., Naumann, F. (2008). Data Fusion. ACM Computing Surveys, 41(1):1-41.",
"cites": null
}, {
"id": 36190525,
"title": "DBpedia Mobile - A Location-Aware Semantic Web Client.",
"authors": [],
"date": "2008",
"doi": "10.1016/j.websem.2009.09.004",
"raw": "Becker, C., Bizer, C. (2008). DBpedia Mobile - A Location-Aware Semantic Web Client.",
"cites": null
}, {
"id": 36190522,
"title": "DBpedia: A Nucleus for a Web of Open Data.",
"authors": [],
"date": "2007",
"doi": "10.1007/978-3-540-76298-0_52",
"raw": "Auer, S., Bizer, C., Kobilarov, G., Lehmann, J., Cyganiak, C., Ives, Z. (2007). DBpedia: A Nucleus for a Web of Open Data. Proceedings of the 6th International Semantic Web Conference (ISWC2007).",
"cites": null
}, {
"id": 36190548,
"title": "Duplicate Record Detection: A survey.",
"authors": [],
"date": "2007",
"doi": "10.1109/tkde.2007.250581",
"raw": "Elmagarmid, A., Ipeirotis, P., Verykios, V. (2007). Duplicate Record Detection: A survey. IEEE Transactions on Knowledge and Data Engineering 19(1):1–16.",
"cites": null
}, {
"id": 36190549,
"title": "Expressive alignment language and implementation. Knowledge Web project report,",
"authors": [],
"date": "2007",
"doi": null,
"raw": "Euzenat, J., Scharffe, F., Zimmermann, A. (2007). Expressive alignment language and implementation. Knowledge Web project report, KWEB/2004/D2.2.10/1.0. Euzenat, J., Shvaiko, P. (2007). Ontology Matching. Springer, Heidelberg.",
"cites": null
}, {
"id": 36190547,
"title": "Finding and Ranking Knowledge on the Semantic Web,",
"authors": [],
"date": "2005",
"doi": "10.1007/11574620_14",
"raw": "Ding, L., et al. (2005). Finding and Ranking Knowledge on the Semantic Web, Proceedings of the 4th International Semantic Web Conference, November 2005.",
"cites": null
}, {
"id": 36190538,
"title": "How to publish Linked Data on the Web. Retrieved",
"authors": [],
"date": "2007",
"doi": null,
"raw": "Bizer, C., Cyganiak, R., Heath, T. (2007). How to publish Linked Data on the Web. Retrieved June 14, 2009, http://www4.wiwiss.fu-berlin.de/bizer/pub/LinkedDataTutorial/ Bizer, C., Cyganiak, R., Gauß, T. (2007): The RDF Book Mashup: From Web APIs to a Web of Data. Proceedings of the 3rd Workshop on Scripting for the Semantic Web (SFSW2007).",
"cites": null
}, {
"id": 36190561,
"title": "How Will We Interact with the Web of Data? In",
"authors": [],
"date": "2008",
"doi": "10.1109/mic.2008.101",
"raw": "Heath, T. (2008b). How Will We Interact with the Web of Data? In IEEE Internet Computing, Vol. 12(5): 88-91.",
"cites": null
}, {
"id": 36190550,
"title": "Hypertext Transfer Protocol -- HTTP/1.1. Request for Comments: 2616. Retrieved",
"authors": [],
"date": "1999",
"doi": null,
"raw": "Fielding, R., et al. (1999). Hypertext Transfer Protocol -- HTTP/1.1. Request for Comments: 2616. Retrieved June 14, 2009, http://www.w3.org/Protocols/rfc2616/rfc2616.html Franklin, M., Halevy, A., Maier, D. (2005). From databases to dataspaces: a new abstraction for information management. ACM SIGMOD Records, 34(4):27-33.",
"cites": null
}, {
"id": 36190560,
"title": "Information-seeking on the Web with Trusted Social Networks – from Theory to Systems.",
"authors": [],
"date": "2008",
"doi": null,
"raw": "Heath, T. (2008a). Information-seeking on the Web with Trusted Social Networks – from Theory to Systems. PhD Thesis, The Open University.",
"cites": null
}, {
"id": 36190572,
"title": "Integration of Semantically Annotated Data by the KnoFuss Architecture.",
"authors": [],
"date": "2008",
"doi": "10.1007/978-3-540-87696-0_24",
"raw": "Nikolov, A., et al. (2008): Integration of Semantically Annotated Data by the KnoFuss Architecture. Proceedings of the 16th International Conference on Knowledge Engineering and Knowledge Management.",
"cites": null
}, {
"id": 36190557,
"title": "Linked Movie Data Base.",
"authors": [],
"date": "2009",
"doi": null,
"raw": "Hassanzadeh, O., Consens, M. (2009). Linked Movie Data Base. Proceedings of the 2nd Workshop on Linked Data on the Web (LDOW2009).",
"cites": null
}, {
"id": 36190542,
"title": "Named graphs.",
"authors": [],
"date": "2005",
"doi": "10.1145/1060745.1060835",
"raw": "Carroll, J., Bizer, C., Hayes, P., Stickler, P. (2005): Named graphs. Journal of Web Semantics, 3(4):247-267.",
"cites": null
}, {
"id": 36190570,
"title": "Open Data Commons, a License for Open Data.",
"authors": [],
"date": "2008",
"doi": null,
"raw": "Miller, P., Styles, R., Heath, T. (2008). Open Data Commons, a License for Open Data. Proceedings of the 1st Workshop about Linked Data on the Web (LDOW2008).",
"cites": null
}, {
"id": 36190569,
"title": "OWL Web Ontology Language - W3C Recommendation. Retrieved",
"authors": [],
"date": "2004",
"doi": "10.1007/978-3-540-24750-0_4",
"raw": "McGuinness, D., van Harmelen, F. (2004). OWL Web Ontology Language - W3C Recommendation. Retrieved June 14, 2009, http://www.w3.org/TR/owl-features/ McGuinness, D., da Silva, P. (2003). Infrastructure for Web Explanations. Proceedings of the 2nd International Semantic Web Conference (ISWC2003).",
"cites": null
}, {
"id": 36190565,
"title": "Pathetic Fallacy of RDF.",
"authors": [],
"date": "2006",
"doi": null,
"raw": "Karger, D., schraefel, m.c. (2006). Pathetic Fallacy of RDF. Proceedings of 3rd Semantic Web User Interaction Workshop (SWUI2006).",
"cites": null
}, {
"id": 36190551,
"title": "Principles of dataspace systems.",
"authors": [],
"date": "2006",
"doi": "10.1145/1142351.1142352",
"raw": "Halevy, A., Franklin, M., Maier, D. (2006). Principles of dataspace systems. Proceedings of the Symposium on Principles of database systems (PODS2006).",
"cites": null
}, {
"id": 36190553,
"title": "Provenance Information in the Web of Data.",
"authors": [],
"date": "2009",
"doi": "10.1007/978-3-642-17819-1_10",
"raw": "Hartig, O. (2009). Provenance Information in the Web of Data. Proceedings of the 2nd Workshop on Linked Data on the Web (LDOW2009).",
"cites": null
}, {
"id": 36190545,
"title": "Pubby - A Linked Data Frontend for SPARQL Endpoints.",
"authors": [],
"date": "2008",
"doi": null,
"raw": "Cyganiak, R., Bizer, C. (2008). Pubby - A Linked Data Frontend for SPARQL Endpoints.",
"cites": null
}, {
"id": 36190539,
"title": "Quality-driven Information Filtering using the WIQA Policy Framework.",
"authors": [],
"date": "2009",
"doi": "10.1016/j.websem.2008.02.005",
"raw": "Bizer, C., Cyganiak, R. (2009): Quality-driven Information Filtering using the WIQA Policy Framework. Journal of Web Semantics, 7(1):1-10.",
"cites": null
}, {
"id": 36190575,
"title": "Querying distributed RDF data sources with SPARQL.",
"authors": [],
"date": "2008",
"doi": "10.1007/978-3-540-68234-9_39",
"raw": "Quilitz, B., Leser, U. (2008). Querying distributed RDF data sources with SPARQL. Proceedings of the 5th European Semantic Web Conference (ESWC2008).",
"cites": null
}, {
"id": 36190567,
"title": "Rapid semantic web mashup development through semantic web pipes.",
"authors": [],
"date": "2009",
"doi": "10.1145/1526709.1526788",
"raw": "Le Phuoc, D., Polleres, A., Morbidoni, C., Hauswirth, M., Tummarello, G. (2009). Rapid semantic web mashup development through semantic web pipes. Proceedings of the 18th World Wide Web Conference (WWW2009).",
"cites": null
}, {
"id": 36190541,
"title": "RDF Vocabulary Description Language 1.0: RDF Schema -W3C Recommendation. Retrieved",
"authors": [],
"date": "2004",
"doi": null,
"raw": "Brickley, D., Guha, R. (2004). RDF Vocabulary Description Language 1.0: RDF Schema -W3C Recommendation. Retrieved June 14, 2009, http://www.w3.org/TR/rdf-schema/ Brin, S., Page, L. (1998). The Anatomy of a Large-Scale Hypertextual Web Search Engine. Computer Networks and ISDN Systems, 30(1-7):107-117.",
"cites": null
}, {
"id": 36190527,
"title": "RDF/XML Syntax Specification (Revised) - W3C Recommendation.",
"authors": [],
"date": "2004",
"doi": null,
"raw": "Beckett, D. (2004). RDF/XML Syntax Specification (Revised) - W3C Recommendation.",
"cites": null
}, {
"id": 36190519,
"title": "RDFa in XHTML: Syntax and Processing - W3C Recommendation. Retrieved",
"authors": [],
"date": "2008",
"doi": null,
"raw": "Adida, B., et al. (2008). RDFa in XHTML: Syntax and Processing - W3C Recommendation. Retrieved June 14, 2009, http://www.w3.org/TR/rdfa-syntax/ Alexander, K., Cyganiak, R., Hausenblas, M., Zhao, J. (2009). Describing Linked Datasets. Proceedings of the 2nd Workshop on Linked Data on the Web (LDOW2009).",
"cites": null
}, {
"id": 36190566,
"title": "Resource Description Framework (RDF): Concepts and Abstract Syntax - W3C Recommendation. Retrieved",
"authors": [],
"date": "2004",
"doi": null,
"raw": "Klyne, G., Carroll, J. (2004). Resource Description Framework (RDF): Concepts and Abstract Syntax - W3C Recommendation. Retrieved June 14, 2009, http://www.w3.org/TR/rdf-concepts/ Kobilarov, G., et al. (2009). Media Meets Semantic Web - How the BBC Uses DBpedia and Linked Data to Make Connections. Proceedings of the 6th European Semantic Web Conference (ESWC2009).",
"cites": null
}, {
"id": 36190562,
"title": "Revyu: Linking reviews and ratings into the Web of Data.",
"authors": [],
"date": "2008",
"doi": "10.1016/j.websem.2008.09.003",
"raw": "Heath, T., Motta, E. (2008). Revyu: Linking reviews and ratings into the Web of Data. Journal of Web Semantics, 6(4):266-273.",
"cites": null
}, {
"id": 36190573,
"title": "ScentTrails: Integrating Browsing and Searching on the Web.",
"authors": [],
"date": "2003",
"doi": "10.1145/889692.889699",
"raw": "Olston, C., Chi, E. (2003). ScentTrails: Integrating Browsing and Searching on the Web. ACM Transactions on Computer-Human Interaction, 10(3):177-197.",
"cites": null
}, {
"id": 36190574,
"title": "Sindice.com: A document-oriented lookup index for open linked data.",
"authors": [],
"date": "2008",
"doi": "10.1504/ijmso.2008.021204",
"raw": "Oren, E., et al. (2008). Sindice.com: A document-oriented lookup index for open linked data. Journal of Metadata, Semantics and Ontologies, 3(1):37-52.",
"cites": null
}, {
"id": 36190552,
"title": "Special Issue on Identity, Reference and the Web.",
"authors": [],
"date": "2008",
"doi": null,
"raw": "Halpin, H., Thomson, H. (2008). Special Issue on Identity, Reference and the Web. International Journal on Semantic Web & Information Systems, 4(2):1-72.",
"cites": null
}, {
"id": 36190535,
"title": "Tabulator Redux: Browsing and Writing Linked Data.",
"authors": [],
"date": "2008",
"doi": null,
"raw": "Berners-Lee, T., et al. (2008). Tabulator Redux: Browsing and Writing Linked Data. Proceedings of the 1st Workshop on Linked Data on the Web (LDOW2008).",
"cites": null
}, {
"id": 36190554,
"title": "The OAI2LOD Server: Exposing OAI-PMH Metadata as Linked Data.",
"authors": [],
"date": "2008",
"doi": "10.1504/ijmso.2010.032648",
"raw": "Haslhofer, B., Schandl, B. (2008). The OAI2LOD Server: Exposing OAI-PMH Metadata as Linked Data. Proceedings of the 1st Workshop about Linked Data on the Web (LDOW2008).",
"cites": null
}, {
"id": 36190571,
"title": "The Open Provenance Model.",
"authors": [],
"date": "2008",
"doi": "10.1007/978-3-540-89965-5_31",
"raw": "Moreau, L., et al. (2008). The Open Provenance Model. Technical report, Electronics and Computer Science, University of Southampton.",
"cites": null
}, {
"id": 36190533,
"title": "The Semantic Web.",
"authors": [],
"date": "2001",
"doi": "10.1038/scientificamerican0501-34",
"raw": "Berners-Lee, T., Hendler, J., Lassila, O. (2001) The Semantic Web. Scientific American, 284(5):34-43.",
"cites": null
}, {
"id": 36190529,
"title": "The World-Wide Web.",
"authors": [],
"date": "1994",
"doi": "10.1145/179606.179671",
"raw": "Berners-Lee, T. et al. (1994). The World-Wide Web. Communications of the ACM, 37(8):76-82.",
"cites": null
}, {
"id": 36190520,
"title": "Toward a New Generation of Semantic Web Applications. IEEE Intelligent Systems,",
"authors": [],
"date": "2008",
"doi": "10.1109/mis.2008.54",
"raw": "d'Aquin, M., et al. (2008). Toward a New Generation of Semantic Web Applications. IEEE Intelligent Systems, 23(3):20-28.",
"cites": null
}, {
"id": 36190563,
"title": "Towards a scalable search and query engine for the web.",
"authors": [],
"date": "2007",
"doi": "10.1145/1242572.1242819",
"raw": "Hogan, A., Harth, A., Umrich, J., Decker, S. (2007). Towards a scalable search and query engine for the web. Proceedings of the 16th Conference on World Wide Web (WWW2007).",
"cites": null
}, {
"id": 36190543,
"title": "Towards ECSSE: live Web of Data search and integration.",
"authors": [],
"date": "2009",
"doi": "10.1145/1772690.1772907",
"raw": "Catasta, M., Cyganiak, R., Tummarello, G. (2009). Towards ECSSE: live Web of Data search and integration. Proceedings of the Semantic Search 2009 Workshop at WWW2009. Cheng, G., Qu, Y. (this issue). Searching Linked Objects with Falcons: Approach, Implementation and Evaluation. International Journal on Semantic Web and Information Systems, Special Issue on Linked Data.",
"cites": null
}, {
"id": 36190521,
"title": "Triplify – Light-Weight Linked Data Publication from Relational Databases.",
"authors": [],
"date": "2009",
"doi": "10.1145/1526709.1526793",
"raw": "Auer, S., et al. (2009). Triplify – Light-Weight Linked Data Publication from Relational Databases. Proceedings of the 18th World Wide Web Conference (WWW2009).",
"cites": null
}, {
"id": 36190528,
"title": "Turtle - Terse RDF Triple Language - W3C Team Submission. Retrieved",
"authors": [],
"date": "2008",
"doi": null,
"raw": "Retrieved June 14, 2009, http://www.w3.org/TR/rdf-syntax-grammar/ Beckett, D., Berners-Lee, T. (2008). Turtle - Terse RDF Triple Language - W3C Team Submission. Retrieved July 23, 2009, http://www.w3.org/TeamSubmission/turtle/ Belleau, F., Nolin, M., Tourigny, N., Rigault, P., Morissette, J. (2008). Bio2RDF: Towards a mashup to build bioinformatics knowledge systems. Journal of Biomedical Informatics, 41(5):706-16.",
"cites": null
}, {
"id": 36190534,
"title": "Uniform Resource Identifier (URI): Generic Syntax. Request for Comments: 3986. Retrieved",
"authors": [],
"date": "2005",
"doi": null,
"raw": "Berners-Lee, T., et al. (2005). Uniform Resource Identifier (URI): Generic Syntax. Request for Comments: 3986. Retrieved June 14, 2009, http://tools.ietf.org/html/rfc3986 Berners-Lee, T. (2006). Linked Data - Design Issues. Retrieved July 23, http://www.w3.org/DesignIssues/LinkedData.html Berners-Lee, T., et al. (2006), Tabulator: Exploring and Analyzing Linked Data on the Semantic Web. Proceedings of the 3rd International Semantic Web User Interaction Workshop (SWUI06).",
"cites": null
}, {
"id": 36190532,
"title": "Weaving the Web: The Past, Present and Future of the World Wide Web by its Inventor.",
"authors": [],
"date": "2009",
"doi": null,
"raw": "Retrieved June 14, 2009, http://www.w3.org/DesignIssues/UI.html Berners-Lee, T. (1998). Notation3 (N3) A readable RDF syntax. Retrieved July 23, 2009, http://www.w3.org/DesignIssues/Notation3.html Berners-Lee, T. (2000): Weaving the Web: The Past, Present and Future of the World Wide Web by its Inventor. London, Texere.",
"cites": null
}, {
"id": 36190559,
"title": "What is the Size of the Semantic Web?",
"authors": [],
"date": "2008",
"doi": null,
"raw": "Hausenblas, M., Halb, W., Raimond, Y., Heath, T. (2008). What is the Size of the Semantic Web? In Proceedings of the International Conference on Semantic Systems (ISemantics2008).",
"cites": null
}, {
"id": 36190568,
"title": "Which semantic web?",
"authors": [],
"date": "2003",
"doi": "10.1145/900051.900063",
"raw": "Marshall, C., Shipman, F. (2003). Which semantic web? Proceedings of the 14th ACM Conference on Hypertext and Hypermedia (HT2003).",
"cites": null
}],
"contributors": [],
"datePublished": "2009",
"deleted": "ALLOWED",
"description": "The term “Linked Data” refers to a set of best practices for publishing and connecting structured data on the Web. These best practices have been adopted by an increasing number of data providers over the last three years, leading to the creation of a global data space containing billions of assertions— the Web of Data. In this article, the authors present the concept and technical principles of Linked Data, and situate these within the broader context of related technological developments. They describe progress to date in publishing Linked Data on the Web, review applications that have been developed to exploit the Web of Data, and map out a research agenda for the Linked Data community as it moves forward",
"fullText": "Linked Data - The Story So Far\nChristian Bizer, Freie Universität Berlin, Germany\nTom Heath, Talis Information Ltd, United Kingdom\nTim Berners-Lee, Massachusetts Institute of Technology, USA\nThis is a preprint of a paper to appear in: Heath, T., Hepp, M., and Bizer, C. (eds.). Special\nIssue on Linked Data, International Journal on Semantic Web and Information Systems\n(IJSWIS). http://linkeddata.org/docs/ijswis-special-issue\nAbstract\nThe term Linked Data refers to a set of best practices for publishing and connecting\nstructured data on the Web. These best practices have been adopted by an increasing\nnumber of data providers over the last three years, leading to the creation of a global data\nspace containing billions of assertions - the Web of Data. In this article we present the\nconcept and technical principles of Linked Data, and situate these within the broader context\nof related technological developments. We describe progress to date in publishing Linked\nData on the Web, review applications that have been developed to exploit the Web of Data,\nand map out a research agenda for the Linked Data community as it moves forward.\nKeywords: Linked Data, Web of Data, Semantic Web, Data Sharing, Data Exploration\n1. Introduction\nThe World Wide Web has radically altered the way we share knowledge by lowering the\nbarrier to publishing and accessing documents as part of a global information space.\nHypertext links allow users to traverse this information space using Web browsers, while\nsearch engines index the documents and analyse the structure of links between them to\ninfer potential relevance to users' search queries (Brin & Page, 1998). 
This functionality has\nbeen enabled by the generic, open and extensible nature of the Web (Jacobs & Walsh,\n2004), which is also seen as a key feature in the Web's unconstrained growth.\nDespite the inarguable benefits the Web provides, until recently the same principles that\nenabled the Web of documents to flourish have not been applied to data. Traditionally, data\npublished on the Web has been made available as raw dumps in formats such as CSV or\nXML, or marked up as HTML tables, sacrificing much of its structure and semantics. In the\nconventional hypertext Web, the nature of the relationship between two linked documents is\nimplicit, as the data format, i.e. HTML, is not sufficiently expressive to enable individual\nentities described in a particular document to be connected by typed links to related\nentities.\nHowever, in recent years the Web has evolved from a global information space of linked\ndocuments to one where both documents and data are linked. Underpinning this evolution is\na set of best practices for publishing and connecting structured data on the Web known as\nLinked Data. The adoption of the Linked Data best practices has lead to the extension of the\nWeb with a global data space connecting data from diverse domains such as people,\ncompanies, books, scientific publications, films, music, television and radio programmes,\ngenes, proteins, drugs and clinical trials, online communities, statistical and scientific data,\nand reviews. This Web of Data enables new types of applications. There are generic Linked\nData browsers which allow users to start browsing in one data source and then navigate\nalong links into related data sources. There are Linked Data search engines that crawl the\nWeb of Data by following links between data sources and provide expressive query\ncapabilities over aggregated data, similar to how a local database is queried today. The Web\nof Data also opens up new possibilities for domain-specific applications. 
Unlike Web 2.0\nmashups which work against a fixed set of data sources, Linked Data applications operate\non top of an unbound, global data space. This enables them to deliver more complete\nanswers as new data sources appear on the Web.\nThe remainder of this paper is structured as follows. In Section 2 we provide an overview\nof the key features of Linked Data. Section 3 describes the activities and outputs of the\nLinking Open Data project, a community effort to apply the Linked Data principles to data\npublished under open licenses. The state of the art in publishing Linked Data is reviewed in\nSection 4, while section 5 gives an overview of Linked Data applications. Section 6\ncompares Linked Data to other technologies for publishing structured data on the Web,\nbefore we discuss ongoing research challenges in Section 7.\n2. What is Linked Data?\nIn summary, Linked Data is simply about using the Web to create typed links between data\nfrom different sources. These may be as diverse as databases maintained by two\norganisations in different geographical locations, or simply heterogeneous systems within\none organisation that, historically, have not easily interoperated at the data level.\nTechnically, Linked Data refers to data published on the Web in such a way that it is\nmachine-readable, its meaning is explicitly defined, it is linked to other external data sets,\nand can in turn be linked to from external data sets.\nWhile the primary units of the hypertext Web are HTML (HyperText Markup Language)\ndocuments connected by untyped hyperlinks, Linked Data relies on documents containing\ndata in RDF (Resource Description Framework) format (Klyne and Carroll, 2004). However,\nrather than simply connecting these documents, Linked Data uses RDF to make typed\nstatements that link arbitrary things in the world. 
The result, which we will refer to as the\nWeb of Data, may more accurately be described as a web of things in the world, described\nby data on the Web.\nBerners-Lee (2006) outlined a set of 'rules' for publishing data on the Web in a way that all\npublished data becomes part of a single global data space:\n1. Use URIs as names for things\n2. Use HTTP URIs so that people can look up those names\n3. When someone looks up a URI, provide useful information, using the standards\n(RDF, SPARQL)\n4. Include links to other URIs, so that they can discover more things\nThese have become known as the 'Linked Data principles', and provide a basic recipe for\npublishing and connecting data using the infrastructure of the Web while adhering to its\narchitecture and standards.\nThe Linked Data Technology Stack\nLinked Data relies on two technologies that are fundamental to the Web: Uniform Resource\nIdentifiers (URIs) (Berners-Lee et al., 2005) and the HyperText Transfer Protocol (HTTP)\n(Fielding et al., 1999). While Uniform Resource Locators (URLs) have become familiar as\naddresses for documents and other entities that can be located on the Web, Uniform\nResource Identifiers provide a more generic means to identify any entity that exists in the\nworld.\nWhere entities are identified by URIs that use the http:// scheme, these entities can be\nlooked up simply by dereferencing the URI over the HTTP protocol. In this way, the HTTP\nprotocol provides a simple yet universal mechanism for retrieving resources that can be\nserialised as a stream of bytes (such as a photograph of a dog), or retrieving descriptions of\nentities that cannot themselves be sent across the network in this way (such as the dog\nitself).\nURIs and HTTP are supplemented by a technology that is critical to the Web of Data – RDF,\nintroduced above. 
Whilst HTML provides a means to structure and link documents on the\nWeb, RDF provides a generic, graph-based data model with which to structure and link data\nthat describes things in the world.\nThe RDF model encodes data in the form of subject, predicate, object triples. The subject\nand object of a triple are both URIs that each identify a resource, or a URI and a string\nliteral respectively. The predicate specifies how the subject and object are related, and is\nalso represented by a URI.\nFor example, an RDF triple can state that two people, A and B, each identified by a URI, are\nrelated by the fact that A knows B. Similarly an RDF triple may relate a person C to a\nscientific article D in a bibliographic database by stating that C is the author of D. Two\nresources linked in this fashion can be drawn from different data sets on the Web, allowing\ndata in one data source to be linked to that in another, thereby creating a Web of Data.\nConsequently it is possible to think of RDF triples that link items in different data sets as\nanalogous to the hypertext links that tie together the Web of documents.\nRDF links (Bizer & Cyganiak & Heath, 2007) take the form of RDF triples, where the subject\nof the triple is a URI reference in the namespace of one data set, while the object of the\ntriple is a URI reference in the other. Figure 1 shows two example RDF links. The first link\nstates that a resource identified by the URI http://www.w3.org/People/Berners-Lee/card#i\nis member of another resource called http://dig.csail.mit.edu/data#DIG. When the subject\nURI is dereferenced over the HTTP protocol, the dig.csail.mit.edu server answers with a RDF\ndescription of the identified resource, in this case the MIT Decentralized Information Group.\nWhen the object URI is dereferenced the W3C server provides an RDF graph describing Tim\nBerners-Lee. 
Dereferencing the predicate URI http://xmlns.com/foaf/0.1/member yields a\ndefinition of the link type member, described in RDF using the RDF Vocabulary Definition\nLanguage (RDFS), introduced below. The second RDF link connects the description of the\nfilm Pulp Fiction in the Linked Movie Database with the description of the film provided by\nDBpedia, by stating that the URI http://data.linkedmdb.org/resource/film/77 and the URI\nhttp://dbpedia.org/resource/Pulp_Fiction_%28film%29 refer to the same real-world entity -\nthe film Pulp Fiction.\nSubject: http://dig.csail.mit.edu/data#DIG\nPredicate: http://xmlns.com/foaf/0.1/member\nObject: http://www.w3.org/People/Berners-Lee/card#i\nSubject: http://data.linkedmdb.org/resource/film/77\nPredicate: http://www.w3.org/2002/07/owl#sameAs\nObject: http://dbpedia.org/resource/Pulp_Fiction_%28film%29\nFigure 1. Example RDF links\nThe RDF Vocabulary Definition Language (RDFS) (Brickley & Guha, 2004) and the Web\nOntology Language (OWL) (McGuinness & van Harmelen, 2004) provide a basis for creating\nvocabularies that can be used to describe entities in the world and how they are related.\nVocabularies are collections of classes and properties. Vocabularies are themselves\nexpressed in RDF, using terms from RDFS and OWL, which provide varying degrees of\nexpressivity in modelling domains of interest. Anyone is free to publish vocabularies to the\nWeb of Data (Berrueta & Phipps, 2008), which in turn can be connected by RDF triples that\nlink classes and properties in one vocabulary to those in another, thereby defining mappings\nbetween related vocabularies.\nBy employing HTTP URIs to identify resources, the HTTP protocol as retrieval mechanism,\nand the RDF data model to represent resource descriptions, Linked Data directly builds on\nthe general architecture of the Web (Jacobs & Walsh, 2004). 
The Web of Data can therefore\nbe seen as an additional layer that is tightly interwoven with the classic document Web and\nhas many of the same properties:\n• The Web of Data is generic and can contain any type of data.\n• Anyone can publish data to the Web of Data.\n• Data publishers are not constrained in choice of vocabularies with which to\nrepresent data.\n• Entities are connected by RDF links, creating a global data graph that spans data\nsources and enables the discovery of new data sources.\nFrom an application development perspective the Web of Data has the following\ncharacteristics:\n• Data is strictly separated from formatting and presentational aspects.\n• Data is self-describing. If an application consuming Linked Data encounters data\ndescribed with an unfamiliar vocabulary, the application can dereference the URIs\nthat identify vocabulary terms in order to find their definition.\n• The use of HTTP as a standardized data access mechanism and RDF as a\nstandardized data model simplifies data access compared to Web APIs, which rely\non heterogeneous data models and access interfaces.\n• The Web of Data is open, meaning that applications do not have to be implemented\nagainst a fixed set of data sources, but can discover new data sources at run-time\nby following RDF links.\n3. The Linking Open Data Project\nThe most visible example of adoption and application of the Linked Data principles has been\nthe Linking Open Data project [Endnote: http://esw.w3.org/topic/SweoIG/TaskForces/\nCommunityProjects/LinkingOpenData], a grassroots community effort founded in January\n2007 and supported by the W3C Semantic Web Education and Outreach Group [Endnote:\nhttp://www.w3.org/2001/sw/sweo/]. 
The original and ongoing aim of the project is to\nbootstrap the Web of Data by identifying existing data sets that are available under open\nlicenses, converting these to RDF according to the Linked Data principles, and publishing\nthem on the Web.\nParticipants in the early stages of the project were primarily researchers and developers in\nuniversity research labs and small companies. Since that time the project has grown\nconsiderably, to include significant involvement from large organisations such as the BBC,\nThomson Reuters and the Library of Congress. This growth is enabled by the open nature of\nthe project, where anyone can participate simply by publishing a data set according to the\nLinked Data principles and interlinking it with existing data sets. An indication of the range\nand scale of the Web of Data originating from the Linking Open Data project is provided in\nFigure 2. Each node in this cloud diagram represents a distinct data set published as Linked\nData, as of March 2009.\nFigure 2. 
Linking Open Data cloud diagram giving an overview of published data sets and\ntheir interlinkage relationships.\nThe arcs in Figure 2 indicate that links exist between items in the two connected data sets.\nHeavier arcs roughly correspond to a greater number of links between two data sets, while\nbidirectional arcs indicate that outward links to the other exist in each data set.\nThe content of the cloud is diverse in nature, comprising data about geographic locations,\npeople, companies, books (Bizer & Cyganiak & Gauss, 2007), scientific publications (Van de\nSompel et al., 2009), films (Hassanzadeh & Consens, 2009), music, television and radio\nprogrammes (Kobilarov et al., 2009), genes, proteins, drugs and clinical trials (Belleau et al.,\n2008, Jentzsch et al., 2009), online communities, statistical data, census results, and\nreviews (Heath & Motta, 2008).\nCalculating the exact size of the Web of Data is challenging due to the fact that much of the\ndata is being generated by wrappers around existing relational databases or APIs and\ntherefore first needs to be crawled before it can be counted or analyzed (Hausenblas et al.,\n2008). Alternatively, the size of the Web of Data can be estimated based on the data set\nstatistics that are collected by the LOD community in the ESW wiki. According to these\nstatistics, the Web of Data currently consists of 4.7 billion RDF triples, which are interlinked\nby around 142 million RDF links (May 2009). [Endnote: http://esw.w3.org/topic/\nTaskForces/CommunityProjects/LinkingOpenData/DataSets/LinkStatistics and\nhttp://esw.w3.org/topic/TaskForces/CommunityProjects/LinkingOpenData/DataSets/\nStatistics]\nAs Figure 2 shows, certain data sets serve as linking hubs in the Web of Data. 
For example,\nthe DBpedia data set (Auer et al., 2007) consists of RDF triples extracted from the\n\"infoboxes\" commonly seen on the right hand side of Wikipedia articles, while Geonames\n[Endnote: http://www.geonames.org/ontology/] provides RDF descriptions of millions of\ngeographical locations worldwide. As these two data sets provide URIs and RDF descriptions\nfor many common entities or concepts, they are frequently referenced in other more\nspecialised data sets and have therefore developed into hubs to which an increasing number\nof other data sets are connected.\n4. Publishing Linked Data on the Web\nBy publishing data on the Web according to the Linked Data principles, data providers add\ntheir data to a global data space, which allows data to be discovered and used by various\napplications. Publishing a data set as Linked Data on the Web involves the following three\nbasic steps:\n1. Assign URIs to the entities described by the data set and provide for dereferencing these\nURIs over the HTTP protocol into RDF representations.\n2. Set RDF links to other data sources on the Web, so that clients can navigate the Web of\nData as a whole by following RDF links.\n3. Provide metadata about published data, so that clients can assess the quality of\npublished data and choose between different means of access.\nIn the following, we will give an overview about each of these tasks as well as about tools\nthat have been developed to support publishers with each task.\nChoosing URIs and RDF Vocabularies\nData providers can choose between two HTTP URI usage patterns to identify entities: 303\nURIs and hash URIs. Both patterns ensure that clients can distinguish between URIs that\nidentify real-world entities and URIs that identify Web documents describing these real-\nworld entities (Sauermann & Cyganiak, 2008). 
In an open environment like the Web,\ndifferent information providers publish data about the same real-world entity, for instance a\ngeographic location or a celebrity. As they may not know about each other, they introduce\ndifferent URIs to identify the same entity. For instance, DBpedia uses the URI\nhttp://dbpedia.org/resource/Berlin to identify Berlin, while Geonames uses the URI\nhttp://sws.geonames.org/2950159/ to identify Berlin. As both URIs refer to the same real-\nworld entity, they are called URI aliases. URI aliases are common on the Web of Data, as it\ncannot realistically be expected that all information providers agree on the same URIs to\nidentify an entity. URI aliases also provide an important social function to the Web of Data\nas they are dereferenced to different descriptions of the same real-world entity and thus\nallow different views and opinions to be expressed on the Web. In order to still be able to\ntrack that different information providers speak about the same entity, it is common\npractice that information providers set owl:sameAs links to URI aliases they know about.\nDifferent communities have specific preferences on the vocabularies they prefer to use for\npublishing data on the Web. The Web of Data is therefore open to arbitrary vocabularies\nbeing used in parallel. Despite this general openness, it is considered good practice to reuse\nterms from well-known RDF vocabularies such as FOAF, SIOC, SKOS, DOAP, vCard, Dublin\nCore, OAI-ORE or GoodRelations wherever possible in order to make it easier for client\napplications to process Linked Data. Only if these vocabularies do not provide the required\nterms should data publishers define new, data source-specific terminology (Bizer &\nCyganiak & Heath, 2007). If new terminology is defined, it should be made self-describing\nby making the URIs that identify terms Web dereferenceable (Berrueta & Phipps, 2008). 
This\nallows clients to retrieve RDF Schema or OWL definitions of the terms as well as term\nmappings to other vocabularies. The Web of Data thus relies on a pay as you go data\nintegration approach (Das Sarma & Dong & Halevy, 2008) based on a mixture of using\ncommon vocabularies together with data source-specific terms that are connected by\nmappings as deemed necessary.\nA common serialization format for Linked Data is RDF/XML (Beckett, 2004). In situations\nwhere human inspection of RDF data is required, Notation3 (Berners-Lee, 1998), and its\nsubset Turtle (Beckett and Berners-Lee, 2008), are often provided as alternative, inter-\nconvertible serializations, due to the greater perceived readability of these formats.\nAlternatively, Linked Data can also be serialized as RDFa (Adida et al., 2008) which provides\nfor embedding RDF triples into HTML. In the second case, data publishers should use the\nRDFa about attribute to assign URIs to entities in order to allow other data providers to set\nRDF links to them.\nLink Generation\nRDF links allow client applications to navigate between data sources and to discover\nadditional data. In order to be part of the Web of Data, data sources should set RDF links to\nrelated entities in other data sources. As data sources often provide information about large\nnumbers of entities, it is common practice to use automated or semi-automated approaches\nto generate RDF links.\nIn various domains, there are generally accepted naming schemata. For instance, in the\npublication domain there are ISBN and ISSN numbers, in the financial domain there are\nISIN identifiers, EAN and EPC codes are widely used to identify products, in life science\nvarious accepted identification schemata exist for genes, molecules, and chemical\nsubstances. 
If the link source and the link target data sets already both support one of\nthese identification schemata, the implicit relationship between entities in both data sets can\neasily be made explicit as RDF links. This approach has been used to generate links between\nvarious data sources in the LOD cloud.\nIf no shared naming schema exists, RDF links are often generated based on the similarity of\nentities within both data sets. Such similarity computations can build on a large body of\nrelated work on record linkage (Winkler, 2006) and duplicate detection (Elmagarmid et al.,\n2007) within the database community as well as on ontology matching (Euzenat & Shvaiko,\n2007) in the knowledge representation community. An example of a similarity based\ninterlinking algorithm is presented in (Raimond et al., 2008). In order to set RDF links\nbetween artists in the Jamendo and Musicbrainz data sets, the authors use a similarity\nmetric that compares the names of artists as well as the titles of their albums and songs.\nVarious RDF link generation frameworks are available, that provide declarative languages\nfor specifying which types of RDF links should be created, which combination of similarity\nmetrics should be used to compare entities and how similarity scores for specific properties\nare aggregated into an overall score. The Silk framework (Volz et al., 2009) works against\nlocal and remote SPARQL [Endnote: http://www.w3.org/TR/rdf-sparql-query/] endpoints\nand is designed to be employed in distributed environments without having to replicate data\nsets locally. The LinQL framework (Hassanzadeh et al., 2009) works over relational\ndatabases and is designed to be used together with database to RDF mapping tools such as\nD2R Server or Virtuoso.\nMetadata\nLinked Data should be published alongside several types of metadata, in order to increase\nits utility for data consumers. 
In order to enable clients to assess the quality of published\ndata and to determine whether they want to trust data, data should be accompanied with\nmeta-information about its creator, its creation date as well as the creation method (Hartig,\n2009). Basic provenance meta-information can be provided using Dublin Core terms or the\nSemantic Web Publishing vocabulary (Carroll et al., 2005). The Open Provenance Model\n(Moreau et al., 2008) provides terms for describing data transformation workflows. In (Zhao\net al., 2008), the authors propose a method for providing evidence for RDF links and for\ntracing how the RDF links change over time.\nIn order to support clients in choosing the most efficient way to access Web data for the\nspecific task they have to perform, data publishers can provide additional technical\nmetadata about their data set and its interlinkage relationships with other data sets: The\nSemantic Web Crawling sitemap extension (Cyganiak et al., 2008) allows data publishers to\nstate which alternative means of access (SPARQL endpoint, RDF dumps) are provided\nbesides dereferenceable URIs. The Vocabulary Of Interlinked Datasets (Alexander et al.,\n2009) defines terms and best practices to categorize and provide statistical meta-\ninformation about data sets as well as the linksets connecting them.\nPublishing Tools\nA variety of Linked Data publishing tools has been developed. The tools either serve the\ncontent of RDF stores as Linked Data on the Web or provide Linked Data views over non-\nRDF legacy data sources. The tools shield publishers from dealing with technical details such\nas content negotiation and ensure that data is published according to the Linked Data\ncommunity best practices (Sauermann & Cyganiak, 2008; Berrueta & Phipps, 2008; Bizer &\nCyganiak & Heath, 2007). All tools support dereferencing URIs into RDF descriptions. 
In\naddition, some of the tools also provide SPARQL query access to the served data sets and\nsupport the publication of RDF dumps.\n• D2R Server. D2R Server (Bizer & Cyganiak, 2006) is a tool for publishing non-RDF\nrelational databases as Linked Data on the Web. Using a declarative mapping\nlanguage, the data publisher defines a mapping between the relational schema of\nthe database and the target RDF vocabulary. Based on the mapping, D2R server\npublishes a Linked Data view over the database and allows clients to query the\ndatabase via the SPARQL protocol.\n• Virtuoso Universal Server. The OpenLink Virtuoso server[Endnote:\nhttp://www.openlinksw.com/dataspace/dav/wiki/Main/VOSRDF] provides for serving\nRDF data via a Linked Data interface and a SPARQL endpoint. RDF data can either\nbe stored directly in Virtuoso or can be created on the fly from non-RDF relational\ndatabases based on a mapping.\n• Talis Platform. The Talis Platform[Endnote: http://www.talis.com/platform/] is\ndelivered as Software as a Service accessed over HTTP, and provides native storage\nfor RDF/Linked Data. Access rights permitting, the contents of each Talis Platform\nstore are accessible via a SPARQL endpoint and a series of REST APIs that adhere to\nthe Linked Data principles.\n• Pubby. The Pubby server (Cyganiak & Bizer, 2008) can be used as an extension to\nany RDF store that supports SPARQL. Pubby rewrites URI requests into SPARQL\nDESCRIBE queries against the underlying RDF store. Besides RDF, Pubby also\nprovides a simple HTML view over the data store and takes care of handling 303\nredirects and content negotiation between the two representations.\n• Triplify. The Triplify toolkit (Auer et al, 2009) supports developers in extending\nexisting Web applications with Linked Data front-ends. Based on SQL query\ntemplates, Triplify serves a Linked Data and a JSON view over the application's\ndatabase.\n• SparqPlug. 
SparqPlug (Coetzee, Heath and Motta, 2008) is a service that enables\nthe extraction of Linked Data from legacy HTML documents on the Web that do not\ncontain RDF data. The service operates by serialising the HTML DOM as RDF and\nallowing users to define SPARQL queries that transform elements of this into an RDF\ngraph of their choice.\n• OAI2LOD Server. The OAI2LOD (Haslhofer & Schandl, 2008) is a Linked Data\nwrapper for document servers that support the Open Archives OAI-PMH protocol.\n• SIOC Exporters. The SIOC project has developed Linked Data wrappers for several\npopular blogging engines, content management systems and discussion forums such\nas WordPress, Drupal, and phpBB [Endnote: http://sioc-project.org/exporters].\nA service that helps publishers to debug their Linked Data site is the Vapour validation\nservice [Endnote: http://vapour.sourceforge.net/]. Vapour verifies that published data\ncomplies with the Linked Data principles and community best practices.\n5. Linked Data Applications\nWith significant volumes of Linked Data being published on the Web, numerous efforts are\nunderway to research and build applications that exploit this Web of Data. At present these\nefforts can be broadly classified into three categories: Linked Data browsers, Linked Data\nsearch engines, and domain-specific Linked Data applications. In the following section we\nwill examine each of these categories.\nLinked Data Browsers\nJust as traditional Web browsers allow users to navigate between HTML pages by following\nhypertext links, Linked Data browsers allow users to navigate between data sources by\nfollowing links expressed as RDF triples. For example, a user may view DBpedia's RDF\ndescription of the city of Birmingham (UK), follow a 'birthplace' link to the description of the\ncomedian Tony Hancock (who was born in the city), and from there onward into RDF data\nfrom the BBC describing broadcasts in which Hancock starred. 
The result is that a user may\nbegin navigation in one data source and progressively traverse the Web by following RDF\nrather than HTML links. The Disco hyperdata browser [Endnote: http://www4.wiwiss.fu-\nberlin.de/bizer/ng4j/disco/] follows this approach and can be seen as a direct application of\nthe hypertext navigation paradigm to the Web of Data.\nData, however, provides human interface opportunities and challenges beyond those of the\nhypertext Web. People need to be able to explore the Web of links between items, but also\nto powerfully analyze data in bulk. The Tabulator (Berners-Lee et al, 2006; Berners-Lee et\nal, 2008), for example, allows the user to traverse the Web of Data, and expose pieces of it in\na controlled way, in \"outline mode\"; to discover and highlight a pattern of interest; and then\nquery for any other similar patterns in the data Web. The results of the query form a table\nthat can then be analyzed with various conventional data presentation methods, such as\nfaceted browsers, maps, timelines, and so on.\nTabulator and Marbles (Becker & Bizer, 2008) (see Figure 3) are among the data browsers\nwhich track the provenance of data, while merging data about the same thing from different\nsources. While authors such as (Karger & schraefel, 2006) have questioned the use of\ngraph-oriented views over RDF data, as seen in browsers such as FOAFNaut [Endnote:\nhttp://www.jibbering.com/foaf/], (Hastrup, Cyganiak & Bojars, 2008) argue that such\ninterfaces fill an important niche, and describe their Fenfire browser that follows this display\nparadigm.\nFigure 3. The Marbles Linked Data browser displaying data about Tim Berners-Lee. The\ncolored dots indicate the data sources from which data was merged.\nLinked Data Search Engines and Indexes\nIn the traditional hypertext Web, browsing and searching are often seen as the two\ndominant modes of interaction (Olston & Chi, 2003). 
While browsers provide the\nmechanisms for navigating the information space, search engines are often the place at\nwhich that navigation process begins. A number of search engines have been developed\nthat crawl Linked Data from the Web by following RDF links, and provide query capabilities\nover aggregated data. Broadly speaking, these services can be divided into two categories:\nhuman-oriented search engines, and application-oriented indexes.\nHuman-oriented Search Engines\nSearch engines such as Falcons (Cheng & Qu, this issue) and SWSE (Hogan et al., 2007)\nprovide keyword-based search services oriented towards human users, and follow a similar\ninteraction paradigm to existing market leaders such as Google and Yahoo. The user is\npresented with a search box into which they can enter keywords related to the item or topic\nin which they are interested, and the application returns a list of results that may be\nrelevant to the query. However, rather than simply providing links from search results\nthrough to the source documents in which the queried keywords are mentioned, both SWSE\nand Falcons provide a more detailed interface to the user that exploits the underlying\nstructure of the data. Both provide a summary of the entity the user selects from the results\nlist, alongside additional structured data crawled from the Web and links to related entities.\nFalcons provides users with the option of searching for objects, concepts and documents,\neach of which leads to slightly different presentation of results. While the object search\n(Figure 4.) is suited to searching for people, places and other more concrete items, the\nconcept search is oriented to locating classes and properties in ontologies published on the\nWeb. 
The document search feature provides a more traditional search engine experience,\nwhere results point to RDF documents that contain the specified search terms.\nIt is worth noting that, while they may be referred to as distinct entities, the document Web\nand the data Web form one connected, navigable information space. For example, a user\nmay perform a search in the existing document Web, follow a link from an HTML document\ninto the Web of Data, navigate this space for some time, and then follow a link to a different\nHTML document, and so on.\nFigure 4. Falcons object search results for the keyword 'Berlin'.\nIt is interesting to note that while both SWSE and Falcons operate over corpuses of\nstructured data crawled from the Web, they choose to provide very simple query capabilities\nthat mimic the query interfaces of conventional Web search engines. While one may\nintuitively expect the additional structure in the data to be exploited to provide sophisticated\nquery capabilities for advanced users at least, this has not proved to be the case to date,\nwith the exception of Tabulator's style of query-by-example and faceted browsing interfaces\nfor query refinement. SWSE does provide access to its underlying data store via the SPARQL\nquery language, however this is suitable primarily for application developers with a\nknowledge of the language rather than regular users wishing to ask very specific questions\nthrough a usable human interface.\nApplication-oriented Indexes\nWhile SWSE and Falcons provide search capabilities oriented towards humans, another\nbreed of services have been developed to serve the needs of applications built on top of\ndistributed Linked Data. These application-oriented indexes, such as Swoogle (Ding et al,\n2005), Sindice (Oren et al, 2008) and Watson (d'Aquin et al, 2008) provide APIs through\nwhich Linked Data applications can discover RDF documents on the Web that reference a\ncertain URI or contain certain keywords. 
The rationale for such services is that each new\nLinked Data application should not need to implement its own infrastructure for crawling\nand indexing all parts of the Web of Data of which it might wish to make use. Instead,\napplications can query these indexes to receive pointers to potentially relevant documents\nwhich can then be retrieved and processed by the application itself. Despite this common\ntheme, these services have slightly different emphases. Sindice is oriented more to\nproviding access to documents containing instance data, while in contrast the emphasis of\nSwoogle and Watson is on finding ontologies that provide coverage of certain concepts\nrelevant to a query.\nDomain-specific Applications\nWhile the Linked Data browsers and search engines described above provide largely generic\nfunctionality, a number of services have been developed that offer more domain-specific\nfunctionality by 'mashing up' data from various Linked Data sources.\nRevyu\nRevyu (Heath & Motta, 2008) is a generic reviewing and rating site based on Linked Data\nprinciples and the Semantic Web technology stack. In addition to publishing Linked Data,\nRevyu consumes Linked Data from the Web to enhance the experience of site users. For\nexample, when films are reviewed on Revyu, the site attempts to match these with the\ncorresponding entry in DBpedia. Where a match is made, additional information about the\nfilm (such as the director's name and the film poster) is retrieved from DBpedia and shown\nin the human-oriented (HTML) pages of the site. In addition, links are made at the RDF level\nto the corresponding item, ensuring that while human users see a richer view of the item\nthrough the mashing up of data from various sources, Linked Data-aware applications are\nprovided with references to URIs from which related data may be retrieved. 
Similar\nprinciples are followed to link items such as books and pubs to corresponding entries in\nexternal data sets, and to enhance user profiles with FOAF data.\nDBpedia Mobile\nDBpedia Mobile (Becker & Bizer, 2008) is a location-aware Linked Data browser designed to\nbe run on an iPhone or other mobile device. DBpedia Mobile is oriented to the use case of a\ntourist exploring a city. Based on the current GPS position of the mobile device, the\napplication provides a location-centric mashup of nearby locations from DBpedia, associated\nreviews from Revyu, and related photos via a Linked Data wrapper around the Flickr photo-\nsharing API. Figure 5 shows DBpedia Mobile displaying data from DBpedia and Revyu about\nthe Brandenburg Gate in Berlin. Besides accessing Web data, DBpedia Mobile also enables\nusers to publish their current location, pictures and reviews to the Web as Linked Data, so\nthat they can be used by other applications. Instead of simply being tagged with\ngeographical coordinates, published content is interlinked with a nearby DBpedia resource\nand thus contributes to the overall richness of the Web of Data.\nFigure 5. DBpedia Mobile displaying information about Berlin\nTalis Aspire\nTalis Aspire (Clarke, 2009) is a Web-based Resource List Management application deployed\nto university lecturers and students. As users create lists through a conventional Web\ninterface, the application produces RDF triples which are persisted to an underlying Linked\nData-compatible store. The use of Linked Data principles enables items present on one list\nto be transparently linked to the corresponding items featured on lists at other institutions,\nthereby building a Web of scholarly data through the actions of non-specialist users.\nBBC Programmes and Music\nThe British Broadcasting Corporation (BBC) uses Linked Data internally as a lightweight data\nintegration technology. 
The BBC runs numerous radio stations and television channels.\nTraditionally, these stations and channels use separate content management systems. The\nBBC has thus started to use Linked Data technologies together with DBpedia and\nMusicBrainz as controlled vocabularies to connect content about the same topic residing in\ndifferent repositories and to augment content with additional data from the Linking Open\nData cloud. Based on these connections, BBC Programmes and BBC Music build Linked Data\nsites for all of its music and programmes related brands (Kobilarov et al., 2009).\nDERI Pipes\nModelled on Yahoo Pipes, DERI Pipes (Le Phuoc et al. 2009) provides a data level mashup\nplatform that enables data sources to be plugged together to form new feeds of data. The\nresulting aggregation workflows may contain sophisticated operations such as identifier\nconsolidation, schema mapping, RDFS or OWL reasoning, with data transformations being\nexpressed using SPARQL CONSTRUCT operations or XSLT templates. Figure 6 shows the\nassembly of a workflow to integrate data about Tim Berners-Lee within the DERI pipes\ndevelopment environment.\nFigure 6. DERI pipes workflow integrating data about Tim Berners-Lee from three data\nsources.\n6. Related Developments (in Research and Practice)\nThere are several other developments related to Linked Data happening on the Web or\nbeing pursued by related research communities. In the following sections, we will compare\nthese developments with Linked Data.\nMicroformats\nSimilar to Linked Data, Microformats[Endnote: http://microformats.org/] aim at extending\nthe Web with structured data. Microformats define a set of simple data formats that are\nembedded into HTML via class attributes. 
Two major differences between Microformats and\nLinked Data in its RDFa serialization are: Linked Data is not limited in the vocabularies that\ncan be used to represent data, and the vocabulary development process itself is completely\nopen, while Microformats are restricted to a small set of vocabularies developed through a\nprocess closely managed by a specific community. Data items that are included in HTML\npages via Microformats do not have their own identifier. This prevents the assertion, across\ndocuments and Web sites, of relationships between data items. By using URIs as global\nidentifiers and RDF to represent relationships, Linked Data does not have these limitations.\nWeb APIs\nMany major Web data sources such as Amazon, eBay, Yahoo!, and Google provide access to\ntheir data via Web APIs. The website ProgrammableWeb.com currently lists 1309 Web APIs\nas well as 3966 mashups based on these APIs. Web APIs are accessed using a wide range of\ndifferent mechanisms, and data retrieved from these APIs is represented using various\ncontent formats. In contrast, Linked Data commits itself to a small set of standardized\ntechnologies: URIs and HTTP as identification and access mechanism, RDF as content\nformat. Using a single set of technologies instead of relying on diverse interfaces and result\nformats allows data sources to be more easily crawled by search engines and accessed\nusing generic data browsers. Beside these technical details, there is also a major conceptual\ndifference between Web APIs and Linked Data: most Web APIs do not assign globally unique\nidentifiers to data items. Therefore it is not possible to set links between items in different\ndata sources in order to connect data into a global data space. Mashups based on these\nAPIs are therefore always implemented against a fixed set of data sources. In contrast,\nLinked Data applications can work on top of an unbounded, global data space. 
They can\ndiscover new data sources by following RDF links and take advantage of new data sources\nas they appear on the Web without needing to change the application code. Therefore,\nLinked Data technologies can contribute to connecting the different data silos that currently\nexist on the Web back into the single global information space.\nDataspaces\nA recent concept within the databases community that is very similar to Linked Data is\ndataspaces (Franklin et al. 2005). Dataspaces provide a target system architecture around\nwhich ongoing research on reference reconciliation, schema matching and mapping, data\nlineage, data quality and information extraction are unified (Halevy et al., 2006). In contrast\nwith other information-integration systems, dataspace systems offer best-effort answers\nbefore complete semantic mappings are provided to the system. A key idea of dataspaces is\nthat the semantic cohesion of a dataspace is increased over time by different parties\nproviding mappings; the same pay as you go data integration approach that currently\nemerges on the Web of Data. The Web of Data can therefore be seen as a realization of the\ndataspaces concept on a global scale, relying on a specific set of Web standards in order to be\nclosely aligned with the overall architecture of the Web. It is therefore likely that the Web of\nData will benefit considerably from research into dataspaces that is ongoing in the database\ncommunity.\nSemantic Web\nThe desire to extend the capabilities of the Web to publishing of structured data is not new,\nand can be traced back to the earliest proposal for the World Wide Web [Endnote:\nhttp://www.w3.org/History/1989/proposal.html] and subsequent papers on the topic\n(Berners-Lee et al., 1994). 
Trends foreseen at these early stages of the Web's existence\nincluded “Evolution of objects from being principally human-readable documents to contain\nmore machine-oriented semantic information” (Berners-Lee et al., 1994), which can be seen\nas the seeds of an idea that became known as the Semantic Web.\nThe vision of a Semantic Web has been interpreted in many different ways (e.g. Berners-\nLee, Hendler & Lassila, 2001; Marshall & Shipman, 2003). However, despite this diversity in\ninterpretation, the original goal of building a global Web of machine-readable data remains\nconstant across the original literature on the subject. According to (Berners-Lee, 2000,\np. 191), “The first step is putting data on the Web in a form that machines can naturally\nunderstand, or converting it to that form. This creates what I call a Semantic Web – a web\nof data that can be processed directly or indirectly by machines”. Therefore, while the\nSemantic Web, or Web of Data, is the goal or the end result of this process, Linked Data\nprovides the means to reach that goal.\nBy publishing Linked Data, numerous individuals and groups have contributed to the\nbuilding of a Web of Data, which can lower the barrier to reuse, integration and application\nof data from multiple, distributed and heterogeneous sources. Over time, with Linked Data\nas a foundation, some of the more sophisticated proposals associated with the Semantic\nWeb vision, such as intelligent agents, may become a reality.\n7. Research Challenges\nBy publishing and interlinking various data sources on the Web, the Linking Open Data\ncommunity has created a crystallization point for the Web of Data and a challenging test\nbed for Linked Data technologies. 
However, to address the ultimate goal of being able to\nuse the Web like a single global database, various remaining research challenges must be\novercome.\nUser Interfaces and Interaction Paradigms\nArguably the key benefit of Linked Data from the user perspective is the provision of\nintegrated access to data from a wide range of distributed and heterogeneous data sources.\nBy definition, this may involve integration of data from sources not explicitly selected by\nusers, as to do so would likely incur an unacceptable cognitive overhead. While the\nbrowsers described in Section 5 demonstrate promising trends in how applications may be\ndeveloped that exploit Linked Data, numerous challenges remain in understanding\nappropriate user interaction paradigms for applications built on data assembled dynamically\nin this fashion (Heath, 2008b). For example, while hypertext browsers provide mechanisms\nfor navigation forwards and backwards in a document-centric information space, similar\nnavigation controls in a Linked Data browser should enable the user to move forwards and\nbackwards between entities, thereby changing the focal point of the application. Linked Data\nbrowsers will also need to provide intuitive and effective mechanisms for adding and\nremoving data sources from an integrated, entity-centric view. Sigma (Catasta & Cyganiak\n& Tummarello, 2009), a search engine based on the Sindice service, gives an indication of\nhow such functionality could be delivered. However understanding how such an interface\ncan be realised when data sources number in the thousands or millions is a captivating\nresearch challenge.\nApplication Architectures\nIn principle, Linked Data may be accessed through advance crawling and caching, or on-\nthe-fly at application runtime through link traversal or federated querying. Search engines\nsuch as SWSE, Sindice, Falcons, and Watson crawl the Web of Data and provide applications\nwith access to crawled data through APIs. 
Federated query architectures for Linked Data\ninclude DARQ (Quilitz & Leser, 2008) and SemaPlorer (Schenk et al., 2008). The Semantic\nWeb Client Library[Endnote: http://www4.wiwiss.fu-berlin.de/bizer/ng4j/semwebclient/]\nand SQUIN [Endnote: http://squin.org/] have demonstrated that expressive queries can be\nanswered against the Web of Data by relying on runtime link traversal. The appropriate\nmixture of these methods will always depend on the specific needs of a Linked Data\napplication. However, due to the likelihood of scalability problems with on-the-fly link\ntraversal and federated querying, it may transpire that widespread crawling and caching will\nbecome the norm in making data available to applications in a timely fashion, while being\nable to take advantage of the openness of the Web of Data by discovering new data sources\nthrough link traversal.\nSchema Mapping and Data Fusion\nOnce data has been retrieved from distributed sources, it must be integrated in a\nmeaningful way before it is displayed to the user or is further processed. Today, most\nLinked Data applications display data from different sources alongside each other but do\nlittle to integrate it further. To do so requires mapping of terms from different\nvocabularies to the application's target schema, as well as fusing data about the same entity\nfrom different sources, by resolving data conflicts.\nLinked Data sources either use their own schemata or use a mixture of terms from existing,\nwell-known vocabularies together with self-defined terms specific to the particular data\nsource. In order to support clients in transforming data between different schemata, data\nsources can publish correspondences between their local terminology and the terminology of\nrelated data sources on the Web of Data. 
Current W3C recommendations such as RDF\nSchema (Brickley & Guha, 2004) and OWL (McGuinness & van Harmelen, 2004) define basic\nterminology like owl:equivalentClass, owl:equivalentProperty, rdfs:subClassOf,\nrdfs:subPropertyOf that can be used to publish basic correspondences. In many situations,\nthese correspondences are too coarse-grained to properly transform data between\nschemata. Problems include for instance structural heterogeneity as well as value\ntransformations. An open research issue is therefore the development of languages to\npublish more fine grained schema mappings on the Web. Ideally, such languages would\nsupport transitive mappings and provide for combining partial mappings in order to cover\ncases where data sources mix terminology from different vocabularies. Candidate\ntechnologies for this include the alignment languages presented in (Haslhofer, 2008) and\n(Euzenat & Scharffe & Zimmermann, 2007) as well as the rules interchange format\n(RIF)[Endnote: http://www.w3.org/2005/rules/wiki/RIF_Working_Group].\nIn addition to enhanced support for schema mapping, further research is needed in the area\nof data fusion for Linked Data applications. Data fusion is the process of integrating multiple\ndata items representing the same real-world object into a single, consistent, and clean\nrepresentation. The main challenge in data fusion is the resolution of data conflicts, i.e.\nchoosing a value in situations where multiple sources provide different values for the same\nproperty of an object. There is a large body of work on data fusion in the database\ncommunity (Bleiholder & Naumann, 2008) and an increasing body of work on identity\nreconciliation in the Web community (Halpin & Thomson, 2008). 
Specific requirements that\ndistinguish the Web of Data from other data fusion scenarios arise from the autonomy of\ndata sources and the scarceness and uncertainty of quality-related meta-information that is\nrequired to assess data quality in order to resolve inconsistencies. Prototypical systems for\nfusing Linked Data from multiple sources include DERI Pipes (Le Phuoc et al., 2009) and the\nKnoFuss architecture (Nikolov et al., 2008).\nLink Maintenance\nThe content of Linked Data sources changes: data about new entities is added, outdated\ndata is changed or removed. Today, RDF links between data sources are updated only\nsporadically which leads to dead links pointing at URIs that are no longer maintained and to\npotential links not being set as new data is published. Web architecture is in principle\ntolerant to dead links, but having too many of them leads to a large number of unnecessary\nHTTP requests by client applications. A current research topic within the Linked Data\ncommunity is therefore link maintenance. Proposed approaches to this problem range from\nrecalculating links at regular intervals using frameworks such as Silk (Volz et al., 2009) or\nLinQL (Hassanzadeh et al., 2009), through data sources publishing update feeds (Auer et\nal., 2009) or informing link sources about changes via subscription models to central\nregistries such as Ping the Semantic Web that keep track of new or changed data items.\nLicensing\nApplications that consume data from the Web must be able to access explicit specifications\nof the terms under which data can be reused and republished. 
Availability of appropriate\nframeworks for publishing such specifications is an essential requirement in encouraging\ndata owners to participate in the Web of Data, and in providing assurances to data\nconsumers that they are not infringing the rights of others by using data in a certain way.\nInitiatives such as the Creative Commons [Endnote: http://creativecommons.org/] have\nprovided a framework for open licensing of creative works, underpinned by the notion of\ncopyright. However, as (Miller et al., 2008) discuss, copyright law is not applicable to data,\nwhich from a legal perspective is also treated differently across jurisdictions. Therefore\nframeworks such as the Open Data Commons Public Domain Dedication and License\n[Endnote: http://www.opendatacommons.org/licenses/pddl/1.0/] should be adopted by the\ncommunity to provide clarity in this area. In situations where attribution is a condition of\ndata reuse, further research may also be required to explore how this can be achieved in\nuser interfaces that combine data from large numbers of sources.\nTrust, Quality and Relevance\nA significant consideration for Linked Data applications is how to ensure the data most\nrelevant or appropriate to the user's needs is identified and made available. For example, in\nscenarios where data quality and trustworthiness are paramount, how can this be\ndetermined heuristically, particularly where the data set may not have been encountered\npreviously?\nAn overview of different content-, context-, and rating-based techniques that can be used to\nheuristically assess the relevance, quality and trustworthiness of data is given in (Bizer &\nCyganiak, 2009; Heath, 2008a). 
Equivalents to the PageRank algorithm will likely be\nimportant in determining coarse-grained measures of the popularity or significance of a\nparticular data source, as a proxy for relevance or quality of the data, however such\nalgorithms will need to be adapted to the linkage patterns that emerge on the Web of Data.\nFrom an interface perspective, the question of how to represent the provenance and\ntrustworthiness of data drawn from many sources into an integrated view is a significant\nresearch challenge. (Berners-Lee, 1997) proposed that browser interfaces should be\nenhanced with an “Oh, yeah?” button to support the user in assessing the reliability of\ninformation encountered on the Web. Whenever a user encounters a piece of information\nthat they would like to verify, pressing such a button would produce an explanation of the\ntrustworthiness of the displayed information. This goal has yet to be realised, however\nexisting developments such as WIQA (Bizer & Cyganiak, 2009) and InferenceWeb\n(McGuinness & da Silva, 2003) can contribute to work in this area by providing explanations\nabout information quality as well as inference processes that are used to derive query\nresults.\nPrivacy\nThe ultimate goal of Linked Data is to be able to use the Web like a single global database.\nThe realization of this vision would provide benefits in many areas but will also aggravate\ndangers in others. One problematic area is the opportunities to violate privacy that arise\nfrom integrating data from distinct sources. Protecting privacy in the Linked Data context is\nlikely to require a combination of technical and legal means together with a higher\nawareness of the users about what data to provide in which context. 
Interesting research\ninitiatives in this domain are Weitzner’s work on the privacy paradox (Weitzner, 2007) and\nthe recent work by the TAMI project on information accountability (Weitzner et al., 2008).\nConclusions\nLinked Data principles and practices have been adopted by an increasing number of data\nproviders, resulting in the creation of a global data space on the Web containing billions of\nRDF triples. Just as the Web has brought about a revolution in the publication and\nconsumption of documents, Linked Data has the potential to enable a revolution in how data\nis accessed and utilised. The success of Web APIs has shown the power of applications that\ncan be created by mashing up content from different Web data sources. However, mashup\ndevelopers face the challenge of scaling their development approach beyond fixed,\npredefined data silos, to encompass large numbers of data sets with heterogeneous data\nmodels and access methods. In contrast, Linked Data realizes the vision of evolving the\nWeb into a global data commons, allowing applications to operate on top of an unbounded\nset of data sources, via standardised access mechanisms. If the research challenges\nhighlighted above can be adequately addressed, we expect that Linked Data will enable a\nsignificant evolutionary step in leading the Web to its full potential.\nReferences\nAdida, B., et al. (2008). RDFa in XHTML: Syntax and Processing - W3C Recommendation.\nRetrieved June 14, 2009, http://www.w3.org/TR/rdfa-syntax/\nAlexander, K., Cyganiak, R., Hausenblas, M., Zhao, J. (2009). Describing Linked Datasets.\nProceedings of the 2nd Workshop on Linked Data on the Web (LDOW2009).\nd'Aquin, M., et al. (2008). Toward a New Generation of Semantic Web Applications. IEEE\nIntelligent Systems, 23(3):20-28.\nAuer, S., et al. (2009). Triplify – Light-Weight Linked Data Publication from Relational\nDatabases. 
Proceedings of the 18th World Wide Web Conference (WWW2009).\nAuer, S., Bizer, C., Kobilarov, G., Lehmann, J., Cyganiak, C., Ives, Z. (2007). DBpedia: A\nNucleus for a Web of Open Data. Proceedings of the 6th International Semantic Web\nConference (ISWC2007).\nBecker, C., Bizer, C. (2008). DBpedia Mobile - A Location-Aware Semantic Web Client.\nProceedings of the Semantic Web Challenge at ISWC 2008.\nBeckett, D. (2004). RDF/XML Syntax Specification (Revised) - W3C Recommendation.\nRetrieved June 14, 2009, http://www.w3.org/TR/rdf-syntax-grammar/\nBeckett, D., Berners-Lee, T. (2008). Turtle - Terse RDF Triple Language - W3C Team\nSubmission. Retrieved July 23, 2009, http://www.w3.org/TeamSubmission/turtle/\nBelleau, F., Nolin, M., Tourigny, N., Rigault, P., Morissette, J. (2008). Bio2RDF: Towards a\nmashup to build bioinformatics knowledge systems. Journal of Biomedical Informatics,\n41(5):706-16.\nBerners-Lee, T. et al. (1994). The World-Wide Web. Communications of the ACM,\n37(8):76-82.\nBerners-Lee, T. (1997). Cleaning up the User Interface, Section - The ”Oh, yeah?”-Button.\nRetrieved June 14, 2009, http://www.w3.org/DesignIssues/UI.html\nBerners-Lee, T. (1998). Notation3 (N3) A readable RDF syntax. Retrieved July 23, 2009,\nhttp://www.w3.org/DesignIssues/Notation3.html\nBerners-Lee, T. (2000): Weaving the Web: The Past, Present and Future of the World Wide\nWeb by its Inventor. London, Texere.\nBerners-Lee, T., Hendler, J., Lassila, O. (2001) The Semantic Web. Scientific American,\n284(5):34-43.\nBerners-Lee, T., et al. (2005). Uniform Resource Identifier (URI): Generic Syntax. Request\nfor Comments: 3986. Retrieved June 14, 2009, http://tools.ietf.org/html/rfc3986\nBerners-Lee, T. (2006). Linked Data - Design Issues. Retrieved July 23, http://www.w3.org/\nDesignIssues/LinkedData.html\nBerners-Lee, T., et. al. (2006), Tabulator: Exploring and Analyzing Linked Data on the\nSemantic Web. 
Proceedings of the 3rd International Semantic Web User Interaction\nWorkshop (SWUI06).\nBerners-Lee, T., et al. (2008). Tabulator Redux: Browsing and Writing Linked Data.\nProceedings of the 1st Workshop on Linked Data on the Web (LDOW2008).\nBerrueta, D., Phipps, J. (2008). Best Practice Recipes for Publishing RDF Vocabularies - W3C\nWorking Group Note. Retrieved June 14, 2009, http://www.w3.org/TR/swbp-vocab-pub/\nBizer, C., Cyganiak, R. (2006). D2R Server - Publishing Relational Databases on the\nSemantic Web. Poster at the 5th International Semantic Web Conference (ISWC2006).\nBizer, C., Cyganiak, R., Heath, T. (2007). How to publish Linked Data on the Web. Retrieved\nJune 14, 2009, http://www4.wiwiss.fu-berlin.de/bizer/pub/LinkedDataTutorial/\nBizer, C., Cyganiak, R., Gauß, T. (2007): The RDF Book Mashup: From Web APIs to a Web\nof Data. Proceedings of the 3rd Workshop on Scripting for the Semantic Web (SFSW2007).\nBizer, C., Cyganiak, R. (2009): Quality-driven Information Filtering using the WIQA Policy\nFramework. Journal of Web Semantics, 7(1):1-10.\nBleiholder, J., Naumann, F. (2008). Data Fusion. ACM Computing Surveys, 41(1):1-41.\nBrickley, D., Guha, R. (2004). RDF Vocabulary Description Language 1.0: RDF Schema -\nW3C Recommendation. Retrieved June 14, 2009, http://www.w3.org/TR/rdf-schema/\nBrin, S., Page, L. (1998). The Anatomy of a Large-Scale Hypertextual Web Search Engine.\nComputer Networks and ISDN Systems, 30(1-7):107-117.\nCarroll, J., Bizer, C., Hayes, P., Stickler, P. (2005): Named graphs. Journal of Web\nSemantics, 3(4):247-267.\nCatasta, M., Cyganiak, R., Tummarello, G. (2009). Towards ECSSE: live Web of Data search\nand integration. Proceedings of the Semantic Search 2009 Workshop at WWW2009.\nCheng, G., Qu, Y. (this issue). Searching Linked Objects with Falcons: Approach,\nImplementation and Evaluation. International Journal on Semantic Web and Information\nSystems, Special Issue on Linked Data.\nClarke, C. (2009). 
A Resource List Management Tool for Undergraduate Students based on\nLinked Open Data Principles. Proceedings of the 6th European Semantic Web Conference\n(ESWC2009).\nCoetzee, P., Heath, T., Motta, E. (2008). SparqPlug. Proceedings of the 1st Workshop on\nLinked Data on the Web (LDOW2008).\nCyganiak, R., Bizer, C. (2008). Pubby - A Linked Data Frontend for SPARQL Endpoints.\nRetrieved June 14, 2009, http://www4.wiwiss.fu-berlin.de/pubby/\nCyganiak, R., Delbru, R., Stenzhorn, H., Tummarello, G., Decker, S. (2008): Semantic\nSitemaps: Efficient and Flexible Access to Datasets on the Semantic Web. Proceedings of\nthe 5th European Semantic Web Conference (ESWC2008).\nDas Sarma, A., Dong, X., Halevy, A. (2008). Bootstrapping pay-as-you-go data integration\nsystems. Proceedings of the Conference on Management of Data (SIGMOD2008).\nDing, L., et al. (2005). Finding and Ranking Knowledge on the Semantic Web, Proceedings\nof the 4th International Semantic Web Conference, November 2005.\nElmagarmid, A., Ipeirotis, P., Verykios, V. (2007). Duplicate Record Detection: A survey.\nIEEE Transactions on Knowledge and Data Engineering 19(1):1–16.\nEuzenat, J., Scharffe, F., Zimmermann A. (2007). Expressive alignment language and\nimplementation. Knowledge Web project report, KWEB/2004/D2.2.10/1.0.\nEuzenat, J., Shvaiko, P. (2007). Ontology Matching. Springer, Heidelberg.\nFielding, R., et al. (1999). Hypertext Transfer Protocol -- HTTP/1.1. Request for Comments:\n2616. Retrieved June 14, 2009, http://www.w3.org/Protocols/rfc2616/rfc2616.html\nFranklin, M., Halevy, A., Maier, D. (2005). From databases to dataspaces: a new abstraction\nfor information management. ACM SIGMOD Records, 34(4):27-33.\nHalevy, A., Franklin, M., Maier, D. (2006). Principles of dataspace systems. Proceedings of\nthe Symposium on Principles of database systems (PODS2006).\nHalpin, H., Thomson, H. (2008). 
Special Issue on Identity, Reference and the Web.\nInternational Journal on Semantic Web & Information Systems, 4(2):1-72.\nHartig, O. (2009). Provenance Information in the Web of Data. Proceedings of the 2nd\nWorkshop on Linked Data on the Web (LDOW2009).\nHaslhofer, B., Schandl, B. (2008). The OAI2LOD Server: Exposing OAI-PMH Metadata as\nLinked Data. Proceedings of the 1st Workshop about Linked Data on the Web (LDOW2008).\nHaslhofer, B. (2008): A Web-based Mapping Technique for Establishing Metadata\nInteroperability. PhD thesis, Universität Wien.\nHassanzadeh, O., et al. (2009). A Declarative Framework for Semantic Link Discovery over\nRelational Data. Poster at 18th World Wide Web Conference (WWW2009).\nHassanzadeh, O., Consens, M. (2009). Linked Movie Data Base. Proceedings of the 2nd\nWorkshop on Linked Data on the Web (LDOW2009).\nHastrup, T., Cyganiak, R., Bojars, U. (2008). Browsing Linked Data with Fenfire.\nProceedings of the 1st Workshop about Linked Data on the Web (LDOW2008).\nHausenblas, M., Halb, W., Raimond, Y., Heath, T. (2008). What is the Size of the Semantic\nWeb? In Proceedings of the International Conference on Semantic Systems (I-\nSemantics2008).\nHeath, T. (2008a). Information-seeking on the Web with Trusted Social Networks – from\nTheory to Systems. PhD Thesis, The Open University.\nHeath, T. (2008b). How Will We Interact with the Web of Data? In IEEE Internet Computing,\nVol. 12(5): 88-91.\nHeath, T., Motta, E. (2008). Revyu: Linking reviews and ratings into the Web of Data.\nJournal of Web Semantics, 6(4):266-273.\nHogan, A., Harth, A., Umrich, J., Decker, S. (2007). Towards a scalable search and query\nengine for the web. Proceedings of the 16th Conference on World Wide Web (WWW2007).\nJacobs, I., Walsh, N. (2004): Architecture of the World Wide Web, Volume One - W3C\nRecommendation. Retrieved June 14, 2009, http://www.w3.org/TR/webarch/\nJentzsch, A., Hassanzadeh, O., Bizer, C., Andersson, B., Stephens, S. (2009). 
Enabling\nTailored Therapeutics with Linked Data. Proceedings of the 2nd Workshop on Linked Data on\nthe Web (LDOW2009).\nKarger, D., schraefel, m.c. (2006). Pathetic Fallacy of RDF. Proceedings of 3rd Semantic\nWeb User Interaction Workshop (SWUI2006).\nKlyne, G., Carroll, J. (2004). Resource Description Framework (RDF): Concepts and Abstract\nSyntax - W3C Recommendation. Retrieved June 14, 2009, http://www.w3.org/TR/rdf-\nconcepts/\nKobilarov, G., et al. (2009). Media Meets Semantic Web - How the BBC Uses DBpedia and\nLinked Data to Make Connections. Proceedings of the 6th European Semantic Web\nConference (ESWC2009).\nLe Phuoc, D., Polleres, A., Morbidoni, C., Hauswirth, M., Tummarello, G. (2009). Rapid\nsemantic web mashup development through semantic web pipes. Proceedings of the 18th\nWorld Wide Web Conference (WWW2009).\nMarshall, C., Shipman, F. (2003). Which semantic web? Proceedings of the 14th ACM\nConference on Hypertext and Hypermedia (HT2003).\nMcGuinness, D., van Harmelen, F. (2004). OWL Web Ontology Language - W3C\nRecommendation. Retrieved June 14, 2009, http://www.w3.org/TR/owl-features/\nMcGuinness, D., da Silva, P. (2003). Infrastructure for Web Explanations. Proceedings of the\n2nd International Semantic Web Conference (ISWC2003).\nMiller, P., Styles, R., Heath, T. (2008). Open Data Commons, a License for Open Data.\nProceedings of the 1st Workshop about Linked Data on the Web (LDOW2008).\nMoreau, L., et al. (2008). The Open Provenance Model. Technical report, Electronics and\nComputer Science, University of Southampton.\nNikolov, A., et al. (2008): Integration of Semantically Annotated Data by the KnoFuss\nArchitecture. Proceedings of the 16th International Conference on Knowledge Engineering\nand Knowledge Management.\nOlston, C., Chi, E. (2003). ScentTrails: Integrating Browsing and Searching on the Web.\nACM Transactions on Computer-Human Interaction, 10(3):177-197.\nOren, E., et al. (2008). 
Sindice.com: A document-oriented lookup index for open linked\ndata. Journal of Metadata, Semantics and Ontologies, 3(1):37-52.\nQuilitz, B., Leser, U. (2008). Querying distributed RDF data sources with SPARQL.\nProceedings of the 5th European Semantic Web Conference (ESWC2008).\nRaimond, Y., Sutton, C., Sandler, M. (2008). Automatic Interlinking of Music Datasets on\nthe Semantic Web. Proceedings of the 1st Workshop about Linked Data on the Web\n(LDOW2008).\nSauermann, L., Cyganiak, R. (2008): Cool URIs for the Semantic Web. W3C Interest Group\nNote. Retrieved June 14, 2009, http://www.w3.org/TR/cooluris/\nSchenk, S., et al. (2008). SemaPlorer—Interactive Semantic Exploration of Data and Media\nbased on a Federated Cloud Infrastructure. Proceedings of the Semantic Web Challenge at\nISWC 2008.\nVan de Sompel, H., Lagoze, C., Nelson, M., Warner, S., Sanderson, R., Johnston, P. (2009).\nAdding eScience Assets to the Data Web. Proceedings of the 2nd Workshop on Linked Data\non the Web (LDOW2009).\nVolz, J., Bizer, C., Gaedke, M., Kobilarov, G. (2009): Silk – A Link Discovery Framework for\nthe Web of Data. Proceedings of the 2nd Workshop on Linked Data on the Web\n(LDOW2009).\nWeitzner, D. (2007): Beyond Secrecy: New Privacy Protection Strategies for Open\nInformation Spaces. IEEE Internet Computing, 11(5):94-96.\nWeitzner, D., et al. (2008). Information Accountability. Communications of the ACM,\n51(6):82-87.\nWinkler, W. (2006). Overview of Record Linkage and Current Research Directions. US\nBureau of the Census, Technical Report.\nZhao, J., Klyne, G., Shotton, D. (2008). Provenance and Linked Data in Biological Data\nWebs. Proceedings of the 1st Workshop about Linked Data on the Web (LDOW2008).\nBios and Photos\nChristian Bizer\nProfessor Christian Bizer is the head of the Web-based Systems Group at Freie Universität\nBerlin. The group explores technical and economic questions concerning the development of\nglobal, decentralized information environments. 
The results of his work include the Named\nGraphs data model, which was adopted into the W3C SPARQL standard, the Fresnel display\nvocabulary implemented by several data browsers, and the D2RQ mapping language which\nis widely used for mapping relational databases to the Web of Data. He initialized the\nLinking Open Data community project and the DBpedia project.\nTom Heath\nDr. Tom Heath is a researcher in the Platform Division of Talis Information Ltd, a leading\nprovider of Linked Data storage, management and publishing technologies, where he\ncoordinates internal research focusing on collective intelligence and human-computer\ninteraction in a Linked Data and Semantic Web context. He is a leading member of the\nLinking Open Data community project, and creator of the Linked Data-enabled reviewing\nand rating site Revyu.com, winner of the 2007 Semantic Web Challenge. Tom has a PhD in\nComputer Science from The Open University.\nTim Berners-Lee\nSir Tim Berners-Lee invented the World Wide Web, an internet-based hypermedia initiative\nfor global information sharing while at CERN, the European Particle Physics Laboratory. He\nwrote the first web client and server in 1990. His specifications of URIs, HTTP and HTML\nwere refined as Web technology spread. Tim is professor at the Laboratory for Computer\nScience and Artificial Intelligence (CSAIL) at the Massachusetts Institute of Technology\n(MIT) and the Computer Science Department at the University of Southampton, UK. In 2001\nhe became a Fellow of the Royal Society.\n",
"fullTextIdentifier": null,
"identifiers": ["oai:eprints.soton.ac.uk:271285", null],
"journals": null,
"language": {
"code": "en",
"id": 9,
"name": "English"
},
"duplicateId": null,
"publisher": null,
"rawRecordXml": "<record><header><identifier>\n \n \n oai:eprints.soton.ac.uk:271285</identifier><datestamp>\n 2017-07-18T06:44:55Z</datestamp><setSpec>\n 7374617475733D756E707562</setSpec><setSpec>\n 74797065733D61727469636C65</setSpec><setSpec>\n 7374617475733D756E707562</setSpec><setSpec>\n 66756C6C746578743D7075626C6963</setSpec></header><metadata><rioxx xmlns=\"http://www.rioxx.net/schema/v2.0/rioxx/\" xmlns:ali=\"http://ali.niso.org/2014/ali/1.0\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:dcterms=\"http://purl.org/dc/terms/\" xmlns:rioxxterms=\"http://docs.rioxx.net/schema/v2.0/rioxxterms/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.rioxx.net/schema/v2.0/rioxx/ http://www.rioxx.net/schema/v2.0/rioxx/rioxx.xsd\" ><dc:description>\n \n The term “Linked Data” refers to a set of best practices for publishing and connecting structured data on the Web. These best practices have been adopted by an increasing number of data providers over the last three years, leading to the creation of a global data space containing billions of assertions— the Web of Data. In this article, the authors present the concept and technical principles of Linked Data, and situate these within the broader context of related technological developments. 
They describe progress to date in publishing Linked Data on the Web, review applications that have been developed to exploit the Web of Data, and map out a research agenda for the Linked Data community as it moves forward.</dc:description><dc:language>en</dc:language><dc:source>1552-6283</dc:source><dc:subject>QA75</dc:subject><dc:title>Linked Data - the story so far</dc:title><rioxxterms:author>Bizer, Christian</rioxxterms:author><rioxxterms:author>Heath, Tom</rioxxterms:author><rioxxterms:author>Berners-Lee, Tim</rioxxterms:author><rioxxterms:contributor>Bizer, Christian</rioxxterms:contributor><rioxxterms:contributor>Heath, Tom</rioxxterms:contributor><rioxxterms:contributor>Berners-Lee, Tim</rioxxterms:contributor><rioxxterms:publication_date>2009</rioxxterms:publication_date><rioxxterms:type>Journal Article/Review</rioxxterms:type><rioxxterms:version>NA</rioxxterms:version><rioxxterms:version_of_record>http://dx.doi.org/10.4018/jswis.2009081901</rioxxterms:version_of_record></rioxx></metadata></record>",
"relations": [],
"repositories": [{
"id": "34",
"openDoarId": 0,
"name": "e-Prints Soton",
"uri": null,
"urlHomepage": null,
"urlOaipmh": null,
"uriJournals": null,
"physicalName": "noname",
"source": null,
"software": null,
"metadataFormat": null,
"description": null,
"journal": null,
"roarId": 0,
"pdfStatus": null,
"nrUpdates": 0,
"disabled": false,
"lastUpdateTime": null,
"repositoryLocation": null
}],
"repositoryDocument": {
"pdfStatus": 0,
"textStatus": 1,
"metadataAdded": 1333650712000,
"metadataUpdated": 1516236481000,
"timestamp": 1516395717000,
"indexed": 1,
"deletedStatus": "0",
"pdfSize": 2121833,
"tdmOnly": false,
"pdfOrigin": null
},
"similarities": null,
"subjects": ["QA75"],
"title": "Linked Data - the story so far",
"topics": ["QA75"],
"types": [],
"urls": ["http://dx.doi.org/10.4018/jswis.2009081901"],
"year": 2009,
"doi": null,
"oai": "oai:eprints.soton.ac.uk:271285",
"downloadUrl": "",
"pdfHashValue": null,
"documentType": "research",
"documentTypeConfidence": 1.0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment