Skip to content

Instantly share code, notes, and snippets.

@backpackerhh
Created May 23, 2014 19:03
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save backpackerhh/94187846aecd158152fe to your computer and use it in GitHub Desktop.
Save backpackerhh/94187846aecd158152fe to your computer and use it in GitHub Desktop.
XML World-Countries XPath and XQuery Exercises
<!-- 1. Return the area of Mongolia. -->
<!-- Reminder: To return the value of an attribute attr, you must use data(@attr), although just @attr may be used in comparisons. You will need to remember this for some later questions as well. -->
(: Atomize the area attribute of the country named Mongolia. :)
data(//country[@name = 'Mongolia']/@area)
<!-- 2. Return the names of all cities that have the same name as the country in which they are located. -->
(: City <name> elements whose text equals the name attribute of the enclosing country. :)
//country/city[name = ../@name]/name
<!-- 3. Return the average population of Russian-speaking countries. -->
(: avg() atomizes the population attributes of the countries that list Russian. :)
avg(//country[language = 'Russian']/@population)
<!-- 4. Return the names of all countries that have at least three cities with population greater than 3 million. -->
(: A country qualifies when a third city with population above 3 million exists. :)
//country[city[population > 3000000][3]]/data(@name)
<!-- 5. Create a list of French-speaking and German-speaking countries. The result should take the form: -->
<!--
<result>
<French>
<country>country-name</country>
<country>country-name</country>
...
</French>
<German>
<country>country-name</country>
<country>country-name</country>
...
</German>
</result>
-->
(: One <country> entry per country that lists the given language, grouped under <French> and <German>. :)
<result>
  <French>{
    for $c in //country[language = 'French']
    return element country { data($c/@name) }
  }</French>
  <German>{
    for $c in //country[language = 'German']
    return element country { data($c/@name) }
  }</German>
</result>
<!-- 6. Return the countries with the highest and lowest population densities. Note that because the "/" operator has its own meaning in XPath and XQuery, the division operator is infix "div". To compute population density use "(@population div @area)". You can assume density values are unique. The result should take the form: -->
<!--
<result>
<highest density="value">country-name</highest>
<lowest density="value">country-name</lowest>
</result>
-->
(: Density is @population div @area. The extremes come from max()/min() over all
   countries and are then matched back to the countries that have them. :)
let $all := //country
let $densities := for $c in $all return $c/@population div $c/@area
let $top := max($densities)
let $bottom := min($densities)
return
  <result>
    <highest density="{ $top }">{ $all[@population div @area = $top]/data(@name) }</highest>
    <lowest density="{ $bottom }">{ $all[@population div @area = $bottom]/data(@name) }</lowest>
  </result>
<!-- 1. Return the names of all countries with population greater than 100 million. -->
(: Select the name attribute of each country whose population exceeds 100 million, then atomize it. :)
//country/@name[../@population > 100000000]/data(.)
<!-- 2. Return the names of all countries where over 50% of the population speaks German. (Hint: Depending on your solution, you may want to use ".", which refers to the "current element" within an XPath expression.) -->
(: Three alternative formulations; each returns the names of countries with a
   German language entry whose percentage attribute is above 50. :)
//country[language[. = 'German'][@percentage > 50]]/data(@name)
data(//country[language[@percentage > 50 and text() = 'German']]/@name)
//language[text() = 'German'][@percentage > 50]/parent::country/data(@name)
<!-- 3. Return the names of all countries where a city in that country contains more than one-third of the country's population. -->
(: Some city population in the country exceeds one third of the country's population attribute. :)
//country[some $p in city/population satisfies $p > @population div 3]/data(@name)
<!-- 4. Return the population density of Qatar. Note: Since the "/" operator has its own meaning in XPath and XQuery, the division operator is "div". To compute population density use "(@population div @area)". -->
(: Population divided by area for the country named Qatar. :)
for $q in //country[@name = 'Qatar'] return $q/@population div $q/@area
<!-- 5. Return the names of all countries whose population is less than one thousandth that of some city (in any country). -->
(: The country's population is below one thousandth of the population of some city anywhere in the data. :)
//country[some $c in //city satisfies @population < $c/population div 1000]/data(@name)
<!-- 6. Return all city names that appear more than once, i.e., there is more than one city with that name in the data. Return only one instance of each such city name. (Hint: You might want to use the "preceding" and/or "following" navigation axes for this query, which were not covered in the video or our demo script; they match any preceding or following node, not just siblings.) -->
(: Return exactly one <name> per repeated city name: keep only the first
   occurrence, i.e. a city that has a same-named city after it and none before it.
   Testing only "name = following::name" would emit every occurrence except the
   last, so a name used three or more times would be returned more than once. :)
//city[name = following::city/name][not(name = preceding::city/name)]/name
<!-- 7. Return the names of all countries containing a city such that some other country has a city of the same name. (Hint: You might want to use the "preceding" and/or "following" navigation axes for this query, which were not covered in the video or our demo script; they match any preceding or following node, not just siblings.) -->
(: Two alternative formulations. Evaluated from the country node, the following
   and preceding axes exclude the country's own descendants. :)
//country[city/name = (preceding::city | following::city)/name]/data(@name)
//country[some $n in city/name satisfies ($n = following::city/name or $n = preceding::city/name)]/data(@name)
<!-- 8. Return the names of all countries whose name textually contains a language spoken in that country. For instance, Uzbek is spoken in Uzbekistan, so return Uzbekistan. (Hint: You may want to use ".", which refers to the "current element" within an XPath expression.) -->
(: Two alternative formulations: the country's name attribute contains the text of one of its languages. :)
//country[some $l in language satisfies contains(@name, $l)]/data(@name)
//language[contains(parent::country/@name, self::language)]/parent::country/data(@name)
<!-- 9. Return the names of all countries in which people speak a language whose name textually contains the name of the country. For instance, Japanese is spoken in Japan, so return Japan. (Hint: You may want to use ".", which refers to the "current element" within an XPath expression.) -->
(: Start from the languages whose text contains the parent's name attribute, then step up to the country. :)
//language[contains(., ../@name)]/parent::country/data(@name)
<!-- 10. Return all languages spoken in a country whose name textually contains the language name. For instance, German is spoken in Germany, so return German. (Hint: Depending on your solution, you may want to use data(.), which returns the text value of the "current element" within an XPath expression.) -->
(: Three alternative formulations: languages whose text is contained in the
   name attribute of their parent. The second returns text nodes, the other two
   return atomized values. :)
data(//language[contains(../@name, .)])
//language[contains(parent::*/@name, .)]/text()
for $l in //language[contains(parent::country/@name, .)] return data($l)
<!-- 11. Return all languages whose name textually contains the name of a country in which the language is spoken. For instance, Icelandic is spoken in Iceland, so return Icelandic. (Hint: Depending on your solution, you may want to use data(.), which returns the text value of the "current element" within an XPath expression.) -->
(: Emit the atomized language when its text contains the parent's name attribute. :)
for $l in //language return if (contains($l, $l/../@name)) then data($l) else ()
<!-- 12. Return the number of countries where Russian is spoken. -->
(: The path step removes duplicate parents, so each country is counted once. :)
count(//language[. = 'Russian']/parent::country)
<!-- 13. Return the names of all countries for which the data does not include any languages or cities, but the country has more than 10 million people. -->
(: Two alternative formulations: no language children, no city children, and population above 10 million. :)
//country[not(language | city)][@population > 10000000]/data(@name)
//country[empty(language) and empty(city) and @population > 10000000]/data(@name)
<!-- 14. Return the name of the country with the highest population. (Hint: You may need to explicitly cast population numbers as integers with xs:int() to get the correct answer.) -->
(: Bind the maximum population once, then select the countries whose population equals it. :)
for $top in max(//country/@population) return //country[@population = $top]/data(@name)
<!-- 15. Return the name of the country that has the city with the highest population. (Hint: You may need to explicitly cast population numbers as integers with xs:int() to get the correct answer.) -->
(: Find the cities whose population equals the overall maximum, then step up to their country. :)
//city[population = max(//city/population)]/parent::country/data(@name)
<!-- 16. Return the average number of languages spoken in countries where Russian is spoken. -->
(: Average of the per-country language counts over countries that list Russian. :)
avg(for $c in //country[language = 'Russian'] return count($c/language))
<!-- 17. Return all country-language pairs where the language is spoken in the country and the name of the country textually contains the language name. Return each pair as a country element with language attribute, e.g., <country language="French">French Guiana</country> -->
(: Emit one <country language="..."> element for every language whose text is
   contained in the name of its country. :)
for $language in //country/language
let $country_name := $language/parent::country/data(@name)
where contains($country_name, $language)
return
  element country {
    attribute language { data($language) },
    $country_name
  }
<!-- 18. Return all countries that have at least one city with population greater than 7 million. For each one, return the country name along with the cities greater than 7 million, in the format: -->
<!--
<country name="country-name">
<big>city-name</big>
<big>city-name</big>
...
</country>
-->
(: Only countries with at least one city above 7 million are emitted. The name
   attribute is copied and each such city becomes a <big> child. :)
for $country in //country
let $big_cities := $country/city[population > 7000000]
where exists($big_cities)
return
  element country {
    $country/@name,
    for $city in $big_cities
    return element big { data($city/name) }
  }
<!-- 19. Return all countries where at least one language is listed, but the total percentage for all listed languages is less than 90%. Return the country element with its name attribute and its language subelements, but no other attributes or subelements. -->
(: Predicate form: countries with at least one language whose percentages sum
   to less than 90. The output keeps only the name attribute and the language children. :)
for $c in //country[exists(language) and sum(language/@percentage) < 90]
return
  element country {
    $c/@name,
    $c/language
  }
(: Where-clause form of the same query: the language children are bound once
   and reused for both the filter and the output. :)
for $country in //country
let $langs := $country/language
where exists($langs) and sum($langs/@percentage) < 90
return
  <country>{ $country/@name, $langs }</country>
<!-- 20. Return all countries where at least one language is listed, and every listed language is spoken by less than 20% of the population. Return the country element with its name attribute and its language subelements, but no other attributes or subelements. -->
(: The [language] predicate is required because "every" is true over an empty
   sequence. The output keeps the name attribute and the language children. :)
for $country in //country[language][every $l in language satisfies $l/@percentage < 20]
return
  element country {
    attribute name { data($country/@name) },
    $country/language
  }
<!-- 21. Find all situations where one country's most popular language is another country's least popular, and both countries list more than one language. (Hint: You may need to explicitly cast percentages as floating-point numbers with xs:float() to get the correct answer.) Return the name of the language and the two countries, each in the format: -->
<!--
<LangPair language="lang-name">
<MostPopular>country-name</MostPopular>
<LeastPopular>country-name</LeastPopular>
</LangPair>
-->
(: Pair a language that is the most popular in one country with the same
   language being the least popular in ANOTHER country. Only countries listing
   more than one language are considered. Percentages are cast to xs:float
   before comparison.
   The per-country maximum/minimum is computed once per country rather than once
   per language, and pairs whose two sides are the same country are excluded
   (this happens when a country's top and bottom languages coincide, e.g. when
   all its percentages are equal). :)
let $countries := //country[count(language) > 1]
let $most_popular_languages :=
  for $country in $countries
  let $top := max($country/language/xs:float(@percentage))
  return $country/language[xs:float(@percentage) = $top]
let $least_popular_languages :=
  for $country in $countries
  let $bottom := min($country/language/xs:float(@percentage))
  return $country/language[xs:float(@percentage) = $bottom]
for $most_popular in $most_popular_languages
for $least_popular in $least_popular_languages
where data($most_popular) = data($least_popular)
  and not($most_popular/parent::country is $least_popular/parent::country)
return
  <LangPair language="{ data($most_popular) }">
    <MostPopular>{ $most_popular/parent::country/data(@name) }</MostPopular>
    <LeastPopular>{ $least_popular/parent::country/data(@name) }</LeastPopular>
  </LangPair>
<!-- 22. For each language spoken in one or more countries, create a "language" element with a "name" attribute and one "country" subelement for each country in which the language is spoken. The "country" subelements should have two attributes: the country "name", and "speakers" containing the number of speakers of that language (based on language percentage and the country's population). Order the result by language name, and enclose the entire list in a single "languages" element. For example, your result might look like: -->
<!--
<languages>
...
<language name="Arabic">
<country name="Iran" speakers="660942"/>
<country name="Saudi Arabia" speakers="19409058"/>
<country name="Yemen" speakers="13483178"/>
</language>
...
</languages>
-->
(: Group every <language> element by its text, ordered by language name. For
   each occurrence emit the country name and the speaker count, computed as
   population * (percentage div 100) and cast to xs:int. :)
let $spoken := //country/language
return
  <languages>{
    for $language_name in distinct-values($spoken)
    order by $language_name
    return
      <language name="{ $language_name }">{
        for $language in $spoken[. = $language_name]
        let $country := $language/parent::country
        return
          <country name="{ data($country/@name) }"
                   speakers="{ xs:int($country/@population * ($language/@percentage div 100)) }"/>
      }</language>
  }</languages>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment