Last active
May 4, 2019 01:08
-
-
Save binhqd/07ad78b04839ea7729871b804b2ad0c4 to your computer and use it in GitHub Desktop.
Note for Linux command line (Mercury)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Back-fill the Japanese name of existing City nodes from cities1.csv.
// Columns are read by position: column 0 = city uuid, column 3 = Japanese name.
// MATCH (not MERGE) is used on purpose: only existing cities are updated, so an
// unknown uuid or a header row is skipped instead of creating a City node that
// carries nothing but a uuid, and a null uuid no longer aborts the load.
LOAD CSV FROM "file:///cities1.csv" AS csvLine
MATCH (c:City {uuid: csvLine[0]})
SET c.name_ja = csvLine[3];
// Export the uuid and English name of every Country node to /tmp/countries.csv.
CALL apoc.export.csv.query("match (c:Country) return c.uuid, c.name_en","/tmp/countries.csv", {});
// Back-fill the Japanese name of existing Country nodes from
// countries-translated.csv.
// Columns are read by position: column 0 = country uuid, column 2 = Japanese name.
// MATCH (not MERGE) is used on purpose: only existing countries are updated, so
// an unknown uuid or a header row is skipped instead of creating a Country node
// that carries nothing but a uuid, and a null uuid no longer aborts the load.
LOAD CSV FROM "file:///countries-translated.csv" AS csvLine
MATCH (c:Country {uuid: csvLine[0]})
SET c.name_ja = csvLine[2];
// Export every City that BELONGS_TO a Country to /tmp/cities.csv:
// city uuid (city_id), country uuid (country_id) and the city's English name.
CALL apoc.export.csv.query("match (c:City)-[:BELONGS_TO]->(ct:Country) return c.uuid as city_id,ct.uuid as country_id, c.name_en","/tmp/cities.csv", {});
# List all JSON files under the current directory
find . -type f -name "*.json"
# Find articles whose extraction failed: recursively grep (with line numbers)
# for lines that start with a literal "[]"
grep -rnw '.' -e '^\[\]'
// Export one row per (Country, City) pair linked by BELONGS_TO to
// /tmp/countries-cities.csv, with the internal node ids and the listed
// properties of both nodes.
CALL apoc.export.csv.query("match (ct:Country)<-[:BELONGS_TO]-(c:City)
return ID(ct) as countryID, ct.uuid, ct.country_code, ct.name_en, ct.crawler_id, ct.name_ja, ID(c) as cityID, c.uuid, c.name_en, c.name_ja, c.area_name, c.best_season, c.canonical_name, c.climate, c.languages, c.region_name, c.time_zone, c.urban, c.coordinates","/tmp/countries-cities.csv", {});
// Export the OBJECT_IN_SUB_TITLE node with the given uuid, together with the
// uuid of the Country it BELONGS_TO, to /tmp/cities.csv.
// NOTE(review): the uuid is an empty-string placeholder to be filled in before
// running, and the output path and column aliases (city_id, country_id) are the
// same as those of the City export - confirm that overwriting that file is intended.
CALL apoc.export.csv.query("match (c:OBJECT_IN_SUB_TITLE {uuid: ''})-[:BELONGS_TO]->(ct:Country) return c.uuid as city_id,ct.uuid as country_id, c.name_en","/tmp/cities.csv", {});
// Copy extraction results from the CSV onto existing OBJECT_IN_SUB_TITLE nodes.
// Columns are read by position: column 1 = node uuid, column 3 = remain_string,
// column 4 = potential_object, column 5 = potential_type.
// ON MATCH is only valid as a sub-clause of MERGE; after a plain MATCH the
// properties are assigned with a single SET.
// NOTE(review): the other import of this same file looks the node up by
// column 0 rather than column 1 - confirm which column holds the uuid.
LOAD CSV FROM "file:///object-extraction-2018.04.16.csv" AS csvLine
MATCH (c:OBJECT_IN_SUB_TITLE {uuid: csvLine[1]})
SET c.remain_string = csvLine[3],
    c.potential_object = csvLine[4],
    c.potential_type = csvLine[5];
// For each CSV row, attach one PotentialObject node per non-null value found in
// columns 3, 4, 6 and 7 to the OBJECT_IN_SUB_TITLE node whose uuid is in column 0.
// A list comprehension replaces filter(), which was removed in Neo4j 4.0; the
// comprehension form is accepted by older versions as well.
// CREATE is unconditional, so re-running this statement adds the nodes again.
LOAD CSV FROM "file:///object-extraction-2018.04.16.csv" AS csvLine
MATCH (c:OBJECT_IN_SUB_TITLE {uuid: csvLine[0]})
WITH c, [x IN [csvLine[3], csvLine[4], csvLine[6], csvLine[7]] WHERE x IS NOT NULL] AS fl
UNWIND fl AS x
CREATE (o:PotentialObject {value: x})<-[:CONTAINS]-(c);
// Import hotels from hotels/france-paris.csv and attach each one to its City.
// The City is located by English name inside the Country named in the row;
// rows whose country/city pair does not exist are skipped.
// The hotel is merged on (city, name_en, name_ja) only. The random uuid is
// assigned in ON CREATE SET: with a fresh uuid inside the MERGE pattern the
// pattern could never match, so every run created a duplicate Hotel.
LOAD CSV WITH HEADERS FROM "file:///hotels/france-paris.csv" AS csvLine
MATCH (ct:Country {name_en: csvLine["countryName"]})<-[:BELONGS_TO]-(c:City {name_en: csvLine["cityName"]})
MERGE (c)<-[:BELONGS_TO]-(h:Hotel {name_en: csvLine["objectName"], name_ja: csvLine["objectNameJA"]})
ON CREATE SET h.uuid = replace(apoc.create.uuid(), '-', '');
# Load all-hotels.csv into the "objects" index, using the uuid column as the document id
elasticsearch_loader --index objects --id-field uuid csv all-hotels.csv
# Print the validity dates (notBefore/notAfter) of the TLS certificate served on port 443
echo | openssl s_client -servername pavoexpo.vn -connect pavoexpo.vn:443 2>/dev/null | openssl x509 -noout -dates
echo | openssl s_client -servername danangtrade.gov.vn -connect danangtrade.gov.vn:443 2>/dev/null | openssl x509 -noout -dates
// Import missing cities.
// For every row of list-spot-cities.csv (columns: country, city, city_ja),
// check whether a City with that English name already BELONGS_TO the named
// Country. Rows that already have one are dropped; for the rest the Country is
// looked up again (rows naming an unknown country are skipped) and a new City
// with a freshly generated uuid is created and linked to it.
LOAD CSV WITH HEADERS FROM "file:///list-spot-cities.csv" AS csvLine
OPTIONAL MATCH (knownCountry:Country {name_en: csvLine["country"]})<-[:BELONGS_TO]-(:City {name_en: csvLine["city"]})
WITH csvLine, knownCountry
// knownCountry is null exactly when the country/city pair was not found
WHERE knownCountry IS NULL
MATCH (ct:Country {name_en: csvLine["country"]})
CREATE (c:City:Instance:DESCRIBABLE {name_en: csvLine["city"], name_ja: csvLine["city_ja"], uuid: replace(apoc.create.uuid(), '-', '')})-[:BELONGS_TO]->(ct)
RETURN c, ct;
// Find duplicate cities.
// Counts how many City nodes linked to the Country named "Japan" share the same
// English name; any row with cnt > 1 is a duplicate. Most frequent names first.
MATCH (ct:Country {name_en: "Japan"})<-[:BELONGS_TO]-(c:City)
WITH [ct.name_en, c.name_en] AS l
RETURN l, count(l) AS cnt
ORDER BY cnt DESC;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment