Skip to content

Instantly share code, notes, and snippets.

@amorton
Created August 19, 2012 10:27
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save amorton/3394160 to your computer and use it in GitHub Desktop.
Sorting lists for humans with Cassandra
*.py[cod]
# C extensions
*.so
# Packages
*.egg
*.egg-info
dist
build
eggs
parts
bin
var
sdist
develop-eggs
.installed.cfg
lib
lib64
# Installer logs
pip-log.txt
# Unit test / coverage reports
.coverage
.tox
nosetests.xml
# Translations
*.mo
# Mr Developer
.mr.developer.cfg
.project
.pydevproject
Country Name;ISO 3166-1-alpha-2 code
AFGHANISTAN;AF
ÅLAND ISLANDS;AX
ALBANIA;AL
ALGERIA;DZ
AMERICAN SAMOA;AS
ANDORRA;AD
ANGOLA;AO
ANGUILLA;AI
ANTARCTICA;AQ
ANTIGUA AND BARBUDA;AG
ARGENTINA;AR
ARMENIA;AM
ARUBA;AW
AUSTRALIA;AU
AUSTRIA;AT
AZERBAIJAN;AZ
BAHAMAS;BS
BAHRAIN;BH
BANGLADESH;BD
BARBADOS;BB
BELARUS;BY
BELGIUM;BE
BELIZE;BZ
BENIN;BJ
BERMUDA;BM
BHUTAN;BT
BOLIVIA, PLURINATIONAL STATE OF;BO
BONAIRE, SINT EUSTATIUS AND SABA;BQ
BOSNIA AND HERZEGOVINA;BA
BOTSWANA;BW
BOUVET ISLAND;BV
BRAZIL;BR
BRITISH INDIAN OCEAN TERRITORY;IO
BRUNEI DARUSSALAM;BN
BULGARIA;BG
BURKINA FASO;BF
BURUNDI;BI
CAMBODIA;KH
CAMEROON;CM
CANADA;CA
CAPE VERDE;CV
CAYMAN ISLANDS;KY
CENTRAL AFRICAN REPUBLIC;CF
CHAD;TD
CHILE;CL
CHINA;CN
CHRISTMAS ISLAND;CX
COCOS (KEELING) ISLANDS;CC
COLOMBIA;CO
COMOROS;KM
CONGO;CG
CONGO, THE DEMOCRATIC REPUBLIC OF THE;CD
COOK ISLANDS;CK
COSTA RICA;CR
CÔTE D'IVOIRE;CI
CROATIA;HR
CUBA;CU
CURAÇAO;CW
CYPRUS;CY
CZECH REPUBLIC;CZ
DENMARK;DK
DJIBOUTI;DJ
DOMINICA;DM
DOMINICAN REPUBLIC;DO
ECUADOR;EC
EGYPT;EG
EL SALVADOR;SV
EQUATORIAL GUINEA;GQ
ERITREA;ER
ESTONIA;EE
ETHIOPIA;ET
FALKLAND ISLANDS (MALVINAS);FK
FAROE ISLANDS;FO
FIJI;FJ
FINLAND;FI
FRANCE;FR
FRENCH GUIANA;GF
FRENCH POLYNESIA;PF
FRENCH SOUTHERN TERRITORIES;TF
GABON;GA
GAMBIA;GM
GEORGIA;GE
GERMANY;DE
GHANA;GH
GIBRALTAR;GI
GREECE;GR
GREENLAND;GL
GRENADA;GD
GUADELOUPE;GP
GUAM;GU
GUATEMALA;GT
GUERNSEY;GG
GUINEA;GN
GUINEA-BISSAU;GW
GUYANA;GY
HAITI;HT
HEARD ISLAND AND MCDONALD ISLANDS;HM
HOLY SEE (VATICAN CITY STATE);VA
HONDURAS;HN
HONG KONG;HK
HUNGARY;HU
ICELAND;IS
INDIA;IN
INDONESIA;ID
IRAN, ISLAMIC REPUBLIC OF;IR
IRAQ;IQ
IRELAND;IE
ISLE OF MAN;IM
ISRAEL;IL
ITALY;IT
JAMAICA;JM
JAPAN;JP
JERSEY;JE
JORDAN;JO
KAZAKHSTAN;KZ
KENYA;KE
KIRIBATI;KI
KOREA, DEMOCRATIC PEOPLE'S REPUBLIC OF;KP
KOREA, REPUBLIC OF;KR
KUWAIT;KW
KYRGYZSTAN;KG
LAO PEOPLE'S DEMOCRATIC REPUBLIC;LA
LATVIA;LV
LEBANON;LB
LESOTHO;LS
LIBERIA;LR
LIBYA;LY
LIECHTENSTEIN;LI
LITHUANIA;LT
LUXEMBOURG;LU
MACAO;MO
MACEDONIA, THE FORMER YUGOSLAV REPUBLIC OF;MK
MADAGASCAR;MG
MALAWI;MW
MALAYSIA;MY
MALDIVES;MV
MALI;ML
MALTA;MT
MARSHALL ISLANDS;MH
MARTINIQUE;MQ
MAURITANIA;MR
MAURITIUS;MU
MAYOTTE;YT
MEXICO;MX
MICRONESIA, FEDERATED STATES OF;FM
MOLDOVA, REPUBLIC OF;MD
MONACO;MC
MONGOLIA;MN
MONTENEGRO;ME
MONTSERRAT;MS
MOROCCO;MA
MOZAMBIQUE;MZ
MYANMAR;MM
NAMIBIA;NA
NAURU;NR
NEPAL;NP
NETHERLANDS;NL
NEW CALEDONIA;NC
NEW ZEALAND;NZ
NICARAGUA;NI
NIGER;NE
NIGERIA;NG
NIUE;NU
NORFOLK ISLAND;NF
NORTHERN MARIANA ISLANDS;MP
NORWAY;NO
OMAN;OM
PAKISTAN;PK
PALAU;PW
PALESTINIAN TERRITORY, OCCUPIED;PS
PANAMA;PA
PAPUA NEW GUINEA;PG
PARAGUAY;PY
PERU;PE
PHILIPPINES;PH
PITCAIRN;PN
POLAND;PL
PORTUGAL;PT
PUERTO RICO;PR
QATAR;QA
RÉUNION;RE
ROMANIA;RO
RUSSIAN FEDERATION;RU
RWANDA;RW
SAINT BARTHÉLEMY;BL
SAINT HELENA, ASCENSION AND TRISTAN DA CUNHA;SH
SAINT KITTS AND NEVIS;KN
SAINT LUCIA;LC
SAINT MARTIN (FRENCH PART);MF
SAINT PIERRE AND MIQUELON;PM
SAINT VINCENT AND THE GRENADINES;VC
SAMOA;WS
SAN MARINO;SM
SAO TOME AND PRINCIPE;ST
SAUDI ARABIA;SA
SENEGAL;SN
SERBIA;RS
SEYCHELLES;SC
SIERRA LEONE;SL
SINGAPORE;SG
SINT MAARTEN (DUTCH PART);SX
SLOVAKIA;SK
SLOVENIA;SI
SOLOMON ISLANDS;SB
SOMALIA;SO
SOUTH AFRICA;ZA
SOUTH GEORGIA AND THE SOUTH SANDWICH ISLANDS;GS
SOUTH SUDAN;SS
SPAIN;ES
SRI LANKA;LK
SUDAN;SD
SURINAME;SR
SVALBARD AND JAN MAYEN;SJ
SWAZILAND;SZ
SWEDEN;SE
SWITZERLAND;CH
SYRIAN ARAB REPUBLIC;SY
TAIWAN, PROVINCE OF CHINA;TW
TAJIKISTAN;TJ
TANZANIA, UNITED REPUBLIC OF;TZ
THAILAND;TH
TIMOR-LESTE;TL
TOGO;TG
TOKELAU;TK
TONGA;TO
TRINIDAD AND TOBAGO;TT
TUNISIA;TN
TURKEY;TR
TURKMENISTAN;TM
TURKS AND CAICOS ISLANDS;TC
TUVALU;TV
UGANDA;UG
UKRAINE;UA
UNITED ARAB EMIRATES;AE
UNITED KINGDOM;GB
UNITED STATES;US
UNITED STATES MINOR OUTLYING ISLANDS;UM
URUGUAY;UY
UZBEKISTAN;UZ
VANUATU;VU
VENEZUELA, BOLIVARIAN REPUBLIC OF;VE
VIET NAM;VN
VIRGIN ISLANDS, BRITISH;VG
VIRGIN ISLANDS, U.S.;VI
WALLIS AND FUTUNA;WF
WESTERN SAHARA;EH
YEMEN;YE
ZAMBIA;ZM
ZIMBABWE;ZW
#
# Initialise and view the list in iPython
#
In [114]: ledger_list.initialise()
In [115]: l = ledger_list.get()
In [116]: l
Out[116]:
[((1347363305468354, 1347363305468354, u'AF'), u'AF', u'AFGHANISTAN'),
((1347363305468360, 1347363305468360, u'AX'), u'AX', u'\xc5LAND ISLANDS'),
((1347363305468363, 1347363305468363, u'AL'), u'AL', u'ALBANIA'),
((1347363305468365, 1347363305468365, u'DZ'), u'DZ', u'ALGERIA'),
((1347363305468367, 1347363305468367, u'AS'), u'AS', u'AMERICAN SAMOA'),
((1347363305468368, 1347363305468368, u'AD'), u'AD', u'ANDORRA'),
((1347363305468371, 1347363305468371, u'AO'), u'AO', u'ANGOLA'),
((1347363305468373, 1347363305468373, u'AI'), u'AI', u'ANGUILLA'),
((1347363305468375, 1347363305468375, u'AQ'), u'AQ', u'ANTARCTICA'),
((1347363305468377, 1347363305468377, u'AG'), u'AG', u'ANTIGUA AND BARBUDA')]
#
# View the list in cassandra-cli
#
[default@dev] get LedgerList['countries'] limit 10;
=> (column=1347363305468354:1347363305468354:AF:false, value=AFGHANISTAN, timestamp=1347363305468906)
=> (column=1347363305468360:1347363305468360:AX:false, value=?LAND ISLANDS, timestamp=1347363305468906)
=> (column=1347363305468363:1347363305468363:AL:false, value=ALBANIA, timestamp=1347363305468906)
=> (column=1347363305468365:1347363305468365:DZ:false, value=ALGERIA, timestamp=1347363305468906)
=> (column=1347363305468367:1347363305468367:AS:false, value=AMERICAN SAMOA, timestamp=1347363305468906)
=> (column=1347363305468368:1347363305468368:AD:false, value=ANDORRA, timestamp=1347363305468906)
=> (column=1347363305468371:1347363305468371:AO:false, value=ANGOLA, timestamp=1347363305468906)
=> (column=1347363305468373:1347363305468373:AI:false, value=ANGUILLA, timestamp=1347363305468906)
=> (column=1347363305468375:1347363305468375:AQ:false, value=ANTARCTICA, timestamp=1347363305468906)
=> (column=1347363305468377:1347363305468377:AG:false, value=ANTIGUA AND BARBUDA, timestamp=1347363305468906)
Returned 10 results.
Elapsed time: 11 msec(s).
#
# Move "ANTIGUA AND BARBUDA" to be before "ANDORRA"
#
In [117]: ledger_list.move(l, "AG", "AD")
In [118]: l = ledger_list.get()
Applying Tx {"tx_id": "1347363368220175", "move_sort_key": [1347363305468377, 1347363305468377, "AG"], "sibling_sort_key": [1347363305468368, 1347363305468368, "AD"], "op": "move"}
In [119]: l
Out[119]:
[((1347363305468354, 1347363305468354, u'AF'), u'AF', u'AFGHANISTAN'),
((1347363305468360, 1347363305468360, u'AX'), u'AX', u'\xc5LAND ISLANDS'),
((1347363305468363, 1347363305468363, u'AL'), u'AL', u'ALBANIA'),
((1347363305468365, 1347363305468365, u'DZ'), u'DZ', u'ALGERIA'),
((1347363305468367, 1347363305468367, u'AS'), u'AS', u'AMERICAN SAMOA'),
((1347363305468367, 1347363368220175, u'AG'), u'AG', u'ANTIGUA AND BARBUDA'),
((1347363305468368, 1347363305468368, u'AD'), u'AD', u'ANDORRA'),
((1347363305468371, 1347363305468371, u'AO'), u'AO', u'ANGOLA'),
((1347363305468373, 1347363305468373, u'AI'), u'AI', u'ANGUILLA'),
((1347363305468375, 1347363305468375, u'AQ'), u'AQ', u'ANTARCTICA')]
#
# View the list in cassandra-cli
#
[default@dev] get LedgerList['countries'] limit 10;
=> (column=1347363305468354:1347363305468354:AF:false, value=AFGHANISTAN, timestamp=1347363305468906)
=> (column=1347363305468360:1347363305468360:AX:false, value=?LAND ISLANDS, timestamp=1347363305468906)
=> (column=1347363305468363:1347363305468363:AL:false, value=ALBANIA, timestamp=1347363305468906)
=> (column=1347363305468365:1347363305468365:DZ:false, value=ALGERIA, timestamp=1347363305468906)
=> (column=1347363305468367:1347363305468367:AS:false, value=AMERICAN SAMOA, timestamp=1347363305468906)
=> (column=1347363305468368:1347363305468368:AD:false, value=ANDORRA, timestamp=1347363305468906)
=> (column=1347363305468371:1347363305468371:AO:false, value=ANGOLA, timestamp=1347363305468906)
=> (column=1347363305468373:1347363305468373:AI:false, value=ANGUILLA, timestamp=1347363305468906)
=> (column=1347363305468375:1347363305468375:AQ:false, value=ANTARCTICA, timestamp=1347363305468906)
=> (column=1347363305468377:1347363305468377:AG:false, value=ANTIGUA AND BARBUDA, timestamp=1347363305468906)
Returned 10 results.
Elapsed time: 13 msec(s).
#
# View the ledger transactions in cassandra-cli
#
[default@dev] get LedgerTransactions['countries'];
=> (column=1347363368220175, value={"tx_id": "1347363368220175", "move_sort_key": [1347363305468377, 1347363305468377, "AG"], "sibling_sort_key": [1347363305468368, 1347363305468368, "AD"], "op": "move"}, timestamp=1347363368220230)
Returned 1 results.
Elapsed time: 5 msec(s).
[default@dev]
#
# Move item "AFGHANISTAN" to be before "ANDORRA"
#
In [121]: ledger_list.move(l, "AF", "AD")
In [122]: l = ledger_list.get()
Applying Tx {"tx_id": "1347363368220175", "move_sort_key": [1347363305468377, 1347363305468377, "AG"], "sibling_sort_key": [1347363305468368, 1347363305468368, "AD"], "op": "move"}
Applying Tx {"tx_id": "1347363481676775", "move_sort_key": [1347363305468354, 1347363305468354, "AF"], "sibling_sort_key": [1347363305468368, 1347363305468368, "AD"], "op": "move"}
In [123]: l
Out[123]:
[((1347363305468360, 1347363305468360, u'AX'), u'AX', u'\xc5LAND ISLANDS'),
((1347363305468363, 1347363305468363, u'AL'), u'AL', u'ALBANIA'),
((1347363305468365, 1347363305468365, u'DZ'), u'DZ', u'ALGERIA'),
((1347363305468367, 1347363305468367, u'AS'), u'AS', u'AMERICAN SAMOA'),
((1347363305468367, 1347363368220175, u'AG'), u'AG', u'ANTIGUA AND BARBUDA'),
((1347363305468367, 1347363481676775, u'AF'), u'AF', u'AFGHANISTAN'),
((1347363305468368, 1347363305468368, u'AD'), u'AD', u'ANDORRA'),
((1347363305468371, 1347363305468371, u'AO'), u'AO', u'ANGOLA'),
((1347363305468373, 1347363305468373, u'AI'), u'AI', u'ANGUILLA'),
((1347363305468375, 1347363305468375, u'AQ'), u'AQ', u'ANTARCTICA')]
#
# View the list in cassandra-cli
#
[default@dev] get LedgerList['countries'] limit 10;
=> (column=1347363305468354:1347363305468354:AF:false, value=AFGHANISTAN, timestamp=1347363305468906)
=> (column=1347363305468360:1347363305468360:AX:false, value=?LAND ISLANDS, timestamp=1347363305468906)
=> (column=1347363305468363:1347363305468363:AL:false, value=ALBANIA, timestamp=1347363305468906)
=> (column=1347363305468365:1347363305468365:DZ:false, value=ALGERIA, timestamp=1347363305468906)
=> (column=1347363305468367:1347363305468367:AS:false, value=AMERICAN SAMOA, timestamp=1347363305468906)
=> (column=1347363305468368:1347363305468368:AD:false, value=ANDORRA, timestamp=1347363305468906)
=> (column=1347363305468371:1347363305468371:AO:false, value=ANGOLA, timestamp=1347363305468906)
=> (column=1347363305468373:1347363305468373:AI:false, value=ANGUILLA, timestamp=1347363305468906)
=> (column=1347363305468375:1347363305468375:AQ:false, value=ANTARCTICA, timestamp=1347363305468906)
=> (column=1347363305468377:1347363305468377:AG:false, value=ANTIGUA AND BARBUDA, timestamp=1347363305468906)
Returned 10 results.
Elapsed time: 8 msec(s).
#
# View the ledger transactions in cassandra-cli
#
[default@dev] get LedgerTransactions['countries'];
=> (column=1347363368220175, value={"tx_id": "1347363368220175", "move_sort_key": [1347363305468377, 1347363305468377, "AG"], "sibling_sort_key": [1347363305468368, 1347363305468368, "AD"], "op": "move"}, timestamp=1347363368220230)
=> (column=1347363481676775, value={"tx_id": "1347363481676775", "move_sort_key": [1347363305468354, 1347363305468354, "AF"], "sibling_sort_key": [1347363305468368, 1347363305468368, "AD"], "op": "move"}, timestamp=1347363481676834)
Returned 2 results.
Elapsed time: 4 msec(s).
#
# Apply the transactions, breaking after step 1.
#
In [126]: ledger_list.apply_tx(break_at_1=True)
#
# View the list in cassandra-cli
# NOTE: 3 columns, including soft delete, for the items we have moved.
#
[default@dev] get LedgerList['countries'] limit 20;
=> (column=1347363305468354:1347363305468354:AF:false, value=AFGHANISTAN, timestamp=1347363305468906)
=> (column=1347363305468354:1347363305468354:AF:true, value=, timestamp=1347364141395288)
=> (column=1347363305468360:1347363305468360:AX:false, value=?LAND ISLANDS, timestamp=1347363305468906)
=> (column=1347363305468363:1347363305468363:AL:false, value=ALBANIA, timestamp=1347363305468906)
=> (column=1347363305468365:1347363305468365:DZ:false, value=ALGERIA, timestamp=1347363305468906)
=> (column=1347363305468367:1347363305468367:AS:false, value=AMERICAN SAMOA, timestamp=1347363305468906)
=> (column=1347363305468367:1347363368220175:AG:false, value=ANTIGUA AND BARBUDA, timestamp=1347364141388678)
=> (column=1347363305468367:1347363481676775:AF:false, value=AFGHANISTAN, timestamp=1347364141395288)
=> (column=1347363305468368:1347363305468368:AD:false, value=ANDORRA, timestamp=1347363305468906)
=> (column=1347363305468371:1347363305468371:AO:false, value=ANGOLA, timestamp=1347363305468906)
=> (column=1347363305468373:1347363305468373:AI:false, value=ANGUILLA, timestamp=1347363305468906)
=> (column=1347363305468375:1347363305468375:AQ:false, value=ANTARCTICA, timestamp=1347363305468906)
=> (column=1347363305468377:1347363305468377:AG:false, value=ANTIGUA AND BARBUDA, timestamp=1347363305468906)
=> (column=1347363305468377:1347363305468377:AG:true, value=, timestamp=1347364141388678)
=> (column=1347363305468379:1347363305468379:AR:false, value=ARGENTINA, timestamp=1347363305468906)
=> (column=1347363305468381:1347363305468381:AM:false, value=ARMENIA, timestamp=1347363305468906)
=> (column=1347363305468383:1347363305468383:AW:false, value=ARUBA, timestamp=1347363305468906)
=> (column=1347363305468384:1347363305468384:AU:false, value=AUSTRALIA, timestamp=1347363305468906)
=> (column=1347363305468386:1347363305468386:AT:false, value=AUSTRIA, timestamp=1347363305468906)
=> (column=1347363305468388:1347363305468388:AZ:false, value=AZERBAIJAN, timestamp=1347363305468906)
Returned 20 results.
Elapsed time: 16 msec(s).
#
# View the ledger transactions in cassandra-cli
# NOTE: Both still there
#
[default@dev] get LedgerTransactions['countries'];
=> (column=1347363368220175, value={"tx_id": "1347363368220175", "move_sort_key": [1347363305468377, 1347363305468377, "AG"], "sibling_sort_key": [1347363305468368, 1347363305468368, "AD"], "op": "move"}, timestamp=1347363368220230)
=> (column=1347363481676775, value={"tx_id": "1347363481676775", "move_sort_key": [1347363305468354, 1347363305468354, "AF"], "sibling_sort_key": [1347363305468368, 1347363305468368, "AD"], "op": "move"}, timestamp=1347363481676834)
Returned 2 results.
Elapsed time: 6 msec(s).
[default@dev]
#
# View the list in Python
# NOTE: Log messages about soft deletes.
#
In [127]: l = ledger_list.get()
Applying Tx {"tx_id": "1347363368220175", "move_sort_key": [1347363305468377, 1347363305468377, "AG"], "sibling_sort_key": [1347363305468368, 1347363305468368, "AD"], "op": "move"}
Soft delete detected - Tx in flight, skipping {u'tx_id': u'1347363368220175', u'move_sort_key': [1347363305468377, 1347363305468377, u'AG'], u'sibling_sort_key': [1347363305468368, 1347363305468368, u'AD'], u'op': u'move'}
Applying Tx {"tx_id": "1347363481676775", "move_sort_key": [1347363305468354, 1347363305468354, "AF"], "sibling_sort_key": [1347363305468368, 1347363305468368, "AD"], "op": "move"}
Soft delete detected - Tx in flight, skipping {u'tx_id': u'1347363481676775', u'move_sort_key': [1347363305468354, 1347363305468354, u'AF'], u'sibling_sort_key': [1347363305468368, 1347363305468368, u'AD'], u'op': u'move'}
In [128]: l
Out[128]:
[((1347363305468360, 1347363305468360, u'AX'), u'AX', u'\xc5LAND ISLANDS'),
((1347363305468363, 1347363305468363, u'AL'), u'AL', u'ALBANIA'),
((1347363305468365, 1347363305468365, u'DZ'), u'DZ', u'ALGERIA'),
((1347363305468367, 1347363305468367, u'AS'), u'AS', u'AMERICAN SAMOA'),
((1347363305468367, 1347363368220175, u'AG'), u'AG', u'ANTIGUA AND BARBUDA'),
((1347363305468367, 1347363481676775, u'AF'), u'AF', u'AFGHANISTAN'),
((1347363305468368, 1347363305468368, u'AD'), u'AD', u'ANDORRA'),
((1347363305468371, 1347363305468371, u'AO'), u'AO', u'ANGOLA'),
((1347363305468373, 1347363305468373, u'AI'), u'AI', u'ANGUILLA'),
((1347363305468375, 1347363305468375, u'AQ'), u'AQ', u'ANTARCTICA')]
#
# Apply the transactions, breaking after step 2.
#
In [126]: ledger_list.apply_tx(break_at_2=True)
#
# View the list in cassandra-cli
# NOTE: No soft deletes and only a single column for all items.
#
[default@dev] get LedgerList['countries'] limit 20;
=> (column=1347363305468360:1347363305468360:AX:false, value=?LAND ISLANDS, timestamp=1347363305468906)
=> (column=1347363305468363:1347363305468363:AL:false, value=ALBANIA, timestamp=1347363305468906)
=> (column=1347363305468365:1347363305468365:DZ:false, value=ALGERIA, timestamp=1347363305468906)
=> (column=1347363305468367:1347363305468367:AS:false, value=AMERICAN SAMOA, timestamp=1347363305468906)
=> (column=1347363305468367:1347363368220175:AG:false, value=ANTIGUA AND BARBUDA, timestamp=1347364558918847)
=> (column=1347363305468367:1347363481676775:AF:false, value=AFGHANISTAN, timestamp=1347364558925431)
=> (column=1347363305468368:1347363305468368:AD:false, value=ANDORRA, timestamp=1347363305468906)
=> (column=1347363305468371:1347363305468371:AO:false, value=ANGOLA, timestamp=1347363305468906)
=> (column=1347363305468373:1347363305468373:AI:false, value=ANGUILLA, timestamp=1347363305468906)
=> (column=1347363305468375:1347363305468375:AQ:false, value=ANTARCTICA, timestamp=1347363305468906)
=> (column=1347363305468379:1347363305468379:AR:false, value=ARGENTINA, timestamp=1347363305468906)
=> (column=1347363305468381:1347363305468381:AM:false, value=ARMENIA, timestamp=1347363305468906)
=> (column=1347363305468383:1347363305468383:AW:false, value=ARUBA, timestamp=1347363305468906)
=> (column=1347363305468384:1347363305468384:AU:false, value=AUSTRALIA, timestamp=1347363305468906)
=> (column=1347363305468386:1347363305468386:AT:false, value=AUSTRIA, timestamp=1347363305468906)
=> (column=1347363305468388:1347363305468388:AZ:false, value=AZERBAIJAN, timestamp=1347363305468906)
=> (column=1347363305468390:1347363305468390:BS:false, value=BAHAMAS, timestamp=1347363305468906)
=> (column=1347363305468391:1347363305468391:BH:false, value=BAHRAIN, timestamp=1347363305468906)
=> (column=1347363305468393:1347363305468393:BD:false, value=BANGLADESH, timestamp=1347363305468906)
=> (column=1347363305468394:1347363305468394:BB:false, value=BARBADOS, timestamp=1347363305468906)
Returned 20 results.
Elapsed time: 17 msec(s).
#
# View the ledger transactions in cassandra-cli
# NOTE: Both still there
#
[default@dev] get LedgerTransactions['countries'];
=> (column=1347363368220175, value={"tx_id": "1347363368220175", "move_sort_key": [1347363305468377, 1347363305468377, "AG"], "sibling_sort_key": [1347363305468368, 1347363305468368, "AD"], "op": "move"}, timestamp=1347363368220230)
=> (column=1347363481676775, value={"tx_id": "1347363481676775", "move_sort_key": [1347363305468354, 1347363305468354, "AF"], "sibling_sort_key": [1347363305468368, 1347363305468368, "AD"], "op": "move"}, timestamp=1347363481676834)
Returned 2 results.
Elapsed time: 5 msec(s).
#
# View the list in Python
# NOTE: Log messages about pre-cond failing. The item we want to move
# has already moved.
#
In [130]: l = ledger_list.get()
Applying Tx {"tx_id": "1347363368220175", "move_sort_key": [1347363305468377, 1347363305468377, "AG"], "sibling_sort_key": [1347363305468368, 1347363305468368, "AD"], "op": "move"}
Pre-cond fail - Tx in flight, skipping {u'tx_id': u'1347363368220175', u'move_sort_key': [1347363305468377, 1347363305468377, u'AG'], u'sibling_sort_key': [1347363305468368, 1347363305468368, u'AD'], u'op': u'move'}
Applying Tx {"tx_id": "1347363481676775", "move_sort_key": [1347363305468354, 1347363305468354, "AF"], "sibling_sort_key": [1347363305468368, 1347363305468368, "AD"], "op": "move"}
Pre-cond fail - Tx in flight, skipping {u'tx_id': u'1347363481676775', u'move_sort_key': [1347363305468354, 1347363305468354, u'AF'], u'sibling_sort_key': [1347363305468368, 1347363305468368, u'AD'], u'op': u'move'}
In [131]: l
Out[131]:
[((1347363305468360, 1347363305468360, u'AX'), u'AX', u'\xc5LAND ISLANDS'),
((1347363305468363, 1347363305468363, u'AL'), u'AL', u'ALBANIA'),
((1347363305468365, 1347363305468365, u'DZ'), u'DZ', u'ALGERIA'),
((1347363305468367, 1347363305468367, u'AS'), u'AS', u'AMERICAN SAMOA'),
((1347363305468367, 1347363368220175, u'AG'), u'AG', u'ANTIGUA AND BARBUDA'),
((1347363305468367, 1347363481676775, u'AF'), u'AF', u'AFGHANISTAN'),
((1347363305468368, 1347363305468368, u'AD'), u'AD', u'ANDORRA'),
((1347363305468371, 1347363305468371, u'AO'), u'AO', u'ANGOLA'),
((1347363305468373, 1347363305468373, u'AI'), u'AI', u'ANGUILLA'),
((1347363305468375, 1347363305468375, u'AQ'), u'AQ', u'ANTARCTICA')]
#
# Apply the transactions.
#
In [136]: ledger_list.apply_tx()
# View the list in cassandra-cli
# NOTE: No soft deletes and only a single column for all items.
#
[default@dev] get LedgerList['countries'] limit 20;
=> (column=1347363305468360:1347363305468360:AX:false, value=?LAND ISLANDS, timestamp=1347363305468906)
=> (column=1347363305468363:1347363305468363:AL:false, value=ALBANIA, timestamp=1347363305468906)
=> (column=1347363305468365:1347363305468365:DZ:false, value=ALGERIA, timestamp=1347363305468906)
=> (column=1347363305468367:1347363305468367:AS:false, value=AMERICAN SAMOA, timestamp=1347363305468906)
=> (column=1347363305468367:1347363368220175:AG:false, value=ANTIGUA AND BARBUDA, timestamp=1347364558918847)
=> (column=1347363305468367:1347363481676775:AF:false, value=AFGHANISTAN, timestamp=1347364558925431)
=> (column=1347363305468368:1347363305468368:AD:false, value=ANDORRA, timestamp=1347363305468906)
=> (column=1347363305468371:1347363305468371:AO:false, value=ANGOLA, timestamp=1347363305468906)
=> (column=1347363305468373:1347363305468373:AI:false, value=ANGUILLA, timestamp=1347363305468906)
=> (column=1347363305468375:1347363305468375:AQ:false, value=ANTARCTICA, timestamp=1347363305468906)
=> (column=1347363305468379:1347363305468379:AR:false, value=ARGENTINA, timestamp=1347363305468906)
=> (column=1347363305468381:1347363305468381:AM:false, value=ARMENIA, timestamp=1347363305468906)
=> (column=1347363305468383:1347363305468383:AW:false, value=ARUBA, timestamp=1347363305468906)
=> (column=1347363305468384:1347363305468384:AU:false, value=AUSTRALIA, timestamp=1347363305468906)
=> (column=1347363305468386:1347363305468386:AT:false, value=AUSTRIA, timestamp=1347363305468906)
=> (column=1347363305468388:1347363305468388:AZ:false, value=AZERBAIJAN, timestamp=1347363305468906)
=> (column=1347363305468390:1347363305468390:BS:false, value=BAHAMAS, timestamp=1347363305468906)
=> (column=1347363305468391:1347363305468391:BH:false, value=BAHRAIN, timestamp=1347363305468906)
=> (column=1347363305468393:1347363305468393:BD:false, value=BANGLADESH, timestamp=1347363305468906)
=> (column=1347363305468394:1347363305468394:BB:false, value=BARBADOS, timestamp=1347363305468906)
Returned 20 results.
Elapsed time: 18 msec(s).
#
# View the ledger transactions in cassandra-cli
# NOTE: transactions hard deleted
#
[default@dev] get LedgerTransactions['countries'];
Returned 0 results.
Elapsed time: 3 msec(s).
#
# View the list in Python
# NOTE: No Transactions to apply.
#
In [137]: l = ledger_list.get()
In [138]: l
Out[138]:
[((1347363305468360, 1347363305468360, u'AX'), u'AX', u'\xc5LAND ISLANDS'),
((1347363305468363, 1347363305468363, u'AL'), u'AL', u'ALBANIA'),
((1347363305468365, 1347363305468365, u'DZ'), u'DZ', u'ALGERIA'),
((1347363305468367, 1347363305468367, u'AS'), u'AS', u'AMERICAN SAMOA'),
((1347363305468367, 1347363368220175, u'AG'), u'AG', u'ANTIGUA AND BARBUDA'),
((1347363305468367, 1347363481676775, u'AF'), u'AF', u'AFGHANISTAN'),
((1347363305468368, 1347363305468368, u'AD'), u'AD', u'ANDORRA'),
((1347363305468371, 1347363305468371, u'AO'), u'AO', u'ANGOLA'),
((1347363305468373, 1347363305468373, u'AI'), u'AI', u'ANGUILLA'),
((1347363305468375, 1347363305468375, u'AQ'), u'AQ', u'ANTARCTICA')]
In [139]:
use dev;
/*
LedgerList: the lists themselves.
Each row is an ordered list; the row key is the list name (UTF8).
Each item is represented by a single column.
Each column name is the composite (weight, seq, item_id, deleted), typed
(Integer, Integer, UTF8, Boolean) by the comparator below, so columns sort
by weight first, then seq, then item_id.
A column whose deleted part is true is a soft-delete marker with an empty
value; it is written while a move of that item is being applied.
Each column value is the item label (UTF8).
*/
CREATE COLUMN FAMILY
LedgerList
WITH
key_validation_class = UTF8Type
AND
comparator = 'CompositeType(IntegerType, IntegerType, UTF8Type, BooleanType)'
AND
default_validation_class = UTF8Type
;
/*
LedgerTransactions: pending changes that have not yet been fully applied.
Each row is a list of transactional changes to apply to a list in LedgerList;
the row key is the name of that list (UTF8).
Each transaction is represented by a single column.
Each column name is a timestamp (an integer, also used as the transaction id),
so the columns of a row sort in the order the transactions were created.
Each column value is a serialised transaction (a JSON document stored as UTF8).
*/
CREATE COLUMN FAMILY
LedgerTransactions
WITH
key_validation_class = UTF8Type
AND
comparator = IntegerType
AND
default_validation_class = UTF8Type
;
"""Lists where the sort order in Cassandra matches the list order.
Changes are stored in a ledger and applied by an updater process.
"""
import csv
import json
import os.path
import time
import pycassa
from pycassa.cassandra import ttypes as cass_types
# Connection pool for the "dev" keyspace, shared by both column families.
pool = pycassa.ConnectionPool("dev")

# Every read and write in this module is done at QUORUM.
_QUORUM = cass_types.ConsistencyLevel.QUORUM

# Column family holding the list items.
list_cf = pycassa.ColumnFamily(
    pool,
    "LedgerList",
    read_consistency_level=_QUORUM,
    write_consistency_level=_QUORUM,
)

# Column family holding the ledger of pending transactions.
trans_cf = pycassa.ColumnFamily(
    pool,
    "LedgerTransactions",
    read_consistency_level=_QUORUM,
    write_consistency_level=_QUORUM,
)

# Row key used for the list in both column families.
LIST_NAME = "countries"
def initialise():
    """Initialise the list with ISO country names and codes.

    Reads ``country_list.txt`` from the current working directory (a
    ``;`` delimited file whose first line is a header) and writes every
    country to the ``LIST_NAME`` row of the list column family with a
    single insert.

    Each column name is ``(weight, seq, item_id, deleted)`` and each column
    value is the country name.  Weight and seq are both set to a timestamp
    in microseconds so that every item is appended after the previous one.
    The timestamps are forced to be strictly increasing: the clock may
    return the same microsecond for consecutive rows, and a tie on weight
    and seq would make those rows sort by country code instead of by their
    position in the file.
    """

    def gen_countries():
        """Yield a (country_name, iso_code) row for each data line."""
        with open(os.path.join(".", "country_list.txt"), "r") as f:
            # skip header
            f.readline()
            for name_code in csv.reader(f, delimiter=';'):
                if not name_code:
                    # The csv reader yields an empty row for a blank line;
                    # there is nothing to store for it.
                    continue
                yield name_code

    row_key = LIST_NAME
    columns = {}
    last_stamp = 0
    for country_name, country_code in gen_countries():
        # Never reuse a stamp, even if the clock has not ticked since the
        # previous row.
        now = max(int(time.time() * 10**6), last_stamp + 1)
        last_stamp = now
        col_name = (
            now,           # weight set to current time to append item
            now,           # seq set to current time to append item
            country_code,  # item_id is country code
            False,         # soft delete is false
        )
        columns[col_name] = country_name
    list_cf.insert(row_key, columns)
def _apply_pending_moves(list_cols, pending_json):
    """Replay pending "move" transactions onto ``list_cols`` in place.

    ``list_cols`` maps ``(weight, seq, item_id, deleted)`` column names to
    item labels.  ``pending_json`` yields serialised transactions in the
    order they were returned by the ledger read.
    """
    for trans_json in pending_json:
        print("Applying Tx %s" % (trans_json,))
        trans = json.loads(trans_json)
        tx_id = int(trans["tx_id"])
        op = trans["op"]
        move_sort_key = tuple(trans["move_sort_key"])
        sibling_sort_key = tuple(trans["sibling_sort_key"])
        assert op == "move"

        # If there is a soft delete for this sort key the transaction is in
        # flight, and the item is already at its new location.
        if move_sort_key + (True,) in list_cols:
            print("Soft delete detected - Tx in flight, skipping %s" % (
                trans,))
            continue

        # Try to remove the column we want to move from our current list
        # state. Only the non soft deleted column name is matched.
        try:
            move_col_value = list_cols.pop(move_sort_key + (False,))
        except KeyError:
            # The transaction is in flight: the original item has been
            # removed but the Tx record is still there.
            # Or the item is in another page.
            print("Pre-cond fail - Tx in flight, skipping %s" % (trans,))
            continue

        # The target item was still there, add it back at its new location.
        new_col_name = (
            sibling_sort_key[0] - 1,  # new weight is sibling weight - 1
            tx_id,                    # new seq is tx_id
            move_sort_key[2],         # copy item_id
            False,                    # not deleted
        )
        existing = list_cols.setdefault(new_col_name, move_col_value)
        # Check we did not replace anything.
        assert existing == move_col_value


def _visible_items(list_cols):
    """Return the non-deleted items of ``list_cols`` ordered by sort key.

    Each entry is ``((weight, seq, item_id), item_id, item_label)``.
    """
    # Sort keys that carry a soft delete marker. The deleted flag is left
    # out, so both the marker and the matching live column are filtered.
    deleted = frozenset(
        col_name[:3]
        for col_name in list_cols
        if col_name[3]  # deleted is the 4th part of the column name
    )
    items = [
        (col_name[:3], col_name[2], col_value)
        for col_name, col_value in list_cols.items()
        if col_name[:3] not in deleted
    ]
    items.sort(key=lambda item: item[0])
    return items


def get(page_size=10):
    """Get the first page of results from the list.

    Pending ledger transactions are read first and replayed in memory on
    top of the stored columns, so the returned page shows the list as it
    will look once the transactions have been applied.  Nothing is written.

    ``page_size`` is the maximum number of items to return.

    Returns a list of ``((weight, seq, item_id), item_id, item_label)``
    tuples ordered by sort key.  The list is shorter than ``page_size``
    only when the stored row itself ran out of columns.

    Raises ``RuntimeError`` when more than ``page_size`` transactions are
    pending.  A missing ledger row is treated as "no transactions"; the
    read of the list row is not guarded, so whatever it raises propagates.
    """
    row_key = LIST_NAME

    # Get transactions first. One extra column is requested so that "more
    # than a page of changes" can be detected.
    try:
        trans_cols = trans_cf.get(row_key, column_count=page_size + 1)
    except cass_types.NotFoundException:
        trans_cols = {}
    if len(trans_cols) > page_size:
        # For efficiency we want to handle at most 1 page_size of changes.
        # We could continue and flag the data as stale or complain.
        # I have chosen to complain.
        raise RuntimeError("Too many changes :(")

    # Get the current list.
    # Inflight transactions may add 2 columns to the list: one for the
    # soft delete of the old sort key, another for the new sort key.
    trans_page_fudge = page_size * 2 if trans_cols else 0
    requested_count = page_size + trans_page_fudge
    list_cols = list_cf.get(row_key, column_count=requested_count)
    fetched_count = len(list_cols)

    # list_cols is { (weight, seq, item_id, deleted) : item_label }.
    _apply_pending_moves(list_cols, trans_cols.values())

    page = _visible_items(list_cols)[:page_size]
    # A short page is fine when the row had fewer columns than we asked
    # for. If the fetch was full and the page is still short, the fudge
    # factor was not big enough and the result cannot be trusted.
    assert len(page) == page_size or fetched_count < requested_count, \
        "Not enough columns fetched to fill the page."
    return page
def move(the_list, move_item_id, new_sibling_id):
    """Record a request to move an item so it sits just before a sibling.

    The list itself is not modified here.  A "move" transaction holding
    the current sort keys of both items is inserted into the ledger row
    for ``LIST_NAME``, keyed by a microsecond timestamp that also serves
    as the transaction id.

    ``the_list`` is a page as returned by ``get()``: a list of
    ``(sort_key, item_id, item_label)`` tuples.  ``move_item_id`` is the id
    of the item to move and ``new_sibling_id`` the id of the item it
    should be placed in front of.  Both must be present in ``the_list``.

    Raises ``AssertionError`` if either item cannot be found.  The check is
    raised explicitly rather than with an ``assert`` statement so it is
    still enforced when Python runs with ``-O``; otherwise a transaction
    with missing sort keys could be written to the ledger.
    """
    # Find the current state of the item we want to move and its new
    # sibling.
    move_sort_key = None
    sibling_sort_key = None
    for sort_key, item_id, _item_label in the_list:
        if item_id == move_item_id:
            move_sort_key = sort_key
        elif item_id == new_sibling_id:
            sibling_sort_key = sort_key
    if not move_sort_key:
        raise AssertionError("Move item not found.")
    if not sibling_sort_key:
        raise AssertionError("New sibling not found.")

    # Add a new column to the ledger transactions row.
    tx_id = int(time.time() * 10**6)  # tx_id will be used as seq later
    action = {
        "tx_id": str(tx_id),  # a 64bit int in JSON can be truncated
        "op": "move",
        "move_sort_key": move_sort_key,
        "sibling_sort_key": sibling_sort_key,
    }
    trans_cf.insert(LIST_NAME, {tx_id: json.dumps(action)})
def apply_tx(page_size=10, break_at_1=False, break_at_2=False):
    """Apply the transactions in the LedgerTransactions CF to the
    LedgerList CF.

    **NOTE:** Assumes a single instance of the function is running.

    At most ``page_size`` pending transactions are read per call. Each one
    is applied in steps:

    0. read the current label of the item being moved;
    1. write a soft-delete marker for the old column and the item's new
       column;
    2. hard delete the old column and its soft-delete marker;
    3. hard delete the transaction from the ledger.

    ``break_at_1`` and ``break_at_2`` flags are for stepping code.
    They stop processing each transaction right after step 1 or step 2, so
    the transaction stays in the ledger.
    """
    ledger_row = LIST_NAME

    # Get pending transactions
    try:
        pending = trans_cf.get(ledger_row, column_count=page_size)
    except cass_types.NotFoundException:
        pending = {}

    for raw_tx in pending.values():
        tx = json.loads(raw_tx)
        tx_id = int(tx["tx_id"])
        op = tx["op"]
        old_key = tuple(tx["move_sort_key"])
        sibling_key = tuple(tx["sibling_sort_key"])
        sibling_weight = sibling_key[0]
        assert op == "move"

        live_col = old_key + (False,)      # the non deleted sort key
        tombstone_col = old_key + (True,)  # the soft deleted sort key

        # Step 0 - need the current label for the item we are moving
        try:
            current = list_cf.get(ledger_row, columns=[live_col])
        except cass_types.NotFoundException:
            # Item has already been moved: we are reprocessing an
            # in flight Tx.
            already_moved = True
            label = None
        else:
            assert len(current) == 1
            label = list(current.values())[0]
            already_moved = False

        # Step 1 - soft delete the old sort key and add the new one.
        # Does not matter if these cols have been written before.
        # Skipped if the column has already been hard deleted.
        if not already_moved:
            new_col = (
                sibling_weight - 1,  # new weight is sibling weight - 1
                tx_id,               # tx_id as seq, because it is repeatable
                old_key[2],          # copy item_id
                False                # not deleted
            )
            list_cf.insert(ledger_row, {
                tombstone_col : "",  # soft delete old item
                new_col : label      # add item in new position
            })
        if break_at_1:
            # for stepping code
            continue

        # Step 2 - hard delete the old item and the soft delete.
        # Skipped if the column has already been hard deleted.
        if not already_moved:
            list_cf.remove(ledger_row, columns=[live_col, tombstone_col])
        if break_at_2:
            # for stepping code
            continue

        # Step 3 - hard delete the Ledger Transaction
        trans_cf.remove(ledger_row, columns=[tx_id])
#
# Initialise the list in iPython
#
In [23]: natural_list.initialise()
#
# View the list in iPython
#
In [24]: l = natural_list.get()
In [25]: l
Out[25]:
[((1346275601751603, 1346275601751603), 1, u'Apples'),
((1346275601751606, 1346275601751606), 2, u'Bananas'),
((1346275601751610, 1346275601751610), 3, u'Cherries'),
((1346275601751612, 1346275601751612), 4, u'Dragon Fruit'),
((1346275601751613, 1346275601751613), 5, u'Elderberry')]
#
# View the list in cassandra-cli
#
[default@dev] get NaturalList['fruits'];
=> (column=1346275601751603:1346275601751603:1, value=Apples, timestamp=1346275601751616)
=> (column=1346275601751606:1346275601751606:2, value=Bananas, timestamp=1346275601751616)
=> (column=1346275601751610:1346275601751610:3, value=Cherries, timestamp=1346275601751616)
=> (column=1346275601751612:1346275601751612:4, value=Dragon Fruit, timestamp=1346275601751616)
=> (column=1346275601751613:1346275601751613:5, value=Elderberry, timestamp=1346275601751616)
Returned 5 results.
Elapsed time: 3 msec(s).
#
# Move item 5 to be before 2
#
In [26]: natural_list.move(l, 5, 2)
In [27]: l = natural_list.get()
In [28]: l
Out[28]:
[((1346275601751603, 1346275601751603), 1, u'Apples'),
((1346275601751605, 1346275826354056), 5, u'Elderberry'),
((1346275601751606, 1346275601751606), 2, u'Bananas'),
((1346275601751610, 1346275601751610), 3, u'Cherries'),
((1346275601751612, 1346275601751612), 4, u'Dragon Fruit')]
#
# View the list in cassandra-cli
#
[default@dev] get NaturalList['fruits'];
=> (column=1346275601751603:1346275601751603:1, value=Apples, timestamp=1346275601751616)
=> (column=1346275601751605:1346275826354056:5, value=Elderberry, timestamp=1346275826354063)
=> (column=1346275601751606:1346275601751606:2, value=Bananas, timestamp=1346275601751616)
=> (column=1346275601751610:1346275601751610:3, value=Cherries, timestamp=1346275601751616)
=> (column=1346275601751612:1346275601751612:4, value=Dragon Fruit, timestamp=1346275601751616)
Returned 5 results.
Elapsed time: 3 msec(s).
#
# Move item 4 to be before 2
#
In [29]: natural_list.move(l, 4, 2)
In [30]: l = natural_list.get()
In [31]: l
Out[31]:
[((1346275601751603, 1346275601751603), 1, u'Apples'),
((1346275601751605, 1346275826354056), 5, u'Elderberry'),
((1346275601751605, 1346275922384242), 4, u'Dragon Fruit'),
((1346275601751606, 1346275601751606), 2, u'Bananas'),
((1346275601751610, 1346275601751610), 3, u'Cherries')]
#
# View the list in cassandra-cli
#
[default@dev] get NaturalList['fruits'];
=> (column=1346275601751603:1346275601751603:1, value=Apples, timestamp=1346275601751616)
=> (column=1346275601751605:1346275826354056:5, value=Elderberry, timestamp=1346275826354063)
=> (column=1346275601751605:1346275922384242:4, value=Dragon Fruit, timestamp=1346275922384250)
=> (column=1346275601751606:1346275601751606:2, value=Bananas, timestamp=1346275601751616)
=> (column=1346275601751610:1346275601751610:3, value=Cherries, timestamp=1346275601751616)
Returned 5 results.
Elapsed time: 3 msec(s).
create keyspace dev;
use dev;
/*
Schema for the NaturalList column family.
Each row is an ordered list.
Each item is represented by a single column.
Each column name is the composite (weight, seq, item_id); the comparator
below orders columns by that composite.
Each column value is the item label
*/
CREATE COLUMN FAMILY
NaturalList
WITH
key_validation_class = UTF8Type
AND
comparator = 'CompositeType(IntegerType, IntegerType, IntegerType)'
AND
default_validation_class = UTF8Type
;
"""Natural List
A sorted list in Cassandra where the Column order matches the list order.
"""
import time
import pycassa
from pycassa.cassandra import ttypes as cass_types
# Connection pool opened against "dev".
pool = pycassa.ConnectionPool("dev")
# Handle on the "NaturalList" column family; reads and writes both use
# QUORUM consistency.
cf = pycassa.ColumnFamily(pool, "NaturalList",
read_consistency_level=cass_types.ConsistencyLevel.QUORUM,
write_consistency_level=cass_types.ConsistencyLevel.QUORUM)
# Row key of the one list this module reads and writes.
LIST_NAME = "fruits"
# (item_id, item_label) pairs, in the order they are loaded.
LIST_ITEMS = [
(1, "Apples"),
(2, "Bananas"),
(3, "Cherries"),
(4, "Dragon Fruit"),
(5, "Elderberry")
]
"""Items to load into the list."""
def initialise():
    """Load :attr:`LIST_ITEMS` into the list row.

    One column is written per item, named ``(weight, seq, item_id)`` with
    the item label as its value. All columns go out in a single insert.
    """
    columns = {}
    for item_id, item_label in LIST_ITEMS:
        # weight and seq both start as "now" in microseconds, which appends
        # each item after the ones already stamped.
        now_us = int(time.time() * 10**6)
        columns[(now_us, now_us, item_id)] = item_label
    cf.insert(LIST_NAME, columns)
def get():
    """Read the list items in stored column order.

    Returns a list of the form ``[(sort_key, item_id, item_label)]`` where
    ``sort_key`` is the first two parts of the column name and ``item_id``
    is the third.
    """
    # NOTE(review): no column_count is passed, so the client's default
    # column limit applies -- confirm it is large enough for real lists.
    stored = cf.get(LIST_NAME)
    items = []
    # Columns are taken in the order the client returns them; no sorting
    # is done here.
    for name, label in stored.items():
        items.append((name[:2], name[2], label))
    return items
def move(the_list, move_item_id, new_sibling_id):
    """Move an item so that it sorts just before another item.

    The item's existing column is removed and a new column is written whose
    weight is the sibling's weight minus one and whose seq is the current
    time in microseconds.

    ``the_list`` is an iterable of ``(sort_key, item_id, item_label)``
    tuples where ``sort_key`` is ``(weight, seq)``. ``move_item_id`` is the
    item to move and ``new_sibling_id`` is the item it should be placed
    before.

    Raises ``ValueError`` if either id is not present in ``the_list``.
    Without this check an unknown id would fall through to the last entry of
    the list and the wrong column would be rewritten.
    """
    moving = None
    sibling_sort_key = None
    # First match wins for each id. The two checks are independent so the
    # same entry may satisfy both.
    for sort_key, item_id, item_label in the_list:
        if moving is None and item_id == move_item_id:
            moving = (sort_key, item_id, item_label)
        if sibling_sort_key is None and item_id == new_sibling_id:
            sibling_sort_key = sort_key

    if moving is None:
        raise ValueError("Move item not found.")
    if sibling_sort_key is None:
        raise ValueError("New sibling not found.")
    old_sort_key, old_item_id, item_label = moving

    row_key = LIST_NAME

    # Delete old column for the item
    cf.remove(row_key, columns=[old_sort_key + (old_item_id,)])

    # Add the new column for the item
    new_col_name = (
        sibling_sort_key[0] - 1,   # weight is new sibling weight - 1
        int(time.time() * 10**6),  # seq is current timestamp
        move_item_id               # id is same as old.
    )
    cf.insert(row_key, {new_col_name : item_label})
    return
#
# Initialise the list in iPython
#
In [4]: read_sorted.initialise()
#
# View the list in iPython
#
In [5]: l = read_sorted.get()
In [6]: l
Out[6]:
[((1347272021771938, 1347272021771938, 1), 1, u'Apples'),
((1347272021771950, 1347272021771950, 2), 2, u'Bananas'),
((1347272021771960, 1347272021771960, 3), 3, u'Cherries'),
((1347272021771966, 1347272021771966, 4), 4, u'Dragon Fruit'),
((1347272021771973, 1347272021771973, 5), 5, u'Elderberry')]
#
# View the list in cassandra-cli
#
[default@dev] get ReadSorted['fruits'];
=> (column=1:label, value=Apples, timestamp=1347272021771983)
=> (column=1:seq, value=1347272021771938, timestamp=1347272021771983)
=> (column=1:weight, value=1347272021771938, timestamp=1347272021771983)
=> (column=2:label, value=Bananas, timestamp=1347272021771983)
=> (column=2:seq, value=1347272021771950, timestamp=1347272021771983)
=> (column=2:weight, value=1347272021771950, timestamp=1347272021771983)
=> (column=3:label, value=Cherries, timestamp=1347272021771983)
=> (column=3:seq, value=1347272021771960, timestamp=1347272021771983)
=> (column=3:weight, value=1347272021771960, timestamp=1347272021771983)
=> (column=4:label, value=Dragon Fruit, timestamp=1347272021771983)
=> (column=4:seq, value=1347272021771966, timestamp=1347272021771983)
=> (column=4:weight, value=1347272021771966, timestamp=1347272021771983)
=> (column=5:label, value=Elderberry, timestamp=1347272021771983)
=> (column=5:seq, value=1347272021771973, timestamp=1347272021771983)
=> (column=5:weight, value=1347272021771973, timestamp=1347272021771983)
Returned 15 results.
Elapsed time: 27 msec(s).
#
# Move item 5 to be before 2
#
In [7]: read_sorted.move(l, 5, 2)
In [8]: l = read_sorted.get()
In [9]: l
Out[9]:
[((1347272021771938, 1347272021771938, 1), 1, u'Apples'),
((1347272021771949, 1347272094993286, 5), 5, u'Elderberry'),
((1347272021771950, 1347272021771950, 2), 2, u'Bananas'),
((1347272021771960, 1347272021771960, 3), 3, u'Cherries'),
((1347272021771966, 1347272021771966, 4), 4, u'Dragon Fruit')]
#
# View the list in cassandra-cli
#
[default@dev] get ReadSorted['fruits'];
=> (column=1:label, value=Apples, timestamp=1347272021771983)
=> (column=1:seq, value=1347272021771938, timestamp=1347272021771983)
=> (column=1:weight, value=1347272021771938, timestamp=1347272021771983)
=> (column=2:label, value=Bananas, timestamp=1347272021771983)
=> (column=2:seq, value=1347272021771950, timestamp=1347272021771983)
=> (column=2:weight, value=1347272021771950, timestamp=1347272021771983)
=> (column=3:label, value=Cherries, timestamp=1347272021771983)
=> (column=3:seq, value=1347272021771960, timestamp=1347272021771983)
=> (column=3:weight, value=1347272021771960, timestamp=1347272021771983)
=> (column=4:label, value=Dragon Fruit, timestamp=1347272021771983)
=> (column=4:seq, value=1347272021771966, timestamp=1347272021771983)
=> (column=4:weight, value=1347272021771966, timestamp=1347272021771983)
=> (column=5:label, value=Elderberry, timestamp=1347272021771983)
=> (column=5:seq, value=1347272094993286, timestamp=1347272094993296)
=> (column=5:weight, value=1347272021771949, timestamp=1347272094993296)
Returned 15 results.
Elapsed time: 18 msec(s).
#
# Move item 4 to be before 2
#
In [10]: read_sorted.move(l, 4, 2)
In [11]: l = read_sorted.get()
In [12]: l
Out[12]:
[((1347272021771938, 1347272021771938, 1), 1, u'Apples'),
((1347272021771949, 1347272094993286, 5), 5, u'Elderberry'),
((1347272021771949, 1347272153236970, 4), 4, u'Dragon Fruit'),
((1347272021771950, 1347272021771950, 2), 2, u'Bananas'),
((1347272021771960, 1347272021771960, 3), 3, u'Cherries')]
#
# View the list in cassandra-cli
#
[default@dev] get ReadSorted['fruits'];
=> (column=1:label, value=Apples, timestamp=1347272021771983)
=> (column=1:seq, value=1347272021771938, timestamp=1347272021771983)
=> (column=1:weight, value=1347272021771938, timestamp=1347272021771983)
=> (column=2:label, value=Bananas, timestamp=1347272021771983)
=> (column=2:seq, value=1347272021771950, timestamp=1347272021771983)
=> (column=2:weight, value=1347272021771950, timestamp=1347272021771983)
=> (column=3:label, value=Cherries, timestamp=1347272021771983)
=> (column=3:seq, value=1347272021771960, timestamp=1347272021771983)
=> (column=3:weight, value=1347272021771960, timestamp=1347272021771983)
=> (column=4:label, value=Dragon Fruit, timestamp=1347272021771983)
=> (column=4:seq, value=1347272153236970, timestamp=1347272153236978)
=> (column=4:weight, value=1347272021771949, timestamp=1347272153236978)
=> (column=5:label, value=Elderberry, timestamp=1347272021771983)
=> (column=5:seq, value=1347272094993286, timestamp=1347272094993296)
=> (column=5:weight, value=1347272021771949, timestamp=1347272094993296)
Returned 15 results.
Elapsed time: 16 msec(s).
use dev;
/*
Schema for the ReadSorted column family.
Each row is an ordered list.
Each item is represented by multiple columns.
Each column name is the composite (item_id, property_name); the comparator
below orders columns by that composite.
Property names are "label", "weight", "seq"
Each column value is property value, stored as text
*/
CREATE COLUMN FAMILY
ReadSorted
WITH
key_validation_class = UTF8Type
AND
comparator = 'CompositeType(IntegerType, UTF8Type)'
AND
default_validation_class = UTF8Type
;
"""Read Sorted List
A sorted list in Cassandra where the column order does not match the list
order. Readers must order the items.
"""
import time
import pycassa
from pycassa.cassandra import ttypes as cass_types
# Connection pool opened against "dev".
pool = pycassa.ConnectionPool("dev")
# Handle on the "ReadSorted" column family; reads and writes both use
# QUORUM consistency.
cf = pycassa.ColumnFamily(pool, "ReadSorted",
read_consistency_level=cass_types.ConsistencyLevel.QUORUM,
write_consistency_level=cass_types.ConsistencyLevel.QUORUM)
# Row key of the one list this module reads and writes.
LIST_NAME = "fruits"
# (item_id, item_label) pairs, in the order they are loaded.
LIST_ITEMS = [
(1, "Apples"),
(2, "Bananas"),
(3, "Cherries"),
(4, "Dragon Fruit"),
(5, "Elderberry")
]
"""Items to load into the list."""
def initialise():
    """Load :attr:`LIST_ITEMS` into the list row.

    Three columns are written per item -- ``(item_id, "label")``,
    ``(item_id, "weight")`` and ``(item_id, "seq")`` -- and all of them go
    out in a single insert.
    """
    columns = {}
    for item_id, item_label in LIST_ITEMS:
        # weight and seq both start as "now" in microseconds, stored as
        # text, which appends each item after the ones already stamped.
        stamp = str(int(time.time() * 10**6))
        columns[(item_id, "label")] = item_label
        columns[(item_id, "weight")] = stamp
        columns[(item_id, "seq")] = stamp
    cf.insert(LIST_NAME, columns)
def get():
    """Read every item of the list and return them in list order.

    Returns a list of the form ``[(sort_key, item_id, item_label)]``
    where ``sort_key`` is ``(weight, seq, item_id)``, sorted ascending by
    ``sort_key``.
    """
    # NOTE(review): no column_count is passed, so the client's default
    # column limit applies -- confirm it is large enough for real lists.
    cols = cf.get(LIST_NAME)

    # The item id is the first part of every column name; collapse the
    # per-property columns down to the distinct ids.
    item_ids = frozenset(name[0] for name in cols)

    items = []
    for item_id in item_ids:
        sort_key = (
            int(cols[(item_id, "weight")]),
            int(cols[(item_id, "seq")]),
            int(item_id)
        )
        items.append((sort_key, item_id, cols[(item_id, "label")]))

    # Column order is not list order, so the reader has to sort.
    return sorted(items, key=lambda entry: entry[0])
def move(the_list, move_item_id, new_sibling_id):
    """Move an item so that it sorts just before another item.

    Only the moved item's ``weight`` and ``seq`` columns are rewritten:
    weight becomes the sibling's weight minus one and seq becomes the
    current time in microseconds.

    ``the_list`` is an iterable of ``(sort_key, item_id, item_label)``
    tuples where the first element of ``sort_key`` is the weight.
    ``move_item_id`` is the item to move and ``new_sibling_id`` is the item
    it should be placed before.

    Raises ``AssertionError`` when either id is missing from ``the_list``.
    """
    move_sort_key = None
    sibling_sort_key = None
    for sort_key, item_id, _label in the_list:
        if item_id == move_item_id:
            move_sort_key = sort_key
        elif item_id == new_sibling_id:
            sibling_sort_key = sort_key

    # Raised explicitly (same exception type and messages as the original
    # assert statements) so the checks still run under ``python -O``.
    if not move_sort_key:
        raise AssertionError("Move item not found.")
    if not sibling_sort_key:
        raise AssertionError("New sibling not found.")

    # Update the weight and seq for the item we are moving.
    # The columns are keyed on move_item_id; the loop variable item_id is
    # whatever entry came last in the_list and must not be used here.
    sibling_weight = sibling_sort_key[0]
    row_key = LIST_NAME
    columns = {
        # weight is new sibling weight - 1
        (move_item_id, "weight") : str(sibling_weight - 1),
        # seq is current timestamp
        (move_item_id, "seq") : str(int(time.time() * 10**6)),
    }
    cf.insert(row_key, columns)
    return
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment