Skip to content

Instantly share code, notes, and snippets.

@OriPekelman

OriPekelman/slugify.py

Last active Oct 10, 2015
Embed
What would you like to do?
Domain-compatible slugification
#!/usr/bin/python
# coding=UTF-8
import re
import unicodedata
import string
import random
# Matches any single character followed by a capitalised word
# (one uppercase letter, then one or more lowercase letters).
first_cap_re = re.compile(r"(.)([A-Z][a-z]+)")
# Matches a lowercase letter or digit directly followed by an uppercase letter.
all_cap_re = re.compile(r"([a-z0-9])([A-Z])")
def slugify(value):
    """Reduce a unicode string to a lowercase ASCII slug.

    Steps, in order:
      1. NFKD-normalise and drop every character that has no ASCII form.
      2. Hyphenate CamelCase boundaries and lowercase (``de_camel_case``).
      3. Delete every character that is not an ASCII letter, digit or
         hyphen. Whitespace is deleted, not turned into a hyphen.
      4. Delete every ``--`` pair.
      5. Strip leading and trailing hyphens.

    value: the unicode text to slugify.
    Returns the slug; it is empty when nothing usable is left.
    """
    # normalize unicode to ascii
    value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').decode('ascii')
    # CamelCase to hyphens (also lowercases)
    value = de_camel_case(value)
    # remove everything that is not alphanumeric or a hyphen
    value = re.sub(r'[^a-zA-Z0-9-]', '', value)
    # remove any double hyphens, we use those in mangling; pairs are removed
    # left to right, so an odd run of hyphens leaves one and an even run none
    value = re.sub(r'--', '', value)
    # remove leading and trailing hyphens
    return value.strip("-")
def de_camel_case(value):
    """Hyphenate CamelCase boundaries in ``value`` and lowercase the result.

    A hyphen is inserted between any character and a following capitalised
    word, and between a lowercase letter or digit and a following uppercase
    letter, e.g. ``"ChangeMe"`` becomes ``"change-me"``. Because the first
    rule accepts any preceding character, ``" Foo"`` becomes ``" -foo"``.

    value: the text to convert.
    Returns the hyphenated, lowercased text.
    """
    s1 = first_cap_re.sub(r'\1-\2', value)
    return all_cap_re.sub(r'\1-\2', s1).lower()
def id_generator(size=3, chars=string.ascii_lowercase + string.digits):
    """Return a random string of ``size`` characters picked from ``chars``.

    size: number of characters to generate (0 gives an empty string).
    chars: non-empty sequence to draw from; characters may repeat.

    Uses the ``random`` module, so the result is not suitable as a secret.
    """
    return ''.join(random.choice(chars) for _ in range(size))
def slug(value):
    """Return ``slugify(value)`` followed by ``-`` and a random identifier.

    When the slugified text is empty, the surrounding hyphen is stripped so
    the result is the random identifier on its own.
    """
    return (slugify(value) + "-" + id_generator()).strip("-")
# Sample runs. "\\ " is a literal backslash followed by a space; it is
# written escaped because a bare backslash-space is an invalid escape sequence.
print(slug(u"/a/"))
print(slug(u":"))
print(slug(u""))
print(slug(u"-"))
print(slug(u" -changeMe or don't \\ | • We-🚔A.re/You/âé--ï_$#@% -"))
print(slug(u"--- -changeMe or don't \\ | • We-🚔A.re/You/âé--ï_$#@% -| - - - --- ---- --"))
print(slug(u" ChangeMe --- ChangeMe "))
print(slug(u"-changeMe or don't \\ | • We-🚔A.re/You/âé-ï_$#@%-"))
#which gives us:
#a-8z9
#5b2
#por
#3m5
#change-meordont-we-are-youaei-hvb
#change-meordont-we-are-youaei-m5k
#change-mechange-me-12v
#change-meordont-we-are-youae-i-5rq
@OriPekelman

This comment has been minimized.

Copy link
Owner Author

@OriPekelman OriPekelman commented Oct 10, 2015

And why not use random star names instead of random characters?

#!/usr/bin/python
# coding=UTF-8
import re
import unicodedata
import string
import random
# Matches any single character followed by a capitalised word
# (one uppercase letter, then one or more lowercase letters).
first_cap_re = re.compile(r"(.)([A-Z][a-z]+)")
# Matches a lowercase letter or digit directly followed by an uppercase letter.
all_cap_re = re.compile(r"([a-z0-9])([A-Z])")
def slugify(value):
    """
    Turn unicode text into a lowercase ASCII slug: drop anything without an
    ASCII form, hyphenate CamelCase, delete every character that is not a
    letter, digit or hyphen (whitespace included), delete each "--" pair and
    trim hyphens from both ends.
    """
    # NFKD splits accented letters into base letter + mark; the marks and
    # any other non-ASCII characters are then discarded by the encode step.
    decomposed = unicodedata.normalize('NFKD', value)
    ascii_text = decomposed.encode('ascii', 'ignore').decode('ascii')
    # CamelCase boundaries become hyphens; this step also lowercases.
    hyphenated = de_camel_case(ascii_text)
    # Keep only letters, digits and hyphens.
    cleaned = re.sub('[^a-zA-Z0-9-]', '', hyphenated)
    # Double hyphens are reserved for mangling, so each pair is removed.
    without_pairs = re.sub('--', '', cleaned)
    return without_pairs.strip("-")
def de_camel_case(value):
    """
    Insert a hyphen at each CamelCase boundary found by the two module-level
    patterns, then lowercase the whole string.
    """
    return all_cap_re.sub(
        r'\1-\2',
        first_cap_re.sub(r'\1-\2', value),
    ).lower()
# Star names used as slug suffixes; every entry consists only of lowercase
# ASCII letters and hyphens. Built once at import time, not on every call.
STAR_NAMES = (
    "acamar", "achernar", "achird", "acrab", "acrux", "acubens",
    "adhafera", "adhara", "ain", "al-dhanab", "al-fawaris", "al-giedi",
    "al-kalb-al-rai", "al-kaphrah", "al-kurud", "al-minliar-al-asad",
    "al-nair", "al-niyat", "al-thalimain", "aladfar", "alamak", "alaraph",
    "alathfar", "albaldah", "albali", "albireo", "alchiba", "alcor",
    "alcyone", "aldebaran", "alderamin", "aldhafera", "aldhibah", "aldib",
    "alfecca-meridiana", "alfirk", "algedi", "algenib", "algieba", "algol",
    "algorab", "alhajoth", "alhena", "alioth", "alkaid", "alkalurops",
    "alkes", "alkurah", "almach", "alnasl", "alnilam", "alnitak",
    "alniyat", "alphard", "alphecca", "alpheratz", "alrai", "alrakis",
    "alrami", "alrischa", "alsafi", "alsciaukat", "alshain", "alshat",
    "altair", "altais", "altarf", "alterf", "aludra", "alula-australis",
    "alula-borealis", "alwaid", "alya", "alzir", "ancha", "angetenar",
    "ankaa", "antares", "arcturus", "arich", "arided", "arkab",
    "arkab-posterior", "arkab-prior", "armus", "arneb", "arrakis",
    "ascella", "asellus-australis", "asellus-borealis", "asellus-primus",
    "asellus-secundus", "asellus-tertius", "ashlesha", "askella",
    "aspidiske", "asterion", "asterope", "atik", "atlas", "atria", "auva",
    "avior", "azaleh", "azelfafage", "azha", "azimech", "azmidiske",
    "baham", "baten-kaitos", "becrux", "beid", "bellatrix", "benetnasch",
    "betelgeuse", "botein", "brachium", "canopus", "capella", "caph",
    "caphir", "caput-andromedae", "caput-medusae", "caput-trianguli",
    "castor", "castula", "cebalrai", "ceginus", "celaeno", "chara",
    "cheleb", "chertan", "cor-caroli", "cor-hydrae", "cor-leonis",
    "cor-scorpii", "cor-serpentis", "coxa", "cujam", "cursa", "cynosura",
    "dabih", "decrux", "deneb", "deneb-algedi", "deneb-dulfim",
    "deneb-el-okab", "deneb-kaitos", "deneb-kaitos-schemali", "denebola",
    "dheneb", "diadem", "diphda", "dnoces", "dschubba", "dubhe", "duhr",
    "edasich", "electra", "elmuthalleth", "elnath", "eltanin", "enif",
    "errai", "etamin", "fomalhaut", "fum-al-samakah", "furud", "gacrux",
    "garnet-star", "gatria", "gemma", "gianfar", "giedi", "gienah-gurab",
    "giennah", "girtab", "gomeisa", "gorgonea-tertia", "graffias",
    "grafias", "grassias", "grumium", "hadar", "hadir", "haedus", "haldus",
    "hamal", "hassaleh", "head-of-hydrus", "heka", "heze", "hoedus-i",
    "hoedus-ii", "homam", "hyadum-i", "hyadum-ii", "hydrobius", "izar",
    "jabbah", "jih", "kabdhilinan", "kaffaljidhma", "kajam", "kastra",
    "kaus-australis", "kaus-borealis", "kaus-media", "keid", "kitalpha",
    "kleeia", "kochab", "kornephoros", "kraz", "ksora", "kullat-nunu",
    "kuma", "la-superba", "lanx-australis", "lesath", "lucida-anseris",
    "maasym", "mahasim", "maia", "marfark", "marfik", "markab", "matar",
    "mebsuta", "media", "megrez", "meissa", "mekbuda", "menchib", "menkab",
    "menkalinan", "menkar", "menkent", "menkib", "merak", "merga",
    "merope", "mesarthim", "miaplacidus", "mimosa", "minchir", "minelava",
    "minkar", "mintaka", "mira", "mirach", "miram", "mirfak", "mirzam",
    "misam", "mizar", "mothallah", "muliphein", "muphrid", "murzim",
    "muscida", "nair-al-saif", "naos", "nash", "nashira", "navi", "nekkar",
    "nembus", "neshmet", "nihal", "nunki", "nusakan", "okul", "peacock",
    "phact", "phad", "pherkad", "pherkard", "pleione", "polaris",
    "polaris-australis", "pollux", "porrima", "praecipua", "procyon",
    "propus", "proxi-centauri", "pulcherrim", "rana", "ras-algethi",
    "ras-alhague", "ras-elased-australis", "ras-thaoum", "rasalas",
    "rastaban", "regor", "regulus", "rigel", "rigil-kentaurus",
    "rijl-al-awwa", "rotanev", "ruchba", "ruchbah", "rukbat", "sabik",
    "sadachbia", "sadalbari", "sadalmelik", "sadalsuud", "sadatoni",
    "sadira", "sadlamulk", "sadr", "saiph", "salm", "sargas", "sarin",
    "sceptrum", "scheat", "schedar", "scheddi", "segin", "seginus", "sham",
    "shaula", "sheliak", "sheratan", "shurnarkabti-shashutu", "sinistra",
    "sirius", "situla", "skat", "sol", "spica", "sterope-i", "sterope-ii",
    "sualocin", "subra", "suhail", "suhel", "sulafat", "syrma", "tabit",
    "talitha-australis", "talitha-borealis", "tania-australis",
    "tania-borealis", "tarazet-or-tarazed", "taygeta", "tegmen-or-tegmine",
    "tejat-posterior", "tejat-prior", "terebellum", "thabit", "theemin",
    "thuban", "tien-kwan", "toliman", "torcularis-septentrionalis",
    "tseen-kee", "turais", "tyl", "unuk-elhaia", "unukalhai", "vega",
    "vindemiatrix", "wasat", "wei", "wezen", "yed-posterior", "yed-prior",
    "yildun", "zaniah", "zaurak", "zavijava", "zawiat-al-awwa", "zedaron",
    "zelphah", "zibal", "zosma", "zuben-el-akrab", "zuben-elgenubi",
    "zuben-elschemali", "zuben-hakrabi", "zubenelgenubi", "zubenelgubi",
    "zubeneschemali", "zubenhakrabi",
)


def id_generator():
    """Return one star name chosen at random from ``STAR_NAMES``.

    Uses the ``random`` module, so the choice is not suitable as a secret.
    """
    return random.choice(STAR_NAMES)
def slug(value):
    """
    Build the final slug: the slugified text, a hyphen and a random
    identifier. Hyphens left at either end are stripped, so an empty
    slugified text yields the identifier alone.
    """
    base = slugify(value)
    suffix = id_generator()
    return "-".join((base, suffix)).strip("-")

# Sample runs. "\\ " is a literal backslash followed by a space; it is
# written escaped because a bare backslash-space is an invalid escape sequence.
print(slug(u"/a/"))
print(slug(u":"))
print(slug(u""))
print(slug(u"-"))
print(slug(u"  -changeMe or don't \\ | • We-A.re/You/âé--ï_$#@%   -"))
print(slug(u"---  -changeMe or don't \\ | • We-A.re/You/âé--ï_$#@%   -|  - - - --- ---- --"))
print(slug(u"  ChangeMe --- ChangeMe  "))
print(slug(u"-changeMe or don't \\ | • We-A.re/You/âé-ï_$#@%-"))
#which gives us:
#a-procyon
#atik
#ascella
#sadalsuud
#change-meordont-we-are-youaei-eltanin
#change-meordont-we-are-youaei-shaula
#change-mechange-me-arcturus
#change-meordont-we-are-youae-i-sceptrum
``
@OriPekelman

This comment has been minimized.

Copy link
Owner Author

@OriPekelman OriPekelman commented Oct 10, 2015

Or maybe we can go Heroku-style and generate nice names like this:

# coding=UTF-8

import random
# Word lists for random_names(). Every entry uses only lowercase ASCII
# letters and hyphens so that the generated names stay domain-compatible.
ATTRIBUTES = (
    "active", "adaptable", "adventurous", "affable", "affectionate",
    "agreeable", "ambitious", "amiable", "amicable", "amusing", "balanced",
    "brave", "bright", "broad-minded", "calm", "candid", "careful",
    "charming", "cheerful", "communicative", "compassionate", "competitive",
    "conscientious", "considerate", "consistent", "convivial", "cooperative",
    "courageous", "courteous", "creative", "curious", "decisive",
    "determined", "devoted", "diligent", "diplomatic", "discreet", "dynamic",
    "easy-going", "easygoing", "emotional", "energetic", "enterprising",
    "enthusiastic", "entrepreneurial", "exciting", "exuberant",
    "facilitator", "fair-minded", "faithful", "fast", "fearless", "flexible",
    "focused", "forceful", "forgiving", "frank", "friendly", "funny",
    "generous", "gentle", "genuine", "good", "gregarious", "hard-working",
    "helpful", "honest", "humorous", "imaginative", "impartial",
    "incredible", "independent", "industrious", "initiator", "insightful",
    "intellectual", "intelligent", "interesting", "intuitive", "inventive",
    "kind", "knowledgeable", "literate", "logical", "loving", "loyal",
    "mediator", "meditative", "modest", "neat", "nice", "open-minded",
    "optimistic", "organised", "organized", "original", "outgoing",
    "particular", "passionate", "patient", "perceptive", "persistent",
    "personable", "persuasive", "philosophical", "pioneering", "placid",
    "pleasant", "plucky", "polite", "positive", "powerful", "practical",
    "pro-active", "proactive", "productive", "professional", "quick",
    "quick-witted", "quiet", "quirky", "quixotic", "racy", "rational",
    "rebellious", "reliable", "reserved", "resourceful", "responsible",
    "romantic", "sensible", "sensitive", "sensuous", "shy", "sincere",
    "skilled", "sociable", "solid", "sporty", "straightforward",
    "sympathetic", "thoughtful", "tidy", "tough", "trustworthy",
    "unassuming", "understanding", "versatile", "warm", "warmhearted",
    "willing", "wise", "witty",
)

ANIMAL_MODIFIERS = (
    "white", "black", "gray", "blue", "green", "red", "yellow", "brown",
    "striped", "common", "rare", "mountain", "sea", "velvet", "tiny", "giant",
)

ANIMALS = (
    "aardvark", "albatross", "alligator", "alpaca", "amphibian", "anaconda",
    "ant", "anteater", "antelope", "antlion", "ape", "aphid", "armadillo",
    "asp", "ass", "baboon", "badger", "bandicoot", "barnacle", "barracuda",
    "basilisk", "bass", "bat", "bear", "beaver", "bedbug", "bee", "beetle",
    "bird", "bison", "blackbird", "bluejay", "boar", "boa", "bobcat",
    "buffalo", "butterfly", "buzzard", "camel", "capybara", "cardinal",
    "caribou", "carp", "cassowary", "cat", "caterpillar", "catfish",
    "centipede", "chameleon", "chamois", "cheetah", "chickadee", "chicken",
    "chimpanzee", "chinchilla", "chipmunk", "chough", "clam", "clownfish",
    "coati", "cobra", "cockroach", "cod", "condor", "constrictor", "coral",
    "cormorant", "cougar", "coyote", "crab", "crane", "crawdad", "crayfish",
    "cricket", "crocodile", "crow", "cuckoo", "curlew", "damselfly", "deer",
    "dingo", "dinosaur", "dog", "dogfish", "dolphin", "donkey", "dormouse",
    "dotterel", "dove", "dragonfly", "duck", "dugong", "dung-beetle",
    "dunlin", "eagle", "earthworm", "earwig", "echidna", "eel", "egret",
    "eland", "elephant", "elephant-seal", "elk", "emu", "ermine", "falcon",
    "ferret", "finch", "firefly", "fish", "flamingo", "flea", "fly",
    "flyingfish", "fox", "frog", "fruit-bat", "gaur", "gazelle", "gecko",
    "gerbil", "giant-panda", "giant-squid", "gibbon", "gila-monster",
    "giraffe", "gnat", "gnu", "goat", "goldfinch", "goldfish", "goose",
    "gopher", "gorilla", "goshawk", "grasshopper", "grizzly-bear", "grouse",
    "guanaco", "guinea-fowl", "guinea-pig", "gull", "guppy", "haddock",
    "halibut", "hamster", "hare", "hawk", "hedgehog", "hermit-crab", "heron",
    "herring", "hippopotamus", "hookworm", "hornet", "horse", "hummingbird",
    "hyena", "ibex", "ibis", "iguana", "impala", "jackal", "jaguar", "jay",
    "jellyfish", "kangaroo", "kingfisher", "kinkajou", "kiwi", "koala",
    "koi", "komodo", "kookabura", "kouprey", "krill", "kudu", "ladybug",
    "lamprey", "lapwing", "lark", "leech", "lemming", "lemur", "leopard",
    "lion", "lizard", "llama", "lobster", "locust", "loon", "loris", "louse",
    "lynx", "lyrebird", "macaw", "mackerel", "magpie", "mallard", "manatee",
    "mandrill", "manta-ray", "mantis", "marlin", "marmoset", "marmot",
    "marsupial", "marten", "meadowlark", "meerkat", "mink", "minnow", "mite",
    "mockingbird", "mole", "mollusk", "mongoose", "monkey", "moose",
    "mosquito", "moth", "mountain-goat", "mouse", "mule", "muskox",
    "narwhal", "newt", "nightingale", "ocelot", "octopus", "okapi",
    "opossum", "orangutan", "orca", "oryx", "ostrich", "otter", "owl",
    "oyster", "panda", "panther", "parrot", "partridge", "peafowl",
    "pelican", "penguin", "pheasant", "pig", "pigeon", "polar-bear", "pony",
    "porcupine", "porpoise", "prairie-dog", "quail", "quelea", "quetzal",
    "rabbit", "raccoon", "rail", "ram", "rat", "raven", "reindeer",
    "rhinoceros", "rodent", "rook", "salamander", "salmon", "sand-dollar",
    "sandpiper", "sardine", "scorpion", "seahorse", "seal", "shark", "sheep",
    "shrew", "skunk", "sloth", "snail", "snake", "sparrow", "spider",
    "spoonbill", "squid", "squirrel", "starling", "stingray", "stinkbug",
    "stork", "swallow", "swan", "tapir", "tarantula", "tarsier", "termite",
    "tiger", "toad", "trout", "turkey", "turtle", "vicuna", "viper",
    "vulture", "wallaby", "walrus", "wasp", "weasel", "whale", "wildcat",
    "wolf", "wolverine", "wombat", "woodcock", "woodpecker", "worm", "wren",
    "yak", "zebra",
)


def random_names():
    """Return a random ``attribute-modifier-animal`` name.

    One word is drawn from each of ``ATTRIBUTES``, ``ANIMAL_MODIFIERS`` and
    ``ANIMALS`` and the three are joined with hyphens. Individual words may
    themselves contain hyphens (e.g. ``fair-minded``, ``polar-bear``).

    Uses the ``random`` module, so names are neither unique nor secret.
    """
    return "-".join((
        random.choice(ATTRIBUTES),
        random.choice(ANIMAL_MODIFIERS),
        random.choice(ANIMALS),
    ))


# Print 50 sample names. print() is called as a function so the loop runs
# on Python 3 as well as Python 2.
for _ in range(50):
    print(random_names())

which gives us:

skilled-tiny-bobcat
sincere-red-elephant-seal
discreet-red-flea
active-red-mongoose
passionate-black-salamander
industrious-velvet-mongoose
straightforward-brown-eland
conscientious-giant-amphibian
original-rare-porcupine
amiable-striped-sparrow
straightforward-blue-giant panda
frank-tiny-gull
considerate-gray-jackal
reserved-yellow-camel
particular-yellow-wolf
fair-minded-sea-elephant-seal
fearless-brown-gila-monster
careful-common-fox
impartial-black-ocelot
original-mountain-turtle
rebellious-tiny-gerbil
modest-black-grouse
romantic-black-guinea-fowl
logical-white-beetle
racy-black-lynx
quiet-tiny-mandrill
loyal-blue-alligator
balanced-gray-rodent
incredible-green-camel
witty-white-mantis
neat-red-iguana
placid-brown-hookworm
persuasive-striped-carp
competitive-common-donkey
proactive-gray-orca
facilitator-black-viper
warmhearted-gray-chicken
solid-giant-porcupine
emotional-yellow-swan
meditative-blue-minnow
enterprising-black-wildcat
candid-green-tarsier
rebellious-blue-mockingbird
courageous-gray-chicken
bright-common-horse
good-rare-leopard
inventive-gray-mallard
ambitious-striped-mandrill
considerate-red-owl
determined-black-wildcat

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.