Skip to content

Instantly share code, notes, and snippets.

@danishabdullah
Last active April 22, 2018 09:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save danishabdullah/aaabb6849afe1e815e57667019389c09 to your computer and use it in GitHub Desktop.
Save danishabdullah/aaabb6849afe1e815e57667019389c09 to your computer and use it in GitHub Desktop.
Merge duplicated vcards using python. 3 to 2 autoconverted version of https://github.com/ddiguy/nodupe/blob/master/src/nodupe.py
#!/usr/bin/env python
import sys
import getopt
import re, quopri, codecs
import vobject
def parse_vcf(f):
    """Parse a vCard file that may contain many vCards.

    The file content is passed through ``vcard2vcf3`` and then handed to
    ``vobject.readComponents``.

    @param f: path of the file to read
    @returns: a list of the components yielded by vobject
    @raises IOError: if the file cannot be opened or read
    """
    # The context manager closes the file even if read() fails.
    with open(f, "r") as infile:
        string = infile.read()
    string = vcard2vcf3(string)
    contacts = []
    # Parsing happens while iterating over readComponents(), so the
    # ParseError guard has to wrap the loop rather than only the append.
    try:
        for vobj in vobject.readComponents(string, True, True, True, True):
            contacts.append(vobj)
    except vobject.base.ParseError:
        # Keep whatever was parsed before the malformed entry.
        print("errore Parse")
    return contacts
def vcard2vcf3(string):
    """Apply three textual rewrites to vCard text before it is parsed.

    1. ``TEL;<word>:`` becomes ``TEL;TYPE=<word>:`` (case-insensitive match).
    2. ``X-messaging/<word>-All`` becomes ``X-<word>`` (case-insensitive).
    3. Every line made only of base64 characters and ending in CRLF gets a
       leading space, so it reads as a continuation of the previous line.

    @param string: the raw vCard text
    @returns: the rewritten text
    """
    # Raw strings keep the backslash in \w from being treated as an
    # (invalid) string escape sequence.
    string = re.sub(r'TEL;(\w+):', r'TEL;TYPE=\1:', string,
                    flags=re.IGNORECASE)
    string = re.sub(r'X-messaging/(\w+)-All', r'X-\1', string,
                    flags=re.IGNORECASE)
    # Indent base64 multi-line data; MULTILINE lets ^ match at every line start.
    string = re.sub(r'^([+=A-Za-z0-9/]+\r\n)', r' \1', string,
                    flags=re.MULTILINE)
    return string
def dedupe(allContacts):
    """Return the contacts of ``allContacts`` with duplicates merged.

    Each contact is passed to ``isInArray`` together with the list built so
    far; the list that call returns becomes the new accumulator.

    @param allContacts: iterable of parsed contacts
    @returns: the accumulated list
    """
    unique = []
    for contact in allContacts:
        unique = isInArray(contact, unique)
    return unique
# Return a hash name for the vObj.
def hashName(vObj, swap=False):
    """Build a comparison key from the N property of ``vObj``.

    For a structured name (an object whose class is called ``Name``) the key
    is the capitalized given name followed by the capitalized family name;
    empty parts are skipped. For a plain text name every whitespace-separated
    word is capitalized and the words are concatenated.

    @param vObj: object exposing ``n.value``
    @param swap: when true, reverse the order of the parts (family before
        given, or last word first)
    @returns: the key, or "" when the name is of any other type
    @raises AttributeError: if ``vObj`` has no ``n``
    """
    name = vObj.n.value
    if name.__class__.__name__ == 'Name':
        if swap:
            parts = [name.family, name.given]
        else:
            parts = [name.given, name.family]
        return "".join(part.capitalize() for part in parts if part)
    # On Python 3 text is ``str``; the class-name test keeps Python 2
    # ``unicode`` values working as before.
    if isinstance(name, str) or name.__class__.__name__ == 'unicode':
        words = [word.capitalize() for word in name.split()]
        if swap:
            words.reverse()
        return "".join(words)
    return ""
# Two contacts are considered the same if at least one of these holds:
#  - they have the same name
#  - they share one mail address
#  - they share one phone number (todo: not work phone numbers)
def areTheSame(first, second):
    """Tell whether two contacts should be treated as the same person.

    They match when their name keys are equal (in either part order), or
    when they share at least one TEL value or at least one EMAIL value.

    @param first: a contact
    @param second: the contact to compare it with
    @returns: True on a match, False otherwise
    """
    first_name = hashName(first)
    # An empty key carries no identity, so two empty keys are not a match.
    if first_name and first_name in (hashName(second), hashName(second, True)):
        return True
    for field in ("TEL", "EMAIL"):
        ff = getFields(first, field)
        fs = getFields(second, field)
        if any(x in ff for x in fs):
            return True
    # Explicit result instead of falling off the end with None.
    return False
# Retrieve a given field from a contact.
# e.g. getFields(vobj, "TEL")
# e.g. getFields(vobj, "EMAIL")
def getFields(vobj, string, full=False, default_prefix="+39"):
    """Collect every child of ``vobj`` whose name equals ``string``.

    When ``string`` is "TEL", any value that does not start with "+" is
    rewritten in place with ``default_prefix`` prepended. This modifies the
    contact itself, not just the returned list.

    @param vobj: object exposing ``getSortedChildren()``
    @param string: the property name to look for, e.g. "TEL" or "EMAIL"
    @param full: return the child objects themselves instead of their values
    @param default_prefix: prefix added to TEL values lacking a leading "+";
        pass "" or None to leave such values untouched
    @returns: a list of values, or of child objects when ``full`` is true
    """
    fields = []
    for i in vobj.getSortedChildren():
        if i.name != string:
            continue
        if default_prefix and string == "TEL" and not i.value.startswith("+"):
            i.value = default_prefix + i.value
        fields.append(i if full else i.value)
    return fields
# If the contact is already in the array, merge it into the match; otherwise append it.
def isInArray(object, array):
    """Merge ``object`` into a matching entry of ``array`` or append it.

    The first entry for which ``areTheSame`` is true absorbs ``object`` via
    ``mergeItems``. If there is no such entry, ``object`` is appended; when
    it fails to serialize with a ValidateError, a missing N and/or FN is
    filled with the placeholder "Nemo" first.

    @param object: the contact to place
    @param array: list of contacts collected so far (modified in place)
    @returns: ``array``
    """
    for a in array:
        if areTheSame(a, object):
            # Merge them and validate.
            print("still there")
            print(object.serialize())
            print(a.serialize())
            # mergeItems changes ``a`` in place, so the list entry is updated.
            mergeItems(a, object)
            return array
    try:
        object.serialize()
    except vobject.base.ValidateError:
        try:
            object.n
        except AttributeError:
            print("added n")
            # The property must be created before its value can be set;
            # assigning to object.n.value here would raise again.
            object.add("n").value = vobject.vcard.Name(family="Nemo")
        try:
            object.fn
        except AttributeError:
            print("added fn to "+str(object.n))
            object.add("fn")
            object.fn.value = "Nemo"
    array.append(object)
    return array
# merge two items
# we could use fuzzy results to select the %
def mergeItems(one,two):
    """Merge contact ``two`` into contact ``one`` and return ``one``.

    - FN: if ``two`` has the longer formatted name, it replaces the one in
      ``one``, whose previous value is kept as a NICKNAME.
    - N: if the name keys differ in both part orders, the N of ``two`` is
      added to ``one`` as an additional N.
    - TEL / EMAIL: every entry of ``two`` not already in ``one`` is added.

    Both contacts are pretty-printed before the merge and the result after.

    @param one: the contact that receives the data (modified in place)
    @param two: the contact whose data is merged in
    @returns: ``one``
    """
    print("mergeItems()")
    one.prettyPrint()
    two.prettyPrint()
    # Merge the Formatted Name:
    # http://tools.ietf.org/html/rfc2426#section-3.1.1
    try:
        if len(two.fn.value) > len(one.fn.value):
            one.add("nickname").value=one.fn.value
            one.fn.value=two.fn.value
    except (AttributeError, TypeError):
        # Best effort: a contact without a usable FN is left as it is.
        pass
    # Merge the Name: http://tools.ietf.org/html/rfc2426#section-3.1.2
    # This attribute is REQUIRED and can be multi-valued.
    try:
        if (hashName(one) != hashName(two))\
                and (hashName(one) != hashName(two, True)):
            one.add("n").value = two.n.value
    except (AttributeError, TypeError):
        # Best effort: a contact without a usable N is left as it is.
        pass
    # Join mail addresses and phone numbers.
    for field in "TEL", "EMAIL":
        ot=getFields(one,field, True)
        tt=getFields(two,field, True)
        for i in tt:
            if i not in ot:
                one.add(i)
    print("mergedItem:")
    one.prettyPrint()
    return one
def main():
    """Command-line entry point.

    Every positional argument is a vCard file. The contacts of all files
    are collected, de-duplicated together and written to
    ``deduped_addressbook.vcf`` in the current directory. Exits with
    status 2 on a bad option or an unreadable input file.
    """
    # Parse command line options.
    try:
        opts, args = getopt.getopt(sys.argv[1:], "hv", ["help", "verbose"])
    except getopt.error as msg:
        print(msg)
        print("for help use --help")
        sys.exit(2)
    for i in opts:
        print("opts:"+i[0])
    for i in args:
        print("args:"+i)
    allContacts = []
    for f in args:
        print("file:"+f)
        try:
            # Accumulate across files; plain assignment would keep only
            # the contacts of the last file.
            allContacts.extend(parse_vcf(f))
        except IOError:
            print("errore: file not found")
            sys.exit(2)
    myContacts=dedupe(allContacts)
    # The context manager flushes and closes the output file.
    with open("deduped_addressbook.vcf","w+") as outfile:
        print("Creating new addressbook: deduped_addressbook.vcf")
        for i in myContacts:
            outfile.write(i.serialize())
    print("done")
# Run main() only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment