CBL to Mylar Script - Imports a CBL file, finds each unique series on ComicVine, checks whether you already have it in Mylar, and if not, adds the series to Mylar via its API.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
'''
Installation:
1) Add this package as a python wrapper to search the comicvine api:
   https://github.com/jessebraham/comicvine-search
   I wasn't able to get this module to install, so I copied it into the same folder as the .py file (or add it to your env, of course).
2) Replace [mylar api key] with your Mylar api key.
3) Replace [mylar server address] with your server in the format http://servername:port/ (make sure to include the slash at the end).
4) Replace [comicvine api key] with your comicvine api key.
Usage:
python3 cbltomylar.py cblfiletoimport.cbl
You can also use wildcards:
python3 cbltomylar.py *.cbl
Results are output to stdout; to save them to a file, add > logfile.txt
Notes:
Marvel and DC Comics are the only publishers accepted (plus a couple of others hardcoded below); you can edit the code to add more.
Without this filter I was getting non-English publications.
Every match on series name and volume year is added, so if there are multiple matches, multiple series will be added.
Reference: Mylar general API structure:
http://localhost:8090 + HTTP_ROOT + /api?apikey=$apikey&cmd=$command
'''
import requests
import time
from comicvine_search import ComicVineClient
import xml.etree.ElementTree as ET
from glob import glob
from sys import argv

mylarapikey = '[mylar api key]'
mylarURL = '[mylar server address]'  # format: http://servername:port/
cv = ComicVineClient('[comicvine api key]')
mylarURLfull = mylarURL + 'api?apikey=' + mylarapikey + '&cmd=addComic&id='
mylarURLfullcheck = mylarURL + 'api?apikey=' + mylarapikey + '&cmd=getComic&id='
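# e.g. with mylarURL = 'http://localhost:8090/' (illustrative), adding comicid 12345 calls:
#   http://localhost:8090/api?apikey=<your mylar api key>&cmd=addComic&id=12345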

for pattern in argv[1:]:
    # Iterate over every argument so shell-expanded wildcards work too;
    # glob() handles wildcards on platforms where the shell does not expand them
    for filename in glob(pattern):
        tree = ET.parse(filename)
        root = tree.getroot()
        series_list = []
        input_list = []
        cblhave = 0
        cbladd = 0
        notfound = 0
        cblinput = root.findall("./Books/Book")
        for series in cblinput:
            line = series.attrib['Series'], series.attrib['Volume']
            series_list.append(line)
        # Remove duplicates
        input_list = list(set(series_list))
        print("Found", len(input_list), "unique series in CBL file. Starting search.")
        for inputname, inputyear in input_list:
            print("Searching for " + inputname + " (" + inputyear + ")")
            # Sleeping at least 1 second between calls is what comicvine recommends.
            # More than 450 requests in 15 minutes (900 seconds) gets you rate limited,
            # so if you are going to be importing for a straight 15 minutes (wow), raise this to 2.
            time.sleep(1)
            response = cv.search(inputname, resources=['volume'])
            for result in (response.results or []):
                if result['name'] == inputname and result['start_year'] == inputyear:
                    if result['publisher']['name'] in ("DC Comics", "Marvel", "Marvel Digital Comics Unlimited", "Dynamite Entertainment"):
                        print("  Found on comicvine: " + result['publisher']['name'] + ": " + inputname + " (" + inputyear + ")", "comicid:", result['id'])
                        checkURL = mylarURLfullcheck + str(result['id'])
                        addURL = mylarURLfull + str(result['id'])
                        responsecheck = requests.get(checkURL)
                        # Crude but effective: a short response means Mylar does not know the
                        # series yet. See the JSON-based sketch at the end of this script for
                        # a sturdier check.
                        if len(responsecheck.text) > 75:
                            print("  Mylar is already tracking this series")
                            cblhave = cblhave + 1
                        else:
                            requests.get(addURL)
                            print("  Adding series to mylar")
                            cbladd = cbladd + 1
        notfound = len(input_list) - cblhave - cbladd
        print("CBL File:", filename, " Series in CBL:", len(input_list), " Series already in mylar:", cblhave, " Series added to mylar:", cbladd, " Series not found on comicvine:", notfound)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
'''
This is an updated script from TheMadman which adds a ton of functionality to my script and also cleans up the code. Here's a description from him:
It will import all the CBL files in a subfolder called 'ReadingLists' and store the data in a csv file to keep track of changes. It keeps a register
of series name and year (as found in your reading lists), and looks up the matching CV id and publisher (allowing for preferred and blacklisted publishers).
It then checks/adds to Mylar based on user preference.
If you add new files to your reading list folder and re-run the script, it merges the new series it finds with the existing data from previous runs rather
than potentially double-handling the same series every time it reads a cbl file.
The only issue I've found is that sometimes there are 2 matches for a series with the same year on CV (both with Marvel as publisher), so it's impossible to
know which comicID is correct without more info.
Installation:
1) Download & install this package (required for searching the comicvine api):
   https://github.com/jessebraham/comicvine-search
2) Create a folder called 'ReadingLists' in the same directory as the script and add any CBL files you want to process into this folder
3) Replace [MYLAR API KEY] with your Mylar3 api key
4) Replace [MYLAR URL] with your server in the format: http://servername:port/ (make sure to include the slash at the end)
5) Replace [COMICVINE API KEY] with your comicvine api key
6) Optional - Modify the following options:
   - PUBLISHER_BLACKLIST : List of publishers to ignore during CV searching
   - PUBLISHER_PREFERRED : List of publishers to prioritise when multiple CV matches are found
   - ADD_NEW_SERIES_TO_MYLAR : Automatically add CV search results to Mylar as new series
   - CV_SEARCH_LIMIT : Set a limit on the number of CV API calls made during this processing.
     This is useful for large collections if you want to break the process into smaller chunks.
Usage:
python3 cbl-mylar-import.py
Results are output to "output.csv" in the same directory as the script
Notes:
- Series are matched on series name and year.
- If multiple results are found, matches from a preferred publisher are prioritised; among those, the one with the most issues is selected.
- CV api calls are limited to one every 2 seconds, so this script can take a while for large collections.
  It is not recommended to reduce this, but you can modify the rate using the CV_API_RATE var.
- If you mess anything up, you can simply delete output.csv or force a re-run using the Mylar & CV FORCE_RECHECK vars.
'''
import requests
import json
import time
import os
from enum import IntEnum
from comicvine_search import ComicVineClient
import xml.etree.ElementTree as ET
### DEV OPTIONS
#Enable verbose output
VERBOSE = False
#Prevent overwriting of main CSV data file
TEST_MODE = False
#File prefs
SCRIPT_DIR = os.getcwd()
READINGLIST_DIR = os.path.join(SCRIPT_DIR, "ReadingLists")
DATA_FILE = os.path.join(SCRIPT_DIR, "output.csv")
if TEST_MODE:
    #Create new file instead of overwriting data file
    OUTPUT_FILE = os.path.join(SCRIPT_DIR, "output_new.csv")
else:
    OUTPUT_FILE = DATA_FILE
CSV_HEADERS = ["Series","Year","Publisher", "ComicID","InMylar"]
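# Example output.csv contents (values illustrative only):
#   Series,Year,Publisher,ComicID,InMylar
#   Batman,2016,DC Comics,12345,True
#   Invincible,2003,Unknown,Unknown,False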
class Column(IntEnum):
    SERIES = 0
    YEAR = 1
    PUBLISHER = 2
    COMICID = 3
    INMYLAR = 4
#CV prefs
CV_SEARCH_LIMIT = 10000 #Maximum allowed number of CV API calls
CV_API_KEY = '[COMICVINE API KEY]'
CV_API_RATE = 2 #Seconds between CV API calls
FORCE_RECHECK_CV = False
PUBLISHER_BLACKLIST = ["Panini Comics","Editorial Televisa","Planeta DeAgostini","Unknown"]
PUBLISHER_PREFERRED = ["Marvel","DC Comics"] #If multiple matches found, prefer this result
CV = None
#Mylar prefs
mylarAPI = '[MYLAR API KEY]'
mylarBaseURL = '[MYLAR URL]'  # format: http://servername:port/
FORCE_RECHECK_MYLAR_MATCHES = False
ADD_NEW_SERIES_TO_MYLAR = True
mylarAddURL = mylarBaseURL + 'api?apikey=' + mylarAPI + '&cmd=addComic&id='
mylarCheckURL = mylarBaseURL + 'api?apikey=' + mylarAPI + '&cmd=getComic&id='
numNewSeries = 0
numExistingSeries = 0
numCBLSeries = 0
#Initialise counters
mylarExisting = 0
mylarMissing = 0
CVFound = 0
CVNotFound = 0
searchCount = 0
def parseCBLfiles():
    series_list = []
    print("Checking CBL files in %s" % (READINGLIST_DIR))
    for root, dirs, files in os.walk(READINGLIST_DIR):
        for file in files:
            if file.endswith(".cbl"):
                try:
                    filename = os.path.join(root, file)
                    tree = ET.parse(filename)
                    fileroot = tree.getroot()
                    cblinput = fileroot.findall("./Books/Book")
                    for series in cblinput:
                        # Strip commas so the series name is safe in the csv output
                        line = series.attrib['Series'].replace(",", ""), series.attrib['Volume']
                        series_list.append(list(line))
                except Exception:
                    print("Unable to process file at %s" % (os.path.join(str(root), str(file))))
    return series_list

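# For reference, each Book entry in a CBL reading list carries the series
# details as attributes, roughly like this (attribute set inferred from the
# fields these scripts read):
#   <Book Series="Batman" Number="1" Volume="2016" Year="2016">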
def isSeriesInMylar(comicID):
    found = False
    global mylarExisting
    global mylarMissing
    if comicID.isnumeric():
        comicCheckURL = "%s%s" % (mylarCheckURL, str(comicID))
        mylarData = requests.get(comicCheckURL).text
        jsonData = json.loads(mylarData)
        mylarComicData = jsonData['data']['comic']
        if not len(mylarComicData) == 0:
            found = True
    elif comicID != "Unknown":
        print("    Mylar series status unknown - invalid ComicID: %s" % (comicID))
    if found:
        if VERBOSE: print("    Match found for %s in Mylar" % (comicID))
        mylarExisting += 1
        return True
    else:
        if VERBOSE: print("    No match found for %s in Mylar" % (comicID))
        mylarMissing += 1
        return False

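# The getComic payload shape assumed by the check above (inferred from the
# fields this script reads, not from Mylar documentation):
#   {"success": true, "data": {"comic": [{...series details...}], "issues": [...]}}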
def addSeriesToMylar(comicID):
    if comicID.isnumeric():
        if VERBOSE: print("    Adding %s to Mylar" % (comicID))
        comicAddURL = "%s%s" % (mylarAddURL, str(comicID))
        mylarData = requests.get(comicAddURL).text
        # Check result of API call
        jsonData = json.loads(mylarData)
        return jsonData['success'] == "true"
    else:
        return False

def findVolumeDetails(series, year):
    found = False
    comicID = "Unknown"
    publisher = "Unknown"
    global searchCount
    global CVNotFound
    global CVFound
    global CV
    if isinstance(series, str):
        searchCount += 1
        result_matches = 0
        preferred_matches = 0
        result_publishers = []
        result_matches_blacklist = 0
        issueCounter = 0
        series_matches = []
        publisher_blacklist_results = set()
        try:
            if VERBOSE: print("    Searching for %s (%s) on CV" % (series, year))
            response = CV.search(series, resources=['volume'])
            if response.results is None:
                print("    No results found for %s (%s)" % (series, year))
            else:  # Results were found
                for result in response.results:  # Iterate through CV results
                    # If exact series name and year match
                    if result['name'] == series and result['start_year'] == year:
                        publisher_temp = result['publisher']['name']
                        result_publishers.append(publisher_temp)
                        series_matches.append(result)
                        if publisher_temp in PUBLISHER_BLACKLIST:
                            result_matches_blacklist += 1
                            publisher_blacklist_results.add(publisher_temp)
                        else:
                            found = True
                            result_matches += 1
                            publisher = publisher_temp
                            if publisher in PUBLISHER_PREFERRED: preferred_matches += 1
                            comicID = result['id']
                            numIssues = result['count_of_issues']
                            print("    Found on comicvine: %s - %s (%s) : %s (%s issues)" % (publisher, series, year, comicID, numIssues))
                # Handle multiple publisher matches
                if result_matches > 1:
                    print("    Warning: Multiple valid matches found! Publishers: %s" % (", ".join(result_publishers)))
                    # Set result to the preferred publisher match with the most issues
                    for item in series_matches:
                        if item['publisher']['name'] in PUBLISHER_PREFERRED or preferred_matches == 0:
                            numIssues = item['count_of_issues']
                            if numIssues > issueCounter:
                                # Current series has more issues than any other preferred result!
                                publisher = item['publisher']['name']
                                comicID = item['id']
                                issueCounter = numIssues
                                ## TODO: Remove "preferred" text labels
                                print("    Selected series from multiple results: %s - %s (%s issues)" % (publisher, comicID, numIssues))
                            else:
                                # Another series has more issues
                                print("    Skipped series: %s - %s (%s issues) - another preferred series has more issues!" % (item['publisher']['name'], item['id'], numIssues))
                if len(response.results) == 0:
                    print("    No results found for %s (%s)" % (series, year))
                if result_matches_blacklist > 0 and result_matches == 0:
                    # Only invalid results found
                    print("    No valid results found for %s (%s). %s blacklisted results found with the following publishers: %s" % (series, year, result_matches_blacklist, ", ".join(publisher_blacklist_results)))
        except Exception as e:
            print("    There was an error processing %s (%s)" % (series, year))
            print(repr(e))
    # Update counters
    if not found:
        CVNotFound += 1
    else:
        CVFound += 1
    return [publisher, comicID]

def readExistingData():
    print("Reading data from %s" % (DATA_FILE))
    dataList = []
    if os.path.exists(DATA_FILE):
        #Import raw csv data as lines
        with open(DATA_FILE, mode='r') as csv_file:
            data = csv_file.readlines()
        #Parse csv data and strip whitespace
        for i in range(len(data)):
            if not i == 0:  #Skip header row
                fields = [x.strip() for x in data[i].split(",")]
                dataList.append(fields)
    return dataList

def outputData(data):
    print("Exporting data to %s" % (OUTPUT_FILE))
    with open(OUTPUT_FILE, mode='w') as output_file:
        output_file.write("%s\n" % (",".join(CSV_HEADERS)))
        #Check if list contains multiple columns
        if len(data[0]) == 1:
            output_file.writelines(data)
        else:
            for row in data:
                output_file.write("%s\n" % (",".join(map(str, row))))

def index_2d(myList, v):
    # Return the index of the first row whose first two fields match v
    for i, x in enumerate(myList):
        if v[0] == x[0] and v[1] == x[1]:
            return i

def mergeDataLists(list1, list2):
    # list1 = Main list with rows of 5 items (from the CSV)
    # list2 = Import list with rows of 2 items (from the CBL files)
    print("Merging data lists")
    mainDataList = list1
    dataToMerge = list2
    global numExistingSeries
    global numCBLSeries
    global numNewSeries
    mainDataTitles = []
    mergedTitleSet = ()
    finalMergedList = []
    #Extract first 2 row elements to modified list
    for row in mainDataList:
        mainDataTitles.append([row[Column.SERIES], row[Column.YEAR]])
    mergedTitleList = mainDataTitles + dataToMerge
    mergedTitleList.sort()
    numExistingSeries = len(mainDataList)
    numCBLSeries = len(mergedTitleList)
    #Deduplicate on (series, year)
    mergedTitleSet = set(tuple(map(tuple, mergedTitleList)))
    for row in mergedTitleSet:
        if list(row) in mainDataTitles:
            #Find index of exact match in mainDataList and keep its enriched row
            match_row = index_2d(mainDataList, row)
            finalMergedList.append(mainDataList[match_row])
            mainDataList.pop(match_row)
        else:
            #New series from the CBL files: fill the remaining columns with defaults
            newData = [row[Column.SERIES], row[Column.YEAR], "Unknown", "Unknown", False]
            finalMergedList.append(newData)
    numNewSeries = len(finalMergedList) - numExistingSeries
    return finalMergedList

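# Illustrative example (values hypothetical): merging main rows
#   [["Batman", "2016", "DC Comics", "12345", "True"]]
# with CBL rows
#   [["Batman", "2016"], ["X-Men", "1991"]]
# keeps the enriched Batman row and appends a placeholder row
#   ["X-Men", "1991", "Unknown", "Unknown", False]
# for the new series, so earlier CV/Mylar lookups are never repeated.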
def main():
    #Initialise CV API tool
    global CV
    CV = ComicVineClient(CV_API_KEY)
    global numExistingSeries
    global numCBLSeries
    global numNewSeries
    #Extract list from existing csv
    importData = readExistingData()
    #Process CBL files
    cblSeriesList = parseCBLfiles()
    #Merge csv data with cbl data
    mergedData = mergeDataLists(importData, cblSeriesList)
    mergedData.sort()
    print("Found %s series in CSV, %s new series in CBL" % (numExistingSeries, numNewSeries))
    #Run all data checks in CV & Mylar
    for rowIndex in range(len(mergedData)):
        series = mergedData[rowIndex][Column.SERIES]
        year = mergedData[rowIndex][Column.YEAR]
        publisher = mergedData[rowIndex][Column.PUBLISHER]
        comicID = mergedData[rowIndex][Column.COMICID]
        inMylar = mergedData[rowIndex][Column.INMYLAR]
        checkMylar = False
        comicIDExists = comicID.isnumeric()
        #Check for new comicIDs
        if not comicIDExists or FORCE_RECHECK_CV:
            #Self-imposed search limit to prevent hitting CV API limits
            if searchCount < CV_SEARCH_LIMIT:
                #Sleeping at least 1 second between calls is what comicvine recommends.
                #More than 450 requests in 15 minutes (900 seconds) gets you rate
                #limited, hence the 2-second default for CV_API_RATE.
                if searchCount > 0: time.sleep(CV_API_RATE)
                #Update fields in data list
                cv_data = findVolumeDetails(series, year)
                mergedData[rowIndex][Column.PUBLISHER] = cv_data[0]
                mergedData[rowIndex][Column.COMICID] = cv_data[1]
                #Update vars for use elsewhere
                publisher = str(cv_data[0])
                comicID = str(cv_data[1])
        #Check if series exists in mylar
        if inMylar == "True":
            #Match exists in mylar; recheck only if forced
            checkMylar = FORCE_RECHECK_MYLAR_MATCHES
        else:
            #No mylar match found
            checkMylar = True
        if checkMylar:
            #Update field in data list
            inMylar = isSeriesInMylar(comicID)
            mergedData[rowIndex][Column.INMYLAR] = inMylar
        #Add new series to Mylar
        if not inMylar and ADD_NEW_SERIES_TO_MYLAR:
            mergedData[rowIndex][Column.INMYLAR] = addSeriesToMylar(comicID)
    #Write modified data to file
    outputData(mergedData)
    #Print summary to terminal
    print("Total Number of Series: %s, New Series Added From CBL: %s, Existing Series (Mylar): %s, Missing Series (Mylar): %s, New Matches (CV): %s, Unfound Series (CV): %s" % (numExistingSeries, numNewSeries, mylarExisting, mylarMissing, CVFound, CVNotFound))
    ## TODO: Summarise list of publishers in results

main()