Skip to content

Instantly share code, notes, and snippets.

@Lanjelin Lanjelin/nbno.py Secret
Last active Dec 4, 2017

Embed
What would you like to do?
NB.no nedlaster
# -*- coding: utf-8 -*-
import wx, sys, subprocess, os
import urllib2
import StringIO
import cStringIO
from PIL import Image
from random import randint
class MainFrame(wx.Frame):
    """Main window of the nb.no book downloader.

    The window collects a book id, a page range and the tile parameters of
    the nb.no image resolver.  Every page is fetched as a set of tiles which
    are stitched into one image and stored as ``<BokID>/<page>.jpg``.

    Attributes set while working:
        Page:  name of the page being fetched ("C1".."C3" or a 4-digit number).
        URL:   list of tile URLs for ``Page`` (built by ``GetUrl``).
        Image: result of the last ``GetImage`` call.
    """

    def __init__(self, parent, mytitle, mysize):
        """Build the frame and all of its controls.

        Args:
            parent: parent window, or None for a top-level frame.
            mytitle: window title.
            mysize: (width, height) tuple for the frame.
        """
        wx.Frame.__init__(
            self, parent, wx.ID_ANY, mytitle, size=mysize,
            style=wx.DEFAULT_DIALOG_STYLE | wx.MINIMIZE_BOX | wx.TAB_TRAVERSAL)
        font = wx.Font(10, wx.SWISS, wx.NORMAL, wx.BOLD)
        #http://www.nb.no/nbsok/resources/images/nblogo.png
        #ico = wx.Icon('nbno.ico', wx.BITMAP_TYPE_ICO)
        #self.SetIcon(ico)
        self.SetFont(font)
        self.SetBackgroundColour('#EEEEEE')
        self.panel = wx.Panel(self, size=(290, 355), style=wx.TAB_TRAVERSAL)
        # Y coordinate of each row of controls.
        rowY = [10, 40, 70, 100, 130, 160]

        # Row 0: book id.  The label lives on the panel like every other control.
        self.labelBokID = wx.StaticText(self.panel, label="BokID", pos=(50, rowY[0]))
        self.inputBokID = wx.TextCtrl(self.panel, value="2012031908162",
                                      pos=(100, rowY[0] - 3), size=(140, -1))

        # Row 1: first and last page to download.
        self.labelFraSide = wx.StaticText(self.panel, label="Fra side", pos=(40, rowY[1]))
        self.inputFraSide = wx.TextCtrl(self.panel, value="1",
                                        pos=(100, rowY[1] - 3), size=(35, -1))
        self.labelTilSide = wx.StaticText(self.panel, label="Til side", pos=(150, rowY[1]))
        self.inputTilSide = wx.TextCtrl(self.panel, value="12",
                                        pos=(205, rowY[1] - 3), size=(35, -1))

        # Row 2: zoom levels passed through to the resolver.
        self.labelMaxLevel = wx.StaticText(self.panel, label="maxLevel", pos=(30, rowY[2]))
        self.inputMaxLevel = wx.TextCtrl(self.panel, value="6",
                                         pos=(100, rowY[2] - 3), size=(35, -1))
        self.inputMaxLevel.SetMaxLength(2)
        self.labelLevel = wx.StaticText(self.panel, label="level", pos=(150, rowY[2]))
        self.inputLevel = wx.TextCtrl(self.panel, value="6",
                                      pos=(205, rowY[2] - 3), size=(35, -1))
        self.inputLevel.SetMaxLength(2)

        # Row 3: resolution passed through to the resolver.
        self.labelResX = wx.StaticText(self.panel, label="resX", pos=(50, rowY[3]))
        self.inputResX = wx.TextCtrl(self.panel, value="1744",
                                     pos=(85, rowY[3] - 3), size=(50, -1))
        self.labelResY = wx.StaticText(self.panel, label="resY", pos=(150, rowY[3]))
        self.inputResY = wx.TextCtrl(self.panel, value="2872",
                                     pos=(190, rowY[3] - 3), size=(50, -1))

        # Row 4: number of tiles requested per page.  Read-only for the user;
        # GetImage lowers it when the server delivers fewer tiles.
        self.labelAntallBilder = wx.StaticText(self.panel, label="Antall bilder per side",
                                               pos=(50, rowY[4]))
        self.inputAntallBilder = wx.TextCtrl(self.panel, value="8",
                                             pos=(190, rowY[4] - 3), size=(50, -1),
                                             style=wx.TE_READONLY)
        self.inputAntallBilder.SetMaxLength(1)

        # Row 5: action buttons, bound by their numeric ids.
        self.preButton = wx.Button(self.panel, 1, u'Forhåndsvis', (50, rowY[5]), (85, -1))
        self.Bind(wx.EVT_BUTTON, self.GetPreview, id=1)
        self.saveButton = wx.Button(self.panel, 2, 'Last ned', (175, rowY[5]), (65, -1))
        self.Bind(wx.EVT_BUTTON, self.DownloadBook, id=2)

        self.progress = wx.Gauge(self.panel, 100, pos=(1, 200), size=(285, -1))
        self.outputField = wx.TextCtrl(
            self.panel, pos=(1, 215), size=(285, 110),
            style=wx.TE_MULTILINE | wx.SUNKEN_BORDER | wx.TE_READONLY)

        # "<col><row>" of the tile with a given index: two columns, filled
        # row by row from the top.
        self.colrow = ["00", "10", "01", "11", "02", "12", "03", "13", "04", "14"]

    def DownloadBook(self, event):
        """Download the three covers and the selected page range to disk.

        Files are written to ``./<BokID>/<page>.jpg``.  Downloading stops at
        the first page for which the server returns no tiles at all.

        Args:
            event: the wx button event (unused).
        """
        wx.BeginBusyCursor()
        try:
            # Validate the page range up front so bad input is reported
            # before anything is downloaded.
            try:
                first_page = int(self.inputFraSide.GetValue())
                last_page = int(self.inputTilSide.GetValue())
            except ValueError:
                self.outputField.AppendText('Error\n')
                return

            folder = os.path.join(".", str(self.inputBokID.GetValue()))
            if not os.path.exists(folder):
                os.mkdir(folder)

            # Getting covers
            self.Covers = ["C1", "C2", "C3"]
            self.outputField.AppendText("Lagrer covers.\n")
            for cover in self.Covers:
                self.progress.SetValue(0)
                self.Page = cover
                self.URL = self.GetUrl()
                self.Image = self.GetImage()
                # Resetting image numbers, as it varies in covers
                self.inputAntallBilder.SetValue("8")
                if self.Image == "Done":
                    # No tile could be fetched for this cover; there is
                    # nothing to save, so report it and move on.
                    self.outputField.AppendText('Fant ikke ' + self.Page + '\n')
                    continue
                self.Image.save(os.path.join(folder, self.Page + ".jpg"))
                self.outputField.AppendText('Lagret ' + self.Page + '.jpg\n')

            # Getting pages
            page_count = last_page - first_page + 1
            for page in range(first_page, last_page + 1):
                wx.Yield()  # keep the GUI responsive between pages
                self.Page = str(page).rjust(4, "0")
                self.URL = self.GetUrl()
                self.Image = self.GetImage()
                if self.Image == "Done":
                    break
                try:
                    self.Image.save(os.path.join(folder, self.Page + ".jpg"))
                except Exception:
                    # Best effort: report the failed page and carry on.
                    self.outputField.AppendText('Error\n')
                else:
                    self.outputField.AppendText('Lagret ' + self.Page + '.jpg\n')
                self.progress.SetValue(
                    int(float((page - first_page + 1) * 100) / float(page_count)))
            self.progress.SetValue(100)
            self.outputField.AppendText('Alle sider er nedlastet!\n')
        finally:
            # Always restore the cursor, even when a download step raised.
            wx.EndBusyCursor()
            wx.Yield()

    def GetPreview(self, event):
        """Fetch one random page from the selected range and open it in a viewer.

        The stitched page is written to ``nbnoTest.jpg`` in the working
        directory and handed to the platform's image viewer.

        Args:
            event: the wx button event (unused).
        """
        self.inputAntallBilder.SetValue("8")
        self.outputField.SetValue('')
        try:
            first_page = int(self.inputFraSide.GetValue())
            last_page = int(self.inputTilSide.GetValue())
        except ValueError:
            self.outputField.AppendText('Error\n')
            return
        # randint is inclusive at both ends, so the upper bound is the last
        # page itself; sorting keeps a reversed range from raising.
        low, high = sorted((first_page, last_page))
        self.Page = str(randint(low, high)).rjust(4, "0")
        self.URL = self.GetUrl()
        self.Image = self.GetImage()
        if self.Image == "Done":
            # The server returned no tiles for this page.
            self.outputField.AppendText('Fant ikke side ' + self.Page + '\n')
            return
        self.Image.save("nbnoTest.jpg")
        self.outputField.AppendText(u'Forhåndsviser side ' + self.Page + '\n')

        # sys.platform is 'linux2' on Python 2, so match on the prefix.
        if sys.platform.startswith('linux'):
            imgFromCmd = 'eog'
        elif sys.platform == 'win32':
            imgFromCmd = 'explorer'
        elif sys.platform == 'darwin':
            imgFromCmd = 'open'
        else:
            imgFromCmd = None
        if imgFromCmd is None:
            # Unknown platform: the preview file exists, but no viewer is known.
            self.outputField.AppendText('Lagret nbnoTest.jpg\n')
            return
        try:
            subprocess.call([imgFromCmd, "nbnoTest.jpg"])
        except OSError:
            # The viewer executable is not installed.
            self.outputField.AppendText('Error\n')

    def GetUrl(self):
        """Build the tile URLs for ``self.Page``.

        Returns:
            A list with one resolver URL per tile; its length equals the
            value of the "Antall bilder per side" field.
        """
        tile_count = int(self.inputAntallBilder.GetValue())
        prefix = ("http://www.nb.no/services/image/resolver?url_ver=geneza"
                  "&urn=URN:NBN:no-nb_digibok_"
                  + str(self.inputBokID.GetValue())
                  + "_" + self.Page
                  + "&maxLevel=" + str(self.inputMaxLevel.GetValue())
                  + "&level=" + str(self.inputLevel.GetValue()))
        suffix = ("&resX=" + str(self.inputResX.GetValue())
                  + "&resY=" + str(self.inputResY.GetValue())
                  + "&tileWidth=1024&tileHeight=1024")
        URL = []
        for x in range(tile_count):
            # With exactly two tiles the second one is requested from the
            # next grid slot (col 0, row 1) instead of (col 1, row 0).
            if tile_count == 2 and x == 1:
                cell = self.colrow[x + 1]
            else:
                cell = self.colrow[x]
            URL.append(prefix + "&col=" + cell[0] + "&row=" + cell[1] + suffix)
        return URL

    def GetImage(self):
        """Download the tiles of the current page and stitch them together.

        Side effect: when some tiles failed to download, the "Antall bilder
        per side" field is lowered to the number that succeeded and a note is
        written to the output field.

        Returns:
            The stitched ``PIL.Image``, or the string ``"Done"`` when no tile
            at all could be fetched.
        """
        # Get image positions and new image size
        imP, imE = self.GetImageParts()
        usable = int(self.inputAntallBilder.GetValue()) - imE

        # No more pages found
        if usable == 0:
            return "Done"

        wiX, wiY = 0, 0
        positions = [None] * usable
        if usable >= 4:
            # Anything using 4 or more images: a two-column grid.
            for x in range(usable):
                col = int(self.colrow[x][0])
                row = int(self.colrow[x][1])
                # Offsets come from the sizes of earlier tiles.  For column 0
                # or row 0 the factor is 0, so the negative index that
                # x-1 / x-2 produce for the first tiles has no effect.
                positions[x] = (col * int(imP[x - 1].size[0]),
                                row * int(imP[x - 2].size[1]))
                if x % 2:
                    # Odd tiles form the second column; their heights add up
                    # to the canvas height.
                    wiY = wiY + imP[x].size[1]
                if x <= 1:
                    # The two tiles of the first row give the canvas width.
                    wiX = wiX + imP[x].size[0]
        elif usable == 3:
            # Cover 2 only uses 3 images, stacked vertically.
            positions[0] = (0, 0)
            positions[1] = (0, imP[0].size[1])
            positions[2] = (0, imP[0].size[1] + imP[1].size[1])
            wiX = imP[0].size[0]
            wiY = imP[0].size[1] + imP[1].size[1] + imP[2].size[1]
        else:
            # Anything using 2 or 1 image, stacked vertically.
            positions[0] = (0, 0)
            wiX = imP[0].size[0]
            wiY = imP[0].size[1]
            if len(imP) > 1:
                positions[1] = (0, imP[0].size[1])
                wiY = imP[0].size[1] + imP[1].size[1]

        # Create new image
        newImg = Image.new("RGB", (wiX, wiY))
        for x in range(usable):
            newImg.paste(imP[x], positions[x])
        if imE:
            self.outputField.AppendText("Bilder per side satt til " + str(usable) + "\n")
            self.inputAntallBilder.SetValue(str(usable))
        return newImg

    def GetImageParts(self):
        """Download every tile listed in ``self.URL``.

        Returns:
            A tuple ``(imageParts, imageErrors)``: the successfully opened
            tiles in request order, and the number of requests that failed
            with an HTTP error.
        """
        imageParts = []
        imageErrors = 0
        for x in range(int(self.inputAntallBilder.GetValue())):
            try:
                # The request carries a browser-like User-Agent header.
                req = urllib2.Request(self.URL[x], headers={'User-Agent': 'Mozilla/5.0'})
                response = urllib2.urlopen(req)
            except urllib2.HTTPError:
                imageErrors = imageErrors + 1
            else:
                try:
                    data = response.read()
                finally:
                    response.close()
                imageParts.append(Image.open(cStringIO.StringIO(data)))
            wx.Yield()  # keep the GUI responsive between tiles
        return imageParts, imageErrors
if __name__ == "__main__":
    # Script entry point: create the wx application, show the main window
    # and run the event loop until the window is closed.
    app = wx.App()
    mytitle = 'nb.no nedlaster'
    # Only win32 uses a wider frame.  Every other platform gets the default
    # width, so 'linux2' (Python 2 on Linux) and unlisted platforms no longer
    # raise KeyError.
    OSwidth = 293 if sys.platform == 'win32' else 285
    height = 355
    MainFrame(None, mytitle, (OSwidth, height)).Show()
    app.MainLoop()
@Lanjelin

This comment has been minimized.

Copy link
Owner Author

commented Jun 26, 2016

Fetching book title

import urllib2
from urllib2 import urlopen
from bs4 import BeautifulSoup as Soup

class MyHTTPRedirectHandler(urllib2.HTTPRedirectHandler):
    def http_error_302(self, req, fp, code, msg, headers):
        return urllib2.HTTPRedirectHandler.http_error_302(self, req, fp, code, msg, headers)
    http_error_301 = http_error_303 = http_error_307 = http_error_302

# Install a process-wide opener that follows redirects through
# MyHTTPRedirectHandler and keeps cookies between the requests.
cookieprocessor = urllib2.HTTPCookieProcessor()
opener = urllib2.build_opener(MyHTTPRedirectHandler, cookieprocessor)
urllib2.install_opener(opener)

url = "http://urn.nb.no/URN:NBN:no-nb_digibok_2012031908162"
response = urlopen(url)
try:
    soup = Soup(response, "lxml")
finally:
    # The document is fully parsed at this point; release the connection.
    response.close()

# The book title is stored in the <meta name="dc:title"> element.
for meta in soup.find_all("meta"):
    if meta.get("name", "") == "dc:title":
        print(meta.get("content", ""))

# Print the "pgtype" attribute of every <div class="rpage">.  Beautiful Soup
# returns the multi-valued "class" attribute as a list, so test membership
# instead of comparing against a string.
for div in soup.find_all("div"):
    if "rpage" in div.get("class", []):
        print(div.get("pgtype", ""))
#print cookieprocessor.cookiejar
@Lanjelin

This comment has been minimized.

Copy link
Owner Author

commented Dec 28, 2016

For bruk med http proxy, legg til ved linje 56

		# Send plain-http requests made through urllib2 via an HTTP proxy.
		# 'proxy_provider_ip:port' is a placeholder for the real proxy address.
		proxy = urllib2.ProxyHandler({'http': 'proxy_provider_ip:port'})
		opener = urllib2.build_opener(proxy)
		# Make this opener the default one used by urllib2.urlopen.
		urllib2.install_opener(opener)
@MrDemocracy

This comment has been minimized.

Copy link

commented Oct 8, 2017

Jeg får denne feilmeldingen:

Traceback (most recent call last):
  File "nbno.py", line 72, in DownloadBook
    self.Image.save("."+os.path.sep+str(self.inputBokID.GetValue())+os.path.sep+self.Page+".jpg")
AttributeError: 'str' object has no attribute 'save'
@Lanjelin

This comment has been minimized.

Copy link
Owner Author

commented Dec 4, 2017

Manglende useragent gjorde at de ikke tillot å hente bilder, oppdatert og fungerer.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.