Skip to content

Instantly share code, notes, and snippets.

@Lanjelin

Lanjelin/nbno.py Secret

Last active March 2, 2021 09:13
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Lanjelin/3d7445c4e0bc0fc32559e7a4a436e046 to your computer and use it in GitHub Desktop.
Save Lanjelin/3d7445c4e0bc0fc32559e7a4a436e046 to your computer and use it in GitHub Desktop.
NB.no nedlaster
# -*- coding: utf-8 -*-
import wx, sys, subprocess, os
import urllib2
import StringIO
import cStringIO
from PIL import Image
from random import randint
class MainFrame(wx.Frame):
    """Main window of the nb.no book downloader.

    The window collects a book id, a page range and the tile parameters of
    the nb.no image resolver.  Every page is fetched as a set of tiles
    (see ``GetUrl`` / ``GetImageParts``), stitched into one image
    (``GetImage``) and either previewed (``GetPreview``) or saved to a
    folder named after the book id (``DownloadBook``).
    """

    # Sentinel returned by GetImage when not a single tile could be fetched.
    _DONE = "Done"

    # Tile count assumed for a page before the real count is known.
    _DEFAULT_TILES = "8"

    def __init__(self, parent, mytitle, mysize):
        """Build the window and all of its controls.

        parent  -- parent window, or None for a top-level frame
        mytitle -- window title
        mysize  -- (width, height) of the frame in pixels
        """
        wx.Frame.__init__(
            self, parent, wx.ID_ANY, mytitle, size=mysize,
            style=wx.DEFAULT_DIALOG_STYLE | wx.MINIMIZE_BOX | wx.TAB_TRAVERSAL)
        font = wx.Font(10, wx.SWISS, wx.NORMAL, wx.BOLD)
        # Optional window icon, currently disabled:
        #   http://www.nb.no/nbsok/resources/images/nblogo.png
        #   ico = wx.Icon('nbno.ico', wx.BITMAP_TYPE_ICO)
        #   self.SetIcon(ico)
        self.SetFont(font)
        self.SetBackgroundColour('#EEEEEE')
        self.panel = wx.Panel(self, size=(290, 355), style=wx.TAB_TRAVERSAL)

        # Vertical pixel position of each row of controls.
        rowY = [10, 40, 70, 100, 130, 160]

        # Row 0: book id.  The label is parented to the panel like every
        # other control (it used to be attached to the frame itself).
        self.labelBokID = wx.StaticText(self.panel, label="BokID", pos=(50, rowY[0]))
        self.inputBokID = wx.TextCtrl(self.panel, value="2012031908162", pos=(100, (rowY[0]-3)), size=(140, -1))

        # Row 1: first and last page to download.
        self.labelFraSide = wx.StaticText(self.panel, label="Fra side", pos=(40, rowY[1]))
        self.inputFraSide = wx.TextCtrl(self.panel, value="1", pos=(100, (rowY[1]-3)), size=(35, -1))
        self.labelTilSide = wx.StaticText(self.panel, label="Til side", pos=(150, rowY[1]))
        self.inputTilSide = wx.TextCtrl(self.panel, value="12", pos=(205, (rowY[1]-3)), size=(35, -1))

        # Row 2: "maxLevel" and "level" query parameters.
        self.labelMaxLevel = wx.StaticText(self.panel, label="maxLevel", pos=(30, rowY[2]))
        self.inputMaxLevel = wx.TextCtrl(self.panel, value="6", pos=(100, (rowY[2]-3)), size=(35, -1))
        self.inputMaxLevel.SetMaxLength(2)
        self.labelLevel = wx.StaticText(self.panel, label="level", pos=(150, rowY[2]))
        self.inputLevel = wx.TextCtrl(self.panel, value="6", pos=(205, (rowY[2]-3)), size=(35, -1))
        self.inputLevel.SetMaxLength(2)

        # Row 3: "resX" and "resY" query parameters.
        self.labelResX = wx.StaticText(self.panel, label="resX", pos=(50, rowY[3]))
        self.inputResX = wx.TextCtrl(self.panel, value="1744", pos=(85, (rowY[3]-3)), size=(50, -1))
        self.labelResY = wx.StaticText(self.panel, label="resY", pos=(150, rowY[3]))
        self.inputResY = wx.TextCtrl(self.panel, value="2872", pos=(190, (rowY[3]-3)), size=(50, -1))

        # Row 4: number of tiles per page (read-only, adjusted by GetImage).
        self.labelAntallBilder = wx.StaticText(self.panel, label="Antall bilder per side", pos=(50, rowY[4]))
        self.inputAntallBilder = wx.TextCtrl(self.panel, value="8", pos=(190, (rowY[4]-3)), size=(50, -1), style=wx.TE_READONLY)
        self.inputAntallBilder.SetMaxLength(1)

        # Row 5: action buttons, bound through their explicit ids 1 and 2.
        self.preButton = wx.Button(self.panel, 1, u'Forhåndsvis', (50, rowY[5]), (85, -1))
        self.Bind(wx.EVT_BUTTON, self.GetPreview, id=1)
        self.saveButton = wx.Button(self.panel, 2, 'Last ned', (175, rowY[5]), (65, -1))
        self.Bind(wx.EVT_BUTTON, self.DownloadBook, id=2)

        # NOTE(review): the second positional argument (100) is passed in the
        # window-id slot; presumably the gauge range was meant -- confirm.
        self.progress = wx.Gauge(self.panel, 100, pos=(1, 200), size=(285, -1))
        self.outputField = wx.TextCtrl(self.panel, pos=(1, 215), size=(285, 110), style=wx.TE_MULTILINE | wx.SUNKEN_BORDER | wx.TE_READONLY)

        # "<col><row>" index of every tile, in the order tiles are requested.
        self.colrow = ["00", "10", "01", "11", "02", "12", "03", "13", "04", "14"]

    def _IsDone(self, image):
        """Return True when ``image`` is the "nothing fetched" sentinel."""
        return isinstance(image, str) and image == self._DONE

    def _PageFile(self, folder):
        """Return the .jpg path of the current page inside ``folder``."""
        return folder + os.path.sep + self.Page + ".jpg"

    def DownloadBook(self, event):
        """Button handler: save the covers and the requested page range.

        Images are written as <Page>.jpg into ./<BokID>/.  The busy cursor
        is always restored, also when the download fails half-way.
        """
        wx.BeginBusyCursor()
        try:
            folder = "." + os.path.sep + str(self.inputBokID.GetValue())
            if not os.path.isdir(folder):
                os.mkdir(folder)
            self._DownloadCovers(folder)
            self._DownloadPages(folder)
            self.progress.SetValue(100)
            self.outputField.AppendText('Alle sider er nedlastet!\n')
        finally:
            wx.EndBusyCursor()
            wx.Yield()

    def _DownloadCovers(self, folder):
        """Save the cover images C1..C3, skipping any that cannot be fetched."""
        self.Covers = ["C1", "C2", "C3"]
        self.outputField.AppendText("Lagrer covers.\n")
        for cover in self.Covers:
            self.progress.SetValue(0)
            self.Page = cover
            self.URL = self.GetUrl()
            self.Image = self.GetImage()
            # Covers use a varying number of tiles; start the next request
            # from the default count again.
            self.inputAntallBilder.SetValue(self._DEFAULT_TILES)
            if self._IsDone(self.Image):
                # No tile could be fetched: there is nothing to save.  Calling
                # .save() on the sentinel string used to raise AttributeError.
                self.outputField.AppendText('Fant ikke ' + self.Page + ', hopper over.\n')
                continue
            self.Image.save(self._PageFile(folder))
            self.outputField.AppendText('Lagret ' + self.Page + '.jpg\n')

    def _DownloadPages(self, folder):
        """Save every page of the requested range, stopping at the first page
        for which no tile could be fetched."""
        first = int(self.inputFraSide.GetValue())
        last = int(self.inputTilSide.GetValue())
        total = last - first + 1
        for page in range(first, last + 1):
            wx.Yield()  # keep the GUI responsive between pages
            self.Page = str(page).rjust(4, "0")
            self.URL = self.GetUrl()
            self.Image = self.GetImage()
            if self._IsDone(self.Image):
                break
            try:
                self.Image.save(self._PageFile(folder))
            except Exception:
                # Best effort: report the failed page and carry on.
                self.outputField.AppendText('Error\n')
            else:
                self.outputField.AppendText('Lagret ' + self.Page + '.jpg\n')
            self.progress.SetValue(int((page - first + 1) * 100.0 / total))

    def GetPreview(self, event):
        """Button handler: fetch one random page of the range, save it as
        nbnoTest.jpg and open it in the platform's image viewer."""
        self.inputAntallBilder.SetValue(self._DEFAULT_TILES)
        self.outputField.SetValue('')
        first = int(self.inputFraSide.GetValue())
        last = int(self.inputTilSide.GetValue())
        # randint() includes both end points, so the upper bound is the last
        # page itself; max() guards against an inverted range.
        self.Page = str(randint(first, max(first, last))).rjust(4, "0")
        self.URL = self.GetUrl()
        self.Image = self.GetImage()
        if self._IsDone(self.Image):
            self.outputField.AppendText('Fant ikke side ' + self.Page + '\n')
            return
        self.Image.save("nbnoTest.jpg")
        self.outputField.AppendText(u'Forhåndsviser side ' + self.Page + '\n')

        viewers = {'linux': 'eog', 'win32': 'explorer', 'darwin': 'open'}
        # Python 2 reports Linux as 'linux2', Python 3 as 'linux'.
        platform_key = 'linux' if sys.platform.startswith('linux') else sys.platform
        imgFromCmd = viewers.get(platform_key)
        if imgFromCmd is None:
            self.outputField.AppendText('Lagret nbnoTest.jpg\n')
            return
        try:
            subprocess.call([imgFromCmd, "nbnoTest.jpg"])
        except OSError:
            # The viewer is not installed; the preview file still exists.
            self.outputField.AppendText('Lagret nbnoTest.jpg\n')

    def GetUrl(self):
        """Return the list of tile URLs for the current page (``self.Page``).

        One URL is built per tile, using the book id, level and resolution
        fields of the form and the col/row pair taken from ``self.colrow``.
        """
        tile_count = int(self.inputAntallBilder.GetValue())
        head = ("http://www.nb.no/services/image/resolver?url_ver=geneza&urn=URN:NBN:no-nb_digibok_"
                + str(self.inputBokID.GetValue())
                + "_" + self.Page
                + "&maxLevel=" + str(self.inputMaxLevel.GetValue())
                + "&level=" + str(self.inputLevel.GetValue()))
        tail = ("&resX=" + str(self.inputResX.GetValue())
                + "&resY=" + str(self.inputResY.GetValue())
                + "&tileWidth=1024&tileHeight=1024")
        urls = []
        for x in range(tile_count):
            # With exactly two tiles the second one is requested as col 0,
            # row 1 (colrow[2]) instead of col 1, row 0 (colrow[1]).
            if tile_count == 2 and x == 1:
                cell = self.colrow[x + 1]
            else:
                cell = self.colrow[x]
            urls.append(head + "&col=" + cell[0] + "&row=" + cell[1] + tail)
        return urls

    def GetImage(self):
        """Fetch the tiles of the current page and stitch them together.

        Returns the assembled RGB image, or the string "Done" when no tile
        could be fetched.  When some tiles were missing, the tiles-per-page
        field is lowered to the number actually received.
        """
        imP, imE = self.GetImageParts()
        tile_count = int(self.inputAntallBilder.GetValue()) - imE

        # No more pages found.
        if tile_count == 0:
            return self._DONE

        wiX, wiY = 0, 0
        positions = []
        if tile_count >= 4:
            # Two-column grid: the offset of a tile is its col/row index
            # multiplied by the size of a neighbouring tile.
            for x in range(tile_count):
                col = int(self.colrow[x][0])
                row = int(self.colrow[x][1])
                positions.append((col * int(imP[x-1].size[0]),
                                  row * int(imP[x-2].size[1])))
                if x % 2 == 1:
                    # One tile per row (the odd ones) adds to the height.
                    wiY = wiY + imP[x].size[1]
                if x <= 1:
                    # The two tiles of the first row give the width.
                    wiX = wiX + imP[x].size[0]
        elif tile_count == 3:
            # Cover 2 only uses 3 images, stacked vertically.
            positions = [(0, 0),
                         (0, imP[0].size[1]),
                         (0, imP[0].size[1] + imP[1].size[1])]
            wiX = imP[0].size[0]
            wiY = imP[0].size[1] + imP[1].size[1] + imP[2].size[1]
        else:
            # One tile, or two tiles stacked vertically.
            positions.append((0, 0))
            wiX = imP[0].size[0]
            wiY = imP[0].size[1]
            if len(imP) > 1:
                positions.append((0, imP[0].size[1]))
                wiY = imP[0].size[1] + imP[1].size[1]

        # Create the new image and paste every tile at its position.
        newImg = Image.new("RGB", (wiX, wiY))
        for x in range(tile_count):
            newImg.paste(imP[x], positions[x])
        if imE:
            self.outputField.AppendText("Bilder per side satt til " + str(tile_count) + "\n")
            self.inputAntallBilder.SetValue(str(tile_count))
        return newImg

    def GetImageParts(self):
        """Download every tile listed in ``self.URL``.

        Returns (images, errors): the successfully opened tiles in request
        order, and the number of requests that failed with an HTTP error.
        """
        imageParts = []
        imageErrors = 0
        for x in range(int(self.inputAntallBilder.GetValue())):
            # The request carries a browser User-Agent header.
            req = urllib2.Request(self.URL[x], headers={'User-Agent': 'Mozilla/5.0'})
            try:
                response = urllib2.urlopen(req)
            except urllib2.HTTPError:
                imageErrors = imageErrors + 1
            else:
                try:
                    data = response.read()
                finally:
                    response.close()
                imageParts.append(Image.open(cStringIO.StringIO(data)))
            wx.Yield()  # keep the GUI responsive between tiles
        return imageParts, imageErrors
if __name__ == "__main__":
    # Script entry point: create the application, show the window and run
    # the event loop.
    app = wx.App()
    mytitle = 'nb.no nedlaster'
    # Frame width per platform.  Python 2 reports Linux as 'linux2', so the
    # platform name is normalised first; unknown platforms use the default
    # width instead of raising KeyError.
    platform_widths = {'linux': 285, 'win32': 293, 'darwin': 285}
    platform_key = 'linux' if sys.platform.startswith('linux') else sys.platform
    OSwidth = platform_widths.get(platform_key, 285)
    height = 355
    MainFrame(None, mytitle, (OSwidth, height)).Show()
    app.MainLoop()
@Lanjelin
Copy link
Author

Lanjelin commented Jun 26, 2016

Fetching book title

import urllib2
from urllib2 import urlopen
from bs4 import BeautifulSoup as Soup

class MyHTTPRedirectHandler(urllib2.HTTPRedirectHandler):
    """Redirect handler that answers 301, 302, 303 and 307 responses by
    calling the base class's 302 handling."""

    def http_error_302(self, req, fp, code, msg, headers):
        """Hand the redirect to urllib2.HTTPRedirectHandler and return its result."""
        base_handler = urllib2.HTTPRedirectHandler.http_error_302
        redirected = base_handler(self, req, fp, code, msg, headers)
        return redirected

    # The remaining redirect status codes share the method above.
    http_error_301 = http_error_302
    http_error_303 = http_error_302
    http_error_307 = http_error_302

# Install a global opener that uses the redirect handler above together
# with a cookie processor.
cookieprocessor = urllib2.HTTPCookieProcessor()
opener = urllib2.build_opener(MyHTTPRedirectHandler, cookieprocessor)
urllib2.install_opener(opener)

url = "http://urn.nb.no/URN:NBN:no-nb_digibok_2012031908162"
response = urlopen(url)
try:
    soup = Soup(response, "lxml")
finally:
    # The document is parsed by now; release the connection.
    response.close()

# Print the content of every <meta name="dc:title"> element.
for meta in soup.find_all("meta"):
    if meta.get("name", "") == "dc:title":
        print(meta.get("content", ""))

# Print the "pgtype" attribute of every <div> carrying the class "rpage".
# The HTML parsers return "class" as a list of class names, so membership
# is tested instead of comparing against a single string.
for div in soup.find_all("div"):
    if "rpage" in (div.get("class") or []):
        print(div.get("pgtype", ""))
# Debug aid: print(cookieprocessor.cookiejar)

@Lanjelin
Copy link
Author

Lanjelin commented Dec 28, 2016

For bruk med http proxy, legg til ved linje 56

		# Send urllib2's HTTP requests through a proxy; replace the
		# placeholder below with the real "host:port" of the proxy.
		proxy_map = {'http': 'proxy_provider_ip:port'}
		proxy_opener = urllib2.build_opener(urllib2.ProxyHandler(proxy_map))
		urllib2.install_opener(proxy_opener)

@MrDemocracy
Copy link

Jeg får denne feilmeldingen:

Traceback (most recent call last):
File "nbno.py", line 72, in DownloadBook self.Image.save("."+os.path.sep+str(self.inputBokID.GetValue())+os.path.sep+self.Page+".jpg")
AttributeError: 'str' object has no attribute 'save'

@Lanjelin
Copy link
Author

Lanjelin commented Dec 4, 2017

Manglende User-Agent gjorde at serveren ikke tillot henting av bilder. Skriptet er oppdatert og fungerer nå.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment