Skip to content

Instantly share code, notes, and snippets.

@scturtle
Last active December 11, 2015 20:18
Show Gist options
  • Save scturtle/7d956feb12084e9c1fbe to your computer and use it in GitHub Desktop.
Save scturtle/7d956feb12084e9c1fbe to your computer and use it in GitHub Desktop.
steal book from reader.douban.com
// step 1
var content=$('.info')[0].innerHTML;
var last='';
var observer = new MutationObserver(function(e) {
var c=$('.inner .content:first')[0].innerHTML;
if(c!=last){
last = c;
content = content + '\n\n' + c;
$('.page-next').click();
}
});
var target = document.querySelector('.article .inner');
var config = { attributes: true, childList: true, characterData: true };
observer.observe(target, config);
// step 2
content=content+'\n\n'+$('.inner .content:last')[0].innerHTML;
var bb = new Blob([content], {type:'text/html'});
var a = document.createElement('a');
a.download = 'content.html';
a.href = window.URL.createObjectURL(bb);
a.textContent = 'Download';
a.classList.add('icon-fullscreen');
a.dataset.downloadurl = ['text/html', a.download, a.href].join(':');
$('.panel').append(a);
from bs4 import BeautifulSoup
soup = BeautifulSoup(open('content.html').read(), 'html.parser')
for s in soup.findAll('span'):
s.unwrap()
saw = set()
for p in soup.findAll('p'):
if 'data-pid' in p.attrs:
pid = p.attrs['data-pid']
if pid in saw:
p.extract()
saw.add(pid)
open('content2.html','w').write(soup.encode('utf-8'))
% COMMAND: pandoc -s 中国式青春.htm -o z.pdf --latex-engine=xelatex
% --template=mytemplate.tex --no-tex-ligatures -V title=中国式青春 -V author=今何在
\documentclass[$if(fontsize)$$fontsize$,$endif$$if(lang)$$lang$,$endif$$if(papersize)$$papersize$,$endif$$for(classoption)$$classoption$$sep$,$endfor$]{$documentclass$}
$if(fontfamily)$
\usepackage{$fontfamily$}
$else$
\usepackage{lmodern}
$endif$
$if(linestretch)$
\usepackage{setspace}
\setstretch{$linestretch$}
$endif$
\usepackage{amssymb,amsmath}
\usepackage{ifxetex,ifluatex}
\usepackage{fixltx2e} % provides \textsubscript
\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
\usepackage[T1]{fontenc}
\usepackage[utf8]{inputenc}
$if(euro)$
\usepackage{eurosym}
$endif$
\else % if luatex or xelatex
\ifxetex
\usepackage{mathspec}
\usepackage{xltxtra,xunicode}
\usepackage{xeCJK}
\setCJKmainfont{Hiragino Sans GB}
\setCJKsansfont{Droid Sans Fallback}
\setCJKmonofont{Droid Sans Fallback}
\else
\usepackage{fontspec}
\fi
%\defaultfontfeatures{Mapping=tex-text,Scale=MatchLowercase}
\newcommand{\euro}{€}
$if(mainfont)$
\setmainfont{$mainfont$}
$endif$
$if(sansfont)$
\setsansfont{$sansfont$}
$endif$
$if(monofont)$
\setmonofont[Mapping=tex-ansi]{$monofont$}
$endif$
$if(mathfont)$
\setmathfont(Digits,Latin,Greek){$mathfont$}
$endif$
\fi
% use upquote if available, for straight quotes in verbatim environments
\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
% use microtype if available
\IfFileExists{microtype.sty}{%
\usepackage{microtype}
\UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts
}{}
$if(geometry)$
\usepackage[$for(geometry)$$geometry$$sep$,$endfor$]{geometry}
$endif$
$if(lang)$
\ifxetex
\usepackage{polyglossia}
\setmainlanguage{$mainlang$}
\else
\usepackage[shorthands=off,$lang$]{babel}
\fi
$endif$
$if(natbib)$
\usepackage{natbib}
\bibliographystyle{$if(biblio-style)$$biblio-style$$else$plainnat$endif$}
$endif$
$if(biblatex)$
\usepackage{biblatex}
$if(biblio-files)$
\bibliography{$biblio-files$}
$endif$
$endif$
$if(listings)$
\usepackage{listings}
$endif$
$if(lhs)$
\lstnewenvironment{code}{\lstset{language=Haskell,basicstyle=\small\ttfamily}}{}
$endif$
$if(highlighting-macros)$
$highlighting-macros$
$endif$
$if(verbatim-in-note)$
\usepackage{fancyvrb}
\VerbatimFootnotes
$endif$
$if(tables)$
\usepackage{longtable,booktabs}
$endif$
$if(graphics)$
\usepackage{graphicx}
\makeatletter
\def\maxwidth{\ifdim\Gin@nat@width>\linewidth\linewidth\else\Gin@nat@width\fi}
\def\maxheight{\ifdim\Gin@nat@height>\textheight\textheight\else\Gin@nat@height\fi}
\makeatother
% Scale images if necessary, so that they will not overflow the page
% margins by default, and it is still possible to overwrite the defaults
% using explicit options in \includegraphics[width, height, ...]{}
\setkeys{Gin}{width=\maxwidth,height=\maxheight,keepaspectratio}
$endif$
\ifxetex
\usepackage[setpagesize=false, % page size defined by xetex
unicode=false, % unicode breaks when used with xetex
xetex]{hyperref}
\else
\usepackage[unicode=true]{hyperref}
\fi
\hypersetup{breaklinks=true,
bookmarks=true,
pdfauthor={$author-meta$},
pdftitle={$title-meta$},
colorlinks=true,
citecolor=$if(citecolor)$$citecolor$$else$blue$endif$,
urlcolor=$if(urlcolor)$$urlcolor$$else$blue$endif$,
linkcolor=$if(linkcolor)$$linkcolor$$else$magenta$endif$,
pdfborder={0 0 0}}
\urlstyle{same} % don't use monospace font for urls
$if(links-as-notes)$
% Make links footnotes instead of hotlinks:
\renewcommand{\href}[2]{#2\footnote{\url{#1}}}
$endif$
$if(strikeout)$
\usepackage[normalem]{ulem}
% avoid problems with \sout in headers with hyperref:
\pdfstringdefDisableCommands{\renewcommand{\sout}{}}
$endif$
\setlength{\parindent}{0pt}
\setlength{\parskip}{6pt plus 2pt minus 1pt}
\setlength{\emergencystretch}{3em} % prevent overfull lines
$if(numbersections)$
\setcounter{secnumdepth}{5}
$else$
\setcounter{secnumdepth}{0}
$endif$
$if(verbatim-in-note)$
\VerbatimFootnotes % allows verbatim text in footnotes
$endif$
$if(title)$
\title{$title$$if(subtitle)$\\\vspace{0.5em}{\large $subtitle$}$endif$}
$endif$
$if(author)$
\author{$for(author)$$author$$sep$ \and $endfor$}
$endif$
\date{$date$}
$for(header-includes)$
$header-includes$
$endfor$
\begin{document}
$if(title)$
\maketitle
$endif$
$if(abstract)$
\begin{abstract}
$abstract$
\end{abstract}
$endif$
$for(include-before)$
$include-before$
$endfor$
$if(toc)$
{
\hypersetup{linkcolor=black}
\setcounter{tocdepth}{$toc-depth$}
\tableofcontents
}
$endif$
$if(lot)$
\listoftables
$endif$
$if(lof)$
\listoffigures
$endif$
$body$
$if(natbib)$
$if(biblio-files)$
$if(biblio-title)$
$if(book-class)$
\renewcommand\bibname{$biblio-title$}
$else$
\renewcommand\refname{$biblio-title$}
$endif$
$endif$
\bibliography{$biblio-files$}
$endif$
$endif$
$if(biblatex)$
\printbibliography$if(biblio-title)$[title=$biblio-title$]$endif$
$endif$
$for(include-after)$
$include-after$
$endfor$
\end{document}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment