% references.bib
% Saved from GitHub Gist failable/ea8008b52901ab74da6f860002403270 (created October 17, 2020).
% NOTE: the gist page this file was copied from was truncated, so some entries
% may be missing from this copy.
@inproceedings{turian-2010-word-repres,
  author       = {Turian, Joseph and Ratinov, Lev and Bengio, Yoshua},
  title        = {Word Representations: A Simple and General Method for Semi-Supervised Learning},
  booktitle    = {Proceedings of the 48th Annual Meeting of the Association for Computational Linguistics},
  pages        = {384--394},
  year         = {2010},
  organization = {Association for Computational Linguistics},
  abstract     = {If we take an existing supervised NLP system, a simple and general way to improve accuracy is to use unsupervised word representations as extra word features. We evaluate Brown clusters, Collobert and Weston (2008) embeddings, and HLBL (Mnih \& Hinton, 2009) embeddings of words on both NER and chunking. We use near state-of-the-art supervised baselines, and find that each of the three word representations improves the accuracy of these baselines. We find further improvements by combining different word representations. You can download our word features, for off-the-shelf use in existing NLP systems, as well as our code, here: \url{http://metaoptimize.com/projects/wordreprs}},
}

@inproceedings{levy-2014-depend-based,
  author    = {Levy, Omer and Goldberg, Yoav},
  title     = {Dependency-Based Word Embeddings},
  booktitle = {ACL (2)},
  pages     = {302--308},
  year      = {2014},
}

@article{bengio-2008-neural-net,
  author  = {Bengio, Yoshua},
  title   = {Neural Net Language Models},
  journal = {Scholarpedia},
  volume  = {3},
  number  = {1},
  pages   = {3881},
  year    = {2008},
}

@phdthesis{sahlgren-2006-the-word-space-model,
  author = {Sahlgren, Magnus},
  title  = {The Word-Space Model: Using Distributional Analysis to Represent Syntagmatic and Paradigmatic Relations between Words in High-Dimensional Vector Spaces},
  school = {Stockholm University, Institutionen f{\"o}r lingvistik},
  year   = {2006},
}

@inproceedings{pereira-1993-dist-cluster,
  author       = {Pereira, Fernando and Tishby, Naftali and Lee, Lillian},
  title        = {Distributional Clustering of {English} Words},
  booktitle    = {Proceedings of the 31st Annual Meeting on Association for Computational Linguistics},
  pages        = {183--190},
  year         = {1993},
  organization = {Association for Computational Linguistics},
}

@article{freund-1999-short,
  author    = {Freund, Yoav and Schapire, Robert and Abe, N},
  title     = {A Short Introduction to Boosting},
  journal   = {Journal-Japanese Society For Artificial Intelligence},
  volume    = {14},
  number    = {5},
  pages     = {771--780},
  year      = {1999},
  publisher = {JAPANESE SOC ARTIFICIAL INTELL},
}
@inproceedings{caruana-2006-empirical-compari,
  author       = {Caruana, Rich and Niculescu-Mizil, Alexandru},
  title        = {An Empirical Comparison of Supervised Learning Algorithms},
  booktitle    = {Proceedings of the 23rd International Conference on Machine Learning},
  pages        = {161--168},
  year         = {2006},
  organization = {ACM},
}

@article{natekin-2013-gradient-boosting,
  author    = {Natekin, Alexey and Knoll, Alois},
  title     = {Gradient Boosting Machines, a Tutorial},
  journal   = {Frontiers in Neurorobotics},
  volume    = {7},
  year      = {2013},
  publisher = {Frontiers Media SA},
  url       = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3885826/},
}

@article{loh-2011-classification-regression,
  author    = {Loh, Wei-Yin},
  title     = {Classification and Regression Trees},
  journal   = {Wiley Interdisciplinary Reviews: Data Mining and Knowledge Discovery},
  volume    = {1},
  number    = {1},
  pages     = {14--23},
  year      = {2011},
  publisher = {Wiley Online Library},
}

@inproceedings{chen-2015-higgs-boson,
  author    = {Chen, Tianqi and He, Tong},
  title     = {Higgs Boson Discovery with Boosted Trees},
  booktitle = {Cowan et al., editor, {JMLR}: Workshop and Conference Proceedings},
  number    = {42},
  pages     = {69--80},
  year      = {2015},
}

@inproceedings{gutmann-2010-noise-contra-estima,
  author    = {Gutmann, Michael and Hyv{\"a}rinen, Aapo},
  title     = {Noise-Contrastive Estimation: A New Estimation Principle for Unnormalized Statistical Models},
  booktitle = {AISTATS},
  volume    = {1},
  number    = {2},
  pages     = {6},
  year      = {2010},
}

@phdthesis{sutskever-2013-training-recurrent,
  author = {Sutskever, Ilya},
  title  = {Training Recurrent Neural Networks},
  school = {University of Toronto},
  year   = {2013},
}
@inproceedings{szegedy-2015-going-deeper,
  author    = {Szegedy, Christian and Liu, Wei and Jia, Yangqing and Sermanet, Pierre and Reed, Scott and Anguelov, Dragomir and Erhan, Dumitru and Vanhoucke, Vincent and Rabinovich, Andrew},
  title     = {Going Deeper with Convolutions},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {1--9},
  year      = {2015},
}

@inproceedings{krizhevsky-2012-imagenet-classification,
  author    = {Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E},
  title     = {{ImageNet} Classification with Deep Convolutional Neural Networks},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {1097--1105},
  year      = {2012},
}

@article{he-2009-learning-from-imbalanced,
  author    = {He, Haibo and Garcia, Edwardo A},
  title     = {Learning from Imbalanced Data},
  journal   = {IEEE Transactions on Knowledge and Data Engineering},
  volume    = {21},
  number    = {9},
  pages     = {1263--1284},
  year      = {2009},
  publisher = {IEEE},
}

@article{zaremba-2015-empir-explor,
  author = {Zaremba, Wojciech},
  title  = {An Empirical Exploration of Recurrent Network Architectures},
  year   = {2015},
}

@article{friedman-2001-greedy-func-approx,
  author    = {Friedman, Jerome H},
  title     = {Greedy Function Approximation: A Gradient Boosting Machine},
  journal   = {Annals of Statistics},
  pages     = {1189--1232},
  year      = {2001},
  publisher = {JSTOR},
}

@article{friedman-2002-stochastic-gradient-boost,
  author    = {Friedman, Jerome H},
  title     = {Stochastic Gradient Boosting},
  journal   = {Computational Statistics \& Data Analysis},
  volume    = {38},
  number    = {4},
  pages     = {367--378},
  year      = {2002},
  publisher = {Elsevier},
}

@article{friedman-2000-additive-logistic-regression,
  author    = {Friedman, Jerome and Hastie, Trevor and Tibshirani, Robert and others},
  title     = {Additive Logistic Regression: A Statistical View of Boosting (with Discussion and a Rejoinder by the Authors)},
  journal   = {The Annals of Statistics},
  volume    = {28},
  number    = {2},
  pages     = {337--407},
  year      = {2000},
  publisher = {Institute of Mathematical Statistics},
}

@inproceedings{greenwald-2001-space-efficient-online,
  author       = {Greenwald, Michael and Khanna, Sanjeev},
  title        = {Space-Efficient Online Computation of Quantile Summaries},
  booktitle    = {ACM SIGMOD Record},
  volume       = {30},
  number       = {2},
  pages        = {58--66},
  year         = {2001},
  organization = {ACM},
}
@inproceedings{zhang-2007-fast-algorithm,
  author       = {Zhang, Qi and Wang, Wei},
  title        = {A Fast Algorithm for Approximate Quantiles in High Speed Data Streams},
  booktitle    = {19th International Conference on Scientific and Statistical Database Management ({SSDBM} 2007)},
  pages        = {29--29},
  year         = {2007},
  organization = {IEEE},
}

@article{greenwald-2016-quant-equid,
  author    = {Greenwald, Michael B and Khanna, Sanjeev},
  title     = {Quantiles and Equidepth Histograms Over Streams},
  journal   = {Data Stream Management: Processing High-Speed Data Streams. Springer},
  year      = {2016},
  publisher = {Citeseer},
}

@article{goldberg-2014-explain,
  author        = {{Goldberg}, Y. and {Levy}, O.},
  title         = {{word2vec} Explained: Deriving {Mikolov} et al.'s Negative-Sampling Word-Embedding Method},
  journal       = {ArXiv e-prints},
  eprint        = {1402.3722},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  keywords      = {Computer Science - Computation and Language, Computer Science - Learning, Statistics - Machine Learning},
  month         = feb,
  year          = {2014},
  adsurl        = {http://adsabs.harvard.edu/abs/2014arXiv1402.3722G},
  adsnote       = {Provided by the SAO/NASA Astrophysics Data System},
}

@article{turney-2010-from-frequen-to-meanin,
  author        = {{Turney}, P.~D. and {Pantel}, P.},
  title         = {From Frequency to Meaning: Vector Space Models of Semantics},
  journal       = {ArXiv e-prints},
  eprint        = {1003.1141},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  keywords      = {Computer Science - Computation and Language, Computer Science - Information Retrieval, Computer Science - Learning, H.3.1, I.2.6, I.2.7},
  month         = mar,
  year          = {2010},
  adsurl        = {http://adsabs.harvard.edu/abs/2010arXiv1003.1141T},
  adsnote       = {Provided by the SAO/NASA Astrophysics Data System},
}

@article{zaremba-2014-recur-neural-networ-regul,
  author        = {{Zaremba}, W. and {Sutskever}, I. and {Vinyals}, O.},
  title         = {Recurrent Neural Network Regularization},
  journal       = {ArXiv e-prints},
  eprint        = {1409.2329},
  archiveprefix = {arXiv},
  keywords      = {Computer Science - Neural and Evolutionary Computing},
  month         = sep,
  year          = {2014},
  adsurl        = {http://adsabs.harvard.edu/abs/2014arXiv1409.2329Z},
  adsnote       = {Provided by the SAO/NASA Astrophysics Data System},
}
@article{cho-2014-encoder-decoder,
  author        = {{Cho}, K. and {van Merrienboer}, B. and {Gulcehre}, C. and {Bahdanau}, D. and {Bougares}, F. and {Schwenk}, H. and {Bengio}, Y.},
  title         = {Learning Phrase Representations Using {RNN} Encoder-Decoder for Statistical Machine Translation},
  journal       = {ArXiv e-prints},
  eprint        = {1406.1078},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  keywords      = {Computer Science - Computation and Language, Computer Science - Learning, Computer Science - Neural and Evolutionary Computing, Statistics - Machine Learning},
  month         = jun,
  year          = {2014},
  adsurl        = {http://adsabs.harvard.edu/abs/2014arXiv1406.1078C},
  adsnote       = {Provided by the SAO/NASA Astrophysics Data System},
}

@article{sutskever-2014-seq2seq,
  author        = {{Sutskever}, I. and {Vinyals}, O. and {Le}, Q.~V.},
  title         = {Sequence to Sequence Learning with Neural Networks},
  journal       = {ArXiv e-prints},
  eprint        = {1409.3215},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  keywords      = {Computer Science - Computation and Language, Computer Science - Learning},
  month         = sep,
  year          = {2014},
  adsurl        = {http://adsabs.harvard.edu/abs/2014arXiv1409.3215S},
  adsnote       = {Provided by the SAO/NASA Astrophysics Data System},
}

@article{bengio-2015-schedule-sampling,
  author        = {{Bengio}, S. and {Vinyals}, O. and {Jaitly}, N. and {Shazeer}, N.},
  title         = {Scheduled Sampling for Sequence Prediction with Recurrent Neural Networks},
  journal       = {ArXiv e-prints},
  eprint        = {1506.03099},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
  keywords      = {Computer Science - Learning, Computer Science - Computation and Language, Computer Science - Computer Vision and Pattern Recognition},
  month         = jun,
  year          = {2015},
  adsurl        = {http://adsabs.harvard.edu/abs/2015arXiv150603099B},
  adsnote       = {Provided by the SAO/NASA Astrophysics Data System},
}

@article{jean-2014-using-very,
  author        = {{Jean}, S. and {Cho}, K. and {Memisevic}, R. and {Bengio}, Y.},
  title         = {On Using Very Large Target Vocabulary for Neural Machine Translation},
  journal       = {ArXiv e-prints},
  eprint        = {1412.2007},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  keywords      = {Computer Science - Computation and Language},
  month         = dec,
  year          = {2014},
  adsurl        = {http://adsabs.harvard.edu/abs/2014arXiv1412.2007J},
  adsnote       = {Provided by the SAO/NASA Astrophysics Data System},
}

@article{chen-2016-xgboos,
  author        = {{Chen}, T. and {Guestrin}, C.},
  title         = {{XGBoost}: A Scalable Tree Boosting System},
  journal       = {ArXiv e-prints},
  eprint        = {1603.02754},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
  keywords      = {Computer Science - Learning},
  month         = mar,
  year          = {2016},
  adsurl        = {http://adsabs.harvard.edu/abs/2016arXiv160302754C},
  adsnote       = {Provided by the SAO/NASA Astrophysics Data System},
}
@article{kawaguchi-2016-deep-learn,
  author        = {{Kawaguchi}, K.},
  title         = {Deep Learning Without Poor Local Minima},
  journal       = {ArXiv e-prints},
  eprint        = {1605.07110},
  archiveprefix = {arXiv},
  primaryclass  = {stat.ML},
  keywords      = {Statistics - Machine Learning, Computer Science - Learning, Mathematics - Optimization and Control},
  month         = may,
  year          = {2016},
  adsurl        = {http://adsabs.harvard.edu/abs/2016arXiv160507110K},
  adsnote       = {Provided by the SAO/NASA Astrophysics Data System},
}

@article{ruder-2016-overv-gradien,
  author        = {{Ruder}, S.},
  title         = {An Overview of Gradient Descent Optimization Algorithms},
  journal       = {ArXiv e-prints},
  eprint        = {1609.04747},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
  keywords      = {Computer Science - Learning},
  month         = sep,
  year          = {2016},
  adsurl        = {http://adsabs.harvard.edu/abs/2016arXiv160904747R},
  adsnote       = {Provided by the SAO/NASA Astrophysics Data System},
}

@article{zeiler-2012-adadel,
  author        = {{Zeiler}, M.~D.},
  title         = {{ADADELTA}: An Adaptive Learning Rate Method},
  journal       = {ArXiv e-prints},
  eprint        = {1212.5701},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
  keywords      = {Computer Science - Learning},
  month         = dec,
  year          = {2012},
  adsurl        = {http://adsabs.harvard.edu/abs/2012arXiv1212.5701Z},
  adsnote       = {Provided by the SAO/NASA Astrophysics Data System},
}

@article{bengio-2012-advan-optim-recur-networ,
  author        = {{Bengio}, Y. and {Boulanger-Lewandowski}, N. and {Pascanu}, R.},
  title         = {Advances in Optimizing Recurrent Networks},
  journal       = {ArXiv e-prints},
  eprint        = {1212.0901},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
  keywords      = {Computer Science - Learning},
  month         = dec,
  year          = {2012},
  adsurl        = {http://adsabs.harvard.edu/abs/2012arXiv1212.0901B},
  adsnote       = {Provided by the SAO/NASA Astrophysics Data System},
}

@article{he-2015-deep-resid,
  author        = {{He}, K. and {Zhang}, X. and {Ren}, S. and {Sun}, J.},
  title         = {Deep Residual Learning for Image Recognition},
  journal       = {ArXiv e-prints},
  eprint        = {1512.03385},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CV},
  keywords      = {Computer Science - Computer Vision and Pattern Recognition},
  month         = dec,
  year          = {2015},
  adsurl        = {http://adsabs.harvard.edu/abs/2015arXiv151203385H},
  adsnote       = {Provided by the SAO/NASA Astrophysics Data System},
}

@article{simonyan-2014-very-deep,
  author        = {{Simonyan}, K. and {Zisserman}, A.},
  title         = {Very Deep Convolutional Networks for Large-Scale Image Recognition},
  journal       = {ArXiv e-prints},
  eprint        = {1409.1556},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CV},
  keywords      = {Computer Science - Computer Vision and Pattern Recognition},
  month         = sep,
  year          = {2014},
  adsurl        = {http://adsabs.harvard.edu/abs/2014arXiv1409.1556S},
  adsnote       = {Provided by the SAO/NASA Astrophysics Data System},
}

@article{lin-2013-networ-in-networ,
  author        = {{Lin}, M. and {Chen}, Q. and {Yan}, S.},
  title         = {Network in Network},
  journal       = {ArXiv e-prints},
  eprint        = {1312.4400},
  archiveprefix = {arXiv},
  keywords      = {Computer Science - Neural and Evolutionary Computing, Computer Science - Computer Vision and Pattern Recognition, Computer Science - Learning},
  month         = dec,
  year          = {2013},
  adsurl        = {http://adsabs.harvard.edu/abs/2013arXiv1312.4400L},
  adsnote       = {Provided by the SAO/NASA Astrophysics Data System},
}
@article{montufar-2014-number-linear,
  author        = {{Mont{\'u}far}, G. and {Pascanu}, R. and {Cho}, K. and {Bengio}, Y.},
  title         = {On the Number of Linear Regions of Deep Neural Networks},
  journal       = {ArXiv e-prints},
  eprint        = {1402.1869},
  archiveprefix = {arXiv},
  primaryclass  = {stat.ML},
  keywords      = {Statistics - Machine Learning, Computer Science - Learning, Computer Science - Neural and Evolutionary Computing},
  month         = feb,
  year          = {2014},
  adsurl        = {http://adsabs.harvard.edu/abs/2014arXiv1402.1869M},
  adsnote       = {Provided by the SAO/NASA Astrophysics Data System},
}

@article{luxburg-2010-clust-stabil,
  author        = {{von Luxburg}, U.},
  title         = {Clustering Stability: An Overview},
  journal       = {ArXiv e-prints},
  eprint        = {1007.1075},
  archiveprefix = {arXiv},
  primaryclass  = {stat.ML},
  keywords      = {Statistics - Machine Learning},
  month         = jul,
  year          = {2010},
  adsurl        = {http://adsabs.harvard.edu/abs/2010arXiv1007.1075V},
  adsnote       = {Provided by the SAO/NASA Astrophysics Data System},
}

@article{shah-2014-bayes-regres-bitcoin,
  author        = {{Shah}, D. and {Zhang}, K.},
  title         = {{Bayesian} Regression and {Bitcoin}},
  journal       = {ArXiv e-prints},
  eprint        = {1410.1231},
  archiveprefix = {arXiv},
  primaryclass  = {cs.AI},
  keywords      = {Computer Science - Artificial Intelligence, Mathematics - Statistics Theory},
  month         = oct,
  year          = {2014},
  adsurl        = {http://adsabs.harvard.edu/abs/2014arXiv1410.1231S},
  adsnote       = {Provided by the SAO/NASA Astrophysics Data System},
}

@article{domingos-2012-few-useful-things,
  author    = {Domingos, Pedro},
  title     = {A Few Useful Things to Know about Machine Learning},
  journal   = {Communications of the ACM},
  volume    = {55},
  number    = {10},
  pages     = {78--87},
  year      = {2012},
  publisher = {ACM},
}

@article{thakur-2015-autoc,
  author        = {{Thakur}, A. and {Krohn-Grimberghe}, A.},
  title         = {{AutoCompete}: A Framework for Machine Learning Competition},
  journal       = {ArXiv e-prints},
  eprint        = {1507.02188},
  archiveprefix = {arXiv},
  primaryclass  = {stat.ML},
  keywords      = {Statistics - Machine Learning, Computer Science - Learning},
  month         = jul,
  year          = {2015},
  adsurl        = {http://adsabs.harvard.edu/abs/2015arXiv150702188T},
  adsnote       = {Provided by the SAO/NASA Astrophysics Data System},
}

@article{huang-1998-k-modes,
  author    = {Huang, Zhexue},
  title     = {Extensions to the k-Means Algorithm for Clustering Large Data Sets with Categorical Values},
  journal   = {Data Mining and Knowledge Discovery},
  volume    = {2},
  number    = {3},
  pages     = {283--304},
  year      = {1998},
  publisher = {Springer},
}
@inproceedings{he-2006-approximation-algorithms,
  author       = {He, Zengyou and Deng, Shengchun and Xu, Xiaofei},
  title        = {Approximation Algorithms for k-Modes Clustering},
  booktitle    = {International Conference on Intelligent Computing},
  pages        = {296--302},
  year         = {2006},
  organization = {Springer},
}

@inproceedings{plant-2011-inconco-interp-cluster,
  author       = {Plant, Claudia and B{\"o}hm, Christian},
  title        = {{Inconco}: Interpretable Clustering of Numerical and Categorical Objects},
  booktitle    = {Proceedings of the 17th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},
  pages        = {1127--1135},
  year         = {2011},
  organization = {ACM},
}

@article{kim-2004-fuzzy-cluster,
  author    = {Kim, Dae-Won and Lee, Kwang H and Lee, Doheon},
  title     = {Fuzzy Clustering of Categorical Data Using Fuzzy Centroids},
  journal   = {Pattern Recognition Letters},
  volume    = {25},
  number    = {11},
  pages     = {1263--1271},
  year      = {2004},
  publisher = {Elsevier},
}

@article{guha-2000-rock-robust-cluster,
  author    = {Guha, Sudipto and Rastogi, Rajeev and Shim, Kyuseok},
  title     = {{ROCK}: A Robust Clustering Algorithm for Categorical Attributes},
  journal   = {Information Systems},
  volume    = {25},
  number    = {5},
  pages     = {345--366},
  year      = {2000},
  publisher = {Elsevier},
}

@inproceedings{louppe-2013-understanding-variable-import,
  author    = {Louppe, Gilles and Wehenkel, Louis and Sutera, Antonio and Geurts, Pierre},
  title     = {Understanding Variable Importances in Forests of Randomized Trees},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {431--439},
  year      = {2013},
}

@article{gelman-2008-scaling-regress-inputs,
  author    = {Gelman, Andrew},
  title     = {Scaling Regression Inputs by Dividing by Two Standard Deviations},
  journal   = {Statistics in Medicine},
  volume    = {27},
  number    = {15},
  pages     = {2865--2873},
  year      = {2008},
  publisher = {Wiley Online Library},
}

@article{reshef-2011-detecting-novel-assoc,
  author    = {Reshef, David N and Reshef, Yakir A and Finucane, Hilary K and Grossman, Sharon R and McVean, Gilean and Turnbaugh, Peter J and Lander, Eric S and Mitzenmacher, Michael and Sabeti, Pardis C},
  title     = {Detecting Novel Associations in Large Data Sets},
  journal   = {Science},
  volume    = {334},
  number    = {6062},
  pages     = {1518--1524},
  year      = {2011},
  publisher = {American Association for the Advancement of Science},
}
@article{cawley-2010-over-fitting,
  author  = {Cawley, Gavin C and Talbot, Nicola LC},
  title   = {On Over-Fitting in Model Selection and Subsequent Selection Bias in Performance Evaluation},
  journal = {Journal of Machine Learning Research},
  volume  = {11},
  number  = {Jul},
  pages   = {2079--2107},
  year    = {2010},
}

@article{varma-2006-bias-error-estim,
  author    = {Varma, Sudhir and Simon, Richard},
  title     = {Bias in Error Estimation when Using Cross-Validation for Model Selection},
  journal   = {BMC Bioinformatics},
  volume    = {7},
  number    = {1},
  pages     = {91},
  year      = {2006},
  publisher = {BioMed Central},
}

@article{heaton-2016-deep-learn-finan,
  author        = {{Heaton}, J.~B. and {Polson}, N.~G. and {Witte}, J.~H.},
  title         = {Deep Learning in Finance},
  journal       = {ArXiv e-prints},
  eprint        = {1602.06561},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
  keywords      = {Computer Science - Learning},
  month         = feb,
  year          = {2016},
  adsurl        = {http://adsabs.harvard.edu/abs/2016arXiv160206561H},
  adsnote       = {Provided by the SAO/NASA Astrophysics Data System},
}

@article{sirignano-2016-deep-learn-mortg-risk,
  author        = {{Sirignano}, J. and {Sadhwani}, A. and {Giesecke}, K.},
  title         = {Deep Learning for Mortgage Risk},
  journal       = {ArXiv e-prints},
  eprint        = {1607.02470},
  archiveprefix = {arXiv},
  primaryclass  = {q-fin.ST},
  keywords      = {Quantitative Finance - Statistical Finance},
  month         = jul,
  year          = {2016},
  adsurl        = {http://adsabs.harvard.edu/abs/2016arXiv160702470S},
  adsnote       = {Provided by the SAO/NASA Astrophysics Data System},
}

@article{heaton-2016-deep-learning-finance,
  author    = {Heaton, JB and Polson, NG and Witte, Jan Hendrik},
  title     = {Deep Learning for Finance: Deep Portfolios},
  journal   = {Applied Stochastic Models in Business and Industry},
  year      = {2016},
  publisher = {Wiley Online Library},
}

@article{dixon-2016-class-based,
  author        = {{Dixon}, M. and {Klabjan}, D. and {Bang}, J.~H.},
  title         = {Classification-Based Financial Markets Prediction Using Deep Neural Networks},
  journal       = {ArXiv e-prints},
  eprint        = {1603.08604},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
  keywords      = {Computer Science - Learning, Computer Science - Computational Engineering, Finance, and Science},
  month         = mar,
  year          = {2016},
  adsurl        = {http://adsabs.harvard.edu/abs/2016arXiv160308604D},
  adsnote       = {Provided by the SAO/NASA Astrophysics Data System},
}

@article{langkvist-2014-review-unsuper-feature,
  author    = {L{\"a}ngkvist, Martin and Karlsson, Lars and Loutfi, Amy},
  title     = {A Review of Unsupervised Feature Learning and Deep Learning for Time-Series Modeling},
  journal   = {Pattern Recognition Letters},
  volume    = {42},
  pages     = {11--24},
  year      = {2014},
  publisher = {Elsevier},
}
@article{qiu-2016-predicting-direction,
  author    = {Qiu, Mingyue and Song, Yu},
  title     = {Predicting the Direction of Stock Market Index Movement Using an Optimized Artificial Neural Network Model},
  journal   = {PLoS One},
  volume    = {11},
  number    = {5},
  year      = {2016},
  publisher = {Public Library of Science},
}

@inproceedings{yang-2016-ensemble-model-stock,
  author       = {Yang, Jian and Rao, Ruonan and Hong, Pei and Ding, Peng},
  title        = {Ensemble Model for Stock Price Movement Trend Prediction on Different Investing Periods},
  booktitle    = {2016 12th International Conference on Computational Intelligence and Security (CIS)},
  pages        = {358--361},
  year         = {2016},
  organization = {IEEE},
}

@article{lecun-2015-deep-learning,
  author    = {LeCun, Yann and Bengio, Yoshua and Hinton, Geoffrey},
  title     = {Deep Learning},
  journal   = {Nature},
  volume    = {521},
  number    = {7553},
  pages     = {436--444},
  year      = {2015},
  publisher = {Nature Research},
}

@article{bollen-2010-twitt-mood,
  author        = {{Bollen}, J. and {Mao}, H. and {Zeng}, X.-J.},
  title         = {{Twitter} Mood Predicts the Stock Market},
  journal       = {ArXiv e-prints},
  eprint        = {1010.3003},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CE},
  keywords      = {Computer Science - Computational Engineering, Finance, and Science, Computer Science - Computation and Language, Computer Science - Social and Information Networks, Physics - Physics and Society},
  month         = oct,
  year          = {2010},
  adsurl        = {http://adsabs.harvard.edu/abs/2010arXiv1010.3003B},
  adsnote       = {Provided by the SAO/NASA Astrophysics Data System},
}

@article{goerg-2012-forec-compon-analy-forec,
  author        = {{Goerg}, G.~M.},
  title         = {Forecastable Component Analysis ({ForeCA})},
  journal       = {ArXiv e-prints},
  eprint        = {1205.4591},
  archiveprefix = {arXiv},
  primaryclass  = {stat.ME},
  keywords      = {Statistics - Methodology, Statistics - Machine Learning},
  month         = may,
  year          = {2012},
  adsurl        = {http://adsabs.harvard.edu/abs/2012arXiv1205.4591G},
  adsnote       = {Provided by the SAO/NASA Astrophysics Data System},
}

@article{fehrer-2015-improv-decis,
  author        = {{Fehrer}, R. and {Feuerriegel}, S.},
  title         = {Improving Decision Analytics with Deep Learning: The Case of Financial Disclosures},
  journal       = {ArXiv e-prints},
  eprint        = {1508.01993},
  archiveprefix = {arXiv},
  primaryclass  = {stat.ML},
  keywords      = {Statistics - Machine Learning, Computer Science - Computation and Language, Computer Science - Learning},
  month         = aug,
  year          = {2015},
  adsurl        = {http://adsabs.harvard.edu/abs/2015arXiv150801993F},
  adsnote       = {Provided by the SAO/NASA Astrophysics Data System},
}

@article{kaastra-1996-designing-neural-net,
  author    = {Kaastra, Iebeling and Boyd, Milton},
  title     = {Designing a Neural Network for Forecasting Financial and Economic Time Series},
  journal   = {Neurocomputing},
  volume    = {10},
  number    = {3},
  pages     = {215--236},
  year      = {1996},
  publisher = {Elsevier},
}

@article{ahmed-2010-empirical-comparison,
  author    = {Ahmed, Nesreen K and Atiya, Amir F and Gayar, Neamat El and El-Shishiny, Hisham},
  title     = {An Empirical Comparison of Machine Learning Models for Time Series Forecasting},
  journal   = {Econometric Reviews},
  volume    = {29},
  number    = {5--6},
  pages     = {594--621},
  year      = {2010},
  publisher = {Taylor \& Francis},
}
@article{dubovikov-2004-dimension-minimal-cover,
  author    = {Dubovikov, MM and Starchenko, NV and Dubovikov, MS},
  title     = {Dimension of the Minimal Cover and Fractal Analysis of Time Series},
  journal   = {Physica A: Statistical Mechanics and its Applications},
  volume    = {339},
  number    = {3},
  pages     = {591--608},
  year      = {2004},
  publisher = {Elsevier},
}

@inproceedings{dalto-2015-deep-neural-net,
  author       = {Dalto, Mladen and Matu{\v{s}}ko, Jadranko and Va{\v{s}}ak, Mario},
  title        = {Deep Neural Networks for Ultra-Short-Term Wind Forecasting},
  booktitle    = {Industrial Technology (ICIT), 2015 IEEE International Conference on},
  pages        = {1657--1663},
  year         = {2015},
  organization = {IEEE},
}

@inproceedings{goodfellow-2014-gan,
  author    = {Goodfellow, Ian and Pouget-Abadie, Jean and Mirza, Mehdi and Xu, Bing and Warde-Farley, David and Ozair, Sherjil and Courville, Aaron and Bengio, Yoshua},
  title     = {Generative Adversarial Nets},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {2672--2680},
  year      = {2014},
}

@article{goodfellow-2014-explain-harnes-adver-examp,
  author        = {{Goodfellow}, I.~J. and {Shlens}, J. and {Szegedy}, C.},
  title         = {Explaining and Harnessing Adversarial Examples},
  journal       = {ArXiv e-prints},
  eprint        = {1412.6572},
  archiveprefix = {arXiv},
  primaryclass  = {stat.ML},
  keywords      = {Statistics - Machine Learning, Computer Science - Learning},
  month         = dec,
  year          = {2014},
  adsurl        = {http://adsabs.harvard.edu/abs/2014arXiv1412.6572G},
  adsnote       = {Provided by the SAO/NASA Astrophysics Data System},
}

@inproceedings{denton-2015-deep-generative-image,
  author    = {Denton, Emily L and Chintala, Soumith and Fergus, Rob and others},
  title     = {Deep Generative Image Models Using a {Laplacian} Pyramid of Adversarial Networks},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {1486--1494},
  year      = {2015},
}

@article{radford-2015-dcgan,
  author        = {{Radford}, A. and {Metz}, L. and {Chintala}, S.},
  title         = {Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks},
  journal       = {ArXiv e-prints},
  eprint        = {1511.06434},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
  keywords      = {Computer Science - Learning, Computer Science - Computer Vision and Pattern Recognition},
  month         = nov,
  year          = {2015},
  adsurl        = {http://adsabs.harvard.edu/abs/2015arXiv151106434R},
  adsnote       = {Provided by the SAO/NASA Astrophysics Data System},
}

@inproceedings{dosovitskiy-2015-learning-to-generate,
  author    = {Dosovitskiy, Alexey and Tobias Springenberg, Jost and Brox, Thomas},
  title     = {Learning to Generate Chairs with Convolutional Neural Networks},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {1538--1546},
  year      = {2015},
}
@ARTICLE{burda-2015-impor-weigh-autoen, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv150900519B", | |
archivePrefix= "arXiv", | |
author = "{Burda}, Y. and {Grosse}, R. and {Salakhutdinov}, R.", | |
eprint = "1509.00519", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Learning, Statistics - Machine Learning", | |
month = sep, | |
primaryClass = "cs.LG", | |
title = "{Importance Weighted Autoencoders}", | |
year = 2015 | |
} | |
@ARTICLE{ganin-2014-unsup-domain, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2014arXiv1409.7495G", | |
archivePrefix= "arXiv", | |
author = "{Ganin}, Y. and {Lempitsky}, V.", | |
eprint = "1409.7495", | |
journal = "ArXiv e-prints", | |
keywords = "Statistics - Machine Learning, Computer Science - Learning, | |
Computer Science - Neural and Evolutionary Computing", | |
month = sep, | |
primaryClass = "stat.ML", | |
title = "{Unsupervised Domain Adaptation By Backpropagation}", | |
year = 2014 | |
} | |
@ARTICLE{makhzani-2015-adver-autoen, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv151105644M", | |
archivePrefix= "arXiv", | |
author = "{Makhzani}, A. and {Shlens}, J. and {Jaitly}, N. and | |
{Goodfellow}, I. and {Frey}, B.", | |
eprint = "1511.05644", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Learning", | |
month = nov, | |
primaryClass = "cs.LG", | |
title = "{Adversarial Autoencoders}", | |
year = 2015 | |
} | |
@ARTICLE{szegedy-2013-intrig-proper-neural, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2013arXiv1312.6199S", | |
archivePrefix= "arXiv", | |
author = "{Szegedy}, C. and {Zaremba}, W. and {Sutskever}, I. and | |
{Bruna}, J. and {Erhan}, D. and {Goodfellow}, I. and | |
{Fergus}, R.", | |
eprint = "1312.6199", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computer Vision and Pattern Recognition, | |
Computer Science - Learning, Computer Science - Neural and | |
Evolutionary Computing", | |
month = dec, | |
primaryClass = "cs.CV", | |
title = "{Intriguing Properties of Neural Networks}",
year = 2013 | |
} | |
@ARTICLE{kurakin-2016-adver-examp-physic, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv160702533K", | |
archivePrefix= "arXiv", | |
author = "{Kurakin}, A. and {Goodfellow}, I. and {Bengio}, S.", | |
eprint = "1607.02533", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computer Vision and Pattern Recognition, | |
Computer Science - Cryptography and Security, Computer | |
Science - Learning, Statistics - Machine Learning", | |
month = jul, | |
primaryClass = "cs.CV", | |
title = "{Adversarial Examples in the Physical World}",
year = 2016 | |
} | |
@ARTICLE{mirza-2014-condit-gener-adver-nets, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2014arXiv1411.1784M", | |
archivePrefix= "arXiv", | |
author = "{Mirza}, M. and {Osindero}, S.", | |
eprint = "1411.1784", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Learning, Computer Science - Artificial | |
Intelligence, Computer Science - Computer Vision and Pattern | |
Recognition, Statistics - Machine Learning", | |
month = nov, | |
primaryClass = "cs.LG", | |
title = "{Conditional Generative Adversarial Nets}", | |
year = 2014 | |
} | |
@ARTICLE{goodfellow-2017-nips-tutor, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2017arXiv170100160G", | |
archivePrefix= "arXiv", | |
author = "{Goodfellow}, I.", | |
eprint = "1701.00160", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Learning", | |
month = dec, | |
primaryClass = "cs.LG", | |
title = "{NIPS 2016 Tutorial: Generative Adversarial Networks}", | |
year = 2017 | |
} | |
@ARTICLE{arjovsky-2017-wasser-gan, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2017arXiv170107875A", | |
archivePrefix= "arXiv", | |
author = "{Arjovsky}, M. and {Chintala}, S. and {Bottou}, L.", | |
eprint = "1701.07875", | |
journal = "ArXiv e-prints", | |
keywords = "Statistics - Machine Learning, Computer Science - Learning", | |
month = jan, | |
primaryClass = "stat.ML", | |
title = "{Wasserstein GAN}", | |
year = 2017 | |
} | |
@inproceedings{ng-2000-algorithms-inverse, | |
title = "Algorithms for Inverse Reinforcement Learning", | |
author = "Ng, Andrew Y and Russell, Stuart", | |
booktitle = "Proceedings of the 17th International Conference on Machine
Learning",
year = 2000 | |
} | |
@ARTICLE{mnih-2013-playin-atari, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2013arXiv1312.5602M", | |
archivePrefix= "arXiv", | |
author = "{Mnih}, V. and {Kavukcuoglu}, K. and {Silver}, D. and | |
{Graves}, A. and {Antonoglou}, I. and {Wierstra}, D. and | |
{Riedmiller}, M.", | |
eprint = "1312.5602", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Learning", | |
month = dec, | |
primaryClass = "cs.LG", | |
title = "{Playing Atari With Deep Reinforcement Learning}", | |
year = 2013 | |
} | |
@ARTICLE{heaton-2016-deep-portf-theor, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv160507230H", | |
archivePrefix= "arXiv", | |
author = "{Heaton}, J.~B. and {Polson}, N.~G. and {Witte}, J.~H.", | |
eprint = "1605.07230", | |
journal = "ArXiv e-prints", | |
keywords = "Quantitative Finance - Portfolio Management, Computer Science | |
- Learning", | |
month = may, | |
primaryClass = "q-fin.PM", | |
title = "{Deep Portfolio Theory}", | |
year = 2016 | |
} | |
@ARTICLE{karpathy-2015-visual-under-recur-networ, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv150602078K", | |
archivePrefix= "arXiv", | |
author = "{Karpathy}, A. and {Johnson}, J. and {Fei-Fei}, L.", | |
eprint = "1506.02078", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Learning, Computer Science - Computation | |
and Language, Computer Science - Neural and Evolutionary | |
Computing", | |
month = jun, | |
primaryClass = "cs.LG", | |
title = "{Visualizing and Understanding Recurrent Networks}", | |
year = 2015 | |
} | |
@ARTICLE{graves-2014-neural-turin-machin, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2014arXiv1410.5401G", | |
archivePrefix= "arXiv", | |
author = "{Graves}, A. and {Wayne}, G. and {Danihelka}, I.", | |
eprint = "1410.5401", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Neural and Evolutionary Computing", | |
month = oct, | |
title = "{Neural Turing Machines}", | |
year = 2014 | |
} | |
@ARTICLE{bahdanau-2014-bahdanau-attention, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2014arXiv1409.0473B", | |
archivePrefix= "arXiv", | |
author = "{Bahdanau}, D. and {Cho}, K. and {Bengio}, Y.", | |
eprint = "1409.0473", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language, Computer Science | |
- Learning, Computer Science - Neural and Evolutionary Computing, Statistics | |
- Machine Learning", | |
month = sep, | |
primaryClass = "cs.CL", | |
title = "{Neural Machine Translation By Jointly Learning To Align and | |
Translate}", | |
year = 2014 | |
} | |
@ARTICLE{vinyals-2014-show-tell, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2014arXiv1411.4555V", | |
archivePrefix= "arXiv", | |
author = "{Vinyals}, O. and {Toshev}, A. and {Bengio}, S. and {Erhan}, | |
D.", | |
eprint = "1411.4555", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computer Vision and Pattern Recognition", | |
month = nov, | |
primaryClass = "cs.CV", | |
title = "{Show and Tell: A Neural Image Caption Generator}", | |
year = 2014 | |
} | |
@inproceedings{glorot-2010-under-diff, | |
title = "Understanding the difficulty of training deep feedforward | |
neural networks.", | |
author = "Glorot, Xavier and Bengio, Yoshua", | |
booktitle = "AISTATS",
volume = 9, | |
pages = "249--256",
year = 2010 | |
} | |
@article{lecun-1998-gradient-based, | |
title = "Gradient-based learning applied to document recognition", | |
author = "LeCun, Yann and Bottou, L{\'e}on and Bengio, Yoshua and | |
Haffner, Patrick", | |
journal = "Proceedings of the IEEE", | |
volume = 86, | |
number = 11, | |
pages = "2278--2324",
year = 1998, | |
publisher = "IEEE" | |
} | |
@article{gosavi-2009-reinforcement-learning, | |
title = "Reinforcement learning: A tutorial survey and recent | |
advances", | |
author = "Gosavi, Abhijit", | |
journal = "INFORMS Journal on Computing", | |
volume = 21, | |
number = 2, | |
pages = "178--192",
year = 2009, | |
publisher = "INFORMS" | |
} | |
@ARTICLE{zeiler-2013-visual-under-convol-networ, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2013arXiv1311.2901Z", | |
archivePrefix= "arXiv", | |
author = "{Zeiler}, M.~D. and {Fergus}, R.",
eprint = "1311.2901", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computer Vision and Pattern Recognition", | |
month = nov, | |
primaryClass = "cs.CV", | |
title = "{Visualizing and Understanding Convolutional Networks}", | |
year = 2013 | |
} | |
@ARTICLE{krizhevsky-2014-one-weird, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2014arXiv1404.5997K", | |
archivePrefix= "arXiv", | |
author = "{Krizhevsky}, A.", | |
eprint = "1404.5997", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Neural and Evolutionary Computing, | |
Computer Science - Distributed, Parallel, and Cluster | |
Computing, Computer Science - Learning", | |
month = apr, | |
title = "{One Weird Trick for Parallelizing Convolutional Neural
Networks}",
year = 2014 | |
} | |
@inproceedings{zeiler-2011-adaptive-deconv, | |
title = "Adaptive deconvolutional networks for mid and high level | |
feature learning", | |
author = "Zeiler, Matthew D and Taylor, Graham W and Fergus, Rob", | |
booktitle = "Computer Vision (ICCV), 2011 IEEE International Conference | |
on", | |
pages = "2018--2025",
year = 2011, | |
organization = "IEEE" | |
} | |
@ARTICLE{dumoulin-2016-guide-to, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv160307285D", | |
archivePrefix= "arXiv", | |
author = "{Dumoulin}, V. and {Visin}, F.", | |
eprint = "1603.07285", | |
journal = "ArXiv e-prints", | |
keywords = "Statistics - Machine Learning, Computer Science - Learning, | |
Computer Science - Neural and Evolutionary Computing", | |
month = mar, | |
primaryClass = "stat.ML", | |
title = "{A Guide To Convolution Arithmetic for Deep Learning}",
year = 2016 | |
} | |
@article{beck-2009-fast-iter, | |
title = "A fast iterative shrinkage-thresholding algorithm for linear | |
inverse problems", | |
author = "Beck, Amir and Teboulle, Marc", | |
journal = "SIAM journal on imaging sciences", | |
volume = 2, | |
number = 1, | |
pages = "183--202",
year = 2009, | |
publisher = "SIAM" | |
} | |
@ARTICLE{redmon-2015-you-only-look-once, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv150602640R", | |
archivePrefix= "arXiv", | |
author = "{Redmon}, J. and {Divvala}, S. and {Girshick}, R. and | |
{Farhadi}, A.", | |
eprint = "1506.02640", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computer Vision and Pattern Recognition", | |
month = jun, | |
primaryClass = "cs.CV", | |
title = "{You Only Look Once: Unified, Real-Time Object Detection}", | |
year = 2015 | |
} | |
@ARTICLE{rastegari-2016-xnor-net, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv160305279R", | |
archivePrefix= "arXiv", | |
author = "{Rastegari}, M. and {Ordonez}, V. and {Redmon}, J. and | |
{Farhadi}, A.", | |
eprint = "1603.05279", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computer Vision and Pattern Recognition", | |
month = mar, | |
primaryClass = "cs.CV", | |
title = "{XNOR-Net: ImageNet Classification Using Binary Convolutional
Neural Networks}",
year = 2016 | |
} | |
@inproceedings{zeiler-2010-deconvolutional-net, | |
title = "Deconvolutional networks", | |
author = "Zeiler, Matthew D and Krishnan, Dilip and Taylor, Graham W | |
and Fergus, Rob", | |
booktitle = "Computer Vision and Pattern Recognition (CVPR), 2010 IEEE | |
Conference on", | |
pages = "2528--2535",
year = 2010, | |
organization = "IEEE" | |
} | |
@ARTICLE{mikolov-2013-effic-estim, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2013arXiv1301.3781M", | |
archivePrefix= "arXiv", | |
author = "{Mikolov}, T. and {Chen}, K. and {Corrado}, G. and {Dean}, | |
J.", | |
eprint = "1301.3781", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language", | |
month = jan, | |
primaryClass = "cs.CL", | |
title = "{Efficient Estimation of Word Representations in Vector | |
Space}", | |
year = 2013 | |
} | |
@ARTICLE{greff-2015-lstm-search, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv150304069G", | |
archivePrefix= "arXiv", | |
author = "{Greff}, K. and {Srivastava}, R.~K. and {Koutn{\'{\i}}k}, | |
J. and {Steunebrink}, B.~R. and {Schmidhuber}, J.", | |
eprint = "1503.04069", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Neural and Evolutionary Computing, | |
Computer Science - Learning, 68T10, I.2.6, I.2.7, I.5.1, | |
H.5.5", | |
month = mar, | |
title = "{LSTM: A Search Space Odyssey}", | |
year = 2015 | |
} | |
@ARTICLE{mnih-2016-async-method, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv160201783M", | |
archivePrefix= "arXiv", | |
author = "{Mnih}, V. and {Puigdom{\`e}nech Badia}, A. and {Mirza}, | |
M. and {Graves}, A. and {Lillicrap}, T.~P. and {Harley}, | |
T. and {Silver}, D. and {Kavukcuoglu}, K.", | |
eprint = "1602.01783", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Learning", | |
month = feb, | |
primaryClass = "cs.LG", | |
title = "{Asynchronous Methods for Deep Reinforcement Learning}", | |
year = 2016 | |
} | |
@article{bengio-2003-neural-prob, | |
title = "A neural probabilistic language model", | |
author = "Bengio, Yoshua and Ducharme, R{\'e}jean and Vincent, Pascal | |
and Jauvin, Christian", | |
journal = "Journal of machine learning research", | |
volume = 3, | |
number = "Feb", | |
pages = "1137--1155",
year = 2003 | |
} | |
@mastersthesis{mikolov-2007-language-model,
  title = "Language Modeling for Speech Recognition in Czech",
  author = "Mikolov, Tom{\'a}{\v{s}}",
  year = 2007,
  school = "Brno University of Technology"
}
@inproceedings{mikolov-2013-distributed-repre, | |
title = "Distributed representations of words and phrases and their | |
compositionality", | |
author = "Mikolov, Tomas and Sutskever, Ilya and Chen, Kai and Corrado, | |
Greg S and Dean, Jeff", | |
booktitle = "Advances in neural information processing systems", | |
pages = "3111--3119",
year = 2013 | |
} | |
@ARTICLE{luong-2015-luong-attention, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv150804025L", | |
archivePrefix= "arXiv", | |
author = "{Luong}, M.-T. and {Pham}, H. and {Manning}, C.~D.", | |
eprint = "1508.04025", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language", | |
month = aug, | |
primaryClass = "cs.CL", | |
title = "{Effective Approaches To Attention-Based Neural Machine | |
Translation}", | |
year = 2015 | |
} | |
@ARTICLE{chorowski-2015-atten-based, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv150607503C", | |
archivePrefix= "arXiv", | |
author = "{Chorowski}, J. and {Bahdanau}, D. and {Serdyuk}, D. and | |
{Cho}, K. and {Bengio}, Y.", | |
eprint = "1506.07503", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language, Computer Science | |
- Learning, Computer Science - Neural and Evolutionary Computing, Statistics | |
- Machine Learning", | |
month = jun, | |
primaryClass = "cs.CL", | |
title = "{Attention-Based Models for Speech Recognition}", | |
year = 2015 | |
} | |
@ARTICLE{mnih-2014-recur-model-visual-atten, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2014arXiv1406.6247M", | |
archivePrefix= "arXiv", | |
author = "{Mnih}, V. and {Heess}, N. and {Graves}, A. and | |
{Kavukcuoglu}, K.", | |
eprint = "1406.6247", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Learning, Computer Science - Computer | |
Vision and Pattern Recognition, Statistics - Machine | |
Learning", | |
month = jun, | |
primaryClass = "cs.LG", | |
title = "{Recurrent Models of Visual Attention}", | |
year = 2014 | |
} | |
@ARTICLE{ba-2014-multip-objec, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2014arXiv1412.7755B", | |
archivePrefix= "arXiv", | |
author = "{Ba}, J. and {Mnih}, V. and {Kavukcuoglu}, K.", | |
eprint = "1412.7755", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Learning, Computer Science - Computer | |
Vision and Pattern Recognition, Computer Science - Neural and | |
Evolutionary Computing", | |
month = dec, | |
primaryClass = "cs.LG", | |
title = "{Multiple Object Recognition With Visual Attention}", | |
year = 2014 | |
} | |
@ARTICLE{gregor-2015-draw-recur, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv150204623G", | |
archivePrefix= "arXiv", | |
author = "{Gregor}, K. and {Danihelka}, I. and {Graves}, A. and | |
{Jimenez Rezende}, D. and {Wierstra}, D.", | |
eprint = "1502.04623", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computer Vision and Pattern Recognition, | |
Computer Science - Learning, Computer Science - Neural and | |
Evolutionary Computing", | |
month = feb, | |
primaryClass = "cs.CV", | |
title = "{DRAW: A Recurrent Neural Network For Image Generation}", | |
year = 2015 | |
} | |
@ARTICLE{xu-2015-show-atten-tell, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv150203044X", | |
archivePrefix= "arXiv", | |
author = "{Xu}, K. and {Ba}, J. and {Kiros}, R. and {Cho}, K. and | |
{Courville}, A. and {Salakhutdinov}, R. and {Zemel}, R. and | |
{Bengio}, Y.", | |
eprint = "1502.03044", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Learning, Computer Science - Computer | |
Vision and Pattern Recognition", | |
month = feb, | |
primaryClass = "cs.LG", | |
title = "{Show, Attend and Tell: Neural Image Caption Generation With | |
Visual Attention}", | |
year = 2015 | |
} | |
@ARTICLE{zaremba-2014-learn-to-execute, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2014arXiv1410.4615Z", | |
archivePrefix= "arXiv", | |
author = "{Zaremba}, W. and {Sutskever}, I.", | |
eprint = "1410.4615", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Neural and Evolutionary Computing, | |
Computer Science - Artificial Intelligence, Computer Science | |
- Learning", | |
month = oct, | |
title = "{Learning To Execute}", | |
year = 2014 | |
} | |
@ARTICLE{vinyals-2014-gramm-as-foreig-languag, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2014arXiv1412.7449V", | |
archivePrefix= "arXiv", | |
author = "{Vinyals}, O. and {Kaiser}, L. and {Koo}, T. and {Petrov}, | |
S. and {Sutskever}, I. and {Hinton}, G.", | |
eprint = "1412.7449", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language, Computer Science | |
- Learning, Statistics - Machine Learning", | |
month = dec, | |
primaryClass = "cs.CL", | |
title = "{Grammar As a Foreign Language}", | |
year = 2014 | |
} | |
@ARTICLE{hermann-2015-teach-machin, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv150603340H", | |
archivePrefix= "arXiv", | |
author = "{Hermann}, K.~M. and {Ko{\v c}isk{\'y}}, T. and | |
{Grefenstette}, E. and {Espeholt}, L. and {Kay}, W. and | |
{Suleyman}, M. and {Blunsom}, P.", | |
eprint = "1506.03340", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language, Computer Science | |
- Artificial Intelligence, Computer Science - Neural and Evolutionary | |
Computing", | |
month = jun, | |
primaryClass = "cs.CL", | |
title = "{Teaching Machines To Read and Comprehend}", | |
year = 2015 | |
} | |
@ARTICLE{sukhbaatar-2015-end-to-end, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv150308895S", | |
archivePrefix= "arXiv", | |
author = "{Sukhbaatar}, S. and {Szlam}, A. and {Weston}, J. and | |
{Fergus}, R.", | |
eprint = "1503.08895", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Neural and Evolutionary Computing, | |
Computer Science - Computation and Language", | |
month = mar, | |
title = "{End-To-End Memory Networks}", | |
year = 2015 | |
} | |
@ARTICLE{zaremba-2015-reinf-learn, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv150500521Z", | |
archivePrefix= "arXiv", | |
author = "{Zaremba}, W. and {Sutskever}, I.", | |
eprint = "1505.00521", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Learning", | |
month = may, | |
primaryClass = "cs.LG", | |
title = "{Reinforcement Learning Neural Turing Machines - Revised}", | |
year = 2015 | |
} | |
@ARTICLE{joulin-2016-bag-trick, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv160701759J", | |
archivePrefix= "arXiv", | |
author = "{Joulin}, A. and {Grave}, E. and {Bojanowski}, P. and | |
{Mikolov}, T.", | |
eprint = "1607.01759", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language", | |
month = jul, | |
primaryClass = "cs.CL", | |
title = "{Bag of Tricks for Efficient Text Classification}", | |
year = 2016 | |
} | |
@ARTICLE{kim-2014-convol-neural, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2014arXiv1408.5882K", | |
archivePrefix= "arXiv", | |
author = "{Kim}, Y.", | |
eprint = "1408.5882", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language, Computer Science | |
- Neural and Evolutionary Computing", | |
month = aug, | |
primaryClass = "cs.CL", | |
title = "{Convolutional Neural Networks for Sentence Classification}", | |
year = 2014 | |
} | |
@ARTICLE{graves-2013-gener-sequen, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2013arXiv1308.0850G", | |
archivePrefix= "arXiv", | |
author = "{Graves}, A.", | |
eprint = "1308.0850", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Neural and Evolutionary Computing, | |
Computer Science - Computation and Language", | |
month = aug, | |
title = "{Generating Sequences With Recurrent Neural Networks}", | |
year = 2013 | |
} | |
@inproceedings{yang-2016-hierarchical-attent, | |
title = "Hierarchical attention networks for document classification", | |
author = "Yang, Zichao and Yang, Diyi and Dyer, Chris and He, Xiaodong | |
and Smola, Alex and Hovy, Eduard", | |
booktitle = "Proceedings of NAACL-HLT", | |
pages = "1480--1489",
year = 2016 | |
} | |
@inproceedings{lai-2015-recurrent-conv, | |
title = "Recurrent Convolutional Neural Networks for Text | |
Classification.", | |
author = "Lai, Siwei and Xu, Liheng and Liu, Kang and Zhao, Jun", | |
booktitle = "AAAI", | |
volume = 333, | |
pages = "2267--2273",
year = 2015 | |
} | |
@inproceedings{ding-2015-deep-learn, | |
title = "Deep Learning for Event-Driven Stock Prediction.", | |
author = "Ding, Xiao and Zhang, Yue and Liu, Ting and Duan, Junwen", | |
booktitle = "IJCAI", | |
pages = "2327--2333",
year = 2015 | |
} | |
@inproceedings{socher-2013-reasoning-neural, | |
title = "Reasoning with neural tensor networks for knowledge base | |
completion", | |
author = "Socher, Richard and Chen, Danqi and Manning, Christopher D | |
and Ng, Andrew", | |
booktitle = "Advances in neural information processing systems", | |
pages = "926--934",
year = 2013 | |
} | |
@inproceedings{angeli-2015-lever-ling-struct, | |
title = "Leveraging linguistic structure for open domain information | |
extraction", | |
author = "Angeli, Gabor and Premkumar, Melvin Johnson and Manning, | |
Christopher D", | |
booktitle = "Proceedings of the 53rd Annual Meeting of the Association for | |
Computational Linguistics (ACL 2015)", | |
year = 2015 | |
} | |
@inproceedings{benko-2007-open-info-extra, | |
author = "Banko, Michele and Cafarella, Michael J. and Soderland, | |
Stephen and Broadhead, Matt and Etzioni, Oren", | |
title = "Open Information Extraction from the Web", | |
booktitle = "Proceedings of the 20th International Joint Conference on | |
Artificial Intelligence",
series = "IJCAI'07", | |
year = 2007, | |
location = "Hyderabad, India", | |
pages = "2670--2676",
numpages = 7, | |
url = "http://dl.acm.org/citation.cfm?id=1625275.1625705", | |
acmid = 1625705, | |
publisher = "Morgan Kaufmann Publishers Inc.", | |
address = "San Francisco, CA, USA" | |
} | |
@inproceedings{si-2014-exploit-social, | |
title = "Exploiting Social Relations and Sentiment for Stock | |
Prediction.", | |
author = "Si, Jianfeng and Mukherjee, Arjun and Liu, Bing and Pan, | |
Sinno Jialin and Li, Qing and Li, Huayi", | |
booktitle = "EMNLP", | |
volume = 14, | |
pages = "1139--1145",
year = 2014 | |
} | |
@inproceedings{ding-2014-usings-struct-event, | |
title = "Using Structured Events to Predict Stock Price Movement: An | |
Empirical Investigation", | |
author = "Xiao Ding and Yue Zhang and Ting Liu and Junwen Duan", | |
booktitle = "EMNLP", | |
year = 2014 | |
} | |
@inproceedings{pennington-2014-glove-global-vec, | |
author = "Jeffrey Pennington and Richard Socher and Christopher | |
D. Manning", | |
booktitle = "Empirical Methods in Natural Language Processing (EMNLP)", | |
title = "GloVe: Global Vectors for Word Representation", | |
year = 2014, | |
pages = "1532--1543",
url = "http://www.aclweb.org/anthology/D14-1162" | |
} | |
@inproceedings{baroni-2014-dont-count-pred, | |
title = "Don't count, predict! A systematic comparison of | |
context-counting vs. context-predicting semantic vectors", | |
author = "Marco Baroni and Georgiana Dinu and Germ{\'a}n Kruszewski", | |
booktitle = "ACL", | |
year = 2014 | |
} | |
@inproceedings{morin-2005-hiera-prob, | |
title = "Hierarchical Probabilistic Neural Network Language Model.", | |
author = "Morin, Frederic and Bengio, Yoshua", | |
booktitle = "AISTATS",
volume = 5, | |
pages = "246--252",
year = 2005, | |
organization = "Citeseer" | |
} | |
@ARTICLE{rong-2014-param-learn-explain, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2014arXiv1411.2738R", | |
archivePrefix= "arXiv", | |
author = "{Rong}, X.", | |
eprint = "1411.2738", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language", | |
month = nov, | |
primaryClass = "cs.CL", | |
title = "{word2vec Parameter Learning Explained}", | |
year = 2014 | |
} | |
@inproceedings{mnih-2009-scalable-hiera, | |
title = "A scalable hierarchical distributed language model", | |
author = "Mnih, Andriy and Hinton, Geoffrey E", | |
booktitle = "Advances in neural information processing systems", | |
pages = "1081--1088",
year = 2009 | |
} | |
@inproceedings{davis-2006-rela-pre-rec, | |
title = "The relationship between Precision-Recall and ROC curves", | |
author = "Davis, Jesse and Goadrich, Mark", | |
booktitle = "Proceedings of the 23rd international conference on Machine | |
learning", | |
pages = "233--240",
year = 2006, | |
organization = "ACM" | |
} | |
@ARTICLE{wojna-2017-atten-based, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2017arXiv170403549W", | |
archivePrefix= "arXiv", | |
author = "{Wojna}, Z. and {Gorban}, A. and {Lee}, D.-S. and {Murphy}, | |
K. and {Yu}, Q. and {Li}, Y. and {Ibarz}, J.", | |
eprint = "1704.03549", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computer Vision and Pattern Recognition", | |
month = apr, | |
primaryClass = "cs.CV", | |
title = "{Attention-Based Extraction of Structured Information From | |
Street View Imagery}", | |
year = 2017 | |
} | |
@inproceedings{lee-2013-pesu-label, | |
title = {Pseudo-Label: The Simple and Efficient Semi-Supervised
Learning Method for Deep Neural Networks},
author = {Dong-Hyun Lee}, | |
year = 2013 | |
} | |
@ARTICLE{gehring-2017-convol-sequen, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2017arXiv170503122G", | |
archivePrefix= "arXiv", | |
author = "{Gehring}, J. and {Auli}, M. and {Grangier}, D. and {Yarats}, | |
D. and {Dauphin}, Y.~N.", | |
eprint = "1705.03122", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language", | |
month = may, | |
primaryClass = "cs.CL", | |
title = "{Convolutional Sequence To Sequence Learning}", | |
year = 2017 | |
} | |
@inproceedings{dean-2012-large-scale, | |
title = "Large scale distributed deep networks", | |
author = "Dean, Jeffrey and Corrado, Greg and Monga, Rajat and Chen, | |
Kai and Devin, Matthieu and Mao, Mark and Senior, Andrew and | |
Tucker, Paul and Yang, Ke and Le, Quoc V and others", | |
booktitle = "Advances in neural information processing systems", | |
pages = "1223--1231",
year = 2012 | |
} | |
@inproceedings{larochelle-2010-learn-combine, | |
title = "Learning to combine foveal glimpses with a third-order | |
Boltzmann machine", | |
author = "Larochelle, Hugo and Hinton, Geoffrey E", | |
booktitle = "Advances in neural information processing systems", | |
pages = "1243--1251",
year = 2010 | |
} | |
@ARTICLE{denil-2011-learn-where, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2011arXiv1109.3737D", | |
archivePrefix= "arXiv", | |
author = "{Denil}, M. and {Bazzani}, L. and {Larochelle}, H. and {de | |
Freitas}, N.", | |
eprint = "1109.3737", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Artificial Intelligence", | |
month = sep, | |
primaryClass = "cs.AI", | |
title = "{Learning Where To Attend With Deep Architectures for Image | |
Tracking}", | |
year = 2011 | |
} | |
@ARTICLE{yin-2015-abcnn, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv151205193Y", | |
archivePrefix= "arXiv", | |
author = "{Yin}, W. and {Sch{\"u}tze}, H. and {Xiang}, B. and {Zhou}, | |
B.", | |
eprint = "1512.05193", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language", | |
month = dec, | |
primaryClass = "cs.CL", | |
title = "{ABCNN: Attention-Based Convolutional Neural Network for | |
Modeling Sentence Pairs}", | |
year = 2015 | |
} | |
@ARTICLE{vinyals-2015-point-networ, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv150603134V", | |
archivePrefix= "arXiv", | |
author = "{Vinyals}, O. and {Fortunato}, M. and {Jaitly}, N.", | |
eprint = "1506.03134", | |
journal = "ArXiv e-prints", | |
keywords = "Statistics - Machine Learning, Computer Science - | |
Computational Geometry, Computer Science - Learning, Computer | |
Science - Neural and Evolutionary Computing", | |
month = jun, | |
primaryClass = "stat.ML", | |
title = "{Pointer Networks}", | |
year = 2015 | |
} | |
@ARTICLE{johnson-2016-percep-losses, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv160308155J", | |
archivePrefix= "arXiv", | |
author = "{Johnson}, J. and {Alahi}, A. and {Fei-Fei}, L.", | |
eprint = "1603.08155", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computer Vision and Pattern Recognition, | |
Computer Science - Learning", | |
month = mar, | |
primaryClass = "cs.CV", | |
title = "{Perceptual Losses for Real-Time Style Transfer and | |
Super-Resolution}", | |
year = 2016 | |
} | |
@inproceedings{lu-2012-combin-sketch, | |
title = "Combining sketch and tone for pencil drawing production", | |
author = "Lu, Cewu and Xu, Li and Jia, Jiaya", | |
booktitle = "Proceedings of the Symposium on Non-Photorealistic Animation | |
and Rendering", | |
pages = "65-73", | |
year = 2012, | |
organization = "Eurographics Association" | |
} | |
@ARTICLE{chan-2015-pcanet, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015ITIP...24.5017C", | |
archivePrefix= "arXiv", | |
author = "{Chan}, T.-H. and {Jia}, K. and {Gao}, S. and {Lu}, J. and | |
{Zeng}, Z. and {Ma}, Y.", | |
doi = "10.1109/TIP.2015.2475625", | |
eprint = "1404.3606", | |
journal = "IEEE Transactions on Image Processing", | |
month = dec, | |
pages = "5017-5032", | |
primaryClass = "cs.CV", | |
title = "{PCANet: A Simple Deep Learning Baseline for Image | |
Classification?}", | |
url = "https://doi.org/10.1109/TIP.2015.2475625", | |
volume = 24, | |
year = 2015 | |
} | |
@ARTICLE{kiros-2015-skip-thought, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv150606726K", | |
archivePrefix= "arXiv", | |
author = "{Kiros}, R. and {Zhu}, Y. and {Salakhutdinov}, R. and | |
{Zemel}, R.~S. and {Torralba}, A. and {Urtasun}, R. and | |
{Fidler}, S.", | |
eprint = "1506.06726", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language, Computer Science | |
- Learning", | |
month = jun, | |
primaryClass = "cs.CL", | |
title = "{Skip-Thought Vectors}", | |
year = 2015 | |
} | |
@inproceedings{manning-2014-stanford-core, | |
title = "The stanford corenlp natural language processing toolkit.", | |
author = "Manning, Christopher D and Surdeanu, Mihai and Bauer, John | |
and Finkel, Jenny Rose and Bethard, Steven and McClosky, | |
David", | |
booktitle = "ACL (System Demonstrations)", | |
pages = "55-60", | |
year = 2014 | |
} | |
@article{graves-2005-frame-phone, | |
title = "Framewise phoneme classification with bidirectional LSTM and | |
other neural network architectures", | |
author = "Graves, Alex and Schmidhuber, J{\"u}rgen", | |
journal = "Neural Networks", | |
volume = 18, | |
number = 5, | |
pages = "602-610", | |
year = 2005, | |
publisher = "Elsevier" | |
} | |
@inproceedings{graves-2006-conn-temp, | |
title = "Connectionist temporal classification: labelling unsegmented | |
sequence data with recurrent neural networks", | |
author = "Graves, Alex and Fern{\'a}ndez, Santiago and Gomez, Faustino | |
and Schmidhuber, J{\"u}rgen", | |
booktitle = "Proceedings of the 23rd international conference on Machine | |
learning", | |
pages = "369-376", | |
year = 2006, | |
organization = "ACM" | |
} | |
@ARTICLE{szegedy-2015-rethin-incep, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv151200567S", | |
archivePrefix= "arXiv", | |
author = "{Szegedy}, C. and {Vanhoucke}, V. and {Ioffe}, S. and | |
{Shlens}, J. and {Wojna}, Z.", | |
eprint = "1512.00567", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computer Vision and Pattern Recognition", | |
month = dec, | |
primaryClass = "cs.CV", | |
title = "{Rethinking the Inception Architecture for Computer Vision}", | |
year = 2015 | |
} | |
@article{davis-1959-leonhard-euler, | |
title = "Leonhard euler's integral: A historical profile of the gamma | |
function: In memoriam: Milton abramowitz", | |
author = "Davis, Philip J", | |
journal = "The American Mathematical Monthly", | |
volume = 66, | |
number = 10, | |
pages = "849-869", | |
year = 1959, | |
publisher = "JSTOR" | |
} | |
@techreport{heinrich-2008-param-esti,
title = "Parameter estimation for text analysis",
author = "Heinrich, Gregor",
institution = "vsonix GmbH and University of Leipzig",
year = 2008
}
@article{takacs-2007-major-comp, | |
title = "Major components of the gravity recommendation system", | |
author = "Tak{\'a}cs, G{\'a}bor and Pil{\'a}szy, Istv{\'a}n and | |
N{\'e}meth, Botty{\'a}n and Tikk, Domonkos", | |
journal = "ACM SIGKDD Explorations Newsletter", | |
volume = 9, | |
number = 2, | |
pages = "80-83", | |
year = 2007, | |
publisher = "ACM" | |
} | |
@inproceedings{takacs-2007-gravity-recomm,
title = "On the {Gravity} Recommendation System",
author = "Tak{\'a}cs, G{\'a}bor and Pil{\'a}szy, Istv{\'a}n and
          N{\'e}meth, Botty{\'a}n and Tikk, Domonkos",
booktitle = "Proceedings of KDD Cup and Workshop 2007",
year = 2007
}
@inproceedings{yang-2015-net-repr, | |
title = "Network representation learning with rich text information", | |
author = "Yang, Cheng and Liu, Zhiyuan and Zhao, Deli and Sun, Maosong | |
and Chang, Edward", | |
booktitle = "Twenty-Fourth International Joint Conference on Artificial | |
Intelligence", | |
year = 2015 | |
} | |
@ARTICLE{perozzi-2014-deepw, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2014arXiv1403.6652P", | |
archivePrefix= "arXiv", | |
author = "{Perozzi}, B. and {Al-Rfou}, R. and {Skiena}, S.", | |
eprint = "1403.6652", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Social and Information Networks, Computer | |
Science - Learning, H.2.8, I.2.6, I.5.1", | |
month = mar, | |
title = "{DeepWalk: Online Learning of Social Representations}", | |
year = 2014 | |
} | |
@article{tibshirani-1996-regres-shrink, | |
title = "Regression shrinkage and selection via the lasso", | |
author = "Tibshirani, Robert", | |
journal = "Journal of the Royal Statistical Society. Series B | |
(Methodological)", | |
pages = "267-288", | |
year = 1996, | |
publisher = "JSTOR" | |
} | |
@ARTICLE{collobert-2011-natur-languag, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2011arXiv1103.0398C", | |
archivePrefix= "arXiv", | |
author = "{Collobert}, R. and {Weston}, J. and {Bottou}, L. and | |
{Karlen}, M. and {Kavukcuoglu}, K. and {Kuksa}, P.", | |
eprint = "1103.0398", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Learning, Computer Science - Computation | |
and Language", | |
month = mar, | |
primaryClass = "cs.LG", | |
title = "{Natural Language Processing (almost) From Scratch}", | |
year = 2011 | |
} | |
@ARTICLE{jin-2017-how-to, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2017arXiv170300887J", | |
archivePrefix= "arXiv", | |
author = "{Jin}, C. and {Ge}, R. and {Netrapalli}, P. and {Kakade}, | |
S.~M. and {Jordan}, M.~I.", | |
eprint = "1703.00887", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Learning, Mathematics - Optimization and | |
Control, Statistics - Machine Learning", | |
month = mar, | |
primaryClass = "cs.LG", | |
title = "{How To Escape Saddle Points Efficiently}", | |
year = 2017 | |
} | |
@inproceedings{mihalcea-2004-textrank-bring,
title = "{TextRank}: Bringing order into texts",
author = "Mihalcea, Rada and Tarau, Paul",
booktitle = "Proceedings of the 2004 Conference on Empirical Methods in
             Natural Language Processing",
year = 2004,
organization = "Association for Computational Linguistics"
}
@misc{wang-2009-more-suit,
title = "Which is More Suitable for {Chinese} Word Segmentation, the
         Generative Model or the Discriminative One?",
author = "Wang, Kun and Zong, Chengqing",
year = 2009
}
@inproceedings{wang-2011-online-varia, | |
title = "Online variational inference for the hierarchical Dirichlet | |
process", | |
author = "Wang, Chong and Paisley, John and Blei, David", | |
booktitle = "Proceedings of the Fourteenth International Conference on | |
Artificial Intelligence and Statistics", | |
pages = "752-760", | |
year = 2011 | |
} | |
@inproceedings{hoffman-2010-online-learn, | |
title = "Online learning for latent dirichlet allocation", | |
author = "Hoffman, Matthew and Bach, Francis R and Blei, David M", | |
booktitle = "advances in neural information processing systems", | |
pages = "856-864", | |
year = 2010 | |
} | |
@article{yamamoto-2001-using-suffix, | |
title = "Using suffix arrays to compute term frequency and document | |
frequency for all substrings in a corpus", | |
author = "Yamamoto, Mikio and Church, Kenneth W", | |
journal = "Computational Linguistics", | |
volume = 27, | |
number = 1, | |
pages = "1-30", | |
year = 2001, | |
publisher = "MIT press" | |
} | |
@article{etzioni-2005-unsupervised-named-entity, | |
title = "Unsupervised named-entity extraction from the web: An | |
experimental study", | |
author = "Etzioni, Oren and Cafarella, Michael and Downey, Doug and | |
Popescu, Ana-Maria and Shaked, Tal and Soderland, Stephen and | |
Weld, Daniel S and Yates, Alexander", | |
journal = "Artificial intelligence", | |
volume = 165, | |
number = 1, | |
pages = "91-134", | |
year = 2005, | |
publisher = "Elsevier" | |
} | |
@inproceedings{singh-2010-minimally-super, | |
title = "Minimally-supervised extraction of entities from text | |
advertisements", | |
author = "Singh, Sameer and Hillard, Dustin and Leggetter, Chris", | |
booktitle = "Human Language Technologies: The 2010 Annual Conference of | |
the North American Chapter of the Association for | |
Computational Linguistics", | |
pages = "73-81", | |
year = 2010, | |
organization = "Association for Computational Linguistics" | |
} | |
@inproceedings{liu-2011-recogn-named, | |
title = "Recognizing named entities in tweets", | |
author = "Liu, Xiaohua and Zhang, Shaodian and Wei, Furu and Zhou, | |
Ming", | |
booktitle = "Proceedings of the 49th Annual Meeting of the Association for | |
Computational Linguistics: Human Language Technologies-Volume | |
1", | |
pages = "359-367", | |
year = 2011, | |
organization = "Association for Computational Linguistics" | |
} | |
@ARTICLE{lample-2016-neural-archit, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv160301360L", | |
archivePrefix= "arXiv", | |
author = "{Lample}, G. and {Ballesteros}, M. and {Subramanian}, S. and | |
{Kawakami}, K. and {Dyer}, C.", | |
eprint = "1603.01360", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language", | |
month = mar, | |
primaryClass = "cs.CL", | |
title = "{Neural Architectures for Named Entity Recognition}", | |
year = 2016 | |
} | |
@ARTICLE{rei-2016-atten-to, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv161104361R", | |
archivePrefix= "arXiv", | |
author = "{Rei}, M. and {Crichton}, G.~K.~O. and {Pyysalo}, S.", | |
eprint = "1611.04361", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language, Computer Science | |
- Learning, Computer Science - Neural and Evolutionary Computing, I.5.1, | |
I.2.6, I.2.7", | |
month = nov, | |
primaryClass = "cs.CL", | |
title = "{Attending To Characters in Neural Sequence Labeling Models}", | |
year = 2016 | |
} | |
@inproceedings{bharadwaj-2016-phono-aware, | |
title = "Phonologically Aware Neural Model for Named Entity | |
Recognition in Low Resource Transfer Settings", | |
author = "Akash Bharadwaj and David R. Mortensen and Chris Dyer and | |
Jaime G. Carbonell", | |
booktitle = "EMNLP", | |
year = 2016 | |
} | |
@ARTICLE{yang-2017-trans-learn, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2017arXiv170306345Y", | |
archivePrefix= "arXiv", | |
author = "{Yang}, Z. and {Salakhutdinov}, R. and {Cohen}, W.~W.", | |
eprint = "1703.06345", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language, Computer Science | |
- Learning", | |
month = mar, | |
primaryClass = "cs.CL", | |
title = "{Transfer Learning for Sequence Tagging With Hierarchical | |
Recurrent Networks}", | |
year = 2017 | |
} | |
@ARTICLE{peters-2017-semi-super, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2017arXiv170500108P", | |
archivePrefix= "arXiv", | |
author = "{Peters}, M.~E. and {Ammar}, W. and {Bhagavatula}, C. and | |
{Power}, R.", | |
eprint = "1705.00108", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language", | |
month = apr, | |
primaryClass = "cs.CL", | |
title = "{Semi-Supervised Sequence Tagging With Bidirectional Language | |
models}", | |
year = 2017 | |
} | |
@article{rose-2010-auto-key, | |
title = "Automatic keyword extraction from individual documents", | |
author = "Rose, Stuart and Engel, Dave and Cramer, Nick and Cowley, | |
Wendy", | |
journal = "Text Mining", | |
pages = "1-20", | |
year = 2010 | |
} | |
@inproceedings{hasan-2014-auto-key, | |
title = "Automatic Keyphrase Extraction: A Survey of the State of the | |
Art.", | |
author = "Hasan, Kazi Saidul and Ng, Vincent", | |
booktitle = "ACL (1)", | |
pages = "1262-1273", | |
year = 2014 | |
} | |
@inproceedings{yih-2006-find-advert, | |
title = "Finding advertising keywords on web pages", | |
author = "Yih, Wen-tau and Goodman, Joshua and Carvalho, Vitor R", | |
booktitle = "Proceedings of the 15th international conference on World | |
Wide Web", | |
pages = "213-222", | |
year = 2006, | |
organization = "ACM" | |
} | |
@inproceedings{jiang-2009-rank-approach, | |
title = "A ranking approach to keyphrase extraction", | |
author = "Jiang, Xin and Hu, Yunhua and Li, Hang", | |
booktitle = "Proceedings of the 32nd international ACM SIGIR conference on | |
Research and development in information retrieval", | |
pages = "756-757", | |
year = 2009, | |
organization = "ACM" | |
} | |
@inproceedings{liu-2009-unsupervised-approach, | |
title = "Unsupervised approaches for automatic keyword extraction | |
using meeting transcripts", | |
author = "Liu, Feifan and Pennell, Deana and Liu, Fei and Liu, Yang", | |
booktitle = "Proceedings of human language technologies: The 2009 annual | |
conference of the North American chapter of the association | |
for computational linguistics", | |
pages = "620-628", | |
year = 2009, | |
organization = "Association for Computational Linguistics" | |
} | |
@inproceedings{witten-1999-kea, | |
author = "Witten, Ian H. and Paynter, Gordon W. and Frank, Eibe and | |
Gutwin, Carl and Nevill-Manning, Craig G.", | |
title = "KEA: Practical Automatic Keyphrase Extraction", | |
booktitle = "Proceedings of the Fourth ACM Conference on Digital | |
Libraries", | |
series = "DL '99", | |
year = 1999, | |
isbn = "1-58113-145-3", | |
location = "Berkeley, California, USA", | |
pages = "254-255", | |
numpages = 2, | |
url = "http://doi.acm.org/10.1145/313238.313437", | |
doi = "10.1145/313238.313437", | |
acmid = 313437, | |
publisher = "ACM", | |
address = "New York, NY, USA" | |
} | |
@inproceedings{liu-2010-auto-key, | |
title = "Automatic keyphrase extraction via topic decomposition", | |
author = "Liu, Zhiyuan and Huang, Wenyi and Zheng, Yabin and Sun, | |
Maosong", | |
booktitle = "Proceedings of the 2010 conference on empirical methods in | |
natural language processing", | |
pages = "366-376", | |
year = 2010, | |
organization = "Association for Computational Linguistics" | |
} | |
@article{chuang-2012-without-cluster, | |
title = "“Without the Clutter of Unimportant Words”: Descriptive | |
keyphrases for text visualization", | |
author = "Chuang, Jason and Manning, Christopher D and Heer, Jeffrey", | |
journal = "ACM Transactions on Computer-Human Interaction (TOCHI)", | |
volume = 19, | |
number = 3, | |
pages = 19, | |
year = 2012, | |
publisher = "ACM" | |
} | |
@inproceedings{mei-2010-divrank,
title = "{DivRank}: the interplay of prestige and diversity in
         information networks",
author = "Mei, Qiaozhu and Guo, Jian and Radev, Dragomir",
booktitle = "Proceedings of the 16th ACM SIGKDD international conference
             on Knowledge discovery and data mining",
pages = "1009--1018",
year = 2010,
organization = "ACM"
}
@inproceedings{hasan-2010-conundrums-unspervised, | |
title = "Conundrums in unsupervised keyphrase extraction: making sense | |
of the state-of-the-art", | |
author = "Hasan, Kazi Saidul and Ng, Vincent", | |
booktitle = "Proceedings of the 23rd International Conference on | |
Computational Linguistics: Posters", | |
pages = "365-373", | |
year = 2010, | |
organization = "Association for Computational Linguistics" | |
} | |
@inproceedings{wan-2008-single-doc, | |
title = "Single document keyphrase extraction using neighborhood | |
knowledge", | |
author = "Wan, Xiaojun and Xiao, Jianguo", | |
booktitle = "Proceedings of the 23rd national conference on Artificial | |
intelligence-Volume 2", | |
pages = "855-860", | |
year = 2008, | |
organization = "AAAI Press" | |
} | |
@inproceedings{wan-2008-collabrank,
title = "{CollabRank}: towards a collaborative approach to
         single-document keyphrase extraction",
author = "Wan, Xiaojun and Xiao, Jianguo",
booktitle = "Proceedings of the 22nd International Conference on
             Computational Linguistics-Volume 1",
pages = "969--976",
year = 2008,
organization = "Association for Computational Linguistics"
}
@techreport{page-1999-page-rank, | |
title = "The PageRank citation ranking: Bringing order to the web.", | |
author = "Page, Lawrence and Brin, Sergey and Motwani, Rajeev and | |
Winograd, Terry", | |
year = 1999, | |
institution = "Stanford InfoLab" | |
} | |
@ARTICLE{barrios-2016-variat-simil, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv160203606B", | |
archivePrefix= "arXiv", | |
author = "{Barrios}, F. and {L{\'o}pez}, F. and {Argerich}, L. and | |
{Wachenchauzer}, R.", | |
eprint = "1602.03606", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language, Computer Science | |
- Information Retrieval, I.2.7", | |
month = feb, | |
primaryClass = "cs.CL", | |
title = "{Variations of the Similarity Function of Textrank for | |
Automated Summarization}", | |
year = 2016 | |
} | |
@misc{gimpel-2006-model-topics,
title = "Modeling Topics",
author = "Gimpel, Kevin",
year = 2006,
note = "Unpublished technical note"
}
@ARTICLE{salimans-2016-weigh-normal, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv160207868S", | |
archivePrefix= "arXiv", | |
author = "{Salimans}, T. and {Kingma}, D.~P.", | |
eprint = "1602.07868", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Learning, Computer Science - Artificial | |
Intelligence, Computer Science - Neural and Evolutionary | |
Computing", | |
month = feb, | |
primaryClass = "cs.LG", | |
title = "{Weight Normalization: A Simple Reparameterization To | |
Accelerate Training of Deep Neural Networks}", | |
year = 2016 | |
} | |
@ARTICLE{lei-2016-layer-normal, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv160706450L", | |
archivePrefix= "arXiv", | |
author = "{Lei Ba}, J. and {Kiros}, J.~R. and {Hinton}, G.~E.", | |
eprint = "1607.06450", | |
journal = "ArXiv e-prints", | |
keywords = "Statistics - Machine Learning, Computer Science - Learning", | |
month = jul, | |
primaryClass = "stat.ML", | |
title = "{Layer Normalization}", | |
year = 2016 | |
} | |
@ARTICLE{ioffe-2015-batch-normal, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv150203167I", | |
archivePrefix= "arXiv", | |
author = "{Ioffe}, S. and {Szegedy}, C.", | |
eprint = "1502.03167", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Learning", | |
month = feb, | |
primaryClass = "cs.LG", | |
title = "{Batch Normalization: Accelerating Deep Network Training By | |
Reducing Internal Covariate Shift}", | |
year = 2015 | |
} | |
@article{shimodaira-2000-improv-predict, | |
title = "Improving predictive inference under covariate shift by | |
weighting the log-likelihood function", | |
author = "Shimodaira, Hidetoshi", | |
journal = "Journal of statistical planning and inference", | |
volume = 90, | |
number = 2, | |
pages = "227-244", | |
year = 2000, | |
publisher = "Elsevier" | |
} | |
@ARTICLE{mikolov-2013-exploit-simil, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2013arXiv1309.4168M", | |
archivePrefix= "arXiv", | |
author = "{Mikolov}, T. and {Le}, Q.~V. and {Sutskever}, I.", | |
eprint = "1309.4168", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language", | |
month = sep, | |
primaryClass = "cs.CL", | |
title = "{Exploiting Similarities Among Languages for Machine | |
Translation}", | |
year = 2013 | |
} | |
@ARTICLE{deng-2016-image-to, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv160904938D", | |
archivePrefix= "arXiv", | |
author = "{Deng}, Y. and {Kanervisto}, A. and {Ling}, J. and {Rush}, | |
A.~M.", | |
eprint = "1609.04938", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computer Vision and Pattern Recognition, | |
Computer Science - Computation and Language, Computer Science | |
- Learning, Computer Science - Neural and Evolutionary | |
Computing", | |
month = sep, | |
primaryClass = "cs.CV", | |
title = "{Image-To-Markup Generation With Coarse-To-Fine Attention}", | |
year = 2016 | |
} | |
@inproceedings{minka-2001-automatic-choice, | |
title = "Automatic choice of dimensionality for PCA", | |
author = "Minka, Thomas P", | |
booktitle = "Advances in neural information processing systems", | |
pages = "598-604", | |
year = 2001 | |
} | |
@ARTICLE{le-2014-distr-repres-senten-docum, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2014arXiv1405.4053L", | |
archivePrefix= "arXiv", | |
author = "{Le}, Q.~V. and {Mikolov}, T.", | |
eprint = "1405.4053", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language, Computer Science | |
- Artificial Intelligence, Computer Science - Learning", | |
month = may, | |
primaryClass = "cs.CL", | |
title = "{Distributed Representations of Sentences and Documents}", | |
year = 2014 | |
} | |
@inproceedings{arora-2016-simple-tough,
title = "A simple but tough-to-beat baseline for sentence embeddings",
author = "Arora, Sanjeev and Liang, Yingyu and Ma, Tengyu",
booktitle = "International Conference on Learning Representations",
year = 2016
}
@ARTICLE{bojanowski-2016-fasttext, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv160704606B", | |
archivePrefix= "arXiv", | |
author = "{Bojanowski}, P. and {Grave}, E. and {Joulin}, A. and | |
{Mikolov}, T.", | |
eprint = "1607.04606", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language, Computer Science | |
- Learning", | |
month = jul, | |
primaryClass = "cs.CL", | |
title = "{Enriching Word Vectors With Subword Information}", | |
year = 2016 | |
} | |
@ARTICLE{srivastava-2015-highw-networ, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv150500387S", | |
archivePrefix= "arXiv", | |
author = "{Srivastava}, R.~K. and {Greff}, K. and {Schmidhuber}, J.", | |
eprint = "1505.00387", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Learning, Computer Science - Neural and | |
Evolutionary Computing, 68T01, I.2.6, G.1.6", | |
month = may, | |
primaryClass = "cs.LG", | |
title = "{Highway Networks}", | |
year = 2015 | |
} | |
@ARTICLE{kalchbrenner-2014-convol-neural, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2014arXiv1404.2188K", | |
archivePrefix= "arXiv", | |
author = "{Kalchbrenner}, N. and {Grefenstette}, E. and {Blunsom}, P.", | |
eprint = "1404.2188", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language", | |
month = apr, | |
primaryClass = "cs.CL", | |
title = "{A Convolutional Neural Network for Modelling Sentences}", | |
year = 2014 | |
} | |
@InProceedings{matt-2015-word-embed, | |
title = "From Word Embeddings To Document Distances", | |
author = "Matt Kusner and Yu Sun and Nicholas Kolkin and Kilian | |
Weinberger", | |
booktitle = "Proceedings of the 32nd International Conference on Machine | |
Learning", | |
pages = "957-966", | |
year = 2015, | |
editor = "Francis Bach and David Blei", | |
volume = 37, | |
series = "Proceedings of Machine Learning Research", | |
address = "Lille, France", | |
month = "07--09 Jul", | |
publisher = "PMLR", | |
pdf = "http://proceedings.mlr.press/v37/kusnerb15.pdf", | |
url = "http://proceedings.mlr.press/v37/kusnerb15.html", | |
abstract = "We present the Word Mover’s Distance (WMD), a novel distance | |
function between text documents. Our work is based on recent | |
results in word embeddings that learn semantically meaningful | |
representations for words from local co-occurrences in | |
sentences. The WMD distance measures the dissimilarity | |
between two text documents as the minimum amount of distance | |
that the embedded words of one document need to ``travel'' to | |
reach the embedded words of another document. We show that | |
this distance metric can be cast as an instance of the Earth | |
Mover's Distance, a well studied transportation problem for
which several highly efficient solvers have been | |
developed. Our metric has no hyperparameters and is | |
straight-forward to implement. Further, we demonstrate on | |
eight real world document classification data sets, in | |
comparison with seven state-of-the-art baselines, that the | |
WMD metric leads to unprecedented low k-nearest neighbor | |
document classification error rates." | |
} | |
@ARTICLE{brokos-2016-using-centr, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv160803905B", | |
archivePrefix= "arXiv", | |
author = "{Brokos}, G.-I. and {Malakasiotis}, P. and {Androutsopoulos}, | |
I.", | |
eprint = "1608.03905", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Information Retrieval", | |
month = aug, | |
primaryClass = "cs.IR", | |
title = "{Using Centroids of Word Embeddings and Word Mover's Distance | |
for Biomedical Document Retrieval in Question Answering}", | |
year = 2016 | |
} | |
@ARTICLE{dai-2015-docum-embed, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv150707998D", | |
archivePrefix= "arXiv", | |
author = "{Dai}, A.~M. and {Olah}, C. and {Le}, Q.~V.", | |
eprint = "1507.07998", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language, Computer Science | |
- Artificial Intelligence, Computer Science - Learning", | |
month = jul, | |
primaryClass = "cs.CL", | |
title = "{Document Embedding With Paragraph Vectors}", | |
year = 2015 | |
} | |
@ARTICLE{lau-2016-empir-evaluat, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv160705368L", | |
archivePrefix= "arXiv", | |
author = "{Lau}, J.~H. and {Baldwin}, T.", | |
eprint = "1607.05368", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language", | |
month = jul, | |
primaryClass = "cs.CL", | |
title = "{An Empirical Evaluation of Doc2vec With Practical Insights | |
Into Document Embedding Generation}", | |
year = 2016 | |
} | |
@inproceedings{polajnar-2015-exploration-discourse, | |
title = "An exploration of discourse-based sentence spaces for | |
compositional distributional semantics", | |
author = "Polajnar, Tamara and Rimell, Laura and Clark, Stephen", | |
booktitle = "Workshop on Linking Models of Lexical, Sentential and | |
Discourse-level Semantics (LSDSem)", | |
pages = 1, | |
year = 2015 | |
} | |
@inproceedings{socher-2011-semi-supervised, | |
title = "Semi-supervised recursive autoencoders for predicting | |
sentiment distributions", | |
author = "Socher, Richard and Pennington, Jeffrey and Huang, Eric H and | |
Ng, Andrew Y and Manning, Christopher D", | |
booktitle = "Proceedings of the conference on empirical methods in natural | |
language processing", | |
pages = "151-161", | |
year = 2011, | |
organization = "Association for Computational Linguistics" | |
} | |
@article{hodosh-2013-framing-image, | |
title = "Framing image description as a ranking task: Data, models and | |
evaluation metrics", | |
author = "Hodosh, Micah and Young, Peter and Hockenmaier, Julia", | |
journal = "Journal of Artificial Intelligence Research", | |
volume = 47, | |
pages = "853-899", | |
year = 2013 | |
} | |
@inproceedings{shen-2014-latent-semantic, | |
title = "A latent semantic model with convolutional-pooling structure | |
for information retrieval", | |
author = "Shen, Yelong and He, Xiaodong and Gao, Jianfeng and Deng, Li | |
and Mesnil, Gr{\'e}goire", | |
booktitle = "Proceedings of the 23rd ACM International Conference on | |
Conference on Information and Knowledge Management", | |
pages = "101-110", | |
year = 2014, | |
organization = "ACM" | |
} | |
@ARTICLE{xiong-2016-dynam-memor, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv160301417X", | |
archivePrefix= "arXiv", | |
author = "{Xiong}, C. and {Merity}, S. and {Socher}, R.", | |
eprint = "1603.01417", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Neural and Evolutionary Computing, | |
Computer Science - Computation and Language, Computer Science | |
- Computer Vision and Pattern Recognition", | |
month = mar, | |
title = "{Dynamic Memory Networks for Visual and Textual Question | |
Answering}", | |
year = 2016 | |
} | |
@ARTICLE{zeng-2016-effic-summar, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv161103382Z", | |
archivePrefix= "arXiv", | |
author = "{Zeng}, W. and {Luo}, W. and {Fidler}, S. and {Urtasun}, R.", | |
eprint = "1611.03382", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language", | |
month = nov, | |
primaryClass = "cs.CL", | |
title = "{Efficient Summarization With Read-Again and Copy Mechanism}", | |
year = 2016 | |
} | |
@ARTICLE{lai-2015-how-to, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv150705523L", | |
archivePrefix= "arXiv", | |
author = "{Lai}, S. and {Liu}, K. and {Xu}, L. and {Zhao}, J.", | |
eprint = "1507.05523", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language", | |
month = jul, | |
primaryClass = "cs.CL", | |
title = "{How To Generate a Good Word Embedding?}", | |
year = 2015 | |
} | |
@inproceedings{chen-2015-revisit-word, | |
title = "Revisiting Word Embedding for Contrasting Meaning", | |
author = "Zhigang Chen and Wei Lin and Qian Chen and Xiaoping Chen and | |
Si Wei and Hui Jiang and Xiao-Dan Zhu", | |
booktitle = "ACL", | |
year = 2015 | |
} | |
@inproceedings{lazaridou-2015-hubness-pollution, | |
title = "Hubness and Pollution: Delving into Cross-Space Mapping for | |
Zero-Shot Learning", | |
author = "Angeliki Lazaridou and Georgiana Dinu and Marco Baroni", | |
booktitle = "ACL", | |
year = 2015 | |
} | |
@ARTICLE{yin-2017-compar-study, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2017arXiv170201923Y", | |
archivePrefix= "arXiv", | |
author = "{Yin}, W. and {Kann}, K. and {Yu}, M. and {Sch{\"u}tze}, H.", | |
eprint = "1702.01923", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language", | |
month = feb, | |
primaryClass = "cs.CL", | |
title = "{Comparative Study of CNN and RNN for Natural Language
                 Processing}",
year = 2017 | |
} | |
@ARTICLE{zhang-2015-sensit-analy, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv151003820Z", | |
archivePrefix= "arXiv", | |
author = "{Zhang}, Y. and {Wallace}, B.", | |
eprint = "1510.03820", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language, Computer Science | |
- Learning, Computer Science - Neural and Evolutionary Computing", | |
month = oct, | |
primaryClass = "cs.CL", | |
title = "{A Sensitivity Analysis of (and Practitioners' Guide to) | |
Convolutional Neural Networks for Sentence Classification}", | |
year = 2015 | |
} | |
@ARTICLE{johnson-2014-effec-use, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2014arXiv1412.1058J", | |
archivePrefix= "arXiv", | |
author = "{Johnson}, R. and {Zhang}, T.", | |
eprint = "1412.1058", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language, Computer Science | |
- Learning, Statistics - Machine Learning", | |
month = dec, | |
primaryClass = "cs.CL", | |
title = "{Effective Use of Word Order for Text Categorization With | |
Convolutional Neural Networks}", | |
year = 2014 | |
} | |
@ARTICLE{johnson-2015-semi-super, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv150401255J", | |
archivePrefix= "arXiv", | |
author = "{Johnson}, R. and {Zhang}, T.", | |
eprint = "1504.01255", | |
journal = "ArXiv e-prints", | |
keywords = "Statistics - Machine Learning, Computer Science - Computation | |
and Language, Computer Science - Learning", | |
month = apr, | |
primaryClass = "stat.ML", | |
title = "{Semi-Supervised Convolutional Neural Networks for Text | |
Categorization Via Region Embedding}", | |
year = 2015 | |
} | |
@ARTICLE{zhang-2015-charac-level, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv150901626Z", | |
archivePrefix= "arXiv", | |
author = "{Zhang}, X. and {Zhao}, J. and {LeCun}, Y.", | |
eprint = "1509.01626", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Learning, Computer Science - Computation | |
and Language", | |
month = sep, | |
primaryClass = "cs.LG", | |
title = "{Character-Level Convolutional Networks for Text | |
Classification}", | |
year = 2015 | |
} | |
@ARTICLE{zhang-2015-text-under-from-scrat, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv150201710Z", | |
archivePrefix= "arXiv", | |
author = "{Zhang}, X. and {LeCun}, Y.", | |
eprint = "1502.01710", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Learning, Computer Science - Computation | |
and Language", | |
month = feb, | |
primaryClass = "cs.LG", | |
title = "{Text Understanding From Scratch}", | |
year = 2015 | |
} | |
@article{schuster-1997-bidirectional-recurrent, | |
title = "Bidirectional recurrent neural networks", | |
author = "Schuster, Mike and Paliwal, Kuldip K", | |
journal = "IEEE Transactions on Signal Processing", | |
volume = 45, | |
number = 11, | |
pages = "2673--2681",
year = 1997, | |
publisher = "IEEE" | |
} | |
@inproceedings{chen-2015-event-extract,
title = "Event Extraction via Dynamic Multi-Pooling Convolutional
                 Neural Networks",
author = "Chen, Yubo and Xu, Liheng and Liu, Kang and Zeng, Daojian and
                 Zhao, Jun and others",
booktitle = "ACL",
year = 2015
}
@ARTICLE{bengio-2012-repres-learn, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2012arXiv1206.5538B", | |
archivePrefix= "arXiv", | |
author = "{Bengio}, Y. and {Courville}, A. and {Vincent}, P.", | |
eprint = "1206.5538", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Learning", | |
month = jun, | |
primaryClass = "cs.LG", | |
title = "{Representation Learning: A Review and New Perspectives}", | |
year = 2012 | |
} | |
@inproceedings{le-2011-ica-recons, | |
title = "ICA with reconstruction cost for efficient overcomplete | |
feature learning", | |
author = "Le, Quoc V and Karpenko, Alexandre and Ngiam, Jiquan and Ng, | |
Andrew Y", | |
booktitle = "Advances in Neural Information Processing Systems", | |
pages = "1017--1025",
year = 2011 | |
} | |
@ARTICLE{goodfellow-2013-maxout-networ, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2013arXiv1302.4389G", | |
archivePrefix= "arXiv", | |
author = "{Goodfellow}, I.~J. and {Warde-Farley}, D. and {Mirza}, | |
M. and {Courville}, A. and {Bengio}, Y.", | |
eprint = "1302.4389", | |
journal = "ArXiv e-prints", | |
keywords = "Statistics - Machine Learning, Computer Science - Learning", | |
month = feb, | |
primaryClass = "stat.ML", | |
title = "{Maxout Networks}", | |
year = 2013 | |
} | |
@ARTICLE{he-2015-delvin-deep-into-rectif, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv150201852H", | |
archivePrefix= "arXiv", | |
author = "{He}, K. and {Zhang}, X. and {Ren}, S. and {Sun}, J.", | |
eprint = "1502.01852", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computer Vision and Pattern Recognition, | |
Computer Science - Artificial Intelligence, Computer Science | |
- Learning", | |
month = feb, | |
primaryClass = "cs.CV", | |
title = "{Delving Deep Into Rectifiers: Surpassing Human-Level
                 Performance on ImageNet Classification}",
year = 2015 | |
} | |
@ARTICLE{schmidhuber-2014-deep-learn-neural-networ, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2014arXiv1404.7828S", | |
archivePrefix= "arXiv", | |
author = "{Schmidhuber}, J.", | |
eprint = "1404.7828", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Neural and Evolutionary Computing, | |
Computer Science - Learning", | |
month = apr, | |
title = "{Deep Learning in Neural Networks: An Overview}", | |
year = 2014 | |
} | |
@article{dahl-2012-context-depend, | |
title = "Context-dependent pre-trained deep neural networks for | |
large-vocabulary speech recognition", | |
author = "Dahl, George E and Yu, Dong and Deng, Li and Acero, Alex", | |
journal = "IEEE Transactions on audio, speech, and language processing", | |
volume = 20, | |
number = 1, | |
pages = "30--42",
year = 2012, | |
publisher = "IEEE" | |
} | |
@ARTICLE{romero-2014-fitnet, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2014arXiv1412.6550R", | |
archivePrefix= "arXiv", | |
author = "{Romero}, A. and {Ballas}, N. and {Ebrahimi Kahou}, S. and | |
{Chassang}, A. and {Gatta}, C. and {Bengio}, Y.", | |
eprint = "1412.6550", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Learning, Computer Science - Neural and | |
Evolutionary Computing", | |
month = dec, | |
primaryClass = "cs.LG", | |
title = "{FitNets: Hints for Thin Deep Nets}", | |
year = 2014 | |
} | |
@ARTICLE{srivastava-2015-train-very-deep-networ, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv150706228S", | |
archivePrefix= "arXiv", | |
author = "{Srivastava}, R.~K. and {Greff}, K. and {Schmidhuber}, J.", | |
eprint = "1507.06228", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Learning, Computer Science - Neural and | |
Evolutionary Computing, 68T01, I.2.6, G.1.6", | |
month = jul, | |
primaryClass = "cs.LG", | |
title = "{Training Very Deep Networks}", | |
year = 2015 | |
} | |
@ARTICLE{huang-2016-densel-connec-convol-networ, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv160806993H", | |
archivePrefix= "arXiv", | |
author = "{Huang}, G. and {Liu}, Z. and {Weinberger}, K.~Q. and {van | |
der Maaten}, L.", | |
eprint = "1608.06993", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computer Vision and Pattern Recognition, | |
Computer Science - Learning", | |
month = aug, | |
primaryClass = "cs.CV", | |
title = "{Densely Connected Convolutional Networks}", | |
year = 2016 | |
} | |
@ARTICLE{he-2016-ident-mappin, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv160305027H", | |
archivePrefix= "arXiv", | |
author = "{He}, K. and {Zhang}, X. and {Ren}, S. and {Sun}, J.", | |
eprint = "1603.05027", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computer Vision and Pattern Recognition, | |
Computer Science - Learning", | |
month = mar, | |
primaryClass = "cs.CV", | |
title = "{Identity Mappings in Deep Residual Networks}", | |
year = 2016 | |
} | |
@ARTICLE{veit-2016-resid-networ, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv160506431V", | |
archivePrefix= "arXiv", | |
author = "{Veit}, A. and {Wilber}, M. and {Belongie}, S.", | |
eprint = "1605.06431", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computer Vision and Pattern Recognition, | |
Computer Science - Artificial Intelligence, Computer Science | |
- Learning, Computer Science - Neural and Evolutionary | |
Computing", | |
month = may, | |
primaryClass = "cs.CV", | |
title = "{Residual Networks Behave Like Ensembles of Relatively | |
Shallow Networks}", | |
year = 2016 | |
} | |
@ARTICLE{zagoruyko-2016-wide-resid-networ, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv160507146Z", | |
archivePrefix= "arXiv", | |
author = "{Zagoruyko}, S. and {Komodakis}, N.", | |
eprint = "1605.07146", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computer Vision and Pattern Recognition, | |
Computer Science - Learning, Computer Science - Neural and | |
Evolutionary Computing", | |
month = may, | |
primaryClass = "cs.CV", | |
title = "{Wide Residual Networks}", | |
year = 2016 | |
} | |
@ARTICLE{telgarsky-2016-benef-depth-neural, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv160204485T", | |
archivePrefix= "arXiv", | |
author = "{Telgarsky}, M.", | |
eprint = "1602.04485", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Learning, Computer Science - Neural and | |
Evolutionary Computing, Statistics - Machine Learning", | |
month = feb, | |
primaryClass = "cs.LG", | |
title = "{Benefits of Depth in Neural Networks}",
year = 2016 | |
} | |
@ARTICLE{huang-2016-deep-networ, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv160309382H", | |
archivePrefix= "arXiv", | |
author = "{Huang}, G. and {Sun}, Y. and {Liu}, Z. and {Sedra}, D. and | |
{Weinberger}, K.", | |
eprint = "1603.09382", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Learning, Computer Science - Computer | |
Vision and Pattern Recognition, Computer Science - Neural and | |
Evolutionary Computing", | |
month = mar, | |
primaryClass = "cs.LG", | |
title = "{Deep Networks With Stochastic Depth}", | |
year = 2016 | |
} | |
@ARTICLE{eldan-2015-power-depth, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv151203965E", | |
archivePrefix= "arXiv", | |
author = "{Eldan}, R. and {Shamir}, O.", | |
eprint = "1512.03965", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Learning, Computer Science - Neural and | |
Evolutionary Computing, Statistics - Machine Learning", | |
month = dec, | |
primaryClass = "cs.LG", | |
title = "{The Power of Depth for Feedforward Neural Networks}", | |
year = 2015 | |
} | |
@ARTICLE{liao-2016-bridg-gaps, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv160403640L", | |
archivePrefix= "arXiv", | |
author = "{Liao}, Q. and {Poggio}, T.", | |
eprint = "1604.03640", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Learning, Computer Science - Neural and | |
Evolutionary Computing", | |
month = apr, | |
primaryClass = "cs.LG", | |
title = "{Bridging the Gaps Between Residual Learning, Recurrent | |
Neural Networks and Visual Cortex}", | |
year = 2016 | |
} | |
@ARTICLE{greff-2016-highw-resid, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv161207771G", | |
archivePrefix= "arXiv", | |
author = "{Greff}, K. and {Srivastava}, R.~K. and {Schmidhuber}, J.", | |
eprint = "1612.07771", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Neural and Evolutionary Computing, | |
Computer Science - Artificial Intelligence, Computer Science | |
- Learning, I.2.6, I.5.1", | |
month = dec, | |
title = "{Highway and Residual Networks Learn Unrolled Iterative | |
Estimation}", | |
year = 2016 | |
} | |
@ARTICLE{xie-2016-aggreg-resid, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv161105431X", | |
archivePrefix= "arXiv", | |
author = "{Xie}, S. and {Girshick}, R. and {Doll{\'a}r}, P. and {Tu}, | |
Z. and {He}, K.", | |
eprint = "1611.05431", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computer Vision and Pattern Recognition", | |
month = nov, | |
primaryClass = "cs.CV", | |
title = "{Aggregated Residual Transformations for Deep Neural | |
Networks}", | |
year = 2016 | |
} | |
@ARTICLE{alain-2016-under-inter, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv161001644A", | |
archivePrefix= "arXiv", | |
author = "{Alain}, G. and {Bengio}, Y.", | |
eprint = "1610.01644", | |
journal = "ArXiv e-prints", | |
keywords = "Statistics - Machine Learning, Computer Science - Learning", | |
month = oct, | |
primaryClass = "stat.ML", | |
title = "{Understanding Intermediate Layers Using Linear Classifier
                 Probes}",
year = 2016 | |
} | |
@ARTICLE{yosinski-2014-how-trans, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2014arXiv1411.1792Y", | |
archivePrefix= "arXiv", | |
author = "{Yosinski}, J. and {Clune}, J. and {Bengio}, Y. and {Lipson}, | |
H.", | |
eprint = "1411.1792", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Learning, Computer Science - Neural and | |
Evolutionary Computing", | |
month = nov, | |
primaryClass = "cs.LG", | |
title = "{How Transferable Are Features in Deep Neural Networks?}",
year = 2014 | |
} | |
@inproceedings{levy-2014-neural-word, | |
title = "Neural Word Embedding as Implicit Matrix Factorization", | |
author = "Levy, Omer and Goldberg, Yoav", | |
booktitle = "Advances in Neural Information Processing Systems 27", | |
editor = "Z. Ghahramani and M. Welling and C. Cortes and N. D. Lawrence | |
and K. Q. Weinberger", | |
pages = "2177--2185",
year = 2014, | |
publisher = "Curran Associates, Inc.", | |
url = | |
"http://papers.nips.cc/paper/5477-neural-word-embedding-as-implicit-matrix-factorization.pdf" | |
} | |
@ARTICLE{dyer-2014-notes-noise, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2014arXiv1410.8251D", | |
archivePrefix= "arXiv", | |
author = "{Dyer}, C.", | |
eprint = "1410.8251", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Learning", | |
month = oct, | |
primaryClass = "cs.LG", | |
title = "{Notes on Noise Contrastive Estimation and Negative | |
Sampling}", | |
year = 2014 | |
} | |
@inproceedings{levy-2014-ling-regul, | |
title = "Linguistic Regularities in Sparse and Explicit Word | |
Representations", | |
author = "Levy, Omer and Goldberg, Yoav and Ramat-Gan, Israel", | |
booktitle = "CoNLL", | |
pages = "171--180",
year = 2014 | |
} | |
@ARTICLE{arora-2015-rand-walk, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv150203520A", | |
archivePrefix= "arXiv", | |
author = "{Arora}, S. and {Li}, Y. and {Liang}, Y. and {Ma}, T. and | |
{Risteski}, A.", | |
eprint = "1502.03520", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Learning, Computer Science - Computation | |
and Language, Statistics - Machine Learning", | |
month = feb, | |
primaryClass = "cs.LG", | |
title = "{RAND-WALK: A Latent Variable Model Approach To Word | |
Embeddings}", | |
year = 2015 | |
} | |
@ARTICLE{saxe-2013-exact-solut, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2013arXiv1312.6120S", | |
archivePrefix= "arXiv", | |
author = "{Saxe}, A.~M. and {McClelland}, J.~L. and {Ganguli}, S.", | |
eprint = "1312.6120", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Neural and Evolutionary Computing, | |
Condensed Matter - Disordered Systems and Neural Networks, | |
Computer Science - Computer Vision and Pattern Recognition, | |
Computer Science - Learning, Quantitative Biology - Neurons | |
and Cognition, Statistics - Machine Learning", | |
month = dec, | |
title = "{Exact Solutions To the Nonlinear Dynamics of Learning in
                 Deep Linear Neural Networks}",
year = 2013 | |
} | |
@ARTICLE{mishkin-2015-all-you, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv151106422M", | |
archivePrefix= "arXiv", | |
author = "{Mishkin}, D. and {Matas}, J.", | |
eprint = "1511.06422", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Learning", | |
month = nov, | |
primaryClass = "cs.LG", | |
title = "{All You Need Is a Good init}", | |
year = 2015 | |
} | |
@ARTICLE{kraehenbuehl-2015-data-depen, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv151106856K", | |
archivePrefix= "arXiv", | |
author = "{Kr{\"a}henb{\"u}hl}, P. and {Doersch}, C. and {Donahue}, | |
J. and {Darrell}, T.", | |
eprint = "1511.06856", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computer Vision and Pattern Recognition, | |
Computer Science - Learning", | |
month = nov, | |
primaryClass = "cs.CV", | |
title = "{Data-Dependent Initializations of Convolutional Neural | |
Networks}", | |
year = 2015 | |
} | |
@ARTICLE{britz-2017-massiv-explor, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2017arXiv170303906B", | |
archivePrefix= "arXiv", | |
author = "{Britz}, D. and {Goldie}, A. and {Luong}, M.-T. and {Le}, Q.", | |
eprint = "1703.03906", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language", | |
month = mar, | |
primaryClass = "cs.CL", | |
title = "{Massive Exploration of Neural Machine Translation | |
Architectures}", | |
year = 2017 | |
} | |
@ARTICLE{neubig-2017-neural-machin, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2017arXiv170301619N", | |
archivePrefix= "arXiv", | |
author = "{Neubig}, G.", | |
eprint = "1703.01619", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language, Computer Science | |
- Learning, Statistics - Machine Learning", | |
month = mar, | |
primaryClass = "cs.CL", | |
title = "{Neural Machine Translation and Sequence-To-Sequence Models: | |
A Tutorial}", | |
year = 2017 | |
} | |
@ARTICLE{wu-2016-googl-neural, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv160908144W", | |
archivePrefix= "arXiv", | |
author = "{Wu}, Y. and {Schuster}, M. and {Chen}, Z. and {Le}, | |
Q.~V. and {Norouzi}, M. and {Macherey}, W. and {Krikun}, | |
M. and {Cao}, Y. and {Gao}, Q. and {Macherey}, K. and | |
{Klingner}, J. and {Shah}, A. and {Johnson}, M. and {Liu}, | |
X. and {Kaiser}, {\L}. and {Gouws}, S. and {Kato}, Y. and | |
{Kudo}, T. and {Kazawa}, H. and {Stevens}, K. and {Kurian}, | |
G. and {Patil}, N. and {Wang}, W. and {Young}, C. and | |
{Smith}, J. and {Riesa}, J. and {Rudnick}, A. and {Vinyals}, | |
O. and {Corrado}, G. and {Hughes}, M. and {Dean}, J.", | |
eprint = "1609.08144", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language, Computer Science | |
- Artificial Intelligence, Computer Science - Learning", | |
month = sep, | |
primaryClass = "cs.CL", | |
title = "{Google's Neural Machine Translation System: Bridging the Gap | |
Between Human and Machine Translation}", | |
year = 2016 | |
} | |
@ARTICLE{fang-2014-from-caption, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2014arXiv1411.4952F", | |
archivePrefix= "arXiv", | |
author = "{Fang}, H. and {Gupta}, S. and {Iandola}, F. and | |
{Srivastava}, R. and {Deng}, L. and {Doll{\'a}r}, P. and | |
{Gao}, J. and {He}, X. and {Mitchell}, M. and {Platt}, | |
J.~C. and {Zitnick}, C.~L. and {Zweig}, G.", | |
eprint = "1411.4952", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computer Vision and Pattern Recognition, | |
Computer Science - Computation and Language", | |
month = nov, | |
primaryClass = "cs.CV", | |
title = "{From Captions To Visual Concepts and Back}", | |
year = 2014 | |
} | |
@ARTICLE{ranzato-2015-mixer, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv151106732R", | |
archivePrefix= "arXiv", | |
author = "{Ranzato}, M. and {Chopra}, S. and {Auli}, M. and {Zaremba}, | |
W.", | |
eprint = "1511.06732", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Learning, Computer Science - Computation | |
and Language", | |
month = nov, | |
primaryClass = "cs.LG", | |
title = "{Sequence Level Training With Recurrent Neural Networks}", | |
year = 2015 | |
} | |
@ARTICLE{graves-2012-sequen-trans, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2012arXiv1211.3711G", | |
archivePrefix= "arXiv", | |
author = "{Graves}, A.", | |
eprint = "1211.3711", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Neural and Evolutionary Computing, | |
Computer Science - Learning, Statistics - Machine Learning", | |
month = nov, | |
title = "{Sequence Transduction With Recurrent Neural Networks}", | |
year = 2012 | |
} | |
@ARTICLE{zhang-2017-towar-end, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2017arXiv170102720Z", | |
archivePrefix= "arXiv", | |
author = "{Zhang}, Y. and {Pezeshki}, M. and {Brakel}, P. and {Zhang},
                 S. and {Laurent}, C. and {Bengio}, Y. and {Courville}, A.",
eprint = "1701.02720", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language, Computer Science | |
- Learning, Statistics - Machine Learning", | |
month = jan, | |
primaryClass = "cs.CL", | |
title = "{Towards End-To-End Speech Recognition With Deep | |
Convolutional Neural Networks}", | |
year = 2017 | |
} | |
@ARTICLE{bengio-2012-pract-recom, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2012arXiv1206.5533B", | |
archivePrefix= "arXiv", | |
author = "{Bengio}, Y.", | |
eprint = "1206.5533", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Learning", | |
month = jun, | |
primaryClass = "cs.LG", | |
title = "{Practical Recommendations for Gradient-Based Training of
                 Deep Architectures}",
year = 2012 | |
} | |
@ARTICLE{pascanu-2012-diffic-train, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2012arXiv1211.5063P", | |
archivePrefix= "arXiv", | |
author = "{Pascanu}, R. and {Mikolov}, T. and {Bengio}, Y.", | |
eprint = "1211.5063", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Learning", | |
month = nov, | |
primaryClass = "cs.LG", | |
title = "{On the Difficulty of Training Recurrent Neural Networks}", | |
year = 2012 | |
} | |
@ARTICLE{yosinski-2015-under-neural, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv150606579Y", | |
archivePrefix= "arXiv", | |
author = "{Yosinski}, J. and {Clune}, J. and {Nguyen}, A. and {Fuchs}, | |
T. and {Lipson}, H.", | |
eprint = "1506.06579", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computer Vision and Pattern Recognition, | |
Computer Science - Learning, Computer Science - Neural and | |
Evolutionary Computing", | |
month = jun, | |
primaryClass = "cs.CV", | |
title = "{Understanding Neural Networks Through Deep Visualization}", | |
year = 2015 | |
} | |
@ARTICLE{vaswani-2017-transformer, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2017arXiv170603762V", | |
archivePrefix= "arXiv", | |
author = "{Vaswani}, A. and {Shazeer}, N. and {Parmar}, N. and | |
{Uszkoreit}, J. and {Jones}, L. and {Gomez}, A.~N. and | |
{Kaiser}, L. and {Polosukhin}, I.", | |
eprint = "1706.03762", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language, Computer Science | |
- Learning", | |
month = jun, | |
primaryClass = "cs.CL", | |
title = "{Attention Is All You Need}", | |
year = 2017 | |
} | |
@ARTICLE{semeniuta-2016-recurrent-dropout, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv160305118S", | |
archivePrefix= "arXiv", | |
author = "{Semeniuta}, S. and {Severyn}, A. and {Barth}, E.", | |
eprint = "1603.05118", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language", | |
month = mar, | |
primaryClass = "cs.CL", | |
title = "{Recurrent Dropout Without Memory Loss}", | |
year = 2016 | |
} | |
@ARTICLE{pascanu-2013-how-to, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2013arXiv1312.6026P", | |
archivePrefix= "arXiv", | |
author = "{Pascanu}, R. and {Gulcehre}, C. and {Cho}, K. and {Bengio}, | |
Y.", | |
eprint = "1312.6026", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Neural and Evolutionary Computing, | |
Computer Science - Learning, Statistics - Machine Learning", | |
month = dec, | |
title = "{How To Construct Deep Recurrent Neural Networks}", | |
year = 2013 | |
} | |
@ARTICLE{luong-2014-addres-rare, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2014arXiv1410.8206L", | |
archivePrefix= "arXiv", | |
author = "{Luong}, M.-T. and {Sutskever}, I. and {Le}, Q.~V. and | |
{Vinyals}, O. and {Zaremba}, W.", | |
eprint = "1410.8206", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language, Computer Science | |
- Learning, Computer Science - Neural and Evolutionary Computing", | |
month = oct, | |
primaryClass = "cs.CL", | |
title = "{Addressing the Rare Word Problem in Neural Machine | |
Translation}", | |
year = 2014 | |
} | |
@ARTICLE{luo-2017-cosin-normal, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2017arXiv170205870L", | |
archivePrefix= "arXiv", | |
author = "{Luo}, C. and {Zhan}, J. and {Wang}, L. and {Yang}, Q.", | |
eprint = "1702.05870", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Learning, Computer Science - Artificial | |
Intelligence, Statistics - Machine Learning", | |
month = feb, | |
primaryClass = "cs.LG", | |
title = "{Cosine Normalization: Using Cosine Similarity Instead of Dot | |
Product in Neural Networks}", | |
year = 2017 | |
} | |
@ARTICLE{kaiser-2017-one-model, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2017arXiv170605137K", | |
archivePrefix= "arXiv", | |
author = "{Kaiser}, L. and {Gomez}, A.~N. and {Shazeer}, N. and | |
{Vaswani}, A. and {Parmar}, N. and {Jones}, L. and | |
{Uszkoreit}, J.", | |
eprint = "1706.05137", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Learning, Statistics - Machine Learning", | |
month = jun, | |
primaryClass = "cs.LG", | |
title = "{One Model To Learn Them All}", | |
year = 2017 | |
} | |
@ARTICLE{nguyen-2014-deep-neural, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2014arXiv1412.1897N", | |
archivePrefix= "arXiv", | |
author = "{Nguyen}, A. and {Yosinski}, J. and {Clune}, J.", | |
eprint = "1412.1897", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computer Vision and Pattern Recognition, | |
Computer Science - Artificial Intelligence, Computer Science | |
- Neural and Evolutionary Computing", | |
month = dec, | |
primaryClass = "cs.CV", | |
title = "{Deep Neural Networks Are Easily Fooled: High Confidence | |
Predictions for Unrecognizable Images}", | |
year = 2014 | |
} | |
@ARTICLE{press-2016-using-output, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv160805859P", | |
archivePrefix= "arXiv", | |
author = "{Press}, O. and {Wolf}, L.", | |
eprint = "1608.05859", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language", | |
month = aug, | |
primaryClass = "cs.CL", | |
title = "{Using the Output Embedding To Improve Language Models}", | |
year = 2016 | |
} | |
@misc{hochreiter-2001-gradient-flow,
title = "Gradient flow in recurrent nets: the difficulty of learning
                 long-term dependencies",
author = "Hochreiter, Sepp and Bengio, Yoshua and Frasconi, Paolo and
                 Schmidhuber, J{\"u}rgen and others",
year = 2001,
howpublished = "A field guide to dynamical recurrent neural networks. IEEE
                 Press"
}
@ARTICLE{szegedy-2016-incep-v4, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv160207261S", | |
archivePrefix= "arXiv", | |
author = "{Szegedy}, C. and {Ioffe}, S. and {Vanhoucke}, V. and | |
{Alemi}, A.", | |
eprint = "1602.07261", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computer Vision and Pattern Recognition", | |
month = feb, | |
primaryClass = "cs.CV", | |
title = "{Inception-v4, Inception-ResNet and the Impact of Residual
                 Connections on Learning}",
year = 2016 | |
} | |
@ARTICLE{lin-2017-struc-self, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2017arXiv170303130L", | |
archivePrefix= "arXiv", | |
author = "{Lin}, Z. and {Feng}, M. and {Nogueira dos Santos}, C. and | |
{Yu}, M. and {Xiang}, B. and {Zhou}, B. and {Bengio}, Y.", | |
eprint = "1703.03130", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language, Computer Science | |
- Artificial Intelligence, Computer Science - Learning, Computer Science - | |
Neural and Evolutionary Computing", | |
month = mar, | |
primaryClass = "cs.CL", | |
title = "{A Structured Self-Attentive Sentence Embedding}", | |
year = 2017 | |
} | |
@ARTICLE{memisevic-2011-learn-to-relat-images, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2011arXiv1110.0107M", | |
archivePrefix= "arXiv", | |
author = "{Memisevic}, R.", | |
eprint = "1110.0107", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computer Vision and Pattern Recognition, | |
Computer Science - Artificial Intelligence, Nonlinear | |
Sciences - Adaptation and Self-Organizing Systems, Statistics | |
- Machine Learning", | |
month = oct, | |
primaryClass = "cs.CV", | |
title = "{Learning To Relate Images: Mapping Units, Complex Cells and | |
Simultaneous eigenspaces}", | |
year = 2011 | |
} | |
@ARTICLE{cheng-2016-long-short, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv160106733C", | |
archivePrefix= "arXiv", | |
author = "{Cheng}, J. and {Dong}, L. and {Lapata}, M.", | |
eprint = "1601.06733", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language, Computer Science | |
- Neural and Evolutionary Computing", | |
month = jan, | |
primaryClass = "cs.CL", | |
title = "{Long Short-Term Memory-Networks for Machine Reading}", | |
year = 2016 | |
} | |
@ARTICLE{paulus-2017-deep-reinf, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2017arXiv170504304P", | |
archivePrefix= "arXiv", | |
author = "{Paulus}, R. and {Xiong}, C. and {Socher}, R.", | |
eprint = "1705.04304", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language", | |
month = may, | |
primaryClass = "cs.CL", | |
title = "{A Deep Reinforced Model for Abstractive Summarization}", | |
year = 2017 | |
} | |
@ARTICLE{shen-2016-reason, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv160905284S", | |
archivePrefix= "arXiv", | |
author = "{Shen}, Y. and {Huang}, P.-S. and {Gao}, J. and {Chen}, W.", | |
eprint = "1609.05284", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Learning, Computer Science - Neural and | |
Evolutionary Computing", | |
month = sep, | |
primaryClass = "cs.LG", | |
title = "{ReasoNet: Learning To Stop Reading in Machine | |
Comprehension}", | |
year = 2016 | |
} | |
@ARTICLE{golub-2017-two-stage, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2017arXiv170609789G", | |
archivePrefix= "arXiv", | |
author = "{Golub}, D. and {Huang}, P.-S. and {He}, X. and {Deng}, L.", | |
eprint = "1706.09789", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language", | |
month = jun, | |
primaryClass = "cs.CL", | |
title = "{Two-Stage Synthesis Networks for Transfer Learning in | |
Machine Comprehension}", | |
year = 2017 | |
} | |
@ARTICLE{miller-2016-key-value, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv160603126M", | |
archivePrefix= "arXiv", | |
author = "{Miller}, A. and {Fisch}, A. and {Dodge}, J. and {Karimi}, | |
A.-H. and {Bordes}, A. and {Weston}, J.", | |
eprint = "1606.03126", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language", | |
month = jun, | |
primaryClass = "cs.CL", | |
title = "{Key-Value Memory Networks for Directly Reading Documents}", | |
year = 2016 | |
} | |
@ARTICLE{zhang-2016-quest-answer, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv160600979Z", | |
archivePrefix= "arXiv", | |
author = "{Zhang}, Y. and {Liu}, K. and {He}, S. and {Ji}, G. and | |
{Liu}, Z. and {Wu}, H. and {Zhao}, J.", | |
eprint = "1606.00979", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Information Retrieval, Computer Science - | |
Artificial Intelligence, Computer Science - Computation and | |
Language, Computer Science - Neural and Evolutionary | |
Computing", | |
month = jun, | |
primaryClass = "cs.IR", | |
title = "{Question Answering Over Knowledge Base With Neural Attention | |
Combining Global Knowledge Information}", | |
year = 2016 | |
} | |
@ARTICLE{nguyen-2016-ms-marco, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv161109268N", | |
archivePrefix= "arXiv", | |
author = "{Nguyen}, T. and {Rosenberg}, M. and {Song}, X. and {Gao}, | |
J. and {Tiwary}, S. and {Majumder}, R. and {Deng}, L.", | |
eprint = "1611.09268", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language, Computer Science | |
- Information Retrieval", | |
month = nov, | |
primaryClass = "cs.CL", | |
title = "{MS MARCO: A Human Generated Machine Reading Comprehension | |
Dataset}", | |
year = 2016 | |
} | |
@ARTICLE{zhang-2017-inter-convol-neural-networ, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2017arXiv171000935Z", | |
archivePrefix= "arXiv", | |
author = "{Zhang}, Q. and {Nian Wu}, Y. and {Zhu}, S.-C.", | |
eprint = "1710.00935", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computer Vision and Pattern Recognition", | |
month = oct, | |
primaryClass = "cs.CV", | |
title = "{Interpretable Convolutional Neural Networks}", | |
year = 2017 | |
} | |
@ARTICLE{mnih-2012-fast-simpl, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2012arXiv1206.6426M", | |
archivePrefix= "arXiv", | |
author = "{Mnih}, A. and {Whye Teh}, Y.", | |
eprint = "1206.6426", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language, Computer Science | |
- Learning", | |
month = jun, | |
primaryClass = "cs.CL", | |
title = "{A Fast and Simple Algorithm for Training Neural | |
Probabilistic Language Models}", | |
year = 2012 | |
} | |
@ARTICLE{pagliardini-2017-unsup-learn, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2017arXiv170302507P", | |
archivePrefix= "arXiv", | |
author = "{Pagliardini}, M. and {Gupta}, P. and {Jaggi}, M.", | |
eprint = "1703.02507", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language, Computer Science | |
- Artificial Intelligence, Computer Science - Information Retrieval, I.2.7", | |
month = mar, | |
primaryClass = "cs.CL", | |
title = "{Unsupervised Learning of Sentence Embeddings Using | |
Compositional N-Gram Features}", | |
year = 2017 | |
} | |
@ARTICLE{palangi-2015-deep-senten, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv150206922P", | |
archivePrefix= "arXiv", | |
author = "{Palangi}, H. and {Deng}, L. and {Shen}, Y. and {Gao}, J. and | |
{He}, X. and {Chen}, J. and {Song}, X. and {Ward}, R.", | |
eprint = "1502.06922", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language, Computer Science | |
- Information Retrieval, Computer Science - Learning, Computer Science - | |
Neural and Evolutionary Computing", | |
month = feb, | |
primaryClass = "cs.CL", | |
title = "{Deep Sentence Embedding Using Long Short-Term Memory | |
Networks: Analysis and Application To Information Retrieval}", | |
year = 2015 | |
} | |
@ARTICLE{maillard-2017-joint-learn, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2017arXiv170509189M", | |
archivePrefix= "arXiv", | |
author = "{Maillard}, J. and {Clark}, S. and {Yogatama}, D.", | |
eprint = "1705.09189", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language", | |
month = may, | |
primaryClass = "cs.CL", | |
title = "{Jointly Learning Sentence Embeddings and Syntax With | |
Unsupervised Tree-LSTMs}", | |
year = 2017 | |
} | |
@ARTICLE{dai-2015-semi-super-sequen-learn, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv151101432D", | |
archivePrefix= "arXiv", | |
author = "{Dai}, A.~M. and {Le}, Q.~V.", | |
eprint = "1511.01432", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Learning, Computer Science - Computation | |
and Language", | |
month = nov, | |
primaryClass = "cs.LG", | |
title = "{Semi-Supervised Sequence Learning}", | |
year = 2015 | |
} | |
@ARTICLE{luong-2015-multi-task-seq2seq, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv151106114L", | |
archivePrefix= "arXiv", | |
author = "{Luong}, M.-T. and {Le}, Q.~V. and {Sutskever}, I. and | |
{Vinyals}, O. and {Kaiser}, L.", | |
eprint = "1511.06114", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Learning, Computer Science - Computation | |
and Language, Statistics - Machine Learning", | |
month = nov, | |
primaryClass = "cs.LG", | |
title = "{Multi-Task Sequence To Sequence Learning}", | |
year = 2015 | |
} | |
@ARTICLE{li-2015-hierar-neural, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv150601057L", | |
archivePrefix= "arXiv", | |
author = "{Li}, J. and {Luong}, M.-T. and {Jurafsky}, D.", | |
eprint = "1506.01057", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language", | |
month = jun, | |
primaryClass = "cs.CL", | |
title = "{A Hierarchical Neural Autoencoder for Paragraphs and | |
Documents}", | |
year = 2015 | |
} | |
@ARTICLE{hill-2016-learn-distr, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv160203483H", | |
archivePrefix= "arXiv", | |
author = "{Hill}, F. and {Cho}, K. and {Korhonen}, A.", | |
eprint = "1602.03483", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language, Computer Science | |
- Learning", | |
month = feb, | |
primaryClass = "cs.CL", | |
title = "{Learning Distributed Representations of Sentences From | |
Unlabelled Data}", | |
year = 2016 | |
} | |
@ARTICLE{wieting-2015-towar-univer, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv151108198W", | |
archivePrefix= "arXiv", | |
author = "{Wieting}, J. and {Bansal}, M. and {Gimpel}, K. and | |
{Livescu}, K.", | |
eprint = "1511.08198", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language, Computer Science | |
- Learning", | |
month = nov, | |
primaryClass = "cs.CL", | |
title = "{Towards Universal Paraphrastic Sentence Embeddings}", | |
year = 2015 | |
} | |
@ARTICLE{agrawal-2015-vqa, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv150500468A", | |
archivePrefix= "arXiv", | |
author = "{Agrawal}, A. and {Lu}, J. and {Antol}, S. and {Mitchell}, | |
M. and {Zitnick}, C.~L. and {Batra}, D. and {Parikh}, D.", | |
eprint = "1505.00468", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language, Computer Science | |
- Computer Vision and Pattern Recognition", | |
month = may, | |
primaryClass = "cs.CL", | |
title = "{VQA: Visual Question Answering}", | |
year = 2015 | |
} | |
@ARTICLE{zhang-2015-yin-yang, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv151105099Z", | |
archivePrefix= "arXiv", | |
author = "{Zhang}, P. and {Goyal}, Y. and {Summers-Stay}, D. and | |
{Batra}, D. and {Parikh}, D.", | |
eprint = "1511.05099", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language, Computer Science | |
- Computer Vision and Pattern Recognition, Computer Science - Learning", | |
month = nov, | |
primaryClass = "cs.CL", | |
title = "{Yin and Yang: Balancing and Answering Binary Visual | |
Questions}", | |
year = 2015 | |
} | |
@ARTICLE{goyal-2016-makin-v-vqa-matter, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv161200837G", | |
archivePrefix= "arXiv", | |
author = "{Goyal}, Y. and {Khot}, T. and {Summers-Stay}, D. and | |
{Batra}, D. and {Parikh}, D.", | |
eprint = "1612.00837", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computer Vision and Pattern Recognition, | |
Computer Science - Artificial Intelligence, Computer Science | |
- Computation and Language, Computer Science - Learning", | |
month = dec, | |
primaryClass = "cs.CV", | |
title = "{Making the V in VQA Matter: Elevating the Role of Image | |
Understanding in Visual Question Answering}", | |
year = 2016 | |
} | |
@ARTICLE{bowman-2015-gener-senten, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv151106349B", | |
archivePrefix= "arXiv", | |
author = "{Bowman}, S.~R. and {Vilnis}, L. and {Vinyals}, O. and {Dai}, | |
A.~M. and {Jozefowicz}, R. and {Bengio}, S.", | |
eprint = "1511.06349", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Learning, Computer Science - Computation | |
and Language", | |
month = nov, | |
primaryClass = "cs.LG", | |
title = "{Generating Sentences From a Continuous Space}", | |
year = 2015 | |
} | |
@inproceedings{maas-2011-learning-word, | |
title = "Learning word vectors for sentiment analysis", | |
author = "Maas, Andrew L and Daly, Raymond E and Pham, Peter T and | |
Huang, Dan and Ng, Andrew Y and Potts, Christopher", | |
booktitle = "Proceedings of the 49th Annual Meeting of the Association for | |
Computational Linguistics: Human Language Technologies-Volume | |
1", | |
pages = "142--150", | |
year = 2011, | |
organization = "Association for Computational Linguistics" | |
} | |
@inproceedings{ganitkevitch-2013-ppdb, | |
title = "{PPDB}: The Paraphrase Database", | |
author = "Ganitkevitch, Juri and {Van Durme}, Benjamin and | |
Callison-Burch, Chris", | |
booktitle = "Proceedings of NAACL-HLT", | |
pages = "758--764", | |
month = jun, | |
year = 2013, | |
address = "Atlanta, Georgia", | |
publisher = "Association for Computational Linguistics", | |
url = "http://cs.jhu.edu/~ccb/publications/ppdb.pdf" | |
} | |
@ARTICLE{mrk-2016-count-fittin, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv160300892M", | |
archivePrefix= "arXiv", | |
author = "{Mrk{\v s}i{\'c}}, N. and {S{\'e}aghdha}, D.~{\'O} and | |
{Thomson}, B. and {Ga{\v s}i{\'c}}, M. and {Rojas-Barahona}, | |
L. and {Su}, P.-H. and {Vandyke}, D. and {Wen}, T.-H. and | |
{Young}, S.", | |
eprint = "1603.00892", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language, Computer Science | |
- Learning", | |
month = mar, | |
primaryClass = "cs.CL", | |
title = "{Counter-Fitting Word Vectors To Linguistic Constraints}", | |
year = 2016 | |
} | |
@ARTICLE{hill-2014-simlex, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2014arXiv1408.3456H", | |
archivePrefix= "arXiv", | |
author = "{Hill}, F. and {Reichart}, R. and {Korhonen}, A.", | |
eprint = "1408.3456", | |
journal = "ArXiv e-prints", | |
keywords = "Computer Science - Computation and Language", | |
month = aug, | |
primaryClass = "cs.CL", | |
title = "{SimLex-999: Evaluating Semantic Models With (Genuine) | |
Similarity Estimation}", | |
year = 2014 | |
} | |
@inproceedings{agirre-2009-study-similarity, | |
title = "A study on similarity and relatedness using distributional | |
and wordnet-based approaches", | |
author = "Agirre, Eneko and Alfonseca, Enrique and Hall, Keith and | |
Kravalova, Jana and Pa{\c{s}}ca, Marius and Soroa, Aitor", | |
booktitle = "Proceedings of Human Language Technologies: The 2009 Annual | |
Conference of the North American Chapter of the Association | |
for Computational Linguistics", | |
pages = "19--27", | |
year = 2009, | |
organization = "Association for Computational Linguistics" | |
} | |
@article{marelli-2014-sick-cure, | |
title = {A SICK cure for the evaluation of compositional distributional | |
semantic models}, | |
author = {Marelli, M and Menini, S and Baroni, M and Bentivogli, L and | |
Bernardi, R and Zamparelli, R}, | |
year = 2014, | |
publisher = {Citeseer}, | |
journal = "" | |
} | |
@inproceedings{severyn-2015-learning-rank, | |
title = "Learning to rank short text pairs with convolutional deep | |
neural networks", | |
author = "Severyn, Aliaksei and Moschitti, Alessandro", | |
booktitle = "Proceedings of the 38th International ACM SIGIR Conference on | |
Research and Development in Information Retrieval", | |
pages = "373--382", | |
year = 2015, | |
organization = "ACM" | |
} | |
@inproceedings{huang-2016-supervised-word, | |
Author = "Huang, Gao and Guo, Chuan and Kusner, Matt J and Sun, Yu and | |
Sha, Fei and Weinberger, Kilian Q", | |
Booktitle = "Advances in Neural Information Processing Systems 29", | |
Editor = "D. D. Lee and M. Sugiyama and U. V. Luxburg and I. Guyon and | |
R. Garnett", | |
Pages = "4862--4870", | |
Publisher = "Curran Associates, Inc.", | |
Title = "Supervised Word Mover's Distance", | |
Url = | |
"http://papers.nips.cc/paper/6139-supervised-word-movers-distance.pdf", | |
Year = 2016, | |
Bdsk-Url-1 = | |
"http://papers.nips.cc/paper/6139-supervised-word-movers-distance.pdf" | |
} | |
@ARTICLE{sennrich-2015-neural-machin, | |
author = "{Sennrich}, R. and {Haddow}, B. and {Birch}, A.", | |
title = "{Neural Machine Translation of Rare Words With Subword | |
Units}", | |
journal = "ArXiv e-prints", | |
year = 2015, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv150807909S", | |
archivePrefix= "arXiv", | |
eprint = "1508.07909", | |
keywords = "Computer Science - Computation and Language", | |
month = aug, | |
primaryClass = "cs.CL" | |
} | |
@ARTICLE{ling-2015-findin-funct-form, | |
author = "{Ling}, W. and {Lu{\'{\i}}s}, T. and {Marujo}, L. and | |
{Fernandez Astudillo}, R. and {Amir}, S. and {Dyer}, C. and | |
{Black}, A.~W. and {Trancoso}, I.", | |
title = "{Finding Function in Form: Compositional Character Models for | |
Open Vocabulary Word Representation}", | |
journal = "ArXiv e-prints", | |
year = 2015, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv150802096L", | |
archivePrefix= "arXiv", | |
eprint = "1508.02096", | |
keywords = "Computer Science - Computation and Language", | |
month = aug, | |
primaryClass = "cs.CL" | |
} | |
@inproceedings{kim-2016-char-aware, | |
title = "Character-Aware Neural Language Models.", | |
author = "Kim, Yoon and Jernite, Yacine and Sontag, David and Rush, | |
Alexander M", | |
booktitle = "AAAI", | |
pages = "2741--2749", | |
year = 2016 | |
} | |
@article{achananuparp-2008-evaluation-sentence, | |
title = "The evaluation of sentence similarity measures", | |
author = "Achananuparp, Palakorn and Hu, Xiaohua and Shen, Xiajiong", | |
journal = "Data warehousing and knowledge discovery", | |
pages = "305--316", | |
year = 2008, | |
publisher = "Springer" | |
} | |
@ARTICLE{bradbury-2016-quasi-recur-neural-networ, | |
author = "{Bradbury}, J. and {Merity}, S. and {Xiong}, C. and {Socher}, | |
R.", | |
title = "{Quasi-Recurrent Neural Networks}", | |
journal = "ArXiv e-prints", | |
year = 2016, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv161101576B", | |
archivePrefix= "arXiv", | |
eprint = "1611.01576", | |
keywords = "Computer Science - Neural and Evolutionary Computing, | |
Computer Science - Artificial Intelligence, Computer Science | |
- Computation and Language, Computer Science - Learning", | |
month = nov | |
} | |
@ARTICLE{ballesteros-2015-improv-trans, | |
author = "{Ballesteros}, M. and {Dyer}, C. and {Smith}, N.~A.", | |
title = "{Improved Transition-Based Parsing By Modeling Characters | |
Instead of Words With LSTMs}", | |
journal = "ArXiv e-prints", | |
year = 2015, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv150800657B", | |
archivePrefix= "arXiv", | |
eprint = "1508.00657", | |
keywords = "Computer Science - Computation and Language", | |
month = aug, | |
primaryClass = "cs.CL" | |
} | |
@ARTICLE{wiseman-2016-sequen-to, | |
author = "{Wiseman}, S. and {Rush}, A.~M.", | |
title = "{Sequence-To-Sequence Learning As Beam-Search Optimization}", | |
journal = "ArXiv e-prints", | |
year = 2016, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv160602960W", | |
archivePrefix= "arXiv", | |
eprint = "1606.02960", | |
keywords = "Computer Science - Computation and Language, Computer Science | |
- Learning, Computer Science - Neural and Evolutionary Computing, Statistics | |
- Machine Learning", | |
month = jun, | |
primaryClass = "cs.CL" | |
} | |
@ARTICLE{snoek-2012-pract-bayes, | |
author = "{Snoek}, J. and {Larochelle}, H. and {Adams}, R.~P.", | |
title = "{Practical Bayesian Optimization of Machine Learning | |
Algorithms}", | |
journal = "ArXiv e-prints", | |
year = 2012, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2012arXiv1206.2944S", | |
archivePrefix= "arXiv", | |
eprint = "1206.2944", | |
keywords = "Statistics - Machine Learning, Computer Science - Learning", | |
month = jun, | |
primaryClass = "stat.ML" | |
} | |
@article{hashimoto-2016-word-embed, | |
title = "Word embeddings as metric recovery in semantic spaces", | |
author = "Hashimoto, Tatsunori B and Alvarez-Melis, David and Jaakkola, | |
Tommi S", | |
journal = "Transactions of the Association for Computational | |
Linguistics", | |
volume = 4, | |
pages = "273--286", | |
year = 2016 | |
} | |
@inproceedings{mnih-2007-three-graph, | |
title = "Three new graphical models for statistical language | |
modelling", | |
author = "Mnih, Andriy and Hinton, Geoffrey", | |
booktitle = "Proceedings of the 24th international conference on Machine | |
learning", | |
pages = "641--648", | |
year = 2007, | |
organization = "ACM" | |
} | |
@ARTICLE{chawla-2011-smote, | |
author = "{Chawla}, N.~V. and {Bowyer}, K.~W. and {Hall}, L.~O. and | |
{Kegelmeyer}, W.~P.", | |
title = "{SMOTE: Synthetic Minority Over-Sampling Technique}", | |
journal = "ArXiv e-prints", | |
year = 2011, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2011arXiv1106.1813C", | |
archivePrefix= "arXiv", | |
eprint = "1106.1813", | |
keywords = "Computer Science - Artificial Intelligence", | |
month = jun, | |
primaryClass = "cs.AI" | |
} | |
@inproceedings{klein-2001-parsing-treebank, | |
title = "Parsing with treebank grammars: Empirical bounds, theoretical | |
models, and the structure of the Penn treebank", | |
author = "Klein, Dan and Manning, Christopher D", | |
booktitle = "Proceedings of the 39th Annual Meeting on Association for | |
Computational Linguistics", | |
pages = "338--345", | |
year = 2001, | |
organization = "Association for Computational Linguistics" | |
} | |
@article{collins-2003-head-driven, | |
title = "Head-driven statistical models for natural language parsing", | |
author = "Collins, Michael", | |
journal = "Computational linguistics", | |
volume = 29, | |
number = 4, | |
pages = "589--637", | |
year = 2003, | |
publisher = "MIT Press" | |
} | |
@inproceedings{collins-1997-three-generative, | |
title = "Three generative, lexicalised models for statistical parsing", | |
author = "Collins, Michael", | |
booktitle = "Proceedings of the eighth conference on European chapter of | |
the Association for Computational Linguistics", | |
pages = "16--23", | |
year = 1997, | |
organization = "Association for Computational Linguistics" | |
} | |
@inproceedings{bikel-2004-distributional-analysis, | |
title = "A Distributional Analysis of a Lexicalized Statistical | |
Parsing Mode.", | |
author = "Bikel, Daniel M", | |
booktitle = "EMNLP", | |
pages = "182--189", | |
year = 2004 | |
} | |
@inproceedings{chen-2014-fast-acc, | |
title = "A fast and accurate dependency parser using neural networks", | |
author = "Chen, Danqi and Manning, Christopher", | |
booktitle = "Proceedings of the 2014 conference on empirical methods in | |
natural language processing (EMNLP)", | |
pages = "740--750", | |
year = 2014 | |
} | |
@inproceedings{socher-2013-su-rnn, | |
title = "Parsing with compositional vector grammars", | |
author = "Socher, Richard and Bauer, John and Manning, Christopher D | |
and others", | |
booktitle = "Proceedings of the 51st Annual Meeting of the Association for | |
Computational Linguistics (Volume 1: Long Papers)", | |
volume = 1, | |
pages = "455--465", | |
year = 2013 | |
} | |
@inproceedings{levy-2003-harder-parse, | |
title = "Is it harder to parse Chinese, or the Chinese Treebank?", | |
author = "Levy, Roger and Manning, Christopher", | |
booktitle = "Proceedings of the 41st Annual Meeting on Association for | |
Computational Linguistics-Volume 1", | |
pages = "439--446", | |
year = 2003, | |
organization = "Association for Computational Linguistics" | |
} | |
@inproceedings{chang-2009-discriminative-reorder, | |
title = "Discriminative reordering with Chinese grammatical relations | |
features", | |
author = "Chang, Pi-Chuan and Tseng, Huihsin and Jurafsky, Dan and | |
Manning, Christopher D", | |
booktitle = "Proceedings of the Third Workshop on Syntax and Structure in | |
Statistical Translation", | |
pages = "51--59", | |
year = 2009, | |
organization = "Association for Computational Linguistics" | |
} | |
@inproceedings{zhu-2013-fast-acc, | |
title = "Fast and Accurate Shift-Reduce Constituent Parsing.", | |
author = "Zhu, Muhua and Zhang, Yue and Chen, Wenliang and Zhang, Min | |
and Zhu, Jingbo", | |
booktitle = "ACL (1)", | |
pages = "434--443", | |
year = 2013 | |
} | |
@inproceedings{klein-2003-accurate-unlex, | |
title = "Accurate unlexicalized parsing", | |
author = "Klein, Dan and Manning, Christopher D", | |
booktitle = "Proceedings of the 41st Annual Meeting on Association for | |
Computational Linguistics-Volume 1", | |
pages = "423--430", | |
year = 2003, | |
organization = "Association for Computational Linguistics" | |
} | |
@inproceedings{klein-2003-fast-exact, | |
title = "Fast exact inference with a factored model for natural | |
language parsing", | |
author = "Klein, Dan and Manning, Christopher D", | |
booktitle = "Advances in neural information processing systems", | |
pages = "3--10", | |
year = 2003 | |
} | |
@inproceedings{nivre-2016-universal-depend, | |
title = "Universal Dependencies v1: A Multilingual Treebank | |
Collection.", | |
author = "Nivre, Joakim and de Marneffe, Marie-Catherine and Ginter, | |
Filip and Goldberg, Yoav and Hajic, Jan and Manning, | |
Christopher D and McDonald, Ryan T and Petrov, Slav and | |
Pyysalo, Sampo and Silveira, Natalia and others", | |
booktitle = "LREC", | |
year = 2016 | |
} | |
@inproceedings{de-2006-generating-typed, | |
title = "Generating typed dependency parses from phrase structure | |
parses", | |
author = "De Marneffe, Marie-Catherine and MacCartney, Bill and | |
Manning, Christopher D and others", | |
booktitle = "Proceedings of LREC", | |
volume = 6, | |
number = 2006, | |
pages = "449--454", | |
year = 2006, | |
organization = "Genoa Italy" | |
} | |
@ARTICLE{Krotov-1999-compact-penn, | |
author = "{Krotov}, A. and {Hepple}, M. and {Gaizauskas}, R. and | |
{Wilks}, Y.", | |
title = "{Compacting the Penn Treebank Grammar}", | |
journal = "eprint arXiv:cs/9902001", | |
eprint = "cs/9902001", | |
keywords = "Computer Science - Computation and Language, I.2.7", | |
year = 1999, | |
month = jan, | |
adsurl = "http://adsabs.harvard.edu/abs/1999cs........2001K", | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System" | |
} | |
@inproceedings{toutanova-2000-enriching-knowledge, | |
title = "Enriching the knowledge sources used in a maximum entropy | |
part-of-speech tagger", | |
author = "Toutanova, Kristina and Manning, Christopher D", | |
booktitle = "Proceedings of the 2000 Joint SIGDAT conference on Empirical | |
methods in natural language processing and very large | |
corpora: held in conjunction with the 38th Annual Meeting of | |
the Association for Computational Linguistics-Volume 13", | |
pages = "63--70", | |
year = 2000, | |
organization = "Association for Computational Linguistics" | |
} | |
@inproceedings{toutanova-2003-feature-rich, | |
title = "Feature-rich part-of-speech tagging with a cyclic dependency | |
network", | |
author = "Toutanova, Kristina and Klein, Dan and Manning, Christopher D | |
and Singer, Yoram", | |
booktitle = "Proceedings of the 2003 Conference of the North American | |
Chapter of the Association for Computational Linguistics on | |
Human Language Technology-Volume 1", | |
pages = "173--180", | |
year = 2003, | |
organization = "Association for Computational Linguistics" | |
} | |
@ARTICLE{chen-2016-thoroug-examin, | |
author = "{Chen}, D. and {Bolton}, J. and {Manning}, C.~D.", | |
title = "{A Thorough Examination of the CNN/Daily Mail Reading | |
Comprehension Task}", | |
journal = "ArXiv e-prints", | |
year = 2016, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv160602858C", | |
archivePrefix= "arXiv", | |
eprint = "1606.02858", | |
keywords = "Computer Science - Computation and Language, Computer Science | |
- Artificial Intelligence", | |
month = jun, | |
primaryClass = "cs.CL" | |
} | |
@ARTICLE{dhingra-2016-gated-atten,
  author       = "{Dhingra}, B. and {Liu}, H. and {Yang}, Z. and {Cohen},
                  W.~W. and {Salakhutdinov}, R.",
  title        = "{Gated-Attention Readers for Text Comprehension}",
  journal      = "ArXiv e-prints",
  year         = 2016,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2016arXiv160601549D",
  archivePrefix= "arXiv",
  eprint       = "1606.01549",
  keywords     = "Computer Science - Computation and Language, Computer Science
                  - Learning",
  month        = jun,
  primaryClass = "cs.CL"
}
@ARTICLE{kadlec-2016-text-under,
  author       = "{Kadlec}, R. and {Schmid}, M. and {Bajgar}, O. and
                  {Kleindienst}, J.",
  title        = "{Text Understanding with the Attention Sum Reader Network}",
  journal      = "ArXiv e-prints",
  year         = 2016,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2016arXiv160301547K",
  archivePrefix= "arXiv",
  eprint       = "1603.01547",
  keywords     = "Computer Science - Computation and Language",
  month        = mar,
  primaryClass = "cs.CL"
}
@ARTICLE{tseng-2016-towar-machin,
  author       = "{Tseng}, B.-H. and {Shen}, S.-S. and {Lee}, H.-Y. and {Lee},
                  L.-S.",
  title        = "{Towards Machine Comprehension of Spoken Content: Initial
                  TOEFL Listening Comprehension Test by Machine}",
  journal      = "ArXiv e-prints",
  year         = 2016,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2016arXiv160806378T",
  archivePrefix= "arXiv",
  eprint       = "1608.06378",
  keywords     = "Computer Science - Computation and Language",
  month        = aug,
  primaryClass = "cs.CL"
}
@ARTICLE{cui-2016-consen-atten,
  author       = "{Cui}, Y. and {Liu}, T. and {Chen}, Z. and {Wang}, S. and
                  {Hu}, G.",
  title        = "{Consensus Attention-Based Neural Networks for Chinese
                  Reading Comprehension}",
  journal      = "ArXiv e-prints",
  year         = 2016,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2016arXiv160702250C",
  archivePrefix= "arXiv",
  eprint       = "1607.02250",
  keywords     = "Computer Science - Computation and Language, Computer Science
                  - Neural and Evolutionary Computing",
  month        = jul,
  primaryClass = "cs.CL"
}
@ARTICLE{cui-2016-atten-over,
  author       = "{Cui}, Y. and {Chen}, Z. and {Wei}, S. and {Wang}, S. and
                  {Liu}, T. and {Hu}, G.",
  title        = "{Attention-over-Attention Neural Networks for Reading
                  Comprehension}",
  journal      = "ArXiv e-prints",
  year         = 2016,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2016arXiv160704423C",
  archivePrefix= "arXiv",
  eprint       = "1607.04423",
  keywords     = "Computer Science - Computation and Language, Computer Science
                  - Neural and Evolutionary Computing",
  month        = jul,
  primaryClass = "cs.CL"
}
@ARTICLE{wang-2016-machin-compr,
  author       = "{Wang}, S. and {Jiang}, J.",
  title        = "{Machine Comprehension Using Match-LSTM and Answer Pointer}",
  journal      = "ArXiv e-prints",
  year         = 2016,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2016arXiv160807905W",
  archivePrefix= "arXiv",
  eprint       = "1608.07905",
  keywords     = "Computer Science - Computation and Language, Computer Science
                  - Artificial Intelligence",
  month        = aug,
  primaryClass = "cs.CL"
}
@ARTICLE{sordoni-2016-iterat-alter,
  author       = "{Sordoni}, A. and {Bachman}, P. and {Trischler}, A. and
                  {Bengio}, Y.",
  title        = "{Iterative Alternating Neural Attention for Machine Reading}",
  journal      = "ArXiv e-prints",
  year         = 2016,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2016arXiv160602245S",
  archivePrefix= "arXiv",
  eprint       = "1606.02245",
  keywords     = "Computer Science - Computation and Language, Computer Science
                  - Neural and Evolutionary Computing",
  month        = jun,
  primaryClass = "cs.CL"
}
@inproceedings{kobayashi-2016-dynamic-entity,
  title        = "Dynamic Entity Representation with Max-pooling Improves
                  Machine Reading",
  author       = "Kobayashi, Sosuke and Tian, Ran and Okazaki, Naoaki and Inui,
                  Kentaro",
  booktitle    = "Proceedings of NAACL-HLT",
  pages        = "850--855",
  year         = 2016
}
@ARTICLE{trischler-2016-natur-languag,
  author       = "{Trischler}, A. and {Ye}, Z. and {Yuan}, X. and {Suleman},
                  K.",
  title        = "{Natural Language Comprehension with the EpiReader}",
  journal      = "ArXiv e-prints",
  year         = 2016,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2016arXiv160602270T",
  archivePrefix= "arXiv",
  eprint       = "1606.02270",
  keywords     = "Computer Science - Computation and Language",
  month        = jun,
  primaryClass = "cs.CL"
}
@ARTICLE{creswell-2017-gener-adver-networ,
  author       = "{Creswell}, A. and {White}, T. and {Dumoulin}, V. and
                  {Arulkumaran}, K. and {Sengupta}, B. and {Bharath}, A.~A.",
  title        = "{Generative Adversarial Networks: An Overview}",
  journal      = "ArXiv e-prints",
  year         = 2017,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2017arXiv171007035C",
  archivePrefix= "arXiv",
  eprint       = "1710.07035",
  keywords     = "Computer Science - Computer Vision and Pattern Recognition",
  month        = oct,
  primaryClass = "cs.CV"
}
@ARTICLE{weston-2014-memor-networ,
  author       = "{Weston}, J. and {Chopra}, S. and {Bordes}, A.",
  title        = "{Memory Networks}",
  journal      = "ArXiv e-prints",
  year         = 2014,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2014arXiv1410.3916W",
  archivePrefix= "arXiv",
  eprint       = "1410.3916",
  keywords     = "Computer Science - Artificial Intelligence, Computer Science
                  - Computation and Language, Statistics - Machine Learning",
  month        = oct,
  primaryClass = "cs.AI"
}
@ARTICLE{munkhdalai-2016-neural-seman-encod,
  author       = "{Munkhdalai}, T. and {Yu}, H.",
  title        = "{Neural Semantic Encoders}",
  journal      = "ArXiv e-prints",
  year         = 2016,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2016arXiv160704315M",
  archivePrefix= "arXiv",
  eprint       = "1607.04315",
  keywords     = "Computer Science - Learning, Computer Science - Computation
                  and Language, Statistics - Machine Learning",
  month        = jul,
  primaryClass = "cs.LG"
}
@ARTICLE{nickel-2017-poinc-embed,
  author       = "{Nickel}, M. and {Kiela}, D.",
  title        = "{Poincar{\'e} Embeddings for Learning Hierarchical
                  Representations}",
  journal      = "ArXiv e-prints",
  year         = 2017,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2017arXiv170508039N",
  archivePrefix= "arXiv",
  eprint       = "1705.08039",
  keywords     = "Computer Science - Artificial Intelligence, Computer Science
                  - Learning, Statistics - Machine Learning",
  month        = may,
  primaryClass = "cs.AI"
}
@ARTICLE{weston-2015-towar-ai,
  author       = "{Weston}, J. and {Bordes}, A. and {Chopra}, S. and {Rush},
                  A.~M. and {van Merri{\"e}nboer}, B. and {Joulin}, A. and
                  {Mikolov}, T.",
  title        = "{Towards AI-Complete Question Answering: A Set of
                  Prerequisite Toy Tasks}",
  journal      = "ArXiv e-prints",
  year         = 2015,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2015arXiv150205698W",
  archivePrefix= "arXiv",
  eprint       = "1502.05698",
  keywords     = "Computer Science - Artificial Intelligence, Computer Science
                  - Computation and Language, Statistics - Machine Learning",
  month        = feb,
  primaryClass = "cs.AI"
}
@ARTICLE{sabour-2017-dynam-routin-between-capsul,
  author       = "{Sabour}, S. and {Frosst}, N. and {Hinton}, G.~E.",
  title        = "{Dynamic Routing Between Capsules}",
  journal      = "ArXiv e-prints",
  year         = 2017,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2017arXiv171009829S",
  archivePrefix= "arXiv",
  eprint       = "1710.09829",
  keywords     = "Computer Science - Computer Vision and Pattern Recognition",
  month        = oct,
  primaryClass = "cs.CV"
}
@ARTICLE{lu-2017-depth-creat,
  author       = "{Lu}, H. and {Kawaguchi}, K.",
  title        = "{Depth Creates No Bad Local Minima}",
  journal      = "ArXiv e-prints",
  year         = 2017,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2017arXiv170208580L",
  archivePrefix= "arXiv",
  eprint       = "1702.08580",
  keywords     = "Computer Science - Learning, Computer Science - Neural and
                  Evolutionary Computing, Mathematics - Optimization and
                  Control, Statistics - Machine Learning",
  month        = feb,
  primaryClass = "cs.LG"
}
@ARTICLE{kawaguchi-2017-gener-deep-learn,
  author       = "{Kawaguchi}, K. and {Kaelbling}, L.~P. and {Bengio}, Y.",
  title        = "{Generalization in Deep Learning}",
  journal      = "ArXiv e-prints",
  year         = 2017,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2017arXiv171005468K",
  archivePrefix= "arXiv",
  eprint       = "1710.05468",
  keywords     = "Statistics - Machine Learning, Computer Science - Artificial
                  Intelligence, Computer Science - Learning, Computer Science -
                  Neural and Evolutionary Computing",
  month        = oct,
  primaryClass = "stat.ML"
}
@article{wolpert-1997-no-free-lunch,
  title        = "No free lunch theorems for optimization",
  author       = "Wolpert, David H and Macready, William G",
  journal      = "IEEE transactions on evolutionary computation",
  volume       = 1,
  number       = 1,
  pages        = "67--82",
  year         = 1997,
  publisher    = "IEEE"
}
@inproceedings{recasens-2013-life-death,
  title        = "The Life and Death of Discourse Entities: Identifying
                  Singleton Mentions",
  author       = "Recasens, Marta and de Marneffe, Marie-Catherine and Potts,
                  Christopher",
  year         = 2013,
  booktitle    = "Proceedings of the 2013 Conference of the North American
                  Chapter of the Association for Computational Linguistics:
                  Human Language Technologies"
}
@inproceedings{lee-2011-stanford-multi-pass,
  title        = "Stanford's multi-pass sieve coreference resolution system at
                  the CoNLL-2011 shared task",
  author       = "Lee, Heeyoung and Peirsman, Yves and Chang, Angel and
                  Chambers, Nathanael and Surdeanu, Mihai and Jurafsky, Dan",
  booktitle    = "Proceedings of the fifteenth conference on computational
                  natural language learning: Shared task",
  pages        = "28--34",
  year         = 2011,
  organization = "Association for Computational Linguistics"
}
@inproceedings{raghunathan-2010-multi-pass-sieve,
  title        = "A multi-pass sieve for coreference resolution",
  author       = "Raghunathan, Karthik and Lee, Heeyoung and Rangarajan,
                  Sudarshan and Chambers, Nathanael and Surdeanu, Mihai and
                  Jurafsky, Dan and Manning, Christopher",
  booktitle    = "Proceedings of the 2010 Conference on Empirical Methods in
                  Natural Language Processing",
  pages        = "492--501",
  year         = 2010,
  organization = "Association for Computational Linguistics"
}
@article{lee-2013-deterministic-coreference,
  title        = "Deterministic coreference resolution based on entity-centric,
                  precision-ranked rules",
  author       = "Lee, Heeyoung and Chang, Angel and Peirsman, Yves and
                  Chambers, Nathanael and Surdeanu, Mihai and Jurafsky, Dan",
  journal      = "Computational Linguistics",
  volume       = 39,
  number       = 4,
  pages        = "885--916",
  year         = 2013,
  publisher    = "MIT Press"
}
@inproceedings{clark-2015-entity-centric,
  title        = "Entity-Centric Coreference Resolution with Model Stacking",
  author       = "Clark, Kevin and Manning, Christopher D",
  booktitle    = "ACL (1)",
  pages        = "1405--1415",
  year         = 2015
}
@ARTICLE{clark-2016-rl-for-cr,
  author       = "{Clark}, K. and {Manning}, C.~D.",
  title        = "{Deep Reinforcement Learning for Mention-Ranking Coreference
                  Models}",
  journal      = "ArXiv e-prints",
  year         = 2016,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2016arXiv160908667C",
  archivePrefix= "arXiv",
  eprint       = "1609.08667",
  keywords     = "Computer Science - Computation and Language",
  month        = sep,
  primaryClass = "cs.CL"
}
@ARTICLE{clark-2016-improv-coref,
  author       = "{Clark}, K. and {Manning}, C.~D.",
  title        = "{Improving Coreference Resolution by Learning Entity-Level
                  Distributed Representations}",
  journal      = "ArXiv e-prints",
  year         = 2016,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2016arXiv160601323C",
  archivePrefix= "arXiv",
  eprint       = "1606.01323",
  keywords     = "Computer Science - Computation and Language",
  month        = jun,
  primaryClass = "cs.CL"
}
@InProceedings{recasens-2013-same-referent,
  author       = "Recasens, Marta and Can, Matthew and Jurafsky, Daniel",
  title        = "Same Referent, Different Words: Unsupervised Mining of Opaque
                  Coreferent Mentions",
  booktitle    = "Proceedings of the 2013 Conference of the North American
                  Chapter of the Association for Computational Linguistics:
                  Human Language Technologies",
  year         = 2013,
  publisher    = "Association for Computational Linguistics",
  pages        = "897--906",
  location     = "Atlanta, Georgia",
  url          = "http://aclanthology.coli.uni-saarland.de/pdf/N/N13/N13-1110.pdf"
}
@inproceedings{lee-2012-joint-entity,
  title        = "Joint entity and event coreference resolution across
                  documents",
  author       = "Lee, Heeyoung and Recasens, Marta and Chang, Angel and
                  Surdeanu, Mihai and Jurafsky, Dan",
  booktitle    = "Proceedings of the 2012 Joint Conference on Empirical Methods
                  in Natural Language Processing and Computational Natural
                  Language Learning",
  pages        = "489--500",
  year         = 2012,
  organization = "Association for Computational Linguistics"
}
@ARTICLE{lee-2017-end-to,
  author       = "{Lee}, K. and {He}, L. and {Lewis}, M. and {Zettlemoyer}, L.",
  title        = "{End-to-End Neural Coreference Resolution}",
  journal      = "ArXiv e-prints",
  year         = 2017,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2017arXiv170707045L",
  archivePrefix= "arXiv",
  eprint       = "1707.07045",
  keywords     = "Computer Science - Computation and Language",
  month        = jul,
  primaryClass = "cs.CL"
}
@ARTICLE{radford-2017-learn-to,
  author       = "{Radford}, A. and {Jozefowicz}, R. and {Sutskever}, I.",
  title        = "{Learning to Generate Reviews and Discovering Sentiment}",
  journal      = "ArXiv e-prints",
  year         = 2017,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2017arXiv170401444R",
  archivePrefix= "arXiv",
  eprint       = "1704.01444",
  keywords     = "Computer Science - Learning, Computer Science - Computation
                  and Language, Computer Science - Neural and Evolutionary
                  Computing",
  month        = apr,
  primaryClass = "cs.LG"
}
@ARTICLE{felbo-2017-using-million,
  author       = "{Felbo}, B. and {Mislove}, A. and {S{\o}gaard}, A. and
                  {Rahwan}, I. and {Lehmann}, S.",
  title        = "{Using Millions of Emoji Occurrences to Learn Any-Domain
                  Representations for Detecting Sentiment, Emotion and
                  Sarcasm}",
  journal      = "ArXiv e-prints",
  year         = 2017,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2017arXiv170800524F",
  archivePrefix= "arXiv",
  eprint       = "1708.00524",
  keywords     = "Statistics - Machine Learning, Computer Science - Learning",
  month        = aug,
  primaryClass = "stat.ML"
}
@PHDTHESIS{hamdan-2016-under-coupl,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2016PhDT........44H",
  author       = "{Hamdan}, L.",
  school       = "West Virginia University",
  title        = "{Understanding Coupling of Global and Diffuse Solar Radiation
                  with Climatic Variability}",
  year         = 2016
}
@ARTICLE{zhang-2017-which-encod,
  author       = "{Zhang}, X. and {LeCun}, Y.",
  title        = "{Which Encoding Is the Best for Text Classification in
                  Chinese, English, Japanese and Korean?}",
  journal      = "ArXiv e-prints",
  year         = 2017,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2017arXiv170802657Z",
  archivePrefix= "arXiv",
  eprint       = "1708.02657",
  keywords     = "Computer Science - Computation and Language, Computer Science
                  - Learning",
  month        = aug,
  primaryClass = "cs.CL"
}
@article{liu-2012-sentiment-analysis,
  title        = "Sentiment analysis and opinion mining",
  author       = "Liu, Bing",
  journal      = "Synthesis lectures on human language technologies",
  volume       = 5,
  number       = 1,
  pages        = "1--167",
  year         = 2012,
  publisher    = "Morgan \& Claypool Publishers"
}
@article{pang-2008-opinion-mining,
  title        = "Opinion mining and sentiment analysis",
  author       = "Pang, Bo and Lee, Lillian and others",
  journal      = "Foundations and Trends{\textregistered} in Information
                  Retrieval",
  volume       = 2,
  number       = "1--2",
  pages        = "1--135",
  year         = 2008,
  publisher    = "Now Publishers, Inc."
}
@ARTICLE{rajpurkar-2016-squad,
  author       = "{Rajpurkar}, P. and {Zhang}, J. and {Lopyrev}, K. and
                  {Liang}, P.",
  title        = "{SQuAD: 100,000+ Questions for Machine Comprehension of
                  Text}",
  journal      = "ArXiv e-prints",
  year         = 2016,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2016arXiv160605250R",
  archivePrefix= "arXiv",
  eprint       = "1606.05250",
  keywords     = "Computer Science - Computation and Language",
  month        = jun,
  primaryClass = "cs.CL"
}
@ARTICLE{miwa-2016-end-to,
  author       = "{Miwa}, M. and {Bansal}, M.",
  title        = "{End-to-End Relation Extraction Using LSTMs on Sequences and
                  Tree Structures}",
  journal      = "ArXiv e-prints",
  year         = 2016,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2016arXiv160100770M",
  archivePrefix= "arXiv",
  eprint       = "1601.00770",
  keywords     = "Computer Science - Computation and Language, Computer Science
                  - Learning",
  month        = jan,
  primaryClass = "cs.CL"
}
@ARTICLE{kumar-2017-survey-deep,
  author       = "{Kumar}, S.",
  title        = "{A Survey of Deep Learning Methods for Relation Extraction}",
  journal      = "ArXiv e-prints",
  year         = 2017,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2017arXiv170503645K",
  archivePrefix= "arXiv",
  eprint       = "1705.03645",
  keywords     = "Computer Science - Computation and Language",
  month        = may,
  primaryClass = "cs.CL"
}
@inproceedings{lin-2016-neural-relation,
  title        = "Neural Relation Extraction with Selective Attention over
                  Instances",
  author       = "Lin, Yankai and Shen, Shiqi and Liu, Zhiyuan and Luan, Huanbo
                  and Sun, Maosong",
  booktitle    = "ACL (1)",
  year         = 2016
}
@inproceedings{wu-2017-adversarial-train,
  title        = "Adversarial Training for Relation Extraction",
  author       = "Wu, Yi and Bamman, David and Russell, Stuart",
  booktitle    = "Proceedings of the 2017 Conference on Empirical Methods in
                  Natural Language Processing",
  pages        = "1779--1784",
  year         = 2017
}
@ARTICLE{lei-2017-train-rnns,
  author       = "{Lei}, T. and {Zhang}, Y.",
  title        = "{Training RNNs as Fast as CNNs}",
  journal      = "ArXiv e-prints",
  year         = 2017,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2017arXiv170902755L",
  archivePrefix= "arXiv",
  eprint       = "1709.02755",
  keywords     = "Computer Science - Computation and Language, Computer Science
                  - Neural and Evolutionary Computing",
  month        = sep,
  primaryClass = "cs.CL"
}
@ARTICLE{rocktaeschel-2015-reason-about,
  author       = "{Rockt{\"a}schel}, T. and {Grefenstette}, E. and {Hermann},
                  K.~M. and {Ko{\v c}isk{\'y}}, T. and {Blunsom}, P.",
  title        = "{Reasoning about Entailment with Neural Attention}",
  journal      = "ArXiv e-prints",
  year         = 2015,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2015arXiv150906664R",
  archivePrefix= "arXiv",
  eprint       = "1509.06664",
  keywords     = "Computer Science - Computation and Language, Computer Science
                  - Artificial Intelligence, Computer Science - Learning,
                  Computer Science - Neural and Evolutionary Computing, 68T50,
                  I.2.6, I.2.7",
  month        = sep,
  primaryClass = "cs.CL"
}
@inproceedings{bowman-2015-large-annotated,
  author       = "Bowman, Samuel R. and Angeli, Gabor and Potts, Christopher
                  and Manning, Christopher D.",
  booktitle    = "Proceedings of the 2015 Conference on Empirical Methods in
                  Natural Language Processing (EMNLP)",
  publisher    = "Association for Computational Linguistics",
  title        = "A large annotated corpus for learning natural language
                  inference",
  year         = 2015
}
@ARTICLE{zolna-2017-fraternal-dropout,
  author       = "{Zolna}, K. and {Arpit}, D. and {Suhubdy}, D. and {Bengio},
                  Y.",
  title        = "{Fraternal Dropout}",
  journal      = "ArXiv e-prints",
  year         = 2017,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2017arXiv171100066Z",
  archivePrefix= "arXiv",
  eprint       = "1711.00066",
  keywords     = "Statistics - Machine Learning, Computer Science - Artificial
                  Intelligence, Computer Science - Learning",
  month        = oct,
  primaryClass = "stat.ML"
}
@ARTICLE{vinyals-2015-order-matter,
  author       = "{Vinyals}, O. and {Bengio}, S. and {Kudlur}, M.",
  title        = "{Order Matters: Sequence to Sequence for Sets}",
  journal      = "ArXiv e-prints",
  year         = 2015,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2015arXiv151106391V",
  archivePrefix= "arXiv",
  eprint       = "1511.06391",
  keywords     = "Statistics - Machine Learning, Computer Science - Computation
                  and Language, Computer Science - Learning",
  month        = nov,
  primaryClass = "stat.ML"
}
@ARTICLE{clauset-2007-power-law,
  author       = "{Clauset}, A. and {Rohilla Shalizi}, C. and {Newman},
                  M.~E.~J.",
  title        = "{Power-Law Distributions in Empirical Data}",
  journal      = "ArXiv e-prints",
  year         = 2007,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2007arXiv0706.1062C",
  archivePrefix= "arXiv",
  eprint       = "0706.1062",
  keywords     = "Physics - Data Analysis, Statistics and Probability,
                  Condensed Matter - Disordered Systems and Neural Networks,
                  Statistics - Applications, Statistics - Methodology",
  month        = jun,
  primaryClass = "physics.data-an"
}
@ARTICLE{gu-2016-incor-copyin,
  author       = "{Gu}, J. and {Lu}, Z. and {Li}, H. and {Li}, V.~O.~K.",
  title        = "{Incorporating Copying Mechanism in Sequence-to-Sequence
                  Learning}",
  journal      = "ArXiv e-prints",
  year         = 2016,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2016arXiv160306393G",
  archivePrefix= "arXiv",
  eprint       = "1603.06393",
  keywords     = "Computer Science - Computation and Language, Computer Science
                  - Artificial Intelligence, Computer Science - Learning,
                  Computer Science - Neural and Evolutionary Computing",
  month        = mar,
  primaryClass = "cs.CL"
}
@ARTICLE{see-2017-get-to-point,
  author       = "{See}, A. and {Liu}, P.~J. and {Manning}, C.~D.",
  title        = "{Get to the Point: Summarization with Pointer-Generator
                  Networks}",
  journal      = "ArXiv e-prints",
  year         = 2017,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2017arXiv170404368S",
  archivePrefix= "arXiv",
  eprint       = "1704.04368",
  keywords     = "Computer Science - Computation and Language",
  month        = apr,
  primaryClass = "cs.CL"
}
@inproceedings{he-2017-generating-natural,
  title        = "Generating natural answers by incorporating copying and
                  retrieving mechanisms in sequence-to-sequence learning",
  author       = "He, Shizhu and Liu, Cao and Liu, Kang and Zhao, Jun",
  booktitle    = "Proceedings of the 55th Annual Meeting of the Association for
                  Computational Linguistics (Volume 1: Long Papers)",
  volume       = 1,
  pages        = "199--208",
  year         = 2017
}
@ARTICLE{wang-2015-learn-natur,
  author       = "{Wang}, S. and {Jiang}, J.",
  title        = "{Learning Natural Language Inference with LSTM}",
  journal      = "ArXiv e-prints",
  year         = 2015,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2015arXiv151208849W",
  archivePrefix= "arXiv",
  eprint       = "1512.08849",
  keywords     = "Computer Science - Computation and Language, Computer Science
                  - Artificial Intelligence, Computer Science - Neural and
                  Evolutionary Computing",
  month        = dec,
  primaryClass = "cs.CL"
}
@ARTICLE{yu-2016-seqgan,
  author       = "{Yu}, L. and {Zhang}, W. and {Wang}, J. and {Yu}, Y.",
  title        = "{SeqGAN: Sequence Generative Adversarial Nets with Policy
                  Gradient}",
  journal      = "ArXiv e-prints",
  year         = 2016,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2016arXiv160905473Y",
  archivePrefix= "arXiv",
  eprint       = "1609.05473",
  keywords     = "Computer Science - Learning, Computer Science - Artificial
                  Intelligence",
  month        = sep,
  primaryClass = "cs.LG"
}
@ARTICLE{gulrajani-2017-improv-train-wasser-gans,
  author       = "{Gulrajani}, I. and {Ahmed}, F. and {Arjovsky}, M. and
                  {Dumoulin}, V. and {Courville}, A.",
  title        = "{Improved Training of Wasserstein GANs}",
  journal      = "ArXiv e-prints",
  year         = 2017,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2017arXiv170400028G",
  archivePrefix= "arXiv",
  eprint       = "1704.00028",
  keywords     = "Computer Science - Learning, Statistics - Machine Learning",
  month        = mar,
  primaryClass = "cs.LG"
}
@ARTICLE{dauphin-2016-languag-model,
  author       = "{Dauphin}, Y.~N. and {Fan}, A. and {Auli}, M. and {Grangier},
                  D.",
  title        = "{Language Modeling with Gated Convolutional Networks}",
  journal      = "ArXiv e-prints",
  year         = 2016,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2016arXiv161208083D",
  archivePrefix= "arXiv",
  eprint       = "1612.08083",
  keywords     = "Computer Science - Computation and Language",
  month        = dec,
  primaryClass = "cs.CL"
}
@ARTICLE{kuchaiev-2017-factor-trick-lstm,
  author       = "{Kuchaiev}, O. and {Ginsburg}, B.",
  title        = "{Factorization Tricks for LSTM Networks}",
  journal      = "ArXiv e-prints",
  year         = 2017,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2017arXiv170310722K",
  archivePrefix= "arXiv",
  eprint       = "1703.10722",
  keywords     = "Computer Science - Computation and Language, Computer Science
                  - Neural and Evolutionary Computing, Statistics - Machine
                  Learning",
  month        = mar,
  primaryClass = "cs.CL"
}
@ARTICLE{artetxe-2017-unsup-neural-machin-trans,
  author       = "{Artetxe}, M. and {Labaka}, G. and {Agirre}, E. and {Cho},
                  K.",
  title        = "{Unsupervised Neural Machine Translation}",
  journal      = "ArXiv e-prints",
  year         = 2017,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2017arXiv171011041A",
  archivePrefix= "arXiv",
  eprint       = "1710.11041",
  keywords     = "Computer Science - Computation and Language, Computer Science
                  - Artificial Intelligence, Computer Science - Learning",
  month        = oct,
  primaryClass = "cs.CL"
}
@inproceedings{artetxe-2016-learning-principled,
  author       = "Artetxe, Mikel and Labaka, Gorka and Agirre, Eneko",
  title        = "Learning principled bilingual mappings of word embeddings
                  while preserving monolingual invariance",
  booktitle    = "Proceedings of the 2016 Conference on Empirical Methods in
                  Natural Language Processing",
  year         = 2016,
  pages        = "2289--2294"
}
@inproceedings{artetxe-2017-learning-bilingual,
  author       = "Artetxe, Mikel and Labaka, Gorka and Agirre, Eneko",
  title        = "Learning bilingual word embeddings with (almost) no bilingual
                  data",
  booktitle    = "Proceedings of the 55th Annual Meeting of the Association for
                  Computational Linguistics (Volume 1: Long Papers)",
  year         = 2017,
  pages        = "451--462"
}
@ARTICLE{sagun-2016-eigen-hessian-deep-learn,
  author       = "{Sagun}, L. and {Bottou}, L. and {LeCun}, Y.",
  title        = "{Eigenvalues of the Hessian in Deep Learning: Singularity and
                  Beyond}",
  journal      = "ArXiv e-prints",
  year         = 2016,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2016arXiv161107476S",
  archivePrefix= "arXiv",
  eprint       = "1611.07476",
  keywords     = "Computer Science - Learning",
  month        = nov,
  primaryClass = "cs.LG"
}
@ARTICLE{zhou-2017-incep-score,
  author       = "{Zhou}, Z. and {Zhang}, W. and {Wang}, J.",
  title        = "{Inception Score, Label Smoothing, Gradient Vanishing and
                  -log(D(x)) Alternative}",
  journal      = "ArXiv e-prints",
  year         = 2017,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2017arXiv170801729Z",
  archivePrefix= "arXiv",
  eprint       = "1708.01729",
  keywords     = "Computer Science - Learning, Computer Science - Artificial
                  Intelligence, Computer Science - Computer Vision and Pattern
                  Recognition, Statistics - Machine Learning",
  month        = aug,
  primaryClass = "cs.LG"
}
@ARTICLE{dauphin-2014-ident-attac,
  author       = "{Dauphin}, Y. and {Pascanu}, R. and {Gulcehre}, C. and {Cho},
                  K. and {Ganguli}, S. and {Bengio}, Y.",
  title        = "{Identifying and Attacking the Saddle Point Problem in
                  High-Dimensional Non-Convex Optimization}",
  journal      = "ArXiv e-prints",
  year         = 2014,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2014arXiv1406.2572D",
  archivePrefix= "arXiv",
  eprint       = "1406.2572",
  keywords     = "Computer Science - Learning, Mathematics - Optimization and
                  Control, Statistics - Machine Learning",
  month        = jun,
  primaryClass = "cs.LG"
}
@ARTICLE{bottou-2016-optim-method,
  author       = "{Bottou}, L. and {Curtis}, F.~E. and {Nocedal}, J.",
  title        = "{Optimization Methods for Large-Scale Machine Learning}",
  journal      = "ArXiv e-prints",
  year         = 2016,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2016arXiv160604838B",
  archivePrefix= "arXiv",
  eprint       = "1606.04838",
  keywords     = "Statistics - Machine Learning, Computer Science - Learning,
                  Mathematics - Optimization and Control",
  month        = jun,
  primaryClass = "stat.ML"
}
@ARTICLE{berahas-2016-multi-batch,
  author       = "{Berahas}, A.~S. and {Nocedal}, J. and {Tak{\'a}{\v c}}, M.",
  title        = "{A Multi-Batch L-BFGS Method for Machine Learning}",
  journal      = "ArXiv e-prints",
  year         = 2016,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2016arXiv160506049B",
  archivePrefix= "arXiv",
  eprint       = "1605.06049",
  keywords     = "Mathematics - Optimization and Control, Computer Science -
                  Learning, Statistics - Machine Learning",
  month        = may,
  primaryClass = "math.OC"
}
@phdthesis{martens-2016-second-order,
  title        = "Second-order optimization for neural networks",
  author       = "Martens, James",
  year         = 2016,
  school       = "University of Toronto (Canada)"
}
@ARTICLE{mahsereci-2015-probab-line,
  author       = "{Mahsereci}, M. and {Hennig}, P.",
  title        = "{Probabilistic Line Searches for Stochastic Optimization}",
  journal      = "ArXiv e-prints",
  year         = 2015,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2015arXiv150202846M",
  archivePrefix= "arXiv",
  eprint       = "1502.02846",
  keywords     = "Computer Science - Learning, Mathematics - Optimization and
                  Control, Statistics - Machine Learning",
  month        = feb,
  primaryClass = "cs.LG"
}
@ARTICLE{tan-2016-barzilai-borwein,
  author       = "{Tan}, C. and {Ma}, S. and {Dai}, Y.-H. and {Qian}, Y.",
  title        = "{Barzilai-Borwein Step Size for Stochastic Gradient Descent}",
  journal      = "ArXiv e-prints",
  year         = 2016,
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System",
  adsurl       = "http://adsabs.harvard.edu/abs/2016arXiv160504131T",
  archivePrefix= "arXiv",
  eprint       = "1605.04131",
  keywords     = "Mathematics - Optimization and Control, Computer Science -
                  Learning, Statistics - Machine Learning",
  month        = may,
  primaryClass = "math.OC"
}
@ARTICLE{mass-2015-speed-learn, | |
author = "{Mass{\'e}}, P.-Y. and {Ollivier}, Y.", | |
title = "{Speed learning on the fly}", | |
journal = "ArXiv e-prints", | |
archivePrefix= "arXiv", | |
eprint = "1511.02540", | |
primaryClass = "math.OC", | |
keywords = "Mathematics - Optimization and Control, Computer Science - | |
Learning, Statistics - Machine Learning", | |
year = 2015, | |
month = nov, | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv151102540M", | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System" | |
} | |
@ARTICLE{moritz-2015-linear-conver, | |
author = "{Moritz}, P. and {Nishihara}, R. and {Jordan}, M.~I.", | |
title        = "{A Linearly-Convergent Stochastic L-BFGS Algorithm}",
journal = "ArXiv e-prints", | |
year = 2015, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv150802087M", | |
archivePrefix= "arXiv", | |
eprint = "1508.02087", | |
keywords = "Mathematics - Optimization and Control, Computer Science - | |
Learning, Mathematics - Numerical Analysis, Statistics - | |
Computation, Statistics - Machine Learning", | |
month = aug, | |
primaryClass = "math.OC" | |
} | |
@ARTICLE{byrd-2014-stoch-quasi, | |
author = "{Byrd}, R.~H. and {Hansen}, S.~L. and {Nocedal}, J. and | |
{Singer}, Y.", | |
title = "{A Stochastic Quasi-Newton Method for Large-Scale | |
Optimization}", | |
journal = "ArXiv e-prints", | |
year = 2014, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2014arXiv1401.7020B", | |
archivePrefix= "arXiv", | |
eprint = "1401.7020", | |
keywords = "Mathematics - Optimization and Control, Computer Science - | |
Learning, Statistics - Machine Learning", | |
month = jan, | |
primaryClass = "math.OC" | |
} | |
@article{pearlmutter-1994-fast-exact, | |
title = "Fast exact multiplication by the Hessian", | |
author = "Pearlmutter, Barak A", | |
journal = "Neural computation", | |
volume = 6, | |
number = 1, | |
pages        = "147--160",
year = 1994, | |
publisher = "MIT Press" | |
} | |
@ARTICLE{agarwal-2016-second-order, | |
author = "{Agarwal}, N. and {Bullins}, B. and {Hazan}, E.", | |
title = "{Second Order Stochastic Optimization in Linear Time}", | |
journal = "ArXiv e-prints", | |
archivePrefix= "arXiv", | |
eprint = "1602.03943", | |
primaryClass = "stat.ML", | |
keywords = "Statistics - Machine Learning, Computer Science - Learning", | |
year = 2016, | |
month = feb, | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv160203943A", | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System" | |
} | |
@ARTICLE{pascanu-2014-saddl-point, | |
author = "{Pascanu}, R. and {Dauphin}, Y.~N. and {Ganguli}, S. and | |
{Bengio}, Y.", | |
title = "{On the Saddle Point Problem for Non-Convex optimization}", | |
journal = "ArXiv e-prints", | |
year = 2014, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2014arXiv1405.4604P", | |
archivePrefix= "arXiv", | |
eprint = "1405.4604", | |
keywords = "Computer Science - Learning, Computer Science - Neural and | |
Evolutionary Computing", | |
month = may, | |
primaryClass = "cs.LG" | |
} | |
@ARTICLE{looks-2017-deep-learn, | |
author = "{Looks}, M. and {Herreshoff}, M. and {Hutchins}, D. and | |
{Norvig}, P.", | |
title = "{Deep Learning With Dynamic Computation Graphs}", | |
journal = "ArXiv e-prints", | |
year = 2017, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2017arXiv170202181L", | |
archivePrefix= "arXiv", | |
eprint = "1702.02181", | |
keywords = "Computer Science - Neural and Evolutionary Computing, | |
Computer Science - Learning, Statistics - Machine Learning", | |
month = feb | |
} | |
@ARTICLE{neubig-2017-fly-operat, | |
author = "{Neubig}, G. and {Goldberg}, Y. and {Dyer}, C.", | |
title = "{On-The-Fly Operation Batching in Dynamic Computation | |
Graphs}", | |
journal = "ArXiv e-prints", | |
year = 2017, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2017arXiv170507860N", | |
archivePrefix= "arXiv", | |
eprint = "1705.07860", | |
keywords = "Computer Science - Learning, Computer Science - Computation | |
and Language, Statistics - Machine Learning", | |
month = may, | |
primaryClass = "cs.LG" | |
} | |
@ARTICLE{klein-2017-openn, | |
author = "{Klein}, G. and {Kim}, Y. and {Deng}, Y. and {Senellart}, | |
J. and {Rush}, A.~M.", | |
title = "{OpenNMT: Open-Source Toolkit for Neural Machine | |
Translation}", | |
journal = "ArXiv e-prints", | |
year = 2017, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2017arXiv170102810K", | |
archivePrefix= "arXiv", | |
eprint = "1701.02810", | |
keywords = "Computer Science - Computation and Language, Computer Science | |
- Artificial Intelligence, Computer Science - Neural and Evolutionary | |
Computing", | |
month = jan, | |
primaryClass = "cs.CL" | |
} | |
@inproceedings{gatys-2016-image-style, | |
title = "Image style transfer using convolutional neural networks", | |
author = "Gatys, Leon A and Ecker, Alexander S and Bethge, Matthias", | |
booktitle = "Proceedings of the IEEE Conference on Computer Vision and | |
Pattern Recognition", | |
pages        = "2414--2423",
year = 2016 | |
} | |
@ARTICLE{kingma-2014-adam, | |
author = "{Kingma}, D.~P. and {Ba}, J.", | |
title = "{Adam: A Method for Stochastic Optimization}", | |
journal = "ArXiv e-prints", | |
year = 2014, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2014arXiv1412.6980K", | |
archivePrefix= "arXiv", | |
eprint = "1412.6980", | |
keywords = "Computer Science - Learning", | |
month = dec, | |
primaryClass = "cs.LG" | |
} | |
@ARTICLE{zhang-2016-under-deep, | |
author = "{Zhang}, C. and {Bengio}, S. and {Hardt}, M. and {Recht}, | |
B. and {Vinyals}, O.", | |
title = "{Understanding Deep Learning Requires Rethinking | |
                Generalization}",
journal = "ArXiv e-prints", | |
year = 2016, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv161103530Z", | |
archivePrefix= "arXiv", | |
eprint = "1611.03530", | |
keywords = "Computer Science - Learning", | |
month = nov, | |
primaryClass = "cs.LG" | |
} | |
@article{duchi-2011-adaptive-subgrad, | |
title = "Adaptive subgradient methods for online learning and | |
stochastic optimization", | |
author = "Duchi, John and Hazan, Elad and Singer, Yoram", | |
journal = "Journal of Machine Learning Research", | |
volume = 12, | |
number = "Jul", | |
pages        = "2121--2159",
year = 2011 | |
} | |
@inproceedings{roth-2004-feature-selection, | |
title = "Feature selection in clustering problems", | |
author = "Roth, Volker and Lange, Tilman", | |
booktitle = "Advances in neural information processing systems", | |
pages        = "473--480",
year = 2004 | |
} | |
@ARTICLE{liu-2017-gener-adver, | |
author = "{Liu}, L. and {Lu}, Y. and {Yang}, M. and {Qu}, Q. and {Zhu}, | |
J. and {Li}, H.", | |
title = "{Generative Adversarial Network for Abstractive Text | |
Summarization}", | |
journal = "ArXiv e-prints", | |
year = 2017, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2017arXiv171109357L", | |
archivePrefix= "arXiv", | |
eprint = "1711.09357", | |
keywords = "Computer Science - Computation and Language, Computer Science | |
- Artificial Intelligence", | |
month = nov, | |
primaryClass = "cs.CL" | |
} | |
@ARTICLE{moussallem-2017-machin-trans, | |
author = "{Moussallem}, D. and {Wauer}, M. and {Ngonga Ngomo}, A.-C.", | |
title = "{Machine Translation Using Semantic Web Technologies: A | |
Survey}", | |
journal = "ArXiv e-prints", | |
year = 2017, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2017arXiv171109476M", | |
archivePrefix= "arXiv", | |
eprint = "1711.09476", | |
keywords = "Computer Science - Computation and Language", | |
month = nov, | |
primaryClass = "cs.CL" | |
} | |
@ARTICLE{smith-2015-cyclic-lr, | |
author = "{Smith}, L.~N.", | |
title = "{Cyclical Learning Rates for Training Neural Networks}", | |
journal = "ArXiv e-prints", | |
year = 2015, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv150601186S", | |
archivePrefix= "arXiv", | |
eprint = "1506.01186", | |
keywords = "Computer Science - Computer Vision and Pattern Recognition, | |
Computer Science - Learning, Computer Science - Neural and | |
Evolutionary Computing", | |
month = jun, | |
primaryClass = "cs.CV" | |
} | |
@ARTICLE{bubeck-2014-convex-optim, | |
author = "{Bubeck}, S.", | |
title = "{Convex Optimization: Algorithms and Complexity}", | |
journal = "ArXiv e-prints", | |
year = 2014, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2014arXiv1405.4980B", | |
archivePrefix= "arXiv", | |
eprint = "1405.4980", | |
keywords = "Mathematics - Optimization and Control, Computer Science - | |
Computational Complexity, Computer Science - Learning, | |
Computer Science - Numerical Analysis, Statistics - Machine | |
Learning", | |
month = may, | |
primaryClass = "math.OC" | |
} | |
@ARTICLE{gu-2017-non-autor, | |
author = "{Gu}, J. and {Bradbury}, J. and {Xiong}, C. and {Li}, | |
V.~O.~K. and {Socher}, R.", | |
title = "{Non-Autoregressive Neural Machine Translation}", | |
journal = "ArXiv e-prints", | |
year = 2017, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2017arXiv171102281G", | |
archivePrefix= "arXiv", | |
eprint = "1711.02281", | |
keywords = "Computer Science - Computation and Language, Computer Science | |
- Learning", | |
month = nov, | |
primaryClass = "cs.CL" | |
} | |
@article{kalman-1996-singularly-valuable,
title        = {A singularly valuable decomposition: the {SVD} of a matrix},
author       = {Kalman, Dan},
journal      = {The College Mathematics Journal},
volume       = 27,
number       = 1,
pages        = "2--23",
year         = 1996
}
@ARTICLE{niu-2011-hogwil, | |
author       = "{Niu}, F. and {Recht}, B. and {R{\'e}}, C. and {Wright}, S.~J.",
title = "{HOGWILD!: A Lock-Free Approach To Parallelizing Stochastic | |
Gradient Descent}", | |
journal = "ArXiv e-prints", | |
year = 2011, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2011arXiv1106.5730N", | |
archivePrefix= "arXiv", | |
eprint = "1106.5730", | |
keywords = "Mathematics - Optimization and Control, Computer Science - | |
Learning", | |
month = jun, | |
primaryClass = "math.OC" | |
} | |
@ARTICLE{theis-2015-note-evaluat-gener, | |
author = "{Theis}, L. and {van den Oord}, A. and {Bethge}, M.", | |
title = "{A Note on the Evaluation of Generative models}", | |
journal = "ArXiv e-prints", | |
year = 2015, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv151101844T", | |
archivePrefix= "arXiv", | |
eprint = "1511.01844", | |
keywords = "Statistics - Machine Learning, Computer Science - Learning", | |
month = nov, | |
primaryClass = "stat.ML" | |
} | |
@ARTICLE{sutherland-2016-gener-model, | |
author = "{Sutherland}, D.~J. and {Tung}, H.-Y. and {Strathmann}, | |
H. and {De}, S. and {Ramdas}, A. and {Smola}, A. and | |
{Gretton}, A.", | |
title = "{Generative Models and Model Criticism Via Optimized Maximum | |
Mean Discrepancy}", | |
journal = "ArXiv e-prints", | |
year = 2016, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv161104488S", | |
archivePrefix= "arXiv", | |
eprint = "1611.04488", | |
keywords = "Statistics - Machine Learning, Computer Science - Artificial | |
Intelligence, Computer Science - Learning, Computer Science - | |
Neural and Evolutionary Computing, Statistics - Methodology", | |
month = nov, | |
primaryClass = "stat.ML" | |
} | |
@ARTICLE{yang-2016-multi-task, | |
author = "{Yang}, Z. and {Salakhutdinov}, R. and {Cohen}, W.", | |
title = "{Multi-Task Cross-Lingual Sequence Tagging From Scratch}", | |
journal = "ArXiv e-prints", | |
year = 2016, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv160306270Y", | |
archivePrefix= "arXiv", | |
eprint = "1603.06270", | |
keywords = "Computer Science - Computation and Language, Computer Science | |
- Learning", | |
month = mar, | |
primaryClass = "cs.CL" | |
} | |
@ARTICLE{dhingra-2016-tweet, | |
author = "{Dhingra}, B. and {Zhou}, Z. and {Fitzpatrick}, D. and | |
{Muehl}, M. and {Cohen}, W.~W.", | |
title = "{Tweet2Vec: Character-Based Distributed Representations for | |
Social Media}", | |
journal = "ArXiv e-prints", | |
year = 2016, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv160503481D", | |
archivePrefix= "arXiv", | |
eprint = "1605.03481", | |
keywords = "Computer Science - Learning, Computer Science - Computation | |
and Language", | |
month = may, | |
primaryClass = "cs.LG" | |
} | |
@inproceedings{coates-2011-text-detection, | |
title = "Text detection and character recognition in scene images with | |
unsupervised feature learning", | |
author = "Coates, Adam and Carpenter, Blake and Case, Carl and | |
Satheesh, Sanjeev and Suresh, Bipin and Wang, Tao and Wu, | |
David J and Ng, Andrew Y", | |
booktitle = "Document Analysis and Recognition (ICDAR), 2011 International | |
Conference on", | |
pages        = "440--445",
year = 2011, | |
organization = "IEEE" | |
} | |
@inproceedings{zhang-2013-hmsearch, | |
title        = "{HmSearch}: An efficient {Hamming} distance query processing
algorithm", | |
author = "Zhang, Xiaoyang and Qin, Jianbin and Wang, Wei and Sun, | |
Yifang and Lu, Jiaheng", | |
booktitle = "Proceedings of the 25th International Conference on | |
Scientific and Statistical Database Management", | |
pages = 19, | |
year = 2013, | |
organization = "ACM" | |
} | |
@misc{mueen-2017-fastest-similarity, | |
title = "The Fastest Similarity Search Algorithm for Time Series | |
Subsequences under Euclidean Distance", | |
author = "Mueen, Abdullah and Zhu, Yan and Yeh, Michael and Kamgar, | |
Kaveh and Viswanathan, Krishnamurthy and Gupta, Chetan and | |
Keogh, Eamonn", | |
year = 2017, | |
month        = aug,
note = | |
"\url{http://www.cs.unm.edu/~mueen/FastestSimilaritySearch.html}" | |
} | |
@techreport{hyyro-2001-explaining-extending,
title        = {Explaining and extending the bit-parallel approximate string
                matching algorithm of {Myers}},
author       = {Hyyr{\"o}, Heikki},
year         = 2001,
institution  = {Department of Computer and Information Sciences, University
                of Tampere}
}
@inproceedings{askitis-2007-hat-trie, | |
title = "HAT-trie: a cache-conscious trie-based data structure for | |
strings", | |
author = "Askitis, Nikolas and Sinha, Ranjan", | |
booktitle = "Proceedings of the thirtieth Australasian conference on | |
Computer science-Volume 62", | |
pages        = "97--105",
year = 2007, | |
organization = "Australian Computer Society, Inc." | |
} | |
@techreport{bagwell-2001-ideal-hash-trees,
title        = {Ideal hash trees},
author       = {Bagwell, Phil},
year         = 2001,
institution  = {{EPFL}}
}
@article{van-2014-accelerating-t-sne, | |
title        = "Accelerating {t-SNE} using tree-based algorithms",
author = "Van Der Maaten, Laurens", | |
journal = "Journal of machine learning research", | |
volume = 15, | |
number = 1, | |
pages        = "3221--3245",
year = 2014 | |
} | |
@article{tibshirani-2001-estimating-number, | |
title = "Estimating the number of clusters in a data set via the gap | |
statistic", | |
author = "Tibshirani, Robert and Walther, Guenther and Hastie, Trevor", | |
journal = "Journal of the Royal Statistical Society: Series B | |
(Statistical Methodology)", | |
volume = 63, | |
number = 2, | |
pages        = "411--423",
year = 2001, | |
publisher = "Wiley Online Library" | |
} | |
@article{schmidhuber-1992-learning-factorial-codes, | |
title = "Learning factorial codes by predictability minimization", | |
author = "Schmidhuber, J{\"u}rgen", | |
journal = "Neural Computation", | |
volume = 4, | |
number = 6, | |
pages        = "863--879",
year = 1992, | |
publisher = "MIT Press" | |
} | |
@article{maaten-2008-visualizing-data, | |
title        = "Visualizing data using {t-SNE}",
author = "Maaten, Laurens van der and Hinton, Geoffrey", | |
journal = "Journal of machine learning research", | |
volume = 9, | |
number = "Nov", | |
pages        = "2579--2605",
year = 2008 | |
} | |
@ARTICLE{kingma-2013-auto-encoding, | |
author       = "{Kingma}, D.~P. and {Welling}, M.",
title = "{Auto-Encoding Variational Bayes}", | |
journal = "ArXiv e-prints", | |
archivePrefix= "arXiv", | |
eprint = "1312.6114", | |
primaryClass = "stat.ML", | |
keywords = "Statistics - Machine Learning, Computer Science - Learning", | |
year = 2013, | |
month = dec, | |
adsurl = "http://adsabs.harvard.edu/abs/2013arXiv1312.6114K", | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System" | |
} | |
@ARTICLE{rezende-2014-stochastic-backpropagation, | |
author = "{Jimenez Rezende}, D. and {Mohamed}, S. and {Wierstra}, D.", | |
title = "{Stochastic Backpropagation and Approximate Inference in Deep | |
Generative Models}", | |
journal = "ArXiv e-prints", | |
archivePrefix= "arXiv", | |
eprint = "1401.4082", | |
primaryClass = "stat.ML", | |
keywords = "Statistics - Machine Learning, Computer Science - Artificial | |
Intelligence, Computer Science - Learning, Statistics - | |
Computation, Statistics - Methodology", | |
year = 2014, | |
month = jan, | |
adsurl = "http://adsabs.harvard.edu/abs/2014arXiv1401.4082J", | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System" | |
} | |
@article{roberts-2017-cross-validation, | |
title = "Cross-validation strategies for data with temporal, spatial, | |
hierarchical, or phylogenetic structure", | |
author = "Roberts, David R and Bahn, Volker and Ciuti, Simone and | |
Boyce, Mark S and Elith, Jane and Guillera-Arroita, Gurutzeta | |
and Hauenstein, Severin and Lahoz-Monfort, Jos{\'e} J and | |
Schr{\"o}der, Boris and Thuiller, Wilfried and others", | |
journal = "Ecography", | |
volume = 40, | |
number = 8, | |
pages        = "913--929",
year = 2017, | |
publisher = "Wiley Online Library" | |
} | |
@inproceedings{zhang-2004-optimality-navie,
title        = "The optimality of naive {Bayes}",
author       = "Zhang, Harry",
booktitle    = "Proceedings of the Seventeenth International Florida
                Artificial Intelligence Research Society Conference
                ({FLAIRS} 2004)",
year         = 2004
}
@inproceedings{zheng-2013-deep-learning, | |
title        = "Deep learning for {Chinese} word segmentation and {POS} tagging",
author = "Zheng, Xiaoqing and Chen, Hanyang and Xu, Tianyu", | |
booktitle = "Proceedings of the 2013 Conference on Empirical Methods in | |
Natural Language Processing", | |
pages        = "647--657",
year = 2013 | |
} | |
@article{黄昌宁-2007-中文分词十年回顾, | |
title = "中文分词十年回顾", | |
author = "黄昌宁 and 赵海 and others", | |
journal = "中文信息学报", | |
volume = 21, | |
number = 3, | |
pages        = "8--19",
year = 2007 | |
} | |
@article{张博-2006-对互联网环境下中文分词系统的一种架构改进, | |
title = "对互联网环境下中文分词系统的一种架构改进", | |
author = "张博 and 姜建国 and 万平国", | |
journal = "计算机应用研究", | |
volume = 11, | |
pages        = "176--178",
year = 2006 | |
} | |
@article{孙茂松-2001-汉语自动分词研究评述,
title        = {汉语自动分词研究评述},
author       = {孙茂松 and 邹嘉彦},
journal      = {当代语言学},
year         = 2001
}
@article{赵伟-2004-一种规则与统计相结合的汉语分词方法, | |
title = "一种规则与统计相结合的汉语分词方法", | |
author = "赵伟 and 戴新宇 and 尹存燕 and 陈家骏", | |
journal = "计算机应用研究", | |
volume = 21, | |
number = 3, | |
pages        = "23--25",
year = 2004 | |
} | |
@article{张华平-2004-基于角色标注的中国人名自动识别研究, | |
title = "基于角色标注的中国人名自动识别研究", | |
author = "张华平 and 刘群", | |
journal = "计算机学报", | |
volume = 27, | |
number = 1, | |
year = 2004 | |
} | |
@article{孙宾-2003-现代汉语文本的词语切分技术, | |
title = "现代汉语文本的词语切分技术", | |
author = "孙宾", | |
journal = "技术报告, 北京大学计算语言学研究所", | |
year = 2003 | |
} | |
@article{foo-2004-chinese-word, | |
title = "Chinese word segmentation and its effect on information | |
retrieval", | |
author = "Foo, Schubert and Li, Hui", | |
journal = "Information processing \& management", | |
volume = 40, | |
number = 1, | |
pages        = "161--190",
year = 2004, | |
publisher = "Elsevier" | |
} | |
@inproceedings{peng-2004-chinese-segmentation, | |
title = "Chinese segmentation and new word detection using conditional | |
random fields", | |
author = "Peng, Fuchun and Feng, Fangfang and McCallum, Andrew", | |
booktitle = "Proceedings of the 20th international conference on | |
Computational Linguistics", | |
pages = 562, | |
year = 2004, | |
organization = "Association for Computational Linguistics" | |
} | |
@article{huang-2003-applying-machine, | |
title = "Applying machine learning to text segmentation for | |
information retrieval", | |
author = "Huang, Xiangji and Peng, Fuchun and Schuurmans, Dale and | |
Cercone, Nick and Robertson, Stephen E", | |
journal = "Information Retrieval", | |
volume = 6, | |
number       = "3--4",
pages        = "333--362",
year = 2003, | |
publisher = "Springer" | |
} | |
@inproceedings{jiang-2009-automatic-adaptation, | |
title        = "Automatic adaptation of annotation standards: {Chinese} word
                segmentation and {POS} tagging: a case study",
author = "Jiang, Wenbin and Huang, Liang and Liu, Qun", | |
booktitle = "Proceedings of the Joint Conference of the 47th Annual | |
Meeting of the ACL and the 4th International Joint Conference | |
on Natural Language Processing of the AFNLP: Volume 1-Volume | |
1", | |
pages        = "522--530",
year = 2009, | |
organization = "Association for Computational Linguistics" | |
} | |
@inproceedings{sun-1998-chinese-word, | |
title = "Chinese word segmentation without using lexicon and | |
hand-crafted training data", | |
author = "Maosong, Sun and Dayang, Shen and Tsou, Benjamin K", | |
booktitle = "Proceedings of the 36th Annual Meeting of the Association for | |
Computational Linguistics and 17th International Conference | |
on Computational Linguistics-Volume 2", | |
pages        = "1265--1271",
year = 1998, | |
organization = "Association for Computational Linguistics" | |
} | |
@article{俞士汶-2002-北京大学现代汉语语料库基本加工规范, | |
title = "北京大学现代汉语语料库基本加工规范 (续)", | |
author = "俞士汶 and 段慧明 and 朱学锋 and 孙斌", | |
journal = "中文信息学报", | |
volume = 16, | |
number = 6, | |
pages        = "59--65",
year = 2002 | |
} | |
@article{宋柔-1997-关于分词规范的探讨, | |
title = "关于分词规范的探讨", | |
author = "宋柔", | |
journal = "语言文字应用", | |
number = 3, | |
pages        = "113--114",
year = 1997 | |
} | |
@article{孙茂松-2001-信息处理用词汇研究, | |
title = "信息处理用现代汉语分词词表", | |
author = "孙茂松 and 王洪君 and 李行健 and 富丽 and 黄昌宁 and 陈松岑 | |
                and 谢自立 and 张卫国",
journal = "语言文字应用", | |
number = 4, | |
pages        = "84--89",
year = 2001 | |
} | |
@article{李玉梅-2007-分词规范亟需补充的三方面内容, | |
title = "分词规范亟需补充的三方面内容", | |
author = "李玉梅 and 陈晓 and 姜自霞 and 易江燕 and 靳光瑾 and 黄昌宁", | |
journal = "中文信息学报", | |
volume = 21, | |
number = 5, | |
pages        = "1--7",
year = 2007 | |
} | |
@article{刘荣-2011-利用统计量和语言学规则提取多字词表达, | |
title = "利用统计量和语言学规则提取多字词表达", | |
author = "刘荣 and 王奕凯", | |
journal = "太原理工大學學報", | |
volume = 42, | |
number = 2, | |
pages        = "133--137",
year = 2011, | |
publisher = "太原理工大學學報編輯部" | |
} | |
@inproceedings{zhao-2017-ngram2vec, | |
title = "Ngram2vec: Learning Improved Word Representations from Ngram | |
Co-occurrence Statistics", | |
author = "Zhao, Zhe and Liu, Tao and Li, Shen and Li, Bofang and Du, | |
Xiaoyong", | |
booktitle = "Proceedings of the 2017 Conference on Empirical Methods in | |
Natural Language Processing", | |
pages        = "244--253",
year = 2017 | |
} | |
@ARTICLE{pawar-2017-relation-extraction, | |
author = "{Pawar}, S. and {Palshikar}, G.~K. and {Bhattacharyya}, P.", | |
title        = "{Relation Extraction: A Survey}",
journal = "ArXiv e-prints", | |
archivePrefix= "arXiv", | |
eprint = "1712.05191", | |
primaryClass = "cs.CL", | |
keywords = "Computer Science - Computation and Language, Computer Science | |
- Artificial Intelligence, Computer Science - Information Retrieval", | |
year = 2017, | |
month = dec, | |
adsurl = "http://adsabs.harvard.edu/abs/2017arXiv171205191P", | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System" | |
} | |
@inproceedings{mintz-2009-distant-supervision, | |
title = "Distant supervision for relation extraction without labeled | |
data", | |
author = "Mintz, Mike and Bills, Steven and Snow, Rion and Jurafsky, | |
Dan", | |
booktitle = "Proceedings of the Joint Conference of the 47th Annual | |
Meeting of the ACL and the 4th International Joint Conference | |
on Natural Language Processing of the AFNLP: Volume 2-Volume | |
2", | |
pages        = "1003--1011",
year = 2009, | |
organization = "Association for Computational Linguistics" | |
} | |
@article{王丽杰-2009-基于SVMTool的中文词性标注, | |
author = "王丽杰 and 车万翔 and 刘挺", | |
title = "基于SVMTool的中文词性标注", | |
publisher = "中文信息学报", | |
year = 2009, | |
journal = "中文信息学报", | |
volume = 23, | |
number = 4, | |
eid = 16, | |
numpages = 6, | |
pages = 16, | |
keywords = "计算机应用;中文信息处理;词性标注;SVMTool;未登录词;偏旁部首", | |
url = "http://jcip.cipsc.org.cn/CN/abstract/article_1212.shtml" | |
} | |
@ARTICLE{sutton-2010-intro-cond, | |
author = "{Sutton}, C. and {McCallum}, A.", | |
title = "{An Introduction to Conditional Random Fields}", | |
journal = "ArXiv e-prints", | |
archivePrefix= "arXiv", | |
eprint = "1011.4088", | |
primaryClass = "stat.ML", | |
keywords = "Statistics - Machine Learning", | |
year = 2010, | |
month = nov, | |
adsurl = "http://adsabs.harvard.edu/abs/2010arXiv1011.4088S", | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System" | |
} | |
@inproceedings{lafferty-2001-cond-rand, | |
author = "Lafferty, John D. and McCallum, Andrew and Pereira, Fernando | |
C. N.", | |
title = "Conditional Random Fields: Probabilistic Models for | |
Segmenting and Labeling Sequence Data", | |
booktitle = "Proceedings of the Eighteenth International Conference on | |
Machine Learning", | |
series = "ICML '01", | |
year = 2001, | |
isbn = "1-55860-778-1", | |
pages        = "282--289",
numpages = 8, | |
url = "http://dl.acm.org/citation.cfm?id=645530.655813", | |
acmid = 655813, | |
publisher = "Morgan Kaufmann Publishers Inc.", | |
address = "San Francisco, CA, USA" | |
} | |
@inproceedings{sha-2003-shallow-parsing, | |
author = "Sha, Fei and Pereira, Fernando", | |
title = "Shallow Parsing with Conditional Random Fields", | |
booktitle = "Proceedings of the 2003 Conference of the North American | |
Chapter of the Association for Computational Linguistics on | |
Human Language Technology - Volume 1", | |
series = "NAACL '03", | |
year = 2003, | |
location = "Edmonton, Canada", | |
pages        = "134--141",
numpages = 8, | |
url = "https://doi.org/10.3115/1073445.1073473", | |
doi = "10.3115/1073445.1073473", | |
acmid = 1073473, | |
publisher = "Association for Computational Linguistics", | |
address = "Stroudsburg, PA, USA" | |
} | |
@article{刘炜-2017-一种面向突发事件的文本语料自动标注方法, | |
author = "刘炜 and 王旭 and 张雨嘉 and 刘宗田", | |
title = "一种面向突发事件的文本语料自动标注方法", | |
publisher = "中文信息学报", | |
year = 2017, | |
journal = "中文信息学报", | |
volume = 31, | |
number = 2, | |
eid = 76, | |
numpages = 9, | |
pages = 76, | |
keywords = "突发事件;语料库;自动标注", | |
url = "http://jcip.cipsc.org.cn/CN/abstract/article_2360.shtml" | |
} | |
@ARTICLE{huang-2015-bidirect-lstm-crf, | |
author = "{Huang}, Z. and {Xu}, W. and {Yu}, K.", | |
title = "{Bidirectional LSTM-CRF Models for Sequence Tagging}", | |
journal = "ArXiv e-prints", | |
archivePrefix= "arXiv", | |
eprint = "1508.01991", | |
primaryClass = "cs.CL", | |
keywords = "Computer Science - Computation and Language", | |
year = 2015, | |
month = aug, | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv150801991H", | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System" | |
} | |
@inproceedings{askitis-2005-cache-conscious, | |
title = "Cache-conscious collision resolution in string hash tables", | |
author = "Askitis, Nikolas and Zobel, Justin", | |
booktitle = "International Symposium on String Processing and Information | |
Retrieval", | |
pages        = "91--102",
year = 2005, | |
organization = "Springer" | |
} | |
@article{王厚峰-2002-指代消解的基本方法和实现技术, | |
title = "指代消解的基本方法和实现技术", | |
author = "王厚峰", | |
journal = "中文信息学报", | |
volume = 16, | |
number = 6, | |
pages        = "10--18",
year = 2002 | |
} | |
@inproceedings{ahn-2006-stage-event, | |
author = "Ahn, David", | |
title = "The Stages of Event Extraction", | |
booktitle = "Proceedings of the Workshop on Annotating and Reasoning About | |
Time and Events", | |
series = "ARTE '06", | |
year = 2006, | |
isbn = "1-932432-81-7", | |
location = "Sydney, Australia", | |
pages        = "1--8",
numpages = 8, | |
url = "http://dl.acm.org/citation.cfm?id=1629235.1629236", | |
acmid = 1629236, | |
publisher = "Association for Computational Linguistics", | |
address = "Stroudsburg, PA, USA" | |
} | |
@article{赵妍妍-2008-中文事件抽取技术研究, | |
title = "中文事件抽取技术研究", | |
author = "赵妍妍 and 秦兵 and 车万翔 and 刘挺", | |
journal = "中文信息学报", | |
volume = 22, | |
number = 1, | |
pages        = "3--8",
year = 2008 | |
} | |
@article{李颖-2017-中文开放式多元实体关系抽取, | |
title = "中文开放式多元实体关系抽取", | |
author = "李颖 and 郝晓燕 and 王勇", | |
journal = "计算机科学", | |
number = "S1", | |
pages        = "80--83",
year = 2017 | |
} | |
@inproceedings{takamatsu-2012-reducing-wrong, | |
author = "Takamatsu, Shingo and Sato, Issei and Nakagawa, Hiroshi", | |
title = "Reducing Wrong Labels in Distant Supervision for Relation | |
Extraction", | |
booktitle = "Proceedings of the 50th Annual Meeting of the Association for | |
Computational Linguistics: Long Papers - Volume 1", | |
series = "ACL '12", | |
year = 2012, | |
location = "Jeju Island, Korea", | |
pages        = "721--729",
numpages = 9, | |
url = "http://dl.acm.org/citation.cfm?id=2390524.2390626", | |
acmid = 2390626, | |
publisher = "Association for Computational Linguistics", | |
address = "Stroudsburg, PA, USA" | |
} | |
@inproceedings{yao-2010-collective-cross, | |
title = "Collective cross-document relation extraction without | |
labelled data", | |
author = "Yao, Limin and Riedel, Sebastian and McCallum, Andrew", | |
booktitle = "Proceedings of the 2010 Conference on Empirical Methods in | |
Natural Language Processing", | |
pages        = "1013--1023",
year = 2010, | |
organization = "Association for Computational Linguistics" | |
} | |
@inproceedings{berant-2013-semantic-parsing, | |
title = "Semantic parsing on freebase from question-answer pairs", | |
author = "Berant, Jonathan and Chou, Andrew and Frostig, Roy and Liang, | |
Percy", | |
booktitle = "Proceedings of the 2013 Conference on Empirical Methods in | |
Natural Language Processing", | |
pages        = "1533--1544",
year = 2013 | |
} | |
@inproceedings{hoffmann-2011-knowledge-based, | |
title = "Knowledge-based weak supervision for information extraction | |
of overlapping relations", | |
author = "Hoffmann, Raphael and Zhang, Congle and Ling, Xiao and | |
Zettlemoyer, Luke and Weld, Daniel S", | |
booktitle = "Proceedings of the 49th Annual Meeting of the Association for | |
Computational Linguistics: Human Language Technologies-Volume | |
1", | |
pages        = "541--550",
year = 2011, | |
organization = "Association for Computational Linguistics" | |
} | |
@inproceedings{surdeanu-2012-multi-instance, | |
title = "Multi-instance multi-label learning for relation extraction", | |
author = "Surdeanu, Mihai and Tibshirani, Julie and Nallapati, Ramesh | |
and Manning, Christopher D", | |
booktitle = "Proceedings of the 2012 joint conference on empirical methods | |
in natural language processing and computational natural | |
language learning", | |
pages = "455-465", | |
year = 2012, | |
organization = "Association for Computational Linguistics" | |
} | |
@InProceedings{lin-2013-conv-neural, | |
author = "Liu, ChunYang and Sun, WenBo and Chao, WenHan and Che, | |
WanXiang", | |
editor = "Motoda, Hiroshi and Wu, Zhaohui and Cao, Longbing and Zaiane, | |
Osmar and Yao, Min and Wang, Wei", | |
title = "Convolution Neural Network for Relation Extraction", | |
booktitle = "Advanced Data Mining and Applications", | |
year = 2013, | |
publisher = "Springer Berlin Heidelberg", | |
address = "Berlin, Heidelberg", | |
pages = "231-242", | |
abstract = "Deep Neural Network has been applied to many Natural Language | |
Processing tasks. Instead of building hand-craft features, | |
DNN builds features by automatic learning, fitting different | |
domains well. In this paper, we propose a novel convolution | |
network, incorporating lexical features, applied to Relation | |
Extraction. Since many current deep neural networks use word | |
embedding by word table, which, however, neglects semantic | |
meaning among words, we import a new coding method, which | |
coding input words by synonym dictionary to integrate | |
semantic knowledge into the neural network. We compared our | |
Convolution Neural Network (CNN) on relation extraction with | |
the state-of-art tree kernel approach, including Typed | |
Dependency Path Kernel and Shortest Dependency Path Kernel | |
and Context-Sensitive tree kernel, resulting in a 9{\%} | |
improvement competitive performance on ACE2005 data | |
set. Also, we compared the synonym coding with the one-hot | |
coding, and our approach got 1.6{\%} improvement. Moreover, | |
we also tried other coding method, such as hypernym coding, | |
and give some discussion according the result.", | |
isbn = "978-3-642-53917-6" | |
} | |
@inproceedings{zeng-2014-relation-classification, | |
title = "Relation classification via convolutional deep neural | |
network", | |
author = "Zeng, Daojian and Liu, Kang and Lai, Siwei and Zhou, Guangyou | |
and Zhao, Jun", | |
booktitle = "Proceedings of COLING 2014, the 25th International Conference | |
on Computational Linguistics: Technical Papers", | |
pages = "2335-2344", | |
year = 2014 | |
} | |
@inproceedings{nguyen-2015-relation-extraction, | |
title = "Relation extraction: Perspective from convolutional neural | |
networks", | |
author = "Nguyen, Thien Huu and Grishman, Ralph", | |
booktitle = "Proceedings of the 1st Workshop on Vector Space Modeling for | |
Natural Language Processing", | |
pages = "39-48", | |
year = 2015 | |
} | |
@ARTICLE{nogueira-2015-class-relat, | |
author = "{Nogueira dos Santos}, C. and {Xiang}, B. and {Zhou}, B.", | |
title = "{Classifying Relations By Ranking With Convolutional Neural | |
Networks}", | |
journal = "ArXiv e-prints", | |
year = 2015, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv150406580N", | |
archivePrefix= "arXiv", | |
eprint = "1504.06580", | |
keywords = "Computer Science - Computation and Language, Computer Science | |
- Learning, Computer Science - Neural and Evolutionary Computing", | |
month = apr, | |
primaryClass = "cs.CL" | |
} | |
@ARTICLE{zhang-2015-relat-class, | |
author = "{Zhang}, D. and {Wang}, D.", | |
title = "{Relation Classification Via Recurrent Neural Network}", | |
journal = "ArXiv e-prints", | |
year = 2015, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv150801006Z", | |
archivePrefix= "arXiv", | |
eprint = "1508.01006", | |
keywords = "Computer Science - Computation and Language, Computer Science | |
- Learning, Computer Science - Neural and Evolutionary Computing", | |
month = aug, | |
primaryClass = "cs.CL" | |
} | |
@inproceedings{zhou-2016-attention-based, | |
title = "Attention-based bidirectional long short-term memory networks | |
for relation classification", | |
author = "Zhou, Peng and Shi, Wei and Tian, Jun and Qi, Zhenyu and Li, | |
Bingchen and Hao, Hongwei and Xu, Bo", | |
booktitle = "Proceedings of the 54th Annual Meeting of the Association for | |
Computational Linguistics (Volume 2: Short Papers)", | |
volume = 2, | |
pages = "207-212", | |
year = 2016 | |
} | |
@inproceedings{wang-2016-relation-classification, | |
title = "Relation classification via multi-level attention {CNNs}", | |
author = "Wang, Linlin and Cao, Zhu and de Melo, Gerard and Liu, | |
Zhiyuan", | |
booktitle = "Proceedings of the 54th Annual Meeting of the Association for | |
Computational Linguistics (Volume 1: Long Papers)", | |
volume = 1, | |
pages = "1298-1307", | |
year = 2016 | |
} | |
@inproceedings{zeng-2015-distant-supervision, | |
title = "Distant supervision for relation extraction via piecewise | |
convolutional neural networks", | |
author = "Zeng, Daojian and Liu, Kang and Chen, Yubo and Zhao, Jun", | |
booktitle = "Proceedings of the 2015 Conference on Empirical Methods in | |
Natural Language Processing", | |
pages = "1753-1762", | |
year = 2015 | |
} | |
@inproceedings{jiang-2016-relation-extraction, | |
title = "Relation extraction with multi-instance multi-label | |
convolutional neural networks", | |
author = "Jiang, Xiaotian and Wang, Quan and Li, Peng and Wang, Bin", | |
booktitle = "Proceedings of COLING 2016, the 26th International Conference | |
on Computational Linguistics: Technical Papers", | |
pages = "1471-1480", | |
year = 2016 | |
} | |
@inproceedings{ji-2017-distan-super, | |
title = "Distant Supervision for Relation Extraction with | |
Sentence-Level Attention and Entity Descriptions", | |
author = "Guoliang Ji and Kang Liu and Shizhu He and Jun Zhao", | |
booktitle = "AAAI", | |
year = 2017 | |
} | |
@article{漆桂林-2017-知识图谱研究进展, | |
title = "知识图谱研究进展", | |
author = "漆桂林 and 高桓 and 吴天星", | |
journal = "情报工程", | |
volume = 3, | |
number = 1, | |
pages = "4-25", | |
year = 2017 | |
} | |
@inproceedings{brin-1998-extrac, | |
title = "Extracting patterns and relations from the world wide web", | |
author = "Brin, Sergey", | |
booktitle = "International Workshop on The World Wide Web and Databases", | |
pages = "172-183", | |
year = 1998, | |
organization = "Springer" | |
} | |
@inproceedings{agichtein-2000-snowball, | |
title = "Snowball: Extracting relations from large plain-text | |
collections", | |
author = "Agichtein, Eugene and Gravano, Luis", | |
booktitle = "Proceedings of the fifth ACM conference on Digital libraries", | |
pages = "85-94", | |
year = 2000, | |
organization = "ACM" | |
} | |
@inproceedings{yates-2007-textrunner, | |
title = "{TextRunner}: open information extraction on the web", | |
author = "Yates, Alexander and Cafarella, Michael and Banko, Michele | |
and Etzioni, Oren and Broadhead, Matthew and Soderland, | |
Stephen", | |
booktitle = "Proceedings of Human Language Technologies: The Annual | |
Conference of the North American Chapter of the Association | |
for Computational Linguistics: Demonstrations", | |
pages = "25-26", | |
year = 2007, | |
organization = "Association for Computational Linguistics" | |
} | |
@inproceedings{bollegala-2009-measur-simil, | |
author = "Bollegala, Danushka T. and Matsuo, Yutaka and Ishizuka, | |
Mitsuru", | |
title = "Measuring the Similarity Between Implicit Semantic Relations | |
from the Web", | |
booktitle = "Proceedings of the 18th International Conference on World | |
Wide Web", | |
series = "WWW '09", | |
year = 2009, | |
isbn = "978-1-60558-487-4", | |
location = "Madrid, Spain", | |
pages = "651-660", | |
numpages = 10, | |
url = "http://doi.acm.org/10.1145/1526709.1526797", | |
doi = "10.1145/1526709.1526797", | |
acmid = 1526797, | |
publisher = "ACM", | |
address = "New York, NY, USA", | |
keywords = "natural language processing, relational similarity, web | |
mining" | |
} | |
@inproceedings{bollegala-2010-relat-dualit, | |
author = "Bollegala, Danushka Tarupathi and Matsuo, Yutaka and | |
Ishizuka, Mitsuru", | |
title = "Relational Duality: Unsupervised Extraction of Semantic | |
Relations Between Entities on the Web", | |
booktitle = "Proceedings of the 19th International Conference on World | |
Wide Web", | |
series = "WWW '10", | |
year = 2010, | |
isbn = "978-1-60558-799-8", | |
location = "Raleigh, North Carolina, USA", | |
pages = "151-160", | |
numpages = 10, | |
url = "http://doi.acm.org/10.1145/1772690.1772707", | |
doi = "10.1145/1772690.1772707", | |
acmid = 1772707, | |
publisher = "ACM", | |
address = "New York, NY, USA", | |
keywords = "relation extraction, relational duality, relational | |
similarity, web mining" | |
} | |
@inproceedings{batista-2015-semi-supervised, | |
title = "Semi-supervised bootstrapping of relationship extractors with | |
distributional semantics", | |
author = "Batista, David S and Martins, Bruno and Silva, M{\'a}rio J", | |
booktitle = "Proceedings of the 2015 Conference on Empirical Methods in | |
Natural Language Processing", | |
pages = "499-504", | |
year = 2015 | |
} | |
@inproceedings{zhu-2009-statsnowball, | |
author = "Zhu, Jun and Nie, Zaiqing and Liu, Xiaojiang and Zhang, Bo | |
and Wen, Ji-Rong", | |
title = "StatSnowball: A Statistical Approach to Extracting Entity | |
Relationships", | |
booktitle = "Proceedings of the 18th International Conference on World | |
Wide Web", | |
series = "WWW '09", | |
year = 2009, | |
isbn = "978-1-60558-487-4", | |
location = "Madrid, Spain", | |
pages = "101-110", | |
numpages = 10, | |
url = "http://doi.acm.org/10.1145/1526709.1526724", | |
doi = "10.1145/1526709.1526724", | |
acmid = 1526724, | |
publisher = "ACM", | |
address = "New York, NY, USA", | |
keywords = "Markov logic networks, relationship extraction, statistical | |
models" | |
} | |
@article{车万翔-2005-实体关系自动抽取, | |
title = "实体关系自动抽取", | |
author = "车万翔 and 刘挺 and 李生", | |
journal = "中文信息学报", | |
volume = 19, | |
number = 2, | |
pages = "2-7", | |
year = 2005 | |
} | |
@article{田久乐-2010-基于同义词词林的词语相似度计算方法, | |
title = "基于同义词词林的词语相似度计算方法", | |
author = "田久乐 and 赵蔚", | |
journal = "吉林大学学报: 信息科学版", | |
number = 6, | |
pages = "602-608", | |
year = 2010 | |
} | |
@ARTICLE{ratner-2017-snorkel, | |
author = "{Ratner}, A. and {Bach}, S.~H. and {Ehrenberg}, H. and | |
{Fries}, J. and {Wu}, S. and {R{\'e}}, C.", | |
title = "{Snorkel: Rapid Training Data Creation with Weak | |
Supervision}", | |
journal = "ArXiv e-prints", | |
archivePrefix= "arXiv", | |
eprint = "1711.10160", | |
primaryClass = "cs.LG", | |
keywords = "Computer Science - Learning, Statistics - Machine Learning", | |
year = 2017, | |
month = nov, | |
adsurl = "http://adsabs.harvard.edu/abs/2017arXiv171110160R", | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System" | |
} | |
@ARTICLE{bach-2017-learn-struc, | |
author = "{Bach}, S.~H. and {He}, B. and {Ratner}, A. and {R{\'e}}, C.", | |
title = "{Learning the Structure of Generative Models Without Labeled | |
Data}", | |
journal = "ArXiv e-prints", | |
year = 2017, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2017arXiv170300854B", | |
archivePrefix= "arXiv", | |
eprint = "1703.00854", | |
keywords = "Computer Science - Learning, Statistics - Machine Learning", | |
month = mar, | |
primaryClass = "cs.LG" | |
} | |
@ARTICLE{ratner-2016-data-progr, | |
author = "{Ratner}, A. and {De Sa}, C. and {Wu}, S. and {Selsam}, | |
D. and {R{\'e}}, C.", | |
title = "{Data Programming: Creating Large Training Sets, Quickly}", | |
journal = "ArXiv e-prints", | |
year = 2016, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2016arXiv160507723R", | |
archivePrefix= "arXiv", | |
eprint = "1605.07723", | |
keywords = "Statistics - Machine Learning, Computer Science - Artificial | |
Intelligence, Computer Science - Learning", | |
month = may, | |
primaryClass = "stat.ML" | |
} | |
@article{刘宗田-2009-面向事件的本体研究, | |
title = "面向事件的本体研究", | |
author = "刘宗田 and 黄美丽 and 周文 and 仲兆满 and 付剑锋 and 单建芳 | |
and 智慧来", | |
journal = "计算机科学", | |
volume = 36, | |
number = 11, | |
pages = "189-192", | |
year = 2009 | |
} | |
@article{pei-2004-mining, | |
title = "Mining sequential patterns by pattern-growth: The prefixspan | |
approach", | |
author = "Pei, Jian and Han, Jiawei and Mortazavi-Asl, Behzad and Wang, | |
Jianyong and Pinto, Helen and Chen, Qiming and Dayal, | |
Umeshwar and Hsu, Mei-Chun", | |
journal = "IEEE Transactions on knowledge and data engineering", | |
volume = 16, | |
number = 11, | |
pages = "1424-1440", | |
year = 2004, | |
publisher = "IEEE" | |
} | |
@article{李明耀-2016-基于依存分析的开放式中文实体关系抽取方法, | |
title = "基于依存分析的开放式中文实体关系抽取方法", | |
author = "李明耀 and 杨静", | |
journal = "计算机工程", | |
volume = 42, | |
number = 6, | |
pages = "201-207", | |
year = 2016 | |
} | |
@inproceedings{ratinov-2009-design-challenges, | |
title = "Design challenges and misconceptions in named entity | |
recognition", | |
author = "Ratinov, Lev and Roth, Dan", | |
booktitle = "Proceedings of the Thirteenth Conference on Computational | |
Natural Language Learning", | |
pages = "147-155", | |
year = 2009, | |
organization = "Association for Computational Linguistics" | |
} | |
@article{dai-2015-enhan, | |
title = "Enhancing of chemical compound and drug name recognition | |
using representative tag scheme and fine-grained | |
tokenization", | |
author = "Hong-Jie Dai and Po-Ting Lai and Yung-Chun Chang and Richard | |
Tzong-Han Tsai", | |
journal = "Journal of Cheminformatics", | |
year = 2015 | |
} | |
@ARTICLE{dyer-2015-stack-lstm, | |
author = "{Dyer}, C. and {Ballesteros}, M. and {Ling}, W. and | |
{Matthews}, A. and {Smith}, N.~A.", | |
title = "{Transition-Based Dependency Parsing With Stack Long | |
Short-Term Memory}", | |
journal = "ArXiv e-prints", | |
year = 2015, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2015arXiv150508075D", | |
archivePrefix= "arXiv", | |
eprint = "1505.08075", | |
keywords = "Computer Science - Computation and Language, Computer Science | |
- Learning, Computer Science - Neural and Evolutionary Computing", | |
month = may, | |
primaryClass = "cs.CL" | |
} | |
@inproceedings{ling-2015-not-all, | |
title = "Not all contexts are created equal: Better word | |
representations with variable attention", | |
author = "Ling, Wang and Tsvetkov, Yulia and Amir, Silvio and | |
Fermandez, Ramon and Dyer, Chris and Black, Alan W and | |
Trancoso, Isabel and Lin, Chu-Cheng", | |
booktitle = "Proceedings of the 2015 Conference on Empirical Methods in | |
Natural Language Processing", | |
pages = "1367-1372", | |
year = 2015 | |
} | |
@article{koppel-2006-importance-neutral, | |
title = "The importance of neutral examples for learning sentiment", | |
author = "Koppel, Moshe and Schler, Jonathan", | |
journal = "Computational Intelligence", | |
volume = 22, | |
number = 2, | |
pages = "100-109", | |
year = 2006, | |
publisher = "Wiley Online Library" | |
} | |
@article{berger-1996-maximum-entropy, | |
title = "A maximum entropy approach to natural language processing", | |
author = "Berger, Adam L and Pietra, Vincent J Della and Pietra, | |
Stephen A Della", | |
journal = "Computational linguistics", | |
volume = 22, | |
number = 1, | |
pages = "39-71", | |
year = 1996, | |
publisher = "MIT Press" | |
} | |
@ARTICLE{Prescher-2004-tutoral, | |
author = "{Prescher}, D.", | |
title = "{A Tutorial on the Expectation-Maximization Algorithm | |
Including Maximum-Likelihood Estimation and EM Training of | |
Probabilistic Context-Free Grammars}", | |
journal = "eprint arXiv:cs/0412015", | |
eprint = "cs/0412015", | |
keywords = "Computer Science - Computation and Language", | |
year = 2004, | |
month = dec, | |
adsurl = "http://adsabs.harvard.edu/abs/2004cs.......12015P", | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System" | |
} | |
@article{berger-1997-improved-iterative, | |
title = "The improved iterative scaling algorithm: A gentle | |
introduction", | |
author = "Berger, Adam", | |
journal = "Unpublished manuscript", | |
year = 1997 | |
} | |
@inproceedings{curran-2003-inves-gis, | |
author = "Curran, James R. and Clark, Stephen", | |
title = "Investigating GIS and Smoothing for Maximum Entropy Taggers", | |
booktitle = "Proceedings of the Tenth Conference on European Chapter of | |
the Association for Computational Linguistics - Volume 1", | |
series = "EACL '03", | |
year = 2003, | |
isbn = "1-333-56789-0", | |
location = "Budapest, Hungary", | |
pages = "91-98", | |
numpages = 8, | |
url = "https://doi.org/10.3115/1067807.1067821", | |
doi = "10.3115/1067807.1067821", | |
acmid = 1067821, | |
publisher = "Association for Computational Linguistics", | |
address = "Stroudsburg, PA, USA" | |
} | |
@article{张华平-2002-基于N-最短路径方法的中文词语粗分模型, | |
title = "基于N-最短路径方法的中文词语粗分模型", | |
author = "张华平 and 刘群", | |
journal = "中文信息学报", | |
volume = 16, | |
number = 5, | |
pages = "3-9", | |
year = 2002 | |
} | |
@article{秦兵-2015-无指导的中文开放式实体关系抽取, | |
title = "无指导的中文开放式实体关系抽取", | |
author = "秦兵 and 刘安安 and 刘挺 and others", | |
journal = "计算机研究与发展", | |
volume = 52, | |
number = 5, | |
year = 2015, | |
pages = "1029-1035" | |
} | |
@inproceedings{li-2013-joint-event, | |
title = "Joint event extraction via structured prediction with global | |
features", | |
author = "Li, Qi and Ji, Heng and Huang, Liang", | |
booktitle = "Proceedings of the 51st Annual Meeting of the Association for | |
Computational Linguistics (Volume 1: Long Papers)", | |
volume = 1, | |
pages = "73-82", | |
year = 2013 | |
} | |
@inproceedings{chen-2012-joint-modeling, | |
title = "Joint modeling for {Chinese} event extraction with rich | |
linguistic features", | |
author = "Chen, Chen and Ng, Vincent", | |
booktitle = "Proceedings of COLING 2012", | |
pages = "529-544", | |
year = 2012 | |
} | |
@inproceedings{singh-2013-joint-infer, | |
author = "Singh, Sameer and Riedel, Sebastian and Martin, Brian and | |
Zheng, Jiaping and McCallum, Andrew", | |
title = "Joint Inference of Entities, Relations, and Coreference", | |
booktitle = "Proceedings of the 2013 Workshop on Automated Knowledge Base | |
Construction", | |
series = "AKBC '13", | |
year = 2013, | |
isbn = "978-1-4503-2411-3", | |
location = "San Francisco, California, USA", | |
pages = "1-6", | |
numpages = 6, | |
url = "http://doi.acm.org/10.1145/2509558.2509559", | |
doi = "10.1145/2509558.2509559", | |
acmid = 2509559, | |
publisher = "ACM", | |
address = "New York, NY, USA", | |
keywords = "coreference resolution, information extraction, joint | |
inference, named entity recognition, relation extraction" | |
} | |
@inproceedings{riedel-2011-fast-robust, | |
author = "Riedel, Sebastian and McCallum, Andrew", | |
title = "Fast and Robust Joint Models for Biomedical Event Extraction", | |
booktitle = "Proceedings of the Conference on Empirical Methods in Natural | |
Language Processing", | |
series = "EMNLP '11", | |
year = 2011, | |
isbn = "978-1-937284-11-4", | |
location = "Edinburgh, United Kingdom", | |
pages = "1-12", | |
numpages = 12, | |
url = "http://dl.acm.org/citation.cfm?id=2145432.2145434", | |
acmid = 2145434, | |
publisher = "Association for Computational Linguistics", | |
address = "Stroudsburg, PA, USA" | |
} | |
@article{何馨宇-2017-基于双向LSTM和两阶段方法的触发词识别, | |
author = "何馨宇 and 李丽双", | |
title = "基于双向LSTM和两阶段方法的触发词识别", | |
publisher = "中文信息学报", | |
year = 2017, | |
journal = "中文信息学报", | |
volume = 31, | |
number = 6, | |
eid = 147, | |
numpages = 7, | |
pages = 147, | |
keywords = "触发词识别;两阶段方法;双向LSTM;依存词向量", | |
url = "http://jcip.cipsc.org.cn/CN/abstract/article_2482.shtml" | |
} | |
@ARTICLE{cai-2017-fast-accur, | |
author = "{Cai}, D. and {Zhao}, H. and {Zhang}, Z. and {Xin}, Y. and | |
{Wu}, Y. and {Huang}, F.", | |
title = "{Fast and Accurate Neural Word Segmentation for Chinese}", | |
journal = "ArXiv e-prints", | |
year = 2017, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2017arXiv170407047C", | |
archivePrefix= "arXiv", | |
eprint = "1704.07047", | |
keywords = "Computer Science - Computation and Language", | |
month = apr, | |
primaryClass = "cs.CL" | |
} | |
@article{陈自岩-2016-一种非监督的事件触发词检测和分类方法, | |
title = "一种非监督的事件触发词检测和分类方法", | |
author = "陈自岩 and 黄宇 and 王洋 and 傅兴玉 and 付琨", | |
journal = "国外电子测量技术", | |
number = 7, | |
pages = "91-95", | |
year = 2016 | |
} | |
@inproceedings{li-2014-increm, | |
title = "Incremental joint extraction of entity mentions and | |
relations", | |
author = "Li, Qi and Ji, Heng", | |
booktitle = "Proceedings of the 52nd Annual Meeting of the Association for | |
Computational Linguistics (Volume 1: Long Papers)", | |
volume = 1, | |
pages = "402-412", | |
year = 2014 | |
} | |
@inproceedings{liu-2016-lever, | |
title = "Leveraging {FrameNet} to improve automatic event detection", | |
author = "Liu, Shulin and Chen, Yubo and He, Shizhu and Liu, Kang and | |
Zhao, Jun", | |
booktitle = "Proceedings of the 54th Annual Meeting of the Association for | |
Computational Linguistics (Volume 1: Long Papers)", | |
volume = 1, | |
pages = "2134-2143", | |
year = 2016 | |
} | |
@inproceedings{ji-2008-refin, | |
title = "Refining event extraction through cross-document inference", | |
author = "Ji, Heng and Grishman, Ralph", | |
booktitle = "Proceedings of ACL-08: HLT", | |
pages = "254-262", | |
year = 2008 | |
} | |
@inproceedings{chen-2009-language-specific, | |
title = "Language specific issue and feature exploration in {Chinese} | |
event extraction", | |
author = "Chen, Zheng and Ji, Heng", | |
booktitle = "Proceedings of Human Language Technologies: The 2009 Annual | |
Conference of the North American Chapter of the Association | |
for Computational Linguistics, Companion Volume: Short | |
Papers", | |
pages = "209-212", | |
year = 2009, | |
organization = "Association for Computational Linguistics" | |
} | |
@inproceedings{li-2012-employ-compositional, | |
title = "Employing compositional semantics and discourse consistency | |
in {Chinese} event extraction", | |
author = "Li, Peifeng and Zhou, Guodong and Zhu, Qiaoming and Hou, | |
Libin", | |
booktitle = "Proceedings of the 2012 Joint Conference on Empirical Methods | |
in Natural Language Processing and Computational Natural | |
Language Learning", | |
pages = "1006-1016", | |
year = 2012, | |
organization = "Association for Computational Linguistics" | |
} | |
@inproceedings{liao-2010-using, | |
title = "Using document level cross-event inference to improve event | |
extraction", | |
author = "Liao, Shasha and Grishman, Ralph", | |
booktitle = "Proceedings of the 48th Annual Meeting of the Association for | |
Computational Linguistics", | |
pages = "789-797", | |
year = 2010, | |
organization = "Association for Computational Linguistics" | |
} | |
@inproceedings{hong-2011-using, | |
title = "Using cross-entity inference to improve event extraction", | |
author = "Hong, Yu and Zhang, Jianfeng and Ma, Bin and Yao, Jianmin and | |
Zhou, Guodong and Zhu, Qiaoming", | |
booktitle = "Proceedings of the 49th Annual Meeting of the Association for | |
Computational Linguistics: Human Language Technologies-Volume | |
1", | |
pages = "1127-1136", | |
year = 2011, | |
organization = "Association for Computational Linguistics" | |
} | |
@inproceedings{liu-2016-probab-soft, | |
author = "Liu, Shulin and Liu, Kang and He, Shizhu and Zhao, Jun", | |
title = "A Probabilistic Soft Logic Based Approach to Exploiting | |
Latent and Global Information in Event Classification", | |
booktitle = "Proceedings of the Thirtieth AAAI Conference on Artificial | |
Intelligence", | |
series = "AAAI'16", | |
year = 2016, | |
location = "Phoenix, Arizona", | |
pages = "2993-2999", | |
numpages = 7, | |
url = "http://dl.acm.org/citation.cfm?id=3016100.3016321", | |
acmid = 3016321, | |
publisher = "AAAI Press" | |
} | |
@article{kim-2000-subject-object, | |
title = "Subject/object drop in the acquisition of {Korean}: A | |
cross-linguistic comparison", | |
author = "Kim, Young-Joo", | |
journal = "Journal of East Asian Linguistics", | |
volume = 9, | |
number = 4, | |
pages = "325-351", | |
year = 2000, | |
publisher = "Springer" | |
} | |
@inproceedings{tan-2008-ident-chines, | |
author = "Tan, Hongye and Zhao, Tiejun and Zheng, Jiaheng", | |
title = "Identification of Chinese Event and Their Argument Roles", | |
booktitle = "Proceedings of the 2008 IEEE 8th International Conference on | |
Computer and Information Technology Workshops", | |
series = "CITWORKSHOPS '08", | |
year = 2008, | |
isbn = "978-0-7695-3242-4", | |
pages = "14-19", | |
numpages = 6, | |
url = "http://dx.doi.org/10.1109/CIT.2008.Workshops.54", | |
doi = "10.1109/CIT.2008.Workshops.54", | |
acmid = 1381056, | |
publisher = "IEEE Computer Society", | |
address = "Washington, DC, USA" | |
} | |
@inproceedings{fader-2013-parap, | |
title = "Paraphrase-driven learning for open question answering", | |
author = "Fader, Anthony and Zettlemoyer, Luke and Etzioni, Oren", | |
booktitle = "Proceedings of the 51st Annual Meeting of the Association for | |
Computational Linguistics (Volume 1: Long Papers)", | |
volume = 1, | |
pages = "1608-1618", | |
year = 2013 | |
} | |
@article{陈箫箫-2016-微博中的开放域事件抽取, | |
title = "微博中的开放域事件抽取", | |
author = "陈箫箫 and 刘波", | |
journal = "计算机应用与软件", | |
volume = 33, | |
number = 8, | |
pages = "18-22", | |
year = 2016 | |
} | |
@article{李江龙-2017-金融领域的事件句抽取, | |
title = "金融领域的事件句抽取", | |
author = "李江龙 and 吕学强 and 周建设 and 刘秀磊", | |
journal = "计算机应用研究", | |
volume = 34, | |
number = 10, | |
pages = "2915-2918", | |
year = 2017 | |
} | |
@article{马晨曦-2018-基于递归神经网络的中文事件检测, | |
title = "基于递归神经网络的中文事件检测", | |
author = "马晨曦 and 陈兴蜀 and 王文贤 and 王海舟", | |
journal = "信息网络安全", | |
number = 5, | |
pages = "75-81", | |
year = 2018 | |
} | |
@inproceedings{mcclosky-2011-event-extract, | |
author = "McClosky, David and Surdeanu, Mihai and Manning, Christopher | |
D.", | |
title = "Event Extraction As Dependency Parsing", | |
booktitle = "Proceedings of the 49th Annual Meeting of the Association for | |
Computational Linguistics: Human Language Technologies - | |
Volume 1", | |
series = "HLT '11", | |
year = 2011, | |
isbn = "978-1-932432-87-9", | |
location = "Portland, Oregon", | |
pages = "1626-1635", | |
numpages = 10, | |
url = "http://dl.acm.org/citation.cfm?id=2002472.2002667", | |
acmid = 2002667, | |
publisher = "Association for Computational Linguistics", | |
address = "Stroudsburg, PA, USA" | |
} | |
@article{reschke-2014-event-extrac, | |
title = "Event Extraction Using Distant Supervision", | |
author = "Reschke, Kevin and Jankowiak, Martin and Surdeanu, Mihai and | |
Manning, Christopher D and Jurafsky, Daniel", | |
journal = "Language", | |
year = 2014 | |
} | |
@inproceedings{riedel-2013-relation-extraction, | |
title = "Relation extraction with matrix factorization and universal | |
schemas", | |
author = "Riedel, Sebastian and Yao, Limin and McCallum, Andrew and | |
Marlin, Benjamin M", | |
booktitle = "Proceedings of the 2013 Conference of the North American | |
Chapter of the Association for Computational Linguistics: | |
Human Language Technologies", | |
pages = "74-84", | |
year = 2013 | |
} | |
@inproceedings{toutanova-2015-representing-text, | |
title = "Representing text for joint embedding of text and knowledge | |
bases", | |
author = "Toutanova, Kristina and Chen, Danqi and Pantel, Patrick and | |
Poon, Hoifung and Choudhury, Pallavi and Gamon, Michael", | |
booktitle = "Proceedings of the 2015 Conference on Empirical Methods in | |
Natural Language Processing", | |
pages = "1499-1509", | |
year = 2015 | |
} | |
@inproceedings{tang-2005-email, | |
title = "Email data cleaning", | |
author = "Tang, Jie and Li, Hang and Cao, Yunbo and Tang, Zhaohui", | |
booktitle = "Proceedings of the eleventh ACM SIGKDD international | |
conference on Knowledge discovery in data mining", | |
pages = "489-498", | |
year = 2005, | |
organization = "ACM" | |
} | |
@inproceedings{smith-2007-tesser-ocr, | |
title = "An overview of the {Tesseract} {OCR} engine", | |
author = "Smith, Ray", | |
booktitle = "Document Analysis and Recognition, 2007. ICDAR 2007. Ninth | |
International Conference on", | |
volume = 2, | |
pages = "629-633", | |
year = 2007, | |
organization = "IEEE" | |
} | |
@inproceedings{smith-2009-hybrid-page, | |
title = "Hybrid Page Layout Analysis via Tab-Stop Detection", | |
author = "Ray Smith", | |
year = 2009, | |
url = "http://www.cvc.uab.es/icdar2009/papers/3725a241.pdf", | |
booktitle = "Proceedings of the 10th international conference on document | |
analysis and recognition" | |
} | |
@inproceedings{epshtein-2010-detect-text, | |
title = "Detecting text in natural scenes with stroke width transform", | |
author = "Epshtein, Boris and Ofek, Eyal and Wexler, Yonatan", | |
booktitle = "Computer Vision and Pattern Recognition (CVPR), 2010 IEEE | |
Conference on", | |
pages = "2963-2970", | |
year = 2010, | |
organization = "IEEE" | |
} | |
@Article{ramakrishnan-2012-layout-pdf, | |
author = "Ramakrishnan, Cartic and Patnia, Abhishek and Hovy, Eduard | |
and Burns, Gully APC", | |
title = "Layout-aware text extraction from full-text PDF of scientific | |
articles", | |
journal = "Source Code for Biology and Medicine", | |
year = 2012, | |
month = "May", | |
day = 28, | |
volume = 7, | |
number = 1, | |
pages = 7, | |
abstract = "The Portable Document Format (PDF) is the most commonly used | |
file format for online scientific publications. The absence | |
of effective means to extract text from these PDF files in a | |
layout-aware manner presents a significant challenge for | |
developers of biomedical text mining or biocuration | |
informatics systems that use published literature as an | |
information source. In this paper we introduce the | |
`Layout-Aware PDF Text Extraction' (LA-PDFText) system to | |
facilitate accurate extraction of text from PDF files of | |
research articles for use in text mining applications.", | |
issn = "1751-0473", | |
doi = "10.1186/1751-0473-7-7", | |
url = "https://doi.org/10.1186/1751-0473-7-7" | |
} | |
@ARTICLE{niklaus-2018-survey-open-infor-extrac, | |
author = "{Niklaus}, C. and {Cetto}, M. and {Freitas}, A. and | |
{Handschuh}, S.", | |
title = "{A Survey on Open Information Extraction}", | |
journal = "ArXiv e-prints", | |
year = 2018, | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System", | |
adsurl = "http://adsabs.harvard.edu/abs/2018arXiv180605599N", | |
archivePrefix= "arXiv", | |
eprint = "1806.05599", | |
keywords = "Computer Science - Computation and Language", | |
month = jun, | |
primaryClass = "cs.CL" | |
} | |
@Article{nesterov-2015-univer, | |
author = "Nesterov, Yu", | |
title = "Universal gradient methods for convex optimization problems", | |
journal = "Mathematical Programming", | |
year = 2015, | |
month = "Aug", | |
day = 01, | |
volume = 152, | |
number = 1, | |
pages = "381-404", | |
abstract = "In this paper, we present new methods for black-box convex | |
minimization. They do not need to know in advance the actual | |
level of smoothness of the objective function. Their only | |
essential input parameter is the required accuracy of the | |
solution. At the same time, for each particular problem class | |
they automatically ensure the best possible rate of | |
convergence. We confirm our theoretical results by | |
encouraging numerical experiments, which demonstrate that the | |
fast rate of convergence, typical for the smooth optimization | |
problems, sometimes can be achieved even on nonsmooth problem | |
instances.", | |
issn = "1436-4646", | |
doi = "10.1007/s10107-014-0790-0", | |
url = "https://doi.org/10.1007/s10107-014-0790-0" | |
} | |
@inproceedings{nothman-2018-stop-word,
  title        = "Stop Word Lists in Free Open-source Software Packages",
  author       = "Nothman, Joel and Qin, Hanmin and Yurchak, Roman",
  booktitle    = "Proceedings of Workshop for NLP Open Source Software
                  (NLP-OSS)",
  pages        = "7--12",
  year         = 2018
}
@inproceedings{shi-2009-hash,
  title        = "Hash kernels",
  author       = "Shi, Qinfeng and Petterson, James and Dror, Gideon and
                  Langford, John and Smola, Alex and Strehl, Alex and
                  Vishwanathan, Vishy",
  booktitle    = "Artificial intelligence and statistics",
  pages        = "496--503",
  year         = 2009
}
@article{weinberger-2009-featur-hashin,
  title        = "Feature Hashing for Large Scale Multitask Learning",
  author       = "Kilian Q. Weinberger and Anirban Dasgupta and Josh Attenberg
                  and John Langford and Alexander J. Smola",
  journal      = "CoRR",
  volume       = "abs/0902.2206",
  year         = 2009,
  url          = "http://arxiv.org/abs/0902.2206",
  eprint       = "0902.2206",
  archivePrefix= "arXiv",
  timestamp    = "Mon, 13 Aug 2018 16:48:03 +0200",
  biburl       = "https://dblp.org/rec/bib/journals/corr/abs-0902-2206",
  bibsource    = "dblp computer science bibliography, https://dblp.org"
}
@article{freksen-2018-fully-under-hashin-trick,
  title        = "Fully Understanding the Hashing Trick",
  author       = "Casper Benjamin Freksen and Lior Kamma and Kasper Green
                  Larsen",
  journal      = "CoRR",
  volume       = "abs/1805.08539",
  year         = 2018,
  url          = "http://arxiv.org/abs/1805.08539",
  eprint       = "1805.08539",
  archivePrefix= "arXiv",
  timestamp    = "Mon, 13 Aug 2018 16:49:00 +0200",
  biburl       = "https://dblp.org/rec/bib/journals/corr/abs-1805-08539",
  bibsource    = "dblp computer science bibliography, https://dblp.org"
}
@article{peters-2018-deep,
  title        = "Deep contextualized word representations",
  author       = "Matthew E. Peters and Mark Neumann and Mohit Iyyer and Matt
                  Gardner and Christopher Clark and Kenton Lee and Luke
                  Zettlemoyer",
  journal      = "CoRR",
  volume       = "abs/1802.05365",
  year         = 2018,
  url          = "http://arxiv.org/abs/1802.05365",
  eprint       = "1802.05365",
  archivePrefix= "arXiv",
  timestamp    = "Mon, 13 Aug 2018 16:48:54 +0200",
  biburl       = "https://dblp.org/rec/bib/journals/corr/abs-1802-05365",
  bibsource    = "dblp computer science bibliography, https://dblp.org"
}
@article{bengio-2003-neural-probab-languag-model,
  author       = "Bengio, Yoshua and Ducharme, R{\'e}jean and Vincent, Pascal
                  and Janvin, Christian",
  title        = "A Neural Probabilistic Language Model",
  journal      = "J. Mach. Learn. Res.",
  issue_date   = "3/1/2003",
  volume       = 3,
  month        = mar,
  year         = 2003,
  issn         = "1532-4435",
  pages        = "1137--1155",
  numpages     = 19,
  url          = "http://dl.acm.org/citation.cfm?id=944919.944966",
  acmid        = 944966,
  publisher    = "JMLR.org"
}
@article{devlin-2018-bert,
  title        = "{BERT:} Pre-training of Deep Bidirectional Transformers for
                  Language Understanding",
  author       = "Jacob Devlin and Ming{-}Wei Chang and Kenton Lee and Kristina
                  Toutanova",
  journal      = "CoRR",
  volume       = "abs/1810.04805",
  year         = 2018,
  url          = "http://arxiv.org/abs/1810.04805",
  eprint       = "1810.04805",
  archivePrefix= "arXiv",
  timestamp    = "Tue, 30 Oct 2018 20:39:56 +0100",
  biburl       = "https://dblp.org/rec/bib/journals/corr/abs-1810-04805",
  bibsource    = "dblp computer science bibliography, https://dblp.org"
}
@article{jozefowicz-2016-explor-limit-languag-model,
  title        = "Exploring the Limits of Language Modeling",
  author       = "Rafal J{\'{o}}zefowicz and Oriol Vinyals and Mike Schuster
                  and Noam Shazeer and Yonghui Wu",
  journal      = "CoRR",
  volume       = "abs/1602.02410",
  year         = 2016,
  url          = "http://arxiv.org/abs/1602.02410",
  eprint       = "1602.02410",
  archivePrefix= "arXiv",
  timestamp    = "Mon, 13 Aug 2018 16:48:43 +0200",
  biburl       = "https://dblp.org/rec/bib/journals/corr/JozefowiczVSSW16",
  bibsource    = "dblp computer science bibliography, https://dblp.org"
}
@article{vania-2017-from-charac-words-between,
  title        = "From Characters to Words to in Between: Do We Capture
                  Morphology?",
  author       = "Clara Vania and Adam Lopez",
  journal      = "CoRR",
  volume       = "abs/1704.08352",
  year         = 2017,
  url          = "http://arxiv.org/abs/1704.08352",
  eprint       = "1704.08352",
  archivePrefix= "arXiv",
  timestamp    = "Mon, 13 Aug 2018 16:46:32 +0200",
  biburl       = "https://dblp.org/rec/bib/journals/corr/VaniaL17",
  bibsource    = "dblp computer science bibliography, https://dblp.org"
}
@article{lei-2017-train-rnns-fast-cnns,
  author       = "Tao Lei and Yu Zhang and Yoav Artzi",
  title        = "Training {RNNs} as Fast as {CNNs}",
  journal      = "CoRR",
  volume       = "abs/1709.02755",
  year         = 2017,
  url          = "http://arxiv.org/abs/1709.02755",
  archivePrefix= "arXiv",
  eprint       = "1709.02755",
  timestamp    = "Mon, 13 Aug 2018 16:46:29 +0200",
  biburl       = "https://dblp.org/rec/bib/journals/corr/abs-1709-02755",
  bibsource    = "dblp computer science bibliography, https://dblp.org"
}
@article{tang-2018-why-self-atten,
  title        = "Why Self-Attention? {A} Targeted Evaluation of Neural Machine
                  Translation Architectures",
  author       = "Gongbo Tang and Matthias M{\"{u}}ller and Annette Rios and
                  Rico Sennrich",
  journal      = "CoRR",
  volume       = "abs/1808.08946",
  year         = 2018,
  url          = "http://arxiv.org/abs/1808.08946",
  eprint       = "1808.08946",
  archivePrefix= "arXiv",
  timestamp    = "Mon, 03 Sep 2018 07:29:38 +0200",
  biburl       = "https://dblp.org/rec/bib/journals/corr/abs-1808-08946",
  bibsource    = "dblp computer science bibliography, https://dblp.org"
}
@inproceedings{domhan-2018-how-much,
  author       = "Domhan, Tobias",
  title        = "How Much Attention Do You Need? A Granular Analysis of Neural
                  Machine Translation Architectures",
  booktitle    = "Proceedings of the 56th Annual Meeting of the Association for
                  Computational Linguistics (Volume 1: Long Papers)",
  year         = 2018,
  publisher    = "Association for Computational Linguistics",
  pages        = "1799--1808",
  location     = "Melbourne, Australia",
  url          = "http://aclweb.org/anthology/P18-1167"
}
@inproceedings{li-2015-word-embed-revis,
  author       = "Li, Yitan and Xu, Linli and Tian, Fei and Jiang, Liang and
                  Zhong, Xiaowei and Chen, Enhong",
  title        = "Word Embedding Revisited: A New Representation Learning and
                  Explicit Matrix Factorization Perspective",
  booktitle    = "Proceedings of the 24th International Conference on
                  Artificial Intelligence",
  series       = "IJCAI'15",
  year         = 2015,
  isbn         = "978-1-57735-738-4",
  location     = "Buenos Aires, Argentina",
  pages        = "3650--3656",
  numpages     = 7,
  url          = "http://dl.acm.org/citation.cfm?id=2832747.2832758",
  acmid        = 2832758,
  publisher    = "AAAI Press"
}
@article{evans-2000-frequen,
  title        = "Frequency versus probability formats in statistical word
                  problems",
  journal      = "Cognition",
  volume       = 77,
  number       = 3,
  pages        = "197--213",
  year         = 2000,
  issn         = "0010-0277",
  doi          = "10.1016/S0010-0277(00)00098-6",
  url          =
                  "http://www.sciencedirect.com/science/article/pii/S0010027700000986",
  author       = "Jonathan St.B.T Evans and Simon J Handley and Nick Perham and
                  David E Over and Valerie A Thompson",
  keywords     = "Frequency, Probability, Statistical word problems",
  abstract     = "Three experiments examined people's ability to incorporate
                  base rate information when judging posterior
                  probabilities. Specifically, we tested the (Cosmides, L., \&
                  Tooby, J. (1996). Are humans good intuitive statisticians
                  after all? Rethinking some conclusions from the literature on
                  judgement under uncertainty. Cognition, 58, 1–73) conclusion
                  that people's reasoning appears to follow Bayesian principles
                  when they are presented with information in a frequency
                  format, but not when information is presented as one case
                  probabilities. First, we found that frequency formats were
                  not generally associated with better performance than
                  probability formats unless they were presented in a manner
                  which facilitated construction of a set inclusion mental
                  model. Second, we demonstrated that the use of frequency
                  information may promote biases in the weighting of
                  information. When participants are asked to express their
                  judgements in frequency rather than probability format, they
                  were more likely to produce the base rate as their answer,
                  ignoring diagnostic evidence."
}
@article{griffin-1999-frequen-probab-predic,
  title        = "Frequency, Probability, and Prediction: Easy Solutions to
                  Cognitive Illusions?",
  journal      = "Cognitive Psychology",
  volume       = 38,
  number       = 1,
  pages        = "48--78",
  year         = 1999,
  issn         = "0010-0285",
  doi          = "10.1006/cogp.1998.0707",
  url          =
                  "http://www.sciencedirect.com/science/article/pii/S0010028598907071",
  author       = "Dale Griffin and Roger Buehler",
  abstract     = "Many errors in probabilistic judgment have been attributed to
                  people's inability to think in statistical terms when faced
                  with information about a single case. Prior theoretical
                  analyses and empirical results imply that the errors
                  associated with case-specific reasoning may be reduced when
                  people make frequentistic predictions about a set of
                  cases. In studies of three previously identified cognitive
                  biases, we find that frequency-based predictions are
                  different from—but no better than—case-specific judgments of
                  probability. First, in studies of the “planning fallacy,” we
                  compare the accuracy of aggregate frequency and case-specific
                  probability judgments in predictions of students' real-life
                  projects. When aggregate and single-case predictions are
                  collected from different respondents, there is little
                  difference between the two: Both are overly optimistic and
                  show little predictive validity. However, in within-subject
                  comparisons, the aggregate judgments are significantly more
                  conservative than the single-case predictions, though still
                  optimistically biased. Results from studies of overconfidence
                  in general knowledge and base rate neglect in categorical
                  prediction underline a general conclusion. Frequentistic
                  predictions made for sets of events are no more statistically
                  sophisticated, nor more accurate, than predictions made for
                  individual events using subjective probability."
}
@article{xie-2017-data-noisin,
  title        = "Data Noising as Smoothing in Neural Network Language Models",
  author       = "Ziang Xie and Sida I. Wang and Jiwei Li and Daniel L{\'{e}}vy
                  and Aiming Nie and Dan Jurafsky and Andrew Y. Ng",
  journal      = "CoRR",
  volume       = "abs/1703.02573",
  year         = 2017,
  url          = "http://arxiv.org/abs/1703.02573",
  eprint       = "1703.02573",
  archivePrefix= "arXiv",
  timestamp    = "Mon, 13 Aug 2018 16:47:17 +0200",
  biburl       = "https://dblp.org/rec/bib/journals/corr/XieWLLNJN17",
  bibsource    = "dblp computer science bibliography, https://dblp.org"
}
@article{do-2008-what,
  title        = "What is the expectation maximization algorithm?",
  author       = "Do, Chuong B and Batzoglou, Serafim",
  journal      = "Nature Biotechnology",
  volume       = 26,
  number       = 8,
  pages        = 897,
  year         = 2008,
  publisher    = "Nature Publishing Group"
}
@article{wei-2019-eda,
  title        = "{EDA:} Easy Data Augmentation Techniques for Boosting
                  Performance on Text Classification Tasks",
  author       = "Jason W. Wei and Kai Zou",
  journal      = "CoRR",
  volume       = "abs/1901.11196",
  year         = 2019,
  url          = "http://arxiv.org/abs/1901.11196",
  eprint       = "1901.11196",
  archivePrefix= "arXiv",
  timestamp    = "Mon, 04 Feb 2019 08:11:03 +0100",
  biburl       = "https://dblp.org/rec/bib/journals/corr/abs-1901-11196",
  bibsource    = "dblp computer science bibliography, https://dblp.org"
}
@article{sennrich-2015-improv,
  title        = "Improving neural machine translation models with monolingual
                  data",
  author       = "Sennrich, Rico and Haddow, Barry and Birch, Alexandra",
  journal      = "CoRR",
  volume       = "abs/1511.06709",
  year         = 2015,
  url          = "http://arxiv.org/abs/1511.06709",
  archivePrefix= "arXiv",
  eprint       = "1511.06709"
}
@article{smith-2017-dont-decay,
  title        = "Don't Decay the Learning Rate, Increase the Batch Size",
  author       = "Samuel L. Smith and Pieter{-}Jan Kindermans and Quoc V. Le",
  journal      = "CoRR",
  volume       = "abs/1711.00489",
  year         = 2017,
  url          = "http://arxiv.org/abs/1711.00489",
  eprint       = "1711.00489",
  archivePrefix= "arXiv",
  timestamp    = "Mon, 13 Aug 2018 16:46:33 +0200",
  biburl       = "https://dblp.org/rec/bib/journals/corr/abs-1711-00489",
  bibsource    = "dblp computer science bibliography, https://dblp.org"
}
@article{su-2016-differ-equat,
  author       = "Weijie Su and Stephen Boyd and Emmanuel J. Cand{\`e}s",
  title        = "A Differential Equation for Modeling Nesterov's Accelerated
                  Gradient Method: Theory and Insights",
  journal      = "Journal of Machine Learning Research",
  year         = 2016,
  volume       = 17,
  number       = 153,
  pages        = "1--43",
  url          = "http://jmlr.org/papers/v17/15-084.html"
}
@article{arora-2012-mwum,
  title        = "The multiplicative weights update method: a meta-algorithm
                  and applications",
  author       = "Arora, Sanjeev and Hazan, Elad and Kale, Satyen",
  journal      = "Theory of Computing",
  volume       = 8,
  number       = 1,
  pages        = "121--164",
  year         = 2012,
  publisher    = "Theory of Computing Exchange"
}
@article{li-2018-deep-reinf-learn,
  title        = "Deep Reinforcement Learning",
  author       = "Yuxi Li",
  journal      = "CoRR",
  volume       = "abs/1810.06339",
  year         = 2018,
  url          = "http://arxiv.org/abs/1810.06339",
  eprint       = "1810.06339",
  archivePrefix= "arXiv",
  timestamp    = "Tue, 30 Oct 2018 20:39:56 +0100",
  biburl       = "https://dblp.org/rec/bib/journals/corr/abs-1810-06339",
  bibsource    = "dblp computer science bibliography, https://dblp.org"
}
@article{balles-2017-follow-signs,
  title        = "Follow the Signs for Robust Stochastic Optimization",
  author       = "Lukas Balles and Philipp Hennig",
  journal      = "CoRR",
  volume       = "abs/1705.07774",
  year         = 2017,
  url          = "http://arxiv.org/abs/1705.07774",
  eprint       = "1705.07774",
  archivePrefix= "arXiv",
  timestamp    = "Mon, 13 Aug 2018 16:48:00 +0200",
  biburl       = "https://dblp.org/rec/bib/journals/corr/BallesH17",
  bibsource    = "dblp computer science bibliography, https://dblp.org"
}
@inproceedings{sutskever-2013-impor-initial,
  title        = "On the Importance of Initialization and Momentum in Deep
                  Learning",
  author       = "Sutskever, Ilya and Martens, James and Dahl, George and
                  Hinton, Geoffrey",
  booktitle    = "Proceedings of the 30th International Conference on
                  International Conference on Machine Learning - Volume 28",
  series       = "ICML'13",
  location     = "Atlanta, GA, USA",
  year         = 2013,
  pages        = "III-1139--III-1147",
  url          = "http://dl.acm.org/citation.cfm?id=3042817.3043064",
  acmid        = 3043064,
  publisher    = "JMLR.org"
}
@article{inoue-2018-data-augmen,
  title        = "Data Augmentation by Pairing Samples for Images
                  Classification",
  author       = "Hiroshi Inoue",
  journal      = "CoRR",
  volume       = "abs/1801.02929",
  year         = 2018,
  url          = "http://arxiv.org/abs/1801.02929",
  eprint       = "1801.02929",
  archivePrefix= "arXiv",
  timestamp    = "Mon, 13 Aug 2018 16:46:20 +0200",
  biburl       = "https://dblp.org/rec/bib/journals/corr/abs-1801-02929",
  bibsource    = "dblp computer science bibliography, https://dblp.org"
}
@article{zhang-2017-mixup,
  author       = "Hongyi Zhang and Moustapha Ciss{\'{e}} and Yann N. Dauphin
                  and David Lopez{-}Paz",
  title        = "{mixup}: Beyond Empirical Risk Minimization",
  journal      = "CoRR",
  volume       = "abs/1710.09412",
  year         = 2017,
  url          = "http://arxiv.org/abs/1710.09412",
  archivePrefix= "arXiv",
  eprint       = "1710.09412",
  timestamp    = "Mon, 13 Aug 2018 16:47:14 +0200",
  biburl       = "https://dblp.org/rec/bib/journals/corr/abs-1710-09412",
  bibsource    = "dblp computer science bibliography, https://dblp.org"
}
@article{cubuk-2018-autoaugment,
  author       = "Ekin Dogus Cubuk and Barret Zoph and Dandelion Man{\'{e}} and
                  Vijay Vasudevan and Quoc V. Le",
  title        = "{AutoAugment}: Learning Augmentation Policies from Data",
  journal      = "CoRR",
  volume       = "abs/1805.09501",
  year         = 2018,
  url          = "http://arxiv.org/abs/1805.09501",
  archivePrefix= "arXiv",
  eprint       = "1805.09501",
  timestamp    = "Mon, 13 Aug 2018 16:48:44 +0200",
  biburl       = "https://dblp.org/rec/bib/journals/corr/abs-1805-09501",
  bibsource    = "dblp computer science bibliography, https://dblp.org"
}
@article{li-2018-under-dishar,
  title        = "Understanding the Disharmony between Dropout and Batch
                  Normalization by Variance Shift",
  author       = "Xiang Li and Shuo Chen and Xiaolin Hu and Jian Yang",
  journal      = "CoRR",
  volume       = "abs/1801.05134",
  year         = 2018,
  url          = "http://arxiv.org/abs/1801.05134",
  eprint       = "1801.05134",
  archivePrefix= "arXiv",
  timestamp    = "Fri, 21 Dec 2018 14:34:10 +0100",
  biburl       = "https://dblp.org/rec/bib/journals/corr/abs-1801-05134",
  bibsource    = "dblp computer science bibliography, https://dblp.org"
}
@article{bergstra-2012-random-searc-hyper-optim,
  author       = "Bergstra, James and Bengio, Yoshua",
  title        = "Random Search for Hyper-parameter Optimization",
  journal      = "J. Mach. Learn. Res.",
  issue_date   = "January 2012",
  volume       = 13,
  number       = 1,
  month        = feb,
  year         = 2012,
  issn         = "1532-4435",
  pages        = "281--305",
  numpages     = 25,
  url          = "http://dl.acm.org/citation.cfm?id=2503308.2188395",
  acmid        = 2188395,
  publisher    = "JMLR.org",
  keywords     = "deep learning, global optimization, model selection, neural
                  networks, response surface modeling"
}
@article{masters-2018-revis-small,
  title        = "Revisiting Small Batch Training for Deep Neural Networks",
  author       = "Dominic Masters and Carlo Luschi",
  journal      = "CoRR",
  volume       = "abs/1804.07612",
  year         = 2018,
  url          = "http://arxiv.org/abs/1804.07612",
  eprint       = "1804.07612",
  archivePrefix= "arXiv",
  timestamp    = "Mon, 13 Aug 2018 16:48:13 +0200",
  biburl       = "https://dblp.org/rec/bib/journals/corr/abs-1804-07612",
  bibsource    = "dblp computer science bibliography, https://dblp.org"
}
@inproceedings{tsuruoka-2009-sgd-l1,
  title        = "Stochastic gradient descent training for l1-regularized
                  log-linear models with cumulative penalty",
  author       = "Tsuruoka, Yoshimasa and Tsujii, Jun'ichi and Ananiadou,
                  Sophia",
  booktitle    = "Proceedings of the Joint Conference of the 47th Annual
                  Meeting of the ACL and the 4th International Joint Conference
                  on Natural Language Processing of the AFNLP: Volume 1-Volume
                  1",
  pages        = "477--485",
  year         = 2009,
  organization = "Association for Computational Linguistics"
}
@inproceedings{wilson-2017-margin-value,
  title        = "The Marginal Value of Adaptive Gradient Methods in Machine
                  Learning",
  author       = "Wilson, Ashia C and Roelofs, Rebecca and Stern, Mitchell and
                  Srebro, Nati and Recht, Benjamin",
  booktitle    = "Advances in Neural Information Processing Systems 30",
  editor       = "I. Guyon and U. V. Luxburg and S. Bengio and H. Wallach and
                  R. Fergus and S. Vishwanathan and R. Garnett",
  pages        = "4148--4158",
  year         = 2017,
  publisher    = "Curran Associates, Inc.",
  url          =
                  "http://papers.nips.cc/paper/7003-the-marginal-value-of-adaptive-gradient-methods-in-machine-learning.pdf"
}
@inproceedings{hoffer-2017-train-longer,
  title        = "Train longer, generalize better: closing the generalization
                  gap in large batch training of neural networks",
  author       = "Hoffer, Elad and Hubara, Itay and Soudry, Daniel",
  booktitle    = "Advances in Neural Information Processing Systems",
  pages        = "1731--1741",
  year         = 2017
}
@inproceedings{santurkar-2018-how,
  title        = "How does batch normalization help optimization?",
  author       = "Santurkar, Shibani and Tsipras, Dimitris and Ilyas, Andrew
                  and Madry, Aleksander",
  booktitle    = "Advances in Neural Information Processing Systems",
  pages        = "2483--2493",
  year         = 2018
}
@article{breiman-2001-statistical-modeling,
  title        = "Statistical modeling: The two cultures (with comments and a
                  rejoinder by the author)",
  author       = "Breiman, Leo and others",
  journal      = "Statistical science",
  volume       = 16,
  number       = 3,
  pages        = "199--231",
  year         = 2001,
  publisher    = "Institute of Mathematical Statistics"
}
@article{howard-2018-fine-lang,
  title        = "Fine-tuned Language Models for Text Classification",
  author       = "Jeremy Howard and Sebastian Ruder",
  journal      = "CoRR",
  volume       = "abs/1801.06146",
  year         = 2018,
  url          = "http://arxiv.org/abs/1801.06146",
  eprint       = "1801.06146",
  archivePrefix= "arXiv",
  timestamp    = "Mon, 13 Aug 2018 16:46:54 +0200",
  biburl       = "https://dblp.org/rec/bib/journals/corr/abs-1801-06146",
  bibsource    = "dblp computer science bibliography, https://dblp.org"
}
@article{bai-2018-tcn,
  title        = "An Empirical Evaluation of Generic Convolutional and
                  Recurrent Networks for Sequence Modeling",
  author       = "Shaojie Bai and J. Zico Kolter and Vladlen Koltun",
  journal      = "CoRR",
  volume       = "abs/1803.01271",
  year         = 2018,
  url          = "http://arxiv.org/abs/1803.01271",
  eprint       = "1803.01271",
  archivePrefix= "arXiv",
  timestamp    = "Mon, 13 Aug 2018 16:47:39 +0200",
  biburl       = "https://dblp.org/rec/bib/journals/corr/abs-1803-01271",
  bibsource    = "dblp computer science bibliography, https://dblp.org"
}
@article{nemirovski-2009-robust,
  title        = "Robust stochastic approximation approach to stochastic
                  programming",
  author       = "Nemirovski, Arkadi and Juditsky, Anatoli and Lan, Guanghui
                  and Shapiro, Alexander",
  journal      = "SIAM Journal on Optimization",
  volume       = 19,
  number       = 4,
  pages        = "1574--1609",
  year         = 2009,
  publisher    = "SIAM"
}
@article{todorov-2016-optim,
  year         = 2016,
  title        = "Optimal control theory",
  author       = "Todorov, Emanuel",
  journal      = "Bayesian brain: probabilistic approaches to neural coding",
  pages        = "269--298",
  publisher    = "MIT Press Cambridge (Massachusetts)"
}
@article{zhao-2019-chines-word-segmen,
  title        = "Chinese Word Segmentation: Another Decade Review
                  {(2007-2017)}",
  author       = "Hai Zhao and Deng Cai and Changning Huang and Chunyu Kit",
  journal      = "CoRR",
  volume       = "abs/1901.06079",
  year         = 2019,
  url          = "http://arxiv.org/abs/1901.06079",
  eprint       = "1901.06079",
  archivePrefix= "arXiv",
  timestamp    = "Fri, 01 Feb 2019 13:39:59 +0100",
  biburl       = "https://dblp.org/rec/bib/journals/corr/abs-1901-06079",
  bibsource    = "dblp computer science bibliography, https://dblp.org"
}
@inproceedings{johnson-2013-svrg,
  title        = "Accelerating stochastic gradient descent using predictive
                  variance reduction",
  author       = "Johnson, Rie and Zhang, Tong",
  booktitle    = "Advances in neural information processing systems",
  pages        = "315--323",
  year         = 2013
}
@article{defazio-2014-saga,
  title        = "{SAGA:} {A} Fast Incremental Gradient Method With Support for
                  Non-Strongly Convex Composite Objectives",
  author       = "Aaron Defazio and Francis R. Bach and Simon Lacoste{-}Julien",
  journal      = "CoRR",
  volume       = "abs/1407.0202",
  year         = 2014,
  url          = "http://arxiv.org/abs/1407.0202",
  eprint       = "1407.0202",
  archivePrefix= "arXiv",
  timestamp    = "Mon, 13 Aug 2018 16:46:52 +0200",
  biburl       = "https://dblp.org/rec/bib/journals/corr/DefazioBL14",
  bibsource    = "dblp computer science bibliography, https://dblp.org"
}
@article{pinter-2017-mimic-word,
  author       = "Yuval Pinter and Robert Guthrie and Jacob Eisenstein",
  title        = "Mimicking Word Embeddings using Subword {RNNs}",
  journal      = "CoRR",
  volume       = "abs/1707.06961",
  year         = 2017,
  url          = "http://arxiv.org/abs/1707.06961",
  archivePrefix= "arXiv",
  eprint       = "1707.06961",
  timestamp    = "Mon, 13 Aug 2018 16:46:53 +0200",
  biburl       = "https://dblp.org/rec/bib/journals/corr/PinterGE17",
  bibsource    = "dblp computer science bibliography, https://dblp.org"
}
@article{kiperwasser-2016-simpl-accur,
  title        = "Simple and Accurate Dependency Parsing Using Bidirectional
                  {LSTM} Feature Representations",
  author       = "Kiperwasser, Eliyahu and Goldberg, Yoav",
  journal      = "Transactions of the Association for Computational
                  Linguistics",
  volume       = 4,
  year         = 2016,
  url          = "https://www.aclweb.org/anthology/Q16-1023",
  pages        = "313--327",
  abstract     = "We present a simple and effective scheme for dependency
                  parsing which is based on bidirectional-LSTMs (BiLSTMs). Each
                  sentence token is associated with a BiLSTM vector
                  representing the token in its sentential context, and feature
                  vectors are constructed by concatenating a few BiLSTM
                  vectors. The BiLSTM is trained jointly with the parser
                  objective, resulting in very effective feature extractors for
                  parsing. We demonstrate the effectiveness of the approach by
                  applying it to a greedy transition-based parser as well as to
                  a globally optimized graph-based parser. The resulting
                  parsers have very simple architectures, and match or surpass
                  the state-of-the-art accuracies on English and Chinese."
}
@article{baltescu-2014-pragm,
  title        = "Pragmatic neural language modelling in machine translation",
  author       = "Baltescu, Paul and Blunsom, Phil",
  journal      = "CoRR",
  volume       = "abs/1412.7119",
  year         = 2014,
  url          = "http://arxiv.org/abs/1412.7119",
  archivePrefix= "arXiv",
  eprint       = "1412.7119"
}
@article{cooijmans-2016-recur-batch-normal,
  title        = "Recurrent Batch Normalization",
  author       = "Tim Cooijmans and Nicolas Ballas and C{\'{e}}sar Laurent and
                  Aaron C. Courville",
  journal      = "CoRR",
  volume       = "abs/1603.09025",
  year         = 2016,
  url          = "http://arxiv.org/abs/1603.09025",
  eprint       = "1603.09025",
  archivePrefix= "arXiv",
  timestamp    = "Mon, 13 Aug 2018 16:48:30 +0200",
  biburl       = "https://dblp.org/rec/bib/journals/corr/CooijmansBLC16",
  bibsource    = "dblp computer science bibliography, https://dblp.org"
}
@article{dehghani-2018-univer-trans,
  title        = "Universal Transformers",
  author       = "Mostafa Dehghani and Stephan Gouws and Oriol Vinyals and
                  Jakob Uszkoreit and Lukasz Kaiser",
  journal      = "CoRR",
  volume       = "abs/1807.03819",
  year         = 2018,
  url          = "http://arxiv.org/abs/1807.03819",
  eprint       = "1807.03819",
  archivePrefix= "arXiv",
  timestamp    = "Mon, 13 Aug 2018 16:49:11 +0200",
  biburl       = "https://dblp.org/rec/bib/journals/corr/abs-1807-03819",
  bibsource    = "dblp computer science bibliography, https://dblp.org"
}
@article{dai-2019-trans-xl,
  author       = "Zihang Dai and Zhilin Yang and Yiming Yang and Jaime
                  G. Carbonell and Quoc V. Le and Ruslan Salakhutdinov",
  title        = "{Transformer-XL}: Attentive Language Models Beyond a
                  Fixed-Length Context",
  journal      = "CoRR",
  volume       = "abs/1901.02860",
  year         = 2019,
  url          = "http://arxiv.org/abs/1901.02860",
  archivePrefix= "arXiv",
  eprint       = "1901.02860",
  timestamp    = "Fri, 01 Feb 2019 13:39:59 +0100",
  biburl       = "https://dblp.org/rec/bib/journals/corr/abs-1901-02860",
  bibsource    = "dblp computer science bibliography, https://dblp.org"
}
@article{popel-2018-train-tips-trans-model,
  title        = "Training Tips for the Transformer Model",
  author       = "Martin Popel and Ondrej Bojar",
  journal      = "CoRR",
  volume       = "abs/1804.00247",
  year         = 2018,
  url          = "http://arxiv.org/abs/1804.00247",
  eprint       = "1804.00247",
  archivePrefix= "arXiv",
  timestamp    = "Mon, 13 Aug 2018 16:47:13 +0200",
  biburl       = "https://dblp.org/rec/bib/journals/corr/abs-1804-00247",
  bibsource    = "dblp computer science bibliography, https://dblp.org"
}
@article{liu-2019-linguis-knowl,
  title        = "Linguistic Knowledge and Transferability of Contextual
                  Representations",
  author       = "Nelson F. Liu and Matt Gardner and Yonatan Belinkov and
                  Matthew Peters and Noah A. Smith",
  journal      = "CoRR",
  volume       = "abs/1903.08855",
  year         = 2019,
  url          = "http://arxiv.org/abs/1903.08855",
  eprint       = "1903.08855",
  archivePrefix= "arXiv",
  timestamp    = "Mon, 01 Apr 2019 14:07:37 +0200",
  biburl       = "https://dblp.org/rec/bib/journals/corr/abs-1903-08855",
  bibsource    = "dblp computer science bibliography, https://dblp.org"
}
@article{peters-2019-to-tune-not-tune,
  title        = "To Tune or Not to Tune? Adapting Pretrained Representations
                  to Diverse Tasks",
  author       = "Matthew Peters and Sebastian Ruder and Noah A. Smith",
  journal      = "CoRR",
  volume       = "abs/1903.05987",
  year         = 2019,
  url          = "http://arxiv.org/abs/1903.05987",
  eprint       = "1903.05987",
  archivePrefix= "arXiv",
  timestamp    = "Sun, 31 Mar 2019 19:01:24 +0200",
  biburl       = "https://dblp.org/rec/bib/journals/corr/abs-1903-05987",
  bibsource    = "dblp computer science bibliography, https://dblp.org"
}
@article{li-2011-learn-to-rank,
  title        = "A short introduction to learning to rank",
  author       = "Li, Hang",
  journal      = "IEICE TRANSACTIONS on Information and Systems",
  volume       = 94,
  number       = 10,
  pages        = "1854--1862",
  year         = 2011,
  publisher    = "The Institute of Electronics, Information and Communication
                  Engineers"
}
@article{burges-2010-from-ranknet,
  title        = "From {RankNet} to {LambdaRank} to {LambdaMART}: An overview",
  author       = "Burges, Christopher JC",
  journal      = "Learning",
  volume       = 11,
  number       = "23-581",
  pages        = 81,
  year         = 2010
}
@article{geurts-2006-extreme,
  author       = "Geurts, Pierre and Ernst, Damien and Wehenkel, Louis",
  title        = "Extremely randomized trees",
  journal      = "Machine Learning",
  year         = 2006,
  month        = apr,
  day          = 01,
  volume       = 63,
  number       = 1,
  pages        = "3--42",
  abstract     = "This paper proposes a new tree-based ensemble method for
                  supervised classification and regression problems. It
                  essentially consists of randomizing strongly both attribute
                  and cut-point choice while splitting a tree node. In the
                  extreme case, it builds totally randomized trees whose
                  structures are independent of the output values of the
                  learning sample. The strength of the randomization can be
                  tuned to problem specifics by the appropriate choice of a
                  parameter. We evaluate the robustness of the default choice
                  of this parameter, and we also provide insight on how to
                  adjust it in particular situations. Besides accuracy, the
                  main strength of the resulting algorithm is computational
                  efficiency. A bias/variance analysis of the Extra-Trees
                  algorithm is also provided as well as a geometrical and a
                  kernel characterization of the models induced.",
  issn         = "1573-0565",
  doi          = "10.1007/s10994-006-6226-1",
  url          = "https://doi.org/10.1007/s10994-006-6226-1"
}
@article{chase-2014-thres-class,
  author       = "{Chase Lipton}, Zachary and {Elkan}, Charles and
                  {Narayanaswamy}, Balakrishnan",
  title        = "Thresholding Classifiers to Maximize {F1} Score",
  journal      = "arXiv e-prints",
  keywords     = "Statistics - Machine Learning, Computer Science - Information
                  Retrieval, Computer Science - Machine Learning",
  year         = 2014,
  month        = feb,
  eid          = "arXiv:1402.1892",
  pages        = "arXiv:1402.1892",
  archivePrefix= "arXiv",
  eprint       = "1402.1892",
  primaryClass = "stat.ML",
  adsurl       = "https://ui.adsabs.harvard.edu/abs/2014arXiv1402.1892C",
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System"
}
@article{baak-2018-phik,
  author       = "{Baak}, M. and {Koopman}, R. and {Snoek}, H. and {Klous}, S.",
  title        = "{A new correlation coefficient between categorical, ordinal
                  and interval variables with Pearson characteristics}",
  journal      = "arXiv e-prints",
  keywords     = "Statistics - Methodology",
  year         = 2018,
  month        = nov,
  eid          = "arXiv:1811.11440",
  pages        = "arXiv:1811.11440",
  archivePrefix= "arXiv",
  eprint       = "1811.11440",
  primaryClass = "stat.ME",
  adsurl       = "https://ui.adsabs.harvard.edu/abs/2018arXiv181111440B",
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System"
}
@inproceedings{li-2008-learn-rank,
  author    = {Li, Ping},
  title     = {Learning to Rank Using Classification and Gradient Boosting},
  booktitle = {NIPS 2008},
  year      = 2008
}
@inproceedings{li-2007-mcrank,
  title     = "{McRank}: Learning to Rank Using Multiple Classification and
               Gradient Boosting",
  author    = "Li, Ping and Burges, Christopher J. C. and Wu, Qiang",
  booktitle = "NIPS",
  year      = 2007
}
@inproceedings{ke-2017-lightgbm,
  title     = "{LightGBM}: A highly efficient gradient boosting decision tree",
  author    = "Ke, Guolin and Meng, Qi and Finley, Thomas and Wang, Taifeng
               and Chen, Wei and Ma, Weidong and Ye, Qiwei and Liu, Tie-Yan",
  booktitle = "Advances in Neural Information Processing Systems",
  pages     = "3146--3154",
  year      = 2017
}
@incollection{schapire-2013-explain-adaboost,
  title     = "Explaining {AdaBoost}",
  author    = "Schapire, Robert E",
  booktitle = "Empirical inference",
  pages     = "37--52",
  year      = 2013,
  publisher = "Springer"
}
@inproceedings{pardoe-2010-boost-regres-trans,
  author    = "Pardoe, David and Stone, Peter",
  title     = "Boosting for Regression Transfer",
  booktitle = "Proceedings of the 27th International Conference on
               International Conference on Machine Learning",
  series    = "ICML'10",
  year      = 2010,
  isbn      = "978-1-60558-907-7",
  location  = "Haifa, Israel",
  pages     = "863--870",
  numpages  = 8,
  url       = "http://dl.acm.org/citation.cfm?id=3104322.3104432",
  acmid     = 3104432,
  publisher = "Omnipress",
  address   = "USA"
}
@article{dorogush-2018-catboost,
  title        = {CatBoost: gradient boosting with categorical features
                  support},
  author       = {Dorogush, Anna Veronika and Ershov, Vasily and Gulin, Andrey},
  journal      = {CoRR},
  volume       = {abs/1810.11363},
  year         = 2018,
  url          = {http://arxiv.org/abs/1810.11363},
  archivePrefix= {arXiv},
  eprint       = {1810.11363},
  timestamp    = {Wed, 31 Oct 2018 14:24:29 +0100},
  biburl       = {https://dblp.org/rec/bib/journals/corr/abs-1810-11363},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{freund-1997-decis-theor,
  title    = "A Decision-Theoretic Generalization of On-Line Learning and
              an Application to Boosting",
  author   = "Yoav Freund and Robert E Schapire",
  journal  = "Journal of Computer and System Sciences",
  volume   = 55,
  number   = 1,
  pages    = "119--139",
  year     = 1997,
  issn     = "0022-0000",
  doi      = "10.1006/jcss.1997.1504",
  url      =
  "http://www.sciencedirect.com/science/article/pii/S002200009791504X",
  abstract = "In the first part of the paper we consider the problem of
              dynamically apportioning resources among a set of options in
              a worst-case on-line framework. The model we study can be
              interpreted as a broad, abstract extension of the
              well-studied on-line prediction model to a general
              decision-theoretic setting. We show that the multiplicative
              weight-update Littlestone--Warmuth rule can be adapted to this
              model, yielding bounds that are slightly weaker in some
              cases, but applicable to a considerably more general class of
              learning problems. We show how the resulting learning
              algorithm can be applied to a variety of problems, including
              gambling, multiple-outcome prediction, repeated games, and
              prediction of points in Rn. In the second part of the paper
              we apply the multiplicative weight-update technique to derive
              a new boosting algorithm. This boosting algorithm does not
              require any prior knowledge about the performance of the weak
              learning algorithm. We also study generalizations of the new
              boosting algorithm to the problem of learning functions whose
              range, rather than being binary, is an arbitrary finite set
              or a bounded segment of the real line."
}
@inproceedings{niculescu-mizil-2005-predic,
  title        = "Predicting good probabilities with supervised learning",
  author       = "Niculescu-Mizil, Alexandru and Caruana, Rich",
  booktitle    = "Proceedings of the 22nd international conference on Machine
                  learning",
  pages        = "625--632",
  year         = 2005,
  organization = "ACM"
}
@article{kaufman-2012-leakage,
  author    = {Kaufman, Shachar and Rosset, Saharon and Perlich, Claudia and
               Stitelman, Ori},
  title     = {Leakage in data mining: Formulation, detection, and
               avoidance},
  journal   = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  year      = 2012,
  volume    = 6,
  number    = 4,
  pages     = 15,
  publisher = {ACM}
}
@article{micci-barreca-2001-target-encoding,
  author     = "Micci-Barreca, Daniele",
  title      = "A Preprocessing Scheme for High-cardinality Categorical
                Attributes in Classification and Prediction Problems",
  journal    = "SIGKDD Explor. Newsl.",
  issue_date = "July 2001",
  volume     = 3,
  number     = 1,
  month      = jul,
  year       = 2001,
  issn       = "1931-0145",
  pages      = "27--32",
  numpages   = 6,
  url        = "http://doi.acm.org/10.1145/507533.507538",
  doi        = "10.1145/507533.507538",
  acmid      = 507538,
  publisher  = "ACM",
  address    = "New York, NY, USA",
  keywords   = "categorical attributes, empirical bayes, hierarchical
                attributes, neural networks, predictive models"
}
@phdthesis{shi-2007-best,
  author = {Shi, Haijian},
  title  = {Best-first decision tree learning},
  school = {The University of Waikato},
  year   = 2007
}
@article{fisher-1958-group-maxim-homog,
  author    = "Fisher, Walter D",
  title     = "On Grouping for Maximum Homogeneity",
  journal   = "Journal of the American Statistical Association",
  volume    = 53,
  number    = 284,
  pages     = "789--798",
  year      = 1958,
  publisher = "Taylor \& Francis"
}
@article{friedman-2010-regul-paths,
  title     = {Regularization Paths for Generalized Linear Models Via
               Coordinate Descent},
  author    = {Friedman, Jerome and Hastie, Trevor and Tibshirani, Rob},
  journal   = {Journal of statistical software},
  year      = 2010,
  volume    = 33,
  number    = 1,
  pages     = 1,
  publisher = {NIH Public Access}
}
@inproceedings{kerber-1992-chimerge,
  author       = "Kerber, Randy",
  title        = "{ChiMerge}: Discretization of numeric attributes",
  booktitle    = "Proceedings of the tenth national conference on Artificial
                  intelligence",
  year         = 1992,
  pages        = "123--128",
  organization = "AAAI Press"
}
@article{harrell-2017-regres-model-strat,
  title   = {Regression Modeling Strategies},
  author  = {Harrell Jr, Frank E},
  journal = {BIOS},
  volume  = 330,
  year    = 2017
}
@article{ribeiro-2016-lime,
  title        = {``Why Should {I} Trust You?'': Explaining the Predictions of
                  Any Classifier},
  author       = {Marco T{\'{u}}lio Ribeiro and Sameer Singh and Carlos
                  Guestrin},
  journal      = {CoRR},
  volume       = {abs/1602.04938},
  year         = 2016,
  url          = {http://arxiv.org/abs/1602.04938},
  archivePrefix= {arXiv},
  eprint       = {1602.04938},
  timestamp    = {Mon, 13 Aug 2018 16:49:09 +0200},
  biburl       = {https://dblp.org/rec/bib/journals/corr/RibeiroSG16},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{lei-2016-dist-free,
  author       = "{Lei}, Jing and {G'Sell}, Max and {Rinaldo}, Alessandro and
                  {Tibshirani}, Ryan J. and {Wasserman}, Larry",
  title        = "{Distribution-Free Predictive Inference For Regression}",
  journal      = "arXiv e-prints",
  keywords     = "Statistics - Methodology, Mathematics - Statistics Theory,
                  Statistics - Machine Learning",
  year         = 2016,
  month        = apr,
  eid          = "arXiv:1604.04173",
  pages        = "arXiv:1604.04173",
  archivePrefix= "arXiv",
  eprint       = "1604.04173",
  primaryClass = "stat.ME",
  adsurl       = "https://ui.adsabs.harvard.edu/abs/2016arXiv160404173L",
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System"
}
@incollection{lundberg-2017-unified-approac,
  author    = "Lundberg, Scott M and Lee, Su-In",
  booktitle = "Advances in Neural Information Processing Systems 30",
  editor    = "I. Guyon and U. V. Luxburg and S. Bengio and H. Wallach and
               R. Fergus and S. Vishwanathan and R. Garnett",
  pages     = "4765--4774",
  publisher = "Curran Associates, Inc.",
  title     = "A Unified Approach to Interpreting Model Predictions",
  url       =
  "http://papers.nips.cc/paper/7062-a-unified-approach-to-interpreting-model-predictions.pdf",
  year      = 2017
}
@inproceedings{kohavi-1995-study-cross,
  author    = "Kohavi, Ron",
  title     = "A Study of Cross-validation and Bootstrap for Accuracy
               Estimation and Model Selection",
  booktitle = "Proceedings of the 14th International Joint Conference on
               Artificial Intelligence - Volume 2",
  series    = "IJCAI'95",
  year      = 1995,
  isbn      = "1-55860-363-8",
  location  = "Montreal, Quebec, Canada",
  pages     = "1137--1143",
  numpages  = 7,
  url       = "http://dl.acm.org/citation.cfm?id=1643031.1643047",
  acmid     = 1643047,
  publisher = "Morgan Kaufmann Publishers Inc.",
  address   = "San Francisco, CA, USA"
}
@inproceedings{kanter-2015-deep,
  author       = "Kanter, James Max and Veeramachaneni, Kalyan",
  title        = "Deep feature synthesis: Towards automating data science
                  endeavors",
  booktitle    = "2015 IEEE International Conference on Data Science and
                  Advanced Analytics (DSAA)",
  year         = 2015,
  pages        = "1--10",
  organization = "IEEE"
}
@article{yuan-2006-group-lasso,
  author    = "Yuan, Ming and Lin, Yi",
  title     = "Model Selection and Estimation in Regression With Grouped
               Variables",
  journal   = "Journal of the Royal Statistical Society: Series B
               (Statistical Methodology)",
  volume    = 68,
  number    = 1,
  pages     = "49--67",
  year      = 2006,
  publisher = "Wiley Online Library"
}
@article{tibshirani-2005-fused-lasso,
  author    = "Tibshirani, Robert and Saunders, Michael and Rosset, Saharon
               and Zhu, Ji and Knight, Keith",
  title     = "Sparsity and Smoothness Via the Fused Lasso",
  journal   = "Journal of the Royal Statistical Society: Series B
               (Statistical Methodology)",
  volume    = 67,
  number    = 1,
  pages     = "91--108",
  year      = 2005,
  publisher = "Wiley Online Library"
}
@article{gregorutti-2013-correl,
  author       = "{Gregorutti}, Baptiste and {Michel}, Bertrand and
                  {Saint-Pierre}, Philippe",
  title        = "{Correlation and variable importance in random forests}",
  journal      = "arXiv e-prints",
  keywords     = "Statistics - Methodology",
  year         = 2013,
  month        = oct,
  eid          = "arXiv:1310.5726",
  pages        = "arXiv:1310.5726",
  archivePrefix= "arXiv",
  eprint       = "1310.5726",
  primaryClass = "stat.ME",
  adsurl       = "https://ui.adsabs.harvard.edu/abs/2013arXiv1310.5726G",
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System"
}
@inproceedings{he-2008-adasyn,
  author       = "He, Haibo and Bai, Yang and Garcia, Edwardo A and Li, Shutao",
  title        = "{ADASYN}: Adaptive synthetic sampling approach for imbalanced
                  learning",
  booktitle    = "2008 IEEE International Joint Conference on Neural Networks
                  (IEEE World Congress on Computational Intelligence)",
  year         = 2008,
  pages        = "1322--1328",
  organization = "IEEE"
}
@inproceedings{han-2005-border-smote,
  author       = "Han, Hui and Wang, Wen-Yuan and Mao, Bing-Huan",
  title        = "Borderline-{SMOTE}: a new over-sampling method in imbalanced
                  data sets learning",
  booktitle    = "International conference on intelligent computing",
  year         = 2005,
  pages        = "878--887",
  organization = "Springer"
}
@inproceedings{nguyen-2009-border,
  author       = "Nguyen, Hien M and Cooper, Eric W and Kamei, Katsuari",
  title        = "Borderline over-sampling for imbalanced data classification",
  booktitle    = "Proceedings: Fifth International Workshop on Computational
                  Intelligence \& Applications",
  year         = 2009,
  volume       = 2009,
  number       = 1,
  pages        = "24--29",
  organization = "IEEE SMC Hiroshima Chapter"
}
@article{last-2017-overs-imbal,
  title        = {Oversampling for Imbalanced Learning Based on K-Means and
                  {SMOTE}},
  author       = {Felix Last and Georgios Douzas and Fernando
                  Ba{\c{c}}{\~{a}}o},
  journal      = {CoRR},
  volume       = {abs/1711.00837},
  year         = 2017,
  url          = {http://arxiv.org/abs/1711.00837},
  archivePrefix= {arXiv},
  eprint       = {1711.00837},
  timestamp    = {Wed, 10 Oct 2018 15:58:34 +0200},
  biburl       = {https://dblp.org/rec/bib/journals/corr/abs-1711-00837},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{mani-2003-knn,
  title     = {kNN approach to unbalanced data distributions: a case study
               involving information extraction},
  author    = {Mani, Inderjeet and Zhang, I},
  booktitle = {Proceedings of workshop on learning from imbalanced datasets},
  volume    = 126,
  year      = 2003
}
@article{tomek-1976-two-modif-cnn,
  added-at    = "2007-08-22T12:37:55.000+0200",
  author      = "Tomek, I.",
  biburl      =
  "https://www.bibsonomy.org/bibtex/2523c1d70243d3fe9035269af8f6f5ecd/bsmyth",
  description = "AI 2001 Elizabeth McKenna Barry Smyth",
  interhash   = "379fe276cf4a77f8fba21a949b2d72d6",
  intrahash   = "523c1d70243d3fe9035269af8f6f5ecd",
  journal     = "{IEEE Transactions on Systems, Man, and Cybernetics}",
  keywords    = "imported",
  timestamp   = "2007-08-22T12:37:55.000+0200",
  title       = "{Two Modifications of CNN}",
  volume      = "SMC-6",
  number      = 11,
  pages       = "769--772",
  month       = nov,
  year        = 1976
}
@article{wilson-1972-asymp-proper,
  author   = "D. L. {Wilson}",
  journal  = "IEEE Transactions on Systems, Man, and Cybernetics",
  title    = "Asymptotic Properties of Nearest Neighbor Rules Using Edited
              Data",
  year     = 1972,
  volume   = "SMC-2",
  number   = 3,
  pages    = "408--421",
  keywords = "Nearest neighbor searches;Random
              variables;Convergence;Character recognition;Decoding;Pattern
              recognition",
  doi      = "10.1109/TSMC.1972.4309137",
  issn     = "0018-9472",
  month    = jul
}
@article{hand-1978-exper,
  title    = "Experiments on the edited condensed nearest neighbor rule",
  journal  = "Information Sciences",
  volume   = 14,
  number   = 3,
  pages    = "171--180",
  year     = 1978,
  issn     = "0020-0255",
  doi      = "10.1016/0020-0255(78)90040-3",
  url      =
  "http://www.sciencedirect.com/science/article/pii/0020025578900403",
  author   = "D.J. Hand and B.G. Batchelor",
  abstract = "Tomek's preprocessing scheme is discussed for editing the
              training set prior to analyzing it by Hart's condensed
              nearest neighbor technique. Preprocessing was performed by a
              {$\kappa$}-nearest-neighbor pdf estimation scheme, although other
              methods are suggested in this paper. The procedure was
              studied experimentally and was found to achieve a significant
              reduction in the storage requirements of the CNN method while
              maintaining approximately the same error rate, or even
              improving it."
}
@article{hart-2006-conden-neares,
  author     = "Hart, P.",
  title      = "The Condensed Nearest Neighbor Rule (Corresp.)",
  journal    = "IEEE Trans. Inf. Theor.",
  issue_date = "May 1968",
  volume     = 14,
  number     = 3,
  month      = may,
  year       = 1968,
  issn       = "0018-9448",
  pages      = "515--516",
  numpages   = 2,
  url        = "https://doi.org/10.1109/TIT.1968.1054155",
  doi        = "10.1109/TIT.1968.1054155",
  acmid      = 2267647,
  publisher  = "IEEE Press",
  address    = "Piscataway, NJ, USA"
}
@inproceedings{kubat-1997-addres-curse,
  author    = {Kubat, Miroslav and Matwin, Stan},
  title     = {Addressing the Curse of Imbalanced Training Sets: One-Sided
               Selection},
  booktitle = {ICML},
  year      = 1997
}
@inproceedings{laurikkala-2001-improv,
  title        = "Improving identification of difficult small classes by
                  balancing class distribution",
  author       = "Laurikkala, Jorma",
  booktitle    = "Conference on Artificial Intelligence in Medicine in Europe",
  pages        = "63--66",
  year         = 2001,
  organization = "Springer"
}
@article{smith-2014-instan-level,
  author     = "Smith, Michael R. and Martinez, Tony and Giraud-Carrier,
                Christophe",
  title      = "An Instance Level Analysis of Data Complexity",
  journal    = "Mach. Learn.",
  issue_date = "May 2014",
  volume     = 95,
  number     = 2,
  month      = may,
  year       = 2014,
  issn       = "0885-6125",
  pages      = "225--256",
  numpages   = 32,
  url        = "https://doi.org/10.1007/s10994-013-5422-z",
  doi        = "10.1007/s10994-013-5422-z",
  acmid      = 2843686,
  publisher  = "Kluwer Academic Publishers",
  address    = "Norwell, MA, USA",
  keywords   = "Data complexity, Dataset hardness, Instance hardness"
}
@article{batista-2004-study-behav,
  author    = "Batista, Gustavo EAPA and Prati, Ronaldo C and Monard, Maria
               Carolina",
  title     = "A Study of the Behavior of Several Methods for Balancing
               Machine Learning Training Data",
  journal   = "ACM SIGKDD explorations newsletter",
  volume    = 6,
  number    = 1,
  pages     = "20--29",
  year      = 2004,
  publisher = "ACM"
}
@misc{batista-2003-balan-train,
  author = {Batista, Gustavo E. A. P. A. and Bazzan, Ana L. C. and Monard,
            Maria Carolina},
  title  = {Balancing Training Data for Automated Annotation of Keywords:
            a Case Study},
  year   = 2003
}
@article{andrieu-2003-introd-mcmc-machin-learn,
  author   = "Andrieu, Christophe and de Freitas, Nando and Doucet, Arnaud
              and Jordan, Michael I.",
  title    = "An Introduction to MCMC for Machine Learning",
  journal  = "Machine Learning",
  year     = 2003,
  month    = jan,
  day      = 01,
  volume   = 50,
  number   = 1,
  pages    = "5--43",
  abstract = "This purpose of this introductory paper is threefold. First,
              it introduces the Monte Carlo method with emphasis on
              probabilistic machine learning. Second, it reviews the main
              building blocks of modern Markov chain Monte Carlo
              simulation, thereby providing and introduction to the
              remaining papers of this special issue. Lastly, it discusses
              new interesting research horizons.",
  issn     = "1573-0565",
  doi      = "10.1023/A:1020281327116",
  url      = "https://doi.org/10.1023/A:1020281327116"
}
@article{scholkopf-2000-new-suppor-vector-algor,
  title     = "New Support Vector Algorithms",
  author    = "Sch{\"o}lkopf, Bernhard and Smola, Alex J and Williamson,
               Robert C and Bartlett, Peter L",
  journal   = "Neural computation",
  volume    = 12,
  number    = 5,
  pages     = "1207--1245",
  year      = 2000,
  publisher = "MIT Press"
}
@article{scholkopf-2001-estim-suppor,
  title     = "Estimating the Support of a High-Dimensional Distribution",
  author    = "Sch{\"o}lkopf, Bernhard and Platt, John C and Shawe-Taylor,
               John and Smola, Alex J and Williamson, Robert C",
  journal   = "Neural computation",
  volume    = 13,
  number    = 7,
  pages     = "1443--1471",
  year      = 2001,
  publisher = "MIT Press"
}
@article{lampert-2009-kernel-method-comput-vision,
  author    = "Lampert, Christoph H and others",
  title     = "Kernel Methods in Computer Vision",
  journal   = "Foundations and Trends{\textregistered} in Computer Graphics
               and Vision",
  volume    = 4,
  number    = 3,
  pages     = "193--285",
  year      = 2009,
  publisher = "Now Publishers, Inc."
}
@article{tax-2004-suppor-vector-data-descr,
  author    = "Tax, David MJ and Duin, Robert PW",
  title     = "Support Vector Data Description",
  journal   = "Machine learning",
  volume    = 54,
  number    = 1,
  pages     = "45--66",
  year      = 2004,
  publisher = "Springer"
}
@inproceedings{liu-2008-isolat,
  title        = "Isolation forest",
  author       = "Liu, Fei Tony and Ting, Kai Ming and Zhou, Zhi-Hua",
  booktitle    = "2008 Eighth IEEE International Conference on Data Mining",
  pages        = "413--422",
  year         = 2008,
  organization = "IEEE"
}
@inproceedings{breunig-2000-lof,
  title        = "{LOF}: identifying density-based local outliers",
  author       = "Breunig, Markus M and Kriegel, Hans-Peter and Ng, Raymond T
                  and Sander, J{\"o}rg",
  booktitle    = "ACM sigmod record",
  volume       = 29,
  number       = 2,
  pages        = "93--104",
  year         = 2000,
  organization = "ACM"
}
@article{goyal-2017-accur-large-minib-sgd,
  title        = {Accurate, Large Minibatch {SGD:} Training ImageNet in 1 Hour},
  author       = {Priya Goyal and Piotr Doll{\'{a}}r and Ross B. Girshick and
                  Pieter Noordhuis and Lukasz Wesolowski and Aapo Kyrola and
                  Andrew Tulloch and Yangqing Jia and Kaiming He},
  journal      = {CoRR},
  volume       = {abs/1706.02677},
  year         = 2017,
  url          = {http://arxiv.org/abs/1706.02677},
  archivePrefix= {arXiv},
  eprint       = {1706.02677},
  timestamp    = {Mon, 13 Aug 2018 16:49:10 +0200},
  biburl       = {https://dblp.org/rec/bib/journals/corr/GoyalDGNWKTJH17},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{howard-2018-univer-languag,
  author       = "{Howard}, Jeremy and {Ruder}, Sebastian",
  title        = "{Universal Language Model Fine-tuning for Text
                  Classification}",
  journal      = "arXiv e-prints",
  keywords     = "Computer Science - Computation and Language, Computer Science
                  - Machine Learning, Statistics - Machine Learning",
  year         = 2018,
  month        = jan,
  eid          = "arXiv:1801.06146",
  pages        = "arXiv:1801.06146",
  archivePrefix= "arXiv",
  eprint       = "1801.06146",
  primaryClass = "cs.CL",
  adsurl       = "https://ui.adsabs.harvard.edu/abs/2018arXiv180106146H",
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System"
}
@article{griffiths-2004-finding,
  author    = "Griffiths, Thomas L. and Steyvers, Mark",
  title     = "Finding scientific topics",
  volume    = 101,
  number    = "suppl 1",
  pages     = "5228--5235",
  year      = 2004,
  doi       = "10.1073/pnas.0307752101",
  publisher = "National Academy of Sciences",
  abstract  = "A first step in identifying the content of a document is
               determining which topics that document addresses. We describe
               a generative model for documents, introduced by Blei, Ng, and
               Jordan [Blei, D. M., Ng, A. Y. \& Jordan, M. I. (2003)
               J. Machine Learn. Res. 3, 993-1022], in which each document
               is generated by choosing a distribution over topics and then
               choosing each word in the document from a topic selected
               according to this distribution. We then present a Markov
               chain Monte Carlo algorithm for inference in this model. We
               use this algorithm to analyze abstracts from PNAS by using
               Bayesian model selection to establish the number of
               topics. We show that the extracted topics capture meaningful
               structure in the data, consistent with the class designations
               provided by the authors of the articles, and outline further
               applications of this analysis, including identifying
               {\textquotedblleft}hot topics{\textquotedblright} by
               examining temporal dynamics and tagging abstracts to
               illustrate semantic content.",
  issn      = "0027-8424",
  url       = "https://www.pnas.org/content/101/suppl_1/5228",
  eprint    = "https://www.pnas.org/content/101/suppl_1/5228.full.pdf",
  journal   = "Proceedings of the National Academy of Sciences"
}
@article{cui-2016-multi-scale,
  title        = {Multi-Scale Convolutional Neural Networks for Time Series
                  Classification},
  author       = {Zhicheng Cui and Wenlin Chen and Yixin Chen},
  journal      = {CoRR},
  volume       = {abs/1603.06995},
  year         = 2016,
  url          = {http://arxiv.org/abs/1603.06995},
  archivePrefix= {arXiv},
  eprint       = {1603.06995},
  timestamp    = {Mon, 13 Aug 2018 16:47:13 +0200},
  biburl       = {https://dblp.org/rec/bib/journals/corr/CuiCC16},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{liu-2019-roberta,
  title        = {RoBERTa: {A} Robustly Optimized {BERT} Pretraining Approach},
  author       = {Yinhan Liu and Myle Ott and Naman Goyal and Jingfei Du and
                  Mandar Joshi and Danqi Chen and Omer Levy and Mike Lewis and
                  Luke Zettlemoyer and Veselin Stoyanov},
  journal      = {CoRR},
  volume       = {abs/1907.11692},
  year         = 2019,
  url          = {http://arxiv.org/abs/1907.11692},
  archivePrefix= {arXiv},
  eprint       = {1907.11692},
  timestamp    = {Thu, 01 Aug 2019 08:59:33 +0200},
  biburl       = {https://dblp.org/rec/bib/journals/corr/abs-1907-11692},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{zhao-2012-moodlens,
  author       = "Zhao, Jichang and Dong, Li and Wu, Junjie and Xu, Ke",
  title        = "{MoodLens}: an emoticon-based sentiment analysis system for
                  {Chinese} tweets",
  booktitle    = "Proceedings of the 18th ACM SIGKDD international conference
                  on Knowledge discovery and data mining",
  year         = 2012,
  pages        = "1528--1531",
  organization = "ACM"
}
@article{hsu-2002-comparison-multi-svm,
  title     = "A comparison of methods for multiclass support vector
               machines",
  author    = "Hsu, Chih-Wei and Lin, Chih-Jen",
  journal   = "IEEE transactions on Neural Networks",
  volume    = 13,
  number    = 2,
  pages     = "415--425",
  year      = 2002,
  publisher = "IEEE"
}
@article{conneau-2018-what,
  title        = {What you can cram into a single vector: Probing sentence
                  embeddings for linguistic properties},
  author       = {Alexis Conneau and Germ{\'{a}}n Kruszewski and Guillaume
                  Lample and Lo{\"{\i}}c Barrault and Marco Baroni},
  journal      = {CoRR},
  volume       = {abs/1805.01070},
  year         = 2018,
  url          = {http://arxiv.org/abs/1805.01070},
  archivePrefix= {arXiv},
  eprint       = {1805.01070},
  timestamp    = {Mon, 13 Aug 2018 16:48:39 +0200},
  biburl       = {https://dblp.org/rec/bib/journals/corr/abs-1805-01070},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@techreport{ester-1996-dbscan,
  title       = {A density-based algorithm for discovering clusters in large
                 spatial databases with noise},
  author      = {Ester, M and Kriegel, HP and Sander, J and Xiaowei, Xu},
  institution = {AAAI Press, Menlo Park, CA (United States)},
  year        = 1996
}
@article{schubert-2017-dbscan-revisit,
  author     = "Schubert, Erich and Sander, J{\"o}rg and Ester, Martin and
                Kriegel, Hans Peter and Xu, Xiaowei",
  title      = "DBSCAN Revisited, Revisited: Why and How You Should (Still)
                Use DBSCAN",
  journal    = "ACM Trans. Database Syst.",
  issue_date = "August 2017",
  volume     = 42,
  number     = 3,
  month      = jul,
  year       = 2017,
  issn       = "0362-5915",
  pages      = "19:1--19:21",
  articleno  = 19,
  numpages   = 21,
  url        = "http://doi.acm.org/10.1145/3068335",
  doi        = "10.1145/3068335",
  acmid      = 3068335,
  publisher  = "ACM",
  address    = "New York, NY, USA",
  keywords   = "DBSCAN, density-based clustering, range-search complexity"
}
@inproceedings{ng-2002-dis-vs-gen,
  author    = "Ng, Andrew Y and Jordan, Michael I",
  title     = "On discriminative vs. generative classifiers: A comparison of
               logistic regression and naive {Bayes}",
  booktitle = "Advances in neural information processing systems",
  year      = 2002,
  pages     = "841--848"
}
@article{joulin-2016-fasttext-zip,
  author       = "Armand Joulin and Edouard Grave and Piotr Bojanowski and
                  Matthijs Douze and Herv{\'{e}} J{\'{e}}gou and Tomas Mikolov",
  title        = "{FastText.zip}: Compressing text classification models",
  journal      = "CoRR",
  volume       = "abs/1612.03651",
  year         = 2016,
  url          = "http://arxiv.org/abs/1612.03651",
  archivePrefix= "arXiv",
  eprint       = "1612.03651",
  timestamp    = "Mon, 13 Aug 2018 16:48:53 +0200",
  biburl       = "https://dblp.org/rec/bib/journals/corr/JoulinGBDJM16",
  bibsource    = "dblp computer science bibliography, https://dblp.org"
}
@inproceedings{weston-2014-tagspace,
  title     = "{\#}{T}ag{S}pace: Semantic Embeddings from Hashtags",
  author    = "Weston, Jason and Chopra, Sumit and Adams, Keith",
  booktitle = "Proceedings of the 2014 Conference on Empirical Methods in
               Natural Language Processing ({EMNLP})",
  month     = oct,
  year      = 2014,
  address   = "Doha, Qatar",
  publisher = "Association for Computational Linguistics",
  url       = "https://www.aclweb.org/anthology/D14-1194",
  doi       = "10.3115/v1/D14-1194",
  pages     = "1822--1827"
}
@article{li-2015-compon-enhan,
  title        = {Component-Enhanced Chinese Character Embeddings},
  author       = {Yanran Li and Wenjie Li and Fei Sun and Sujian Li},
  journal      = {CoRR},
  volume       = {abs/1508.06669},
  year         = 2015,
  url          = {http://arxiv.org/abs/1508.06669},
  archivePrefix= {arXiv},
  eprint       = {1508.06669},
  timestamp    = {Mon, 13 Aug 2018 16:47:49 +0200},
  biburl       = {https://dblp.org/rec/bib/journals/corr/LiLSL15},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{chen-2015-joint-learn,
  author    = "Chen, Xinxiong and Xu, Lei and Liu, Zhiyuan and Sun, Maosong
               and Luan, Huanbo",
  title     = "Joint Learning of Character and Word Embeddings",
  booktitle = "Proceedings of the 24th International Conference on
               Artificial Intelligence",
  series    = "IJCAI'15",
  year      = 2015,
  isbn      = "978-1-57735-738-4",
  location  = "Buenos Aires, Argentina",
  pages     = "1236--1242",
  numpages  = 7,
  url       = "http://dl.acm.org/citation.cfm?id=2832415.2832421",
  acmid     = 2832421,
  publisher = "AAAI Press"
}
@article{kudo-2018-subword-regularization,
  title        = {Subword Regularization: Improving Neural Network Translation
                  Models with Multiple Subword Candidates},
  author       = {Taku Kudo},
  journal      = {CoRR},
  volume       = {abs/1804.10959},
  year         = 2018,
  url          = {http://arxiv.org/abs/1804.10959},
  archivePrefix= {arXiv},
  eprint       = {1804.10959},
  timestamp    = {Mon, 13 Aug 2018 16:48:57 +0200},
  biburl       = {https://dblp.org/rec/bib/journals/corr/abs-1804-10959},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{wang-2016-atae-lstm,
  author    = "Wang, Yequan and Huang, Minlie and Zhao, Li and others",
  title     = "Attention-based {LSTM} for aspect-level sentiment
               classification",
  booktitle = "Proceedings of the 2016 conference on empirical methods in
               natural language processing",
  year      = 2016,
  pages     = "606--615"
}
@article{tang-2015-td-lstm,
  title        = {Target-Dependent Sentiment Classification with Long Short
                  Term Memory},
  author       = {Duyu Tang and Bing Qin and Xiaocheng Feng and Ting Liu},
  journal      = {CoRR},
  volume       = {abs/1512.01100},
  year         = 2015,
  url          = {http://arxiv.org/abs/1512.01100},
  archivePrefix= {arXiv},
  eprint       = {1512.01100},
  timestamp    = {Mon, 13 Aug 2018 16:46:55 +0200},
  biburl       = {https://dblp.org/rec/bib/journals/corr/TangQFL15},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{pang-2018-learn-repres,
  title        = {Learning Representations of Ultrahigh-dimensional Data for
                  Random Distance-based Outlier Detection},
  author       = {Guansong Pang and Longbing Cao and Ling Chen and Huan Liu},
  journal      = {CoRR},
  volume       = {abs/1806.04808},
  year         = 2018,
  url          = {http://arxiv.org/abs/1806.04808},
  archivePrefix= {arXiv},
  eprint       = {1806.04808},
  timestamp    = {Mon, 13 Aug 2018 16:46:25 +0200},
  biburl       = {https://dblp.org/rec/bib/journals/corr/abs-1806-04808},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{liu-2003-build, | |
title = "Building text classifiers using positive and unlabeled | |
examples", | |
author = "Bing Liu and Yang Dai and Xiaoli Li and Wee Sun Lee and | |
Philip S. Yu", | |
booktitle = "Third IEEE International Conference on Data Mining", | |
year = 2003, | |
pages = "179--186" | |
} | |
@InProceedings{li-2005-pu-learning, | |
author = "Li, Xiao-Li and Liu, Bing", | |
editor = "Gama, Jo{\~a}o and Camacho, Rui and Brazdil, Pavel B. and | |
Jorge, Al{\'i}pio M{\'a}rio and Torgo, Lu{\'i}s", | |
title = "Learning from Positive and Unlabeled Examples with Different | |
Data Distributions", | |
booktitle = "Machine Learning: ECML 2005", | |
year = 2005, | |
publisher = "Springer Berlin Heidelberg", | |
address = "Berlin, Heidelberg", | |
pages = "218-229", | |
abstract = "We study the problem of learning from positive and unlabeled | |
examples. Although several techniques exist for dealing with | |
this problem, they all assume that positive examples in the | |
positive set P and the positive examples in the unlabeled set | |
U are generated from the same distribution. This assumption | |
may be violated in practice. For example, one wants to | |
collect all printer pages from the Web. One can use the | |
printer pages from one site as the set P of positive pages | |
and use product pages from another site as U. One wants to | |
classify the pages in U into printer pages and non-printer | |
pages. Although printer pages from the two sites have many | |
similarities, they can also be quite different because | |
different sites often present similar products in different | |
styles and have different focuses. In such cases, existing | |
methods perform poorly. This paper proposes a novel technique | |
A-EM to deal with the problem. Experiment results with | |
product page classification demonstrate the effectiveness of | |
the proposed technique.", | |
isbn = "978-3-540-31692-3" | |
} | |
@inproceedings{liu-2002-partial-super, | |
author = "Liu, Bing and Lee, Wee Sun and Yu, Philip S. and Li, Xiaoli", | |
title = "Partially Supervised Classification of Text Documents", | |
booktitle = "Proceedings of the Nineteenth International Conference on | |
Machine Learning", | |
series = "ICML '02", | |
year = 2002, | |
isbn = "1-55860-873-7", | |
pages = "387-394", | |
numpages = 8, | |
url = "http://dl.acm.org/citation.cfm?id=645531.656022", | |
acmid = 656022, | |
publisher = "Morgan Kaufmann Publishers Inc.", | |
address = "San Francisco, CA, USA" | |
} | |
@inproceedings{wilson-2005-recog-contex, | |
author = "Wilson, Theresa and Wiebe, Janyce and Hoffmann, Paul", | |
title = "Recognizing Contextual Polarity in Phrase-level Sentiment | |
Analysis", | |
booktitle = "Proceedings of the Conference on Human Language Technology | |
and Empirical Methods in Natural Language Processing", | |
series = "HLT '05", | |
year = 2005, | |
location = "Vancouver, British Columbia, Canada", | |
pages = "347-354", | |
numpages = 8, | |
url = "https://doi.org/10.3115/1220575.1220619", | |
doi = "10.3115/1220575.1220619", | |
acmid = 1220619, | |
publisher = "Association for Computational Linguistics", | |
address = "Stroudsburg, PA, USA" | |
} | |
@incollection{liu-2010-sentim, | |
author = "Bing Liu", | |
title = "Sentiment analysis and subjectivity", | |
booktitle = "Handbook of Natural Language Processing, Second Edition", | |
publisher = "Taylor and Francis Group", | |
address = "Boca Raton", | |
year = 2010 | |
} | |
@Inbook{liu-2012-survey-opinion, | |
author = "Liu, Bing and Zhang, Lei", | |
title = "A Survey of Opinion Mining and Sentiment Analysis", | |
bookTitle = "Mining Text Data", | |
year = 2012, | |
publisher = "Springer US", | |
address = "Boston, MA", | |
pages = "415-463", | |
chapter = 1, | |
abstract = "Sentiment analysis or opinion mining is the computational | |
study of people's opinions, appraisals, attitudes, and | |
emotions toward entities, individuals, issues, events, topics | |
and their attributes. The task is technically challenging and | |
practically very useful. For example, businesses always want | |
to find public or consumer opinions about their products and | |
services. Potential customers also want to know the opinions | |
of existing users before they use a service or purchase a | |
product.", | |
isbn = "978-1-4614-3223-4", | |
doi = "10.1007/978-1-4614-3223-4_13", | |
url = "https://doi.org/10.1007/978-1-4614-3223-4_13" | |
} | |
@InProceedings{conneau-2018-xnli, | |
author = "Conneau, Alexis and Rinott, Ruty and Lample, Guillaume and | |
Williams, Adina and Bowman, Samuel R. and Schwenk, Holger | |
and Stoyanov, Veselin", | |
title = "XNLI: Evaluating Cross-lingual Sentence Representations", | |
booktitle = "Proceedings of the 2018 Conference on Empirical Methods in | |
Natural Language Processing", | |
year = 2018, | |
publisher = "Association for Computational Linguistics", | |
location = "Brussels, Belgium" | |
} | |
@article{lample-2019-xlms, | |
author = "Guillaume Lample and Alexis Conneau", | |
title = "Cross-lingual Language Model Pretraining", | |
journal = "CoRR", | |
volume = "abs/1901.07291", | |
year = 2019, | |
url = "http://arxiv.org/abs/1901.07291", | |
archivePrefix= "arXiv", | |
eprint = "1901.07291", | |
timestamp = "Fri, 01 Feb 2019 13:39:59 +0100", | |
biburl = "https://dblp.org/rec/bib/journals/corr/abs-1901-07291", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@inproceedings{schuster-2012-japan-korean, | |
title = "Japanese and korean voice search", | |
author = "Schuster, Mike and Nakajima, Kaisuke", | |
booktitle = "2012 IEEE International Conference on Acoustics, Speech and | |
Signal Processing (ICASSP)", | |
pages = "5149-5152", | |
year = 2012, | |
organization = "IEEE" | |
} | |
@article{shaw-2018-self-atten, | |
author = "Peter Shaw and Jakob Uszkoreit and Ashish Vaswani", | |
title = "Self-Attention with Relative Position Representations", | |
journal = "CoRR", | |
volume = "abs/1803.02155", | |
year = 2018, | |
url = "http://arxiv.org/abs/1803.02155", | |
archivePrefix= "arXiv", | |
eprint = "1803.02155", | |
timestamp = "Mon, 13 Aug 2018 16:46:37 +0200", | |
biburl = "https://dblp.org/rec/bib/journals/corr/abs-1803-02155", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@article{al-rfou-2018-charac-level, | |
author = "Rami Al{-}Rfou and Dokook Choe and Noah Constant and Mandy | |
Guo and Llion Jones", | |
title = "Character-Level Language Modeling with Deeper Self-Attention", | |
journal = "CoRR", | |
volume = "abs/1808.04444", | |
year = 2018, | |
url = "http://arxiv.org/abs/1808.04444", | |
archivePrefix= "arXiv", | |
eprint = "1808.04444", | |
timestamp = "Sun, 02 Sep 2018 15:01:55 +0200", | |
biburl = "https://dblp.org/rec/bib/journals/corr/abs-1808-04444", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@article{fan-2017-matchzoo, | |
author = "Yixing Fan and Liang Pang and Jianpeng Hou and Jiafeng Guo | |
and Yanyan Lan and Xueqi Cheng", | |
title = "MatchZoo: {A} Toolkit for Deep Text Matching", | |
journal = "CoRR", | |
volume = "abs/1707.07270", | |
year = 2017, | |
url = "http://arxiv.org/abs/1707.07270", | |
archivePrefix= "arXiv", | |
eprint = "1707.07270", | |
timestamp = "Mon, 13 Aug 2018 16:48:14 +0200", | |
biburl = "https://dblp.org/rec/bib/journals/corr/FanPHGLC17", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@article{wang-2018-ripplenet, | |
author = "Hongwei Wang and Fuzheng Zhang and Jialin Wang and Miao Zhao | |
and Wenjie Li and Xing Xie and Minyi Guo", | |
title = "Ripple Network: Propagating User Preferences on the Knowledge | |
Graph for Recommender Systems", | |
journal = "CoRR", | |
volume = "abs/1803.03467", | |
year = 2018, | |
url = "http://arxiv.org/abs/1803.03467", | |
archivePrefix= "arXiv", | |
eprint = "1803.03467", | |
timestamp = "Mon, 13 Aug 2018 16:48:19 +0200", | |
biburl = "https://dblp.org/rec/bib/journals/corr/abs-1803-03467", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@article{song-2019-mass, | |
author = "Kaitao Song and Xu Tan and Tao Qin and Jianfeng Lu and | |
Tie{-}Yan Liu", | |
title = "{MASS:} Masked Sequence to Sequence Pre-training for Language | |
Generation", | |
journal = "CoRR", | |
volume = "abs/1905.02450", | |
year = 2019, | |
url = "http://arxiv.org/abs/1905.02450", | |
archivePrefix= "arXiv", | |
eprint = "1905.02450", | |
timestamp = "Mon, 27 May 2019 13:15:00 +0200", | |
biburl = "https://dblp.org/rec/bib/journals/corr/abs-1905-02450", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@inproceedings{huang-2013-dssm, | |
title = "Learning deep structured semantic models for web search using | |
clickthrough data", | |
author = "Huang, Po-Sen and He, Xiaodong and Gao, Jianfeng and Deng, Li | |
and Acero, Alex and Heck, Larry", | |
booktitle = "Proceedings of the 22nd ACM international conference on | |
Information \& Knowledge Management", | |
pages = "2333-2338", | |
year = 2013, | |
organization = "ACM" | |
} | |
@InProceedings{shen-2014-cnn-dssm, | |
author = "Shen, Yelong and He, Xiaodong and Gao, Jianfeng and Deng, Li | |
and Mesnil, Gregoire", | |
title = "A Latent Semantic Model with Convolutional-Pooling Structure | |
for Information Retrieval", | |
booktitle = "CIKM", | |
year = 2014, | |
month = "November", | |
abstract = "In this paper, we propose a new latent semantic model that | |
incorporates a convolutional-pooling structure over word | |
sequences to learn low-dimensional, semantic vector | |
representations for search queries and Web documents. In | |
order to capture the rich contextual structures in a query or | |
a document, we start with each word within a temporal context | |
window in a word sequence to directly capture contextual | |
features at the word n-gram level. Next, the salient word | |
n-gram features in the word sequence are discovered by the | |
model and are then aggregated to form a sentence-level | |
feature vector. Finally, a non-linear transformation is | |
applied to extract high-level semantic information to | |
generate a continuous vector representation for the full text | |
string. The proposed convolutional latent semantic model | |
(CLSM) is trained on clickthrough data and is evaluated on a | |
Web document ranking task using a large-scale, real-world | |
data set. Results show that the proposed model effectively | |
captures salient semantic information in queries and | |
documents for the task while significantly outperforming | |
previous state-of-the-art semantic models.", | |
url = | |
"https://www.microsoft.com/en-us/research/publication/a-latent-semantic-model-with-convolutional-pooling-structure-for-information-retrieval/" | |
} | |
@article{palangi-2014-lstm-dssm, | |
title = "Semantic modelling with long-short-term memory for | |
information retrieval", | |
author = "Palangi, Hamid and Deng, Li and Shen, Yelong and Gao, | |
Jianfeng and He, Xiaodong and Chen, Jianshu and Song, Xinying | |
and Ward, R", | |
journal = "arXiv preprint arXiv:1412.6629", | |
year = 2014 | |
} | |
@inproceedings{elkahky-2015-mv-dssm, | |
title = "A multi-view deep learning approach for cross domain user | |
modeling in recommendation systems", | |
author = "Elkahky, Ali Mamdouh and Song, Yang and He, Xiaodong", | |
booktitle = "Proceedings of the 24th International Conference on World | |
Wide Web", | |
pages = "278-288", | |
year = 2015, | |
organization = "International World Wide Web Conferences Steering Committee" | |
} | |
@inproceedings{qiu-2015-cntn, | |
title = "Convolutional neural tensor network architecture for | |
community-based question answering", | |
author = "Qiu, Xipeng and Huang, Xuanjing", | |
booktitle = "Twenty-Fourth International Joint Conference on Artificial | |
Intelligence", | |
year = 2015 | |
} | |
@article{pangliang-2017-text-matching-survey, | |
title = "深度文本匹配综述", | |
author = "庞亮 and 兰艳艳 and 徐君 and 郭嘉丰 and 万圣贤 and 程学旗", | |
journal = "计算机学报", | |
volume = 40, | |
number = 4, | |
pages = "985-1003", | |
year = 2017 | |
} | |
@INPROCEEDINGS{chopra-2005-siamese, | |
author = "S. {Chopra} and R. {Hadsell} and Y. {LeCun}", | |
booktitle = "2005 IEEE Computer Society Conference on Computer Vision and | |
Pattern Recognition (CVPR'05)", | |
title = "Learning a similarity metric discriminatively, with | |
application to face verification", | |
year = 2005, | |
volume = 1, | |
pages = "539-546 vol. 1", | |
keywords = "face recognition;learning (artificial | |
intelligence);similarity metric learning;face | |
verification;face recognition;L/sub 1/ norm;semantic distance | |
approximation;discriminative loss function;geometric | |
distortion;Character generation;Drives;Robustness;System | |
testing;Spatial databases;Glass;Artificial neural | |
networks;Support vector machines;Support vector machine | |
classification;Face recognition", | |
doi = "10.1109/CVPR.2005.202", | |
month = "June" | |
} | |
@inproceedings{zhai-2016-deepintent, | |
author = "Zhai, Shuangfei and Chang, Keng-hao and Zhang, Ruofei and | |
Zhang, Zhongfei Mark", | |
title = "Deepintent: Learning attentions for online advertising with | |
recurrent neural networks", | |
booktitle = "Proceedings of the 22nd ACM SIGKDD international conference | |
on knowledge discovery and data mining", | |
year = 2016, | |
pages = "1295-1304", | |
organization = "ACM" | |
} | |
@inproceedings{mitra-2017-learn-to-match, | |
author = "Mitra, Bhaskar and Diaz, Fernando and Craswell, Nick", | |
title = "Learning to match using local and distributed representations | |
of text for web search", | |
booktitle = "Proceedings of the 26th International Conference on World | |
Wide Web", | |
year = 2017, | |
pages = "1291-1299", | |
organization = "International World Wide Web Conferences Steering Committee" | |
} | |
@inproceedings{tan-2016-improve, | |
title = "Improved representation learning for question answer | |
matching", | |
author = "Tan, Ming and Dos Santos, Cicero and Xiang, Bing and Zhou, | |
Bowen", | |
booktitle = "Proceedings of the 54th Annual Meeting of the Association for | |
Computational Linguistics (Volume 1: Long Papers)", | |
pages = "464-473", | |
year = 2016 | |
} | |
@incollection{hu-2014-arc-i, | |
title = "Convolutional Neural Network Architectures for Matching | |
Natural Language Sentences", | |
author = "Hu, Baotian and Lu, Zhengdong and Li, Hang and Chen, Qingcai", | |
booktitle = "Advances in Neural Information Processing Systems 27", | |
editor = "Z. Ghahramani and M. Welling and C. Cortes and N. D. Lawrence | |
and K. Q. Weinberger", | |
pages = "2042-2050", | |
year = 2014, | |
publisher = "Curran Associates, Inc.", | |
url = | |
"http://papers.nips.cc/paper/5550-convolutional-neural-network-architectures-for-matching-natural-language-sentences.pdf" | |
} | |
@inproceedings{yin-2015-multigrancnn, | |
title = "Multigrancnn: An architecture for general matching of text | |
chunks on multiple levels of granularity", | |
author = "Yin, Wenpeng and Sch{\"u}tze, Hinrich", | |
booktitle = "Proceedings of the 53rd Annual Meeting of the Association for | |
Computational Linguistics and the 7th International Joint | |
Conference on Natural Language Processing (Volume 1: Long | |
Papers)", | |
pages = "63-73", | |
year = 2015 | |
} | |
@article{pang-2016-matchpyramid, | |
author = "Liang Pang and Yanyan Lan and Jiafeng Guo and Jun Xu and | |
Shengxian Wan and Xueqi Cheng", | |
title = "Text Matching as Image Recognition", | |
journal = "CoRR", | |
volume = "abs/1602.06359", | |
year = 2016, | |
url = "http://arxiv.org/abs/1602.06359", | |
archivePrefix= "arXiv", | |
eprint = "1602.06359", | |
timestamp = "Mon, 13 Aug 2018 16:47:25 +0200", | |
biburl = "https://dblp.org/rec/bib/journals/corr/PangLGXWC16", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@incollection{lu-2013-deepmatch, | |
title = "A Deep Architecture for Matching Short Texts", | |
author = "Lu, Zhengdong and Li, Hang", | |
booktitle = "Advances in Neural Information Processing Systems 26", | |
editor = "C. J. C. Burges and L. Bottou and M. Welling and | |
Z. Ghahramani and K. Q. Weinberger", | |
pages = "1367-1375", | |
year = 2013, | |
publisher = "Curran Associates, Inc.", | |
url = | |
"http://papers.nips.cc/paper/5019-a-deep-architecture-for-matching-short-texts.pdf" | |
} | |
@inproceedings{zhang-2017-aicnn, | |
title = "Attentive interactive neural networks for answer selection in | |
community question answering", | |
author = "Zhang, Xiaodong and Li, Sujian and Sha, Lei and Wang, | |
Houfeng", | |
booktitle = "Thirty-First AAAI Conference on Artificial Intelligence", | |
year = 2017 | |
} | |
@inproceedings{sha-2018-mvfnn, | |
title = "A multi-view fusion neural network for answer selection", | |
author = "Sha, Lei and Zhang, Xiaodong and Qian, Feng and Chang, Baobao | |
and Sui, Zhifang", | |
booktitle = "Thirty-Second AAAI Conference on Artificial Intelligence", | |
year = 2018 | |
} | |
@inproceedings{zhang-2018-dqi, | |
title = "Duplicate question identification by integrating framenet | |
with neural networks", | |
author = "Zhang, Xiaodong and Sun, Xu and Wang, Houfeng", | |
booktitle = "Thirty-Second AAAI Conference on Artificial Intelligence", | |
year = 2018 | |
} | |
@article{wan-2016-match-srnn, | |
author = "Shengxian Wan and Yanyan Lan and Jun Xu and Jiafeng Guo and | |
Liang Pang and Xueqi Cheng", | |
title = "Match-SRNN: Modeling the Recursive Matching Structure with | |
Spatial {RNN}", | |
journal = "CoRR", | |
volume = "abs/1604.04378", | |
year = 2016, | |
url = "http://arxiv.org/abs/1604.04378", | |
archivePrefix= "arXiv", | |
eprint = "1604.04378", | |
timestamp = "Mon, 13 Aug 2018 16:47:12 +0200", | |
biburl = "https://dblp.org/rec/bib/journals/corr/WanLXGPC16", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@article{tan-2015-qa-lstm, | |
author = "Ming Tan and Bing Xiang and Bowen Zhou", | |
title = "LSTM-based Deep Learning Models for non-factoid answer | |
selection", | |
journal = "CoRR", | |
volume = "abs/1511.04108", | |
year = 2015, | |
url = "http://arxiv.org/abs/1511.04108", | |
archivePrefix= "arXiv", | |
eprint = "1511.04108", | |
timestamp = "Mon, 13 Aug 2018 16:46:33 +0200", | |
biburl = "https://dblp.org/rec/bib/journals/corr/TanXZ15", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@article{xiong-2017-k-nrm, | |
author = "Chenyan Xiong and Zhuyun Dai and Jamie Callan and Zhiyuan Liu | |
and Russell Power", | |
title = "End-to-End Neural Ad-hoc Ranking with Kernel Pooling", | |
journal = "CoRR", | |
volume = "abs/1706.06613", | |
year = 2017, | |
url = "http://arxiv.org/abs/1706.06613", | |
archivePrefix= "arXiv", | |
eprint = "1706.06613", | |
timestamp = "Mon, 13 Aug 2018 16:49:10 +0200", | |
biburl = "https://dblp.org/rec/bib/journals/corr/XiongDCLP17", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@inproceedings{chen-2018-mix, | |
title = "Mix: Multi-channel information crossing for text matching", | |
author = "Chen, Haolan and Han, Fred X and Niu, Di and Liu, Dong and | |
Lai, Kunfeng and Wu, Chenglin and Xu, Yu", | |
booktitle = "Proceedings of the 24th ACM SIGKDD International Conference | |
on Knowledge Discovery \& Data Mining", | |
pages = "110-119", | |
year = 2018, | |
organization = "ACM" | |
} | |
@inproceedings{zhang-2003-quest-class, | |
author = "Zhang, Dell and Lee, Wee Sun", | |
title = "Question Classification Using Support Vector Machines", | |
booktitle = "Proceedings of the 26th Annual International ACM SIGIR | |
Conference on Research and Development in Informaion | |
Retrieval", | |
series = "SIGIR '03", | |
year = 2003, | |
isbn = "1-58113-646-3", | |
location = "Toronto, Canada", | |
pages = "26-32", | |
numpages = 7, | |
url = "http://doi.acm.org/10.1145/860435.860443", | |
doi = "10.1145/860435.860443", | |
acmid = 860443, | |
publisher = "ACM", | |
address = "New York, NY, USA", | |
keywords = "kernel method, machine learning, question answering, support | |
vector machine, text classification" | |
} | |
@inproceedings{li-2002-learn-quest-class, | |
author = "Li, Xin and Roth, Dan", | |
title = "Learning Question Classifiers", | |
booktitle = "Proceedings of the 19th International Conference on | |
Computational Linguistics - Volume 1", | |
series = "COLING '02", | |
year = 2002, | |
location = "Taipei, Taiwan", | |
pages = "1-7", | |
numpages = 7, | |
url = "https://doi.org/10.3115/1072228.1072378", | |
doi = "10.3115/1072228.1072378", | |
acmid = 1072378, | |
publisher = "Association for Computational Linguistics", | |
address = "Stroudsburg, PA, USA" | |
} | |
@inproceedings{cui-2004-unsup, | |
title = "Unsupervised learning of soft patterns for generating | |
definitions from online news", | |
author = "Cui, Hang and Kan, Min-Yen and Chua, Tat-Seng", | |
booktitle = "Proceedings of the 13th international conference on World | |
Wide Web", | |
pages = "90-99", | |
year = 2004, | |
organization = "ACM" | |
} | |
@inproceedings{unger-2012-template-based, | |
title = "Template-based question answering over RDF data", | |
author = "Unger, Christina and B{\"u}hmann, Lorenz and Lehmann, Jens | |
and Ngonga Ngomo, Axel-Cyrille and Gerber, Daniel and | |
Cimiano, Philipp", | |
booktitle = "Proceedings of the 21st international conference on World | |
Wide Web", | |
pages = "639-648", | |
year = 2012, | |
organization = "ACM" | |
} | |
@inproceedings{abujabal-2017-autom-templ, | |
author = "Abujabal, Abdalghani and Yahya, Mohamed and Riedewald, Mirek | |
and Weikum, Gerhard", | |
title = "Automated Template Generation for Question Answering over | |
Knowledge Graphs", | |
booktitle = "Proceedings of the 26th International Conference on World | |
Wide Web", | |
series = "WWW '17", | |
year = 2017, | |
isbn = "978-1-4503-4913-0", | |
location = "Perth, Australia", | |
pages = "1191-1200", | |
numpages = 10, | |
url = "https://doi.org/10.1145/3038912.3052583", | |
doi = "10.1145/3038912.3052583", | |
acmid = 3052583, | |
publisher = "International World Wide Web Conferences Steering Committee", | |
address = "Republic and Canton of Geneva, Switzerland", | |
keywords = "knowledge graphs, question answering, semantic parsing" | |
} | |
@inproceedings{riedel-2010-model, | |
title = "Modeling relations and their mentions without labeled text", | |
author = "Riedel, Sebastian and Yao, Limin and McCallum, Andrew", | |
booktitle = "Joint European Conference on Machine Learning and Knowledge | |
Discovery in Databases", | |
pages = "148-163", | |
year = 2010, | |
organization = "Springer" | |
} | |
@inproceedings{liu-2017-soft-label, | |
title = "A soft-label method for noise-tolerant distantly supervised | |
relation extraction", | |
author = "Liu, Tianyu and Wang, Kexiang and Chang, Baobao and Sui, | |
Zhifang", | |
booktitle = "Proceedings of the 2017 Conference on Empirical Methods in | |
Natural Language Processing", | |
pages = "1790-1795", | |
year = 2017 | |
} | |
@article{feng-2018-reinf-learn, | |
author = "Jun Feng and Minlie Huang and Li Zhao and Yang Yang and | |
Xiaoyan Zhu", | |
title = "Reinforcement Learning for Relation Classification from Noisy | |
Data", | |
journal = "CoRR", | |
volume = "abs/1808.08013", | |
year = 2018, | |
url = "http://arxiv.org/abs/1808.08013", | |
archivePrefix= "arXiv", | |
eprint = "1808.08013", | |
timestamp = "Tue, 03 Sep 2019 20:11:19 +0200", | |
biburl = "https://dblp.org/rec/bib/journals/corr/abs-1808-08013", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@phdthesis{zhang-2015-deepdive, | |
title = "DeepDive: A Data Management System for Automatic Knowledge | |
Base Construction", | |
author = "Zhang, Ce", | |
year = 2015, | |
school = "UW-Madison" | |
} | |
@inproceedings{yao-2014-infor-extrac-struc-data, | |
title = "Information Extraction over Structured Data: Question | |
Answering with {F}reebase", | |
author = "Yao, Xuchen and Van Durme, Benjamin", | |
booktitle = "Proceedings of the 52nd Annual Meeting of the Association for | |
Computational Linguistics (Volume 1: Long Papers)", | |
month = jun, | |
year = 2014, | |
address = "Baltimore, Maryland", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/P14-1090", | |
doi = "10.3115/v1/P14-1090", | |
pages = "956-966" | |
} | |
@article{bordes-2014-quest-answer-subgr-embed, | |
author = "Antoine Bordes and Sumit Chopra and Jason Weston", | |
title = "Question Answering with Subgraph Embeddings", | |
journal = "CoRR", | |
volume = "abs/1406.3676", | |
year = 2014, | |
url = "http://arxiv.org/abs/1406.3676", | |
archivePrefix= "arXiv", | |
eprint = "1406.3676", | |
timestamp = "Mon, 13 Aug 2018 16:46:20 +0200", | |
biburl = "https://dblp.org/rec/bib/journals/corr/BordesCW14", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@inproceedings{dong-2015-quest, | |
title = "Question answering over freebase with multi-column | |
convolutional neural networks", | |
author = "Dong, Li and Wei, Furu and Zhou, Ming and Xu, Ke", | |
booktitle = "Proceedings of the 53rd Annual Meeting of the Association for | |
Computational Linguistics and the 7th International Joint | |
Conference on Natural Language Processing (Volume 1: Long | |
Papers)", | |
pages = "260-269", | |
year = 2015 | |
} | |
@inproceedings{yih-2015-query-graph, | |
title = "Semantic Parsing via Staged Query Graph Generation: Question | |
Answering with Knowledge Base", | |
author = "Yih, Wen-tau and Chang, Ming-Wei and He, Xiaodong and Gao, | |
Jianfeng", | |
booktitle = "Proceedings of the 53rd Annual Meeting of the Association for | |
Computational Linguistics and the 7th International Joint | |
Conference on Natural Language Processing (Volume 1: Long | |
Papers)", | |
pages = "1321-1331", | |
year = 2015 | |
} | |
@article{chen-2017-drqa, | |
author = "Danqi Chen and Adam Fisch and Jason Weston and Antoine | |
Bordes", | |
title = "Reading Wikipedia to Answer Open-Domain Questions", | |
journal = "CoRR", | |
volume = "abs/1704.00051", | |
year = 2017, | |
url = "http://arxiv.org/abs/1704.00051", | |
archivePrefix= "arXiv", | |
eprint = "1704.00051", | |
timestamp = "Mon, 13 Aug 2018 16:47:17 +0200", | |
biburl = "https://dblp.org/rec/bib/journals/corr/ChenFWB17", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@inproceedings{miwa-2014-model, | |
title = "Modeling joint entity and relation extraction with table | |
representation", | |
author = "Miwa, Makoto and Sasaki, Yutaka", | |
booktitle = "Proceedings of the 2014 Conference on Empirical Methods in | |
Natural Language Processing (EMNLP)", | |
pages = "1858-1869", | |
year = 2014 | |
} | |
@article{zheng-2017-joint-extrac, | |
author = "Suncong Zheng and Feng Wang and Hongyun Bao and Yuexing Hao | |
and Peng Zhou and Bo Xu", | |
title = "Joint Extraction of Entities and Relations Based on a Novel | |
Tagging Scheme", | |
journal = "CoRR", | |
volume = "abs/1706.05075", | |
year = 2017, | |
url = "http://arxiv.org/abs/1706.05075", | |
archivePrefix= "arXiv", | |
eprint = "1706.05075", | |
timestamp = "Tue, 25 Jun 2019 17:27:14 +0200", | |
biburl = "https://dblp.org/rec/bib/journals/corr/ZhengWBHZX17", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@article{luo-2017-learn-noise, | |
author = "Bingfeng Luo and Yansong Feng and Zheng Wang and Zhanxing Zhu | |
and Songfang Huang and Rui Yan and Dongyan Zhao", | |
title = "Learning with Noise: Enhance Distantly Supervised Relation | |
Extraction with Dynamic Transition Matrix", | |
journal = "CoRR", | |
volume = "abs/1705.03995", | |
year = 2017, | |
url = "http://arxiv.org/abs/1705.03995", | |
archivePrefix= "arXiv", | |
eprint = "1705.03995", | |
timestamp = "Sat, 31 Aug 2019 16:23:05 +0200", | |
biburl = "https://dblp.org/rec/bib/journals/corr/LuoFWZHYZ17", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@inproceedings{feng-2017-effec-deep, | |
author = "Xiaocheng Feng and Jiang Guo and Bing Qin and Ting Liu and | |
Yongjie Liu", | |
title = "Effective Deep Memory Networks for Distant Supervised | |
Relation Extraction", | |
booktitle = "Proceedings of the Twenty-Sixth International Joint | |
Conference on Artificial Intelligence, {IJCAI-17}", | |
pages = "4002-4008", | |
year = 2017, | |
doi = "10.24963/ijcai.2017/559", | |
url = "https://doi.org/10.24963/ijcai.2017/559" | |
} | |
@inproceedings{bordes-2013-transe, | |
title = "Translating embeddings for modeling multi-relational data", | |
author = "Bordes, Antoine and Usunier, Nicolas and Garcia-Duran, | |
Alberto and Weston, Jason and Yakhnenko, Oksana", | |
booktitle = "Advances in neural information processing systems", | |
pages = "2787-2795", | |
year = 2013 | |
} | |
@inproceedings{wang-2014-transh, | |
title = "Knowledge graph embedding by translating on hyperplanes", | |
author = "Wang, Zhen and Zhang, Jianwen and Feng, Jianlin and Chen, | |
Zheng", | |
booktitle = "Twenty-Eighth AAAI conference on artificial intelligence", | |
year = 2014 | |
} | |
@inproceedings{lin-2015-transr, | |
title = "Learning entity and relation embeddings for knowledge graph | |
completion", | |
author = "Lin, Yankai and Liu, Zhiyuan and Sun, Maosong and Liu, Yang | |
and Zhu, Xuan", | |
booktitle = "Proceedings of the Twenty-Ninth AAAI Conference on Artificial | |
Intelligence", | |
pages = "2181-2187", | |
year = 2015, | |
organization = "AAAI Press" | |
} | |
@inproceedings{ji-2015-transd, | |
title = "Knowledge graph embedding via dynamic mapping matrix", | |
author = "Ji, Guoliang and He, Shizhu and Xu, Liheng and Liu, Kang and | |
Zhao, Jun", | |
booktitle = "Proceedings of the 53rd Annual Meeting of the Association for | |
Computational Linguistics and the 7th International Joint | |
Conference on Natural Language Processing (Volume 1: Long | |
Papers)", | |
pages = "687-696", | |
year = 2015 | |
} | |
@article{xiao-2015-transa, | |
author = "Han Xiao and Minlie Huang and Yu Hao and Xiaoyan Zhu", | |
title = "TransA: An Adaptive Approach for Knowledge Graph Embedding", | |
journal = "CoRR", | |
volume = "abs/1509.05490", | |
year = 2015, | |
url = "http://arxiv.org/abs/1509.05490", | |
archivePrefix= "arXiv", | |
eprint = "1509.05490", | |
timestamp = "Tue, 03 Sep 2019 20:11:19 +0200", | |
biburl = "https://dblp.org/rec/bib/journals/corr/0005HHZ15a", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@inproceedings{ji-2016-transparse, | |
title = "Knowledge graph completion with adaptive sparse transfer | |
matrix", | |
author = "Ji, Guoliang and Liu, Kang and He, Shizhu and Zhao, Jun", | |
booktitle = "Thirtieth AAAI Conference on Artificial Intelligence", | |
year = 2016 | |
} | |
@inproceedings{xiao-2016-transg, | |
title = "TransG: A generative model for knowledge graph embedding", | |
author = "Xiao, Han and Huang, Minlie and Zhu, Xiaoyan", | |
booktitle = "Proceedings of the 54th Annual Meeting of the Association for | |
Computational Linguistics (Volume 1: Long Papers)", | |
volume = 1, | |
pages = "2316-2325", | |
year = 2016 | |
} | |
@inproceedings{he-2015-kg2e, | |
title = "Learning to represent knowledge graphs with gaussian | |
embedding", | |
author = "He, Shizhu and Liu, Kang and Ji, Guoliang and Zhao, Jun", | |
booktitle = "Proceedings of the 24th ACM International on Conference on | |
Information and Knowledge Management", | |
pages = "623-632", | |
year = 2015, | |
organization = "ACM" | |
} | |
@inproceedings{jia-2016-transa+, | |
title = "Locally adaptive translation for knowledge graph embedding", | |
author = "Jia, Yantao and Wang, Yuanzhuo and Lin, Hailun and Jin, | |
Xiaolong and Cheng, Xueqi", | |
booktitle = "Thirtieth AAAI conference on artificial intelligence", | |
year = 2016 | |
} | |
@inproceedings{shi-2017-proje,
  title        = "{ProjE}: Embedding projection for knowledge graph completion",
  author       = "Shi, Baoxu and Weninger, Tim",
  booktitle    = "Thirty-First AAAI Conference on Artificial Intelligence",
  year         = 2017
}
@inproceedings{krompass-2015-type,
  title        = "Type-constrained representation learning in knowledge graphs",
  author       = "Krompa{\ss}, Denis and Baier, Stephan and Tresp, Volker",
  booktitle    = "International semantic web conference",
  pages        = "640--655",
  year         = 2015,
  organization = "Springer"
}
@inproceedings{niu-2011-zhishi,
  title        = "Zhishi.me: weaving {Chinese} linking open data",
  author       = "Niu, Xing and Sun, Xinruo and Wang, Haofen and Rong, Shu and
                  Qi, Guilin and Yu, Yong",
  booktitle    = "International Semantic Web Conference",
  pages        = "205--220",
  year         = 2011,
  organization = "Springer"
}
@incollection{bizer-2011-linked,
  title        = "Linked data: The story so far",
  author       = "Bizer, Christian and Heath, Tom and Berners-Lee, Tim",
  booktitle    = "Semantic services, interoperability and web applications:
                  emerging concepts",
  pages        = "205--227",
  year         = 2011,
  publisher    = "IGI Global"
}
@inproceedings{liu-2017-unsup,
  title        = "Unsupervised image-to-image translation networks",
  author       = "Liu, Ming-Yu and Breuel, Thomas and Kautz, Jan",
  booktitle    = "Advances in neural information processing systems",
  pages        = "700--708",
  year         = 2017
}
@inproceedings{cao-2018-cw2vec,
  title        = "{cw2vec}: Learning {Chinese} word embeddings with stroke
                  n-gram information",
  author       = "Cao, Shaosheng and Lu, Wei and Zhou, Jun and Li, Xiaolong",
  booktitle    = "Thirty-Second AAAI Conference on Artificial Intelligence",
  year         = 2018
}
@article{yu-2015-multi-scale,
  author       = "{Yu}, Fisher and {Koltun}, Vladlen",
  title        = "Multi-Scale Context Aggregation by Dilated Convolutions",
  journal      = "arXiv e-prints",
  keywords     = "Computer Science - Computer Vision and Pattern Recognition",
  year         = 2015,
  month        = nov,
  eid          = "arXiv:1511.07122",
  pages        = "arXiv:1511.07122",
  archivePrefix= "arXiv",
  eprint       = "1511.07122",
  primaryClass = "cs.CV",
  adsurl       = "https://ui.adsabs.harvard.edu/abs/2015arXiv151107122Y",
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System"
}
@article{chiu-2016-lstm-cnn,
  title        = "Named Entity Recognition with Bidirectional {LSTM}-{CNN}s",
  author       = "Chiu, Jason P. C. and Nichols, Eric",
  journal      = "Transactions of the Association for Computational
                  Linguistics",
  volume       = 4,
  year         = 2016,
  url          = "https://www.aclweb.org/anthology/Q16-1026",
  doi          = "10.1162/tacl_a_00104",
  pages        = "357--370",
  abstract     = "Named entity recognition is a challenging task that has
                  traditionally required large amounts of knowledge in the form
                  of feature engineering and lexicons to achieve high
                  performance. In this paper, we present a novel neural network
                  architecture that automatically detects word- and
                  character-level features using a hybrid bidirectional LSTM
                  and CNN architecture, eliminating the need for most feature
                  engineering. We also propose a novel method of encoding
                  partial lexicon matches in neural networks and compare it to
                  existing approaches. Extensive evaluation shows that, given
                  only tokenized text and publicly available word embeddings,
                  our system is competitive on the CoNLL-2003 dataset and
                  surpasses the previously reported state of the art
                  performance on the OntoNotes 5.0 dataset by 2.13 F1
                  points. By using two lexicons constructed from
                  publicly-available sources, we establish new state of the art
                  performance with an F1 score of 91.62 on CoNLL-2003 and 86.28
                  on OntoNotes, surpassing systems that employ heavy feature
                  engineering, proprietary lexicons, and rich entity linking
                  information."
}
@inproceedings{zhang-2018-lattice-lstm,
  title        = "{C}hinese {NER} Using Lattice {LSTM}",
  author       = "Zhang, Yue and Yang, Jie",
  booktitle    = "Proceedings of the 56th Annual Meeting of the Association for
                  Computational Linguistics (Volume 1: Long Papers)",
  month        = jul,
  year         = 2018,
  address      = "Melbourne, Australia",
  publisher    = "Association for Computational Linguistics",
  url          = "https://www.aclweb.org/anthology/P18-1144",
  doi          = "10.18653/v1/P18-1144",
  pages        = "1554--1564",
  abstract     = "We investigate a lattice-structured LSTM model for Chinese
                  NER, which encodes a sequence of input characters as well as
                  all potential words that match a lexicon. Compared with
                  character-based methods, our model explicitly leverages word
                  and word sequence information. Compared with word-based
                  methods, lattice LSTM does not suffer from segmentation
                  errors. Gated recurrent cells allow our model to choose the
                  most relevant characters and words from a sentence for better
                  NER results. Experiments on various datasets show that
                  lattice LSTM outperforms both word-based and character-based
                  LSTM baselines, achieving the best results."
}
@article{shang-2018-autoner,
  author       = {Jingbo Shang and Liyuan Liu and Xiang Ren and Xiaotao Gu and
                  Teng Ren and Jiawei Han},
  title        = {Learning Named Entity Tagger using Domain-Specific
                  Dictionary},
  journal      = {CoRR},
  volume       = {abs/1809.03599},
  year         = {2018},
  url          = {http://arxiv.org/abs/1809.03599},
  archivePrefix= {arXiv},
  eprint       = {1809.03599},
  timestamp    = {Fri, 05 Oct 2018 11:34:52 +0200},
  biburl       = {https://dblp.org/rec/bib/journals/corr/abs-1809-03599},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{yadav-2018-survey-ner,
  title        = "A Survey on Recent Advances in Named Entity Recognition from
                  Deep Learning models",
  author       = "Yadav, Vikas and Bethard, Steven",
  booktitle    = "Proceedings of the 27th International Conference on
                  Computational Linguistics",
  month        = aug,
  year         = 2018,
  address      = "Santa Fe, New Mexico, USA",
  publisher    = "Association for Computational Linguistics",
  url          = "https://www.aclweb.org/anthology/C18-1182",
  pages        = "2145--2158",
  abstract     = "Named Entity Recognition (NER) is a key component in NLP
                  systems for question answering, information retrieval,
                  relation extraction, etc. NER systems have been studied and
                  developed widely for decades, but accurate systems using deep
                  neural networks (NN) have only been introduced in the last
                  few years. We present a comprehensive survey of deep neural
                  network architectures for NER, and contrast them with
                  previous approaches to NER based on feature engineering and
                  other supervised or semi-supervised learning algorithms. Our
                  results highlight the improvements achieved by neural
                  networks, and show how incorporating some of the lessons
                  learned from past work on feature-based NER systems can yield
                  further improvements."
}
@article{li-2016-webqa,
  author       = "{Li}, Peng and {Li}, Wei and {He}, Zhengyan and {Wang},
                  Xuguang and {Cao}, Ying and {Zhou}, Jie and {Xu}, Wei",
  title        = "Dataset and Neural Recurrent Sequence Labeling Model for
                  Open-Domain Factoid Question Answering",
  journal      = "arXiv e-prints",
  keywords     = "Computer Science - Computation and Language, Computer Science
                  - Artificial Intelligence, Computer Science - Neural and Evolutionary
                  Computing",
  year         = 2016,
  month        = jul,
  eid          = "arXiv:1607.06275",
  pages        = "arXiv:1607.06275",
  archivePrefix= "arXiv",
  eprint       = "1607.06275",
  primaryClass = "cs.CL",
  adsurl       = "https://ui.adsabs.harvard.edu/abs/2016arXiv160706275L",
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System"
}
@article{wang-2003-risk-score,
  author       = "Wang, Thomas J. and Massaro, Joseph M. and Levy, Daniel and
                  Vasan, Ramachandran S. and Wolf, Philip A. and D'Agostino,
                  Ralph B. and Larson, Martin G. and Kannel, William B. and
                  Benjamin, Emelia J.",
  title        = "A Risk Score for Predicting Stroke or Death in Individuals
                  With New-Onset Atrial Fibrillation in the Community: The
                  {Framingham} Heart Study",
  journal      = "JAMA",
  volume       = 290,
  number       = 8,
  pages        = "1049--1056",
  year         = 2003,
  month        = aug,
  abstract     = "{ContextPrior risk stratification schemes for atrial
                  fibrillation (AF) have been based on randomized trial cohorts
                  or Medicare administrative databases, have included patients
                  with established AF, and have focused on stroke as the
                  principal outcome.ObjectiveTo derive risk scores for stroke
                  alone and stroke or death in community-based individuals with
                  new-onset AF.Design, Setting, and ParticipantsProspective,
                  community-based, observational cohort in Framingham, Mass.
                  We identified 868 participants with new-onset AF, 705 of whom
                  were not treated with warfarin at baseline. Risk scores for
                  stroke (ischemic or hemorrhagic) and stroke or death were
                  developed with censoring when warfarin initiation occurred
                  during follow-up. Event rates were examined in low-risk
                  individuals, as defined by the risk score and 4 previously
                  published risk schemes.Main Outcome MeasuresStroke and the
                  combination of stroke or death.ResultsDuring a mean follow-up
                  of 4.0 years free of warfarin use, stroke alone occurred in
                  83 participants and stroke or death occurred in 382
                  participants. A risk score for stroke was derived that
                  included the following risk predictors: advancing age, female
                  sex, increasing systolic blood pressure, prior stroke or
                  transient ischemic attack, and diabetes. With the risk score,
                  14.3\\% of the cohort had a predicted 5-year stroke rate
                  ≤7.5\\% (average annual rate ≤1.5\\%), and 30.6\\% of the
                  cohort had a predicted 5-year stroke rate ≤10\\% (average
                  annual rate ≤2\\%). Actual stroke rates in these low-risk
                  groups were 1.1 and 1.5 per 100 person-years,
                  respectively. Previous risk schemes classified 6.4\\% to
                  17.3\\% of subjects as low risk, with actual stroke rates of
                  0.9 to 2.3 per 100 person-years. A risk score for stroke or
                  death is also presented.ConclusionThese risk scores can be
                  used to estimate the absolute risk of an adverse event in
                  individuals with AF, which may be helpful in counseling
                  patients and making treatment decisions.}",
  issn         = "0098-7484",
  doi          = "10.1001/jama.290.8.1049",
  url          = "https://doi.org/10.1001/jama.290.8.1049",
  eprint       =
                  "https://jamanetwork.com/journals/jama/articlepdf/197176/joc30626.pdf"
}
@inproceedings{khosla-2010-integrated,
  title        = "An integrated machine learning approach to stroke prediction",
  author       = "Khosla, Aditya and Cao, Yu and Lin, Cliff Chiung-Yu and Chiu,
                  Hsu-Kuang and Hu, Junling and Lee, Honglak",
  booktitle    = "Proceedings of the 16th ACM SIGKDD international conference
                  on Knowledge discovery and data mining",
  pages        = "183--192",
  year         = 2010,
  organization = "ACM"
}
@inproceedings{cheng-2016-risk,
  title        = "Risk prediction with electronic health records: A deep
                  learning approach",
  author       = "Cheng, Yu and Wang, Fei and Zhang, Ping and Hu, Jianying",
  booktitle    = "Proceedings of the 2016 SIAM International Conference on Data
                  Mining",
  pages        = "432--440",
  year         = 2016,
  organization = "SIAM"
}
@article{choi-2016-using,
  title        = "Using recurrent neural network models for early detection of
                  heart failure onset",
  author       = "Choi, Edward and Schuetz, Andy and Stewart, Walter F. and Sun,
                  Jimeng",
  journal      = "Journal of the American Medical Informatics Association",
  volume       = 24,
  number       = 2,
  pages        = "361--370",
  year         = 2016,
  publisher    = "Oxford University Press"
}
@article{rajkomar-2018-scalable,
  title        = {Scalable and accurate deep learning with electronic health
                  records},
  author       = {Rajkomar, Alvin and Oren, Eyal and Chen, Kai and Dai, Andrew
                  M and Hajaj, Nissan and Hardt, Michaela and Liu, Peter J and
                  Liu, Xiaobing and Marcus, Jake and Sun, Mimi and others},
  journal      = {NPJ Digital Medicine},
  volume       = {1},
  number       = {1},
  pages        = {18},
  year         = {2018},
  publisher    = {Nature Publishing Group}
}
@article{shickel-2017-deep-ehr,
  author       = {Benjamin Shickel and Patrick Tighe and Azra Bihorac and
                  Parisa Rashidi},
  title        = {Deep {EHR:} {A} Survey of Recent Advances on Deep Learning
                  Techniques for Electronic Health Record {(EHR)} Analysis},
  journal      = {CoRR},
  volume       = {abs/1706.03446},
  year         = {2017},
  url          = {http://arxiv.org/abs/1706.03446},
  archivePrefix= {arXiv},
  eprint       = {1706.03446},
  timestamp    = {Mon, 13 Aug 2018 16:46:19 +0200},
  biburl       = {https://dblp.org/rec/bib/journals/corr/ShickelTBR17},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{chen-2016-enhan-combin,
  author       = {Qian Chen and Xiaodan Zhu and Zhen{-}Hua Ling and Si Wei and
                  Hui Jiang},
  title        = {Enhancing and Combining Sequential and Tree {LSTM} for
                  Natural Language Inference},
  journal      = {CoRR},
  volume       = {abs/1609.06038},
  year         = {2016},
  url          = {http://arxiv.org/abs/1609.06038},
  archivePrefix= {arXiv},
  eprint       = {1609.06038},
  timestamp    = {Mon, 13 Aug 2018 16:48:17 +0200},
  biburl       = {https://dblp.org/rec/bib/journals/corr/ChenZLWJ16},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{neculoiu-2016-learn-text,
  title        = "Learning Text Similarity with {S}iamese Recurrent Networks",
  author       = "Neculoiu, Paul and Versteegh, Maarten and Rotaru, Mihai",
  booktitle    = "Proceedings of the 1st Workshop on Representation Learning
                  for {NLP}",
  month        = aug,
  year         = 2016,
  address      = "Berlin, Germany",
  publisher    = "Association for Computational Linguistics",
  url          = "https://www.aclweb.org/anthology/W16-1617",
  doi          = "10.18653/v1/W16-1617",
  pages        = "148--157"
}
@inproceedings{wang-2018-learn-ask,
  title        = "Learning to Ask Questions in Open-domain Conversational
                  Systems with Typed Decoders",
  author       = "Wang, Yansen and Liu, Chenyi and Huang, Minlie and Nie,
                  Liqiang",
  booktitle    = "Proceedings of the 56th Annual Meeting of the Association for
                  Computational Linguistics (Volume 1: Long Papers)",
  month        = jul,
  year         = 2018,
  address      = "Melbourne, Australia",
  publisher    = "Association for Computational Linguistics",
  url          = "https://www.aclweb.org/anthology/P18-1204",
  doi          = "10.18653/v1/P18-1204",
  pages        = "2193--2203",
  abstract     = "Asking good questions in open-domain conversational systems
                  is quite significant but rather untouched. This task,
                  substantially different from traditional question generation,
                  requires to question not only with various patterns but also
                  on diverse and relevant topics. We observe that a good
                  question is a natural composition of interrogatives, topic
                  words, and ordinary words. Interrogatives lexicalize the
                  pattern of questioning, topic words address the key
                  information for topic transition in dialogue, and ordinary
                  words play syntactical and grammatical roles in making a
                  natural sentence. We devise two typed decoders (soft typed
                  decoder and hard typed decoder) in which a type distribution
                  over the three types is estimated and the type distribution
                  is used to modulate the final generation
                  distribution. Extensive experiments show that the typed
                  decoders outperform state-of-the-art baselines and can
                  generate more meaningful questions."
}
@article{seo-2016-bidaf,
  author       = {Min Joon Seo and Aniruddha Kembhavi and Ali Farhadi and
                  Hannaneh Hajishirzi},
  title        = {Bidirectional Attention Flow for Machine Comprehension},
  journal      = {CoRR},
  volume       = {abs/1611.01603},
  year         = {2016},
  url          = {http://arxiv.org/abs/1611.01603},
  archivePrefix= {arXiv},
  eprint       = {1611.01603},
  timestamp    = {Mon, 13 Aug 2018 16:46:34 +0200},
  biburl       = {https://dblp.org/rec/bib/journals/corr/SeoKFH16},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{yu-2018-qanet,
  author       = "Adams Wei Yu and David Dohan and Minh{-}Thang Luong and Rui
                  Zhao and Kai Chen and Mohammad Norouzi and Quoc V. Le",
  title        = "{QANet}: Combining Local Convolution with Global Self-Attention
                  for Reading Comprehension",
  journal      = "CoRR",
  volume       = "abs/1804.09541",
  year         = 2018,
  url          = "http://arxiv.org/abs/1804.09541",
  archivePrefix= "arXiv",
  eprint       = "1804.09541",
  timestamp    = "Mon, 13 Aug 2018 16:48:18 +0200",
  biburl       = "https://dblp.org/rec/bib/journals/corr/abs-1804-09541",
  bibsource    = "dblp computer science bibliography, https://dblp.org"
}
@inproceedings{ture-2017-no-need-pay-atten,
  title        = "No Need to Pay Attention: Simple Recurrent Neural Networks
                  Work!",
  author       = "Ture, Ferhan and Jojic, Oliver",
  booktitle    = "Proceedings of the 2017 Conference on Empirical Methods in
                  Natural Language Processing",
  month        = sep,
  year         = 2017,
  address      = "Copenhagen, Denmark",
  publisher    = "Association for Computational Linguistics",
  url          = "https://www.aclweb.org/anthology/D17-1307",
  doi          = "10.18653/v1/D17-1307",
  pages        = "2866--2872",
  abstract     = "First-order factoid question answering assumes that the
                  question can be answered by a single fact in a knowledge base
                  (KB). While this does not seem like a challenging task, many
                  recent attempts that apply either complex linguistic
                  reasoning or deep neural networks achieve 65{\%}{--}76{\%}
                  accuracy on benchmark sets. Our approach formulates the task
                  as two machine learning problems: detecting the entities in
                  the question, and classifying the question as one of the
                  relation types in the KB. We train a recurrent neural network
                  to solve each problem. On the SimpleQuestions dataset, our
                  approach yields substantial improvements over previously
                  published results {---} even neural networks based on much
                  more complex architectures. The simplicity of our approach
                  also has practical advantages, such as efficiency and
                  modularity, that are valuable especially in an industry
                  setting. In fact, we present a preliminary analysis of the
                  performance of our model on real queries from Comcast{'}s X1
                  entertainment platform with millions of users every day."
}
@inproceedings{yu-2017-improv-neural,
  title        = "Improved Neural Relation Detection for Knowledge Base
                  Question Answering",
  author       = "Yu, Mo and Yin, Wenpeng and Hasan, Kazi Saidul and dos
                  Santos, Cicero and Xiang, Bing and Zhou, Bowen",
  booktitle    = "Proceedings of the 55th Annual Meeting of the Association for
                  Computational Linguistics (Volume 1: Long Papers)",
  month        = jul,
  year         = 2017,
  address      = "Vancouver, Canada",
  publisher    = "Association for Computational Linguistics",
  url          = "https://www.aclweb.org/anthology/P17-1053",
  doi          = "10.18653/v1/P17-1053",
  pages        = "571--581",
  abstract     = "Relation detection is a core component of many NLP
                  applications including Knowledge Base Question Answering
                  (KBQA). In this paper, we propose a hierarchical recurrent
                  neural network enhanced by residual learning which detects KB
                  relations given an input question. Our method uses deep
                  residual bidirectional LSTMs to compare questions and
                  relation names via different levels of
                  abstraction. Additionally, we propose a simple KBQA system
                  that integrates entity linking and our proposed relation
                  detector to make the two components enhance each other. Our
                  experimental results show that our approach not only achieves
                  outstanding relation detection performance, but more
                  importantly, it helps our KBQA system achieve
                  state-of-the-art accuracy for both single-relation
                  (SimpleQuestions) and multi-relation (WebQSP) QA benchmarks."
}
@inproceedings{he-2017-gener-natur,
  title        = "Generating Natural Answers by Incorporating Copying and
                  Retrieving Mechanisms in Sequence-to-Sequence Learning",
  author       = "He, Shizhu and Liu, Cao and Liu, Kang and Zhao, Jun",
  booktitle    = "Proceedings of the 55th Annual Meeting of the Association for
                  Computational Linguistics (Volume 1: Long Papers)",
  month        = jul,
  year         = 2017,
  address      = "Vancouver, Canada",
  publisher    = "Association for Computational Linguistics",
  url          = "https://www.aclweb.org/anthology/P17-1019",
  doi          = "10.18653/v1/P17-1019",
  pages        = "199--208",
  abstract     = "Generating answer with natural language sentence is very
                  important in real-world question answering systems, which
                  needs to obtain a right answer as well as a coherent natural
                  response. In this paper, we propose an end-to-end question
                  answering system called COREQA in sequence-to-sequence
                  learning, which incorporates copying and retrieving
                  mechanisms to generate natural answers within an
                  encoder-decoder framework. Specifically, in COREQA, the
                  semantic units (words, phrases and entities) in a natural
                  answer are dynamically predicted from the vocabulary, copied
                  from the given question and/or retrieved from the
                  corresponding knowledge base jointly. Our empirical study on
                  both synthetic and real-world datasets demonstrates the
                  efficiency of COREQA, which is able to generate correct,
                  coherent and natural answers for knowledge inquired
                  questions."
}
@inproceedings{madotto-2018-mem2seq,
  title        = "{M}em2{S}eq: Effectively Incorporating Knowledge Bases into
                  End-to-End Task-Oriented Dialog Systems",
  author       = "Madotto, Andrea and Wu, Chien-Sheng and Fung, Pascale",
  booktitle    = "Proceedings of the 56th Annual Meeting of the Association for
                  Computational Linguistics (Volume 1: Long Papers)",
  month        = jul,
  year         = 2018,
  address      = "Melbourne, Australia",
  publisher    = "Association for Computational Linguistics",
  url          = "https://www.aclweb.org/anthology/P18-1136",
  doi          = "10.18653/v1/P18-1136",
  pages        = "1468--1478",
  abstract     = "End-to-end task-oriented dialog systems usually suffer from
                  the challenge of incorporating knowledge bases. In this
                  paper, we propose a novel yet simple end-to-end
                  differentiable model called memory-to-sequence (Mem2Seq) to
                  address this issue. Mem2Seq is the first neural generative
                  model that combines the multi-hop attention over memories
                  with the idea of pointer network. We empirically show how
                  Mem2Seq controls each generation step, and how its multi-hop
                  attention mechanism helps in learning correlations between
                  memories. In addition, our model is quite general without
                  complicated task-specific designs. As a result, we show that
                  Mem2Seq can be trained faster and attain the state-of-the-art
                  performance on three different task-oriented dialog
                  datasets."
}
@article{cheng-2016-wide-deep,
  author       = {Heng{-}Tze Cheng and Levent Koc and Jeremiah Harmsen and Tal
                  Shaked and Tushar Chandra and Hrishi Aradhye and Glen
                  Anderson and Greg Corrado and Wei Chai and Mustafa Ispir and
                  Rohan Anil and Zakaria Haque and Lichan Hong and Vihan Jain
                  and Xiaobing Liu and Hemal Shah},
  title        = {Wide {\&} Deep Learning for Recommender Systems},
  journal      = {CoRR},
  volume       = {abs/1606.07792},
  year         = {2016},
  url          = {http://arxiv.org/abs/1606.07792},
  archivePrefix= {arXiv},
  eprint       = {1606.07792},
  timestamp    = {Mon, 13 Aug 2018 16:47:53 +0200},
  biburl       = {https://dblp.org/rec/bib/journals/corr/ChengKHSCAACCIA16},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{wang-2019-multi-passage-bert,
  author       = "{Wang}, Zhiguo and {Ng}, Patrick and {Ma}, Xiaofei and
                  {Nallapati}, Ramesh and {Xiang}, Bing",
  title        = "Multi-passage {BERT}: A Globally Normalized {BERT} Model for
                  Open-domain Question Answering",
  journal      = "arXiv e-prints",
  keywords     = "Computer Science - Computation and Language, Computer Science
                  - Artificial Intelligence",
  year         = 2019,
  month        = aug,
  eid          = "arXiv:1908.08167",
  pages        = "arXiv:1908.08167",
  archivePrefix= "arXiv",
  eprint       = "1908.08167",
  primaryClass = "cs.CL",
  adsurl       = "https://ui.adsabs.harvard.edu/abs/2019arXiv190808167W",
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System"
}
@article{sun-2019-how-fine,
  author       = {Chi Sun and Xipeng Qiu and Yige Xu and Xuanjing Huang},
  title        = {How to Fine-Tune {BERT} for Text Classification?},
  journal      = {CoRR},
  volume       = {abs/1905.05583},
  year         = {2019},
  url          = {http://arxiv.org/abs/1905.05583},
  archivePrefix= {arXiv},
  eprint       = {1905.05583},
  timestamp    = {Tue, 28 May 2019 12:48:08 +0200},
  biburl       = {https://dblp.org/rec/bib/journals/corr/abs-1905-05583},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{diefenbach-2018-core-techn,
  author       = "Diefenbach, Dennis and Lopez, Vanessa and Singh, Kamal and
                  Maret, Pierre",
  title        = "Core Techniques of Question Answering Systems over Knowledge
                  Bases: A Survey",
  journal      = "Knowledge and Information Systems",
  issue_date   = "June 2018",
  volume       = 55,
  number       = 3,
  month        = jun,
  year         = 2018,
  issn         = "0219-1377",
  pages        = "529--569",
  numpages     = 41,
  url          = "https://doi.org/10.1007/s10115-017-1100-y",
  doi          = "10.1007/s10115-017-1100-y",
  acmid        = 3210959,
  publisher    = "Springer-Verlag",
  address      = "Berlin, Heidelberg",
  keywords     = "Knowledge base, QALD, Question answering, Semantic Web,
                  SimpleQuestions, Survey, WebQuestions"
}
@inproceedings{rakthanmanon-2012-search,
  title        = "Searching and mining trillions of time series subsequences
                  under dynamic time warping",
  author       = "Rakthanmanon, Thanawin and Campana, Bilson and Mueen,
                  Abdullah and Batista, Gustavo and Westover, Brandon and Zhu,
                  Qiang and Zakaria, Jesin and Keogh, Eamonn",
  booktitle    = "Proceedings of the 18th ACM SIGKDD international conference
                  on Knowledge discovery and data mining",
  pages        = "262--270",
  year         = 2012,
  organization = "ACM"
}
@inproceedings{palatucci-2009-zero-shot,
  title        = "Zero-shot learning with semantic output codes",
  author       = "Palatucci, Mark and Pomerleau, Dean and Hinton, Geoffrey E
                  and Mitchell, Tom M",
  booktitle    = "Advances in neural information processing systems",
  pages        = "1410--1418",
  year         = 2009
}
@article{fei-fei-2006-one-shot,
  title        = "One-shot learning of object categories",
  author       = "Fei-Fei, Li and Fergus, Rob and Perona, Pietro",
  journal      = "IEEE Transactions on Pattern Analysis and Machine
                  Intelligence",
  volume       = 28,
  number       = 4,
  pages        = "594--611",
  year         = 2006,
  publisher    = "IEEE"
}
@inproceedings{ganin-2015-unsup-domain-adapt-backp,
  author       = "Ganin, Yaroslav and Lempitsky, Victor",
  title        = "Unsupervised Domain Adaptation by Backpropagation",
  booktitle    = "Proceedings of the 32nd International Conference on
                  International Conference on Machine Learning - Volume 37",
  series       = "ICML'15",
  year         = 2015,
  location     = "Lille, France",
  pages        = "1180--1189",
  numpages     = 10,
  url          = "http://dl.acm.org/citation.cfm?id=3045118.3045244",
  acmid        = 3045244,
  publisher    = "JMLR.org"
}
@article{liu-2018-multi-relations,
  author       = "Liu, Jin and Ren, Haoliang and Wu, Menglong and Wang, Jin and
                  Kim, Hye-jin",
  title        = "Multiple relations extraction among multiple entities in
                  unstructured text",
  journal      = "Soft Computing",
  year         = 2018,
  month        = jul,
  day          = 01,
  volume       = 22,
  number       = 13,
  pages        = "4295--4305",
  abstract     = "Relations extraction is a widely researched topic in nature
                  language processing. However, most of the work in the
                  literature concentrate on the methods that are dealing with
                  single relation between two named entities. In the task of
                  multiple relations extraction, traditional statistic-based
                  methods have difficulties in selecting features and improving
                  the performance of extraction model. In this paper, we
                  presented formal definitions of multiple entities and
                  multiple relations and put forward three labeling methods
                  which were used to label entity categories, relation
                  categories and relation conditions. We also proposed a novel
                  relation extraction model which is based on dynamic long
                  short-term memory network. To train our model, entity
                  feature, entity position feature and part of speech feature
                  are used together. These features are used to describe
                  complex relations and improve the performance of relation
                  extraction model. In the experiments, we classified the
                  corpus into three sets which are composed of 0--20 words,
                  20--35 words and 35+ words sentences. On conll04.corp, the
                  final precision, recall rate and F-measure reached 72.9, 70.8
                  and 67.9{\%} respectively.",
  issn         = "1433-7479",
  doi          = "10.1007/s00500-017-2852-8",
  url          = "https://doi.org/10.1007/s00500-017-2852-8"
}
@article{bekoulis-2018-joint-entity,
  author       = {Giannis Bekoulis and Johannes Deleu and Thomas Demeester and
                  Chris Develder},
  title        = {Joint entity recognition and relation extraction as a
                  multi-head selection problem},
  journal      = {CoRR},
  volume       = {abs/1804.07847},
  year         = {2018},
  url          = {http://arxiv.org/abs/1804.07847},
  archivePrefix= {arXiv},
  eprint       = {1804.07847},
  timestamp    = {Mon, 13 Aug 2018 16:49:03 +0200},
  biburl       = {https://dblp.org/rec/bib/journals/corr/abs-1804-07847},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{wang-2019-one-pass,
  author       = {Haoyu Wang and Ming Tan and Mo Yu and Shiyu Chang and Dakuo
                  Wang and Kun Xu and Xiaoxiao Guo and Saloni Potdar},
  title        = {Extracting Multiple-Relations in One-Pass with Pre-Trained
                  Transformers},
  journal      = {CoRR},
  volume       = {abs/1902.01030},
  year         = {2019},
  url          = {http://arxiv.org/abs/1902.01030},
  archivePrefix= {arXiv},
  eprint       = {1902.01030},
  timestamp    = {Tue, 21 May 2019 18:03:37 +0200},
  biburl       = {https://dblp.org/rec/bib/journals/corr/abs-1902-01030},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{soares-2019-match-blank,
  author       = {Livio Baldini Soares and Nicholas FitzGerald and Jeffrey Ling
                  and Tom Kwiatkowski},
  title        = {Matching the Blanks: Distributional Similarity for Relation
                  Learning},
  journal      = {CoRR},
  volume       = {abs/1906.03158},
  year         = {2019},
  url          = {http://arxiv.org/abs/1906.03158},
  archivePrefix= {arXiv},
  eprint       = {1906.03158},
  timestamp    = {Fri, 14 Jun 2019 09:38:24 +0200},
  biburl       = {https://dblp.org/rec/bib/journals/corr/abs-1906-03158},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{alt-2019-improv-relat,
  author       = {Christoph Alt and Marc H{\"{u}}bner and Leonhard Hennig},
  title        = {Improving Relation Extraction by Pre-trained Language
                  Representations},
  journal      = {CoRR},
  volume       = {abs/1906.03088},
  year         = {2019},
  url          = {http://arxiv.org/abs/1906.03088},
  archivePrefix= {arXiv},
  eprint       = {1906.03088},
  timestamp    = {Fri, 14 Jun 2019 09:38:24 +0200},
  biburl       = {https://dblp.org/rec/bib/journals/corr/abs-1906-03088},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{shi-2019-simple-bert, | |
author = "Peng Shi and Jimmy Lin", | |
title = "Simple {BERT} Models for Relation Extraction and Semantic | |
Role Labeling", | |
journal = "CoRR", | |
volume = "abs/1904.05255", | |
year = 2019, | |
url = "http://arxiv.org/abs/1904.05255", | |
archivePrefix= "arXiv", | |
eprint = "1904.05255", | |
timestamp = "Thu, 25 Apr 2019 13:55:01 +0200", | |
biburl = "https://dblp.org/rec/bib/journals/corr/abs-1904-05255", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@article{yao-2019-docred, | |
author = "Yuan Yao and Deming Ye and Peng Li and Xu Han and Yankai Lin | |
and Zhenghao Liu and Zhiyuan Liu and Lixin Huang and Jie Zhou | |
and Maosong Sun", | |
title = "DocRED: {A} Large-Scale Document-Level Relation Extraction | |
Dataset", | |
journal = "CoRR", | |
volume = "abs/1906.06127", | |
year = 2019, | |
url = "http://arxiv.org/abs/1906.06127", | |
archivePrefix= "arXiv", | |
eprint = "1906.06127", | |
timestamp = "Tue, 23 Jul 2019 15:49:40 +0200", | |
biburl = "https://dblp.org/rec/bib/journals/corr/abs-1906-06127", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@inproceedings{fu-2019-graphrel, | |
title = "{G}raph{R}el: Modeling Text as Relational Graphs for Joint | |
Entity and Relation Extraction", | |
author = "Fu, Tsu-Jui and Li, Peng-Hsuan and Ma, Wei-Yun", | |
booktitle = "Proceedings of the 57th Annual Meeting of the Association for | |
Computational Linguistics", | |
month = jul, | |
year = 2019, | |
address = "Florence, Italy", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/P19-1136", | |
doi = "10.18653/v1/P19-1136", | |
pages = "1409-1418", | |
abstract = "In this paper, we present GraphRel, an end-to-end relation | |
extraction model which uses graph convolutional networks | |
(GCNs) to jointly learn named entities and relations. In | |
contrast to previous baselines, we consider the interaction | |
between named entities and relations via a 2nd-phase | |
relation-weighted GCN to better extract relations. Linear and | |
dependency structures are both used to extract both | |
sequential and regional features of the text, and a complete | |
word graph is further utilized to extract implicit features | |
among all word pairs of the text. With the graph-based | |
approach, the prediction for overlapping relations is | |
substantially improved over previous sequential | |
approaches. We evaluate GraphRel on two public datasets: NYT | |
and WebNLG. Results show that GraphRel maintains high | |
precision while increasing recall substantially. Also, | |
GraphRel outperforms previous work by 3.2{\%} and 5.8{\%} (F1 | |
score), achieving a new state-of-the-art for relation | |
extraction." | |
} | |
@article{quirk-2016-distan-super, | |
author = "Chris Quirk and Hoifung Poon", | |
title = "Distant Supervision for Relation Extraction beyond the | |
Sentence Boundary", | |
journal = "CoRR", | |
volume = "abs/1609.04873", | |
year = 2016, | |
url = "http://arxiv.org/abs/1609.04873", | |
archivePrefix= "arXiv", | |
eprint = "1609.04873", | |
timestamp = "Mon, 13 Aug 2018 16:49:11 +0200", | |
biburl = "https://dblp.org/rec/bib/journals/corr/QuirkP16", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@article{peng-2017-cross-sentence, | |
author = "Nanyun Peng and Hoifung Poon and Chris Quirk and Kristina | |
Toutanova and Wen{-}tau Yih", | |
title = "Cross-Sentence N-ary Relation Extraction with Graph LSTMs", | |
journal = "CoRR", | |
volume = "abs/1708.03743", | |
year = 2017, | |
url = "http://arxiv.org/abs/1708.03743", | |
archivePrefix= "arXiv", | |
eprint = "1708.03743", | |
timestamp = "Mon, 13 Aug 2018 16:48:58 +0200", | |
biburl = "https://dblp.org/rec/bib/journals/corr/abs-1708-03743", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@article{song-2018-n-ary, | |
author = "Linfeng Song and Yue Zhang and Zhiguo Wang and Daniel Gildea", | |
title = "N-ary Relation Extraction using Graph State {LSTM}", | |
journal = "CoRR", | |
volume = "abs/1808.09101", | |
year = 2018, | |
url = "http://arxiv.org/abs/1808.09101", | |
archivePrefix= "arXiv", | |
eprint = "1808.09101", | |
timestamp = "Mon, 03 Sep 2018 13:36:40 +0200", | |
biburl = "https://dblp.org/rec/bib/journals/corr/abs-1808-09101", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@article{zhang-2019-drug-drug, | |
author = "Zhang, Tianlin and Leng, Jiaxu and Liu, Ying", | |
title = "{Deep learning for drug–drug interaction extraction from the | |
literature: a review}", | |
journal = "Briefings in Bioinformatics", | |
year = 2019, | |
  month = nov,
abstract = "{Drug–drug interactions (DDIs) are crucial for drug research | |
and pharmacovigilance. These interactions may cause adverse | |
drug effects that threaten public health and patient | |
safety. Therefore, the DDIs extraction from biomedical | |
literature has been widely studied and emphasized in modern | |
biomedical research. The previous rules-based and machine | |
learning approaches rely on tedious feature engineering, | |
which is labourious, time-consuming and unsatisfactory. With | |
the development of deep learning technologies, this problem | |
is alleviated by learning feature representations | |
automatically. Here, we review the recent deep learning | |
methods that have been applied to the extraction of DDIs from | |
biomedical literature. We describe each method briefly and | |
compare its performance in the DDI corpus | |
systematically. Next, we summarize the advantages and | |
disadvantages of these deep learning models for this | |
task. Furthermore, we discuss some challenges and future | |
perspectives of DDI extraction via deep learning | |
methods. This review aims to serve as a useful guide for | |
interested researchers to further advance bioinformatics | |
algorithms for DDIs extraction from the literature.}", | |
issn = "1477-4054", | |
doi = "10.1093/bib/bbz087", | |
url = "https://doi.org/10.1093/bib/bbz087", | |
note = "bbz087", | |
eprint = | |
"http://oup.prod.sis.lan/bib/advance-article-pdf/doi/10.1093/bib/bbz087/30342664/bbz087.pdf" | |
} | |
@article{zheng-2017-joint-entity, | |
title = "Joint entity and relation extraction based on a hybrid neural | |
network", | |
author = "Zheng, Suncong and Hao, Yuexing and Lu, Dongyuan and Bao, | |
Hongyun and Xu, Jiaming and Hao, Hongwei and Xu, Bo", | |
journal = "Neurocomputing", | |
volume = 257, | |
pages = "59-66", | |
year = 2017, | |
publisher = "Elsevier" | |
} | |
@article{li-2017-neural-joint, | |
title = "A neural joint model for entity and relation extraction from | |
biomedical text", | |
author = "Li, Fei and Zhang, Meishan and Fu, Guohong and Ji, Donghong", | |
journal = "BMC bioinformatics", | |
volume = 18, | |
number = 1, | |
pages = 198, | |
year = 2017, | |
publisher = "BioMed Central" | |
} | |
@inproceedings{bekoulis-2018-adver, | |
title = "Adversarial training for multi-context joint entity and | |
relation extraction", | |
author = "Bekoulis, Giannis and Deleu, Johannes and Demeester, Thomas | |
and Develder, Chris", | |
booktitle = "Proceedings of the 2018 Conference on Empirical Methods in | |
Natural Language Processing", | |
month = oct # "-" # nov, | |
year = 2018, | |
address = "Brussels, Belgium", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/D18-1307", | |
doi = "10.18653/v1/D18-1307", | |
pages = "2830-2836", | |
abstract = "Adversarial training (AT) is a regularization method that can | |
be used to improve the robustness of neural network methods | |
by adding small perturbations in the training data. We show | |
how to use AT for the tasks of entity recognition and | |
relation extraction. In particular, we demonstrate that | |
applying AT to a general purpose baseline model for jointly | |
extracting entities and relations, allows improving the | |
state-of-the-art effectiveness on several datasets in | |
different contexts (i.e., news, biomedical, and real estate | |
data) and for different languages (English and Dutch)." | |
} | |
@inproceedings{verga-2018-simul-self, | |
title = "Simultaneously Self-Attending to All Mentions for | |
Full-Abstract Biological Relation Extraction", | |
author = "Verga, Patrick and Strubell, Emma and McCallum, Andrew", | |
booktitle = "Proceedings of the 2018 Conference of the North {A}merican | |
Chapter of the Association for Computational Linguistics: | |
Human Language Technologies, Volume 1 (Long Papers)", | |
month = jun, | |
year = 2018, | |
address = "New Orleans, Louisiana", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/N18-1080", | |
doi = "10.18653/v1/N18-1080", | |
pages = "872-884", | |
abstract = "Most work in relation extraction forms a prediction by | |
looking at a short span of text within a single sentence | |
containing a single entity pair mention. This approach often | |
does not consider interactions across mentions, requires | |
redundant computation for each mention pair, and ignores | |
relationships expressed across sentence boundaries. These | |
problems are exacerbated by the document- (rather than | |
sentence-) level annotation common in biological text. In | |
response, we propose a model which simultaneously predicts | |
relationships between all mention pairs in a document. We | |
form pairwise predictions over entire paper abstracts using | |
an efficient self-attention encoder. All-pairs mention scores | |
allow us to perform multi-instance learning by aggregating | |
over mentions to form entity pair representations. We further | |
adapt to settings without mention-level annotation by jointly | |
training to predict named entities and adding a corpus of | |
weakly labeled data. In experiments on two Biocreative | |
benchmark datasets, we achieve state of the art performance | |
on the Biocreative V Chemical Disease Relation dataset for | |
models without external KB resources. We also introduce a new | |
dataset an order of magnitude larger than existing | |
human-annotated biological information extraction datasets | |
and more accurate than distantly supervised alternatives." | |
} | |
@article{nguyen-2018-end-to-end, | |
author = "Dat Quoc Nguyen and Karin Verspoor", | |
title = "End-to-end neural relation extraction using deep biaffine | |
attention", | |
journal = "CoRR", | |
volume = "abs/1812.11275", | |
year = 2018, | |
url = "http://arxiv.org/abs/1812.11275", | |
archivePrefix= "arXiv", | |
eprint = "1812.11275", | |
timestamp = "Wed, 02 Jan 2019 14:40:18 +0100", | |
biburl = "https://dblp.org/rec/bib/journals/corr/abs-1812-11275", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@article{yan-2019-unified-model, | |
author = "Hang Yan and Xipeng Qiu and Xuanjing Huang", | |
title = "A Unified Model for Joint Chinese Word Segmentation and | |
Dependency Parsing", | |
journal = "CoRR", | |
volume = "abs/1904.04697", | |
year = 2019, | |
url = "http://arxiv.org/abs/1904.04697", | |
archivePrefix= "arXiv", | |
eprint = "1904.04697", | |
timestamp = "Thu, 25 Apr 2019 13:55:01 +0200", | |
biburl = "https://dblp.org/rec/bib/journals/corr/abs-1904-04697", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@inproceedings{zhang-2017-slot-filling, | |
title = "Position-aware Attention and Supervised Data Improve Slot | |
Filling", | |
author = "Zhang, Yuhao and Zhong, Victor and Chen, Danqi and Angeli, | |
Gabor and Manning, Christopher D.", | |
booktitle = "Proceedings of the 2017 Conference on Empirical Methods in | |
Natural Language Processing", | |
month = sep, | |
year = 2017, | |
address = "Copenhagen, Denmark", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/D17-1004", | |
doi = "10.18653/v1/D17-1004", | |
pages = "35-45", | |
abstract = "Organized relational knowledge in the form of {``}knowledge | |
graphs{''} is important for many applications. However, the | |
ability to populate knowledge bases with facts automatically | |
extracted from documents has improved frustratingly | |
slowly. This paper simultaneously addresses two issues that | |
have held back prior work. We first propose an effective new | |
model, which combines an LSTM sequence model with a form of | |
entity position-aware attention that is better suited to | |
relation extraction. Then we build TACRED, a large (119,474 | |
examples) supervised relation extraction dataset obtained via | |
crowdsourcing and targeted towards TAC KBP relations. The | |
combination of better supervised data and a more appropriate | |
high-capacity model enables much better relation extraction | |
performance. When the model trained on this new dataset | |
replaces the previous relation extraction component of the | |
best TAC KBP 2015 slot filling system, its F1 score increases | |
markedly from 22.2{\%} to 26.7{\%}." | |
} | |
@inproceedings{han-2018-fewrel, | |
title = "{F}ew{R}el: A Large-Scale Supervised Few-Shot Relation | |
Classification Dataset with State-of-the-Art Evaluation", | |
author = "Han, Xu and Zhu, Hao and Yu, Pengfei and Wang, Ziyun and Yao, | |
Yuan and Liu, Zhiyuan and Sun, Maosong", | |
booktitle = "Proceedings of the 2018 Conference on Empirical Methods in | |
Natural Language Processing", | |
month = oct # "-" # nov, | |
year = 2018, | |
address = "Brussels, Belgium", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/D18-1514", | |
doi = "10.18653/v1/D18-1514", | |
pages = "4803-4809", | |
abstract = "We present a Few-Shot Relation Classification Dataset | |
(dataset), consisting of 70, 000 sentences on 100 relations | |
derived from Wikipedia and annotated by crowdworkers. The | |
relation of each sentence is first recognized by distant | |
supervision methods, and then filtered by crowdworkers. We | |
adapt the most recent state-of-the-art few-shot learning | |
methods for relation classification and conduct thorough | |
evaluation of these methods. Empirical results show that even | |
the most competitive few-shot learning models struggle on | |
this task, especially as compared with humans. We also show | |
that a range of different reasoning skills are needed to | |
solve our task. These results indicate that few-shot relation | |
classification remains an open problem and still requires | |
further research. Our detailed analysis points multiple | |
directions for future research." | |
} | |
@article{levy-2017-zero-shot, | |
author = "Omer Levy and Minjoon Seo and Eunsol Choi and Luke | |
Zettlemoyer", | |
title = "Zero-Shot Relation Extraction via Reading Comprehension", | |
journal = "CoRR", | |
volume = "abs/1706.04115", | |
year = 2017, | |
url = "http://arxiv.org/abs/1706.04115", | |
archivePrefix= "arXiv", | |
eprint = "1706.04115", | |
timestamp = "Mon, 13 Aug 2018 16:46:48 +0200", | |
biburl = "https://dblp.org/rec/bib/journals/corr/LevySCZ17", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@inproceedings{gao-2019-fewrel-2, | |
title = "{F}ew{R}el 2.0: Towards More Challenging Few-Shot Relation | |
Classification", | |
author = "Gao, Tianyu and Han, Xu and Zhu, Hao and Liu, Zhiyuan and Li, | |
Peng and Sun, Maosong and Zhou, Jie", | |
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in | |
Natural Language Processing and the 9th International Joint | |
Conference on Natural Language Processing (EMNLP-IJCNLP)", | |
month = nov, | |
year = 2019, | |
address = "Hong Kong, China", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/D19-1649", | |
doi = "10.18653/v1/D19-1649", | |
pages = "6251-6256", | |
abstract = "We present FewRel 2.0, a more challenging task to investigate | |
two aspects of few-shot relation classification models: (1) | |
Can they adapt to a new domain with only a handful of | |
instances? (2) Can they detect none-of-the-above (NOTA) | |
relations? To construct FewRel 2.0, we build upon the FewRel | |
dataset by adding a new test set in a quite different domain, | |
and a NOTA relation choice. With the new dataset and | |
extensive experimental analysis, we found (1) that the | |
state-of-the-art few-shot relation classification models | |
struggle on these two aspects, and (2) that the commonly-used | |
techniques for domain adaptation and NOTA detection still | |
cannot handle the two challenges well. Our research calls for | |
more attention and further efforts to these two real-world | |
issues. All details and resources about the dataset and | |
baselines are released at https://github.com/thunlp/fewrel." | |
} | |
@article{snell-2017-prototypical-networks, | |
author = "Jake Snell and Kevin Swersky and Richard S. Zemel", | |
title = "Prototypical Networks for Few-shot Learning", | |
journal = "CoRR", | |
volume = "abs/1703.05175", | |
year = 2017, | |
url = "http://arxiv.org/abs/1703.05175", | |
archivePrefix= "arXiv", | |
eprint = "1703.05175", | |
timestamp = "Mon, 13 Aug 2018 16:46:05 +0200", | |
biburl = "https://dblp.org/rec/bib/journals/corr/SnellSZ17", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@article{cui-2019-bilstm-lan,
author = "{Cui}, Leyang and {Zhang}, Yue", | |
title = "{Hierarchically-Refined Label Attention Network for Sequence | |
Labeling}", | |
journal = "arXiv e-prints", | |
keywords = "Computer Science - Computation and Language", | |
year = 2019, | |
month = "Aug", | |
eid = "arXiv:1908.08676", | |
pages = "arXiv:1908.08676", | |
archivePrefix= "arXiv", | |
eprint = "1908.08676", | |
primaryClass = "cs.CL", | |
adsurl = "https://ui.adsabs.harvard.edu/abs/2019arXiv190808676C", | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System" | |
} | |
@inproceedings{xu-2019-scalin-open, | |
title = "Scaling up Open Tagging from Tens to Thousands: Comprehension | |
Empowered Attribute Value Extraction from Product Title", | |
author = "Xu, Huimin and Wang, Wenting and Mao, Xin and Jiang, Xinyu | |
and Lan, Man", | |
booktitle = "Proceedings of the 57th Annual Meeting of the Association for | |
Computational Linguistics", | |
month = jul, | |
year = 2019, | |
address = "Florence, Italy", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/P19-1514", | |
doi = "10.18653/v1/P19-1514", | |
pages = "5214-5223", | |
abstract = "Supplementing product information by extracting attribute | |
values from title is a crucial task in e-Commerce | |
domain. Previous studies treat each attribute only as an | |
entity type and build one set of NER tags (e.g., BIO) for | |
each of them, leading to a scalability issue which unfits to | |
the large sized attribute system in real world e-Commerce. In | |
this work, we propose a novel approach to support value | |
extraction scaling up to thousands of attributes without | |
losing performance: (1) We propose to regard attribute as a | |
query and adopt only one global set of BIO tags for any | |
attributes to reduce the burden of attribute tag or model | |
explosion; (2) We explicitly model the semantic | |
representations for attribute and title, and develop an | |
attention mechanism to capture the interactive semantic | |
relations in-between to enforce our framework to be attribute | |
comprehensive. We conduct extensive experiments in real-life | |
datasets. The results show that our model not only | |
outperforms existing state-of-the-art NER tagging models, but | |
also is robust and generates promising results for up to | |
8,906 attributes." | |
} | |
@article{zheng-2018-opentag, | |
author = "Guineng Zheng and Subhabrata Mukherjee and Xin Luna Dong and | |
Feifei Li", | |
title = "OpenTag: Open Attribute Value Extraction from Product | |
Profiles", | |
journal = "CoRR", | |
volume = "abs/1806.01264", | |
year = 2018, | |
url = "http://arxiv.org/abs/1806.01264", | |
archivePrefix= "arXiv", | |
eprint = "1806.01264", | |
timestamp = "Mon, 13 Aug 2018 16:46:56 +0200", | |
biburl = "https://dblp.org/rec/bib/journals/corr/abs-1806-01264", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@inproceedings{yahya-2014-renoun, | |
title = "{R}e{N}oun: Fact Extraction for Nominal Attributes", | |
author = "Yahya, Mohamed and Whang, Steven and Gupta, Rahul and Halevy, | |
Alon", | |
booktitle = "Proceedings of the 2014 Conference on Empirical Methods in | |
Natural Language Processing ({EMNLP})", | |
month = oct, | |
year = 2014, | |
address = "Doha, Qatar", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/D14-1038", | |
doi = "10.3115/v1/D14-1038", | |
pages = "325-335" | |
} | |
@article{jiang-2017-metapad, | |
author = "Meng Jiang and Jingbo Shang and Taylor Cassidy and Xiang Ren | |
and Lance M. Kaplan and Timothy P. Hanratty and Jiawei Han", | |
title = "MetaPAD: Meta Pattern Discovery from Massive Text Corpora", | |
journal = "CoRR", | |
volume = "abs/1703.04213", | |
year = 2017, | |
url = "http://arxiv.org/abs/1703.04213", | |
archivePrefix= "arXiv", | |
eprint = "1703.04213", | |
timestamp = "Mon, 13 Aug 2018 16:48:27 +0200", | |
biburl = "https://dblp.org/rec/bib/journals/corr/JiangSCRKHH17", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@article{zhang-2019-unsup-annot,
author = "{Zhang}, Jingqing and {Zhang}, Xiaoyu and {Sun}, Kai and | |
{Yang}, Xian and {Dai}, Chengliang and {Guo}, Yike", | |
title = "{Unsupervised Annotation of Phenotypic Abnormalities via | |
Semantic Latent Representations on Electronic Health | |
Records}", | |
journal = "arXiv e-prints", | |
keywords = "Computer Science - Computation and Language", | |
year = 2019, | |
month = "Nov", | |
eid = "arXiv:1911.03862", | |
pages = "arXiv:1911.03862", | |
archivePrefix= "arXiv", | |
eprint = "1911.03862", | |
primaryClass = "cs.CL", | |
adsurl = "https://ui.adsabs.harvard.edu/abs/2019arXiv191103862Z", | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System" | |
} | |
@inproceedings{liu-2015-segphrase, | |
title = "Mining quality phrases from massive text corpora", | |
author = "Liu, Jialu and Shang, Jingbo and Wang, Chi and Ren, Xiang and | |
Han, Jiawei", | |
booktitle = "Proceedings of the 2015 ACM SIGMOD International Conference | |
on Management of Data", | |
pages = "1729-1744", | |
year = 2015, | |
organization = "ACM" | |
} | |
@article{shang-2017-autophrase, | |
author = "Jingbo Shang and Jialu Liu and Meng Jiang and Xiang Ren and | |
Clare R. Voss and Jiawei Han", | |
title = "Automated Phrase Mining from Massive Text Corpora", | |
journal = "CoRR", | |
volume = "abs/1702.04457", | |
year = 2017, | |
url = "http://arxiv.org/abs/1702.04457", | |
archivePrefix= "arXiv", | |
eprint = "1702.04457", | |
timestamp = "Mon, 13 Aug 2018 16:46:43 +0200", | |
biburl = "https://dblp.org/rec/bib/journals/corr/ShangLJRVH17", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@article{el-kishky-2014-topmining, | |
title = "Scalable topical phrase mining from text corpora", | |
author = "El-Kishky, Ahmed and Song, Yanglei and Wang, Chi and Voss, | |
Clare R and Han, Jiawei", | |
journal = "Proceedings of the VLDB Endowment", | |
volume = 8, | |
number = 3, | |
pages = "305-316", | |
year = 2014, | |
publisher = "VLDB Endowment" | |
} | |
@inproceedings{wang-2019-autobioner, | |
title = "Distantly Supervised Biomedical Named Entity Recognition with | |
Dictionary Expansion", | |
author = "Wang, Xuan and Zhang, Yu and Li, Qi and Ren, Xiang and Shang, | |
Jingbo and Han, Jiawei", | |
booktitle = "Proc. 2019 IEEE Int. Conf. on Bioinformatics and Biomedicine | |
(IEEE-BIBM’19), San Diego, CA", | |
year = 2019 | |
} | |
@inproceedings{shen-2017-setexpan, | |
title = "Setexpan: Corpus-based set expansion via context feature | |
selection and rank ensemble", | |
author = "Shen, Jiaming and Wu, Zeqiu and Lei, Dongming and Shang, | |
Jingbo and Ren, Xiang and Han, Jiawei", | |
booktitle = "Joint European Conference on Machine Learning and Knowledge | |
Discovery in Databases", | |
pages = "288-304", | |
year = 2017, | |
organization = "Springer" | |
} | |
@inproceedings{rong-2016-egoset, | |
title = "Egoset: Exploiting word ego-networks and user-generated | |
ontology for multifaceted set expansion", | |
author = "Rong, Xin and Chen, Zhe and Mei, Qiaozhu and Adar, Eytan", | |
booktitle = "Proceedings of the Ninth ACM international conference on Web | |
search and data mining", | |
pages = "645-654", | |
year = 2016, | |
organization = "ACM" | |
} | |
@inproceedings{lin-2008-textcube, | |
author = "Lin, Cindy Xide and Ding, Bolin and Han, Jiawei and Zhu, | |
Feida and Zhao, Bo", | |
title = "Text cube: Computing ir measures for multidimensional text | |
database analysis", | |
booktitle = "2008 Eighth IEEE International Conference on Data Mining", | |
year = 2008, | |
pages = "905-910", | |
organization = "IEEE" | |
} | |
@article{sun-2011-pathsim, | |
author = "Sun, Yizhou and Han, Jiawei and Yan, Xifeng and Yu, Philip S | |
and Wu, Tianyi", | |
title = "Pathsim: Meta Path-Based Top-K Similarity Search in | |
Heterogeneous Information Networks", | |
journal = "Proceedings of the VLDB Endowment", | |
volume = 4, | |
number = 11, | |
pages = "992-1003", | |
year = 2011, | |
publisher = "Citeseer" | |
} | |
@inproceedings{ren-2015-clustype, | |
author = "Ren, Xiang and El-Kishky, Ahmed and Wang, Chi and Tao, Fangbo | |
and Voss, Clare R and Han, Jiawei", | |
title = "Clustype: Effective entity recognition and typing by relation | |
phrase-based clustering", | |
booktitle = "Proceedings of the 21th ACM SIGKDD International Conference | |
on Knowledge Discovery and Data Mining", | |
year = 2015, | |
pages = "995-1004", | |
organization = "ACM" | |
} | |
@article{ren-2016-cotype, | |
author = "Xiang Ren and Zeqiu Wu and Wenqi He and Meng Qu and Clare | |
R. Voss and Heng Ji and Tarek F. Abdelzaher and Jiawei Han", | |
title = "CoType: Joint Extraction of Typed Entities and Relations with | |
Knowledge Bases", | |
journal = "CoRR", | |
volume = "abs/1610.08763", | |
year = 2016, | |
url = "http://arxiv.org/abs/1610.08763", | |
archivePrefix= "arXiv", | |
eprint = "1610.08763", | |
timestamp = "Mon, 13 Aug 2018 16:46:29 +0200", | |
biburl = "https://dblp.org/rec/bib/journals/corr/RenWHQVJAH16", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@inproceedings{ren-2016-afet, | |
title = "{AFET}: Automatic Fine-Grained Entity Typing by Hierarchical | |
Partial-Label Embedding", | |
author = "Ren, Xiang and He, Wenqi and Qu, Meng and Huang, Lifu and Ji, | |
Heng and Han, Jiawei", | |
booktitle = "Proceedings of the 2016 Conference on Empirical Methods in | |
Natural Language Processing", | |
month = nov, | |
year = 2016, | |
address = "Austin, Texas", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/D16-1144", | |
doi = "10.18653/v1/D16-1144", | |
pages = "1369-1378" | |
} | |
@article{liu-2017-rehession, | |
author = "Liu, Liyuan and Ren, Xiang and Zhu, Qi and Zhi, Shi and Gui, | |
Huan and Ji, Heng and Han, Jiawei", | |
title = "Heterogeneous Supervision for Relation Extraction: a | |
Representation Learning Approach", | |
journal = "arXiv preprint arXiv:1707.00166", | |
year = 2017 | |
} | |
@inproceedings{ren-2016-ple, | |
author = "Ren, Xiang and He, Wenqi and Qu, Meng and Voss, Clare R and | |
Ji, Heng and Han, Jiawei", | |
title = "Label noise reduction in entity typing by heterogeneous | |
partial-label embedding", | |
booktitle = "Proceedings of the 22nd ACM SIGKDD international conference | |
on Knowledge discovery and data mining", | |
year = 2016, | |
pages = "1825-1834", | |
organization = "ACM" | |
} | |
@inproceedings{qu-2017-auto-synonym, | |
title = "Automatic synonym discovery with knowledge bases", | |
author = "Qu, Meng and Ren, Xiang and Han, Jiawei", | |
booktitle = "Proceedings of the 23rd ACM SIGKDD International Conference | |
on Knowledge Discovery and Data Mining", | |
pages = "997-1005", | |
year = 2017, | |
organization = "ACM" | |
} | |
@article{tao-2016-textcube-summarization, | |
title = "Multi-Dimensional, Phrase-Based Summarization in Text Cubes", | |
author = "Fangbo Tao and Honglei Zhuang and Chi Wang Yu and Qi Wang and | |
Taylor Cassidy and Lance M. Kaplan and Clare R. Voss and | |
Jiawei Han", | |
journal = "IEEE Data Eng. Bull.", | |
year = 2016, | |
volume = 39, | |
pages = "74-84" | |
} | |
@inproceedings{liu-2016-laki, | |
title = "Representing documents via latent keyphrase inference", | |
author = "Liu, Jialu and Ren, Xiang and Shang, Jingbo and Cassidy, | |
Taylor and Voss, Clare R and Han, Jiawei", | |
booktitle = "Proceedings of the 25th international conference on World | |
wide web", | |
pages = "1057-1067", | |
year = 2016, | |
organization = "International World Wide Web Conferences Steering Committee" | |
} | |
@article{hosseini-2018-heteromed, | |
author = "Anahita Hosseini and Ting Chen and Wenjun Wu and Yizhou Sun | |
and Majid Sarrafzadeh", | |
title = "HeteroMed: Heterogeneous Information Network for Medical | |
Diagnosis", | |
journal = "CoRR", | |
volume = "abs/1804.08052", | |
year = 2018, | |
url = "http://arxiv.org/abs/1804.08052", | |
archivePrefix= "arXiv", | |
eprint = "1804.08052", | |
timestamp = "Wed, 17 Apr 2019 16:16:59 +0200", | |
biburl = "https://dblp.org/rec/bib/journals/corr/abs-1804-08052", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@inproceedings{han-2017-mining-structs, | |
author = "Han, Jiawei", | |
title = "Mining Structures from Massive Text Data: A Data-Driven | |
Approach.", | |
booktitle = "SIMBig", | |
year = 2017, | |
pages = "16-19" | |
} | |
@article{gui-2018-exper-findin, | |
author = "Huan Gui and Qi Zhu and Liyuan Liu and Aston Zhang and Jiawei | |
Han", | |
title = "Expert Finding in Heterogeneous Bibliographic Networks with | |
Locally-trained Embeddings", | |
journal = "CoRR", | |
volume = "abs/1803.03370", | |
year = 2018, | |
url = "http://arxiv.org/abs/1803.03370", | |
archivePrefix= "arXiv", | |
eprint = "1803.03370", | |
timestamp = "Mon, 13 Aug 2018 16:48:03 +0200", | |
biburl = "https://dblp.org/rec/bib/journals/corr/abs-1803-03370", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@article{sun-2013-minin-heter-infor-networ, | |
author = "Sun, Yizhou and Han, Jiawei", | |
title = "Mining Heterogeneous Information Networks: a Structural | |
Analysis Approach", | |
journal = "Acm Sigkdd Explorations Newsletter", | |
volume = 14, | |
number = 2, | |
pages = "20-28", | |
year = 2013, | |
publisher = "ACM" | |
} | |
@inproceedings{sui-2019-cgn, | |
title = "Leverage Lexical Knowledge for {C}hinese Named Entity | |
Recognition via Collaborative Graph Network", | |
author = "Sui, Dianbo and Chen, Yubo and Liu, Kang and Zhao, Jun and | |
Liu, Shengping", | |
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in | |
Natural Language Processing and the 9th International Joint | |
Conference on Natural Language Processing (EMNLP-IJCNLP)", | |
month = nov, | |
year = 2019, | |
address = "Hong Kong, China", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/D19-1396", | |
doi = "10.18653/v1/D19-1396", | |
pages = "3828-3838", | |
abstract = "The lack of word boundaries information has been seen as one | |
of the main obstacles to develop a high performance Chinese | |
named entity recognition (NER) system. Fortunately, the | |
automatically constructed lexicon contains rich word | |
boundaries information and word semantic | |
information. However, integrating lexical knowledge in | |
Chinese NER tasks still faces challenges when it comes to | |
self-matched lexical words as well as the nearest contextual | |
lexical words. We present a Collaborative Graph Network to | |
solve these challenges. Experiments on various datasets show | |
that our model not only outperforms the state-of-the-art | |
(SOTA) results, but also achieves a speed that is six to | |
fifteen times faster than that of the SOTA model." | |
} | |
@article{2017arXiv171010903V,
  author        = "{Veli{\v{c}}kovi{\'c}}, Petar and {Cucurull}, Guillem and
                   {Casanova}, Arantxa and {Romero}, Adriana and {Li{\`o}}, Pietro and
                   {Bengio}, Yoshua",
  title         = "{Graph Attention Networks}",
  journal       = "arXiv e-prints",
  keywords      = "Statistics - Machine Learning, Computer Science - Artificial
                   Intelligence, Computer Science - Machine Learning, Computer Science - Social
                   and Information Networks",
  year          = 2017,
  month         = oct,
  eid           = "arXiv:1710.10903",
  pages         = "arXiv:1710.10903",
  archivePrefix = "arXiv",
  eprint        = "1710.10903",
  primaryClass  = "stat.ML",
  adsurl        = "https://ui.adsabs.harvard.edu/abs/2017arXiv171010903V",
  adsnote       = "Provided by the SAO/NASA Astrophysics Data System",
  internal-note = "Duplicate of entry velickovic-2017-gat (same work); consolidate citations on one key"
}
@article{kipf-2016-gcn,
  author        = "{Kipf}, Thomas N. and {Welling}, Max",
  title         = "{Semi-Supervised Classification with Graph Convolutional
                   Networks}",
  journal       = "arXiv e-prints",
  keywords      = "Computer Science - Machine Learning, Statistics - Machine
                   Learning",
  year          = 2016,
  month         = sep,
  eid           = "arXiv:1609.02907",
  pages         = "arXiv:1609.02907",
  archivePrefix = "arXiv",
  eprint        = "1609.02907",
  primaryClass  = "cs.LG",
  adsurl        = "https://ui.adsabs.harvard.edu/abs/2016arXiv160902907K",
  adsnote       = "Provided by the SAO/NASA Astrophysics Data System"
}
@inproceedings{niepert-2016-gcn,
  title         = "Learning convolutional neural networks for graphs",
  author        = "Niepert, Mathias and Ahmed, Mohamed and Kutzkov, Konstantin",
  booktitle     = "International conference on machine learning",
  pages         = "2014--2023",
  year          = 2016
}
@article{velickovic-2017-gat,
  author        = "{Veli{\v{c}}kovi{\'c}}, Petar and {Cucurull}, Guillem and
                   {Casanova}, Arantxa and {Romero}, Adriana and {Li{\`o}},
                   Pietro and {Bengio}, Yoshua",
  title         = "{Graph Attention Networks}",
  journal       = "arXiv e-prints",
  keywords      = "Statistics - Machine Learning, Computer Science - Artificial
                   Intelligence, Computer Science - Machine Learning, Computer
                   Science - Social and Information Networks",
  year          = 2017,
  month         = oct,
  eid           = "arXiv:1710.10903",
  pages         = "arXiv:1710.10903",
  archivePrefix = "arXiv",
  eprint        = "1710.10903",
  primaryClass  = "stat.ML",
  adsurl        = "https://ui.adsabs.harvard.edu/abs/2017arXiv171010903V",
  adsnote       = "Provided by the SAO/NASA Astrophysics Data System"
}
@article{xue-2019-bert-joint,
  author        = "{Xue}, Kui and {Zhou}, Yangming and {Ma}, Zhiyuan and {Ruan},
                   Tong and {Zhang}, Huanhuan and {He}, Ping",
  title         = "{Fine-tuning BERT for Joint Entity and Relation Extraction in
                   Chinese Medical Text}",
  journal       = "arXiv e-prints",
  keywords      = "Computer Science - Computation and Language",
  year          = 2019,
  month         = aug,
  eid           = "arXiv:1908.07721",
  pages         = "arXiv:1908.07721",
  archivePrefix = "arXiv",
  eprint        = "1908.07721",
  primaryClass  = "cs.CL",
  adsurl        = "https://ui.adsabs.harvard.edu/abs/2019arXiv190807721X",
  adsnote       = "Provided by the SAO/NASA Astrophysics Data System"
}
@article{jia-2019-doc-level,
  author        = "Robin Jia and Cliff Wong and Hoifung Poon",
  title         = "Document-Level {N-ary} Relation Extraction with Multiscale
                   Representation Learning",
  journal       = "CoRR",
  volume        = "abs/1904.02347",
  year          = 2019,
  url           = "http://arxiv.org/abs/1904.02347",
  archivePrefix = "arXiv",
  eprint        = "1904.02347",
  timestamp     = "Wed, 24 Apr 2019 12:21:25 +0200",
  biburl        = "https://dblp.org/rec/bib/journals/corr/abs-1904-02347",
  bibsource     = "dblp computer science bibliography, https://dblp.org"
}
@inproceedings{gupta-2019-nested-ner,
  title         = "Linguistically Informed Relation Extraction and Neural
                   Architectures for Nested Named Entity Recognition in
                   {B}io{NLP}-{OST} 2019",
  author        = "Gupta, Pankaj and Yaseen, Usama and Sch{\"u}tze, Hinrich",
  booktitle     = "Proceedings of The 5th Workshop on BioNLP Open Shared Tasks",
  month         = nov,
  year          = 2019,
  address       = "Hong Kong, China",
  publisher     = "Association for Computational Linguistics",
  url           = "https://www.aclweb.org/anthology/D19-5720",
  doi           = "10.18653/v1/D19-5720",
  pages         = "132--142",
  abstract      = "Named Entity Recognition (NER) and Relation Extraction (RE)
                   are essential tools in distilling knowledge from biomedical
                   literature. This paper presents our findings from
                   participating in BioNLP Shared Tasks 2019. We addressed Named
                   Entity Recognition including nested entities extraction,
                   Entity Normalization and Relation Extraction. Our proposed
                   approach of Named Entities can be generalized to different
                   languages and we have shown it{'}s effectiveness for English
                   and Spanish text. We investigated linguistic features, hybrid
                   loss including ranking and Conditional Random Fields (CRF),
                   multi-task objective and token level ensembling strategy to
                   improve NER. We employed dictionary based fuzzy and semantic
                   search to perform Entity Normalization. Finally, our RE
                   system employed Support Vector Machine (SVM) with linguistic
                   features. Our NER submission (team:MIC-CIS) ranked first in
                   BB-2019 norm+NER task with standard error rate (SER) of
                   0.7159 and showed competitive performance on PharmaCo NER
                   task with F1-score of 0.8662. Our RE system ranked first in
                   the SeeDev-binary Relation Extraction Task with F1-score of
                   0.3738."
}
@inproceedings{guo-2019-aggcn,
  title         = "Attention Guided Graph Convolutional Networks for Relation
                   Extraction",
  author        = "Guo, Zhijiang and Zhang, Yan and Lu, Wei",
  booktitle     = "Proceedings of the 57th Annual Meeting of the Association for
                   Computational Linguistics",
  month         = jul,
  year          = 2019,
  address       = "Florence, Italy",
  publisher     = "Association for Computational Linguistics",
  url           = "https://www.aclweb.org/anthology/P19-1024",
  doi           = "10.18653/v1/P19-1024",
  pages         = "241--251",
  abstract      = "Dependency trees convey rich structural information that is
                   proven useful for extracting relations among entities in
                   text. However, how to effectively make use of relevant
                   information while ignoring irrelevant information from the
                   dependency trees remains a challenging research
                   question. Existing approaches employing rule based
                   hard-pruning strategies for selecting relevant partial
                   dependency structures may not always yield optimal
                   results. In this work, we propose Attention Guided Graph
                   Convolutional Networks (AGGCNs), a novel model which directly
                   takes full dependency trees as inputs. Our model can be
                   understood as a soft-pruning approach that automatically
                   learns how to selectively attend to the relevant
                   sub-structures useful for the relation extraction
                   task. Extensive results on various tasks including
                   cross-sentence n-ary relation extraction and large-scale
                   sentence-level relation extraction show that our model is
                   able to better leverage the structural information of the
                   full dependency trees, giving significantly better results
                   than previous approaches."
}
@article{he-2019-nre-pul,
  author        = "{He}, Zhengqiu and {Chen}, Wenliang and {Wang}, Yuyi and
                   {Zhang}, Wei and {Wang}, Guanchun and {Zhang}, Min",
  title         = "{Improving Neural Relation Extraction with Positive and
                   Unlabeled Learning}",
  journal       = "arXiv e-prints",
  keywords      = "Computer Science - Computation and Language",
  year          = 2019,
  month         = nov,
  eid           = "arXiv:1911.12556",
  pages         = "arXiv:1911.12556",
  archivePrefix = "arXiv",
  eprint        = "1911.12556",
  primaryClass  = "cs.CL",
  adsurl        = "https://ui.adsabs.harvard.edu/abs/2019arXiv191112556H",
  adsnote       = "Provided by the SAO/NASA Astrophysics Data System"
}
@inproceedings{nayak-2019-nre-am,
  title         = "Effective Attention Modeling for Neural Relation Extraction",
  author        = "Nayak, Tapas and Ng, Hwee Tou",
  booktitle     = "Proceedings of the 23rd Conference on Computational Natural
                   Language Learning (CoNLL)",
  month         = nov,
  year          = 2019,
  address       = "Hong Kong, China",
  publisher     = "Association for Computational Linguistics",
  url           = "https://www.aclweb.org/anthology/K19-1056",
  doi           = "10.18653/v1/K19-1056",
  pages         = "603--612",
  abstract      = "Relation extraction is the task of determining the relation
                   between two entities in a sentence. Distantly-supervised
                   models are popular for this task. However, sentences can be
                   long and two entities can be located far from each other in a
                   sentence. The pieces of evidence supporting the presence of a
                   relation between two entities may not be very direct, since
                   the entities may be connected via some indirect links such as
                   a third entity or via co-reference. Relation extraction in
                   such scenarios becomes more challenging as we need to capture
                   the long-distance interactions among the entities and other
                   words in the sentence. Also, the words in a sentence do not
                   contribute equally in identifying the relation between the
                   two entities. To address this issue, we propose a novel and
                   effective attention model which incorporates syntactic
                   information of the sentence and a multi-factor attention
                   mechanism. Experiments on the New York Times corpus show that
                   our proposed model outperforms prior state-of-the-art
                   models."
}
@article{shang-2019-noisy-dsre,
  author        = "{Shang}, Yuming",
  title         = "{Are Noisy Sentences Useless for Distant Supervised Relation
                   Extraction?}",
  journal       = "arXiv e-prints",
  keywords      = "Computer Science - Computation and Language, Computer Science
                   - Machine Learning",
  year          = 2019,
  month         = nov,
  eid           = "arXiv:1911.09788",
  pages         = "arXiv:1911.09788",
  archivePrefix = "arXiv",
  eprint        = "1911.09788",
  primaryClass  = "cs.CL",
  adsurl        = "https://ui.adsabs.harvard.edu/abs/2019arXiv191109788S",
  adsnote       = "Provided by the SAO/NASA Astrophysics Data System"
}
@article{tran-2019-nml,
  title         = "Neural Metric Learning for Fast End-to-End Relation
                   Extraction",
  author        = "Tung Tran and Ramakanth Kavuluru",
  journal       = "CoRR",
  volume        = "abs/1905.07458",
  year          = 2019,
  url           = "http://arxiv.org/abs/1905.07458",
  archivePrefix = "arXiv",
  eprint        = "1905.07458",
  timestamp     = "Wed, 28 Aug 2019 07:29:35 +0200",
  biburl        = "https://dblp.org/rec/bib/journals/corr/abs-1905-07458",
  bibsource     = "dblp computer science bibliography, https://dblp.org"
}
@article{amos-2017-optnet,
  author        = "Brandon Amos and J. Zico Kolter",
  title         = "{OptNet}: Differentiable Optimization as a Layer in Neural
                   Networks",
  journal       = "CoRR",
  volume        = "abs/1703.00443",
  year          = 2017,
  url           = "http://arxiv.org/abs/1703.00443",
  archivePrefix = "arXiv",
  eprint        = "1703.00443",
  timestamp     = "Mon, 13 Aug 2018 16:48:26 +0200",
  biburl        = "https://dblp.org/rec/bib/journals/corr/AmosK17",
  bibsource     = "dblp computer science bibliography, https://dblp.org"
}
@article{杨锦锋-2016-中文电子病历命名实体和实体关系语料库构建,
  title         = "中文电子病历命名实体和实体关系语料库构建",
  author        = "杨锦锋 and 关毅 and 何彬 and 曲春燕 and 于秋滨 and 刘雅欣 and
                   赵永杰",
  journal       = "软件学报",
  number        = 11,
  pages         = "2725--2746",
  year          = 2016
}
@inproceedings{kuru-2016-charner,
  title         = "{C}har{NER}: Character-Level Named Entity Recognition",
  author        = "Kuru, Onur and Can, Ozan Arkan and Yuret, Deniz",
  booktitle     = "Proceedings of {COLING} 2016, the 26th International
                   Conference on Computational Linguistics: Technical Papers",
  month         = dec,
  year          = 2016,
  address       = "Osaka, Japan",
  publisher     = "The COLING 2016 Organizing Committee",
  url           = "https://www.aclweb.org/anthology/C16-1087",
  pages         = "911--921",
  abstract      = "We describe and evaluate a character-level tagger for
                   language-independent Named Entity Recognition (NER). Instead
                   of words, a sentence is represented as a sequence of
                   characters. The model consists of stacked bidirectional LSTMs
                   which inputs characters and outputs tag probabilities for
                   each character. These probabilities are then converted to
                   consistent word level named entity tags using a Viterbi
                   decoder. We are able to achieve close to state-of-the-art NER
                   performance in seven languages with the same basic model
                   using only labeled NER data and no hand-engineered features
                   or other external resources like syntactic taggers or
                   Gazetteers."
}
@article{ma-2016-lstm-cnn-crf,
  author        = "{Ma}, Xuezhe and {Hovy}, Eduard",
  title         = "{End-to-end Sequence Labeling via Bi-directional
                   LSTM-CNNs-CRF}",
  journal       = "arXiv e-prints",
  keywords      = "Computer Science - Machine Learning, Computer Science -
                   Computation and Language, Statistics - Machine Learning",
  year          = 2016,
  month         = mar,
  eid           = "arXiv:1603.01354",
  pages         = "arXiv:1603.01354",
  archivePrefix = "arXiv",
  eprint        = "1603.01354",
  primaryClass  = "cs.LG",
  adsurl        = "https://ui.adsabs.harvard.edu/abs/2016arXiv160301354M",
  adsnote       = "Provided by the SAO/NASA Astrophysics Data System"
}
@article{yang-2017-neural-reranking,
  title         = "Neural Reranking for Named Entity Recognition",
  author        = "Jie Yang and Yue Zhang and Fei Dong",
  journal       = "CoRR",
  volume        = "abs/1707.05127",
  year          = 2017,
  url           = "http://arxiv.org/abs/1707.05127",
  archivePrefix = "arXiv",
  eprint        = "1707.05127",
  timestamp     = "Wed, 20 Nov 2019 08:54:08 +0100",
  biburl        = "https://dblp.org/rec/bib/journals/corr/YangZD17aa",
  bibsource     = "dblp computer science bibliography, https://dblp.org"
}
@inproceedings{li-2017-ner-recursive-nn,
  title         = "Leveraging Linguistic Structures for Named Entity Recognition
                   with Bidirectional Recursive Neural Networks",
  author        = "Li, Peng-Hsuan and Dong, Ruo-Ping and Wang, Yu-Siang and
                   Chou, Ju-Chieh and Ma, Wei-Yun",
  booktitle     = "Proceedings of the 2017 Conference on Empirical Methods in
                   Natural Language Processing",
  month         = sep,
  year          = 2017,
  address       = "Copenhagen, Denmark",
  publisher     = "Association for Computational Linguistics",
  url           = "https://www.aclweb.org/anthology/D17-1282",
  doi           = "10.18653/v1/D17-1282",
  pages         = "2664--2669",
  abstract      = "In this paper, we utilize the linguistic structures of texts
                   to improve named entity recognition by BRNN-CNN, a special
                   bidirectional recursive network attached with a convolutional
                   network. Motivated by the observation that named entities are
                   highly related to linguistic constituents, we propose a
                   constituent-based BRNN-CNN for named entity recognition. In
                   contrast to classical sequential labeling methods, the system
                   first identifies which text chunks are possible named
                   entities by whether they are linguistic constituents. Then it
                   classifies these chunks with a constituency tree structure by
                   recursively propagating syntactic and semantic information to
                   each constituent node. This method surpasses current
                   state-of-the-art on OntoNotes 5.0 with automatically
                   generated parses."
}
@inproceedings{tran-2017-stack-residual-lstm,
  title         = "Named Entity Recognition with Stack Residual {LSTM} and
                   Trainable Bias Decoding",
  author        = "Tran, Quan and MacKinlay, Andrew and Jimeno Yepes, Antonio",
  booktitle     = "Proceedings of the Eighth International Joint Conference on
                   Natural Language Processing (Volume 1: Long Papers)",
  month         = nov,
  year          = 2017,
  address       = "Taipei, Taiwan",
  publisher     = "Asian Federation of Natural Language Processing",
  url           = "https://www.aclweb.org/anthology/I17-1057",
  pages         = "566--575",
  abstract      = "Recurrent Neural Network models are the state-of-the-art for
                   Named Entity Recognition (NER). We present two innovations to
                   improve the performance of these models. The first innovation
                   is the introduction of residual connections between the
                   Stacked Recurrent Neural Network model to address the
                   degradation problem of deep neural networks. The second
                   innovation is a bias decoding mechanism that allows the
                   trained system to adapt to non-differentiable and externally
                   computed objectives, such as the entity-based F-measure. Our
                   work improves the state-of-the-art results for both Spanish
                   and English languages on the standard train/development/test
                   split of the CoNLL 2003 Shared Task NER dataset."
}
@article{wei-2016-disease-ner,
  author        = "Wei, Qikang and Chen, Tao and Xu, Ruifeng and He, Yulan and
                   Gui, Lin",
  title         = "Disease named entity recognition by combining conditional
                   random fields and bidirectional recurrent neural networks",
  journal       = "Database",
  volume        = 2016,
  year          = 2016,
  publisher     = "Oxford University Press"
}
@inproceedings{strubell-2017-id-cnn,
  title         = "Fast and Accurate Entity Recognition with Iterated Dilated
                   Convolutions",
  author        = "Strubell, Emma and Verga, Patrick and Belanger, David and
                   McCallum, Andrew",
  booktitle     = "Proceedings of the 2017 Conference on Empirical Methods in
                   Natural Language Processing",
  month         = sep,
  year          = 2017,
  address       = "Copenhagen, Denmark",
  publisher     = "Association for Computational Linguistics",
  url           = "https://www.aclweb.org/anthology/D17-1283",
  doi           = "10.18653/v1/D17-1283",
  pages         = "2670--2680",
  abstract      = "Today when many practitioners run basic NLP on the entire web
                   and large-volume traffic, faster methods are paramount to
                   saving time and energy costs. Recent advances in GPU hardware
                   have led to the emergence of bi-directional LSTMs as a
                   standard method for obtaining per-token vector
                   representations serving as input to labeling tasks such as
                   NER (often followed by prediction in a linear-chain
                   CRF). Though expressive and accurate, these models fail to
                   fully exploit GPU parallelism, limiting their computational
                   efficiency. This paper proposes a faster alternative to
                   Bi-LSTMs for NER: Iterated Dilated Convolutional Neural
                   Networks (ID-CNNs), which have better capacity than
                   traditional CNNs for large context and structured
                   prediction. Unlike LSTMs whose sequential processing on
                   sentences of length N requires O(N) time even in the face of
                   parallelism, ID-CNNs permit fixed-depth convolutions to run
                   in parallel across entire documents. We describe a distinct
                   combination of network structure, parameter sharing and
                   training procedures that enable dramatic 14-20x test-time
                   speedups while retaining accuracy comparable to the
                   Bi-LSTM-CRF. Moreover, ID-CNNs trained to aggregate context
                   from the entire document are more accurate than Bi-LSTM-CRFs
                   while attaining 8x faster test time speeds."
}
@inproceedings{lin-2017-multi-channel-bi-lstm-crf,
  title         = "Multi-channel {B}i{LSTM}-{CRF} Model for Emerging Named
                   Entity Recognition in Social Media",
  author        = "Lin, Bill Y. and Xu, Frank and Luo, Zhiyi and Zhu, Kenny",
  booktitle     = "Proceedings of the 3rd Workshop on Noisy User-generated Text",
  month         = sep,
  year          = 2017,
  address       = "Copenhagen, Denmark",
  publisher     = "Association for Computational Linguistics",
  url           = "https://www.aclweb.org/anthology/W17-4421",
  doi           = "10.18653/v1/W17-4421",
  pages         = "160--165",
  abstract      = "In this paper, we present our multi-channel neural
                   architecture for recognizing emerging named entity in social
                   media messages, which we applied in the Novel and Emerging
                   Named Entity Recognition shared task at the EMNLP 2017
                   Workshop on Noisy User-generated Text (W-NUT). We propose a
                   novel approach, which incorporates comprehensive word
                   representations with multi-channel information and
                   Conditional Random Fields (CRF) into a traditional
                   Bidirectional Long Short-Term Memory (BiLSTM) neural network
                   without using any additional hand-craft features such as
                   gazetteers. In comparison with other systems participating in
                   the shared task, our system won the 2nd place."
}
@inproceedings{ghaddar-2018-robust-lexical-features,
  title         = "Robust Lexical Features for Improved Neural Network
                   Named-Entity Recognition",
  author        = "Ghaddar, Abbas and Langlais, Phillippe",
  booktitle     = "Proceedings of the 27th International Conference on
                   Computational Linguistics",
  month         = aug,
  year          = 2018,
  address       = "Santa Fe, New Mexico, USA",
  publisher     = "Association for Computational Linguistics",
  url           = "https://www.aclweb.org/anthology/C18-1161",
  pages         = "1896--1907",
  abstract      = "Neural network approaches to Named-Entity Recognition reduce
                   the need for carefully hand-crafted features. While some
                   features do remain in state-of-the-art systems, lexical
                   features have been mostly discarded, with the exception of
                   gazetteers. In this work, we show that this is unfair:
                   lexical features are actually quite useful. We propose to
                   embed words and entity types into a low-dimensional vector
                   space we train from annotated data produced by distant
                   supervision thanks to Wikipedia. From this, we compute {---}
                   offline {---} a feature vector representing each word. When
                   used with a vanilla recurrent neural network model, this
                   representation yields substantial improvements. We establish
                   a new state-of-the-art F1 score of 87.95 on ONTONOTES 5.0,
                   while matching state-of-the-art performance with a F1 score
                   of 91.73 on the over-studied CONLL-2003 dataset."
}
@article{wu-2015-clinical-text-ner,
  title         = "Named entity recognition in {Chinese} clinical text using deep
                   neural network",
  author        = "Wu, Yonghui and Jiang, Min and Lei, Jianbo and Xu, Hua",
  journal       = "Studies in health technology and informatics",
  volume        = 216,
  pages         = 624,
  year          = 2015,
  publisher     = "NIH Public Access"
}
@incollection{zhou-2017-joint-extraction,
  title         = "Joint extraction of multiple relations and entities by using
                   a hybrid neural network",
  author        = "Zhou, Peng and Zheng, Suncong and Xu, Jiaming and Qi, Zhenyu
                   and Bao, Hongyun and Xu, Bo",
  booktitle     = "Chinese Computational Linguistics and Natural Language
                   Processing Based on Naturally Annotated Big Data",
  pages         = "135--146",
  year          = 2017,
  publisher     = "Springer"
}
@article{nguyen-2016-mention-detection-rnn,
  title         = "Toward Mention Detection Robustness with Recurrent Neural
                   Networks",
  author        = "Thien Huu Nguyen and Avirup Sil and Georgiana Dinu and Radu
                   Florian",
  journal       = "CoRR",
  volume        = "abs/1602.07749",
  year          = 2016,
  url           = "http://arxiv.org/abs/1602.07749",
  archivePrefix = "arXiv",
  eprint        = "1602.07749",
  timestamp     = "Mon, 13 Aug 2018 16:48:51 +0200",
  biburl        = "https://dblp.org/rec/bib/journals/corr/NguyenSDF16",
  bibsource     = "dblp computer science bibliography, https://dblp.org"
}
@article{zhai-2017-sequence-chunking,
  title         = "Neural Models for Sequence Chunking",
  author        = "Feifei Zhai and Saloni Potdar and Bing Xiang and Bowen Zhou",
  journal       = "CoRR",
  volume        = "abs/1701.04027",
  year          = 2017,
  url           = "http://arxiv.org/abs/1701.04027",
  archivePrefix = "arXiv",
  eprint        = "1701.04027",
  timestamp     = "Mon, 13 Aug 2018 16:48:01 +0200",
  biburl        = "https://dblp.org/rec/bib/journals/corr/ZhaiPXZ17",
  bibsource     = "dblp computer science bibliography, https://dblp.org"
}
@inproceedings{zukov-gregoric-2018-ner-parallel-rnn,
  title         = "Named Entity Recognition With Parallel Recurrent Neural
                   Networks",
  author        = "{\v{Z}}ukov-Gregori{\v{c}}, Andrej and Bachrach, Yoram and
                   Coope, Sam",
  booktitle     = "Proceedings of the 56th Annual Meeting of the Association for
                   Computational Linguistics (Volume 2: Short Papers)",
  month         = jul,
  year          = 2018,
  address       = "Melbourne, Australia",
  publisher     = "Association for Computational Linguistics",
  url           = "https://www.aclweb.org/anthology/P18-2012",
  doi           = "10.18653/v1/P18-2012",
  pages         = "69--74",
  abstract      = "We present a new architecture for named entity
                   recognition. Our model employs multiple independent
                   bidirectional LSTM units across the same input and promotes
                   diversity among them by employing an inter-model
                   regularization term. By distributing computation across
                   multiple smaller LSTMs we find a significant reduction in the
                   total number of parameters. We find our architecture achieves
                   state-of-the-art performance on the CoNLL 2003 NER dataset."
}
@inproceedings{rei-2017-semi-supervised-multitask,
  title         = "Semi-supervised Multitask Learning for Sequence Labeling",
  author        = "Rei, Marek",
  booktitle     = "Proceedings of the 55th Annual Meeting of the Association for
                   Computational Linguistics (Volume 1: Long Papers)",
  month         = jul,
  year          = 2017,
  address       = "Vancouver, Canada",
  publisher     = "Association for Computational Linguistics",
  url           = "https://www.aclweb.org/anthology/P17-1194",
  doi           = "10.18653/v1/P17-1194",
  pages         = "2121--2130",
  abstract      = "We propose a sequence labeling framework with a secondary
                   training objective, learning to predict surrounding words for
                   every word in the dataset. This language modeling objective
                   incentivises the system to learn general-purpose patterns of
                   semantic and syntactic composition, which are also useful for
                   improving accuracy on different sequence labeling tasks. The
                   architecture was evaluated on a range of datasets, covering
                   the tasks of error detection in learner texts, named entity
                   recognition, chunking and POS-tagging. The novel language
                   modeling objective provided consistent performance
                   improvements on every benchmark, without requiring any
                   additional annotated or unannotated data."
}
@inproceedings{zhuo-2016-gated-recursive-semi-markov-crf,
  title         = "Segment-Level Sequence Modeling using Gated Recursive
                   Semi-{M}arkov Conditional Random Fields",
  author        = "Zhuo, Jingwei and Cao, Yong and Zhu, Jun and Zhang, Bo and
                   Nie, Zaiqing",
  booktitle     = "Proceedings of the 54th Annual Meeting of the Association for
                   Computational Linguistics (Volume 1: Long Papers)",
  month         = aug,
  year          = 2016,
  address       = "Berlin, Germany",
  publisher     = "Association for Computational Linguistics",
  url           = "https://www.aclweb.org/anthology/P16-1134",
  doi           = "10.18653/v1/P16-1134",
  pages         = "1413--1423"
}
@inproceedings{ye-2018-hybrid-markov-crf,
  title         = "Hybrid semi-{M}arkov {CRF} for Neural Sequence Labeling",
  author        = "Ye, Zhixiu and Ling, Zhen-Hua",
  booktitle     = "Proceedings of the 56th Annual Meeting of the Association for
                   Computational Linguistics (Volume 2: Short Papers)",
  month         = jul,
  year          = 2018,
  address       = "Melbourne, Australia",
  publisher     = "Association for Computational Linguistics",
  url           = "https://www.aclweb.org/anthology/P18-2038",
  doi           = "10.18653/v1/P18-2038",
  pages         = "235--240",
  abstract      = "This paper proposes hybrid semi-Markov conditional random
                   fields (SCRFs) for neural sequence labeling in natural
                   language processing. Based on conventional conditional random
                   fields (CRFs), SCRFs have been designed for the tasks of
                   assigning labels to segments by extracting features from and
                   describing transitions between segments instead of words. In
                   this paper, we improve the existing SCRF methods by employing
                   word-level and segment-level information
                   simultaneously. First, word-level labels are utilized to
                   derive the segment scores in SCRFs. Second, a CRF output
                   layer and an SCRF output layer are integrated into a unified
                   neural network and trained jointly. Experimental results on
                   CoNLL 2003 named entity recognition (NER) shared task show
                   that our model achieves state-of-the-art performance when no
                   external knowledge is used."
}
@inproceedings{aguilar-2017-multi-task-ner,
  title         = "A Multi-task Approach for Named Entity Recognition in Social
                   Media Data",
  author        = "Aguilar, Gustavo and Maharjan, Suraj and L{\'o}pez-Monroy,
                   Adrian Pastor and Solorio, Thamar",
  booktitle     = "Proceedings of the 3rd Workshop on Noisy User-generated Text",
  month         = sep,
  year          = 2017,
  address       = "Copenhagen, Denmark",
  publisher     = "Association for Computational Linguistics",
  url           = "https://www.aclweb.org/anthology/W17-4419",
  doi           = "10.18653/v1/W17-4419",
  pages         = "148--153",
  abstract      = "Named Entity Recognition for social media data is challenging
                   because of its inherent noisiness. In addition to improper
                   grammatical structures, it contains spelling inconsistencies
                   and numerous informal abbreviations. We propose a novel
                   multi-task approach by employing a more general secondary
                   task of Named Entity (NE) segmentation together with the
                   primary task of fine-grained NE categorization. The
                   multi-task neural network architecture learns higher order
                   feature representations from word and character sequences
                   along with basic Part-of-Speech tags and gazetteer
                   information. This neural network acts as a feature extractor
                   to feed a Conditional Random Fields classifier. We were able
                   to obtain the first position in the 3rd Workshop on Noisy
                   User-generated Text (WNUT-2017) with a 41.86{\%} entity
                   F1-score and a 40.24{\%} surface F1-score."
}
@inproceedings{peng-2017-multi-task-sequence-tagging,
  title         = "Multi-task Domain Adaptation for Sequence Tagging",
  author        = "Peng, Nanyun and Dredze, Mark",
  booktitle     = "Proceedings of the 2nd Workshop on Representation Learning
                   for {NLP}",
  month         = aug,
  year          = 2017,
  address       = "Vancouver, Canada",
  publisher     = "Association for Computational Linguistics",
  url           = "https://www.aclweb.org/anthology/W17-2612",
  doi           = "10.18653/v1/W17-2612",
  pages         = "91--100",
  abstract      = "Many domain adaptation approaches rely on learning cross
                   domain shared representations to transfer the knowledge
                   learned in one domain to other domains. Traditional domain
                   adaptation only considers adapting for one task. In this
                   paper, we explore multi-task representation learning under
                   the domain adaptation scenario. We propose a neural network
                   framework that supports domain adaptation for multiple tasks
                   simultaneously, and learns shared representations that better
                   generalize for domain adaptation. We apply the proposed
                   framework to domain adaptation for sequence tagging problems
                   considering two tasks: Chinese word segmentation and named
                   entity recognition. Experiments show that multi-task domain
                   adaptation works better than disjoint domain adaptation for
                   each task, and achieves the state-of-the-art results for both
                   tasks in the social media domain."
}
@article{pan-2013-transfer-joint-embedding,
  author        = "Pan, Sinno Jialin and Toh, Zhiqiang and Su, Jian",
  title         = "Transfer joint embedding for cross-domain named entity
                   recognition",
  journal       = "ACM Transactions on Information Systems (TOIS)",
  volume        = 31,
  number        = 2,
  pages         = 7,
  year          = 2013,
  publisher     = "ACM"
}
@inproceedings{qu-2016-ner-transfer-learning,
  title         = "Named Entity Recognition for Novel Types by Transfer
                   Learning",
  author        = "Qu, Lizhen and Ferraro, Gabriela and Zhou, Liyuan and Hou,
                   Weiwei and Baldwin, Timothy",
  booktitle     = "Proceedings of the 2016 Conference on Empirical Methods in
                   Natural Language Processing",
  month         = nov,
  year          = 2016,
  address       = "Austin, Texas",
  publisher     = "Association for Computational Linguistics",
  url           = "https://www.aclweb.org/anthology/D16-1087",
  doi           = "10.18653/v1/D16-1087",
  pages         = "899--905"
}
@article{yang-2017-transfer-learning-hierachical-rnn, | |
author = "Zhilin Yang and Ruslan Salakhutdinov and William W. Cohen", | |
title = "Transfer Learning for Sequence Tagging with Hierarchical | |
Recurrent Networks", | |
journal = "CoRR", | |
volume = "abs/1703.06345", | |
year = 2017, | |
url = "http://arxiv.org/abs/1703.06345", | |
archivePrefix= "arXiv", | |
eprint = "1703.06345", | |
timestamp = "Mon, 13 Aug 2018 16:48:14 +0200", | |
biburl = "https://dblp.org/rec/bib/journals/corr/YangSC17", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@inproceedings{daeniken-2017-transfer-learning-ner, | |
title = "Transfer Learning and Sentence Level Features for Named | |
Entity Recognition on Tweets", | |
author = "von D{\"a}niken, Pius and Cieliebak, Mark", | |
booktitle = "Proceedings of the 3rd Workshop on Noisy User-generated Text", | |
month = sep, | |
year = 2017, | |
address = "Copenhagen, Denmark", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/W17-4422", | |
doi = "10.18653/v1/W17-4422", | |
pages = "166--171", | |
abstract = "We present our system for the WNUT 2017 Named Entity | |
Recognition challenge on Twitter data. We describe two | |
modifications of a basic neural network architecture for | |
sequence tagging. First, we show how we exploit additional | |
labeled data, where the Named Entity tags differ from the | |
target task. Then, we propose a way to incorporate sentence | |
level features. Our system uses both methods and ranked | |
second for entity level annotations, achieving an F1-score of | |
40.78, and second for surface form annotations, achieving an | |
F1-score of 39.33." | |
} | |
@inproceedings{zhao-2018-multi-task-data-selection, | |
title = "Improve Neural Entity Recognition via Multi-Task Data | |
Selection and Constrained Decoding", | |
author = "Zhao, Huasha and Yang, Yi and Zhang, Qiong and Si, Luo", | |
booktitle = "Proceedings of the 2018 Conference of the North {A}merican | |
Chapter of the Association for Computational Linguistics: | |
Human Language Technologies, Volume 2 (Short Papers)", | |
month = jun, | |
year = 2018, | |
address = "New Orleans, Louisiana", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/N18-2056", | |
doi = "10.18653/v1/N18-2056", | |
pages = "346--351", | |
abstract = "Entity recognition is a widely benchmarked task in natural | |
language processing due to its massive applications. The | |
state-of-the-art solution applies a neural architecture named | |
BiLSTM-CRF to model the language sequences. In this paper, we | |
propose an entity recognition system that improves this | |
neural architecture with two novel techniques. The first | |
technique is Multi-Task Data Selection, which ensures the | |
consistency of data distribution and labeling guidelines | |
between source and target datasets. The other one is | |
constrained decoding using knowledge base. The decoder of the | |
model operates at the document level, and leverages global | |
and external information sources to further improve | |
performance. Extensive experiments have been conducted to | |
show the advantages of each technique. Our system achieves | |
state-of-the-art results on the English entity recognition | |
task in KBP 2017 official evaluation, and it also yields very | |
strong results in other languages." | |
} | |
@inproceedings{lin-2018-neural-adaptation-layers, | |
title = "Neural Adaptation Layers for Cross-domain Named Entity | |
Recognition", | |
author = "Lin, Bill Yuchen and Lu, Wei", | |
booktitle = "Proceedings of the 2018 Conference on Empirical Methods in | |
Natural Language Processing", | |
month = oct # "--" # nov, | |
year = 2018, | |
address = "Brussels, Belgium", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/D18-1226", | |
doi = "10.18653/v1/D18-1226", | |
pages = "2012--2022", | |
abstract = "Recent research efforts have shown that neural architectures | |
can be effective in conventional information extraction tasks | |
such as named entity recognition, yielding state-of-the-art | |
results on standard newswire datasets. However, despite | |
significant resources required for training such models, the | |
performance of a model trained on one domain typically | |
degrades dramatically when applied to a different domain, yet | |
extracting entities from new emerging domains such as social | |
media can be of significant interest. In this paper, we | |
empirically investigate effective methods for conveniently | |
adapting an existing, well-trained neural NER model for a new | |
domain. Unlike existing approaches, we propose lightweight | |
yet effective methods for performing domain adaptation for | |
neural models. Specifically, we introduce adaptation layers | |
on top of existing neural architectures, where no re-training | |
using the source domain data is required. We conduct | |
extensive empirical studies and show that our approach | |
significantly outperforms state-of-the-art methods." | |
} | |
@article{shen-2017-deep-active-learning, | |
author = "Yanyao Shen and Hyokun Yun and Zachary C. Lipton and Yakov | |
Kronrod and Animashree Anandkumar", | |
title = "Deep Active Learning for Named Entity Recognition", | |
journal = "CoRR", | |
volume = "abs/1707.05928", | |
year = 2017, | |
url = "http://arxiv.org/abs/1707.05928", | |
archivePrefix= "arXiv", | |
eprint = "1707.05928", | |
timestamp = "Mon, 13 Aug 2018 16:47:29 +0200", | |
biburl = "https://dblp.org/rec/bib/journals/corr/ShenYLKA17", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@article{narasimhan-2016-ie-reinforcement-learning, | |
author = "Karthik Narasimhan and Adam Yala and Regina Barzilay", | |
title = "Improving Information Extraction by Acquiring External | |
Evidence with Reinforcement Learning", | |
journal = "CoRR", | |
volume = "abs/1603.07954", | |
year = 2016, | |
url = "http://arxiv.org/abs/1603.07954", | |
archivePrefix= "arXiv", | |
eprint = "1603.07954", | |
timestamp = "Mon, 13 Aug 2018 16:48:30 +0200", | |
biburl = "https://dblp.org/rec/bib/journals/corr/NarasimhanYB16", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@inproceedings{zhou-2019-datnet, | |
title = "Dual Adversarial Neural Transfer for Low-Resource Named | |
Entity Recognition", | |
author = "Zhou, Joey Tianyi and Zhang, Hao and Jin, Di and Zhu, | |
Hongyuan and Fang, Meng and Goh, Rick Siow Mong and Kwok, | |
Kenneth", | |
booktitle = "Proceedings of the 57th Annual Meeting of the Association for | |
Computational Linguistics", | |
month = jul, | |
year = 2019, | |
address = "Florence, Italy", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/P19-1336", | |
doi = "10.18653/v1/P19-1336", | |
pages = "3461--3471", | |
abstract = "We propose a new neural transfer method termed Dual | |
Adversarial Transfer Network (DATNet) for addressing | |
low-resource Named Entity Recognition (NER). Specifically, | |
two variants of DATNet, i.e., DATNet-F and DATNet-P, are | |
investigated to explore effective feature fusion between high | |
and low resource. To address the noisy and imbalanced | |
training data, we propose a novel Generalized | |
Resource-Adversarial Discriminator (GRAD). Additionally, | |
adversarial training is adopted to boost model | |
generalization. In experiments, we examine the effects of | |
different components in DATNet across domains and languages | |
and show that significant improvement can be obtained | |
especially for low-resource data, without augmenting any | |
additional hand-crafted features and pre-trained language | |
model." | |
} | |
@inproceedings{zukov-gregoric-2017-ner-self-attention, | |
title = "Neural named entity recognition using a self-attention | |
mechanism", | |
author = "Zukov-Gregoric, Andrej and Bachrach, Yoram and Minkovsky, | |
Pasha and Coope, Sam and Maksak, Bogdan", | |
booktitle = "2017 IEEE 29th International Conference on Tools with | |
Artificial Intelligence (ICTAI)", | |
pages = "652--656", | |
year = 2017, | |
organization = "IEEE" | |
} | |
@inproceedings{xu-2018-ner-global-attention, | |
title = "Improving clinical named entity recognition with global | |
neural attention", | |
author = "Xu, Guohai and Wang, Chengyu and He, Xiaofeng", | |
booktitle = "Asia-Pacific Web (APWeb) and Web-Age Information Management | |
(WAIM) Joint International Conference on Web and Big Data", | |
pages = "264--279", | |
year = 2018, | |
organization = "Springer" | |
} | |
@article{li-2018-survey-nlp, | |
author = "Jing Li and Aixin Sun and Jianglei Han and Chenliang Li", | |
title = "A Survey on Deep Learning for Named Entity Recognition", | |
journal = "CoRR", | |
volume = "abs/1812.09449", | |
year = 2018, | |
url = "http://arxiv.org/abs/1812.09449", | |
archivePrefix= "arXiv", | |
eprint = "1812.09449", | |
timestamp = "Mon, 28 Jan 2019 16:41:27 +0100", | |
biburl = "https://dblp.org/rec/bib/journals/corr/abs-1812-09449", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@inproceedings{zhu-2019-can-ner, | |
title = "{CAN}-{NER}: {C}onvolutional {A}ttention {N}etwork for | |
{C}hinese {N}amed {E}ntity {R}ecognition", | |
author = "Zhu, Yuying and Wang, Guoxin", | |
booktitle = "Proceedings of the 2019 Conference of the North {A}merican | |
Chapter of the Association for Computational Linguistics: | |
Human Language Technologies, Volume 1 (Long and Short | |
Papers)", | |
month = jun, | |
year = 2019, | |
address = "Minneapolis, Minnesota", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/N19-1342", | |
doi = "10.18653/v1/N19-1342", | |
pages = "3384--3393", | |
abstract = "Named entity recognition (NER) in Chinese is essential but | |
difficult because of the lack of natural | |
delimiters. Therefore, Chinese Word Segmentation (CWS) is | |
usually considered as the first step for Chinese | |
NER. However, models based on word-level embeddings and | |
lexicon features often suffer from segmentation errors and | |
out-of-vocabulary (OOV) words. In this paper, we investigate | |
a Convolutional Attention Network called CAN for Chinese NER, | |
which consists of a character-based convolutional neural | |
network (CNN) with local-attention layer and a gated | |
recurrent unit (GRU) with global self-attention layer to | |
capture the information from adjacent characters and sentence | |
contexts. Also, compared to other models, not depending on | |
any external resources like lexicons and employing small size | |
of char embeddings make our model more practical. Extensive | |
experimental results show that our approach outperforms | |
state-of-the-art methods without word embedding and external | |
lexicon resources on different domain datasets including | |
Weibo, MSRA and Chinese Resume NER dataset." | |
} | |
@inproceedings{guan-2019-bert-lstm-crf, | |
title = "New Research on Transfer Learning Model of Named Entity | |
Recognition", | |
author = "Guan, Guoliang and Zhu, Min", | |
booktitle = "Journal of Physics: Conference Series", | |
volume = 1267, | |
number = 1, | |
pages = "012017", | |
year = 2019, | |
organization = "IOP Publishing" | |
} | |
@inproceedings{arkhipov-2019-multilingual-transforms, | |
title = "Tuning multilingual transformers for language-specific named | |
entity recognition", | |
author = "Arkhipov, Mikhail and Trofimova, Maria and Kuratov, Yurii and | |
Sorokin, Alexey", | |
booktitle = "Proceedings of the 7th Workshop on Balto-Slavic Natural | |
Language Processing", | |
pages = "89--93", | |
year = 2019 | |
} | |
@article{zadeh-2019-fmt, | |
author = "{Zadeh}, Amir and {Mao}, Chengfeng and {Shi}, Kelly and | |
{Zhang}, Yiwei and {Liang}, Paul Pu and {Poria}, Soujanya and | |
{Morency}, Louis-Philippe", | |
title = "{Factorized Multimodal Transformer for Multimodal Sequential | |
Learning}", | |
journal = "arXiv e-prints", | |
keywords = "Computer Science - Machine Learning, Computer Science - | |
Computation and Language, Statistics - Machine Learning", | |
year = 2019, | |
month = nov, | |
eid = "arXiv:1911.09826", | |
pages = "arXiv:1911.09826", | |
archivePrefix= "arXiv", | |
eprint = "1911.09826", | |
primaryClass = "cs.LG", | |
adsurl = "https://ui.adsabs.harvard.edu/abs/2019arXiv191109826Z", | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System" | |
} | |
@article{liu-2017-lm-lstm-crf, | |
author = "Liyuan Liu and Jingbo Shang and Frank F. Xu and Xiang Ren and | |
Huan Gui and Jian Peng and Jiawei Han", | |
title = "Empower Sequence Labeling with Task-Aware Neural Language | |
Model", | |
journal = "CoRR", | |
volume = "abs/1709.04109", | |
year = 2017, | |
url = "http://arxiv.org/abs/1709.04109", | |
archivePrefix= "arXiv", | |
eprint = "1709.04109", | |
timestamp = "Mon, 13 Aug 2018 16:47:53 +0200", | |
biburl = "https://dblp.org/rec/bib/journals/corr/abs-1709-04109", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@inproceedings{liu-2018-lm-pruning, | |
title = "Efficient Contextualized Representation: Language Model | |
Pruning for Sequence Labeling", | |
author = "Liu, Liyuan and Ren, Xiang and Shang, Jingbo and Gu, Xiaotao | |
and Peng, Jian and Han, Jiawei", | |
booktitle = "Proceedings of the 2018 Conference on Empirical Methods in | |
Natural Language Processing", | |
month = oct # "--" # nov, | |
year = 2018, | |
address = "Brussels, Belgium", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/D18-1153", | |
doi = "10.18653/v1/D18-1153", | |
pages = "1215--1225", | |
abstract = "Many efforts have been made to facilitate natural language | |
processing tasks with pre-trained language models (LMs), and | |
brought significant improvements to various applications. To | |
fully leverage the nearly unlimited corpora and capture | |
linguistic information of multifarious levels, large-size LMs | |
are required; but for a specific task, only parts of these | |
information are useful. Such large-sized LMs, even in the | |
inference stage, may cause heavy computation workloads, | |
making them too time-consuming for large-scale | |
applications. Here we propose to compress bulky LMs while | |
preserving useful information with regard to a specific | |
task. As different layers of the model keep different | |
information, we develop a layer selection method for model | |
pruning using sparsity-inducing regularization. By | |
introducing the dense connectivity, we can detach any layer | |
without affecting others, and stretch shallow and wide LMs to | |
be deep and narrow. In model training, LMs are learned with | |
layer-wise dropouts for better robustness. Experiments on two | |
benchmark datasets demonstrate the effectiveness of our | |
method." | |
} | |
@article{liu-2018-non-local-nn, | |
author = "Pengfei Liu and Shuaichen Chang and Xuanjing Huang and Jian | |
Tang and Jackie Chi Kit Cheung", | |
title = "Contextualized Non-local Neural Networks for Sequence | |
Learning", | |
journal = "CoRR", | |
volume = "abs/1811.08600", | |
year = 2018, | |
url = "http://arxiv.org/abs/1811.08600", | |
archivePrefix= "arXiv", | |
eprint = "1811.08600", | |
timestamp = "Mon, 26 Nov 2018 12:52:45 +0100", | |
biburl = "https://dblp.org/rec/bib/journals/corr/abs-1811-08600", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@article{chen-2019-grn, | |
author = "Hui Chen and Zijia Lin and Guiguang Ding and Jianguang Lou | |
and Yusen Zhang and B{\"{o}}rje Karlsson", | |
title = "{GRN:} Gated Relation Network to Enhance Convolutional Neural | |
Network for Named Entity Recognition", | |
journal = "CoRR", | |
volume = "abs/1907.05611", | |
year = 2019, | |
url = "http://arxiv.org/abs/1907.05611", | |
archivePrefix= "arXiv", | |
eprint = "1907.05611", | |
timestamp = "Thu, 10 Oct 2019 11:51:45 +0200", | |
biburl = "https://dblp.org/rec/bib/journals/corr/abs-1907-05611", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@article{guo-2019-star-transformer, | |
author = "Qipeng Guo and Xipeng Qiu and Pengfei Liu and Yunfan Shao and | |
Xiangyang Xue and Zheng Zhang", | |
title = "Star-Transformer", | |
journal = "CoRR", | |
volume = "abs/1902.09113", | |
year = 2019, | |
url = "http://arxiv.org/abs/1902.09113", | |
archivePrefix= "arXiv", | |
eprint = "1902.09113", | |
timestamp = "Tue, 21 May 2019 18:03:39 +0200", | |
biburl = "https://dblp.org/rec/bib/journals/corr/abs-1902-09113", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@article{yan-2019-tener, | |
author = "{Yan}, Hang and {Deng}, Bocao and {Li}, Xiaonan and {Qiu}, | |
Xipeng", | |
title = "{TENER: Adapting Transformer Encoder for Named Entity | |
Recognition}", | |
journal = "arXiv e-prints", | |
keywords = "Computer Science - Computation and Language, Computer Science | |
- Machine Learning", | |
year = 2019, | |
month = nov, | |
eid = "arXiv:1911.04474", | |
pages = "arXiv:1911.04474", | |
archivePrefix= "arXiv", | |
eprint = "1911.04474", | |
primaryClass = "cs.CL", | |
adsurl = "https://ui.adsabs.harvard.edu/abs/2019arXiv191104474Y", | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System" | |
} | |
@article{xu-2020-cluener, | |
author = "{Xu}, Liang and {Tong}, Yu and {Dong}, Qianqian and {Liao}, | |
Yixuan and {Yu}, Cong and {Tian}, Yin and {Liu}, Weitang and | |
{Li}, Lu and {Liu}, Caiquan and {Zhang}, Xuanwei", | |
title = "{CLUENER2020: Fine-grained Named Entity Recognition Dataset | |
and Benchmark for Chinese}", | |
journal = "arXiv e-prints", | |
keywords = "Computer Science - Computation and Language, Computer Science | |
- Information Retrieval, Computer Science - Machine Learning", | |
year = 2020, | |
month = jan, | |
eid = "arXiv:2001.04351", | |
pages = "arXiv:2001.04351", | |
archivePrefix= "arXiv", | |
eprint = "2001.04351", | |
primaryClass = "cs.CL", | |
adsurl = "https://ui.adsabs.harvard.edu/abs/2020arXiv200104351X", | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System" | |
} | |
@article{crichton-2017-multi-task-bio-ner, | |
title = "A neural network multi-task learning approach to biomedical | |
named entity recognition", | |
author = "Crichton, Gamal and Pyysalo, Sampo and Chiu, Billy and | |
Korhonen, Anna", | |
journal = "BMC bioinformatics", | |
volume = 18, | |
number = 1, | |
pages = 368, | |
year = 2017, | |
publisher = "BioMed Central" | |
} | |
@article{li-2015-ggs-nn, | |
author = "{Li}, Yujia and {Tarlow}, Daniel and {Brockschmidt}, Marc and | |
{Zemel}, Richard", | |
title = "{Gated Graph Sequence Neural Networks}", | |
journal = "arXiv e-prints", | |
keywords = "Computer Science - Machine Learning, Computer Science - | |
Artificial Intelligence, Computer Science - Neural and | |
Evolutionary Computing, Statistics - Machine Learning", | |
year = 2015, | |
month = "Nov", | |
eid = "arXiv:1511.05493", | |
pages = "arXiv:1511.05493", | |
archivePrefix= "arXiv", | |
eprint = "1511.05493", | |
primaryClass = "cs.LG", | |
adsurl = "https://ui.adsabs.harvard.edu/abs/2015arXiv151105493L", | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System" | |
} | |
@article{battaglia-2018-gcn, | |
author = "{Battaglia}, Peter W. and {Hamrick}, Jessica B. and {Bapst}, | |
Victor and {Sanchez-Gonzalez}, Alvaro and {Zambaldi}, | |
Vinicius and {Malinowski}, Mateusz and {Tacchetti}, Andrea | |
and {Raposo}, David and {Santoro}, Adam and {Faulkner}, Ryan | |
and {Gulcehre}, Caglar and {Song}, Francis and {Ballard}, | |
Andrew and {Gilmer}, Justin and {Dahl}, George and {Vaswani}, | |
Ashish and {Allen}, Kelsey and {Nash}, Charles and | |
{Langston}, Victoria and {Dyer}, Chris and {Heess}, Nicolas | |
and {Wierstra}, Daan and {Kohli}, Pushmeet and {Botvinick}, | |
Matt and {Vinyals}, Oriol and {Li}, Yujia and {Pascanu}, | |
Razvan", | |
title = "{Relational inductive biases, deep learning, and graph | |
networks}", | |
journal = "arXiv e-prints", | |
keywords = "Computer Science - Machine Learning, Computer Science - | |
Artificial Intelligence, Statistics - Machine Learning", | |
year = 2018, | |
month = "Jun", | |
eid = "arXiv:1806.01261", | |
pages = "arXiv:1806.01261", | |
archivePrefix= "arXiv", | |
eprint = "1806.01261", | |
primaryClass = "cs.LG", | |
adsurl = "https://ui.adsabs.harvard.edu/abs/2018arXiv180601261B", | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System" | |
} | |
@inproceedings{limsopatham-2016-bi-lstm-twitter, | |
title = "Bidirectional {LSTM} for Named Entity Recognition in Twitter | |
Messages", | |
author = "Limsopatham, Nut and Collier, Nigel", | |
booktitle = "Proceedings of the 2nd Workshop on Noisy User-generated Text | |
({WNUT})", | |
month = dec, | |
year = 2016, | |
address = "Osaka, Japan", | |
publisher = "The COLING 2016 Organizing Committee", | |
url = "https://www.aclweb.org/anthology/W16-3920", | |
pages = "145--152", | |
abstract = "In this paper, we present our approach for named entity | |
recognition in Twitter messages that we used in our | |
participation in the Named Entity Recognition in Twitter | |
shared task at the COLING 2016 Workshop on Noisy | |
User-generated text (WNUT). The main challenge that we aim to | |
tackle in our participation is the short, noisy and | |
colloquial nature of tweets, which makes named entity | |
recognition in Twitter message a challenging task. In | |
particular, we investigate an approach for dealing with this | |
problem by enabling bidirectional long short-term memory | |
(LSTM) to automatically learn orthographic features without | |
requiring feature engineering. In comparison with other | |
systems participating in the shared task, our system achieved | |
the most effective performance on both the {`}segmentation | |
and categorisation{'} and the {`}segmentation only{'} | |
sub-tasks." | |
} | |
@incollection{sarawagi-2005-scrf, | |
title = "Semi-Markov Conditional Random Fields for Information | |
Extraction", | |
author = "Sarawagi, Sunita and Cohen, William W.", | |
booktitle = "Advances in Neural Information Processing Systems 17", | |
editor = "L. K. Saul and Y. Weiss and L. Bottou", | |
pages = "1185--1192", | |
year = 2005, | |
publisher = "MIT Press", | |
url = "http://papers.nips.cc/paper/2648-semi-markov-conditional-random-fields-for-information-extraction.pdf" | |
} | |
@article{nadeau-2007-survey-ner, | |
title = "A survey of named entity recognition and classification", | |
author = "Nadeau, David and Sekine, Satoshi", | |
journal = "Lingvisticae Investigationes", | |
volume = 30, | |
number = 1, | |
pages = "3--26", | |
year = 2007, | |
publisher = "John Benjamins" | |
} | |
@article{夏光辉-2015-基于实体词典与机器学习的基因命名实体识别, | |
title = "基于实体词典与机器学习的基因命名实体识别", | |
author = "夏光辉 and 李军莲 and 阮学平", | |
journal = "医学信息学杂志", | |
number = 12, | |
pages = "54--60", | |
year = 2015 | |
} | |
@inproceedings{wu-2018-eval-sl-features, | |
title = "Evaluating the Utility of Hand-crafted Features in Sequence | |
Labelling", | |
author = "Wu, Minghao and Liu, Fei and Cohn, Trevor", | |
booktitle = "Proceedings of the 2018 Conference on Empirical Methods in | |
Natural Language Processing", | |
month = oct # "--" # nov, | |
year = 2018, | |
address = "Brussels, Belgium", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/D18-1310", | |
doi = "10.18653/v1/D18-1310", | |
pages = "2850--2856", | |
abstract = "Conventional wisdom is that hand-crafted features are | |
redundant for deep learning models, as they already learn | |
adequate representations of text automatically from | |
corpora. In this work, we test this claim by proposing a new | |
method for exploiting handcrafted features as part of a novel | |
hybrid learning approach, incorporating a feature | |
auto-encoder loss component. We evaluate on the task of named | |
entity recognition (NER), where we show that including manual | |
features for part-of-speech, word shapes and gazetteers can | |
improve the performance of a neural CRF model. We obtain a F | |
1 of 91.89 for the CoNLL-2003 English shared task, which | |
significantly outperforms a collection of highly competitive | |
baseline models. We also present an ablation study showing | |
the importance of auto-encoding, over using features as | |
either inputs or outputs alone, and moreover, show including | |
the autoencoder components reduces training requirements to | |
60{\%}, while retaining the same predictive accuracy." | |
} | |
@inproceedings{zhang-2018-adapt-co-attention-ner, | |
title = "Adaptive co-attention network for named entity recognition in | |
tweets", | |
author = "Zhang, Qi and Fu, Jinlan and Liu, Xiaoyu and Huang, Xuanjing", | |
booktitle = "Thirty-Second AAAI Conference on Artificial Intelligence", | |
year = 2018 | |
} | |
@inproceedings{greenberg-2018-disjoint-label-sets-ner, | |
title = "Marginal Likelihood Training of {B}i{LSTM}-{CRF} for | |
Biomedical Named Entity Recognition from Disjoint Label Sets", | |
author = "Greenberg, Nathan and Bansal, Trapit and Verga, Patrick and | |
McCallum, Andrew", | |
booktitle = "Proceedings of the 2018 Conference on Empirical Methods in | |
Natural Language Processing", | |
month = oct # "--" # nov, | |
year = 2018, | |
address = "Brussels, Belgium", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/D18-1306", | |
doi = "10.18653/v1/D18-1306", | |
pages = "2824--2829", | |
abstract = "Extracting typed entity mentions from text is a fundamental | |
component to language understanding and reasoning. While | |
there exist substantial labeled text datasets for multiple | |
subsets of biomedical entity types{---}such as genes and | |
proteins, or chemicals and diseases{---}it is rare to find | |
large labeled datasets containing labels for all desired | |
entity types together. This paper presents a method for | |
training a single CRF extractor from multiple datasets with | |
disjoint or partially overlapping sets of entity types. Our | |
approach employs marginal likelihood training to insist on | |
labels that are present in the data, while filling in | |
{``}missing labels{''}. This allows us to leverage all the | |
available data within a single model. In experimental results | |
on the Biocreative V CDR (chemicals/diseases), Biocreative VI | |
ChemProt (chemicals/proteins) and MedMentions (19 entity | |
types) datasets, we show that joint training on multiple | |
datasets improves NER F1 over training in isolation, and our | |
methods achieve state-of-the-art results." | |
} | |
@inproceedings{cao-2018-adv-ner, | |
title = "Adversarial Transfer Learning for {C}hinese Named Entity | |
Recognition with Self-Attention Mechanism", | |
author = "Cao, Pengfei and Chen, Yubo and Liu, Kang and Zhao, Jun and | |
Liu, Shengping", | |
booktitle = "Proceedings of the 2018 Conference on Empirical Methods in | |
Natural Language Processing", | |
month = oct # "--" # nov, | |
year = 2018, | |
address = "Brussels, Belgium", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/D18-1017", | |
doi = "10.18653/v1/D18-1017", | |
pages = "182--192", | |
abstract = "Named entity recognition (NER) is an important task in | |
natural language processing area, which needs to determine | |
entities boundaries and classify them into pre-defined | |
categories. For Chinese NER task, there is only a very small | |
amount of annotated data available. Chinese NER task and | |
Chinese word segmentation (CWS) task have many similar word | |
boundaries. There are also specificities in each | |
task. However, existing methods for Chinese NER either do not | |
exploit word boundary information from CWS or cannot filter | |
the specific information of CWS. In this paper, we propose a | |
novel adversarial transfer learning framework to make full | |
use of task-shared boundaries information and prevent the | |
task-specific features of CWS. Besides, since arbitrary | |
character can provide important cues when predicting entity | |
type, we exploit self-attention to explicitly capture long | |
range dependencies between two tokens. Experimental results | |
on two different widely used datasets show that our proposed | |
model significantly and consistently outperforms other | |
state-of-the-art methods." | |
} | |
@inproceedings{yu-2018-char-lm-ner, | |
title = "On the Strength of Character Language Models for Multilingual | |
Named Entity Recognition", | |
author = "Yu, Xiaodong and Mayhew, Stephen and Sammons, Mark and Roth, | |
Dan", | |
booktitle = "Proceedings of the 2018 Conference on Empirical Methods in | |
Natural Language Processing", | |
month = oct # "--" # nov, | |
year = 2018, | |
address = "Brussels, Belgium", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/D18-1345", | |
doi = "10.18653/v1/D18-1345", | |
pages = "3073--3077", | |
abstract = "Character-level patterns have been widely used as features in | |
English Named Entity Recognition (NER) systems. However, to | |
date there has been no direct investigation of the inherent | |
differences between name and nonname tokens in text, nor | |
whether this property holds across multiple languages. This | |
paper analyzes the capabilities of corpus-agnostic | |
Character-level Language Models (CLMs) in the binary task of | |
distinguishing name tokens from non-name tokens. We | |
demonstrate that CLMs provide a simple and powerful model for | |
capturing these differences, identifying named entity tokens | |
in a diverse set of languages at close to the performance of | |
full NER systems. Moreover, by adding very simple CLM-based | |
features we can significantly improve the performance of an | |
off-the-shelf NER system for multiple languages." | |
} | |
@article{savarese-2016-residual-gates, | |
author = "Pedro H. P. Savarese", | |
title = "Learning Identity Mappings with Residual Gates", | |
journal = "CoRR", | |
volume = "abs/1611.01260", | |
year = 2016, | |
url = "http://arxiv.org/abs/1611.01260", | |
archivePrefix= "arXiv", | |
eprint = "1611.01260", | |
timestamp = "Mon, 13 Aug 2018 16:48:22 +0200", | |
biburl = "https://dblp.org/rec/bib/journals/corr/Savarese16", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@article{weiss-2016-survey-transfer-learning, | |
title = "A survey of transfer learning", | |
author = "Weiss, Karl and Khoshgoftaar, Taghi M and Wang, DingDing", | |
journal = "Journal of Big data", | |
volume = 3, | |
number = 1, | |
pages = 9, | |
year = 2016, | |
publisher = "SpringerOpen" | |
} | |
@article{thulasidasan-2019-dac-loss, | |
author = "{Thulasidasan}, Sunil and {Bhattacharya}, Tanmoy and | |
{Bilmes}, Jeff and {Chennupati}, Gopinath and {Mohd-Yusof}, | |
Jamal", | |
title = "{Combating Label Noise in Deep Learning Using Abstention}", | |
journal = "arXiv e-prints", | |
keywords = "Statistics - Machine Learning, Computer Science - Machine | |
Learning", | |
year = 2019, | |
month = may, | |
eid = "arXiv:1905.10964", | |
pages = "arXiv:1905.10964", | |
archivePrefix= "arXiv", | |
eprint = "1905.10964", | |
primaryClass = "stat.ML", | |
adsurl = "https://ui.adsabs.harvard.edu/abs/2019arXiv190510964T", | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System" | |
} | |
@article{lin-2017-focal-loss, | |
author = "Tsung{-}Yi Lin and Priya Goyal and Ross B. Girshick and | |
Kaiming He and Piotr Doll{\'{a}}r", | |
title = "Focal Loss for Dense Object Detection", | |
journal = "CoRR", | |
volume = "abs/1708.02002", | |
year = 2017, | |
url = "http://arxiv.org/abs/1708.02002", | |
archivePrefix= "arXiv", | |
eprint = "1708.02002", | |
timestamp = "Mon, 13 Aug 2018 16:46:12 +0200", | |
biburl = "https://dblp.org/rec/journals/corr/abs-1708-02002.bib", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@article{shrivastava-2016-ohem,
  author       = "Shrivastava, Abhinav and Gupta, Abhinav and Girshick, Ross",
  title        = "Training Region-based Object Detectors with Online Hard
                  Example Mining",
  journal      = "arXiv e-prints",
  keywords     = "Computer Science - Computer Vision and Pattern Recognition,
                  Computer Science - Machine Learning",
  year         = 2016,
  month        = apr,
  eid          = "arXiv:1604.03540",
  archivePrefix= "arXiv",
  eprint       = "1604.03540",
  primaryClass = "cs.CV",
  adsurl       = "https://ui.adsabs.harvard.edu/abs/2016arXiv160403540S",
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System"
}
@article{zhou-2017-east,
  author        = {Xinyu Zhou and Cong Yao and He Wen and Yuzhi Wang and
                   Shuchang Zhou and Weiran He and Jiajun Liang},
  title         = {{EAST:} An Efficient and Accurate Scene Text Detector},
  journal       = {CoRR},
  volume        = {abs/1704.03155},
  year          = {2017},
  url           = {http://arxiv.org/abs/1704.03155},
  archiveprefix = {arXiv},
  eprint        = {1704.03155},
  timestamp     = {Mon, 13 Aug 2018 16:48:38 +0200},
  biburl        = {https://dblp.org/rec/journals/corr/ZhouYWWZHL17.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
@article{xie-2015-bce-loss,
  author        = {Saining Xie and Zhuowen Tu},
  title         = {Holistically-Nested Edge Detection},
  journal       = {CoRR},
  volume        = {abs/1504.06375},
  year          = {2015},
  url           = {http://arxiv.org/abs/1504.06375},
  archiveprefix = {arXiv},
  eprint        = {1504.06375},
  timestamp     = {Mon, 13 Aug 2018 16:46:00 +0200},
  biburl        = {https://dblp.org/rec/journals/corr/XieT15.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
@article{sokolova-2009-measure-analysis,
  title        = "A systematic analysis of performance measures for
                  classification tasks",
  journal      = "Information Processing \& Management",
  volume       = 45,
  number       = 4,
  pages        = "427--437",
  year         = 2009,
  issn         = "0306-4573",
  doi          = "10.1016/j.ipm.2009.03.002",
  url          = "http://www.sciencedirect.com/science/article/pii/S0306457309000259",
  author       = "Marina Sokolova and Guy Lapalme",
  keywords     = "Performance evaluation, Machine Learning, Text
                  classification",
  abstract     = "This paper presents a systematic analysis of twenty four
                  performance measures used in the complete spectrum of Machine
                  Learning classification tasks, i.e., binary, multi-class,
                  multi-labelled, and hierarchical. For each classification
                  task, the study relates a set of changes in a confusion
                  matrix to specific characteristics of data. Then the analysis
                  concentrates on the type of changes to a confusion matrix
                  that do not change a measure, therefore, preserve a
                  classifier's evaluation (measure invariance). The result is
                  the measure invariance taxonomy with respect to all relevant
                  label distribution changes in a classification problem. This
                  formal analysis is supported by examples of applications
                  where invariance properties of measures lead to a more
                  reliable evaluation of classifiers. Text classification
                  supplements the discussion with several case studies."
}
@inproceedings{yang-2018-sgm,
  title        = "{SGM}: Sequence Generation Model for Multi-label
                  Classification",
  author       = "Yang, Pengcheng and Sun, Xu and Li, Wei and Ma, Shuming and
                  Wu, Wei and Wang, Houfeng",
  booktitle    = "Proceedings of the 27th International Conference on
                  Computational Linguistics",
  month        = aug,
  year         = 2018,
  address      = "Santa Fe, New Mexico, USA",
  publisher    = "Association for Computational Linguistics",
  url          = "https://www.aclweb.org/anthology/C18-1330",
  pages        = "3915--3926",
  abstract     = "Multi-label classification is an important yet challenging
                  task in natural language processing. It is more complex than
                  single-label classification in that the labels tend to be
                  correlated. Existing methods tend to ignore the correlations
                  between labels. Besides, different parts of the text can
                  contribute differently for predicting different labels, which
                  is not considered by existing models. In this paper, we
                  propose to view the multi-label classification task as a
                  sequence generation problem, and apply a sequence generation
                  model with a novel decoder structure to solve it. Extensive
                  experimental results show that our proposed methods
                  outperform previous work by a substantial margin. Further
                  analysis of experimental results demonstrates that the
                  proposed methods not only capture the correlations between
                  labels, but also select the most informative words
                  automatically when predicting different labels."
}
@article{tay-2018-cafe,
  author        = {Yi Tay and Luu Anh Tuan and Siu Cheung Hui},
  title         = {A Compare-Propagate Architecture with Alignment
                   Factorization for Natural Language Inference},
  journal       = {CoRR},
  volume        = {abs/1801.00102},
  year          = {2018},
  url           = {http://arxiv.org/abs/1801.00102},
  archiveprefix = {arXiv},
  eprint        = {1801.00102},
  timestamp     = {Mon, 13 Aug 2018 16:47:31 +0200},
  biburl        = {https://dblp.org/rec/journals/corr/abs-1801-00102.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
@article{lan-2019-albert,
  author       = "Lan, Zhenzhong and Chen, Mingda and Goodman, Sebastian and
                  Gimpel, Kevin and Sharma, Piyush and Soricut, Radu",
  title        = "{ALBERT}: A Lite {BERT} for Self-supervised Learning of
                  Language Representations",
  journal      = "arXiv e-prints",
  keywords     = "Computer Science - Computation and Language, Computer Science
                  - Artificial Intelligence",
  year         = 2019,
  month        = sep,
  eid          = "arXiv:1909.11942",
  archivePrefix= "arXiv",
  eprint       = "1909.11942",
  primaryClass = "cs.CL",
  adsurl       = "https://ui.adsabs.harvard.edu/abs/2019arXiv190911942L",
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System"
}
@article{jiao-2019-tinybert,
  author       = "Jiao, Xiaoqi and Yin, Yichun and Shang, Lifeng and Jiang, Xin
                  and Chen, Xiao and Li, Linlin and Wang, Fang and Liu, Qun",
  title        = "{TinyBERT}: Distilling {BERT} for Natural Language
                  Understanding",
  journal      = "arXiv e-prints",
  keywords     = "Computer Science - Computation and Language, Computer Science
                  - Artificial Intelligence, Computer Science - Machine
                  Learning",
  year         = 2019,
  month        = sep,
  eid          = "arXiv:1909.10351",
  archivePrefix= "arXiv",
  eprint       = "1909.10351",
  primaryClass = "cs.CL",
  adsurl       = "https://ui.adsabs.harvard.edu/abs/2019arXiv190910351J",
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System"
}
@article{joshi-2019-spanbert,
  author       = "Mandar Joshi and Danqi Chen and Yinhan Liu and Daniel S. Weld
                  and Luke Zettlemoyer and Omer Levy",
  title        = "{SpanBERT}: Improving Pre-training by Representing and
                  Predicting Spans",
  journal      = "CoRR",
  volume       = "abs/1907.10529",
  year         = 2019,
  url          = "http://arxiv.org/abs/1907.10529",
  archivePrefix= "arXiv",
  eprint       = "1907.10529",
  timestamp    = "Thu, 01 Aug 2019 08:59:33 +0200",
  biburl       = "https://dblp.org/rec/journals/corr/abs-1907-10529.bib",
  bibsource    = "dblp computer science bibliography, https://dblp.org"
}
@article{radford-2019-gpt2,
  title        = "Language Models are Unsupervised Multitask Learners",
  author       = "Alec Radford and Jeffrey Wu and Rewon Child and David Luan
                  and Dario Amodei and Ilya Sutskever",
  journal      = "OpenAI Blog",
  volume       = 1,
  number       = 8,
  year         = 2019
}
@article{boutell-2004-binary-relevance,
  title        = "Learning multi-label scene classification",
  journal      = "Pattern Recognition",
  volume       = 37,
  number       = 9,
  pages        = "1757--1771",
  year         = 2004,
  issn         = "0031-3203",
  doi          = "10.1016/j.patcog.2004.03.009",
  url          = "http://www.sciencedirect.com/science/article/pii/S0031320304001074",
  author       = "Matthew R. Boutell and Jiebo Luo and Xipeng Shen and
                  Christopher M. Brown",
  keywords     = "Image understanding, Semantic scene classification,
                  Multi-label classification, Multi-label training, Multi-label
                  evaluation, Image organization, Cross-training, Jaccard
                  similarity",
  abstract     = "In classic pattern recognition problems, classes are mutually
                  exclusive by definition. Classification errors occur when the
                  classes overlap in the feature space. We examine a different
                  situation, occurring when the classes are, by definition, not
                  mutually exclusive. Such problems arise in semantic scene and
                  document classification and in medical diagnosis. We present
                  a framework to handle such problems and apply it to the
                  problem of semantic scene classification, where a natural
                  scene may contain multiple objects such that the scene can be
                  described by multiple class labels (e.g., a field scene with
                  a mountain in the background). Such a problem poses
                  challenges to the classic pattern recognition paradigm and
                  demands a different treatment. We discuss approaches for
                  training and testing in this scenario and introduce new
                  metrics for evaluating individual examples, class recall and
                  precision, and overall accuracy. Experiments show that our
                  methods are suitable for scene classification; furthermore,
                  our work appears to generalize to other classification
                  problems of the same nature."
}
@inproceedings{read-2009-classifier-chains,
  title        = "Classifier chains for multi-label classification",
  author       = "Read, Jesse and Pfahringer, Bernhard and Holmes, Geoff and
                  Frank, Eibe",
  booktitle    = "Joint European Conference on Machine Learning and Knowledge
                  Discovery in Databases",
  pages        = "254--269",
  year         = 2009,
  organization = "Springer"
}
@inproceedings{zhang-2019-ernie,
  title        = "{ERNIE}: Enhanced Language Representation with Informative
                  Entities",
  author       = "Zhang, Zhengyan and Han, Xu and Liu, Zhiyuan and Jiang, Xin
                  and Sun, Maosong and Liu, Qun",
  booktitle    = "Proceedings of the 57th Annual Meeting of the Association for
                  Computational Linguistics",
  month        = jul,
  year         = 2019,
  address      = "Florence, Italy",
  publisher    = "Association for Computational Linguistics",
  url          = "https://www.aclweb.org/anthology/P19-1139",
  doi          = "10.18653/v1/P19-1139",
  pages        = "1441--1451",
  abstract     = "Neural language representation models such as BERT
                  pre-trained on large-scale corpora can well capture rich
                  semantic patterns from plain text, and be fine-tuned to
                  consistently improve the performance of various NLP
                  tasks. However, the existing pre-trained language models
                  rarely consider incorporating knowledge graphs (KGs), which
                  can provide rich structured knowledge facts for better
                  language understanding. We argue that informative entities in
                  KGs can enhance language representation with external
                  knowledge. In this paper, we utilize both large-scale textual
                  corpora and KGs to train an enhanced language representation
                  model (ERNIE), which can take full advantage of lexical,
                  syntactic, and knowledge information simultaneously. The
                  experimental results have demonstrated that ERNIE achieves
                  significant improvements on various knowledge-driven tasks,
                  and meanwhile is comparable with the state-of-the-art model
                  BERT on other common NLP tasks. The code and datasets will be
                  available in the future."
}
@article{sun-2019-ernie2,
  author        = {Yu Sun and Shuohuan Wang and Yu{-}Kun Li and Shikun Feng and
                   Hao Tian and Hua Wu and Haifeng Wang},
  title         = {{ERNIE} 2.0: {A} Continual Pre-training Framework for
                   Language Understanding},
  journal       = {CoRR},
  volume        = {abs/1907.12412},
  year          = {2019},
  url           = {http://arxiv.org/abs/1907.12412},
  archiveprefix = {arXiv},
  eprint        = {1907.12412},
  timestamp     = {Tue, 21 Jan 2020 07:56:31 +0100},
  biburl        = {https://dblp.org/rec/journals/corr/abs-1907-12412.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
@article{dong-2019-unilm,
  author        = {Li Dong and Nan Yang and Wenhui Wang and Furu Wei and
                   Xiaodong Liu and Yu Wang and Jianfeng Gao and Ming Zhou and
                   Hsiao{-}Wuen Hon},
  title         = {Unified Language Model Pre-training for Natural Language
                   Understanding and Generation},
  journal       = {CoRR},
  volume        = {abs/1905.03197},
  year          = {2019},
  url           = {http://arxiv.org/abs/1905.03197},
  archiveprefix = {arXiv},
  eprint        = {1905.03197},
  timestamp     = {Wed, 19 Feb 2020 17:11:34 +0100},
  biburl        = {https://dblp.org/rec/journals/corr/abs-1905-03197.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{clark-2019-electra,
  title        = "{ELECTRA}: Pre-training Text Encoders as Discriminators
                  Rather Than Generators",
  author       = "Clark, Kevin and Luong, Minh-Thang and Le, Quoc V and
                  Manning, Christopher D",
  booktitle    = "International Conference on Learning Representations",
  year         = 2020
}
@article{liu-2019-mt-dnn,
  author        = {Xiaodong Liu and Pengcheng He and Weizhu Chen and Jianfeng
                   Gao},
  title         = {Multi-Task Deep Neural Networks for Natural Language
                   Understanding},
  journal       = {CoRR},
  volume        = {abs/1901.11504},
  year          = {2019},
  url           = {http://arxiv.org/abs/1901.11504},
  archiveprefix = {arXiv},
  eprint        = {1901.11504},
  timestamp     = {Mon, 04 Feb 2019 08:11:03 +0100},
  biburl        = {https://dblp.org/rec/journals/corr/abs-1901-11504.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
@article{yang-2019-xlnet,
  author       = "Zhilin Yang and Zihang Dai and Yiming Yang and Jaime
                  G. Carbonell and Ruslan Salakhutdinov and Quoc V. Le",
  title        = "{XLNet}: Generalized Autoregressive Pretraining for Language
                  Understanding",
  journal      = "CoRR",
  volume       = "abs/1906.08237",
  year         = 2019,
  url          = "http://arxiv.org/abs/1906.08237",
  archivePrefix= "arXiv",
  eprint       = "1906.08237",
  timestamp    = "Mon, 24 Jun 2019 17:28:45 +0200",
  biburl       = "https://dblp.org/rec/journals/corr/abs-1906-08237.bib",
  bibsource    = "dblp computer science bibliography, https://dblp.org"
}
@article{cui-2019-bert-wwm,
  author       = "Cui, Yiming and Che, Wanxiang and Liu, Ting and Qin, Bing and
                  Yang, Ziqing and Wang, Shijin and Hu, Guoping",
  title        = "Pre-Training with Whole Word Masking for {Chinese} {BERT}",
  journal      = "arXiv e-prints",
  keywords     = "Computer Science - Computation and Language, Computer Science
                  - Machine Learning",
  year         = 2019,
  month        = jun,
  eid          = "arXiv:1906.08101",
  archivePrefix= "arXiv",
  eprint       = "1906.08101",
  primaryClass = "cs.CL",
  adsurl       = "https://ui.adsabs.harvard.edu/abs/2019arXiv190608101C",
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System"
}
@inproceedings{socher-2011-rnn,
  title        = "Parsing natural scenes and natural language with recursive
                  neural networks",
  author       = "Socher, Richard and Lin, Cliff C and Manning, Chris and Ng,
                  Andrew Y",
  booktitle    = "Proceedings of the 28th international conference on machine
                  learning (ICML-11)",
  pages        = "129--136",
  year         = 2011
}
@inproceedings{socher-2013-sentiment-treebank,
  title        = "Recursive Deep Models for Semantic Compositionality Over a
                  Sentiment Treebank",
  author       = "Socher, Richard and Perelygin, Alex and Wu, Jean and Chuang,
                  Jason and Manning, Christopher D. and Ng, Andrew and Potts,
                  Christopher",
  booktitle    = "Proceedings of the 2013 Conference on Empirical Methods in
                  Natural Language Processing",
  month        = oct,
  year         = 2013,
  address      = "Seattle, Washington, USA",
  publisher    = "Association for Computational Linguistics",
  url          = "https://www.aclweb.org/anthology/D13-1170",
  pages        = "1631--1642"
}
@article{pollack-1990-raam,
  title        = "Recursive distributed representations",
  journal      = "Artificial Intelligence",
  volume       = 46,
  number       = 1,
  pages        = "77--105",
  year         = 1990,
  issn         = "0004-3702",
  doi          = "10.1016/0004-3702(90)90005-K",
  url          = "http://www.sciencedirect.com/science/article/pii/000437029090005K",
  author       = "Jordan B. Pollack",
  abstract     = "A longstanding difficulty for connectionist modeling has been
                  how to represent variable-sized recursive data structures,
                  such as trees and lists, in fixed-width patterns. This paper
                  presents a connectionist architecture which automatically
                  develops compact distributed representations for such
                  compositional structures, as well as efficient accessing
                  mechanisms for them. Patterns which stand for the internal
                  nodes of fixed-valence trees are devised through the
                  recursive use of backpropagation on three-layer
                  auto-associative encoder networks. The resulting
                  representations are novel, in that they combine apparently
                  immiscible aspects of features, pointers, and symbol
                  structures. They form a bridge between the data structures
                  necessary for high-level cognitive tasks and the associative,
                  pattern recognition machinery provided by neural networks."
}
@inproceedings{shen-2018-straight-tree,
  title        = "Straight to the Tree: Constituency Parsing with Neural
                  Syntactic Distance",
  author       = "Shen, Yikang and Lin, Zhouhan and Jacob, Athul Paul and
                  Sordoni, Alessandro and Courville, Aaron and Bengio, Yoshua",
  booktitle    = "Proceedings of the 56th Annual Meeting of the Association for
                  Computational Linguistics (Volume 1: Long Papers)",
  month        = jul,
  year         = 2018,
  address      = "Melbourne, Australia",
  publisher    = "Association for Computational Linguistics",
  url          = "https://www.aclweb.org/anthology/P18-1108",
  doi          = "10.18653/v1/P18-1108",
  pages        = "1171--1180",
  abstract     = "In this work, we propose a novel constituency parsing
                  scheme. The model first predicts a real-valued scalar, named
                  syntactic distance, for each split position in the
                  sentence. The topology of grammar tree is then determined by
                  the values of syntactic distances. Compared to traditional
                  shift-reduce parsing schemes, our approach is free from the
                  potentially disastrous compounding error. It is also easier
                  to parallelize and much faster. Our model achieves the
                  state-of-the-art single model F1 score of 92.1 on PTB and
                  86.4 on CTB dataset, which surpasses the previous single
                  model results by a large margin."
}
@inproceedings{socher-2012-mv-rnn,
  title        = "Semantic Compositionality through Recursive Matrix-Vector
                  Spaces",
  author       = "Socher, Richard and Huval, Brody and Manning, Christopher D.
                  and Ng, Andrew Y.",
  booktitle    = "Proceedings of the 2012 Joint Conference on Empirical Methods
                  in Natural Language Processing and Computational Natural
                  Language Learning",
  month        = jul,
  year         = 2012,
  address      = "Jeju Island, Korea",
  publisher    = "Association for Computational Linguistics",
  url          = "https://www.aclweb.org/anthology/D12-1110",
  pages        = "1201--1211"
}
@article{tai-2015-tree-lstm,
  author        = {Kai Sheng Tai and Richard Socher and Christopher D. Manning},
  title         = {Improved Semantic Representations From Tree-Structured Long
                   Short-Term Memory Networks},
  journal       = {CoRR},
  volume        = {abs/1503.00075},
  year          = {2015},
  url           = {http://arxiv.org/abs/1503.00075},
  archiveprefix = {arXiv},
  eprint        = {1503.00075},
  timestamp     = {Mon, 13 Aug 2018 16:48:20 +0200},
  biburl        = {https://dblp.org/rec/journals/corr/TaiSM15.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
@article{zhu-2020-crosswoz,
  author       = "Zhu, Qi and Huang, Kaili and Zhang, Zheng and Zhu, Xiaoyan
                  and Huang, Minlie",
  title        = "{CrossWOZ}: A Large-Scale {Chinese} Cross-Domain
                  Task-Oriented Dialogue Dataset",
  journal      = "arXiv e-prints",
  keywords     = "Computer Science - Computation and Language",
  year         = 2020,
  month        = feb,
  eid          = "arXiv:2002.11893",
  archivePrefix= "arXiv",
  eprint       = "2002.11893",
  primaryClass = "cs.CL",
  adsurl       = "https://ui.adsabs.harvard.edu/abs/2020arXiv200211893Z",
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System"
}
@inproceedings{nie-2019-k-multiple-means,
  author       = "Nie, Feiping and Wang, Cheng-Long and Li, Xuelong",
  title        = "{K-Multiple-Means}: A Multiple-Means Clustering Method with
                  Specified {K} Clusters",
  year         = 2019,
  isbn         = 9781450362016,
  publisher    = "Association for Computing Machinery",
  address      = "New York, NY, USA",
  url          = "https://doi.org/10.1145/3292500.3330846",
  doi          = "10.1145/3292500.3330846",
  booktitle    = "Proceedings of the 25th ACM SIGKDD International Conference
                  on Knowledge Discovery \& Data Mining",
  pages        = "959--967",
  numpages     = 9,
  keywords     = "graph laplacian, clustering, K-means, multiple means",
  location     = "Anchorage, AK, USA",
  series       = "KDD '19"
}
@article{lee-2019-biobert,
  author       = "Jinhyuk Lee and Wonjin Yoon and Sungdong Kim and Donghyeon
                  Kim and Sunkyu Kim and Chan Ho So and Jaewoo Kang",
  title        = "{BioBERT}: a pre-trained biomedical language representation
                  model for biomedical text mining",
  journal      = "CoRR",
  volume       = "abs/1901.08746",
  year         = 2019,
  url          = "http://arxiv.org/abs/1901.08746",
  archivePrefix= "arXiv",
  eprint       = "1901.08746",
  timestamp    = "Sat, 02 Feb 2019 16:56:00 +0100",
  biburl       = "https://dblp.org/rec/journals/corr/abs-1901-08746.bib",
  bibsource    = "dblp computer science bibliography, https://dblp.org"
}
@inproceedings{alsentzer-2019-clinical-bert,
  title        = "Publicly Available Clinical {BERT} Embeddings",
  author       = "Alsentzer, Emily and Murphy, John and Boag, William and Weng,
                  Wei-Hung and Jindi, Di and Naumann, Tristan and McDermott,
                  Matthew",
  booktitle    = "Proceedings of the 2nd Clinical Natural Language Processing
                  Workshop",
  month        = jun,
  year         = 2019,
  address      = "Minneapolis, Minnesota, USA",
  publisher    = "Association for Computational Linguistics",
  url          = "https://www.aclweb.org/anthology/W19-1909",
  doi          = "10.18653/v1/W19-1909",
  pages        = "72--78",
  abstract     = "Contextual word embedding models such as ELMo and BERT have
                  dramatically improved performance for many natural language
                  processing (NLP) tasks in recent months. However, these
                  models have been minimally explored on specialty corpora,
                  such as clinical text; moreover, in the clinical domain, no
                  publicly-available pre-trained BERT models yet exist. In this
                  work, we address this need by exploring and releasing BERT
                  models for clinical text: one for generic clinical text and
                  another for discharge summaries specifically. We demonstrate
                  that using a domain-specific model yields performance
                  improvements on 3/5 clinical NLP tasks, establishing a new
                  state-of-the-art on the MedNLI dataset. We find that these
                  domain-specific models are not as performant on 2 clinical
                  de-identification tasks, and argue that this is a natural
                  consequence of the differences between de-identified source
                  text and synthetically non de-identified task text."
}
@article{shang-2019-g-bert,
  author       = "Shang, Junyuan and Ma, Tengfei and Xiao, Cao and Sun, Jimeng",
  title        = "Pre-training of Graph Augmented Transformers for Medication
                  Recommendation",
  journal      = "arXiv e-prints",
  keywords     = "Computer Science - Artificial Intelligence, Computer Science
                  - Computation and Language, Computer Science - Machine
                  Learning",
  year         = 2019,
  month        = jun,
  eid          = "arXiv:1906.00346",
  archivePrefix= "arXiv",
  eprint       = "1906.00346",
  primaryClass = "cs.AI",
  adsurl       = "https://ui.adsabs.harvard.edu/abs/2019arXiv190600346S",
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System"
}
@inproceedings{chevalier-boisvert-2019-babyai,
  title     = {Baby{AI}: First Steps Towards Grounded Language Learning With a
               Human In the Loop},
  author    = {Maxime Chevalier-Boisvert and Dzmitry Bahdanau and Salem Lahlou
               and Lucas Willems and Chitwan Saharia and Thien Huu Nguyen and
               Yoshua Bengio},
  booktitle = {International Conference on Learning Representations},
  year      = {2019},
  url       = {https://openreview.net/forum?id=rJeXCo0cYX}
}
@article{beltagy-2019-scibert,
  author       = "Iz Beltagy and Arman Cohan and Kyle Lo",
  title        = "{SciBERT}: Pretrained Contextualized Embeddings for
                  Scientific Text",
  journal      = "CoRR",
  volume       = "abs/1903.10676",
  year         = 2019,
  url          = "http://arxiv.org/abs/1903.10676",
  archivePrefix= "arXiv",
  eprint       = "1903.10676",
  timestamp    = "Mon, 01 Apr 2019 14:07:37 +0200",
  biburl       = "https://dblp.org/rec/journals/corr/abs-1903-10676.bib",
  bibsource    = "dblp computer science bibliography, https://dblp.org"
}
@article{pires-2019-m-bert,
  author       = "Pires, Telmo and Schlinger, Eva and Garrette, Dan",
  title        = "How Multilingual is Multilingual {BERT}?",
  journal      = "arXiv e-prints",
  keywords     = "Computer Science - Computation and Language, Computer Science
                  - Artificial Intelligence, Computer Science - Machine
                  Learning",
  year         = 2019,
  month        = jun,
  eid          = "arXiv:1906.01502",
  archivePrefix= "arXiv",
  eprint       = "1906.01502",
  primaryClass = "cs.CL",
  adsurl       = "https://ui.adsabs.harvard.edu/abs/2019arXiv190601502P",
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System"
}
@article{lee-2019-patent-bert,
  author       = "Lee, Jieh-Sheng and Hsiang, Jieh",
  title        = "{PatentBERT}: Patent Classification with Fine-Tuning a
                  pre-trained {BERT} Model",
  journal      = "arXiv e-prints",
  keywords     = "Computer Science - Computation and Language, Computer Science
                  - Machine Learning, Statistics - Machine Learning",
  year         = 2019,
  month        = may,
  eid          = "arXiv:1906.02124",
  archivePrefix= "arXiv",
  eprint       = "1906.02124",
  primaryClass = "cs.CL",
  adsurl       = "https://ui.adsabs.harvard.edu/abs/2019arXiv190602124L",
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System"
}
@article{adhikari-2019-docbert,
  author       = "Ashutosh Adhikari and Achyudh Ram and Raphael Tang and Jimmy
                  Lin",
  title        = "{DocBERT}: {BERT} for Document Classification",
  journal      = "CoRR",
  volume       = "abs/1904.08398",
  year         = 2019,
  url          = "http://arxiv.org/abs/1904.08398",
  archivePrefix= "arXiv",
  eprint       = "1904.08398",
  timestamp    = "Fri, 26 Apr 2019 13:18:53 +0200",
  biburl       = "https://dblp.org/rec/journals/corr/abs-1904-08398.bib",
  bibsource    = "dblp computer science bibliography, https://dblp.org"
}
@article{coenen-2019-bert-geometry,
  author        = {Andy Coenen and Emily Reif and Ann Yuan and Been Kim and
                   Adam Pearce and Fernanda B. Vi{\'{e}}gas and Martin
                   Wattenberg},
  title         = {Visualizing and Measuring the Geometry of {BERT}},
  journal       = {CoRR},
  volume        = {abs/1906.02715},
  year          = {2019},
  url           = {http://arxiv.org/abs/1906.02715},
  archiveprefix = {arXiv},
  eprint        = {1906.02715},
  timestamp     = {Thu, 13 Jun 2019 13:36:00 +0200},
  biburl        = {https://dblp.org/rec/journals/corr/abs-1906-02715.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
@article{michel-2019-sixteen-heads,
  author        = {Paul Michel and Omer Levy and Graham Neubig},
  title         = {Are Sixteen Heads Really Better than One?},
  journal       = {CoRR},
  volume        = {abs/1905.10650},
  year          = {2019},
  url           = {http://arxiv.org/abs/1905.10650},
  archiveprefix = {arXiv},
  eprint        = {1905.10650},
  timestamp     = {Mon, 03 Jun 2019 13:42:33 +0200},
  biburl        = {https://dblp.org/rec/journals/corr/abs-1905-10650.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
@article{zellers-2019-hellaswag,
  author       = "Rowan Zellers and Ari Holtzman and Yonatan Bisk and Ali
                  Farhadi and Yejin Choi",
  title        = "{HellaSwag}: Can a Machine Really Finish Your Sentence?",
  journal      = "CoRR",
  volume       = "abs/1905.07830",
  year         = 2019,
  url          = "http://arxiv.org/abs/1905.07830",
  archivePrefix= "arXiv",
  eprint       = "1905.07830",
  timestamp    = "Tue, 28 May 2019 12:48:08 +0200",
  biburl       = "https://dblp.org/rec/journals/corr/abs-1905-07830.bib",
  bibsource    = "dblp computer science bibliography, https://dblp.org"
}
@article{tenney-2019-bert-rediscover,
  author        = {Ian Tenney and Dipanjan Das and Ellie Pavlick},
  title         = {{BERT} Rediscovers the Classical {NLP} Pipeline},
  journal       = {CoRR},
  volume        = {abs/1905.05950},
  year          = {2019},
  url           = {http://arxiv.org/abs/1905.05950},
  archiveprefix = {arXiv},
  eprint        = {1905.05950},
  timestamp     = {Tue, 28 May 2019 12:48:08 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1905-05950},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
@article{clark-2019-bert-attention,
  author       = "Clark, Kevin and Khandelwal, Urvashi and Levy, Omer and
                  Manning, Christopher D.",
  title        = "What Does {BERT} Look At? An Analysis of {BERT}'s Attention",
  journal      = "arXiv e-prints",
  keywords     = "Computer Science - Computation and Language",
  year         = 2019,
  month        = jun,
  eid          = "arXiv:1906.04341",
  archivePrefix= "arXiv",
  eprint       = "1906.04341",
  primaryClass = "cs.CL",
  adsurl       = "https://ui.adsabs.harvard.edu/abs/2019arXiv190604341C",
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System"
}
@article{loshchilov-2016-sgdr,
  author       = "Loshchilov, Ilya and Hutter, Frank",
  title        = "{SGDR}: Stochastic Gradient Descent with Warm Restarts",
  journal      = "arXiv e-prints",
  keywords     = "Computer Science - Machine Learning, Computer Science -
                  Neural and Evolutionary Computing, Mathematics - Optimization
                  and Control",
  year         = 2016,
  month        = aug,
  eid          = "arXiv:1608.03983",
  archivePrefix= "arXiv",
  eprint       = "1608.03983",
  primaryClass = "cs.LG",
  adsurl       = "https://ui.adsabs.harvard.edu/abs/2016arXiv160803983L",
  adsnote      = "Provided by the SAO/NASA Astrophysics Data System"
}
@inproceedings{yin-2014-gsdmm,
  author       = "Yin, Jianhua and Wang, Jianyong",
  title        = "A {Dirichlet} Multinomial Mixture Model-Based Approach for
                  Short Text Clustering",
  year         = 2014,
  isbn         = 9781450329569,
  publisher    = "Association for Computing Machinery",
  address      = "New York, NY, USA",
  url          = "https://doi.org/10.1145/2623330.2623715",
  doi          = "10.1145/2623330.2623715",
  booktitle    = "Proceedings of the 20th ACM SIGKDD International Conference
                  on Knowledge Discovery and Data Mining",
  pages        = "233--242",
  numpages     = 10,
  keywords     = "gibbs sampling, short text clustering, dirichlet multinomial
                  mixture",
  location     = "New York, New York, USA",
  series       = "KDD '14"
}
@inproceedings{yin-2016-fgsdmm-plus,
  author       = "Yin, Jianhua and Wang, Jianyong",
  title        = "A Text Clustering Algorithm Using an Online Clustering Scheme
                  for Initialization",
  year         = 2016,
  isbn         = 9781450342322,
  publisher    = "Association for Computing Machinery",
  address      = "New York, NY, USA",
  url          = "https://doi.org/10.1145/2939672.2939841",
  doi          = "10.1145/2939672.2939841",
  booktitle    = "Proceedings of the 22nd ACM SIGKDD International Conference
                  on Knowledge Discovery and Data Mining",
  pages        = "1995--2004",
  numpages     = 10,
  keywords     = "dirichlet multinomial mixture, gibbs sampling, text
                  clustering",
  location     = "San Francisco, California, USA",
  series       = "KDD '16"
}
@article{nigam-2000-text-em, | |
title = "Text classification from labeled and unlabeled documents | |
using {EM}", | |
author = "Nigam, Kamal and McCallum, Andrew Kachites and Thrun, | |
Sebastian and Mitchell, Tom", | |
journal = "Machine learning", | |
volume = 39, | |
number = "2-3", | |
pages        = "103--134", | |
year = 2000, | |
publisher = "Springer" | |
} | |
@article{holmes-2012-dmm, | |
title        = "{Dirichlet} multinomial mixtures: generative models for | |
microbial metagenomics", | |
author = "Holmes, Ian and Harris, Keith and Quince, Christopher", | |
journal = "PloS one", | |
volume = 7, | |
number = 2, | |
year = 2012, | |
publisher = "Public Library of Science" | |
} | |
@inproceedings{li-2016-gpu-dmm, | |
title = "Topic modeling for short texts with auxiliary word | |
embeddings", | |
author = "Li, Chenliang and Wang, Haoran and Zhang, Zhiqian and Sun, | |
Aixin and Ma, Zongyang", | |
booktitle = "Proceedings of the 39th International ACM SIGIR conference on | |
Research and Development in Information Retrieval", | |
pages        = "165--174", | |
year = 2016 | |
} | |
@inproceedings{rangrej-2011-short-text-clustering-comparison, | |
title = "Comparative study of clustering techniques for short text | |
documents", | |
author = "Rangrej, Aniket and Kulkarni, Sayali and Tendulkar, Ashish V", | |
booktitle = "Proceedings of the 20th international conference companion on | |
World wide web", | |
pages        = "111--112", | |
year = 2011 | |
} | |
@article{pan-2009-transfer-survey, | |
title = "A survey on transfer learning", | |
author = "Pan, Sinno Jialin and Yang, Qiang", | |
journal = "IEEE Transactions on knowledge and data engineering", | |
volume = 22, | |
number = 10, | |
pages        = "1345--1359", | |
year = 2009, | |
publisher = "IEEE" | |
} | |
@article{li-2012-tl-nlp-survey, | |
title = "Literature survey: domain adaptation algorithms for natural | |
language processing", | |
author = "Li, Qi", | |
journal = "Department of Computer Science The Graduate Center, The City | |
University of New York", | |
pages        = "8--10", | |
year = 2012 | |
} | |
@article{mao-2019-medgcn, | |
author = "Chengsheng Mao and Liang Yao and Yuan Luo", | |
title        = "{MedGCN}: Graph Convolutional Networks for Multiple Medical | |
Tasks", | |
journal = "CoRR", | |
volume = "abs/1904.00326", | |
year = 2019, | |
url = "http://arxiv.org/abs/1904.00326", | |
archivePrefix= "arXiv", | |
eprint = "1904.00326", | |
timestamp = "Fri, 28 Jun 2019 09:35:46 +0200", | |
biburl = "https://dblp.org/rec/journals/corr/abs-1904-00326.bib", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@ARTICLE{edwards-2016-neural-statistician, | |
author = "{Edwards}, Harrison and {Storkey}, Amos", | |
title = "{Towards a Neural Statistician}", | |
journal = "arXiv e-prints", | |
keywords = "Statistics - Machine Learning, Computer Science - Machine | |
Learning", | |
year = 2016, | |
month = jun, | |
eid = "arXiv:1606.02185", | |
pages = "arXiv:1606.02185", | |
archivePrefix= "arXiv", | |
eprint = "1606.02185", | |
primaryClass = "stat.ML", | |
adsurl = "https://ui.adsabs.harvard.edu/abs/2016arXiv160602185E", | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System" | |
} | |
@ARTICLE{finn-2017-maml, | |
author = "{Finn}, Chelsea and {Abbeel}, Pieter and {Levine}, Sergey", | |
title = "{Model-Agnostic Meta-Learning for Fast Adaptation of Deep | |
Networks}", | |
journal = "arXiv e-prints", | |
keywords = "Computer Science - Machine Learning, Computer Science - | |
Artificial Intelligence, Computer Science - Computer Vision | |
and Pattern Recognition, Computer Science - Neural and | |
Evolutionary Computing", | |
year = 2017, | |
month = mar, | |
eid = "arXiv:1703.03400", | |
pages = "arXiv:1703.03400", | |
archivePrefix= "arXiv", | |
eprint = "1703.03400", | |
primaryClass = "cs.LG", | |
adsurl = "https://ui.adsabs.harvard.edu/abs/2017arXiv170303400F", | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System" | |
} | |
@article{vinyals-2016-matching-networks, | |
author = "Oriol Vinyals and Charles Blundell and Timothy P. Lillicrap | |
and Koray Kavukcuoglu and Daan Wierstra", | |
title = "Matching Networks for One Shot Learning", | |
journal = "CoRR", | |
volume = "abs/1606.04080", | |
year = 2016, | |
url = "http://arxiv.org/abs/1606.04080", | |
archivePrefix= "arXiv", | |
eprint = "1606.04080", | |
timestamp = "Mon, 13 Aug 2018 16:46:48 +0200", | |
biburl = "https://dblp.org/rec/journals/corr/VinyalsBLKW16.bib", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@ARTICLE{bhojanapalli-2020-low-rank-bottleneck, | |
author = "{Bhojanapalli}, Srinadh and {Yun}, Chulhee and {Singh Rawat}, | |
Ankit and {Reddi}, Sashank J. and {Kumar}, Sanjiv", | |
title = "{Low-Rank Bottleneck in Multi-head Attention Models}", | |
journal = "arXiv e-prints", | |
keywords = "Computer Science - Machine Learning, Statistics - Machine | |
Learning", | |
year = 2020, | |
month = feb, | |
eid = "arXiv:2002.07028", | |
pages = "arXiv:2002.07028", | |
archivePrefix= "arXiv", | |
eprint = "2002.07028", | |
primaryClass = "cs.LG", | |
adsurl = "https://ui.adsabs.harvard.edu/abs/2020arXiv200207028B", | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System" | |
} | |
@ARTICLE{shazeer-2020-talking-head, | |
author = "{Shazeer}, Noam and {Lan}, Zhenzhong and {Cheng}, Youlong and | |
{Ding}, Nan and {Hou}, Le", | |
title = "{Talking-Heads Attention}", | |
journal = "arXiv e-prints", | |
keywords = "Computer Science - Machine Learning, Computer Science - | |
Neural and Evolutionary Computing, Computer Science - Sound, | |
Electrical Engineering and Systems Science - Audio and Speech | |
Processing, Statistics - Machine Learning", | |
year = 2020, | |
month = mar, | |
eid = "arXiv:2003.02436", | |
pages = "arXiv:2003.02436", | |
archivePrefix= "arXiv", | |
eprint = "2003.02436", | |
primaryClass = "cs.LG", | |
adsurl = "https://ui.adsabs.harvard.edu/abs/2020arXiv200302436S", | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System" | |
} | |
@article{banerjee-2005-cluster-bregman, | |
title        = "Clustering with {Bregman} divergences", | |
author = "Banerjee, Arindam and Merugu, Srujana and Dhillon, Inderjit S | |
and Ghosh, Joydeep", | |
journal = "Journal of machine learning research", | |
volume = 6, | |
number = "Oct", | |
pages        = "1705--1749", | |
year = 2005 | |
} | |
@article{he-2017-dureader, | |
author = "Wei He and Kai Liu and Yajuan Lyu and Shiqi Zhao and Xinyan | |
Xiao and Yuan Liu and Yizhong Wang and Hua Wu and Qiaoqiao | |
She and Xuan Liu and Tian Wu and Haifeng Wang", | |
title        = "{DuReader}: a Chinese Machine Reading Comprehension Dataset | |
from Real-world Applications", | |
journal = "CoRR", | |
volume = "abs/1711.05073", | |
year = 2017, | |
url = "http://arxiv.org/abs/1711.05073", | |
archivePrefix= "arXiv", | |
eprint = "1711.05073", | |
timestamp = "Thu, 17 Oct 2019 16:06:13 +0200", | |
biburl = "https://dblp.org/rec/journals/corr/abs-1711-05073.bib", | |
bibsource = "dblp computer science bibliography, https://dblp.org" | |
} | |
@ARTICLE{rajpurkar-2018-squad-2, | |
author = "{Rajpurkar}, Pranav and {Jia}, Robin and {Liang}, Percy", | |
title = "{Know What You Don't Know: Unanswerable Questions for SQuAD}", | |
journal = "arXiv e-prints", | |
keywords = "Computer Science - Computation and Language", | |
year = 2018, | |
month = jun, | |
eid = "arXiv:1806.03822", | |
pages = "arXiv:1806.03822", | |
archivePrefix= "arXiv", | |
eprint = "1806.03822", | |
primaryClass = "cs.CL", | |
adsurl = "https://ui.adsabs.harvard.edu/abs/2018arXiv180603822R", | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System" | |
} | |
@inproceedings{wang-2017-gated-self, | |
title = "Gated Self-Matching Networks for Reading Comprehension and | |
Question Answering", | |
author = "Wang, Wenhui and Yang, Nan and Wei, Furu and Chang, Baobao | |
and Zhou, Ming", | |
booktitle = "Proceedings of the 55th Annual Meeting of the Association for | |
Computational Linguistics (Volume 1: Long Papers)", | |
month = jul, | |
year = 2017, | |
address = "Vancouver, Canada", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/P17-1018", | |
doi = "10.18653/v1/P17-1018", | |
pages        = "189--198", | |
abstract = "In this paper, we present the gated self-matching networks | |
for reading comprehension style question answering, which | |
aims to answer questions from a given passage. We first match | |
the question and passage with gated attention-based recurrent | |
networks to obtain the question-aware passage | |
representation. Then we propose a self-matching attention | |
mechanism to refine the representation by matching the | |
passage against itself, which effectively encodes information | |
from the whole passage. We finally employ the pointer | |
networks to locate the positions of answers from the | |
passages. We conduct extensive experiments on the SQuAD | |
dataset. The single model achieves 71.3{\%} on the evaluation | |
metrics of exact match on the hidden test set, while the | |
ensemble model further boosts the results to 75.9{\%}. At the | |
time of submission of the paper, our model holds the first | |
place on the SQuAD leaderboard for both single and ensemble | |
model." | |
} | |
@article{santos-2016-attentive-pooling, | |
author       = "dos Santos, Cicero and Tan, Ming and Xiang, Bing and Zhou, | |
Bowen", | |
title = "Attentive Pooling Networks", | |
journal = "CoRR", | |
year = 2016, | |
url = "http://arxiv.org/abs/1602.03609v1", | |
abstract = "In this work, we propose Attentive Pooling (AP), a two-way | |
attention mechanism for discriminative model training. In the | |
context of pair-wise ranking or classification with neural | |
networks, AP enables the pooling layer to be aware of the | |
current input pair, in a way that information from the two | |
input items can directly influence the computation of each | |
other's representations. Along with such representations of | |
the paired inputs, AP jointly learns a similarity measure | |
over projected segments (e.g. trigrams) of the pair, and | |
subsequently, derives the corresponding attention vector for | |
each input to guide the pooling. Our two-way attention | |
mechanism is a general framework independent of the | |
underlying representation learning, and it has been applied | |
to both convolutional neural networks (CNNs) and recurrent | |
neural networks (RNNs) in our studies. The empirical results, | |
from three very different benchmark tasks of question | |
answering/answer selection, demonstrate that our proposed | |
models outperform a variety of strong baselines and achieve | |
state-of-the-art performance in all the benchmarks.", | |
archivePrefix= "arXiv", | |
eprint = "1602.03609", | |
primaryClass = "cs.CL" | |
} | |
@inproceedings{girdhar-2017-attentional-pooling, | |
title = "Attentional pooling for action recognition", | |
author = "Girdhar, Rohit and Ramanan, Deva", | |
booktitle = "Advances in Neural Information Processing Systems", | |
pages        = "34--45", | |
year = 2017 | |
} | |
@inproceedings{iyyer-2015-word-dropout, | |
title = "Deep Unordered Composition Rivals Syntactic Methods for Text | |
Classification", | |
author = "Iyyer, Mohit and Manjunatha, Varun and Boyd-Graber, Jordan | |
and Daum{\'e} III, Hal", | |
booktitle = "Proceedings of the 53rd Annual Meeting of the Association for | |
Computational Linguistics and the 7th International Joint | |
Conference on Natural Language Processing (Volume 1: Long | |
Papers)", | |
month = jul, | |
year = 2015, | |
address = "Beijing, China", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/P15-1162", | |
doi = "10.3115/v1/P15-1162", | |
pages        = "1681--1691" | |
} | |
@article{gal-2015-rnn-dropout, | |
author = "Gal, Yarin and Ghahramani, Zoubin", | |
title = "A Theoretically Grounded Application of Dropout in Recurrent | |
Neural Networks", | |
journal = "CoRR", | |
year = 2015, | |
url = "http://arxiv.org/abs/1512.05287v5", | |
abstract = "Recurrent neural networks (RNNs) stand at the forefront of | |
many recent developments in deep learning. Yet a major | |
difficulty with these models is their tendency to overfit, | |
with dropout shown to fail when applied to recurrent | |
layers. Recent results at the intersection of Bayesian | |
modelling and deep learning offer a Bayesian interpretation | |
of common deep learning techniques such as dropout. This | |
grounding of dropout in approximate Bayesian inference | |
suggests an extension of the theoretical results, offering | |
insights into the use of dropout with RNN models. We apply | |
this new variational inference based dropout technique in | |
LSTM and GRU models, assessing it on language modelling and | |
sentiment analysis tasks. The new approach outperforms | |
existing techniques, and to the best of our knowledge | |
improves on the single model state-of-the-art in language | |
modelling with the Penn Treebank (73.4 test perplexity). This | |
extends our arsenal of variational tools in deep learning.", | |
archivePrefix= "arXiv", | |
eprint = "1512.05287", | |
primaryClass = "stat.ML" | |
} | |
@article{krueger-2016-zoneout, | |
author = "Krueger, David and Maharaj, Tegan and Kram{\'a}r, J{\'a}nos | |
and Pezeshki, Mohammad and Ballas, Nicolas and Ke, Nan | |
Rosemary and Goyal, Anirudh and Bengio, Yoshua and Courville, | |
Aaron and Pal, Chris", | |
title        = "Zoneout: Regularizing {RNN}s by Randomly Preserving Hidden | |
Activations", | |
journal = "CoRR", | |
year = 2016, | |
url = "http://arxiv.org/abs/1606.01305v4", | |
abstract = "We propose zoneout, a novel method for regularizing RNNs. At | |
each timestep, zoneout stochastically forces some hidden | |
units to maintain their previous values. Like dropout, | |
zoneout uses random noise to train a pseudo-ensemble, | |
improving generalization. But by preserving instead of | |
dropping hidden units, gradient information and state | |
information are more readily propagated through time, as in | |
feedforward stochastic depth networks. We perform an | |
empirical investigation of various RNN regularizers, and find | |
that zoneout gives significant performance improvements | |
across tasks. We achieve competitive results with relatively | |
simple models in character- and word-level language modelling | |
on the Penn Treebank and Text8 datasets, and combining with | |
recurrent batch normalization yields state-of-the-art results | |
on permuted sequential MNIST.", | |
archivePrefix= "arXiv", | |
eprint = "1606.01305", | |
primaryClass = "cs.NE" | |
} | |
@article{merity-2017-drop-connect, | |
author = "Merity, Stephen and Keskar, Nitish Shirish and Socher, | |
Richard", | |
title        = "Regularizing and Optimizing {LSTM} Language Models", | |
journal = "CoRR", | |
year = 2017, | |
url = "http://arxiv.org/abs/1708.02182v1", | |
abstract = "Recurrent neural networks (RNNs), such as long short-term | |
memory networks (LSTMs), serve as a fundamental building | |
block for many sequence learning tasks, including machine | |
translation, language modeling, and question answering. In | |
this paper, we consider the specific problem of word-level | |
language modeling and investigate strategies for regularizing | |
and optimizing LSTM-based models. We propose the | |
weight-dropped LSTM which uses DropConnect on | |
hidden-to-hidden weights as a form of recurrent | |
regularization. Further, we introduce NT-ASGD, a variant of | |
the averaged stochastic gradient method, wherein the | |
averaging trigger is determined using a non-monotonic | |
condition as opposed to being tuned by the user. Using these | |
and other regularization strategies, we achieve | |
state-of-the-art word level perplexities on two data sets: | |
57.3 on Penn Treebank and 65.8 on WikiText-2. In exploring | |
the effectiveness of a neural cache in conjunction with our | |
proposed model, we achieve an even lower state-of-the-art | |
perplexity of 52.8 on Penn Treebank and 52.0 on WikiText-2.", | |
archivePrefix= "arXiv", | |
eprint = "1708.02182", | |
primaryClass = "cs.CL" | |
} | |
@article{melis-2017-state-art, | |
author = "Melis, G{\'a}bor and Dyer, Chris and Blunsom, Phil", | |
title = "On the State of the Art of Evaluation in Neural Language | |
Models", | |
journal = "CoRR", | |
year = 2017, | |
url = "http://arxiv.org/abs/1707.05589v2", | |
abstract = "Ongoing innovations in recurrent neural network architectures | |
have provided a steady influx of apparently state-of-the-art | |
results on language modelling benchmarks. However, these have | |
been evaluated using differing code bases and limited | |
computational resources, which represent uncontrolled sources | |
of experimental variation. We reevaluate several popular | |
architectures and regularisation methods with large-scale | |
automatic black-box hyperparameter tuning and arrive at the | |
somewhat surprising conclusion that standard LSTM | |
architectures, when properly regularised, outperform more | |
recent models. We establish a new state of the art on the | |
Penn Treebank and Wikitext-2 corpora, as well as strong | |
baselines on the Hutter Prize dataset.", | |
archivePrefix= "arXiv", | |
eprint = "1707.05589", | |
primaryClass = "cs.CL" | |
} | |
@article{merity-2017-activation-regularization, | |
author = "Merity, Stephen and McCann, Bryan and Socher, Richard", | |
title        = "Revisiting Activation Regularization for Language {RNN}s", | |
journal = "CoRR", | |
year = 2017, | |
url = "http://arxiv.org/abs/1708.01009v1", | |
abstract = "Recurrent neural networks (RNNs) serve as a fundamental | |
building block for many sequence tasks across natural | |
language processing. Recent research has focused on recurrent | |
dropout techniques or custom RNN cells in order to improve | |
performance. Both of these can require substantial | |
modifications to the machine learning model or to the | |
underlying RNN configurations. We revisit traditional | |
regularization techniques, specifically L2 regularization on | |
RNN activations and slowness regularization over successive | |
hidden states, to improve the performance of RNNs on the task | |
of language modeling. Both of these techniques require | |
minimal modification to existing RNN architectures and result | |
in performance improvements comparable or superior to more | |
complicated regularization techniques or custom cell | |
architectures. These regularization techniques can be used | |
without any modification on optimized LSTM implementations | |
such as the NVIDIA cuDNN LSTM.", | |
archivePrefix= "arXiv", | |
eprint = "1708.01009", | |
primaryClass = "cs.CL" | |
} | |
@article{ma-2016-expectation-linear-dropout, | |
author = "Ma, Xuezhe and Gao, Yingkai and Hu, Zhiting and Yu, Yaoliang | |
and Deng, Yuntian and Hovy, Eduard", | |
title = "Dropout With Expectation-Linear Regularization", | |
journal = "CoRR", | |
year = 2016, | |
url = "http://arxiv.org/abs/1609.08017v3", | |
abstract = "Dropout, a simple and effective way to train deep neural | |
networks, has led to a number of impressive empirical | |
successes and spawned many recent theoretical | |
investigations. However, the gap between dropout's training | |
and inference phases, introduced due to tractability | |
considerations, has largely remained under-appreciated. In | |
this work, we first formulate dropout as a tractable | |
approximation of some latent variable model, leading to a | |
clean view of parameter sharing and enabling further | |
theoretical analysis. Then, we introduce (approximate) | |
expectation-linear dropout neural networks, whose inference | |
gap we are able to formally characterize. Algorithmically, we | |
show that our proposed measure of the inference gap can be | |
used to regularize the standard dropout training objective, | |
resulting in an \emph{explicit} control of the gap. Our | |
method is as simple and efficient as standard dropout. We | |
further prove the upper bounds on the loss in accuracy due to | |
expectation-linearization, describe classes of input | |
distributions that expectation-linearize easily. Experiments | |
on three image classification benchmark datasets demonstrate | |
that reducing the inference gap can indeed improve the | |
performance consistently.", | |
archivePrefix= "arXiv", | |
eprint = "1609.08017", | |
primaryClass = "cs.LG" | |
} | |
@inproceedings{clare-2001-ml-dt, | |
author = "Clare, Amanda and King, Ross D.", | |
title = "Knowledge Discovery in Multi-Label Phenotype Data", | |
year = 2001, | |
isbn = 3540425349, | |
publisher = "Springer-Verlag", | |
address = "Berlin, Heidelberg", | |
booktitle = "Proceedings of the 5th European Conference on Principles of | |
Data Mining and Knowledge Discovery", | |
pages        = "42--53", | |
numpages = 12, | |
series       = "PKDD '01" | |
} | |
@inproceedings{elisseeff-2001-rank-svm, | |
author = "Elisseeff, Andr\'{e} and Weston, Jason", | |
title = "A Kernel Method for Multi-Labelled Classification", | |
year = 2001, | |
publisher = "MIT Press", | |
address = "Cambridge, MA, USA", | |
booktitle = "Proceedings of the 14th International Conference on Neural | |
Information Processing Systems: Natural and Synthetic", | |
pages        = "681--687", | |
numpages = 7, | |
location = "Vancouver, British Columbia, Canada", | |
series       = "NIPS '01" | |
} | |
@article{zhang-2007-ml-knn, | |
author = "Zhang, Min-Ling and Zhou, Zhi-Hua", | |
title        = "{ML-KNN}: A Lazy Learning Approach to Multi-Label Learning", | |
year = 2007, | |
issue_date = "July 2007", | |
publisher = "Elsevier Science Inc.", | |
address = "USA", | |
volume = 40, | |
number = 7, | |
issn = "0031-3203", | |
url = "https://doi.org/10.1016/j.patcog.2006.12.019", | |
doi = "10.1016/j.patcog.2006.12.019", | |
journal = "Pattern Recogn.", | |
month = jul, | |
pages        = "2038--2048", | |
numpages = 11, | |
keywords = "Lazy learning, maximum a posteriori, Text categorization, | |
KNN, PMM, K-nearest neighbor, Multi-label learning, Natural | |
scene classification, Machine learning, Functional genomics, | |
ML-KNN, parametric mixture model, MAP, multi-label K-nearest | |
neighbor" | |
} | |
@inproceedings{papineni-2002-bleu, | |
author = "Papineni, Kishore and Roukos, Salim and Ward, Todd and Zhu, | |
Wei-Jing", | |
title = "{B}leu: a Method for Automatic Evaluation of Machine | |
Translation", | |
booktitle = "Proceedings of the 40th Annual Meeting of the Association for | |
Computational Linguistics", | |
year = 2002, | |
pages        = "311--318", | |
doi = "10.3115/1073083.1073135", | |
url = "https://doi.org/10.3115/1073083.1073135", | |
address = "Philadelphia, Pennsylvania, USA", | |
month = jul, | |
publisher = "Association for Computational Linguistics" | |
} | |
@article{vijayakumar-2016-diverse-beam-search, | |
author = "Vijayakumar, Ashwin K and Cogswell, Michael and Selvaraju, | |
Ramprasath R. and Sun, Qing and Lee, Stefan and Crandall, | |
David and Batra, Dhruv", | |
title = "Diverse Beam Search: Decoding Diverse Solutions From Neural | |
Sequence Models", | |
journal = "CoRR", | |
year = 2016, | |
url = "http://arxiv.org/abs/1610.02424v2", | |
abstract = "Neural sequence models are widely used to model time-series | |
data. Equally ubiquitous is the usage of beam search (BS) as | |
an approximate inference algorithm to decode output sequences | |
from these models. BS explores the search space in a greedy | |
left-right fashion retaining only the top-B candidates - | |
resulting in sequences that differ only slightly from each | |
other. Producing lists of nearly identical sequences is not | |
only computationally wasteful but also typically fails to | |
capture the inherent ambiguity of complex AI tasks. To | |
overcome this problem, we propose Diverse Beam Search (DBS), | |
an alternative to BS that decodes a list of diverse outputs | |
by optimizing for a diversity-augmented objective. We observe | |
that our method finds better top-1 solutions by controlling | |
for the exploration and exploitation of the search space - | |
implying that DBS is a better search algorithm. Moreover, | |
these gains are achieved with minimal computational or memory | |
over- head as compared to beam search. To demonstrate the | |
broad applicability of our method, we present results on | |
image captioning, machine translation and visual question | |
generation using both standard quantitative metrics and | |
qualitative human studies. Further, we study the role of | |
diversity for image-grounded language generation tasks as the | |
complexity of the image changes. We observe that our method | |
consistently outperforms BS and previously proposed | |
techniques for diverse decoding from neural sequence models.", | |
archivePrefix= "arXiv", | |
eprint = "1610.02424", | |
primaryClass = "cs.AI" | |
} | |
@article{huszar-2015-schedule-sampling-problem, | |
journal = "CoRR", | |
title = "How (not) to Train your Generative Model: Scheduled Sampling, | |
Likelihood, Adversary?", | |
author = "Husz{\'a}r, Ferenc", | |
archivePrefix= "arXiv", | |
year = 2015, | |
eprint = "1511.05101", | |
primaryClass = "stat.ML", | |
abstract = "Modern applications and progress in deep learning research | |
have created renewed interest for generative models of text | |
and of images. However, even today it is unclear what | |
objective functions one should use to train and evaluate | |
these models. In this paper we present two contributions. | |
Firstly, we present a critique of scheduled sampling, a | |
state-of-the-art training method that contributed to the | |
winning entry to the MSCOCO image captioning benchmark in | |
2015. Here we show that despite this impressive empirical | |
performance, the objective function underlying scheduled | |
sampling is improper and leads to an inconsistent learning | |
algorithm. Secondly, we revisit the problems that scheduled | |
sampling was meant to address, and present an alternative | |
interpretation. We argue that maximum likelihood is an | |
inappropriate training objective when the end-goal is to | |
generate natural-looking samples. We go on to derive an ideal | |
objective function to use in this situation instead. We | |
introduce a generalisation of adversarial training, and show | |
how such method can interpolate between maximum likelihood | |
training and our ideal training objective. To our knowledge | |
this is the first theoretical analysis that explains why | |
adversarial training tends to produce samples with higher | |
perceived quality.", | |
url = "http://arxiv.org/abs/1511.05101v1" | |
} | |
@article{lamb-2016-professor-forcing, | |
author = "Lamb, Alex and Goyal, Anirudh and Zhang, Ying and Zhang, | |
Saizheng and Courville, Aaron and Bengio, Yoshua", | |
title = "Professor Forcing: a New Algorithm for Training Recurrent | |
Networks", | |
journal = "CoRR", | |
year = 2016, | |
url = "http://arxiv.org/abs/1610.09038v1", | |
abstract = "The Teacher Forcing algorithm trains recurrent networks by | |
supplying observed sequence values as inputs during training | |
and using the network's own one-step-ahead predictions to do | |
multi-step sampling. We introduce the Professor Forcing | |
algorithm, which uses adversarial domain adaptation to | |
encourage the dynamics of the recurrent network to be the | |
same when training the network and when sampling from the | |
network over multiple time steps. We apply Professor Forcing | |
to language modeling, vocal synthesis on raw waveforms, | |
handwriting generation, and image generation. Empirically we | |
find that Professor Forcing acts as a regularizer, improving | |
test likelihood on character level Penn Treebank and | |
sequential MNIST. We also find that the model qualitatively | |
improves samples, especially when sampling for a large number | |
of time steps. This is supported by human evaluation of | |
sample quality. Trade-offs between Professor Forcing and | |
Scheduled Sampling are discussed. We produce T-SNEs showing | |
that Professor Forcing successfully makes the dynamics of the | |
network during training and sampling more similar.", | |
archivePrefix= "arXiv", | |
eprint = "1610.09038", | |
primaryClass = "stat.ML" | |
} | |
@inproceedings{zhang-2019-train-infer-gap, | |
title = "Bridging the Gap between Training and Inference for Neural | |
Machine Translation", | |
author = "Zhang, Wen and Feng, Yang and Meng, Fandong and You, Di and | |
Liu, Qun", | |
booktitle = "Proceedings of the 57th Annual Meeting of the Association for | |
Computational Linguistics", | |
month = jul, | |
year = 2019, | |
address = "Florence, Italy", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/P19-1426", | |
doi = "10.18653/v1/P19-1426", | |
pages        = "4334--4343", | |
abstract = "Neural Machine Translation (NMT) generates target words | |
sequentially in the way of predicting the next word | |
conditioned on the context words. At training time, it | |
predicts with the ground truth words as context while at | |
inference it has to generate the entire sequence from | |
scratch. This discrepancy of the fed context leads to error | |
accumulation among the way. Furthermore, word-level training | |
requires strict matching between the generated sequence and | |
the ground truth sequence which leads to overcorrection over | |
different but reasonable translations. In this paper, we | |
address these issues by sampling context words not only from | |
the ground truth sequence but also from the predicted | |
sequence by the model during training, where the predicted | |
sequence is selected with a sentence-level | |
optimum. Experiment results on Chinese-{\textgreater}English | |
and WMT{'}14 English-{\textgreater}German translation tasks | |
demonstrate that our approach can achieve significant | |
improvements on multiple datasets." | |
} | |
@article{hinton-2015-soft-target, | |
author = "Hinton, Geoffrey and Vinyals, Oriol and Dean, Jeff", | |
title = "Distilling the Knowledge in a Neural Network", | |
journal = "CoRR", | |
year = 2015, | |
url = "http://arxiv.org/abs/1503.02531v1", | |
abstract = "A very simple way to improve the performance of almost any | |
machine learning algorithm is to train many different models | |
on the same data and then to average their | |
predictions. Unfortunately, making predictions using a whole | |
ensemble of models is cumbersome and may be too | |
computationally expensive to allow deployment to a large | |
number of users, especially if the individual models are | |
large neural nets. Caruana and his collaborators have shown | |
that it is possible to compress the knowledge in an ensemble | |
into a single model which is much easier to deploy and we | |
develop this approach further using a different compression | |
technique. We achieve some surprising results on MNIST and we | |
show that we can significantly improve the acoustic model of | |
a heavily used commercial system by distilling the knowledge | |
in an ensemble of models into a single model. We also | |
introduce a new type of ensemble composed of one or more full | |
models and many specialist models which learn to distinguish | |
fine-grained classes that the full models confuse. Unlike a | |
mixture of experts, these specialist models can be trained | |
rapidly and in parallel.", | |
archivePrefix= "arXiv", | |
eprint = "1503.02531", | |
primaryClass = "stat.ML" | |
} | |
@article{tang-2015-soft-target, | |
author = "Tang, Zhiyuan and Wang, Dong and Zhang, Zhiyong", | |
title = "Recurrent Neural Network Training With Dark Knowledge | |
Transfer", | |
journal = "CoRR", | |
year = 2015, | |
url = "http://arxiv.org/abs/1505.04630v5", | |
abstract = "Recurrent neural networks (RNNs), particularly long | |
short-term memory (LSTM), have gained much attention in | |
automatic speech recognition (ASR). Although some successful | |
stories have been reported, training RNNs remains highly | |
challenging, especially with limited training data. Recent | |
research found that a well-trained model can be used as a | |
teacher to train other child models, by using the predictions | |
generated by the teacher model as supervision. This knowledge | |
transfer learning has been employed to train simple neural | |
nets with a complex one, so that the final performance can | |
reach a level that is infeasible to obtain by regular | |
training. In this paper, we employ the knowledge transfer | |
learning approach to train RNNs (precisely LSTM) using a deep | |
neural network (DNN) model as the teacher. This is different | |
from most of the existing research on knowledge transfer | |
learning, since the teacher (DNN) is assumed to be weaker | |
than the child (RNN); however, our experiments on an ASR task | |
showed that it works fairly well: without applying any tricks | |
on the learning scheme, this approach can train RNNs | |
successfully even with limited training data.", | |
archivePrefix= "arXiv", | |
eprint = "1505.04630", | |
primaryClass = "stat.ML" | |
} | |
@article{you-2017-lars, | |
author = "You, Yang and Gitman, Igor and Ginsburg, Boris", | |
title = "Large Batch Training of Convolutional Networks", | |
journal = "CoRR", | |
year = 2017, | |
url = "http://arxiv.org/abs/1708.03888v3", | |
abstract = "A common way to speed up training of large convolutional | |
networks is to add computational units. Training is then | |
performed using data-parallel synchronous Stochastic Gradient | |
Descent (SGD) with mini-batch divided between computational | |
units. With an increase in the number of nodes, the batch | |
size grows. But training with large batch size often results | |
in the lower model accuracy. We argue that the current recipe | |
for large batch training (linear learning rate scaling with | |
warm-up) is not general enough and training may diverge. To | |
overcome this optimization difficulties we propose a new | |
training algorithm based on Layer-wise Adaptive Rate Scaling | |
(LARS). Using LARS, we scaled Alexnet up to a batch size of | |
8K, and Resnet-50 to a batch size of 32K without loss in | |
accuracy.", | |
archivePrefix= "arXiv", | |
eprint = "1708.03888", | |
primaryClass = "cs.CV" | |
} | |
@article{le-2015-identity-rnn, | |
author = "Le, Quoc V. and Jaitly, Navdeep and Hinton, Geoffrey E.", | |
title = "A Simple Way To Initialize Recurrent Networks of Rectified | |
Linear Units", | |
journal = "CoRR", | |
year = 2015, | |
url = "http://arxiv.org/abs/1504.00941v2", | |
abstract = "Learning long term dependencies in recurrent networks is | |
difficult due to vanishing and exploding gradients. To | |
overcome this difficulty, researchers have developed | |
sophisticated optimization techniques and network | |
architectures. In this paper, we propose a simpler solution | |
that use recurrent neural networks composed of rectified | |
linear units. Key to our solution is the use of the identity | |
matrix or its scaled version to initialize the recurrent | |
weight matrix. We find that our solution is comparable to | |
LSTM on our four benchmarks: two toy problems involving | |
long-range temporal structures, a large language modeling | |
problem and a benchmark speech recognition problem.", | |
archivePrefix= "arXiv", | |
eprint = "1504.00941", | |
primaryClass = "cs.NE" | |
} | |
@inproceedings{bi-2013-efficient-multi-label, | |
author = "Bi, Wei and Kwok, James T.", | |
title = "Efficient Multi-Label Classification with Many Labels", | |
year = 2013, | |
publisher = "JMLR.org", | |
booktitle = "Proceedings of the 30th International Conference on | |
International Conference on Machine Learning - Volume 28", | |
pages = "III-405--III-413", | |
numpages = 9, | |
location = "Atlanta, GA, USA", | |
series = "ICML'13" | |
} | |
@article{raffel-2019-t5, | |
author = "Raffel, Colin and Shazeer, Noam and Roberts, Adam and Lee, | |
Katherine and Narang, Sharan and Matena, Michael and Zhou, | |
Yanqi and Li, Wei and Liu, Peter J.", | |
title = "Exploring the Limits of Transfer Learning With a Unified | |
Text-To-Text Transformer", | |
journal = "CoRR", | |
year = 2019, | |
url = "http://arxiv.org/abs/1910.10683v2", | |
abstract = "Transfer learning, where a model is first pre-trained on a | |
data-rich task before being fine-tuned on a downstream task, | |
has emerged as a powerful technique in natural language | |
processing (NLP). The effectiveness of transfer learning has | |
given rise to a diversity of approaches, methodology, and | |
practice. In this paper, we explore the landscape of transfer | |
learning techniques for NLP by introducing a unified | |
framework that converts every language problem into a | |
text-to-text format. Our systematic study compares | |
pre-training objectives, architectures, unlabeled datasets, | |
transfer approaches, and other factors on dozens of language | |
understanding tasks. By combining the insights from our | |
exploration with scale and our new ``Colossal Clean Crawled | |
Corpus'', we achieve state-of-the-art results on many | |
benchmarks covering summarization, question answering, text | |
classification, and more. To facilitate future work on | |
transfer learning for NLP, we release our dataset, | |
pre-trained models, and code.", | |
archivePrefix= "arXiv", | |
eprint = "1910.10683", | |
primaryClass = "cs.LG" | |
} | |
@inproceedings{kolitsas-2018-end-to-end-el, | |
author = "Kolitsas, Nikolaos and Ganea, Octavian-Eugen and Hofmann, | |
Thomas", | |
title = "End-to-End Neural Entity Linking", | |
booktitle = "Proceedings of the 22nd Conference on Computational Natural | |
Language Learning", | |
year = 2018, | |
pages = "519--529", | |
doi = "10.18653/v1/K18-1050", | |
url = "https://doi.org/10.18653/v1/K18-1050", | |
abstract = "Entity Linking (EL) is an essential task for semantic text | |
understanding and information extraction. Popular methods | |
separately address the Mention Detection (MD) and Entity | |
Disambiguation (ED) stages of EL, without leveraging their | |
mutual dependency. We here propose the first neural | |
end-to-end EL system that jointly discovers and links | |
entities in a text document. The main idea is to consider all | |
possible spans as potential mentions and learn contextual | |
similarity scores over their entity candidates that are | |
useful for both MD and ED decisions. Key components are | |
context-aware mention embeddings, entity embeddings and a | |
probabilistic mention - entity map, without demanding other | |
engineered features. Empirically, we show that our end-to-end | |
method significantly outperforms popular systems on the | |
Gerbil platform when enough training data is | |
available. Conversely, if testing datasets follow different | |
annotation conventions compared to the training set | |
(e.g. queries/ tweets vs news documents), our ED model | |
coupled with a traditional NER system offers the best or | |
second best EL accuracy.", | |
address = "Brussels, Belgium", | |
month = oct, | |
publisher = "Association for Computational Linguistics" | |
} | |
@article{raiman-2018-deeptype, | |
author = "Raiman, Jonathan and Raiman, Olivier", | |
title = "{DeepType}: Multilingual Entity Linking By Neural Type System | |
Evolution", | |
journal = "CoRR", | |
year = 2018, | |
url = "http://arxiv.org/abs/1802.01021v1", | |
abstract = "The wealth of structured (e.g. Wikidata) and unstructured | |
data about the world available today presents an incredible | |
opportunity for tomorrow's Artificial Intelligence. So far, | |
integration of these two different modalities is a difficult | |
process, involving many decisions concerning how best to | |
represent the information so that it will be captured or | |
useful, and hand-labeling large amounts of data. DeepType | |
overcomes this challenge by explicitly integrating symbolic | |
information into the reasoning process of a neural network | |
with a type system. First we construct a type system, and | |
second, we use it to constrain the outputs of a neural | |
network to respect the symbolic structure. We achieve this by | |
reformulating the design problem into a mixed integer | |
problem: create a type system and subsequently train a neural | |
network with it. In this reformulation discrete variables | |
select which parent-child relations from an ontology are | |
types within the type system, while continuous variables | |
control a classifier fit to the type system. The original | |
problem cannot be solved exactly, so we propose a 2-step | |
algorithm: 1) heuristic search or stochastic optimization | |
over discrete variables that define a type system informed by | |
an Oracle and a Learnability heuristic, 2) gradient descent | |
to fit classifier parameters. We apply DeepType to the | |
problem of Entity Linking on three standard datasets | |
(i.e. WikiDisamb30, CoNLL (YAGO), TAC KBP 2010) and find that | |
it outperforms all existing solutions by a wide margin, | |
including approaches that rely on a human-designed type | |
system or recent deep learning-based entity embeddings, while | |
explicitly using symbolic information lets it integrate new | |
entities without retraining.", | |
archivePrefix= "arXiv", | |
eprint = "1802.01021", | |
primaryClass = "cs.CL" | |
} | |
@inproceedings{le-2018-el-latent-relation, | |
title = "Improving Entity Linking by Modeling Latent Relations between | |
Mentions", | |
author = "Le, Phong and Titov, Ivan", | |
booktitle = "Proceedings of the 56th Annual Meeting of the Association for | |
Computational Linguistics (Volume 1: Long Papers)", | |
month = jul, | |
year = 2018, | |
address = "Melbourne, Australia", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/P18-1148", | |
doi = "10.18653/v1/P18-1148", | |
pages = "1595--1604", | |
abstract = "Entity linking involves aligning textual mentions of named | |
entities to their corresponding entries in a knowledge | |
base. Entity linking systems often exploit relations between | |
textual mentions in a document (e.g., coreference) to decide | |
if the linking decisions are compatible. Unlike previous | |
approaches, which relied on supervised systems or heuristics | |
to predict these relations, we treat relations as latent | |
variables in our neural entity-linking model. We induce the | |
relations without any supervision while optimizing the | |
entity-linking system in an end-to-end fashion. Our | |
multi-relational model achieves the best reported scores on | |
the standard benchmark (AIDA-CoNLL) and substantially | |
outperforms its relation-agnostic version. Its training also | |
converges much faster, suggesting that the injected | |
structural bias helps to explain regularities in the training | |
data." | |
} | |
@inproceedings{ganea-2017-deep-ed, | |
title = "Deep Joint Entity Disambiguation with Local Neural Attention", | |
author = "Ganea, Octavian-Eugen and Hofmann, Thomas", | |
booktitle = "Proceedings of the 2017 Conference on Empirical Methods in | |
Natural Language Processing", | |
month = sep, | |
year = 2017, | |
address = "Copenhagen, Denmark", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/D17-1277", | |
doi = "10.18653/v1/D17-1277", | |
pages = "2619--2629", | |
abstract = "We propose a novel deep learning model for joint | |
document-level entity disambiguation, which leverages learned | |
neural representations. Key components are entity embeddings, | |
a neural attention mechanism over local context windows, and | |
a differentiable joint inference stage for | |
disambiguation. Our approach thereby combines benefits of | |
deep learning with more traditional approaches such as | |
graphical models and probabilistic mention-entity | |
maps. Extensive experiments show that we are able to obtain | |
competitive or state-of-the-art accuracy at moderate | |
computational costs." | |
} | |
@article{vashishth-2020-medtype, | |
author = "Vashishth, Shikhar and Joshi, Rishabh and Dutt, Ritam and | |
Newman-Griffis, Denis and Rose, Carolyn", | |
title = "{MedType}: Improving Medical Entity Linking With Semantic Type | |
Prediction", | |
journal = "CoRR", | |
year = 2020, | |
url = "http://arxiv.org/abs/2005.00460v1", | |
abstract = "Medical entity linking is the task of identifying and | |
standardizing concepts referred in a scientific article or | |
clinical record. Existing methods adopt a two-step approach | |
of detecting mentions and identifying a list of candidate | |
concepts for them. In this paper, we probe the impact of | |
incorporating an entity disambiguation step in existing | |
entity linkers. For this, we present MedType, a novel method | |
that leverages the surrounding context to identify the | |
semantic type of a mention and uses it for filtering out | |
candidate concepts of the wrong types. We further present two | |
novel largescale, automatically-created datasets of medical | |
entity mentions: WIKIMED, a Wikipediabased dataset for | |
cross-domain transfer learning, and PUBMEDDS, a | |
distantly-supervised dataset of medical entity mentions in | |
biomedical abstracts. Through extensive experiments across | |
several datasets and methods, we demonstrate that MedType | |
pre-trained on our proposed datasets substantially improve | |
medical entity linking and gives state-of-the-art | |
performance. We make our source code and datasets publicly | |
available for medical entity linking research.", | |
archivePrefix= "arXiv", | |
eprint = "2005.00460", | |
primaryClass = "cs.CL" | |
} | |
@article{shi-2020-sentence-level-el, | |
author = "Shi, Wei and Zhang, Siyuan and Zhang, Zhiwei and Cheng, Hong | |
and Yu, Jeffrey Xu", | |
title = "Joint Embedding in Named Entity Linking on Sentence Level", | |
journal = "CoRR", | |
year = 2020, | |
url = "http://arxiv.org/abs/2002.04936v1", | |
abstract = "Named entity linking is to map an ambiguous mention in | |
documents to an entity in a knowledge base. The named entity | |
linking is challenging, given the fact that there are | |
multiple candidate entities for a mention in a document. It | |
is difficult to link a mention when it appears multiple times | |
in a document, since there are conflicts by the contexts | |
around the appearances of the mention. In addition, it is | |
difficult since the given training dataset is small due to | |
the reason that it is done manually to link a mention to its | |
mapping entity. In the literature, there are many reported | |
studies among which the recent embedding methods learn | |
vectors of entities from the training dataset at document | |
level. To address these issues, we focus on how to link | |
entity for mentions at a sentence level, which reduces the | |
noises introduced by different appearances of the same | |
mention in a document at the expense of insufficient | |
information to be used. We propose a new unified embedding | |
method by maximizing the relationships learned from knowledge | |
graphs. We confirm the effectiveness of our method in our | |
experimental studies.", | |
archivePrefix= "arXiv", | |
eprint = "2002.04936", | |
primaryClass = "cs.CL" | |
} | |
@article{broscheit-2020-bert-el, | |
author = "Broscheit, Samuel", | |
title = "Investigating Entity Knowledge in {BERT} With Simple Neural | |
End-To-End Entity Linking", | |
journal = "CoRR", | |
year = 2020, | |
url = "http://arxiv.org/abs/2003.05473v1", | |
abstract = "A typical architecture for end-to-end entity linking systems | |
consists of three steps: mention detection, candidate | |
generation and entity disambiguation. In this study we | |
investigate the following questions: (a) Can all those steps | |
be learned jointly with a model for contextualized | |
text-representations, i.e. BERT (Devlin et al., 2019)? (b) | |
How much entity knowledge is already contained in pretrained | |
BERT? (c) Does additional entity knowledge improve BERT's | |
performance in downstream tasks? To this end, we propose an | |
extreme simplification of the entity linking setup that works | |
surprisingly well: simply cast it as a per token | |
classification over the entire entity vocabulary (over 700K | |
classes in our case). We show on an entity linking benchmark | |
that (i) this model improves the entity representations over | |
plain BERT, (ii) that it outperforms entity linking | |
architectures that optimize the tasks separately and (iii) | |
that it only comes second to the current state-of-the-art | |
that does mention detection and entity disambiguation | |
jointly. Additionally, we investigate the usefulness of | |
entity-aware token-representations in the text-understanding | |
benchmark GLUE, as well as the question answering benchmarks | |
SQUAD V2 and SWAG and also the EN-DE WMT14 machine | |
translation benchmark. To our surprise, we find that most of | |
those benchmarks do not benefit from additional entity | |
knowledge, except for a task with very small training data, | |
the RTE task in GLUE, which improves by 2 \%.", | |
archivePrefix= "arXiv", | |
eprint = "2003.05473", | |
primaryClass = "cs.CL" | |
} | |
@article{chen-2020-latent-entity-type, | |
author = "Chen, Shuang and Wang, Jinpeng and Jiang, Feng and Lin, | |
Chin-Yew", | |
title = "Improving Entity Linking By Modeling Latent Entity Type | |
Information", | |
journal = "CoRR", | |
year = 2020, | |
url = "http://arxiv.org/abs/2001.01447v1", | |
abstract = "Existing state of the art neural entity linking models employ | |
attention-based bag-of-words context model and pre-trained | |
entity embeddings bootstrapped from word embeddings to assess | |
topic level context compatibility. However, the latent entity | |
type information in the immediate context of the mention is | |
neglected, which causes the models often link mentions to | |
incorrect entities with incorrect type. To tackle this | |
problem, we propose to inject latent entity type information | |
into the entity embeddings based on pre-trained BERT. In | |
addition, we integrate a BERT-based entity similarity score | |
into the local context model of a state-of-the-art model to | |
better capture latent entity type information. Our model | |
significantly outperforms the state-of-the-art entity linking | |
models on standard benchmark (AIDA-CoNLL). Detailed | |
experiment analysis demonstrates that our model corrects most | |
of the type errors produced by the direct baseline.", | |
archivePrefix= "arXiv", | |
eprint = "2001.01447", | |
primaryClass = "cs.CL" | |
} | |
@article{zhu-2019-latte, | |
author = "Zhu, Ming and Celikkaya, Busra and Bhatia, Parminder and | |
Reddy, Chandan K.", | |
title = "{LATTE}: Latent Type Modeling for Biomedical Entity Linking", | |
journal = "CoRR", | |
year = 2019, | |
url = "http://arxiv.org/abs/1911.09787v2", | |
abstract = "Entity linking is the task of linking mentions of named | |
entities in natural language text, to entities in a curated | |
knowledge-base. This is of significant importance in the | |
biomedical domain, where it could be used to semantically | |
annotate a large volume of clinical records and biomedical | |
literature, to standardized concepts described in an ontology | |
such as Unified Medical Language System (UMLS). We observe | |
that with precise type information, entity disambiguation | |
becomes a straightforward task. However, fine-grained type | |
information is usually not available in biomedical | |
domain. Thus, we propose LATTE, a LATent Type Entity Linking | |
model, that improves entity linking by modeling the latent | |
fine-grained type information about mentions and entities. | |
Unlike previous methods that perform entity linking directly | |
between the mentions and the entities, LATTE jointly does | |
entity disambiguation, and latent fine-grained type learning, | |
without direct supervision. We evaluate our model on two | |
biomedical datasets: MedMentions, a large scale public | |
dataset annotated with UMLS concepts, and a de-identified | |
corpus of dictated doctor's notes that has been annotated | |
with ICD concepts. Extensive experimental evaluation shows | |
our model achieves significant performance improvements over | |
several state-of-the-art techniques.", | |
archivePrefix= "arXiv", | |
eprint = "1911.09787", | |
primaryClass = "cs.CL" | |
} | |
@article{chen-2019-yelm, | |
author = "Chen, Haotian and Wadhwa, Sahil and Li, Xi David and | |
Zukov-Gregoric, Andrej", | |
title = "{YELM}: End-To-End Contextualized Entity Linking", | |
journal = "CoRR", | |
year = 2019, | |
url = "http://arxiv.org/abs/1911.03834v1", | |
abstract = "We propose yet another entity linking model (YELM) which | |
links words to entities instead of spans. This overcomes any | |
difficulties associated with the selection of good candidate | |
mention spans and makes the joint training of mention | |
detection (MD) and entity disambiguation (ED) easily | |
possible. Our model is based on BERT and produces | |
contextualized word embeddings which are trained against a | |
joint MD and ED objective. We achieve state-of-the-art | |
results on several standard entity linking (EL) datasets.", | |
archivePrefix= "arXiv", | |
eprint = "1911.03834", | |
primaryClass = "cs.CL" | |
} | |
@article{martins-2019-joint-ner-el, | |
author = "Martins, Pedro Henrique and Marinho, Zita and Martins, | |
Andr{\'e} F. T.", | |
title = "Joint Learning of Named Entity Recognition and Entity | |
Linking", | |
journal = "CoRR", | |
year = 2019, | |
url = "http://arxiv.org/abs/1907.08243v1", | |
abstract = "Named entity recognition (NER) and entity linking (EL) are | |
two fundamentally related tasks, since in order to perform | |
EL, first the mentions to entities have to be | |
detected. However, most entity linking approaches disregard | |
the mention detection part, assuming that the correct | |
mentions have been previously detected. In this paper, we | |
perform joint learning of NER and EL to leverage their | |
relatedness and obtain a more robust and generalisable | |
system. For that, we introduce a model inspired by the | |
Stack-LSTM approach (Dyer et al., 2015). We observe that, in | |
fact, doing multi-task learning of NER and EL improves the | |
performance in both tasks when comparing with models trained | |
with individual objectives. Furthermore, we achieve results | |
competitive with the state-of-the-art in both NER and EL.", | |
archivePrefix= "arXiv", | |
eprint = "1907.08243", | |
primaryClass = "cs.CL" | |
} | |
@inproceedings{logeswaran-2019-zero-shot-el, | |
title = "Zero-Shot Entity Linking by Reading Entity Descriptions", | |
author = "Logeswaran, Lajanugen and Chang, Ming-Wei and Lee, Kenton and | |
Toutanova, Kristina and Devlin, Jacob and Lee, Honglak", | |
booktitle = "Proceedings of the 57th Annual Meeting of the Association for | |
Computational Linguistics", | |
month = jul, | |
year = 2019, | |
address = "Florence, Italy", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/P19-1335", | |
doi = "10.18653/v1/P19-1335", | |
pages = "3449--3460", | |
abstract = "We present the zero-shot entity linking task, where mentions | |
must be linked to unseen entities without in-domain labeled | |
data. The goal is to enable robust transfer to highly | |
specialized domains, and so no metadata or alias tables are | |
assumed. In this setting, entities are only identified by | |
text descriptions, and models must rely strictly on language | |
understanding to resolve the new entities. First, we show | |
that strong reading comprehension models pre-trained on large | |
unlabeled data can be used to generalize to unseen | |
entities. Second, we propose a simple and effective adaptive | |
pre-training strategy, which we term domain-adaptive | |
pre-training (DAP), to address the domain shift problem | |
associated with linking unseen entities in a new domain. We | |
present experiments on a new dataset that we construct for | |
this task and show that DAP improves over strong pre-training | |
baselines, including BERT. The data and code are available at | |
https://github.com/lajanugen/zeshel." | |
} | |
@inproceedings{le-2019-distant-el, | |
title = "Distant Learning for Entity Linking with Automatic Noise | |
Detection", | |
author = "Le, Phong and Titov, Ivan", | |
booktitle = "Proceedings of the 57th Annual Meeting of the Association for | |
Computational Linguistics", | |
month = jul, | |
year = 2019, | |
address = "Florence, Italy", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/P19-1400", | |
doi = "10.18653/v1/P19-1400", | |
pages = "4081--4090", | |
abstract = "Accurate entity linkers have been produced for domains and | |
languages where annotated data (i.e., texts linked to a | |
knowledge base) is available. However, little progress has | |
been made for the settings where no or very limited amounts | |
of labeled data are present (e.g., legal or most scientific | |
domains). In this work, we show how we can learn to link | |
mentions without having any labeled examples, only a | |
knowledge base and a collection of unannotated texts from the | |
corresponding domain. In order to achieve this, we frame the | |
task as a multi-instance learning problem and rely on surface | |
matching to create initial noisy labels. As the learning | |
signal is weak and our surrogate labels are noisy, we | |
introduce a noise detection component in our model: it lets | |
the model detect and disregard examples which are likely to | |
be noisy. Our method, jointly learning to detect noise and | |
link entities, greatly outperforms the surface matching | |
baseline. For a subset of entity categories, it even | |
approaches the performance of supervised learning." | |
} | |
@inproceedings{mondal-2019-triplet-network-el, | |
title = "Medical Entity Linking using Triplet Network", | |
author = "Mondal, Ishani and Purkayastha, Sukannya and Sarkar, Sudeshna | |
and Goyal, Pawan and Pillai, Jitesh and Bhattacharyya, | |
Amitava and Gattu, Mahanandeeshwar", | |
booktitle = "Proceedings of the 2nd Clinical Natural Language Processing | |
Workshop", | |
month = jun, | |
year = 2019, | |
address = "Minneapolis, Minnesota, USA", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/W19-1912", | |
doi = "10.18653/v1/W19-1912", | |
pages = "95--100", | |
abstract = "Entity linking (or Normalization) is an essential task in | |
text mining that maps the entity mentions in the medical text | |
to standard entities in a given Knowledge Base (KB). This | |
task is of great importance in the medical domain. It can | |
also be used for merging different medical and clinical | |
ontologies. In this paper, we center around the problem of | |
disease linking or normalization. This task is executed in | |
two phases: candidate generation and candidate scoring. In | |
this paper, we present an approach to rank the candidate | |
Knowledge Base entries based on their similarity with disease | |
mention. We make use of the Triplet Network for candidate | |
ranking. While the existing methods have used carefully | |
generated sieves and external resources for candidate | |
generation, we introduce a robust and portable candidate | |
generation scheme that does not make use of the hand-crafted | |
rules. Experimental results on the standard benchmark NCBI | |
disease dataset demonstrate that our system outperforms the | |
prior methods by a significant margin." | |
} | |
@article{yang-2019-dca, | |
author = "Yang, Xiyuan and Gu, Xiaotao and Lin, Sheng and Tang, Siliang | |
and Zhuang, Yueting and Wu, Fei and Chen, Zhigang and Hu, | |
Guoping and Ren, Xiang", | |
title = "Learning Dynamic Context Augmentation for Global Entity | |
Linking", | |
journal = "CoRR", | |
year = 2019, | |
url = "http://arxiv.org/abs/1909.02117v1", | |
abstract = "Despite of the recent success of collective entity linking | |
(EL) methods, these ``global`` inference methods may yield | |
sub-optimal results when the ``all-mention coherence`` | |
assumption breaks, and often suffer from high computational | |
cost at the inference stage, due to the complex search | |
space. In this paper, we propose a simple yet effective | |
solution, called Dynamic Context Augmentation (DCA), for | |
collective EL, which requires only one pass through the | |
mentions in a document. DCA sequentially accumulates context | |
information to make efficient, collective inference, and can | |
cope with different local EL models as a plug-and-enhance | |
module. We explore both supervised and reinforcement learning | |
strategies for learning the DCA model. Extensive experiments | |
show the effectiveness of our model with different learning | |
settings, base models, decision orders and attention | |
mechanisms.", | |
archivePrefix= "arXiv", | |
eprint = "1909.02117", | |
primaryClass = "cs.CL" | |
} | |
@inproceedings{murty-2018-hierarchical-losses, | |
title = "Hierarchical Losses and New Resources for Fine-grained Entity | |
Typing and Linking", | |
author = "Murty, Shikhar and Verga, Patrick and Vilnis, Luke and | |
Radovanovic, Irena and McCallum, Andrew", | |
booktitle = "Proceedings of the 56th Annual Meeting of the Association for | |
Computational Linguistics (Volume 1: Long Papers)", | |
month = jul, | |
year = 2018, | |
address = "Melbourne, Australia", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/P18-1010", | |
doi = "10.18653/v1/P18-1010", | |
pages = "97--109", | |
abstract = "Extraction from raw text to a knowledge base of entities and | |
fine-grained types is often cast as prediction into a flat | |
set of entity and type labels, neglecting the rich | |
hierarchies over types and entities contained in curated | |
ontologies. Previous attempts to incorporate hierarchical | |
structure have yielded little benefit and are restricted to | |
shallow ontologies. This paper presents new methods using | |
real and complex bilinear mappings for integrating | |
hierarchical information, yielding substantial improvement | |
over flat predictions in entity linking and fine-grained | |
entity typing, and achieving new state-of-the-art results for | |
end-to-end models on the benchmark FIGER dataset. We also | |
present two new human-annotated datasets containing wide and | |
deep hierarchies which we will release to the community to | |
encourage further research in this direction: | |
\textit{MedMentions}, a collection of PubMed abstracts in | |
which 246k mentions have been mapped to the massive UMLS | |
ontology; and \textit{TypeNet}, which aligns Freebase types | |
with the WordNet hierarchy to obtain nearly 2k entity | |
types. In experiments on all three datasets we show | |
substantial gains from hierarchy-aware training." | |
} | |
@inproceedings{zhong-2018-colink, | |
title = "{CoLink}: An unsupervised framework for user identity linkage", | |
author = "Zhong, Zexuan and Cao, Yong and Guo, Mu and Nie, Zaiqing", | |
booktitle = "Thirty-Second AAAI Conference on Artificial Intelligence", | |
year = 2018 | |
} | |
@inproceedings{du-2019-extract-symptoms, | |
title = "Extracting Symptoms and their Status from Clinical | |
Conversations", | |
author = "Du, Nan and Chen, Kai and Kannan, Anjuli and Tran, Linh and | |
Chen, Yuhui and Shafran, Izhak", | |
booktitle = "Proceedings of the 57th Annual Meeting of the Association for | |
Computational Linguistics", | |
month = jul, | |
year = 2019, | |
address = "Florence, Italy", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/P19-1087", | |
doi = "10.18653/v1/P19-1087", | |
pages = "915--925", | |
abstract = "This paper describes novel models tailored for a new | |
application, that of extracting the symptoms mentioned in | |
clinical conversations along with their status. Lack of any | |
publicly available corpus in this privacy-sensitive domain | |
led us to develop our own corpus, consisting of about 3K | |
conversations annotated by professional medical scribes. We | |
propose two novel deep learning approaches to infer the | |
symptom names and their status: (1) a new hierarchical | |
span-attribute tagging (SA-T) model, trained using curriculum | |
learning, and (2) a variant of sequence-to-sequence model | |
which decodes the symptoms and their status from a few | |
speaker turns within a sliding window over the | |
conversation. This task stems from a realistic application of | |
assisting medical providers in capturing symptoms mentioned | |
by patients from their clinical conversations. To reflect | |
this application, we define multiple metrics. From | |
inter-rater agreement, we find that the task is inherently | |
difficult. We conduct comprehensive evaluations on several | |
contrasting conditions and observe that the performance of | |
the models range from an F-score of 0.5 to 0.8 depending on | |
the condition. Our analysis not only reveals the inherent | |
challenges of the task, but also provides useful directions | |
to improve the models." | |
} | |
@article{sarrouti-2020-sembionlqa,
  title        = "{SemBioNLQA}: A semantic biomedical question answering system
                 for retrieving exact and ideal answers to natural language
                 questions",
  journal      = "Artificial Intelligence in Medicine",
  volume       = 102,
  pages        = 101767,
  year         = 2020,
  issn         = "0933-3657",
  doi          = "10.1016/j.artmed.2019.101767",
  url          =
                 "http://www.sciencedirect.com/science/article/pii/S0933365718302756",
  author       = "Sarrouti, Mourad and {Ouatik El Alaoui}, Said",
  keywords     = "Biomedical question answering, Information retrieval, Passage
                 retrieval, Natural language processing, Machine learning,
                 Biomedical informatics, BioASQ",
  abstract     = "Background and objective Question answering (QA), the
                 identification of short accurate answers to users questions
                 written in natural language expressions, is a longstanding
                 issue widely studied over the last decades in the
                 open-domain. However, it still remains a real challenge in
                 the biomedical domain as the most of the existing systems
                 support a limited amount of question and answer types as well
                 as still require further efforts in order to improve their
                 performance in terms of precision for the supported
                 questions. Here, we present a semantic biomedical QA system
                 named SemBioNLQA which has the ability to handle the kinds of
                 yes/no, factoid, list, and summary natural language
                 questions. Methods This paper describes the system
                 architecture and an evaluation of the developed end-to-end
                 biomedical QA system named SemBioNLQA, which consists of
                 question classification, document retrieval, passage
                 retrieval and answer extraction modules. It takes natural
                 language questions as input, and outputs both short precise
                 answers and summaries as results. The SemBioNLQA system,
                 dealing with four types of questions, is based on (1)
                 handcrafted lexico-syntactic patterns and a machine learning
                 algorithm for question classification, (2) PubMed search
                 engine and UMLS similarity for document retrieval, (3) the
                 BM25 model, stemmed words and UMLS concepts for passage
                 retrieval, and (4) UMLS metathesaurus, BioPortal synonyms,
                 sentiment analysis and term frequency metric for answer
                 extraction. Results and conclusion Compared with the current
                 state-of-the-art biomedical QA systems, SemBioNLQA, a fully
                 automated system, has the potential to deal with a large
                 amount of question and answer types. SemBioNLQA retrieves
                 quickly users' information needs by returning exact answers
                 (e.g., ``yes'', ``no'', a biomedical entity name, etc.) and ideal
                 answers (i.e., paragraph-sized summaries of relevant
                 information) for yes/no, factoid and list questions, whereas
                 it provides only the ideal answers for summary
                 questions. Moreover, experimental evaluations performed on
                 biomedical questions and answers provided by the BioASQ
                 challenge especially in 2015, 2016 and 2017 (as part of our
                 participation), show that SemBioNLQA achieves good
                 performances compared with the most current state-of-the-art
                 systems and allows a practical and competitive alternative to
                 help information seekers find exact and ideal answers to
                 their biomedical questions. The SemBioNLQA source code is
                 publicly available at
                 https://github.com/sarrouti/sembionlqa."
}
@article{demner-fushman-2019-health-qa,
  title        = "Consumer health information and question answering: helping
                 consumers find answers to their health-related information
                 needs",
  author       = "Demner-Fushman, Dina and Mrabet, Yassine and Ben Abacha,
                 Asma",
  journal      = "Journal of the American Medical Informatics Association",
  year         = 2019
}
@inproceedings{lin-2019-symptom-graph,
  title        = "Enhancing Dialogue Symptom Diagnosis with Global Attention
                 and Symptom Graph",
  author       = "Lin, Xinzhu and He, Xiahui and Chen, Qin and Tou, Huaixiao
                 and Wei, Zhongyu and Chen, Ting",
  booktitle    = "Proceedings of the 2019 Conference on Empirical Methods in
                 Natural Language Processing and the 9th International Joint
                 Conference on Natural Language Processing (EMNLP-IJCNLP)",
  month        = nov,
  year         = 2019,
  address      = "Hong Kong, China",
  publisher    = "Association for Computational Linguistics",
  url          = "https://www.aclweb.org/anthology/D19-1508",
  doi          = "10.18653/v1/D19-1508",
  pages        = "5033--5042",
  abstract     = "Symptom diagnosis is a challenging yet profound problem in
                 natural language processing. Most previous research focus on
                 investigating the standard electronic medical records for
                 symptom diagnosis, while the dialogues between doctors and
                 patients that contain more rich information are not well
                 studied. In this paper, we first construct a dialogue symptom
                 diagnosis dataset based on an online medical forum with a
                 large amount of dialogues between patients and doctors. Then,
                 we provide some benchmark models on this dataset to boost the
                 research of dialogue symptom diagnosis. In order to further
                 enhance the performance of symptom diagnosis over dialogues,
                 we propose a global attention mechanism to capture more
                 symptom related information, and build a symptom graph to
                 model the associations between symptoms rather than treating
                 each symptom independently. Experimental results show that
                 both the global attention and symptom graph are effective to
                 boost dialogue symptom diagnosis. In particular, our proposed
                 model achieves the state-of-the-art performance on the
                 constructed dataset."
}
@inproceedings{dusek-2016-context-aware,
  title        = "A Context-aware Natural Language Generator for Dialogue
                 Systems",
  author       = "Du{\v{s}}ek, Ond{\v{r}}ej and Jur{\v{c}}{\'\i}{\v{c}}ek,
                 Filip",
  booktitle    = "Proceedings of the 17th Annual Meeting of the Special
                 Interest Group on Discourse and Dialogue",
  month        = sep,
  year         = 2016,
  address      = "Los Angeles",
  publisher    = "Association for Computational Linguistics",
  url          = "https://www.aclweb.org/anthology/W16-3622",
  doi          = "10.18653/v1/W16-3622",
  pages        = "185--190"
}
@inproceedings{ghosal-2019-dialogue-gcn,
  title        = "{DialogueGCN}: A Graph Convolutional Neural Network for
                 Emotion Recognition in Conversation",
  author       = "Ghosal, Deepanway and Majumder, Navonil and Poria, Soujanya
                 and Chhaya, Niyati and Gelbukh, Alexander",
  booktitle    = "Proceedings of the 2019 Conference on Empirical Methods in
                 Natural Language Processing and the 9th International Joint
                 Conference on Natural Language Processing (EMNLP-IJCNLP)",
  month        = nov,
  year         = 2019,
  address      = "Hong Kong, China",
  publisher    = "Association for Computational Linguistics",
  url          = "https://www.aclweb.org/anthology/D19-1015",
  doi          = "10.18653/v1/D19-1015",
  pages        = "154--164",
  abstract     = "Emotion recognition in conversation (ERC) has received much
                 attention, lately, from researchers due to its potential
                 widespread applications in diverse areas, such as
                 health-care, education, and human resources. In this paper,
                 we present Dialogue Graph Convolutional Network
                 (DialogueGCN), a graph neural network based approach to
                 ERC. We leverage self and inter-speaker dependency of the
                 interlocutors to model conversational context for emotion
                 recognition. Through the graph network, DialogueGCN addresses
                 context propagation issues present in the current RNN-based
                 methods. We empirically show that this method alleviates such
                 issues, while outperforming the current state of the art on a
                 number of benchmark emotion classification datasets."
}
@inproceedings{chen-2019-working-memory,
  title        = "A Working Memory Model for Task-oriented Dialog Response
                 Generation",
  author       = "Chen, Xiuyi and Xu, Jiaming and Xu, Bo",
  booktitle    = "Proceedings of the 57th Annual Meeting of the Association for
                 Computational Linguistics",
  month        = jul,
  year         = 2019,
  address      = "Florence, Italy",
  publisher    = "Association for Computational Linguistics",
  url          = "https://www.aclweb.org/anthology/P19-1258",
  doi          = "10.18653/v1/P19-1258",
  pages        = "2687--2693",
  abstract     = "Recently, to incorporate external Knowledge Base (KB)
                 information, one form of world knowledge, several end-to-end
                 task-oriented dialog systems have been proposed. These
                 models, however, tend to confound the dialog history with KB
                 tuples and simply store them into one memory. Inspired by the
                 psychological studies on working memory, we propose a working
                 memory model (WMM2Seq) for dialog response generation. Our
                 WMM2Seq adopts a working memory to interact with two
                 separated long-term memories, which are the episodic memory
                 for memorizing dialog history and the semantic memory for
                 storing KB tuples. The working memory consists of a central
                 executive to attend to the aforementioned memories, and a
                 short-term storage system to store the {``}activated{''}
                 contents from the long-term memories. Furthermore, we
                 introduce a context-sensitive perceptual process for the
                 token representations of dialog history, and then feed them
                 into the episodic memory. Extensive experiments on two
                 task-oriented dialog datasets demonstrate that our WMM2Seq
                 significantly outperforms the state-of-the-art results in
                 several evaluation metrics."
}
@inproceedings{su-2019-utterance-rewriter,
  title        = "Improving Multi-turn Dialogue Modelling with Utterance
                 {ReWriter}",
  author       = "Su, Hui and Shen, Xiaoyu and Zhang, Rongzhi and Sun, Fei and
                 Hu, Pengwei and Niu, Cheng and Zhou, Jie",
  booktitle    = "Proceedings of the 57th Annual Meeting of the Association for
                 Computational Linguistics",
  month        = jul,
  year         = 2019,
  address      = "Florence, Italy",
  publisher    = "Association for Computational Linguistics",
  url          = "https://www.aclweb.org/anthology/P19-1003",
  doi          = "10.18653/v1/P19-1003",
  pages        = "22--31",
  abstract     = "Recent research has achieved impressive results in
                 single-turn dialogue modelling. In the multi-turn setting,
                 however, current models are still far from satisfactory. One
                 major challenge is the frequently occurred coreference and
                 information omission in our daily conversation, making it
                 hard for machines to understand the real intention. In this
                 paper, we propose rewriting the human utterance as a
                 pre-process to help multi-turn dialogue modelling. Each
                 utterance is first rewritten to recover all coreferred and
                 omitted information. The next processing steps are then
                 performed based on the rewritten utterance. To properly train
                 the utterance rewriter, we collect a new dataset with human
                 annotations and introduce a Transformer-based utterance
                 rewriting architecture using the pointer network. We show the
                 proposed architecture achieves remarkably good performance on
                 the utterance rewriting task. The trained utterance rewriter
                 can be easily integrated into online chatbots and brings
                 general improvement over different domains."
}
@inproceedings{ippolito-2019-decoding-methods,
  title        = "Comparison of Diverse Decoding Methods from Conditional
                 Language Models",
  author       = "Ippolito, Daphne and Kriz, Reno and Sedoc, Jo{\~a}o and
                 Kustikova, Maria and Callison-Burch, Chris",
  booktitle    = "Proceedings of the 57th Annual Meeting of the Association for
                 Computational Linguistics",
  month        = jul,
  year         = 2019,
  address      = "Florence, Italy",
  publisher    = "Association for Computational Linguistics",
  url          = "https://www.aclweb.org/anthology/P19-1365",
  doi          = "10.18653/v1/P19-1365",
  pages        = "3752--3762",
  abstract     = "While conditional language models have greatly improved in
                 their ability to output high quality natural language, many
                 NLP applications benefit from being able to generate a
                 diverse set of candidate sequences. Diverse decoding
                 strategies aim to, within a given-sized candidate list, cover
                 as much of the space of high-quality outputs as possible,
                 leading to improvements for tasks that rerank and combine
                 candidate outputs. Standard decoding methods, such as beam
                 search, optimize for generating high likelihood sequences
                 rather than diverse ones, though recent work has focused on
                 increasing diversity in these methods. In this work, we
                 perform an extensive survey of decoding-time strategies for
                 generating diverse outputs from a conditional language
                 model. In addition, we present a novel method where we
                 over-sample candidates, then use clustering to remove similar
                 sequences, thus achieving high diversity without sacrificing
                 quality."
}
@inproceedings{qian-2019-daml,
  title        = "Domain Adaptive Dialog Generation via Meta Learning",
  author       = "Qian, Kun and Yu, Zhou",
  booktitle    = "Proceedings of the 57th Annual Meeting of the Association for
                 Computational Linguistics",
  month        = jul,
  year         = 2019,
  address      = "Florence, Italy",
  publisher    = "Association for Computational Linguistics",
  url          = "https://www.aclweb.org/anthology/P19-1253",
  doi          = "10.18653/v1/P19-1253",
  pages        = "2639--2649",
  abstract     = "Domain adaptation is an essential task in dialog system
                 building because there are so many new dialog tasks created
                 for different needs every day. Collecting and annotating
                 training data for these new tasks is costly since it involves
                 real user interactions. We propose a domain adaptive dialog
                 generation method based on meta-learning (DAML). DAML is an
                 end-to-end trainable dialog system model that learns from
                 multiple rich-resource tasks and then adapts to new domains
                 with minimal training samples. We train a dialog system model
                 using multiple rich-resource single-domain dialog data by
                 applying the model-agnostic meta-learning algorithm to dialog
                 domain. The model is capable of learning a competitive dialog
                 system on a new domain with only a few training examples in
                 an efficient manner. The two-step gradient updates in DAML
                 enable the model to learn general features across multiple
                 tasks. We evaluate our method on a simulated dialog dataset
                 and achieve state-of-the-art performance, which is
                 generalizable to new tasks."
}
@inproceedings{sankar-2019-conversation-history,
  title        = "Do Neural Dialog Systems Use the Conversation History
                 Effectively? An Empirical Study",
  author       = "Sankar, Chinnadhurai and Subramanian, Sandeep and Pal, Chris
                 and Chandar, Sarath and Bengio, Yoshua",
  booktitle    = "Proceedings of the 57th Annual Meeting of the Association for
                 Computational Linguistics",
  month        = jul,
  year         = 2019,
  address      = "Florence, Italy",
  publisher    = "Association for Computational Linguistics",
  url          = "https://www.aclweb.org/anthology/P19-1004",
  doi          = "10.18653/v1/P19-1004",
  pages        = "32--37",
  abstract     = "Neural generative models have been become increasingly
                 popular when building conversational agents. They offer
                 flexibility, can be easily adapted to new domains, and
                 require minimal domain engineering. A common criticism of
                 these systems is that they seldom understand or use the
                 available dialog history effectively. In this paper, we take
                 an empirical approach to understanding how these models use
                 the available dialog history by studying the sensitivity of
                 the models to artificially introduced unnatural changes or
                 perturbations to their context at test time. We experiment
                 with 10 different types of perturbations on 4 multi-turn
                 dialog datasets and find that commonly used neural dialog
                 architectures like recurrent and transformer-based seq2seq
                 models are rarely sensitive to most perturbations such as
                 missing or reordering utterances, shuffling words, etc. Also,
                 by open-sourcing our code, we believe that it will serve as a
                 useful diagnostic tool for evaluating dialog systems in the
                 future."
}
@inproceedings{quan-2019-gecor,
  title        = "{GECOR}: An End-to-End Generative Ellipsis and Co-reference
                 Resolution Model for Task-Oriented Dialogue",
  author       = "Quan, Jun and Xiong, Deyi and Webber, Bonnie and Hu,
                 Changjian",
  booktitle    = "Proceedings of the 2019 Conference on Empirical Methods in
                 Natural Language Processing and the 9th International Joint
                 Conference on Natural Language Processing (EMNLP-IJCNLP)",
  month        = nov,
  year         = 2019,
  address      = "Hong Kong, China",
  publisher    = "Association for Computational Linguistics",
  url          = "https://www.aclweb.org/anthology/D19-1462",
  doi          = "10.18653/v1/D19-1462",
  pages        = "4547--4557",
  abstract     = "Ellipsis and co-reference are common and ubiquitous
                 especially in multi-turn dialogues. In this paper, we treat
                 the resolution of ellipsis and co-reference in dialogue as a
                 problem of generating omitted or referred expressions from
                 the dialogue context. We therefore propose a unified
                 end-to-end Generative Ellipsis and CO-reference Resolution
                 model (GECOR) in the context of dialogue. The model can
                 generate a new pragmatically complete user utterance by
                 alternating the generation and copy mode for each user
                 utterance. A multi-task learning framework is further
                 proposed to integrate the GECOR into an end-to-end
                 task-oriented dialogue. In order to train both the GECOR and
                 the multi-task learning framework, we manually construct a
                 new dataset on the basis of the public dataset CamRest676
                 with both ellipsis and co-reference annotation. On this
                 dataset, intrinsic evaluations on the resolution of ellipsis
                 and co-reference show that the GECOR model significantly
                 outperforms the sequence-to-sequence (seq2seq) baseline model
                 in terms of EM, BLEU and F1 while extrinsic evaluations on
                 the downstream dialogue task demonstrate that our multi-task
                 learning framework with GECOR achieves a higher success rate
                 of task completion than TSCP, a state-of-the-art end-to-end
                 task-oriented dialogue model."
}
@inproceedings{zhao-2018-zsdg,
  title        = "Zero-Shot Dialog Generation with Cross-Domain Latent Actions",
  author       = "Zhao, Tiancheng and Eskenazi, Maxine",
  booktitle    = "Proceedings of the 19th Annual {SIGdial} Meeting on Discourse
                 and Dialogue",
  month        = jul,
  year         = 2018,
  address      = "Melbourne, Australia",
  publisher    = "Association for Computational Linguistics",
  url          = "https://www.aclweb.org/anthology/W18-5001",
  doi          = "10.18653/v1/W18-5001",
  pages        = "1--10",
  abstract     = "This paper introduces zero-shot dialog generation (ZSDG), as
                 a step towards neural dialog systems that can instantly
                 generalize to new situations with minimum data. ZSDG requires
                 an end-to-end generative dialog system to generalize to a new
                 domain for which only a domain description is provided and no
                 training dialogs are available. Then a novel learning
                 framework, Action Matching, is proposed. This algorithm can
                 learn a cross-domain embedding space that models the
                 semantics of dialog responses which in turn, enables a neural
                 dialog generation model to generalize to new domains. We
                 evaluate our methods on two datasets, a new synthetic dialog
                 dataset, and an existing human-human multi-domain dialog
                 dataset. Experimental results show that our method is able to
                 achieve superior performance in learning dialog models that
                 can rapidly adapt their behavior to new domains and suggests
                 promising future research."
}
@article{zhao-2018-unsupervised-dg,
  title        = "Unsupervised Discrete Sentence Representation Learning for
                 Interpretable Neural Dialog Generation",
  author       = "Tiancheng Zhao and Kyusong Lee and Maxine Esk{\'{e}}nazi",
  journal      = "CoRR",
  volume       = "abs/1804.08069",
  year         = 2018,
  url          = "http://arxiv.org/abs/1804.08069",
  archivePrefix= "arXiv",
  eprint       = "1804.08069",
  timestamp    = "Mon, 13 Aug 2018 16:46:01 +0200",
  biburl       = "https://dblp.org/rec/journals/corr/abs-1804-08069.bib",
  bibsource    = "dblp computer science bibliography, https://dblp.org"
}
@inproceedings{shalyminov-2019-few-shot-dg,
  title        = "Few-Shot Dialogue Generation Without Annotated Data: A
                 Transfer Learning Approach",
  author       = "Shalyminov, Igor and Lee, Sungjin and Eshghi, Arash and
                 Lemon, Oliver",
  booktitle    = "Proceedings of the 20th Annual {SIGdial} Meeting on Discourse
                 and Dialogue",
  month        = sep,
  year         = 2019,
  address      = "Stockholm, Sweden",
  publisher    = "Association for Computational Linguistics",
  url          = "https://www.aclweb.org/anthology/W19-5904",
  doi          = "10.18653/v1/W19-5904",
  pages        = "32--39",
  abstract     = "Learning with minimal data is one of the key challenges in
                 the development of practical, production-ready goal-oriented
                 dialogue systems. In a real-world enterprise setting where
                 dialogue systems are developed rapidly and are expected to
                 work robustly for an ever-growing variety of domains,
                 products, and scenarios, efficient learning from a limited
                 number of examples becomes indispensable. In this paper, we
                 introduce a technique to achieve state-of-the-art dialogue
                 generation performance in a few-shot setup, without using any
                 annotated data. We do this by leveraging background knowledge
                 from a larger, more highly represented dialogue source {---}
                 namely, the MetaLWOz dataset. We evaluate our model on the
                 Stanford Multi-Domain Dialogue Dataset, consisting of
                 human-human goal-oriented dialogues in in-car navigation,
                 appointment scheduling, and weather information domains. We
                 show that our few-shot approach achieves state-of-the art
                 results on that dataset by consistently outperforming the
                 previous best model in terms of BLEU and Entity F1 scores,
                 while being more data-efficient than it by not requiring any
                 data annotation."
}
@inproceedings{lei-2018-sequicity,
  title        = "{Sequicity}: Simplifying Task-oriented Dialogue Systems with
                 Single Sequence-to-Sequence Architectures",
  author       = "Lei, Wenqiang and Jin, Xisen and Kan, Min-Yen and Ren,
                 Zhaochun and He, Xiangnan and Yin, Dawei",
  booktitle    = "Proceedings of the 56th Annual Meeting of the Association for
                 Computational Linguistics (Volume 1: Long Papers)",
  month        = jul,
  year         = 2018,
  address      = "Melbourne, Australia",
  publisher    = "Association for Computational Linguistics",
  url          = "https://www.aclweb.org/anthology/P18-1133",
  doi          = "10.18653/v1/P18-1133",
  pages        = "1437--1447",
  abstract     = "Existing solutions to task-oriented dialogue systems follow
                 pipeline designs which introduces architectural complexity
                 and fragility. We propose a novel, holistic, extendable
                 framework based on a single sequence-to-sequence (seq2seq)
                 model which can be optimized with supervised or reinforcement
                 learning. A key contribution is that we design text spans
                 named belief spans to track dialogue believes, allowing
                 task-oriented dialogue systems to be modeled in a seq2seq
                 way. Based on this, we propose a simplistic Two Stage CopyNet
                 instantiation which demonstrates good scalability:
                 significantly reducing model complexity in terms of number of
                 parameters and training time by a magnitude. It significantly
                 outperforms state-of-the-art pipeline-based methods on large
                 datasets and retains a satisfactory entity match rate on
                 out-of-vocabulary (OOV) cases where pipeline-designed
                 competitors totally fail."
}
@article{liu-2019-nmrc-methods,
  author       = "Liu, Shanshan and Zhang, Xin and Zhang, Sheng and Wang, Hui
                 and Zhang, Weiming",
  title        = "Neural Machine Reading Comprehension: Methods and Trends",
  journal      = "CoRR",
  volume       = "abs/1907.01118",
  year         = 2019,
  url          = "http://arxiv.org/abs/1907.01118v5",
  abstract     = "Machine reading comprehension (MRC), which requires a machine
                 to answer questions based on a given context, has attracted
                 increasing attention with the incorporation of various
                 deep-learning techniques over the past few years. Although
                 research on MRC based on deep learning is flourishing, there
                 remains a lack of a comprehensive survey summarizing existing
                 approaches and recent trends, which motivated the work
                 presented in this article. Specifically, we give a thorough
                 review of this research field, covering different aspects
                 including (1) typical MRC tasks: their definitions,
                 differences, and representative datasets; (2) the general
                 architecture of neural MRC: the main modules and prevalent
                 approaches to each; and (3) new trends: some emerging areas
                 in neural MRC as well as the corresponding
                 challenges. Finally, considering what has been achieved so
                 far, the survey also envisages what the future may hold by
                 discussing the open issues left to be addressed.",
  archivePrefix= "arXiv",
  eprint       = "1907.01118",
  primaryClass = "cs.CL"
}
@phdthesis{chen-2018-nrc-beyond,
  title        = "Neural Reading Comprehension and Beyond",
  author       = "Chen, Danqi",
  year         = 2018,
  school       = "Stanford University"
}
@inproceedings{trotman-2014-improve-bm25,
  author       = "Trotman, Andrew and Puurula, Antti and Burgess, Blake",
  title        = "Improvements to {BM25} and Language Models Examined",
  year         = 2014,
  isbn         = 9781450330008,
  publisher    = "Association for Computing Machinery",
  address      = "New York, NY, USA",
  url          = "https://doi.org/10.1145/2682862.2682863",
  doi          = "10.1145/2682862.2682863",
  booktitle    = "Proceedings of the 2014 Australasian Document Computing
                 Symposium",
  pages        = "58--65",
  numpages     = 8,
  keywords     = "Procrastination, Document Retrieval, Relevance Ranking",
  location     = "Melbourne, VIC, Australia",
  series       = "ADCS '14"
}
@article{nogueira-2019-bert-re-ranking,
  author       = "Nogueira, Rodrigo and Cho, Kyunghyun",
  title        = "Passage Re-Ranking with {BERT}",
  journal      = "CoRR",
  volume       = "abs/1901.04085",
  year         = 2019,
  url          = "http://arxiv.org/abs/1901.04085v5",
  abstract     = "Recently, neural models pretrained on a language modeling
                 task, such as ELMo (Peters et al., 2017), OpenAI GPT (Radford
                 et al., 2018), and BERT (Devlin et al., 2018), have achieved
                 impressive results on various natural language processing
                 tasks such as question-answering and natural language
                 inference. In this paper, we describe a simple
                 re-implementation of BERT for query-based passage
                 re-ranking. Our system is the state of the art on the
                 TREC-CAR dataset and the top entry in the leaderboard of the
                 MS MARCO passage retrieval task, outperforming the previous
                 state of the art by 27 \% (relative) in MRR@10. The code to
                 reproduce our results is available at
                 https://github.com/nyu-dl/dl4marco-bert",
  archivePrefix= "arXiv",
  eprint       = "1901.04085",
  primaryClass = "cs.IR"
}
@article{bajaj-2016-ms-marco,
  author       = "Bajaj, Payal and Campos, Daniel and Craswell, Nick and Deng,
                 Li and Gao, Jianfeng and Liu, Xiaodong and Majumder, Rangan
                 and McNamara, Andrew and Mitra, Bhaskar and Nguyen, Tri and
                 Rosenberg, Mir and Song, Xia and Stoica, Alina and Tiwary,
                 Saurabh and Wang, Tong",
  title        = "{MS} {MARCO}: A Human Generated Machine Reading Comprehension
                 Dataset",
  journal      = "CoRR",
  volume       = "abs/1611.09268",
  year         = 2016,
  url          = "http://arxiv.org/abs/1611.09268v3",
  abstract     = "We introduce a large scale MAchine Reading COmprehension
                 dataset, which we name MS MARCO. The dataset comprises of
                 1,010,916 anonymized questions---sampled from Bing's search
                 query logs---each with a human generated answer and 182,669
                 completely human rewritten generated answers. In addition,
                 the dataset contains 8,841,823 passages---extracted from
                 3,563,535 web documents retrieved by Bing---that provide the
                 information necessary for curating the natural language
                 answers. A question in the MS MARCO dataset may have multiple
                 answers or no answers at all. Using this dataset, we propose
                 three different tasks with varying levels of difficulty: (i)
                 predict if a question is answerable given a set of context
                 passages, and extract and synthesize the answer as a human
                 would (ii) generate a well-formed answer (if possible) based
                 on the context passages that can be understood with the
                 question and passage context, and finally (iii) rank a set of
                 retrieved passages given a question. The size of the dataset
                 and the fact that the questions are derived from real user
                 search queries distinguishes MS MARCO from other well-known
                 publicly available datasets for machine reading comprehension
                 and question-answering. We believe that the scale and the
                 real-world nature of this dataset makes it attractive for
                 benchmarking machine reading comprehension and
                 question-answering models.",
  archivePrefix= "arXiv",
  eprint       = "1611.09268",
  primaryClass = "cs.CL"
}
@article{qiao-2019-bert-re-ranking,
  author       = "Qiao, Yifan and Xiong, Chenyan and Liu, Zhenghao and Liu,
                 Zhiyuan",
  title        = "Understanding the Behaviors of {BERT} in Ranking",
  journal      = "CoRR",
  volume       = "abs/1904.07531",
  year         = 2019,
  url          = "http://arxiv.org/abs/1904.07531v4",
  abstract     = "This paper studies the performances and behaviors of BERT in
                 ranking tasks. We explore several different ways to leverage
                 the pre-trained BERT and fine-tune it on two ranking tasks:
                 MS MARCO passage reranking and TREC Web Track ad hoc document
                 ranking. Experimental results on MS MARCO demonstrate the
                 strong effectiveness of BERT in question-answering focused
                 passage ranking tasks, as well as the fact that BERT is a
                 strong interaction-based seq2seq matching model. Experimental
                 results on TREC show the gaps between the BERT pre-trained on
                 surrounding contexts and the needs of ad hoc document
                 ranking. Analyses illustrate how BERT allocates its
                 attentions between query-document tokens in its Transformer
                 layers, how it prefers semantic matches between paraphrase
                 tokens, and how that differs with the soft match patterns
                 learned by a click-trained neural ranker.",
  archivePrefix= "arXiv",
  eprint       = "1904.07531",
  primaryClass = "cs.IR"
}
@article{pei-2019-re-ranking-recommendation,
  author       = "Pei, Changhua and Zhang, Yi and Zhang, Yongfeng and Sun, Fei
                 and Lin, Xiao and Sun, Hanxiao and Wu, Jian and Jiang, Peng
                 and Ou, Wenwu",
  title        = "Personalized Re-Ranking for Recommendation",
  journal      = "CoRR",
  volume       = "abs/1904.06813",
  year         = 2019,
  url          = "http://arxiv.org/abs/1904.06813v3",
  abstract     = "Ranking is a core task in recommender systems, which aims at
                 providing an ordered list of items to users. Typically, a
                 ranking function is learned from the labeled dataset to
                 optimize the global performance, which produces a ranking
                 score for each individual item. However, it may be
                 sub-optimal because the scoring function applies to each item
                 individually and does not explicitly consider the mutual
                 influence between items, as well as the differences of users'
                 preferences or intents. Therefore, we propose a personalized
                 re-ranking model for recommender systems. The proposed
                 re-ranking model can be easily deployed as a follow-up
                 modular after any ranking algorithm, by directly using the
                 existing ranking feature vectors. It directly optimizes the
                 whole recommendation list by employing a transformer
                 structure to efficiently encode the information of all items
                 in the list. Specifically, the Transformer applies a
                 self-attention mechanism that directly models the global
                 relationships between any pair of items in the whole list. We
                 confirm that the performance can be further improved by
                 introducing pre-trained embedding to learn personalized
                 encoding functions for different users. Experimental results
                 on both offline benchmarks and real-world online e-commerce
                 systems demonstrate the significant improvements of the
                 proposed re-ranking model.",
  archivePrefix= "arXiv",
  eprint       = "1904.06813",
  primaryClass = "cs.IR"
}
@inproceedings{kratzwald-2019-rankqa,
  title        = "{R}ank{QA}: Neural Question Answering with Answer Re-Ranking",
  author       = "Kratzwald, Bernhard and Eigenmann, Anna and Feuerriegel, Stefan",
  booktitle    = "Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics",
  month        = jul,
  year         = 2019,
  address      = "Florence, Italy",
  publisher    = "Association for Computational Linguistics",
  url          = "https://www.aclweb.org/anthology/P19-1611",
  doi          = "10.18653/v1/P19-1611",
  pages        = "6076--6085",
  abstract     = "The conventional paradigm in neural question answering (QA) for narrative content is limited to a two-stage process: first, relevant text passages are retrieved and, subsequently, a neural network for machine comprehension extracts the likeliest answer. However, both stages are largely isolated in the status quo and, hence, information from the two phases is never properly fused. In contrast, this work proposes RankQA: RankQA extends the conventional two-stage process in neural QA with a third stage that performs an additional answer re-ranking. The re-ranking leverages different features that are directly extracted from the QA pipeline, i.e., a combination of retrieval and comprehension features. While our intentionally simple design allows for an efficient, data-sparse estimation, it nevertheless outperforms more complex QA systems by a significant margin: in fact, RankQA achieves state-of-the-art performance on 3 out of 4 benchmark datasets. Furthermore, its performance is especially superior in settings where the size of the corpus is dynamic. Here the answer re-ranking provides an effective remedy against the underlying noise-information trade-off due to a variable corpus size. As a consequence, RankQA represents a novel, powerful, and thus challenging baseline for future research in content-based QA."
}
@article{guu-2020-realm,
  author       = "Guu, Kelvin and Lee, Kenton and Tung, Zora and Pasupat, Panupong and Chang, Ming-Wei",
  title        = "{REALM}: Retrieval-Augmented Language Model Pre-Training",
  journal      = "CoRR",
  volume       = "abs/2002.08909",
  year         = 2020,
  url          = "http://arxiv.org/abs/2002.08909v1",
  abstract     = "Language model pre-training has been shown to capture a surprising amount of world knowledge, crucial for NLP tasks such as question answering. However, this knowledge is stored implicitly in the parameters of a neural network, requiring ever-larger networks to cover more facts. To capture knowledge in a more modular and interpretable way, we augment language model pre-training with a latent knowledge retriever, which allows the model to retrieve and attend over documents from a large corpus such as Wikipedia, used during pre-training, fine-tuning and inference. For the first time, we show how to pre-train such a knowledge retriever in an unsupervised manner, using masked language modeling as the learning signal and backpropagating through a retrieval step that considers millions of documents. We demonstrate the effectiveness of Retrieval-Augmented Language Model pre-training (REALM) by fine-tuning on the challenging task of Open-domain Question Answering (Open-QA). We compare against state-of-the-art models for both explicit and implicit knowledge storage on three popular Open-QA benchmarks, and find that we outperform all previous methods by a significant margin (4-16 \% absolute accuracy), while also providing qualitative benefits such as interpretability and modularity.",
  archivePrefix= "arXiv",
  eprint       = "2002.08909",
  primaryClass = "cs.CL"
}
@article{yang-2019-bert-ad-hoc-doc,
  author       = "Yang, Wei and Zhang, Haotian and Lin, Jimmy",
  title        = "Simple Applications of {BERT} for Ad Hoc Document Retrieval",
  journal      = "CoRR",
  volume       = "abs/1903.10972",
  year         = 2019,
  url          = "http://arxiv.org/abs/1903.10972v1",
  abstract     = "Following recent successes in applying BERT to question answering, we explore simple applications to ad hoc document retrieval. This required confronting the challenge posed by documents that are typically longer than the length of input BERT was designed to handle. We address this issue by applying inference on sentences individually, and then aggregating sentence scores to produce document scores. Experiments on TREC microblog and newswire test collections show that our approach is simple yet effective, as we report the highest average precision on these datasets by neural approaches that we are aware of.",
  archivePrefix= "arXiv",
  eprint       = "1903.10972",
  primaryClass = "cs.IR"
}
@article{kowsari-2017-hdltex,
  author       = "Kowsari, Kamran and Brown, Donald E. and Heidarysafa, Mojtaba and Meimandi, Kiana Jafari and Gerber, Matthew S. and Barnes, Laura E.",
  title        = "{HDLTex}: Hierarchical Deep Learning for Text Classification",
  journal      = "CoRR",
  volume       = "abs/1709.08267",
  year         = 2017,
  url          = "http://arxiv.org/abs/1709.08267v2",
  abstract     = "The continually increasing number of documents produced each year necessitates ever improving information processing methods for searching, retrieving, and organizing text. Central to these information processing methods is document classification, which has become an important application for supervised learning. Recently the performance of these traditional classifiers has degraded as the number of documents has increased. This is because along with this growth in the number of documents has come an increase in the number of categories. This paper approaches this problem differently from current document classification methods that view the problem as multi-class classification. Instead we perform hierarchical classification using an approach we call Hierarchical Deep Learning for Text classification (HDLTex). HDLTex employs stacks of deep learning architectures to provide specialized understanding at each level of the document hierarchy.",
  archivePrefix= "arXiv",
  eprint       = "1709.08267",
  primaryClass = "cs.LG"
}
@article{shen-2014-entity-linking-solution,
  title        = "Entity linking with a knowledge base: Issues, techniques, and solutions",
  author       = "Shen, Wei and Wang, Jianyong and Han, Jiawei",
  journal      = "IEEE Transactions on Knowledge and Data Engineering",
  volume       = 27,
  number       = 2,
  pages        = "443--460",
  year         = 2014,
  publisher    = "IEEE"
}
@inproceedings{ehrlinger-2016-kg-definition,
  added-at     = "2017-12-16T11:15:46.000+0100",
  author       = "Ehrlinger, Lisa and W{\"o}{\ss}, Wolfram",
  biburl       = "https://www.bibsonomy.org/bibtex/2bef3c699eeb69778c02467ccc13bc99c/thoni",
  booktitle    = "SEMANTiCS (Posters, Demos, SuCCESS)",
  interhash    = "33750938d78af869dd800db08b39c1b8",
  intrahash    = "bef3c699eeb69778c02467ccc13bc99c",
  keywords     = "knowledge graph definition citedby:scholar:count:4 citedby:scholar:timestamp:2017-12-16",
  timestamp    = "2017-12-16T11:15:46.000+0100",
  title        = "Towards a Definition of Knowledge Graphs.",
  year         = 2016
}
@article{shen-2005-pairwise,
  title        = "Ranking and reranking with perceptron",
  author       = "Shen, Libin and Joshi, Aravind K",
  journal      = "Machine Learning",
  volume       = 60,
  number       = "1--3",
  pages        = "73--96",
  year         = 2005,
  publisher    = "Springer"
}
@inproceedings{cao-2007-listwise,
  title        = "Learning to rank: from pairwise approach to listwise approach",
  author       = "Cao, Zhe and Qin, Tao and Liu, Tie-Yan and Tsai, Ming-Feng and Li, Hang",
  booktitle    = "Proceedings of the 24th international conference on Machine learning",
  pages        = "129--136",
  year         = 2007
}
@inproceedings{zheng-2010-learn-link,
  title        = "Learning to Link Entities with Knowledge Base",
  author       = "Zheng, Zhicheng and Li, Fangtao and Huang, Minlie and Zhu, Xiaoyan",
  booktitle    = "Human Language Technologies: The 2010 Annual Conference of the North {A}merican Chapter of the Association for Computational Linguistics",
  month        = jun,
  year         = 2010,
  address      = "Los Angeles, California",
  publisher    = "Association for Computational Linguistics",
  url          = "https://www.aclweb.org/anthology/N10-1072",
  pages        = "483--491"
}
@inproceedings{chen-2011-collaborative-ranking,
  title        = "Collaborative Ranking: A Case Study on Entity Linking",
  author       = "Chen, Zheng and Ji, Heng",
  booktitle    = "Proceedings of the 2011 Conference on Empirical Methods in Natural Language Processing",
  month        = jul,
  year         = 2011,
  address      = "Edinburgh, Scotland, UK.",
  publisher    = "Association for Computational Linguistics",
  url          = "https://www.aclweb.org/anthology/D11-1071",
  pages        = "771--781"
}
@inproceedings{han-2011-generative-el,
  title        = "A Generative Entity-Mention Model for Linking Entities with Knowledge Base",
  author       = "Han, Xianpei and Sun, Le",
  booktitle    = "Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies",
  month        = jun,
  year         = 2011,
  address      = "Portland, Oregon, USA",
  publisher    = "Association for Computational Linguistics",
  url          = "https://www.aclweb.org/anthology/P11-1095",
  pages        = "945--954"
}
@inproceedings{ngomo-2011-limes,
  title        = "{LIMES}---a time-efficient approach for large-scale link discovery on the web of data",
  author       = "Ngomo, Axel-Cyrille Ngonga and Auer, S{\"o}ren",
  booktitle    = "Twenty-Second International Joint Conference on Artificial Intelligence",
  year         = 2011
}
@article{sil-2017-cross-lingual-el,
  author       = "Sil, Avirup and Kundu, Gourab and Florian, Radu and Hamza, Wael",
  title        = "Neural Cross-Lingual Entity Linking",
  journal      = "CoRR",
  volume       = "abs/1712.01813",
  year         = 2017,
  url          = "http://arxiv.org/abs/1712.01813v1",
  abstract     = "A major challenge in Entity Linking (EL) is making effective use of contextual information to disambiguate mentions to Wikipedia that might refer to different entities in different contexts. The problem exacerbates with cross-lingual EL which involves linking mentions written in non-English documents to entries in the English Wikipedia: to compare textual clues across languages we need to compute similarity between textual fragments across languages. In this paper, we propose a neural EL model that trains fine-grained similarities and dissimilarities between the query and candidate document from multiple perspectives, combined with convolution and tensor networks. Further, we show that this English-trained system can be applied, in zero-shot learning, to other languages by making surprisingly effective use of multi-lingual embeddings. The proposed system has strong empirical evidence yielding state-of-the-art results in English as well as cross-lingual: Spanish and Chinese TAC 2015 datasets.",
  archivePrefix= "arXiv",
  eprint       = "1712.01813",
  primaryClass = "cs.CL"
}
@inproceedings{hoffart-2011-robust-el,
  title        = "Robust Disambiguation of Named Entities in Text",
  author       = "Hoffart, Johannes and Yosef, Mohamed Amir and Bordino, Ilaria and F{\"u}rstenau, Hagen and Pinkal, Manfred and Spaniol, Marc and Taneva, Bilyana and Thater, Stefan and Weikum, Gerhard",
  booktitle    = "Proceedings of the 2011 Conference on Empirical Methods in Natural Language Processing",
  month        = jul,
  year         = 2011,
  address      = "Edinburgh, Scotland, UK.",
  publisher    = "Association for Computational Linguistics",
  url          = "https://www.aclweb.org/anthology/D11-1072",
  pages        = "782--792"
}
@inproceedings{sil-2013-re-ranking-joint-ner-el,
  author       = "Sil, Avirup and Yates, Alexander",
  title        = "Re-Ranking for Joint Named-Entity Recognition and Linking",
  year         = 2013,
  isbn         = 9781450322638,
  publisher    = "Association for Computing Machinery",
  address      = "New York, NY, USA",
  url          = "https://doi.org/10.1145/2505515.2505601",
  doi          = "10.1145/2505515.2505601",
  booktitle    = "Proceedings of the 22nd ACM International Conference on Information \& Knowledge Management",
  pages        = "2369--2374",
  numpages     = 6,
  keywords     = "named entity recognition, entity linking, entity disambiguation",
  location     = "San Francisco, California, USA",
  series       = "CIKM '13"
}
@inproceedings{guo-2013-to-link-not-to-link,
  title        = "To Link or Not to Link? A Study on End-to-End Tweet Entity Linking",
  author       = "Guo, Stephen and Chang, Ming-Wei and Kiciman, Emre",
  booktitle    = "Proceedings of the 2013 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies",
  month        = jun,
  year         = 2013,
  address      = "Atlanta, Georgia",
  publisher    = "Association for Computational Linguistics",
  url          = "https://www.aclweb.org/anthology/N13-1122",
  pages        = "1020--1030"
}
@inproceedings{pu-2010-structured-entities,
  author       = "Pu, Ken Q. and Hassanzadeh, Oktie and Drake, Richard and Miller, Ren{\'e}e J.",
  title        = "Online Annotation of Text Streams with Structured Entities",
  year         = 2010,
  isbn         = 9781450300995,
  publisher    = "Association for Computing Machinery",
  address      = "New York, NY, USA",
  url          = "https://doi.org/10.1145/1871437.1871446",
  doi          = "10.1145/1871437.1871446",
  booktitle    = "Proceedings of the 19th ACM International Conference on Information and Knowledge Management",
  pages        = "29--38",
  numpages     = 10,
  keywords     = "annotation, text stream, online, entity",
  location     = "Toronto, ON, Canada",
  series       = "CIKM '10"
}
@inproceedings{zhang-2011-acronym-expansion-el,
  author       = "Zhang, Wei and Sim, Yan Chuan and Su, Jian and Tan, Chew Lim",
  title        = "Entity Linking with Effective Acronym Expansion, Instance Selection and Topic Modeling",
  year         = 2011,
  isbn         = 9781577355151,
  publisher    = "AAAI Press",
  booktitle    = "Proceedings of the Twenty-Second International Joint Conference on Artificial Intelligence - Volume Three",
  pages        = "1909--1914",
  numpages     = 6,
  location     = "Barcelona, Catalonia, Spain",
  series       = "IJCAI'11"
}
@inproceedings{milne-2008-link-with-wiki,
  author       = "Milne, David and Witten, Ian H.",
  title        = "Learning to Link with {Wikipedia}",
  year         = 2008,
  isbn         = 9781595939913,
  publisher    = "Association for Computing Machinery",
  address      = "New York, NY, USA",
  url          = "https://doi.org/10.1145/1458082.1458150",
  doi          = "10.1145/1458082.1458150",
  booktitle    = "Proceedings of the 17th ACM Conference on Information and Knowledge Management",
  pages        = "509--518",
  numpages     = 10,
  keywords     = "data mining, word sense disambiguation, wikipedia, semantic annotation",
  location     = "Napa Valley, California, USA",
  series       = "CIKM '08"
}
@inproceedings{ratinov-2011-local-global-wiki-el,
  title        = "Local and Global Algorithms for Disambiguation to {W}ikipedia",
  author       = "Ratinov, Lev and Roth, Dan and Downey, Doug and Anderson, Mike",
  booktitle    = "Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies",
  month        = jun,
  year         = 2011,
  address      = "Portland, Oregon, USA",
  publisher    = "Association for Computational Linguistics",
  url          = "https://www.aclweb.org/anthology/P11-1138",
  pages        = "1375--1384"
}
@inproceedings{radhakrishnan-2018-elden,
  title        = "{ELDEN}: Improved Entity Linking Using Densified Knowledge Graphs",
  author       = "Radhakrishnan, Priya and Talukdar, Partha and Varma, Vasudeva",
  booktitle    = "Proceedings of the 2018 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers)",
  month        = jun,
  year         = 2018,
  address      = "New Orleans, Louisiana",
  publisher    = "Association for Computational Linguistics",
  url          = "https://www.aclweb.org/anthology/N18-1167",
  doi          = "10.18653/v1/N18-1167",
  pages        = "1844--1853",
  abstract     = "Entity Linking (EL) systems aim to automatically map mentions of an entity in text to the corresponding entity in a Knowledge Graph (KG). Degree of connectivity of an entity in the KG directly affects an EL system{'}s ability to correctly link mentions in text to the entity in KG. This causes many EL systems to perform well for entities well connected to other entities in KG, bringing into focus the role of KG density in EL. In this paper, we propose Entity Linking using Densified Knowledge Graphs (ELDEN). ELDEN is an EL system which first densifies the KG with co-occurrence statistics from a large text corpus, and then uses the densified KG to train entity embeddings. Entity similarity measured using these trained entity embeddings result in improved EL. ELDEN outperforms state-of-the-art EL system on benchmark datasets. Due to such densification, ELDEN performs well for sparsely connected entities in the KG too. ELDEN{'}s approach is simple, yet effective. We have made ELDEN{'}s code and data publicly available."
}
@inproceedings{piccinno-2014-tagme-to-wat,
  author       = "Piccinno, Francesco and Ferragina, Paolo",
  title        = "From {TagME} to {WAT}: A New Entity Annotator",
  year         = 2014,
  isbn         = 9781450330237,
  publisher    = "Association for Computing Machinery",
  address      = "New York, NY, USA",
  url          = "https://doi.org/10.1145/2633211.2634350",
  doi          = "10.1145/2633211.2634350",
  booktitle    = "Proceedings of the First International Workshop on Entity Recognition \& Disambiguation",
  pages        = "55--62",
  numpages     = 8,
  keywords     = "graph-based algorithms, wikipedia, entity annotation, tagme",
  location     = "Gold Coast, Queensland, Australia",
  series       = "ERD '14"
}
@inproceedings{yamada-2016-joint-learn-embedding-el,
  title        = "Joint Learning of the Embedding of Words and Entities for Named Entity Disambiguation",
  author       = "Yamada, Ikuya and Shindo, Hiroyuki and Takeda, Hideaki and Takefuji, Yoshiyasu",
  booktitle    = "Proceedings of The 20th {SIGNLL} Conference on Computational Natural Language Learning",
  month        = aug,
  year         = 2016,
  address      = "Berlin, Germany",
  publisher    = "Association for Computational Linguistics",
  url          = "https://www.aclweb.org/anthology/K16-1025",
  doi          = "10.18653/v1/K16-1025",
  pages        = "250--259"
}
@inproceedings{henzinger-2006-duplicate-web-pages,
  title        = "Finding near-duplicate web pages: a large-scale evaluation of algorithms",
  author       = "Henzinger, Monika",
  booktitle    = "Proceedings of the 29th annual international ACM SIGIR conference on Research and development in information retrieval",
  pages        = "284--291",
  year         = 2006
}
@inproceedings{charikar-2002-simhash,
  title        = "Similarity estimation techniques from rounding algorithms",
  author       = "Charikar, Moses S",
  booktitle    = "Proceedings of the thirty-fourth annual ACM symposium on Theory of computing",
  pages        = "380--388",
  year         = 2002
}
@article{reimers-2019-sentence-bert,
  author       = "Reimers, Nils and Gurevych, Iryna",
  title        = "{Sentence-BERT}: Sentence Embeddings Using Siamese {BERT}-Networks",
  journal      = "CoRR",
  volume       = "abs/1908.10084",
  year         = 2019,
  url          = "http://arxiv.org/abs/1908.10084v1",
  abstract     = "BERT (Devlin et al., 2018) and RoBERTa (Liu et al., 2019) has set a new state-of-the-art performance on sentence-pair regression tasks like semantic textual similarity (STS). However, it requires that both sentences are fed into the network, which causes a massive computational overhead: Finding the most similar pair in a collection of 10,000 sentences requires about 50 million inference computations (~65 hours) with BERT. The construction of BERT makes it unsuitable for semantic similarity search as well as for unsupervised tasks like clustering. In this publication, we present Sentence-BERT (SBERT), a modification of the pretrained BERT network that use siamese and triplet network structures to derive semantically meaningful sentence embeddings that can be compared using cosine-similarity. This reduces the effort for finding the most similar pair from 65 hours with BERT / RoBERTa to about 5 seconds with SBERT, while maintaining the accuracy from BERT. We evaluate SBERT and SRoBERTa on common STS tasks and transfer learning tasks, where it outperforms other state-of-the-art sentence embeddings methods.",
  archivePrefix= "arXiv",
  eprint       = "1908.10084",
  primaryClass = "cs.CL"
}
@article{guo-2017-drmm,
  author       = "Guo, Jiafeng and Fan, Yixing and Ai, Qingyao and Croft, W. Bruce",
  title        = "A Deep Relevance Matching Model for Ad-Hoc Retrieval",
  journal      = "CoRR",
  volume       = "abs/1711.08611",
  year         = 2017,
  url          = "http://arxiv.org/abs/1711.08611v1",
  abstract     = "In recent years, deep neural networks have led to exciting breakthroughs in speech recognition, computer vision, and natural language processing (NLP) tasks. However, there have been few positive results of deep models on ad-hoc retrieval tasks. This is partially due to the fact that many important characteristics of the ad-hoc retrieval task have not been well addressed in deep models yet. Typically, the ad-hoc retrieval task is formalized as a matching problem between two pieces of text in existing work using deep models, and treated equivalent to many NLP tasks such as paraphrase identification, question answering and automatic conversation. However, we argue that the ad-hoc retrieval task is mainly about relevance matching while most NLP matching tasks concern semantic matching, and there are some fundamental differences between these two matching tasks. Successful relevance matching requires proper handling of the exact matching signals, query term importance, and diverse matching requirements. In this paper, we propose a novel deep relevance matching model (DRMM) for ad-hoc retrieval. Specifically, our model employs a joint deep architecture at the query term level for relevance matching. By using matching histogram mapping, a feed forward matching network, and a term gating network, we can effectively deal with the three relevance matching factors mentioned above. Experimental results on two representative benchmark collections show that our model can significantly outperform some well-known retrieval models as well as state-of-the-art deep matching models.",
  archivePrefix= "arXiv",
  eprint       = "1711.08611",
  primaryClass = "cs.IR"
}
@inproceedings{hui-2017-pacrr,
  title        = "{PACRR}: A Position-Aware Neural {IR} Model for Relevance Matching",
  author       = "Hui, Kai and Yates, Andrew and Berberich, Klaus and de Melo, Gerard",
  booktitle    = "Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing",
  month        = sep,
  year         = 2017,
  address      = "Copenhagen, Denmark",
  publisher    = "Association for Computational Linguistics",
  url          = "https://www.aclweb.org/anthology/D17-1110",
  doi          = "10.18653/v1/D17-1110",
  pages        = "1049--1058",
  abstract     = "In order to adopt deep learning for information retrieval, models are needed that can capture all relevant information required to assess the relevance of a document to a given user query. While previous works have successfully captured unigram term matches, how to fully employ position-dependent information such as proximity and term dependencies has been insufficiently explored. In this work, we propose a novel neural IR model named PACRR aiming at better modeling position-dependent interactions between a query and a document. Extensive experiments on six years{'} TREC Web Track data confirm that the proposed model yields better results under multiple benchmarks."
}
@article{malkov-2016-hnsw,
  author       = "Malkov, Yu. A. and Yashunin, D. A.",
  title        = "Efficient and Robust Approximate Nearest Neighbor Search Using Hierarchical Navigable Small World Graphs",
  journal      = "CoRR",
  volume       = "abs/1603.09320",
  year         = 2016,
  url          = "http://arxiv.org/abs/1603.09320v4",
  abstract     = "We present a new approach for the approximate K-nearest neighbor search based on navigable small world graphs with controllable hierarchy (Hierarchical NSW, HNSW). The proposed solution is fully graph-based, without any need for additional search structures, which are typically used at the coarse search stage of the most proximity graph techniques. Hierarchical NSW incrementally builds a multi-layer structure consisting from hierarchical set of proximity graphs (layers) for nested subsets of the stored elements. The maximum layer in which an element is present is selected randomly with an exponentially decaying probability distribution. This allows producing graphs similar to the previously studied Navigable Small World (NSW) structures while additionally having the links separated by their characteristic distance scales. Starting search from the upper layer together with utilizing the scale separation boosts the performance compared to NSW and allows a logarithmic complexity scaling. Additional employment of a heuristic for selecting proximity graph neighbors significantly increases performance at high recall and in case of highly clustered data. Performance evaluation has demonstrated that the proposed general metric space search index is able to strongly outperform previous opensource state-of-the-art vector-only approaches. Similarity of the algorithm to the skip list structure allows straightforward balanced distributed implementation.",
  archivePrefix= "arXiv",
  eprint       = "1603.09320",
  primaryClass = "cs.DS"
}
@article{liu-2009-learning-to-rank,
  title        = "Learning to rank for information retrieval",
  author       = "Liu, Tie-Yan",
  journal      = "Foundations and Trends in Information Retrieval",
  volume       = 3,
  number       = 3,
  pages        = "225--331",
  year         = 2009,
  publisher    = "Now Publishers Inc."
}
@article{marrero-2013-survey-ner,
  author       = "Marrero, M{\'o}nica and Urbano, Juli{\'a}n and S{\'a}nchez-Cuadrado, Sonia and Morato, Jorge and G{\'o}mez-Berb{\'\i}s, Juan Miguel",
  journal      = "Computer Standards \& Interfaces",
  number       = 5,
  pages        = "482--489",
  title        = "{Named Entity Recognition}: Fallacies, Challenges and Opportunities",
  volume       = 35,
  year         = 2013
}
@inproceedings{dai-2018-complex-entity,
  title        = "Recognizing Complex Entity Mentions: A Review and Future Directions",
  author       = "Dai, Xiang",
  booktitle    = "Proceedings of {ACL} 2018, Student Research Workshop",
  month        = jul,
  year         = 2018,
  address      = "Melbourne, Australia",
  publisher    = "Association for Computational Linguistics",
  url          = "https://www.aclweb.org/anthology/P18-3006",
  doi          = "10.18653/v1/P18-3006",
  pages        = "37--44",
  abstract     = "Standard named entity recognizers can effectively recognize entity mentions that consist of contiguous tokens and do not overlap with each other. However, in practice, there are many domains, such as the biomedical domain, in which there are nested, overlapping, and discontinuous entity mentions. These complex mentions cannot be directly recognized by conventional sequence tagging models because they may break the assumptions based on which sequence tagging techniques are built. We review the existing methods which are revised to tackle complex entity mentions and categorize them as tokenlevel and sentence-level approaches. We then identify the research gap, and discuss some directions that we are exploring."
}
@article{goyal-2018-surney-ner,
  title        = "Recent Named Entity Recognition and Classification techniques: A systematic review",
  journal      = "Computer Science Review",
  volume       = 29,
  pages        = "21--43",
  year         = 2018,
  issn         = "1574-0137",
  doi          = "https://doi.org/10.1016/j.cosrev.2018.06.001",
  url          = "http://www.sciencedirect.com/science/article/pii/S1574013717302782",
  author       = "Archana Goyal and Vishal Gupta and Manish Kumar",
  abstract     = "Textual information is becoming available in abundance on the web, arising the requirement of techniques and tools to extract the meaningful information. One of such an important information extraction task is Named Entity Recognition and Classification. It is the problem of finding the members of various predetermined classes, such as person, organization, location, date/time, quantities, numbers etc. The concept of named entity extraction was first proposed in Sixth Message Understanding Conference in 1996. Since then, a number of techniques have been developed by many researchers for extracting diversity of entities from different languages and genres of text. Still, there is a growing interest among research community to develop more new approaches to extract diverse named entities which are helpful in various natural language applications. Here we present a survey of developments and progresses made in Named Entity Recognition and Classification research."
}
@article{wang-2018-sv-guided-softmax,
  author       = "Wang, Xiaobo and Wang, Shuo and Zhang, Shifeng and Fu, Tianyu and Shi, Hailin and Mei, Tao",
  title        = "Support Vector Guided Softmax Loss for Face Recognition",
  journal      = "CoRR",
  volume       = "abs/1812.11317",
  year         = 2018,
  url          = "http://arxiv.org/abs/1812.11317v1",
  abstract     = "Face recognition has witnessed significant progresses due to the advances of deep convolutional neural networks (CNNs), the central challenge of which, is feature discrimination. To address it, one group tries to exploit mining-based strategies (\textit{e.g.}, hard example mining and focal loss) to focus on the informative examples. The other group devotes to designing margin-based loss functions (\textit{e.g.}, angular, additive and additive angular margins) to increase the feature margin from the perspective of ground truth class. Both of them have been well-verified to learn discriminative features. However, they suffer from either the ambiguity of hard examples or the lack of discriminative power of other classes. In this paper, we design a novel loss function, namely support vector guided softmax loss (SV-Softmax), which adaptively emphasizes the mis-classified points (support vectors) to guide the discriminative features learning. So the developed SV-Softmax loss is able to eliminate the ambiguity of hard examples as well as absorb the discriminative power of other classes, and thus results in more discrimiantive features. To the best of our knowledge, this is the first attempt to inherit the advantages of mining-based and margin-based losses into one framework. Experimental results on several benchmarks have demonstrated the effectiveness of our approach over state-of-the-arts.",
  archivePrefix= "arXiv",
  eprint       = "1812.11317",
  primaryClass = "cs.CV"
}
@inproceedings{pan-2015-unsupervised-el,
  title         = "Unsupervised Entity Linking with {A}bstract {M}eaning
                   {R}epresentation",
  author        = "Pan, Xiaoman and Cassidy, Taylor and Hermjakob, Ulf and Ji,
                   Heng and Knight, Kevin",
  booktitle     = "Proceedings of the 2015 Conference of the North {A}merican
                   Chapter of the Association for Computational Linguistics:
                   Human Language Technologies",
  month         = may # "{--}" # jun,
  year          = 2015,
  address       = "Denver, Colorado",
  publisher     = "Association for Computational Linguistics",
  url           = "https://www.aclweb.org/anthology/N15-1119",
  doi           = "10.3115/v1/N15-1119",
  pages         = "1130--1139"
}
@inproceedings{banarescu-2013-amr,
  title         = "{A}bstract {M}eaning {R}epresentation for Sembanking",
  author        = "Banarescu, Laura and Bonial, Claire and Cai, Shu and
                   Georgescu, Madalina and Griffitt, Kira and Hermjakob, Ulf and
                   Knight, Kevin and Koehn, Philipp and Palmer, Martha and
                   Schneider, Nathan",
  booktitle     = "Proceedings of the 7th Linguistic Annotation Workshop and
                   Interoperability with Discourse",
  month         = aug,
  year          = 2013,
  address       = "Sofia, Bulgaria",
  publisher     = "Association for Computational Linguistics",
  url           = "https://www.aclweb.org/anthology/W13-2322",
  pages         = "178--186"
}
@article{wang-2015-faq-based,
  author        = "Wang, Zhiguo and Ittycheriah, Abraham",
  title         = "{FAQ}-Based Question Answering via Word Alignment",
  journal       = "CoRR",
  volume        = "abs/1507.02628",
  year          = 2015,
  url           = "http://arxiv.org/abs/1507.02628v1",
  abstract      = "In this paper, we propose a novel word-alignment-based method
                   to solve the FAQ-based question answering task. First, we
                   employ a neural network model to calculate question
                   similarity, where the word alignment between two questions is
                   used for extracting features. Second, we design a
                   bootstrap-based feature extraction method to extract a small
                   set of effective lexical features. Third, we propose a
                   learning-to-rank algorithm to train parameters more suitable
                   for the ranking tasks. Experimental results, conducted on
                   three languages (English, Spanish and Japanese), demonstrate
                   that the question similarity model is more effective than
                   baseline systems, the sparse features bring 5 \% improvements
                   on top-1 accuracy, and the learning-to-rank algorithm works
                   significantly better than the traditional method. We further
                   evaluate our method on the answer sentence selection
                   task. Our method outperforms all the previous systems on the
                   standard TREC data set.",
  archivePrefix = "arXiv",
  eprint        = "1507.02628",
  primaryClass  = "cs.CL"
}
@inproceedings{song-2007-question-similarity,
  title         = "Question Similarity Calculation for {FAQ} Answering",
  author        = "Song, Wanpeng and Feng, Min and Gu, Naijie and Wenyin, Liu",
  booktitle     = "Third International Conference on Semantics, Knowledge and
                   Grid (SKG 2007)",
  pages         = "298--301",
  year          = 2007,
  organization  = "IEEE"
}
@inproceedings{bhardwaj-2016-faq,
  title         = "Question Answering System for Frequently Asked Questions",
  author        = "Bhardwaj, Divyanshu and Pakray, Partha and Bentham, Jereemi
                   and Saha, Saurav and Gelbukh, Alexander",
  booktitle     = "Proceedings of the Final Workshop 7 December 2016, Naples",
  pages         = 129,
  year          = 2016
}
@article{minaee-2017-similarity-qa,
  author        = "Minaee, Shervin and Liu, Zhu",
  title         = "Automatic Question-Answering Using a Deep Similarity Neural
                   Network",
  journal       = "CoRR",
  volume        = "abs/1708.01713",
  year          = 2017,
  url           = "http://arxiv.org/abs/1708.01713v1",
  abstract      = "Automatic question-answering is a classical problem in
                   natural language processing, which aims at designing systems
                   that can automatically answer a question, in the same way as
                   human does. In this work, we propose a deep learning based
                   model for automatic question-answering. First the questions
                   and answers are embedded using neural probabilistic
                   modeling. Then a deep similarity neural network is trained to
                   find the similarity score of a pair of answer and
                   question. Then for each question, the best answer is found as
                   the one with the highest similarity score. We first train
                   this model on a large-scale public question-answering
                   database, and then fine-tune it to transfer to the
                   customer-care chat data. We have also tested our framework on
                   a public question-answering database and achieved very good
                   performance.",
  archivePrefix = "arXiv",
  eprint        = "1708.01713",
  primaryClass  = "cs.CL"
}
@article{sharma-2018-qa-system,
  title         = "Deep Learning Approaches for Question Answering System",
  journal       = "Procedia Computer Science",
  volume        = 132,
  pages         = "785--794",
  year          = 2018,
  note          = "International Conference on Computational Intelligence and
                   Data Science",
  issn          = "1877-0509",
  doi           = "10.1016/j.procs.2018.05.090",
  url           = "http://www.sciencedirect.com/science/article/pii/S1877050918308226",
  author        = "Yashvardhan Sharma and Sahil Gupta",
  keywords      = "coattention, deep learning, memory nets, neural networks,
                   question answering, word vectors",
  abstract      = "Question Answering (QA) System is very useful as most of the
                   deep learning related problems can be modeled as a question
                   answering problem. Consequently, the field is one of the most
                   researched fields in computer science today. The last few
                   years have seen considerable developments and improvement in
                   the state of the art, much of which can be credited to
                   upcoming of Deep Learning. In this paper, a discussion about
                   various approaches starting from the basic NLP and algorithms
                   based approach has been done and the paper eventually builds
                   towards the recently proposed methods of Deep
                   Learning. Implementation details and various tweaks in the
                   algorithms that produced better results have also been
                   discussed. The evaluation of the proposed models was done on
                   twenty tasks of babI dataset of Facebook."
}
@inproceedings{lai-2018-answer-selection,
  title         = "A Review on Deep Learning Techniques Applied to Answer
                   Selection",
  author        = "Lai, Tuan Manh and Bui, Trung and Li, Sheng",
  booktitle     = "Proceedings of the 27th International Conference on
                   Computational Linguistics",
  month         = aug,
  year          = 2018,
  address       = "Santa Fe, New Mexico, USA",
  publisher     = "Association for Computational Linguistics",
  url           = "https://www.aclweb.org/anthology/C18-1181",
  pages         = "2132--2144",
  abstract      = "Given a question and a set of candidate answers, answer
                   selection is the task of identifying which of the candidates
                   answers the question correctly. It is an important problem in
                   natural language processing, with applications in many
                   areas. Recently, many deep learning based methods have been
                   proposed for the task. They produce impressive performance
                   without relying on any feature engineering or expensive
                   external resources. In this paper, we aim to provide a
                   comprehensive review on deep learning methods applied to
                   answer selection."
}
@article{feng-2015-answer-selection,
  author        = "Feng, Minwei and Xiang, Bing and Glass, Michael R. and Wang,
                   Lidan and Zhou, Bowen",
  title         = "Applying Deep Learning to Answer Selection: A Study and an
                   Open Task",
  journal       = "CoRR",
  volume        = "abs/1508.01585",
  year          = 2015,
  url           = "http://arxiv.org/abs/1508.01585v2",
  abstract      = "We apply a general deep learning framework to address the
                   non-factoid question answering task. Our approach does not
                   rely on any linguistic tools and can be applied to different
                   languages or domains. Various architectures are presented and
                   compared. We create and release a QA corpus and setup a new
                   QA task in the insurance domain. Experimental results
                   demonstrate superior performance compared to the baseline
                   methods and various technologies give further
                   improvements. For this highly challenging task, the top-1
                   accuracy can reach up to 65.3 \% on a test set, which
                   indicates a great potential for practical use.",
  archivePrefix = "arXiv",
  eprint        = "1508.01585",
  primaryClass  = "cs.CL"
}
@article{tan-2015-lstm-answer-selection,
  author        = "Tan, Ming and dos Santos, Cicero and Xiang, Bing and Zhou,
                   Bowen",
  title         = "{LSTM}-Based Deep Learning Models for Non-Factoid Answer
                   Selection",
  journal       = "CoRR",
  volume        = "abs/1511.04108",
  year          = 2015,
  url           = "http://arxiv.org/abs/1511.04108v4",
  abstract      = "In this paper, we apply a general deep learning (DL)
                   framework for the answer selection task, which does not
                   depend on manually defined features or linguistic tools. The
                   basic framework is to build the embeddings of questions and
                   answers based on bidirectional long short-term memory
                   (biLSTM) models, and measure their closeness by cosine
                   similarity. We further extend this basic model in two
                   directions. One direction is to define a more composite
                   representation for questions and answers by combining
                   convolutional neural network with the basic framework. The
                   other direction is to utilize a simple but efficient
                   attention mechanism in order to generate the answer
                   representation according to the question context. Several
                   variations of models are provided. The models are examined by
                   two datasets, including TREC-QA and InsuranceQA. Experimental
                   results demonstrate that the proposed models substantially
                   outperform several strong baselines.",
  archivePrefix = "arXiv",
  eprint        = "1511.04108",
  primaryClass  = "cs.CL"
}
@inproceedings{wang-2016-inner-attention-answer-selection,
  title         = "Inner Attention based Recurrent Neural Networks for Answer
                   Selection",
  author        = "Wang, Bingning and Liu, Kang and Zhao, Jun",
  booktitle     = "Proceedings of the 54th Annual Meeting of the Association for
                   Computational Linguistics (Volume 1: Long Papers)",
  month         = aug,
  year          = 2016,
  address       = "Berlin, Germany",
  publisher     = "Association for Computational Linguistics",
  url           = "https://www.aclweb.org/anthology/P16-1122",
  doi           = "10.18653/v1/P16-1122",
  pages         = "1288--1297"
}
@article{wang-2016-compare-aggregate,
  author        = "Wang, Shuohang and Jiang, Jing",
  title         = "A Compare-Aggregate Model for Matching Text Sequences",
  journal       = "CoRR",
  volume        = "abs/1611.01747",
  year          = 2016,
  url           = "http://arxiv.org/abs/1611.01747v1",
  abstract      = "Many NLP tasks including machine comprehension, answer
                   selection and text entailment require the comparison between
                   sequences. Matching the important units between sequences is
                   a key to solve these problems. In this paper, we present a
                   general ``compare-aggregate'' framework that performs
                   word-level matching followed by aggregation using
                   Convolutional Neural Networks. We particularly focus on the
                   different comparison functions we can use to match two
                   vectors. We use four different datasets to evaluate the
                   model. We find that some simple comparison functions based on
                   element-wise operations can work better than standard neural
                   network and neural tensor network.",
  archivePrefix = "arXiv",
  eprint        = "1611.01747",
  primaryClass  = "cs.CL"
}
@article{parikh-2016-decomposable-attention,
  author        = "{Parikh}, A.~P. and {T{\"a}ckstr{\"o}m}, O. and {Das}, D. and
                   {Uszkoreit}, J.",
  title         = "A Decomposable Attention Model for Natural Language
                   Inference",
  journal       = "ArXiv e-prints",
  year          = 2016,
  month         = jun,
  keywords      = "Computer Science - Computation and Language",
  archivePrefix = "arXiv",
  eprint        = "1606.01933",
  primaryClass  = "cs.CL",
  adsurl        = "http://adsabs.harvard.edu/abs/2016arXiv160601933P",
  adsnote       = "Provided by the SAO/NASA Astrophysics Data System"
}
@article{wang-2017-bimpm,
  author        = "Wang, Zhiguo and Hamza, Wael and Florian, Radu",
  title         = "Bilateral Multi-Perspective Matching for Natural Language
                   Sentences",
  journal       = "CoRR",
  volume        = "abs/1702.03814",
  year          = 2017,
  url           = "http://arxiv.org/abs/1702.03814",
  archivePrefix = "arXiv",
  eprint        = "1702.03814",
  timestamp     = "Mon, 13 Aug 2018 16:47:19 +0200",
  biburl        = "https://dblp.org/rec/bib/journals/corr/WangHF17",
  bibsource     = "dblp computer science bibliography, https://dblp.org"
}
@inproceedings{wang-2016-lexical-decomposition-composition,
  title         = "Sentence Similarity Learning by Lexical Decomposition and
                   Composition",
  author        = "Wang, Zhiguo and Mi, Haitao and Ittycheriah, Abraham",
  booktitle     = "Proceedings of {COLING} 2016, the 26th International
                   Conference on Computational Linguistics: Technical Papers",
  month         = dec,
  year          = 2016,
  address       = "Osaka, Japan",
  publisher     = "The COLING 2016 Organizing Committee",
  url           = "https://www.aclweb.org/anthology/C16-1127",
  pages         = "1340--1349",
  abstract      = "Most conventional sentence similarity methods only focus on
                   similar parts of two input sentences, and simply ignore the
                   dissimilar parts, which usually give us some clues and
                   semantic meanings about the sentences. In this work, we
                   propose a model to take into account both the similarities
                   and dissimilarities by decomposing and composing lexical
                   semantics over sentences. The model represents each word as a
                   vector, and calculates a semantic matching vector for each
                   word based on all words in the other sentence. Then, each
                   word vector is decomposed into a similar component and a
                   dissimilar component based on the semantic matching
                   vector. After this, a two-channel CNN model is employed to
                   capture features by composing the similar and dissimilar
                   components. Finally, a similarity score is estimated over the
                   composed feature vectors. Experimental results show that our
                   model gets the state-of-the-art performance on the answer
                   sentence selection task, and achieves a comparable result on
                   the paraphrase identification task."
}
@article{chen-2016-esim,
  author        = "{Chen}, Qian and {Zhu}, Xiaodan and {Ling}, Zhenhua and
                   {Wei}, Si and {Jiang}, Hui and {Inkpen}, Diana",
  title         = "Enhanced {LSTM} for Natural Language Inference",
  journal       = "arXiv e-prints",
  keywords      = "Computer Science - Computation and Language",
  year          = 2016,
  month         = sep,
  eid           = "arXiv:1609.06038",
  pages         = "arXiv:1609.06038",
  archivePrefix = "arXiv",
  eprint        = "1609.06038",
  primaryClass  = "cs.CL",
  adsurl        = "https://ui.adsabs.harvard.edu/abs/2016arXiv160906038C",
  adsnote       = "Provided by the SAO/NASA Astrophysics Data System"
}
@inproceedings{shen-2017-inter-weighted-alignment,
  title         = "Inter-Weighted Alignment Network for Sentence Pair Modeling",
  author        = "Shen, Gehui and Yang, Yunlun and Deng, Zhi-Hong",
  booktitle     = "Proceedings of the 2017 Conference on Empirical Methods in
                   Natural Language Processing",
  month         = sep,
  year          = 2017,
  address       = "Copenhagen, Denmark",
  publisher     = "Association for Computational Linguistics",
  url           = "https://www.aclweb.org/anthology/D17-1122",
  doi           = "10.18653/v1/D17-1122",
  pages         = "1179--1189",
  abstract      = "Sentence pair modeling is a crucial problem in the field of
                   natural language processing. In this paper, we propose a
                   model to measure the similarity of a sentence pair focusing
                   on the interaction information. We utilize the word level
                   similarity matrix to discover fine-grained alignment of two
                   sentences. It should be emphasized that each word in a
                   sentence has a different importance from the perspective of
                   semantic composition, so we exploit two novel and efficient
                   strategies to explicitly calculate a weight for each
                   word. Although the proposed model only use a sequential LSTM
                   for sentence modeling without any external resource such as
                   syntactic parser tree and additional lexicon features,
                   experimental results show that our model achieves
                   state-of-the-art performance on three datasets of two tasks."
}
@article{tay-2017-compare-compress-propagate,
  author        = "Tay, Yi and Tuan, Luu Anh and Hui, Siu Cheung",
  title         = "Compare, Compress and Propagate: Enhancing Neural
                   Architectures With Alignment Factorization for Natural
                   Language Inference",
  journal       = "CoRR",
  volume        = "abs/1801.00102",
  year          = 2017,
  url           = "http://arxiv.org/abs/1801.00102v2",
  abstract      = "This paper presents a new deep learning architecture for
                   Natural Language Inference (NLI). Firstly, we introduce a new
                   architecture where alignment pairs are compared, compressed
                   and then propagated to upper layers for enhanced
                   representation learning. Secondly, we adopt factorization
                   layers for efficient and expressive compression of alignment
                   vectors into scalar features, which are then used to augment
                   the base word representations. The design of our approach is
                   aimed to be conceptually simple, compact and yet powerful. We
                   conduct experiments on three popular benchmarks, SNLI,
                   MultiNLI and SciTail, achieving competitive performance on
                   all. A lightweight parameterization of our model also enjoys
                   a $\approx 3$ times reduction in parameter size compared to
                   the existing state-of-the-art models, e.g., ESIM and DIIN,
                   while maintaining competitive performance. Additionally,
                   visual analysis shows that our propagated features are highly
                   interpretable.",
  archivePrefix = "arXiv",
  eprint        = "1801.00102",
  primaryClass  = "cs.CL"
}
@article{gong-2017-diin,
  author        = "Gong, Yichen and Luo, Heng and Zhang, Jian",
  title         = "Natural Language Inference over Interaction Space",
  journal       = "CoRR",
  volume        = "abs/1709.04348",
  year          = 2017,
  url           = "http://arxiv.org/abs/1709.04348",
  archivePrefix = "arXiv",
  eprint        = "1709.04348",
  timestamp     = "Mon, 13 Aug 2018 16:47:34 +0200",
  biburl        = "https://dblp.org/rec/bib/journals/corr/abs-1709-04348",
  bibsource     = "dblp computer science bibliography, https://dblp.org"
}
@article{tay-2018-multi-cast-attention,
  author        = "Tay, Yi and Tuan, Luu Anh and Hui, Siu Cheung",
  title         = "Multi-Cast Attention Networks for Retrieval-Based Question
                   Answering and Response Prediction",
  journal       = "CoRR",
  volume        = "abs/1806.00778",
  year          = 2018,
  url           = "http://arxiv.org/abs/1806.00778v1",
  abstract      = "Attention is typically used to select informative sub-phrases
                   that are used for prediction. This paper investigates the
                   novel use of attention as a form of feature augmentation,
                   i.e, casted attention. We propose Multi-Cast Attention
                   Networks (MCAN), a new attention mechanism and general model
                   architecture for a potpourri of ranking tasks in the
                   conversational modeling and question answering domains. Our
                   approach performs a series of soft attention operations, each
                   time casting a scalar feature upon the inner word
                   embeddings. The key idea is to provide a real-valued hint
                   (feature) to a subsequent encoder layer and is targeted at
                   improving the representation learning process. There are
                   several advantages to this design, e.g., it allows an
                   arbitrary number of attention mechanisms to be casted,
                   allowing for multiple attention types (e.g., co-attention,
                   intra-attention) and attention variants (e.g.,
                   alignment-pooling, max-pooling, mean-pooling) to be executed
                   simultaneously. This not only eliminates the costly need to
                   tune the nature of the co-attention layer, but also provides
                   greater extents of explainability to practitioners. Via
                   extensive experiments on four well-known benchmark datasets,
                   we show that MCAN achieves state-of-the-art performance. On
                   the Ubuntu Dialogue Corpus, MCAN outperforms existing
                   state-of-the-art models by $9\%$. MCAN also achieves the best
                   performing score to date on the well-studied TrecQA dataset.",
  archivePrefix = "arXiv",
  eprint        = "1806.00778",
  primaryClass  = "cs.CL"
}
@inproceedings{tay-2018-csran,
  title         = "Co-Stack Residual Affinity Networks with Multi-level
                   Attention Refinement for Matching Text Sequences",
  author        = "Tay, Yi and Luu, Anh Tuan and Hui, Siu Cheung",
  booktitle     = "Proceedings of the 2018 Conference on Empirical Methods in
                   Natural Language Processing",
  month         = oct # "--" # nov,
  year          = 2018,
  address       = "Brussels, Belgium",
  publisher     = "Association for Computational Linguistics",
  url           = "https://www.aclweb.org/anthology/D18-1479",
  doi           = "10.18653/v1/D18-1479",
  pages         = "4492--4502",
  abstract      = "Learning a matching function between two text sequences is a
                   long standing problem in NLP research. This task enables many
                   potential applications such as question answering and
                   paraphrase identification. This paper proposes Co-Stack
                   Residual Affinity Networks (CSRAN), a new and universal
                   neural architecture for this problem. CSRAN is a deep
                   architecture, involving stacked (multi-layered) recurrent
                   encoders. Stacked/Deep architectures are traditionally
                   difficult to train, due to the inherent weaknesses such as
                   difficulty with feature propagation and vanishing
                   gradients. CSRAN incorporates two novel components to take
                   advantage of the stacked architecture. Firstly, it introduces
                   a new bidirectional alignment mechanism that learns affinity
                   weights by fusing sequence pairs across stacked
                   hierarchies. Secondly, it leverages a multi-level attention
                   refinement component between stacked recurrent layers. The
                   key intuition is that, by leveraging information across all
                   network hierarchies, we can not only improve gradient flow
                   but also improve overall performance. We conduct extensive
                   experiments on six well-studied text sequence matching
                   datasets, achieving state-of-the-art performance on all."
}
@inproceedings{tan-2018-multiway-attention-mwan,
  title         = "Multiway Attention Networks for Modeling Sentence Pairs",
  author        = "Chuanqi Tan and Furu Wei and Wenhui Wang and Weifeng Lv and
                   Ming Zhou",
  booktitle     = "Proceedings of the Twenty-Seventh International Joint
                   Conference on Artificial Intelligence, {IJCAI-18}",
  publisher     = "International Joint Conferences on Artificial Intelligence
                   Organization",
  pages         = "4411--4417",
  year          = 2018,
  month         = jul,
  doi           = "10.24963/ijcai.2018/613",
  url           = "https://doi.org/10.24963/ijcai.2018/613"
}
@article{kim-2018-semantic-sentence-matching,
  author        = "Kim, Seonhoon and Kang, Inho and Kwak, Nojun",
  title         = "Semantic Sentence Matching With Densely-Connected Recurrent
                   and Co-Attentive Information",
  journal       = "CoRR",
  volume        = "abs/1805.11360",
  year          = 2018,
  url           = "http://arxiv.org/abs/1805.11360v2",
  abstract      = "Sentence matching is widely used in various natural language
                   tasks such as natural language inference, paraphrase
                   identification, and question answering. For these tasks,
                   understanding logical and semantic relationship between two
                   sentences is required but it is yet challenging. Although
                   attention mechanism is useful to capture the semantic
                   relationship and to properly align the elements of two
                   sentences, previous methods of attention mechanism simply use
                   a summation operation which does not retain original features
                   enough. Inspired by DenseNet, a densely connected
                   convolutional network, we propose a densely-connected
                   co-attentive recurrent neural network, each layer of which
                   uses concatenated information of attentive features as well
                   as hidden features of all the preceding recurrent layers. It
                   enables preserving the original and the co-attentive feature
                   information from the bottommost word embedding layer to the
                   uppermost recurrent layer. To alleviate the problem of an
                   ever-increasing size of feature vectors due to dense
                   concatenation operations, we also propose to use an
                   autoencoder after dense concatenation. We evaluate our
                   proposed architecture on highly competitive benchmark
                   datasets related to sentence matching. Experimental results
                   show that our architecture, which retains recurrent and
                   attentive features, achieves state-of-the-art performances
                   for most of the tasks.",
  archivePrefix = "arXiv",
  eprint        = "1805.11360",
  primaryClass  = "cs.CL"
}
@inproceedings{pan-2018-discourse-marker,
  title         = "Discourse Marker Augmented Network with Reinforcement
                   Learning for Natural Language Inference",
  author        = "Pan, Boyuan and Yang, Yazheng and Zhao, Zhou and Zhuang,
                   Yueting and Cai, Deng and He, Xiaofei",
  booktitle     = "Proceedings of the 56th Annual Meeting of the Association for
                   Computational Linguistics (Volume 1: Long Papers)",
  month         = jul,
  year          = 2018,
  address       = "Melbourne, Australia",
  publisher     = "Association for Computational Linguistics",
  url           = "https://www.aclweb.org/anthology/P18-1091",
  doi           = "10.18653/v1/P18-1091",
  pages         = "989--999",
  abstract      = "Natural Language Inference (NLI), also known as Recognizing
                   Textual Entailment (RTE), is one of the most important
                   problems in natural language processing. It requires to infer
                   the logical relationship between two given sentences. While
                   current approaches mostly focus on the interaction
                   architectures of the sentences, in this paper, we propose to
                   transfer knowledge from some important discourse markers to
                   augment the quality of the NLI model. We observe that people
                   usually use some discourse markers such as {``}so{''} or
                   {``}but{''} to represent the logical relationship between two
                   sentences. These words potentially have deep connections with
                   the meanings of the sentences, thus can be utilized to help
                   improve the representations of them. Moreover, we use
                   reinforcement learning to optimize a new objective function
                   with a reward defined by the property of the NLI datasets to
                   make full use of the labels information. Experiments show
                   that our method achieves the state-of-the-art performance on
                   several large-scale datasets."
}
@article{zhang-2018-explicit-contextual-semantics,
  author        = "Zhang, Zhuosheng and Wu, Yuwei and Li, Zuchao and Zhao, Hai",
  title         = "Explicit Contextual Semantics for Text Comprehension",
  journal       = "CoRR",
  volume        = "abs/1809.02794",
  year          = 2018,
  url           = "http://arxiv.org/abs/1809.02794v3",
  abstract      = "Who did what to whom is a major focus in natural language
                   understanding, which is right the aim of semantic role
                   labeling (SRL) task. Despite of sharing a lot of processing
                   characteristics and even task purpose, it is surprisingly
                   that jointly considering these two related tasks was never
                   formally reported in previous work. Thus this paper makes the
                   first attempt to let SRL enhance text comprehension and
                   inference through specifying verbal predicates and their
                   corresponding semantic roles. In terms of deep learning
                   models, our embeddings are enhanced by explicit contextual
                   semantic role labels for more fine-grained semantics. We show
                   that the salient labels can be conveniently added to existing
                   models and significantly improve deep learning models in
                   challenging text comprehension tasks. Extensive experiments
                   on benchmark machine reading comprehension and inference
                   datasets verify that the proposed semantic learning helps our
                   system reach new state-of-the-art over strong baselines which
                   have been enhanced by well pretrained language models from
                   the latest progress.",
  archivePrefix = "arXiv",
  eprint        = "1809.02794",
  primaryClass  = "cs.CL"
}
@article{leal-taixe-2016-siamese-cnn,
  author        = "Leal-Taix{\'e}, Laura and Ferrer, Cristian Canton and
                   Schindler, Konrad",
  title         = "Learning by Tracking: {Siamese} {CNN} for Robust Target
                   Association",
  journal       = "CoRR",
  volume        = "abs/1604.07866",
  year          = 2016,
  url           = "http://arxiv.org/abs/1604.07866v3",
  abstract      = "This paper introduces a novel approach to the task of data
                   association within the context of pedestrian tracking, by
                   introducing a two-stage learning scheme to match pairs of
                   detections. First, a Siamese convolutional neural network
                   (CNN) is trained to learn descriptors encoding local
                   spatio-temporal structures between the two input image
                   patches, aggregating pixel values and optical flow
                   information. Second, a set of contextual features derived
                   from the position and size of the compared input patches are
                   combined with the CNN output by means of a gradient boosting
                   classifier to generate the final matching probability. This
                   learning approach is validated by using a linear programming
                   based multi-person tracker showing that even a simple and
                   efficient tracker may outperform much more complex models
                   when fed with our learned matching probabilities. Results on
                   publicly available sequences show that our method meets
                   state-of-the-art standards in multiple people tracking.",
  archivePrefix = "arXiv",
  eprint        = "1604.07866",
  primaryClass  = "cs.LG"
}
@inproceedings{mueller-2016-siamese-lstm,
  title         = "Siamese Recurrent Architectures for Learning Sentence
                   Similarity",
  author        = "Mueller, Jonas and Thyagarajan, Aditya",
  booktitle     = "Thirtieth {AAAI} Conference on Artificial Intelligence",
  year          = 2016
}
@article{conneau-2017-infer-sent,
  author        = "Conneau, Alexis and Kiela, Douwe and Schwenk, Holger and
                   Barrault, Loic and Bordes, Antoine",
  title         = "Supervised Learning of Universal Sentence Representations
                   From Natural Language Inference Data",
  journal       = "CoRR",
  volume        = "abs/1705.02364",
  year          = 2017,
  url           = "http://arxiv.org/abs/1705.02364v5",
  abstract      = "Many modern NLP systems rely on word embeddings, previously
                   trained in an unsupervised manner on large corpora, as base
                   features. Efforts to obtain embeddings for larger chunks of
                   text, such as sentences, have however not been so
                   successful. Several attempts at learning unsupervised
                   representations of sentences have not reached satisfactory
                   enough performance to be widely adopted. In this paper, we
                   show how universal sentence representations trained using the
                   supervised data of the Stanford Natural Language Inference
                   datasets can consistently outperform unsupervised methods
                   like SkipThought vectors on a wide range of transfer
                   tasks. Much like how computer vision uses ImageNet to obtain
                   features, which can then be transferred to other tasks, our
                   work tends to indicate the suitability of natural language
                   inference for transfer learning to other NLP tasks. Our
                   encoder is publicly available.",
  archivePrefix = "arXiv",
  eprint        = "1705.02364",
  primaryClass  = "cs.CL"
}
@article{nie-2017-sse,
  author        = "Nie, Yixin and Bansal, Mohit",
  title         = "Shortcut-Stacked Sentence Encoders for Multi-Domain
                   Inference",
  journal       = "CoRR",
  volume        = "abs/1708.02312",
  year          = 2017,
  url           = "http://arxiv.org/abs/1708.02312v2",
  abstract      = "We present a simple sequential sentence encoder for
                   multi-domain natural language inference. Our encoder is based
                   on stacked bidirectional LSTM-RNNs with shortcut connections
                   and fine-tuning of word embeddings. The overall supervised
                   model uses the above encoder to encode two input sentences
                   into two vectors, and then uses a classifier over the vector
                   combination to label the relationship between these two
                   sentences as that of entailment, contradiction, or
                   neutral. Our Shortcut-Stacked sentence encoders achieve strong
                   improvements over existing encoders on matched and mismatched
                   multi-domain natural language inference (top non-ensemble
                   single-model result in the EMNLP RepEval 2017 Shared Task
                   (Nangia et al., 2017)). Moreover, they achieve the new
                   state-of-the-art encoding result on the original SNLI dataset
                   (Bowman et al., 2015).",
  archivePrefix = "arXiv",
  eprint        = "1708.02312",
  primaryClass  = "cs.CL"
}
@inproceedings{zhou-2016-multi-view, | |
title = "Multi-view Response Selection for Human-Computer | |
Conversation", | |
author = "Zhou, Xiangyang and Dong, Daxiang and Wu, Hua and Zhao, Shiqi | |
and Yu, Dianhai and Tian, Hao and Liu, Xuan and Yan, Rui", | |
booktitle = "Proceedings of the 2016 Conference on Empirical Methods in | |
Natural Language Processing", | |
month = nov, | |
year = 2016, | |
address = "Austin, Texas", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/D16-1036", | |
doi = "10.18653/v1/D16-1036", | |
  pages        = "372--381" | |
} | |
@inproceedings{lan-2018-neural-network, | |
title = "Neural Network Models for Paraphrase Identification, Semantic | |
Textual Similarity, Natural Language Inference, and Question | |
Answering", | |
author = "Lan, Wuwei and Xu, Wei", | |
booktitle = "Proceedings of the 27th International Conference on | |
Computational Linguistics", | |
month = aug, | |
year = 2018, | |
address = "Santa Fe, New Mexico, USA", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/C18-1328", | |
  pages        = "3890--3902", | |
abstract = "In this paper, we analyze several neural network designs (and | |
their variations) for sentence pair modeling and compare | |
their performance extensively across eight datasets, | |
including paraphrase identification, semantic textual | |
similarity, natural language inference, and question | |
answering tasks. Although most of these models have claimed | |
state-of-the-art performance, the original papers often | |
reported on only one or two selected datasets. We provide a | |
systematic study and show that (i) encoding contextual | |
information by LSTM and inter-sentence interactions are | |
critical, (ii) Tree-LSTM does not help as much as previously | |
claimed but surprisingly improves performance on Twitter | |
datasets, (iii) the Enhanced Sequential Inference Model is | |
the best so far for larger datasets, while the Pairwise Word | |
Interaction Model achieves the best performance when less | |
data is available. We release our implementations as an | |
open-source toolkit." | |
} | |
@inproceedings{zhou-2018-dam, | |
title = "Multi-Turn Response Selection for Chatbots with Deep | |
Attention Matching Network", | |
author = "Zhou, Xiangyang and Li, Lu and Dong, Daxiang and Liu, Yi and | |
Chen, Ying and Zhao, Wayne Xin and Yu, Dianhai and Wu, Hua", | |
booktitle = "Proceedings of the 56th Annual Meeting of the Association for | |
Computational Linguistics (Volume 1: Long Papers)", | |
month = jul, | |
year = 2018, | |
address = "Melbourne, Australia", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/P18-1103", | |
doi = "10.18653/v1/P18-1103", | |
  pages        = "1118--1127", | |
abstract = "Human generates responses relying on semantic and functional | |
dependencies, including coreference relation, among dialogue | |
elements and their context. In this paper, we investigate | |
matching a response with its multi-turn context using | |
dependency information based entirely on attention. Our | |
solution is inspired by the recently proposed Transformer in | |
machine translation (Vaswani et al., 2017) and we extend the | |
attention mechanism in two ways. First, we construct | |
representations of text segments at different granularities | |
solely with stacked self-attention. Second, we try to extract | |
the truly matched segment pairs with attention across the | |
context and response. We jointly introduce those two kinds of | |
attention in one uniform neural network. Experiments on two | |
large-scale multi-turn response selection tasks show that our | |
proposed model significantly outperforms the state-of-the-art | |
models." | |
} | |
@inproceedings{rao-2019-hcan, | |
title = "Bridging the Gap between Relevance Matching and Semantic | |
Matching for Short Text Similarity Modeling", | |
author = "Rao, Jinfeng and Liu, Linqing and Tay, Yi and Yang, Wei and | |
Shi, Peng and Lin, Jimmy", | |
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in | |
Natural Language Processing and the 9th International Joint | |
Conference on Natural Language Processing (EMNLP-IJCNLP)", | |
month = nov, | |
year = 2019, | |
address = "Hong Kong, China", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/D19-1540", | |
doi = "10.18653/v1/D19-1540", | |
  pages        = "5370--5381", | |
abstract = "A core problem of information retrieval (IR) is relevance | |
matching, which is to rank documents by relevance to a | |
user{'}s query. On the other hand, many NLP problems, such as | |
question answering and paraphrase identification, can be | |
considered variants of semantic matching, which is to measure | |
the semantic distance between two pieces of short | |
texts. While at a high level both relevance and semantic | |
matching require modeling textual similarity, many existing | |
techniques for one cannot be easily adapted to the other. To | |
bridge this gap, we propose a novel model, HCAN (Hybrid | |
Co-Attention Network), that comprises (1) a hybrid encoder | |
module that includes ConvNet-based and LSTM-based encoders, | |
(2) a relevance matching module that measures soft term | |
matches with importance weighting at multiple granularities, | |
and (3) a semantic matching module with co-attention | |
mechanisms that capture context-aware semantic | |
relatedness. Evaluations on multiple IR and NLP benchmarks | |
demonstrate state-of-the-art effectiveness compared to | |
approaches that do not exploit pretraining on external | |
data. Extensive ablation studies suggest that relevance and | |
semantic matching signals are complementary across many | |
problem settings, regardless of the choice of underlying | |
encoders." | |
} | |
@article{cui-2018-cmrc-2018, | |
author = "Cui, Yiming and Liu, Ting and Che, Wanxiang and Xiao, Li and | |
Chen, Zhipeng and Ma, Wentao and Wang, Shijin and Hu, | |
Guoping", | |
title = "A Span-Extraction Dataset for Chinese Machine Reading | |
Comprehension", | |
journal = "CoRR", | |
year = 2018, | |
url = "http://arxiv.org/abs/1810.07366v2", | |
abstract = "Machine Reading Comprehension (MRC) has become enormously | |
popular recently and has attracted a lot of | |
attention. However, the existing reading comprehension | |
datasets are mostly in English. In this paper, we introduce a | |
Span-Extraction dataset for Chinese machine reading | |
comprehension to add language diversities in this area. The | |
dataset is composed by near 20,000 real questions annotated | |
on Wikipedia paragraphs by human experts. We also annotated a | |
challenge set which contains the questions that need | |
comprehensive understanding and multi-sentence inference | |
throughout the context. We present several baseline systems | |
as well as anonymous submissions for demonstrating the | |
difficulties in this dataset. With the release of the | |
dataset, we hosted the Second Evaluation Workshop on Chinese | |
Machine Reading Comprehension (CMRC 2018). We hope the | |
release of the dataset could further accelerate the Chinese | |
machine reading comprehension research. Resources are | |
available: https://github.com/ymcui/cmrc2018", | |
archivePrefix= "arXiv", | |
eprint = "1810.07366", | |
primaryClass = "cs.CL" | |
} | |
@article{cui-2020-cmrc-2019, | |
author = "Cui, Yiming and Liu, Ting and Yang, Ziqing and Chen, Zhipeng | |
and Ma, Wentao and Che, Wanxiang and Wang, Shijin and Hu, | |
Guoping", | |
title = "A Sentence Cloze Dataset for Chinese Machine Reading | |
Comprehension", | |
journal = "CoRR", | |
year = 2020, | |
url = "http://arxiv.org/abs/2004.03116v1", | |
abstract = "Owing to the continuous contributions by the Chinese NLP | |
community, more and more Chinese machine reading | |
comprehension datasets become available, and they have been | |
pushing Chinese MRC research forward. To add diversity in | |
this area, in this paper, we propose a new task called | |
Sentence Cloze-style Machine Reading Comprehension | |
(SC-MRC). The proposed task aims to fill the right candidate | |
sentence into the passage that has several blanks. Moreover, | |
to add more difficulties, we also made fake candidates that | |
are similar to the correct ones, which requires the machine | |
to judge their correctness in the context. The proposed | |
dataset contains over 100K blanks (questions) within over 10K | |
passages, which was originated from Chinese narrative | |
stories. To evaluate the dataset, we implement several | |
baseline systems based on pre-trained models, and the results | |
show that the state-of-the-art model still underperforms | |
human performance by a large margin. We hope the release of | |
the dataset could further accelerate the machine reading | |
comprehension research. Resources available: | |
https://github.com/ymcui/cmrc2019", | |
archivePrefix= "arXiv", | |
eprint = "2004.03116", | |
primaryClass = "cs.CL" | |
} | |
@article{munkhdalai-2016-neural-tree-indexers, | |
author = "Munkhdalai, Tsendsuren and Yu, Hong", | |
title = "Neural Tree Indexers for Text Understanding", | |
journal = "CoRR", | |
year = 2016, | |
url = "http://arxiv.org/abs/1607.04492v2", | |
abstract = "Recurrent neural networks (RNNs) process input text | |
sequentially and model the conditional transition between | |
word tokens. In contrast, the advantages of recursive | |
networks include that they explicitly model the | |
compositionality and the recursive structure of natural | |
language. However, the current recursive architecture is | |
limited by its dependence on syntactic tree. In this paper, | |
we introduce a robust syntactic parsing-independent tree | |
structured model, Neural Tree Indexers (NTI) that provides a | |
middle ground between the sequential RNNs and the syntactic | |
                tree-based recursive models. NTI constructs a full n-ary tree | |
by processing the input text with its node function in a | |
bottom-up fashion. Attention mechanism can then be applied | |
to both structure and node function. We implemented and | |
                evaluated a binary tree model of NTI, showing the model | |
achieved the state-of-the-art performance on three different | |
NLP tasks: natural language inference, answer sentence | |
selection, and sentence classification, outperforming | |
state-of-the-art recurrent and recursive neural networks.", | |
archivePrefix= "arXiv", | |
eprint = "1607.04492", | |
primaryClass = "cs.CL" | |
} | |
@inproceedings{grandvalet-2004-entropy-minimization, | |
author = "Grandvalet, Yves and Bengio, Yoshua", | |
title = "Semi-Supervised Learning by Entropy Minimization", | |
year = 2004, | |
publisher = "MIT Press", | |
address = "Cambridge, MA, USA", | |
booktitle = "Proceedings of the 17th International Conference on Neural | |
Information Processing Systems", | |
  pages        = "529--536", | |
numpages = 8, | |
location = "Vancouver, British Columbia, Canada", | |
  series       = "NIPS'04" | |
} | |
@inproceedings{duan-2017-qg-for-qa, | |
title = "Question Generation for Question Answering", | |
author = "Duan, Nan and Tang, Duyu and Chen, Peng and Zhou, Ming", | |
booktitle = "Proceedings of the 2017 Conference on Empirical Methods in | |
Natural Language Processing", | |
month = sep, | |
year = 2017, | |
address = "Copenhagen, Denmark", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/D17-1090", | |
doi = "10.18653/v1/D17-1090", | |
pages = "866-874", | |
abstract = "This paper presents how to generate questions from given | |
passages using neural networks, where large scale QA pairs | |
are automatically crawled and processed from Community-QA | |
website, and used as training data. The contribution of the | |
paper is 2-fold: First, two types of question generation | |
approaches are proposed, one is a retrieval-based method | |
using convolution neural network (CNN), the other is a | |
generation-based method using recurrent neural network (RNN); | |
Second, we show how to leverage the generated questions to | |
improve existing question answering systems. We evaluate our | |
question generation method for the answer sentence selection | |
task on three benchmark datasets, including SQuAD, MS MARCO, | |
and WikiQA. Experimental results show that, by using | |
generated questions as an extra signal, significant QA | |
improvement can be achieved." | |
} | |
@inproceedings{hadsell-2006-contrastive-loss, | |
title = "Dimensionality reduction by learning an invariant mapping", | |
author = "Hadsell, Raia and Chopra, Sumit and LeCun, Yann", | |
booktitle = "2006 IEEE Computer Society Conference on Computer Vision and | |
Pattern Recognition (CVPR'06)", | |
volume = 2, | |
  pages        = "1735--1742", | |
year = 2006, | |
organization = "IEEE" | |
} | |
@article{wieting-2019-no-training-required, | |
author = "Wieting, John and Kiela, Douwe", | |
title = "No Training Required: Exploring Random Encoders for Sentence | |
Classification", | |
journal = "CoRR", | |
year = 2019, | |
url = "http://arxiv.org/abs/1901.10444v1", | |
abstract = "We explore various methods for computing sentence | |
representations from pre-trained word embeddings without any | |
training, i.e., using nothing but random | |
parameterizations. Our aim is to put sentence embeddings on | |
more solid footing by 1) looking at how much modern sentence | |
embeddings gain over random methods---as it turns out, | |
surprisingly little; and by 2) providing the field with more | |
appropriate baselines going forward---which are, as it turns | |
out, quite strong. We also make important observations about | |
proper experimental protocol for sentence classification | |
evaluation, together with recommendations for future | |
research.", | |
archivePrefix= "arXiv", | |
eprint = "1901.10444", | |
primaryClass = "cs.CL" | |
} | |
@inproceedings{mohtarami-2016-sls-semeval-task, | |
title = "{SLS} at {S}em{E}val-2016 Task 3: Neural-based Approaches for | |
Ranking in Community Question Answering", | |
author = "Mohtarami, Mitra and Belinkov, Yonatan and Hsu, Wei-Ning and | |
Zhang, Yu and Lei, Tao and Bar, Kfir and Cyphers, Scott and | |
Glass, Jim", | |
booktitle = "Proceedings of the 10th International Workshop on Semantic | |
Evaluation ({S}em{E}val-2016)", | |
month = jun, | |
year = 2016, | |
address = "San Diego, California", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/S16-1128", | |
doi = "10.18653/v1/S16-1128", | |
  pages        = "828--835" | |
} | |
@inproceedings{romeo-2016-neural-attention, | |
title = "Neural Attention for Learning to Rank Questions in Community | |
Question Answering", | |
author = "Romeo, Salvatore and Da San Martino, Giovanni and | |
Barr{\'o}n-Cede{\~n}o, Alberto and Moschitti, Alessandro and | |
Belinkov, Yonatan and Hsu, Wei-Ning and Zhang, Yu and | |
Mohtarami, Mitra and Glass, James", | |
booktitle = "Proceedings of {COLING} 2016, the 26th International | |
Conference on Computational Linguistics: Technical Papers", | |
month = dec, | |
year = 2016, | |
address = "Osaka, Japan", | |
publisher = "The COLING 2016 Organizing Committee", | |
url = "https://www.aclweb.org/anthology/C16-1163", | |
  pages        = "1734--1745", | |
abstract = "In real-world data, e.g., from Web forums, text is often | |
contaminated with redundant or irrelevant content, which | |
leads to introducing noise in machine learning algorithms. In | |
this paper, we apply Long Short-Term Memory networks with an | |
attention mechanism, which can select important parts of text | |
for the task of similar question retrieval from community | |
Question Answering (cQA) forums. In particular, we use the | |
attention weights for both selecting entire sentences and | |
their subparts, i.e., word/chunk, from shallow syntactic | |
trees. More interestingly, we apply tree kernels to the | |
filtered text representations, thus exploiting the implicit | |
features of the subtree space for learning question | |
reranking. Our results show that the attention-based pruning | |
allows for achieving the top position in the cQA challenge of | |
SemEval 2016, with a relatively large gap from the other | |
participants while greatly decreasing running time." | |
} | |
@inproceedings{nassif-2016-learning-semantic-relatedness, | |
title = "Learning Semantic Relatedness in Community Question Answering | |
Using Neural Models", | |
author = "Nassif, Henry and Mohtarami, Mitra and Glass, James", | |
booktitle = "Proceedings of the 1st Workshop on Representation Learning | |
for {NLP}", | |
month = aug, | |
year = 2016, | |
address = "Berlin, Germany", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/W16-1616", | |
doi = "10.18653/v1/W16-1616", | |
  pages        = "137--147" | |
} | |
@inproceedings{nakov-2016-semeval-task, | |
title = "{S}em{E}val-2016 Task 3: Community Question Answering", | |
author = "Nakov, Preslav and M{\`a}rquez, Llu{\'\i}s and Moschitti, | |
Alessandro and Magdy, Walid and Mubarak, Hamdy and Freihat, | |
Abed Alhakim and Glass, Jim and Randeree, Bilal", | |
booktitle = "Proceedings of the 10th International Workshop on Semantic | |
Evaluation ({S}em{E}val-2016)", | |
month = jun, | |
year = 2016, | |
address = "San Diego, California", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/S16-1083", | |
doi = "10.18653/v1/S16-1083", | |
  pages        = "525--545" | |
} | |
@inproceedings{belinkov-2015-vectorslu, | |
title = "{V}ector{SLU}: A Continuous Word Vector Approach to Answer | |
Selection in Community Question Answering Systems", | |
author = "Belinkov, Yonatan and Mohtarami, Mitra and Cyphers, Scott and | |
Glass, James", | |
booktitle = "Proceedings of the 9th International Workshop on Semantic | |
Evaluation ({S}em{E}val 2015)", | |
month = jun, | |
year = 2015, | |
address = "Denver, Colorado", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/S15-2048", | |
doi = "10.18653/v1/S15-2048", | |
  pages        = "282--287" | |
} | |
@inproceedings{nakov-2015-semeval-task, | |
title = "{S}em{E}val-2015 Task 3: Answer Selection in Community | |
Question Answering", | |
author = "Nakov, Preslav and M{\`a}rquez, Llu{\'\i}s and Magdy, Walid | |
and Moschitti, Alessandro and Glass, Jim and Randeree, Bilal", | |
booktitle = "Proceedings of the 9th International Workshop on Semantic | |
Evaluation ({S}em{E}val 2015)", | |
month = jun, | |
year = 2015, | |
address = "Denver, Colorado", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/S15-2047", | |
doi = "10.18653/v1/S15-2047", | |
  pages        = "269--281" | |
} | |
@article{loshchilov-2017-adamw, | |
author = "Loshchilov, Ilya and Hutter, Frank", | |
title = "Decoupled Weight Decay Regularization", | |
journal = "CoRR", | |
year = 2017, | |
url = "http://arxiv.org/abs/1711.05101v3", | |
abstract = "L$_2$ regularization and weight decay regularization are | |
equivalent for standard stochastic gradient descent (when | |
rescaled by the learning rate), but as we demonstrate this is | |
\emph{not} the case for adaptive gradient algorithms, such as | |
Adam. While common implementations of these algorithms employ | |
                L$_2$ regularization (often calling it ``weight decay'' in | |
what may be misleading due to the inequivalence we expose), | |
we propose a simple modification to recover the original | |
formulation of weight decay regularization by | |
\emph{decoupling} the weight decay from the optimization | |
steps taken w.r.t. the loss function. We provide empirical | |
evidence that our proposed modification (i) decouples the | |
optimal choice of weight decay factor from the setting of the | |
learning rate for both standard SGD and Adam and (ii) | |
substantially improves Adam's generalization performance, | |
allowing it to compete with SGD with momentum on image | |
classification datasets (on which it was previously typically | |
outperformed by the latter). Our proposed decoupled weight | |
decay has already been adopted by many researchers, and the | |
community has implemented it in TensorFlow and PyTorch; the | |
complete source code for our experiments is available at | |
https://github.com/loshchil/AdamW-and-SGDW", | |
archivePrefix= "arXiv", | |
eprint = "1711.05101", | |
primaryClass = "cs.LG" | |
} | |
@article{wang-2014-hashining-similarity-search, | |
author = "Wang, Jingdong and Shen, Heng Tao and Song, Jingkuan and Ji, | |
Jianqiu", | |
title = "Hashing for Similarity Search: a Survey", | |
journal = "CoRR", | |
year = 2014, | |
url = "http://arxiv.org/abs/1408.2927v1", | |
abstract = "Similarity search (nearest neighbor search) is a problem of | |
pursuing the data items whose distances to a query item are | |
the smallest from a large database. Various methods have | |
been developed to address this problem, and recently a lot of | |
efforts have been devoted to approximate search. In this | |
paper, we present a survey on one of the main solutions, | |
hashing, which has been widely studied since the pioneering | |
work locality sensitive hashing. We divide the hashing | |
algorithms two main categories: locality sensitive hashing, | |
which designs hash functions without exploring the data | |
distribution and learning to hash, which learns hash | |
functions according the data distribution, and review them | |
from various aspects, including hash function design and | |
distance measure and search scheme in the hash coding space.", | |
archivePrefix= "arXiv", | |
eprint = "1408.2927", | |
primaryClass = "cs.DS" | |
} | |
@inproceedings{yang-2008-ilp, | |
title = "An Entity-Mention Model for Coreference Resolution with | |
Inductive Logic Programming", | |
author = "Yang, Xiaofeng and Su, Jian and Lang, Jun and Tan, Chew Lim | |
and Liu, Ting and Li, Sheng", | |
booktitle = "Proceedings of ACL-08: HLT", | |
month = jun, | |
year = 2008, | |
address = "Columbus, Ohio", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/P08-1096", | |
  pages        = "843--851" | |
} | |
@article{cui-2019-kbqa, | |
author = "Cui, Wanyun and Xiao, Yanghua and Wang, Haixun and Song, | |
Yangqiu and Hwang, Seung-won and Wang, Wei", | |
  title        = "{KBQA}: Learning Question Answering Over {QA} Corpora and | |
                Knowledge Bases", | |
journal = "CoRR", | |
year = 2019, | |
url = "http://arxiv.org/abs/1903.02419v1", | |
abstract = "Question answering (QA) has become a popular way for humans | |
to access billion-scale knowledge bases. Unlike web search, | |
QA over a knowledge base gives out accurate and concise | |
results, provided that natural language questions can be | |
understood and mapped precisely to structured queries over | |
the knowledge base. The challenge, however, is that a human | |
can ask one question in many different ways. Previous | |
approaches have natural limits due to their representations: | |
rule based approaches only understand a small set of | |
                ``canned'' questions, while keyword based or synonym based | |
approaches cannot fully understand the questions. In this | |
paper, we design a new kind of question representation: | |
templates, over a billion scale knowledge base and a million | |
scale QA corpora. For example, for questions about a city's | |
population, we learn templates such as What's the population | |
                of \$city?, How many people are there in \$city?. We learned 27 | |
million templates for 2782 intents. Based on these templates, | |
our QA system KBQA effectively supports binary factoid | |
questions, as well as complex questions which are composed of | |
a series of binary factoid questions. Furthermore, we expand | |
predicates in RDF knowledge base, which boosts the coverage | |
of knowledge base by 57 times. Our QA system beats all other | |
state-of-art works on both effectiveness and efficiency over | |
QALD benchmarks.", | |
archivePrefix= "arXiv", | |
eprint = "1903.02419", | |
primaryClass = "cs.CL" | |
} | |
@article{bordes-2014-open-qa, | |
author = "Bordes, Antoine and Weston, Jason and Usunier, Nicolas", | |
title = "Open Question Answering With Weakly Supervised Embedding | |
Models", | |
journal = "CoRR", | |
year = 2014, | |
url = "http://arxiv.org/abs/1404.4326v1", | |
abstract = "Building computers able to answer questions on any subject is | |
a long standing goal of artificial intelligence. Promising | |
progress has recently been achieved by methods that learn to | |
map questions to logical forms or database queries. Such | |
approaches can be effective but at the cost of either large | |
amounts of human-labeled data or by defining lexicons and | |
grammars tailored by practitioners. In this paper, we instead | |
take the radical approach of learning to map questions to | |
vectorial feature representations. By mapping answers into | |
the same space one can query any knowledge base independent | |
of its schema, without requiring any grammar or lexicon. Our | |
method is trained with a new optimization procedure combining | |
stochastic gradient descent followed by a fine-tuning step | |
using the weak supervision provided by blending automatically | |
and collaboratively generated resources. We empirically | |
demonstrate that our model can capture meaningful signals | |
from its noisy supervision leading to major improvements over | |
paralex, the only existing method able to be trained on | |
similar weakly labeled data.", | |
archivePrefix= "arXiv", | |
eprint = "1404.4326", | |
primaryClass = "cs.CL" | |
} | |
@inproceedings{zhao-2011-auto-qg, | |
title = "Automatically Generating Questions from Queries for | |
Community-based Question Answering", | |
author = "Zhao, Shiqi and Wang, Haifeng and Li, Chao and Liu, Ting and | |
Guan, Yi", | |
booktitle = "Proceedings of 5th International Joint Conference on Natural | |
Language Processing", | |
month = nov, | |
year = 2011, | |
address = "Chiang Mai, Thailand", | |
publisher = "Asian Federation of Natural Language Processing", | |
url = "https://www.aclweb.org/anthology/I11-1104", | |
  pages        = "929--937" | |
} | |
@article{yuan-2017-neural-qg, | |
author = "Yuan, Xingdi and Wang, Tong and Gulcehre, Caglar and Sordoni, | |
Alessandro and Bachman, Philip and Subramanian, Sandeep and | |
Zhang, Saizheng and Trischler, Adam", | |
title = "Machine Comprehension By Text-To-Text Neural Question | |
Generation", | |
journal = "CoRR", | |
year = 2017, | |
url = "http://arxiv.org/abs/1705.02012v2", | |
abstract = "We propose a recurrent neural model that generates | |
natural-language questions from documents, conditioned on | |
answers. We show how to train the model using a combination | |
of supervised and reinforcement learning. After teacher | |
forcing for standard maximum likelihood training, we | |
fine-tune the model using policy gradient techniques to | |
maximize several rewards that measure question quality. Most | |
notably, one of these rewards is the performance of a | |
question-answering system. We motivate question generation as | |
a means to improve the performance of question answering | |
systems. Our model is trained and evaluated on the recent | |
question-answering dataset SQuAD.", | |
archivePrefix= "arXiv", | |
eprint = "1705.02012", | |
primaryClass = "cs.CL" | |
} | |
@article{subramanian-2017-neural-qg, | |
author = "Subramanian, Sandeep and Wang, Tong and Yuan, Xingdi and | |
Zhang, Saizheng and Bengio, Yoshua and Trischler, Adam", | |
title = "Neural Models for Key Phrase Detection and Question | |
Generation", | |
journal = "CoRR", | |
year = 2017, | |
url = "http://arxiv.org/abs/1706.04560v3", | |
abstract = "We propose a two-stage neural model to tackle question | |
generation from documents. First, our model estimates the | |
probability that word sequences in a document are ones that a | |
human would pick when selecting candidate answers by training | |
a neural key-phrase extractor on the answers in a | |
question-answering corpus. Predicted key phrases then act as | |
target answers and condition a sequence-to-sequence | |
question-generation model with a copy mechanism. | |
Empirically, our key-phrase extraction model significantly | |
outperforms an entity-tagging baseline and existing | |
rule-based approaches. We further demonstrate that our | |
question generation system formulates fluent, answerable | |
questions from key phrases. This two-stage system could be | |
used to augment or generate reading comprehension datasets, | |
which may be leveraged to improve machine reading systems or | |
in educational settings.", | |
archivePrefix= "arXiv", | |
eprint = "1706.04560", | |
primaryClass = "cs.CL" | |
} | |
@inproceedings{rao-2019-gan-qg, | |
title = "{A}nswer-based {A}dversarial {T}raining for {G}enerating | |
{C}larification {Q}uestions", | |
author = "Rao, Sudha and Daum{\'e} III, Hal", | |
booktitle = "Proceedings of the 2019 Conference of the North {A}merican | |
Chapter of the Association for Computational Linguistics: | |
Human Language Technologies, Volume 1 (Long and Short | |
Papers)", | |
month = jun, | |
year = 2019, | |
address = "Minneapolis, Minnesota", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/N19-1013", | |
doi = "10.18653/v1/N19-1013", | |
  pages        = "143--155", | |
abstract = "We present an approach for generating clarification questions | |
with the goal of eliciting new information that would make | |
the given textual context more complete. We propose that | |
modeling hypothetical answers (to clarification questions) as | |
latent variables can guide our approach into generating more | |
useful clarification questions. We develop a Generative | |
Adversarial Network (GAN) where the generator is a | |
sequence-to-sequence model and the discriminator is a utility | |
function that models the value of updating the context with | |
the answer to the clarification question. We evaluate on two | |
datasets, using both automatic metrics and human judgments of | |
usefulness, specificity and relevance, showing that our | |
approach outperforms both a retrieval-based model and | |
ablations that exclude the utility model and the adversarial | |
training." | |
} | |
@inproceedings{heilman-2010-good-question, | |
title = "Good question! statistical ranking for question generation", | |
  author       = "Heilman, Michael and Smith, Noah A.", | |
booktitle = "Human Language Technologies: The 2010 Annual Conference of | |
the North American Chapter of the Association for | |
Computational Linguistics", | |
  pages        = "609--617", | |
year = 2010, | |
organization = "Association for Computational Linguistics" | |
} | |
@article{tang-2017-qa-qg-dual-task, | |
author = "Tang, Duyu and Duan, Nan and Qin, Tao and Yan, Zhao and Zhou, | |
Ming", | |
title = "Question Answering and Question Generation As Dual Tasks", | |
journal = "CoRR", | |
year = 2017, | |
url = "http://arxiv.org/abs/1706.02027v2", | |
abstract = "We study the problem of joint question answering (QA) and | |
question generation (QG) in this paper. Our intuition is | |
that QA and QG have intrinsic connections and these two tasks | |
could improve each other. On one side, the QA model judges | |
whether the generated question of a QG model is relevant to | |
the answer. On the other side, the QG model provides the | |
probability of generating a question given the answer, which | |
is a useful evidence that in turn facilitates QA. In this | |
paper we regard QA and QG as dual tasks. We propose a | |
training framework that trains the models of QA and QG | |
simultaneously, and explicitly leverages their probabilistic | |
correlation to guide the training process of both models. We | |
implement a QG model based on sequence-to-sequence learning, | |
and a QA model based on recurrent neural network. As all the | |
components of the QA and QG models are differentiable, all | |
the parameters involved in these two models could be | |
conventionally learned with back propagation. We conduct | |
experiments on three datasets. Empirical results show that | |
our training framework improves both QA and QG tasks. The | |
improved QA model performs comparably with strong baseline | |
approaches on all three datasets.", | |
archivePrefix= "arXiv", | |
eprint = "1706.02027", | |
primaryClass = "cs.CL" | |
} | |
@article{wang-2017-joint-qa-qg,
  author = {Wang, Tong and Yuan, Xingdi and Trischler, Adam},
  title = {A Joint Model for Question Answering and Question Generation},
  journal = {CoRR},
  year = 2017,
  eprint = {1706.01450},
  archivePrefix = {arXiv},
  primaryClass = {cs.CL},
  url = {http://arxiv.org/abs/1706.01450v1},
  abstract = {We propose a generative machine comprehension model that
    learns jointly to ask and answer questions based on documents. The
    proposed model uses a sequence-to-sequence framework that encodes the
    document and generates a question (answer) given an answer (question).
    Significant improvement in model performance is observed empirically on
    the SQuAD corpus, confirming our hypothesis that the model benefits from
    jointly learning to perform both tasks. We believe the joint model's
    novelty offers a new perspective on machine comprehension beyond
    architectural engineering, and serves as a first step towards autonomous
    information seeking.}
}
@article{yang-2017-qa-dan,
  author = {Yang, Zhilin and Hu, Junjie and Salakhutdinov, Ruslan and
    Cohen, William W.},
  title = {Semi-Supervised {QA} With Generative Domain-Adaptive Nets},
  journal = {CoRR},
  year = 2017,
  eprint = {1702.02206},
  archivePrefix = {arXiv},
  primaryClass = {cs.CL},
  url = {http://arxiv.org/abs/1702.02206v2},
  abstract = {We study the problem of semi-supervised question
    answering---utilizing unlabeled text to boost the performance of
    question answering models. We propose a novel training framework, the
    Generative Domain-Adaptive Nets. In this framework, we train a
    generative model to generate questions based on the unlabeled text, and
    combine model-generated questions with human-generated questions for
    training question answering models. We develop novel domain adaptation
    algorithms, based on reinforcement learning, to alleviate the
    discrepancy between the model-generated data distribution and the
    human-generated data distribution. Experiments show that our proposed
    framework obtains substantial improvement from unlabeled text.}
}
@article{jegou-2011-product-quantization,
  author = {J{\'e}gou, Herv{\'e} and Douze, Matthijs and Schmid, Cordelia},
  title = {Product Quantization for Nearest Neighbor Search},
  journal = {IEEE Trans. Pattern Anal. Mach. Intell.},
  year = 2011,
  month = jan,
  issue_date = {January 2011},
  publisher = {IEEE Computer Society},
  address = {USA},
  volume = 33,
  number = 1,
  pages = {117--128},
  numpages = 12,
  issn = {0162-8828},
  doi = {10.1109/TPAMI.2010.57},
  url = {https://doi.org/10.1109/TPAMI.2010.57},
  keywords = {High-dimensional indexing, image indexing, very large
    databases, approximate search}
}
@article{tay-2018-csran,
  author = {Tay, Yi and Tuan, Luu Anh and Hui, Siu Cheung},
  title = {Co-Stack Residual Affinity Networks With Multi-Level
    Attention Refinement for Matching Text Sequences},
  journal = {CoRR},
  year = 2018,
  eprint = {1810.02938},
  archivePrefix = {arXiv},
  primaryClass = {cs.CL},
  url = {http://arxiv.org/abs/1810.02938v1},
  abstract = {Learning a matching function between two text sequences is a
    long standing problem in NLP research. This task enables many potential
    applications such as question answering and paraphrase identification.
    This paper proposes Co-Stack Residual Affinity Networks (CSRAN), a new
    and universal neural architecture for this problem. CSRAN is a deep
    architecture, involving stacked (multi-layered) recurrent encoders.
    Stacked/Deep architectures are traditionally difficult to train, due to
    the inherent weaknesses such as difficulty with feature propagation and
    vanishing gradients. CSRAN incorporates two novel components to take
    advantage of the stacked architecture. Firstly, it introduces a new
    bidirectional alignment mechanism that learns affinity weights by fusing
    sequence pairs across stacked hierarchies. Secondly, it leverages a
    multi-level attention refinement component between stacked recurrent
    layers. The key intuition is that, by leveraging information across all
    network hierarchies, we can not only improve gradient flow but also
    improve overall performance. We conduct extensive experiments on six
    well-studied text sequence matching datasets, achieving state-of-the-art
    performance on all.}
}
@inproceedings{he-2016-pairwise-word-interaction,
  author = {He, Hua and Lin, Jimmy},
  title = {Pairwise Word Interaction Modeling with Deep Neural Networks
    for Semantic Similarity Measurement},
  booktitle = {Proceedings of the 2016 Conference of the North {A}merican
    Chapter of the Association for Computational Linguistics:
    Human Language Technologies},
  month = jun,
  year = 2016,
  address = {San Diego, California},
  publisher = {Association for Computational Linguistics},
  url = {https://www.aclweb.org/anthology/N16-1108},
  doi = {10.18653/v1/N16-1108},
  pages = {937--948}
}
@article{zhang-2020-soft-masked-bert,
  author = {Zhang, Shaohua and Huang, Haoran and Liu, Jicong and Li, Hang},
  title = {Spelling Error Correction With Soft-Masked {BERT}},
  journal = {CoRR},
  year = 2020,
  eprint = {2005.07421},
  archivePrefix = {arXiv},
  primaryClass = {cs.CL},
  url = {http://arxiv.org/abs/2005.07421v1},
  abstract = {Spelling error correction is an important yet challenging
    task because a satisfactory solution of it essentially needs human-level
    language understanding ability. Without loss of generality we consider
    Chinese spelling error correction (CSC) in this paper. A
    state-of-the-art method for the task selects a character from a list of
    candidates for correction (including non-correction) at each position of
    the sentence on the basis of BERT, the language representation model.
    The accuracy of the method can be sub-optimal, however, because BERT
    does not have sufficient capability to detect whether there is an error
    at each position, apparently due to the way of pre-training it using
    mask language modeling. In this work, we propose a novel neural
    architecture to address the aforementioned issue, which consists of a
    network for error detection and a network for error correction based on
    BERT, with the former being connected to the latter with what we call
    soft-masking technique. Our method of using `Soft-Masked BERT' is
    general, and it may be employed in other language detection-correction
    problems. Experimental results on two datasets demonstrate that the
    performance of our proposed method is significantly better than the
    baselines including the one solely based on BERT.}
}
@inproceedings{sarikaya-2016-cortana,
  author = {Sarikaya, Ruhi and Crook, Paul A and Marin, Alex and Jeong,
    Minwoo and Robichaud, Jean-Philippe and Celikyilmaz, Asli and
    Kim, Young-Bum and Rochette, Alexandre and Khan, Omar Zia and
    Liu, Xiaohu and others},
  title = {An overview of end-to-end language understanding and dialog
    management for personal digital assistants},
  booktitle = {2016 {IEEE} Spoken Language Technology Workshop ({SLT})},
  pages = {391--397},
  year = 2016,
  organization = {IEEE}
}
@article{williams-2017-hcn,
  author = {Williams, Jason D. and Asadi, Kavosh and Zweig, Geoffrey},
  title = {Hybrid Code Networks: Practical and Efficient End-To-End
    Dialog Control With Supervised and Reinforcement Learning},
  journal = {CoRR},
  year = 2017,
  eprint = {1702.03274},
  archivePrefix = {arXiv},
  primaryClass = {cs.AI},
  url = {http://arxiv.org/abs/1702.03274v2},
  abstract = {End-to-end learning of recurrent neural networks (RNNs) is an
    attractive solution for dialog systems; however, current techniques are
    data-intensive and require thousands of dialogs to learn simple
    behaviors. We introduce Hybrid Code Networks (HCNs), which combine an
    RNN with domain-specific knowledge encoded as software and system action
    templates. Compared to existing end-to-end approaches, HCNs considerably
    reduce the amount of training data required, while retaining the key
    benefit of inferring a latent representation of dialog state. In
    addition, HCNs can be optimized with supervised learning, reinforcement
    learning, or a mixture of both. HCNs attain state-of-the-art performance
    on the bAbI dialog dataset, and outperform two commercially deployed
    customer-facing dialog systems.}
}
@article{anh-2017-hybrid-bi-lstm-crf,
  author = {Anh, L. T. and Arkhipov, M. Y. and Burtsev, M. S.},
  title = {Application of a Hybrid {Bi-LSTM-CRF} Model To the Task of
    {Russian} Named Entity Recognition},
  journal = {CoRR},
  year = 2017,
  eprint = {1709.09686},
  archivePrefix = {arXiv},
  primaryClass = {cs.CL},
  url = {http://arxiv.org/abs/1709.09686v2},
  abstract = {Named Entity Recognition (NER) is one of the most common
    tasks of the natural language processing. The purpose of NER is to find
    and classify tokens in text documents into predefined categories called
    tags, such as person names, quantity expressions, percentage
    expressions, names of locations, organizations, as well as expression of
    time, currency and others. Although there is a number of approaches have
    been proposed for this task in Russian language, it still has a
    substantial potential for the better solutions. In this work, we studied
    several deep neural network models starting from vanilla Bi-directional
    Long Short-Term Memory (Bi-LSTM) then supplementing it with Conditional
    Random Fields (CRF) as well as highway networks and finally adding
    external word embeddings. All models were evaluated across three
    datasets: Gareev's dataset, Person-1000, FactRuEval-2016. We found that
    extension of Bi-LSTM model with CRF significantly increased the quality
    of predictions. Encoding input tokens with external word embeddings
    reduced training time and allowed to achieve state of the art for the
    Russian NER task.}
}
@article{le-2019-deep-neural,
  author = {Lê, Thế Anh},
  title = {A Deep Neural Network Model for the task of Named Entity
    Recognition},
  journal = {International Journal of Machine Learning and Computing},
  year = 2019,
  month = feb,
  volume = 9,
  doi = {10.18178/ijmlc.2019.9.1.758}
}
@inproceedings{le-2020-sla-to-sbd,
  author = {Le, The Anh},
  title = {Sequence Labeling Approach to the Task of Sentence Boundary
    Detection},
  booktitle = {Proceedings of the 4th International Conference on Machine
    Learning and Soft Computing},
  series = {ICMLSC 2020},
  year = 2020,
  isbn = 9781450376310,
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  location = {Haiphong City, Viet Nam},
  pages = {144--148},
  numpages = 5,
  doi = {10.1145/3380688.3380703},
  url = {https://doi.org/10.1145/3380688.3380703},
  keywords = {voice-enabled chatbot, sequence labeling, Sentence boundary
    detection}
}
@article{gao-2018-neural-conversation,
  author = {Gao, Jianfeng and Galley, Michel and Li, Lihong},
  title = {Neural Approaches To Conversational {AI}},
  journal = {CoRR},
  year = 2018,
  eprint = {1809.08267},
  archivePrefix = {arXiv},
  primaryClass = {cs.CL},
  url = {http://arxiv.org/abs/1809.08267v3},
  abstract = {The present paper surveys neural approaches to conversational
    AI that have been developed in the last few years. We group
    conversational systems into three categories: (1) question answering
    agents, (2) task-oriented dialogue agents, and (3) chatbots. For each
    category, we present a review of state-of-the-art neural approaches,
    draw the connection between them and traditional approaches, and
    discuss the progress that has been made and challenges still being
    faced, using specific systems and models as case studies.}
}
@inproceedings{kurata-2016-sentence-level-slot-filling,
  author = {Kurata, Gakuto and Xiang, Bing and Zhou, Bowen and Yu, Mo},
  title = {Leveraging Sentence-level Information with Encoder {LSTM} for
    Semantic Slot Filling},
  booktitle = {Proceedings of the 2016 Conference on Empirical Methods in
    Natural Language Processing},
  month = nov,
  year = 2016,
  address = {Austin, Texas},
  publisher = {Association for Computational Linguistics},
  url = {https://www.aclweb.org/anthology/D16-1223},
  doi = {10.18653/v1/D16-1223},
  pages = {2077--2083}
}
@article{jaech-2016-da-for-nlu,
  author = {Jaech, Aaron and Heck, Larry and Ostendorf, Mari},
  title = {Domain Adaptation of Recurrent Neural Networks for Natural
    Language Understanding},
  journal = {CoRR},
  year = 2016,
  eprint = {1604.00117},
  archivePrefix = {arXiv},
  primaryClass = {cs.CL},
  url = {http://arxiv.org/abs/1604.00117v2},
  abstract = {The goal of this paper is to use multi-task learning to
    efficiently scale slot filling models for natural language
    understanding to handle multiple target tasks or domains. The key to
    scalability is reducing the amount of training data needed to learn a
    model for a new task. The proposed multi-task model delivers better
    performance with less data by leveraging patterns that it learns from
    the other tasks. The approach supports an open vocabulary, which allows
    the models to generalize to unseen words, which is particularly
    important when very little training data is used. A newly collected
    crowd-sourced data set, covering four different domains, is used to
    demonstrate the effectiveness of the domain adaptation and open
    vocabulary techniques.}
}
@inproceedings{tafforeau-2016-multitask-slu,
  author = {Tafforeau, J{\'e}r{\'e}mie and B{\'e}chet, Fr{\'e}d{\'e}ric and
    Arti{\`e}res, Thierry and Favre, Beno{\^i}t},
  title = {Joint Syntactic and Semantic Analysis with a Multitask Deep
    Learning Framework for Spoken Language Understanding},
  booktitle = {INTERSPEECH},
  year = 2016
}
@inproceedings{hakkani-tur-2016-joint-semantic-frame-parsing,
  author = {Hakkani-T{\"u}r, Dilek and Tur, Gokhan and Celikyilmaz, Asli and
    Chen, Yun-Nung Vivian and Gao, Jianfeng and Deng, Li and
    Wang, Ye-Yi},
  title = {Multi-Domain Joint Semantic Frame Parsing using
    Bi-directional {RNN-LSTM}},
  booktitle = {Proceedings of The 17th Annual Meeting of the International
    Speech Communication Association (INTERSPEECH 2016)},
  year = 2016,
  month = jun,
  publisher = {ISCA},
  url = {https://www.microsoft.com/en-us/research/publication/multijoint/},
  abstract = {Sequence-to-sequence deep learning has recently emerged as a
    new paradigm in supervised learning for spoken language understanding.
    However, most of the previous studies explored this framework for
    building single domain models for each task, such as slot filling or
    domain classification, comparing deep learning based approaches with
    conventional ones like conditional random fields. This paper proposes a
    holistic multi-domain, multi-task (i.e. slot filling, domain and intent
    detection) modeling approach to estimate complete semantic frames for
    all user utterances addressed to a conversational system, demonstrating
    the distinctive power of deep learning methods, namely bi-directional
    recurrent neural network (RNN) with long-short term memory (LSTM) cells
    (RNN-LSTM) to handle such complexity. The contributions of the presented
    work are three-fold: (i) we propose an RNN-LSTM architecture for joint
    modeling of slot filling, intent determination, and domain
    classification; (ii) we build a joint multi-domain model enabling
    multi-task deep learning where the data from each domain reinforces each
    other; (iii) we investigate alternative architectures for modeling
    lexical context in spoken language understanding. In addition to the
    simplicity of the single model framework, experimental results show the
    power of such an approach on Microsoft Cortana real user data over
    alternative methods based on single domain/task deep learning.}
}
@article{liu-2016-joint-intent-detection-slot-filling,
  author = {Liu, Bing and Lane, Ian},
  title = {Attention-Based Recurrent Neural Network Models for Joint
    Intent Detection and Slot Filling},
  journal = {CoRR},
  year = 2016,
  eprint = {1609.01454},
  archivePrefix = {arXiv},
  primaryClass = {cs.CL},
  url = {http://arxiv.org/abs/1609.01454v1},
  abstract = {Attention-based encoder-decoder neural network models have
    recently shown promising results in machine translation and speech
    recognition. In this work, we propose an attention-based neural network
    model for joint intent detection and slot filling, both of which are
    critical steps for many speech understanding and dialog systems. Unlike
    in machine translation and speech recognition, alignment is explicit in
    slot filling. We explore different strategies in incorporating this
    alignment information to the encoder-decoder framework. Learning from
    the attention mechanism in encoder-decoder model, we further propose
    introducing attention to the alignment-based RNN models. Such attentions
    provide additional information to the intent classification and slot
    label prediction. Our independent task models achieve state-of-the-art
    intent detection error rate and slot filling F1 score on the benchmark
    ATIS task. Our joint training model further obtains 0.56 \% absolute
    (23.8 \% relative) error reduction on intent detection and 0.23 \%
    absolute gain on slot filling over the independent task models.}
}
@inproceedings{hori-2016-contextual-slu,
  author = {Hori, Chiori and Hori, Takaaki and Watanabe, Shinji and
    Hershey, John R.},
  title = {Context-Sensitive and Role-Dependent Spoken Language
    Understanding Using Bidirectional and Attention {LSTM}s},
  booktitle = {INTERSPEECH},
  year = 2016
}
@inproceedings{bhargava-2013-easy-slot-detection,
  author = {Bhargava, A. and Celikyilmaz, A. and Hakkani-T{\"u}r, D. and
    Sarikaya, R.},
  title = {Easy contextual intent prediction and slot detection},
  booktitle = {2013 IEEE International Conference on Acoustics, Speech and
    Signal Processing},
  year = 2013,
  pages = {8337--8341}
}
@inproceedings{chen-2016-mm-for-slu,
  author = {Chen, Yun-Nung Vivian and Hakkani-T{\"u}r, Dilek and Tur, Gokhan
    and Gao, Jianfeng and Deng, Li},
  title = {End-to-End Memory Networks with Knowledge Carryover for
    Multi-Turn Spoken Language Understanding},
  booktitle = {Proceedings of The 17th Annual Meeting of the International
    Speech Communication Association (INTERSPEECH 2016)},
  year = 2016,
  month = jun,
  publisher = {ISCA},
  url = {https://www.microsoft.com/en-us/research/publication/contextualslu/},
  abstract = {Spoken language understanding (SLU) is a core component of a
    spoken dialogue system. In the traditional architecture of dialogue
    systems, the SLU component treats each utterance independent of each
    other, and then the following components aggregate the multi-turn
    information in the separate phases. However, there are two challenges:
    1) errors from previous turns may be propagated and then degrade the
    performance of the current turn; 2) knowledge mentioned in the long
    history may not be carried into the current turn. This paper addresses
    the above issues by proposing an architecture using end-to-end memory
    networks to model knowledge carryover in multi-turn conversations,
    where utterances encoded with intents and slots can be stored as
    embeddings in the memory and the decoding phase applies an attention
    model to leverage previously stored semantics for intent prediction and
    slot tagging simultaneously. The experiments on Microsoft Cortana
    conversational data show that the proposed memory network architecture
    can effectively extract salient semantics for modeling knowledge
    carryover in the multi-turn conversations and outperform the results
    using the state-of-the-art recurrent neural network framework (RNN)
    designed for single-turn SLU.}
}
@inproceedings{bapna-2017-sequential-dialogue,
  author = {Bapna, Ankur and T{\"u}r, Gokhan and Hakkani-T{\"u}r, Dilek
    and Heck, Larry},
  title = {Sequential Dialogue Context Modeling for Spoken Language
    Understanding},
  booktitle = {Proceedings of the 18th Annual {SIG}dial Meeting on Discourse
    and Dialogue},
  month = aug,
  year = 2017,
  address = {Saarbr{\"u}cken, Germany},
  publisher = {Association for Computational Linguistics},
  url = {https://www.aclweb.org/anthology/W17-5514},
  doi = {10.18653/v1/W17-5514},
  pages = {103--114},
  abstract = {Spoken Language Understanding (SLU) is a key component of
    goal oriented dialogue systems that would parse user utterances into
    semantic frame representations. Traditionally SLU does not utilize the
    dialogue history beyond the previous system turn and contextual
    ambiguities are resolved by the downstream components. In this paper, we
    explore novel approaches for modeling dialogue context in a recurrent
    neural network (RNN) based language understanding system. We propose the
    Sequential Dialogue Encoder Network, that allows encoding context from
    the dialogue history in chronological order. We compare the performance
    of our proposed architecture with two context models, one that uses just
    the previous turn context and another that encodes dialogue context in a
    memory network, but loses the order of utterances in the dialogue
    history. Experiments with a multi-domain dialogue dataset demonstrate
    that the proposed architecture results in reduced semantic frame error
    rates.}
}
@article{chen-2016-k-san,
  author = {Chen, Yun-Nung and Hakkani-Tur, Dilek and Tur, Gokhan and
    Celikyilmaz, Asli and Gao, Jianfeng and Deng, Li},
  title = {Knowledge As a Teacher: Knowledge-Guided Structural Attention
    Networks},
  journal = {CoRR},
  year = 2016,
  eprint = {1609.03286},
  archivePrefix = {arXiv},
  primaryClass = {cs.AI},
  url = {http://arxiv.org/abs/1609.03286v1},
  abstract = {Natural language understanding (NLU) is a core component of a
    spoken dialogue system. Recently recurrent neural networks (RNN)
    obtained strong results on NLU due to their superior ability of
    preserving sequential information over time. Traditionally, the NLU
    module tags semantic slots for utterances considering their flat
    structures, as the underlying RNN structure is a linear chain. However,
    natural language exhibits linguistic properties that provide rich,
    structured information for better understanding. This paper introduces
    a novel model, knowledge-guided structural attention networks (K-SAN),
    a generalization of RNN to additionally incorporate non-flat network
    topologies guided by prior knowledge. There are two characteristics: 1)
    important substructures can be captured from small training data,
    allowing the model to generalize to previously unseen test data; 2) the
    model automatically figures out the salient substructures that are
    essential to predict the semantic tags of the given sentences, so that
    the understanding performance can be improved. The experiments on the
    benchmark Air Travel Information System (ATIS) data show that the
    proposed K-SAN architecture can effectively extract salient knowledge
    from substructures with an attention mechanism, and outperform the
    performance of the state-of-the-art neural network based frameworks.}
}
@article{li-2017-lu-importance,
  author = {Li, Xiujun and Chen, Yun-Nung and Li, Lihong and Gao,
    Jianfeng and Celikyilmaz, Asli},
  title = {Investigation of Language Understanding Impact for
    Reinforcement Learning Based Dialogue Systems},
  journal = {CoRR},
  year = 2017,
  eprint = {1703.07055},
  archivePrefix = {arXiv},
  primaryClass = {cs.CL},
  url = {http://arxiv.org/abs/1703.07055v1},
  abstract = {Language understanding is a key component in a spoken
    dialogue system. In this paper, we investigate how the language
    understanding module influences the dialogue system performance by
    conducting a series of systematic experiments on a task-oriented neural
    dialogue system in a reinforcement learning based setting. The
    empirical study shows that among different types of language
    understanding errors, slot-level errors can have more impact on the
    overall performance of a dialogue system compared to intent-level
    errors. In addition, our experiments demonstrate that the reinforcement
    learning based dialogue system is able to learn when and what to
    confirm in order to achieve better performance and greater robustness.}
}
@inproceedings{henderson-2013-nn-for-dst,
  author = {Henderson, Matthew and Thomson, Blaise and Young, Steve},
  title = {Deep Neural Network Approach for the Dialog State Tracking
    Challenge},
  booktitle = {Proceedings of the {SIGDIAL} 2013 Conference},
  month = aug,
  year = 2013,
  address = {Metz, France},
  publisher = {Association for Computational Linguistics},
  url = {https://www.aclweb.org/anthology/W13-4073},
  pages = {467--471}
}
@inproceedings{henderson-2015-ml-for-dst,
  author = {Henderson, Matthew},
  title = {Machine Learning for Dialog State Tracking: A Review},
  booktitle = {Proceedings of The First International Workshop on Machine
    Learning in Spoken Language Processing},
  year = 2015
}
@article{mrksic-2015-rnn-for-dst,
  author = {Mrk{\v{s}}i{\'c}, Nikola and S{\'e}aghdha, Diarmuid {\'O} and
    Thomson, Blaise and Ga{\v{s}}i{\'c}, Milica and Su, Pei-Hao
    and Vandyke, David and Wen, Tsung-Hsien and Young, Steve},
  title = {Multi-Domain Dialog State Tracking Using Recurrent Neural
    Networks},
  journal = {CoRR},
  year = 2015,
  eprint = {1506.07190},
  archivePrefix = {arXiv},
  primaryClass = {cs.CL},
  url = {http://arxiv.org/abs/1506.07190v1},
  abstract = {Dialog state tracking is a key component of many modern
    dialog systems, most of which are designed with a single, well-defined
    domain in mind. This paper shows that dialog data drawn from different
    dialog domains can be used to train a general belief tracking model
    which can operate across all of these domains, exhibiting superior
    performance to each of the domain-specific models. We propose a
    training procedure which uses out-of-domain data to initialise belief
    tracking models for entirely new domains. This procedure leads to
    improvements in belief tracking performance regardless of the amount of
    in-domain data available for training the model.}
}
@article{mrksic-2016-neural-belief-tracker,
  author = {Mrk{\v{s}}i{\'c}, Nikola and S{\'e}aghdha, Diarmuid {\'O} and
    Wen, Tsung-Hsien and Thomson, Blaise and Young, Steve},
  title = {Neural Belief Tracker: Data-Driven Dialogue State Tracking},
  journal = {CoRR},
  year = 2016,
  eprint = {1606.03777},
  archivePrefix = {arXiv},
  primaryClass = {cs.CL},
  url = {http://arxiv.org/abs/1606.03777v2},
  abstract = {One of the core components of modern spoken dialogue systems
    is the belief tracker, which estimates the user's goal at every step of
    the dialogue. However, most current approaches have difficulty scaling
    to larger, more complex dialogue domains. This is due to their
    dependency on either: a) Spoken Language Understanding models that
    require large amounts of annotated training data; or b) hand-crafted
    lexicons for capturing some of the linguistic variation in users'
    language. We propose a novel Neural Belief Tracking (NBT) framework
    which overcomes these problems by building on recent advances in
    representation learning. NBT models reason over pre-trained word
    vectors, learning to compose them into distributed representations of
    user utterances and dialogue context. Our evaluation on two datasets
    shows that this approach surpasses past limitations, matching the
    performance of state-of-the-art models which rely on hand-crafted
    semantic lexicons and outperforming them when such lexicons are not
    provided.}
}
@article{shi-2017-cnn-for-dst,
  author = {Shi, Hongjie and Ushio, Takashi and Endo, Mitsuru and
    Yamagami, Katsuyoshi and Horii, Noriaki},
  title = {A Multichannel Convolutional Neural Network for
    Cross-Language Dialog State Tracking},
  journal = {CoRR},
  year = 2017,
  eprint = {1701.06247},
  archivePrefix = {arXiv},
  primaryClass = {cs.CL},
  url = {http://arxiv.org/abs/1701.06247v1},
  abstract = {The fifth Dialog State Tracking Challenge (DSTC5) introduces
    a new cross-language dialog state tracking scenario, where the
    participants are asked to build their trackers based on the English
    training corpus, while evaluating them with the unlabeled Chinese
    corpus. Although the computer-generated translations for both English
    and Chinese corpus are provided in the dataset, these translations
    contain errors and careless use of them can easily hurt the performance
    of the built trackers. To address this problem, we propose a
    multichannel Convolutional Neural Networks (CNN) architecture, in which
    we treat English and Chinese language as different input channels of
    one single CNN model. In the evaluation of DSTC5, we found that such
    multichannel architecture can effectively improve the robustness
    against translation errors. Additionally, our method for DSTC5 is
    purely machine learning based and requires no prior knowledge about the
    target language. We consider this a desirable property for building a
    tracker in the cross-language context, as not every developer will be
    familiar with both languages.}
}
@article{nguyen-2017-kbc-overview,
  author = {Nguyen, Dat Quoc},
  title = {An Overview of Embedding Models of Entities and Relationships
    for Knowledge Base Completion},
  journal = {CoRR},
  year = 2017,
  eprint = {1703.08098},
  archivePrefix = {arXiv},
  primaryClass = {cs.CL},
  url = {http://arxiv.org/abs/1703.08098v7},
  abstract = {Knowledge bases (KBs) of real-world facts about entities and
    their relationships are useful resources for a variety of natural
    language processing tasks. However, because knowledge bases are
    typically incomplete, it is useful to be able to perform knowledge base
    completion or link prediction, i.e., predict whether a relationship not
    in the knowledge base is likely to be true. This paper serves as a
    comprehensive overview of embedding models of entities and
    relationships for knowledge base completion, summarizing up-to-date
    experimental results on standard benchmark datasets.}
}
@article{li-2019-bertsel, | |
author = "Li, Dongfang and Yu, Yifei and Chen, Qingcai and Li, Xinyu", | |
title = "{BERTSel}: Answer Selection with Pre-Trained Models", | |
journal = "CoRR", | |
year = 2019, | |
url = "http://arxiv.org/abs/1905.07588v1", | |
abstract = "Recently, pre-trained models have been the dominant paradigm | |
in natural language processing. They achieved remarkable | |
state-of-the-art performance across a wide range of related | |
tasks, such as textual entailment, natural language | |
inference, question answering, etc. BERT, proposed by Devlin | |
et.al., has achieved a better marked result in GLUE | |
leaderboard with a deep transformer architecture. Despite its | |
soaring popularity, however, BERT has not yet been applied to | |
answer selection. This task is different from others with a | |
few nuances: first, modeling the relevance and correctness of | |
candidates matters compared to semantic relatedness and | |
syntactic structure; second, the length of an answer may be | |
different from other candidates and questions. In this paper. | |
we are the first to explore the performance of fine-tuning | |
BERT for answer selection. We achieved STOA results across | |
five popular datasets, demonstrating the success of | |
pre-trained models in this task.", | |
archivePrefix= "arXiv", | |
eprint = "1905.07588", | |
primaryClass = "cs.CL" | |
} | |
@inproceedings{lai-2019-gsamn, | |
title = "A Gated Self-attention Memory Network for Answer Selection", | |
author = "Lai, Tuan and Tran, Quan Hung and Bui, Trung and Kihara, | |
Daisuke", | |
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in | |
Natural Language Processing and the 9th International Joint | |
Conference on Natural Language Processing (EMNLP-IJCNLP)", | |
month = nov, | |
year = 2019, | |
address = "Hong Kong, China", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/D19-1610", | |
doi = "10.18653/v1/D19-1610", | |
pages = "5953-5959", | |
abstract = "Answer selection is an important research problem, with | |
applications in many areas. Previous deep learning based | |
approaches for the task mainly adopt the Compare-Aggregate | |
architecture that performs word-level comparison followed by | |
aggregation. In this work, we take a departure from the | |
popular Compare-Aggregate architecture, and instead, propose | |
a new gated self-attention memory network for the | |
task. Combined with a simple transfer learning technique from | |
a large-scale online corpus, our model outperforms previous | |
methods by a large margin, achieving new state-of-the-art | |
results on two standard answer selection datasets: TrecQA and | |
WikiQA." | |
} | |
@article{mozafari-2019-bas, | |
author = "Mozafari, Jamshid and Fatemi, Afsaneh and Nematbakhsh, | |
Mohammad Ali", | |
title = "{BAS}: an Answer Selection Method Using {BERT} Language Model", | |
journal = "CoRR", | |
year = 2019, | |
url = "http://arxiv.org/abs/1911.01528v3", | |
abstract = "In recent years, Question Answering systems have become more | |
popular and widely used by users. Despite the increasing | |
popularity of these systems, their performance is not | |
even sufficient for textual data and requires further | |
research. These systems consist of several parts that one of | |
them is the Answer Selection component. This component | |
detects the most relevant answer from a list of candidate | |
answers. The methods presented in previous researches have | |
attempted to provide an independent model to undertake the | |
answer-selection task. An independent model cannot comprehend | |
the syntactic and semantic features of questions and answers | |
with a small training dataset. To fill this gap, language | |
models can be employed in implementing the answer selection | |
part. This action enables the model to have a better | |
understanding of the language in order to understand | |
questions and answers better than previous works. In this | |
research, we will present the ``BAS'' (BERT Answer | |
Selection) that uses the BERT language model to comprehend | |
language. The empirical results of applying the model on the | |
TrecQA Raw, TrecQA Clean, and WikiQA datasets demonstrate | |
that using a robust language model such as BERT can enhance | |
the performance. Using a more robust classifier also enhances | |
the effect of the language model on the answer selection | |
component. The results demonstrate that language | |
comprehension is an essential requirement in natural language | |
processing tasks such as answer-selection.", | |
archivePrefix= "arXiv", | |
eprint = "1911.01528", | |
primaryClass = "cs.CL" | |
} | |
@article{sun-2019-dream, | |
author = "Sun, Kai and Yu, Dian and Chen, Jianshu and Yu, Dong and | |
Choi, Yejin and Cardie, Claire", | |
title = "{DREAM}: a Challenge Dataset and Models for Dialogue-Based | |
Reading Comprehension", | |
journal = "CoRR", | |
year = 2019, | |
url = "http://arxiv.org/abs/1902.00164v1", | |
abstract = "We present DREAM, the first dialogue-based multiple-choice | |
reading comprehension dataset. Collected from | |
English-as-a-foreign-language examinations designed by human | |
experts to evaluate the comprehension level of Chinese | |
learners of English, our dataset contains 10,197 | |
multiple-choice questions for 6,444 dialogues. In contrast to | |
existing reading comprehension datasets, DREAM is the first | |
to focus on in-depth multi-turn multi-party dialogue | |
understanding. DREAM is likely to present significant | |
challenges for existing reading comprehension systems: 84 \% | |
of answers are non-extractive, 85 \% of questions require | |
reasoning beyond a single sentence, and 34 \% of questions | |
also involve commonsense knowledge. We apply several popular | |
neural reading comprehension models that primarily exploit | |
surface information within the text and find them to, at | |
best, just barely outperform a rule-based approach. We next | |
investigate the effects of incorporating dialogue structure | |
and different kinds of general world knowledge into both | |
rule-based and (neural and non-neural) machine learning-based | |
reading comprehension models. Experimental results on the | |
DREAM dataset show the effectiveness of dialogue structure | |
and general world knowledge. DREAM will be available at | |
https://dataset.org/dream/.", | |
archivePrefix= "arXiv", | |
eprint = "1902.00164", | |
primaryClass = "cs.CL" | |
} | |
@article{sun-2019-c3, | |
author = "Sun, Kai and Yu, Dian and Yu, Dong and Cardie, Claire", | |
title = "Investigating Prior Knowledge for Challenging Chinese Machine | |
Reading Comprehension", | |
journal = "CoRR", | |
year = 2019, | |
url = "http://arxiv.org/abs/1904.09679v3", | |
abstract = "Machine reading comprehension tasks require a machine reader | |
to answer questions relevant to the given document. In this | |
paper, we present the first free-form multiple-Choice Chinese | |
machine reading Comprehension dataset (C^3), containing | |
13,369 documents (dialogues or more formally written | |
mixed-genre texts) and their associated 19,577 | |
multiple-choice free-form questions collected from | |
Chinese-as-a-second-language examinations. We present a | |
comprehensive analysis of the prior knowledge (i.e., | |
linguistic, domain-specific, and general world knowledge) | |
needed for these real-world problems. We implement rule-based | |
and popular neural methods and find that there is still a | |
significant performance gap between the best performing model | |
(68.5 \%) and human readers (96.0 \%), especially on problems | |
that require prior knowledge. We further study the effects of | |
distractor plausibility and data augmentation based on | |
translated relevant datasets for English on model | |
performance. We expect C^3 to present great challenges to | |
existing systems as answering 86.8 \% of questions requires | |
both knowledge within and beyond the accompanying document, | |
and we hope that C^3 can serve as a platform to study how to | |
leverage various kinds of prior knowledge to better | |
understand a given written or orally oriented text. C^3 is | |
available at https://dataset.org/c3/.", | |
archivePrefix= "arXiv", | |
eprint = "1904.09679", | |
primaryClass = "cs.CL" | |
} | |
@article{yu-2020-dialogre, | |
author = "Yu, Dian and Sun, Kai and Cardie, Claire and Yu, Dong", | |
title = "Dialogue-Based Relation Extraction", | |
journal = "CoRR", | |
year = 2020, | |
url = "http://arxiv.org/abs/2004.08056v1", | |
abstract = "We present the first human-annotated dialogue-based relation | |
extraction (RE) dataset DialogRE, aiming to support the | |
prediction of relation(s) between two arguments that appear | |
in a dialogue. We further offer DialogRE as a platform for | |
studying cross-sentence RE as most facts span multiple | |
sentences. We argue that speaker-related information plays a | |
critical role in the proposed task, based on an analysis of | |
similarities and differences between dialogue-based and | |
traditional RE tasks. Considering the timeliness of | |
communication in a dialogue, we design a new metric to | |
evaluate the performance of RE methods in a conversational | |
setting and investigate the performance of several | |
representative RE methods on DialogRE. Experimental results | |
demonstrate that a speaker-aware extension on the | |
best-performing model leads to gains in both the standard and | |
conversational evaluation settings. DialogRE is available at | |
https://dataset.org/dialogre/.", | |
archivePrefix= "arXiv", | |
eprint = "2004.08056", | |
primaryClass = "cs.CL" | |
} | |
@inproceedings{ratner-2018-snorkl-metal, | |
title = "Snorkel {MeTaL}: Weak supervision for multi-task learning", | |
author = "Ratner, Alex and Hancock, Braden and Dunnmon, Jared and | |
Goldman, Roger and R{\'e}, Christopher", | |
booktitle = "Proceedings of the Second Workshop on Data Management for | |
End-To-End Machine Learning", | |
pages = "1-4", | |
year = 2018 | |
} | |
@article{ratner-2018-snorkl-metal-1, | |
author = "Ratner, Alexander and Hancock, Braden and Dunnmon, Jared and | |
Sala, Frederic and Pandey, Shreyash and R{\'e}, Christopher", | |
title = "Training Complex Models With Multi-Task Weak Supervision", | |
journal = "CoRR", | |
year = 2018, | |
url = "http://arxiv.org/abs/1810.02840v2", | |
abstract = "As machine learning models continue to increase in | |
complexity, collecting large hand-labeled training sets has | |
become one of the biggest roadblocks in practice. Instead, | |
weaker forms of supervision that provide noisier but cheaper | |
labels are often used. However, these weak supervision | |
sources have diverse and unknown accuracies, may output | |
correlated labels, and may label different tasks or apply at | |
different levels of granularity. We propose a framework for | |
integrating and modeling such weak supervision sources by | |
viewing them as labeling different related sub-tasks of a | |
problem, which we refer to as the multi-task weak supervision | |
setting. We show that by solving a matrix completion-style | |
problem, we can recover the accuracies of these multi-task | |
sources given their dependency structure, but without any | |
labeled data, leading to higher-quality supervision for | |
training an end model. Theoretically, we show that the | |
generalization error of models trained with this approach | |
improves with the number of unlabeled data points, and | |
characterize the scaling with respect to the task and | |
dependency structures. On three fine-grained classification | |
problems, we show that our approach leads to average gains of | |
20.2 points in accuracy over a traditional supervised | |
approach, 6.8 points over a majority vote baseline, and 4.1 | |
points over a previously proposed weak supervision method | |
that models tasks separately.", | |
archivePrefix= "arXiv", | |
eprint = "1810.02840", | |
primaryClass = "stat.ML" | |
} | |
@article{gong-2017-ruminating-reader, | |
author = "Gong, Yichen and Bowman, Samuel R.", | |
title = "Ruminating Reader: Reasoning With Gated Multi-Hop Attention", | |
journal = "CoRR", | |
year = 2017, | |
url = "http://arxiv.org/abs/1704.07415v1", | |
abstract = "To answer the question in machine comprehension (MC) task, | |
the models need to establish the interaction between the | |
question and the context. To tackle the problem that the | |
single-pass model cannot reflect on and correct its answer, | |
we present Ruminating Reader. Ruminating Reader adds a second | |
pass of attention and a novel information fusion component to | |
the Bi-Directional Attention Flow model (BiDAF). We propose | |
novel layer structures that construct an query-aware context | |
vector representation and fuse encoding representation with | |
intermediate representation on top of BiDAF model. We show | |
that a multi-hop attention mechanism can be applied to a | |
bi-directional attention structure. In experiments on SQuAD, | |
we find that the Reader outperforms the BiDAF baseline by a | |
substantial margin, and matches or surpasses the performance | |
of all other published systems.", | |
archivePrefix= "arXiv", | |
eprint = "1704.07415", | |
primaryClass = "cs.CL" | |
} | |
@inproceedings{williams-2018-multinli, | |
title = "A Broad-Coverage Challenge Corpus for Sentence Understanding | |
through Inference", | |
author = "Williams, Adina and Nangia, Nikita and Bowman, Samuel", | |
booktitle = "Proceedings of the 2018 Conference of the North {A}merican | |
Chapter of the Association for Computational Linguistics: | |
Human Language Technologies, Volume 1 (Long Papers)", | |
month = jun, | |
year = 2018, | |
address = "New Orleans, Louisiana", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/N18-1101", | |
doi = "10.18653/v1/N18-1101", | |
pages = "1112-1122", | |
abstract = "This paper introduces the Multi-Genre Natural Language | |
Inference (MultiNLI) corpus, a dataset designed for use in | |
the development and evaluation of machine learning models for | |
sentence understanding. At 433k examples, this resource is | |
one of the largest corpora available for natural language | |
inference (a.k.a. recognizing textual entailment), improving | |
upon available resources in both its coverage and | |
difficulty. MultiNLI accomplishes this by offering data from | |
ten distinct genres of written and spoken English, making it | |
possible to evaluate systems on nearly the full complexity of | |
the language, while supplying an explicit setting for | |
evaluating cross-genre domain adaptation. In addition, an | |
evaluation using existing machine learning models designed | |
for the Stanford NLI corpus shows that it represents a | |
substantially more difficult task than does that corpus, | |
despite the two showing similar levels of inter-annotator | |
agreement." | |
} | |
@inproceedings{tomar-2017-decatt, | |
title = "Neural Paraphrase Identification of Questions with Noisy | |
Pretraining", | |
author = "Tomar, Gaurav Singh and Duque, Thyago and T{\"a}ckstr{\"o}m, | |
Oscar and Uszkoreit, Jakob and Das, Dipanjan", | |
booktitle = "Proceedings of the First Workshop on Subword and Character | |
Level Models in {NLP}", | |
month = sep, | |
year = 2017, | |
address = "Copenhagen, Denmark", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/W17-4121", | |
doi = "10.18653/v1/W17-4121", | |
pages = "142-147", | |
abstract = "We present a solution to the problem of paraphrase | |
identification of questions. We focus on a recent dataset of | |
question pairs annotated with binary paraphrase labels and | |
show that a variant of the decomposable attention model | |
(replacing the word embeddings of the decomposable attention | |
model of Parikh et al. 2016 with character n-gram | |
representations) results in accurate performance on this | |
task, while being far simpler than many competing neural | |
architectures. Furthermore, when the model is pretrained on a | |
noisy dataset of automatically collected question | |
paraphrases, it obtains the best reported performance on the | |
dataset." | |
} | |
@article{wang-2020-match2, | |
author = "{Wang}, Zizhen and {Fan}, Yixing and {Guo}, Jiafeng and | |
{Yang}, Liu and {Zhang}, Ruqing and {Lan}, Yanyan and | |
{Cheng}, Xueqi and {Jiang}, Hui and {Wang}, Xiaozhao", | |
title = "{Match$^2$: A Matching over Matching Model for Similar | |
Question Identification}", | |
journal = "arXiv e-prints", | |
keywords = "Computer Science - Information Retrieval, Computer Science - | |
Computation and Language", | |
year = 2020, | |
month = jun, | |
eid = "arXiv:2006.11719", | |
pages = "arXiv:2006.11719", | |
archivePrefix= "arXiv", | |
eprint = "2006.11719", | |
primaryClass = "cs.IR", | |
adsurl = "https://ui.adsabs.harvard.edu/abs/2020arXiv200611719W", | |
adsnote = "Provided by the SAO/NASA Astrophysics Data System" | |
} | |
@inproceedings{gupta-2019-faq-attentive-matching, | |
author = "Gupta, Sparsh and Carvalho, Vitor R.", | |
title = "FAQ Retrieval Using Attentive Matching", | |
year = 2019, | |
isbn = 9781450361729, | |
publisher = "Association for Computing Machinery", | |
address = "New York, NY, USA", | |
url = "https://doi.org/10.1145/3331184.3331294", | |
doi = "10.1145/3331184.3331294", | |
booktitle = "Proceedings of the 42nd International ACM SIGIR Conference on | |
Research and Development in Information Retrieval", | |
pages = "929--932", | |
numpages = 4, | |
keywords = "neural networks, attention mechanism, learning to rank", | |
location = "Paris, France", | |
series = "SIGIR'19" | |
} | |
@inproceedings{ji-2012-qa-topic-model, | |
author = "Ji, Zongcheng and Xu, Fei and Wang, Bin and He, Ben", | |
title = "Question-Answer Topic Model for Question Retrieval in | |
Community Question Answering", | |
year = 2012, | |
isbn = 9781450311564, | |
publisher = "Association for Computing Machinery", | |
address = "New York, NY, USA", | |
url = "https://doi.org/10.1145/2396761.2398669", | |
doi = "10.1145/2396761.2398669", | |
booktitle = "Proceedings of the 21st ACM International Conference on | |
Information and Knowledge Management", | |
pages = "2471--2474", | |
numpages = 4, | |
keywords = "community question answering, question-answer topic model, | |
question retrieval, topic model, translation model", | |
location = "Maui, Hawaii, USA", | |
series = "CIKM '12" | |
} | |
@article{sakata-2019-faq-retrieval, | |
author = "Sakata, Wataru and Shibata, Tomohide and Tanaka, Ribeka and | |
Kurohashi, Sadao", | |
title = "{FAQ} Retrieval Using Query-Question Similarity and {BERT}-Based | |
Query-Answer Relevance", | |
journal = "CoRR", | |
year = 2019, | |
url = "http://arxiv.org/abs/1905.02851v2", | |
abstract = "Frequently Asked Question (FAQ) retrieval is an important | |
task where the objective is to retrieve an appropriate | |
Question-Answer (QA) pair from a database based on a user's | |
query. We propose a FAQ retrieval system that considers the | |
similarity between a user's query and a question as well as | |
the relevance between the query and an answer. Although a | |
common approach to FAQ retrieval is to construct labeled data | |
for training, it takes annotation costs. Therefore, we use a | |
traditional unsupervised information retrieval system to | |
calculate the similarity between the query and question. On | |
the other hand, the relevance between the query and answer | |
can be learned by using QA pairs in a FAQ database. The | |
recently-proposed BERT model is used for the relevance | |
calculation. Since the number of QA pairs in FAQ page is not | |
enough to train a model, we cope with this issue by | |
leveraging FAQ sets that are similar to the one in | |
question. We evaluate our approach on two datasets. The first | |
one is localgovFAQ, a dataset we construct in a Japanese | |
administrative municipality domain. The second is | |
StackExchange dataset, which is the public dataset in | |
English. We demonstrate that our proposed method outperforms | |
baseline methods on these datasets.", | |
archivePrefix= "arXiv", | |
eprint = "1905.02851", | |
primaryClass = "cs.IR" | |
} | |
@InProceedings{damani-2020-optimized-transformer-faq, | |
author = "Damani, Sonam and Narahari, Kedhar Nath and Chatterjee, | |
Ankush and Gupta, Manish and Agrawal, Puneet", | |
editor = "Lauw, Hady W. and Wong, Raymond Chi-Wing and Ntoulas, | |
Alexandros and Lim, Ee-Peng and Ng, See-Kiong and Pan, Sinno | |
Jialin", | |
title = "Optimized Transformer Models for FAQ Answering", | |
booktitle = "Advances in Knowledge Discovery and Data Mining", | |
year = 2020, | |
publisher = "Springer International Publishing", | |
address = "Cham", | |
pages = "235-248", | |
abstract = "Informational chatbots provide a highly effective medium for | |
improving operational efficiency in answering customer | |
queries for any enterprise. Chatbots are also preferred by | |
users/customers since unlike other alternatives like calling | |
customer care or browsing over FAQ pages, chatbots provide | |
instant responses, are easy to use, are less invasive and are | |
always available. In this paper, we discuss the problem of | |
FAQ answering which is central to designing a retrieval-based | |
informational chatbot. Given a set of FAQ pages s for an | |
enterprise, and a user query, we need to find the best | |
matching question-answer pairs from s. Building such a | |
semantic ranking system that works well across domains for | |
large QA databases with low runtime and model size is | |
challenging. Previous work based on feature engineering or | |
recurrent neural models either provides low accuracy or | |
incurs high runtime costs. We experiment with multiple | |
transformer based deep learning models, and also propose a | |
novel MT-DNN (Multi-task Deep Neural Network)-based | |
architecture, which we call Masked MT-DNN (or | |
MMT-DNN). MMT-DNN significantly outperforms other | |
state-of-the-art transformer models for the FAQ answering | |
task. Further, we propose an improved knowledge distillation | |
component to achieve $\sim$2.4x reduction in model-size and | |
$\sim$7x reduction in runtime while maintaining similar | |
accuracy. On a small benchmark dataset from SemEval 2017 CQA | |
Task 3, we show that our approach provides an NDCG@1 of | |
83.1. On another large dataset of $\sim$281K instances | |
corresponding to $\sim$30K queries from diverse domains, our | |
distilled 174 MB model provides an NDCG@1 of | |
75.08 with a CPU runtime of mere 31 ms establishing a new | |
state-of-the-art for FAQ answering.", | |
isbn = "978-3-030-47426-3" | |
} | |
@incollection{ba-2014-do-deep, | |
title = "Do Deep Nets Really Need to be Deep?", | |
author = "Ba, Jimmy and Caruana, Rich", | |
booktitle = "Advances in Neural Information Processing Systems 27", | |
editor = "Z. Ghahramani and M. Welling and C. Cortes and N. D. Lawrence | |
and K. Q. Weinberger", | |
pages = "2654-2662", | |
year = 2014, | |
publisher = "Curran Associates, Inc.", | |
url = | |
"http://papers.nips.cc/paper/5484-do-deep-nets-really-need-to-be-deep.pdf" | |
} | |
@article{mirzadeh-2019-teacher-assistant, | |
author = "Mirzadeh, Seyed-Iman and Farajtabar, Mehrdad and Li, Ang and | |
Levine, Nir and Matsukawa, Akihiro and Ghasemzadeh, Hassan", | |
title = "Improved Knowledge Distillation Via Teacher Assistant", | |
journal = "CoRR", | |
year = 2019, | |
url = "http://arxiv.org/abs/1902.03393v2", | |
abstract = "Despite the fact that deep neural networks are powerful | |
models and achieve appealing results on many tasks, they are | |
too large to be deployed on edge devices like smartphones or | |
embedded sensor nodes. There have been efforts to compress | |
these networks, and a popular method is knowledge | |
distillation, where a large (teacher) pre-trained network is | |
used to train a smaller (student) network. However, in this | |
paper, we show that the student network performance degrades | |
when the gap between student and teacher is large. Given a | |
fixed student network, one cannot employ an arbitrarily large | |
teacher, or in other words, a teacher can effectively | |
transfer its knowledge to students up to a certain size, not | |
smaller. To alleviate this shortcoming, we introduce | |
multi-step knowledge distillation, which employs an | |
intermediate-sized network (teacher assistant) to bridge the | |
gap between the student and the teacher. Moreover, we study | |
the effect of teacher assistant size and extend the framework | |
to multi-step distillation. Theoretical analysis and | |
extensive experiments on CIFAR-10,100 and ImageNet datasets | |
and on CNN and ResNet architectures substantiate the | |
effectiveness of our proposed approach.", | |
archivePrefix= "arXiv", | |
eprint = "1902.03393", | |
primaryClass = "cs.LG" | |
} | |
@article{schulz-2002-fast-string-correction, | |
title = "Fast string correction with Levenshtein automata", | |
author = "Schulz, Klaus U and Mihov, Stoyan", | |
journal = "International Journal on Document Analysis and Recognition", | |
volume = 5, | |
number = 1, | |
pages = "67-85", | |
year = 2002, | |
publisher = "Springer" | |
} | |
@article{mihov-2004-fast-approx-search, | |
title = "Fast Approximate Search in Large Dictionaries", | |
author = "Mihov, Stoyan and Schulz, Klaus U.", | |
journal = "Computational Linguistics", | |
volume = 30, | |
number = 4, | |
year = 2004, | |
url = "https://www.aclweb.org/anthology/J04-4003", | |
doi = "10.1162/0891201042544938", | |
pages = "451-477" | |
} | |
@inproceedings{lei-2018-sru, | |
title = "Simple Recurrent Units for Highly Parallelizable Recurrence", | |
author = "Lei, Tao and Zhang, Yu and Wang, Sida I. and Dai, Hui and | |
Artzi, Yoav", | |
booktitle = "Proceedings of the 2018 Conference on Empirical Methods in | |
Natural Language Processing", | |
month = oct # "--" # nov, | |
year = 2018, | |
address = "Brussels, Belgium", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/D18-1477", | |
doi = "10.18653/v1/D18-1477", | |
pages = "4470-4481", | |
abstract = "Common recurrent neural architectures scale poorly due to the | |
intrinsic difficulty in parallelizing their state | |
computations. In this work, we propose the Simple Recurrent | |
Unit (SRU), a light recurrent unit that balances model | |
capacity and scalability. SRU is designed to provide | |
expressive recurrence, enable highly parallelized | |
implementation, and comes with careful initialization to | |
facilitate training of deep models. We demonstrate the | |
effectiveness of SRU on multiple NLP tasks. SRU achieves | |
5{---}9x speed-up over cuDNN-optimized LSTM on classification | |
and question answering datasets, and delivers stronger | |
results than LSTM and convolutional models. We also obtain an | |
average of 0.7 BLEU improvement over the Transformer model | |
(Vaswani et al., 2017) on translation by incorporating SRU | |
into the architecture." | |
} | |
@article{xiong-2017-dcn, | |
author = "Xiong, Caiming and Zhong, Victor and Socher, Richard", | |
title = "{DCN+}: Mixed Objective and Deep Residual Coattention for | |
Question Answering", | |
journal = "CoRR", | |
year = 2017, | |
url = "http://arxiv.org/abs/1711.00106v2", | |
abstract = "Traditional models for question answering optimize using | |
cross entropy loss, which encourages exact answers at the | |
cost of penalizing nearby or overlapping answers that are | |
sometimes equally accurate. We propose a mixed objective that | |
combines cross entropy loss with self-critical policy | |
learning. The objective uses rewards derived from word | |
overlap to solve the misalignment between evaluation metric | |
and optimization objective. In addition to the mixed | |
objective, we improve dynamic coattention networks (DCN) with | |
a deep residual coattention encoder that is inspired by | |
recent work in deep self-attention and residual networks. Our | |
proposals improve model performance across question types and | |
input lengths, especially for long questions that requires | |
the ability to capture long-term dependencies. On the | |
Stanford Question Answering Dataset, our model achieves | |
state-of-the-art results with 75.1 \% exact match accuracy | |
and 83.1 \% F1, while the ensemble obtains 78.9 \% exact | |
match accuracy and 86.0 \% F1.", | |
archivePrefix= "arXiv", | |
eprint = "1711.00106", | |
primaryClass = "cs.CL" | |
} | |
@article{andreas-2015-neural-module-networks, | |
author = "Andreas, Jacob and Rohrbach, Marcus and Darrell, Trevor and | |
Klein, Dan", | |
title = "Neural Module Networks", | |
journal = "CoRR", | |
year = 2015, | |
url = "http://arxiv.org/abs/1511.02799v4", | |
abstract = "Visual question answering is fundamentally compositional in | |
nature---a question like ``where is the dog?'' shares | |
substructure with questions like ``what color is the dog?'' | |
and ``where is the cat?'' This paper seeks to simultaneously | |
exploit the representational capacity of deep networks and | |
the compositional linguistic structure of questions. We | |
describe a procedure for constructing and learning *neural | |
module networks*, which compose collections of | |
jointly-trained neural ``modules'' into deep networks for | |
question answering. Our approach decomposes questions into | |
their linguistic substructures, and uses these structures to | |
dynamically instantiate modular networks (with reusable | |
components for recognizing dogs, classifying colors, | |
etc.). The resulting compound networks are jointly | |
trained. We evaluate our approach on two challenging datasets | |
for visual question answering, achieving state-of-the-art | |
results on both the VQA natural image dataset and a new | |
dataset of complex questions about abstract shapes.", | |
archivePrefix= "arXiv", | |
eprint = "1511.02799", | |
primaryClass = "cs.CV" | |
} | |
@article{bao-2020-unilmv2, | |
author = "Bao, Hangbo and Dong, Li and Wei, Furu and Wang, Wenhui and | |
Yang, Nan and Liu, Xiaodong and Wang, Yu and Piao, Songhao | |
and Gao, Jianfeng and Zhou, Ming and Hon, Hsiao-Wuen", | |
title = "{UniLMv2}: Pseudo-Masked Language Models for Unified Language | |
Model Pre-Training", | |
journal = "CoRR", | |
year = 2020, | |
url = "http://arxiv.org/abs/2002.12804v1", | |
abstract = "We propose to pre-train a unified language model for both | |
autoencoding and partially autoregressive language modeling | |
tasks using a novel training procedure, referred to as a | |
pseudo-masked language model (PMLM). Given an input text with | |
masked tokens, we rely on conventional masks to learn | |
inter-relations between corrupted tokens and context via | |
autoencoding, and pseudo masks to learn intra-relations | |
between masked spans via partially autoregressive | |
modeling. With well-designed position embeddings and | |
self-attention masks, the context encodings are reused to | |
avoid redundant computation. Moreover, conventional masks | |
used for autoencoding provide global masking information, so | |
that all the position embeddings are accessible in partially | |
autoregressive language modeling. In addition, the two tasks | |
pre-train a unified language model as a bidirectional encoder | |
and a sequence-to-sequence decoder, respectively. Our | |
experiments show that the unified language models pre-trained | |
using PMLM achieve new state-of-the-art results on a wide | |
range of natural language understanding and generation tasks | |
across several widely used benchmarks.", | |
archivePrefix= "arXiv", | |
eprint = "2002.12804", | |
primaryClass = "cs.CL" | |
} | |
@article{humeau-2019-poly-encoders,
author       = "Humeau, Samuel and Shuster, Kurt and Lachaux, Marie-Anne and
                Weston, Jason",
title        = "{Poly-Encoders}: {Transformer} Architectures and Pre-Training
                Strategies for Fast and Accurate Multi-Sentence Scoring",
journal      = "CoRR",
volume       = "abs/1905.01969",
year         = 2019,
url          = "http://arxiv.org/abs/1905.01969v4",
abstract     = "The use of deep pre-trained bidirectional transformers has
                led to remarkable progress in a number of applications
                (Devlin et al., 2018). For tasks that make pairwise
                comparisons between sequences, matching a given input with a
                corresponding label, two approaches are common:
                Cross-encoders performing full self-attention over the pair
                and Bi-encoders encoding the pair separately. The former
                often performs better, but is too slow for practical use. In
                this work, we develop a new transformer architecture, the
                Poly-encoder, that learns global rather than token level
                self-attention features. We perform a detailed comparison of
                all three approaches, including what pre-training and
                fine-tuning strategies work best. We show our models achieve
                state-of-the-art results on three existing tasks; that
                Poly-encoders are faster than Cross-encoders and more
                accurate than Bi-encoders; and that the best results are
                obtained by pre-training on large datasets similar to the
                downstream tasks.",
archivePrefix= "arXiv",
eprint       = "1905.01969",
primaryClass = "cs.CL"
}
@article{wang-2020-multi-level,
author       = "Wang, Shuohang and Lan, Yunshi and Tay, Yi and Jiang, Jing
                and Liu, Jingjing",
title        = "Multi-Level Head-Wise Match and Aggregation in {Transformer}
                for Textual Sequence Matching",
journal      = "CoRR",
volume       = "abs/2001.07234",
year         = 2020,
url          = "http://arxiv.org/abs/2001.07234v1",
abstract     = "Transformer has been successfully applied to many natural
                language processing tasks. However, for textual sequence
                matching, simple matching between the representation of a
                pair of sequences might bring in unnecessary noise. In this
                paper, we propose a new approach to sequence pair matching
                with Transformer, by learning head-wise matching
                representations on multiple levels. Experiments show that our
                proposed approach can achieve new state-of-the-art
                performance on multiple tasks that rely only on pre-computed
                sequence-vector-representation, such as SNLI, MNLI-match,
                MNLI-mismatch, QQP, and SQuAD-binary.",
archivePrefix= "arXiv",
eprint       = "2001.07234",
primaryClass = "cs.CL"
}
@article{raffel-2015-feed-forwar,
author       = "Raffel, Colin and Ellis, Daniel P. W.",
title        = "Feed-Forward Networks With Attention Can Solve Some Long-Term
                Memory Problems",
journal      = "CoRR",
volume       = "abs/1512.08756",
year         = 2015,
url          = "http://arxiv.org/abs/1512.08756v5",
abstract     = "We propose a simplified model of attention which is
                applicable to feed-forward neural networks and demonstrate
                that the resulting model can solve the synthetic ``addition''
                and ``multiplication'' long-term memory problems for sequence
                lengths which are both longer and more widely varying than
                the best published results for these tasks.",
archivePrefix= "arXiv",
eprint       = "1512.08756",
primaryClass = "cs.LG"
}
@article{singh-2016-black-box,
author       = "Singh, Sameer and Ribeiro, Marco Tulio and Guestrin, Carlos",
title        = "Programs As Black-Box Explanations",
journal      = "CoRR",
volume       = "abs/1611.07579",
year         = 2016,
url          = "http://arxiv.org/abs/1611.07579v1",
abstract     = "Recent work in model-agnostic explanations of black-box
                machine learning has demonstrated that interpretability of
                complex models does not have to come at the cost of accuracy
                or model flexibility. However, it is not clear what kind of
                explanations, such as linear models, decision trees, and rule
                lists, are the appropriate family to consider, and different
                tasks and models may benefit from different kinds of
                explanations. Instead of picking a single family of
                representations, in this work we propose to use ``programs''
                as model-agnostic explanations. We show that small programs
                can be expressive yet intuitive as explanations, and
                generalize over a number of existing interpretable families.
                We propose a prototype program induction method based on
                simulated annealing that approximates the local behavior of
                black-box classifiers around a specific prediction using
                random perturbations. Finally, we present preliminary
                application on small datasets and show that the generated
                explanations are intuitive and accurate for a number of
                classifiers.",
archivePrefix= "arXiv",
eprint       = "1611.07579",
primaryClass = "stat.ML"
}
@article{ribeiro-2016-nothing-else,
author       = "Ribeiro, Marco Tulio and Singh, Sameer and Guestrin, Carlos",
title        = "Nothing Else Matters: Model-Agnostic Explanations By
                Identifying Prediction Invariance",
journal      = "CoRR",
volume       = "abs/1611.05817",
year         = 2016,
url          = "http://arxiv.org/abs/1611.05817v1",
abstract     = "At the core of interpretable machine learning is the question
                of whether humans are able to make accurate predictions about
                a model's behavior. Assumed in this question are three
                properties of the interpretable output: coverage, precision,
                and effort. Coverage refers to how often humans think they
                can predict the model's behavior, precision to how accurate
                humans are in those predictions, and effort is either the
                up-front effort required in interpreting the model, or the
                effort required to make predictions about a model's behavior.
                In this work, we propose anchor-LIME (aLIME), a
                model-agnostic technique that produces high-precision
                rule-based explanations for which the coverage boundaries are
                very clear. We compare aLIME to linear LIME with simulated
                experiments, and demonstrate the flexibility of aLIME with
                qualitative examples from a variety of domains and tasks.",
archivePrefix= "arXiv",
eprint       = "1611.05817",
primaryClass = "stat.ML"
}
@article{ribeiro-2016-model-agnostic,
author       = "Ribeiro, Marco Tulio and Singh, Sameer and Guestrin, Carlos",
title        = "Model-Agnostic Interpretability of Machine Learning",
journal      = "CoRR",
volume       = "abs/1606.05386",
year         = 2016,
url          = "http://arxiv.org/abs/1606.05386v1",
abstract     = "Understanding why machine learning models behave the way they
                do empowers both system designers and end-users in many ways:
                in model selection, feature engineering, in order to trust
                and act upon the predictions, and in more intuitive user
                interfaces. Thus, interpretability has become a vital concern
                in machine learning, and work in the area of interpretable
                models has found renewed interest. In some applications, such
                models are as accurate as non-interpretable ones, and thus
                are preferred for their transparency. Even when they are not
                accurate, they may still be preferred when interpretability
                is of paramount importance. However, restricting machine
                learning to interpretable models is often a severe
                limitation. In this paper we argue for explaining machine
                learning predictions using model-agnostic approaches. By
                treating the machine learning models as black-box functions,
                these approaches provide crucial flexibility in the choice of
                models, explanations, and representations, improving
                debugging, comparison, and interfaces for a variety of users
                and models. We also outline the main challenges for such
                methods, and review a recently-introduced model-agnostic
                explanation approach (LIME) that addresses these challenges.",
archivePrefix= "arXiv",
eprint       = "1606.05386",
primaryClass = "stat.ML"
}
@article{alvarez-melis-2018-robustness,
author       = "Alvarez-Melis, David and Jaakkola, Tommi S.",
title        = "On the Robustness of Interpretability Methods",
journal      = "CoRR",
volume       = "abs/1806.08049",
year         = 2018,
url          = "http://arxiv.org/abs/1806.08049v1",
abstract     = "We argue that robustness of explanations---i.e., that similar
                inputs should give rise to similar explanations---is a key
                desideratum for interpretability. We introduce metrics to
                quantify robustness and demonstrate that current methods do
                not perform well according to these metrics. Finally, we
                propose ways that robustness can be enforced on existing
                interpretability approaches.",
archivePrefix= "arXiv",
eprint       = "1806.08049",
primaryClass = "cs.LG"
}
@inproceedings{ribeiro-2018-anchors,
title        = "Anchors: High-Precision Model-Agnostic Explanations",
author       = "Ribeiro, Marco Tulio and Singh, Sameer and Guestrin, Carlos",
booktitle    = "AAAI",
year         = 2018
}
@article{wu-2017-beyond-sparsity,
author       = "Wu, Mike and Hughes, Michael C. and Parbhoo, Sonali and
                Zazzi, Maurizio and Roth, Volker and Doshi-Velez, Finale",
title        = "Beyond Sparsity: Tree Regularization of Deep Models for
                Interpretability",
journal      = "CoRR",
volume       = "abs/1711.06178",
year         = 2017,
url          = "http://arxiv.org/abs/1711.06178v1",
abstract     = "The lack of interpretability remains a key barrier to the
                adoption of deep models in many applications. In this work,
                we explicitly regularize deep models so human users might
                step through the process behind their predictions in little
                time. Specifically, we train deep time-series models so their
                class-probability predictions have high accuracy while being
                closely modeled by decision trees with few nodes. Using
                intuitive toy examples as well as medical tasks for treating
                sepsis and HIV, we demonstrate that this new tree
                regularization yields models that are easier for humans to
                simulate than simpler L1 or L2 penalties without sacrificing
                predictive power.",
archivePrefix= "arXiv",
eprint       = "1711.06178",
primaryClass = "stat.ML"
}
@article{zhou-2015-cam,
author       = "Zhou, Bolei and Khosla, Aditya and Lapedriza, Agata and
                Oliva, Aude and Torralba, Antonio",
title        = "Learning Deep Features for Discriminative Localization",
journal      = "CoRR",
volume       = "abs/1512.04150",
year         = 2015,
url          = "http://arxiv.org/abs/1512.04150v1",
abstract     = "In this work, we revisit the global average pooling layer
                proposed in [13], and shed light on how it explicitly enables
                the convolutional neural network to have remarkable
                localization ability despite being trained on image-level
                labels. While this technique was previously proposed as a
                means for regularizing training, we find that it actually
                builds a generic localizable deep representation that can be
                applied to a variety of tasks. Despite the apparent
                simplicity of global average pooling, we are able to achieve
                37.1 \% top-5 error for object localization on ILSVRC 2014,
                which is remarkably close to the 34.2 \% top-5 error achieved
                by a fully supervised CNN approach. We demonstrate that our
                network is able to localize the discriminative image regions
                on a variety of tasks despite not being trained for them",
archivePrefix= "arXiv",
eprint       = "1512.04150",
primaryClass = "cs.CV"
}
@article{selvaraju-2016-grad-cam,
author       = "Selvaraju, Ramprasaath R. and Cogswell, Michael and Das,
                Abhishek and Vedantam, Ramakrishna and Parikh, Devi and
                Batra, Dhruv",
title        = "{Grad-CAM}: Visual Explanations From Deep Networks Via
                Gradient-Based Localization",
journal      = "CoRR",
volume       = "abs/1610.02391",
year         = 2016,
url          = "http://arxiv.org/abs/1610.02391v4",
abstract     = "We propose a technique for producing ``visual explanations''
                for decisions from a large class of CNN-based models, making
                them more transparent. Our approach - Gradient-weighted Class
                Activation Mapping (Grad-CAM), uses the gradients of any
                target concept, flowing into the final convolutional layer to
                produce a coarse localization map highlighting important
                regions in the image for predicting the concept. Grad-CAM is
                applicable to a wide variety of CNN model-families: (1) CNNs
                with fully-connected layers, (2) CNNs used for structured
                outputs, (3) CNNs used in tasks with multimodal inputs or
                reinforcement learning, without any architectural changes or
                re-training. We combine Grad-CAM with fine-grained
                visualizations to create a high-resolution
                class-discriminative visualization and apply it to
                off-the-shelf image classification, captioning, and visual
                question answering (VQA) models, including ResNet-based
                architectures. In the context of image classification models,
                our visualizations (a) lend insights into their failure
                modes, (b) are robust to adversarial images, (c) outperform
                previous methods on localization, (d) are more faithful to
                the underlying model and (e) help achieve generalization by
                identifying dataset bias. For captioning and VQA, we show
                that even non-attention based models can localize inputs. We
                devise a way to identify important neurons through Grad-CAM
                and combine it with neuron names to provide textual
                explanations for model decisions. Finally, we design and
                conduct human studies to measure if Grad-CAM helps users
                establish appropriate trust in predictions from models and
                show that Grad-CAM helps untrained users successfully discern
                a `stronger' model from a `weaker' one even when both make
                identical predictions. Our code is available at
                https://github.com/ramprs/grad-cam/, along with a demo at
                http://gradcam.cloudcv.org, and a video at
                youtu.be/COjUB9Izk6E.",
archivePrefix= "arXiv",
eprint       = "1610.02391",
primaryClass = "cs.CV"
}
@article{baehrens-2010-parzen,
author       = {Baehrens, David and Schroeter, Timon and Harmeling, Stefan
                and Kawanabe, Motoaki and Hansen, Katja and M{\"u}ller,
                Klaus-Robert},
title        = "How to Explain Individual Classification Decisions",
year         = 2010,
issue_date   = "3/1/2010",
publisher    = "JMLR.org",
volume       = 11,
issn         = "1532-4435",
journal      = "J. Mach. Learn. Res.",
month        = aug,
pages        = "1803--1831",
numpages     = 29
}
@article{mcinnes-2017-hdbscan,
title        = "{hdbscan}: Hierarchical density based clustering",
author       = "McInnes, Leland and Healy, John and Astels, Steve",
journal      = "Journal of Open Source Software",
volume       = 2,
number       = 11,
pages        = 205,
year         = 2017
}
@article{osborne-2013-data-clearning,
author       = "Osborne, Jason W",
title        = "Is data cleaning and the testing of assumptions relevant in
                the 21st century?",
journal      = "Frontiers in Psychology",
year         = 2013,
volume       = 4,
pages        = 370,
publisher    = "Frontiers"
}
@inproceedings{fu-2019-graph-rel,
author       = "Fu, Tsu-Jui and Li, Peng-Hsuan and Ma, Wei-Yun",
title        = "{G}raph{R}el: Modeling Text as Relational Graphs for Joint
                Entity and Relation Extraction",
booktitle    = "Proceedings of the 57th Annual Meeting of the Association for
                Computational Linguistics",
year         = 2019,
pages        = "1409--1418",
doi          = "10.18653/v1/P19-1136",
url          = "https://doi.org/10.18653/v1/P19-1136",
abstract     = "In this paper, we present GraphRel, an end-to-end relation
                extraction model which uses graph convolutional networks
                (GCNs) to jointly learn named entities and relations. In
                contrast to previous baselines, we consider the interaction
                between named entities and relations via a 2nd-phase
                relation-weighted GCN to better extract relations. Linear and
                dependency structures are both used to extract both
                sequential and regional features of the text, and a complete
                word graph is further utilized to extract implicit features
                among all word pairs of the text. With the graph-based
                approach, the prediction for overlapping relations is
                substantially improved over previous sequential
                approaches. We evaluate GraphRel on two public datasets: NYT
                and WebNLG. Results show that GraphRel maintains high
                precision while increasing recall substantially. Also,
                GraphRel outperforms previous work by 3.2{\%} and 5.8{\%} (F1
                score), achieving a new state-of-the-art for relation
                extraction.",
address      = "Florence, Italy",
month        = jul,
publisher    = "Association for Computational Linguistics"
}
@article{xie-2019-uda,
author       = "Xie, Qizhe and Dai, Zihang and Hovy, Eduard and Luong,
                Minh-Thang and Le, Quoc V.",
title        = "Unsupervised Data Augmentation for Consistency Training",
journal      = "CoRR",
volume       = "abs/1904.12848",
year         = 2019,
url          = "http://arxiv.org/abs/1904.12848v5",
abstract     = "Semi-supervised learning lately has shown much promise in
                improving deep learning models when labeled data is
                scarce. Common among recent approaches is the use of
                consistency training on a large amount of unlabeled data to
                constrain model predictions to be invariant to input
                noise. In this work, we present a new perspective on how to
                effectively noise unlabeled examples and argue that the
                quality of noising, specifically those produced by advanced
                data augmentation methods, plays a crucial role in
                semi-supervised learning. By substituting simple noising
                operations with advanced data augmentation methods such as
                RandAugment and back-translation, our method brings
                substantial improvements across six language and three vision
                tasks under the same consistency training framework. On the
                IMDb text classification dataset, with only 20 labeled
                examples, our method achieves an error rate of 4.20,
                outperforming the state-of-the-art model trained on 25,000
                labeled examples. On a standard semi-supervised learning
                benchmark, CIFAR-10, our method outperforms all previous
                approaches and achieves an error rate of 5.43 with only 250
                examples. Our method also combines well with transfer
                learning, e.g., when finetuning from BERT, and yields
                improvements in high-data regime, such as ImageNet, whether
                when there is only 10 \% labeled data or when a full labeled
                set with 1.3M extra unlabeled examples is used. Code is
                available at https://github.com/google-research/uda.",
archivePrefix= "arXiv",
eprint       = "1904.12848",
primaryClass = "cs.LG"
}
@article{cubuk-2019-randaugment,
author       = "Cubuk, Ekin D. and Zoph, Barret and Shlens, Jonathon and Le,
                Quoc V.",
title        = "{RandAugment}: Practical Automated Data Augmentation With a
                Reduced Search Space",
journal      = "CoRR",
volume       = "abs/1909.13719",
year         = 2019,
url          = "http://arxiv.org/abs/1909.13719v2",
abstract     = "Recent work has shown that data augmentation has the
                potential to significantly improve the generalization of deep
                learning models. Recently, automated augmentation strategies
                have led to state-of-the-art results in image classification
                and object detection. While these strategies were optimized
                for improving validation accuracy, they also led to
                state-of-the-art results in semi-supervised learning and
                improved robustness to common corruptions of images. An
                obstacle to a large-scale adoption of these methods is a
                separate search phase which increases the training complexity
                and may substantially increase the computational
                cost. Additionally, due to the separate search phase, these
                approaches are unable to adjust the regularization strength
                based on model or dataset size. Automated augmentation
                policies are often found by training small models on small
                datasets and subsequently applied to train larger models. In
                this work, we remove both of these obstacles. RandAugment has
                a significantly reduced search space which allows it to be
                trained on the target task with no need for a separate proxy
                task. Furthermore, due to the parameterization, the
                regularization strength may be tailored to different model
                and dataset sizes. RandAugment can be used uniformly across
                different tasks and datasets and works out of the box,
                matching or surpassing all previous automated augmentation
                approaches on CIFAR-10/100, SVHN, and ImageNet. On the
                ImageNet dataset we achieve 85.0 \% accuracy, a 0.6 \%
                increase over the previous state-of-the-art and 1.0 \%
                increase over baseline augmentation. On object detection,
                RandAugment leads to 1.0-1.3 \% improvement over baseline
                augmentation, and is within 0.3 \% mAP of AutoAugment on
                COCO. Finally, due to its interpretable hyperparameter,
                RandAugment may be used to investigate the role of data
                augmentation with varying model and dataset size. Code is
                available online.",
archivePrefix= "arXiv",
eprint       = "1909.13719",
primaryClass = "cs.CV"
}
@article{pan-2020-adversarial-validation,
author       = "Pan, Jing and Pham, Vincent and Dorairaj, Mohan and Chen,
                Huigang and Lee, Jeong-Yoon",
title        = "Adversarial Validation Approach To Concept Drift Problem in
                User Targeting Automation Systems At {Uber}",
journal      = "CoRR",
volume       = "abs/2004.03045",
year         = 2020,
url          = "http://arxiv.org/abs/2004.03045v2",
abstract     = "In user targeting automation systems, concept drift in input
                data is one of the main challenges. It deteriorates model
                performance on new data over time. Previous research on
                concept drift mostly proposed model retraining after
                observing performance decreases. However, this approach is
                suboptimal because the system fixes the problem only after
                suffering from poor performance on new data. Here, we
                introduce an adversarial validation approach to concept drift
                problems in user targeting automation systems. With our
                approach, the system detects concept drift in new data before
                making inference, trains a model, and produces predictions
                adapted to the new data. We show that our approach addresses
                concept drift effectively with the AutoML3 Lifelong Machine
                Learning challenge data as well as in Uber's internal user
                targeting automation system, MaLTA.",
archivePrefix= "arXiv",
eprint       = "2004.03045",
primaryClass = "cs.LG"
}
@article{lin-2019-unknown-detection,
author       = "Lin, Ting-En and Xu, Hua",
title        = "Deep Unknown Intent Detection With Margin Loss",
journal      = "CoRR",
volume       = "abs/1906.00434",
year         = 2019,
url          = "http://arxiv.org/abs/1906.00434v1",
abstract     = "Identifying the unknown (novel) user intents that have never
                appeared in the training set is a challenging task in the
                dialogue system. In this paper, we present a two-stage method
                for detecting unknown intents. We use bidirectional long
                short-term memory (BiLSTM) network with the margin loss as
                the feature extractor. With margin loss, we can learn
                discriminative deep features by forcing the network to
                maximize inter-class variance and to minimize intra-class
                variance. Then, we feed the feature vectors to the
                density-based novelty detection algorithm, local outlier
                factor (LOF), to detect unknown intents. Experiments on two
                benchmark datasets show that our method can yield consistent
                improvements compared with the baseline methods.",
archivePrefix= "arXiv",
eprint       = "1906.00434",
primaryClass = "cs.CL"
}
@article{tompson-2014-spatial-dropout,
author       = "Tompson, Jonathan and Goroshin, Ross and Jain, Arjun and
                LeCun, Yann and Bregler, Christopher",
title        = "Efficient Object Localization Using Convolutional Networks",
journal      = "CoRR",
volume       = "abs/1411.4280",
year         = 2014,
url          = "http://arxiv.org/abs/1411.4280v3",
abstract     = "Recent state-of-the-art performance on human-body pose
                estimation has been achieved with Deep Convolutional Networks
                (ConvNets). Traditional ConvNet architectures include pooling
                and sub-sampling layers which reduce computational
                requirements, introduce invariance and prevent over-training.
                These benefits of pooling come at the cost of reduced
                localization accuracy. We introduce a novel architecture
                which includes an efficient `position refinement' model that
                is trained to estimate the joint offset location within a
                small region of the image. This refinement model is jointly
                trained in cascade with a state-of-the-art ConvNet model to
                achieve improved accuracy in human joint location
                estimation. We show that the variance of our detector
                approaches the variance of human annotations on the FLIC
                dataset and outperforms all existing approaches on the
                MPII-human-pose dataset.",
archivePrefix= "arXiv",
eprint       = "1411.4280",
primaryClass = "cs.CV"
}
@article{yang-2018-rethinking-structure,
author       = "Yang, Yao-Yuan and Lin, Yi-An and Chu, Hong-Min and Lin,
                Hsuan-Tien",
title        = "Deep Learning With a Rethinking Structure for Multi-Label
                Classification",
journal      = "CoRR",
volume       = "abs/1802.01697",
year         = 2018,
url          = "http://arxiv.org/abs/1802.01697v2",
abstract     = "Multi-label classification (MLC) is an important class of
                machine learning problems that come with a wide spectrum of
                applications, each demanding a possibly different evaluation
                criterion. When solving the MLC problems, we generally expect
                the learning algorithm to take the hidden correlation of the
                labels into account to improve the prediction
                performance. Extracting the hidden correlation is generally a
                challenging task. In this work, we propose a novel deep
                learning framework to better extract the hidden correlation
                with the help of the memory structure within recurrent neural
                networks. The memory stores the temporary guesses on the
                labels and effectively allows the framework to rethink about
                the goodness and correlation of the guesses before making the
                final prediction. Furthermore, the rethinking process makes
                it easy to adapt to different evaluation criteria to match
                real-world application needs. In particular, the framework
                can be trained in an end-to-end style with respect to any
                given MLC evaluation criteria. The end-to-end design can be
                seamlessly combined with other deep learning techniques to
                conquer challenging MLC problems like image
                tagging. Experimental results across many real-world data
                sets justify that the rethinking framework indeed improves
                MLC performance across different evaluation criteria and
                leads to superior performance over state-of-the-art MLC
                algorithms.",
archivePrefix= "arXiv",
eprint       = "1802.01697",
primaryClass = "cs.LG"
}
@inproceedings{yang-2019-seq2set,
author       = "Yang, Pengcheng and Luo, Fuli and Ma, Shuming and Lin,
                Junyang and Sun, Xu",
title        = "A Deep Reinforced Sequence-to-Set Model for Multi-Label
                Classification",
booktitle    = "Proceedings of the 57th Annual Meeting of the Association for
                Computational Linguistics",
year         = 2019,
pages        = "5252--5258",
doi          = "10.18653/v1/P19-1518",
url          = "https://doi.org/10.18653/v1/P19-1518",
abstract     = "Multi-label classification (MLC) aims to predict a set of
                labels for a given instance. Based on a pre-defined label
                order, the sequence-to-sequence (Seq2Seq) model trained via
                maximum likelihood estimation method has been successfully
                applied to the MLC task and shows powerful ability to capture
                high-order correlations between labels. However, the output
                labels are essentially an unordered set rather than an
                ordered sequence. This inconsistency tends to result in some
                intractable problems, e.g., sensitivity to the label
                order. To remedy this, we propose a simple but effective
                sequence-to-set model. The proposed model is trained via
                reinforcement learning, where reward feedback is designed to
                be independent of the label order. In this way, we can reduce
                the dependence of the model on the label order, as well as
                capture high-order correlations between labels. Extensive
                experiments show that our approach can substantially
                outperform competitive baselines, as well as effectively
                reduce the sensitivity to the label order.",
address      = "Florence, Italy",
month        = jul,
publisher    = "Association for Computational Linguistics"
}
@article{zhu-2018-label-correlation,
author       = "Zhu, Yue and Kwok, James T. and Zhou, Zhi-Hua",
journal      = "IEEE Transactions on Knowledge and Data Engineering",
title        = "Multi-Label Learning with Global and Local Label Correlation",
year         = 2018,
volume       = 30,
number       = 6,
pages        = "1081--1094"
}
@article{garg-2015-exploring-correlation,
author       = "Garg, Amit and Noyola, Jonathan and Verma, Romil and Saxena,
                Ashutosh and Jami, Aditya",
title        = "Exploring Correlation Between Labels To Improve Multi-Label
                Classification",
journal      = "CoRR",
volume       = "abs/1511.07953",
year         = 2015,
url          = "http://arxiv.org/abs/1511.07953v1",
abstract     = "This paper attempts multi-label classification by extending
                the idea of independent binary classification models for each
                output label, and exploring how the inherent correlation
                between output labels can be used to improve
                predictions. Logistic Regression, Naive Bayes, Random Forest,
                and SVM models were constructed, with SVM giving the best
                results: an improvement of 12.9\% over binary models was
                achieved for hold out cross validation by augmenting with
                pairwise correlation probabilities of the labels.",
archivePrefix= "arXiv",
eprint       = "1511.07953",
primaryClass = "cs.LG"
}
@inproceedings{huang-2012-multi-label,
author       = "Huang, Sheng-Jun and Zhou, Zhi-Hua",
title        = "Multi-Label Learning by Exploiting Label Correlations
                Locally",
year         = 2012,
publisher    = "AAAI Press",
abstract     = "It is well known that exploiting label correlations is
                important for multi-label learning. Existing approaches
                typically exploit label correlations globally, by assuming
                that the label correlations are shared by all the
                instances. In real-world tasks, however, different instances
                may share different label correlations, and few correlations
                are globally applicable. In this paper, we propose the ML-LOC
                approach which allows label correlations to be exploited
                locally. To encode the local influence of label correlations,
                we derive a LOC code to enhance the feature representation of
                each instance. The global discrimination fitting and local
                correlation sensitivity are incorporated into a unified
                framework, and an alternating solution is developed for the
                optimization. Experimental results on a number of image, text
                and gene data sets validate the effectiveness of our
                approach.",
booktitle    = "Proceedings of the Twenty-Sixth AAAI Conference on Artificial
                Intelligence",
pages        = "949--955",
numpages     = 7,
location     = "Toronto, Ontario, Canada",
series       = "AAAI'12"
}
@inproceedings{li-2014-condensed-filter-tree,
author       = "Li, Chun-Liang and Lin, Hsuan-Tien",
title        = "Condensed filter tree for cost-sensitive multi-label
                classification",
booktitle    = "31st International Conference on Machine Learning, ICML 2014",
volume       = 1,
pages        = "663--673",
month        = jan,
year         = 2014
}
@incollection{nam-2017-maximing-subset-accuracy,
title        = "Maximizing Subset Accuracy with Recurrent Neural Networks in
                Multi-label Classification",
author       = {Nam, Jinseok and Loza Menc{\'\i}a, Eneldo and Kim, Hyunwoo J
                and F{\"u}rnkranz, Johannes},
booktitle    = "Advances in Neural Information Processing Systems 30",
editor       = "I. Guyon and U. V. Luxburg and S. Bengio and H. Wallach and
                R. Fergus and S. Vishwanathan and R. Garnett",
pages        = "5413--5423",
year         = 2017,
publisher    = "Curran Associates, Inc.",
url          = "http://papers.nips.cc/paper/7125-maximizing-subset-accuracy-with-recurrent-neural-networks-in-multi-label-classification.pdf"
}
@article{rennie-2016-self-critical,
  author       = "Rennie, Steven J. and Marcheret, Etienne and Mroueh, Youssef
                  and Ross, Jarret and Goel, Vaibhava",
  title        = "Self-Critical Sequence Training for Image Captioning",
  journal      = "CoRR",
  year         = 2016,
  url          = "http://arxiv.org/abs/1612.00563v2",
  abstract     = "Recently it has been shown that policy-gradient methods for
                  reinforcement learning can be utilized to train deep
                  end-to-end systems directly on non-differentiable metrics for
                  the task at hand. In this paper we consider the problem of
                  optimizing image captioning systems using reinforcement
                  learning, and show that by carefully optimizing our systems
                  using the test metrics of the MSCOCO task, significant gains
                  in performance can be realized. Our systems are built using a
                  new optimization approach that we call self-critical sequence
                  training (SCST). SCST is a form of the popular REINFORCE
                  algorithm that, rather than estimating a ``baseline'' to
                  normalize the rewards and reduce variance, utilizes the
                  output of its own test-time inference algorithm to normalize
                  the rewards it experiences. Using this approach, estimating
                  the reward signal (as actor-critic methods must do) and
                  estimating normalization (as REINFORCE algorithms typically
                  do) is avoided, while at the same time harmonizing the model
                  with respect to its test-time inference
                  procedure. Empirically we find that directly optimizing the
                  CIDEr metric with SCST and greedy decoding at test-time is
                  highly effective. Our results on the MSCOCO evaluation server
                  establish a new state-of-the-art on the task, improving the
                  best result in terms of CIDEr from 104.9 to 114.7.",
  archivePrefix= "arXiv",
  eprint       = "1612.00563",
  primaryClass = "cs.LG"
}
@techreport{settles-2009-active-learning,
  title        = "Active learning literature survey",
  author       = "Settles, Burr",
  year         = 2009,
  type         = "Computer Sciences Technical Report",
  number       = 1648,
  institution  = "University of Wisconsin--Madison, Department of Computer
                  Sciences"
}
@incollection{aggarwal-2014-active-learning,
  title        = "Active learning: A survey",
  author       = "Aggarwal, Charu C. and Kong, Xiangnan and Gu, Quanquan and
                  Han, Jiawei and Yu, Philip S.",
  booktitle    = "Data Classification: Algorithms and Applications",
  pages        = "571--605",
  year         = 2014,
  publisher    = "CRC Press"
}
@article{tang-2019-distilling-bert,
  author       = "Tang, Raphael and Lu, Yao and Liu, Linqing and Mou, Lili and
                  Vechtomova, Olga and Lin, Jimmy",
  title        = "Distilling Task-Specific Knowledge from {BERT} into Simple
                  Neural Networks",
  journal      = "CoRR",
  year         = 2019,
  url          = "http://arxiv.org/abs/1903.12136v1",
  abstract     = "In the natural language processing literature, neural
                  networks are becoming increasingly deeper and complex. The
                  recent poster child of this trend is the deep language
                  representation model, which includes BERT, ELMo, and
                  GPT. These developments have led to the conviction that
                  previous-generation, shallower neural networks for language
                  understanding are obsolete. In this paper, however, we
                  demonstrate that rudimentary, lightweight neural networks can
                  still be made competitive without architecture changes,
                  external training data, or additional input features. We
                  propose to distill knowledge from BERT, a state-of-the-art
                  language representation model, into a single-layer BiLSTM, as
                  well as its siamese counterpart for sentence-pair
                  tasks. Across multiple datasets in paraphrasing, natural
                  language inference, and sentiment classification, we achieve
                  comparable results with ELMo, while using roughly 100 times
                  fewer parameters and 15 times less inference time.",
  archivePrefix= "arXiv",
  eprint       = "1903.12136",
  primaryClass = "cs.CL"
}
@article{tay-2020-efficient-transformers,
  author       = "Tay, Yi and Dehghani, Mostafa and Bahri, Dara and Metzler,
                  Donald",
  title        = "Efficient Transformers: A Survey",
  journal      = "CoRR",
  year         = 2020,
  url          = "http://arxiv.org/abs/2009.06732v1",
  abstract     = "Transformer model architectures have garnered immense
                  interest lately due to their effectiveness across a range of
                  domains like language, vision and reinforcement learning. In
                  the field of natural language processing for example,
                  Transformers have become an indispensable staple in the
                  modern deep learning stack. Recently, a dizzying number of
                  ``X-former'' models have been proposed - Reformer, Linformer,
                  Performer, Longformer, to name a few - which improve upon the
                  original Transformer architecture, many of which make
                  improvements around computational and memory efficiency. With
                  the aim of helping the avid researcher navigate this flurry,
                  this paper characterizes a large and thoughtful selection of
                  recent efficiency-flavored ``X-former'' models, providing an
                  organized and comprehensive overview of existing work and
                  models across multiple domains.",
  archivePrefix= "arXiv",
  eprint       = "2009.06732",
  primaryClass = "cs.LG"
}
@article{wei-2019-casrel,
  author       = {Wei, Zhepei and Su, Jianlin and Wang, Yue and Tian, Yuan and
                  Chang, Yi},
  title        = {A Novel Cascade Binary Tagging Framework for Relational
                  Triple Extraction},
  journal      = {CoRR},
  year         = 2019,
  url          = {http://arxiv.org/abs/1909.03227v4},
  archivePrefix= {arXiv},
  eprint       = {1909.03227},
  primaryClass = {cs.CL},
  abstract     = {Extracting relational triples from unstructured text is
                  crucial for large-scale knowledge graph
                  construction. However, few existing works excel in solving
                  the overlapping triple problem where multiple relational
                  triples in the same sentence share the same entities. In this
                  work, we introduce a fresh perspective to revisit the
                  relational triple extraction task and propose a novel cascade
                  binary tagging framework (CasRel) derived from a principled
                  problem formulation. Instead of treating relations as
                  discrete labels as in previous works, our new framework
                  models relations as functions that map subjects to objects in
                  a sentence, which naturally handles the overlapping
                  problem. Experiments show that the CasRel framework already
                  outperforms state-of-the-art methods even when its encoder
                  module uses a randomly initialized BERT encoder, showing the
                  power of the new tagging framework. It enjoys further
                  performance boost when employing a pre-trained BERT encoder,
                  outperforming the strongest baseline by 17.5 and 30.2
                  absolute gain in F1-score on two public datasets NYT and
                  WebNLG, respectively. In-depth analysis on different
                  scenarios of overlapping triples shows that the method
                  delivers consistent performance gain across all these
                  scenarios. The source code and data are released online.}
}
@inproceedings{ma-2020-simple-lexicon, | |
title = "Simplify the Usage of Lexicon in {C}hinese {NER}", | |
author = "Ma, Ruotian and Peng, Minlong and Zhang, Qi and Wei, Zhongyu | |
and Huang, Xuanjing", | |
booktitle = "Proceedings of the 58th Annual Meeting of the Association for | |
Computational Linguistics", | |
month = jul, | |
year = 2020, | |
address = "Online", | |
publisher = "Association for Computational Linguistics", | |
url = "https://www.aclweb.org/anthology/2020.acl-main.528", | |
doi = "10.18653/v1/2020.acl-main.528", | |
pages = "5951-5960", | |
abstract = "Recently, many works have tried to augment the performance of | |
Chinese named entity recognition (NER) using word | |
lexicons. As a representative, Lattice-LSTM has achieved new | |
benchmark results on several public Chinese NER | |
datasets. However, Lattice-LSTM has a complex model | |
architecture. This limits its application in many industrial | |
areas where real-time NER response |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment