xziyue/example-bib.bib

## example-bib.bib


@misc{charlotte_2020,
  author = {C. Jee},
  title  = {{An Indian politician is using deepfake technology to win new voters}},
  url    = {https://www.technologyreview.com/2020/02/19/868173/an-indian-politician-is-using-deepfakes-to-try-and-win-voters/},
}

@misc{halle_2020,
  author = {H. Jones},
  title  = {{What are 'deepfakes' and the dangers as we head into election season}},
  url    = {https://www.kxxv.com/news/local-news/what-are-deepfakes-and-the-dangers-as-we-head-into-election-season},
}

@misc{webb_2020,
  author = {D. Webb},
  title  = {{Avatarify: Create Real Time Deepfakes for Video Calls}},
  url    = {https://ccm.net/faq/64681-avatarify-video-call-deepfakes},
}


% NOTE(review): the key looks like a typo of "elizalde"; it is kept unchanged
% because existing \cite commands may reference it.
@misc{elizakde_2020,
  author = {E. Elizalde},
  title  = {{'Deepfake' Queen Elizabeth II warns about misinformation}},
  url    = {https://nypost.com/2020/12/26/deepfake-queen-elizabeth-ii-warns-about-misinformation/},
}


@misc{umawing_2020,
  author = {J. Umawing},
  title  = {{The face of tomorrow's cybercrime: Deepfake ransomware explained}},
  url    = {https://www.terabitweb.com/2020/06/26/the-face-of-tomorrows-cybercrime-deepfake-ransomware-explained/},
}

@misc{symon_2019,
  author = {E. Symon},
  title  = {{Deepfake Audio and Visual Political Ads Faces Ban With Looming Senate Vote}},
  url    = {https://californiaglobe.com/section-2/deepfake-audio-and-visual-political-ads-faces-ban-with-looming-senate-vote/},
}

@misc{vaccari_2020,
  author = {C. Vaccari and A. Chadwick},
  title  = {{Deepfakes are here. These deceptive videos erode trust in all news media}},
  url    = {https://fortunascorner.com/2020/05/30/deepfakes-are-here-these-deceptive-videos-erode-trust-in-all-news-media/},
}

@misc{ruiz_2020,
  author = {D. Ruiz},
  title  = {{Deepfakes laws and proposals flood US}},
  url    = {https://blog.malwarebytes.com/artificial-intelligence/2020/01/deepfakes-laws-and-proposals-flood-us/},
}

@misc{ethics_2020,
  author = {{Committee On Ethics}},
  title  = {{Intentional Use of Audio-Visual Distortions and Deep Fakes}},
  url    = {https://ethics.house.gov/campaign-activity-pink-sheets/intentional-use-audio-visual-distortions-deep-fakes},
}

@misc{raphael_2019,
  author = {R. T. Garcia},
  title  = {{Deepfakes Are Being Used to Puncture Politicians' Bluster}},
  url    = {https://ffwd.medium.com/deepfakes-are-being-used-to-puncture-politicians-bluster-e4bb4473841},
}

@article{Guera2019_ICMLW,
  author  = {D. G{\"{u}}era and S. Baireddy and P. Bestagini and S. Tubaro and E. J. Delp},
  title   = {{We Need No Pixels: Video Manipulation Detection Using Stream Descriptors}},
  journal = {arXiv preprint arXiv:1906.08743},
  year    = {2019},
  month   = jun,
  doi     = {10.48550/arXiv.1906.08743},
  url     = {https://arxiv.org/abs/1906.08743},
}


@article{brundage2018,
  author  = {M. Brundage and S. Avin and J. Clark and H. Toner and P. Eckersley and B. Garfinkel and A. Dafoe and P. Scharre and T. Zeitzoff and B. Filar and H. Anderson and H. Roff and G. C. Allen and J. Steinhardt and C. Flynn and {\'O} h{\'E}igeartaigh, S. and S. Beard and H. Belfield and S. Farquhar and C. Lyle and R. Crootof and O. Evans and M. Page and J. Bryson and R. Yampolskiy and D. Amodei},
  title   = {{The Malicious Use of Artificial Intelligence: Forecasting, Prevention, and Mitigation}},
  journal = {arXiv preprint arXiv:1802.07228},
  year    = {2018},
  month   = feb,
  doi     = {10.48550/arXiv.1802.07228},
  url     = {https://arxiv.org/abs/1802.07228v1},
}

@misc{grobman_2019,
  author = {S. Grobman},
  title  = {{McAfee Labs 2020 Threats Predictions Report}},
  url    = {https://www.mcafee.com/blogs/other-blogs/mcafee-labs/mcafee-labs-2020-threats-predictions-report/},
}

@misc{dayma_2019,
  author = {B. Dayma},
  title  = {{Improving Deepfake Performance with Data}},
  url    = {https://www.wandb.com/articles/improving-deepfake-performance-with-data},
}

@misc{greenwald_2019,
  author = {J. Greenwald},
  title  = {{Deepfake hacks forecast for 2020: Report}},
  url    = {https://www.businessinsurance.com/article/20191202/NEWS06/912331975/Deepfake-hacks-forecast-for-2020-Experian-report},
}

@misc{vincent_2018,
  author = {J. Vincent},
  title  = {{US lawmakers say AI deepfakes have the potential to disrupt every facet of our society}},
  url    = {https://www.theverge.com/2018/9/14/17859188/ai-deepfakes-national-security-threat-lawmakers-letter-intelligence-community},
}

@misc{citron_2018,
  author = {D. K. Citron and R. Chesney},
  title  = {{Disinformation on Steroids: The Threat of Deep Fakes}},
  url    = {https://scholarship.law.bu.edu/shorter_works/30/},
}

@article{bayram_2008,
  author  = {Bayram, Sevinc and Sencar, Husrev Taha and Memon, Nasir},
  title   = {{Video Copy Detection Based on Source Device Characteristics: A Complementary Approach to Content-Based Methods}},
  journal = {Proceedings of the ACM International Conference on Multimedia Information Retrieval},
  pages   = {435--442},
  year    = {2008},
  month   = oct,
  note    = {Vancouver, British Columbia, Canada},
  doi     = {10.1145/1460096.1460167},
}

% A book chapter: @incollection makes BibTeX use booktitle, which @book ignores.
@incollection{keith_2007,
  author    = {Keith Jack},
  title     = {{Chapter 13 - MPEG-2}},
  booktitle = {Video Demystified},
  edition   = {Fifth},
  publisher = {Newnes},
  address   = {Burlington, MA},
  pages     = {577--737},
  year      = {2007},
  doi       = {10.1016/B978-075068395-1/50013-4},
  url       = {http://www.sciencedirect.com/science/article/pii/B9780750683951500134},
}

@article{Zhou_2018,
  author  = {Zhou, P. and Han, X. and Morariu, V. I. and Davis, L. S.},
  title   = {{Learning Rich Features for Image Manipulation Detection}},
  journal = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages   = {1053--1061},
  year    = {2018},
  month   = jun,
  note    = {Salt Lake City, UT},
  doi     = {10.1109/CVPR.2018.00116},
}

@article{simonyan_2014,
  author  = {Simonyan, K. and Zisserman, A.},
  title   = {{Two-Stream Convolutional Networks for Action Recognition in Videos}},
  journal = {Proceedings of the Advances in Neural Information Processing Systems},
  pages   = {568--576},
  year    = {2014},
  month   = dec,
  note    = {Montreal, Canada},
  url     = {http://papers.nips.cc/paper/5353-two-stream-convolutional-networks-for-action-recognition-in-videos.pdf},
}

@article{howard_2019,
  author  = {Howard, M. J. and Williamson, A. S. and Norouzi, N.},
  title   = {{Video Manipulation Detection via Recurrent Residual Feature Learning Networks}},
  journal = {Proceedings of the IEEE Global Conference on Signal and Information Processing},
  pages   = {1--5},
  year    = {2019},
  month   = nov,
  note    = {Ottawa, Ontario, Canada},
  doi     = {10.1109/GlobalSIP45357.2019.8969458},
}

@article{bonettini_2020,
  author  = {Bonettini, N. and Cannas, E. and Mandelli, S. and Bondi, L. and Bestagini, P. and Tubaro, S.},
  title   = {{Video Face Manipulation Detection Through Ensemble of CNNs}},
  journal = {arXiv preprint arXiv:2004.07676},
  year    = {2020},
  month   = apr,
  doi     = {10.48550/arXiv.2004.07676},
}


@article{montserrat_2020,
  author  = {Montserrat, D. and Hao, H. and Yarlagadda, S. and Baireddy, S. and Shao, R. and Horvath, J. and Bartusiak, E. and Yang, J. and G{\"{u}}era, D. and Zhu, F. and Delp, E.},
  title   = {{Deepfakes Detection with Automatic Face Weighting}},
  journal = {Proceedings of 2020 IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops},
  pages   = {2851--2859},
  year    = {2020},
  month   = jun,
  note    = {Seattle, WA},
  doi     = {10.1109/CVPRW50498.2020.00342},
}


@article{rossetto_2017,
  author  = {L. Rossetto and H. Schuldt},
  title   = {{Web Video in Numbers - An Analysis of Web-Video Metadata}},
  journal = {arXiv preprint arXiv:1707.01340},
  year    = {2017},
  month   = jul,
  doi     = {10.48550/arXiv.1707.01340},
}

% NOTE(review): the key looks like a typo of "papadopoulou"; it is kept unchanged
% because existing \cite commands may reference it.
@article{apadopoulou_2017,
  author  = {Papadopoulou, O. and Zampoglou, M. and Papadopoulos, S. and Kompatsiaris, Y.},
  title   = {{Web Video Verification Using Contextual Cues}},
  journal = {Proceedings of the International Workshop on Multimedia Forensics and Security},
  pages   = {6--10},
  year    = {2017},
  month   = jun,
  note    = {Bucharest, Romania},
  doi     = {10.1145/3078897.3080535},
}

@article{palod_2019,
  author  = {Palod, P. and Patwari, A. and Bahety, S. and Bagchi, S. and Goyal, P.},
  title   = {{Misleading Metadata Detection on YouTube}},
  journal = {arXiv preprint arXiv:1901.08759},
  year    = {2019},
  month   = jan,
  doi     = {10.48550/arXiv.1901.08759},
}

@article{shullani_2017,
  author  = {Shullani, D. and Fontani, M. and Iuliani, M. and Alshaya, O. and Piva, A.},
  title   = {{VISION: a video and image dataset for source identification}},
  journal = {EURASIP Journal on Information Security},
  volume  = {2017},
  pages   = {15},
  year    = {2017},
  month   = oct,
  doi     = {10.1186/s13635-017-0067-2},
}


% NOTE(review): the DOI prefix (IJCB48548.2020) does not match the stated venue
% (CVPR Workshops, 2019) -- verify the DOI against the publisher record.
@article{sabir_2019,
  author  = {Sabir, E. and Cheng, J. and Jaiswal, A. and AbdAlmageed, W. and Masi, I. and Natarajan, P.},
  title   = {{Recurrent Convolutional Strategies for Face Manipulation Detection in Videos}},
  journal = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  year    = {2019},
  month   = jun,
  note    = {Long Beach, California, USA},
  doi     = {10.1109/IJCB48548.2020.9304936},
}

@article{koopman_2018,
  author  = {Koopman, M. and Macarulla Rodriguez, A. and Geradts, Z.},
  title   = {{Detection of Deepfake Video Manipulation}},
  journal = {Proceedings of the Irish Machine Vision and Image Processing Conference},
  pages   = {133--136},
  year    = {2018},
  month   = aug,
  note    = {Belfast, United Kingdom},
}

@article{huy_2019,
  author  = {H. H. Nguyen and F. Fang and J. Yamagishi and I. Echizen},
  title   = {{Multi-task Learning For Detecting and Segmenting Manipulated Facial Images and Videos}},
  journal = {Proceedings of the IEEE International Conference on Biometrics Theory, Applications and Systems},
  year    = {2019},
  month   = sep,
  note    = {Tampa, Florida},
  doi     = {10.1109/BTAS46853.2019.9185974},
}

@article{milani_2012,
  author    = {Milani, S. and Fontani, M. and Bestagini, P. and Barni, M. and Piva, A. and Tagliasacchi, M. and Tubaro, S.},
  title     = {{An overview on video forensics}},
  journal   = {{APSIPA} Transactions on Signal and Information Processing},
  publisher = {Cambridge University Press},
  volume    = {1},
  pages     = {e2},
  year      = {2012},
  doi       = {10.1017/ATSIP.2012.2},
}


@article{kakar_2012,
  author  = {Kakar, P. and Sudha, N.},
  title   = {{Verifying Temporal Data in Geotagged Images Via Sun Azimuth Estimation}},
  journal = {IEEE Transactions on Information Forensics and Security},
  volume  = {7},
  number  = {3},
  pages   = {1029--1039},
  year    = {2012},
  month   = jun,
  doi     = {10.1109/TIFS.2012.2188796},
}


@inproceedings{li_2017,
  author    = {Li, Xiaopeng and Xu, Wenyuan and Wang, Song and Qu, Xianshan},
  editor    = {Gollmann, Dieter and Miyaji, Atsuko and Kikuchi, Hiroaki},
  title     = {{Are You Lying: Validating the Time-Location of Outdoor Images}},
  booktitle = {Applied Cryptography and Network Security},
  publisher = {Springer International Publishing},
  address   = {Cham},
  pages     = {103--123},
  year      = {2017},
  doi       = {10.1007/978-3-319-61204-1_6},
}


@article{isola_2015,
  author  = {Isola, P. and Zoran, D. and Krishnan, D. and Adelson, E. H.},
  title   = {{Learning visual groups from co-occurrences in space and time}},
  journal = {arXiv preprint arXiv:1511.06811},
  year    = {2015},
  month   = nov,
  doi     = {10.48550/arXiv.1511.06811},
}

@article{huh_2018,
  author  = {Huh, M. and Liu, A. and Owens, A. and Efros, A. A.},
  title   = {{Fighting fake news: Image splice detection via learned self-consistency}},
  journal = {Proceedings of the European Conference on Computer Vision (ECCV)},
  pages   = {101--117},
  year    = {2018},
  month   = sep,
  note    = {{Munich, Germany}},
  doi     = {10.1007/978-3-030-01252-6_7},
}

@article{chen_2017,
  author  = {Chen, B. and Ghosh, P. and Morariu, V. I. and Davis, L. S.},
  title   = {{Detection of Metadata Tampering Through Discrepancy Between Image Content and Metadata Using Multi-task Deep Learning}},
  journal = {Proceedings of 2017 IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW)},
  pages   = {1872--1880},
  year    = {2017},
  month   = jul,
  note    = {{Honolulu, Hawaii, USA}},
  doi     = {10.1109/CVPRW.2017.234},
}

@article{chen_2019,
  author  = {Chen, B. and Davis, L. S.},
  title   = {{Deep Representation Learning for Metadata Verification}},
  journal = {Proceedings of 2019 IEEE Winter Applications of Computer Vision Workshops (WACVW)},
  pages   = {73--82},
  year    = {2019},
  month   = jan,
  note    = {{Waikoloa Village, Hawaii, USA}},
  doi     = {10.1109/WACVW.2019.00019},
}

@article{bharati_2019,
  author  = {Bharati, A. and Moreira, D. and Brogan, J. and Hale, P. and Bowyer, K. and Flynn, P. and Rocha, A. and Scheirer, W.},
  title   = {{Beyond pixels: Image provenance analysis leveraging metadata}},
  journal = {Proceedings of 2019 IEEE Winter Conference on Applications of Computer Vision (WACV)},
  pages   = {1692--1702},
  year    = {2019},
  month   = jan,
  note    = {{Waikoloa Village, Hawaii, USA}},
  doi     = {10.1109/WACV.2019.00185},
}

@article{gloe_2014,
  author  = {T. Gloe and A. Fischer and M. Kirchner},
  title   = {{Forensic analysis of video file formats}},
  journal = {Digital Investigation},
  volume  = {11},
  pages   = {S68--S76},
  year    = {2014},
  month   = may,
  doi     = {10.1016/j.diin.2014.03.009},
}

@article{iuliani_2019,
  author  = {Iuliani, M. and Shullani, D. and Fontani, M. and Meucci, S. and Piva, A.},
  title   = {{A Video Forensic Framework for the Unsupervised Analysis of MP4-Like File Container}},
  journal = {IEEE Transactions on Information Forensics and Security},
  volume  = {14},
  number  = {3},
  pages   = {635--645},
  year    = {2019},
  month   = mar,
  doi     = {10.1109/TIFS.2018.2859760},
}

@article{yang_2020,
  author  = {Yang, P. and Baracchi, D. and Iuliani, M. and Shullani, D. and Ni, R. and Zhao, Y. and Piva, A.},
  title   = {{Efficient Video Integrity Analysis Through Container Characterization}},
  journal = {IEEE Journal of Selected Topics in Signal Processing},
  volume  = {14},
  number  = {5},
  pages   = {947--954},
  year    = {2020},
  month   = aug,
  doi     = {10.1109/JSTSP.2020.3008088},
}

@article{padin_2020,
  author  = {V{\'a}zquez-Pad{\'\i}n, D. and Fontani, M. and Shullani, D. and P{\'e}rez-Gonz{\'a}lez, F. and Piva, A. and Barni, M.},
  title   = {{Video Integrity Verification and GOP Size Estimation Via Generalized Variation of Prediction Footprint}},
  journal = {IEEE Transactions on Information Forensics and Security},
  volume  = {15},
  pages   = {1815--1830},
  year    = {2020},
  doi     = {10.1109/TIFS.2019.2951313},
}

@article{yao2020double,
  author  = {Yao, H. and Ni, R. and Zhao, Y.},
  title   = {{Double compression detection for H.264 videos with adaptive GOP structure}},
  journal = {Multimedia Tools and Applications},
  volume  = {79},
  number  = {9},
  pages   = {5789--5806},
  year    = {2020},
  month   = mar,
  doi     = {10.1007/s11042-019-08306-5},
}

@article{shi2000normalized,
  author  = {Shi, J. and Malik, J.},
  title   = {{Normalized cuts and image segmentation}},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  volume  = {22},
  number  = {8},
  pages   = {888--905},
  year    = {2000},
  month   = aug,
  doi     = {10.1109/34.868688},
}

% NOTE(review): appears to cite the same book as hastie2009elements; both keys
% are kept because either may be cited.
@book{friedman2001elements,
  author    = {Hastie, T. and Tibshirani, R. and Friedman, J.},
  title     = {{The elements of statistical learning}},
  publisher = {Springer},
  address   = {New York, USA},
  volume    = {1},
  pages     = {106--119},
  year      = {2009},
}

@misc{mp4analyser,
  author = {essential61},
  title  = {\texttt{mp4analyser}},
  url    = {https://github.com/essential61/mp4analyser},
}

@misc{quicktime_format_old,
  author = {{Apple Inc.}},
  title  = {{Classic Version of the QuickTime File Format Specification}},
  url    = {https://developer.apple.com/standards/classic-quicktime/},
}

@misc{mp4_meta_breakdown,
  author = {van Stee, Leo},
  title  = {{On date, time, location and other metadata in MP4/MOV files}},
  url    = {https://leo-van-stee.github.io/},
}

@misc{qt_format_spec,
  author = {{Apple Inc.}},
  title  = {{QuickTime File Format Specification--Metadata}},
  url    = {https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/Metadata/Metadata.html},
}

@misc{qt_format_spec_moov,
  author = {{Apple Inc.}},
  title  = {{QuickTime File Format Specification--Movie Atoms}},
  url    = {https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap2/qtff2.html},
}

@misc{android_formats,
  author = {{Google LLC}},
  title  = {{Android Developers: Supported media formats}},
  url    = {https://developer.android.com/guide/topics/media/media-formats},
}

@misc{social_network_video_spec,
  author = {A. York},
  title  = {{Always Up-to-Date Guide to Social Media Video Specs}},
  url    = {https://sproutsocial.com/insights/social-media-video-specs-guide/},
}

@misc{sony_camera_spec,
  author = {{Sony Corporation}},
  title  = {{FDR-AX40 Specifications}},
  url    = {https://www.sony.co.in/electronics/handycam-camcorders/fdr-ax40/specifications},
}

@misc{iso_media_part12,
  author = {{ISO}},
  title  = {{ISO/IEC 14496-12:2020 - Information technology - Coding of audio-visual objects - Part 12: ISO base media file format}},
  url    = {https://www.iso.org/standard/74428.html},
}

@article{yokoyama_2000,
  author  = {Yokoyama, Y.},
  title   = {{Adaptive GOP structure selection for real-time MPEG-2 video encoding}},
  journal = {Proceedings 2000 International Conference on Image Processing},
  volume  = {2},
  pages   = {832--835},
  year    = {2000},
  month   = sep,
  note    = {Vancouver, British Columbia, Canada},
}


@misc{iso_generic_media,
  author = {{ISO}},
  title  = {{ISO/IEC 13818-1:2019 - Information technology - Generic coding of moving pictures and associated audio information - Part 1: Systems}},
  url    = {https://www.iso.org/standard/75928.html},
}

  @misc{iso_mp4,
    author = {{ISO}},
    title  = {{ISO/IEC 14496-14:2020 - Information technology - Coding of audio-visual objects - Part 14: MP4 file format}},
    url    = {https://www.iso.org/standard/79110.html},
  }

@article{sullivan_2005,
  author  = {Sullivan, G. J. and Wiegand, T.},
  title   = {{Video Compression - From Concepts to the {H.264/AVC} Standard}},
  journal = {Proceedings of the IEEE},
  volume  = {93},
  number  = {1},
  pages   = {18--31},
  year    = {2005},
  month   = jan,
  doi     = {10.1109/JPROC.2004.839617},
}

@article{sullivan_2012,
  author  = {Sullivan, G. J. and Ohm, J. and Han, W. and Wiegand, T.},
  title   = {{Overview of the {High Efficiency Video Coding (HEVC)} Standard}},
  journal = {IEEE Transactions on Circuits and Systems for Video Technology},
  volume  = {22},
  number  = {12},
  pages   = {1649--1668},
  year    = {2012},
  month   = dec,
  doi     = {10.1109/TCSVT.2012.2221191},
}

@misc{avidemux,
  author = {{Avidemux contributors}},
  title  = {{Avidemux}},
  url    = {http://avidemux.sourceforge.net/},
}

@misc{kdenlive,
  author = {{Kdenlive contributors}},
  title  = {{Kdenlive}},
  url    = {https://kdenlive.org/en/},
}

@misc{exiftool,
  author = {P. Harvey},
  title  = {{ExifTool by Phil Harvey}},
  url    = {https://exiftool.org/},
}

@misc{premiere,
  author = {{Adobe Inc.}},
  title  = {{Professional video editor \& video maker - Adobe Premiere Pro}},
  url    = {https://www.adobe.com/products/premiere.html},
}

@misc{ffmpeg,
  author = {{FFmpeg contributors}},
  title  = {{FFmpeg}},
  url    = {https://www.ffmpeg.org/},
}

@article{schafer_1995,
  author  = {Schafer, R. and Sikora, T.},
  title   = {{Digital video coding standards and their role in video communications}},
  journal = {Proceedings of the IEEE},
  volume  = {83},
  number  = {6},
  pages   = {907--924},
  year    = {1995},
  month   = jun,
  doi     = {10.1109/5.387092},
}

@misc{canon_cnth,
  author = {{Exiftool contributors}},
  title  = {{Canon Tags}},
  url    = {https://exiftool.org/TagNames/Canon.html},
}

@article{filippone2008survey,
  author    = {Filippone, Maurizio and Camastra, Francesco and Masulli, Francesco and Rovetta, Stefano},
  title     = {{A Survey of Kernel and Spectral Methods for Clustering}},
  journal   = {Pattern Recognition},
  publisher = {Elsevier},
  volume    = {41},
  number    = {1},
  pages     = {176--190},
  year      = {2008},
  doi       = {10.1016/j.patcog.2007.05.018},
}

@article{ng2001spectral,
  author  = {Ng, Andrew and Jordan, Michael and Weiss, Yair},
  title   = {{On Spectral Clustering: Analysis and an Algorithm}},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {14},
  year    = {2001},
}

@book{chung1997spectral,
  author    = {Chung, Fan R. K.},
  title     = {{Spectral Graph Theory}},
  publisher = {American Mathematical Society},
  volume    = {92},
  year      = {1997},
  doi       = {10.1090/cbms/092},
}

@article{macqueen1967some,
  author  = {MacQueen, James},
  title   = {{Some Methods for Classification and Analysis of Multivariate Observations}},
  journal = {Proceedings of the Fifth Berkeley Symposium on Mathematical Statistics and Probability},
  volume  = {1},
  number  = {14},
  pages   = {281--297},
  year    = {1967},
  note    = {Berkeley, CA, USA},
}

@article{gu2011linear,
  author  = {Gu, Quanquan and Li, Zhenhui and Han, Jiawei},
  title   = {{Linear Discriminant Dimensionality Reduction}},
  journal = {Proceedings of the Machine Learning and Knowledge Discovery in Databases: European Conference},
  pages   = {549--564},
  year    = {2011},
  note    = {Athens, Greece},
  doi     = {10.1007/978-3-642-23780-5_45},
}

@book{hastie2009elements,
  author    = {Hastie, Trevor and Tibshirani, Robert and Friedman, Jerome H},
  title     = {{The Elements of Statistical Learning: Data Mining, Inference, and Prediction}},
  publisher = {Springer},
  year      = {2009},
  doi       = {10.1007/978-0-387-84858-7},
}

@book{fukunaga2013introduction,
  author    = {Fukunaga, Keinosuke},
  title     = {{Introduction to Statistical Pattern Recognition}},
  publisher = {Elsevier},
  year      = {2013},
  doi       = {10.1016/C2009-0-27872-X},
}

@misc{iso-13818-3,
  author = {{ISO}},
  title  = {{ISO/IEC 13818-3:1995 - Information technology -- Generic coding of moving pictures and associated audio information -- Part 3: Audio}},
  url    = {https://www.iso.org/standard/22991.html},
}

@misc{iso-13818-7,
  author = {{ISO}},
  title  = {{ISO/IEC 13818-7:1997 - Information technology -- Generic coding of moving pictures and associated audio information -- Part 7: Advanced Audio Coding (AAC)}},
  url    = {https://www.iso.org/standard/25040.html},
}

@article{brandenburg1999mp3,
  author  = {Brandenburg, Karlheinz},
  title   = {{MP3} and {AAC} explained},
  journal = {Proceedings of the AES 17th International Conference on High-Quality Audio Coding},
  year    = {1999},
  note    = {Signa, Italy},
}

@article{prenger2019waveglow,
  author  = {Prenger, Ryan and Valle, Rafael and Catanzaro, Bryan},
  title   = {{Waveglow: A flow-based generative network for speech synthesis}},
  journal = {Proceedings of 2019 IEEE International Conference on Acoustics, Speech and Signal Processing},
  pages   = {3617--3621},
  year    = {2019},
  note    = {Brighton, UK},
  doi     = {10.1109/ICASSP.2019.8683143},
}

@article{ren2020fastspeech,
  author  = {Ren, Yi and Hu, Chenxu and Tan, Xu and Qin, Tao and Zhao, Sheng and Zhao, Zhou and Liu, Tie-Yan},
  title   = {{Fastspeech 2: Fast and high-quality end-to-end text to speech}},
  journal = {arXiv preprint arXiv:2006.04558},
  year    = {2020},
  doi     = {10.48550/arXiv.2006.04558},
}

@article{dhariwal2020jukebox,
  author  = {Dhariwal, Prafulla and Jun, Heewoo and Payne, Christine and Kim, Jong Wook and Radford, Alec and Sutskever, Ilya},
  title   = {{Jukebox: A generative model for music}},
  journal = {arXiv preprint arXiv:2005.00341},
  year    = {2020},
  doi     = {10.48550/arXiv.2005.00341},
}

@inproceedings{vaswani2017attention,
  author    = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and Kaiser, {\L}ukasz and Polosukhin, Illia},
  title     = {{Attention is all you need}},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {30},
  pages     = {5998--6008},
  year      = {2017},
}


@article{liu2010detection,
  author    = {Liu, Qingzhong and Sung, Andrew H and Qiao, Mengyu},
  title     = {{Detection of double {MP3} compression}},
  journal   = {Cognitive Computation},
  publisher = {Springer},
  volume    = {2},
  number    = {4},
  pages     = {291--296},
  year      = {2010},
  doi       = {10.1007/s12559-010-9045-4},
}

@article{qiao2013improved,
  author  = {Qiao, Mengyu and Sung, Andrew H and Liu, Qingzhong},
  title   = {{Improved detection of {MP3} double compression using content-independent features}},
  journal = {Proceedings of 2013 IEEE International Conference on Signal Processing, Communication and Computing},
  pages   = {1--4},
  year    = {2013},
  note    = {Kunming, China},
  doi     = {10.1109/ICSPCC.2013.6664121},
}

@article{yang2010detecting,
  author    = {Rui Yang and Yun Q. Shi and Jiwu Huang},
  title     = {{Detecting double compression of audio signal}},
  journal   = {Media Forensics and Security II},
  publisher = {SPIE},
  volume    = {7541},
  pages     = {200--209},
  year      = {2010},
  doi       = {10.1117/12.838695},
}

@article{ma2014detecting,
  author  = {Ma, Pengfei and Wang, Rangding and Yan, Diqun and Jin, Chao},
  title   = {{Detecting double-compressed {MP3} with the Same Bit-rate}},
  journal = {Journal of Software},
  volume  = {9},
  number  = {10},
  pages   = {2522--2527},
  year    = {2014},
}

@inproceedings{ma2014huffman,
  author    = {Ma, Pengfei and Wang, Rangding and Yan, Diqun and Jin, Chao},
  title     = {{A Huffman Table Index Based Approach to Detect Double {MP3} Compression}},
  booktitle = {Digital-Forensics and Watermarking},
  address   = {Berlin, Heidelberg},
  pages     = {258--271},
  year      = {2014},
  doi       = {10.1007/978-3-662-43886-2_19},
}

@article{yan2018compression,
  author  = {Yan, Diqun and Wang, Rangding and Zhou, Jinglei and Jin, Chao and Wang, Zhifeng},
  title   = {{Compression history detection for {MP3} audio}},
  journal = {KSII Transactions on Internet and Information Systems (TIIS)},
  volume  = {12},
  number  = {2},
  pages   = {662--675},
  year    = {2018},
  doi     = {10.3837/tiis.2018.02.007},
}


@article{wijethunga2020deepfake,
  author  = {Wijethunga, R. L. M. A. P. C. and Matheesha, D. M. K. and Noman, Abdullah Al and De Silva, K. H. V. T. A. and Tissera, Muditha and Rupasinghe, Lakmal},
  title   = {{Deepfake Audio Detection: A Deep Learning Based Solution for Group Conversations}},
  journal = {Proceedings of 2020 2nd International Conference on Advancements in Computing},
  volume  = {1},
  pages   = {192--197},
  year    = {2020},
  note    = {Malabe, Sri Lanka},
  doi     = {10.1109/ICAC51239.2020.9357161},
}

@article{chen2020generalization,
  author  = {Tianxiang Chen and Avrosh Kumar and Parav Nagarsheth and Ganesh Sivaraman and Elie Khoury},
  title   = {{Generalization of Audio Deepfake Detection}},
  journal = {Proceedings of the Speaker and Language Recognition Workshop (Odyssey 2020)},
  pages   = {132--137},
  year    = {2020},
  note    = {Tokyo, Japan},
  doi     = {10.21437/Odyssey.2020-19},
}

@inproceedings{luo2020compression,
  author    = {Luo, Da and Cheng, Wenqing and Yuan, Huaqiang and Luo, Weiqi and Liu, Zhenghui},
  title     = {{Compression Detection of Audio Waveforms Based on Stacked Autoencoders}},
  booktitle = {Artificial Intelligence and Security},
  address   = {Cham},
  pages     = {393--404},
  year      = {2020},
  doi       = {10.1007/978-3-030-57881-7_35},
}

@inproceedings{brown2020language,
  author    = {Brown, Tom and Mann, Benjamin and Ryder, Nick and Subbiah, Melanie and Kaplan, Jared D and Dhariwal, Prafulla and Neelakantan, Arvind and Shyam, Pranav and Sastry, Girish and Askell, Amanda and Agarwal, Sandhini and Herbert-Voss, Ariel and Krueger, Gretchen and Henighan, Tom and Child, Rewon and Ramesh, Aditya and Ziegler, Daniel and Wu, Jeffrey and Winter, Clemens and Hesse, Chris and Chen, Mark and Sigler, Eric and Litwin, Mateusz and Gray, Scott and Chess, Benjamin and Clark, Jack and Berner, Christopher and McCandlish, Sam and Radford, Alec and Sutskever, Ilya and Amodei, Dario},
  title     = {{Language Models are Few-Shot Learners}},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {33},
  pages     = {1877--1901},
  year      = {2020},
}

@article{dosovitskiy2020image,
  author  = {Dosovitskiy, Alexey and Beyer, Lucas and Kolesnikov, Alexander and Weissenborn, Dirk and Zhai, Xiaohua and Unterthiner, Thomas and Dehghani, Mostafa and Minderer, Matthias and Heigold, Georg and Gelly, Sylvain and others},
  title   = {{An image is worth 16x16 words: Transformers for image recognition at scale}},
  journal = {arXiv preprint arXiv:2010.11929},
  year    = {2020},
  doi     = {10.48550/arXiv.2010.11929},
}

@article{carion2020end,
  author  = {Carion, Nicolas and Massa, Francisco and Synnaeve, Gabriel and Usunier, Nicolas and Kirillov, Alexander and Zagoruyko, Sergey},
  title   = {{End-to-End Object Detection with Transformers}},
  journal = {Proceedings of 2020 European Conference on Computer Vision},
  pages   = {213--229},
  year    = {2020},
  doi     = {10.1007/978-3-030-58452-8_13},
}

@article{jumper2021highly,
  author  = {Jumper, John and Evans, Richard and Pritzel, Alexander and Green, Tim and Figurnov, Michael and Ronneberger, Olaf and Tunyasuvunakool, Kathryn and Bates, Russ and {\v{Z}}{\'\i}dek, Augustin and Potapenko, Anna and others},
  title   = {{Highly accurate protein structure prediction with AlphaFold}},
  journal = {Nature},
  volume  = {596},
  number  = {7873},
  pages   = {583--589},
  year    = {2021},
  doi     = {10.1038/s41586-021-03819-2},
}


@mastersthesis{sripada2006mp3,
  author  = {Praveen Sripada},
  title   = {{MP3} decoder in theory and practice},
  school  = {Blekinge Institute of Technology},
  address = {Ronneby, Sweden},
  year    = {2006},
  month   = mar,
  url     = {https://www.diva-portal.org/smash/get/diva2:830195/FULLTEXT01.pdf},
}

@article{yang2009defeating,
  author  = {Yang, Rui and Shi, Yun-Qing and Huang, Jiwu},
  title   = {Defeating fake-quality {MP3}},
  journal = {Proceedings of the 11th ACM Workshop on Multimedia and Security},
  pages   = {117--124},
  year    = {2009},
  note    = {Princeton, NJ, USA},
  doi     = {10.1145/1597817.1597838},
}

@misc{raissi02thetheory,
  author = {Rassol Raissi},
  title  = {The Theory Behind {MP3}},
  year   = {2002},
  url    = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.113.6804},
}

@article{boyarov2020forensic,
  author  = {Boyarov, A. G. and Siparov, I. S.},
  title   = {{Forensic Investigation of {MP3} Audio Recordings}},
  journal = {Theory and Practice of Forensic Science},
  volume  = {14},
  number  = {4},
  pages   = {125--136},
  year    = {2019},
  doi     = {10.30764/1819-2785-2019-14-4-125-136},
}

@article{painter1997review,
  author  = {Painter, T. and Spanias, A.},
  title   = {{A review of algorithms for perceptual coding of digital audio signals}},
  journal = {Proceedings of 13th International Conference on Digital Signal Processing},
  volume  = {1},
  pages   = {179--208},
  year    = {1997},
  note    = {Santorini, Greece},
  doi     = {10.1109/ICDSP.1997.628010},
}

@article{ba2016layer,
  author  = {Ba, Jimmy Lei and Kiros, Jamie Ryan and Hinton, Geoffrey E},
  title   = {{Layer normalization}},
  journal = {arXiv preprint arXiv:1607.06450},
  year    = {2016},
  doi     = {10.48550/arXiv.1607.06450},
}

@article{he2016deep,
  author  = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title   = {{Deep Residual Learning for Image Recognition}},
  journal = {Proceedings of 2016 IEEE Conference on Computer Vision and Pattern Recognition},
  pages   = {770--778},
  year    = {2016},
  doi     = {10.1109/CVPR.2016.90},
}

@article{srivastava2014dropout,
  author  = {Nitish Srivastava and Geoffrey Hinton and Alex Krizhevsky and Ilya Sutskever and Ruslan Salakhutdinov},
  title   = {{Dropout: A Simple Way to Prevent Neural Networks from Overfitting}},
  journal = {Journal of Machine Learning Research},
  volume  = {15},
  number  = {56},
  pages   = {1929--1958},
  year    = {2014},
}

@article{li2021survey,
  author  = {Li, Zewen and Liu, Fan and Yang, Wenjie and Peng, Shouheng and Zhou, Jun},
  title   = {{A Survey of Convolutional Neural Networks: Analysis, Applications, and Prospects}},
  journal = {IEEE Transactions on Neural Networks and Learning Systems},
  pages   = {1--21},
  year    = {2021},
  doi     = {10.1109/TNNLS.2021.3084827},
}

@misc{ito2017ljspeech,
  author = {Keith Ito and Linda Johnson},
  title  = {{The LJ Speech Dataset}},
  year   = 2017,
  url    = {https://keithito.com/LJ-Speech-Dataset/},
}

@article{tzanetakis2002musical,
  author  = {Tzanetakis, G. and Cook, P.},
  title   = {{Musical genre classification of audio signals}},
  journal = {IEEE Transactions on Speech and Audio Processing},
  volume  = {10},
  number  = {5},
  pages   = {293--302},
  year    = {2002},
  doi     = {10.1109/TSA.2002.800560},
}

@article{hawthorne2018enabling,
  author  = {Curtis Hawthorne and Andriy Stasyuk and Adam Roberts and Ian Simon and Cheng-Zhi Anna Huang and Sander Dieleman and Erich Elsen and Jesse Engel and Douglas Eck},
  title   = {{Enabling Factorized Piano Music Modeling and Generation with the {MAESTRO} Dataset}},
  journal = {Proceedings of the International Conference on Learning Representations},
  year    = {2019},
}

@article{hendrycks2016gaussian,
  author  = {Hendrycks, Dan and Gimpel, Kevin},
  title   = {{Gaussian error linear units (GELUs)}},
  journal = {arXiv preprint arXiv:1606.08415},
  year    = {2016},
  doi     = {10.48550/arXiv.1606.08415},
}

@misc{ffmpegmp3,
  author = {{FFmpeg contributors}},
  title  = {{FFmpeg MP3} Encoding Guide},
  url    = {https://trac.ffmpeg.org/wiki/Encode/MP3}
}

@article{kingma2014adam,
  author  = {Kingma, Diederik P and Ba, Jimmy},
  title   = {{Adam: A method for stochastic optimization}},
  journal = {arXiv preprint arXiv:1412.6980},
  year    = {2014},
  doi     = {10.48550/arXiv.1412.6980}
}

@article{borrelli2021synthetic,
  author  = {Borrelli, C. and Bestagini, P. and Antonacci, F. and Sarti, A. and Tubaro, S.},
  title   = {{Synthetic speech detection through short-term and long-term prediction traces}},
  journal = {EURASIP Journal on Information Security},
  year    = {2021},
  volume  = {2021},
  number  = {2},
  pages   = {1--14},
  doi     = {10.1186/s13635-021-00116-3}
}

@article{liu2015very,
  author  = {Liu, Shuying and Deng, Weihong},
  title   = {{Very deep convolutional neural network based image classification using small training sample size}},
  journal = {Proceedings of 2015 3rd IAPR Asian Conference on Pattern Recognition},
  year    = {2015},
  pages   = {730--734},
  doi     = {10.1109/ACPR.2015.7486599},
  note    = {Kuala Lumpur, Malaysia}
}


@article{ruder2016overview,
  author  = {Ruder, Sebastian},
  title   = {{An overview of gradient descent optimization algorithms}},
  journal = {arXiv preprint arXiv:1609.04747},
  year    = {2016},
  doi     = {10.48550/arXiv.1609.04747}
}

@article{musmann2006genesis,
  author  = {Musmann, H. G.},
  title   = {{Genesis of the MP3 audio coding standard}},
  journal = {IEEE Transactions on Consumer Electronics},
  year    = {2006},
  volume  = {52},
  number  = {3},
  pages   = {1043--1049},
  doi     = {10.1109/TCE.2006.1706505}
}

@article{jayant1993signal,
  author  = {Jayant, N. and Johnston, J. and Safranek, R.},
  title   = {{Signal compression based on models of human perception}},
  journal = {Proceedings of the IEEE},
  year    = {1993},
  volume  = {81},
  number  = {10},
  pages   = {1385--1422},
  doi     = {10.1109/5.241504}
}

@article{levandowsky1971distance,
  author  = {Levandowsky, Michael and Winter, David},
  title   = {{Distance between sets}},
  journal = {Nature},
  year    = {1971},
  volume  = {234},
  number  = {5323},
  pages   = {34--35},
  doi     = {10.1038/234034a0}
}

@article{powers2020evaluation,
  author  = {Powers, David M. W.},
  title   = {{Evaluation: from precision, recall and {F}-measure to {ROC}, informedness, markedness and correlation}},
  journal = {arXiv preprint arXiv:2010.16061},
  year    = {2020},
  doi     = {10.48550/arXiv.2010.16061}
}

@article{brodersen2010balanced,
  author  = {Brodersen, Kay Henning and Ong, Cheng Soon and Stephan, Klaas Enno and Buhmann, Joachim M.},
  title   = {{The Balanced Accuracy and Its Posterior Distribution}},
  journal = {2010 20th International Conference on Pattern Recognition},
  year    = {2010},
  pages   = {3121--3124},
  doi     = {10.1109/ICPR.2010.764},
  note    = {Istanbul, Turkey}
}

@article{bianchi2013detection,
  author  = {Bianchi, Tiziano and De Rosa, Alessia and Fontani, Marco and Rocciolo, Giovanni and Piva, Alessandro},
  title   = {{Detection and Classification of Double Compressed MP3 Audio Tracks}},
  journal = {Proceedings of the First ACM Workshop on Information Hiding and Multimedia Security},
  year    = {2013},
  pages   = {159--164},
  doi     = {10.1145/2482513.2482523},
  note    = {Montpellier, France}
}

@article{bianchi2014detection,
  author  = {Bianchi, Tiziano and De Rosa, Alessia and Fontani, Marco and Rocciolo, Giovanni and Piva, Alessandro},
  title   = {{Detection and localization of double compression in MP3 audio tracks}},
  journal = {EURASIP Journal on Information Security},
  year    = {2014},
  volume  = {2014},
  number  = {10},
  doi     = {10.1186/1687-417X-2014-10}
}

@misc{iso-mp4-standard,
  author       = {{International Organization for Standardization}},
  title        = {{ISO/IEC 14496-14:2020--Information Technology--Coding of Audio-visual Objects--Part 14: MP4 File Format}},
  howpublished = {\url{https://www.iso.org/standard/79110.html}},
  year         = {2020}
}

@misc{libavformat,
  title        = {{\texttt{libavformat} Documentation}},
  author       = {{FFmpeg contributors}},
  year         = {2022},
  howpublished = {\url{https://ffmpeg.org/libavformat.html}}
}

@article{yang2020efficient,
  author  = {Yang, Pengpeng and Baracchi, Daniele and Iuliani, Massimo and Shullani, Dasara and Ni, Rongrong and Zhao, Yao and Piva, Alessandro},
  title   = {{Efficient Video Integrity Analysis Through Container Characterization}},
  journal = {IEEE Journal of Selected Topics in Signal Processing},
  year    = {2020},
  volume  = {14},
  number  = {5},
  pages   = {947--954},
  doi     = {10.1109/JSTSP.2020.3008088}
}

@article{iuliani2019video,
  author  = {Iuliani, Massimo and Shullani, Dasara and Fontani, Marco and Meucci, Saverio and Piva, Alessandro},
  title   = {{A Video Forensic Framework for the Unsupervised Analysis of MP4-Like File Container}},
  journal = {IEEE Transactions on Information Forensics and Security},
  year    = {2019},
  volume  = {14},
  number  = {3},
  pages   = {635--645},
  doi     = {10.1109/TIFS.2018.2859760}
}

@article{mielke2021between,
  author  = {Mielke, Sabrina J and Alyafeai, Zaid and Salesky, Elizabeth and Raffel, Colin and Dey, Manan and Gall{\'e}, Matthias and Raja, Arun and Si, Chenglei and Lee, Wilson Y and Sagot, Beno{\^\i}t and others},
  title   = {{Between words and Characters: A Brief History of Open-Vocabulary Modeling and Tokenization in NLP}},
  journal = {arXiv preprint arXiv:2112.10508},
  year    = {2021}
}

@article{devlin2018bert,
  author  = {Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
  title   = {{BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding}},
  journal = {arXiv preprint arXiv:1810.04805},
  year    = {2018}
}


@article{velivckovic2017graph,
  author  = {Veli{\v{c}}kovi{\'c}, Petar and Cucurull, Guillem and Casanova, Arantxa and Romero, Adriana and Li{\`o}, Pietro and Bengio, Yoshua},
  title   = {{Graph Attention Networks}},
  journal = {arXiv preprint arXiv:1710.10903},
  year    = {2017}
}


@article{wang2019heterogeneous,
  author  = {Wang, Xiao and Ji, Houye and Shi, Chuan and Wang, Bai and Ye, Yanfang and Cui, Peng and Yu, Philip S},
  title   = {{Heterogeneous Graph Attention Network}},
  journal = {Proceedings of the 2019 World Wide Web Conference},
  year    = {2019},
  pages   = {2022--2032},
  note    = {San Francisco, CA, USA}
}

@article{otter2020survey,
  author    = {Otter, Daniel W and Medina, Julian R and Kalita, Jugal K},
  title     = {{A Survey of the Usages of Deep Learning for Natural Language Processing}},
  journal   = {IEEE Transactions on Neural Networks and Learning Systems},
  year      = {2020},
  volume    = {32},
  number    = {2},
  pages     = {604--624},
  publisher = {IEEE}
}

@article{thawani2021representing,
  author  = {Thawani, Avijit and Pujara, Jay and Szekely, Pedro A and Ilievski, Filip},
  title   = {{Representing Numbers in NLP: a Survey and a Vision}},
  journal = {arXiv preprint arXiv:2103.13136},
  year    = {2021}
}


@article{hendrycks2019using,
  title   = {{Using Self-Supervised Learning Can Improve Model Robustness and Uncertainty}},
  author  = {Hendrycks, Dan and Mazeika, Mantas and Kadavath, Saurav and Song, Dawn},
  journal = {Advances in Neural Information Processing Systems},
  year    = {2019},
  volume  = {32}
}

@article{gong2022ssast,
  author  = {Gong, Yuan and Lai, Cheng-I and Chung, Yu-An and Glass, James},
  title   = {{SSAST: Self-supervised Audio Spectrogram Transformer}},
  journal = {Proceedings of the AAAI Conference on Artificial Intelligence},
  year    = {2022},
  volume  = {36},
  number  = {10},
  pages   = {10699--10709},
  note    = {Virtual}
}

@article{misra2020self,
  author  = {Misra, Ishan and van der Maaten, Laurens},
  title   = {{Self-supervised Learning of Pretext-invariant Representations}},
  journal = {Proceedings of the 2020 IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year    = {2020},
  pages   = {6707--6717},
  note    = {Virtual}
}

@article{zhang2017split,
  author  = {Zhang, Richard and Isola, Phillip and Efros, Alexei A},
  title   = {{Split-brain Autoencoders: Unsupervised Learning by Cross-channel Prediction}},
  journal = {Proceedings of the 2017 IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year    = {2017},
  pages   = {1058--1067},
  note    = {Honolulu, HI, USA}
}


@article{wang2022comprehensive,
  author    = {Wang, Qi and Ma, Yue and Zhao, Kun and Tian, Yingjie},
  title     = {{A Comprehensive Survey of Loss Functions in Machine Learning}},
  journal   = {Annals of Data Science},
  year      = {2022},
  volume    = {9},
  number    = {2},
  pages     = {187--212},
  publisher = {Springer}
}

@article{loshchilov2017decoupled,
  author  = {Loshchilov, Ilya and Hutter, Frank},
  title   = {{Decoupled Weight Decay Regularization}},
  journal = {arXiv preprint arXiv:1711.05101},
  year    = {2017}
}

@article{lee2019sagpool,
  author  = {Lee, Junhyun and Lee, Inyeop and Kang, Jaewoo},
  title   = {{Self-attention Graph Pooling}},
  journal = {Proceedings of 2019 International Conference on Machine Learning},
  year    = {2019},
  pages   = {3734--3743},
  note    = {Long Beach, CA, USA}
}

@article{kipf2016gcn,
  author  = {Kipf, Thomas N and Welling, Max},
  title   = {{Semi-supervised Classification with Graph Convolutional Networks}},
  journal = {arXiv preprint arXiv:1609.02907},
  year    = {2016}
}

@article{simonyan2014very,
  author  = {Simonyan, Karen and Zisserman, Andrew},
  title   = {{Very Deep Convolutional Networks for Large-scale Image Recognition}},
  journal = {arXiv preprint arXiv:1409.1556},
  year    = {2014}
}


@article{quinlan1986induction,
  author    = {Quinlan, J. Ross},
  title     = {{Induction of Decision Trees}},
  journal   = {Machine learning},
  year      = {1986},
  volume    = {1},
  pages     = {81--106},
  publisher = {Springer}
}

@article{biau2016random,
  author    = {Biau, G{\'e}rard and Scornet, Erwan},
  title     = {{A Random Forest Guided Tour}},
  journal   = {Test},
  year      = {2016},
  volume    = {25},
  pages     = {197--227},
  publisher = {Springer}
}

@article{guan2019mfc,
  author       = {Guan, Haiying and Kozak, Mark and Robertson, Eric and Lee, Yooyoung and Yates, Amy N and Delgado, Andrew and Zhou, Daniel and Kheyrkhah, Timothee and Smith, Jeff and Fiscus, Jonathan},
  title        = {{MFC datasets: Large-scale Benchmark Datasets for Media Forensic Challenge Evaluation}},
  journal      = {Proceedings of the 2019 IEEE Winter Applications of Computer Vision Workshops},
  year         = {2019},
  pages        = {63--72},
  organization = {IEEE},
  note         = {Waikoloa, HI, USA}
}

@article{guera2019we,
  author  = {G{\"u}era, David and Baireddy, Sriram and Bestagini, Paolo and Tubaro, Stefano and Delp, Edward J},
  title   = {{We Need No Pixels: Video Manipulation Detection Using Stream Descriptors}},
  journal = {arXiv preprint arXiv:1906.08743},
  year    = {2019}
}

@article{altinisik2022video,
  author    = {Altinisik, Enes and Sencar, H{\"u}srev Taha and Tabaa, Diram},
  title     = {{Video Source Characterization Using Encoding and Encapsulation Characteristics}},
  journal   = {IEEE Transactions on Information Forensics and Security},
  year      = {2022},
  volume    = {17},
  pages     = {3211--3224},
  publisher = {IEEE}
}

@article{hearst1998support,
  author    = {Hearst, Marti A. and Dumais, Susan T and Osuna, Edgar and Platt, John and Sch{\"o}lkopf, Bernhard},
  title     = {{Support Vector Machines}},
  journal   = {IEEE Intelligent Systems and Their Applications},
  year      = {1998},
  volume    = {13},
  number    = {4},
  pages     = {18--28},
  publisher = {IEEE}
}

@article{bhagtani2022overview,
  author  = {Bhagtani, Kratika and Yadav, Amit Kumar Singh and Bartusiak, Emily R and Xiang, Ziyue and Shao, Ruiting and Baireddy, Sriram and Delp, Edward J},
  title   = {{An Overview of Recent Work in Media Forensics: Methods and Threats}},
  journal = {arXiv preprint arXiv:2204.12067},
  year    = {2022}
}

@article{verdoliva2020deepfake,
  author  = {Verdoliva, Luisa},
  title   = {{Media Forensics and DeepFakes: An Overview}},
  journal = {IEEE Journal of Selected Topics in Signal Processing},
  year    = {2020},
  volume  = {14},
  number  = {5},
  pages   = {910--932},
  doi     = {10.1109/JSTSP.2020.3002101}
}


@article{bestagini2012overview,
  author    = {Milani, Simone and Fontani, Marco and Bestagini, Paolo and Barni, Mauro and Piva, Alessandro and Tagliasacchi, Marco and Tubaro, Stefano},
  title     = {{An Overview on Video Forensics}},
  journal   = {APSIPA Transactions on Signal and Information Processing},
  year      = {2012},
  volume    = {1},
  pages     = {e2},
  publisher = {Cambridge University Press}
}

@article{powers2011evaluation,
  author    = {Powers, David M. W.},
  title     = {{Evaluation: From Precision, Recall and F-Measure to ROC, Informedness, Markedness \& Correlation}},
  journal   = {Journal of Machine Learning Technologies},
  year      = {2011},
  volume    = {2},
  number    = {1},
  pages     = {37--63},
  publisher = {BioInfo Publications}
}


@article{tharwat2021classification,
  title={{Classification Assessment Methods}},
  author={Tharwat, Alaa},
  journal={Applied Computing and Informatics},
  volume={17},
  number={1},
  pages={168--192},
  year={2021},
  publisher={Emerald Publishing Limited}
}

@article{Scheirer_2013_TPAMI,
  author  = {Walter J. Scheirer and Anderson Rocha and Archana Sapkota and Terrance E. Boult},
  title   = {{Towards Open Set Recognition}},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  volume  = {35},
  number  = {7},
  pages   = {1757--1772},
  month   = jul,
  year    = {2013},
  doi     = {10.1109/TPAMI.2012.256}
}


@article{koch2015siamese,
  author  = {Koch, Gregory and Zemel, Richard and Salakhutdinov, Ruslan and others},
  title   = {{Siamese neural networks for one-shot image recognition}},
  journal = {2015 International Conference on Machine Learning Deep Learning Workshop},
  year    = {2015},
  note    = {Lille, France}
}


@inproceedings{vargas2020one,
  author    = {Vargas, Camilo and Zhang, Qianni and Izquierdo, Ebroul},
  title     = {{One shot logo recognition based on siamese neural networks}},
  booktitle = {Proceedings of the 2020 International Conference on Multimedia Retrieval},
  year      = {2020},
  pages     = {321--325}
}

@article{hsiao2019malware,
  author  = {Hsiao, Shou-Ching and Kao, Da-Yu and Liu, Zi-Yuan and Tso, Raylin},
  title   = {{Malware image classification using one-shot learning with siamese networks}},
  journal = {Procedia Computer Science},
  year    = {2019},
  volume  = {159},
  pages   = {1863--1871}
}

@incollection{chicco2021siamese,
  author    = {Chicco, Davide},
  title     = {{Siamese Neural Networks: An Overview}},
  booktitle = {Artificial Neural Networks},
  publisher = {Springer US},
  address   = {New York, NY},
  year      = {2021},
  pages     = {73--94},
  doi       = {10.1007/978-1-0716-0826-5_3}
}


@article{mikolov2013efficient,
  author  = {Mikolov, Tomas and Chen, Kai and Corrado, Greg and Dean, Jeffrey},
  title   = {{Efficient estimation of word representations in vector space}},
  journal = {arXiv preprint arXiv:1301.3781},
  year    = {2013},
  doi     = {10.48550/arXiv.1301.3781}
}


@article{shullani2017vision,
  author  = {Shullani, Dasara and Fontani, Marco and Iuliani, Massimo and Al Shaya, Omar and Piva, Alessandro},
  title   = {{VISION}: A video and image dataset for source identification},
  journal = {EURASIP Journal on Information Security},
  year    = {2017},
  volume  = {2017},
  number  = {15},
  doi     = {10.1186/s13635-017-0067-2}
}

@article{mayer2020forensic,
  author  = {Mayer, Owen and Stamm, Matthew C.},
  title   = {{Forensic Similarity for Digital Images}},
  journal = {IEEE Transactions on Information Forensics and Security},
  year    = {2020},
  volume  = {15},
  pages   = {1331--1346},
  doi     = {10.1109/TIFS.2019.2924552}
}


@article{huang2005using,
  author  = {Huang, Jin and Ling, C. X.},
  title   = {{Using AUC and accuracy in evaluating learning algorithms}},
  journal = {IEEE Transactions on Knowledge and Data Engineering},
  year    = {2005},
  volume  = {17},
  number  = {3},
  pages   = {299--310},
  doi     = {10.1109/TKDE.2005.50}
}


@article{liu2021swin,
  author  = {Liu, Ze and Lin, Yutong and Cao, Yue and Hu, Han and Wei, Yixuan and Zhang, Zheng and Lin, Stephen and Guo, Baining},
  title   = {{Swin transformer: Hierarchical vision transformer using shifted windows}},
  journal = {Proceedings of the IEEE/CVF International Conference on Computer Vision},
  year    = {2021},
  pages   = {10012--10022},
  note    = {Montreal, QC, Canada},
  doi     = {10.1109/ICCV48922.2021.00986}
}


@article{liu2021video,
  author  = {Liu, Ze and Ning, Jia and Cao, Yue and Wei, Yixuan and Zhang, Zheng and Lin, Stephen and Hu, Han},
  title   = {{Video swin transformer}},
  journal = {arXiv preprint arXiv:2106.13230},
  year    = {2021},
  doi     = {10.48550/arXiv.2106.13230}
}

@article{verma2021audio,
  author  = {Verma, Prateek and Berger, Jonathan},
  title   = {{Audio transformers: Transformer architectures for large scale audio understanding. adieu convolutions}},
  journal = {arXiv preprint arXiv:2105.00335},
  year    = {2021},
  doi     = {10.48550/arXiv.2105.00335}
}

@article{zeyer2019comparison,
  author  = {Zeyer, Albert and Bahar, Parnia and Irie, Kazuki and Schl{\"u}ter, Ralf and Ney, Hermann},
  title   = {{A Comparison of Transformer and LSTM Encoder Decoder Models for ASR}},
  journal = {Proceedings of the IEEE Automatic Speech Recognition and Understanding Workshop},
  year    = {2019},
  pages   = {8--15},
  doi     = {10.1109/ASRU46091.2019.9004025},
  note    = {Sentosa, Singapore}
}

@article{hochreiter1997long,
  author  = {Hochreiter, Sepp and Schmidhuber, J{\"u}rgen},
  title   = {{Long Short-Term Memory}},
  journal = {Neural Computation},
  year    = {1997},
  volume  = {9},
  number  = {8},
  pages   = {1735--1780},
  doi     = {10.1162/neco.1997.9.8.1735}
}

@article{cho2014properties,
  author  = {Cho, Kyunghyun and Van Merri{\"e}nboer, Bart and Bahdanau, Dzmitry and Bengio, Yoshua},
  title   = {{On the properties of neural machine translation: Encoder-decoder approaches}},
  journal = {arXiv preprint arXiv:1409.1259},
  year    = {2014},
  doi     = {10.48550/arXiv.1409.1259}
}

@article{he2015deep,
  title   = {{Deep Residual Learning for Image Recognition}},
  author  = {Kaiming He and Xiangyu Zhang and Shaoqing Ren and Jian Sun},
  year    = {2015},
  journal = {arXiv preprint arXiv:1512.03385},
  doi     = {10.48550/arXiv.1512.03385}
}


@article{fawcett2006introduction,
  author    = {Fawcett, Tom},
  title     = {{An introduction to ROC analysis}},
  journal   = {Pattern recognition letters},
  year      = {2006},
  volume    = {27},
  number    = {8},
  pages     = {861--874},
  publisher = {Elsevier},
  doi       = {10.1016/j.patrec.2005.10.010}
}

@article{hripcsak2005agreement,
  author  = {Hripcsak, George and Rothschild, Adam S},
  title   = {{Agreement, the f-measure, and reliability in information retrieval}},
  journal = {Journal of the American Medical Informatics Association},
  year    = {2005},
  volume  = {12},
  number  = {3},
  pages   = {296--298},
  doi     = {10.1197/jamia.M1733}
}

@article{iuliani2019hybrid,
  author  = {Iuliani, Massimo and Fontani, Marco and Shullani, Dasara and Piva, Alessandro},
  title   = {{Hybrid reference-based video source identification}},
  journal = {Sensors},
  year    = {2019},
  volume  = {19},
  number  = {3},
  pages   = {649},
  doi     = {10.3390/s19030649}
}

@article{mandelli2020facing,
  author  = {Mandelli, Sara and Bestagini, Paolo and Verdoliva, Luisa and Tubaro, Stefano},
  title   = {{Facing Device Attribution Problem for Stabilized Video Sequences}},
  journal = {IEEE Transactions on Information Forensics and Security},
  year    = {2020},
  volume  = {15},
  pages   = {14--27},
  doi     = {10.1109/TIFS.2019.2918644}
}

@article{lukas2006digital,
  author  = {Lukas, J. and Fridrich, J. and Goljan, M.},
  title   = {{Digital camera identification from sensor pattern noise}},
  journal = {IEEE Transactions on Information Forensics and Security},
  year    = {2006},
  volume  = {1},
  number  = {2},
  pages   = {205--214},
  doi     = {10.1109/TIFS.2006.873602}
}

@article{mandelli2018blind,
  author  = {Mandelli, Sara and Bestagini, Paolo and Tubaro, Stefano and Cozzolino, Davide and Verdoliva, Luisa},
  title   = {{Blind Detection and Localization of Video Temporal Splicing Exploiting Sensor-Based Footprints}},
  journal = {Proceedings of the European Signal Processing Conference},
  year    = {2018},
  pages   = {1362--1366},
  doi     = {10.23919/EUSIPCO.2018.8553511},
  note    = {Rome, Italy}
}

@article{marra2017blind,
  author  = {Marra, Francesco and Poggi, Giovanni and Sansone, Carlo and Verdoliva, Luisa},
  title   = {{Blind PRNU-Based Image Clustering for Source Identification}},
  journal = {IEEE Transactions on Information Forensics and Security},
  year    = {2017},
  volume  = {12},
  number  = {9},
  pages   = {2197--2211},
  doi     = {10.1109/TIFS.2017.2701335}
}

@article{mandelli2020cnn,
  author  = {Mandelli, Sara and Cozzolino, Davide and Bestagini, Paolo and Verdoliva, Luisa and Tubaro, Stefano},
  title   = {{CNN-Based Fast Source Device Identification}},
  journal = {IEEE Signal Processing Letters},
  year    = {2020},
  volume  = {27},
  pages   = {1285--1289},
  doi     = {10.1109/LSP.2020.3008855}
}

@misc{iso:avc,
  author       = {{International Organization for Standardization}},
  title        = {{ISO/IEC} 14496-10:2020 Information technology--Coding of audio-visual objects--Part 10: Advanced video coding},
  howpublished = {\url{https://www.iso.org/standard/75400.html}},
  year         = {2020}
}

@book{richardson2011h264,
  author    = {Richardson, Iain E},
  title     = {{The H.264 advanced video compression standard}},
  publisher = {John Wiley \& Sons},
  year      = {2011}
}


@article{altinisik2020mitigation,
  author  = {Altinisik, Enes and Tasdemir, Kasim and Sencar, H{\"u}srev Taha},
  title   = {{Mitigation of H.264 and H.265 Video Compression for Reliable PRNU Estimation}},
  journal = {IEEE Transactions on Information Forensics and Security},
  year    = {2020},
  volume  = {15},
  pages   = {1557--1571},
  doi     = {10.1109/TIFS.2019.2945190}
}

@article{villalba2016identification,
  author  = {Garc{\'\i}a Villalba, Luis Javier and Sandoval Orozco, Ana Lucila and Ramos L{\'o}pez, Raquel and Hernandez Castro, Julio},
  title   = {{Identification of smartphone brand and model via forensic video analysis}},
  journal = {Expert Systems with Applications},
  year    = {2016},
  volume  = {55},
  pages   = {59--69},
  doi     = {10.1016/j.eswa.2016.01.025}
}

% NOTE: same work (identical DOI) as the entry keyed mayer2020forensic in this
% file; kept because the key may still be cited. Cite only one of the two keys
% per document, otherwise the paper is listed twice.
@article{mayer2019forensic,
  author    = {Mayer, Owen and Stamm, Matthew C},
  title     = {{Forensic similarity for digital images}},
  journal   = {IEEE Transactions on Information Forensics and Security},
  year      = {2020},
  volume    = {15},
  pages     = {1331--1346},
  publisher = {IEEE},
  doi       = {10.1109/TIFS.2019.2924552}
}

@article{mayer2020open,
  author  = {Mayer, Owen and Hosler, Brian and Stamm, Matthew C.},
  title   = {{Open Set Video Camera Model Verification}},
  journal = {Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing},
  year    = {2020},
  pages   = {2962--2966},
  doi     = {10.1109/ICASSP40776.2020.9054261},
  note    = {Barcelona, Spain}
}

@article{lopez2020digital,
  author  = {Ramos L{\'o}pez, Raquel and Almaraz Luengo, Elena and Sandoval Orozco, Ana Lucila and Garc{\'\i}a Villalba, Luis Javier},
  title   = {{Digital Video Source Identification Based on Container's Structure Analysis}},
  journal = {IEEE Access},
  year    = {2020},
  volume  = {8},
  pages   = {36363--36375},
  doi     = {10.1109/ACCESS.2020.2971785}
}

@article{dal2021cnn,
  author  = {Dal Cortivo, Davide and Mandelli, Sara and Bestagini, Paolo and Tubaro, Stefano},
  title   = {{CNN-Based Multi-Modal Camera Model Identification on Video Sequences}},
  journal = {Journal of Imaging},
  year    = {2021},
  volume  = {7},
  number  = {8},
  pages   = {135},
  doi     = {10.3390/jimaging7080135}
}

@article{timmerman2020video,
  author  = {Timmerman, Derrick and Bennabhaktula, Swaroop and Alegre, Enrique and Azzopardi, George},
  title   = {{Video Camera Identification from Sensor Pattern Noise with a Constrained ConvNet}},
  journal = {arXiv preprint arXiv:2012.06277},
  year    = {2020},
  doi     = {10.48550/arXiv.2012.06277}
}

@article{hosler2019avideo,
  author  = {Hosler, B. and Mayer, O. and Bayar, B. and Zhao, X. and Chen, C. and Shackleford, J. A. and Stamm, M. C.},
  title   = {{A Video Camera Model Identification System Using Deep Learning and Fusion}},
  journal = {Proceedings of the 2019 IEEE International Conference on Acoustics, Speech and Signal Processing},
  year    = {2019},
  pages   = {8271--8275},
  doi     = {10.1109/ICASSP.2019.8682608},
  note    = {Brighton, UK}
}

  @article{altinisik2021source,
  author  = {Altinisik, Enes and Sencar, H{\"u}srev Taha},
  title   = {{Source Camera Verification for Strongly Stabilized Videos}},
  journal = {IEEE Transactions on Information Forensics and Security},
  year    = {2021},
  volume  = {16},
  pages   = {643--657},
  doi     = {10.1109/TIFS.2020.3016830}
}

@article{yang2021fast,
  author  = {Yang, Wen-Chao and Jiang, Jiajun and Chen, Chung-Hao},
  title   = {{A fast source camera identification and verification method based on PRNU analysis for use in video forensic investigations}},
  journal = {Multimedia Tools and Applications},
  year    = {2021},
  volume  = {80},
  number  = {5},
  pages   = {6617--6638},
  doi     = {10.1007/s11042-020-09763-z}
}

@article{ferrara2022prnu,
  author    = {Ferrara, Pasquale and Iuliani, Massimo and Piva, Alessandro},
  title     = {{PRNU-Based Video Source Attribution: Which Frames Are You Using?}},
  journal   = {Journal of Imaging},
  year      = {2022},
  volume    = {8},
  number    = {3},
  pages     = {57},
  publisher = {Multidisciplinary Digital Publishing Institute},
  doi       = {10.3390/jimaging8030057}
}

@article{vazquez2012detection,
  author  = {V{\'a}zquez-Pad{\'\i}n, D. and Fontani, M. and Bianchi, T. and Comesana, P. and Piva, A. and Barni, M.},
  title   = {{Detection of video double encoding with GOP size estimation}},
  journal = {Proceedings of the IEEE International Workshop on Information Forensics and Security},
  year    = {2012},
  pages   = {151--156},
  doi     = {10.1109/WIFS.2012.6412641},
  note    = {Costa Adeje, Spain}
}

@article{bestagini2016codec,
  author  = {Bestagini, Paolo and Milani, Simone and Tagliasacchi, Marco and Tubaro, Stefano},
  title   = {{Codec and GOP Identification in Double Compressed Videos}},
  journal = {IEEE Transactions on Image Processing},
  year    = {2016},
  volume  = {25},
  number  = {5},
  pages   = {2298--2310},
  doi     = {10.1109/TIP.2016.2541960}
}

@article{yao2017detection,
  author  = {Yao, Heng and Song, Saihua and Qin, Chuan and Tang, Zhenjun and Liu, Xiaokai},
  title   = {{Detection of double-compressed H.264/AVC video incorporating the features of the string of data bits and skip macroblocks}},
  journal = {Symmetry},
  year    = {2017},
  volume  = {9},
  number  = {12},
  pages   = {313},
  doi     = {10.3390/sym9120313}
}

@article{vazquez2020video,
  author  = {V{\'a}zquez-Pad{\'\i}n, David and Fontani, Marco and Shullani, Dasara and P{\'e}rez-Gonz{\'a}lez, Fernando and Piva, Alessandro and Barni, Mauro},
  title   = {{Video Integrity Verification and GOP Size Estimation Via Generalized Variation of Prediction Footprint}},
  journal = {IEEE Transactions on Information Forensics and Security},
  year    = {2020},
  volume  = {15},
  pages   = {1815--1830},
  doi     = {10.1109/TIFS.2019.2951313}
}


@article{mahfoudi2022statistical,
  author  = {Mahfoudi, Ga{\"e}l and Retraint, Florent and Morain-Nicolier, Fr{\'e}d{\'e}ric and Pic, Marc Michel},
  title   = {{Statistical H.264 Double Compression Detection Method Based on DCT Coefficients}},
  journal = {IEEE Access},
  year    = {2022},
  volume  = {10},
  pages   = {4271--4283},
  doi     = {10.1109/ACCESS.2022.3140588}
}

@article{he2017frame,
  author  = {He, Peisong and Jiang, Xinghao and Sun, Tanfeng and Wang, Shilin and Li, Bin and Dong, Yi},
  title   = {{Frame-wise detection of relocated I-frames in double compressed H.264 videos based on convolutional neural network}},
  journal = {Journal of Visual Communication and Image Representation},
  year    = {2017},
  volume  = {48},
  pages   = {149--158},
  doi     = {10.1016/j.jvcir.2017.06.010}
}

@article{xu2015relocated,
  author  = {Xu, Qiang and Jiang, Xinghao and Sun, Tanfeng and He, Peisong and Wang, Shilin and Li, Bin},
  title   = {{Relocated I-Frames Detection in H.264 Double Compressed Videos Based on Genetic-CNN}},
  journal = {The Proceedings of the Asia-Pacific Signal and Information Processing Association Annual Summit and Conference},
  year    = {2018},
  pages   = {710--716},
  doi     = {10.23919/APSIPA.2018.8659519},
  note    = {Honolulu, HI, USA}
}


@article{valenzise2010estimating,
  author  = {Valenzise, Giuseppe and Tagliasacchi, Marco and Tubaro, Stefano},
  title   = {{Estimating QP and Motion Vectors in H.264/AVC Video from Decoded Pixels}},
  journal = {Proceedings of the 2nd ACM Workshop on Multimedia in Forensics, Security and Intelligence},
  year    = {2010},
  pages   = {89--92},
  doi     = {10.1145/1877972.1877995},
  note    = {Firenze, Italy}
}

@article{xiang2021forensic,
  author  = {Xiang, Ziyue and Horv{\'a}th, J{\'a}nos and Baireddy, Sriram and Bestagini, Paolo and Tubaro, Stefano and Delp, Edward J.},
  title   = {{Forensic Analysis of Video Files Using Metadata}},
  journal = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops},
  year    = {2021},
  pages   = {1042--1051},
  doi     = {10.1109/CVPRW53098.2021.00115},
  note    = {Nashville, TN, USA}
}

@article{altinisik2022camera,
  author  = {Altinisik, Enes and Sencar, H{\"u}srev Taha},
  title   = {{Camera Model Identification Using Container and Encoding Characteristics of Video Files}},
  journal = {arXiv preprint arXiv:2201.02949},
  year    = {2022},
  doi     = {10.48550/arXiv.2201.02949}
}

@article{chen2007source,
  author  = {Chen, Mo and Fridrich, Jessica and Goljan, Miroslav and Luk{\'a}{\v{s}}, Jan},
  title   = {{Source digital camcorder identification using sensor photo response non-uniformity}},
  journal = {Proceedings of the Security, Steganography, and Watermarking of Multimedia Contents IX},
  year    = {2007},
  volume  = {6505},
  pages   = {517--528},
  doi     = {10.1117/12.696519}
}

@article{su2009asource,
  author  = {Su, Yuting and Xu, Junyu and Dong, Bo},
  title   = {{A Source Video Identification Algorithm Based on Motion Vectors}},
  journal = {Proceedings of the Second International Workshop on Computer Science and Engineering},
  year    = {2009},
  volume  = {2},
  pages   = {312--316},
  doi     = {10.1109/WCSE.2009.820},
  note    = {Qingdao, China}
}

@article{yahaya2012advanced,
  author  = {Yahaya, Syamsul and Ho, Anthony T S and Wahab, Ainuddin Abdul},
  title   = {{Advanced video camera identification using Conditional Probability Features}},
  journal = {Proceedings of the IET Conference on Image Processing},
  year    = {2012},
  pages   = {1--5},
  doi     = {10.1049/cp.2012.0426},
  note    = {London, UK}
}


  @article{kurosawa1999ccd,
  author  = {Kurosawa, K. and Kuroki, K. and Saitoh, N.},
  title   = {{CCD fingerprint method-identification of a video camera from videotaped images}},
  journal = {Proceedings of the International Conference on Image Processing},
  year    = {1999},
  volume  = {3},
  pages   = {537--540},
  doi     = {10.1109/ICIP.1999.817172},
  note    = {Kobe, Japan}
}

@article{verdoliva2019extracting,
  author  = {Davide Cozzolino and Giovanni Poggi and Luisa Verdoliva},
  title   = {{Extracting camera-based fingerprints for video forensics}},
  journal = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops},
  note    = {Long Beach, CA, USA},
  year    = {2019}
}

@article{verde2018video,
  author  = {Verde, S. and Bondi, L. and Bestagini, P. and Milani, S. and Calvagno, G. and Tubaro, S.},
  title   = {{Video Codec Forensics Based on Convolutional Neural Networks}},
  journal = {Proceedings of the IEEE International Conference on Image Processing},
  year    = {2018},
  pages   = {530--534},
  doi     = {10.1109/ICIP.2018.8451143},
  note    = {Athens, Greece}
}

@article{sun2019videobert,
  author  = {Sun, Chen and Myers, Austin and Vondrick, Carl and Murphy, Kevin and Schmid, Cordelia},
  title   = {{VideoBERT: A Joint Model for Video and Language Representation Learning}},
  journal = {Proceedings of the IEEE/CVF International Conference on Computer Vision},
  year    = {2019},
  pages   = {7463--7472},
  doi     = {10.1109/ICCV.2019.00756},
  note    = {Seoul, Korea}
}

@article{sullivan2005video,
  author  = {Sullivan, G. J. and Wiegand, T.},
  title   = {{Video Compression - From Concepts to the H.264/AVC Standard}},
  journal = {Proceedings of the IEEE},
  year    = {2005},
  volume  = {93},
  number  = {1},
  pages   = {18--31},
  doi     = {10.1109/JPROC.2004.839617}
}

@article{he2016resnet,
  author  = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title   = {{Deep Residual Learning for Image Recognition}},
  journal = {Proceedings of the 2016 IEEE Conference on Computer Vision and Pattern Recognition},
  year    = {2016},
  pages   = {770--778},
  doi     = {10.1109/CVPR.2016.90},
  note    = {Las Vegas, NV, USA}
}

% Conference paper kept as an article-type entry; note records that the
% event was held virtually.
@article{tan2021efficient,
  author  = {Tan, Mingxing and Le, Quoc},
  title   = {{EfficientNetV2: Smaller Models and Faster Training}},
  journal = {Proceedings of International Conference on Machine Learning},
  volume  = {139},
  pages   = {10096--10106},
  year    = {2021},
  note    = {Virtual}
}


% Conference paper kept as an article-type entry (proceedings title in
% journal, venue city in note). The accented surname uses a braced LaTeX
% escape, matching the escapes used for other accented names in this file.
@article{xie2017resnext,
  author  = {Xie, Saining and Girshick, Ross and Doll{\'a}r, Piotr and Tu, Zhuowen and He, Kaiming},
  title   = {{Aggregated Residual Transformations for Deep Neural Networks}},
  journal = {Proceedings of the 2017 IEEE Conference on Computer Vision and Pattern Recognition},
  pages   = {5987--5995},
  year    = {2017},
  doi     = {10.1109/CVPR.2017.634},
  note    = {Honolulu, HI, USA}
}

% Software repository reference. The author is double-braced so the handle
% is treated as one indivisible name.
@misc{minimp3,
  author = {{lieff}},
  title  = {{\texttt{minimp3}: Minimalistic {MP3} Decoder Single Header Library}},
  year   = {2018},
  url    = {https://github.com/lieff/minimp3}
}


% Standards document. The corporate author is double-braced so it is not
% split into given and family names.
@misc{isomp3,
  author = {{International Organization for Standardization}},
  title  = {{ISO/IEC 13818-3:1995 - Information technology -- Generic coding of moving pictures and associated audio information -- Part 3: Audio}},
  year   = {1995},
  url    = {https://www.iso.org/standard/22991.html}
}


% Bachelor's thesis (the degree level is carried in note). The month uses
% the predefined three-letter macro, unbraced, so each style can render it.
@thesis{jacaba2001audio,
  author = {Joebert S. Jacaba},
  title  = {{Audio Compression Using Modified Discrete Cosine Transform: The MP3 Coding Standard}},
  school = {University of the Philippines, Manila},
  year   = {2001},
  month  = oct,
  url    = {https://www.math.utah.edu/~gustafso/s2016/2270/project-ideas/audio-mp3-compression-MDCT-jacaba_main.pdf},
  note   = {Bachelor's Thesis}
}


% Journal article; the page range uses the double-hyphen form.
@article{painter2000perceptual,
  author  = {Ted Painter and Andreas Spanias},
  title   = {{Perceptual Coding of Digital Audio}},
  journal = {Proceedings of the IEEE},
  volume  = {88},
  number  = {4},
  pages   = {451--515},
  year    = {2000},
  doi     = {10.1109/5.842996}
}

% Conference paper kept as an article-type entry (proceedings title in
% journal); the page range uses the double-hyphen form.
@article{rothweiler1983polyphase,
  author  = {Joseph Rothweiler},
  title   = {{Polyphase Quadrature Filters--A New Subband Coding Technique}},
  journal = {Proceedings of the 1983 IEEE International Conference on Acoustics, Speech, and Signal Processing},
  pages   = {1280--1283},
  year    = {1983},
  doi     = {10.1109/ICASSP.1983.1172005}
}

% Journal article.
% NOTE(review): the second author's given name is spelled Miikka here, as in
% the publisher's listing -- confirm against the AES record.
@article{wang2003modified,
  author    = {Wang, Ye and Vilermo, Miikka},
  title     = {{Modified Discrete Cosine Transform: Its Implications for Audio Coding and Error Concealment}},
  journal   = {Journal of the Audio Engineering Society},
  volume    = {51},
  number    = {1/2},
  pages     = {52--61},
  year      = {2003},
  publisher = {Audio Engineering Society}
}


% Journal article. The accented given name uses a braced LaTeX escape,
% matching the escapes used for other accented names in this file; the page
% range uses the double-hyphen form.
@article{nautsch2021asvspoof,
  author  = {Nautsch, Andreas and Wang, Xin and Evans, Nicholas and Kinnunen, Tomi H. and Vestman, Ville and Todisco, Massimiliano and Delgado, H{\'e}ctor and Sahidullah, Md and Yamagishi, Junichi and Lee, Kong Aik},
  title   = {{ASVspoof 2019: Spoofing Countermeasures for the Detection of Synthesized, Converted and Replayed Speech}},
  journal = {IEEE Transactions on Biometrics, Behavior, and Identity Science},
  volume  = {3},
  number  = {2},
  pages   = {252--265},
  year    = {2021},
  doi     = {10.1109/TBIOM.2021.3059479}
}

% Journal article; the journal name is given in consistent title case.
@article{zheng2001comparison,
  author  = {Zheng, Fang and Zhang, Guoliang and Song, Zhanjiang},
  title   = {{Comparison of Different Implementations of MFCC}},
  journal = {Journal of Computer Science and Technology},
  volume  = {16},
  pages   = {582--589},
  year    = {2001},
  doi     = {10.1007/BF02943243}
}

% Journal article. The middle initial carries its period so that styles
% printing full given names do not render a bare letter.
@article{brown1991calculation,
  author  = {Brown, Judith C.},
  title   = {{Calculation of a Constant Q Spectral Transform}},
  journal = {The Journal of the Acoustical Society of America},
  volume  = {89},
  number  = {1},
  pages   = {425--434},
  year    = {1991},
  doi     = {10.1121/1.400476}
}


% Magazine article; the page range uses the double-hyphen form.
@article{oppenheim1970speech,
  author  = {Oppenheim, Alan V.},
  title   = {{Speech Spectrograms Using the Fast Fourier Transform}},
  journal = {IEEE Spectrum},
  volume  = {7},
  number  = {8},
  pages   = {57--62},
  year    = {1970},
  doi     = {10.1109/MSPEC.1970.5213512}
}

% Journal article. The second author's middle initial carries its period so
% that styles printing full given names do not render a bare letter.
@article{kabir2021survey,
  author  = {Kabir, Muhammad Mohsin and Mridha, Muhammad F. and Shin, Jungpil and Jahan, Israt and Ohi, Abu Quwsar},
  title   = {{A Survey of Speaker Recognition: Fundamental Theories, Recognition Methods and Opportunities}},
  journal = {IEEE Access},
  volume  = {9},
  pages   = {79236--79263},
  year    = {2021},
  doi     = {10.1109/ACCESS.2021.3084299}
}


% Conference paper kept as an article-type entry (proceedings title in
% journal).
@article{singh2021nonlinear,
  author  = {Singh, Premjeet and Saha, Goutam and Sahidullah, Md},
  title   = {{Non-linear Frequency Warping Using Constant-Q Transformation for Speech Emotion Recognition}},
  journal = {Proceedings of the 2021 International Conference on Computer Communication and Informatics},
  year    = {2021},
  pages   = {1--6},
  doi     = {10.1109/ICCCI50826.2021.9402569}
}

% Conference paper kept as an article-type entry (proceedings title in
% journal); the page range uses the double-hyphen form.
@article{bansal2020cough,
  author  = {Bansal, Vipin and Pahwa, Gaurav and Kannan, Nirmal},
  title   = {{Cough Classification for COVID-19 Based on Audio MFCC Features Using Convolutional Neural Networks}},
  journal = {Proceedings of the 2020 IEEE International Conference on Computing, Power and Communication Technologies},
  pages   = {604--608},
  year    = {2020},
  doi     = {10.1109/GUCON48875.2020.9231094}
}

% Conference paper kept as an article-type entry (proceedings title in
% journal).
@article{ghosal2018music,
  author  = {Deepanway Ghosal and Maheshkumar H. Kolekar},
  title   = {{Music Genre Recognition Using Deep Neural Networks and Transfer Learning}},
  journal = {Proceedings of Interspeech 2018},
  pages   = {2087--2091},
  year    = {2018},
  doi     = {10.21437/Interspeech.2018-2045}
}

% Conference paper kept as an article-type entry (proceedings title in
% journal). The title is double-braced like the other titles in this file so
% its capitalisation survives sentence-casing styles; the accent is written
% as a braced escape so BibTeX treats it as a single letter.
@article{grinstein2018audio,
  author  = {Grinstein, Eric and Duong, Ngoc Q. K. and Ozerov, Alexey and P{\'e}rez, Patrick},
  title   = {{Audio Style Transfer}},
  journal = {Proceedings of the 2018 IEEE International Conference on Acoustics, Speech and Signal Processing},
  pages   = {586--590},
  year    = {2018},
  doi     = {10.1109/ICASSP.2018.8461711}
}

  % Journal article. The author's initials are space-separated so that BibTeX
  % sees two given-name tokens and abbreviating styles keep both.
  @article{nguyen1994near,
    author  = {Nguyen, T. Q.},
    title   = {{Near-perfect-reconstruction Pseudo-QMF Banks}},
    journal = {IEEE Transactions on Signal Processing},
    volume  = {42},
    number  = {1},
    pages   = {65--76},
    year    = {1994},
    doi     = {10.1109/78.258122}
  }


% Encyclopedia chapter. The doi field holds the bare identifier (styles add
% the resolver prefix themselves); the page range uses the double-hyphen form.
@incollection{sobolewski2003data,
  author    = {John S. Sobolewski},
  title     = {{Data Transmission Media}},
  editor    = {Robert A. Meyers},
  booktitle = {Encyclopedia of Physical Science and Technology (Third Edition)},
  publisher = {Academic Press},
  address   = {New York},
  pages     = {277--303},
  year      = {2003},
  isbn      = {978-0-12-227410-7},
  doi       = {10.1016/B0-12-227410-5/00165-4}
}

% Conference paper kept as an article-type entry (proceedings title in
% journal). No volume or issue is recorded, so those fields are left out;
% the page range uses the double-hyphen form.
@article{howard2019searching,
  author  = {Howard, Andrew and Sandler, Mark and Chen, Bo and Wang, Weijun and Chen, Liang-Chieh and Tan, Mingxing and Chu, Grace and Vasudevan, Vijay and Zhu, Yukun and Pang, Ruoming and Adam, Hartwig and Le, Quoc},
  title   = {{Searching for MobileNetV3}},
  journal = {Proceedings of the 2019 IEEE/CVF International Conference on Computer Vision},
  pages   = {1314--1324},
  year    = {2019},
  doi     = {10.1109/ICCV.2019.00140}
}

% Online documentation reference. The collective author is double-braced so
% it is treated as one indivisible name.
@misc{torchaudio,
  author = {{\texttt{TorchAudio} Contributors}},
  title  = {{\texttt{TorchAudio} Documentation}},
  year   = {2023},
  url    = {https://pytorch.org/audio/master/index.html}
}

% Online documentation reference. The author is written in "Last, First"
% form so that Cheuk is parsed as the family name; in "First Last" order
% BibTeX would take the final word (Wai) as the surname.
% NOTE(review): the surname is inferred from the project URL (kinwaicheuk) --
% confirm.
@misc{nnaudio,
  author = {Cheuk, Kin Wai},
  title  = {{\texttt{nnAudio} 0.3.1}},
  year   = {2023},
  url    = {https://kinwaicheuk.github.io/nnAudio/index.html}
}

% Journal article. Field names are lowercase like the rest of the file, the
% title has no stray padding inside its braces, and the page range uses the
% double-hyphen form.
@article{imagenet15russakovsky,
  author  = {Olga Russakovsky and Jia Deng and Hao Su and Jonathan Krause and Sanjeev Satheesh and Sean Ma and Zhiheng Huang and Andrej Karpathy and Aditya Khosla and Michael Bernstein and Alexander C. Berg and Li Fei-Fei},
  title   = {{ImageNet Large Scale Visual Recognition Challenge}},
  journal = {International Journal of Computer Vision},
  volume  = {115},
  number  = {3},
  pages   = {211--252},
  year    = {2015},
  doi     = {10.1007/s11263-015-0816-y}
}


% Book chapter. The title is double-braced like the other titles in this
% file so its capitalisation survives sentence-casing styles; field values
% use brace delimiters.
@incollection{schuckers2010roc,
  author    = {Schuckers, Michael E.},
  title     = {{Receiver Operating Characteristic Curve and Equal Error Rate}},
  booktitle = {Computational Methods in Biometric Authentication: Statistical Methods for Performance Evaluation},
  publisher = {Springer London},
  address   = {London},
  pages     = {155--204},
  year      = {2010},
  isbn      = {978-1-84996-202-5},
  doi       = {10.1007/978-1-84996-202-5_5}
}

% Standards document. The corporate author is double-braced so it is not
% split into given and family names.
@misc{isoaac,
  author = {{International Organization for Standardization}},
  title  = {{ISO/IEC 13818-7:1997 Information technology - Generic Coding of Moving Pictures and Associated Audio Information - Part 7: Advanced Audio Coding (AAC)}},
  url    = {https://www.iso.org/standard/25040.html},
  year   = {1997}
}

% Journal article; the journal name is given in title case.
@article{zakariah2018digital,
  author  = {Zakariah, Mohammed and Khan, Muhammad Khurram and Malik, Hafiz},
  title   = {{Digital Multimedia Audio Forensics: Past, Present and Future}},
  journal = {Multimedia Tools and Applications},
  volume  = {77},
  pages   = {1009--1040},
  year    = {2018},
  doi     = {10.1007/s11042-016-4277-2}
}


% Conference paper kept as an article-type entry (proceedings title in
% journal, venue in note). No volume or issue is recorded, so those fields
% are left out; the page range uses the double-hyphen form.
@article{xiang2022mp3,
  author  = {Xiang, Ziyue and Bestagini, Paolo and Tubaro, Stefano and Delp, Edward J.},
  title   = {{Forensic Analysis and Localization of Multiply Compressed MP3 Audio Using Transformers}},
  journal = {Proceedings of the 2022 IEEE International Conference on Acoustics, Speech and Signal Processing},
  pages   = {2929--2933},
  year    = {2022},
  doi     = {10.1109/ICASSP43922.2022.9747639},
  note    = {Singapore}
}

% Workshop paper kept as an article-type entry (proceedings title in
% journal, venue city in note). The page range is written with an ASCII
% double hyphen rather than a literal en-dash character.
@article{bianchi2013mp3,
  author  = {Bianchi, Tiziano and De Rosa, Alessia and Fontani, Marco and Rocciolo, Giovanni and Piva, Alessandro},
  title   = {{Detection and Classification of Double Compressed MP3 Audio Tracks}},
  journal = {Proceedings of the First ACM Workshop on Information Hiding and Multimedia Security},
  pages   = {159--164},
  year    = {2013},
  doi     = {10.1145/2482513.2482523},
  note    = {Montpellier, France}
}

% arXiv preprint; the preprint identifier is carried in journal. The doi
% field holds the bare identifier, since styles add the resolver prefix
% themselves.
@article{yamagishi2021asvspoof21,
  author  = {Yamagishi, Junichi and Wang, Xin and Todisco, Massimiliano and Sahidullah, Md and Patino, Jose and Nautsch, Andreas and Liu, Xuechen and Lee, Kong Aik and Kinnunen, Tomi and Evans, Nicholas and others},
  title   = {{ASVspoof 2021: Accelerating Progress in Spoofed and Deepfake Speech Detection}},
  journal = {arXiv preprint arXiv:2109.00537},
  year    = {2021},
  doi     = {10.48550/arXiv.2109.00537}
}

% Journal article; pages holds a single number rather than a range.
@article{almutairi2022review,
  author  = {Almutairi, Zaynab and Elgibreen, Hebah},
  title   = {{A Review of Modern Audio Deepfake Detection Methods: Challenges and Future Directions}},
  journal = {Algorithms},
  year    = {2022},
  volume  = {15},
  number  = {5},
  pages   = {155},
  doi     = {10.3390/a15050155}
}

% Conference paper kept as an article-type entry (proceedings title in
% journal, venue city in note). No volume or issue is recorded, so those
% fields are left out; the page range uses the double-hyphen form.
@article{reimao2019for,
  author  = {Reimao, Ricardo and Tzerpos, Vassilios},
  title   = {{FoR: A Dataset for Synthetic Speech Detection}},
  journal = {Proceedings of the 2019 International Conference on Speech Technology and Human-Computer Dialogue},
  pages   = {1--10},
  year    = {2019},
  doi     = {10.1109/SPED.2019.8906599},
  note    = {Timisoara, Romania}
}

% Conference paper kept as an article-type entry (proceedings title in
% journal); the page range uses the double-hyphen form.
@article{reimao2021synthetic,
  author  = {Reimao, Ricardo and Tzerpos, Vassilios},
  title   = {{Synthetic Speech Detection Using Neural Networks}},
  journal = {Proceedings of the 2021 International Conference on Speech Technology and Human-Computer Dialogue},
  pages   = {97--102},
  year    = {2021},
  doi     = {10.1109/SpeD53181.2021.9587406}
}

% Conference paper kept as an article-type entry (proceedings title in
% journal). No volume or issue is recorded, so those fields are left out;
% the page range uses the double-hyphen form.
@article{ziabary2021cqt,
  author  = {Ziabary, Pedram Abdzadeh and Veisi, Hadi},
  title   = {{A Countermeasure Based on CQT Spectrogram for Deepfake Speech Detection}},
  journal = {Proceedings of the 2021 7th International Conference on Signal Processing and Intelligent Systems},
  pages   = {1--5},
  year    = {2021},
  doi     = {10.1109/ICSPIS54653.2021.9729387}
}

% Conference paper kept as an article-type entry; note records that the
% event was held virtually. Middle initials carry their periods, matching
% the "Delp, Edward J." spelling used by another entry in this file.
% NOTE(review): the citation key says 2022 while year and DOI say 2021; the
% key is left untouched because documents cite it.
@article{bartusiak2022frequency,
  author  = {Bartusiak, Emily R. and Delp, Edward J.},
  title   = {{Frequency Domain-based Detection of Generated Audio}},
  journal = {Proceedings of IS\&T International Symposium on Electronic Imaging: Media Watermarking, Security, and Forensics},
  pages   = {273-1 -- 273-7},
  year    = {2021},
  doi     = {10.2352/ISSN.2470-1173.2021.4.MWSF-273},
  note    = {Virtual}
}

% Workshop paper kept as an article-type entry; note records that the event
% was held virtually. The page range uses the double-hyphen form.
@article{khalid2021deepfake,
  author  = {Khalid, Hasam and Kim, Minha and Tariq, Shahroz and Woo, Simon S.},
  title   = {{Evaluation of an Audio-Video Multimodal Deepfake Dataset Using Unimodal and Multimodal Detectors}},
  journal = {Proceedings of the 1st Workshop on Synthetic Multimedia -- Audiovisual Deepfake Generation and Detection},
  pages   = {7--15},
  year    = {2021},
  doi     = {10.1145/3476099.3484315},
  note    = {Virtual}
}


% Book reference, identified by publisher and DOI.
@book{schuller2020filter,
  author    = {Schuller, Gerald},
  title     = {{Filter Banks and Audio Coding: Compressing Audio Signals Using Python}},
  publisher = {Springer Nature},
  year      = {2020},
  doi       = {10.1007/978-3-030-51249-1}
}

% Doctoral thesis; the umlaut in the school name is written as a braced
% LaTeX escape.
@phdthesis{purwins2005music,
  author = {Purwins, Hendrik},
  title  = {{Profiles of Pitch Classes Circularity of Relative Pitch and Key--Experiments, Models, Computational Music Analysis, and Perspectives}},
  school = {Technische Universit{\"a}t Berlin},
  year   = {2005},
  url    = {https://depositonce.tu-berlin.de/items/4cb9db3c-3ff1-4849-bb62-936df7fde7b3}
}