Skip to content

Instantly share code, notes, and snippets.

ryokamoi
Last active April 24, 2024 17:46
Show Gist options
  • Save ryokamoi/59dc522fbd00b432c5de1f5692fdfb3a to your computer and use it in GitHub Desktop.
Save ryokamoi/59dc522fbd00b432c5de1f5692fdfb3a to your computer and use it in GitHub Desktop.
BibTex file for LLMs
comment = {Proprietary Models}
comment = {GPT-4}
@article{openai2023gpt4,
  title   = {{GPT-4} Technical Report},
  author  = {{OpenAI}},
  journal = {arXiv preprint arXiv:2303.08774},
  year    = {2023},
}
comment = {GPT-3.5, InstructGPT}
@article{Ouyang2022instructgpt,
  title   = {Training language models to follow instructions with human feedback},
  author  = {Ouyang, Long and Wu, Jeff and Jiang, Xu and Almeida, Diogo and Wainwright, Carroll L. and Mishkin, Pamela and Zhang, Chong and Agarwal, Sandhini and Slama, Katarina and Ray, Alex and Schulman, John and Hilton, Jacob and Kelton, Fraser and Miller, Luke and Simens, Maddie and Askell, Amanda and Welinder, Peter and Christiano, Paul and Leike, Jan and Lowe, Ryan},
  journal = {arXiv preprint arXiv:2203.02155},
  year    = {2022},
}
comment = {GPT-3, InstructGPT}
@inproceedings{Brown2020gpt3,
  author    = {Brown, Tom and Mann, Benjamin and Ryder, Nick and Subbiah, Melanie and Kaplan, Jared D. and Dhariwal, Prafulla and Neelakantan, Arvind and Shyam, Pranav and Sastry, Girish and Askell, Amanda and Agarwal, Sandhini and Herbert-Voss, Ariel and Krueger, Gretchen and Henighan, Tom and Child, Rewon and Ramesh, Aditya and Ziegler, Daniel and Wu, Jeffrey and Winter, Clemens and Hesse, Chris and Chen, Mark and Sigler, Eric and Litwin, Mateusz and Gray, Scott and Chess, Benjamin and Clark, Jack and Berner, Christopher and McCandlish, Sam and Radford, Alec and Sutskever, Ilya and Amodei, Dario},
  booktitle = {Advances in Neural Information Processing Systems},
  editor    = {H. Larochelle and M. Ranzato and R. Hadsell and M. F. Balcan and H. Lin},
  pages     = {1877--1901},
  publisher = {Curran Associates, Inc.},
  title     = {Language Models are Few-Shot Learners},
  url       = {https://proceedings.neurips.cc/paper_files/paper/2020/file/1457c0d6bfcb4967418bfb8ac142f64a-Paper.pdf},
  volume    = {33},
  year      = {2020},
}
comment = {Claude}
@misc{claude2,
  title  = {Model Card and Evaluations for {Claude} Models},
  author = {{Anthropic}},
  year   = {2023},
  url    = {https://www.anthropic.com/news/claude-2},
}
@misc{claude3,
  title  = {Introducing the next generation of {Claude}},
  author = {{Anthropic}},
  year   = {2024},
  url    = {https://www.anthropic.com/news/claude-3-family},
}
comment = {Gemini}
@article{geminiteam2023gemini,
  title   = {{Gemini}: A Family of Highly Capable Multimodal Models},
  author  = {{Gemini Team Google}},
  journal = {arXiv preprint arXiv:2312.11805},
  year    = {2023},
}
comment = {Reka}
@article{ormazabal2024reka,
  title   = {{Reka Core}, {Flash}, and {Edge}: A Series of Powerful Multimodal Language Models},
  author  = {Aitor Ormazabal and Che Zheng and de Masson d'Autume, Cyprien and Dani Yogatama and Deyu Fu and Donovan Ong and Eric Chen and Eugenie Lamprecht and Hai Pham and Isaac Ong and Kaloyan Aleksiev and Lei Li and Matthew Henderson and Max Bain and Mikel Artetxe and Nishant Relan and Piotr Padlewski and Qi Liu and Ren Chen and Samuel Phua and Yazheng Yang and Yi Tay and Yuqi Wang and Zhongkai Zhu and Zhihui Xie},
  journal = {arXiv preprint arXiv:2404.12387},
  year    = {2024},
}
comment = {Open-source Models}
comment = {Llama}
@article{touvron2023llama,
  title   = {{LLaMA}: Open and Efficient Foundation Language Models},
  author  = {Hugo Touvron and Thibaut Lavril and Gautier Izacard and Xavier Martinet and Marie-Anne Lachaux and Timothée Lacroix and Baptiste Rozière and Naman Goyal and Eric Hambro and Faisal Azhar and Aurelien Rodriguez and Armand Joulin and Edouard Grave and Guillaume Lample},
  journal = {arXiv preprint arXiv:2302.13971},
  year    = {2023},
}
comment = {Llama 2}
@article{touvron2023llama2,
  title   = {{Llama 2}: Open Foundation and Fine-Tuned Chat Models},
  author  = {Hugo Touvron and Louis Martin and Kevin Stone and Peter Albert and Amjad Almahairi and Yasmine Babaei and Nikolay Bashlykov and Soumya Batra and Prajjwal Bhargava and Shruti Bhosale and Dan Bikel and Lukas Blecher and Cristian Canton Ferrer and Moya Chen and Guillem Cucurull and David Esiobu and Jude Fernandes and Jeremy Fu and Wenyin Fu and Brian Fuller and Cynthia Gao and Vedanuj Goswami and Naman Goyal and Anthony Hartshorn and Saghar Hosseini and Rui Hou and Hakan Inan and Marcin Kardas and Viktor Kerkez and Madian Khabsa and Isabel Kloumann and Artem Korenev and Punit Singh Koura and Marie-Anne Lachaux and Thibaut Lavril and Jenya Lee and Diana Liskovich and Yinghai Lu and Yuning Mao and Xavier Martinet and Todor Mihaylov and Pushkar Mishra and Igor Molybog and Yixin Nie and Andrew Poulton and Jeremy Reizenstein and Rashi Rungta and Kalyan Saladi and Alan Schelten and Ruan Silva and Eric Michael Smith and Ranjan Subramanian and Xiaoqing Ellen Tan and Binh Tang and Ross Taylor and Adina Williams and Jian Xiang Kuan and Puxin Xu and Zheng Yan and Iliyan Zarov and Yuchen Zhang and Angela Fan and Melanie Kambadur and Sharan Narang and Aurelien Rodriguez and Robert Stojnic and Sergey Edunov and Thomas Scialom},
  journal = {arXiv preprint arXiv:2307.09288},
  year    = {2023},
}
comment = {Llama 3 - technical report coming soon}
@misc{llama3,
  title  = {Introducing {Meta Llama 3}: The most capable openly available {LLM} to date},
  author = {{Meta AI}},
  year   = {2024},
  url    = {https://ai.meta.com/blog/meta-llama-3},
}
comment = {Qwen 1.0}
@article{bai2023qwen,
  title   = {{Qwen} Technical Report},
  author  = {Jinze Bai and Shuai Bai and Yunfei Chu and Zeyu Cui and Kai Dang and Xiaodong Deng and Yang Fan and Wenbin Ge and Yu Han and Fei Huang and Binyuan Hui and Luo Ji and Mei Li and Junyang Lin and Runji Lin and Dayiheng Liu and Gao Liu and Chengqiang Lu and Keming Lu and Jianxin Ma and Rui Men and Xingzhang Ren and Xuancheng Ren and Chuanqi Tan and Sinan Tan and Jianhong Tu and Peng Wang and Shijie Wang and Wei Wang and Shengguang Wu and Benfeng Xu and Jin Xu and An Yang and Hao Yang and Jian Yang and Shusheng Yang and Yang Yao and Bowen Yu and Hongyi Yuan and Zheng Yuan and Jianwei Zhang and Xingxuan Zhang and Yichang Zhang and Zhenru Zhang and Chang Zhou and Jingren Zhou and Xiaohuan Zhou and Tianhang Zhu},
  journal = {arXiv preprint arXiv:2309.16609},
  year    = {2023},
}
comment = {Qwen 1.5}
@misc{qwen1.5,
  title  = {Introducing {Qwen1.5}},
  author = {{Qwen Team}},
  year   = {2024},
  url    = {https://qwenlm.github.io/blog/qwen1.5},
}
comment = {Mistral}
@article{jiang2023mistral,
  title   = {{Mistral 7B}},
  author  = {Albert Q. Jiang and Alexandre Sablayrolles and Arthur Mensch and Chris Bamford and Devendra Singh Chaplot and Diego de las Casas and Florian Bressand and Gianna Lengyel and Guillaume Lample and Lucile Saulnier and Lélio Renard Lavaud and Marie-Anne Lachaux and Pierre Stock and Teven Le Scao and Thibaut Lavril and Thomas Wang and Timothée Lacroix and William El Sayed},
  journal = {arXiv preprint arXiv:2310.06825},
  year    = {2023},
}
comment = {Mixtral 8x7B}
@article{mixtral,
  title   = {{Mixtral} of Experts},
  author  = {Albert Q. Jiang and Alexandre Sablayrolles and Antoine Roux and Arthur Mensch and Blanche Savary and Chris Bamford and Devendra Singh Chaplot and Diego de las Casas and Emma Bou Hanna and Florian Bressand and Gianna Lengyel and Guillaume Bour and Guillaume Lample and Lélio Renard Lavaud and Lucile Saulnier and Marie-Anne Lachaux and Pierre Stock and Sandeep Subramanian and Sophia Yang and Szymon Antoniak and Teven Le Scao and Théophile Gervet and Thibaut Lavril and Thomas Wang and Timothée Lacroix and William El Sayed},
  journal = {arXiv preprint arXiv:2401.04088},
  year    = {2024},
}
comment = {Cohere}
comment = {Command R+}
@misc{command-r-plus,
  title  = {Introducing {Command R+}: A Scalable {LLM} Built for Business},
  author = {{Cohere}},
  year   = {2024},
  url    = {https://cohere.com/blog/command-r-plus-microsoft-azure},
}
comment = {Command R}
@misc{command-r,
  title  = {{Command R}: Retrieval-Augmented Generation at Production Scale},
  author = {{Cohere}},
  year   = {2024},
  url    = {https://cohere.com/blog/command-r},
}
comment = {Phi-3}
@article{abdin2024phi3,
  title   = {{Phi-3} Technical Report: A Highly Capable Language Model Locally on Your Phone},
  author  = {Marah Abdin and Sam Ade Jacobs and Ammar Ahmad Awan and Jyoti Aneja and Ahmed Awadallah and Hany Awadalla and Nguyen Bach and Amit Bahree and Arash Bakhtiari and Harkirat Behl and Alon Benhaim and Misha Bilenko and Johan Bjorck and Sébastien Bubeck and Martin Cai and Caio César Teodoro Mendes and Weizhu Chen and Vishrav Chaudhary and Parul Chopra and Allie Del Giorno and Gustavo de Rosa and Matthew Dixon and Ronen Eldan and Dan Iter and Amit Garg and Abhishek Goswami and Suriya Gunasekar and Emman Haider and Junheng Hao and Russell J. Hewett and Jamie Huynh and Mojan Javaheripi and Xin Jin and Piero Kauffmann and Nikos Karampatziakis and Dongwoo Kim and Mahoud Khademi and Lev Kurilenko and James R. Lee and Yin Tat Lee and Yuanzhi Li and Chen Liang and Weishung Liu and Eric Lin and Zeqi Lin and Piyush Madan and Arindam Mitra and Hardik Modi and Anh Nguyen and Brandon Norick and Barun Patra and Daniel Perez-Becker and Thomas Portet and Reid Pryzant and Heyang Qin and Marko Radmilac and Corby Rosset and Sambudha Roy and Olatunji Ruwase and Olli Saarikivi and Amin Saied and Adil Salim and Michael Santacroce and Shital Shah and Ning Shang and Hiteshi Sharma and Xia Song and Masahiro Tanaka and Xin Wang and Rachel Ward and Guanhua Wang and Philipp Witte and Michael Wyatt and Can Xu and Jiahang Xu and Sonali Yadav and Fan Yang and Ziyi Yang and Donghan Yu and Chengruidong Zhang and Cyril Zhang and Jianwen Zhang and Li Lyna Zhang and Yi Zhang and Yue Zhang and Yunan Zhang and Xiren Zhou},
  journal = {arXiv preprint arXiv:2404.14219},
  year    = {2024},
}
comment = {Gemma}
@misc{gemma,
  title  = {{Gemma} Open Models},
  author = {{Google}},
  year   = {2024},
  url    = {https://ai.google.dev/gemma},
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment