% RELISH Consortium benchmark paper (Database, 2019; doi 10.1093/database/baz085).
% The percent sign in the abstract is escaped so the field stays valid when a style or Biber hands it to LaTeX.
@article{4ef5008455e649c79e600e84cd099263,
  author    = {{RELISH Consortium} and Peter Brown and Aik-Choon Tan and El-Esawi, {Mohamed A.} and Thomas Liehr and Oliver Blanck and Gladue, {Douglas P.} and Almeida, {Gabriel M. F.} and Tomislav Cernava and Sorzano, {Carlos O.} and Yeung, {Andy W. K.} and Engel, {Michael S.} and Chandrasekaran, {Arun Richard} and Thilo Muth and Staege, {Martin S.} and Daulatabad, {Swapna V.} and Darius Widera and Junpeng Zhang and Adrian Meule and Ken Honjo and Olivier Pourret and Cong-Cong Yin and Zhongheng Zhang and Marco Cascella and Flegel, {Willy A.} and Goodyear, {Carl S.} and van Raaij, {Mark J.} and Zuzanna Bukowy-Bieryllo and Campana, {Luca G.} and Kurniawan, {Nicholas A.} and David Lalaouna and Huttner, {Felix J.} and Ammerman, {Brooke A.} and Felix Ehret and Cobine, {Paul A.} and Ene-Choo Tan and Hyemin Han and Wenfeng Xia and Christopher McCrum and Dings, {Ruud P. M.} and Francesco Marinello and Henrik Nilsson and Brett Nixon and Konstantinos Voskarides and Long Yang and Costa, {Vincent D.} and Johan Bengtsson-Palme and William Bradshaw and Grimm, {Dominik G.} and Nitin Kumar and Elvis Martis and Daniel Prieto and Sabnis, {Sandeep C.} and Amer, {Said E. D. R.} and Liew, {Alan W. C.} and Paul Perco and Farid Rahimi and Giuseppe Riva and Chongxing Zhang and Devkota, {Hari P.} and Koichi Ogami and Zarrin Basharat and Walter Fierz and Robert Siebers and Kok-Hian Tan and Boehme, {Karen A.} and Peter Brenneisen and Brown, {James A. L.} and Dalrymple, {Brian P.} and Harvey, {David J.} and Grace Ng and Sebastiaan Werten and Mark Bleackley and Zhanwu Dai and Raman Dhariwal and Yael Gelfer and Hartmann, {Marcus D.} and Pawel Miotla and Radu Tamaian and Pragashnie Govender and Gurney-Champion, {Oliver J.} and Kauppila, {Joonas H.} and Xiaolei Zhang and Natalia Echeverria and Santhilal Subhash and Hannes Sallmon and Marco Tofani and Taeok Bae and Oliver Bosch and Cuiv, {Paraic O.} and Antoine Danchin and Barthelemy Diouf and Tuomas Eerola and Evangelos Evangelou and Filipp, {Fabian V.} and Hannes Klump and Lukasz Kurgan and Smith, {Simon S.} and Olivier Terrier and Neil Tuttle and Ascher, {David B.} and Janga, {Sarath C.} and Schulte, {Leon N.} and Daniel Becker and Christopher Browngardt and Bush, {Stephen J.} and Guillaume Gaullier and Kazuki Ide and Clement Meseko and Werner, {Gijsbert D. A.} and Jan Zaucha and Al-Farha, {Abd A.} and Greenwald, {Noah F.} and Popoola, {Segun I.} and Rahman, {Md Shaifur} and Jialin Xu and Yang, {Sunny Y.} and Noboru Hiroi and Alper, {Ozgul M.} and Baker, {Chris I.} and Michael Bitzer and George Chacko and Birgit Debrabant and Ray Dixon and Evelyne Forano and Matthew Gilliham and Sarah Kelly and Karl-Heinz Klempnauer and Lidbury, {Brett A.} and Lin, {Michael Z.} and Iseult Lynch and Wujun Ma and Maibach, {Edward W.} and Mather, {Diane E.} and Nandakumar, {Kutty S.} and Ohgami, {Robert S.} and Piero Parchi and Patrizio Tressoldi and Yu Xue and Charles Armitage and Pierre Barraud and Stella Chatzitheochari and Coelho, {Luis P.} and Jiajie Diao and Doxey, {Andrew C.} and Angelique Gobet and Pingzhao Hu and Stefan Kaiser and Mitchell, {Kate M.} and Salama, {Mohamed F.} and Shabalin, {Ivan G.} and Haijun Song and Dejan Stevanovic and Ali Yadollahpour and Erliang Zeng and Katharina Zinke and Alimba, {C. G.} and Beyene, {Tariku J.} and Zehong Cao and Chan, {Sherwin S.} and Michael Gatchell and Andreas Kleppe and Marcin Piotrowski and Gonzalo Torga and Woldesemayat, {Adugna A.} and Cosacak, {Mehmet I.} and Scott Haston and Ross, {Stephanie A.} and Richard Williams and Alvin Wong and Abramowitz, {Matthew K.} and Andem Effiong and Senhong Lee and Abid, {Muhammad Bilal} and Cyrus Agarabi and Cedric Alaux and Albrecht, {Dirk R.} and Atkins, {Gerald J.} and Beck, {Charles R.} and Bonvin, {A. M. J. J.} and Emer Bourke and Qian Han and Hui Yang and Shu Zhu and Xiao Fan and Jian Li and Boykin, {Laura M.} and Chen, {Dong F.} and Hao Chen and Hui Chen and Yinglong Chen and Corea, {Enoka M.} and Day, {David A.} and Dong, {X. C.} and Fox, {Simon A.} and Renzhi Han and Evan Ingley and Johnson, {Andrew M.} and King, {Sarah R. B.} and Kyung-Woo Lee and Haiyan Liu and Wei Lu and Mackey, {David A.} and Murphy, {Susan K.} and Andrew Page and Roach, {Neil W.} and Roberts, {David D.} and Ian Small and Steadman, {Kathryn J.} and Nils Stein and Stewart, {Douglas I.} and Yun Wang and Xu, {J. W.} and Li Zhang and Li Zhang and Matthew Campbell and Xin Chen and Fang Liu and Jin-Jian Lu and Moss, {W. N.} and Menuka Pallebage-Gamarallage and Pearce, {Simon P.} and Shaouli Shahid and Jun Wang and Yi-Rui Wu and Kai Zhang and Yi Zhang and Joshua Brown and Choi, {Jane R.} and Zoya Gridneva and Ying Li and Zhonghua Liu and Yao Lu and Kalina Makowiecki and Richardson, {Mark F.} and Robinson, {K. R.} and Fang Wang and Rebecca Webster and Wei Zhang and Wei Chang and Chen, {Andrew T. Y.} and Mason, {Leanda D.} and Mishra, {Anand K.} and Nguyen, {Thi T. H.} and Jun Tang and Teng Wang and Yi-Chun Chen and Adam Hall and Richard Hall and Adam Martin and Webb, {S. A.}},
  title     = {Large expert-curated database for benchmarking document similarity detection in biomedical literature search},
  journal   = {Database},
  volume    = {2019},
  pages     = {1--67},
  year      = {2019},
  month     = oct,
  day       = {29},
  doi       = {10.1093/database/baz085},
  issn      = {1758-0463},
  publisher = {Oxford University Press},
  language  = {English},
  abstract  = {Document recommendation systems for locating relevant literature have mostly relied on methods developed a decade ago. This is largely due to the lack of a large offline gold-standard benchmark of relevant documents that cover a variety of research fields such that newly developed literature search techniques can be compared, improved and translated into practice. To overcome this bottleneck, we have established the RElevant LIterature SearcH consortium consisting of more than 1500 scientists from 84 countries, who have collectively annotated the relevance of over 180 000 PubMed-listed articles with regard to their respective seed (input) article/s. The majority of annotations were contributed by highly experienced, original authors of the seed articles. The collected data cover 76\% of all unique PubMed Medical Subject Headings descriptors. No systematic biases were observed across different experience levels, research fields or time spent on annotations. More importantly, annotations of the same document pairs contributed by different scientists were highly concordant. We further show that the three representative baseline methods used to generate recommended articles for evaluation (Okapi Best Matching 25, Term Frequency-Inverse Document Frequency and PubMed Related Articles) had similar overall performances. Additionally, we found that these methods each tend to produce distinct collections of recommended articles, suggesting that a hybrid method may be required to completely capture all relevant articles. The established database server located at https://relishdb.ict.griffith.edu.au is freely available for the downloading of annotation data and the blind testing of new methods. We expect that this benchmark will be useful for stimulating the development of new powerful techniques for title and title/abstract-based search engines for relevant articles in biomedical research.},
}