<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3.dtd">
<article article-type="research-article" dtd-version="1.3" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xml:lang="ru"><front><journal-meta><journal-id journal-id-type="publisher-id">izvestswsu</journal-id><journal-title-group><journal-title xml:lang="ru">Известия Юго-Западного государственного университета</journal-title><trans-title-group xml:lang="en"><trans-title>Proceedings of the Southwest State University</trans-title></trans-title-group></journal-title-group><issn pub-type="ppub">2223-1560</issn><issn pub-type="epub">2686-6757</issn><publisher><publisher-name>ЮЗГУ</publisher-name></publisher></journal-meta><article-meta><article-id pub-id-type="doi">10.21869/2223-1560-2022-26-2-159-171</article-id><article-id custom-type="elpub" pub-id-type="custom">izvestswsu-1031</article-id><article-categories><subj-group subj-group-type="heading"><subject>Research Article</subject></subj-group><subj-group subj-group-type="section-heading" xml:lang="ru"><subject>Информатика, вычислительная техника и управление</subject></subj-group><subj-group subj-group-type="section-heading" xml:lang="en"><subject>Computer science, computer engineering and IT management</subject></subj-group></article-categories><title-group><article-title>Увеличение производительности языковых моделей «трансформер» в информационных вопросно-ответных системах</article-title><trans-title-group xml:lang="en"><trans-title>Increased Performance of Transformers Language Models in Information Question and Response Systems</trans-title></trans-title-group></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><contrib-id contrib-id-type="orcid">https://orcid.org/0000-0002-7677-1800</contrib-id><name-alternatives><name name-style="eastern" xml:lang="ru"><surname>Галеев</surname><given-names>Д. Т.</given-names></name><name name-style="western" xml:lang="en"><surname>Galeev</surname><given-names>D. 
T.</given-names></name></name-alternatives><bio xml:lang="ru"><p>Галеев Денис Талгатович, аспирант</p><p>ул. 50 лет Октября, д. 94, г. Курск 305040</p></bio><bio xml:lang="en"><p>Denis T. Galeev, Post-Graduate Student</p><p>50 Let Oktyabrya str. 94, Kursk 305040</p></bio><email xlink:type="simple">ra3wvw@mail.ru</email><xref ref-type="aff" rid="aff-1"/></contrib><contrib contrib-type="author" corresp="yes"><contrib-id contrib-id-type="orcid">https://orcid.org/0000-0003-1772-7663</contrib-id><name-alternatives><name name-style="eastern" xml:lang="ru"><surname>Панищев</surname><given-names>В. С.</given-names></name><name name-style="western" xml:lang="en"><surname>Panishchev</surname><given-names>V. S.</given-names></name></name-alternatives><bio xml:lang="ru"><p>Панищев Владимир Славиевич, кандидат технических наук</p><p>ул. 50 лет Октября, д. 94, г. Курск 305040</p></bio><bio xml:lang="en"><p>Vladimir S. Panishchev, Cand. of Sci. (Engineering)</p><p>50 Let Oktyabrya str. 94, Kursk 305040</p></bio><email xlink:type="simple">gskunk@yandex.ru</email><xref ref-type="aff" rid="aff-1"/></contrib><contrib contrib-type="author" corresp="yes"><name-alternatives><name name-style="eastern" xml:lang="ru"><surname>Титов</surname><given-names>Д. В.</given-names></name><name name-style="western" xml:lang="en"><surname>Titov</surname><given-names>D. V.</given-names></name></name-alternatives><bio xml:lang="ru"><p>Титов Дмитрий Витальевич, доктор технических наук, доцент</p><p>ул. 50 лет Октября, д. 94, г. Курск 305040</p></bio><bio xml:lang="en"><p>Dmitry V. Titov, Dr. of Sci. (Engineering), Associate Professor</p><p>50 Let Oktyabrya str. 
94, Kursk 305040</p></bio><email xlink:type="simple">titov.swsu@gmail.com</email><xref ref-type="aff" rid="aff-1"/></contrib></contrib-group><aff-alternatives id="aff-1"><aff xml:lang="ru"><institution>Юго-Западный государственный университет</institution></aff><aff xml:lang="en"><institution>Southwest State University</institution></aff></aff-alternatives><pub-date pub-type="collection"><year>2022</year></pub-date><pub-date pub-type="epub"><day>13</day><month>02</month><year>2023</year></pub-date><volume>26</volume><issue>2</issue><fpage>159</fpage><lpage>171</lpage><permissions><copyright-statement>Copyright © Галеев Д.Т., Панищев В.С., Титов Д.В., 2023</copyright-statement><copyright-year>2023</copyright-year><copyright-holder xml:lang="ru">Галеев Д.Т., Панищев В.С., Титов Д.В.</copyright-holder><copyright-holder xml:lang="en">Galeev D.T., Panishchev V.S., Titov D.V.</copyright-holder><license xml:lang="ru" license-type="creative-commons-attribution" xlink:href="https://creativecommons.org/licenses/by/4.0/" xlink:type="simple"><license-p>Данная работа распространяется под лицензией Creative Commons Attribution 4.0.</license-p></license><license xml:lang="en" license-type="creative-commons-attribution" xlink:href="https://creativecommons.org/licenses/by/4.0/" xlink:type="simple"><license-p>This work is licensed under a Creative Commons Attribution 4.0 License.</license-p></license></permissions><self-uri xlink:href="https://izvestswsu.elpub.ru/jour/article/view/1031">https://izvestswsu.elpub.ru/jour/article/view/1031</self-uri><abstract><sec><title>Цель исследования</title><p>Цель исследования. Целью работы является увеличение производительности вопросно-ответных информационных систем на русском языке. Научная новизна работы состоит в увеличении производительности для модели RuBERT, которая была обучена для нахождения ответа на вопрос в тексте. 
Поскольку более производительная языковая модель позволяет обрабатывать большее количество запросов за то же самое время, результаты работы могут найти применение в различных информационных вопросно-ответных системах, для которых важна скорость отклика.</p></sec><sec><title>Методы</title><p>Методы. В настоящей работе используются методы обработки естественного языка, машинного обучения, уменьшения размера искусственных нейронных сетей. Языковая модель была настроена и обучена при помощи библиотек машинного обучения Torch и Onnxruntime. Оригинальная модель и набор данных для обучения были взяты в библиотеке Huggingface.</p></sec><sec><title>Результаты</title><p>Результаты. В результате исследования была увеличена производительность работы языковой модели RuBERT при помощи методов уменьшения размера нейронных сетей, таких как дистилляция знаний и квантизация, а также при помощи экспорта модели в формат ONNX и её запуска в среде выполнения ONNX.</p></sec><sec><title>Заключение</title><p>Заключение. В результате, модель, к которой одновременно были применены дистилляция знаний, квантизация и ONNX оптимизация, получила увеличение производительности в ~4.6 раза (с 66.57 до 404.46 запросов в минуту), при этом размер модели уменьшился в ~13 раз (с 676.29 Мб до 51.66 Мб). Обратной стороной полученной производительности стало ухудшение показателей EM (с 61.3 до 56.87) и F-мера (с 81.66 до 76.97).</p></sec></abstract><trans-abstract xml:lang="en"><sec><title>Purpose of research</title><p>Purpose of research. The purpose of this work is to increase the performance of question and response information systems in Russian. Scientific novelty of the work is to increase the performance for RuBERT model, which was trained to find the answer to the question in the text. 
Since a more efficient language model allows more requests to be processed in the same time, the results of this work can be used in various information question and response systems for which response speed is important.</p></sec><sec><title>Methods</title><p>Methods. The present work uses methods of processing natural language, machine learning, reducing the size of artificial neural networks. The language model was configured and trained using Torch and Onnxruntime machine learning libraries. The original model and training dataset were taken from the Huggingface Library.</p></sec><sec><title>Results</title><p>Results. As a result of the study, the performance of RuBERT language model was increased using methods to reduce the size of neural networks, such as distillation of knowledge and quantization, as well as by exporting the model to ONNX format and running it in ONNX runtime.</p></sec><sec><title>Conclusion</title><p>Conclusion. As a result, the model, to which knowledge distillation, quantization and ONNX optimization were simultaneously applied, received a performance increase of ~ 4.6 times (from 66.57 to 404.46 requests per minute), while the size of the model decreased ~ 13 times (from 676.29 MB to 51.66 MB). The downside of the obtained performance was a deterioration in EM (from 61.3 to 56.87) and F-measure (from 81.66 to 76.97).</p></sec></trans-abstract><kwd-group xml:lang="ru"><kwd>машинное обучение</kwd><kwd>глубокое обучение</kwd><kwd>нейронные сети</kwd><kwd>обработка естественного языка</kwd><kwd>трансформер</kwd></kwd-group><kwd-group xml:lang="en"><kwd>machine learning</kwd><kwd>deep learning</kwd><kwd>neural networks</kwd><kwd>natural language processing</kwd><kwd>transformer</kwd></kwd-group></article-meta></front><back><ref-list><title>References</title><ref id="cit1"><label>1</label><citation-alternatives><mixed-citation xml:lang="ru">Рябинов А.В., Уздяев М.Ю., Ватаманюк И.В. 
Применение многозадачного глубокого обучения в задаче распознавания эмоций в речи // Известия Юго-Западного государственного университета. 2021; 25(1): 82-109. https://doi.org/10.21869/2223-1560-2021-25-1-82-109</mixed-citation><mixed-citation xml:lang="en">Ryabinov A.V., Uzdiaev M.Yu., Vatamaniuk I.V. [Applying Multitask Deep Learning to Emotion Recognition in Speech]. Izvestiya Yugo-Zapadnogo gosudarstvennogo universiteta = Proceedings of the Southwest State University 2021;25(1):82-109. (In Russ.) https://doi.org/10.21869/2223-1560-2021-25-1-82-109.</mixed-citation></citation-alternatives></ref><ref id="cit2"><label>2</label><citation-alternatives><mixed-citation xml:lang="ru">Vaswani A. et al. Attention is all you need // Advances in Neural Information Processing Systems 2017-December, 5999–6009 (Neural information processing systems foundation, 2017).</mixed-citation><mixed-citation xml:lang="en">Vaswani A. et al. Attention is all you need. Advances in Neural Information Processing Systems 2017-December, 5999–6009 (Neural information processing systems foundation, 2017).</mixed-citation></citation-alternatives></ref><ref id="cit3"><label>3</label><citation-alternatives><mixed-citation xml:lang="ru">Lewis M. et al. BART: Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension. in 7871–7880 (Association for Computational Linguistics (ACL), 2020).</mixed-citation><mixed-citation xml:lang="en">Lewis M. et al. BART: Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension. in 7871–7880 (Association for Computational Linguistics (ACL), 2020).</mixed-citation></citation-alternatives></ref><ref id="cit4"><label>4</label><citation-alternatives><mixed-citation xml:lang="ru">Raffel C. et al. 
Exploring the limits of transfer learning with a unified text-to-text transformer // Journal of Machine Learning Research 21, (2020).</mixed-citation><mixed-citation xml:lang="en">Raffel C. et al. Exploring the limits of transfer learning with a unified text-to-text transformer. Journal of Machine Learning Research 21, (2020).</mixed-citation></citation-alternatives></ref><ref id="cit5"><label>5</label><citation-alternatives><mixed-citation xml:lang="ru">Zhang J., Zhao Y., Saleh M., Liu P. J. PEGASUS: Pre-Training with extracted gapsentences for abstractive summarization // 37th International Conference on Machine Learning, ICML 2020 PartF168147-15, 11265–11276 (International Machine Learning Society (IMLS), 2020).</mixed-citation><mixed-citation xml:lang="en">Zhang J., Zhao Y., Saleh M., Liu P. J. PEGASUS: Pre-Training with extracted gapsentences for abstractive summarization. 37th International Conference on Machine Learning, ICML 2020 PartF168147-15, 11265–11276 (International Machine Learning Society (IMLS), 2020).</mixed-citation></citation-alternatives></ref><ref id="cit6"><label>6</label><citation-alternatives><mixed-citation xml:lang="ru">Qi W. et al. ProphetNet: Predicting future n-gram for sequence-to-sequence pretraining // Findings of the Association for Computational Linguistics Findings of ACL: EMNLP 2020 2401–2410 (Association for Computational Linguistics (ACL), 2020).</mixed-citation><mixed-citation xml:lang="en">Qi W. et al. ProphetNet: Predicting future n-gram for sequence-to-sequence pretraining. Findings of the Association for Computational Linguistics Findings of ACL: EMNLP 2020; 2401–2410 (Association for Computational Linguistics (ACL), 2020).</mixed-citation></citation-alternatives></ref><ref id="cit7"><label>7</label><citation-alternatives><mixed-citation xml:lang="ru">Devlin J., Chang M. W., Lee K., Toutanova K. 
BERT: Pre-training of deep bidirectional transformers for language understanding // NAACL HLT 2019 - 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies - Proceedings of the Conference 1, 4171–4186 (Association for Computational Linguistics (ACL), 2019).</mixed-citation><mixed-citation xml:lang="en">Devlin J., Chang M. W., Lee K., Toutanova K. BERT: Pre-training of deep bidirectional transformers for language understanding. NAACL HLT 2019 - 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies - Proceedings of the Conference 1, 4171–4186 (Association for Computational Linguistics (ACL), 2019).</mixed-citation></citation-alternatives></ref><ref id="cit8"><label>8</label><citation-alternatives><mixed-citation xml:lang="ru">Lan Z. et al. ALBERT: A Lite BERT for Self-supervised Learning of Language Representations. in ICLR (OpenReview.net, 2020).</mixed-citation><mixed-citation xml:lang="en">Lan Z. et al. ALBERT: A Lite BERT for Self-supervised Learning of Language Representations. in ICLR (OpenReview.net, 2020).</mixed-citation></citation-alternatives></ref><ref id="cit9"><label>9</label><citation-alternatives><mixed-citation xml:lang="ru">Liu Y. et al. RoBERTa: A Robustly Optimized BERT Pretraining Approach. CoRR abs/1907.11692, (2019).</mixed-citation><mixed-citation xml:lang="en">Liu Y. et al. RoBERTa: A Robustly Optimized BERT Pretraining Approach. CoRR abs/1907.11692, (2019).</mixed-citation></citation-alternatives></ref><ref id="cit10"><label>10</label><citation-alternatives><mixed-citation xml:lang="ru">Clark K., Luong M.-T., Le Q. V., Manning C. D. ELECTRA: Pre-training Text Encoders as Discriminators Rather Than Generators. CoRR abs/2003.10555, (2020).</mixed-citation><mixed-citation xml:lang="en">Clark K., Luong M.-T., Le Q. V., Manning C. D. 
ELECTRA: Pre-training Text Encoders as Discriminators Rather Than Generators. CoRR abs/2003.10555, (2020).</mixed-citation></citation-alternatives></ref><ref id="cit11"><label>11</label><citation-alternatives><mixed-citation xml:lang="ru">Dai Z. et al. Transformer-XL: Attentive language models beyond a fixed-length context // ACL 2019 - 57th Annual Meeting of the Association for Computational Linguistics, Proceedings of the Conference 2978–2988 (Association for Computational Linguistics (ACL), 2020).</mixed-citation><mixed-citation xml:lang="en">Dai Z. et al. Transformer-XL: Attentive language models beyond a fixed-length context. ACL 2019 - 57th Annual Meeting of the Association for Computational Linguistics, Proceedings of the Conference, 2978–2988 (Association for Computational Linguistics (ACL), 2020).</mixed-citation></citation-alternatives></ref><ref id="cit12"><label>12</label><citation-alternatives><mixed-citation xml:lang="ru">Keskar N. S., McCann B., Varshney L. R., Xiong C., Socher R. CTRL: A Conditional Transformer Language Model for Controllable Generation. CoRR abs/1909.05858, (2019).</mixed-citation><mixed-citation xml:lang="en">Keskar N. S., McCann B., Varshney L. R., Xiong C., Socher R. CTRL: A Conditional Transformer Language Model for Controllable Generation. CoRR abs/1909.05858, (2019).</mixed-citation></citation-alternatives></ref><ref id="cit13"><label>13</label><citation-alternatives><mixed-citation xml:lang="ru">Radford A., Narasimhan K., Salimans T., Sutskever I. (OpenAI Transformer): Improving Language Understanding by Generative Pre-Training. OpenAI 1–10 (2018).</mixed-citation><mixed-citation xml:lang="en">Radford A., Narasimhan K., Salimans T., Sutskever I. (OpenAI Transformer): Improving Language Understanding by Generative Pre-Training. 
OpenAI 1–10 (2018).</mixed-citation></citation-alternatives></ref><ref id="cit14"><label>14</label><citation-alternatives><mixed-citation xml:lang="ru">Alec Radford, Jeffrey Wu, Rewon Child, David Luan, Dario Amodei, I. S. Language Models are Unsupervised Multitask Learners. OpenAI Blog 1, 1–7 (2020).</mixed-citation><mixed-citation xml:lang="en">Alec Radford, Jeffrey Wu, Rewon Child, David Luan, Dario Amodei, I. S. Language Models are Unsupervised Multitask Learners. OpenAI Blog 1, 1–7 (2020).</mixed-citation></citation-alternatives></ref><ref id="cit15"><label>15</label><citation-alternatives><mixed-citation xml:lang="ru">Brown T. B. et al. Language models are few-shot learners // Advances in Neural Information Processing Systems 2020-December, (Neural information processing systems foundation, 2020).</mixed-citation><mixed-citation xml:lang="en">Brown T. B. et al. Language models are few-shot learners. Advances in Neural Information Processing Systems 2020-December, (Neural information processing systems foundation, 2020).</mixed-citation></citation-alternatives></ref><ref id="cit16"><label>16</label><citation-alternatives><mixed-citation xml:lang="ru">Hahn S., Choi H. Self-knowledge distillation in natural language processing // International Conference Recent Advances in Natural Language Processing, RANLP 2019-September, 423–430 (Incoma Ltd, 2019).</mixed-citation><mixed-citation xml:lang="en">Hahn S., Choi H. Self-knowledge distillation in natural language processing. International Conference Recent Advances in Natural Language Processing, RANLP 2019-September, 423–430 (Incoma Ltd, 2019).</mixed-citation></citation-alternatives></ref><ref id="cit17"><label>17</label><citation-alternatives><mixed-citation xml:lang="ru">Sanh V., Debut L., Chaumond J., Wolf T. DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter. CoRR abs/1910.01108, (2019).</mixed-citation><mixed-citation xml:lang="en">Sanh V., Debut L., Chaumond J., Wolf T. 
DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter. CoRR abs/1910.01108, (2019).</mixed-citation></citation-alternatives></ref><ref id="cit18"><label>18</label><citation-alternatives><mixed-citation xml:lang="ru">Li T., El Mesbahi Y., Kobyzev I., Rashid A., Mahmud A., Anchuri N., Hajimolahoseini H., Liu Y., Rezagholizadeh M. A Short Study on Compressing Decoder-Based Language Models. CoRR abs/2110.08460 (2021).</mixed-citation><mixed-citation xml:lang="en">Li T., El Mesbahi Y., Kobyzev I., Rashid A., Mahmud A., Anchuri N., Hajimolahoseini H., Liu Y., Rezagholizadeh M. A Short Study on Compressing Decoder-Based Language Models. CoRR abs/2110.08460 (2021).</mixed-citation></citation-alternatives></ref><ref id="cit19"><label>19</label><citation-alternatives><mixed-citation xml:lang="ru">Le T. D. et al. Compiling ONNX Neural Network Models Using MLIR. CoRR abs/2008.08272, (2020).</mixed-citation><mixed-citation xml:lang="en">Le T. D. et al. Compiling ONNX Neural Network Models Using MLIR. CoRR abs/2008.08272, (2020).</mixed-citation></citation-alternatives></ref><ref id="cit20"><label>20</label><citation-alternatives><mixed-citation xml:lang="ru">Efimov P., Chertok A., Boytsov L., Braslavski P. SberQuAD – Russian Reading Comprehension Dataset: Description and Analysis // Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics) 12260 LNCS, 3–15 (Springer Science and Business Media Deutschland GmbH, 2020).</mixed-citation><mixed-citation xml:lang="en">Efimov P., Chertok A., Boytsov L., Braslavski P. SberQuAD – Russian Reading Comprehension Dataset: Description and Analysis. 
Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics) 12260 LNCS, 3–15 (Springer Science and Business Media Deutschland GmbH, 2020).</mixed-citation></citation-alternatives></ref><ref id="cit21"><label>21</label><citation-alternatives><mixed-citation xml:lang="ru">Kuratov Y., Arkhipov M. Adaptation of deep bidirectional multilingual transformers for Russian language // Komp’juternaja Lingvistika i Intellektual’nye Tehnologii 2019-May, 333–339 (ABBYY PRODUCTION LLC, 2019).</mixed-citation><mixed-citation xml:lang="en">Kuratov Y., Arkhipov M. Adaptation of deep bidirectional multilingual transformers for Russian language. Komp’juternaja Lingvistika i Intellektual’nye Tehnologii 2019-May, 333–339 (ABBYY PRODUCTION LLC, 2019).</mixed-citation></citation-alternatives></ref><ref id="cit22"><label>22</label><citation-alternatives><mixed-citation xml:lang="ru">Abdaoui A., Pradel C., Sigel G. Load What You Need: Smaller Versions of Multilingual BERT. in 119–123 (Association for Computational Linguistics (ACL), 2020).</mixed-citation><mixed-citation xml:lang="en">Abdaoui A., Pradel C., Sigel G. Load What You Need: Smaller Versions of Multilingual BERT. in 119–123 (Association for Computational Linguistics (ACL), 2020).</mixed-citation></citation-alternatives></ref></ref-list><fn-group><fn fn-type="conflict"><p>The authors declare that there are no conflicts of interest present.</p></fn></fn-group></back></article>
