<?xml version="1.0" encoding="UTF-8"?>
<article article-type="research-article" dtd-version="1.3" xml:lang="ru" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="https://metafora.rcsi.science/xsd_files/journal3.xsd">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">moitvivt</journal-id>
      <journal-title-group>
        <journal-title xml:lang="ru">Моделирование, оптимизация и информационные технологии</journal-title>
        <trans-title-group xml:lang="en">
          <trans-title>Modeling, Optimization and Information Technology</trans-title>
        </trans-title-group>
      </journal-title-group>
      <issn pub-type="epub">2310-6018</issn>
      <publisher>
        <publisher-name>Издательство</publisher-name>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="doi">10.26102/2310-6018/2024.44.1.002</article-id>
      <article-id pub-id-type="custom" custom-type="elpub">1471</article-id>
      <title-group>
        <article-title xml:lang="ru">Распознавание дизартричной речи по фонемам с использованием скрытых марковских моделей</article-title>
        <trans-title-group xml:lang="en">
          <trans-title>Dysarthric speech recognition by phonemes using hidden Markov models</trans-title>
        </trans-title-group>
      </title-group>
      <contrib-group>
        <contrib contrib-type="author" corresp="yes">
          <contrib-id contrib-id-type="orcid">0009-0005-7370-9947</contrib-id>
          <name-alternatives>
            <name name-style="eastern" xml:lang="ru">
              <surname>Бредихин</surname>
              <given-names>Борис Андреевич</given-names>
            </name>
            <name name-style="western" xml:lang="en">
              <surname>Bredikhin</surname>
              <given-names>Boris Andreevich</given-names>
            </name>
          </name-alternatives>
          <email>Boris.Bredikhin@urfu.me</email>
          <xref ref-type="aff">aff-1</xref>
        </contrib>
        <contrib contrib-type="author" corresp="yes">
          <contrib-id contrib-id-type="orcid">0000-0001-7926-9245</contrib-id>
          <name-alternatives>
            <name name-style="eastern" xml:lang="ru">
              <surname>Антор</surname>
              <given-names>Махамудул</given-names>
            </name>
            <name name-style="western" xml:lang="en">
              <surname>Antor</surname>
              <given-names>Mahamudul</given-names>
            </name>
          </name-alternatives>
          <email>hashan.antor@gmail.com</email>
          <xref ref-type="aff">aff-2</xref>
        </contrib>
        <contrib contrib-type="author" corresp="yes">
          <name-alternatives>
            <name name-style="eastern" xml:lang="ru">
              <surname>Хлебников</surname>
              <given-names>Николай Александрович</given-names>
            </name>
            <name name-style="western" xml:lang="en">
              <surname>Khlebnikov</surname>
              <given-names>Nikolay Aleksandrovich</given-names>
            </name>
          </name-alternatives>
          <email>na.khlebnikov@urfu.ru</email>
          <xref ref-type="aff">aff-3</xref>
        </contrib>
        <contrib contrib-type="author" corresp="yes">
          <name-alternatives>
            <name name-style="eastern" xml:lang="ru">
              <surname>Мельников</surname>
              <given-names>Александр Валерьевич</given-names>
            </name>
            <name name-style="western" xml:lang="en">
              <surname>Melnikov</surname>
              <given-names>Aleksandr Valerievich</given-names>
            </name>
          </name-alternatives>
          <email>sanek.melnikov@mail.ru</email>
          <xref ref-type="aff">aff-4</xref>
        </contrib>
        <contrib contrib-type="author" corresp="yes">
          <name-alternatives>
            <name name-style="eastern" xml:lang="ru">
              <surname>Бачурин</surname>
              <given-names>Матвей Владимирович</given-names>
            </name>
            <name name-style="western" xml:lang="en">
              <surname>Bachurin</surname>
              <given-names>Matvey Vladimirovich</given-names>
            </name>
          </name-alternatives>
          <email>matvey_1703@mail.ru</email>
          <xref ref-type="aff">aff-5</xref>
        </contrib>
      </contrib-group>
      <aff-alternatives id="aff-1">
        <aff xml:lang="ru">Уральский федеральный университет, ООО "СайберЛимфа"</aff>
        <aff xml:lang="en">Ural Federal University, CyberLympha</aff>
      </aff-alternatives>
      <aff-alternatives id="aff-2">
        <aff xml:lang="ru">Уральский федеральный университет</aff>
        <aff xml:lang="en">Ural Federal University</aff>
      </aff-alternatives>
      <aff-alternatives id="aff-3">
        <aff xml:lang="ru">Уральский федеральный университет</aff>
        <aff xml:lang="en">Ural Federal University</aff>
      </aff-alternatives>
      <aff-alternatives id="aff-4">
        <aff xml:lang="ru">Уральский федеральный университет</aff>
        <aff xml:lang="en">Ural Federal University</aff>
      </aff-alternatives>
      <aff-alternatives id="aff-5">
        <aff xml:lang="ru">Уральский федеральный университет</aff>
        <aff xml:lang="en">Ural Federal University</aff>
      </aff-alternatives>
      <pub-date pub-type="epub">
        <day>01</day>
        <month>01</month>
        <year>2026</year>
      </pub-date>
      <volume>1</volume>
      <issue>1</issue>
      <elocation-id>10.26102/2310-6018/2024.44.1.002</elocation-id>
      <permissions>
        <copyright-statement>Copyright © Авторы, 2026</copyright-statement>
        <copyright-year>2026</copyright-year>
        <license license-type="creative-commons-attribution" xlink:href="https://creativecommons.org/licenses/by/4.0/">
          <license-p>This work is licensed under a Creative Commons Attribution 4.0 International License</license-p>
        </license>
      </permissions>
      <self-uri xlink:href="https://moitvivt.ru/ru/journal/article?id=1471"/>
      <abstract xml:lang="ru">
        <p>Актуальность работы обусловлена сложностями устного взаимодействия людей с нарушениями речи с нормотипичными собеседниками, а также низким качеством распознавания аномальной речи стандартными системами распознавания речи и невозможностью создания системы, способной обработать любые нарушения речи. В связи с этим данная статья направлена на разработку метода автоматического распознавания дизартричной речи с применением предобученной нейронной сети для распознавания фонем и скрытых марковских моделей для преобразования фонем в текст и последующей коррекции результатов распознавания с помощью поиска в пространстве допустимых слов ближайшего по расстоянию Левенштейна слова и динамического алгоритма разбиения выхода модели на отдельные слова. Основное преимущество использования скрытых марковских моделей по сравнению с нейронными сетями заключается в малом размере обучающего набора данных, собираемого индивидуально для каждого пользователя, а также в простоте дообучения модели в случае прогрессирующих нарушений речи. Описывается набор данных для обучения модели, и даются рекомендации по сбору и разметке данных для обучения модели. Эффективность предложенного метода проверяется на индивидуальном наборе данных, записанных человеком с дизартрией; качество распознавания сравнивается с нейросетевыми моделями, обученными на используемом наборе данных. Материалы статьи представляют практическую ценность для создания средства дополненной коммуникации для людей с нарушениями речи.</p>
      </abstract>
      <trans-abstract xml:lang="en">
        <p>The relevance of the paper is due to the difficulties of oral interaction between people with speech disorders and normotypic interlocutors as well as the low quality of abnormal speech recognition by standard speech recognition systems and the inability to create a system capable of processing any speech disorders. In this regard, this article is aimed at developing a method for automatic recognition of dysarthric speech using a pre-trained neural network for recognizing phonemes and hidden Markov models for converting phonemes into text and subsequent correction of recognition results using a search in the space of acceptable words of the nearest Levenshtein word and a dynamic algorithm for splitting the output of the model into separate words. The main advantage of using hidden Markov models in comparison with neural networks is the small size of the training data set collected individually for each user, as well as the ease of training the model further in case of progressive speech disorders. The data set for model training is described, and recommendations for collecting and labeling data for model training are given. The effectiveness of the proposed method is tested on an individual data set recorded by a person with dysarthria; the recognition quality is compared with neural network models trained on the data set used. The materials of the article are of practical value for creating an augmentative communication system for people with speech disorders.</p>
      </trans-abstract>
      <kwd-group xml:lang="ru">
        <kwd>скрытые марковские модели</kwd>
        <kwd>дизартрия</kwd>
        <kwd>автоматическое распознавание речи</kwd>
        <kwd>распознавание фонем</kwd>
        <kwd>коррекция фонем</kwd>
      </kwd-group>
      <kwd-group xml:lang="en">
        <kwd>hidden Markov models</kwd>
        <kwd>dysarthria</kwd>
        <kwd>automatic speech recognition</kwd>
        <kwd>phoneme recognition</kwd>
        <kwd>phoneme correction</kwd>
      </kwd-group>
      <funding-group>
        <funding-statement xml:lang="ru">Исследование выполнено без спонсорской поддержки.</funding-statement>
        <funding-statement xml:lang="en">The study was performed without external funding.</funding-statement>
      </funding-group>
    </article-meta>
  </front>
  <back>
    <ref-list>
      <title>References</title>
      <ref id="cit1">
        <label>1</label>
        <mixed-citation xml:lang="ru">Rowe H.P., Gutz S.E., Maffei M.F., Tomanek K., Green J.R. Characterizing dysarthria diversity for automatic speech recognition: a tutorial from the clinical perspective. Front. Comput. Sci. 2022;4:770210. DOI: 10.3389/fcomp.2022.770210.</mixed-citation>
      </ref>
      <ref id="cit2">
        <label>2</label>
        <mixed-citation xml:lang="ru">Balaji V., Sadashivappa G. Speech disabilities in adults and the suitable speech recognition software tools – a review. In: 2015 International Conference on Computing and Network Communications (CoCoNet), Trivandrum, India, 2015. p. 559–564. DOI: 10.1109/CoCoNet.2015.7411243.</mixed-citation>
      </ref>
      <ref id="cit3">
        <label>3</label>
        <mixed-citation xml:lang="ru">Xiong F., Barker J., Christensen H. Deep learning of articulatory-based representations and applications for improving dysarthric speech recognition. Speech Communication; 13th ITG-Symposium, Oldenburg, Germany, 2018. p. 1–5.</mixed-citation>
      </ref>
      <ref id="cit4">
        <label>4</label>
        <mixed-citation xml:lang="ru">Xiong F., Barker J., Christensen H. Phonetic analysis of dysarthric speech tempo and applications to robust personalised dysarthric speech recognition. ICASSP 2019 – 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Brighton, UK, 2019. p. 5836–5840. DOI: 10.1109/ICASSP.2019.8683091.</mixed-citation>
      </ref>
      <ref id="cit5">
        <label>5</label>
        <mixed-citation xml:lang="ru">Hawley M.S., Cunningham S.P., Green P.D., Enderby P., Palmer R., Sehgal S., et al. A voice-input voice-output communication aid for people with severe speech impairment. IEEE Transactions on Neural Systems and Rehabilitation Engineering. 2013;21(1):23–31.</mixed-citation>
      </ref>
      <ref id="cit6">
        <label>6</label>
        <mixed-citation xml:lang="ru">Yeo E.J., Choi K., Kim S., Chung M. Automatic severity classification of dysarthric speech by using self-supervised model with multi-task learning. In: ICASSP 2023 – 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Rhodes Island, Greece, 2023. p. 1–5. DOI: 10.1109/ICASSP49357.2023.10094605.</mixed-citation>
      </ref>
      <ref id="cit7">
        <label>7</label>
        <mixed-citation xml:lang="ru">Hashan A.M., Bredikhin B. Russian Voice Dataset. Kaggle. URL: https://www.kaggle.com/dsv/5954738 (дата обращения: 12.08.2023).</mixed-citation>
      </ref>
      <ref id="cit8">
        <label>8</label>
        <mixed-citation xml:lang="ru">Xu Q., Baevski A., Auli M. Simple and effective zero-shot cross-lingual phoneme recognition. arXiv; 2021. URL: http://arxiv.org/abs/2109.11680 (дата обращения: 18.05.2023).</mixed-citation>
      </ref>
      <ref id="cit9">
        <label>9</label>
        <mixed-citation xml:lang="ru">Левенштейн В.И. Двоичные коды с исправлением выпадений, вставок и замещений символов. Докл. АН СССР. 1965;163(4):845–848.</mixed-citation>
      </ref>
      <ref id="cit10">
        <label>10</label>
        <mixed-citation xml:lang="ru">Baevski A., Zhou Y., Mohamed A., Auli M. wav2vec 2.0: A framework for self-supervised learning of speech representations. In: Advances in Neural Information Processing Systems. Curran Associates, Inc.; 2020. p. 12449–12460. DOI: 10.48550/arXiv.2006.11477.</mixed-citation>
      </ref>
    </ref-list>
    <fn-group>
      <fn fn-type="conflict">
        <p>The authors declare that there are no conflicts of interest present.</p>
      </fn>
    </fn-group>
  </back>
</article>