<?xml version="1.0" encoding="UTF-8"?>
<article article-type="research-article" dtd-version="1.3" xml:lang="ru" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="https://metafora.rcsi.science/xsd_files/journal3.xsd">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">moitvivt</journal-id>
      <journal-title-group>
        <journal-title xml:lang="ru">Моделирование, оптимизация и информационные технологии</journal-title>
        <trans-title-group xml:lang="en">
          <trans-title>Modeling, Optimization and Information Technology</trans-title>
        </trans-title-group>
      </journal-title-group>
      <issn pub-type="epub">2310-6018</issn>
      <publisher>
        <publisher-name>Издательство</publisher-name>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="doi">10.26102/2310-6018/2022.39.4.006</article-id>
      <article-id pub-id-type="custom" custom-type="elpub">1246</article-id>
      <title-group>
        <article-title xml:lang="ru">Извлечение морфологических признаков технических систем из русскоязычных патентов по деревьям зависимостей</article-title>
        <trans-title-group xml:lang="en">
          <trans-title>Extraction of morphological features of technical systems from Russian patents using dependency tree analysis</trans-title>
        </trans-title-group>
      </title-group>
      <contrib-group>
        <contrib contrib-type="author" corresp="yes">
          <contrib-id contrib-id-type="orcid">0000-0001-5044-9787</contrib-id>
          <name-alternatives>
            <name name-style="eastern" xml:lang="ru">
              <surname>Васильев</surname>
              <given-names>Сергей Сергеевич</given-names>
            </name>
            <name name-style="western" xml:lang="en">
              <surname>Vasiliev</surname>
              <given-names>Sergey Sergeevich</given-names>
            </name>
          </name-alternatives>
          <email>svasilev2012@yandex.ru</email>
          <xref ref-type="aff" rid="aff-1">aff-1</xref>
        </contrib>
        <contrib contrib-type="author" corresp="yes">
          <contrib-id contrib-id-type="orcid">0000-0002-4684-1011</contrib-id>
          <name-alternatives>
            <name name-style="eastern" xml:lang="ru">
              <surname>Коробкин</surname>
              <given-names>Дмитрий Михайлович</given-names>
            </name>
            <name name-style="western" xml:lang="en">
              <surname>Korobkin</surname>
              <given-names>Dmitry Mikhailovich</given-names>
            </name>
          </name-alternatives>
          <email>dkorobkin80@mail.ru</email>
          <xref ref-type="aff" rid="aff-2">aff-2</xref>
        </contrib>
        <contrib contrib-type="author" corresp="yes">
          <name-alternatives>
            <name name-style="eastern" xml:lang="ru">
              <surname>Фоменков</surname>
              <given-names>Сергей Алексеевич</given-names>
            </name>
            <name name-style="western" xml:lang="en">
              <surname>Fomenkov</surname>
              <given-names>Sergey Alekseevich</given-names>
            </name>
          </name-alternatives>
          <email>saf550@yandex.ru</email>
          <xref ref-type="aff" rid="aff-3">aff-3</xref>
        </contrib>
      </contrib-group>
      <aff-alternatives id="aff-1">
        <aff xml:lang="ru">Волгоградский государственный технический университет</aff>
        <aff xml:lang="en">Volgograd State Technical University</aff>
      </aff-alternatives>
      <aff-alternatives id="aff-2">
        <aff xml:lang="ru">Волгоградский государственный технический университет</aff>
        <aff xml:lang="en">Volgograd State Technical University</aff>
      </aff-alternatives>
      <aff-alternatives id="aff-3">
        <aff xml:lang="ru">Волгоградский государственный технический университет</aff>
        <aff xml:lang="en">Volgograd State Technical University</aff>
      </aff-alternatives>
      <pub-date pub-type="epub">
        <day>01</day>
        <month>01</month>
        <year>2026</year>
      </pub-date>
      <volume>1</volume>
      <issue>1</issue>
      <elocation-id>10.26102/2310-6018/2022.39.4.006</elocation-id>
      <permissions>
        <copyright-statement>Copyright © Авторы, 2026</copyright-statement>
        <copyright-year>2026</copyright-year>
        <license license-type="creative-commons-attribution" xlink:href="https://creativecommons.org/licenses/by/4.0/">
          <license-p>This work is licensed under a Creative Commons Attribution 4.0 International License</license-p>
        </license>
      </permissions>
      <self-uri xlink:href="https://moitvivt.ru/ru/journal/article?id=1246"/>
      <abstract xml:lang="ru">
        <p>В статье представлена методология извлечения морфологических признаков технических систем в виде компонентов устройства и связей между ними. Объектом анализа для извлечения данных выступает главный пункт формулы изобретения в текстах русскоязычных патентов. Информация о компонентах устройства является наиболее фундаментальной и важной и может использоваться во множестве задач анализа патентного массива, а поиск эффективных подходов по извлечению такой информации все еще продолжается. В настоящем исследовании областью применения указанных данных рассматривается направление автоматизированного изобретательства. Целью работы являлся анализ качества извлечения данных по деревьям зависимостей для русского языка. Деревья зависимостей являются результатом работы систем синтаксической разметки естественного языка. Для сравнения были выбраны следующие синтаксические анализаторы: UDPipe, Stanza, DeepPavlov и spaCy. Выходные данные представлены в виде семантических структур SAO (Subject-Action-Object). Дана оценка качества извлечения данных с помощью метрик точности (precision), полноты (recall) и F1-меры. Для этого вручную было размечено 20 патентных формул с 252 структурами SAO. При текущих методологических ограничениях из тестовой выборки в лучшем случае удалось извлечь 79 % связок SAO в терминах метрики recall при нестрогой оценке данных, т. е. без учета полноты именных групп субъекта и объекта. Значение F1-меры по инструментам несколько ниже и находится в пределах от 48 % до 66 % в зависимости от типа оценки. Сделаны общие выводы по текущему уровню работы синтаксических анализаторов в рамках исследуемой области применения. Материалы статьи представляют практическую ценность при проработке эффективных подходов извлечения структурированных данных из русскоязычного патентного массива.</p>
      </abstract>
      <trans-abstract xml:lang="en">
        <p>The article presents a methodology for extracting morphological features of technical systems in the form of device components and connections between them. The main section of Russian patent claims is chosen as the subject of the study for data extraction. Information about device components is the most fundamental and important part. It can be used in many tasks of computer-aided patent analysis, while the search for effective approaches to extracting such information is still in progress. In the present inquiry, computer-aided development of inventions is considered as a range of applications for such data. The aim of the study was to explore the quality of data extraction using dependency tree analysis for the Russian language. The dependency tree is the result of markup by natural language processing tools. Several parsers were chosen for the comparison: UDPipe, Stanza, DeepPavlov and spaCy. The output data are presented in the form of semantic SAO (Subject-Action-Object) structures. The quality of data extraction has been evaluated using precision, recall and F1 metrics. For this purpose, 20 patent claims with 252 SAO structures were manually marked. Under the current methodological constraints, we were able to extract from the dataset 79 % of the SAO structures at best according to the recall metric with a non-strict data evaluation, i.e. without accounting for the completeness of noun groups. The value of the F1-measure is lower and ranges from 48 % to 66 % depending on the evaluation type. Conclusions are drawn about the current level of the syntactic analyzer performance within the field of application under review. The results can be useful for developing efficient approaches to extracting structured data from Russian patent arrays.</p>
      </trans-abstract>
      <kwd-group xml:lang="ru">
        <kwd>патент</kwd>
        <kwd>извлечение данных</kwd>
        <kwd>компоненты устройств</kwd>
        <kwd>деревья зависимостей</kwd>
        <kwd>SAO</kwd>
      </kwd-group>
      <kwd-group xml:lang="en">
        <kwd>patent</kwd>
        <kwd>data extraction</kwd>
        <kwd>device components</kwd>
        <kwd>dependency trees</kwd>
        <kwd>SAO</kwd>
      </kwd-group>
      <funding-group>
        <funding-statement xml:lang="ru">Исследование выполнено за счет гранта Российского научного фонда № 22-21-20125, https://rscf.ru/project/22-21-20125/, и Администрации Волгоградской области.</funding-statement>
        <funding-statement xml:lang="en">The research was supported by the grant of the Russian Science Foundation No. 22-21-20125, https://rscf.ru/project/22-21-20125/, and the Administration of Volgograd Oblast.</funding-statement>
      </funding-group>
    </article-meta>
  </front>
  <back>
    <ref-list>
      <title>References</title>
      <ref id="cit1">
        <label>1</label>
        <mixed-citation xml:lang="ru">Li X., Song H., Zhang X., Xu Q. Fine-grained Construction of Semantic Technology Network for Technology Evolution Analysis. Proc. of the 3rd International Conference on Computer Science and Application Engineering. 2019:1–7. DOI: 10.1145/3331453.3361638.</mixed-citation>
      </ref>
      <ref id="cit2">
        <label>2</label>
        <mixed-citation xml:lang="ru">You H., Li M., Hipel K.W. et al. Development trend forecasting for coherent light generator technology based on patent citation network analysis. Scientometrics. 2017;111:297–315. DOI: 10.1007/s11192-017-2252-y.</mixed-citation>
      </ref>
      <ref id="cit3">
        <label>3</label>
        <mixed-citation xml:lang="ru">Kim S., Yoon B. Patent infringement analysis using a text mining technique based on SAO structure. Computers in Industry. 2021;125:103379. DOI: 10.1016/j.compind.2020.103379.</mixed-citation>
      </ref>
      <ref id="cit4">
        <label>4</label>
        <mixed-citation xml:lang="ru">Feng L., Niu Y., Wang J. Development of Morphology Analysis-Based Technology Roadmap Considering Layer Expansion Paths: Application of TRIZ and Text Mining. Applied Sciences. 2020;10(23):8498. DOI: 10.3390/app10238498.</mixed-citation>
      </ref>
      <ref id="cit5">
        <label>5</label>
        <mixed-citation xml:lang="ru">Liu L., Li Y., Xiong Y., Cavallucci D. A new function-based patent knowledge retrieval tool for conceptual design of innovative products. Computers in Industry. 2020;115:103154. DOI: 10.1016/j.compind.2019.103154.</mixed-citation>
      </ref>
      <ref id="cit6">
        <label>6</label>
        <mixed-citation xml:lang="ru">Зарипова В.М., Петрова И.Ю., Цырульников Е.С. Классификация автоматизированных систем поддержки инновационных процессов на предприятии (Computer Aided Innovation – CAI). Прикаспийский журнал: управление и высокие технологии. 2012;1(17):26–35. Доступно по: https://elibrary.ru/download/elibrary_17708904_61173989.pdf (дата обращения: 20.10.2022).</mixed-citation>
      </ref>
      <ref id="cit7">
        <label>7</label>
        <mixed-citation xml:lang="ru">Васильев С.С., Коробкин Д.М., Фоменков С.А. Метод формирования информационного обеспечения синтеза новых технических решений на основе анализа патентного массива. Часть 1. Вестник компьютерных и информационных технологий. 2021;18(11):3–12. DOI: 10.14489/vkit.2021.11.pp.003-012.</mixed-citation>
      </ref>
      <ref id="cit8">
        <label>8</label>
        <mixed-citation xml:lang="ru">Boting G., Wenqing W. Open Relation Extraction in Patent Claims with a Hybrid Network. Wireless Communications and Mobile Computing. 2021;2021(1):1–7. DOI: 10.1155/2021/5547281.</mixed-citation>
      </ref>
      <ref id="cit9">
        <label>9</label>
        <mixed-citation xml:lang="ru">Yang S.-Y., Soo V.-W. Extract conceptual graphs from plain texts in patent claims. Engineering Applications of Artificial Intelligence. 2012;25(4):874–887. DOI: 10.1016/j.engappai.2011.11.006.</mixed-citation>
      </ref>
      <ref id="cit10">
        <label>10</label>
        <mixed-citation xml:lang="ru">Lyashevskaya O.N., Shavrina T.O., Trofimov I.V., Vlasova N.A. Grameval 2020 Shared Task: Russian Full Morphology And Universal Dependencies Parsing. Proc. of the International Conference «Dialogue 2020». 2020:553–569. DOI: 10.28995/2075-7182-2020-19-553-569.</mixed-citation>
      </ref>
      <ref id="cit11">
        <label>11</label>
        <mixed-citation xml:lang="ru">Ki W., Kim K. Generating Information Relation Matrix Using Semantic Patent Mining for Technology Planning: A Case of Nano-Sensor. IEEE Access. 2017;5:26783–26797. DOI: 10.1109/access.2017.2771371.</mixed-citation>
      </ref>
      <ref id="cit12">
        <label>12</label>
        <mixed-citation xml:lang="ru">Lin W., Liu X., Xiao R. Research on Product Core Component Acquisition Based on Patent Semantic Network. Entropy (Basel). 2022;24(4):549. DOI: 10.3390/e24040549.</mixed-citation>
      </ref>
      <ref id="cit13">
        <label>13</label>
        <mixed-citation xml:lang="ru">Honnibal M., Montani I. spaCy 2: Natural language understanding with Bloom embeddings, convolutional neural networks and incremental parsing. To appear. 2017.</mixed-citation>
      </ref>
      <ref id="cit14">
        <label>14</label>
        <mixed-citation xml:lang="ru">Yindi S., Wei L., Guozhong C., Qingjin P., Jianjie G., Jiaming F. Effective design knowledge abstraction from Chinese patents based on a meta-model of the patent design knowledge graph. Computers in Industry. 2022;142:103749. DOI: 10.1016/j.compind.2022.103749.</mixed-citation>
      </ref>
      <ref id="cit15">
        <label>15</label>
        <mixed-citation xml:lang="ru">Krestel R., Chikkamath R., Hewel C., Risch J. A survey on deep learning for patent analysis. World Patent Information. 2021;65:102035. DOI: 10.1016/j.wpi.2021.102035.</mixed-citation>
      </ref>
      <ref id="cit16">
        <label>16</label>
        <mixed-citation xml:lang="ru">Chen L., Xu S., Zhu L., Zhang J., Lei X., Yang G. A deep learning based method for extracting semantic information from patent documents. Scientometrics. 2020;125:289–312. DOI: 10.1007/s11192-020-03634-y.</mixed-citation>
      </ref>
      <ref id="cit17">
        <label>17</label>
        <mixed-citation xml:lang="ru">Xueqiang L., Xiangru L., Xindong Y., Zhian D., Junmei H. Relation Extraction Toward Patent Domain Based on Keyword Strategy and Attention+BiLSTM Model (Short Paper). Proc. of the 15th EAI International Conference, CollaborateCom. 2019. DOI: 10.1007/978-3-030-30146-0_28.</mixed-citation>
      </ref>
      <ref id="cit18">
        <label>18</label>
        <mixed-citation xml:lang="ru">Kolesnikova V., Korobkin D., Fomenkov S., Rayushkin E., Glushkin V. The Analysis of Technology Development Trends Based on the Network Semantic Structure «Subject-Action-Object». Cyber-Physical Systems: Intelligent Models and Algorithms. Studies in Systems, Decision and Control. 2022;417:43–53. DOI: 10.1007/978-3-030-95116-0_4.</mixed-citation>
      </ref>
      <ref id="cit19">
        <label>19</label>
        <mixed-citation xml:lang="ru">Straka M., Hajič J., Straková J. UDPipe: Trainable Pipeline for Processing CoNLL-U Files Performing Tokenization, Morphological Analysis, POS Tagging and Parsing. Proc. of the Tenth International Conference on Language Resources and Evaluation (LREC'16). 2016:4290–4297. Доступно по: https://aclanthology.org/L16-1680.pdf (дата обращения: 20.10.2022).</mixed-citation>
      </ref>
      <ref id="cit20">
        <label>20</label>
        <mixed-citation xml:lang="ru">Qi P., Zhang Y., Zhang Y., Bolton J., Manning C.D. Stanza: A Python Natural Language Processing Toolkit for Many Human Languages. Association for Computational Linguistics (ACL) System Demonstrations. 2020. Доступно по: https://nlp.stanford.edu/pubs/qi2020stanza.pdf (дата обращения: 20.10.2022).</mixed-citation>
      </ref>
      <ref id="cit21">
        <label>21</label>
        <mixed-citation xml:lang="ru">Burtsev M. et al. DeepPavlov: Open-Source Library for Dialogue Systems. Proc. of ACL 2018, System Demonstrations. 2018:122–127. DOI: 10.18653/v1/P18-4021.</mixed-citation>
      </ref>
    </ref-list>
    <fn-group>
      <fn fn-type="conflict">
        <p>The authors declare that there are no conflicts of interest present.</p>
      </fn>
    </fn-group>
  </back>
</article>