<?xml version="1.0" encoding="UTF-8"?>
<article article-type="research-article" dtd-version="1.3" xml:lang="ru" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="https://metafora.rcsi.science/xsd_files/journal3.xsd">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">moitvivt</journal-id>
      <journal-title-group>
        <journal-title xml:lang="ru">Моделирование, оптимизация и информационные технологии</journal-title>
        <trans-title-group xml:lang="en">
          <trans-title>Modeling, Optimization and Information Technology</trans-title>
        </trans-title-group>
      </journal-title-group>
      <issn pub-type="epub">2310-6018</issn>
      <publisher>
        <publisher-name>Издательство</publisher-name>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="doi">10.26102/2310-6018/2022.37.2.021</article-id>
      <article-id pub-id-type="custom" custom-type="elpub">1187</article-id>
      <title-group>
        <article-title xml:lang="ru">Ситуационно-ориентированные базы данных: обработка офисных документов</article-title>
        <trans-title-group xml:lang="en">
          <trans-title>Situation-Oriented Databases: Processing Office Documents</trans-title>
        </trans-title-group>
      </title-group>
      <contrib-group>
        <contrib contrib-type="author" corresp="yes">
          <contrib-id contrib-id-type="orcid">0000-0002-0550-4676</contrib-id>
          <name-alternatives>
            <name name-style="eastern" xml:lang="ru">
              <surname>Миронов</surname>
              <given-names>Валерий Викторович</given-names>
            </name>
            <name name-style="western" xml:lang="en">
              <surname>Mironov</surname>
              <given-names>Valeriy Viktorovich</given-names>
            </name>
          </name-alternatives>
          <email>mironov@list.ru</email>
          <xref ref-type="aff" rid="aff-1">aff-1</xref>
        </contrib>
        <contrib contrib-type="author" corresp="yes">
          <contrib-id contrib-id-type="orcid">0000-0003-4132-6106</contrib-id>
          <name-alternatives>
            <name name-style="eastern" xml:lang="ru">
              <surname>Гусаренко</surname>
              <given-names>Артем Сергеевич</given-names>
            </name>
            <name name-style="western" xml:lang="en">
              <surname>Gusarenko</surname>
              <given-names>Artem Sergeevich</given-names>
            </name>
          </name-alternatives>
          <email>valter-hartman@mail.ru</email>
          <xref ref-type="aff" rid="aff-2">aff-2</xref>
        </contrib>
        <contrib contrib-type="author" corresp="yes">
          <contrib-id contrib-id-type="orcid">0000-0002-7114-7638</contrib-id>
          <name-alternatives>
            <name name-style="eastern" xml:lang="ru">
              <surname>Юсупова</surname>
              <given-names>Нафиса Исламовна</given-names>
            </name>
            <name name-style="western" xml:lang="en">
              <surname>Yusupova</surname>
              <given-names>Nafisa Islamovna</given-names>
            </name>
          </name-alternatives>
          <email>yusupova.ni@ugatu.su</email>
          <xref ref-type="aff" rid="aff-3">aff-3</xref>
        </contrib>
      </contrib-group>
      <aff-alternatives id="aff-1">
        <aff xml:lang="ru">Уфимский государственный авиационный технический университет</aff>
        <aff xml:lang="en">Ufa State Aviation Technical University</aff>
      </aff-alternatives>
      <aff-alternatives id="aff-2">
        <aff xml:lang="ru">Уфимский государственный авиационный технический университет</aff>
        <aff xml:lang="en">Ufa State Aviation Technical University</aff>
      </aff-alternatives>
      <aff-alternatives id="aff-3">
        <aff xml:lang="ru">Уфимский государственный авиационный технический университет</aff>
        <aff xml:lang="en">Ufa State Aviation Technical University</aff>
      </aff-alternatives>
      <pub-date pub-type="epub">
        <day>01</day>
        <month>01</month>
        <year>2026</year>
      </pub-date>
      <volume>1</volume>
      <issue>1</issue>
      <elocation-id>10.26102/2310-6018/2022.37.2.021</elocation-id>
      <permissions>
        <copyright-statement>Copyright © Авторы, 2026</copyright-statement>
        <copyright-year>2026</copyright-year>
        <license license-type="creative-commons-attribution" xlink:href="https://creativecommons.org/licenses/by/4.0/">
          <license-p>This work is licensed under a Creative Commons Attribution 4.0 International License</license-p>
        </license>
      </permissions>
      <self-uri xlink:href="https://moitvivt.ru/ru/journal/article?id=1187"/>
      <abstract xml:lang="ru">
        <p>В статье рассматривается подход к построению документоориентированных веб-приложений на основе ситуационно-ориентированных баз данных. Приложения на базе ситуационно-ориентированных баз данных решают проблемы с извлечением и обработкой семантической информации из офисных документов. В уже имеющихся исследованиях рассматривались вопросы заполнения офисных документов, в данном же исследовании рассматриваются методы извлечения информации из графических документов и текстовых документов, созданных в обычных офисных пакетах. Создание и задействование таких методов достигается за счет характера внутреннего представления офисных документов в XML и возможности обработки такого содержимого программным способом. Рассматривается обработка XML-файлов в ситуационно-ориентированных базах данных, где Word-документы программно загружаются как XML-файлы, извлекаемые из ZIP-архивов. В дальнейшем после загрузки документы могут быть представлены как виртуальные документы или множество таких документов, объединенных в виртуальный массив данных и отображаемых на реальные данные XML или ZIP-архивы с XML-файлами внутри. Разработанные и применяемые методы работают в отношении как графических, так и текстовых документов. В статье также рассматриваются методы отыскания и идентификации нужных фрагментов данных внутри документа во время его обработки, базирующейся на стандартах описания в закладках, ключевых фразах и текстовых метках. Модели и алгоритмы для извлечения требующейся информации обсуждаются и демонстрируются на практических примерах, где рассматривается система дистанционного выполнения курсовых проектов студентами. В дополнение к примерам из учебного процесса рассматривается извлечение метаданных научных публикаций из международной издательской системы Open Journal Systems.</p>
      </abstract>
      <trans-abstract xml:lang="en">
        <p>This article discusses the application of a situation-oriented approach to the problem of extracting semantic information from office documents. Office documents created by vector graphics editors and word processors are reviewed. The ability to extract semantic information is due to the fact that such documents are based on open XML formats that can be processed by external programs. Processing of documents based on a situation-oriented database where Word documents are programmatically loaded as XML files extracted from ZIP archives is considered. In the situation-oriented database, it is possible to present an office document as a virtual document that is mapped both on XML files and the ZIP archive with XML files. This applies not only to text documents, but also to graphic documents that have an internal XML representation. This enables processing of documents in Office Open XML and Open Document Format. The article discusses various aspects of identifying and finding the necessary information during document processing by means of special standard definitions such as bookmarks, key phrases and text labels. Models and algorithms for extracting the required information are examined. Examples of the practical use of this approach in the field of distance learning of students at the university are given. In addition, an example of extracting metadata of scientific publications in the Open Journal Systems publishing system is considered.</p>
      </trans-abstract>
      <kwd-group xml:lang="ru">
        <kwd>ситуационно-ориентированная база данных</kwd>
        <kwd>встроенная динамическая модель</kwd>
        <kwd>Office Open XML</kwd>
        <kwd>Open Document Format</kwd>
      </kwd-group>
      <kwd-group xml:lang="en">
        <kwd>situation-oriented database</kwd>
        <kwd>built-in dynamic model</kwd>
        <kwd>Office Open XML</kwd>
        <kwd>Open Document Format</kwd>
      </kwd-group>
      <funding-group>
        <funding-statement xml:lang="ru">Исследование выполнено при финансовой поддержке РФФИ (грант № 19-07-00682). Результаты исследования, отображённые в структуре разработанного программного решения, были получены в рамках государственного задания № FEUE-2020-0007.</funding-statement>
        <funding-statement xml:lang="en">This research is supported by RFBR (grant 19-07-00682). The results of the study, reflecting the structure of the developed software solution, were obtained as part of the state task No. FEUE-2020-0007.</funding-statement>
      </funding-group>
    </article-meta>
  </front>
  <back>
    <ref-list>
      <title>References</title>
      <ref id="cit1">
        <label>1</label>
        <mixed-citation xml:lang="ru">Hou X., Li N., Yang H., Liang Q. Comparison of Wordprocessing Document Format in OOXML and ODF. In: 2010 Sixth International Conference on Semantics, Knowledge and Grids. 2010:297–300. DOI:10.1109/SKG.2010.44</mixed-citation>
      </ref>
      <ref id="cit2">
        <label>2</label>
        <mixed-citation xml:lang="ru">Schubert S. The Next Millennium Document Format. In DocEng’19: Proceedings of the ACM Symposium on Document Engineering 2019. New York, NY, USA: Association for Computing Machinery. 2019:1–4. DOI:10.1145/3342558.3345419</mixed-citation>
      </ref>
      <ref id="cit3">
        <label>3</label>
        <mixed-citation xml:lang="ru">Roig J., Ribera M. Implementation of the OOXML standard since its approval until today. In DSAI’2020: 9th International Conference on Software Development and Technologies for Enhancing Accessibility and Fighting Info-exclusion. New York, NY, USA: Association for Computing Machinery. 2020:129–134. DOI:10.1145/3439231.3440607</mixed-citation>
      </ref>
      <ref id="cit4">
        <label>4</label>
        <mixed-citation xml:lang="ru">Миронов В.В., Гусаренко А.С., Юсупова Н.И. Структурирование виртуальных мультидокументов в ситуационно-ориентированных базах данных с помощью entry-элементов. Труды СПИИРАН. 2017;(53):225–43. DOI:10.15622/sp.53.11</mixed-citation>
      </ref>
      <ref id="cit5">
        <label>5</label>
        <mixed-citation xml:lang="ru">Миронов В.В., Гусаренко А.С., Юсупова Н.И. Ситуационно-ориентированные базы данных: polyglot persistence на основе REST-микросервисов. Прикладная информатика. 2019;14(5(83)):87–97. DOI:10.24411/1993-8314-2019-10038</mixed-citation>
      </ref>
      <ref id="cit6">
        <label>6</label>
        <mixed-citation xml:lang="ru">Mironov V.V., Gusarenko A.S., Yusupova N.I., Smetanin Y.G. JSON documents processing using situation-oriented databases. Acta Polytechnica Hungarica. 2020;17(8):29–40. DOI:10.12700/APH.17.8.2020.8.3</mixed-citation>
      </ref>
      <ref id="cit7">
        <label>7</label>
        <mixed-citation xml:lang="ru">Mironov V.V., Gusarenko A.S., Tuguzbaev G.A. Graphic Documents Parametric Personalization for Information Support of Educational Design Using Situation-Oriented Databases. In ITIDS’2020: 8th Scientific Conference on Information Technologies for Intelligent Decision Making Support. Atlantis Press. 2020:260–267. DOI:10.2991/assehr.k.201029.050</mixed-citation>
      </ref>
      <ref id="cit8">
        <label>8</label>
        <mixed-citation xml:lang="ru">Миронов В.В., Гусаренко А.С., Тугузбаев Г.А. Извлечение семантической информации из графических схем. Информатика и автоматизация. 2021;20(4):940–70. DOI:10.15622/IA.20.4.7</mixed-citation>
      </ref>
      <ref id="cit9">
        <label>9</label>
        <mixed-citation xml:lang="ru">Mironov V.V., Gusarenko A.S., Yusupova N.I. Building of Virtual Multidocuments Mapping to Real Sources of Data in Situation-Oriented Databases. Communications in Computer and Information Science. 1204 CCIS. 2021:167–178. DOI:10.1007/978-3-030-78273-3_17</mixed-citation>
      </ref>
      <ref id="cit10">
        <label>10</label>
        <mixed-citation xml:lang="ru">Mironov V.V., Gusarenko A.S., Yusupova N.I. Monitoring YouTube Video Views in the Educational Environment Based on Situation-Oriented Database and RESTful Web Services. SIIT. 2021;3(1(5)):39–49.</mixed-citation>
      </ref>
      <ref id="cit11">
        <label>11</label>
        <mixed-citation xml:lang="ru">Kulkarni A., Shivananda A. Extracting the Data – Natural Language Processing Recipes. Springer; 2019.</mixed-citation>
      </ref>
      <ref id="cit12">
        <label>12</label>
        <mixed-citation xml:lang="ru">Bolotova L.S., Danchul A.N., Novikov A.P., Surkhaev M.A., Nikishina A.A. Initial identification in technology of informational search (part 1). Prikladnaya Informatika = Journal of Applied Informatics. 2015;4(58):128–142.</mixed-citation>
      </ref>
      <ref id="cit13">
        <label>13</label>
        <mixed-citation xml:lang="ru">Bolotova L.S., Danchul A.N., Novikov A.P., Surkhaev M.A., Nikishina A.A. Initial identification in technology of informational search (part 2). Prikladnaya Informatika = Journal of Applied Informatics. 2015;6(60):128–143.</mixed-citation>
      </ref>
      <ref id="cit14">
        <label>14</label>
        <mixed-citation xml:lang="ru">Joun J., Chung H., Park J., Lee S. Relevance analysis using revision identifier in MS word. Journal of Forensic Sciences. 2021;66(1):323–335.</mixed-citation>
      </ref>
      <ref id="cit15">
        <label>15</label>
        <mixed-citation xml:lang="ru">Jarzabek S., Dan D. Documentation Reuse: Managing Similar Documents. In: 2017 IEEE International Conference on Information Reuse and Integration (IRI). 2017:372–375. DOI:10.1109/IRI.2017.52</mixed-citation>
      </ref>
      <ref id="cit16">
        <label>16</label>
        <mixed-citation xml:lang="ru">Bešić D. Microservice for text extraction from word and pdf documents. In: Proceedings of the Faculty of Technical Sciences, Novi Sad. 2021;36(07):1252–1256. DOI:10.24867/13BE26Besic</mixed-citation>
      </ref>
      <ref id="cit17">
        <label>17</label>
        <mixed-citation xml:lang="ru">Duretec K., Rauber A., Becker C. A Text Extraction Software Benchmark Based on a Synthesized Dataset. In: 2017 ACM/IEEE Joint Conference on Digital Libraries (JCDL). 2017:1–10. DOI:10.1109/JCDL.2017.7991565</mixed-citation>
      </ref>
      <ref id="cit18">
        <label>18</label>
        <mixed-citation xml:lang="ru">Karcioğlu A.A., Yaşa A.C. Automatic Summary Extraction in Texts Using Genetic Algorithms. In: 2020 28th Signal Processing and Communications Applications Conference (SIU). 2020:1–4. DOI:10.1109/SIU49456.2020.9302205</mixed-citation>
      </ref>
      <ref id="cit19">
        <label>19</label>
        <mixed-citation xml:lang="ru">Harmata S., Hofer-Schmitz K., Nguyen P.H., Quix C., Bakiu B. Layout-Aware Semi-automatic Information Extraction for Pharmaceutical Documents. In: Da Silveira M, Pruski C, Schneider R, editors. Data Integration in the Life Sciences. Cham: Springer International Publishing. 2017:71–85. (Lecture Notes in Computer Science). DOI:10.1007/978-3-319-69751-2_8</mixed-citation>
      </ref>
      <ref id="cit20">
        <label>20</label>
        <mixed-citation xml:lang="ru">Zhang J., Xie Y., Shen J., Wang L., Lin H. Text Information Hiding Method Using the Custom Components. In: Sun X, Pan Z, Bertino E, editors. Cloud Computing and Security. Cham: Springer International Publishing. 2018:473–84. (Lecture Notes in Computer Science). DOI:10.1007/978-3-030-00015-8_41</mixed-citation>
      </ref>
      <ref id="cit21">
        <label>21</label>
        <mixed-citation xml:lang="ru">Lubenets Y.V., Miroshnikov A.I. Software Supports for Remote Examination on Mathematical Disciplines in Higher Education. In TELE: 2021 1st International Conference on Technology Enhanced Learning in Higher Education. 2021:274–277. DOI:10.1109/TELE52840.2021.9482472</mixed-citation>
      </ref>
      <ref id="cit22">
        <label>22</label>
        <mixed-citation xml:lang="ru">Abramova I.A., Syrkin V.V., Stepanov A.P. Extensions of the standard functionality and interface of MS Office applications based on the development of custom add-ins. Nauka i Voennaya Bezopasnost'. 2020;2(21):192–199.</mixed-citation>
      </ref>
      <ref id="cit23">
        <label>23</label>
        <mixed-citation xml:lang="ru">Miroshnikova E.P., Levonevskiy D.K., Motienko A.I. Modules for import, export and data analytics in the electronic journal management system of the ‘Spiiras Proceedings’ journal for automated interaction with global indices and aggregators. Problemy iskusstvennogo intellekta = Problems of Artificial Intelligence. 2019;3(14):58–75.</mixed-citation>
      </ref>
      <ref id="cit24">
        <label>24</label>
        <mixed-citation xml:lang="ru">Reznichenko O.S., Sivakov S.I., Reznichenko T.A. Method of automated generation of information about university’s scientific publications for reporting in the research management system of the Russian Ministry of Science and Higher Education. Universitetskoe Upravlenie: Praktika i Analiz = University Management: Practice and Analysis. 2020;24(2):44–58. DOI:10.15826/umpa.2020.02.013</mixed-citation>
      </ref>
      <ref id="cit25">
        <label>25</label>
        <mixed-citation xml:lang="ru">Pinto J., Rathod D., Quadros A. Text summarizer for URL and .DOCX files. International Journal of Advanced Research in Computer Science. 2020;11(4):18–21. DOI:10.26483/ijarcs.v11i4.6639</mixed-citation>
      </ref>
      <ref id="cit26">
        <label>26</label>
        <mixed-citation xml:lang="ru">Baynova M.S., Sokolov A.M. Tools for automated collection and analysis of sociological information on the territorial identity of city residents. Prikladnaya Informatika = Journal of Applied Informatics. 2021;2(92):92–102.</mixed-citation>
      </ref>
      <ref id="cit27">
        <label>27</label>
        <mixed-citation xml:lang="ru">Novikov A., Keyno P. Heterogeneous data collecting in scientific communities using portfolio management system in ConfID service. Prikladnaya Informatika = Journal of Applied Informatics. 2020;2(86):28–36.</mixed-citation>
      </ref>
      <ref id="cit28">
        <label>28</label>
        <mixed-citation xml:lang="ru">Izmailov V.V., Novoselova M.V. Automated system for generating task options based on MS Word document. Software Journal: Theory and Applications. 2017;1:1–5. DOI:10.15827/2311-6749.17.1.1</mixed-citation>
      </ref>
      <ref id="cit29">
        <label>29</label>
        <mixed-citation xml:lang="ru">Yu Z., Xiong Z. Comparative analyses for the performance of Rational Rose and Visio in software engineering teaching. In: J. Physics: Conf. Series, IOP Publishing. 2018;1087(6):062041. DOI:10.1088/1742-6596/1087/6/062041</mixed-citation>
      </ref>
      <ref id="cit30">
        <label>30</label>
        <mixed-citation xml:lang="ru">Parker D.J. Mastering Data Visualization with Microsoft Visio Professional 2016. Packt Publishing Ltd; 2016.</mixed-citation>
      </ref>
      <ref id="cit31">
        <label>31</label>
        <mixed-citation xml:lang="ru">He L., Lian J. Instructional design of practice course of logistics system planning and design based on Visio. In ITME’2018: Proc. 9th Int. Conf. on Information Technology in Medicine and Education. 2018:526–530. DOI:10.1109/ITME.2018.00122</mixed-citation>
      </ref>
      <ref id="cit32">
        <label>32</label>
        <mixed-citation xml:lang="ru">Ruiz Ledesma E.F. et al. Educational tool for generation and analysis of multidimensional modeling on data warehouse. Int. J. Advanced Computer Science and Applications. 2020;11(9):261–267. DOI:10.14569/IJACSA.2020.0110930</mixed-citation>
      </ref>
      <ref id="cit33">
        <label>33</label>
        <mixed-citation xml:lang="ru">Shafiee S. et al. Evaluating the benefits of a computer-aided software engineering tool to develop and document product configuration systems. Computers in Industry. 2021;128. DOI:10.1016/j.compind.2021.103432</mixed-citation>
      </ref>
      <ref id="cit34">
        <label>34</label>
        <mixed-citation xml:lang="ru">Medoh C., Telukdarie A. Business process modelling tool selection: a review. In IEEM’2017: Proc. IEEE Int. Conf. on Industrial Engineering and Engineering Management. IEEE; 2017:524–528. DOI:10.1109/IEEM.2017.8289946</mixed-citation>
      </ref>
      <ref id="cit35">
        <label>35</label>
        <mixed-citation xml:lang="ru">Afanasyev A., Voit N., Gaynullin R. The analysis of diagrammatic of workflows in design of the automated systems. In: Uncertainty Modelling in Knowledge Engineering and Decision Making. 2016:509–514. DOI:10.1142/9789813146976_0082</mixed-citation>
      </ref>
      <ref id="cit36">
        <label>36</label>
        <mixed-citation xml:lang="ru">Voit N., Bochkov S., Kirillov S. Temporal automaton RVTI-grammar for the diagrammatic design workflow models analysis. In AICT’2020: IEEE 14th Int. Conf. on Application of Information and Communication Technologies, Tashkent, Uzbekistan. 2020:1–6. DOI:10.1109/AICT50176.2020.9368810</mixed-citation>
      </ref>
      <ref id="cit37">
        <label>37</label>
        <mixed-citation xml:lang="ru">Afanasyev A., Voit N., Ukhanova M., Ionova I. Development of the approach to check the correctness of workflows. In: Data Science and Knowledge Engineering for Sensing Decision Support. 2018:1392–1399. DOI:10.1142/9789813273238_0173</mixed-citation>
      </ref>
      <ref id="cit38">
        <label>38</label>
        <mixed-citation xml:lang="ru">Shah R., Kesan J. Interoperability challenges for open standards: ODF and OOXML as examples. In dg.o’09: Proceedings of the 10th Annual International Conference on Digital Government Research: Social Networks: Making Connections between Citizens, Data and Government. Puebla: Digital Government Society of North America. 2009:56–62.</mixed-citation>
      </ref>
      <ref id="cit39">
        <label>39</label>
        <mixed-citation xml:lang="ru">Doncevic J., Fertalj K. Database integration systems. In MIPRO’2020: Proc. 43rd Int. Convention on Information, Communication and Electronic Technology. 2020:1617–1622. DOI:10.23919/MIPRO48935.2020.9245245</mixed-citation>
      </ref>
      <ref id="cit40">
        <label>40</label>
        <mixed-citation xml:lang="ru">Kolonko M., Mullenbach S. Polyglot Persistence in conceptual modeling for information analysis. In ACIT’2020: Proc. 10th Int. Conf. on Advanced Computer Information Technologies. 2020:590–594. DOI:10.1109/ACIT49673.2020.9208928</mixed-citation>
      </ref>
      <ref id="cit41">
        <label>41</label>
        <mixed-citation xml:lang="ru">Kosmerl I., Rabuzin K., Sestak M. Multi-model databases – introducing polyglot persistence in the big data world. In MIPRO’2020: Proc. 43rd Int. Convention on Information, Communication and Electronic Technology. 2020:1724–1729. DOI:10.23919/MIPRO48935.2020.9245178</mixed-citation>
      </ref>
      <ref id="cit42">
        <label>42</label>
        <mixed-citation xml:lang="ru">Montgomery C., Isah H., Zulkernine F. Towards a natural language query processing system. In IBDAP’2020: Proc. 1st Int. Conf. on Big Data Analytics and Practices. 2020. DOI:10.1109/IBDAP50342.2020.9245462</mixed-citation>
      </ref>
    </ref-list>
    <fn-group>
      <fn fn-type="conflict">
        <p>The authors declare no conflicts of interest.</p>
      </fn>
    </fn-group>
  </back>
</article>