<?xml version="1.0" encoding="UTF-8"?>
<article article-type="research-article" dtd-version="1.3" xml:lang="ru" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="https://metafora.rcsi.science/xsd_files/journal3.xsd">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">moitvivt</journal-id>
      <journal-title-group>
        <journal-title xml:lang="ru">Моделирование, оптимизация и информационные технологии</journal-title>
        <trans-title-group xml:lang="en">
          <trans-title>Modeling, Optimization and Information Technology</trans-title>
        </trans-title-group>
      </journal-title-group>
      <issn pub-type="epub">2310-6018</issn>
      <publisher>
        <publisher-name>Издательство</publisher-name>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="doi">10.26102/2310-6018/2025.50.3.006</article-id>
      <article-id pub-id-type="custom" custom-type="elpub">1918</article-id>
      <title-group>
        <article-title xml:lang="ru">Разработка легковесной модели автоматической классификации структурированных и неструктурированных данных в потоковых источниках для оптимизации оптического распознавания символов</article-title>
        <trans-title-group xml:lang="en">
          <trans-title>Development of a lightweight model for automatic classification of structured and unstructured data in streaming sources to optimize optical character recognition</trans-title>
        </trans-title-group>
      </title-group>
      <contrib-group>
        <contrib contrib-type="author" corresp="yes">
          <contrib-id contrib-id-type="orcid">0009-0006-0849-4561</contrib-id>
          <name-alternatives>
            <name name-style="eastern" xml:lang="ru">
              <surname>Гаврилов</surname>
              <given-names>Вадим Сергеевич</given-names>
            </name>
            <name name-style="western" xml:lang="en">
              <surname>Gavrilov</surname>
              <given-names>Vadim Sergeevich</given-names>
            </name>
          </name-alternatives>
          <email>vad093@mail.ru</email>
          <xref ref-type="aff">aff-1</xref>
        </contrib>
        <contrib contrib-type="author">
          <contrib-id contrib-id-type="orcid">0000-0001-8042-4089</contrib-id>
          <name-alternatives>
            <name name-style="eastern" xml:lang="ru">
              <surname>Корчагин</surname>
              <given-names>Сергей Алексеевич</given-names>
            </name>
            <name name-style="western" xml:lang="en">
              <surname>Korchagin</surname>
              <given-names>Sergei Alekseevich</given-names>
            </name>
          </name-alternatives>
          <email>sakorchagin@fa.ru</email>
          <xref ref-type="aff">aff-2</xref>
        </contrib>
        <contrib contrib-type="author">
          <contrib-id contrib-id-type="orcid">0000-0003-2413-7880</contrib-id>
          <name-alternatives>
            <name name-style="eastern" xml:lang="ru">
              <surname>Долгов</surname>
              <given-names>Виталий Игоревич</given-names>
            </name>
            <name name-style="western" xml:lang="en">
              <surname>Dolgov</surname>
              <given-names>Vitaly Igorevich</given-names>
            </name>
          </name-alternatives>
          <email>vidolgov@fa.ru</email>
          <xref ref-type="aff">aff-3</xref>
        </contrib>
        <contrib contrib-type="author">
          <contrib-id contrib-id-type="orcid">0000-0003-0735-7697</contrib-id>
          <name-alternatives>
            <name name-style="eastern" xml:lang="ru">
              <surname>Андриянов</surname>
              <given-names>Никита Андреевич</given-names>
            </name>
            <name name-style="western" xml:lang="en">
              <surname>Andriyanov</surname>
              <given-names>Nikita Andreevich</given-names>
            </name>
          </name-alternatives>
          <email>naandriyanov@fa.ru</email>
          <xref ref-type="aff">aff-4</xref>
        </contrib>
      </contrib-group>
      <aff-alternatives id="aff-1">
        <aff xml:lang="ru">Финансовый университет при Правительстве Российской Федерации</aff>
        <aff xml:lang="en">Financial University under the Government of the Russian Federation</aff>
      </aff-alternatives>
      <aff-alternatives id="aff-2">
        <aff xml:lang="ru">Финансовый университет при Правительстве Российской Федерации Институт цифровых технологий</aff>
        <aff xml:lang="en">Financial University under the Government of the Russian Federation Institute of Digital Technologies</aff>
      </aff-alternatives>
      <aff-alternatives id="aff-3">
        <aff xml:lang="ru">Финансовый университет при Правительстве Российской Федерации</aff>
        <aff xml:lang="en">Financial University under the Government of the Russian Federation</aff>
      </aff-alternatives>
      <aff-alternatives id="aff-4">
        <aff xml:lang="ru">Финансовый университет при Правительстве Российской Федерации</aff>
        <aff xml:lang="en">Financial University under the Government of the Russian Federation</aff>
      </aff-alternatives>
      <pub-date pub-type="epub">
        <day>01</day>
        <month>01</month>
        <year>2026</year>
      </pub-date>
      <volume>1</volume>
      <issue>1</issue>
      <elocation-id>10.26102/2310-6018/2025.50.3.006</elocation-id>
      <permissions>
        <copyright-statement>Copyright © The Authors, 2026</copyright-statement>
        <copyright-year>2026</copyright-year>
        <license license-type="creative-commons-attribution" xlink:href="https://creativecommons.org/licenses/by/4.0/">
          <license-p>This work is licensed under a Creative Commons Attribution 4.0 International License</license-p>
        </license>
      </permissions>
      <self-uri xlink:href="https://moitvivt.ru/ru/journal/article?id=1918"/>
      <abstract xml:lang="ru">
        <p>В настоящей статье рассмотрена задача предварительной оценки входящего электронного документооборота на основе технологий компьютерного зрения. Авторами был синтезирован датасет изображений со структурированными данными на основе формы счета-фактуры, а также собраны сканы различных документов: от страниц научных статей и документации в электронном почтовом ящике научной организации до отчетности Росстата. Таким образом, первая часть датасета относится к структурированным данным, имеющим строгую форму, а вторая — к неструктурированным сканам: на разных отсканированных документах информация может быть представлена по-разному (только текст, текст с изображениями, графики), поскольку разные источники имеют разные требования и собственные стандарты. Первичный анализ данных в потоковых источниках можно выполнять с помощью моделей компьютерного зрения. Проведенные эксперименты показали высокую точность работы сверточных нейронных сетей: нейросеть с архитектурой Xception достигает точности более 99 %. Преимущество по сравнению с более простой моделью MobileNetV2 составляет около 9 %. Предложенный подход позволит проводить первичную фильтрацию документов по отделам без применения больших языковых моделей и моделей распознавания символов, что обеспечит повышение скорости и снижение вычислительных затрат.</p>
      </abstract>
      <trans-abstract xml:lang="en">
        <p>This article addresses the task of preliminary assessment of incoming electronic document flow based on computer vision technologies. The authors synthesized a dataset of images with structured data based on the invoice form and also collected scans of various documents, ranging from pages of scientific articles and documentation in the electronic mailbox of a scientific organization to Rosstat reports. The first part of the dataset thus comprises structured data with a strict form, while the second part comprises unstructured scans: information may be presented differently on different scanned documents (text only, text with images, graphs), since different sources have different requirements and their own standards. Primary analysis of data in streaming sources can be performed with computer vision models. The experiments showed high accuracy for convolutional neural networks; in particular, a network with the Xception architecture achieves an accuracy above 99%. Its advantage over the simpler MobileNetV2 model is about 9%. The proposed approach makes it possible to pre-filter documents by department without large language models or character recognition models, which increases speed and reduces computational costs.</p>
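        <p>The article itself includes no source code; the following minimal Keras sketch is only an illustration of the kind of two-class pipeline described above, in which an Xception or MobileNetV2 backbone decides whether a scan is a structured form or an unstructured document. The directory layout, input size, and training hyperparameters are assumptions made for the example, not the authors' actual configuration.</p>
        <code language="python"># Illustrative sketch, not the authors' code. Paths, input size, and
# hyperparameters below are assumptions.
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import MobileNetV2, Xception

IMG_SIZE = (299, 299)  # Xception's native resolution; assumed here for both backbones

def build_classifier(backbone="xception"):
    # Pretrained ImageNet backbone, frozen for the initial training stage.
    Base = Xception if backbone == "xception" else MobileNetV2
    base = Base(weights="imagenet", include_top=False,
                input_shape=IMG_SIZE + (3,))
    base.trainable = False
    # Binary head: probability that the page is a structured form.
    return models.Sequential([
        base,
        layers.GlobalAveragePooling2D(),
        layers.Dense(1, activation="sigmoid"),
    ])

# Hypothetical layout: scans/structured/*.png and scans/unstructured/*.png
train_ds = tf.keras.utils.image_dataset_from_directory(
    "scans", image_size=IMG_SIZE, batch_size=32, label_mode="binary")

model = build_classifier("xception")
model.compile(optimizer="adam", loss="binary_crossentropy",
              metrics=["accuracy"])
model.fit(train_ds, epochs=5)</code>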
      </trans-abstract>
      <kwd-group xml:lang="ru">
        <kwd>интеллектуальная обработка документов</kwd>
        <kwd>компьютерное зрение</kwd>
        <kwd>сверточные нейронные сети</kwd>
        <kwd>обработка потоковых данных</kwd>
        <kwd>машинное обучение</kwd>
      </kwd-group>
      <kwd-group xml:lang="en">
        <kwd>intelligent document processing</kwd>
        <kwd>computer vision</kwd>
        <kwd>convolutional neural networks</kwd>
        <kwd>stream data processing</kwd>
        <kwd>machine learning</kwd>
      </kwd-group>
      <funding-group>
        <funding-statement xml:lang="ru">Статья подготовлена по результатам исследований, выполненных за счет бюджетных средств по государственному заданию Финансового университета при Правительстве РФ.</funding-statement>
        <funding-statement xml:lang="en">The article is based on the results of the research carried out at the expense of budgetary funds under the state assignment of Financial University under the Government of the Russian Federation.</funding-statement>
      </funding-group>
    </article-meta>
  </front>
  <back>
    <ref-list>
      <title>References</title>
      <ref id="cit1">
        <label>1</label>
        <mixed-citation xml:lang="ru">Shi B., Bai X., Yao C. An End-to-End Trainable Neural Network for Image-Based Sequence Recognition and Its Application to Scene Text Recognition. IEEE Transactions on Pattern Analysis and Machine Intelligence. 2017;39(11):2298–2304. https://doi.org/10.1109/TPAMI.2016.2646371</mixed-citation>
      </ref>
      <ref id="cit2">
        <label>2</label>
        <mixed-citation xml:lang="ru">Inoue K. Context-Independent OCR with Multimodal LLMs: Effects of Image Resolution and Visual Complexity. arXiv. URL: https://arxiv.org/abs/2503.23667 [Accessed 12th March 2025].</mixed-citation>
      </ref>
      <ref id="cit3">
        <label>3</label>
        <mixed-citation xml:lang="ru">Fujitake M. DTrOCR: Decoder-Only Transformer for Optical Character Recognition. In: 2024 IEEE/CVF Winter Conference on Applications of Computer Vision (WACV), 03–08 January 2024, Waikoloa, HI, USA. IEEE; 2024. P. 8010–8020. https://doi.org/10.1109/WACV57701.2024.00784</mixed-citation>
      </ref>
      <ref id="cit4">
        <label>4</label>
        <mixed-citation xml:lang="ru">Tian Yu., Ye Q., Doermann D. YOLOv12: Attention-Centric Real-Time Object Detectors. arXiv. URL: https://arxiv.org/abs/2502.12524 [Accessed 12th March 2025].</mixed-citation>
      </ref>
      <ref id="cit5">
        <label>5</label>
        <mixed-citation xml:lang="ru">Alif M.A.R., Hussain M. YOLOv12: A Breakdown of the Key Architectural Features. arXiv. URL: https://arxiv.org/abs/2502.14740 [Accessed 12th March 2025].</mixed-citation>
      </ref>
      <ref id="cit6">
        <label>6</label>
        <mixed-citation xml:lang="ru">Wang X., Li Ye., Liu J., et al. Intelligent Micron Optical Character Recognition of DFB Chip Using Deep Convolutional Neural Network. IEEE Transactions on Instrumentation and Measurement. 2022;71. https://doi.org/10.1109/TIM.2022.3154831</mixed-citation>
      </ref>
      <ref id="cit7">
        <label>7</label>
        <mixed-citation xml:lang="ru">Li M., Lv T., Chen J., et al. TrOCR: Transformer-Based Optical Character Recognition with Pre-trained Models. In: Thirty-Seventh AAAI Conference on Artificial Intelligence, AAAI 2023, Thirty-Fifth Conference on Innovative Applications of Artificial Intelligence, IAAI 2023, Thirteenth Symposium on Educational Advances in Artificial Intelligence, EAAI 2023, 07–14 February 2023, Washington, DC, USA. AAAI Press; 2023. P. 13094–13102.</mixed-citation>
      </ref>
      <ref id="cit8">
        <label>8</label>
        <mixed-citation xml:lang="ru">Dosovitskiy A., Beyer L., Kolesnikov A., et al. An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale. In: 9th International Conference on Learning Representations, ICLR 2021, 03–07 May 2021, Virtual Event, Austria. 2021. https://doi.org/10.48550/arXiv.2010.11929</mixed-citation>
      </ref>
      <ref id="cit9">
        <label>9</label>
        <mixed-citation xml:lang="ru">Xiong S., Chen X., Zhang H. Deep Learning-Based Multifunctional End-to-End Model for Optical Character Classification and Denoising. Journal of Computational Methods in Engineering Applications. 2023;3(1):1–13. https://doi.org/10.62836/jcmea.v3i1.030103</mixed-citation>
      </ref>
      <ref id="cit10">
        <label>10</label>
        <mixed-citation xml:lang="ru">Kasem M.S.E., Mahmoud M., Kang H.-S. Advancements and Challenges in Arabic Optical Character Recognition: A Comprehensive Survey. [Preprint]. arXiv. URL: https://arxiv.org/abs/2312.11812 [Accessed 14th March 2025].</mixed-citation>
      </ref>
      <ref id="cit11">
        <label>11</label>
        <mixed-citation xml:lang="ru">Baek Yo., Lee B., Han D., Yun S., Lee H. Character Region Awareness for Text Detection. arXiv. URL: https://doi.org/10.48550/arXiv.1904.01941 [Accessed 14th March 2025].</mixed-citation>
      </ref>
      <ref id="cit12">
        <label>12</label>
        <mixed-citation xml:lang="ru">Zhang Ya., Ye Yu-L., Guo D.-J., Huang T.  PCA-VGG16 Model for Classification of Rock Types. Earth Science Informatics. 2024;17(2):1553–1567. https://doi.org/10.1007/s12145-023-01217-y</mixed-citation>
      </ref>
      <ref id="cit13">
        <label>13</label>
        <mixed-citation xml:lang="ru">Sarwinda D., Paradisa R.H., Bustamam A., Anggia P. Deep Learning in Image Classification Using Residual Network (ResNet) Variants for Detection of Colorectal Cancer. Procedia Computer Science. 2021;179:423–431. https://doi.org/10.1016/j.procs.2021.01.025</mixed-citation>
      </ref>
      <ref id="cit14">
        <label>14</label>
        <mixed-citation xml:lang="ru">Morani K., Ayana E.K., Kollias D., Unay D. COVID‐19 Detection from Computed Tomography Images Using Slice Processing Techniques and a Modified Xception Classifier. International Journal of Biomedical Imaging. 2024;2024. https://doi.org/10.1155/2024/9962839</mixed-citation>
      </ref>
      <ref id="cit15">
        <label>15</label>
        <mixed-citation xml:lang="ru">Andriyanov N., Andriyanov D. Pattern Recognition on Radar Images Using Augmentation. In: 2020 Ural Symposium on Biomedical Engineering, Radioelectronics and Information Technology (USBEREIT), 14–15 May 2020, Yekaterinburg, Russia. IEEE; 2020. P. 0289–0291. https://doi.org/10.1109/USBEREIT48449.2020.9117669</mixed-citation>
      </ref>
      <ref id="cit16">
        <label>16</label>
        <mixed-citation xml:lang="ru">Sandler M., Howard A., Zhu M., Zhmoginov A., Chen L.-Ch. MobileNetV2: Inverted Residuals and Linear Bottlenecks. In: 2018 IEEE/CVF Conference on Computer Vision and Pattern Recognition, 18–23 June 2018, Salt Lake City, UT, USA. IEEE; 2018. P. 4510–4520. https://doi.org/10.1109/CVPR.2018.00474</mixed-citation>
      </ref>
      <ref id="cit17">
        <label>17</label>
        <mixed-citation xml:lang="ru">Tan M., Le Q.V. EfficientNetV2: Smaller Models and Faster Training. In: Proceedings of the 38th International Conference on Machine Learning, ICML 2021, 18–24 July 2021, Virtual Event. 2021. P. 10096–10106.</mixed-citation>
      </ref>
      <ref id="cit18">
        <label>18</label>
        <mixed-citation xml:lang="ru">Ахмад А., Андриянов Н.А., Соловьев В.И., Соломатин Д.А. Применение глубокого обучения для аугментации и генерации подводного набора данных с промышленными объектами. Вестник Южно-Уральского государственного университета. Серия: Компьютерные технологии, управление, радиоэлектроника. 2023;23(2):5–16. https://doi.org/10.14529/ctcr230201</mixed-citation>
      </ref>
    </ref-list>
    <fn-group>
      <fn fn-type="conflict">
        <p>The authors declare that there are no conflicts of interest.</p>
      </fn>
    </fn-group>
  </back>
</article>