From cbd77dfb24562e29eb322aba72c41f84a213b30b Mon Sep 17 00:00:00 2001 From: Anton Romanov Date: Sat, 10 Apr 2021 10:23:33 +0400 Subject: [PATCH] #12 -- Improve performance --- .../api/StructuralUnitIdentifier.java | 5 ++++- .../heuristic/service/DetectorService.java | 18 +++++++++++++++++- .../heuristic/service/DirectoryService.java | 9 +++++++++ 3 files changed, 30 insertions(+), 2 deletions(-) diff --git a/src/main/java/ru/ulstu/extractor/heuristic/api/StructuralUnitIdentifier.java b/src/main/java/ru/ulstu/extractor/heuristic/api/StructuralUnitIdentifier.java index f086fb8..5b779cf 100644 --- a/src/main/java/ru/ulstu/extractor/heuristic/api/StructuralUnitIdentifier.java +++ b/src/main/java/ru/ulstu/extractor/heuristic/api/StructuralUnitIdentifier.java @@ -51,7 +51,10 @@ public abstract class StructuralUnitIdentifier { .filter(file -> StringUtils.fileInSubdirectory(file.getPath(), projectPath, subDirectory)) .forEach(projectFile -> { try { - String detectedLanguage = getDetectorService().getDetectedLanguage(new String(Files.readAllBytes(projectFile.toPath()))); + String detectedLanguage = getDetectorService().getDetectedLanguage( + projectFile.getName(), + new String(Files.readAllBytes(projectFile.toPath())), + DetectorService.LangDetectScrupulousness.LOW); projectFileLanguageFrequency.put(detectedLanguage, projectFileLanguageFrequency.getOrDefault(detectedLanguage, 0) + 1); } catch (IOException e) { e.printStackTrace(); diff --git a/src/main/java/ru/ulstu/extractor/heuristic/service/DetectorService.java b/src/main/java/ru/ulstu/extractor/heuristic/service/DetectorService.java index 9b729d6..c989a2f 100644 --- a/src/main/java/ru/ulstu/extractor/heuristic/service/DetectorService.java +++ b/src/main/java/ru/ulstu/extractor/heuristic/service/DetectorService.java @@ -1,3 +1,8 @@ +/* + * Copyright (C) 2021 Anton Romanov - All Rights Reserved + * You may use, distribute and modify this code, please write to: romanov73@gmail.com. 
+ */ + package ru.ulstu.extractor.heuristic.service; import com.gargoylesoftware.htmlunit.WebClient; @@ -9,11 +14,22 @@ import java.util.Arrays; import java.util.List; import java.util.Locale; +import static ru.ulstu.extractor.heuristic.service.DetectorService.LangDetectScrupulousness.LOW; + @Service public class DetectorService { + public enum LangDetectScrupulousness {LOW, HIGH} + private final static String BASE_URL = "http://localhost:8080/lang-detector.html"; - public String getDetectedLanguage(String code) { + public String getDetectedLanguage(String fileName, String code, LangDetectScrupulousness scrupulousness) { + if (scrupulousness == LOW) { + return DirectoryService.getFileExtension(fileName).orElse(""); + } + return getDetectedLanguage(code); + } + + private String getDetectedLanguage(String code) { String selectedLang = null; try (WebClient webClient = new WebClient()) { webClient.setJavaScriptTimeout(60 * 1000); diff --git a/src/main/java/ru/ulstu/extractor/heuristic/service/DirectoryService.java b/src/main/java/ru/ulstu/extractor/heuristic/service/DirectoryService.java index c2d78e8..344e09d 100644 --- a/src/main/java/ru/ulstu/extractor/heuristic/service/DirectoryService.java +++ b/src/main/java/ru/ulstu/extractor/heuristic/service/DirectoryService.java @@ -16,6 +16,7 @@ import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Objects; +import java.util.Optional; import java.util.stream.Collectors; /** @@ -23,6 +24,7 @@ import java.util.stream.Collectors; */ @Service public class DirectoryService { + private static final String FILE_EXTENSION_DELIMITER = "."; /** * Получить список файлов, рекурсивно обойдя все дерево каталогов проекта. 
@@ -51,4 +53,11 @@ public class DirectoryService { } return Collections.emptyList(); } + + public static Optional<String> getFileExtension(String fileName) { + if (fileName == null || fileName.isEmpty() || fileName.lastIndexOf(FILE_EXTENSION_DELIMITER) < 0) { + return Optional.empty(); + } + return Optional.of(fileName.substring(fileName.lastIndexOf(FILE_EXTENSION_DELIMITER) + 1)); + } }