Use HtmlUnit instead of Selenium WebDriver for grants loading

This commit is contained in:
Anton Romanov 2019-06-08 14:36:12 +04:00
parent 18bb0b85ac
commit d84b1fcbdc
5 changed files with 68 additions and 67 deletions

View File

@ -124,9 +124,9 @@ dependencies {
compile group: 'io.springfox', name: 'springfox-swagger2', version: '2.6.0'
compile group: 'io.springfox', name: 'springfox-swagger-ui', version: '2.6.0'
compile group: 'net.sourceforge.htmlunit', name: 'htmlunit', version: '2.35.0'
testCompile group: 'org.springframework.boot', name: 'spring-boot-starter-test'
compile group: 'org.seleniumhq.selenium', name: 'selenium-java', version: '3.3.1'
testCompile group: 'org.seleniumhq.selenium', name: 'selenium-support', version: '3.3.1'
testCompile group: 'com.google.guava', name: 'guava', version: '21.0'

View File

@ -1,8 +1,13 @@
package ru.ulstu.grant.page;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import com.gargoylesoftware.htmlunit.html.DomElement;
import com.gargoylesoftware.htmlunit.html.DomNode;
import com.gargoylesoftware.htmlunit.html.DomNodeList;
import com.gargoylesoftware.htmlunit.html.HtmlAnchor;
import com.gargoylesoftware.htmlunit.html.HtmlElement;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.html.HtmlTableDataCell;
import com.gargoylesoftware.htmlunit.html.HtmlTableRow;
import java.text.ParseException;
import java.text.SimpleDateFormat;
@ -11,16 +16,17 @@ import java.util.List;
public class KiasPage {
private final static String KIAS_GRANT_DATE_FORMAT = "dd.MM.yyyy HH:mm";
private WebDriver driver;
private final HtmlPage page;
public KiasPage(WebDriver webDriver) {
this.driver = webDriver;
/**
 * Creates a page object over an already loaded HtmlUnit page.
 *
 * @param page the HTML page whose grant table is queried by the methods of this class
 */
public KiasPage(HtmlPage page) {
this.page = page;
}
public boolean goToNextPage() {
try {
if (driver.findElements(By.id("js-ctrlNext")).size() > 0) {
driver.findElement(By.id("js-ctrlNext")).click();
HtmlElement nextPageLink = page.getHtmlElementById("js-ctrlNext");
if (nextPageLink.isDisplayed()) {
nextPageLink.click();
return true;
}
} finally {
@ -28,23 +34,25 @@ public class KiasPage {
}
}
public List<WebElement> getPageOfGrants() {
WebElement listContest = driver.findElement(By.tagName("tBody"));
List<WebElement> grants = listContest.findElements(By.cssSelector("tr.tr"));
return grants;
/**
 * Collects the table rows of the current page by an absolute XPath.
 *
 * @return every {@code tr} node found under the table body addressed by the XPath
 */
public List<DomNode> getPageOfGrants() {
    final List<DomNode> tableRows =
            page.getByXPath("/html/body/div[2]/div/div[2]/main/div[1]/table/tbody/tr");
    return tableRows;
}
public String getGrantTitle(WebElement grant) {
return grant.findElement(By.cssSelector("td.tertiary")).findElement(By.tagName("a")).getText();
/**
 * Extracts the grant title from a table row.
 *
 * <p>The title is the text of the link inside the cell whose class is exactly
 * {@code tertiary}.
 *
 * @param grant a table row node
 * @return the link text without leading or trailing whitespace
 * @throws NullPointerException if the row has no such link
 */
public String getGrantTitle(DomNode grant) {
    final DomNode titleLink = grant.getFirstByXPath("td[@class='tertiary']/a");
    // getTextContent() yields the raw node text, so surrounding blanks and line
    // breaks from the markup would otherwise end up in the stored title.
    return titleLink.getTextContent().trim();
}
public Date parseDeadLineDate(WebElement grantElement) throws ParseException {
/**
 * Parses the deadline of a grant row into a {@link Date}.
 *
 * @param grantElement a table row node
 * @return the deadline parsed with {@code KIAS_GRANT_DATE_FORMAT}
 * @throws ParseException if the cell text does not match the expected format
 */
public Date parseDeadLineDate(DomNode grantElement) throws ParseException {
    // Example of the expected cell text: 10.06.2019 23:59
    final String rawDeadline = getFirstDeadline(grantElement);
    return new SimpleDateFormat(KIAS_GRANT_DATE_FORMAT).parse(rawDeadline);
}
private String getFirstDeadline(WebElement grantElement) {
return grantElement.findElement(By.xpath("./td[5]")).getText();
/**
 * Reads the text of the fifth cell of a grant row, which is parsed as the deadline.
 *
 * @param grantElement a table row node
 * @return the cell text without leading or trailing whitespace
 * @throws NullPointerException if the row has no fifth cell
 */
private String getFirstDeadline(DomNode grantElement) {
    final DomNode deadlineCell = grantElement.getFirstByXPath("td[5]");
    // getTextContent() yields the raw node text; trimming keeps stray whitespace
    // around the value (for example line breaks from the markup) from breaking
    // the date parsing done by the caller.
    return deadlineCell.getTextContent().trim();
}
/**
 * Tells whether a table row should be treated as a grant line.
 *
 * @param grantElement a node that must be an {@code HtmlTableRow}
 * @return {@code false} when the row's {@code class} attribute contains
 *         {@code pagerSavedHeightSpacer}, {@code true} otherwise
 * @throws ClassCastException if the node is not a table row
 */
public boolean isTrGrantLine(DomNode grantElement) {
    final HtmlTableRow row = (HtmlTableRow) grantElement;
    final String cssClasses = row.getAttribute("class");
    final boolean isSpacerRow = cssClasses.contains("pagerSavedHeightSpacer");
    return !isSpacerRow;
}
}

View File

@ -1,35 +1,29 @@
package ru.ulstu.grant.service;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.DomElement;
import com.gargoylesoftware.htmlunit.html.DomNode;
import com.gargoylesoftware.htmlunit.html.HtmlElement;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import org.springframework.stereotype.Service;
import ru.ulstu.configuration.ApplicationProperties;
import ru.ulstu.grant.model.GrantDto;
import ru.ulstu.grant.page.KiasPage;
import ru.ulstu.user.service.UserService;
import java.nio.file.Paths;
import java.io.IOException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.List;
import static org.apache.commons.lang3.StringUtils.isEmpty;
@Service
public class KiasService {
private final static String BASE_URL = "https://www.rfbr.ru/rffi/ru/contest_search?CONTEST_STATUS_ID=%s&CONTEST_TYPE=%s&CONTEST_YEAR=%s";
private final static String CONTEST_STATUS_ID = "1";
private final static String CONTEST_TYPE = "-1";
private final static String DRIVER_LOCATION = "drivers/%s";
private final static String WINDOWS_DRIVER = "chromedriver.exe";
private final static String LINUX_DRIVER = "chromedriver";
private final static String DRIVER_TYPE = "webdriver.chrome.driver";
private final UserService userService;
private final ApplicationProperties applicationProperties;
@ -39,22 +33,22 @@ public class KiasService {
this.applicationProperties = applicationProperties;
}
public List<GrantDto> getNewGrantsDto() throws ParseException {
WebDriver webDriver = getDriver();
public List<GrantDto> getNewGrantsDto() throws ParseException, IOException {
Integer leaderId = userService.findOneByLoginIgnoreCase("admin").getId();
List<GrantDto> grants = new ArrayList<>();
for (Integer year : generateGrantYears()) {
webDriver.get(String.format(BASE_URL, CONTEST_STATUS_ID, CONTEST_TYPE, year));
grants.addAll(getKiasGrants(webDriver));
try (final WebClient webClient = new WebClient()) {
for (Integer year : generateGrantYears()) {
final HtmlPage page = webClient.getPage(String.format(BASE_URL, CONTEST_STATUS_ID, CONTEST_TYPE, year));
grants.addAll(getKiasGrants(page));
}
}
grants.forEach(grantDto -> grantDto.setLeaderId(leaderId));
webDriver.quit();
return grants;
}
public List<GrantDto> getKiasGrants(WebDriver webDriver) throws ParseException {
public List<GrantDto> getKiasGrants(HtmlPage page) throws ParseException {
List<GrantDto> newGrants = new ArrayList<>();
KiasPage kiasPage = new KiasPage(webDriver);
KiasPage kiasPage = new KiasPage(page);
do {
newGrants.addAll(getGrantsFromPage(kiasPage));
} while (kiasPage.goToNextPage()); //проверка существования следующей страницы с грантами
@ -63,11 +57,13 @@ public class KiasService {
private List<GrantDto> getGrantsFromPage(KiasPage kiasPage) throws ParseException {
List<GrantDto> grants = new ArrayList<>();
for (WebElement grantElement : kiasPage.getPageOfGrants()) {
GrantDto grantDto = new GrantDto(
kiasPage.getGrantTitle(grantElement),
kiasPage.parseDeadLineDate(grantElement));
grants.add(grantDto);
for (DomNode grantElement : kiasPage.getPageOfGrants()) {
if (kiasPage.isTrGrantLine(grantElement)) {
GrantDto grantDto = new GrantDto(
kiasPage.getGrantTitle(grantElement),
kiasPage.parseDeadLineDate(grantElement));
grants.add(grantDto);
}
}
return grants;
}
@ -76,28 +72,4 @@ public class KiasService {
return Arrays.asList(Calendar.getInstance().get(Calendar.YEAR),
Calendar.getInstance().get(Calendar.YEAR) + 1);
}
/**
 * Creates a headless Chrome driver.
 *
 * <p>Sets the system property named by {@code DRIVER_TYPE} to the resolved driver
 * executable path, prints that path to standard output, and configures Chrome to
 * use the binary at {@code /usr/bin/google-chrome}.
 *
 * @return a new {@link ChromeDriver} configured with the options above
 */
private WebDriver getDriver() {
    System.setProperty(DRIVER_TYPE, getDriverExecutablePath());
    System.out.println("drive search path: " + getDriverExecutablePath());

    final ChromeOptions headlessOptions = new ChromeOptions();
    headlessOptions.setBinary("/usr/bin/google-chrome");
    headlessOptions.addArguments("--headless");

    final WebDriver chromeDriver = new ChromeDriver(headlessOptions);
    return chromeDriver;
}
/**
 * Resolves the path of the driver executable.
 *
 * <p>When the configured driver path is non-empty, the executable name is appended
 * to it. Otherwise the executable is looked up as a classpath resource under
 * {@code DRIVER_LOCATION}.
 *
 * @return the path to the platform-specific driver executable
 */
private String getDriverExecutablePath() {
    if (!isEmpty(applicationProperties.getDriverPath())) {
        return Paths.get(applicationProperties.getDriverPath(), getDriverExecutable(isWindows())).toString();
    }
    // No explicit location configured: fall back to the copy bundled on the classpath.
    return KiasService.class.getClassLoader()
            .getResource(String.format(DRIVER_LOCATION, getDriverExecutable(isWindows())))
            .getFile();
}
/**
 * Picks the driver executable file name for the platform.
 *
 * @param isWindows whether the Windows executable name is wanted
 * @return {@code WINDOWS_DRIVER} when {@code isWindows} is true, otherwise {@code LINUX_DRIVER}
 */
private String getDriverExecutable(boolean isWindows) {
    if (isWindows) {
        return WINDOWS_DRIVER;
    }
    return LINUX_DRIVER;
}
/**
 * Checks the {@code os.name} system property for a Windows platform.
 *
 * @return {@code true} if the lower-cased OS name contains {@code "windows"}
 */
private boolean isWindows() {
    final String osName = System.getProperty("os.name");
    final String normalizedName = osName.toLowerCase();
    return normalizedName.contains("windows");
}
}

View File

@ -13,6 +13,8 @@ server.ssl.key-store-password=secret
server.ssl.key-password=password
# Log settings (TRACE, DEBUG, INFO, WARN, ERROR, FATAL, OFF)
logging.level.ru.ulstu=DEBUG
#HtmlUnit
logging.level.com.gargoylesoftware.htmlunit=ERROR
# Mail Settings
spring.mail.host=smtp.yandex.ru
spring.mail.port=465
@ -31,6 +33,7 @@ spring.jpa.hibernate.ddl-auto=validate
liquibase.drop-first=false
liquibase.enabled=true
liquibase.change-log=classpath:db/changelog-master.xml
# Application Settings
ng-tracker.base-url=http://127.0.0.1:8080
ng-tracker.undead-user-login=admin

View File

@ -8,4 +8,22 @@
<column name="activity_id" type="integer"/>
</addColumn>
</changeSet>
<changeSet author="orion" id="20190525_000000-2">
<modifyDataType tableName="grants"
columnName="title"
newDataType="text"
schemaName="public"/>
</changeSet>
<changeSet author="orion" id="20190525_000000-3">
<modifyDataType tableName="event"
columnName="title"
newDataType="text"
schemaName="public"/>
</changeSet>
<changeSet author="orion" id="20190525_000000-4">
<modifyDataType tableName="event"
columnName="description"
newDataType="text"
schemaName="public"/>
</changeSet>
</databaseChangeLog>