mtg-decks-downloader

Tool to download Magic: The Gathering decklists from the Internet
git clone https://kevincorvisier.fr/git/mtg-decks-downloader.git
Log | Files | Refs | README

commit 12f8aebc3dea18a2d197e08d17ece47a32fa4f46
parent fdd854ef17258ed854c06a26def887224d14a8f0
Author: Kevin Corvisier <git@kevincorvisier.fr>
Date:   Thu, 16 Jan 2025 18:15:02 +0900

Catch page/decklist parsing exceptions
Diffstat:
M src/main/java/fr/kevincorvisier/mtg/dd/consumers/DecklistConsumersService.java | 3 +--
M src/main/java/fr/kevincorvisier/mtg/dd/downloaders/TcdecksDecklistDownloader.java | 49 ++++++++++++++++++++++++++++---------------------
2 files changed, 29 insertions(+), 23 deletions(-)

diff --git a/src/main/java/fr/kevincorvisier/mtg/dd/consumers/DecklistConsumersService.java b/src/main/java/fr/kevincorvisier/mtg/dd/consumers/DecklistConsumersService.java
@@ -3,7 +3,6 @@ package fr.kevincorvisier.mtg.dd.consumers;
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStreamReader;
-import java.sql.SQLException;
 import java.util.Collection;
 
 import org.springframework.stereotype.Service;
@@ -42,7 +41,7 @@ public class DecklistConsumersService
 			content = new DeckContent(MagicOnlineFileReader.read(reader));
 			cache.save(metadata.getUrl(), content);
 		}
-		catch (SQLException | IOException e)
+		catch (final Exception e)
 		{
 			log.error("Error while downloading deck, skipping {}", metadata, e);
 			return;
diff --git a/src/main/java/fr/kevincorvisier/mtg/dd/downloaders/TcdecksDecklistDownloader.java b/src/main/java/fr/kevincorvisier/mtg/dd/downloaders/TcdecksDecklistDownloader.java
@@ -166,34 +166,41 @@ public class TcdecksDecklistDownloader implements DecklistDownloader
 
 	private void downloadTournament(final URL url) throws MalformedURLException
 	{
-		crawler.navigateTo(url);
+		try
+		{
+			crawler.navigateTo(url);
 
-		final String description = crawler.findElement(By.xpath("//article[@class='span8']/fieldset/legend/h5")).getText();
+			final String description = crawler.findElement(By.xpath("//article[@class='span8']/fieldset/legend/h5")).getText();
 
-		log.info("download tournament: {}", description);
+			log.info("download tournament: {}", description);
 
-		final Matcher matcher = pattern.matcher(description);
-		if (!matcher.find())
-		{
-			log.error("Cannot parse tournament description: {}", description);
-			return;
-		}
+			final Matcher matcher = pattern.matcher(description);
+			if (!matcher.find())
+			{
+				log.error("Cannot parse tournament description: {}", description);
+				return;
+			}
 
-		final LocalDate date = LocalDate.of( //
-				Integer.parseInt(matcher.group("year")), //
-				Integer.parseInt(matcher.group("month")), //
-				Integer.parseInt(matcher.group("dayOfMonth")));
+			final LocalDate date = LocalDate.of( //
+					Integer.parseInt(matcher.group("year")), //
+					Integer.parseInt(matcher.group("month")), //
+					Integer.parseInt(matcher.group("dayOfMonth")));
 
-		for (final WebElement tr : crawler.findElements(By.xpath("//table[@class='tourney_list']/tbody/tr")))
-		{
-			if (hasElement(tr, By.xpath("th")))
-				continue; // Header row
+			for (final WebElement tr : crawler.findElements(By.xpath("//table[@class='tourney_list']/tbody/tr")))
+			{
+				if (hasElement(tr, By.xpath("th")))
+					continue; // Header row
 
-			final WebElement archetype = tr.findElement(By.xpath("td[@data-th='Archetype']/a"));
-			final WebElement player = tr.findElement(By.xpath("td[@data-th='Player']/a"));
-			final URL downloadUrl = toDownloadUrl(archetype.getAttribute("href"));
+				final WebElement archetype = tr.findElement(By.xpath("td[@data-th='Archetype']/a"));
+				final WebElement player = tr.findElement(By.xpath("td[@data-th='Player']/a"));
+				final URL downloadUrl = toDownloadUrl(archetype.getAttribute("href"));
 
-			consumers.process(metadataFactory.create(downloadUrl, player.getText(), archetype.getText(), date));
+				consumers.process(metadataFactory.create(downloadUrl, player.getText(), archetype.getText(), date));
+			}
+		}
+		catch (final Exception e)
+		{
+			log.error("downloadTournament: url={}", url, e);
 		}
 	}
 