mtg-decks-downloader

Tool to download Magic: The Gathering decklists from the Internet
git clone https://kevincorvisier.fr/git/mtg-decks-downloader.git
Log | Files | Refs | README

commit 1fabb7435b9eb6ffefbc33c6a204a794847f1797
parent bf6c9d97d6c0ec19419f08b6bde0ae7c5662b3e8
Author: Kevin Corvisier <git@kevincorvisier.fr>
Date:   Sun,  2 Mar 2025 14:18:42 +0900

TC Decks downloader: handle search URLs
Diffstat:
M src/main/java/fr/kevincorvisier/mtg/dd/downloaders/TcdecksDecklistDownloader.java | 49 +++++++++++++++++++++++++++++++++++++++----------
A src/main/packaged-resources/cfg/config-available/pm-top-budget.properties | 9 +++++++++
2 files changed, 48 insertions(+), 10 deletions(-)

diff --git a/src/main/java/fr/kevincorvisier/mtg/dd/downloaders/TcdecksDecklistDownloader.java b/src/main/java/fr/kevincorvisier/mtg/dd/downloaders/TcdecksDecklistDownloader.java @@ -28,7 +28,9 @@ import lombok.extern.slf4j.Slf4j; @RequiredArgsConstructor public class TcdecksDecklistDownloader implements DecklistDownloader { - private static final Pattern PATTERN_URL_FORMAT = Pattern.compile("https://www\\.tcdecks\\.net/format\\.php\\?format=(?:[A-Za-z])+"); + private static final String URL_FORMAT = "https://www.tcdecks.net/format.php"; + private static final String URL_ARCHETYPE = "https://www.tcdecks.net/archetype.php"; + private static final String URL_SEARCH = "https://www.tcdecks.net/results.php"; private static final Pattern PATTERN_URL_ARCHETYPE = Pattern .compile("https://www\\.tcdecks\\.net/archetype\\.php\\?archetype=(?<archetype>[A-Za-z]+)&format=[A-Za-z]+"); private static final Pattern PATTERN_URL_DECK = Pattern.compile("https://www.tcdecks.net/deck\\.php\\?id=(?<id>\\d+)&iddeck=(?<iddeck>\\d+)"); @@ -59,18 +61,19 @@ public class TcdecksDecklistDownloader implements DecklistDownloader { final String urlStr = url.toString(); - if (PATTERN_URL_FORMAT.matcher(urlStr).matches()) - { + if (urlStr.startsWith(URL_FORMAT)) downloadFormat(url); - return; - } - - final Matcher matcher = PATTERN_URL_ARCHETYPE.matcher(urlStr); - if (matcher.matches()) + else if (urlStr.startsWith(URL_ARCHETYPE)) { - final String archetype = matcher.group("archetype"); - downloadArchetype(url, archetype); + final Matcher matcher = PATTERN_URL_ARCHETYPE.matcher(urlStr); + if (matcher.matches()) + { + final String archetype = matcher.group("archetype"); + downloadArchetype(url, archetype); + } } + else if (urlStr.startsWith(URL_SEARCH)) + downloadSearch(url); } private void downloadFormat(final URL url) throws MalformedURLException @@ -210,6 +213,32 @@ public class TcdecksDecklistDownloader implements DecklistDownloader } } + private void downloadSearch(final URL url) + { + try + { 
+ crawler.navigateTo(url); + + for (final WebElement tr : crawler.findElements(By.xpath("//table[@class='search_list']/tbody/tr"))) + { + if (hasElement(tr, By.xpath("th"))) + continue; // Header row + + final WebElement archetype = tr.findElement(By.xpath("td[@data-th='Archetype']/a")); + final WebElement player = tr.findElement(By.xpath("td[@data-th='Player']/a")); + final WebElement date = tr.findElement(By.xpath("td[@data-th='Date']/a")); + final URL downloadUrl = toDownloadUrl(archetype.getAttribute("href")); + + consumers.process(metadataFactory.create(downloadUrl, player.getText(), archetype.getText(), + LocalDate.parse(date.getText(), DateTimeFormatter.ofPattern("dd/MM/yyyy")))); + } + } + catch (final Exception e) + { + log.error("downloadSearch: url={}", url, e); + } + } + private boolean hasElement(final WebElement element, final By by) { try diff --git a/src/main/packaged-resources/cfg/config-available/pm-top-budget.properties b/src/main/packaged-resources/cfg/config-available/pm-top-budget.properties @@ -0,0 +1,8 @@ +archetype-limit=1 +sources=\ + https://www.tcdecks.net/results.php?token=&tname=&nlow=128&nhigh=&from=2024%2F09%2F02&to=&player=&aname=&dname=&format=Premodern&pos1=on&main=&nomain=Ancient+Tomb%3BCity+of+Traitors%3BDark+Ritual%3BGaea%27s+Cradle%3BIntuition%3BMox+Diamond%3BPhyrexian+Dreadnought%3BSurvival+of+the+Fittest&side=&noside=&strict=on | \ + https://www.tcdecks.net/results.php?token=&tname=&nlow=128&nhigh=&from=2024%2F09%2F02&to=&player=&aname=&dname=&format=Premodern&pos2=on&main=&nomain=Ancient+Tomb%3BCity+of+Traitors%3BDark+Ritual%3BGaea%27s+Cradle%3BIntuition%3BMox+Diamond%3BPhyrexian+Dreadnought%3BSurvival+of+the+Fittest&side=&noside=&strict=on | \ + 
https://www.tcdecks.net/results.php?token=&tname=&nlow=128&nhigh=&from=2024%2F09%2F02&to=&player=&aname=&dname=&format=Premodern&pos34=on&main=&nomain=Ancient+Tomb%3BCity+of+Traitors%3BDark+Ritual%3BGaea%27s+Cradle%3BIntuition%3BMox+Diamond%3BPhyrexian+Dreadnought%3BSurvival+of+the+Fittest&side=&noside=&strict=on | \ + https://www.tcdecks.net/results.php?token=&tname=&nlow=128&nhigh=&from=2024%2F09%2F02&to=&player=&aname=&dname=&format=Premodern&pos58=on&main=&nomain=Ancient+Tomb%3BCity+of+Traitors%3BDark+Ritual%3BGaea%27s+Cradle%3BIntuition%3BMox+Diamond%3BPhyrexian+Dreadnought%3BSurvival+of+the+Fittest&side=&noside=&strict=on +output-dir=output/pm-top-budget +only-ai-playable-cards=false +\ No newline at end of file