mtg-decks-downloader

Tool to download Magic: The Gathering decklists from the Internet
git clone https://kevincorvisier.fr/git/mtg-decks-downloader.git
Log | Files | Refs | README

commit ad3cadbc1b3d5a21faed0275912b04fd413ed989
parent eb7e63fc5ab0334cb2925c8d8c22278bd09268be
Author: Kevin Corvisier <git@kevincorvisier.fr>
Date:   Fri,  6 Jun 2025 05:17:57 +0900

Use selenium web driver to download decklists, avoid 403 errors
Diffstat:
M src/main/java/fr/kevincorvisier/mtg/dd/Crawler.java | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/main/java/fr/kevincorvisier/mtg/dd/Crawler.java b/src/main/java/fr/kevincorvisier/mtg/dd/Crawler.java @@ -1,5 +1,6 @@ package fr.kevincorvisier.mtg.dd; +import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import java.net.URL; @@ -8,6 +9,7 @@ import java.util.List; import org.openqa.selenium.By; import org.openqa.selenium.WebDriver; import org.openqa.selenium.WebElement; +import org.openqa.selenium.WindowType; import org.openqa.selenium.htmlunit.HtmlUnitDriver; import org.springframework.stereotype.Service; @@ -58,10 +60,16 @@ public class Crawler try { rateLimiter.acquire(); - final InputStream result = url.openStream(); + final String currHandle = driver.getWindowHandle(); + + driver.switchTo().newWindow(WindowType.TAB); + driver.navigate().to(url); + final String pageSource = driver.getPageSource(); + driver.close(); + driver.switchTo().window(currHandle); log.info("openStream: url={}", url); - return result; + return new ByteArrayInputStream(pageSource.getBytes()); } catch (final Exception e) {