Use selenium web driver to download decklists, avoid 403 errors - mtg-decks-downloader - Tool to download Magic: The Gathering decklists from the Internet

commit ad3cadbc1b3d5a21faed0275912b04fd413ed989
parent eb7e63fc5ab0334cb2925c8d8c22278bd09268be
Author: Kevin Corvisier <git@kevincorvisier.fr>
Date:   Fri,  6 Jun 2025 05:17:57 +0900

Use selenium web driver to download decklists, avoid 403 errors
Diffstat:
M src/main/java/fr/kevincorvisier/mtg/dd/Crawler.java  | 12 ++++++++++--

1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/src/main/java/fr/kevincorvisier/mtg/dd/Crawler.java b/src/main/java/fr/kevincorvisier/mtg/dd/Crawler.java
@@ -1,5 +1,6 @@
 package fr.kevincorvisier.mtg.dd;
 
+import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.net.URL;
@@ -8,6 +9,7 @@ import java.util.List;
 import org.openqa.selenium.By;
 import org.openqa.selenium.WebDriver;
 import org.openqa.selenium.WebElement;
+import org.openqa.selenium.WindowType;
 import org.openqa.selenium.htmlunit.HtmlUnitDriver;
 import org.springframework.stereotype.Service;
 
@@ -58,10 +60,16 @@ public class Crawler
 		try
 		{
 			rateLimiter.acquire();
-			final InputStream result = url.openStream();
+			final String currHandle = driver.getWindowHandle();
+
+			driver.switchTo().newWindow(WindowType.TAB);
+			driver.navigate().to(url);
+			final String pageSource = driver.getPageSource();
+			driver.close();
+			driver.switchTo().window(currHandle);
 
 			log.info("openStream: url={}", url);
-			return result;
+			return new ByteArrayInputStream(pageSource.getBytes());
 		}
 		catch (final Exception e)
 		{

	mtg-decks-downloader Tool to download Magic: The Gathering decklists from the Internet
	git clone https://kevincorvisier.fr/git/mtg-decks-downloader.git
	Log | Files | Refs | README