commit ad3cadbc1b3d5a21faed0275912b04fd413ed989
parent eb7e63fc5ab0334cb2925c8d8c22278bd09268be
Author: Kevin Corvisier <git@kevincorvisier.fr>
Date: Fri, 6 Jun 2025 05:17:57 +0900
Use Selenium WebDriver to download decklists and avoid 403 errors
Diffstat:
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/src/main/java/fr/kevincorvisier/mtg/dd/Crawler.java b/src/main/java/fr/kevincorvisier/mtg/dd/Crawler.java
@@ -1,5 +1,6 @@
package fr.kevincorvisier.mtg.dd;
+import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
@@ -8,6 +9,7 @@ import java.util.List;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
+import org.openqa.selenium.WindowType;
import org.openqa.selenium.htmlunit.HtmlUnitDriver;
import org.springframework.stereotype.Service;
@@ -58,10 +60,16 @@ public class Crawler
try
{
rateLimiter.acquire();
- final InputStream result = url.openStream();
+ final String currHandle = driver.getWindowHandle();
+
+ driver.switchTo().newWindow(WindowType.TAB);
+ driver.navigate().to(url);
+ final String pageSource = driver.getPageSource();
+ driver.close();
+ driver.switchTo().window(currHandle);
log.info("openStream: url={}", url);
- return result;
+ return new ByteArrayInputStream(pageSource.getBytes());
}
catch (final Exception e)
{