From 51ebffa5b896502c0ff296dfc7c2b88b41794a28 Mon Sep 17 00:00:00 2001
From: Owain van Brakel
Date: Wed, 23 Mar 2022 01:00:25 +0100
Subject: [PATCH] project: Update scraper

Read the cache from ~/.openosrs/jagexcache, stop overriding the common
ForkJoinPool parallelism, and walk items and NPCs with sequential
streams. MediaWiki now logs each request outcome, sleeps 2.5 s and
retries on HTTP 429, and exposes its base URL through a getter so the
item and NPC dumpers can store a Special:Lookup link in a new "wiki"
field.

---
 .../src/main/java/net/runelite/data/App.java  |  5 +--
 .../net/runelite/data/dump/MediaWiki.java     | 29 +++++++++++++++++++
 .../data/dump/wiki/ItemStatsDumper.java       | 12 ++++++-
 .../data/dump/wiki/NpcStatsDumper.java        | 12 ++++++-
 4 files changed, 52 insertions(+), 6 deletions(-)

diff --git a/wiki-scraper/src/main/java/net/runelite/data/App.java b/wiki-scraper/src/main/java/net/runelite/data/App.java
index 3f4bd9a8ac..ef8294e692 100644
--- a/wiki-scraper/src/main/java/net/runelite/data/App.java
+++ b/wiki-scraper/src/main/java/net/runelite/data/App.java
@@ -68,7 +68,7 @@ public class App
 
 	private static Store cacheStore() throws IOException
 	{
-		Path path = Paths.get(System.getProperty("user.home"), "jagexcache" + File.separator + "oldschool" + File.separator + "LIVE");
+		Path path = Paths.get(System.getProperty("user.home"), ".openosrs" + File.separator + "jagexcache" + File.separator + "oldschool" + File.separator + "LIVE");
 		final File jagexcache = new File(String.valueOf(path));
 
 		if (!Files.exists(path))
@@ -80,9 +80,6 @@ public class App
 
 		cacheStore.load();
 
-		// Try to make this go faster (probably not very smart)
-		System.setProperty("java.util.concurrent.ForkJoinPool.common.parallelism", "100");
-
 		return cacheStore;
 	}
 
diff --git a/wiki-scraper/src/main/java/net/runelite/data/dump/MediaWiki.java b/wiki-scraper/src/main/java/net/runelite/data/dump/MediaWiki.java
index f0ac4c76ea..fccc205faa 100644
--- a/wiki-scraper/src/main/java/net/runelite/data/dump/MediaWiki.java
+++ b/wiki-scraper/src/main/java/net/runelite/data/dump/MediaWiki.java
@@ -27,6 +27,9 @@ import java.io.UnsupportedEncodingException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.util.Map;
+import java.util.concurrent.TimeUnit;
+import lombok.Getter;
+import lombok.extern.slf4j.Slf4j;
 import net.runelite.data.App;
 import okhttp3.HttpUrl;
 import okhttp3.OkHttpClient;
@@ -35,6 +38,7 @@ import okhttp3.Response;
 import java.net.URLDecoder;
 import java.nio.charset.StandardCharsets;
 
+@Slf4j
 public class MediaWiki
 {
 	private static final class WikiInnerResponse
@@ -53,6 +57,7 @@ public class MediaWiki
 		.followSslRedirects(false)
 		.build();
 
+	@Getter
 	private final HttpUrl base;
 
 	public MediaWiki(final String base)
@@ -75,15 +80,28 @@ public class MediaWiki
 
 		try (final Response response = clientNoRedirect.newCall(request).execute())
 		{
+			log.info("original url: {}", url);
 			if (response.isRedirect())
 			{
 				final String page = response.header("Location")
 					.replace(base.newBuilder().addPathSegment("w").build().toString() + "/", "");
+				log.info("redirect url: {}", page);
 				return getPageData(page, section);
 			}
+			else
+			{
+				log.info("unsuccessful: {}", response.code());
+
+				if (response.code() == 429)
+				{
+					Thread.sleep(2500);
+					return getSpecialLookupData(type, id, section);
+				}
+			}
 		}
 		catch (Exception e)
 		{
+			log.info("exception: {}", e.getMessage());
 			return "";
 		}
 
@@ -129,9 +147,20 @@ public class MediaWiki
 				final InputStream in = response.body().byteStream();
 				return App.GSON.fromJson(new InputStreamReader(in), WikiResponse.class).parse.wikitext.get("*");
 			}
+			else
+			{
+				log.info("page data unsuccessful: {}", response.code());
+
+				if (response.code() == 429)
+				{
+					Thread.sleep(2500);
+					return getPageData(page, section);
+				}
+			}
 		}
 		catch (Exception e)
 		{
+			log.info("exception page data: {}", e.getMessage());
 			return "";
 		}
 
diff --git a/wiki-scraper/src/main/java/net/runelite/data/dump/wiki/ItemStatsDumper.java b/wiki-scraper/src/main/java/net/runelite/data/dump/wiki/ItemStatsDumper.java
index 51b9580d6d..24d36cbfbb 100644
--- a/wiki-scraper/src/main/java/net/runelite/data/dump/wiki/ItemStatsDumper.java
+++ b/wiki-scraper/src/main/java/net/runelite/data/dump/wiki/ItemStatsDumper.java
@@ -57,7 +57,10 @@ public class ItemStatsDumper
 
 		final Map itemStats = new TreeMap<>();
 		final Collection items = itemManager.getItems();
-		final Stream itemDefinitionStream = items.parallelStream();
+
+		log.info("{}", items.size());
+
+		final Stream itemDefinitionStream = items.stream();
 
 		itemDefinitionStream.forEach(item ->
 		{
@@ -109,6 +112,12 @@ public class ItemStatsDumper
 					continue;
 				}
 
+				itemStat.wiki(wiki.getBase().newBuilder()
+					.addPathSegment("w")
+					.addPathSegment("Special:Lookup")
+					.addQueryParameter("type", "item")
+					.addQueryParameter("id", String.valueOf(item.id))
+					.build().toString());
 				itemStat.name(getVarString(base, "name", offset) == null ? getVarString(base, "name1", offset) : getVarString(base, "name", offset));
 				itemStat.quest(getVarBoolean(base, "quest", offset));
 				itemStat.equipable(getVarBoolean(base, "equipable", offset) == null
@@ -347,6 +356,7 @@ public class ItemStatsDumper
 	{
 		static final ItemStats DEFAULT = ItemStats.builder().build();
 
+		private final String wiki;
 		private final String name;
 		private final Boolean quest;
 		private final Boolean equipable;
diff --git a/wiki-scraper/src/main/java/net/runelite/data/dump/wiki/NpcStatsDumper.java b/wiki-scraper/src/main/java/net/runelite/data/dump/wiki/NpcStatsDumper.java
index 494fc87039..817c09646b 100644
--- a/wiki-scraper/src/main/java/net/runelite/data/dump/wiki/NpcStatsDumper.java
+++ b/wiki-scraper/src/main/java/net/runelite/data/dump/wiki/NpcStatsDumper.java
@@ -56,6 +56,7 @@ public class NpcStatsDumper
 	private static final class NpcStats
 	{
 		private String name;
+		private String wiki;
 		private final Integer hitpoints;
 		private final Integer hitpoints1;
 		private final Integer combatLevel;
@@ -133,7 +134,10 @@ public class NpcStatsDumper
 
 		final Map npcStats = new HashMap<>();
 		final Collection definitions = npcManager.getNpcs();
-		final Stream npcDefinitionStream = definitions.parallelStream();
+
+		log.info("{}", definitions.size());
+
+		final Stream npcDefinitionStream = definitions.stream();
 
 		// Ensure variant names match cache as wiki isn't always correct
 		final Map nameMap = new HashMap<>();
@@ -232,6 +236,12 @@ public class NpcStatsDumper
 				// Update variant name or fall back to current name
 				final String curName = nameMap.get(curID);
 				stats.setName(curName == null ? stats.getName() : curName);
+				stats.setWiki(wiki.getBase().newBuilder()
+					.addPathSegment("w")
+					.addPathSegment("Special:Lookup")
+					.addQueryParameter("type", "npc")
+					.addQueryParameter("id", String.valueOf(n.getId()))
+					.build().toString());
 
 				npcStats.put(curID, stats);
 				log.debug("Dumped npc stats for npc id: {}", curID);