|
| 1 | +package com.rarchives.ripme.ripper.rippers; |
| 2 | + |
| 3 | +import java.io.IOException; |
| 4 | +import java.net.MalformedURLException; |
| 5 | +import java.net.URL; |
| 6 | +import java.util.ArrayList; |
| 7 | +import java.util.Arrays; |
| 8 | +import java.util.HashMap; |
| 9 | +import java.util.List; |
| 10 | +import java.util.Map; |
| 11 | +import java.util.regex.Matcher; |
| 12 | +import java.util.regex.Pattern; |
| 13 | + |
| 14 | +import org.apache.log4j.Logger; |
| 15 | +import org.jsoup.Connection.Method; |
| 16 | +import org.jsoup.Connection.Response; |
| 17 | +import org.jsoup.Jsoup; |
| 18 | +import org.jsoup.nodes.Document; |
| 19 | +import org.jsoup.nodes.Element; |
| 20 | + |
| 21 | +import com.rarchives.ripme.ripper.AbstractRipper; |
| 22 | +import com.rarchives.ripme.utils.Utils; |
| 23 | + |
| 24 | +public class DeviantartRipper extends AbstractRipper { |
| 25 | + |
| 26 | + private static final String DOMAIN = "deviantart.com", |
| 27 | + HOST = "deviantart"; |
| 28 | + |
| 29 | + private static final int SLEEP_TIME = 2000; |
| 30 | + private static final Logger logger = Logger.getLogger(DeviantartRipper.class); |
| 31 | + |
| 32 | + public DeviantartRipper(URL url) throws IOException { |
| 33 | + super(url); |
| 34 | + } |
| 35 | + |
| 36 | + @Override |
| 37 | + public boolean canRip(URL url) { |
| 38 | + return url.getHost().endsWith(DOMAIN); |
| 39 | + } |
| 40 | + |
| 41 | + @Override |
| 42 | + public URL sanitizeURL(URL url) throws MalformedURLException { |
| 43 | + String u = url.toExternalForm(); |
| 44 | + u = u.replaceAll("\\?.*", ""); |
| 45 | + return new URL(u); |
| 46 | + } |
| 47 | + |
| 48 | + @Override |
| 49 | + public void rip() throws IOException { |
| 50 | + int index = 0; |
| 51 | + String nextURL = this.url.toExternalForm(); |
| 52 | + while (nextURL != null) { |
| 53 | + logger.info(" Retrieving " + nextURL); |
| 54 | + Document doc = Jsoup.connect(nextURL) |
| 55 | + .userAgent(USER_AGENT) |
| 56 | + .get(); |
| 57 | + try { |
| 58 | + Thread.sleep(SLEEP_TIME); |
| 59 | + } catch (InterruptedException e) { |
| 60 | + logger.error("[!] Interrupted while waiting for page to load", e); |
| 61 | + break; |
| 62 | + } |
| 63 | + for (Element thumb : doc.select("a.thumb img")) { |
| 64 | + String fullSize = thumbToFull(thumb.attr("src")); |
| 65 | + URL pageURL; |
| 66 | + try { |
| 67 | + pageURL = new URL(fullSize); |
| 68 | + } catch (MalformedURLException e) { |
| 69 | + logger.error("[!] Invalid thumbnail image: " + thumbToFull(fullSize)); |
| 70 | + continue; |
| 71 | + } |
| 72 | + index++; |
| 73 | + addURLToDownload(pageURL, String.format("%03d_", index)); |
| 74 | + } |
| 75 | + nextURL = null; |
| 76 | + for (Element nextButton : doc.select("a.away")) { |
| 77 | + if (nextButton.attr("href").contains("offset=" + index)) { |
| 78 | + nextURL = this.url.toExternalForm() + "?offset=" + index; |
| 79 | + } |
| 80 | + } |
| 81 | + } |
| 82 | + waitForThreads(); |
| 83 | + } |
| 84 | + |
| 85 | + public static String thumbToFull(String thumb) { |
| 86 | + thumb = thumb.replace("http://th", "http://fc"); |
| 87 | + List<String> fields = new ArrayList<String>(Arrays.asList(thumb.split("/"))); |
| 88 | + fields.remove(4); |
| 89 | + StringBuilder result = new StringBuilder(); |
| 90 | + for (int i = 0; i < fields.size(); i++) { |
| 91 | + if (i > 0) { |
| 92 | + result.append("/"); |
| 93 | + } |
| 94 | + result.append(fields.get(i)); |
| 95 | + } |
| 96 | + return result.toString(); |
| 97 | + } |
| 98 | + |
| 99 | + @Override |
| 100 | + public String getHost() { |
| 101 | + return HOST; |
| 102 | + } |
| 103 | + |
| 104 | + @Override |
| 105 | + public String getGID(URL url) throws MalformedURLException { |
| 106 | + Pattern p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{1,})\\.deviantart\\.com(/gallery)?/?$"); |
| 107 | + Matcher m = p.matcher(url.toExternalForm()); |
| 108 | + if (m.matches()) { |
| 109 | + // Root gallery |
| 110 | + return m.group(1); |
| 111 | + } |
| 112 | + p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{1,})\\.deviantart\\.com/gallery/([0-9]{1,}).*$"); |
| 113 | + m = p.matcher(url.toExternalForm()); |
| 114 | + if (m.matches()) { |
| 115 | + // Subgallery |
| 116 | + return m.group(1) + "_" + m.group(2); |
| 117 | + } |
| 118 | + throw new MalformedURLException("Expected URL format: http://username.deviantart.com/[/gallery/#####], got: " + url); |
| 119 | + } |
| 120 | + |
| 121 | + /** |
| 122 | + * Logs into deviant art. Not required to rip NSFW images. |
| 123 | + * @return Map of cookies containing session data. |
| 124 | + */ |
| 125 | + @SuppressWarnings("unused") |
| 126 | + private Map<String, String> loginToDeviantart() throws IOException { |
| 127 | + // Populate postData fields |
| 128 | + Map<String,String> postData = new HashMap<String,String>(); |
| 129 | + String username = Utils.getConfigString("deviantart.username", null); |
| 130 | + String password = Utils.getConfigString("deviantart.password", null); |
| 131 | + if (username == null || password == null) { |
| 132 | + throw new IOException("could not find username or password in config"); |
| 133 | + } |
| 134 | + Response resp = Jsoup.connect("http://www.deviantart.com/") |
| 135 | + .userAgent(USER_AGENT) |
| 136 | + .method(Method.GET) |
| 137 | + .execute(); |
| 138 | + for (Element input : resp.parse().select("form#form-login input[type=hidden]")) { |
| 139 | + postData.put(input.attr("name"), input.attr("value")); |
| 140 | + } |
| 141 | + postData.put("username", username); |
| 142 | + postData.put("password", password); |
| 143 | + postData.put("remember_me", "1"); |
| 144 | + |
| 145 | + // Send login request |
| 146 | + resp = Jsoup.connect("https://www.deviantart.com/users/login") |
| 147 | + .userAgent(USER_AGENT) |
| 148 | + .data(postData) |
| 149 | + .cookies(resp.cookies()) |
| 150 | + .method(Method.POST) |
| 151 | + .execute(); |
| 152 | + |
| 153 | + // Assert we are logged in |
| 154 | + if (resp.hasHeader("Location") && resp.header("Location").contains("password")) { |
| 155 | + // Wrong password |
| 156 | + throw new IOException("Wrong pasword"); |
| 157 | + } |
| 158 | + if (resp.url().toExternalForm().contains("bad_form")) { |
| 159 | + throw new IOException("Login form was incorrectly submitted"); |
| 160 | + } |
| 161 | + if (resp.cookie("auth_secure") == null || |
| 162 | + resp.cookie("auth") == null) { |
| 163 | + throw new IOException("No auth_secure or auth cookies received"); |
| 164 | + } |
| 165 | + // We are logged in, save the cookies |
| 166 | + return resp.cookies(); |
| 167 | + } |
| 168 | + |
| 169 | +} |
0 commit comments