1+ package com .rarchives .ripme .ripper .rippers ;
2+
3+ import java .io .IOException ;
4+ import java .net .MalformedURLException ;
5+ import java .net .URL ;
6+ import java .util .ArrayList ;
7+ import java .util .List ;
8+ import java .util .regex .Matcher ;
9+ import java .util .regex .Pattern ;
10+
11+ import org .apache .log4j .Logger ;
12+ import org .json .JSONArray ;
13+ import org .json .JSONObject ;
14+ import org .jsoup .Connection .Response ;
15+ import org .jsoup .Jsoup ;
16+ import org .jsoup .nodes .Document ;
17+ import org .jsoup .nodes .Element ;
18+ import org .jsoup .select .Elements ;
19+
20+ import com .rarchives .ripme .ripper .AlbumRipper ;
21+
22+ public class PhotobucketRipper extends AlbumRipper {
23+
24+ private static final String DOMAIN = "photobucket.com" ,
25+ HOST = "photobucket" ;
26+ private static final Logger logger = Logger .getLogger (PhotobucketRipper .class );
27+
28+ private Response pageResponse = null ;
29+
30+ public PhotobucketRipper (URL url ) throws IOException {
31+ super (url );
32+ }
33+
34+ @ Override
35+ public String getHost () {
36+ return HOST ;
37+ }
38+
39+ public URL sanitizeURL (URL url ) throws MalformedURLException {
40+ logger .info (url );
41+ String u = url .toExternalForm ();
42+ if (u .contains ("?" )) {
43+ u = u .substring (0 , u .indexOf ("?" ));
44+ return new URL (u );
45+ }
46+ else {
47+ return url ;
48+ }
49+ }
50+
51+ public String getAlbumTitle (URL url ) throws MalformedURLException {
52+ try {
53+ // Attempt to use album title as GID
54+ if (pageResponse == null ) {
55+ pageResponse = Jsoup .connect (url .toExternalForm ()).execute ();
56+ }
57+ Document albumDoc = pageResponse .parse ();
58+ Elements els = albumDoc .select ("div.libraryTitle > h1" );
59+ if (els .size () == 0 ) {
60+ throw new IOException ("Could not find libraryTitle at " + url );
61+ }
62+ return els .get (0 ).text ();
63+ } catch (IOException e ) {
64+ // Fall back to default album naming convention
65+ }
66+ return super .getAlbumTitle (url );
67+ }
68+
69+ @ Override
70+ public String getGID (URL url ) throws MalformedURLException {
71+ Pattern p ; Matcher m ;
72+
73+ // http://s844.photobucket.com/user/SpazzySpizzy/library/Lady%20Gaga?sort=3&page=1
74+ p = Pattern .compile ("^https?://[a-zA-Z0-9]+\\ .photobucket\\ .com/user/([a-zA-Z0-9_\\ -]+)/library.*$" );
75+ m = p .matcher (url .toExternalForm ());
76+ if (m .matches ()) {
77+ return m .group (1 );
78+ }
79+
80+ throw new MalformedURLException (
81+ "Expected photobucket.com gallery formats: "
82+ + "http://x###.photobucket.com/username/library/..."
83+ + " Got: " + url );
84+ }
85+
86+ @ Override
87+ public void rip () throws IOException {
88+ List <String > subalbums = ripAlbumAndGetSubalbums (this .url .toExternalForm ());
89+
90+ List <String > subsToRip = new ArrayList <String >(),
91+ rippedSubs = new ArrayList <String >();
92+
93+ for (String sub : subalbums ) {
94+ subsToRip .add (sub );
95+ }
96+
97+ while (subsToRip .size () > 0 && !isStopped ()) {
98+ try {
99+ Thread .sleep (1000 );
100+ } catch (InterruptedException e ) {
101+ break ;
102+ }
103+ String nextSub = subsToRip .remove (0 );
104+ rippedSubs .add (nextSub );
105+ logger .info ("Attempting to rip next subalbum: " + nextSub );
106+ try {
107+ pageResponse = null ;
108+ subalbums = ripAlbumAndGetSubalbums (nextSub );
109+ } catch (IOException e ) {
110+ logger .error ("Error while ripping " + nextSub , e );
111+ break ;
112+ }
113+ for (String subalbum : subalbums ) {
114+ if (!subsToRip .contains (subalbum ) && !rippedSubs .contains (subalbum )) {
115+ subsToRip .add (subalbum );
116+ }
117+ }
118+ }
119+ waitForThreads ();
120+ }
121+
122+ public List <String > ripAlbumAndGetSubalbums (String theUrl ) throws IOException {
123+ int filesIndex = 0 ,
124+ filesTotal = 0 ,
125+ pageIndex = 0 ;
126+ String currentAlbumPath = null ,
127+ url = null ;
128+
129+ while (pageIndex == 0 || filesIndex < filesTotal ) {
130+ if (isStopped ()) {
131+ break ;
132+ }
133+ pageIndex ++;
134+ if (pageIndex > 1 || pageResponse == null ) {
135+ url = theUrl + String .format ("?sort=3&page=" , pageIndex );
136+ logger .info (" Retrieving " + url );
137+ pageResponse = Jsoup .connect (url ).execute ();
138+ }
139+ Document albumDoc = pageResponse .parse ();
140+ // Retrieve JSON from request
141+ String jsonString = null ;
142+ for (Element script : albumDoc .select ("script[type=text/javascript]" )) {
143+ String data = script .data ();
144+ if (!data .contains ("libraryAlbumsPageCollectionData" )) {
145+ continue ;
146+ }
147+ // Ensure this chunk of javascript contains the album info
148+ // Grab the JSON
149+ Pattern p ; Matcher m ;
150+ p = Pattern .compile ("^.*collectionData: (\\ {.*\\ }).*$" , Pattern .DOTALL );
151+ m = p .matcher (data );
152+ if (m .matches ()) {
153+ jsonString = m .group (1 );
154+ break ;
155+ }
156+ }
157+ if (jsonString == null ) {
158+ logger .error ("Unable to find JSON data at URL: " + url );
159+ break ;
160+ }
161+ JSONObject json = new JSONObject (jsonString );
162+ JSONObject items = json .getJSONObject ("items" );
163+ JSONArray objects = items .getJSONArray ("objects" );
164+ filesTotal = items .getInt ("total" );
165+ currentAlbumPath = json .getString ("currentAlbumPath" );
166+ for (int i = 0 ; i < objects .length (); i ++) {
167+ JSONObject object = objects .getJSONObject (i );
168+ String image = object .getString ("fullsizeUrl" );
169+ filesIndex += 1 ;
170+ addURLToDownload (new URL (image ),
171+ "" ,
172+ object .getString ("location" ),
173+ albumDoc .location (),
174+ pageResponse .cookies ());
175+ }
176+ }
177+ // Get subalbums
178+ if (url != null ) {
179+ return getSubAlbums (url , currentAlbumPath );
180+ } else {
181+ return new ArrayList <String >();
182+ }
183+ }
184+
185+ private List <String > getSubAlbums (String url , String currentAlbumPath ) {
186+ List <String > result = new ArrayList <String >();
187+ String subdomain = url .substring (url .indexOf ("://" )+3 );
188+ subdomain = subdomain .substring (0 , subdomain .indexOf ("." ));
189+ String apiUrl = "http://" + subdomain + ".photobucket.com/component/Albums-SubalbumList"
190+ + "?deferCollapsed=true"
191+ + "&albumPath=" + currentAlbumPath // %2Falbums%2Fab10%2FSpazzySpizzy"
192+ + "&json=1" ;
193+ try {
194+ logger .info ("Loading " + apiUrl );
195+ Document doc = Jsoup .connect (apiUrl )
196+ .ignoreContentType (true )
197+ .referrer (url )
198+ .get ();
199+ String jsonString = doc .body ().html ().replace (""" , "\" " );
200+ JSONObject json = new JSONObject (jsonString );
201+ JSONArray subalbums = json .getJSONObject ("body" ).getJSONArray ("subAlbums" );
202+ for (int i = 0 ; i < subalbums .length (); i ++) {
203+ String suburl =
204+ "http://"
205+ + subdomain
206+ + ".photobucket.com"
207+ + subalbums .getJSONObject (i ).getString ("path" );
208+ suburl = suburl .replace (" " , "%20" );
209+ result .add (suburl );
210+ }
211+ } catch (IOException e ) {
212+ logger .error ("Failed to get subalbums from " + apiUrl , e );
213+ }
214+ return result ;
215+ }
216+
217+ public boolean canRip (URL url ) {
218+ return url .getHost ().endsWith (DOMAIN );
219+ }
220+
221+ }
0 commit comments