-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathhelpers.py
More file actions
207 lines (159 loc) · 7.75 KB
/
Copy pathhelpers.py
File metadata and controls
207 lines (159 loc) · 7.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
# Import the locators file
import sys
from pageobject.locators import locators
from pageobject.locators import *
def create_actions(driver) -> ActionChains:
actions = ActionChains(driver)
return actions
def create_waits(driver: WebDriver, duration: int) -> WebDriverWait:
# Explicit wait of 10 seconds
wait = WebDriverWait(driver, duration)
return wait
def locate_element(driver: WebDriver, by: By, locator: str) -> WebElement:
try:
web_element = driver.find_element(by, locator)
return web_element
except NoSuchElementException:
print(f"Element not found")
return None
def dropdown_select_element(select_dropdown: Select, method_name: str,
*args, **kwargs) -> None:
if method_name == "visible_text":
select_dropdown.select_by_visible_text(args[0])
elif method_name == "select_value":
select_dropdown.select_by_value(args[0])
elif method_name == "select_index":
select_dropdown.select_by_index(args[0])
return
def print_select_element(select_dropdown: Select, method_name: str) -> None:
# def all_selected_options(self) -> List[WebElement]:
selected_options = select_dropdown.all_selected_options
# Print the text of each selected option
for option in selected_options:
print(f"'{method_name}': Selected options text:", option.text)
# Deselect all the values
select_dropdown.deselect_all()
return
def locate_elements(driver: WebDriver, by: By, locator: str) -> list:
try:
web_element_list = driver.find_elements(by, locator)
return web_element_list
except NoSuchElementException:
print(f"Elements not found")
return None
def locate_elements_gen(element: WebElement, by: By, locator: str) -> list:
try:
web_element_list = element.find_elements(by, locator)
return web_element_list
except NoSuchElementException:
print(f"Elements not found")
return None
def enter_details(driver: WebDriver, by: By, locator: str,
input: str, sleep_time: int) -> None:
try:
key_str = driver.find_element(by, locator)
key_str.send_keys(input)
time.sleep(sleep_time)
except Exception as e:
print(f"An error occurred: {e}")
class helpers(object):
def scrap_ecomm_content(driver)->list:
meta_data_arr=[]
# Explicit wait of 10 seconds
wait = create_waits(driver, 10)
actions = create_actions(driver)
# Wait for the element to visible, it will be interactable once it is visible
# element_cat = driver.find_element(By.XPATH, "//a[contains(.,'Shop by Category')]")
element_cat = wait.until(EC.visibility_of_element_located((By.XPATH,
locators.shopcategory)))
# Move to the element and perform click operation
actions.move_to_element(element_cat).click().perform()
# element_category = driver.find_element(By.XPATH, "//span[contains(.,'Phone, Tablets & Ipod')]")
element_phcat = wait.until(EC.visibility_of_element_located((By.XPATH,
locators.phonecategory)))
actions.move_to_element(element_phcat).click().perform()
# Better to wait till the respective element is visible
# Tough nut : 1 - nested locators!
nested_elements = wait.until(EC.visibility_of_element_located((By.XPATH,
"//div[@id='entry_212391']//div[@id='entry_212408']//div[@class='row']")))
# Tough nut : 2 - nested locators!
actual_items = nested_elements.find_elements(By.CLASS_NAME,
"product-layout.product-grid.no-desc.col-xl-4.col-lg-4.col-md-4.col-sm-6.col-6")
count = len(actual_items)
# print("Number of elements found:" + str(count) + ".\n")
for ind_elem_props in actual_items:
# nested_img_elem = ind_elem_props.find_element(By.CSS_SELECTOR,
# "div.product-thumb > div.product-thumb-top > div.image")
################ Product Image Link ################
# item_image = nested_img_elem.find_element(By.XPATH,
# "//*[contains(@id, 'mz-product-grid-image')]")
nested_product_name_elem = ind_elem_props.find_element(By.CSS_SELECTOR,
"div.product-thumb > div.caption")
################ Product Name ################
nested_title_elem = nested_product_name_elem.find_element(By.CSS_SELECTOR,
".title .text-ellipsis-2")
################ Price #######################
nested_price_elem = nested_product_name_elem.find_element(By.CSS_SELECTOR,
".price .price-new")
# Create a dictionary of the meta-data of the items on e-commerce store
meta_data_dict = {
'product image': nested_title_elem.get_attribute('href'),
'product name': nested_title_elem.text,
'product price': nested_price_elem.text
}
meta_data_arr.append(meta_data_dict)
return meta_data_arr
def scrap_yt_content(driver)->list:
meta_data_arr=[]
# Explicit wait of 10 seconds
wait = create_waits(driver, 10)
actions = create_actions(driver)
# Explicit wait of 10 seconds
wait = WebDriverWait(driver, 10)
# Wait for 10 seconds till the Document State is not complete
wait.until(lambda driver: driver.execute_script('return document.readyState') == 'complete')
# Once the page has loaded, scroll to the end of the page to load all the videos
# Scroll to the end of the page to load all the videos in the channel
# Reference - https://stackoverflow.com/a/51702698/126105
# Get scroll height
start_height = driver.execute_script("return document.documentElement.scrollHeight")
# Repeat scrolling until reaching the end of the page
# Taking cues from my own blog https://www.lambdatest.com/blog/scraping-dynamic-web-pages/
while True:
# Scroll to the bottom of the page
driver.execute_script("window.scrollTo(0, " + str(start_height) + ")")
# Wait for the content to load
time.sleep(2)
scroll_height = driver.execute_script("return document.documentElement.scrollHeight")
if (scroll_height == start_height):
# If heights are the same, we reached the end of page
break
# print("scroll_height = " + str(scroll_height))
time.sleep(2)
start_height = scroll_height
time.sleep(2)
elem_1 = driver.find_elements(By.CSS_SELECTOR,
"#dismissible > #details")
for video_metadata_1 in elem_1:
elem_2 = video_metadata_1.find_element(By.CSS_SELECTOR,
"#meta")
elem_3 = elem_2.find_element(By.CSS_SELECTOR,
"#video-title")
elem_4 = elem_2.find_element(By.CSS_SELECTOR,
"#metadata > #metadata-line > span:nth-child(3)")
elem_5 = elem_2.find_element(By.CSS_SELECTOR,
"#metadata > #metadata-line > span:nth-child(4)")
video_title = elem_3.get_attribute('innerText')
video_views = elem_4.get_attribute('innerText')
video_time = elem_5.get_attribute('innerText')
# Create a dictionary of the video meta-data
meta_data_dict = {
'video title': video_title,
'video views': video_views,
'video duration': video_time
}
meta_data_arr.append(meta_data_dict)
return meta_data_arr
def print_scrapped_content(meta_data):
for elem_info in meta_data:
print(elem_info)