-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathfeatures.yaml
More file actions
146 lines (136 loc) · 3.87 KB
/
features.yaml
File metadata and controls
146 lines (136 loc) · 3.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# Config file that selects the features to extract and names the output columns.
# features_to_extract: select the feature sets you want by uncommenting the
# relevant entries under that key.
# graph_columns: the names of the columns in the output graph CSV file.
# Update that list if you add a new graph attribute.
# feature_columns_adgraph and feature_columns: the names of the columns
# in the output feature file for AdGraph and WebGraph, respectively.
# Note: to run AdGraph, rename feature_columns_adgraph to feature_columns
# (the existing feature_columns can be renamed to feature_columns_webgraph).
# label_columns: the names of the columns in the output label file.
# Feature sets to extract. An entry prefixed with '#' is disabled; remove the
# '#' to enable it. Only 'structure' and 'dataflow' are enabled here.
# Keep the '#' directly against the '-' of a disabled entry so that deleting
# just the '#' leaves the entry aligned with the active ones.
features_to_extract:
#- content
- structure
- dataflow
#- additional
# Names of the columns in the output graph CSV file.
# Add an entry here whenever a new graph attribute is introduced.
# NOTE(review): the list order presumably matches the CSV column order --
# confirm against the code that writes the graph file before reordering.
graph_columns:
- visit_id
- name
- top_level_url
- type
- attr
- domain
- document_url
- setter
- setting_time_stamp
- top_level_domain
- setter_domain
- graph_attr
- party
- src
- dst
- action
- time_stamp
- reqattr
- respattr
- response_status
- content_hash
- post_body
- post_body_raw
# Names of the columns in the output feature file for AdGraph.
# To run AdGraph, rename this key to feature_columns (and rename the existing
# feature_columns key, e.g. to feature_columns_webgraph, so the keys do not
# collide).
feature_columns_adgraph:
- visit_id
- name
- content_policy_type
- url_length
- is_subdomain
- is_valid_qs
- is_third_party
- base_domain_in_query
- semicolon_in_query
- screen_size_present
- ad_size_present
- ad_size_in_qs_present
- keyword_raw_present
- keyword_char_present
- num_nodes
- num_edges
- nodes_div_by_edges
- edges_div_by_nodes
- in_degree
- out_degree
- in_out_degree
- average_degree_connectivity
- is_parent_script
- is_ancestor_script
- ascendant_has_ad_keyword
- is_eval_or_function
- descendant_of_eval_or_function
- ascendant_script_has_eval_or_function
- ascendant_script_has_fp_keyword
- ascendant_script_length
# Names of the columns in the output feature file for WebGraph.
# When running AdGraph instead, rename this key (e.g. to
# feature_columns_webgraph) and rename feature_columns_adgraph to
# feature_columns.
feature_columns:
- visit_id
- name
- num_nodes
- num_edges
- nodes_div_by_edges
- edges_div_by_nodes
- in_degree
- out_degree
- in_out_degree
- ancestors
- descendants
- closeness_centrality
- average_degree_connectivity
- eccentricity
- is_parent_script
- is_ancestor_script
- ascendant_has_ad_keyword
- is_eval_or_function
- descendant_of_eval_or_function
- ascendant_script_has_eval_or_function
- ascendant_script_has_fp_keyword
- ascendant_script_length
- num_get_storage
- num_set_storage
- num_get_cookie
- num_set_cookie
- num_script_predecessors
- num_script_successors
- num_requests_sent
- num_requests_received
- num_redirects_sent
- num_redirects_rec
- max_depth_redirect
- indirect_in_degree
- indirect_out_degree
- indirect_ancestors
- indirect_descendants
- indirect_closeness_centrality
- indirect_average_degree_connectivity
- indirect_eccentricity
- indirect_mean_in_weights
- indirect_min_in_weights
- indirect_max_in_weights
- indirect_mean_out_weights
- indirect_min_out_weights
- indirect_max_out_weights
- num_set_get_src
- num_set_mod_src
- num_set_url_src
- num_get_url_src
- num_set_get_dst
- num_set_mod_dst
- num_set_url_dst
- num_get_url_dst
- indirect_all_in_degree
- indirect_all_out_degree
- indirect_all_ancestors
- indirect_all_descendants
- indirect_all_closeness_centrality
- indirect_all_average_degree_connectivity
- indirect_all_eccentricity
# Names of the columns in the output label file.
label_columns:
- visit_id
- top_level_url
- name
- label