-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathdsl-examples.json
More file actions
382 lines (382 loc) · 32.8 KB
/
dsl-examples.json
File metadata and controls
382 lines (382 loc) · 32.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
{
"WL": {
"ClCon": {
"install the package": "PacletInstall[\"AntonAntonov/MonadicContextualClassification\"]",
"load the package": "Needs[\"AntonAntonov`MonadicContextualClassification`\"]",
"use the data dfSOXE": "ClConUnit[dfSOXE]",
"split the data with ration 0.64": "ClConSplitData[0.64]",
"split data into training and testing parts using training data ratio 0.72": "ClConSplitData[0.72]",
"Summarize the data": "ClConEchoDataSummary",
"show data breakdown": "ClConEchoDataSummary",
"echo data summary": "ClConEchoDataSummary",
"Train a classifier using logistic regression algorithm": "ClConMakeClassifier[\"LogisticRegression\"]",
"classifier creation with using random forest": "ClConMakeClassifier[\"RandomForest\"]",
"make a nearest neighbors classifier": "ClConMakeClassfier[\"NearestNeighbors\"]",
"show measurements": "Function[{v,c},ClConUnit[v,c]⟹ClConClassifierMeasurements[{\"Accuracy\",\"Precision\",\"Recall\"}]⟹ClConEchoValue]",
"classifier information": "ClConEchoFunctionContext[Information[#classifier]&]",
"plot the ROC functions PPV and FPR": "ClConROCPlot[\"PPV\",\"FPR\"]",
"show ROC plots": "ClConROCPLot",
"give ROC plots with image size 900": "ClConROCPLot[ImageSize->900]",
"I wanna see the confusion matrix plot here": "Function[{v,c},ClConUnit[v,c]⟹ClConClassifierMeasurements[{\"ConfusionMatrixPlot\"}]⟹ClConEchoValue]",
"echo pipeline value": "ClConEchoValue",
"show the pipeline context": "ClConEchoContext",
"reduce the dimension to 14 topics": "ClConReduceDimension[14]"
},
"QRMon": {
"install the package": "PacletInstall[\"AntonAntonov/MonadicQuantileRegression\"]",
"load the package": "Needs[\"AntonAntonov`MonadicQuantileRegression`\"]",
"use tsTemp": "QRMonUnit[tsTemp]",
"with the data tsTemp": "QRMonUnit[tsTemp]",
"do regression with 12 knots": "QRMonQuantileRegression[12]",
"quantile regression with 30 knots and interpolation order 3": "QRMonQuantileRegression[12,InterpolationOrder->3]",
"compute the regression quantiles for probabilities 0.1, 0.5, and 0.9": "QRMonQuantileRegression[12, {0.1, 0.5, 0.9}]",
"compute linear regression": "QRMonLeastSquaresFit[6]",
"compute linear regression with 14 basis functions": "QRMonLeastSquaresFit[14]",
"use the color red for regression quantiles": "QRMonSetRegressionFunctionsPlotOptions[{PlotStyle->Red}]",
"use Orange for to plot the data": "QRMonSetDataPlotOptions[{PlotStyle->Orange}]",
"show date list plot": "QRMonDateListPlot[]",
"plot the data and fits": "QRMonPlot[]",
"plot with aspect ratio 1\/3": "QRMonPlot[AspectRatio->1\/3]",
"plot with image size 800 and plot theme scientific": "QRMonPlot[ImageSize -> 800, PlotTheme->\"Scientific\"]",
"show date list error plots": "QRMonErrorPlots[\"DateListPlot\"->True]",
"show relative error plots": "QRMonErrorPlots[\"RelativeErrors\"->True]",
"Give the error plots using absolute errors": "QRMonErrorPlots[\"RelativeErrors\"->False]",
"echo data summary": "QRMonEchoDataSummary[]",
"rescale the data": "QRMonRescale[Axes->{False,True}]",
"rescale the data in both dimensions": "QRMonRescale[Axes->{True,True}]",
"find outliers": "QRMonOutliers[]",
"plot outliers": "QRMonOutliersPlot[]",
"give a date plot for the outliers": "QRMonOutliersPlot[\"DateListPlot\"->True]",
"outliers with time axis": "QRMonOutliersPlot[\"DateListPlot\"->True]",
"take the value": "QRMonTakeValue",
"show the value": "QRMonEchoValue"
},
"LSAMon": {
"install the package": "PacletInstall[\"AntonAntonov/MonadicLatentSemanticAnalysis\"]",
"load the package": "Needs[\"AntonAntonov`MonadicLatentSemanticAnalysis`\"]",
"use the documents aDocs": "LSAMonUnit[aDocs]",
"use dfTemp": "LSAMonUnit[dfTemp]",
"make the document-term matrix": "LSAMonMakeDocumentTermMatrix[]",
"make the document-term matrix with automatic stop words": "LSAMonMakeDocumentTermMatrix[\"StemmingRules\"->Automatic,\"StopWords\"->Automatic]",
"make the document-term matrix without stemming": "LSAMonMakeDocumentTermMatrix[\"StemmingRules\"->{},\"StopWords\"->Automatic]",
"echo document term matrix statistics": "LSAMonEchoDocumentTermMatrixStatistics[]",
"echo document term matrix metrics with log base 10": "LSAMonEchoDocumentTermMatrixStatistics[\"LogBase\"->10]",
"apply term weight functions": "LSAMonApplyTermWeightFunctions[]",
"apply term weight functions: global IDF, local None, normalizer Cosine": "LSAMonApplyTermWeightFunctions[\"GlobalWeightFunction\"->\"IDF\",\"LocalWeightFunction\"->\"None\",\"NormalizerFunction\"->\"Cosine\"]",
"extract 30 topics using the method SVD": "LSAMonExtractTopics[\"NumberOfTopics\"->30,Method->\"SVD\"]",
"extract 24 topics using the method NNMF, max steps 12 and min number of documents per term 10": "LSAMonExtractTopics[\"NumberOfTopics\"->24,Method->\"NNMF\",\"MaxSteps\"->12,\"MinNumberOfDocumentsPerTerm\"->10]",
"Echo topics table": "LSAMonEchoTopicsTable[]",
"Echo topics table with 10 terms per topic": "LSAMonEchoTopicsTable[\"NumberOfTerms\"->10]",
"show the topics": "LSAMonEchoTopicsTable[]",
"find the statistical thesaurus for notebook, equation, and function": "LSAMonEchoStatisticalThesaurus[\"Words\"->{\"notebook\",\"equation\",\"funciton\"}]"
},
"SMRMon": {
"install the package": "PacletInstall[\"AntonAntonov/MonadicSparseMatrixRecommender\"]",
"load the package": "Needs[\"AntonAntonov`MonadicSparseMatrixRecommender`\"]",
"new recommender": "SMRMonUnit[]",
"monad's unit": "SMRMonUnit[]",
"create with dfTemp": "SMRMonCreate[dfTemp]",
"create using dfData with identifier column 'rec_id'": "SMRMonCreate[dfData, \"rec_id\"]",
"create from wide format dataset, identifier column \"id\"": "SMRMonCreateFromWideForm[dsTitanic, \"id\"]",
"recommend by history for prod1, prod32, and new_prod98": "SMRMonRecommend[{\"prod1\", \"prod32\", \"new_prod98\"}]",
"top 22 recommendations for the items prod1, prod32, and new_prod98": "SMRMonRecommend[{\"prod1\", \"prod32\", \"new_prod98\"}, 22]",
"compute profile for the items arm39, car23, and pod91": "SMRMonProfile[{\"arm39\", \"car23\", \"pod91\"}",
"recommend by profile: female, 34, 2nd": "SMRMonRecommendByProfile[{\"female\", 34, \"2nd\"}]",
"recommend by the profile: sex.female=2, age.34=1.2, class.2nd=3": "SMRMonRecommendByProfile[{\"sex.female\"->2, \"age.34\"->1.2, \"class.2nd\"->3}]",
"top 13 recommendations for the profile: male, 70, 3rd": "SMRMonRecommendByProfile[{\"male\", 70, \"3rd\"}, 13]",
"classify by profile for the tag type 'Survival' using the profile: female, 40, 1st": "SMRMonClassify[\"Survival\", {\"female\", 40, \"1st\"})",
"join across": "SMRMonJoinAcross[]",
"join across with dfData on identifier column OrderNum": "SMRMonJoinAcross[dfData, \"OrderNum\"]",
"apply the LSI term-weight functions IDF, None, Cosine": "SMRMonApplyTermWeightFunctions[\"IDF\", \"None\", \"Cosine\"]",
"apply the following LSI term-weight functions global weights by \"IDF\", local weights - \"None\", normalize with \"Cosine\"": "SMRMonApplyTermWeightFunctions[\"GlobalWeightFunction\" -> \"IDF\", \"LocalWeightFunction\" -> \"None\", \"NormalizerFunction\" -> \"Cosine\"]",
"echo value": "SMRMonEchoValue[]",
"echo pipeline value with the function SumFunc, prefix note \"Stat:\"": "SMRMonEchoFunctionValue[\"Stat:\", SumFunc]"
},
"DataReshaping": {
"load data wrangling package": "Needs[\"AntonAntonov`DataReshapers`\"]",
"load the CSV file ~/Downloads/data.CSV": "obj = Import[\"~/Downloads/data.CSV\",\"Dataset\"]",
"use the dataset dfTitanic": "obj = dfTitanic",
"show dimensions": "Echo[Dimensions[obj]]",
"summarize the data": "RecordsSummary[obj]",
"show data summary": "Echo[RecordsSummary[obj]]",
"drop the columns passengerAge and passengerSex": "obj = (KeyDrop[#1, {\"passengerAge\", \"passengerSex\"}]&) /@ obj",
"rename columns passengerAge as age, passengerSex as sex, passengerClass as class": "obj = (Join[KeyDrop[#1, {\"passengerAge\", \"passengerSex\", \"passengerClass\"}], Association[\"age\" -> #1[\"passengerAge\"], \"sex\" -> #1[\"passengerSex\"], \"class\" -> #1[\"passengerClass\"]]] &) /@ obj",
"filter by passengerAge greater than 12": "obj = Select[obj, #1[\"passengerAge\"] > 12 &]",
"group by passengerSex and passengerClass": "obj = GroupBy[obj, {#1[\"passengerSex\"], #1[\"passengerClass\"]} &];",
"show counts": "Echo[Length /@ obj, \"counts:\"]",
"counts": "obj = Length /@ obj",
"cross tabulate passengerSex with passengerSurvival": "obj = CrossTabulate[({#[\"passengerSex\"], #[\"passengerSurvival\"]} &) /@ obj]",
"cross tabulate passengerSex with passengerSurvival over passengerAge": "obj = CrossTabulate[({#[\"passengerSex\"], #[\"passengerSurvival\"], #[\"passengerAge\"]} &) /@ obj]"
},
"Tabular": {
"load the CSV file ~/Downloads/data.CSV": "obj = Import[\"~/Downloads/data.CSV\",\"Tabular\"]",
"use the dataset dfTitanic": "obj = Tabular[dfTitanic]",
"show dimensions": "Echo[Dimensions[obj]]",
"summarize the data": "obj = TabularStructure[obj]",
"show data summary": "Echo[TabularStructure[obj]]",
"show the columns of tblTrees": "ColumnKeys[tblTrees]",
"show the columns types of tblTrees": "ColumnType[tblTrees]",
"categorize the column passengerSex": "obj = CastColumns[obj, \"passengerSex\" -> \"Categorical\"]",
"drop the columns passengerAge and passengerSex": "obj = DeleteColumns[obj, {\"passengerAge\", \"passengerSex\"}]",
"rename columns passengerAge as age, passengerSex as sex, and passengerClass as class": "obj = RenameColumns[obj, {\"passengerAge\" -> \"age\", \"passengerSex\" -> \"sex\", \"passengerClass\" -> \"class\"}]",
"filter by passengerAge greater than 12": "obj = Select[obj, #1[\"passengerAge\"] > 12 &]",
"group by passengerSex and passengerClass": "obj = GroupBy[obj, {#1[\"passengerSex\"], #1[\"passengerClass\"]} &];",
"show aggregate values of (Mean @ #passengerAge) over the columns passengerSex and passengerSurvival": "Echo @ AggregateRows[obj, {\"mean\" -> (Mean @ #passengerAge] &)}, {\"passengerSex\",\n \"passengerClass\"}]",
"show counts": "Echo[Length /@ obj, \"counts:\"]",
"counts": "obj = Length /@ obj",
"cross tabulate passenger_sex with passenger_survival": "obj = PivotTable[obj, Length @ #[\"passenger_sex\"] &, \"passenger_sex\", \"passenger_survival\"]",
"cross tabulate passengerSex with passengerSurvival over passengerAge": "obj = PivotTable[obj, Total@#passengerAge &, \"passengerSex\", \"passengerSurvival\"]",
"cross tabulate passengerSex with passengerSurvival and give group aggregates": "obj = PivotTable[obj, Length@#passengerSex &, \"passengerSex\", \"passengerSurvival\", \"IncludeGroupAggregates\" -> True]",
"keep only the rows for which passenger_class is 1st": "obj = Select[obj, #[\"passenger_class\"] == \"1st\"&]",
"filter out the rows that have passengerAge greater than 29": "obj = Discard[obj, #passengerAge > 29&]",
"sort descendingly by passengerAge": "obj = ReverseSortBy[obj, #passengerAge&]",
"sort by mass / height ^ 2": "obj = ReverseSortBy[obj, #mass / #height^2 &]",
"transform missing values by interpolation": "obj = TransformMissing[obj, Automatic -> \"Interpolation\"]",
"transform missing values of column VAR1 by using the mean": "obj = TransformMissing[obj, \"VAR1\" -> \"Mean\"]",
"replace the missing of the column NAME54 with the constant \"MyNA\"": "obj = TransformMissing[obj, \"NAME54\" -> {\"Constant\", \"MyNA\"}]",
"transform missing of VAR1 the mean and of VAR23 with the median": "obj = TransformMissing[obj, {\"VAR1\" -> \"Mean\", \"VAR23\" -> \"Median\"}]",
"split the column name over PAT into the columns maker and model": "obj = TransformColumns[obj, {\"maker\", \"model\"} -> Function[StringSplit[#name, 2]]]",
"split the column name over PAT into the columns maker and model and drop name": "obj = TransformColumns[obj, {\"maker\", \"model\"} -> Function[StringSplit[#name, 2]], \"name\" -> Nothing]",
"combine the columns latitude and longitude with FUNC into the column geoLocation": "obj = TransformColumns[obj, \"geoLocation\" -> FUNC[#latitude, #longitude]]",
"combine the columns latitude and longitude of tblTrees using FUNC into the column geoLocation": "tblTrees = TransformColumns[tblTrees, \"geoLocation\" -> FUNC[#latitude, #longitude]]",
"transform elementwise using the function FF12": "obj = ToTabular @ Elementwise[FF12][obj]",
"transform elementwise using the function QuantityMagnitude if the element is satisfies QuantityQ": "obj = ToTabular@\n Elementwise[If[QuantityQ[#], QuantityMagnitude[#], #] &][obj]"
},
"TriesWithFrequencies": {
"load tries with frequencies package": "Needs[\"AntonAntonov`TriesWithFrequencies`\"]",
"Make a prefix tree (trie) with frequencies using word splitting over words2": "tr = TrieCreateBySplit[words2]",
"Merge the trie with the another trie made over words3": "tr = TrieMerge[TrieCreateBySplit[words2]]",
"Convert the node frequencies into probabilities": "tr = TrieNodeProbabilities[tr]",
"Shrink the trie (i.e. find the \"prefixes\")": "tr = TrieShrink[tr]",
"Show the tree-form of the trie": "TrieForm[tr]"
}
},
"Python": {
"QRMon": {
"load the package": "from Regressionizer import *",
"use tsTemp": "Regressionizer(temp_data)",
"with the data tsTemp": "Regressionizer(temp_data)",
"do regression with 12 knots": "quantile_regression(knots=12)",
"quantile regression with 30 knots and interpolation order 3": "quantile_regression(knots=12,order=3)",
"compute the regression quantiles for probabilities 0.1, 0.5, and 0.9": "quantile_regression(knots=12, probs=[0.1, 0.5, 0.9])",
"compute linear regression": "least_squares_fit(funcs=6)",
"compute linear regression with 14 basis functions": "least_squares_fit(funcs=14)",
"do a least squares fit with the basis functions MyFuncs": "least_squares_fit(funcs=MyFuncs)",
"show date list plot": "date_list_plot()",
"plot the data and fits": "plot()",
"plot with width 800 and aspect ratio 1\/3": "plot(width=800, height=800*1\/3)",
"plot with image size 1000 and plot theme plotly_dark": "plot(width=1000, template=\"plotly_dark\")",
"show error plots": "error_plots()",
"show date list error plots": "error_plots(date_list_plot=True)",
"show relative error plots": "error_plots(relative_errors=True)",
"echo data summary": "echo_data_summary()",
"rescale the data": "rescale(regressor=False, value=True)",
"rescale the data in both dimensions": "rescale(regressor=True, value=True)",
"find outliers": "outliers()",
"plot outliers": "outliers_plot()",
"take the value": "take_value()",
"show the value": "take_value()"
},
"LSAMon": {
"load the package": "from LatentSemanticAnalyzer import *",
"use the documents aDocs": "LatentSemanticAnalyzer(aDocs)",
"use dfTemp": "LatentSemanticAnalyzer(dfTemp)",
"make the document-term matrix": "make_document_term_matrix()",
"make the document-term matrix with automatic stop words": "make_document_term_matrix[stemming_rules=None,stopWords=True)",
"make the document-term matrix without stemming": "make_document_term_matrix[stemming_rules=False,stopWords=True)",
"apply term weight functions": "apply_term_weight_functions()",
"apply term weight functions: global IDF, local None, normalizer Cosine": "apply_term_weight_functions(global_weight_func='IDF', local_weight_func='None',normalizer_func='Cosine')",
"extract 30 topics using the method SVD": "extract_topics(number_of_topics=24, method='SVD')",
"extract 24 topics using the method NNMF, max steps 12 and min number of documents per term 10": "extract_topics(number_of_topics=24, min_number_of_documents_per_term=10, method='NNMF')",
"Echo topics table": "echo_topics_interpretation(wide_form=True)",
"show the topics": "echo_topics_interpretation(wide_form=True)",
"Echo topics table with 10 terms per topic": "echo_topics_interpretation(number_of_terms=10, wide_form=True)",
"find the statistical thesaurus for the words notebook, equation, and function": "echo_statistical_thesaurus(terms=stemmerObj.stemWords(['notebook','equation','function']), wide_form=True)",
"show statistical thesaurus for king, castle, poison, Denmark, each entry with 12 words": "echo_statistical_thesaurus(terms=stemmerObj.stemWords(['king','castle','poison','Denmark']), wide_form=True, number_of_nearest_neighbors=12)"
},
"SMRMon": {
"install the package": "python3 -m pip install SparseMatrixRecommender",
"load the package": "from SparseMatrixRecommender import *",
"new recommender": "SparseMatrixRecommender()",
"monad's unit": "SparseMatrixRecommender()",
"create with dfTemp": "create(dfTemp)",
"create using dfData with identifier column 'rec_id'": "create(data = dfData, item_column_name = \"rec_id\")",
"create from wide format dataset, identifier column \"id\"": "create(data = dfData, item_column_name = \"rec_id\")",
"recommend by history for prod1, prod32, and new_prod98": "recommend(['prod1', 'prod32', 'new_prod98'])",
"top 22 recommendations for the items prod1, prod32, and new_prod98": "recommend([\"prod1\", \"prod32\", \"new_prod98\"], nrecs = 22)",
"compute profile for the items arm39, car23, and pod91": "profile(['arm39', 'car23', 'pod91'])",
"recommend by profile: female, 34, 2nd": "recommend_by_profile(['female', 34, '2nd'])",
"recommend by the profile: sex.female=2, age.34=1.2, class.2nd=3": "recommend_by_profile({'sex.female':2, 'age.34':1.2, 'class.2nd':3})",
"top 13 recommendations for the profile: male, 70, 3rd": "recommend_by_profile(['male', 70, '3rd'], nrecs=13)",
"classify by profile for the tag type 'Survival' using the profile: female, 40, 1st": "classify_by_profile('Survival', ['female', 40, '1st'])",
"join across": "join_across()",
"join across with dfData on identifier column OrderNum": "join-across(dfData, on => 'OrderNum')",
"apply the LSI term-weight functions IDF, None, Cosine": "apply_term_weight_functions('IDF', 'None', 'Cosine')",
"apply the following LSI term-weight functions: global weights by \"IDF\", local weights - \"None\", normalize with \"Cosine\"": "apply_term_weight_functions(global-weight-func = 'IDF', local-weight-func = 'None', normalizer-func = 'Cosine')",
"echo value": "echo_value()",
"echo pipeline value with the function SumFunc": "echo_function_value(&SumFunc)"
},
"pandas": {
"load data wrangling package": "import pandas as pd",
"load the CSV file ~/Downloads/data.CSV": "obj = pd.read_csv('~/Downloads/data.CSV')",
"use the dataset dfTitanic": "obj = dfTitanic.copy()",
"show dimensions": "print(obj.shape)",
"summarize the data": "obj.describe(include='all')",
"show data summary": "print(obj.describe(include='all'))",
"show the columns of tblTrees": "tblTrees.columns.tolist()",
"show the columns types of tblTrees": "tblTrees.dtypes",
"categorize the column passengerSex": "obj['passengerSex'] = obj['passengerSex'].astype('category')",
"drop the columns passengerAge and passengerSex": "obj = obj.drop(columns=['passengerAge', 'passengerSex'])",
"rename columns passengerAge as age, passengerSex as sex, passengerClass as class": "obj = obj.rename(columns={'passengerAge': 'age', 'passengerSex': 'sex', 'passengerClass': 'class'})",
"filter by passengerAge greater than 12": "obj = obj[obj['passengerAge'] > 12]",
"group by passengerSex and passengerClass": "obj = obj.groupby(['passengerSex', 'passengerClass'])",
"show aggregate values of (Mean @ #passengerAge) over the columns passengerSex and passengerSurvival": "print(obj.groupby(['passengerSex', 'passengerSurvival'])['passengerAge'].mean())",
"show counts": "print(obj.size)",
"counts": "obj = obj.size()",
"cross tabulate passenger_sex with passenger_survival": "obj = pd.crosstab(obj['passenger_sex'], obj['passenger_survival'])",
"cross tabulate passengerSex with passengerSurvival over passengerAge": "obj = pd.pivot_table(obj, values='passengerAge', index='passengerSex', columns='passengerSurvival', aggfunc='sum')",
"cross tabulate passengerSex with passengerSurvival and give group aggregates": "obj = pd.pivot_table(obj, index='passengerSex', columns='passengerSurvival', aggfunc='size', margins=True)",
"keep only the rows for which passenger_class is 1st": "obj = obj[obj['passenger_class'] == '1st']",
"filter out the rows that have passengerAge greater than 29": "obj = obj[obj['passengerAge'] <= 29]",
"sort descendingly by passengerAge": "obj = obj.sort_values(by='passengerAge', ascending=False)",
"sort by mass / height ^ 2": "obj = obj.assign(bmi=obj['mass'] / obj['height']**2).sort_values('bmi', ascending=False)",
"transform missing values by interpolation": "obj = obj.interpolate()",
"transform missing values of column VAR1 by using the mean": "obj['VAR1'] = obj['VAR1'].fillna(obj['VAR1'].mean())",
"replace the missing of the column NAME54 with the constant 'MyNA'": "obj['NAME54'] = obj['NAME54'].fillna('MyNA')",
"transform missing of VAR1 the mean and of VAR23 with the median": "obj = obj.assign(VAR1=obj['VAR1'].fillna(obj['VAR1'].mean()), VAR23=obj['VAR23'].fillna(obj['VAR23'].median()))",
"split the column name over PAT into the columns maker and model": "obj[['maker', 'model']] = obj['name'].str.split(pat='PAT', n=1, expand=True)",
"split the column name over PAT into the columns maker and model and drop name": "obj[['maker', 'model']] = obj['name'].str.split(pat='PAT', n=1, expand=True); obj = obj.drop(columns=['name'])",
"combine the columns latitude and longitude with FUNC into the column geoLocation": "obj['geoLocation'] = obj.apply(lambda row: FUNC(row['latitude'], row['longitude']), axis=1)",
"combine the columns latitude and longitude of tblTrees using FUNC into the column geoLocation": "tblTrees['geoLocation'] = tblTrees.apply(lambda row: FUNC(row['latitude'], row['longitude']), axis=1)",
"transform elementwise using the function FF12": "obj = obj.applymap(FF12)",
"transform elementwise using the function QuantityMagnitude if the element is satisfies QuantityQ": "obj = obj.applymap(lambda x: QuantityMagnitude(x) if QuantityQ(x) else x)"
}
},
"R": {
"QRMon": {
"install the package": "devtools::install_github(repo = \"antononcube/QRMon-R\")",
"load the package": "library(QRMon)",
"use tsTemp": "QRMonUnit(tsTemp)",
"with the data tsTemp": "QRMonUnit(tsTemp)",
"do regression with 12 knots": "QRMonQuantileRegression(12)",
"quantile regression with 30 knots and interpolation order 3": "QRMonQuantileRegression(12,degree=3)",
"compute the regression quantiles for probabilities 0.1, 0.5, and 0.9": "QRMonQuantileRegression(12, probabilities = c(0.1, 0.5, 0.9))",
"compute linear regression": "QRMonLeastSquaresFit(6)",
"compute linear regression with 14 basis functions": "QRMonLeastSquaresFit(14)",
"plot and use the color red for regression quantiles": "QRMonPlot(regressionCurvesColor='red')",
"plot using orange for the data points": "QRMonPlot(dataPointsColor='orange')",
"show date list plot": "QRMonPlot(datePlotQ=TRUE)",
"plot the data and fits": "QRMonPlot()",
"plot with date origin 1900-01-01": "QRMonPlot(dateOrigin='1900-01-01')",
"show date list error plots": "QRMonErrorPlots(datePlotQ=TRUE)",
"show relative error plots": "QRMonErrorPlots(relativeErrorsQ=TRUE)",
"Give the error plots using absolute errors": "QRMonErrorPlots(relativeErrorsQ=FALSE)",
"echo data summary": "QRMonEchoDataSummary()",
"rescale the data": "QRMonRescale(regressorAxisQ=FALSE,valueAxisQ=TRUE}]",
"rescale the data in both dimensions": "QRMonRescale(regressorAxisQ=TRUE,valueAxisQ=TRUE}]",
"find outliers": "QRMonOutliers()",
"plot outliers": "QRMonOutliersPlot()",
"give a date plot for the outliers": "QRMonOutliersPlot(datePlotQ=TRUE)",
"outliers with time axis": "QRMonOutliersPlot(datePlotQ=TRUE)",
"take the value": "QRMonTakeValue",
"show the value": "QRMonEchoValue"
},
"LSAMon": {
"install the package": "devtools::install_github(repo = \"antononcube/R-packages\", subdir = \"LSAMon-R\")",
"load the package": "library(LSAMon)",
"use the documents aDocs": "LSAMonUnit(aDocs)",
"use dfTemp": "LSAMonUnit(dfTemp)",
"make the document-term matrix": "LSAMonMakeDocumentTermMatrix()",
"make the document-term matrix with automatic stop words": "LSAMonMakeDocumentTermMatrix(stemmingRules = NULL, stopWords = NULL)",
"make the document-term matrix without stemming": "LSAMonMakeDocumentTermMatrix(stemmingRules = NULL, stemWordsQ = FALSE, stopWords = NULL)",
"echo document term matrix statistics": "LSAMonEchoDocumentTermMatrixStatistics()",
"echo document term matrix metrics with log base 10": "LSAMonEchoDocumentTermMatrixStatistics(logBase=10)",
"apply term weight functions": "LSAMonApplyTermWeightFunctions()",
"apply term weight functions: global IDF, local None, normalizer Cosine": "LSAMonApplyTermWeightFunctions(globalWeightFunction=\"IDF\", localWeightFunction=\"None\", normalizerFunction=\"Cosine\")",
"extract 30 topics using the method SVD": "LSAMonExtractTopics(numberOfTopics=30, method=\"SVD\")",
"extract 24 topics using the method NNMF, max steps 12 and min number of documents per term 10": "LSAMonExtractTopics(numberOfTopics=24,method=\"NNMF\",maxSteps=12,minNumberOfDocumentsPerTerm=10)",
"Echo topics table": "LSAMonEchoTopicsTable()",
"Echo topics table with 10 terms per topic": "LSAMonEchoTopicsTable(numberOfTerms=10)",
"show the topics": "LSAMonEchoTopicsTable()",
"find the statistical thesaurus for notebook, equation, and function": "LSAMonEchoStatisticalThesaurus(words=c(\"notebook\",\"equation\",\"funciton\"))"
},
"SMRMon": {
"install the package": "devtools::install_github(repo = \"antononcube/R-packages\", subdir = \"SMRMon-R\")",
"load the package": "library(SMRMon)",
"new recommender": "SMRMonUnit()",
"monad's unit": "SMRMonUnit()",
"create with dfTemp": "SMRMonCreate(data = dfTemp)",
"create using dfData with identifier column 'rec_id'": "SMRMonCreate(data = dfData, itemColumnName = \"rec_id\")",
"create from wide format dataset, identifier column \"id\"": "SMRMonCreate(data = dfData, itemColumnName = \"rec_id\")",
"recommend by history for prod1, prod32, and new_prod98": "SMRMonRecommend(c('prod1', 'prod32', 'new_prod98'))",
"top 22 recommendations for the items prod1, prod32, and new_prod98": "SMRMonRecommend(c(\"prod1\", \"prod32\", \"new_prod98\"), nrecs = 22)",
"compute profile for the items arm39, car23, and pod91": "SMRMonProfile(c('arm39', 'car23', 'pod91'))",
"recommend by profile: female, 34, 2nd": "SMRMonRecommendByProfile(c('female', 34, '2nd'))",
"recommend by the profile: sex.female=2, age.34=1.2, class.2nd=3": "SMRMonRecommendByProfile(c('sex.female'=2, 'age.34'=1.2, 'class.2nd'=3))",
"top 13 recommendations for the profile: male, 70, 3rd": "SMRMonRecommendByProfile(c('male', 70, '3rd'), nrecs = 13)",
"classify by profile for the tag type 'Survival' using the profile: female, 40, 1st": "SMRMonClassifyByProfile(tagType = 'Survival', profile = c('female', 40, '1st'))",
"join across": "SMRMonJoinAcross()",
"join across with dfData on identifier column OrderNum": "SMRMonJoinAcross(data = dfData, by = 'OrderNum')",
"apply the LSI term-weight functions IDF, None, Cosine": "SMRMonApplyTermWeightFunctions('IDF', 'None', 'Cosine')",
"apply the following LSI term-weight functions: global weights by \"IDF\", local weights - \"None\", normalize with \"Cosine\"": "SMRMonApplyTermWeightFunctions(globalWeightFunction = 'IDF', localWeightFunction = 'None', normalizerFunction = 'Cosine')",
"echo value": "SMRMonEchoValue()",
"echo pipeline value with the function SumFunc": "SMRMonEchoFunctionValue(f = &SumFunc)"
},
"DataReshaping": {
"load data wrangling package": "library(tidyverse)",
"use the dataset dfTitanic": "dfTitanic",
"show dimensions": "(function(x) { print(dim(x)); x })",
"rename columns passengerAge as age, passengerSex as sex, passengerClass as class": "dplyr::rename(age = passengerAge, sex = passengerSex, class = passengerClass)",
"filter by passengerAge greater than 12": "dplyr::filter(passengerAge > 12)",
"group by passengerSex and passengerClass": "group_by(passengerSex, passengerClass)",
"show counts": "(function(x) { print(counts(x)); x })",
"counts": "dplyr::count()",
"cross tabulate passengerSex with passengerSurvival": "(function(x) {xtabs(~ passengerSex + passengerSurvival, data = x) })",
"cross tabulate passengerSex with passengerSurvival over passengerAge": "(function(x) {xtabs(passengerAge ~ passengerSex + passengerSurvival, data = x) })"
}
},
"Raku": {
"SMRMon": {
"install the package": "zef install ML::SparseMatrixRecommender",
"load the package": "use ML::SparseMatrixRecommender;",
"new recommender": "ML::SparseMatrixRecommender.new",
"monad's unit": "ML::SparseMatrixRecommender.new",
"create with dfTemp": "create(dfTemp)",
"create using dfData with identifier column 'rec_id'": "create(dfData, item-column-name => \"rec_id\")",
"create from wide format dataset, identifier column \"id\"": "create(dfData, item-column-name => \"rec_id\")",
"recommend by history for prod1, prod32, and new_prod98": "recommend(['prod1', 'prod32', 'new_prod98'])",
"top 22 recommendations for the items prod1, prod32, and new_prod98": "recommend([\"prod1\", \"prod32\", \"new_prod98\"], 22)",
"compute profile for the items arm39, car23, and pod91": "profile(['arm39', 'car23', 'pod91'])",
"recommend by profile: female, 34, 2nd": "recommend-by-profile(['female', 34, '2nd'])",
"recommend by the profile: sex.female=2, age.34=1.2, class.2nd=3": "recommend-by-profile({'sex.female'=>2, 'age.34'=>1.2, 'class.2nd'=>3})",
"top 13 recommendations for the profile: male, 70, 3rd": "recommend-by-profile(['male', 70, '3rd'], 13)",
"classify by profile for the tag type 'Survival' using the profile: female, 40, 1st": "classify-by-profile('Survival', ['female', 40, '1st'])",
"join across": "join-across()",
"join across with dfData on identifier column OrderNum": "join-across(dfData, on => 'OrderNum')",
"apply the LSI term-weight functions IDF, None, Cosine": "apply-term-weight-functions('IDF', 'None', 'Cosine')",
"apply the following LSI term-weight functions: global weights by \"IDF\", local weights - \"None\", normalize with \"Cosine\"": "apply-term-weight-functions(global-weight-function => 'IDF', local-weight-function => 'None', normalizer-function => 'Cosine')",
"echo value": "echo-value()",
"echo pipeline value with the function SumFunc, prefix note \"Stat:\"": "echo-value(\"Stat:\", as => &SumFunc)"
},
"DataReshaping": {
"load data wrangling package": "use Data::Reshapers; use Data::Summarizers; use Data::TypeSystem",
"use the dataset @dfTitanic": "my $obj = @dfTitanic",
"show dimensions": "say $obj.&dimensions",
"rename columns passengerAge as age, passengerSex as sex, passengerClass as class": "$obj = rename-columns($obj, %(passengerAge => 'age', passengerSex => 'sex', passengerClass => 'class'))",
"filter by passengerAge greater than 12": "$obj = $obj.grep({ $_{'passengerAge'} > 12 }).Array",
"group by passengerSex and passengerClass": "$obj = group-by($obj, ('passengerSex', 'passengerClass'))",
"show counts": "say $obj>>.elems",
"counts": "$obj = $obj>>.elems",
"cross tabulate passengerSex with passengerSurvival": "$obj = cross-tabulate($obj, 'passengerSex', 'passengerSurvival')",
"cross tabulate passengerSex with passengerSurvival over passengerAge": "$obj = cross-tabulate($obj, 'passengerSex', 'passengerSurvival', 'passengerAge')"
},
"TriesWithFrequencies": {
"load tries with frequencies package": "use ML::TriesWithFrequencies;",
"Make a prefix tree (trie) with frequencies using word splitting over @words2": "$tr = trie-create-by-split(@words2)",
"Merge the trie with the another trie made over @words3": "$tr = trie-merge(trie-create-by-split(@words2))",
"Convert the node frequencies into probabilities": "$tr = trie-node-probabilities($tr)",
"Shrink the trie (i.e. find the \"prefixes\")": "$tr = trie-shrink($tr)",
"Show the tree-form of the trie": "trie-say($tr)"
}
}
}