-
-
Notifications
You must be signed in to change notification settings - Fork 1.9k
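Description
With identical arguments, `pandas.read_csv` parses the sample file as expected, but `dask.dataframe.read_csv` repeats the header line as the first data row and leaves the decimal commas unconverted (e.g. `8,75` instead of `8.75`), as the two outputs below show.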
import pandas as pd
import dask.dataframe as df
pd.read_csv('/Users/michael/Downloads/sample3.csv', sep=';', decimal=',', quotechar='"', encoding='cp1251', quoting=3)
Out[32]:
"ID1" "ID2" "NAME" "VALUE" "LIFETIME"
0 639975684501 105 "Behold" 8.75 406
1 146852986916 105 "Let it go" 65.85 496
2 573494607668 105 "Yellow submarine" 65.49 835
3 743247757392 105 "Proud Mary" 55.56 831
4 455643282685 105 "________ ___ " 83.39 822
5 680372869931 105 " __ _ ______ ____ " 6.24 570
6 628358094382 105 "___ _________ _____" 33.85 859
"CHARGES" "USAGE1" "USAGE2" "SEGMENT1" "SEGMENT2" "SEGMENT3"
0 87.04 94.1 14.3 20-30 0 ">12"
1 37.50 79.1 23.5 110-120 0-10 ">12"
2 88.81 84.1 86.1 200-210 0-10 ">12"
3 62.75 85.8 70.9 220-230 0 ">12"
4 11.07 58.1 21.6 60-70 0 ">12"
5 41.92 69.0 87.3 0-10 0 ">12"
6 34.00 65.7 35.1 410-420 0-10 0-10"
df_csv = df.read_csv('/Users/michael/Downloads/sample3.csv', sep=';', decimal=',', quotechar='"', encoding='cp1251', quoting=3).compute()
df_csv
Out[34]:
"ID1" "ID2" "NAME" "VALUE" "LIFETIME"
0 "ID1" "ID2" "NAME" "VALUE" "LIFETIME"
1 639975684501 105 "Behold" 8,75 406
2 146852986916 105 "Let it go" 65,85 496
3 573494607668 105 "Yellow submarine" 65,49 835
4 743247757392 105 "Proud Mary" 55,56 831
5 455643282685 105 "________ ___ " 83,39 822
6 680372869931 105 " __ _ ______ ____ " 6,24 570
7 628358094382 105 "___ _________ _____" 33,85 859
"CHARGES" "USAGE1" "USAGE2" "SEGMENT1" "SEGMENT2" "SEGMENT3"
0 "CHARGES" "USAGE1" "USAGE2" "SEGMENT1" "SEGMENT2" "SEGMENT3"
1 87,04 94,1 14,3 20-30 0 ">12"
2 37,5 79,1 23,5 110-120 0-10 ">12"
3 88,81 84,1 86,1 200-210 0-10 ">12"
4 62,75 85,8 70,9 220-230 0 ">12"
5 11,07 58,1 21,6 60-70 0 ">12"
6 41,92 69 87,3 0-10 0 ">12"
7 34 65,7 35,1 410-420 0-10 0-10"
(Please ignore the underscore placeholders in the "NAME" column; I was unable to create this sample file with the correct encoding.)