Regex¶
In [ ]:
import re

# Pad 'Hello' with '*' up to a total width of 20 characters:
# right-aligned, left-aligned and centred respectively.
print('Hello'.rjust(20, '*'))
print('Hello'.ljust(20, '*'))
print('Hello'.center(20, '*'))

# Phone number pattern: a literal '+', a 2-digit country code, a hyphen and
# then exactly 10 digits. Quantifiers replace the hand-repeated \d tokens so
# the digit counts are visible at a glance; a hyphen needs no escape outside
# a character class.
phoneNumberReg = re.compile(r'\+\d{2}-\d{10}')
mo = phoneNumberReg.search('Sumit sarkar +91-7407227346')

# search() returns None when nothing matches, so guard before calling group().
if mo is not None:
    print(mo.group())
In [ ]:
# Nested dictionary used to demonstrate iteration, lookup and membership tests.
spam = {
    'name': 'sumit',
    'surname': 'sarkar',
    'education': {'school': 'DHS', 'University': 'MAKAUT'},
    'height': '5 ft.10 inch',
}

# Walk every top-level key/value pair (the loop body must be indented).
for k, v in spam.items():
    print(k, "is", v)

# Nested lookup; the stored value is already a str, so no conversion is needed.
print(spam['education']['school'])

# Two different membership tests, printed so their results are not discarded:
#   - against the values of the inner dict (element equality)
#   - against the string 'DHS' itself (substring containment)
print('DHS' in spam['education'].values())
print('DHS' in spam['education']['school'])

# str.join glues the words together with a single space between them.
c = ' '.join(['My', 'name', 'is', 'Simon'])
print(c)
print(type(c))
Data Scraping¶
In [ ]:
from bs4 import BeautifulSoup
import pandas as pd

# Parse the saved HTML page; the with-block closes the file after parsing.
with open("C:\\Users\\sumit\\Downloads\\sumit.html") as fp:
    soup = BeautifulSoup(fp, "html.parser")

# Name of every tag found in the document.
tags = [tag.name for tag in soup.find_all()]

# First <table> element of the page; find() returns None when there is none,
# so fail with a clear message instead of an AttributeError further down.
gdp_table = soup.find("table")
if gdp_table is None:
    raise ValueError("no <table> element found in sumit.html")

# All rows of that table.
a = gdp_table.find_all('tr')

# Flatten the table into one list holding the text of every <td>, row by row,
# echoing each cell as it is collected.
my_list = []
for value in a:
    aa = value.find_all('td')
    for value1 in aa:
        cell_text = value1.get_text()
        print(cell_text)
        my_list.append(cell_text)

# Keep every third cell starting at index 2, then split that sequence again
# with stride 3: offset 0 gives the names, offset 1 the merit values.
# NOTE(review): these strides assume a fixed cell layout in the source table;
# confirm against sumit.html.
name = my_list[2::3]
final_name = name[0::3]
final_merit = [int(merit) for merit in name[1::3]]

# Build the result frame directly from the scraped columns. The earlier
# read of rGB.csv was dropped: its result was overwritten immediately, so it
# only added a needless file dependency.
df = pd.DataFrame(list(zip(final_name, final_merit)),
                  columns=['Name', 'Merit'])
print(df)
Date Time Range in function¶
In [ ]:
%%time
import random
import datetime
from datetime import date
import pandas as pd
from copy import deepcopy

df = pd.read_csv("C:\\Users\\sumit\\Downloads\\sss.csv")

# One timestamp per row at 1-second spacing. periods follows the actual row
# count so the column assignment cannot fail on a length mismatch, and the
# lowercase 's' alias is used because the uppercase 'S' alias is deprecated.
# NOTE(review): '2020-09-10-13' is not a standard ISO timestamp; presumably
# it means 13:00 on 2020-09-10 - confirm it parses as intended.
dti = pd.date_range('2020-09-10-13', periods=len(df), freq='s')
df['TimeStamp'] = dti

# Uncomment to write the frame back over the input file.
#df.to_csv("C:\\Users\\sumit\\Downloads\\sss.csv")
Shuffle data set in Pandas¶
In [ ]:
import pandas as pd

df = pd.read_csv("C:\\Users\\sumit\\Downloads\\sub_10.csv")

# sample(frac=1) returns every row in random order as a NEW frame; it does not
# reorder df in place, so the result is bound back to df.
# reset_index(drop=True) renumbers the rows from 0 and discards the old index.
df = df.sample(frac=1).reset_index(drop=True)

# Last expression of the cell: shows the shuffled frame in a notebook.
df
Merging DataFrame in pandas¶
In [ ]:
import glob
import os

import pandas as pd

path = r'C:\Users\sumit\Downloads\jog_16'  # use your path

# glob returns matches in arbitrary order; sorting makes the row order of the
# merged frame reproducible from run to run.
all_files = sorted(glob.glob(os.path.join(path, "*.csv")))

# pd.concat raises an opaque ValueError on an empty list; say what is wrong.
if not all_files:
    raise ValueError(f"no CSV files found in {path!r}")

# Read every CSV (first line is the header, no column used as index).
li = []
for filename in all_files:
    df = pd.read_csv(filename, index_col=None, header=0)
    li.append(df)

# Stack all frames vertically and renumber the rows 0..n-1.
frame = pd.concat(li, axis=0, ignore_index=True)
In [ ]:
import pandas as pd

a = pd.read_csv("C:\\Users\\sumit\\Downloads\\sss.csv")
b = pd.read_csv("C:\\Users\\sumit\\Downloads\\sss (1).csv")

# Drop the 'Unnamed: 0' column from both frames (presumably a row index that
# an earlier to_csv call saved into the file - verify against the CSVs).
del a['Unnamed: 0']
del b['Unnamed: 0']

# Keep at most the first 2500 rows of each frame. .copy() makes the slice an
# independent frame, so adding a column below does not trigger pandas'
# SettingWithCopyWarning. periods follows the real row count so a file with
# fewer than 2500 rows no longer fails on a length mismatch, and the lowercase
# 's' alias replaces the deprecated uppercase 'S'.
a = a[0:2500].copy()
a['TimeStamp'] = pd.date_range('2020-09-08-10', periods=len(a), freq='s')
b = b[0:2500].copy()
b['TimeStamp'] = pd.date_range('2020-09-09-13', periods=len(b), freq='s')

# Stack the two frames; each keeps its own row labels.
frames = [a, b]
result = pd.concat(frames)

# NOTE: this overwrites the first input file with the combined data.
result.to_csv("C:\\Users\\sumit\\Downloads\\sss.csv")
NumPy¶
In [ ]:
import numpy as np

# The same 3x3 values as a plain ndarray and as an np.matrix built from a
# MATLAB-style string (',' separates columns, ';' separates rows).
matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
mat = np.matrix("1,2,3;4,5,6;7,8,9")

a = np.linalg.det(mat)             # determinant of the matrix
rank = np.linalg.matrix_rank(mat)  # rank of the matrix

# A matrix whose determinant is 0 cannot be inverted. In floating point the
# determinant of a singular matrix is rarely exactly 0, so the decision is
# made on the rank instead: only a full-rank square matrix is inverted.
# For a rank-deficient matrix inv is None rather than a matrix of huge,
# meaningless numbers (or a LinAlgError).
inv = np.linalg.inv(mat) if rank == mat.shape[0] else None

# When the inverse exists, the adjoint is inv(matrix) * det(matrix).
In [ ]:
# Linear equations
#
# Solve the following system of three equations for x, y and z:
#     3x + 2y - 12z = 10
#     2x - 5y + 12z = -5
#     4x - 4y +   z =  1

# Coefficient matrix, one equation per row.
A = np.matrix(
    "3,2,-12;"
    "2,-5,12;"
    "4,-4,1"
)

# Right-hand side as a column vector.
b = np.matrix("10;-5;1")

# Column vector holding the values of x, y and z.
sol_lin = np.linalg.solve(A, b)
In [ ]:
# Mass balance
#
# - The input flow rates of Gasoline, Kerosene, Diesel and Fuel Oil are given
#   in Kmol/min.
# - The output flow rates from the two distillation columns have been
#   indicated in percentages by mass for each of the components.
# - The actual output flow rates need to be determined.
#
# The flow system can be represented by a set of equations considering the
# mass flow rate in kgmol/min:
#     0.24T1 + 0.15B1 + 0.18T2 + 0.07B2 = 75
#     0.65T1 + 0.10B1 + 0.24T2 + 0.04B2 = 125
#     0.10T1 + 0.54B1 + 0.42T2 + 0.54B2 = 200
#     0.01T1 + 0.21B1 + 0.18T2 + 0.35B2 = 100

# Coefficients of T1, B1, T2, B2 - one equation per row.
A = np.matrix(
    "0.24,0.15,0.18,0.07;"
    "0.65,0.10,0.24,0.04;"
    "0.10,0.54,0.42,0.54;"
    "0.01,0.21,0.18,0.35"
)

# Right-hand sides of the four equations as a column vector.
b = np.matrix("75;125;200;100")

# The actual flow rates T1, B1, T2, B2 (column vector).
rate = np.linalg.solve(A, b)
DataFrame¶
In [53]:
import os
import numpy as np
import pandas as pd

# Load the table, using the first CSV column as the row index.
df = pd.read_csv(
    "C:\\Users\\sumit\\Downloads\\table-1.csv",
    index_col=0,
    sep=",",
)

# --- Basic structure of the frame -------------------------------------------
shape = df.shape          # (number of rows, number of columns)
size = df.size            # total number of cells in the dataframe
mem = df.memory_usage()   # memory usage of the dataframe
axis = df.ndim            # number of dimensions of the dataframe

# --- Indexing and selecting data --------------------------------------------
# The Python slicing operator [] and the attribute/dot operator . are used for
# indexing; they provide quick and easy access to pandas data structures.

# Single value by row label and column label.
# NOTE(review): this uses the string label '5'; confirm the index really
# holds strings rather than integers.
df.at['5', "Politicalparticipation"]

# Single value by integer position: (row, column).
df.iat[5, 6]

# .loc[] gives access to a group of rows and columns by label(s).
df.loc[:, 'Score']   # all elements of one column
df.loc[['1']]        # the elements of one row

# --- Character types: category vs. object -----------------------------------
# category:
#   A string variable consisting of only a few different values. Converting
#   such a string variable to a categorical variable will save some memory.
#   A categorical variable takes on a limited, fixed number of possible values.
# object:
#   The column will be assigned the object data type when it has mixed types
#   (numbers and strings). If a column contains NaN (blank cells), pandas will
#   default to the object datatype. For strings, the length is not fixed.

# Data type of every column.
df.dtypes

# df.get_dtype_counts()  # counts the different types of datatypes

# Keep the object columns and leave out the int columns.
df.select_dtypes(include=[object], exclude=[int])

# Print a summary of the whole dataframe.
df.info()

# Distinct values of the 'Country' column, as returned by np.unique.
arr = np.unique(df['Country'])
Out[53]:
In [ ]:
Comments
Post a Comment