Regex¶

# Pad 'Hello' to a width of 20 with '*': right-aligned, left-aligned, centred.
for align in (str.rjust, str.ljust, str.center):
    print(align('Hello', 20, '*'))

import re
# Phone number of the form "+CC-NNNNNNNNNN": a literal '+', a two-digit
# country code, a hyphen, then exactly ten digits.
phoneNumberReg = re.compile(r'\+\d{2}-\d{10}')
mo = phoneNumberReg.search('Sumit sarkar +91-7407227346')

# search() returns None when nothing matches, so check before using the match
# object instead of failing with AttributeError on .group().
if mo is not None:
    print(mo.group())
else:
    print('No phone number found')

# Nested dictionary used to demonstrate iteration, lookup and membership tests.
spam = {
    'name': 'sumit',
    'surname': 'sarkar',
    'education': {'school': 'DHS', 'University': 'MAKAUT'},
    'height': '5 ft.10 inch',
}

# Walk every top-level key/value pair.
for k, v in spam.items():
    print(k, "is", v)

# The stored value is already a str, so no str() conversion is needed.
print(spam['education']['school'])

# Outside an interactive session a bare expression is evaluated and thrown
# away, so the membership results are bound to names and printed.

# Exact match against the values of the nested dict.
school_in_values = 'DHS' in spam['education'].values()
print(school_in_values)

# NOTE: `in` on a str is a substring test, not an equality test.
school_contains = 'DHS' in spam['education']['school']
print(school_contains)

# Join the words with a single space; the result is a str.
c = ' '.join(['My', 'name', 'is', 'Simon'])
print(c)
print(type(c))

Data Scraping¶

from bs4 import BeautifulSoup
# Parse the saved HTML page; the file handle is released once the tree is built.
with open("C:\\Users\\sumit\\Downloads\\sumit.html") as fp:
    soup = BeautifulSoup(fp, "html.parser")

# Name of every tag found in the document.
tags = [tag.name for tag in soup.find_all()]

# The table that holds the data of interest.
gdp_table = soup.find("table")

# Echo and collect the text of every <td> cell, row by row.
a = gdp_table.find_all('tr')
my_list = []
for row in a:
    for cell in row.find_all('td'):
        cell_text = cell.get_text()
        print(cell_text)
        my_list.append(cell_text)

# Keep every third cell starting at index 2, then split that sequence again
# with a stride of 3: offset 0 gives the names, offset 1 the merit values.
name = my_list[2::3]
final_name = name[0::3]
final_merit = [int(merit) for merit in name[1::3]]
import pandas as pd

# Build the result table straight from the scraped columns. zip() pairs each
# name with its merit value and stops at the shorter of the two lists.
# No CSV is read here: a frame loaded into `df` would be replaced by this
# assignment immediately, so the read would only add a way to fail.
df = pd.DataFrame(list(zip(final_name, final_merit)),
                  columns=['Name', 'Merit'])
print(df)

Date Time Range in function¶

%%time
import random
import datetime
from datetime import date
import pandas as pd
from copy import deepcopy
df = pd.read_csv("C:\\Users\\sumit\\Downloads\\sss.csv")

# One timestamp per row at one-second spacing. The range is sized from the
# frame itself so the column assignment stays valid for any number of rows
# (a fixed period count raises when it differs from len(df)).
dti = pd.date_range('2020-09-10-13', periods=len(df), freq='S')
df['TimeStamp'] = dti

# Uncomment to write the frame back to disk:
#df.to_csv("C:\\Users\\sumit\\Downloads\\sss.csv")

Shuffle data set in Pandas¶

import pandas as pd
df = pd.read_csv("C:\\Users\\sumit\\Downloads\\sub_10.csv")

# sample(frac=1) returns a new frame holding every row in random order; it
# does not reorder `df` in place, so the result has to be bound to a name.
# reset_index(drop=True) renumbers the rows 0..n-1 and discards the old index.
df = df.sample(frac=1).reset_index(drop=True)

Merging DataFrame in pandas¶

import pandas as pd
import glob

path = r'C:\Users\sumit\Downloads\jog_16' # use your path

# glob yields matches in arbitrary order; sorting makes the row order of the
# merged frame reproducible from run to run.
all_files = sorted(glob.glob(path + "/*.csv"))

# Fail with a clear message instead of letting pd.concat reject an empty list.
if not all_files:
    raise FileNotFoundError("No CSV files found in " + path)

# Load every file (first line is the header) and stack them vertically with
# a fresh 0..n-1 index.
li = [pd.read_csv(filename, index_col=None, header=0) for filename in all_files]

frame = pd.concat(li, axis=0, ignore_index=True)

import pandas as pd
a = pd.read_csv("C:\\Users\\sumit\\Downloads\\sss.csv")
b = pd.read_csv("C:\\Users\\sumit\\Downloads\\sss (1).csv")

# Drop the unnamed first column (presumably an index saved by an earlier
# to_csv call -- verify against the files).
del a['Unnamed: 0']
del b['Unnamed: 0']

# Keep at most the first 2500 rows of each file. .copy() makes each slice an
# independent frame, so adding a column below is not a chained assignment
# (avoids pandas' SettingWithCopyWarning). The timestamp range is sized from
# the slice so a file with fewer than 2500 rows still works.
a = a.iloc[:2500].copy()
a['TimeStamp'] = pd.date_range('2020-09-08-10', periods=len(a), freq='S')
b = b.iloc[:2500].copy()
b['TimeStamp'] = pd.date_range('2020-09-09-13', periods=len(b), freq='S')

# Stack the two frames; each keeps its own row labels.
frames = [a, b]
result = pd.concat(frames)

# NOTE: this overwrites the first input file (sss.csv) with the merged data.
result.to_csv("C:\\Users\\sumit\\Downloads\\sss.csv")

NumPy¶

import numpy as np
# The same 3x3 data as a plain ndarray and as an np.matrix.
matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
mat = np.matrix("1,2,3;4,5,6;7,8,9")

a = np.linalg.det(mat)  # determinant of the matrix
rank = np.linalg.matrix_rank(mat)  # rank of the matrix

# A matrix with det == 0 cannot be inverted. This example is singular
# (row 1 + row 3 == 2 * row 2), so calling inv() on it either raises
# LinAlgError or returns huge meaningless values produced by round-off.
# Invertibility is tested through the rank rather than `a != 0`, because
# floating-point round-off can leave the computed determinant slightly off zero.
if rank == mat.shape[0]:
    inv = np.linalg.inv(mat)  # inverse of the matrix
else:
    inv = None  # singular: no inverse exists

# For an invertible matrix the adjugate is inv(matrix) * det(matrix).

# Linear equations
#
# Solve the following system for x, y and z:
#    3x + 2y - 12z = 10
#    2x - 5y + 12z = -5
#    4x - 4y +   z =  1
#
# A holds the coefficients (one row per equation) and b the right-hand sides.
A = np.matrix("3,2,-12;2,-5,12;4,-4,1")
b = np.matrix("10;-5;1")

# Column of the solved values, in the order x, y, z.
sol_lin = np.linalg.solve(A, b)

# Mass balance
#
# - The input flow rates of Gasoline, Kerosene, Diesel and Fuel Oil are given
#   in kmol/min.
# - The output flow rates from the two distillation columns are given as
#   percentages by mass for each component.
# - The actual output flow rates need to be determined.
#
# The flow system is represented by a set of equations in terms of the mass
# flow rates (kgmol/min):
#
#    0.24 T1 + 0.15 B1 + 0.18 T2 + 0.07 B2 = 75
#    0.65 T1 + 0.10 B1 + 0.24 T2 + 0.04 B2 = 125
#    0.10 T1 + 0.54 B1 + 0.42 T2 + 0.54 B2 = 200
#    0.01 T1 + 0.21 B1 + 0.18 T2 + 0.35 B2 = 100
#
# A holds the coefficients (one row per equation) and b the right-hand sides.
A = np.matrix("0.24,0.15,0.18,0.07;0.65,0.10,0.24,0.04;0.10,0.54,0.42,0.54;0.01,0.21,0.18,0.35")
b = np.matrix("75;125;200;100")

# The actual flow rates, as a column in the order T1, B1, T2, B2.
rate = np.linalg.solve(A, b)

DataFrame¶

import os
import numpy as np
import pandas as pd
# Load the table; the first CSV column becomes the row index.
df = pd.read_csv("C:\\Users\\sumit\\Downloads\\table-1.csv", index_col=0, sep=",")

shape = df.shape         # (number of rows, number of columns)
size = df.size           # total number of cells in the dataframe
mem = df.memory_usage()  # memory usage of the dataframe
axis = df.ndim           # number of dimensions of the dataframe

# Indexing and selecting data
# ---------------------------
# The slicing operator [] and the attribute/dot operator . give quick access
# to pandas data structures. The bare expressions below only show their value
# in an interactive session.

# Single value by row label and column label.
df.at['5', "Politicalpartici­pation"]
# Single value by integer position: row, column.
df.iat[5, 6]

# .loc[] selects a group of rows and columns by label(s).
df.loc[:, 'Score']   # every row of one column
df.loc[['1']]        # one row, returned as a dataframe

# Character types: category vs. object
# ------------------------------------
# category: for a string variable that takes only a limited, fixed number of
#   different values. Converting such a column to a categorical saves memory.
# object: assigned when a column holds mixed types (numbers and strings).
#   A column containing NaN (blank cells) also defaults to object.
#   String lengths are not fixed.

# Data type of every column.
df.dtypes

# df.get_dtype_counts() #counts different types of datatypes

# Keep or drop columns according to their data type.
df.select_dtypes(include=[object], exclude=[int])

# Print a summary of the whole dataframe.
df.info()

# Sorted unique values of a column, returned as a NumPy array (not a list).
arr = np.unique(df['Country'])

<class 'pandas.core.frame.DataFrame'>
Index: 168 entries, 1 to Rank
Data columns (total 10 columns):
 #   Column                          Non-Null Count  Dtype 
---  ------                          --------------  ----- 
 0   Country                         168 non-null    object
 1   Score                           168 non-null    object
 2   Electoral processand pluralism  168 non-null    object
 3   Functio­ning ofgovern­ment      168 non-null    object
 4   Politicalpartici­pation         168 non-null    object
 5   Politicalculture                168 non-null    object
 6   Civilliberties                  168 non-null    object
 7   Regimetype                      168 non-null    object
 8   Region[n 1]                     168 non-null    object
 9   Changes fromlast year           168 non-null    object
dtypes: object(10)
memory usage: 19.4+ KB

array(['Afghanistan', 'Albania', 'Algeria', 'Angola', 'Argentina',
       'Armenia', 'Australia', 'Austria', 'Azerbaijan', 'Bahrain',
       'Bangladesh', 'Belarus', 'Belgium', 'Benin', 'Bhutan', 'Bolivia',
       'Bosnia and Herzegovina', 'Botswana', 'Brazil', 'Bulgaria',
       'Burkina Faso', 'Burundi', 'Cambodia', 'Cameroon', 'Canada',
       'Cape Verde', 'Central African Republic', 'Chad', 'Chile', 'China',
       'Colombia', 'Comoros', 'Costa Rica', 'Country', 'Croatia', 'Cuba',
       'Cyprus', 'Czech Republic', 'Democratic Republic of the Congo',
       'Denmark', 'Djibouti', 'Dominican Republic', 'Ecuador', 'Egypt',
       'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Estonia',
       'Eswatini', 'Ethiopia', 'Fiji', 'Finland', 'France', 'Gabon',
       'Gambia', 'Georgia', 'Germany', 'Ghana', 'Greece', 'Guatemala',
       'Guinea', 'Guinea-Bissau', 'Guyana', 'Haiti', 'Honduras',
       'Hong Kong', 'Hungary', 'Iceland', 'India', 'Indonesia', 'Iran',
       'Iraq', 'Ireland', 'Israel', 'Italy', 'Ivory Coast', 'Jamaica',
       'Japan', 'Jordan', 'Kazakhstan', 'Kenya', 'Kuwait', 'Kyrgyzstan',
       'Laos', 'Latvia', 'Lebanon', 'Lesotho', 'Liberia', 'Libya',
       'Lithuania', 'Luxembourg', 'Madagascar', 'Malawi', 'Malaysia',
       'Mali', 'Malta', 'Mauritania', 'Mauritius', 'Mexico', 'Moldova',
       'Mongolia', 'Montenegro', 'Morocco', 'Mozambique', 'Myanmar',
       'Namibia', 'Nepal', 'Netherlands', 'New Zealand', 'Nicaragua',
       'Niger', 'Nigeria', 'North Korea', 'North Macedonia', 'Norway',
       'Oman', 'Pakistan', 'Palestine', 'Panama', 'Papua New Guinea',
       'Paraguay', 'Peru', 'Philippines', 'Poland', 'Portugal', 'Qatar',
       'Republic of the Congo', 'Romania', 'Russia', 'Rwanda',
       'Saudi Arabia', 'Senegal', 'Serbia', 'Sierra Leone', 'Singapore',
       'Slovakia', 'Slovenia', 'South Africa', 'South Korea[n 2]',
       'Spain', 'Sri Lanka', 'Sudan', 'Suriname', 'Sweden', 'Switzerland',
       'Syria', 'Taiwan', 'Tajikistan', 'Tanzania', 'Thailand',
       'Timor-Leste', 'Togo', 'Trinidad and Tobago', 'Tunisia', 'Turkey',
       'Turkmenistan', 'Uganda', 'Ukraine', 'United Arab Emirates',
       'United Kingdom', 'United States', 'Uruguay', 'Uzbekistan',
       'Venezuela', 'Vietnam', 'Yemen', 'Zambia', 'Zimbabwe'],
      dtype=object)

Search This Blog

CodewithSumit

Regex¶

Data Scraping¶

Date Time Range in function¶

Shuffle data set in Pandas¶

Merging DataFrame in pandas¶

NumPy¶

DataFrame¶

Comments

Post a Comment

Popular posts from this blog

Blogging Website using Python-flask,MySql,Bootstrap 4

calculator in java with actionListener and KeyListener

Linked list Deletion in c