# Combining multiple files from a bucket and moving them into another folder
import json
import boto3
import pandas as pd
from datetime import datetime
import s3fs
from urllib.parse import unquote
def lambda_handler(event, context):
    """Combine every CSV under ``real_time_data/`` into one timestamped file,
    then archive the originals.

    Reads each ``*.csv`` object in the source prefix (via s3fs, so pandas can
    open ``s3://`` paths directly), concatenates them into a single DataFrame,
    writes the result to ``combined_data/``, and finally copies each source
    object to ``archive/`` before deleting it from the source prefix.

    Parameters
    ----------
    event, context : standard Lambda invocation arguments (unused here).
    """
    date_time = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

    # Hoisted out of the loop: one client/resource pair for the whole run.
    s3 = boto3.client('s3')
    s3_res = boto3.resource('s3')

    bucket = 'deltafrog-training-dev'
    src_key = 'real_time_data/'
    # NOTE: "anual" typo preserved deliberately — downstream consumers read
    # this path; renaming it would be a breaking change.
    dest_file = 's3://deltafrog-training-dev/combined_data/anual_combined_'
    archive_key = 'archive/'

    # list_objects_v2 is the current API (list_objects is legacy).
    # NOTE(review): still returns at most 1000 keys per call — add paginator
    # support if the prefix can ever hold more objects than that.
    res = s3.list_objects_v2(Bucket=bucket, Prefix=src_key)
    print(res)

    fname_list = []
    frames = []
    if 'Contents' in res:
        for obj in res['Contents']:
            key = obj['Key']
            # endswith('.csv') instead of a substring test, so keys that
            # merely contain "csv" (e.g. a csv/ sub-folder) are skipped.
            if key.endswith('.csv'):
                filename = key.split('/')[-1]
                print(filename)
                fname_list.append(filename)
                # Collect frames and concat once below — DataFrame.append
                # was removed in pandas 2.0 and was O(n^2) anyway.
                frames.append(pd.read_csv(f's3://{bucket}/{src_key}{filename}'))

    if frames:
        final_df = pd.concat(frames, ignore_index=True)
        print('final', final_df)
        final_df.to_csv(dest_file + date_time + '.csv', index=False)
        print(final_df.shape)

        # Move each processed file: server-side copy to archive/, then delete
        # the original (S3 has no native "move").
        for file in fname_list:
            print(file)
            copy_source = {
                'Bucket': bucket,
                'Key': src_key + file,
            }
            s3_res.meta.client.copy(copy_source, bucket, archive_key + file)
            s3.delete_object(Bucket=bucket, Key=src_key + file)
    else:
        # Guard: previously an empty combined CSV was written even when no
        # source files existed; now we skip writing and archiving entirely.
        print('no csv files found under', src_key)