Combining multiple files from a bucket and moving them into another folder

Posted on by Sumit Kumar

import json
import boto3
import pandas as pd
from datetime import datetime
import s3fs
from urllib.parse import unquote

def lambda_handler(event, context):
    """Combine every CSV under ``real_time_data/`` into one timestamped file,
    then archive the originals.

    Steps:
      1. List objects under the ``real_time_data/`` prefix.
      2. Read each ``.csv`` object and concatenate them into a single DataFrame.
      3. Write the combined DataFrame to ``combined_data/anual_combined_<ts>.csv``.
      4. Copy each source file to ``archive/`` and delete it from the source prefix.

    Parameters
    ----------
    event, context : standard AWS Lambda invocation arguments (unused here).
    """
    # NOTE(review): timestamp is local to the Lambda runtime's clock (UTC in AWS).
    date_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

    s3 = boto3.client("s3")
    bucket = "deltafrog-training-dev"
    src_key = "real_time_data/"
    # "anual_combined_" spelling kept as-is: downstream consumers may rely on it.
    dest_file = f"s3://{bucket}/combined_data/anual_combined_"
    archive_key = "archive/"

    # list_objects_v2 is the current API (list_objects is legacy).
    # NOTE(review): a single call returns at most 1000 keys; add pagination if
    # the prefix can ever hold more.
    res = s3.list_objects_v2(Bucket=bucket, Prefix=src_key)

    fname_list = []   # bare filenames of every CSV we combined, for archiving
    frames = []       # per-file DataFrames, concatenated once at the end
    if "Contents" in res:
        for obj in res["Contents"]:
            key = obj["Key"]
            # endswith is stricter than the old substring test, which would
            # also match keys merely containing "csv".
            if key.endswith(".csv"):
                filename = key.split("/")[-1]
                fname_list.append(filename)
                # pandas reads s3:// paths via the s3fs backend (imported above).
                frames.append(pd.read_csv(f"s3://{bucket}/{key}"))

    if frames:
        # Single concat instead of repeated DataFrame.append (removed in
        # pandas 2.0 and O(n^2) when done inside the loop).
        final_df = pd.concat(frames, ignore_index=True)
        final_df.to_csv(dest_file + date_time + ".csv", index=False)
        print("combined shape:", final_df.shape)

    # Move each processed file to the archive prefix: S3 has no rename, so
    # this is copy-then-delete.
    s3_res = boto3.resource("s3")  # hoisted out of the loop (was re-created per file)
    for file in fname_list:
        copy_source = {"Bucket": bucket, "Key": src_key + file}
        s3_res.meta.client.copy(copy_source, bucket, archive_key + file)
        s3.delete_object(Bucket=bucket, Key=src_key + file)

Posted in AWS.

Leave a Reply

Your email address will not be published. Required fields are marked *

*

*