Does anybody know how to run a Python function that uses Selenium to get data from a webpage as an AWS Lambda function? When I run my Lambda function, it hits the time limit (even though I set the time limit to 1 minute). I also tried running it on an AWS EC2 instance, but there it throws this error:
This version of ChromeDriver only supports Chrome version 114
Current browser version is 123.0.6312.105
I tried to find Chrome browser version 114, but without success. When I run my function in a Docker container, it exits with code 127, and I found information suggesting that this is a ChromeDriver issue. I don't think the cause is in my code, but just in case, I am adding it here:
import csv
import datetime
import decimal
import json
import time
from io import StringIO

import boto3
# To detect the latest version of the Chromedriver binary installed on the system
# import chromedriver_binary
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver import Chrome
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait

# Location of the page to scrape and of the table that holds the index values.
PAGE_URL = "SOME_URL"
TABLE_XPATH = (
    "//table[@class='Box-cui__sc-6h7t1h-0 "
    "TableComponents__StyledTable-cui__sc-19kjwg0-3 dISGpF']"
)
# Text that identifies the table line carrying the value we want.
INDEX_LABEL = 'Russell 1000 Index (RIR)'
# Upper bound (seconds) to wait for the table to be rendered.
PAGE_LOAD_TIMEOUT = 10


class Error(Exception):
    """Expected failure of the scrape; reported to the caller as HTTP 500."""


def third_Friday(month: int, year: int) -> str:
    """Return the ISO date (YYYY-MM-DD) of the third Friday of a month.

    Args:
        month: Month number, 1-12.
        year: Four-digit year.

    Returns:
        The date of the third Friday formatted with ``date.isoformat()``.
    """
    weekday_1 = datetime.date(year, month, 1).weekday()  # Monday == 0
    # Days from the 1st to the first Friday (weekday 4); when the month
    # starts on Sat/Sun the first Friday falls in the following week.
    day_diff = 4 - weekday_1 if weekday_1 <= 4 else 11 - weekday_1
    # First Friday is day (1 + day_diff); the third one is 14 days later.
    return datetime.date(year, month, 15 + day_diff).isoformat()


def _parse_csv(text):
    """Split CSV text into ``(header, rows)``.

    Rows whose column count differs from the header's are dropped.
    """
    reader = csv.reader(StringIO(text, newline=""))
    header = next(reader, [])
    rows = [row for row in reader if len(row) == len(header)]
    return header, rows


def _extract_index_values(table_text, label):
    """Return the trailing numeric token of every line containing ``label``.

    Raises:
        Error: if a matching line does not end in a decimal number.
    """
    values = []
    for line in table_text.split('\n'):
        if label not in line:
            continue
        # Split on whitespace; ``str.split('')`` raises ValueError.
        tokens = line.split()
        if not tokens:
            continue
        # Drop thousands separators ("2,845.12") before converting.
        raw_value = tokens[-1].replace(',', '')
        try:
            values.append(decimal.Decimal(raw_value))
        except decimal.InvalidOperation as err:
            raise Error(f"cannot parse index value from {line!r}") from err
    return values


def _fetch_table_text():
    """Open the page in headless Chrome and return the first table's text.

    The driver is always shut down, even when loading or lookup fails.

    Raises:
        Error: if the table does not appear within ``PAGE_LOAD_TIMEOUT``.
    """
    # NOTE: the Chrome and ChromeDriver binaries must have the same major
    # version; "only supports Chrome version N" means they do not match.
    opts = webdriver.ChromeOptions()
    opts.add_argument("--headless")
    opts.add_argument("--no-sandbox")
    opts.add_argument("--single-process")
    opts.add_argument("--disable-dev-shm-usage")
    opts.add_experimental_option("detach", True)

    driver = Chrome(options=opts)
    try:
        driver.get(PAGE_URL)
        try:
            # Poll until at least one matching table exists instead of
            # sleeping for a fixed time; an empty list is falsy, so the
            # wait continues until the table shows up or the timeout hits.
            tables = WebDriverWait(driver, PAGE_LOAD_TIMEOUT).until(
                lambda drv: drv.find_elements(By.XPATH, value=TABLE_XPATH)
            )
        except TimeoutException as err:
            raise Error("index table not found on the page") from err
        return tables[0].text
    finally:
        # quit() ends the session and the browser process.
        driver.quit()


def lambda_handler(event, context):
    """AWS Lambda entry point.

    Reads this month's CSV from S3, appends the scraped index value(s),
    sorts the rows by their second column and writes the file back.

    Args:
        event: Lambda event payload (unused).
        context: Lambda context object (unused).

    Returns:
        ``{"statusCode": 200, "body": '"OK"'}`` on success, or
        ``{"statusCode": 500, "body": repr(error)}`` when the scrape fails
        with an ``Error``.
    """
    # NOTE(review): credentials are hard-coded here while the write-back
    # below uses the default credential chain - consider relying on the
    # function's execution role for both.
    s3_client = boto3.client('s3',
                             aws_access_key_id='MY_KEY',
                             aws_secret_access_key='MY_SECRET')

    # The file is named after the third Friday of the current month.
    today = datetime.datetime.today()
    fname = third_Friday(today.month, today.year).replace('-', '')
    object_key = 'BUCKET_KEY' + fname + '.csv'

    # Get the existing data from the bucket.
    response = s3_client.get_object(Bucket='BUCKET_ID', Key=object_key)
    body = response['Body'].read().decode('utf-8').strip('\'')
    header, df = _parse_csv(body)

    try:
        table_text = _fetch_table_text()
        for index_value in _extract_index_values(table_text, INDEX_LABEL):
            df.append([fname, 'RIR', 'RIR', index_value])

        s3_resource = boto3.resource('s3')
        df.sort(key=lambda x: x[1])

        csv_buffer = StringIO()
        csv_writter = csv.writer(csv_buffer)
        csv_writter.writerow([col.strip() for col in header])
        for row in df:
            csv_writter.writerow([str(el).strip() for el in row])

        # Write the content back to remote storage.
        s3_resource.Object('BUCKET_ID', object_key).put(
            Body=csv_buffer.getvalue())
    except Error as err:
        return {"statusCode": 500, "body": repr(err)}

    # Return statement for the Lambda function.
    return {"statusCode": 200, "body": json.dumps("OK")}
Thank you in advance for any help.
I tried running my function by uploading it manually as a zip archive that also contains the browser and ChromeDriver. I also tried running it in a Docker container, running it on an EC2 instance, and deploying it with the SAM CLI.