From 003b349ce1ffb62eec20fd7e8b13941b1cf9be79 Mon Sep 17 00:00:00 2001 From: Adnan Gonzaga Date: Fri, 28 Mar 2025 17:38:36 -0300 Subject: [PATCH] =?UTF-8?q?[Init]=20-=20Reposit=C3=B3rio=20de=20demonstra?= =?UTF-8?q?=C3=A7=C3=A3o?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 484 +++++++++++++++++++++++++++++++++++++++++++++++ Dockerfile | 43 +++++ package.json | 11 ++ requirements.txt | 1 + run.sh | 3 + run_node.sh | 3 + run_python.sh | 3 + scrapper.js | 24 +++ scrapper.py | 29 +++ 9 files changed, 601 insertions(+) create mode 100644 .gitignore create mode 100644 Dockerfile create mode 100644 package.json create mode 100644 requirements.txt create mode 100644 run.sh create mode 100644 run_node.sh create mode 100644 run_python.sh create mode 100644 scrapper.js create mode 100644 scrapper.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..104b544 --- /dev/null +++ b/.gitignore @@ -0,0 +1,484 @@ +## Ignore Visual Studio temporary files, build results, and +## files generated by popular Visual Studio add-ons. 
+## +## Get latest from `dotnet new gitignore` + +# dotenv files +.env + +# User-specific files +*.rsuser +*.suo +*.user +*.userosscache +*.sln.docstates + +# User-specific files (MonoDevelop/Xamarin Studio) +*.userprefs + +# Mono auto generated files +mono_crash.* + +# Build results +[Dd]ebug/ +[Dd]ebugPublic/ +[Rr]elease/ +[Rr]eleases/ +x64/ +x86/ +[Ww][Ii][Nn]32/ +[Aa][Rr][Mm]/ +[Aa][Rr][Mm]64/ +bld/ +[Bb]in/ +[Oo]bj/ +[Ll]og/ +[Ll]ogs/ + +# Visual Studio 2015/2017 cache/options directory +.vs/ +# Uncomment if you have tasks that create the project's static files in wwwroot +#wwwroot/ + +# Visual Studio 2017 auto generated files +Generated\ Files/ + +# MSTest test Results +[Tt]est[Rr]esult*/ +[Bb]uild[Ll]og.* + +# NUnit +*.VisualState.xml +TestResult.xml +nunit-*.xml + +# Build Results of an ATL Project +[Dd]ebugPS/ +[Rr]eleasePS/ +dlldata.c + +# Benchmark Results +BenchmarkDotNet.Artifacts/ + +# .NET +project.lock.json +project.fragment.lock.json +artifacts/ + +# Tye +.tye/ + +# ASP.NET Scaffolding +ScaffoldingReadMe.txt + +# StyleCop +StyleCopReport.xml + +# Files built by Visual Studio +*_i.c +*_p.c +*_h.h +*.ilk +*.meta +*.obj +*.iobj +*.pch +*.pdb +*.ipdb +*.pgc +*.pgd +*.rsp +*.sbr +*.tlb +*.tli +*.tlh +*.tmp +*.tmp_proj +*_wpftmp.csproj +*.log +*.tlog +*.vspscc +*.vssscc +.builds +*.pidb +*.svclog +*.scc + +# Chutzpah Test files +_Chutzpah* + +# Visual C++ cache files +ipch/ +*.aps +*.ncb +*.opendb +*.opensdf +*.sdf +*.cachefile +*.VC.db +*.VC.VC.opendb + +# Visual Studio profiler +*.psess +*.vsp +*.vspx +*.sap + +# Visual Studio Trace Files +*.e2e + +# TFS 2012 Local Workspace +$tf/ + +# Guidance Automation Toolkit +*.gpState + +# ReSharper is a .NET coding add-in +_ReSharper*/ +*.[Rr]e[Ss]harper +*.DotSettings.user + +# TeamCity is a build add-in +_TeamCity* + +# DotCover is a Code Coverage Tool +*.dotCover + +# AxoCover is a Code Coverage Tool +.axoCover/* +!.axoCover/settings.json + +# Coverlet is a free, cross platform Code Coverage Tool 
+coverage*.json +coverage*.xml +coverage*.info + +# Visual Studio code coverage results +*.coverage +*.coveragexml + +# NCrunch +_NCrunch_* +.*crunch*.local.xml +nCrunchTemp_* + +# MightyMoose +*.mm.* +AutoTest.Net/ + +# Web workbench (sass) +.sass-cache/ + +# Installshield output folder +[Ee]xpress/ + +# DocProject is a documentation generator add-in +DocProject/buildhelp/ +DocProject/Help/*.HxT +DocProject/Help/*.HxC +DocProject/Help/*.hhc +DocProject/Help/*.hhk +DocProject/Help/*.hhp +DocProject/Help/Html2 +DocProject/Help/html + +# Click-Once directory +publish/ + +# Publish Web Output +*.[Pp]ublish.xml +*.azurePubxml +# Note: Comment the next line if you want to checkin your web deploy settings, +# but database connection strings (with potential passwords) will be unencrypted +*.pubxml +*.publishproj + +# Microsoft Azure Web App publish settings. Comment the next line if you want to +# checkin your Azure Web App publish settings, but sensitive information contained +# in these scripts will be unencrypted +PublishScripts/ + +# NuGet Packages +*.nupkg +# NuGet Symbol Packages +*.snupkg +# The packages folder can be ignored because of Package Restore +**/[Pp]ackages/* +# except build/, which is used as an MSBuild target. 
+!**/[Pp]ackages/build/ +# Uncomment if necessary however generally it will be regenerated when needed +#!**/[Pp]ackages/repositories.config +# NuGet v3's project.json files produces more ignorable files +*.nuget.props +*.nuget.targets + +# Microsoft Azure Build Output +csx/ +*.build.csdef + +# Microsoft Azure Emulator +ecf/ +rcf/ + +# Windows Store app package directories and files +AppPackages/ +BundleArtifacts/ +Package.StoreAssociation.xml +_pkginfo.txt +*.appx +*.appxbundle +*.appxupload + +# Visual Studio cache files +# files ending in .cache can be ignored +*.[Cc]ache +# but keep track of directories ending in .cache +!?*.[Cc]ache/ + +# Others +ClientBin/ +~$* +*~ +*.dbmdl +*.dbproj.schemaview +*.jfm +*.pfx +*.publishsettings +orleans.codegen.cs + +# Including strong name files can present a security risk +# (https://github.com/github/gitignore/pull/2483#issue-259490424) +#*.snk + +# Since there are multiple workflows, uncomment next line to ignore bower_components +# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) +#bower_components/ + +# RIA/Silverlight projects +Generated_Code/ + +# Backup & report files from converting an old project file +# to a newer Visual Studio version. Backup files are not needed, +# because we have git ;-) +_UpgradeReport_Files/ +Backup*/ +UpgradeLog*.XML +UpgradeLog*.htm +ServiceFabricBackup/ +*.rptproj.bak + +# SQL Server files +*.mdf +*.ldf +*.ndf + +# Business Intelligence projects +*.rdl.data +*.bim.layout +*.bim_*.settings +*.rptproj.rsuser +*- [Bb]ackup.rdl +*- [Bb]ackup ([0-9]).rdl +*- [Bb]ackup ([0-9][0-9]).rdl + +# Microsoft Fakes +FakesAssemblies/ + +# GhostDoc plugin setting file +*.GhostDoc.xml + +# Node.js Tools for Visual Studio +.ntvs_analysis.dat +node_modules/ + +# Visual Studio 6 build log +*.plg + +# Visual Studio 6 workspace options file +*.opt + +# Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 
+*.vbw + +# Visual Studio 6 auto-generated project file (contains which files were open etc.) +*.vbp + +# Visual Studio 6 workspace and project file (working project files containing files to include in project) +*.dsw +*.dsp + +# Visual Studio 6 technical files +*.ncb +*.aps + +# Visual Studio LightSwitch build output +**/*.HTMLClient/GeneratedArtifacts +**/*.DesktopClient/GeneratedArtifacts +**/*.DesktopClient/ModelManifest.xml +**/*.Server/GeneratedArtifacts +**/*.Server/ModelManifest.xml +_Pvt_Extensions + +# Paket dependency manager +.paket/paket.exe +paket-files/ + +# FAKE - F# Make +.fake/ + +# CodeRush personal settings +.cr/personal + +# Python Tools for Visual Studio (PTVS) +__pycache__/ +*.pyc + +# Cake - Uncomment if you are using it +# tools/** +# !tools/packages.config + +# Tabs Studio +*.tss + +# Telerik's JustMock configuration file +*.jmconfig + +# BizTalk build output +*.btp.cs +*.btm.cs +*.odx.cs +*.xsd.cs + +# OpenCover UI analysis results +OpenCover/ + +# Azure Stream Analytics local run output +ASALocalRun/ + +# MSBuild Binary and Structured Log +*.binlog + +# NVidia Nsight GPU debugger configuration file +*.nvuser + +# MFractors (Xamarin productivity tool) working folder +.mfractor/ + +# Local History for Visual Studio +.localhistory/ + +# Visual Studio History (VSHistory) files +.vshistory/ + +# BeatPulse healthcheck temp database +healthchecksdb + +# Backup folder for Package Reference Convert tool in Visual Studio 2017 +MigrationBackup/ + +# Ionide (cross platform F# VS Code tools) working folder +.ionide/ + +# Fody - auto-generated XML schema +FodyWeavers.xsd + +# VS Code files for those working on multiple tools +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json +*.code-workspace + +# Local History for Visual Studio Code +.history/ + +# Windows Installer files from build outputs +*.cab +*.msi +*.msix +*.msm +*.msp + +# JetBrains Rider +*.sln.iml +.idea + +## +## Visual studio for Mac 
+## + + +# globs +Makefile.in +*.userprefs +*.usertasks +config.make +config.status +aclocal.m4 +install-sh +autom4te.cache/ +*.tar.gz +tarballs/ +test-results/ + +# Mac bundle stuff +*.dmg +*.app + +# content below from: https://github.com/github/gitignore/blob/master/Global/macOS.gitignore +# General +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +# content below from: https://github.com/github/gitignore/blob/master/Global/Windows.gitignore +# Windows thumbnail cache files +Thumbs.db +ehthumbs.db +ehthumbs_vista.db + +# Dump file +*.stackdump + +# Folder config file +[Dd]esktop.ini + +# Recycle Bin used on file shares +$RECYCLE.BIN/ + +# Windows Installer files +*.cab +*.msi +*.msix +*.msm +*.msp + +# Windows shortcuts +*.lnk + +# Vim temporary swap files +*.swp diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..895d328 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,43 @@ +# Use the official Playwright image as base +FROM mcr.microsoft.com/playwright:v1.42.1-jammy + +# Set the working directory +WORKDIR /app + +# Install Python and Node.js (for both Python and JS examples) +# RUN apt-get update && \ + # apt-get install -y python3 python3-pip && \ + # rm -rf /var/lib/apt/lists/* +RUN apt-get update && \ + apt-get install -y \ + python3 \ + python3-pip \ + xvfb \ + x11-utils \ + libgtk-3-0 \ + libnotify-dev \ + libgconf-2-4 \ + libnss3 \ + libxss1 \ + libasound2 \ + && rm -rf /var/lib/apt/lists/* +# Copy package files first to leverage Docker cache +COPY package.json requirements.txt ./ + +# Install Node.js and Python dependencies +RUN npm install && \ + pip install --no-cache-dir -r 
requirements.txt
+
+# Install Playwright browsers for both the Node.js and the Python package
+RUN npx playwright install --with-deps && python3 -m playwright install
+
+# Copy the rest of the application
+
+COPY . .
+
+# Make our scripts executable
+RUN chmod +x ./run*.sh
+
+# Set default command to display help
+# CMD ["bash", "-c", "echo 'Choose a script: run_python.sh or run_node.sh' && bash"]
+CMD ["xvfb-run", "bash", "-c", "echo 'Choose a script: run_python.sh or run_node.sh' && bash"]
diff --git a/package.json b/package.json
new file mode 100644
index 0000000..a6d959e
--- /dev/null
+++ b/package.json
@@ -0,0 +1,11 @@
+{
+  "name": "playwright-demo",
+  "version": "1.0.0",
+  "main": "scrapper.js",
+  "scripts": {
+    "start": "node scrapper.js"
+  },
+  "dependencies": {
+    "playwright": "^1.42.1"
+  }
+}
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..c4d4b5a
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+playwright==1.51.0
diff --git a/run.sh b/run.sh
new file mode 100644
index 0000000..7c71203
--- /dev/null
+++ b/run.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+echo "Running Python example..."
+python3 scrapper.py
diff --git a/run_node.sh b/run_node.sh
new file mode 100644
index 0000000..c6240e6
--- /dev/null
+++ b/run_node.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+echo "Running Node.js example..."
+node scrapper.js
diff --git a/run_python.sh b/run_python.sh
new file mode 100644
index 0000000..09f3509
--- /dev/null
+++ b/run_python.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+echo "Running Python example..."
+python3 scrapper.py
diff --git a/scrapper.js b/scrapper.js
new file mode 100644
index 0000000..15249a1
--- /dev/null
+++ b/scrapper.js
@@ -0,0 +1,30 @@
+const { chromium } = require('playwright');
+
+// Demo: open example.com, save a screenshot, and print the page title.
+(async () => {
+  // Runs headless unless HEADLESS is set to exactly 'false'.
+  const headless = process.env.HEADLESS !== 'false';
+  const browser = await chromium.launch({ headless });
+
+  try {
+    const page = await browser.newPage();
+
+    console.log('Navigating to example.com...');
+    await page.goto('https://example.com');
+
+    // Take a screenshot
+    await page.screenshot({ path: 'example.png' });
+    console.log('Screenshot saved to example.png');
+
+    // Extract page title
+    const title = await page.title();
+    console.log(`Page title: ${title}`);
+
+    // Demonstrate waiting
+    console.log('Waiting 2 seconds to demonstrate...');
+    await new Promise(resolve => setTimeout(resolve, 2000));
+  } finally {
+    // Always shut the browser down, even if a step above throws.
+    await browser.close();
+  }
+})();
diff --git a/scrapper.py b/scrapper.py
new file mode 100644
index 0000000..95225a3
--- /dev/null
+++ b/scrapper.py
@@ -0,0 +1,40 @@
+import os
+import time
+
+from playwright.sync_api import sync_playwright
+
+
+def run_scraper():
+    """Open example.com in Chromium, save a screenshot, and print the page title.
+
+    The browser runs headless when the HEADLESS environment variable is unset
+    or equals "true" (case-insensitive); any other value launches it headed.
+    """
+    with sync_playwright() as p:
+        # HEADLESS defaults to "true"; os must be imported for this lookup.
+        headless = os.getenv('HEADLESS', 'true').lower() == 'true'
+        browser = p.chromium.launch(headless=headless)
+        try:
+            page = browser.new_page()
+
+            print("Navigating to example.com...")
+            page.goto("https://example.com")
+
+            # Take a screenshot
+            page.screenshot(path="example.png")
+            print("Screenshot saved to example.png")
+
+            # Extract page title
+            title = page.title()
+            print(f"Page title: {title}")
+
+            # Demonstrate waiting
+            print("Waiting 2 seconds to demonstrate...")
+            time.sleep(2)
+        finally:
+            # Close the browser even if a step above raises.
+            browser.close()
+
+
+if __name__ == "__main__":
+    run_scraper()