summary | refs | log | tree | commit | diff
path: root/src/main/java/cc/andreasbaumann/grabbers/nzz/Main.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/main/java/cc/andreasbaumann/grabbers/nzz/Main.java')
-rw-r--r-- src/main/java/cc/andreasbaumann/grabbers/nzz/Main.java 189
1 files changed, 189 insertions, 0 deletions
diff --git a/src/main/java/cc/andreasbaumann/grabbers/nzz/Main.java b/src/main/java/cc/andreasbaumann/grabbers/nzz/Main.java
new file mode 100644
index 0000000..28c1f00
--- /dev/null
+++ b/src/main/java/cc/andreasbaumann/grabbers/nzz/Main.java
@@ -0,0 +1,189 @@
+package cc.andreasbaumann.grabbers.nzz;
+
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.charset.StandardCharsets;
+
+import java.io.File;
+import java.io.BufferedReader;
+import java.io.IOException;
+
+import com.fasterxml.jackson.core.JsonParser.Feature;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import picocli.CommandLine;
+import picocli.CommandLine.Command;
+import picocli.CommandLine.Option;
+import picocli.CommandLine.Parameters;
+import picocli.CommandLine.Help.Ansi;
+import java.util.concurrent.Callable;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import com.microsoft.playwright.Playwright;
+import com.microsoft.playwright.BrowserType;
+import com.microsoft.playwright.Browser;
+import com.microsoft.playwright.BrowserContext;
+import com.microsoft.playwright.Page;
+import com.microsoft.playwright.Download;
+import com.microsoft.playwright.options.AriaRole;
+
+import java.util.Locale;
+import java.util.Arrays;
+import java.util.Date;
+import java.text.SimpleDateFormat;
+
+public class Main
+{
+ /** Logger shared by this class and the nested command; created with {@code LogManager.getFormatterLogger}. */
+ private static final Logger LOGGER = LogManager.getFormatterLogger( Main.class );
+ /** User-agent string handed to the Playwright browser context. */
+ public static final String USER_AGENT = "NZZ-Grabber/1.0";
+ /** Default browser locale: language "de", country "CH". */
+ public static final Locale DEFAULT_LOCALE = new Locale( "de", "CH" );
+ /** Value passed to {@code Page.setDefaultTimeout}; presumably milliseconds (i.e. 60 s) - confirm against the Playwright documentation. */
+ public static final int DEFAULT_TIMEOUT = 60000;
+
+ @Command( name = "nzzgatherer", mixinStandardHelpOptions = true, version = "1.0",
+ description = "Grabs NZZ ePaper PDFs and stores them locally.")
+ static class NZZGatherer implements Callable<Integer>
+ {
+ // Session state. All of these fields are static and are assigned by the
+ // static helper methods of this class.
+ /** Settings read from the JSON configuration file by initializeFromFile. */
+ private static Configuration configuration;
+ /** Playwright instance created in initializePlaywright. */
+ private static Playwright playwright;
+ /** Browser type obtained from {@code playwright.chromium( )}. */
+ private static BrowserType browserType;
+ /** Browser launched in initializePlaywright; {@code null} until then. */
+ private static Browser browser = null;
+ /** Browser context carrying the user agent and locale settings. */
+ private static BrowserContext context;
+ /** Locale given to the browser context; starts out as DEFAULT_LOCALE. */
+ private static Locale locale = DEFAULT_LOCALE;
+ /** The page created in initialize and used by login, downloadCurrent and logout. */
+ private static Page page;
+
+ /**
+  * Starts Playwright, launches a Chromium browser and creates the browser
+  * context used for the session.
+  *
+  * The created objects are stored in the static fields {@code playwright},
+  * {@code browserType}, {@code browser} and {@code context}.
+  *
+  * @param notHeadless {@code true} to show the browser window,
+  *                    {@code false} to run the browser headless
+  */
+ private static void initializePlaywright( boolean notHeadless )
+ {
+     LOGGER.info( "Starting playwright..." );
+     playwright = Playwright.create( );
+     browserType = playwright.chromium( );
+     browser = browserType.launch( new BrowserType.LaunchOptions( )
+         .setHeadless( !notHeadless )
+         .setArgs( Arrays.asList( "--disable-gpu" ) )
+     );
+     // Playwright documents the locale as a language tag such as "en-GB" or
+     // "de-DE". Locale.toString( ) would produce the underscore form "de_CH",
+     // so the BCP 47 tag ("de-CH") is passed instead.
+     context = browser.newContext( new Browser.NewContextOptions( )
+         .setUserAgent( USER_AGENT )
+         .setLocale( locale.toLanguageTag( ) )
+     );
+ }
+
+ private static void logout( ) throws Exception
+ {
+ LOGGER.info( ">>> Logging out.." );
+ page.getByText( configuration.credentials.user ).click( );
+ page.getByText( "Abmelden" ).click( );
+ }
+
+ private static void downloadCurrent( ) throws Exception
+ {
+ LOGGER.info( ">>> Downloading current PDF..." );
+ Download download = page.waitForDownload( ( ) -> {
+ page.locator( "div:nth-child(2) > span" ).first( ).click( );
+ });
+ File directory = new File( configuration.downloads.directory );
+ Date today = new Date( );
+ String timeStamp = new SimpleDateFormat( "yyyyMMdd").format( today );
+ File file = new File( directory, "NZZ_" + timeStamp + ".pdf" );
+ LOGGER.info( ">>> Saving to '" + file + "'.." );
+ download.saveAs( file.toPath( ) );
+ }
+
+ private static void initialize( ) throws Exception
+ {
+ LOGGER.info( ">>> Opening NZZ ePaper..." );
+ page = context.newPage( );
+ page.setDefaultTimeout( DEFAULT_TIMEOUT );
+ page.route( "**", route -> {
+ LOGGER.info( route.request( ).url( ) );
+ route.resume( );
+ } );
+ page.onLoad( p -> LOGGER.info( "Page loaded!" ) );
+ page.onDOMContentLoaded( p -> LOGGER.info( "Page DOM content loaded!" ) );
+ }
+
+ /**
+  * Logs in to https://epaper.nzz.ch/ with the configured credentials.
+  *
+  * Steps: open the start page, follow "Anmelden", fill in the e-mail
+  * address and confirm with "Weiter", fill in the password and confirm
+  * with "Anmelden". After each step the page's load state is awaited.
+  *
+  * This method only logs in. It does not start a PDF download; that is
+  * the job of downloadCurrent( ), which the caller invokes only when the
+  * download was requested.
+  *
+  * @throws Exception declared by the signature; failures of the Playwright
+  *                   calls propagate to the caller
+  */
+ private static void login( ) throws Exception
+ {
+     LOGGER.info( ">>> Opening NZZ ePaper.." );
+     page.navigate( "https://epaper.nzz.ch/" );
+     page.waitForSelector( ":text('Anmelden')" );
+     page.waitForLoadState( );
+
+     LOGGER.info( ">>> Navigate to login page.." );
+     page.getByText( "Anmelden" ).click( );
+     page.waitForSelector( ":text('E-Mail-Adresse')" );
+     page.waitForLoadState( );
+
+     LOGGER.info( ">>> Inserting email data.." );
+     page.getByPlaceholder( "E-Mail-Adresse" ).fill( configuration.credentials.login );
+     page.getByRole( AriaRole.BUTTON, new Page.GetByRoleOptions( ).setName( "Weiter" ) ).click( );
+     page.waitForLoadState( );
+
+     LOGGER.info( ">>> Inserting password.." );
+     page.getByRole( AriaRole.TEXTBOX, new Page.GetByRoleOptions( ).setName( "Passwort*" ) ).fill( configuration.credentials.password );
+     page.waitForLoadState( );
+     page.getByRole( AriaRole.BUTTON, new Page.GetByRoleOptions( ).setName( "Anmelden" ) ).click( );
+     page.waitForLoadState( );
+
+     // No fixed sleep is needed here: the locator actions performed by the
+     // following steps wait for their target elements on their own.
+ }
+
+ private static void initializeFromFile( Path configurationFile )
+ {
+ ObjectMapper objectMapper = new ObjectMapper( );
+ objectMapper.enable( Feature.ALLOW_UNQUOTED_FIELD_NAMES );
+ objectMapper.enable( Feature.ALLOW_COMMENTS );
+
+ try( BufferedReader configurationReader = Files.newBufferedReader( configurationFile, StandardCharsets.UTF_8 ) ) {
+ LOGGER.info( "Reading configuration from '" + configurationFile + "'.." );
+ configuration = objectMapper.readValue( configurationReader, Configuration.class );
+ } catch ( IOException e ) {
+ LOGGER.error( "Failed to read the configuration file '" + configurationFile + "':\n", e );
+ System.exit( 1 );
+ }
+ }
+
+ /** Path of the JSON configuration file; "config.json" unless -c/--config is given. */
+ @Option( names = { "-c", "--config" }, description = "file (in JSON)", defaultValue = "config.json" )
+ private String configFile = "config.json";
+
+ /** When set, call( ) runs downloadCurrent( ) after logging in. */
+ @Option( names = { "--download-current" }, description = "download only todays PDF" )
+ private boolean downloadCurrent = false;
+
+ /** When set, the browser is launched with a visible window instead of headless. */
+ @Option( names = { "--not-headless" }, description = "show browser" )
+ private boolean notHeadless = false;
+
+ /**
+  * Runs one grabber session: read the configuration, start the browser,
+  * log in, optionally download the current PDF, log out.
+  *
+  * Playwright is closed in a {@code finally} block so that the browser
+  * is shut down even when one of the steps throws.
+  *
+  * @return 0 when all steps completed
+  * @throws Exception if any of the session steps fails
+  */
+ @Override
+ public Integer call( ) throws Exception
+ {
+     initializeFromFile( new File( configFile ).toPath( ) );
+     try {
+         initializePlaywright( notHeadless );
+         initialize( );
+         login( );
+         if( downloadCurrent ) {
+             downloadCurrent( );
+         }
+         logout( );
+     } finally {
+         // playwright stays null when Playwright.create( ) itself failed.
+         if( playwright != null ) {
+             playwright.close( );
+         }
+     }
+
+     return 0;
+ }
+ }
+
+ public static void main( String... args )
+ {
+ try {
+ int exitCode = new CommandLine( new NZZGatherer( ) ).execute( args );
+ System.exit( exitCode );
+ } catch( Exception e ) {
+ LOGGER.error( e );
+ System.exit( 1 );
+ }
+ }
+}
+
+