diff options
Diffstat (limited to 'src/main/java/cc/andreasbaumann/grabbers/nzz/Main.java')
-rw-r--r-- | src/main/java/cc/andreasbaumann/grabbers/nzz/Main.java | 189 |
1 files changed, 189 insertions, 0 deletions
diff --git a/src/main/java/cc/andreasbaumann/grabbers/nzz/Main.java b/src/main/java/cc/andreasbaumann/grabbers/nzz/Main.java new file mode 100644 index 0000000..28c1f00 --- /dev/null +++ b/src/main/java/cc/andreasbaumann/grabbers/nzz/Main.java @@ -0,0 +1,189 @@ +package cc.andreasbaumann.grabbers.nzz; + +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.charset.StandardCharsets; + +import java.io.File; +import java.io.BufferedReader; +import java.io.IOException; + +import com.fasterxml.jackson.core.JsonParser.Feature; +import com.fasterxml.jackson.databind.ObjectMapper; + +import picocli.CommandLine; +import picocli.CommandLine.Command; +import picocli.CommandLine.Option; +import picocli.CommandLine.Parameters; +import picocli.CommandLine.Help.Ansi; +import java.util.concurrent.Callable; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import com.microsoft.playwright.Playwright; +import com.microsoft.playwright.BrowserType; +import com.microsoft.playwright.Browser; +import com.microsoft.playwright.BrowserContext; +import com.microsoft.playwright.Page; +import com.microsoft.playwright.Download; +import com.microsoft.playwright.options.AriaRole; + +import java.util.Locale; +import java.util.Arrays; +import java.util.Date; +import java.text.SimpleDateFormat; + +public class Main +{ + private static final Logger LOGGER = LogManager.getFormatterLogger( Main.class ); + public static final String USER_AGENT = "NZZ-Grabber/1.0"; + public static final Locale DEFAULT_LOCALE = new Locale( "de", "CH" ); + public static final int DEFAULT_TIMEOUT = 60000; + + @Command( name = "nzzgatherer", mixinStandardHelpOptions = true, version = "1.0", + description = "Grabs NZZ ePaper PDFs and stores them locally.") + static class NZZGatherer implements Callable<Integer> + { + private static Configuration configuration; + private static Playwright playwright; + private static BrowserType browserType; + private static Browser browser = null; + private static BrowserContext context; + private static Locale locale = DEFAULT_LOCALE; + private static Page page; + + private static void initializePlaywright( boolean notHeadless ) + { + LOGGER.info( "Starting playwright..." ); + playwright = Playwright.create( ); + browserType = playwright.chromium( ); + browser = browserType.launch( new BrowserType.LaunchOptions( ) + .setHeadless( !notHeadless ) + .setArgs( Arrays.asList( "--disable-gpu" ) ) + ); + context = browser.newContext( new Browser.NewContextOptions( ) + .setUserAgent( USER_AGENT ) + .setLocale( locale.toString( ) ) + ); + } + + private static void logout( ) throws Exception + { + LOGGER.info( ">>> Logging out.." ); + page.getByText( configuration.credentials.user ).click( ); + page.getByText( "Abmelden" ).click( ); + } + + private static void downloadCurrent( ) throws Exception + { + LOGGER.info( ">>> Downloading current PDF..." ); + Download download = page.waitForDownload( ( ) -> { + page.locator( "div:nth-child(2) > span" ).first( ).click( ); + }); + File directory = new File( configuration.downloads.directory ); + Date today = new Date( ); + String timeStamp = new SimpleDateFormat( "yyyyMMdd").format( today ); + File file = new File( directory, "NZZ_" + timeStamp + ".pdf" ); + LOGGER.info( ">>> Saving to '" + file + "'.." ); + download.saveAs( file.toPath( ) ); + } + + private static void initialize( ) throws Exception + { + LOGGER.info( ">>> Opening NZZ ePaper..." ); + page = context.newPage( ); + page.setDefaultTimeout( DEFAULT_TIMEOUT ); + page.route( "**", route -> { + LOGGER.info( route.request( ).url( ) ); + route.resume( ); + } ); + page.onLoad( p -> LOGGER.info( "Page loaded!" ) ); + page.onDOMContentLoaded( p -> LOGGER.info( "Page DOM content loaded!" ) ); + } + + private static void login( ) throws Exception + { + + LOGGER.info( ">>> Opening NZZ ePaper.." ); + page.navigate( "https://epaper.nzz.ch/" ); + page.waitForSelector( ":text('Anmelden')" ); + page.waitForLoadState( ); + + LOGGER.info( ">>> Navigate to login page.." ); + page.getByText( "Anmelden" ).click( ); + page.waitForSelector( ":text('E-Mail-Adresse')" ); + page.waitForLoadState( ); + + LOGGER.info( ">>> Inserting email data.." ); + page.getByPlaceholder( "E-Mail-Adresse" ).fill( configuration.credentials.login ); + page.getByRole( AriaRole.BUTTON, new Page.GetByRoleOptions( ).setName( "Weiter" ) ).click( ); + page.waitForLoadState( ); + + LOGGER.info( ">>> Inserting password.." ); + page.getByRole( AriaRole.TEXTBOX, new Page.GetByRoleOptions( ).setName( "Passwort*" ) ).fill( configuration.credentials.password ); + page.waitForLoadState( ); + page.getByRole(AriaRole.BUTTON, new Page.GetByRoleOptions( ).setName( "Anmelden" )).click( ); + page.waitForLoadState( ); + + LOGGER.info( ">>> Downloading current PDF.." ); + Download download = page.waitForDownload(() -> { + page.locator("div:nth-child(2) > span").first( ).click( ); + } ); + + Thread.sleep( 20000 ); + } + + private static void initializeFromFile( Path configurationFile ) + { + ObjectMapper objectMapper = new ObjectMapper( ); + objectMapper.enable( Feature.ALLOW_UNQUOTED_FIELD_NAMES ); + objectMapper.enable( Feature.ALLOW_COMMENTS ); + + try( BufferedReader configurationReader = Files.newBufferedReader( configurationFile, StandardCharsets.UTF_8 ) ) { + LOGGER.info( "Reading configuration from '" + configurationFile + "'.." ); + configuration = objectMapper.readValue( configurationReader, Configuration.class ); + } catch ( IOException e ) { + LOGGER.error( "Failed to read the configuration file '" + configurationFile + "':\n", e ); + System.exit( 1 ); + } + } + + @Option( names = { "-c", "--config" }, description = "file (in JSON)", defaultValue = "config.json" ) + private String configFile = "config.json"; + + @Option( names = { "--download-current" }, description = "download only todays PDF" ) + private boolean downloadCurrent = false; + + @Option( names = { "--not-headless" }, description = "show browser" ) + private boolean notHeadless = false; + + @Override + public Integer call( ) throws Exception + { + initializeFromFile( new File( configFile ).toPath( ) ); + initializePlaywright( notHeadless ); + initialize( ); + login( ); + if( downloadCurrent ) { + downloadCurrent( ); + } + logout( ); + + return 0; + } + } + + public static void main( String... args ) + { + try { + int exitCode = new CommandLine( new NZZGatherer( ) ).execute( args ); + System.exit( exitCode ); + } catch( Exception e ) { + LOGGER.error( e ); + System.exit( 1 ); + } + } +} + + |