package cc.andreasbaumann.grabbers.nzz;

import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.charset.StandardCharsets;
import java.io.File;
import java.io.BufferedReader;
import java.io.IOException;

import com.fasterxml.jackson.core.JsonParser.Feature;
import com.fasterxml.jackson.databind.ObjectMapper;

import picocli.CommandLine;
import picocli.CommandLine.Command;
import picocli.CommandLine.Option;
import picocli.CommandLine.Parameters;
import picocli.CommandLine.Help.Ansi;

import java.util.concurrent.Callable;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import com.microsoft.playwright.Playwright;
import com.microsoft.playwright.BrowserType;
import com.microsoft.playwright.Browser;
import com.microsoft.playwright.BrowserContext;
import com.microsoft.playwright.Page;
import com.microsoft.playwright.Download;
import com.microsoft.playwright.options.AriaRole;

import java.util.Locale;
import java.util.Arrays;
import java.util.Date;
import java.text.SimpleDateFormat;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;

/**
 * Command line entry point of the NZZ ePaper grabber.
 *
 * <p>The actual work is done by the nested picocli command {@link NZZGatherer}, which
 * drives a Chromium browser through Playwright: it logs in to the ePaper site, optionally
 * downloads the current issue as PDF and logs out again.
 */
public class Main {

    private static final Logger LOGGER = LogManager.getFormatterLogger( Main.class );

    private static final String VERSION = "1.0";

    /** User agent string sent by the browser context. */
    public static final String USER_AGENT = "NZZ-Grabber/" + VERSION;

    /** Locale used for the browser context. */
    public static final Locale DEFAULT_LOCALE = new Locale( "de", "CH" );

    /** Default timeout passed to {@code Page.setDefaultTimeout} (Playwright expects milliseconds). */
    public static final int DEFAULT_TIMEOUT = 60000;

    /**
     * Default configuration file name. The identifier is misspelled; it is kept as is because
     * it is public and may be referenced from other classes.
     */
    public static final String DEFAULT_CONFGURATION = "config.json";

    /** Formats the date part of the PDF file name; thread-safe, hence shared. */
    private static final DateTimeFormatter FILE_DATE_FORMAT = DateTimeFormatter.ofPattern( "yyyyMMdd" );

    /**
     * picocli command which performs one grabbing run. Options are injected by picocli into the
     * annotated fields before {@link #call()} is invoked.
     */
    @Command( name = "nzzgatherer", mixinStandardHelpOptions = true, version = VERSION,
        description = "Grabs NZZ ePaper PDFs and stores them locally.")
    static class NZZGatherer implements Callable<Integer> {

        private Configuration configuration;
        private Playwright playwright;
        private BrowserType browserType;
        private Browser browser = null;
        private BrowserContext context;
        private Locale locale = DEFAULT_LOCALE;
        private Page page;

        /**
         * Starts Playwright, launches Chromium and creates the browser context.
         *
         * @param headless whether the browser is launched without a visible window
         */
        private void initializePlaywright( boolean headless ) {
            LOGGER.info( "Starting playwright..." );
            playwright = Playwright.create( );
            browserType = playwright.chromium( );
            browser = browserType.launch( new BrowserType.LaunchOptions( )
                .setHeadless( headless )
                .setArgs( Arrays.asList( "--disable-gpu" ) ) );
            // Playwright documents locales as BCP 47 tags ("de-CH"); Locale.toString( )
            // would produce the underscore form "de_CH" instead.
            context = browser.newContext( new Browser.NewContextOptions( )
                .setUserAgent( USER_AGENT )
                .setLocale( locale.toLanguageTag( ) ) );
        }

        /**
         * Closes the browser and the Playwright driver if they were started. Safe to call when
         * start-up failed half-way, as every handle is checked for {@code null} first.
         */
        private void shutdownPlaywright( ) {
            try {
                if( browser != null ) {
                    browser.close( );
                    browser = null;
                }
            } finally {
                // Always stop the driver, even if closing the browser failed.
                if( playwright != null ) {
                    LOGGER.info( "Stopping playwright..." );
                    playwright.close( );
                    playwright = null;
                }
            }
        }

        /**
         * Logs out by clicking the element showing the configured user text and then "Abmelden".
         *
         * @throws Exception if a Playwright action fails or times out
         */
        private void logout( ) throws Exception {
            LOGGER.info( ">>> Logging out.." );
            // NOTE(review): this uses credentials.user while login( ) uses credentials.login;
            // presumably 'user' is the display name shown on the page - confirm against Configuration.
            page.getByText( configuration.credentials.user ).click( );
            page.getByText( "Abmelden" ).click( );
        }

        /**
         * Triggers the download of the current issue and stores it as
         * {@code NZZ_<yyyyMMdd>.pdf} in the configured download directory, where the date is
         * today's date in the system default time zone.
         *
         * @throws Exception if the download does not start or cannot be saved
         */
        private void downloadCurrent( ) throws Exception {
            LOGGER.info( ">>> Downloading current PDF..." );
            Download download = page.waitForDownload( ( ) -> {
                page.locator( "div:nth-child(2) > span" ).first( ).click( );
            } );

            File directory = new File( configuration.downloads.directory );
            String timeStamp = LocalDate.now( ).format( FILE_DATE_FORMAT );
            File file = new File( directory, "NZZ_" + timeStamp + ".pdf" );

            LOGGER.info( ">>> Saving to '" + file + "'.." );
            download.saveAs( file.toPath( ) );
        }

        /**
         * Creates the page used for the whole session and applies the default timeout. With
         * {@code --debug}, every request URL and the page load events are logged.
         *
         * @throws Exception if the page cannot be created
         */
        private void initializePage( ) throws Exception {
            LOGGER.info( ">>> Opening NZZ ePaper..." );
            page = context.newPage( );
            page.setDefaultTimeout( DEFAULT_TIMEOUT );

            if( debug ) {
                // Log each request and let it continue unmodified.
                page.route( "**", route -> {
                    LOGGER.info( route.request( ).url( ) );
                    route.resume( );
                } );
                page.onLoad( p -> LOGGER.info( "Page loaded!" ) );
                page.onDOMContentLoaded( p -> LOGGER.info( "Page DOM content loaded!" ) );
            }
        }

        /**
         * Opens the ePaper start page and walks through the two-step login form (e-mail first,
         * then password) using the configured credentials.
         *
         * <p>This method only logs in. Downloading is left to {@link #downloadCurrent()}, so the
         * PDF is fetched once and only when requested.
         *
         * @throws Exception if a Playwright action fails or times out
         */
        private void login( ) throws Exception {
            LOGGER.info( ">>> Opening NZZ ePaper.." );
            page.navigate( "https://epaper.nzz.ch/" );
            page.waitForSelector( ":text('Anmelden')" );
            page.waitForLoadState( );

            LOGGER.info( ">>> Navigate to login page.." );
            page.getByText( "Anmelden" ).click( );
            page.waitForSelector( ":text('E-Mail-Adresse')" );
            page.waitForLoadState( );

            LOGGER.info( ">>> Inserting email data.." );
            page.getByPlaceholder( "E-Mail-Adresse" ).fill( configuration.credentials.login );
            page.getByRole( AriaRole.BUTTON, new Page.GetByRoleOptions( ).setName( "Weiter" ) ).click( );
            page.waitForLoadState( );

            LOGGER.info( ">>> Inserting password.." );
            page.getByRole( AriaRole.TEXTBOX, new Page.GetByRoleOptions( ).setName( "Passwort*" ) )
                .fill( configuration.credentials.password );
            page.waitForLoadState( );
            page.getByRole( AriaRole.BUTTON, new Page.GetByRoleOptions( ).setName( "Anmelden" ) ).click( );
            page.waitForLoadState( );
        }

        /**
         * Reads the JSON configuration into {@link #configuration}. Unquoted field names and
         * comments are accepted in the file. On an I/O or parse error the error is logged and
         * the JVM exits with status 1.
         *
         * @param configurationFile path of the configuration file, read as UTF-8
         */
        private void initializeFromFile( Path configurationFile ) {
            ObjectMapper objectMapper = new ObjectMapper( );
            objectMapper.enable( Feature.ALLOW_UNQUOTED_FIELD_NAMES );
            objectMapper.enable( Feature.ALLOW_COMMENTS );

            try( BufferedReader configurationReader =
                    Files.newBufferedReader( configurationFile, StandardCharsets.UTF_8 ) ) {
                LOGGER.info( "Reading configuration from '" + configurationFile + "'.." );
                configuration = objectMapper.readValue( configurationReader, Configuration.class );
            } catch ( IOException e ) {
                LOGGER.error( "Failed to read the configuration file '" + configurationFile + "':\n", e );
                System.exit( 1 );
            }
        }

        @Option( names = { "-c", "--config" }, description = "file (in JSON)",
            defaultValue = DEFAULT_CONFGURATION )
        private String configFile = DEFAULT_CONFGURATION;

        // NOTE(review): this flag and --headless are initialised to true, so passing the flag
        // may have no effect or may toggle it off depending on the picocli version - confirm.
        @Option( names = { "--download-current" }, description = "download only today's PDF" )
        private boolean downloadCurrent = true;

        @Option( names = { "--headless" }, description = "do not show browser" )
        private boolean headless = true;

        @Option( names = { "--debug" }, description = "show lots of debug output" )
        private boolean debug = false;

        /**
         * Runs one complete session: read configuration, start the browser, log in, optionally
         * download the current issue, log out. The browser and the Playwright driver are shut
         * down afterwards, also when a step fails.
         *
         * @return exit code 0 on success
         * @throws Exception if any browser step fails
         */
        @Override
        public Integer call( ) throws Exception {
            initializeFromFile( new File( configFile ).toPath( ) );
            try {
                initializePlaywright( headless );
                initializePage( );
                login( );
                if( downloadCurrent ) {
                    downloadCurrent( );
                }
                logout( );
            } finally {
                shutdownPlaywright( );
            }
            return 0;
        }
    }

    /**
     * Parses the command line, executes {@link NZZGatherer} and exits the JVM with the
     * resulting exit code (1 if an unexpected exception escapes).
     *
     * @param args command line arguments
     */
    public static void main( String... args ) {
        try {
            int exitCode = new CommandLine( new NZZGatherer( ) ).execute( args );
            System.exit( exitCode );
        } catch( Exception e ) {
            LOGGER.error( e );
            System.exit( 1 );
        }
    }
}