package SCANNER; import javax.swing.JFrame; import javax.swing.BorderFactory; import javax.swing.ImageIcon; import javax.swing.JButton; import javax.swing.JDialog; import javax.swing.JFileChooser; import java.awt.event.ActionListener; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; import java.io.FileReader; import java.io.FileWriter; import java.util.ArrayList; import java.util.List; import java.awt.event.ActionEvent; import javax.swing.JProgressBar; import javax.swing.SwingUtilities; import javax.swing.SwingWorker; import javax.swing.UIManager; import javax.swing.filechooser.FileNameExtensionFilter; import org.jsoup.Connection.Response; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import javax.swing.JLabel; import javax.swing.JMenu; import javax.swing.JMenuBar; import javax.swing.JOptionPane; import javax.swing.JPanel; import java.awt.SystemColor; import java.awt.Toolkit; import java.awt.Color; import java.awt.Dimension; import java.awt.Font; import java.awt.GraphicsEnvironment; import javax.swing.SwingConstants; import javax.swing.JTextArea; import javax.swing.JScrollBar; import javax.swing.JScrollPane; import javax.swing.ScrollPaneConstants; import java.awt.event.MouseAdapter; import java.awt.event.MouseEvent; import javax.swing.JMenuItem; public class ADSTEXTSCANNER extends JFrame{ static boolean should_I_Run = true; JLabel lbl_Filepath = new JLabel(""); JTextArea textArea = new JTextArea(); public static List mysites = new ArrayList(); JLabel lbl_Count = new JLabel("Sites Found:"); JLabel lblNewLabel_countvalue = new JLabel(""); static String testsite = "https://www.yahoo.com/ads.txt"; static int isfirst = 0; static String ids = ""; static int timeout = 8000; static Color blueblue = Color.decode("#0a1d5a"); static String finalreport = "C:///JAVA/ADSTXTCRAWLER/ADSTEXTREPORTMARCH16.txt"; static String errorreport = "C:///JAVA/ADSTXTCRAWLER/ADSTEXTMISSINGMARCH16.txt"; static String IXmissingreport = "C:///JAVA/ADSTXTCRAWLER/IXMISSINGMARCH16.txt"; static String sitelist = "C:///JAVA/ADSTXTCRAWLER/NICK.txt"; //list of sites we could not get working static String IXfilenotfound = "C:///JAVA/ADSTXTCRAWLER/REPORTS/ADS_TXT_IS_MISSING.txt"; static Font custonFont18; static Font custonFont16; static Font custonFont12; public ADSTEXTSCANNER() { this.setSize(800,600); this.setTitle("Ads.txt Crawler"); getContentPane().setLayout(null); this.setIconImage(new ImageIcon(getClass().getClassLoader().getResource("redspider.JPG")).getImage()); //this.setIconImage(new ImageIcon("IMAGE/Icon.png").getImage()); //this.setIconImage(new ImageIcon(getClass().getClassLoader().getResource("Icon.png")).getImage()); try { UIManager.setLookAndFeel("com.sun.java.swing.plaf.windows.WindowsLookAndFeel"); SwingUtilities.updateComponentTreeUI(this); //this.pack(); //custonFont18 = Font.createFont(Font.TRUETYPE_FONT, new File("FONT/IBMPlexSans-Medium.ttf")).deriveFont(18f); //custonFont16 = Font.createFont(Font.TRUETYPE_FONT, new File("FONT/IBMPlexSans-Medium.ttf")).deriveFont(16f); //custonFont12 = Font.createFont(Font.TRUETYPE_FONT, new File("FONT/IBMPlexSans-Medium.ttf")).deriveFont(12f); custonFont18 = Font.createFont(Font.TRUETYPE_FONT, getClass().getClassLoader().getResourceAsStream("IBMPlexSans-Medium.ttf")).deriveFont(18f); custonFont16 = Font.createFont(Font.TRUETYPE_FONT, getClass().getClassLoader().getResourceAsStream("IBMPlexSans-Medium.ttf")).deriveFont(16f); custonFont12 = Font.createFont(Font.TRUETYPE_FONT, getClass().getClassLoader().getResourceAsStream("IBMPlexSans-Medium.ttf")).deriveFont(12f); GraphicsEnvironment ge = GraphicsEnvironment.getLocalGraphicsEnvironment(); ge.registerFont(custonFont18); ge.registerFont(custonFont16); ge.registerFont(custonFont12); } catch(Exception dfd) { } JLabel lbl_TotalValue = new JLabel(""); lbl_TotalValue.setBounds(481, 177, 49, 14); getContentPane().add(lbl_TotalValue); JButton btnLoadSiteList = new JButton(new ImageIcon(getClass().getClassLoader().getResource("button_load.png"))); //JButton btnLoadSiteList = new JButton(getClass().getClassLoader().getResource("button_load.png"))); btnLoadSiteList.addMouseListener(new MouseAdapter() { @Override public void mouseEntered(MouseEvent e) { //mouse over event btnLoadSiteList.setIcon(new ImageIcon(getClass().getClassLoader().getResource("button_loaddown.png"))); } @Override public void mouseExited(MouseEvent e) { //mouse out event btnLoadSiteList.setIcon(new ImageIcon(getClass().getClassLoader().getResource("button_load.png"))); } }); btnLoadSiteList.addActionListener(new ActionListener() { public void actionPerformed(ActionEvent e) { //load site list function try { JFileChooser chooser = new JFileChooser("C:\\JAVA\\SIMILARWEB\\"); FileNameExtensionFilter filter = new FileNameExtensionFilter("csv", "txt"); chooser.setFileFilter(filter); int returnVal = chooser.showOpenDialog(null); String filepath = chooser.getSelectedFile().getAbsolutePath().toString(); String filename = chooser.getSelectedFile().getName(); String path = chooser.getSelectedFile().getPath(); if(returnVal == JFileChooser.APPROVE_OPTION) { System.out.println("You choose to open this file: " + filepath); lbl_Filepath.setText(filename); finalreport = path + "ADSTEXTREPORT.txt"; System.out.println("You choose to open this file: " + filepath); System.out.println("Report path is: " + finalreport); IXfilenotfound = path + "IXNOTFOUND.txt"; try { mysites.clear(); File file = new File(filepath); FileReader fr = new FileReader(file); BufferedReader br = new BufferedReader(fr); String line; while((line = br.readLine()) != null){ //process the line mysites.add(line); } br.close(); lbl_TotalValue.setText(Integer.toString(mysites.size())); } catch(Exception dffdf) { } } } catch(Exception dfdf) { } } }); btnLoadSiteList.setBounds(180, 104, 139, 29); getContentPane().add(btnLoadSiteList); JLabel lblAdstxtCrawler = new JLabel("Ads.txt Crawler"); lblAdstxtCrawler.setHorizontalAlignment(SwingConstants.CENTER); lblAdstxtCrawler.setForeground(blueblue); lblAdstxtCrawler.setFont(custonFont18); lblAdstxtCrawler.setBackground(blueblue); lblAdstxtCrawler.setBounds(0, 31, 766, 85); getContentPane().add(lblAdstxtCrawler); /* JLabel lblAdstxtReportMade = new JLabel("Ads.txt report made easy "); lblAdstxtReportMade.setVerticalAlignment(SwingConstants.TOP); lblAdstxtReportMade.setHorizontalAlignment(SwingConstants.CENTER); lblAdstxtReportMade.setForeground(blueblue); lblAdstxtReportMade.setFont(custonFont16); lblAdstxtReportMade.setBackground(SystemColor.textInactiveText); lblAdstxtReportMade.setBounds(10, 71, 766, 59); getContentPane().add(lblAdstxtReportMade); */ //JTextArea textArea = new JTextArea(); textArea.setBounds(144, 218, 472, 206); //getContentPane().add(textArea); textArea.setForeground(blueblue); textArea.setFont(custonFont12); JScrollPane scrollPane = new JScrollPane(textArea); scrollPane.setVerticalScrollBarPolicy(ScrollPaneConstants.VERTICAL_SCROLLBAR_ALWAYS); scrollPane.setBounds(144, 218, 472, 206); getContentPane().add(scrollPane); JProgressBar progressBar = new JProgressBar(); progressBar.setForeground(Color.decode("#4e619f")); progressBar.setBounds(259, 452, 250, 23); progressBar.setStringPainted(true); getContentPane().add(progressBar); //JLabel lblNewLabel_IXICON = new JLabel(new ImageIcon("IMAGE/IX.JPG") JLabel lblNewLabel_IXICON = new JLabel(new ImageIcon(getClass().getClassLoader().getResource("IX.JPG"))); lblNewLabel_IXICON.setBounds(0, 40, 246, 59); getContentPane().add(lblNewLabel_IXICON); lbl_Count.setBounds(197, 177, 75, 14); lbl_Count.setForeground(blueblue); lbl_Count.setFont(custonFont12); getContentPane().add(lbl_Count); lblNewLabel_countvalue.setBounds(285, 177, 77, 14); lblNewLabel_countvalue.setForeground(blueblue); lblNewLabel_countvalue.setFont(custonFont12); getContentPane().add(lblNewLabel_countvalue); lbl_Filepath.setBounds(419, 104, 194, 29); lbl_Filepath.setForeground(blueblue); lbl_Filepath.setFont(custonFont12); getContentPane().add(lbl_Filepath); JLabel lbl_Total = new JLabel("Total:"); lbl_Total.setForeground(blueblue); lbl_Total.setFont(custonFont12); lbl_Total.setBounds(422, 177, 49, 14); getContentPane().add(lbl_Total); JButton btnStart = new JButton(new ImageIcon(getClass().getClassLoader().getResource("button_start.png"))); btnStart.addMouseListener(new MouseAdapter() { @Override public void mouseEntered(MouseEvent e) { //mouse over for start button //btnStart.setIcon(defaultIcon); btnStart.setIcon(new ImageIcon(getClass().getClassLoader().getResource("button_startdown.png"))); } @Override public void mouseExited(MouseEvent e) { //mouse out event btnStart.setIcon(new ImageIcon(getClass().getClassLoader().getResource("button_start.png"))); } }); btnStart.setBorder(BorderFactory.createEmptyBorder()); btnStart.setContentAreaFilled(false); btnStart.addActionListener(new ActionListener() { public void actionPerformed(ActionEvent e) { if(mysites.size()>0) { shouldIRun(true); updateGUI(textArea, lblNewLabel_countvalue, progressBar); } else { shouldIRun(false); JOptionPane.showMessageDialog(null, "Please load a site list first..."); } } }); JLabel lbl_ReportName = new JLabel("Report Name:"); lbl_ReportName.setFont(custonFont12); lbl_ReportName.setForeground(blueblue); lbl_ReportName.setBounds(197, 152, 122, 14); getContentPane().add(lbl_ReportName); JLabel lblFilename = new JLabel("Adstextreport.txt"); lblFilename.addMouseListener(new MouseAdapter() { @Override public void mouseClicked(MouseEvent e) { //show file chooser to select a different location to save the file try { JFileChooser choosersaverlocation = new JFileChooser("C:\\JAVA\\SIMILARWEB\\"); FileNameExtensionFilter filter = new FileNameExtensionFilter("csv", "txt"); choosersaverlocation.setFileFilter(filter); int returnVal = choosersaverlocation.showOpenDialog(null); String filepath2 = choosersaverlocation.getSelectedFile().getAbsolutePath().toString(); String filename2 = choosersaverlocation.getSelectedFile().getName(); String path2 = choosersaverlocation.getSelectedFile().getPath(); if(returnVal == JFileChooser.APPROVE_OPTION) { lblFilename.setText(filepath2 ); finalreport = filepath2; } } catch(Exception fdfd) { } } }); lblFilename.setFont(custonFont12); lblFilename.setForeground(blueblue); lblFilename.setBounds(419, 152, 267, 14); getContentPane().add(lblFilename); btnStart.setBounds(197, 498, 154, 54); getContentPane().add(btnStart); //JButton btnNewButton_2 = new JButton(new ImageIcon("IMAGE/button_stop.png")); ImageIcon image = new ImageIcon(getClass().getClassLoader().getResource("button_stop.png")); JButton btnStop = new JButton(image); btnStop.addMouseListener(new MouseAdapter() { @Override public void mouseEntered(MouseEvent e) { //mouse over btnStop.setIcon(new ImageIcon(getClass().getClassLoader().getResource("button_stopdown.png"))); } @Override public void mouseExited(MouseEvent e) { //mouse out event btnStop.setIcon(new ImageIcon(getClass().getClassLoader().getResource("button_stop.png"))); } }); btnStop.setBorder(BorderFactory.createEmptyBorder()); btnStop.setContentAreaFilled(false); btnStop.addActionListener(new ActionListener() { public void actionPerformed(ActionEvent e) { //stop crawler here shouldIRun(false); } }); btnStop.setBounds(413, 498, 160, 54); getContentPane().add(btnStop); //JLabel lblNewLabel_1 = new JLabel(""); //lblNewLabel_1.setIcon(new ImageIcon("IMAGE/background3.jpg")); //lblNewLabel_1.setBounds(0, 0, 800, 600); //getContentPane().add(lblNewLabel_1); getContentPane().setBackground(Color.WHITE); JMenuBar menuBar = new JMenuBar(); menuBar.setBounds(0, 0, 99, 22); JMenu file,help; file=new JMenu("File"); file.setFont(custonFont12); help=new JMenu("Help"); help.setFont(custonFont12); JMenuItem jsettings = new JMenuItem("Settings"); jsettings.setBackground(Color.WHITE); jsettings.setFont(custonFont12); jsettings.addActionListener( new ActionListener(){ public void actionPerformed(ActionEvent e) { ShowSettingPanel(); } } ); //mntmNewMenuItem.setBounds(20, 25, 135, 27); file.add(jsettings); JMenuItem jexit = new JMenuItem("Exit"); jexit.setBackground(Color.WHITE); jexit.setFont(custonFont12); jexit.addActionListener( new ActionListener(){ public void actionPerformed(ActionEvent e) { System.exit(-1); } } ); //mntmNewMenuItem.setBounds(20, 25, 135, 27); file.add(jexit); JMenuItem jabout = new JMenuItem("About"); jabout.setBackground(Color.WHITE); jabout.setFont(custonFont12); jabout.addActionListener( new ActionListener(){ public void actionPerformed(ActionEvent e) { ShowAboutPanel(); } } ); help.add(jabout); JMenuItem jhelp = new JMenuItem("Help"); jhelp.setBackground(Color.WHITE); jhelp.setFont(custonFont12); jhelp.addActionListener( new ActionListener(){ public void actionPerformed(ActionEvent e) { ShowHelpPanel(); } } ); //mntmNewMenuItem.setBounds(20, 25, 135, 27); help.add(jhelp); menuBar.add(file); menuBar.add(help); menuBar.setBackground(Color.WHITE); getContentPane().add(menuBar); Dimension dimension = Toolkit.getDefaultToolkit().getScreenSize(); int x1 = (int) ((dimension.getWidth() - this.getWidth()) / 2); int y1 = (int) ((dimension.getHeight() - this.getHeight()) / 2); this.setLocation(x1, y1); this.setResizable(false); this.setVisible(true); setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); } public static void ShowHelpPanel() { System.out.println("Help panel"); } public static void ShowAboutPanel() { System.out.println("About panel"); } public static void ShowSettingPanel() { System.out.println("Settings panel"); } public static void shouldIRun(boolean x) { should_I_Run = x; } public static void updateGUI(JTextArea textArea,JLabel lblNewLabel_countvalue, JProgressBar progressBar) { int total = mysites.size(); progressBar.setMaximum(total); System.out.println("Starting crawler"); textArea.append("Starting Crawler"); SwingWorker myWorker= new SwingWorker() { @Override protected String doInBackground() throws Exception { try { for(int i=0; i< mysites.size() && should_I_Run; i++) { textArea.append(mysites.get(i).toString() + "\n"); lblNewLabel_countvalue.setText(Integer.toString(i) ); //crawler code here try { testsite = "https://"+mysites.get(i)+"/ads.txt"; System.out.println("Checking ...https://"+ mysites.get(i)); Document doc = Jsoup.connect(testsite).followRedirects(false).timeout(timeout).userAgent("Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/25.0").followRedirects(true).ignoreHttpErrors(false).get(); BufferedWriter writerfinalreport = new BufferedWriter(new FileWriter(finalreport, true)); String body = doc.body().text(); doc.outputSettings(new Document.OutputSettings().prettyPrint(false));//makes html() preserve linebreaks and spacing doc.select("br").append("\\n"); doc.select("p").prepend("\\n\\n"); String s = doc.html().replaceAll("\\\\n", "\n"); Response response = Jsoup.connect(testsite).followRedirects(false).timeout(timeout).userAgent("Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/25.0").ignoreHttpErrors(false).execute(); boolean noadstextfilefound = false; System.out.println("Response tester: for " + testsite +" " + response.statusCode() + " : " + response.url()); String responseURL = response.url() +"ads.txt"; responseURL = responseURL.replace("www.", ""); //!response.url().equals(testsite.toString()) /* if(!responseURL.equalsIgnoreCase(testsite)) { System.out.println("Did not find " + testsite + " " + responseURL); BufferedWriter writerADSTEXTMISSING = new BufferedWriter(new FileWriter(IXfilenotfound, true)); writerADSTEXTMISSING.newLine(); String tempstring = testsite + ",404"; writerADSTEXTMISSING.write(tempstring); writerADSTEXTMISSING.flush(); writerADSTEXTMISSING.close(); noadstextfilefound = true; } */ System.out.println("Response" + response.statusCode() + " : " + response.url()); //write 404 response here file missing if (404 == response.statusCode() && !noadstextfilefound) { BufferedWriter writerADSTEXTMISSING = new BufferedWriter(new FileWriter(IXfilenotfound, true)); writerADSTEXTMISSING.newLine(); String tempstring = testsite + ",404"; writerADSTEXTMISSING.write(tempstring); writerADSTEXTMISSING.flush(); writerADSTEXTMISSING.close(); }/* else if(301 == response.statusCode()) { //throw new Exception("301 error code"); }*/ else if(403 == response.statusCode()) { //file blocked BufferedWriter writerADSTEXTMISSING = new BufferedWriter(new FileWriter(IXfilenotfound, true)); writerADSTEXTMISSING.newLine(); String tempstring = testsite + ",403"; writerADSTEXTMISSING.write(tempstring); writerADSTEXTMISSING.flush(); writerADSTEXTMISSING.close(); } else if(!noadstextfilefound) { String[]arrOfStr = s.split("\n"); ids = ""; boolean IXfound = false; boolean IXdirect = false; boolean IXreseller = false; String whattype = ""; for(int x=0; x < arrOfStr.length;x++) { //System.out.println("Checking this: " + arrOfStr[x].toLowerCase()); //if(arrOfStr[x].contains("Indexexchange.com") || arrOfStr[x].toLowerCase().contains("indexexchange.com") ) //we need to get the next id and write to file if(arrOfStr[x].toLowerCase().contains("indexexchange.com") || arrOfStr[x].toLowerCase().contains("indexexchange.com,")) //we need to get the next id and write to file { IXfound = true; //CHECKING FOR DIRECT OR RESELLER if(arrOfStr[x].toLowerCase().contains("direct") || arrOfStr[x].toLowerCase().contains("direct,")) { IXdirect = true; whattype = "DIRECT"; } else if(arrOfStr[x].toLowerCase().contains("reseller") || arrOfStr[x].toLowerCase().contains("reseller,")) { IXreseller = true; whattype = "RESELLER"; } else { whattype = " "; } //get ID here try { String[] idfind = arrOfStr[x].split(","); //indexexchange.com ,187454 ,RESELLER ##s2s HANDLE THIS isfirst++; System.out.println(mysites.get(i) + "," +idfind[1]+","+whattype); //arrOfStr[x+1] is id writerfinalreport.newLine(); writerfinalreport.write(mysites.get(i) + "," +idfind[1]+","+whattype); writerfinalreport.flush(); textArea.append(mysites.get(i) + "," +idfind[1]+","+whattype + "\n"); lblNewLabel_countvalue.setText(Integer.toString(i)); } catch(Exception sdsd)//TRY SPACES { String[] idfind = arrOfStr[x].split(" "); //indexexchange.com 187454 RESELLER ##s2s HANDLE THIS isfirst++; System.out.println(mysites.get(i) + "," +idfind[1]+","+whattype); //arrOfStr[x+1] is id writerfinalreport.newLine(); writerfinalreport.write(mysites.get(i) + "," +idfind[1]+","+whattype); writerfinalreport.flush(); textArea.append(mysites.get(i) + "," +idfind[1]+","+whattype + "\n"); lblNewLabel_countvalue.setText(Integer.toString(i)); } } } System.out.println("Counter: " + i); if(!IXfound) { //WE NEED TO RECORD THAT A FILE WAS FOUND BUT NO IX ENTRY FOUND. String titletext = ""; titletext = doc.title(); // System.out.println("No IX entry found " + titletext); if(titletext.equalsIgnoreCase("")) { System.out.println("Saving IX missing"); //check for title or body and if that is found then no ads.txt else it is missing BufferedWriter writerfinalerrorreport = new BufferedWriter(new FileWriter(IXmissingreport, true)); writerfinalerrorreport.newLine(); writerfinalerrorreport.write(testsite); writerfinalerrorreport.flush(); writerfinalerrorreport.close(); } else { System.out.println("Saving Ads.txt not file not found"); BufferedWriter writerADSTEXTMISSING = new BufferedWriter(new FileWriter(IXfilenotfound, true)); writerADSTEXTMISSING.newLine(); String tempstring = testsite + ",403"; writerADSTEXTMISSING.write(tempstring); writerADSTEXTMISSING.flush(); writerADSTEXTMISSING.close(); } //writerfinalerrorreport.newLine(); } } } catch(Exception fdfdedfdd) { try { testsite = "http://"+mysites.get(i)+"/ads.txt"; System.out.println("Checking ...http://"+ mysites.get(i)); Document doc = Jsoup.connect(testsite).followRedirects(true).timeout(timeout).userAgent("Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/25.0").ignoreHttpErrors(false).get(); BufferedWriter writerfinalreport = new BufferedWriter(new FileWriter(finalreport, true)); String body = doc.body().text(); doc.outputSettings(new Document.OutputSettings().prettyPrint(false));//makes html() preserve linebreaks and spacing doc.select("br").append("\\n"); doc.select("p").prepend("\\n\\n"); String s = doc.html().replaceAll("\\\\n", "\n"); Response response = Jsoup.connect(testsite).timeout(timeout).userAgent("Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/25.0").ignoreHttpErrors(false).execute(); System.out.println("Response" + response.statusCode() + " : " + response.url()); // Response response = Jsoup.connect(testsite).timeout(timeout).userAgent("Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/25.0").ignoreHttpErrors(false).execute(); boolean noadstextfilefound = false; //System.out.println("Response tester: " + response.statusCode() + " : " + response.url()); /* if(!response.url().equals(testsite.toString())) { System.out.println("Did not find ads.txt"); BufferedWriter writerADSTEXTMISSING = new BufferedWriter(new FileWriter(IXfilenotfound, true)); writerADSTEXTMISSING.newLine(); String tempstring = testsite + ",404"; writerADSTEXTMISSING.write(tempstring); writerADSTEXTMISSING.flush(); writerADSTEXTMISSING.close(); noadstextfilefound = true; } */ //write 404 response here file missing // if(404 == response.statusCode()) { BufferedWriter writerADSTEXTMISSING = new BufferedWriter(new FileWriter(IXfilenotfound, true)); writerADSTEXTMISSING.newLine(); String tempstring = testsite + ",404"; writerADSTEXTMISSING.write(tempstring); writerADSTEXTMISSING.flush(); writerADSTEXTMISSING.close(); } else if(403 == response.statusCode()) { //file blocked BufferedWriter writerADSTEXTMISSING = new BufferedWriter(new FileWriter(IXfilenotfound, true)); writerADSTEXTMISSING.newLine(); String tempstring = testsite + ",403"; writerADSTEXTMISSING.write(tempstring); writerADSTEXTMISSING.flush(); writerADSTEXTMISSING.close(); } else { String[]arrOfStr = s.split("\n"); ids = ""; boolean IXfound = false; boolean IXdirect = false; boolean IXreseller = false; String whattype = ""; for(int x=0; x < arrOfStr.length;x++) { // System.out.println("Checking this: " + arrOfStr[x].toLowerCase()); if(arrOfStr[x].toLowerCase().contains("indexexchange.com")) //we need to get the next id and write to file { IXfound = true; isfirst++; //CHECKING FOR DIRECT OR RESELLER if(arrOfStr[x].toLowerCase().contains("direct") || arrOfStr[x].toLowerCase().contains("direct,")) { IXdirect = true; whattype = "DIRECT"; } else if(arrOfStr[x].toLowerCase().contains("reseller") || arrOfStr[x].toLowerCase().contains("reseller,")) { IXreseller = true; whattype = "RESELLER"; } else { whattype = " "; } try { String[] idfind = arrOfStr[x].split(","); isfirst++; String towritre = mysites.get(i) + "," +idfind[1]+","+whattype; towritre.replaceAll("(\\r|\\n)", ""); System.out.println("hre " + towritre); writerfinalreport.newLine(); writerfinalreport.write(towritre); writerfinalreport.flush(); textArea.append(mysites.get(i) + "," +idfind[1]+","+whattype + "\n"); lblNewLabel_countvalue.setText(Integer.toString(i)); } catch(Exception dfddd) { String[] idfind = arrOfStr[x].split(" "); isfirst++; System.out.println(mysites.get(i) + "," +idfind[1]+","+whattype); //arrOfStr[x+1] is id writerfinalreport.newLine(); writerfinalreport.write(mysites.get(i) + "," +idfind[1]+","+whattype); writerfinalreport.flush(); textArea.append(mysites.get(i) + "," +idfind[1]+","+whattype + "\n"); lblNewLabel_countvalue.setText(Integer.toString(i)); } } } System.out.println("Counter: " + i); if(!IXfound) { //WE NEED TO RECORD THAT A FILE WAS FOUND BUT NO IX ENTRY FOUND. String titletext = ""; titletext = doc.title(); // System.out.println("No IX entry found" + titletext); if(titletext.equalsIgnoreCase("")) { //check for title or body and if that is found then no ads.txt else it is missing BufferedWriter writerfinalerrorreport = new BufferedWriter(new FileWriter(IXmissingreport, true)); writerfinalerrorreport.newLine(); writerfinalerrorreport.write(testsite); writerfinalerrorreport.flush(); writerfinalerrorreport.close(); } else { BufferedWriter writerADSTEXTMISSING = new BufferedWriter(new FileWriter(IXfilenotfound, true)); writerADSTEXTMISSING.newLine(); String tempstring = testsite + ",403"; writerADSTEXTMISSING.write(tempstring); writerADSTEXTMISSING.flush(); writerADSTEXTMISSING.close(); } } } } catch(Exception dfd) { try { System.out.println("Crashed because: " + dfd.toString()); System.out.println("Counter: " + i); //WRITE ERROR REPORT HERE. BufferedWriter writerfinalerror = new BufferedWriter(new FileWriter(errorreport, true)); writerfinalerror.newLine(); writerfinalerror.write(testsite +","+dfd.toString().replace(",", "")); //remove ,'s writerfinalerror.flush(); } catch(Exception fdfd) { } } } progressBar.setValue(i); //end crawler code here } progressBar.setValue(progressBar.getMaximum()); alert_Done(); } catch(Exception ere) { } return null; } }; myWorker.execute(); } public static void alert_Done() { System.out.println("Done crawling report ready.."); JOptionPane.showMessageDialog(null, "Done crawling site list"); } public static void main(String[] args) { // TODO Auto-generated method stub ADSTEXTSCANNER ads = new ADSTEXTSCANNER(); } }