#!/usr/bin/env ruby
# frozen_string_literal: true

# Parses an HTML file with Nokogiri and re-serializes it, either to STDOUT
# (when --printout is given a non-empty value) or to the file named by
# --outputhtml. All diagnostics go to STDERR so STDOUT carries only the HTML.
#
# Exit status: 0 on success or --help; 1 on invalid arguments, a missing
# input file, or a read/parse/write failure.
#
# NOTE(review): the document is serialized with a plain `to_html` call, with no
# explicit encoding argument; confirm the output is really UTF-8 as the script
# name suggests.

require "nokogiri"
require "open-uri"
require 'optparse'

options = {}

opt_parser = OptionParser.new do |opt|
  opt.banner = 'Usage: convert-html-to-utf8.rb [OPTIONS]'
  opt.separator ''
  opt.separator 'Options'
  opt.separator ' ****'
  opt.separator ' ****'
  opt.separator ' ****'
  opt.separator ''

  opt.on('-p', '--printout PRINTOUT',
         'any non-empty value prints the resulting HTML to STDOUT') do |printout|
    options['printout'] = printout
  end

  opt.on('-i', '--inputhtml HTMLIN', 'path of the HTML file to read') do |inputfile|
    options['inputhtmlfile'] = inputfile
  end

  opt.on('-o', '--outputhtml HTMLOUT', 'path of the HTML file to write') do |outputfile|
    options['outputhtmlfile'] = outputfile
  end

  opt.on('-h', '--help', 'help') do
    puts opt
    # Stop here: asking for help must not fall through to argument validation.
    exit
  end
end

begin
  opt_parser.parse!
rescue OptionParser::ParseError => e
  # Unknown option or missing argument: show a readable message, not a backtrace.
  $stderr.puts(e.message)
  $stderr.puts(opt_parser)
  exit 1
end

# `to_s` folds "option not given" (nil) and "given but empty" into "".
input_path = options['inputhtmlfile'].to_s
output_path = options['outputhtmlfile'].to_s
print_to_stdout = !options['printout'].to_s.empty?

if input_path.empty? || !File.exist?(input_path)
  $stderr.puts("Missing INPUT filename OR the file doesn't exist")
  exit 1
end

if output_path.empty? && !print_to_stdout
  $stderr.puts('Missing OUTPUT filename OR --printout option is not used')
  exit 1
end

$stderr.puts('Printing output to STDOUT') if print_to_stdout

$stderr.puts("Working on file => #{input_path}")

begin
  html_doc = File.open(input_path) { |f| Nokogiri::HTML(f) }
rescue StandardError => e
  $stderr.puts(e)
  $stderr.puts(e.inspect)
  $stderr.puts("Error occurred while reading and parsing html file => #{input_path}")
  exit 1
end

$stderr.puts("Finished processing html file => #{input_path}")

# --printout takes precedence: when it is set, --outputhtml is ignored.
if print_to_stdout
  $stdout.puts(html_doc.to_html)
else
  begin
    File.write(output_path, html_doc.to_html)
  rescue StandardError => e
    $stderr.puts(e)
    $stderr.puts(e.inspect)
    $stderr.puts("Error occurred while writing the html file => #{output_path}")
    exit 1
  end
end