Create a grab-links.py file with the following contents:
from bs4 import BeautifulSoup
import argparse
def get_args(argv=None):
    """Parse the command-line arguments for the link grabber.

    Args:
        argv: Optional list of argument strings to parse instead of
            ``sys.argv[1:]``. Leaving it as ``None`` keeps the default
            command-line behaviour.

    Returns:
        An ``argparse.Namespace`` whose ``file`` attribute is a non-empty
        list of paths (``nargs='+'`` requires at least one).
    """
    parser = argparse.ArgumentParser(
        # The module docstring doubles as the --help description; the raw
        # formatter keeps its line breaks intact.
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        'file',
        nargs='+',
        help='saved html file to get urls from',
    )
    return parser.parse_args(argv)
def main():
    """Print the href of every anchor tag in each saved HTML file given.

    For each path from the command line, the path itself is printed first,
    followed by one line per ``<a>`` tag that carries an ``href`` attribute.
    With a single file argument the output layout is unchanged.
    """
    args = get_args()
    # nargs='+' accepts several files, so handle all of them rather than
    # silently ignoring everything after the first.
    for path in args.file:
        print(path)
        # The context manager closes the file even if reading or parsing raises.
        with open(path, "r") as html_file:
            soup = BeautifulSoup(html_file.read(), 'lxml')
        # find_all is the current name for the deprecated findAll alias;
        # href=True skips anchors without an href, which would otherwise
        # print the literal string "None".
        for anchor in soup.find_all('a', href=True):
            print(anchor.get('href'))
# Run the link grabber only when executed as a script, not when imported.
if __name__ == '__main__':
    main()