# 1. Use Python to generate fake access-log data.
import random
import time
# Candidate URL paths for the generated log lines.
url_paths=[
"/class/112.html",
"/class/132.html",
"/class/146.html",
"/class/177.html",
"/class/212.html",
"/class/342.html",
"/class/202.html",
"/class/562.html",
"/class/862.html",
"/course/111.html",
"/course/332.html",
"/learn/103.html",
"/learn/992.html",
"/error/172.html"
]
# Octet pool used to assemble random dotted-quad IP addresses.
ip_slices=[12,152,153,198,214,123,26,45,99,45,36,72,99,203,204,129,238]
# Referer URL templates; the {query} placeholder is filled with a search keyword.
http_referers=[
"http://www.baidu.com/s?wd={query}",
"http://www.sogou.com/wb?wd={query}",
"http://www.bing.com/search?wd={query}",
"http://www.yahoo.com/q?wd={query}",
"http://www.meituan.com/look?se={query}",
"http://www.tencent.com/find?p={query}",
]
# Search keywords substituted into the referer templates above.
search_word=[
"sparksql实战",
"HIVE数据仓库",
"python爬虫实战",
"spark-streaming流计算",
"KAFKA数据传输",
"hadoop大数据基础",
"scala语言"
]
# Status-code strings for the fake records; "-" presumably stands for
# "no code logged" — TODO confirm against the downstream parser.
status_codes=["200","-","423","-","-","123","200","-","105","-","-","184","-","-"]
def course_gennerate():
    """Return one URL path chosen uniformly at random from ``url_paths``."""
    # random.choice is the idiomatic way to pick a single element;
    # random.sample(xs, 1)[0] builds a throwaway one-element list.
    return random.choice(url_paths)
def ip_gennerate():
    """Build a random dotted-quad IP string from four distinct octets."""
    # sample() draws without replacement, so the four octets are distinct.
    octets = random.sample(ip_slices, 4)
    return ".".join(map(str, octets))
def state_code():
    """Return one status-code string picked uniformly from ``status_codes``."""
    # random.choice replaces the roundabout random.sample(xs, 1)[0] idiom.
    return random.choice(status_codes)
def link_gennerate():
    """Return a referer URL with a random search keyword filled into {query}."""
    # random.choice replaces the roundabout random.sample(xs, 1)[0] idiom.
    referer_template = random.choice(http_referers)
    keyword = random.choice(search_word)
    return referer_template.format(query=keyword)
# NOTE(review): stray top-level call — its return value is discarded and it has
# no visible side effect beyond advancing the RNG state; presumably leftover
# from debugging. Confirm and remove if so.
state_code()
def generate_log(count=20, path="/home/hadoop/data/project_test/access.log"):
    """Write *count* fake access-log lines to *path*, one record per line.

    Each record is tab-separated: timestamp, URL path, IP, referer, status
    code. The timestamp is computed once, so every line in a batch shares it
    (matching the original behavior).

    Args:
        count: number of log lines to emit.
        path: output file; opened with "w+", so it is truncated each run.
    """
    time_str = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
    # "with" guarantees the handle is flushed and closed even on error;
    # the original opened the file and never closed it (resource leak).
    with open(path, "w+") as f:
        for _ in range(count):
            query_log = "{localtime}\t{url}\t{ip}\t{link}\t{state_code}".format(
                url=course_gennerate(),
                ip=ip_gennerate(),
                link=link_gennerate(),
                state_code=state_code(),
                localtime=time_str)
            print(query_log)
            f.write(query_log + "\n")
# Run only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    generate_log()
# 2. Install this as a scheduled job (e.g. a crontab entry) so the log is regenerated periodically.